From ed70f3a264e9f746eaf17c96ccc4c9b7eda742dc Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Wed, 4 Jun 2014 10:11:06 +0100 Subject: ASoC: arizona: Implement TDM support for Arizona devices Signed-off-by: Charles Keepax Signed-off-by: Mark Brown --- include/linux/mfd/arizona/core.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/arizona/core.h b/include/linux/mfd/arizona/core.h index 5cf8b91ce996..11783b511b9a 100644 --- a/include/linux/mfd/arizona/core.h +++ b/include/linux/mfd/arizona/core.h @@ -110,6 +110,9 @@ struct arizona { int clk32k_ref; struct snd_soc_dapm_context *dapm; + + int tdm_width[ARIZONA_MAX_AIF]; + int tdm_slots[ARIZONA_MAX_AIF]; }; int arizona_clk32k_enable(struct arizona *arizona); -- cgit v1.2.3-59-g8ed1b From cc9e92431ee9c7fe974266e0e6533a1a68e45539 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Fri, 6 Jun 2014 14:14:05 +0100 Subject: ASoC: wm5102: Add controls to allow shaping of ultrasonic response Add controls to allow custom shaping of the ultrasonic response. This custom shaping can be turned on/off at runtime, although, it should be noted that settings will not affect a currently open audio stream, they will be applied when the next audio stream is started. Signed-off-by: Charles Keepax Signed-off-by: Mark Brown --- include/linux/mfd/arizona/core.h | 3 ++ sound/soc/codecs/arizona.c | 34 ++++++++++++++++++++++ sound/soc/codecs/wm5102.c | 62 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 99 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/arizona/core.h b/include/linux/mfd/arizona/core.h index 11783b511b9a..55926517d50b 100644 --- a/include/linux/mfd/arizona/core.h +++ b/include/linux/mfd/arizona/core.h @@ -113,6 +113,9 @@ struct arizona { int tdm_width[ARIZONA_MAX_AIF]; int tdm_slots[ARIZONA_MAX_AIF]; + + uint16_t dac_comp_coeff; + uint8_t dac_comp_enabled; }; int arizona_clk32k_enable(struct arizona *arizona); diff --git a/sound/soc/codecs/arizona.c b/sound/soc/codecs/arizona.c index e77f61c387f7..41b56ee6ff51 100644 --- a/sound/soc/codecs/arizona.c +++ b/sound/soc/codecs/arizona.c @@ -1127,6 +1127,31 @@ static int arizona_startup(struct snd_pcm_substream *substream, constraint); } +static void arizona_wm5102_set_dac_comp(struct snd_soc_codec *codec, + unsigned int rate) +{ + struct arizona_priv *priv = snd_soc_codec_get_drvdata(codec); + struct arizona *arizona = priv->arizona; + struct reg_default dac_comp[] = { + { 0x80, 0x3 }, + { ARIZONA_DAC_COMP_1, 0 }, + { ARIZONA_DAC_COMP_2, 0 }, + { 0x80, 0x0 }, + }; + + mutex_lock(&codec->mutex); + + dac_comp[1].def = arizona->dac_comp_coeff; + if (rate >= 176400) + dac_comp[2].def = arizona->dac_comp_enabled; + + mutex_unlock(&codec->mutex); + + regmap_multi_reg_write(arizona->regmap, + dac_comp, + ARRAY_SIZE(dac_comp)); +} + static int arizona_hw_params_rate(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params, struct snd_soc_dai *dai) @@ -1153,6 +1178,15 @@ static int arizona_hw_params_rate(struct snd_pcm_substream *substream, switch (dai_priv->clk) { case ARIZONA_CLK_SYSCLK: + switch (priv->arizona->type) { + case WM5102: + arizona_wm5102_set_dac_comp(codec, + params_rate(params)); + break; + default: + break; + } + snd_soc_update_bits(codec, ARIZONA_SAMPLE_RATE_1, ARIZONA_SAMPLE_RATE_1_MASK, sr_val); if (base) diff --git a/sound/soc/codecs/wm5102.c b/sound/soc/codecs/wm5102.c index dcf1d12cfef8..7bf2397fc25a 100644 --- a/sound/soc/codecs/wm5102.c +++ b/sound/soc/codecs/wm5102.c @@ -612,6 +612,62 @@ 
static int wm5102_sysclk_ev(struct snd_soc_dapm_widget *w, return 0; } +static int wm5102_out_comp_coeff_get(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); + struct arizona *arizona = dev_get_drvdata(codec->dev->parent); + uint16_t data; + + mutex_lock(&codec->mutex); + data = cpu_to_be16(arizona->dac_comp_coeff); + memcpy(ucontrol->value.bytes.data, &data, sizeof(data)); + mutex_unlock(&codec->mutex); + + return 0; +} + +static int wm5102_out_comp_coeff_put(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); + struct arizona *arizona = dev_get_drvdata(codec->dev->parent); + + mutex_lock(&codec->mutex); + memcpy(&arizona->dac_comp_coeff, ucontrol->value.bytes.data, + sizeof(arizona->dac_comp_coeff)); + arizona->dac_comp_coeff = be16_to_cpu(arizona->dac_comp_coeff); + mutex_unlock(&codec->mutex); + + return 0; +} + +static int wm5102_out_comp_switch_get(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); + struct arizona *arizona = dev_get_drvdata(codec->dev->parent); + + mutex_lock(&codec->mutex); + ucontrol->value.integer.value[0] = arizona->dac_comp_enabled; + mutex_unlock(&codec->mutex); + + return 0; +} + +static int wm5102_out_comp_switch_put(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); + struct arizona *arizona = dev_get_drvdata(codec->dev->parent); + + mutex_lock(&codec->mutex); + arizona->dac_comp_enabled = ucontrol->value.integer.value[0]; + mutex_unlock(&codec->mutex); + + return 0; +} + static const char *wm5102_osr_text[] = { "Low power", "Normal", "High performance", }; @@ -843,6 +899,12 @@ SOC_SINGLE_TLV("Noise Gate Threshold Volume", ARIZONA_NOISE_GATE_CONTROL, ARIZONA_NGATE_THR_SHIFT, 7, 1, ng_tlv), SOC_ENUM("Noise Gate Hold", arizona_ng_hold), +SND_SOC_BYTES_EXT("Output Compensation Coefficient", 2, + wm5102_out_comp_coeff_get, wm5102_out_comp_coeff_put), + +SOC_SINGLE_EXT("Output Compensation Switch", 0, 0, 1, 0, + wm5102_out_comp_switch_get, wm5102_out_comp_switch_put), + WM5102_NG_SRC("HPOUT1L", ARIZONA_NOISE_GATE_SELECT_1L), WM5102_NG_SRC("HPOUT1R", ARIZONA_NOISE_GATE_SELECT_1R), WM5102_NG_SRC("HPOUT2L", ARIZONA_NOISE_GATE_SELECT_2L), -- cgit v1.2.3-59-g8ed1b From 398fd22b6b94cb15c1c299bceecd63644a1b17b4 Mon Sep 17 00:00:00 2001 From: Peter Meerwald Date: Sat, 6 Dec 2014 06:46:00 +0000 Subject: iio: Remove timestamp argument from iio_trigger_poll() and iio_trigger_poll_chained() argument has been ignored; adjust drivers accordingly Signed-off-by: Peter Meerwald Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad_sigma_delta.c | 2 +- drivers/iio/adc/at91_adc.c | 2 +- drivers/iio/adc/xilinx-xadc-core.c | 2 +- drivers/iio/industrialio-trigger.c | 8 ++++---- drivers/iio/light/gp2ap020a00f.c | 2 +- drivers/iio/proximity/as3935.c | 2 +- drivers/iio/trigger/iio-trig-interrupt.c | 3 +-- drivers/iio/trigger/iio-trig-sysfs.c | 2 +- drivers/staging/iio/accel/lis3l02dq_ring.c | 2 +- drivers/staging/iio/adc/mxs-lradc.c | 2 +- drivers/staging/iio/meter/ade7758_trigger.c | 2 +- drivers/staging/iio/trigger/iio-trig-bfin-timer.c | 2 +- drivers/staging/iio/trigger/iio-trig-periodic-rtc.c | 3 +-- include/linux/iio/trigger.h | 5 ++--- 14 files changed, 18 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git 
a/drivers/iio/adc/ad_sigma_delta.c b/drivers/iio/adc/ad_sigma_delta.c index 9a4e0e32a771..c55b81f7f970 100644 --- a/drivers/iio/adc/ad_sigma_delta.c +++ b/drivers/iio/adc/ad_sigma_delta.c @@ -410,7 +410,7 @@ static irqreturn_t ad_sd_data_rdy_trig_poll(int irq, void *private) complete(&sigma_delta->completion); disable_irq_nosync(irq); sigma_delta->irq_dis = true; - iio_trigger_poll(sigma_delta->trig, iio_get_time_ns()); + iio_trigger_poll(sigma_delta->trig); return IRQ_HANDLED; } diff --git a/drivers/iio/adc/at91_adc.c b/drivers/iio/adc/at91_adc.c index 89777ed9abd8..b9592010511e 100644 --- a/drivers/iio/adc/at91_adc.c +++ b/drivers/iio/adc/at91_adc.c @@ -149,7 +149,7 @@ void handle_adc_eoc_trigger(int irq, struct iio_dev *idev) if (iio_buffer_enabled(idev)) { disable_irq_nosync(irq); - iio_trigger_poll(idev->trig, iio_get_time_ns()); + iio_trigger_poll(idev->trig); } else { st->last_value = at91_adc_readl(st, AT91_ADC_LCDR); st->done = true; diff --git a/drivers/iio/adc/xilinx-xadc-core.c b/drivers/iio/adc/xilinx-xadc-core.c index ab52be29141b..fd2745c62943 100644 --- a/drivers/iio/adc/xilinx-xadc-core.c +++ b/drivers/iio/adc/xilinx-xadc-core.c @@ -486,7 +486,7 @@ static irqreturn_t xadc_axi_interrupt_handler(int irq, void *devid) return IRQ_NONE; if ((status & XADC_AXI_INT_EOS) && xadc->trigger) - iio_trigger_poll(xadc->trigger, 0); + iio_trigger_poll(xadc->trigger); if (status & XADC_AXI_INT_ALARM_MASK) { /* diff --git a/drivers/iio/industrialio-trigger.c b/drivers/iio/industrialio-trigger.c index 3383b025f62e..d31098e0c43f 100644 --- a/drivers/iio/industrialio-trigger.c +++ b/drivers/iio/industrialio-trigger.c @@ -114,7 +114,7 @@ static struct iio_trigger *iio_trigger_find_by_name(const char *name, return trig; } -void iio_trigger_poll(struct iio_trigger *trig, s64 time) +void iio_trigger_poll(struct iio_trigger *trig) { int i; @@ -133,12 +133,12 @@ EXPORT_SYMBOL(iio_trigger_poll); irqreturn_t iio_trigger_generic_data_rdy_poll(int irq, void *private) { - iio_trigger_poll(private, iio_get_time_ns()); + iio_trigger_poll(private); return IRQ_HANDLED; } EXPORT_SYMBOL(iio_trigger_generic_data_rdy_poll); -void iio_trigger_poll_chained(struct iio_trigger *trig, s64 time) +void iio_trigger_poll_chained(struct iio_trigger *trig) { int i; @@ -161,7 +161,7 @@ void iio_trigger_notify_done(struct iio_trigger *trig) trig->ops->try_reenable) if (trig->ops->try_reenable(trig)) /* Missed an interrupt so launch new poll now */ - iio_trigger_poll(trig, 0); + iio_trigger_poll(trig); } EXPORT_SYMBOL(iio_trigger_notify_done); diff --git a/drivers/iio/light/gp2ap020a00f.c b/drivers/iio/light/gp2ap020a00f.c index 04bdb85d2d9f..221ed16de1f7 100644 --- a/drivers/iio/light/gp2ap020a00f.c +++ b/drivers/iio/light/gp2ap020a00f.c @@ -827,7 +827,7 @@ static void gp2ap020a00f_iio_trigger_work(struct irq_work *work) struct gp2ap020a00f_data *data = container_of(work, struct gp2ap020a00f_data, work); - iio_trigger_poll(data->trig, 0); + iio_trigger_poll(data->trig); } static irqreturn_t gp2ap020a00f_prox_sensing_handler(int irq, void *data) diff --git a/drivers/iio/proximity/as3935.c b/drivers/iio/proximity/as3935.c index bf677bfe8eb2..5e780ef206f3 100644 --- a/drivers/iio/proximity/as3935.c +++ b/drivers/iio/proximity/as3935.c @@ -232,7 +232,7 @@ static void as3935_event_work(struct work_struct *work) switch (val) { case AS3935_EVENT_INT: - iio_trigger_poll(st->trig, iio_get_time_ns()); + iio_trigger_poll(st->trig); break; case AS3935_NOISE_INT: dev_warn(&st->spi->dev, "noise level is too high"); diff --git 
a/drivers/iio/trigger/iio-trig-interrupt.c b/drivers/iio/trigger/iio-trig-interrupt.c index 02577ec54c6b..7a149a7822bc 100644 --- a/drivers/iio/trigger/iio-trig-interrupt.c +++ b/drivers/iio/trigger/iio-trig-interrupt.c @@ -24,8 +24,7 @@ struct iio_interrupt_trigger_info { static irqreturn_t iio_interrupt_trigger_poll(int irq, void *private) { - /* Timestamp not currently provided */ - iio_trigger_poll(private, 0); + iio_trigger_poll(private); return IRQ_HANDLED; } diff --git a/drivers/iio/trigger/iio-trig-sysfs.c b/drivers/iio/trigger/iio-trig-sysfs.c index 15e3b850f513..254c7e906127 100644 --- a/drivers/iio/trigger/iio-trig-sysfs.c +++ b/drivers/iio/trigger/iio-trig-sysfs.c @@ -96,7 +96,7 @@ static void iio_sysfs_trigger_work(struct irq_work *work) struct iio_sysfs_trig *trig = container_of(work, struct iio_sysfs_trig, work); - iio_trigger_poll(trig->trig, 0); + iio_trigger_poll(trig->trig); } static ssize_t iio_sysfs_trigger_poll(struct device *dev, diff --git a/drivers/staging/iio/accel/lis3l02dq_ring.c b/drivers/staging/iio/accel/lis3l02dq_ring.c index 79cefe0a516a..bf33fdead479 100644 --- a/drivers/staging/iio/accel/lis3l02dq_ring.c +++ b/drivers/staging/iio/accel/lis3l02dq_ring.c @@ -31,7 +31,7 @@ irqreturn_t lis3l02dq_data_rdy_trig_poll(int irq, void *private) struct lis3l02dq_state *st = iio_priv(indio_dev); if (st->trigger_on) { - iio_trigger_poll(st->trig, iio_get_time_ns()); + iio_trigger_poll(st->trig); return IRQ_HANDLED; } else return IRQ_WAKE_THREAD; diff --git a/drivers/staging/iio/adc/mxs-lradc.c b/drivers/staging/iio/adc/mxs-lradc.c index dae8d1a9038e..d8619993c6ff 100644 --- a/drivers/staging/iio/adc/mxs-lradc.c +++ b/drivers/staging/iio/adc/mxs-lradc.c @@ -1166,7 +1166,7 @@ static irqreturn_t mxs_lradc_handle_irq(int irq, void *data) mxs_lradc_handle_touch(lradc); if (iio_buffer_enabled(iio)) - iio_trigger_poll(iio->trig, iio_get_time_ns()); + iio_trigger_poll(iio->trig); else if (reg & LRADC_CTRL1_LRADC_IRQ(0)) complete(&lradc->completion); diff --git a/drivers/staging/iio/meter/ade7758_trigger.c b/drivers/staging/iio/meter/ade7758_trigger.c index 7a94ddd42f59..ea01b8f7a2c3 100644 --- a/drivers/staging/iio/meter/ade7758_trigger.c +++ b/drivers/staging/iio/meter/ade7758_trigger.c @@ -21,7 +21,7 @@ static irqreturn_t ade7758_data_rdy_trig_poll(int irq, void *private) { disable_irq_nosync(irq); - iio_trigger_poll(private, iio_get_time_ns()); + iio_trigger_poll(private); return IRQ_HANDLED; } diff --git a/drivers/staging/iio/trigger/iio-trig-bfin-timer.c b/drivers/staging/iio/trigger/iio-trig-bfin-timer.c index 26e1ca0b7800..16f1a06bcd89 100644 --- a/drivers/staging/iio/trigger/iio-trig-bfin-timer.c +++ b/drivers/staging/iio/trigger/iio-trig-bfin-timer.c @@ -154,7 +154,7 @@ static irqreturn_t iio_bfin_tmr_trigger_isr(int irq, void *devid) struct bfin_tmr_state *st = devid; clear_gptimer_intr(st->t->id); - iio_trigger_poll(st->trig, 0); + iio_trigger_poll(st->trig); return IRQ_HANDLED; } diff --git a/drivers/staging/iio/trigger/iio-trig-periodic-rtc.c b/drivers/staging/iio/trigger/iio-trig-periodic-rtc.c index b5108a163e37..b1aeb88273c9 100644 --- a/drivers/staging/iio/trigger/iio-trig-periodic-rtc.c +++ b/drivers/staging/iio/trigger/iio-trig-periodic-rtc.c @@ -106,8 +106,7 @@ static const struct attribute_group *iio_trig_prtc_attr_groups[] = { static void iio_prtc_trigger_poll(void *private_data) { - /* Timestamp is not provided currently */ - iio_trigger_poll(private_data, 0); + iio_trigger_poll(private_data); } static const struct iio_trigger_ops iio_prtc_trigger_ops = 
{ diff --git a/include/linux/iio/trigger.h b/include/linux/iio/trigger.h index 369cf2cd5144..4b79ffe7b188 100644 --- a/include/linux/iio/trigger.h +++ b/include/linux/iio/trigger.h @@ -129,12 +129,11 @@ void iio_trigger_unregister(struct iio_trigger *trig_info); /** * iio_trigger_poll() - called on a trigger occurring * @trig: trigger which occurred - * @time: timestamp when trigger occurred * * Typically called in relevant hardware interrupt handler. **/ -void iio_trigger_poll(struct iio_trigger *trig, s64 time); -void iio_trigger_poll_chained(struct iio_trigger *trig, s64 time); +void iio_trigger_poll(struct iio_trigger *trig); +void iio_trigger_poll_chained(struct iio_trigger *trig); irqreturn_t iio_trigger_generic_data_rdy_poll(int irq, void *private); -- cgit v1.2.3-59-g8ed1b From 1a4fbf6a9286a6e3db497bc7bbae2024f0f1ad90 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 6 Nov 2014 23:07:00 +0000 Subject: iio: accel: kxcjk1013 3-axis accelerometer driver This patch adds IIO driver for KXCJK 1013 triaxis accelerometer sensor. The specifications for this driver is downloaded from: http://www.kionix.com/sites/default/files/KXCJK-1013%20Specifications%20Rev%202.pdf Signed-off-by: Srinivas Pandruvada Signed-off-by: Jonathan Cameron --- drivers/iio/accel/Kconfig | 12 + drivers/iio/accel/Makefile | 1 + drivers/iio/accel/kxcjk-1013.c | 760 +++++++++++++++++++++++++++++++++++ include/linux/iio/accel/kxcjk_1013.h | 22 + 4 files changed, 795 insertions(+) create mode 100644 drivers/iio/accel/kxcjk-1013.c create mode 100644 include/linux/iio/accel/kxcjk_1013.h (limited to 'include/linux') diff --git a/drivers/iio/accel/Kconfig b/drivers/iio/accel/Kconfig index 1e120fa1e156..12addf272a61 100644 --- a/drivers/iio/accel/Kconfig +++ b/drivers/iio/accel/Kconfig @@ -77,4 +77,16 @@ config MMA8452 To compile this driver as a module, choose M here: the module will be called mma8452. +config KXCJK1013 + tristate "Kionix 3-Axis Accelerometer Driver" + depends on I2C + select IIO_BUFFER + select IIO_TRIGGERED_BUFFER + help + Say Y here if you want to build a driver for the Kionix KXCJK-1013 + triaxial acceleration sensor. + + To compile this driver as a module, choose M here: the module will + be called kxcjk-1013. + endmenu diff --git a/drivers/iio/accel/Makefile b/drivers/iio/accel/Makefile index dc0e379c2592..6578ca1a8e09 100644 --- a/drivers/iio/accel/Makefile +++ b/drivers/iio/accel/Makefile @@ -5,6 +5,7 @@ # When adding new entries keep the list in alphabetical order obj-$(CONFIG_BMA180) += bma180.o obj-$(CONFIG_HID_SENSOR_ACCEL_3D) += hid-sensor-accel-3d.o +obj-$(CONFIG_KXCJK1013) += kxcjk-1013.o obj-$(CONFIG_KXSD9) += kxsd9.o obj-$(CONFIG_MMA8452) += mma8452.o diff --git a/drivers/iio/accel/kxcjk-1013.c b/drivers/iio/accel/kxcjk-1013.c new file mode 100644 index 000000000000..2326e430c999 --- /dev/null +++ b/drivers/iio/accel/kxcjk-1013.c @@ -0,0 +1,760 @@ +/* + * KXCJK-1013 3-axis accelerometer driver + * Copyright (c) 2014, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define KXCJK1013_DRV_NAME "kxcjk1013" +#define KXCJK1013_IRQ_NAME "kxcjk1013_event" + +#define KXCJK1013_REG_XOUT_L 0x06 +/* + * From low byte X axis register, all the other addresses of Y and Z can be + * obtained by just applying axis offset. The following axis defines are just + * provide clarity, but not used. + */ +#define KXCJK1013_REG_XOUT_H 0x07 +#define KXCJK1013_REG_YOUT_L 0x08 +#define KXCJK1013_REG_YOUT_H 0x09 +#define KXCJK1013_REG_ZOUT_L 0x0A +#define KXCJK1013_REG_ZOUT_H 0x0B + +#define KXCJK1013_REG_DCST_RESP 0x0C +#define KXCJK1013_REG_WHO_AM_I 0x0F +#define KXCJK1013_REG_INT_SRC1 0x16 +#define KXCJK1013_REG_INT_SRC2 0x17 +#define KXCJK1013_REG_STATUS_REG 0x18 +#define KXCJK1013_REG_INT_REL 0x1A +#define KXCJK1013_REG_CTRL1 0x1B +#define KXCJK1013_REG_CTRL2 0x1D +#define KXCJK1013_REG_INT_CTRL1 0x1E +#define KXCJK1013_REG_INT_CTRL2 0x1F +#define KXCJK1013_REG_DATA_CTRL 0x21 +#define KXCJK1013_REG_WAKE_TIMER 0x29 +#define KXCJK1013_REG_SELF_TEST 0x3A +#define KXCJK1013_REG_WAKE_THRES 0x6A + +#define KXCJK1013_REG_CTRL1_BIT_PC1 BIT(7) +#define KXCJK1013_REG_CTRL1_BIT_RES BIT(6) +#define KXCJK1013_REG_CTRL1_BIT_DRDY BIT(5) +#define KXCJK1013_REG_CTRL1_BIT_GSEL1 BIT(4) +#define KXCJK1013_REG_CTRL1_BIT_GSEL0 BIT(3) +#define KXCJK1013_REG_CTRL1_BIT_WUFE BIT(1) +#define KXCJK1013_REG_INT_REG1_BIT_IEA BIT(4) +#define KXCJK1013_REG_INT_REG1_BIT_IEN BIT(5) + +#define KXCJK1013_DATA_MASK_12_BIT 0x0FFF +#define KXCJK1013_MAX_STARTUP_TIME_US 100000 + +struct kxcjk1013_data { + struct i2c_client *client; + struct iio_trigger *trig; + bool trig_mode; + struct mutex mutex; + s16 buffer[8]; + int power_state; + u8 odr_bits; + bool active_high_intr; +}; + +enum kxcjk1013_axis { + AXIS_X, + AXIS_Y, + AXIS_Z, +}; + +enum kxcjk1013_mode { + STANDBY, + OPERATION, +}; + +static const struct { + int val; + int val2; + int odr_bits; +} samp_freq_table[] = { {0, 781000, 0x08}, {1, 563000, 0x09}, + {3, 125000, 0x0A}, {6, 25000, 0x0B}, {12, 5000, 0}, + {25, 0, 0x01}, {50, 0, 0x02}, {100, 0, 0x03}, + {200, 0, 0x04}, {400, 0, 0x05}, {800, 0, 0x06}, + {1600, 0, 0x07} }; + +/* Refer to section 4 of the specification */ +static const struct { + int odr_bits; + int usec; +} odr_start_up_times[] = { {0x08, 100000}, {0x09, 100000}, {0x0A, 100000}, + {0x0B, 100000}, { 0, 80000}, {0x01, 41000}, + {0x02, 21000}, {0x03, 11000}, {0x04, 6400}, + {0x05, 3900}, {0x06, 2700}, {0x07, 2100} }; + +static int kxcjk1013_set_mode(struct kxcjk1013_data *data, + enum kxcjk1013_mode mode) +{ + int ret; + + ret = i2c_smbus_read_byte_data(data->client, KXCJK1013_REG_CTRL1); + if (ret < 0) { + dev_err(&data->client->dev, "Error reading reg_ctrl1\n"); + return ret; + } + + if (mode == STANDBY) + ret &= ~KXCJK1013_REG_CTRL1_BIT_PC1; + else + ret |= KXCJK1013_REG_CTRL1_BIT_PC1; + + ret = i2c_smbus_write_byte_data(data->client, + KXCJK1013_REG_CTRL1, ret); + if (ret < 0) { + dev_err(&data->client->dev, "Error writing reg_ctrl1\n"); + return ret; + } + + return 0; +} + +static int kxcjk1013_chip_ack_intr(struct kxcjk1013_data *data) +{ + int ret; + + ret = i2c_smbus_read_byte_data(data->client, KXCJK1013_REG_INT_REL); + if (ret < 0) { + dev_err(&data->client->dev, "Error writing reg_int_rel\n"); + return ret; + } + + return ret; +} + +static int kxcjk1013_chip_init(struct kxcjk1013_data *data) +{ + int ret; + + ret = i2c_smbus_read_byte_data(data->client, 
KXCJK1013_REG_WHO_AM_I); + if (ret < 0) { + dev_err(&data->client->dev, "Error reading who_am_i\n"); + return ret; + } + + dev_dbg(&data->client->dev, "KXCJK1013 Chip Id %x\n", ret); + + ret = kxcjk1013_set_mode(data, STANDBY); + if (ret < 0) + return ret; + + ret = i2c_smbus_read_byte_data(data->client, KXCJK1013_REG_CTRL1); + if (ret < 0) { + dev_err(&data->client->dev, "Error reading reg_ctrl1\n"); + return ret; + } + + /* Setting range to 4G */ + ret |= KXCJK1013_REG_CTRL1_BIT_GSEL0; + ret &= ~KXCJK1013_REG_CTRL1_BIT_GSEL1; + + /* Set 12 bit mode */ + ret |= KXCJK1013_REG_CTRL1_BIT_RES; + + ret = i2c_smbus_write_byte_data(data->client, KXCJK1013_REG_CTRL1, + ret); + if (ret < 0) { + dev_err(&data->client->dev, "Error reading reg_ctrl\n"); + return ret; + } + + ret = i2c_smbus_read_byte_data(data->client, KXCJK1013_REG_DATA_CTRL); + if (ret < 0) { + dev_err(&data->client->dev, "Error reading reg_data_ctrl\n"); + return ret; + } + + data->odr_bits = ret; + + /* Set up INT polarity */ + ret = i2c_smbus_read_byte_data(data->client, KXCJK1013_REG_INT_CTRL1); + if (ret < 0) { + dev_err(&data->client->dev, "Error reading reg_int_ctrl1\n"); + return ret; + } + + if (data->active_high_intr) + ret |= KXCJK1013_REG_INT_REG1_BIT_IEA; + else + ret &= ~KXCJK1013_REG_INT_REG1_BIT_IEA; + + ret = i2c_smbus_write_byte_data(data->client, KXCJK1013_REG_INT_CTRL1, + ret); + if (ret < 0) { + dev_err(&data->client->dev, "Error writing reg_int_ctrl1\n"); + return ret; + } + + return 0; +} + +static int kxcjk1013_chip_setup_interrupt(struct kxcjk1013_data *data, + bool status) +{ + int ret; + + /* This is requirement by spec to change state to STANDBY */ + ret = kxcjk1013_set_mode(data, STANDBY); + if (ret < 0) + return ret; + + ret = i2c_smbus_read_byte_data(data->client, KXCJK1013_REG_INT_CTRL1); + if (ret < 0) { + dev_err(&data->client->dev, "Error reading reg_int_ctrl1\n"); + return ret; + } + + if (status) + ret |= KXCJK1013_REG_INT_REG1_BIT_IEN; + else + ret &= ~KXCJK1013_REG_INT_REG1_BIT_IEN; + + ret = i2c_smbus_write_byte_data(data->client, KXCJK1013_REG_INT_CTRL1, + ret); + if (ret < 0) { + dev_err(&data->client->dev, "Error writing reg_int_ctrl1\n"); + return ret; + } + + ret = i2c_smbus_read_byte_data(data->client, KXCJK1013_REG_CTRL1); + if (ret < 0) { + dev_err(&data->client->dev, "Error reading reg_ctrl1\n"); + return ret; + } + + if (status) + ret |= KXCJK1013_REG_CTRL1_BIT_DRDY; + else + ret &= ~KXCJK1013_REG_CTRL1_BIT_DRDY; + + ret = i2c_smbus_write_byte_data(data->client, + KXCJK1013_REG_CTRL1, ret); + if (ret < 0) { + dev_err(&data->client->dev, "Error writing reg_ctrl1\n"); + return ret; + } + + return ret; +} + +static int kxcjk1013_convert_freq_to_bit(int val, int val2) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(samp_freq_table); ++i) { + if (samp_freq_table[i].val == val && + samp_freq_table[i].val2 == val2) { + return samp_freq_table[i].odr_bits; + } + } + + return -EINVAL; +} + +static int kxcjk1013_set_odr(struct kxcjk1013_data *data, int val, int val2) +{ + int ret; + int odr_bits; + + odr_bits = kxcjk1013_convert_freq_to_bit(val, val2); + if (odr_bits < 0) + return odr_bits; + + /* To change ODR, the chip must be set to STANDBY as per spec */ + ret = kxcjk1013_set_mode(data, STANDBY); + if (ret < 0) + return ret; + + ret = i2c_smbus_write_byte_data(data->client, KXCJK1013_REG_DATA_CTRL, + odr_bits); + if (ret < 0) { + dev_err(&data->client->dev, "Error writing data_ctrl\n"); + return ret; + } + + data->odr_bits = odr_bits; + + /* Check, if the ODR is changed after data enable */ + 
if (data->power_state) { + /* Set the state back to operation */ + ret = kxcjk1013_set_mode(data, OPERATION); + if (ret < 0) + return ret; + } + + return 0; +} + +static int kxcjk1013_get_odr(struct kxcjk1013_data *data, int *val, int *val2) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(samp_freq_table); ++i) { + if (samp_freq_table[i].odr_bits == data->odr_bits) { + *val = samp_freq_table[i].val; + *val2 = samp_freq_table[i].val2; + return IIO_VAL_INT_PLUS_MICRO; + } + } + + return -EINVAL; +} + +static int kxcjk1013_get_acc_reg(struct kxcjk1013_data *data, int axis) +{ + u8 reg = KXCJK1013_REG_XOUT_L + axis * 2; + int ret; + + ret = i2c_smbus_read_word_data(data->client, reg); + if (ret < 0) { + dev_err(&data->client->dev, + "failed to read accel_%c registers\n", 'x' + axis); + return ret; + } + + return ret; +} + +static int kxcjk1013_get_startup_times(struct kxcjk1013_data *data) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(odr_start_up_times); ++i) { + if (odr_start_up_times[i].odr_bits == data->odr_bits) + return odr_start_up_times[i].usec; + } + + return KXCJK1013_MAX_STARTUP_TIME_US; +} + +static int kxcjk1013_read_raw(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, int *val, + int *val2, long mask) +{ + struct kxcjk1013_data *data = iio_priv(indio_dev); + int ret; + + switch (mask) { + case IIO_CHAN_INFO_RAW: + mutex_lock(&data->mutex); + if (iio_buffer_enabled(indio_dev)) + ret = -EBUSY; + else { + int sleep_val; + + ret = kxcjk1013_set_mode(data, OPERATION); + if (ret < 0) + return ret; + ++data->power_state; + sleep_val = kxcjk1013_get_startup_times(data); + if (sleep_val < 20000) + usleep_range(sleep_val, 20000); + else + msleep_interruptible(sleep_val/1000); + ret = kxcjk1013_get_acc_reg(data, chan->scan_index); + if (--data->power_state == 0) + kxcjk1013_set_mode(data, STANDBY); + } + mutex_unlock(&data->mutex); + + if (ret < 0) + return ret; + + *val = sign_extend32(ret >> 4, 11); + return IIO_VAL_INT; + + case IIO_CHAN_INFO_SCALE: + *val = 0; + *val2 = 19163; /* range +-4g (4/2047*9.806650) */ + return IIO_VAL_INT_PLUS_MICRO; + + case IIO_CHAN_INFO_SAMP_FREQ: + mutex_lock(&data->mutex); + ret = kxcjk1013_get_odr(data, val, val2); + mutex_unlock(&data->mutex); + return ret; + + default: + return -EINVAL; + } +} + +static int kxcjk1013_write_raw(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, int val, + int val2, long mask) +{ + struct kxcjk1013_data *data = iio_priv(indio_dev); + int ret; + + switch (mask) { + case IIO_CHAN_INFO_SAMP_FREQ: + mutex_lock(&data->mutex); + ret = kxcjk1013_set_odr(data, val, val2); + mutex_unlock(&data->mutex); + break; + default: + ret = -EINVAL; + } + + return ret; +} + +static int kxcjk1013_validate_trigger(struct iio_dev *indio_dev, + struct iio_trigger *trig) +{ + struct kxcjk1013_data *data = iio_priv(indio_dev); + + if (data->trig != trig) + return -EINVAL; + + return 0; +} + +static IIO_CONST_ATTR_SAMP_FREQ_AVAIL( + "0.781000 1.563000 3.125000 6.250000 12.500000 25 50 100 200 400 800 1600"); + +static struct attribute *kxcjk1013_attributes[] = { + &iio_const_attr_sampling_frequency_available.dev_attr.attr, + NULL, +}; + +static const struct attribute_group kxcjk1013_attrs_group = { + .attrs = kxcjk1013_attributes, +}; + +#define KXCJK1013_CHANNEL(_axis) { \ + .type = IIO_ACCEL, \ + .modified = 1, \ + .channel2 = IIO_MOD_##_axis, \ + .info_mask_separate = BIT(IIO_CHAN_INFO_RAW), \ + .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE) | \ + BIT(IIO_CHAN_INFO_SAMP_FREQ), \ + .scan_index = AXIS_##_axis, \ + 
.scan_type = { \ + .sign = 's', \ + .realbits = 12, \ + .storagebits = 16, \ + .shift = 4, \ + .endianness = IIO_LE, \ + }, \ +} + +static const struct iio_chan_spec kxcjk1013_channels[] = { + KXCJK1013_CHANNEL(X), + KXCJK1013_CHANNEL(Y), + KXCJK1013_CHANNEL(Z), + IIO_CHAN_SOFT_TIMESTAMP(3), +}; + +static const struct iio_info kxcjk1013_info = { + .attrs = &kxcjk1013_attrs_group, + .read_raw = kxcjk1013_read_raw, + .write_raw = kxcjk1013_write_raw, + .validate_trigger = kxcjk1013_validate_trigger, + .driver_module = THIS_MODULE, +}; + +static irqreturn_t kxcjk1013_trigger_handler(int irq, void *p) +{ + struct iio_poll_func *pf = p; + struct iio_dev *indio_dev = pf->indio_dev; + struct kxcjk1013_data *data = iio_priv(indio_dev); + int bit, ret, i = 0; + + mutex_lock(&data->mutex); + + for_each_set_bit(bit, indio_dev->buffer->scan_mask, + indio_dev->masklength) { + ret = kxcjk1013_get_acc_reg(data, bit); + if (ret < 0) { + kxcjk1013_chip_ack_intr(data); + mutex_unlock(&data->mutex); + goto err; + } + data->buffer[i++] = ret; + } + + kxcjk1013_chip_ack_intr(data); + + mutex_unlock(&data->mutex); + + iio_push_to_buffers_with_timestamp(indio_dev, data->buffer, + pf->timestamp); +err: + iio_trigger_notify_done(indio_dev->trig); + + return IRQ_HANDLED; +} + +static int kxcjk1013_data_rdy_trigger_set_state(struct iio_trigger *trig, + bool state) +{ + struct iio_dev *indio_dev = iio_trigger_get_drvdata(trig); + struct kxcjk1013_data *data = iio_priv(indio_dev); + + mutex_lock(&data->mutex); + if (state) { + kxcjk1013_chip_setup_interrupt(data, true); + kxcjk1013_set_mode(data, OPERATION); + ++data->power_state; + } else { + if (--data->power_state) + return 0; + kxcjk1013_chip_setup_interrupt(data, false); + kxcjk1013_set_mode(data, STANDBY); + } + mutex_unlock(&data->mutex); + + return 0; +} + +static const struct iio_trigger_ops kxcjk1013_trigger_ops = { + .set_trigger_state = kxcjk1013_data_rdy_trigger_set_state, + .owner = THIS_MODULE, +}; + +static int kxcjk1013_acpi_gpio_probe(struct i2c_client *client, + struct kxcjk1013_data *data) +{ + const struct acpi_device_id *id; + struct device *dev; + struct gpio_desc *gpio; + int ret; + + if (!client) + return -EINVAL; + + dev = &client->dev; + if (!ACPI_HANDLE(dev)) + return -ENODEV; + + id = acpi_match_device(dev->driver->acpi_match_table, dev); + if (!id) + return -ENODEV; + + /* data ready gpio interrupt pin */ + gpio = devm_gpiod_get_index(dev, "kxcjk1013_int", 0); + if (IS_ERR(gpio)) { + dev_err(dev, "acpi gpio get index failed\n"); + return PTR_ERR(gpio); + } + + ret = gpiod_direction_input(gpio); + if (ret) + return ret; + + ret = gpiod_to_irq(gpio); + + dev_dbg(dev, "GPIO resource, no:%d irq:%d\n", desc_to_gpio(gpio), ret); + + return ret; +} + +static int kxcjk1013_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct kxcjk1013_data *data; + struct iio_dev *indio_dev; + struct iio_trigger *trig = NULL; + struct kxcjk_1013_platform_data *pdata; + int ret; + + indio_dev = devm_iio_device_alloc(&client->dev, sizeof(*data)); + if (!indio_dev) + return -ENOMEM; + + data = iio_priv(indio_dev); + i2c_set_clientdata(client, indio_dev); + data->client = client; + + pdata = dev_get_platdata(&client->dev); + if (pdata) + data->active_high_intr = pdata->active_high_intr; + else + data->active_high_intr = true; /* default polarity */ + + ret = kxcjk1013_chip_init(data); + if (ret < 0) + return ret; + + mutex_init(&data->mutex); + + indio_dev->dev.parent = &client->dev; + indio_dev->channels = kxcjk1013_channels; + 
indio_dev->num_channels = ARRAY_SIZE(kxcjk1013_channels); + indio_dev->name = KXCJK1013_DRV_NAME; + indio_dev->modes = INDIO_DIRECT_MODE; + indio_dev->info = &kxcjk1013_info; + + if (client->irq < 0) + client->irq = kxcjk1013_acpi_gpio_probe(client, data); + + if (client->irq >= 0) { + trig = iio_trigger_alloc("%s-dev%d", indio_dev->name, + indio_dev->id); + if (!trig) + return -ENOMEM; + + data->trig_mode = true; + + ret = devm_request_irq(&client->dev, client->irq, + iio_trigger_generic_data_rdy_poll, + IRQF_TRIGGER_RISING, + KXCJK1013_IRQ_NAME, + trig); + if (ret) { + dev_err(&client->dev, "unable to request IRQ\n"); + goto err_trigger_free; + } + + trig->dev.parent = &client->dev; + trig->ops = &kxcjk1013_trigger_ops; + iio_trigger_set_drvdata(trig, indio_dev); + data->trig = trig; + indio_dev->trig = trig; + + ret = iio_trigger_register(trig); + if (ret) + goto err_trigger_free; + + ret = iio_triggered_buffer_setup(indio_dev, + &iio_pollfunc_store_time, + kxcjk1013_trigger_handler, + NULL); + if (ret < 0) { + dev_err(&client->dev, + "iio triggered buffer setup failed\n"); + goto err_trigger_unregister; + } + } + + ret = devm_iio_device_register(&client->dev, indio_dev); + if (ret < 0) { + dev_err(&client->dev, "unable to register iio device\n"); + goto err_buffer_cleanup; + } + + return 0; + +err_buffer_cleanup: + if (data->trig_mode) + iio_triggered_buffer_cleanup(indio_dev); +err_trigger_unregister: + if (data->trig_mode) + iio_trigger_unregister(trig); +err_trigger_free: + if (data->trig_mode) + iio_trigger_free(trig); + + return ret; +} + +static int kxcjk1013_remove(struct i2c_client *client) +{ + struct iio_dev *indio_dev = i2c_get_clientdata(client); + struct kxcjk1013_data *data = iio_priv(indio_dev); + + if (data->trig_mode) { + iio_triggered_buffer_cleanup(indio_dev); + iio_trigger_unregister(data->trig); + iio_trigger_free(data->trig); + } + + mutex_lock(&data->mutex); + kxcjk1013_set_mode(data, STANDBY); + mutex_unlock(&data->mutex); + + return 0; +} + +#ifdef CONFIG_PM_SLEEP +static int kxcjk1013_suspend(struct device *dev) +{ + struct iio_dev *indio_dev = i2c_get_clientdata(to_i2c_client(dev)); + struct kxcjk1013_data *data = iio_priv(indio_dev); + + mutex_lock(&data->mutex); + kxcjk1013_set_mode(data, STANDBY); + mutex_unlock(&data->mutex); + + return 0; +} + +static int kxcjk1013_resume(struct device *dev) +{ + struct iio_dev *indio_dev = i2c_get_clientdata(to_i2c_client(dev)); + struct kxcjk1013_data *data = iio_priv(indio_dev); + + mutex_lock(&data->mutex); + + if (data->power_state) + kxcjk1013_set_mode(data, OPERATION); + + mutex_unlock(&data->mutex); + + return 0; +} + +static SIMPLE_DEV_PM_OPS(kxcjk1013_pm_ops, kxcjk1013_suspend, kxcjk1013_resume); +#define KXCJK1013_PM_OPS (&kxcjk1013_pm_ops) +#else +#define KXCJK1013_PM_OPS NULL +#endif + +static const struct acpi_device_id kx_acpi_match[] = { + {"KXCJ1013", 0}, + { }, +}; +MODULE_DEVICE_TABLE(acpi, kx_acpi_match); + +static const struct i2c_device_id kxcjk1013_id[] = { + {"kxcjk1013", 0}, + {} +}; + +MODULE_DEVICE_TABLE(i2c, kxcjk1013_id); + +static struct i2c_driver kxcjk1013_driver = { + .driver = { + .name = KXCJK1013_DRV_NAME, + .acpi_match_table = ACPI_PTR(kx_acpi_match), + .pm = KXCJK1013_PM_OPS, + }, + .probe = kxcjk1013_probe, + .remove = kxcjk1013_remove, + .id_table = kxcjk1013_id, +}; +module_i2c_driver(kxcjk1013_driver); + +MODULE_AUTHOR("Srinivas Pandruvada "); +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("KXCJK1013 accelerometer driver"); diff --git a/include/linux/iio/accel/kxcjk_1013.h 
b/include/linux/iio/accel/kxcjk_1013.h new file mode 100644 index 000000000000..fd1d540ea62d --- /dev/null +++ b/include/linux/iio/accel/kxcjk_1013.h @@ -0,0 +1,22 @@ +/* + * KXCJK-1013 3-axis accelerometer Interface + * Copyright (c) 2014, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef __IIO_KXCJK_1013_H__ +#define __IIO_KXCJK_1013_H__ + +struct kxcjk_1013_platform_data { + bool active_high_intr; +}; + +#endif -- cgit v1.2.3-59-g8ed1b From a288d648715bab6a1ab2b72cb1c1cc79cdc8cb43 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Fri, 23 May 2014 12:54:57 +0100 Subject: extcon: arizona: support inverted jack detect switch Add config option for inverted jack detect switch that opens when jack is inserted. Signed-off-by: Richard Fitzgerald [Acked by Lee Jones for MFD part] Acked-by: Lee Jones Signed-off-by: Chanwoo Choi --- drivers/extcon/extcon-arizona.c | 34 ++++++++++++++++++++++++++-------- include/linux/mfd/arizona/pdata.h | 3 +++ 2 files changed, 29 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/extcon/extcon-arizona.c b/drivers/extcon/extcon-arizona.c index 6c84e3d12043..cf907430a698 100644 --- a/drivers/extcon/extcon-arizona.c +++ b/drivers/extcon/extcon-arizona.c @@ -39,6 +39,11 @@ #define ARIZONA_ACCDET_MODE_HPL 1 #define ARIZONA_ACCDET_MODE_HPR 2 +#define ARIZONA_MICD_CLAMP_MODE_JDL 0x4 +#define ARIZONA_MICD_CLAMP_MODE_JDH 0x5 +#define ARIZONA_MICD_CLAMP_MODE_JDL_GP5H 0x9 +#define ARIZONA_MICD_CLAMP_MODE_JDH_GP5H 0xb + #define ARIZONA_HPDET_MAX 10000 #define HPDET_DEBOUNCE 500 @@ -962,10 +967,16 @@ static irqreturn_t arizona_jackdet(int irq, void *data) if (arizona->pdata.jd_gpio5) { mask = ARIZONA_MICD_CLAMP_STS; - present = 0; + if (arizona->pdata.jd_invert) + present = ARIZONA_MICD_CLAMP_STS; + else + present = 0; } else { mask = ARIZONA_JD1_STS; - present = ARIZONA_JD1_STS; + if (arizona->pdata.jd_invert) + present = 0; + else + present = ARIZONA_JD1_STS; } ret = regmap_read(arizona->regmap, ARIZONA_AOD_IRQ_RAW_STATUS, &val); @@ -1096,6 +1107,7 @@ static int arizona_extcon_probe(struct platform_device *pdev) struct arizona_pdata *pdata = &arizona->pdata; struct arizona_extcon_info *info; unsigned int val; + unsigned int clamp_mode; int jack_irq_fall, jack_irq_rise; int ret, mode, i, j; @@ -1305,15 +1317,21 @@ static int arizona_extcon_probe(struct platform_device *pdev) regmap_write(arizona->regmap, ARIZONA_GPIO5_CTRL, val); - regmap_update_bits(arizona->regmap, - ARIZONA_MICD_CLAMP_CONTROL, - ARIZONA_MICD_CLAMP_MODE_MASK, 0x9); + if (arizona->pdata.jd_invert) + clamp_mode = ARIZONA_MICD_CLAMP_MODE_JDH_GP5H; + else + clamp_mode = ARIZONA_MICD_CLAMP_MODE_JDL_GP5H; } else { - regmap_update_bits(arizona->regmap, - ARIZONA_MICD_CLAMP_CONTROL, - ARIZONA_MICD_CLAMP_MODE_MASK, 0x4); + if (arizona->pdata.jd_invert) + clamp_mode = ARIZONA_MICD_CLAMP_MODE_JDH; + else + clamp_mode = ARIZONA_MICD_CLAMP_MODE_JDL; } + regmap_update_bits(arizona->regmap, + ARIZONA_MICD_CLAMP_CONTROL, + ARIZONA_MICD_CLAMP_MODE_MASK, clamp_mode); + regmap_update_bits(arizona->regmap, ARIZONA_JACK_DETECT_DEBOUNCE, 
ARIZONA_MICD_CLAMP_DB, diff --git a/include/linux/mfd/arizona/pdata.h b/include/linux/mfd/arizona/pdata.h index 12a5c135c746..4578c72c9b86 100644 --- a/include/linux/mfd/arizona/pdata.h +++ b/include/linux/mfd/arizona/pdata.h @@ -127,6 +127,9 @@ struct arizona_pdata { /** Internal pull on GPIO5 is disabled when used for jack detection */ bool jd_gpio5_nopull; + /** set to true if jackdet contact opens on insert */ + bool jd_invert; + /** Use the headphone detect circuit to identify the accessory */ bool hpdet_acc_id; -- cgit v1.2.3-59-g8ed1b From 478850160636c4f0b2558451df0e42f8c5a10939 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 8 May 2014 01:37:48 +0200 Subject: irq_work: Implement remote queueing irq work currently only supports local callbacks. However its code is mostly ready to run remote callbacks and we have some potential user. The full nohz subsystem currently open codes its own remote irq work on top of the scheduler ipi when it wants a CPU to reevaluate its next tick. However this ad hoc solution bloats the scheduler IPI. Lets just extend the irq work subsystem to support remote queuing on top of the generic SMP IPI to handle this kind of user. This shouldn't add noticeable overhead. Suggested-by: Peter Zijlstra Acked-by: Peter Zijlstra Cc: Andrew Morton Cc: Eric Dumazet Cc: Ingo Molnar Cc: Kevin Hilman Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Viresh Kumar Signed-off-by: Frederic Weisbecker --- include/linux/irq_work.h | 5 +++++ kernel/irq_work.c | 25 ++++++++++++++++++++++++- kernel/smp.c | 9 +++++++++ 3 files changed, 38 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h index 19ae05d4b8ec..bf9422c3aefe 100644 --- a/include/linux/irq_work.h +++ b/include/linux/irq_work.h @@ -33,6 +33,11 @@ void init_irq_work(struct irq_work *work, void (*func)(struct irq_work *)) #define DEFINE_IRQ_WORK(name, _f) struct irq_work name = { .func = (_f), } bool irq_work_queue(struct irq_work *work); + +#ifdef CONFIG_SMP +bool irq_work_queue_on(struct irq_work *work, int cpu); +#endif + void irq_work_run(void); void irq_work_sync(struct irq_work *work); diff --git a/kernel/irq_work.c b/kernel/irq_work.c index 126f254614bf..4b0a890a304a 100644 --- a/kernel/irq_work.c +++ b/kernel/irq_work.c @@ -16,6 +16,7 @@ #include #include #include +#include #include @@ -55,12 +56,34 @@ void __weak arch_irq_work_raise(void) */ } +#ifdef CONFIG_SMP /* - * Enqueue the irq_work @entry unless it's already pending + * Enqueue the irq_work @work on @cpu unless it's already pending * somewhere. * * Can be re-enqueued while the callback is still in progress. 
*/ +bool irq_work_queue_on(struct irq_work *work, int cpu) +{ + /* All work should have been flushed before going offline */ + WARN_ON_ONCE(cpu_is_offline(cpu)); + + /* Arch remote IPI send/receive backend aren't NMI safe */ + WARN_ON_ONCE(in_nmi()); + + /* Only queue if not already pending */ + if (!irq_work_claim(work)) + return false; + + if (llist_add(&work->llnode, &per_cpu(raised_list, cpu))) + arch_send_call_function_single_ipi(cpu); + + return true; +} +EXPORT_SYMBOL_GPL(irq_work_queue_on); +#endif + +/* Enqueue the irq work @work on the current CPU */ bool irq_work_queue(struct irq_work *work) { /* Only queue if not already pending */ diff --git a/kernel/smp.c b/kernel/smp.c index 306f8180b0d5..a1812d184aed 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -3,6 +3,7 @@ * * (C) Jens Axboe 2008 */ +#include #include #include #include @@ -210,6 +211,14 @@ void generic_smp_call_function_single_interrupt(void) csd->func(csd->info); csd_unlock(csd); } + + /* + * Handle irq works queued remotely by irq_work_queue_on(). + * Smp functions above are typically synchronous so they + * better run first since some other CPUs may be busy waiting + * for them. + */ + irq_work_run(); } /* -- cgit v1.2.3-59-g8ed1b From 3d36aebc2e78923095575df954f3f3b430ac0a30 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 4 Jun 2014 16:17:33 +0200 Subject: nohz: Support nohz full remote kick Remotely kicking a full nohz CPU in order to make it re-evaluate its next tick is currently implemented using the scheduler IPI. However this bloats a scheduler fast path with an off-topic feature. The scheduler tick was abused here for its cool "callable anywhere/anytime" properties. But now that the irq work subsystem can queue remote callbacks, it's a perfect fit to safely queue IPIs when interrupts are disabled without worrying about concurrent callers. So lets implement remote kick on top of irq work. This is going to be used when a new event requires the next tick to be recalculated: more than 1 task competing on the CPU, timer armed, ... Acked-by: Peter Zijlstra Cc: Andrew Morton Cc: Ingo Molnar Cc: Kevin Hilman Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Viresh Kumar Signed-off-by: Frederic Weisbecker --- include/linux/tick.h | 9 ++++++++- kernel/time/tick-sched.c | 10 ++++++---- 2 files changed, 14 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tick.h b/include/linux/tick.h index b84773cb9f4c..8a4987f2294a 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -181,7 +181,13 @@ static inline bool tick_nohz_full_cpu(int cpu) extern void tick_nohz_init(void); extern void __tick_nohz_full_check(void); -extern void tick_nohz_full_kick(void); +extern void tick_nohz_full_kick_cpu(int cpu); + +static inline void tick_nohz_full_kick(void) +{ + tick_nohz_full_kick_cpu(smp_processor_id()); +} + extern void tick_nohz_full_kick_all(void); extern void __tick_nohz_task_switch(struct task_struct *tsk); #else @@ -189,6 +195,7 @@ static inline void tick_nohz_init(void) { } static inline bool tick_nohz_full_enabled(void) { return false; } static inline bool tick_nohz_full_cpu(int cpu) { return false; } static inline void __tick_nohz_full_check(void) { } +static inline void tick_nohz_full_kick_cpu(int cpu) { } static inline void tick_nohz_full_kick(void) { } static inline void tick_nohz_full_kick_all(void) { } static inline void __tick_nohz_task_switch(struct task_struct *tsk) { } diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 6558b7ac112d..3d63944a3eca 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -224,13 +224,15 @@ static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = { }; /* - * Kick the current CPU if it's full dynticks in order to force it to + * Kick the CPU if it's full dynticks in order to force it to * re-evaluate its dependency on the tick and restart it if necessary. */ -void tick_nohz_full_kick(void) +void tick_nohz_full_kick_cpu(int cpu) { - if (tick_nohz_full_cpu(smp_processor_id())) - irq_work_queue(&__get_cpu_var(nohz_full_kick_work)); + if (!tick_nohz_full_cpu(cpu)) + return; + + irq_work_queue_on(&per_cpu(nohz_full_kick_work, cpu), cpu); } static void nohz_full_kick_ipi(void *info) -- cgit v1.2.3-59-g8ed1b From d0540f91cf74fab90e1143d8d40da8a5b5fabc8a Mon Sep 17 00:00:00 2001 From: Robert Baldyga Date: Wed, 21 May 2014 08:52:47 +0200 Subject: mfd: max77693: Remove unnecessary wrapper functions This patch removes wrapper functions used to access regmap, and make driver using regmap_*() functions instead. 
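
As a reading aid, a minimal before/after sketch of the conversion pattern this patch applies. Register and field names are taken from the extcon-max77693.c hunk below; the snippet itself is illustrative, not part of the patch. The points to notice are that values widen from u8 to unsigned int, that bulk accessors swap the buffer/count argument order, and that the old wrapper took (val, mask) while regmap_update_bits() takes (mask, val):

    /* Illustrative sketch only (context assumed from extcon-max77693.c) */
    unsigned int id;                /* was: u8 id */

    /* old: max77693_bulk_read(map, reg, count, buf) */
    ret = regmap_bulk_read(info->max77693->regmap_muic,
                           MAX77693_MUIC_REG_STATUS1, info->status, 2);

    /* old: max77693_update_reg(map, reg, val, mask) -- note swapped order */
    ret = regmap_update_bits(info->max77693->regmap_muic,
                             MAX77693_MUIC_REG_CTRL1, COMP_SW_MASK, ctrl1);

    /* old: max77693_read_reg(map, reg, &u8_val) */
    ret = regmap_read(info->max77693->regmap_muic,
                      MAX77693_MUIC_REG_ID, &id);
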
Signed-off-by: Robert Baldyga Reviewed-by: Krzysztof Kozlowski Acked-by: Mark Brown Acked-by: Chanwoo Choi Signed-off-by: Lee Jones --- drivers/extcon/extcon-max77693.c | 32 ++++++++++----------- drivers/mfd/max77693-irq.c | 50 +++++++++++++++++++------------- drivers/mfd/max77693.c | 56 ++---------------------------------- drivers/regulator/max77693.c | 12 ++++---- include/linux/mfd/max77693-private.h | 8 ------ 5 files changed, 54 insertions(+), 104 deletions(-) (limited to 'include/linux') diff --git a/drivers/extcon/extcon-max77693.c b/drivers/extcon/extcon-max77693.c index 2c7c3e191591..ba84a6e77e03 100644 --- a/drivers/extcon/extcon-max77693.c +++ b/drivers/extcon/extcon-max77693.c @@ -255,10 +255,10 @@ static int max77693_muic_set_debounce_time(struct max77693_muic_info *info, case ADC_DEBOUNCE_TIME_10MS: case ADC_DEBOUNCE_TIME_25MS: case ADC_DEBOUNCE_TIME_38_62MS: - ret = max77693_update_reg(info->max77693->regmap_muic, + ret = regmap_update_bits(info->max77693->regmap_muic, MAX77693_MUIC_REG_CTRL3, - time << CONTROL3_ADCDBSET_SHIFT, - CONTROL3_ADCDBSET_MASK); + CONTROL3_ADCDBSET_MASK, + time << CONTROL3_ADCDBSET_SHIFT); if (ret) { dev_err(info->dev, "failed to set ADC debounce time\n"); return ret; @@ -286,15 +286,15 @@ static int max77693_muic_set_path(struct max77693_muic_info *info, u8 val, bool attached) { int ret = 0; - u8 ctrl1, ctrl2 = 0; + unsigned int ctrl1, ctrl2 = 0; if (attached) ctrl1 = val; else ctrl1 = CONTROL1_SW_OPEN; - ret = max77693_update_reg(info->max77693->regmap_muic, - MAX77693_MUIC_REG_CTRL1, ctrl1, COMP_SW_MASK); + ret = regmap_update_bits(info->max77693->regmap_muic, + MAX77693_MUIC_REG_CTRL1, COMP_SW_MASK, ctrl1); if (ret < 0) { dev_err(info->dev, "failed to update MUIC register\n"); return ret; @@ -305,9 +305,9 @@ static int max77693_muic_set_path(struct max77693_muic_info *info, else ctrl2 |= CONTROL2_LOWPWR_MASK; /* LowPwr=1, CPEn=0 */ - ret = max77693_update_reg(info->max77693->regmap_muic, - MAX77693_MUIC_REG_CTRL2, ctrl2, - CONTROL2_LOWPWR_MASK | CONTROL2_CPEN_MASK); + ret = regmap_update_bits(info->max77693->regmap_muic, + MAX77693_MUIC_REG_CTRL2, + CONTROL2_LOWPWR_MASK | CONTROL2_CPEN_MASK, ctrl2); if (ret < 0) { dev_err(info->dev, "failed to update MUIC register\n"); return ret; @@ -969,8 +969,8 @@ static void max77693_muic_irq_work(struct work_struct *work) if (info->irq == muic_irqs[i].virq) irq_type = muic_irqs[i].irq; - ret = max77693_bulk_read(info->max77693->regmap_muic, - MAX77693_MUIC_REG_STATUS1, 2, info->status); + ret = regmap_bulk_read(info->max77693->regmap_muic, + MAX77693_MUIC_REG_STATUS1, info->status, 2); if (ret) { dev_err(info->dev, "failed to read MUIC register\n"); mutex_unlock(&info->mutex); @@ -1042,8 +1042,8 @@ static int max77693_muic_detect_accessory(struct max77693_muic_info *info) mutex_lock(&info->mutex); /* Read STATUSx register to detect accessory */ - ret = max77693_bulk_read(info->max77693->regmap_muic, - MAX77693_MUIC_REG_STATUS1, 2, info->status); + ret = regmap_bulk_read(info->max77693->regmap_muic, + MAX77693_MUIC_REG_STATUS1, info->status, 2); if (ret) { dev_err(info->dev, "failed to read MUIC register\n"); mutex_unlock(&info->mutex); @@ -1095,7 +1095,7 @@ static int max77693_muic_probe(struct platform_device *pdev) int delay_jiffies; int ret; int i; - u8 id; + unsigned int id; info = devm_kzalloc(&pdev->dev, sizeof(struct max77693_muic_info), GFP_KERNEL); @@ -1204,7 +1204,7 @@ static int max77693_muic_probe(struct platform_device *pdev) enum max77693_irq_source irq_src = MAX77693_IRQ_GROUP_NR; - 
max77693_write_reg(info->max77693->regmap_muic, + regmap_write(info->max77693->regmap_muic, init_data[i].addr, init_data[i].data); @@ -1262,7 +1262,7 @@ static int max77693_muic_probe(struct platform_device *pdev) max77693_muic_set_path(info, info->path_uart, true); /* Check revision number of MUIC device*/ - ret = max77693_read_reg(info->max77693->regmap_muic, + ret = regmap_read(info->max77693->regmap_muic, MAX77693_MUIC_REG_ID, &id); if (ret < 0) { dev_err(&pdev->dev, "failed to read revision number\n"); diff --git a/drivers/mfd/max77693-irq.c b/drivers/mfd/max77693-irq.c index 66b58fe77094..7d8f99f94f27 100644 --- a/drivers/mfd/max77693-irq.c +++ b/drivers/mfd/max77693-irq.c @@ -30,8 +30,9 @@ #include #include #include +#include -static const u8 max77693_mask_reg[] = { +static const unsigned int max77693_mask_reg[] = { [LED_INT] = MAX77693_LED_REG_FLASH_INT_MASK, [TOPSYS_INT] = MAX77693_PMIC_REG_TOPSYS_INT_MASK, [CHG_INT] = MAX77693_CHG_REG_CHG_INT_MASK, @@ -118,7 +119,7 @@ static void max77693_irq_sync_unlock(struct irq_data *data) continue; max77693->irq_masks_cache[i] = max77693->irq_masks_cur[i]; - max77693_write_reg(map, max77693_mask_reg[i], + regmap_write(map, max77693_mask_reg[i], max77693->irq_masks_cur[i]); } @@ -178,11 +179,11 @@ static irqreturn_t max77693_irq_thread(int irq, void *data) { struct max77693_dev *max77693 = data; u8 irq_reg[MAX77693_IRQ_GROUP_NR] = {}; - u8 irq_src; + unsigned int irq_src; int ret; int i, cur_irq; - ret = max77693_read_reg(max77693->regmap, MAX77693_PMIC_REG_INTSRC, + ret = regmap_read(max77693->regmap, MAX77693_PMIC_REG_INTSRC, &irq_src); if (ret < 0) { dev_err(max77693->dev, "Failed to read interrupt source: %d\n", @@ -190,25 +191,34 @@ static irqreturn_t max77693_irq_thread(int irq, void *data) return IRQ_NONE; } - if (irq_src & MAX77693_IRQSRC_CHG) + if (irq_src & MAX77693_IRQSRC_CHG) { /* CHG_INT */ - ret = max77693_read_reg(max77693->regmap, MAX77693_CHG_REG_CHG_INT, - &irq_reg[CHG_INT]); + unsigned int data; + ret = regmap_read(max77693->regmap, + MAX77693_CHG_REG_CHG_INT, &data); + irq_reg[CHG_INT] = data; + } - if (irq_src & MAX77693_IRQSRC_TOP) + if (irq_src & MAX77693_IRQSRC_TOP) { /* TOPSYS_INT */ - ret = max77693_read_reg(max77693->regmap, - MAX77693_PMIC_REG_TOPSYS_INT, &irq_reg[TOPSYS_INT]); + unsigned int data; + ret = regmap_read(max77693->regmap, + MAX77693_PMIC_REG_TOPSYS_INT, &data); + irq_reg[TOPSYS_INT] = data; + } - if (irq_src & MAX77693_IRQSRC_FLASH) + if (irq_src & MAX77693_IRQSRC_FLASH) { /* LED_INT */ - ret = max77693_read_reg(max77693->regmap, - MAX77693_LED_REG_FLASH_INT, &irq_reg[LED_INT]); + unsigned int data; + ret = regmap_read(max77693->regmap, + MAX77693_LED_REG_FLASH_INT, &data); + irq_reg[LED_INT] = data; + } if (irq_src & MAX77693_IRQSRC_MUIC) /* MUIC INT1 ~ INT3 */ - max77693_bulk_read(max77693->regmap_muic, MAX77693_MUIC_REG_INT1, - MAX77693_NUM_IRQ_MUIC_REGS, &irq_reg[MUIC_INT1]); + regmap_bulk_read(max77693->regmap_muic, MAX77693_MUIC_REG_INT1, + &irq_reg[MUIC_INT1], MAX77693_NUM_IRQ_MUIC_REGS); /* Apply masking */ for (i = 0; i < MAX77693_IRQ_GROUP_NR; i++) { @@ -263,7 +273,7 @@ int max77693_irq_init(struct max77693_dev *max77693) struct irq_domain *domain; int i; int ret = 0; - u8 intsrc_mask; + unsigned int intsrc_mask; mutex_init(&max77693->irqlock); @@ -286,9 +296,9 @@ int max77693_irq_init(struct max77693_dev *max77693) if (max77693_mask_reg[i] == MAX77693_REG_INVALID) continue; if (i >= MUIC_INT1 && i <= MUIC_INT3) - max77693_write_reg(map, max77693_mask_reg[i], 0x00); + regmap_write(map, 
max77693_mask_reg[i], 0x00); else - max77693_write_reg(map, max77693_mask_reg[i], 0xff); + regmap_write(map, max77693_mask_reg[i], 0xff); } domain = irq_domain_add_linear(NULL, MAX77693_IRQ_NR, @@ -301,7 +311,7 @@ int max77693_irq_init(struct max77693_dev *max77693) max77693->irq_domain = domain; /* Unmask max77693 interrupt */ - ret = max77693_read_reg(max77693->regmap, + ret = regmap_read(max77693->regmap, MAX77693_PMIC_REG_INTSRC_MASK, &intsrc_mask); if (ret < 0) { dev_err(max77693->dev, "fail to read PMIC register\n"); @@ -311,7 +321,7 @@ int max77693_irq_init(struct max77693_dev *max77693) intsrc_mask &= ~(MAX77693_IRQSRC_CHG); intsrc_mask &= ~(MAX77693_IRQSRC_FLASH); intsrc_mask &= ~(MAX77693_IRQSRC_MUIC); - ret = max77693_write_reg(max77693->regmap, + ret = regmap_write(max77693->regmap, MAX77693_PMIC_REG_INTSRC_MASK, intsrc_mask); if (ret < 0) { dev_err(max77693->dev, "fail to write PMIC register\n"); diff --git a/drivers/mfd/max77693.c b/drivers/mfd/max77693.c index 7e05428c756d..a0308336adaf 100644 --- a/drivers/mfd/max77693.c +++ b/drivers/mfd/max77693.c @@ -49,58 +49,6 @@ static const struct mfd_cell max77693_devs[] = { { .name = "max77693-haptic", }, }; -int max77693_read_reg(struct regmap *map, u8 reg, u8 *dest) -{ - unsigned int val; - int ret; - - ret = regmap_read(map, reg, &val); - *dest = val; - - return ret; -} -EXPORT_SYMBOL_GPL(max77693_read_reg); - -int max77693_bulk_read(struct regmap *map, u8 reg, int count, u8 *buf) -{ - int ret; - - ret = regmap_bulk_read(map, reg, buf, count); - - return ret; -} -EXPORT_SYMBOL_GPL(max77693_bulk_read); - -int max77693_write_reg(struct regmap *map, u8 reg, u8 value) -{ - int ret; - - ret = regmap_write(map, reg, value); - - return ret; -} -EXPORT_SYMBOL_GPL(max77693_write_reg); - -int max77693_bulk_write(struct regmap *map, u8 reg, int count, u8 *buf) -{ - int ret; - - ret = regmap_bulk_write(map, reg, buf, count); - - return ret; -} -EXPORT_SYMBOL_GPL(max77693_bulk_write); - -int max77693_update_reg(struct regmap *map, u8 reg, u8 val, u8 mask) -{ - int ret; - - ret = regmap_update_bits(map, reg, mask, val); - - return ret; -} -EXPORT_SYMBOL_GPL(max77693_update_reg); - static const struct regmap_config max77693_regmap_config = { .reg_bits = 8, .val_bits = 8, @@ -117,7 +65,7 @@ static int max77693_i2c_probe(struct i2c_client *i2c, const struct i2c_device_id *id) { struct max77693_dev *max77693; - u8 reg_data; + unsigned int reg_data; int ret = 0; max77693 = devm_kzalloc(&i2c->dev, @@ -139,7 +87,7 @@ static int max77693_i2c_probe(struct i2c_client *i2c, return ret; } - ret = max77693_read_reg(max77693->regmap, MAX77693_PMIC_REG_PMIC_ID2, + ret = regmap_read(max77693->regmap, MAX77693_PMIC_REG_PMIC_ID2, ®_data); if (ret < 0) { dev_err(max77693->dev, "device not found on this channel\n"); diff --git a/drivers/regulator/max77693.c b/drivers/regulator/max77693.c index 653a58b49cdf..c67ff05fc1dd 100644 --- a/drivers/regulator/max77693.c +++ b/drivers/regulator/max77693.c @@ -31,6 +31,7 @@ #include #include #include +#include #define CHGIN_ILIM_STEP_20mA 20000 @@ -39,9 +40,9 @@ static int max77693_chg_is_enabled(struct regulator_dev *rdev) { int ret; - u8 val; + unsigned int val; - ret = max77693_read_reg(rdev->regmap, rdev->desc->enable_reg, &val); + ret = regmap_read(rdev->regmap, rdev->desc->enable_reg, &val); if (ret) return ret; @@ -57,12 +58,11 @@ static int max77693_chg_get_current_limit(struct regulator_dev *rdev) { unsigned int chg_min_uA = rdev->constraints->min_uA; unsigned int chg_max_uA = rdev->constraints->max_uA; - u8 reg, 
sel; + unsigned int reg, sel; unsigned int val; int ret; - ret = max77693_read_reg(rdev->regmap, - MAX77693_CHG_REG_CHG_CNFG_09, ®); + ret = regmap_read(rdev->regmap, MAX77693_CHG_REG_CHG_CNFG_09, ®); if (ret < 0) return ret; @@ -96,7 +96,7 @@ static int max77693_chg_set_current_limit(struct regulator_dev *rdev, /* the first four codes for charger current are all 60mA */ sel += 3; - return max77693_write_reg(rdev->regmap, + return regmap_write(rdev->regmap, MAX77693_CHG_REG_CHG_CNFG_09, sel); } /* end of CHARGER regulator ops */ diff --git a/include/linux/mfd/max77693-private.h b/include/linux/mfd/max77693-private.h index 3e050b933dd0..80ec31d561c4 100644 --- a/include/linux/mfd/max77693-private.h +++ b/include/linux/mfd/max77693-private.h @@ -332,14 +332,6 @@ enum max77693_types { TYPE_MAX77693, }; -extern int max77693_read_reg(struct regmap *map, u8 reg, u8 *dest); -extern int max77693_bulk_read(struct regmap *map, u8 reg, int count, - u8 *buf); -extern int max77693_write_reg(struct regmap *map, u8 reg, u8 value); -extern int max77693_bulk_write(struct regmap *map, u8 reg, int count, - u8 *buf); -extern int max77693_update_reg(struct regmap *map, u8 reg, u8 val, u8 mask); - extern int max77693_irq_init(struct max77693_dev *max77686); extern void max77693_irq_exit(struct max77693_dev *max77686); extern int max77693_irq_resume(struct max77693_dev *max77686); -- cgit v1.2.3-59-g8ed1b From 342d669c1ee421323f552a62729d3a3d0065093c Mon Sep 17 00:00:00 2001 From: Robert Baldyga Date: Wed, 21 May 2014 08:52:48 +0200 Subject: mfd: max77693: Handle IRQs using regmap This patch modifies mfd driver to use regmap for handling interrupts. It allows to simplify irq handling process. This modifications needed to make small changes in function drivers, which use interrupts. Signed-off-by: Robert Baldyga Reviewed-by: Krzysztof Kozlowski Acked-by: Chanwoo Choi Signed-off-by: Lee Jones --- drivers/extcon/extcon-max77693.c | 3 +- drivers/mfd/Kconfig | 1 + drivers/mfd/Makefile | 2 +- drivers/mfd/max77693-irq.c | 346 ----------------------------------- drivers/mfd/max77693.c | 158 ++++++++++++++-- include/linux/mfd/max77693-private.h | 46 ++++- 6 files changed, 195 insertions(+), 361 deletions(-) delete mode 100644 drivers/mfd/max77693-irq.c (limited to 'include/linux') diff --git a/drivers/extcon/extcon-max77693.c b/drivers/extcon/extcon-max77693.c index ba84a6e77e03..c7278b1649da 100644 --- a/drivers/extcon/extcon-max77693.c +++ b/drivers/extcon/extcon-max77693.c @@ -1154,7 +1154,8 @@ static int max77693_muic_probe(struct platform_device *pdev) struct max77693_muic_irq *muic_irq = &muic_irqs[i]; unsigned int virq = 0; - virq = irq_create_mapping(max77693->irq_domain, muic_irq->irq); + virq = regmap_irq_get_virq(max77693->irq_data_muic, + muic_irq->irq); if (!virq) { ret = -EINVAL; goto err_irq; diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index ee8204cc31e9..2feac14d1085 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -384,6 +384,7 @@ config MFD_MAX77693 depends on I2C=y select MFD_CORE select REGMAP_I2C + select REGMAP_IRQ help Say yes here to add support for Maxim Semiconductor MAX77693. 
This is a companion Power Management IC with Flash, Haptic, Charger, diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index 8afedba535c7..8c6e7bba4660 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -116,7 +116,7 @@ obj-$(CONFIG_MFD_DA9063) += da9063.o obj-$(CONFIG_MFD_MAX14577) += max14577.o obj-$(CONFIG_MFD_MAX77686) += max77686.o max77686-irq.o -obj-$(CONFIG_MFD_MAX77693) += max77693.o max77693-irq.o +obj-$(CONFIG_MFD_MAX77693) += max77693.o obj-$(CONFIG_MFD_MAX8907) += max8907.o max8925-objs := max8925-core.o max8925-i2c.o obj-$(CONFIG_MFD_MAX8925) += max8925.o diff --git a/drivers/mfd/max77693-irq.c b/drivers/mfd/max77693-irq.c deleted file mode 100644 index 7d8f99f94f27..000000000000 --- a/drivers/mfd/max77693-irq.c +++ /dev/null @@ -1,346 +0,0 @@ -/* - * max77693-irq.c - Interrupt controller support for MAX77693 - * - * Copyright (C) 2012 Samsung Electronics Co.Ltd - * SangYoung Son - * - * This program is not provided / owned by Maxim Integrated Products. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * This driver is based on max8997-irq.c - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -static const unsigned int max77693_mask_reg[] = { - [LED_INT] = MAX77693_LED_REG_FLASH_INT_MASK, - [TOPSYS_INT] = MAX77693_PMIC_REG_TOPSYS_INT_MASK, - [CHG_INT] = MAX77693_CHG_REG_CHG_INT_MASK, - [MUIC_INT1] = MAX77693_MUIC_REG_INTMASK1, - [MUIC_INT2] = MAX77693_MUIC_REG_INTMASK2, - [MUIC_INT3] = MAX77693_MUIC_REG_INTMASK3, -}; - -static struct regmap *max77693_get_regmap(struct max77693_dev *max77693, - enum max77693_irq_source src) -{ - switch (src) { - case LED_INT ... CHG_INT: - return max77693->regmap; - case MUIC_INT1 ... 
MUIC_INT3: - return max77693->regmap_muic; - default: - return ERR_PTR(-EINVAL); - } -} - -struct max77693_irq_data { - int mask; - enum max77693_irq_source group; -}; - -#define DECLARE_IRQ(idx, _group, _mask) \ - [(idx)] = { .group = (_group), .mask = (_mask) } -static const struct max77693_irq_data max77693_irqs[] = { - DECLARE_IRQ(MAX77693_LED_IRQ_FLED2_OPEN, LED_INT, 1 << 0), - DECLARE_IRQ(MAX77693_LED_IRQ_FLED2_SHORT, LED_INT, 1 << 1), - DECLARE_IRQ(MAX77693_LED_IRQ_FLED1_OPEN, LED_INT, 1 << 2), - DECLARE_IRQ(MAX77693_LED_IRQ_FLED1_SHORT, LED_INT, 1 << 3), - DECLARE_IRQ(MAX77693_LED_IRQ_MAX_FLASH, LED_INT, 1 << 4), - - DECLARE_IRQ(MAX77693_TOPSYS_IRQ_T120C_INT, TOPSYS_INT, 1 << 0), - DECLARE_IRQ(MAX77693_TOPSYS_IRQ_T140C_INT, TOPSYS_INT, 1 << 1), - DECLARE_IRQ(MAX77693_TOPSYS_IRQ_LOWSYS_INT, TOPSYS_INT, 1 << 3), - - DECLARE_IRQ(MAX77693_CHG_IRQ_BYP_I, CHG_INT, 1 << 0), - DECLARE_IRQ(MAX77693_CHG_IRQ_THM_I, CHG_INT, 1 << 2), - DECLARE_IRQ(MAX77693_CHG_IRQ_BAT_I, CHG_INT, 1 << 3), - DECLARE_IRQ(MAX77693_CHG_IRQ_CHG_I, CHG_INT, 1 << 4), - DECLARE_IRQ(MAX77693_CHG_IRQ_CHGIN_I, CHG_INT, 1 << 6), - - DECLARE_IRQ(MAX77693_MUIC_IRQ_INT1_ADC, MUIC_INT1, 1 << 0), - DECLARE_IRQ(MAX77693_MUIC_IRQ_INT1_ADC_LOW, MUIC_INT1, 1 << 1), - DECLARE_IRQ(MAX77693_MUIC_IRQ_INT1_ADC_ERR, MUIC_INT1, 1 << 2), - DECLARE_IRQ(MAX77693_MUIC_IRQ_INT1_ADC1K, MUIC_INT1, 1 << 3), - - DECLARE_IRQ(MAX77693_MUIC_IRQ_INT2_CHGTYP, MUIC_INT2, 1 << 0), - DECLARE_IRQ(MAX77693_MUIC_IRQ_INT2_CHGDETREUN, MUIC_INT2, 1 << 1), - DECLARE_IRQ(MAX77693_MUIC_IRQ_INT2_DCDTMR, MUIC_INT2, 1 << 2), - DECLARE_IRQ(MAX77693_MUIC_IRQ_INT2_DXOVP, MUIC_INT2, 1 << 3), - DECLARE_IRQ(MAX77693_MUIC_IRQ_INT2_VBVOLT, MUIC_INT2, 1 << 4), - DECLARE_IRQ(MAX77693_MUIC_IRQ_INT2_VIDRM, MUIC_INT2, 1 << 5), - - DECLARE_IRQ(MAX77693_MUIC_IRQ_INT3_EOC, MUIC_INT3, 1 << 0), - DECLARE_IRQ(MAX77693_MUIC_IRQ_INT3_CGMBC, MUIC_INT3, 1 << 1), - DECLARE_IRQ(MAX77693_MUIC_IRQ_INT3_OVP, MUIC_INT3, 1 << 2), - DECLARE_IRQ(MAX77693_MUIC_IRQ_INT3_MBCCHG_ERR, MUIC_INT3, 1 << 3), - DECLARE_IRQ(MAX77693_MUIC_IRQ_INT3_CHG_ENABLED, MUIC_INT3, 1 << 4), - DECLARE_IRQ(MAX77693_MUIC_IRQ_INT3_BAT_DET, MUIC_INT3, 1 << 5), -}; - -static void max77693_irq_lock(struct irq_data *data) -{ - struct max77693_dev *max77693 = irq_get_chip_data(data->irq); - - mutex_lock(&max77693->irqlock); -} - -static void max77693_irq_sync_unlock(struct irq_data *data) -{ - struct max77693_dev *max77693 = irq_get_chip_data(data->irq); - int i; - - for (i = 0; i < MAX77693_IRQ_GROUP_NR; i++) { - u8 mask_reg = max77693_mask_reg[i]; - struct regmap *map = max77693_get_regmap(max77693, i); - - if (mask_reg == MAX77693_REG_INVALID || - IS_ERR_OR_NULL(map)) - continue; - max77693->irq_masks_cache[i] = max77693->irq_masks_cur[i]; - - regmap_write(map, max77693_mask_reg[i], - max77693->irq_masks_cur[i]); - } - - mutex_unlock(&max77693->irqlock); -} - -static const inline struct max77693_irq_data * -irq_to_max77693_irq(struct max77693_dev *max77693, int irq) -{ - struct irq_data *data = irq_get_irq_data(irq); - return &max77693_irqs[data->hwirq]; -} - -static void max77693_irq_mask(struct irq_data *data) -{ - struct max77693_dev *max77693 = irq_get_chip_data(data->irq); - const struct max77693_irq_data *irq_data = - irq_to_max77693_irq(max77693, data->irq); - - if (irq_data->group >= MAX77693_IRQ_GROUP_NR) - return; - - if (irq_data->group >= MUIC_INT1 && irq_data->group <= MUIC_INT3) - max77693->irq_masks_cur[irq_data->group] &= ~irq_data->mask; - else - max77693->irq_masks_cur[irq_data->group] |= irq_data->mask; -} 
- -static void max77693_irq_unmask(struct irq_data *data) -{ - struct max77693_dev *max77693 = irq_get_chip_data(data->irq); - const struct max77693_irq_data *irq_data = - irq_to_max77693_irq(max77693, data->irq); - - if (irq_data->group >= MAX77693_IRQ_GROUP_NR) - return; - - if (irq_data->group >= MUIC_INT1 && irq_data->group <= MUIC_INT3) - max77693->irq_masks_cur[irq_data->group] |= irq_data->mask; - else - max77693->irq_masks_cur[irq_data->group] &= ~irq_data->mask; -} - -static struct irq_chip max77693_irq_chip = { - .name = "max77693", - .irq_bus_lock = max77693_irq_lock, - .irq_bus_sync_unlock = max77693_irq_sync_unlock, - .irq_mask = max77693_irq_mask, - .irq_unmask = max77693_irq_unmask, -}; - -#define MAX77693_IRQSRC_CHG (1 << 0) -#define MAX77693_IRQSRC_TOP (1 << 1) -#define MAX77693_IRQSRC_FLASH (1 << 2) -#define MAX77693_IRQSRC_MUIC (1 << 3) -static irqreturn_t max77693_irq_thread(int irq, void *data) -{ - struct max77693_dev *max77693 = data; - u8 irq_reg[MAX77693_IRQ_GROUP_NR] = {}; - unsigned int irq_src; - int ret; - int i, cur_irq; - - ret = regmap_read(max77693->regmap, MAX77693_PMIC_REG_INTSRC, - &irq_src); - if (ret < 0) { - dev_err(max77693->dev, "Failed to read interrupt source: %d\n", - ret); - return IRQ_NONE; - } - - if (irq_src & MAX77693_IRQSRC_CHG) { - /* CHG_INT */ - unsigned int data; - ret = regmap_read(max77693->regmap, - MAX77693_CHG_REG_CHG_INT, &data); - irq_reg[CHG_INT] = data; - } - - if (irq_src & MAX77693_IRQSRC_TOP) { - /* TOPSYS_INT */ - unsigned int data; - ret = regmap_read(max77693->regmap, - MAX77693_PMIC_REG_TOPSYS_INT, &data); - irq_reg[TOPSYS_INT] = data; - } - - if (irq_src & MAX77693_IRQSRC_FLASH) { - /* LED_INT */ - unsigned int data; - ret = regmap_read(max77693->regmap, - MAX77693_LED_REG_FLASH_INT, &data); - irq_reg[LED_INT] = data; - } - - if (irq_src & MAX77693_IRQSRC_MUIC) - /* MUIC INT1 ~ INT3 */ - regmap_bulk_read(max77693->regmap_muic, MAX77693_MUIC_REG_INT1, - &irq_reg[MUIC_INT1], MAX77693_NUM_IRQ_MUIC_REGS); - - /* Apply masking */ - for (i = 0; i < MAX77693_IRQ_GROUP_NR; i++) { - if (i >= MUIC_INT1 && i <= MUIC_INT3) - irq_reg[i] &= max77693->irq_masks_cur[i]; - else - irq_reg[i] &= ~max77693->irq_masks_cur[i]; - } - - /* Report */ - for (i = 0; i < MAX77693_IRQ_NR; i++) { - if (irq_reg[max77693_irqs[i].group] & max77693_irqs[i].mask) { - cur_irq = irq_find_mapping(max77693->irq_domain, i); - if (cur_irq) - handle_nested_irq(cur_irq); - } - } - - return IRQ_HANDLED; -} - -int max77693_irq_resume(struct max77693_dev *max77693) -{ - if (max77693->irq) - max77693_irq_thread(0, max77693); - - return 0; -} - -static int max77693_irq_domain_map(struct irq_domain *d, unsigned int irq, - irq_hw_number_t hw) -{ - struct max77693_dev *max77693 = d->host_data; - - irq_set_chip_data(irq, max77693); - irq_set_chip_and_handler(irq, &max77693_irq_chip, handle_edge_irq); - irq_set_nested_thread(irq, 1); -#ifdef CONFIG_ARM - set_irq_flags(irq, IRQF_VALID); -#else - irq_set_noprobe(irq); -#endif - return 0; -} - -static struct irq_domain_ops max77693_irq_domain_ops = { - .map = max77693_irq_domain_map, -}; - -int max77693_irq_init(struct max77693_dev *max77693) -{ - struct irq_domain *domain; - int i; - int ret = 0; - unsigned int intsrc_mask; - - mutex_init(&max77693->irqlock); - - /* Mask individual interrupt sources */ - for (i = 0; i < MAX77693_IRQ_GROUP_NR; i++) { - struct regmap *map; - /* MUIC IRQ 0:MASK 1:NOT MASK */ - /* Other IRQ 1:MASK 0:NOT MASK */ - if (i >= MUIC_INT1 && i <= MUIC_INT3) { - max77693->irq_masks_cur[i] = 0x00; - 
max77693->irq_masks_cache[i] = 0x00; - } else { - max77693->irq_masks_cur[i] = 0xff; - max77693->irq_masks_cache[i] = 0xff; - } - map = max77693_get_regmap(max77693, i); - - if (IS_ERR_OR_NULL(map)) - continue; - if (max77693_mask_reg[i] == MAX77693_REG_INVALID) - continue; - if (i >= MUIC_INT1 && i <= MUIC_INT3) - regmap_write(map, max77693_mask_reg[i], 0x00); - else - regmap_write(map, max77693_mask_reg[i], 0xff); - } - - domain = irq_domain_add_linear(NULL, MAX77693_IRQ_NR, - &max77693_irq_domain_ops, max77693); - if (!domain) { - dev_err(max77693->dev, "could not create irq domain\n"); - ret = -ENODEV; - goto err_irq; - } - max77693->irq_domain = domain; - - /* Unmask max77693 interrupt */ - ret = regmap_read(max77693->regmap, - MAX77693_PMIC_REG_INTSRC_MASK, &intsrc_mask); - if (ret < 0) { - dev_err(max77693->dev, "fail to read PMIC register\n"); - goto err_irq; - } - - intsrc_mask &= ~(MAX77693_IRQSRC_CHG); - intsrc_mask &= ~(MAX77693_IRQSRC_FLASH); - intsrc_mask &= ~(MAX77693_IRQSRC_MUIC); - ret = regmap_write(max77693->regmap, - MAX77693_PMIC_REG_INTSRC_MASK, intsrc_mask); - if (ret < 0) { - dev_err(max77693->dev, "fail to write PMIC register\n"); - goto err_irq; - } - - ret = request_threaded_irq(max77693->irq, NULL, max77693_irq_thread, - IRQF_TRIGGER_FALLING | IRQF_ONESHOT, - "max77693-irq", max77693); - if (ret) - dev_err(max77693->dev, "Failed to request IRQ %d: %d\n", - max77693->irq, ret); - -err_irq: - return ret; -} - -void max77693_irq_exit(struct max77693_dev *max77693) -{ - if (max77693->irq) - free_irq(max77693->irq, max77693); -} diff --git a/drivers/mfd/max77693.c b/drivers/mfd/max77693.c index a0308336adaf..249c139ef04a 100644 --- a/drivers/mfd/max77693.c +++ b/drivers/mfd/max77693.c @@ -55,12 +55,95 @@ static const struct regmap_config max77693_regmap_config = { .max_register = MAX77693_PMIC_REG_END, }; +static const struct regmap_irq max77693_led_irqs[] = { + { .mask = LED_IRQ_FLED2_OPEN, }, + { .mask = LED_IRQ_FLED2_SHORT, }, + { .mask = LED_IRQ_FLED1_OPEN, }, + { .mask = LED_IRQ_FLED1_SHORT, }, + { .mask = LED_IRQ_MAX_FLASH, }, +}; + +static const struct regmap_irq_chip max77693_led_irq_chip = { + .name = "max77693-led", + .status_base = MAX77693_LED_REG_FLASH_INT, + .mask_base = MAX77693_LED_REG_FLASH_INT_MASK, + .mask_invert = false, + .num_regs = 1, + .irqs = max77693_led_irqs, + .num_irqs = ARRAY_SIZE(max77693_led_irqs), +}; + +static const struct regmap_irq max77693_topsys_irqs[] = { + { .mask = TOPSYS_IRQ_T120C_INT, }, + { .mask = TOPSYS_IRQ_T140C_INT, }, + { .mask = TOPSYS_IRQ_LOWSYS_INT, }, +}; + +static const struct regmap_irq_chip max77693_topsys_irq_chip = { + .name = "max77693-topsys", + .status_base = MAX77693_PMIC_REG_TOPSYS_INT, + .mask_base = MAX77693_PMIC_REG_TOPSYS_INT_MASK, + .mask_invert = false, + .num_regs = 1, + .irqs = max77693_topsys_irqs, + .num_irqs = ARRAY_SIZE(max77693_topsys_irqs), +}; + +static const struct regmap_irq max77693_charger_irqs[] = { + { .mask = CHG_IRQ_BYP_I, }, + { .mask = CHG_IRQ_THM_I, }, + { .mask = CHG_IRQ_BAT_I, }, + { .mask = CHG_IRQ_CHG_I, }, + { .mask = CHG_IRQ_CHGIN_I, }, +}; + +static const struct regmap_irq_chip max77693_charger_irq_chip = { + .name = "max77693-charger", + .status_base = MAX77693_CHG_REG_CHG_INT, + .mask_base = MAX77693_CHG_REG_CHG_INT_MASK, + .mask_invert = false, + .num_regs = 1, + .irqs = max77693_charger_irqs, + .num_irqs = ARRAY_SIZE(max77693_charger_irqs), +}; + static const struct regmap_config max77693_regmap_muic_config = { .reg_bits = 8, .val_bits = 8, .max_register = 
MAX77693_MUIC_REG_END, }; +static const struct regmap_irq max77693_muic_irqs[] = { + { .reg_offset = 0, .mask = MUIC_IRQ_INT1_ADC, }, + { .reg_offset = 0, .mask = MUIC_IRQ_INT1_ADC_LOW, }, + { .reg_offset = 0, .mask = MUIC_IRQ_INT1_ADC_ERR, }, + { .reg_offset = 0, .mask = MUIC_IRQ_INT1_ADC1K, }, + + { .reg_offset = 1, .mask = MUIC_IRQ_INT2_CHGTYP, }, + { .reg_offset = 1, .mask = MUIC_IRQ_INT2_CHGDETREUN, }, + { .reg_offset = 1, .mask = MUIC_IRQ_INT2_DCDTMR, }, + { .reg_offset = 1, .mask = MUIC_IRQ_INT2_DXOVP, }, + { .reg_offset = 1, .mask = MUIC_IRQ_INT2_VBVOLT, }, + { .reg_offset = 1, .mask = MUIC_IRQ_INT2_VIDRM, }, + + { .reg_offset = 2, .mask = MUIC_IRQ_INT3_EOC, }, + { .reg_offset = 2, .mask = MUIC_IRQ_INT3_CGMBC, }, + { .reg_offset = 2, .mask = MUIC_IRQ_INT3_OVP, }, + { .reg_offset = 2, .mask = MUIC_IRQ_INT3_MBCCHG_ERR, }, + { .reg_offset = 2, .mask = MUIC_IRQ_INT3_CHG_ENABLED, }, + { .reg_offset = 2, .mask = MUIC_IRQ_INT3_BAT_DET, }, +}; + +static const struct regmap_irq_chip max77693_muic_irq_chip = { + .name = "max77693-muic", + .status_base = MAX77693_MUIC_REG_INT1, + .mask_base = MAX77693_MUIC_REG_INTMASK1, + .mask_invert = true, + .num_regs = 3, + .irqs = max77693_muic_irqs, + .num_irqs = ARRAY_SIZE(max77693_muic_irqs), +}; + static int max77693_i2c_probe(struct i2c_client *i2c, const struct i2c_device_id *id) { @@ -124,9 +207,45 @@ static int max77693_i2c_probe(struct i2c_client *i2c, goto err_regmap_muic; } - ret = max77693_irq_init(max77693); - if (ret < 0) - goto err_irq; + ret = regmap_add_irq_chip(max77693->regmap, max77693->irq, + IRQF_ONESHOT | IRQF_SHARED | + IRQF_TRIGGER_FALLING, 0, + &max77693_led_irq_chip, + &max77693->irq_data_led); + if (ret) { + dev_err(max77693->dev, "failed to add irq chip: %d\n", ret); + goto err_regmap_muic; + } + + ret = regmap_add_irq_chip(max77693->regmap, max77693->irq, + IRQF_ONESHOT | IRQF_SHARED | + IRQF_TRIGGER_FALLING, 0, + &max77693_topsys_irq_chip, + &max77693->irq_data_topsys); + if (ret) { + dev_err(max77693->dev, "failed to add irq chip: %d\n", ret); + goto err_irq_topsys; + } + + ret = regmap_add_irq_chip(max77693->regmap, max77693->irq, + IRQF_ONESHOT | IRQF_SHARED | + IRQF_TRIGGER_FALLING, 0, + &max77693_charger_irq_chip, + &max77693->irq_data_charger); + if (ret) { + dev_err(max77693->dev, "failed to add irq chip: %d\n", ret); + goto err_irq_charger; + } + + ret = regmap_add_irq_chip(max77693->regmap, max77693->irq, + IRQF_ONESHOT | IRQF_SHARED | + IRQF_TRIGGER_FALLING, 0, + &max77693_muic_irq_chip, + &max77693->irq_data_muic); + if (ret) { + dev_err(max77693->dev, "failed to add irq chip: %d\n", ret); + goto err_irq_muic; + } pm_runtime_set_active(max77693->dev); @@ -138,8 +257,14 @@ static int max77693_i2c_probe(struct i2c_client *i2c, return ret; err_mfd: - max77693_irq_exit(max77693); -err_irq: + mfd_remove_devices(max77693->dev); + regmap_del_irq_chip(max77693->irq, max77693->irq_data_muic); +err_irq_muic: + regmap_del_irq_chip(max77693->irq, max77693->irq_data_charger); +err_irq_charger: + regmap_del_irq_chip(max77693->irq, max77693->irq_data_topsys); +err_irq_topsys: + regmap_del_irq_chip(max77693->irq, max77693->irq_data_led); err_regmap_muic: i2c_unregister_device(max77693->haptic); err_i2c_haptic: @@ -152,7 +277,12 @@ static int max77693_i2c_remove(struct i2c_client *i2c) struct max77693_dev *max77693 = i2c_get_clientdata(i2c); mfd_remove_devices(max77693->dev); - max77693_irq_exit(max77693); + + regmap_del_irq_chip(max77693->irq, max77693->irq_data_muic); + regmap_del_irq_chip(max77693->irq, 
max77693->irq_data_charger); + regmap_del_irq_chip(max77693->irq, max77693->irq_data_topsys); + regmap_del_irq_chip(max77693->irq, max77693->irq_data_led); + i2c_unregister_device(max77693->muic); i2c_unregister_device(max77693->haptic); @@ -170,8 +300,11 @@ static int max77693_suspend(struct device *dev) struct i2c_client *i2c = container_of(dev, struct i2c_client, dev); struct max77693_dev *max77693 = i2c_get_clientdata(i2c); - if (device_may_wakeup(dev)) - irq_set_irq_wake(max77693->irq, 1); + if (device_may_wakeup(dev)) { + enable_irq_wake(max77693->irq); + disable_irq(max77693->irq); + } + return 0; } @@ -180,9 +313,12 @@ static int max77693_resume(struct device *dev) struct i2c_client *i2c = container_of(dev, struct i2c_client, dev); struct max77693_dev *max77693 = i2c_get_clientdata(i2c); - if (device_may_wakeup(dev)) - irq_set_irq_wake(max77693->irq, 0); - return max77693_irq_resume(max77693); + if (device_may_wakeup(dev)) { + disable_irq_wake(max77693->irq); + enable_irq(max77693->irq); + } + + return 0; } static const struct dev_pm_ops max77693_pm = { diff --git a/include/linux/mfd/max77693-private.h b/include/linux/mfd/max77693-private.h index 80ec31d561c4..c466ff3e16b8 100644 --- a/include/linux/mfd/max77693-private.h +++ b/include/linux/mfd/max77693-private.h @@ -262,6 +262,41 @@ enum max77693_irq_source { MAX77693_IRQ_GROUP_NR, }; +#define LED_IRQ_FLED2_OPEN BIT(0) +#define LED_IRQ_FLED2_SHORT BIT(1) +#define LED_IRQ_FLED1_OPEN BIT(2) +#define LED_IRQ_FLED1_SHORT BIT(3) +#define LED_IRQ_MAX_FLASH BIT(4) + +#define TOPSYS_IRQ_T120C_INT BIT(0) +#define TOPSYS_IRQ_T140C_INT BIT(1) +#define TOPSYS_IRQ_LOWSYS_INT BIT(3) + +#define CHG_IRQ_BYP_I BIT(0) +#define CHG_IRQ_THM_I BIT(2) +#define CHG_IRQ_BAT_I BIT(3) +#define CHG_IRQ_CHG_I BIT(4) +#define CHG_IRQ_CHGIN_I BIT(6) + +#define MUIC_IRQ_INT1_ADC BIT(0) +#define MUIC_IRQ_INT1_ADC_LOW BIT(1) +#define MUIC_IRQ_INT1_ADC_ERR BIT(2) +#define MUIC_IRQ_INT1_ADC1K BIT(3) + +#define MUIC_IRQ_INT2_CHGTYP BIT(0) +#define MUIC_IRQ_INT2_CHGDETREUN BIT(1) +#define MUIC_IRQ_INT2_DCDTMR BIT(2) +#define MUIC_IRQ_INT2_DXOVP BIT(3) +#define MUIC_IRQ_INT2_VBVOLT BIT(4) +#define MUIC_IRQ_INT2_VIDRM BIT(5) + +#define MUIC_IRQ_INT3_EOC BIT(0) +#define MUIC_IRQ_INT3_CGMBC BIT(1) +#define MUIC_IRQ_INT3_OVP BIT(2) +#define MUIC_IRQ_INT3_MBCCHG_ERR BIT(3) +#define MUIC_IRQ_INT3_CHG_ENABLED BIT(4) +#define MUIC_IRQ_INT3_BAT_DET BIT(5) + enum max77693_irq { /* PMIC - FLASH */ MAX77693_LED_IRQ_FLED2_OPEN, @@ -282,6 +317,10 @@ enum max77693_irq { MAX77693_CHG_IRQ_CHG_I, MAX77693_CHG_IRQ_CHGIN_I, + MAX77693_IRQ_NR, +}; + +enum max77693_irq_muic { /* MUIC INT1 */ MAX77693_MUIC_IRQ_INT1_ADC, MAX77693_MUIC_IRQ_INT1_ADC_LOW, @@ -304,7 +343,7 @@ enum max77693_irq { MAX77693_MUIC_IRQ_INT3_CHG_ENABLED, MAX77693_MUIC_IRQ_INT3_BAT_DET, - MAX77693_IRQ_NR, + MAX77693_MUIC_IRQ_NR, }; struct max77693_dev { @@ -319,7 +358,10 @@ struct max77693_dev { struct regmap *regmap_muic; struct regmap *regmap_haptic; - struct irq_domain *irq_domain; + struct regmap_irq_chip_data *irq_data_led; + struct regmap_irq_chip_data *irq_data_topsys; + struct regmap_irq_chip_data *irq_data_charger; + struct regmap_irq_chip_data *irq_data_muic; int irq; int irq_gpio; -- cgit v1.2.3-59-g8ed1b From 5165238460068e53c740eaa621ebb6623dc4a50d Mon Sep 17 00:00:00 2001 From: "Zhu, Lejun" Date: Tue, 3 Jun 2014 13:26:02 +0800 Subject: mfd: intel_soc_pmic: Core driver This patch provides the common I2C driver code for Intel SoC PMICs. 
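As a rough illustration of how a device-specific backend is expected to plug into this core (a sketch only, not part of the patch): a Crystal Cove style file would define the intel_soc_pmic_config_crc object that the INT33FD ACPI entry below points at. Only the struct layout is taken from intel_soc_pmic_core.h; the cell names, register addresses and interrupt bits here are made-up placeholders.

#include <linux/kernel.h>
#include <linux/bitops.h>
#include <linux/interrupt.h>
#include <linux/mfd/core.h>
#include <linux/regmap.h>
#include "intel_soc_pmic_core.h"

/* Placeholder MFD cells; the real child device names would go here. */
static struct mfd_cell crc_dev_cells[] = {
	{ .name = "crystal_cove_pwrsrc", },
	{ .name = "crystal_cove_gpio", },
};

static struct regmap_config crc_regmap_config = {
	.reg_bits = 8,			/* assumed 8-bit register map */
	.val_bits = 8,
	.max_register = 0xff,		/* placeholder */
};

/* Placeholder interrupt layout for the regmap-irq controller. */
static const struct regmap_irq crc_irqs[] = {
	{ .mask = BIT(0), },
	{ .mask = BIT(1), },
};

static struct regmap_irq_chip crc_irq_chip = {
	.name		= "crystal_cove",
	.irqs		= crc_irqs,
	.num_irqs	= ARRAY_SIZE(crc_irqs),
	.num_regs	= 1,
	.status_base	= 0x02,		/* placeholder registers */
	.mask_base	= 0x0e,
};

/* Looked up by the core driver through its ACPI match table. */
struct intel_soc_pmic_config intel_soc_pmic_config_crc = {
	.irq_flags	= IRQF_TRIGGER_RISING,
	.cell_dev	= crc_dev_cells,
	.n_cell_devs	= ARRAY_SIZE(crc_dev_cells),
	.regmap_config	= &crc_regmap_config,
	.irq_chip	= &crc_irq_chip,
};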
Signed-off-by: Yang, Bin Signed-off-by: Zhu, Lejun Signed-off-by: Lee Jones --- drivers/mfd/intel_soc_pmic_core.c | 168 +++++++++++++++++++++++++++++++++++++ drivers/mfd/intel_soc_pmic_core.h | 32 +++++++ include/linux/mfd/intel_soc_pmic.h | 30 +++++++ 3 files changed, 230 insertions(+) create mode 100644 drivers/mfd/intel_soc_pmic_core.c create mode 100644 drivers/mfd/intel_soc_pmic_core.h create mode 100644 include/linux/mfd/intel_soc_pmic.h (limited to 'include/linux') diff --git a/drivers/mfd/intel_soc_pmic_core.c b/drivers/mfd/intel_soc_pmic_core.c new file mode 100644 index 000000000000..cddbf5a72f89 --- /dev/null +++ b/drivers/mfd/intel_soc_pmic_core.c @@ -0,0 +1,168 @@ +/* + * intel_soc_pmic_core.c - Intel SoC PMIC MFD Driver + * + * Copyright (C) 2013, 2014 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Author: Yang, Bin + * Author: Zhu, Lejun + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "intel_soc_pmic_core.h" + +/* + * On some boards the PMIC interrupt may come from a GPIO line. + * Try to lookup the ACPI table and see if such connection exists. If not, + * return -ENOENT and use the IRQ provided by I2C. + */ +static int intel_soc_pmic_find_gpio_irq(struct device *dev) +{ + struct gpio_desc *desc; + int irq; + + desc = devm_gpiod_get_index(dev, "intel_soc_pmic", 0); + if (IS_ERR(desc)) + return -ENOENT; + + irq = gpiod_to_irq(desc); + if (irq < 0) + dev_warn(dev, "Can't get irq: %d\n", irq); + + return irq; +} + +static int intel_soc_pmic_i2c_probe(struct i2c_client *i2c, + const struct i2c_device_id *i2c_id) +{ + struct device *dev = &i2c->dev; + const struct acpi_device_id *id; + struct intel_soc_pmic_config *config; + struct intel_soc_pmic *pmic; + int ret; + int irq; + + id = acpi_match_device(dev->driver->acpi_match_table, dev); + if (!id || !id->driver_data) + return -ENODEV; + + config = (struct intel_soc_pmic_config *)id->driver_data; + + pmic = devm_kzalloc(dev, sizeof(*pmic), GFP_KERNEL); + dev_set_drvdata(dev, pmic); + + pmic->regmap = devm_regmap_init_i2c(i2c, config->regmap_config); + + irq = intel_soc_pmic_find_gpio_irq(dev); + pmic->irq = (irq < 0) ? 
i2c->irq : irq; + + ret = regmap_add_irq_chip(pmic->regmap, pmic->irq, + config->irq_flags | IRQF_ONESHOT, + 0, config->irq_chip, + &pmic->irq_chip_data); + if (ret) + return ret; + + ret = enable_irq_wake(pmic->irq); + if (ret) + dev_warn(dev, "Can't enable IRQ as wake source: %d\n", ret); + + ret = mfd_add_devices(dev, -1, config->cell_dev, + config->n_cell_devs, NULL, 0, + regmap_irq_get_domain(pmic->irq_chip_data)); + if (ret) + goto err_del_irq_chip; + + return 0; + +err_del_irq_chip: + regmap_del_irq_chip(pmic->irq, pmic->irq_chip_data); + return ret; +} + +static int intel_soc_pmic_i2c_remove(struct i2c_client *i2c) +{ + struct intel_soc_pmic *pmic = dev_get_drvdata(&i2c->dev); + + regmap_del_irq_chip(pmic->irq, pmic->irq_chip_data); + + mfd_remove_devices(&i2c->dev); + + return 0; +} + +static void intel_soc_pmic_shutdown(struct i2c_client *i2c) +{ + struct intel_soc_pmic *pmic = dev_get_drvdata(&i2c->dev); + + disable_irq(pmic->irq); + + return; +} + +static int intel_soc_pmic_suspend(struct device *dev) +{ + struct intel_soc_pmic *pmic = dev_get_drvdata(dev); + + disable_irq(pmic->irq); + + return 0; +} + +static int intel_soc_pmic_resume(struct device *dev) +{ + struct intel_soc_pmic *pmic = dev_get_drvdata(dev); + + enable_irq(pmic->irq); + + return 0; +} + +static SIMPLE_DEV_PM_OPS(intel_soc_pmic_pm_ops, intel_soc_pmic_suspend, + intel_soc_pmic_resume); + +static const struct i2c_device_id intel_soc_pmic_i2c_id[] = { + { } +}; +MODULE_DEVICE_TABLE(i2c, intel_soc_pmic_i2c_id); + +static struct acpi_device_id intel_soc_pmic_acpi_match[] = { + {"INT33FD", (kernel_ulong_t)&intel_soc_pmic_config_crc}, + { }, +}; +MODULE_DEVICE_TABLE(acpi, intel_soc_pmic_acpi_match); + +static struct i2c_driver intel_soc_pmic_i2c_driver = { + .driver = { + .name = "intel_soc_pmic_i2c", + .owner = THIS_MODULE, + .pm = &intel_soc_pmic_pm_ops, + .acpi_match_table = ACPI_PTR(intel_soc_pmic_acpi_match), + }, + .probe = intel_soc_pmic_i2c_probe, + .remove = intel_soc_pmic_i2c_remove, + .id_table = intel_soc_pmic_i2c_id, + .shutdown = intel_soc_pmic_shutdown, +}; + +module_i2c_driver(intel_soc_pmic_i2c_driver); + +MODULE_DESCRIPTION("I2C driver for Intel SoC PMIC"); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Yang, Bin "); +MODULE_AUTHOR("Zhu, Lejun "); diff --git a/drivers/mfd/intel_soc_pmic_core.h b/drivers/mfd/intel_soc_pmic_core.h new file mode 100644 index 000000000000..33aacd9baddc --- /dev/null +++ b/drivers/mfd/intel_soc_pmic_core.h @@ -0,0 +1,32 @@ +/* + * intel_soc_pmic_core.h - Intel SoC PMIC MFD Driver + * + * Copyright (C) 2012-2014 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * Author: Yang, Bin + * Author: Zhu, Lejun + */ + +#ifndef __INTEL_SOC_PMIC_CORE_H__ +#define __INTEL_SOC_PMIC_CORE_H__ + +struct intel_soc_pmic_config { + unsigned long irq_flags; + struct mfd_cell *cell_dev; + int n_cell_devs; + struct regmap_config *regmap_config; + struct regmap_irq_chip *irq_chip; +}; + +extern struct intel_soc_pmic_config intel_soc_pmic_config_crc; + +#endif /* __INTEL_SOC_PMIC_CORE_H__ */ diff --git a/include/linux/mfd/intel_soc_pmic.h b/include/linux/mfd/intel_soc_pmic.h new file mode 100644 index 000000000000..abcbfcf32d10 --- /dev/null +++ b/include/linux/mfd/intel_soc_pmic.h @@ -0,0 +1,30 @@ +/* + * intel_soc_pmic.h - Intel SoC PMIC Driver + * + * Copyright (C) 2012-2014 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Author: Yang, Bin + * Author: Zhu, Lejun + */ + +#ifndef __INTEL_SOC_PMIC_H__ +#define __INTEL_SOC_PMIC_H__ + +#include + +struct intel_soc_pmic { + int irq; + struct regmap *regmap; + struct regmap_irq_chip_data *irq_chip_data; +}; + +#endif /* __INTEL_SOC_PMIC_H__ */ -- cgit v1.2.3-59-g8ed1b From 62fde54123fb64879326c8b71c3f92cc5db1c452 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 17 Jun 2014 19:12:34 -0400 Subject: percpu: include/asm-generic/percpu.h should contain only arch-overridable parts The roles of the various percpu header files has become unclear. There are four header files involved. include/linux/percpu-defs.h include/linux/percpu.h include/asm-generic/percpu.h arch/*/include/asm/percpu.h The original intention for include/asm-generic/percpu.h is providing generic definitions for arch-overridable parts; however, it now hosts various stuff which can't be overridden by archs. Also, include/linux/percpu-defs.h was initially added to contain section and percpu variable definition macros so that arch header files can make use of them without worrying about introducing cyclic inclusion dependency by including include/linux/percpu.h; however, arch headers sometimes need to access percpu variables too and this is one of the reasons why some accessors were implemented in include/linux/asm-generic/percpu.h. Let's clear up the situation by making include/asm-generic/percpu.h contain only arch-overridable parts and moving accessors and operations into include/linux/percpu-defs. Note that this patch only moves things from include/asm-generic/percpu.h. include/linux/percpu.h will be taken care of by later patches. This patch moves the followings. * SHIFT_PERCPU_PTR() / VERIFY_PERCPU_PTR() * per_cpu() * raw_cpu_ptr() * this_cpu_ptr() * __get_cpu_var() * __raw_get_cpu_var() * __this_cpu_ptr() * PER_CPU_[SHARED_]ALIGNED_SECTION * PER_CPU_[SHARED_]ALIGNED_SECTION * PER_CPU_FIRST_SECTION This patch is pure reorganization. 
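Because the accessors are only relocated, code that uses them needs no changes; as a quick reminder of what the moved macros do, here is a minimal usage sketch (the per-cpu variable is hypothetical and not something this patch touches):

#include <linux/percpu.h>
#include <linux/preempt.h>

static DEFINE_PER_CPU(unsigned long, demo_counter);	/* hypothetical */

static void demo_bump(void)
{
	preempt_disable();
	(*this_cpu_ptr(&demo_counter))++;	/* this_cpu_ptr() now lives in percpu-defs.h */
	preempt_enable();
}

static unsigned long demo_read(int cpu)
{
	return per_cpu(demo_counter, cpu);	/* per_cpu() likewise moved */
}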
Signed-off-by: Tejun Heo Acked-by: Christoph Lameter --- include/asm-generic/percpu.h | 64 ------------------------------- include/linux/percpu-defs.h | 89 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 89 insertions(+), 64 deletions(-) (limited to 'include/linux') diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index a247d80b6630..e5ace4d49084 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -35,24 +35,6 @@ extern unsigned long __per_cpu_offset[NR_CPUS]; #define my_cpu_offset __my_cpu_offset #endif -/* - * Add an offset to a pointer but keep the pointer as-is. Use RELOC_HIDE() - * to prevent the compiler from making incorrect assumptions about the - * pointer value. The weird cast keeps both GCC and sparse happy. - */ -#define SHIFT_PERCPU_PTR(__p, __offset) ({ \ - __verify_pcpu_ptr((__p)); \ - RELOC_HIDE((typeof(*(__p)) __kernel __force *)(__p), (__offset)); \ -}) - -/* - * A percpu variable may point to a discarded regions. The following are - * established ways to produce a usable pointer from the percpu variable - * offset. - */ -#define per_cpu(var, cpu) \ - (*SHIFT_PERCPU_PTR(&(var), per_cpu_offset(cpu))) - /* * Arch may define arch_raw_cpu_ptr() to provide more efficient address * translations for raw_cpu_ptr(). @@ -61,34 +43,10 @@ extern unsigned long __per_cpu_offset[NR_CPUS]; #define arch_raw_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, __my_cpu_offset) #endif -#define raw_cpu_ptr(ptr) arch_raw_cpu_ptr(ptr) - -#ifdef CONFIG_DEBUG_PREEMPT -#define this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, my_cpu_offset) -#else -#define this_cpu_ptr(ptr) raw_cpu_ptr(ptr) -#endif - -#define __get_cpu_var(var) (*this_cpu_ptr(&(var))) -#define __raw_get_cpu_var(var) (*raw_cpu_ptr(&(var))) - #ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA extern void setup_per_cpu_areas(void); #endif -#else /* ! SMP */ - -#define VERIFY_PERCPU_PTR(__p) ({ \ - __verify_pcpu_ptr((__p)); \ - (typeof(*(__p)) __kernel __force *)(__p); \ -}) - -#define per_cpu(var, cpu) (*((void)(cpu), VERIFY_PERCPU_PTR(&(var)))) -#define __get_cpu_var(var) (*VERIFY_PERCPU_PTR(&(var))) -#define __raw_get_cpu_var(var) (*VERIFY_PERCPU_PTR(&(var))) -#define this_cpu_ptr(ptr) per_cpu_ptr(ptr, 0) -#define raw_cpu_ptr(ptr) this_cpu_ptr(ptr) - #endif /* SMP */ #ifndef PER_CPU_BASE_SECTION @@ -99,25 +57,6 @@ extern void setup_per_cpu_areas(void); #endif #endif -#ifdef CONFIG_SMP - -#ifdef MODULE -#define PER_CPU_SHARED_ALIGNED_SECTION "" -#define PER_CPU_ALIGNED_SECTION "" -#else -#define PER_CPU_SHARED_ALIGNED_SECTION "..shared_aligned" -#define PER_CPU_ALIGNED_SECTION "..shared_aligned" -#endif -#define PER_CPU_FIRST_SECTION "..first" - -#else - -#define PER_CPU_SHARED_ALIGNED_SECTION "" -#define PER_CPU_ALIGNED_SECTION "..shared_aligned" -#define PER_CPU_FIRST_SECTION "" - -#endif - #ifndef PER_CPU_ATTRIBUTES #define PER_CPU_ATTRIBUTES #endif @@ -126,7 +65,4 @@ extern void setup_per_cpu_areas(void); #define PER_CPU_DEF_ATTRIBUTES #endif -/* Keep until we have removed all uses of __this_cpu_ptr */ -#define __this_cpu_ptr raw_cpu_ptr - #endif /* _ASM_GENERIC_PERCPU_H_ */ diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index a5fc7d01aad6..1a1af3e06a71 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -1,6 +1,40 @@ +/* + * linux/percpu-defs.h - basic definitions for percpu areas + * + * DO NOT INCLUDE DIRECTLY OUTSIDE PERCPU IMPLEMENTATION PROPER. 
+ * + * This file is separate from linux/percpu.h to avoid cyclic inclusion + * dependency from arch header files. Only to be included from + * asm/percpu.h. + * + * This file includes macros necessary to declare percpu sections and + * variables, and definitions of percpu accessors and operations. It + * should provide enough percpu features to arch header files even when + * they can only include asm/percpu.h to avoid cyclic inclusion dependency. + */ + #ifndef _LINUX_PERCPU_DEFS_H #define _LINUX_PERCPU_DEFS_H +#ifdef CONFIG_SMP + +#ifdef MODULE +#define PER_CPU_SHARED_ALIGNED_SECTION "" +#define PER_CPU_ALIGNED_SECTION "" +#else +#define PER_CPU_SHARED_ALIGNED_SECTION "..shared_aligned" +#define PER_CPU_ALIGNED_SECTION "..shared_aligned" +#endif +#define PER_CPU_FIRST_SECTION "..first" + +#else + +#define PER_CPU_SHARED_ALIGNED_SECTION "" +#define PER_CPU_ALIGNED_SECTION "..shared_aligned" +#define PER_CPU_FIRST_SECTION "" + +#endif + /* * Base implementations of per-CPU variable declarations and definitions, where * the section in which the variable is to be placed is provided by the @@ -164,4 +198,59 @@ #define EXPORT_PER_CPU_SYMBOL_GPL(var) #endif +/* + * Accessors and operations. + */ +#ifndef __ASSEMBLY__ + +#ifdef CONFIG_SMP + +/* + * Add an offset to a pointer but keep the pointer as-is. Use RELOC_HIDE() + * to prevent the compiler from making incorrect assumptions about the + * pointer value. The weird cast keeps both GCC and sparse happy. + */ +#define SHIFT_PERCPU_PTR(__p, __offset) ({ \ + __verify_pcpu_ptr((__p)); \ + RELOC_HIDE((typeof(*(__p)) __kernel __force *)(__p), (__offset)); \ +}) + +/* + * A percpu variable may point to a discarded regions. The following are + * established ways to produce a usable pointer from the percpu variable + * offset. + */ +#define per_cpu(var, cpu) \ + (*SHIFT_PERCPU_PTR(&(var), per_cpu_offset(cpu))) + +#define raw_cpu_ptr(ptr) arch_raw_cpu_ptr(ptr) + +#ifdef CONFIG_DEBUG_PREEMPT +#define this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, my_cpu_offset) +#else +#define this_cpu_ptr(ptr) raw_cpu_ptr(ptr) +#endif + +#define __get_cpu_var(var) (*this_cpu_ptr(&(var))) +#define __raw_get_cpu_var(var) (*raw_cpu_ptr(&(var))) + +#else /* CONFIG_SMP */ + +#define VERIFY_PERCPU_PTR(__p) ({ \ + __verify_pcpu_ptr((__p)); \ + (typeof(*(__p)) __kernel __force *)(__p); \ +}) + +#define per_cpu(var, cpu) (*((void)(cpu), VERIFY_PERCPU_PTR(&(var)))) +#define __get_cpu_var(var) (*VERIFY_PERCPU_PTR(&(var))) +#define __raw_get_cpu_var(var) (*VERIFY_PERCPU_PTR(&(var))) +#define this_cpu_ptr(ptr) per_cpu_ptr(ptr, 0) +#define raw_cpu_ptr(ptr) this_cpu_ptr(ptr) + +#endif /* CONFIG_SMP */ + +/* keep until we have removed all uses of __this_cpu_ptr */ +#define __this_cpu_ptr(ptr) raw_cpu_ptr(ptr) + +#endif /* __ASSEMBLY__ */ #endif /* _LINUX_PERCPU_DEFS_H */ -- cgit v1.2.3-59-g8ed1b From 9defda18f913181debfe7cdc8c0a752f707ac861 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 17 Jun 2014 19:12:34 -0400 Subject: percpu: move accessors from include/linux/percpu.h to percpu-defs.h include/linux/percpu-defs.h is gonna host all accessors and operations so that arch headers can make use of them too without worrying about circular dependency through include/linux/percpu.h. This patch moves the following accessors from include/linux/percpu.h to include/linux/percpu-defs.h. * get/put_cpu_var() * get/put_cpu_ptr() * per_cpu_ptr() This is pure reorgniazation. 
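A brief usage sketch of the three accessors being moved (the per-cpu statistic below is hypothetical; this patch does not add it):

#include <linux/percpu.h>
#include <linux/cpumask.h>

static DEFINE_PER_CPU(int, demo_stat);		/* hypothetical */

static void demo_account(void)
{
	get_cpu_var(demo_stat)++;		/* disables preemption around the update */
	put_cpu_var(demo_stat);
}

static void demo_account_ptr(void)
{
	int *p = get_cpu_ptr(&demo_stat);	/* pointer flavour of the same pattern */

	(*p)++;
	put_cpu_ptr(&demo_stat);
}

static int demo_total(void)
{
	int cpu, total = 0;

	for_each_possible_cpu(cpu)
		total += *per_cpu_ptr(&demo_stat, cpu);	/* another CPU's copy */

	return total;
}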
Signed-off-by: Tejun Heo Acked-by: Christoph Lameter --- include/linux/percpu-defs.h | 32 ++++++++++++++++++++++++++++++++ include/linux/percpu.h | 37 ------------------------------------- 2 files changed, 32 insertions(+), 37 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index 1a1af3e06a71..f782f98004db 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -252,5 +252,37 @@ /* keep until we have removed all uses of __this_cpu_ptr */ #define __this_cpu_ptr(ptr) raw_cpu_ptr(ptr) +/* + * Must be an lvalue. Since @var must be a simple identifier, + * we force a syntax error here if it isn't. + */ +#define get_cpu_var(var) (*({ \ + preempt_disable(); \ + this_cpu_ptr(&var); })) + +/* + * The weird & is necessary because sparse considers (void)(var) to be + * a direct dereference of percpu variable (var). + */ +#define put_cpu_var(var) do { \ + (void)&(var); \ + preempt_enable(); \ +} while (0) + +#define get_cpu_ptr(var) ({ \ + preempt_disable(); \ + this_cpu_ptr(var); }) + +#define put_cpu_ptr(var) do { \ + (void)(var); \ + preempt_enable(); \ +} while (0) + +#ifdef CONFIG_SMP +#define per_cpu_ptr(ptr, cpu) SHIFT_PERCPU_PTR((ptr), per_cpu_offset((cpu))) +#else +#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); VERIFY_PERCPU_PTR((ptr)); }) +#endif + #endif /* __ASSEMBLY__ */ #endif /* _LINUX_PERCPU_DEFS_H */ diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 8419053d0f2e..97b207990c45 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -23,32 +23,6 @@ PERCPU_MODULE_RESERVE) #endif -/* - * Must be an lvalue. Since @var must be a simple identifier, - * we force a syntax error here if it isn't. - */ -#define get_cpu_var(var) (*({ \ - preempt_disable(); \ - this_cpu_ptr(&var); })) - -/* - * The weird & is necessary because sparse considers (void)(var) to be - * a direct dereference of percpu variable (var). - */ -#define put_cpu_var(var) do { \ - (void)&(var); \ - preempt_enable(); \ -} while (0) - -#define get_cpu_ptr(var) ({ \ - preempt_disable(); \ - this_cpu_ptr(var); }) - -#define put_cpu_ptr(var) do { \ - (void)(var); \ - preempt_enable(); \ -} while (0) - /* minimum unit size, also is the maximum supported allocation size */ #define PCPU_MIN_UNIT_SIZE PFN_ALIGN(32 << 10) @@ -140,17 +114,6 @@ extern int __init pcpu_page_first_chunk(size_t reserved_size, pcpu_fc_populate_pte_fn_t populate_pte_fn); #endif -/* - * Use this to get to a cpu's version of the per-cpu object - * dynamically allocated. Non-atomic access to the current CPU's - * version should probably be combined with get_cpu()/put_cpu(). - */ -#ifdef CONFIG_SMP -#define per_cpu_ptr(ptr, cpu) SHIFT_PERCPU_PTR((ptr), per_cpu_offset((cpu))) -#else -#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); VERIFY_PERCPU_PTR((ptr)); }) -#endif - extern void __percpu *__alloc_reserved_percpu(size_t size, size_t align); extern bool is_kernel_percpu_address(unsigned long addr); -- cgit v1.2.3-59-g8ed1b From 3b8ed91d6463f48ab180f5ebedc9663eddfa0587 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 17 Jun 2014 19:12:37 -0400 Subject: percpu: reorganize include/linux/percpu-defs.h Reorganize for better readability. * Accessor definitions are collected into one place and SMP and UP now define them in the same order. * Definitions are layered when possible - e.g. per_cpu() is now defined in terms of this_cpu_ptr(). * Rather pointless comment dropped. 
* per_cpu(), __raw_get_cpu_var() and __get_cpu_var() are defined in a way which can be shared between SMP and UP and moved out of CONFIG_SMP blocks. This patch doesn't introduce any functional difference. Signed-off-by: Tejun Heo Cc: Christoph Lameter --- include/linux/percpu-defs.h | 32 +++++++++----------------------- 1 file changed, 9 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index f782f98004db..94cd90afadac 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -215,15 +215,8 @@ RELOC_HIDE((typeof(*(__p)) __kernel __force *)(__p), (__offset)); \ }) -/* - * A percpu variable may point to a discarded regions. The following are - * established ways to produce a usable pointer from the percpu variable - * offset. - */ -#define per_cpu(var, cpu) \ - (*SHIFT_PERCPU_PTR(&(var), per_cpu_offset(cpu))) - -#define raw_cpu_ptr(ptr) arch_raw_cpu_ptr(ptr) +#define per_cpu_ptr(ptr, cpu) SHIFT_PERCPU_PTR((ptr), per_cpu_offset((cpu))) +#define raw_cpu_ptr(ptr) arch_raw_cpu_ptr(ptr) #ifdef CONFIG_DEBUG_PREEMPT #define this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, my_cpu_offset) @@ -231,9 +224,6 @@ #define this_cpu_ptr(ptr) raw_cpu_ptr(ptr) #endif -#define __get_cpu_var(var) (*this_cpu_ptr(&(var))) -#define __raw_get_cpu_var(var) (*raw_cpu_ptr(&(var))) - #else /* CONFIG_SMP */ #define VERIFY_PERCPU_PTR(__p) ({ \ @@ -241,14 +231,16 @@ (typeof(*(__p)) __kernel __force *)(__p); \ }) -#define per_cpu(var, cpu) (*((void)(cpu), VERIFY_PERCPU_PTR(&(var)))) -#define __get_cpu_var(var) (*VERIFY_PERCPU_PTR(&(var))) -#define __raw_get_cpu_var(var) (*VERIFY_PERCPU_PTR(&(var))) -#define this_cpu_ptr(ptr) per_cpu_ptr(ptr, 0) -#define raw_cpu_ptr(ptr) this_cpu_ptr(ptr) +#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); VERIFY_PERCPU_PTR((ptr)); }) +#define raw_cpu_ptr(ptr) per_cpu_ptr(ptr, 0) +#define this_cpu_ptr(ptr) raw_cpu_ptr(ptr) #endif /* CONFIG_SMP */ +#define per_cpu(var, cpu) (*per_cpu_ptr(&(var), cpu)) +#define __raw_get_cpu_var(var) (*raw_cpu_ptr(&(var))) +#define __get_cpu_var(var) (*this_cpu_ptr(&(var))) + /* keep until we have removed all uses of __this_cpu_ptr */ #define __this_cpu_ptr(ptr) raw_cpu_ptr(ptr) @@ -278,11 +270,5 @@ preempt_enable(); \ } while (0) -#ifdef CONFIG_SMP -#define per_cpu_ptr(ptr, cpu) SHIFT_PERCPU_PTR((ptr), per_cpu_offset((cpu))) -#else -#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); VERIFY_PERCPU_PTR((ptr)); }) -#endif - #endif /* __ASSEMBLY__ */ #endif /* _LINUX_PERCPU_DEFS_H */ -- cgit v1.2.3-59-g8ed1b From dcba4333683c3a0642fd575e475c6c740122a037 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 17 Jun 2014 19:12:39 -0400 Subject: percpu: only allow sized arch overrides for {raw|this}_cpu_*() ops Currently, percpu allows two separate methods for overriding {raw|this}_cpu_*() ops - for a given operation, an arch can provide whole replacement or sized sub operations to override specific parts of it. e.g. arch either can provide this_cpu_add() or this_cpu_add_4() to override only the 4 byte operation. While quite flexible on a glance, the dual-overriding scheme complicates the code path for no actual gain. It compilcates the already complex operation definitions and if an arch wants to override all sizes, it can easily provide all variants anyway. In fact, no arch is actually making use of whole operation override. 
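For reference, the sized override style that remains supported looks roughly like the following in an arch's asm/percpu.h. This is a hypothetical sketch whose body merely mirrors the generic irq-safe fallback; a real arch would normally use a single instruction here instead.

/* Provide only the 4-byte add; every other size and operation keeps the
 * generic fallback selected by the #ifndef guards. */
#define this_cpu_add_4(pcp, val)					\
do {									\
	unsigned long __flags;						\
	raw_local_irq_save(__flags);					\
	*raw_cpu_ptr(&(pcp)) += (val);					\
	raw_local_irq_restore(__flags);					\
} while (0)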
Another oddity is that __this_cpu_*() operations are defined in the same way as raw_cpu_*() but ignores full overrides of the raw_cpu_*() and doesn't allow full operation override, so if an arch provides whole overrides for raw_cpu_*() operations __this_cpu_*() ends up using the generic implementations. More importantly, it takes away the layering between arch-specific and generic parts making it impossible for the generic part to implement arch-independent features on top of arch-specific overrides. This patch removes the support for whole operation overrides. As no arch is using it, this doesn't cause any actual difference. Signed-off-by: Tejun Heo Acked-by: Christoph Lameter --- include/linux/percpu.h | 94 +++----------------------------------------------- 1 file changed, 5 insertions(+), 89 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 97b207990c45..95d380e5d246 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -226,17 +226,11 @@ do { \ * safe. Interrupts may occur. If the interrupt modifies the variable * too then RMW actions will not be reliable. * - * The arch code can provide optimized functions in two ways: - * - * 1. Override the function completely. F.e. define this_cpu_add(). - * The arch must then ensure that the various scalar format passed - * are handled correctly. - * - * 2. Provide functions for certain scalar sizes. F.e. provide - * this_cpu_add_2() to provide per cpu atomic operations for 2 byte - * sized RMW actions. If arch code does not provide operations for - * a scalar size then the fallback in the generic code will be - * used. + * The arch code can provide optimized implementation by defining macros + * for certain scalar sizes. F.e. provide this_cpu_add_2() to provide per + * cpu atomic operations for 2 byte sized RMW actions. If arch code does + * not provide operations for a scalar size then the fallback in the + * generic code will be used. 
*/ #define _this_cpu_generic_read(pcp) \ @@ -247,7 +241,6 @@ do { \ ret__; \ }) -#ifndef this_cpu_read # ifndef this_cpu_read_1 # define this_cpu_read_1(pcp) _this_cpu_generic_read(pcp) # endif @@ -261,7 +254,6 @@ do { \ # define this_cpu_read_8(pcp) _this_cpu_generic_read(pcp) # endif # define this_cpu_read(pcp) __pcpu_size_call_return(this_cpu_read_, (pcp)) -#endif #define _this_cpu_generic_to_op(pcp, val, op) \ do { \ @@ -271,7 +263,6 @@ do { \ raw_local_irq_restore(flags); \ } while (0) -#ifndef this_cpu_write # ifndef this_cpu_write_1 # define this_cpu_write_1(pcp, val) _this_cpu_generic_to_op((pcp), (val), =) # endif @@ -285,9 +276,7 @@ do { \ # define this_cpu_write_8(pcp, val) _this_cpu_generic_to_op((pcp), (val), =) # endif # define this_cpu_write(pcp, val) __pcpu_size_call(this_cpu_write_, (pcp), (val)) -#endif -#ifndef this_cpu_add # ifndef this_cpu_add_1 # define this_cpu_add_1(pcp, val) _this_cpu_generic_to_op((pcp), (val), +=) # endif @@ -301,21 +290,11 @@ do { \ # define this_cpu_add_8(pcp, val) _this_cpu_generic_to_op((pcp), (val), +=) # endif # define this_cpu_add(pcp, val) __pcpu_size_call(this_cpu_add_, (pcp), (val)) -#endif -#ifndef this_cpu_sub # define this_cpu_sub(pcp, val) this_cpu_add((pcp), -(typeof(pcp))(val)) -#endif - -#ifndef this_cpu_inc # define this_cpu_inc(pcp) this_cpu_add((pcp), 1) -#endif - -#ifndef this_cpu_dec # define this_cpu_dec(pcp) this_cpu_sub((pcp), 1) -#endif -#ifndef this_cpu_and # ifndef this_cpu_and_1 # define this_cpu_and_1(pcp, val) _this_cpu_generic_to_op((pcp), (val), &=) # endif @@ -329,9 +308,7 @@ do { \ # define this_cpu_and_8(pcp, val) _this_cpu_generic_to_op((pcp), (val), &=) # endif # define this_cpu_and(pcp, val) __pcpu_size_call(this_cpu_and_, (pcp), (val)) -#endif -#ifndef this_cpu_or # ifndef this_cpu_or_1 # define this_cpu_or_1(pcp, val) _this_cpu_generic_to_op((pcp), (val), |=) # endif @@ -345,7 +322,6 @@ do { \ # define this_cpu_or_8(pcp, val) _this_cpu_generic_to_op((pcp), (val), |=) # endif # define this_cpu_or(pcp, val) __pcpu_size_call(this_cpu_or_, (pcp), (val)) -#endif #define _this_cpu_generic_add_return(pcp, val) \ ({ \ @@ -358,7 +334,6 @@ do { \ ret__; \ }) -#ifndef this_cpu_add_return # ifndef this_cpu_add_return_1 # define this_cpu_add_return_1(pcp, val) _this_cpu_generic_add_return(pcp, val) # endif @@ -372,7 +347,6 @@ do { \ # define this_cpu_add_return_8(pcp, val) _this_cpu_generic_add_return(pcp, val) # endif # define this_cpu_add_return(pcp, val) __pcpu_size_call_return2(this_cpu_add_return_, pcp, val) -#endif #define this_cpu_sub_return(pcp, val) this_cpu_add_return(pcp, -(typeof(pcp))(val)) #define this_cpu_inc_return(pcp) this_cpu_add_return(pcp, 1) @@ -388,7 +362,6 @@ do { \ ret__; \ }) -#ifndef this_cpu_xchg # ifndef this_cpu_xchg_1 # define this_cpu_xchg_1(pcp, nval) _this_cpu_generic_xchg(pcp, nval) # endif @@ -403,7 +376,6 @@ do { \ # endif # define this_cpu_xchg(pcp, nval) \ __pcpu_size_call_return2(this_cpu_xchg_, (pcp), nval) -#endif #define _this_cpu_generic_cmpxchg(pcp, oval, nval) \ ({ \ @@ -417,7 +389,6 @@ do { \ ret__; \ }) -#ifndef this_cpu_cmpxchg # ifndef this_cpu_cmpxchg_1 # define this_cpu_cmpxchg_1(pcp, oval, nval) _this_cpu_generic_cmpxchg(pcp, oval, nval) # endif @@ -432,7 +403,6 @@ do { \ # endif # define this_cpu_cmpxchg(pcp, oval, nval) \ __pcpu_size_call_return2(this_cpu_cmpxchg_, pcp, oval, nval) -#endif /* * cmpxchg_double replaces two adjacent scalars at once. 
The first @@ -453,7 +423,6 @@ do { \ ret__; \ }) -#ifndef this_cpu_cmpxchg_double # ifndef this_cpu_cmpxchg_double_1 # define this_cpu_cmpxchg_double_1(pcp1, pcp2, oval1, oval2, nval1, nval2) \ _this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) @@ -472,7 +441,6 @@ do { \ # endif # define this_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ __pcpu_double_call_return_bool(this_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2)) -#endif /* * Generic percpu operations for contexts where we do not want to do @@ -484,7 +452,6 @@ do { \ * or an interrupt occurred and the same percpu variable was modified from * the interrupt context. */ -#ifndef raw_cpu_read # ifndef raw_cpu_read_1 # define raw_cpu_read_1(pcp) (*raw_cpu_ptr(&(pcp))) # endif @@ -498,15 +465,12 @@ do { \ # define raw_cpu_read_8(pcp) (*raw_cpu_ptr(&(pcp))) # endif # define raw_cpu_read(pcp) __pcpu_size_call_return(raw_cpu_read_, (pcp)) -#endif #define raw_cpu_generic_to_op(pcp, val, op) \ do { \ *raw_cpu_ptr(&(pcp)) op val; \ } while (0) - -#ifndef raw_cpu_write # ifndef raw_cpu_write_1 # define raw_cpu_write_1(pcp, val) raw_cpu_generic_to_op((pcp), (val), =) # endif @@ -520,9 +484,7 @@ do { \ # define raw_cpu_write_8(pcp, val) raw_cpu_generic_to_op((pcp), (val), =) # endif # define raw_cpu_write(pcp, val) __pcpu_size_call(raw_cpu_write_, (pcp), (val)) -#endif -#ifndef raw_cpu_add # ifndef raw_cpu_add_1 # define raw_cpu_add_1(pcp, val) raw_cpu_generic_to_op((pcp), (val), +=) # endif @@ -536,21 +498,13 @@ do { \ # define raw_cpu_add_8(pcp, val) raw_cpu_generic_to_op((pcp), (val), +=) # endif # define raw_cpu_add(pcp, val) __pcpu_size_call(raw_cpu_add_, (pcp), (val)) -#endif -#ifndef raw_cpu_sub # define raw_cpu_sub(pcp, val) raw_cpu_add((pcp), -(val)) -#endif -#ifndef raw_cpu_inc # define raw_cpu_inc(pcp) raw_cpu_add((pcp), 1) -#endif -#ifndef raw_cpu_dec # define raw_cpu_dec(pcp) raw_cpu_sub((pcp), 1) -#endif -#ifndef raw_cpu_and # ifndef raw_cpu_and_1 # define raw_cpu_and_1(pcp, val) raw_cpu_generic_to_op((pcp), (val), &=) # endif @@ -564,9 +518,7 @@ do { \ # define raw_cpu_and_8(pcp, val) raw_cpu_generic_to_op((pcp), (val), &=) # endif # define raw_cpu_and(pcp, val) __pcpu_size_call(raw_cpu_and_, (pcp), (val)) -#endif -#ifndef raw_cpu_or # ifndef raw_cpu_or_1 # define raw_cpu_or_1(pcp, val) raw_cpu_generic_to_op((pcp), (val), |=) # endif @@ -580,7 +532,6 @@ do { \ # define raw_cpu_or_8(pcp, val) raw_cpu_generic_to_op((pcp), (val), |=) # endif # define raw_cpu_or(pcp, val) __pcpu_size_call(raw_cpu_or_, (pcp), (val)) -#endif #define raw_cpu_generic_add_return(pcp, val) \ ({ \ @@ -588,7 +539,6 @@ do { \ raw_cpu_read(pcp); \ }) -#ifndef raw_cpu_add_return # ifndef raw_cpu_add_return_1 # define raw_cpu_add_return_1(pcp, val) raw_cpu_generic_add_return(pcp, val) # endif @@ -603,7 +553,6 @@ do { \ # endif # define raw_cpu_add_return(pcp, val) \ __pcpu_size_call_return2(raw_cpu_add_return_, pcp, val) -#endif #define raw_cpu_sub_return(pcp, val) raw_cpu_add_return(pcp, -(typeof(pcp))(val)) #define raw_cpu_inc_return(pcp) raw_cpu_add_return(pcp, 1) @@ -616,7 +565,6 @@ do { \ ret__; \ }) -#ifndef raw_cpu_xchg # ifndef raw_cpu_xchg_1 # define raw_cpu_xchg_1(pcp, nval) raw_cpu_generic_xchg(pcp, nval) # endif @@ -631,7 +579,6 @@ do { \ # endif # define raw_cpu_xchg(pcp, nval) \ __pcpu_size_call_return2(raw_cpu_xchg_, (pcp), nval) -#endif #define raw_cpu_generic_cmpxchg(pcp, oval, nval) \ ({ \ @@ -642,7 +589,6 @@ do { \ ret__; \ }) -#ifndef raw_cpu_cmpxchg # ifndef raw_cpu_cmpxchg_1 # 
define raw_cpu_cmpxchg_1(pcp, oval, nval) raw_cpu_generic_cmpxchg(pcp, oval, nval) # endif @@ -657,7 +603,6 @@ do { \ # endif # define raw_cpu_cmpxchg(pcp, oval, nval) \ __pcpu_size_call_return2(raw_cpu_cmpxchg_, pcp, oval, nval) -#endif #define raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ ({ \ @@ -671,7 +616,6 @@ do { \ (__ret); \ }) -#ifndef raw_cpu_cmpxchg_double # ifndef raw_cpu_cmpxchg_double_1 # define raw_cpu_cmpxchg_double_1(pcp1, pcp2, oval1, oval2, nval1, nval2) \ raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) @@ -690,79 +634,51 @@ do { \ # endif # define raw_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ __pcpu_double_call_return_bool(raw_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2)) -#endif /* * Generic percpu operations for context that are safe from preemption/interrupts. */ -#ifndef __this_cpu_read # define __this_cpu_read(pcp) \ (__this_cpu_preempt_check("read"),__pcpu_size_call_return(raw_cpu_read_, (pcp))) -#endif -#ifndef __this_cpu_write # define __this_cpu_write(pcp, val) \ do { __this_cpu_preempt_check("write"); \ __pcpu_size_call(raw_cpu_write_, (pcp), (val)); \ } while (0) -#endif -#ifndef __this_cpu_add # define __this_cpu_add(pcp, val) \ do { __this_cpu_preempt_check("add"); \ __pcpu_size_call(raw_cpu_add_, (pcp), (val)); \ } while (0) -#endif -#ifndef __this_cpu_sub # define __this_cpu_sub(pcp, val) __this_cpu_add((pcp), -(typeof(pcp))(val)) -#endif - -#ifndef __this_cpu_inc # define __this_cpu_inc(pcp) __this_cpu_add((pcp), 1) -#endif - -#ifndef __this_cpu_dec # define __this_cpu_dec(pcp) __this_cpu_sub((pcp), 1) -#endif -#ifndef __this_cpu_and # define __this_cpu_and(pcp, val) \ do { __this_cpu_preempt_check("and"); \ __pcpu_size_call(raw_cpu_and_, (pcp), (val)); \ } while (0) -#endif - -#ifndef __this_cpu_or # define __this_cpu_or(pcp, val) \ do { __this_cpu_preempt_check("or"); \ __pcpu_size_call(raw_cpu_or_, (pcp), (val)); \ } while (0) -#endif -#ifndef __this_cpu_add_return # define __this_cpu_add_return(pcp, val) \ (__this_cpu_preempt_check("add_return"),__pcpu_size_call_return2(raw_cpu_add_return_, pcp, val)) -#endif #define __this_cpu_sub_return(pcp, val) __this_cpu_add_return(pcp, -(typeof(pcp))(val)) #define __this_cpu_inc_return(pcp) __this_cpu_add_return(pcp, 1) #define __this_cpu_dec_return(pcp) __this_cpu_add_return(pcp, -1) -#ifndef __this_cpu_xchg # define __this_cpu_xchg(pcp, nval) \ (__this_cpu_preempt_check("xchg"),__pcpu_size_call_return2(raw_cpu_xchg_, (pcp), nval)) -#endif -#ifndef __this_cpu_cmpxchg # define __this_cpu_cmpxchg(pcp, oval, nval) \ (__this_cpu_preempt_check("cmpxchg"),__pcpu_size_call_return2(raw_cpu_cmpxchg_, pcp, oval, nval)) -#endif -#ifndef __this_cpu_cmpxchg_double # define __this_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ (__this_cpu_preempt_check("cmpxchg_double"),__pcpu_double_call_return_bool(raw_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2))) -#endif #endif /* __LINUX_PERCPU_H */ -- cgit v1.2.3-59-g8ed1b From 47b69ad673d9aa53c1d6032a6a522fc0ce8d6fc1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 17 Jun 2014 19:12:39 -0400 Subject: percpu: move generic {raw|this}_cpu_*_N() definitions to include/asm-generic/percpu.h {raw|this}_cpu_*_N() operations are expected to be provided by archs and the generic definitions are provided as fallbacks. As such, these firmly belong to include/asm-generic/percpu.h. Move the generic definitions to include/asm-generic/percpu.h. 
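To make the resulting layering concrete, it can help to trace one operation by hand; an informal sketch of how the macros resolve after this move (not text taken from the patch):

/*
 * this_cpu_add(foo, 1)                          - wrapper kept in linux/percpu.h
 *   -> __pcpu_size_call(this_cpu_add_, foo, 1)  - selects the _N variant by sizeof
 *     -> this_cpu_add_4(foo, 1)                 - the arch override, if the arch
 *                                                 defines one ...
 *       -> _this_cpu_generic_to_op(foo, 1, +=)  - ... otherwise this fallback,
 *                                                 which now lives in
 *                                                 asm-generic/percpu.h
 */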
The code is moved mostly verbatim; however, raw_cpu_*_N() are placed above this_cpu_*_N() which is more conventional as the raw operations may be used to defined other variants. This is pure reorganization. Signed-off-by: Tejun Heo Acked-by: Christoph Lameter --- include/asm-generic/percpu.h | 341 ++++++++++++++++++++++++++++++++++++++++++ include/linux/percpu.h | 344 ------------------------------------------- 2 files changed, 341 insertions(+), 344 deletions(-) (limited to 'include/linux') diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index e5ace4d49084..932ce602128f 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -65,4 +65,345 @@ extern void setup_per_cpu_areas(void); #define PER_CPU_DEF_ATTRIBUTES #endif +# ifndef raw_cpu_read_1 +# define raw_cpu_read_1(pcp) (*raw_cpu_ptr(&(pcp))) +# endif +# ifndef raw_cpu_read_2 +# define raw_cpu_read_2(pcp) (*raw_cpu_ptr(&(pcp))) +# endif +# ifndef raw_cpu_read_4 +# define raw_cpu_read_4(pcp) (*raw_cpu_ptr(&(pcp))) +# endif +# ifndef raw_cpu_read_8 +# define raw_cpu_read_8(pcp) (*raw_cpu_ptr(&(pcp))) +# endif + +#define raw_cpu_generic_to_op(pcp, val, op) \ +do { \ + *raw_cpu_ptr(&(pcp)) op val; \ +} while (0) + +# ifndef raw_cpu_write_1 +# define raw_cpu_write_1(pcp, val) raw_cpu_generic_to_op((pcp), (val), =) +# endif +# ifndef raw_cpu_write_2 +# define raw_cpu_write_2(pcp, val) raw_cpu_generic_to_op((pcp), (val), =) +# endif +# ifndef raw_cpu_write_4 +# define raw_cpu_write_4(pcp, val) raw_cpu_generic_to_op((pcp), (val), =) +# endif +# ifndef raw_cpu_write_8 +# define raw_cpu_write_8(pcp, val) raw_cpu_generic_to_op((pcp), (val), =) +# endif + +# ifndef raw_cpu_add_1 +# define raw_cpu_add_1(pcp, val) raw_cpu_generic_to_op((pcp), (val), +=) +# endif +# ifndef raw_cpu_add_2 +# define raw_cpu_add_2(pcp, val) raw_cpu_generic_to_op((pcp), (val), +=) +# endif +# ifndef raw_cpu_add_4 +# define raw_cpu_add_4(pcp, val) raw_cpu_generic_to_op((pcp), (val), +=) +# endif +# ifndef raw_cpu_add_8 +# define raw_cpu_add_8(pcp, val) raw_cpu_generic_to_op((pcp), (val), +=) +# endif + +# ifndef raw_cpu_and_1 +# define raw_cpu_and_1(pcp, val) raw_cpu_generic_to_op((pcp), (val), &=) +# endif +# ifndef raw_cpu_and_2 +# define raw_cpu_and_2(pcp, val) raw_cpu_generic_to_op((pcp), (val), &=) +# endif +# ifndef raw_cpu_and_4 +# define raw_cpu_and_4(pcp, val) raw_cpu_generic_to_op((pcp), (val), &=) +# endif +# ifndef raw_cpu_and_8 +# define raw_cpu_and_8(pcp, val) raw_cpu_generic_to_op((pcp), (val), &=) +# endif + +# ifndef raw_cpu_or_1 +# define raw_cpu_or_1(pcp, val) raw_cpu_generic_to_op((pcp), (val), |=) +# endif +# ifndef raw_cpu_or_2 +# define raw_cpu_or_2(pcp, val) raw_cpu_generic_to_op((pcp), (val), |=) +# endif +# ifndef raw_cpu_or_4 +# define raw_cpu_or_4(pcp, val) raw_cpu_generic_to_op((pcp), (val), |=) +# endif +# ifndef raw_cpu_or_8 +# define raw_cpu_or_8(pcp, val) raw_cpu_generic_to_op((pcp), (val), |=) +# endif + +#define raw_cpu_generic_add_return(pcp, val) \ +({ \ + raw_cpu_add(pcp, val); \ + raw_cpu_read(pcp); \ +}) + +# ifndef raw_cpu_add_return_1 +# define raw_cpu_add_return_1(pcp, val) raw_cpu_generic_add_return(pcp, val) +# endif +# ifndef raw_cpu_add_return_2 +# define raw_cpu_add_return_2(pcp, val) raw_cpu_generic_add_return(pcp, val) +# endif +# ifndef raw_cpu_add_return_4 +# define raw_cpu_add_return_4(pcp, val) raw_cpu_generic_add_return(pcp, val) +# endif +# ifndef raw_cpu_add_return_8 +# define raw_cpu_add_return_8(pcp, val) raw_cpu_generic_add_return(pcp, val) +# endif + +#define 
raw_cpu_generic_xchg(pcp, nval) \ +({ typeof(pcp) ret__; \ + ret__ = raw_cpu_read(pcp); \ + raw_cpu_write(pcp, nval); \ + ret__; \ +}) + +# ifndef raw_cpu_xchg_1 +# define raw_cpu_xchg_1(pcp, nval) raw_cpu_generic_xchg(pcp, nval) +# endif +# ifndef raw_cpu_xchg_2 +# define raw_cpu_xchg_2(pcp, nval) raw_cpu_generic_xchg(pcp, nval) +# endif +# ifndef raw_cpu_xchg_4 +# define raw_cpu_xchg_4(pcp, nval) raw_cpu_generic_xchg(pcp, nval) +# endif +# ifndef raw_cpu_xchg_8 +# define raw_cpu_xchg_8(pcp, nval) raw_cpu_generic_xchg(pcp, nval) +# endif + +#define raw_cpu_generic_cmpxchg(pcp, oval, nval) \ +({ \ + typeof(pcp) ret__; \ + ret__ = raw_cpu_read(pcp); \ + if (ret__ == (oval)) \ + raw_cpu_write(pcp, nval); \ + ret__; \ +}) + +# ifndef raw_cpu_cmpxchg_1 +# define raw_cpu_cmpxchg_1(pcp, oval, nval) raw_cpu_generic_cmpxchg(pcp, oval, nval) +# endif +# ifndef raw_cpu_cmpxchg_2 +# define raw_cpu_cmpxchg_2(pcp, oval, nval) raw_cpu_generic_cmpxchg(pcp, oval, nval) +# endif +# ifndef raw_cpu_cmpxchg_4 +# define raw_cpu_cmpxchg_4(pcp, oval, nval) raw_cpu_generic_cmpxchg(pcp, oval, nval) +# endif +# ifndef raw_cpu_cmpxchg_8 +# define raw_cpu_cmpxchg_8(pcp, oval, nval) raw_cpu_generic_cmpxchg(pcp, oval, nval) +# endif + +#define raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ +({ \ + int __ret = 0; \ + if (raw_cpu_read(pcp1) == (oval1) && \ + raw_cpu_read(pcp2) == (oval2)) { \ + raw_cpu_write(pcp1, (nval1)); \ + raw_cpu_write(pcp2, (nval2)); \ + __ret = 1; \ + } \ + (__ret); \ +}) + +# ifndef raw_cpu_cmpxchg_double_1 +# define raw_cpu_cmpxchg_double_1(pcp1, pcp2, oval1, oval2, nval1, nval2) \ + raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) +# endif +# ifndef raw_cpu_cmpxchg_double_2 +# define raw_cpu_cmpxchg_double_2(pcp1, pcp2, oval1, oval2, nval1, nval2) \ + raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) +# endif +# ifndef raw_cpu_cmpxchg_double_4 +# define raw_cpu_cmpxchg_double_4(pcp1, pcp2, oval1, oval2, nval1, nval2) \ + raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) +# endif +# ifndef raw_cpu_cmpxchg_double_8 +# define raw_cpu_cmpxchg_double_8(pcp1, pcp2, oval1, oval2, nval1, nval2) \ + raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) +# endif + +#define _this_cpu_generic_read(pcp) \ +({ typeof(pcp) ret__; \ + preempt_disable(); \ + ret__ = *this_cpu_ptr(&(pcp)); \ + preempt_enable(); \ + ret__; \ +}) + +# ifndef this_cpu_read_1 +# define this_cpu_read_1(pcp) _this_cpu_generic_read(pcp) +# endif +# ifndef this_cpu_read_2 +# define this_cpu_read_2(pcp) _this_cpu_generic_read(pcp) +# endif +# ifndef this_cpu_read_4 +# define this_cpu_read_4(pcp) _this_cpu_generic_read(pcp) +# endif +# ifndef this_cpu_read_8 +# define this_cpu_read_8(pcp) _this_cpu_generic_read(pcp) +# endif + +#define _this_cpu_generic_to_op(pcp, val, op) \ +do { \ + unsigned long flags; \ + raw_local_irq_save(flags); \ + *raw_cpu_ptr(&(pcp)) op val; \ + raw_local_irq_restore(flags); \ +} while (0) + +# ifndef this_cpu_write_1 +# define this_cpu_write_1(pcp, val) _this_cpu_generic_to_op((pcp), (val), =) +# endif +# ifndef this_cpu_write_2 +# define this_cpu_write_2(pcp, val) _this_cpu_generic_to_op((pcp), (val), =) +# endif +# ifndef this_cpu_write_4 +# define this_cpu_write_4(pcp, val) _this_cpu_generic_to_op((pcp), (val), =) +# endif +# ifndef this_cpu_write_8 +# define this_cpu_write_8(pcp, val) _this_cpu_generic_to_op((pcp), (val), =) +# endif + +# ifndef this_cpu_add_1 +# define this_cpu_add_1(pcp, val) 
_this_cpu_generic_to_op((pcp), (val), +=) +# endif +# ifndef this_cpu_add_2 +# define this_cpu_add_2(pcp, val) _this_cpu_generic_to_op((pcp), (val), +=) +# endif +# ifndef this_cpu_add_4 +# define this_cpu_add_4(pcp, val) _this_cpu_generic_to_op((pcp), (val), +=) +# endif +# ifndef this_cpu_add_8 +# define this_cpu_add_8(pcp, val) _this_cpu_generic_to_op((pcp), (val), +=) +# endif + +# ifndef this_cpu_and_1 +# define this_cpu_and_1(pcp, val) _this_cpu_generic_to_op((pcp), (val), &=) +# endif +# ifndef this_cpu_and_2 +# define this_cpu_and_2(pcp, val) _this_cpu_generic_to_op((pcp), (val), &=) +# endif +# ifndef this_cpu_and_4 +# define this_cpu_and_4(pcp, val) _this_cpu_generic_to_op((pcp), (val), &=) +# endif +# ifndef this_cpu_and_8 +# define this_cpu_and_8(pcp, val) _this_cpu_generic_to_op((pcp), (val), &=) +# endif + +# ifndef this_cpu_or_1 +# define this_cpu_or_1(pcp, val) _this_cpu_generic_to_op((pcp), (val), |=) +# endif +# ifndef this_cpu_or_2 +# define this_cpu_or_2(pcp, val) _this_cpu_generic_to_op((pcp), (val), |=) +# endif +# ifndef this_cpu_or_4 +# define this_cpu_or_4(pcp, val) _this_cpu_generic_to_op((pcp), (val), |=) +# endif +# ifndef this_cpu_or_8 +# define this_cpu_or_8(pcp, val) _this_cpu_generic_to_op((pcp), (val), |=) +# endif + +#define _this_cpu_generic_add_return(pcp, val) \ +({ \ + typeof(pcp) ret__; \ + unsigned long flags; \ + raw_local_irq_save(flags); \ + raw_cpu_add(pcp, val); \ + ret__ = raw_cpu_read(pcp); \ + raw_local_irq_restore(flags); \ + ret__; \ +}) + +# ifndef this_cpu_add_return_1 +# define this_cpu_add_return_1(pcp, val) _this_cpu_generic_add_return(pcp, val) +# endif +# ifndef this_cpu_add_return_2 +# define this_cpu_add_return_2(pcp, val) _this_cpu_generic_add_return(pcp, val) +# endif +# ifndef this_cpu_add_return_4 +# define this_cpu_add_return_4(pcp, val) _this_cpu_generic_add_return(pcp, val) +# endif +# ifndef this_cpu_add_return_8 +# define this_cpu_add_return_8(pcp, val) _this_cpu_generic_add_return(pcp, val) +# endif + +#define _this_cpu_generic_xchg(pcp, nval) \ +({ typeof(pcp) ret__; \ + unsigned long flags; \ + raw_local_irq_save(flags); \ + ret__ = raw_cpu_read(pcp); \ + raw_cpu_write(pcp, nval); \ + raw_local_irq_restore(flags); \ + ret__; \ +}) + +# ifndef this_cpu_xchg_1 +# define this_cpu_xchg_1(pcp, nval) _this_cpu_generic_xchg(pcp, nval) +# endif +# ifndef this_cpu_xchg_2 +# define this_cpu_xchg_2(pcp, nval) _this_cpu_generic_xchg(pcp, nval) +# endif +# ifndef this_cpu_xchg_4 +# define this_cpu_xchg_4(pcp, nval) _this_cpu_generic_xchg(pcp, nval) +# endif +# ifndef this_cpu_xchg_8 +# define this_cpu_xchg_8(pcp, nval) _this_cpu_generic_xchg(pcp, nval) +# endif + +#define _this_cpu_generic_cmpxchg(pcp, oval, nval) \ +({ \ + typeof(pcp) ret__; \ + unsigned long flags; \ + raw_local_irq_save(flags); \ + ret__ = raw_cpu_read(pcp); \ + if (ret__ == (oval)) \ + raw_cpu_write(pcp, nval); \ + raw_local_irq_restore(flags); \ + ret__; \ +}) + +# ifndef this_cpu_cmpxchg_1 +# define this_cpu_cmpxchg_1(pcp, oval, nval) _this_cpu_generic_cmpxchg(pcp, oval, nval) +# endif +# ifndef this_cpu_cmpxchg_2 +# define this_cpu_cmpxchg_2(pcp, oval, nval) _this_cpu_generic_cmpxchg(pcp, oval, nval) +# endif +# ifndef this_cpu_cmpxchg_4 +# define this_cpu_cmpxchg_4(pcp, oval, nval) _this_cpu_generic_cmpxchg(pcp, oval, nval) +# endif +# ifndef this_cpu_cmpxchg_8 +# define this_cpu_cmpxchg_8(pcp, oval, nval) _this_cpu_generic_cmpxchg(pcp, oval, nval) +# endif + +#define _this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ +({ \ + int 
ret__; \ + unsigned long flags; \ + raw_local_irq_save(flags); \ + ret__ = raw_cpu_generic_cmpxchg_double(pcp1, pcp2, \ + oval1, oval2, nval1, nval2); \ + raw_local_irq_restore(flags); \ + ret__; \ +}) + +# ifndef this_cpu_cmpxchg_double_1 +# define this_cpu_cmpxchg_double_1(pcp1, pcp2, oval1, oval2, nval1, nval2) \ + _this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) +# endif +# ifndef this_cpu_cmpxchg_double_2 +# define this_cpu_cmpxchg_double_2(pcp1, pcp2, oval1, oval2, nval1, nval2) \ + _this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) +# endif +# ifndef this_cpu_cmpxchg_double_4 +# define this_cpu_cmpxchg_double_4(pcp1, pcp2, oval1, oval2, nval1, nval2) \ + _this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) +# endif +# ifndef this_cpu_cmpxchg_double_8 +# define this_cpu_cmpxchg_double_8(pcp1, pcp2, oval1, oval2, nval1, nval2) \ + _this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) +# endif + #endif /* _ASM_GENERIC_PERCPU_H_ */ diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 95d380e5d246..20b953532596 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -233,174 +233,20 @@ do { \ * generic code will be used. */ -#define _this_cpu_generic_read(pcp) \ -({ typeof(pcp) ret__; \ - preempt_disable(); \ - ret__ = *this_cpu_ptr(&(pcp)); \ - preempt_enable(); \ - ret__; \ -}) - -# ifndef this_cpu_read_1 -# define this_cpu_read_1(pcp) _this_cpu_generic_read(pcp) -# endif -# ifndef this_cpu_read_2 -# define this_cpu_read_2(pcp) _this_cpu_generic_read(pcp) -# endif -# ifndef this_cpu_read_4 -# define this_cpu_read_4(pcp) _this_cpu_generic_read(pcp) -# endif -# ifndef this_cpu_read_8 -# define this_cpu_read_8(pcp) _this_cpu_generic_read(pcp) -# endif # define this_cpu_read(pcp) __pcpu_size_call_return(this_cpu_read_, (pcp)) - -#define _this_cpu_generic_to_op(pcp, val, op) \ -do { \ - unsigned long flags; \ - raw_local_irq_save(flags); \ - *raw_cpu_ptr(&(pcp)) op val; \ - raw_local_irq_restore(flags); \ -} while (0) - -# ifndef this_cpu_write_1 -# define this_cpu_write_1(pcp, val) _this_cpu_generic_to_op((pcp), (val), =) -# endif -# ifndef this_cpu_write_2 -# define this_cpu_write_2(pcp, val) _this_cpu_generic_to_op((pcp), (val), =) -# endif -# ifndef this_cpu_write_4 -# define this_cpu_write_4(pcp, val) _this_cpu_generic_to_op((pcp), (val), =) -# endif -# ifndef this_cpu_write_8 -# define this_cpu_write_8(pcp, val) _this_cpu_generic_to_op((pcp), (val), =) -# endif # define this_cpu_write(pcp, val) __pcpu_size_call(this_cpu_write_, (pcp), (val)) - -# ifndef this_cpu_add_1 -# define this_cpu_add_1(pcp, val) _this_cpu_generic_to_op((pcp), (val), +=) -# endif -# ifndef this_cpu_add_2 -# define this_cpu_add_2(pcp, val) _this_cpu_generic_to_op((pcp), (val), +=) -# endif -# ifndef this_cpu_add_4 -# define this_cpu_add_4(pcp, val) _this_cpu_generic_to_op((pcp), (val), +=) -# endif -# ifndef this_cpu_add_8 -# define this_cpu_add_8(pcp, val) _this_cpu_generic_to_op((pcp), (val), +=) -# endif # define this_cpu_add(pcp, val) __pcpu_size_call(this_cpu_add_, (pcp), (val)) - # define this_cpu_sub(pcp, val) this_cpu_add((pcp), -(typeof(pcp))(val)) # define this_cpu_inc(pcp) this_cpu_add((pcp), 1) # define this_cpu_dec(pcp) this_cpu_sub((pcp), 1) - -# ifndef this_cpu_and_1 -# define this_cpu_and_1(pcp, val) _this_cpu_generic_to_op((pcp), (val), &=) -# endif -# ifndef this_cpu_and_2 -# define this_cpu_and_2(pcp, val) _this_cpu_generic_to_op((pcp), (val), &=) -# endif -# ifndef this_cpu_and_4 -# 
define this_cpu_and_4(pcp, val) _this_cpu_generic_to_op((pcp), (val), &=) -# endif -# ifndef this_cpu_and_8 -# define this_cpu_and_8(pcp, val) _this_cpu_generic_to_op((pcp), (val), &=) -# endif # define this_cpu_and(pcp, val) __pcpu_size_call(this_cpu_and_, (pcp), (val)) - -# ifndef this_cpu_or_1 -# define this_cpu_or_1(pcp, val) _this_cpu_generic_to_op((pcp), (val), |=) -# endif -# ifndef this_cpu_or_2 -# define this_cpu_or_2(pcp, val) _this_cpu_generic_to_op((pcp), (val), |=) -# endif -# ifndef this_cpu_or_4 -# define this_cpu_or_4(pcp, val) _this_cpu_generic_to_op((pcp), (val), |=) -# endif -# ifndef this_cpu_or_8 -# define this_cpu_or_8(pcp, val) _this_cpu_generic_to_op((pcp), (val), |=) -# endif # define this_cpu_or(pcp, val) __pcpu_size_call(this_cpu_or_, (pcp), (val)) - -#define _this_cpu_generic_add_return(pcp, val) \ -({ \ - typeof(pcp) ret__; \ - unsigned long flags; \ - raw_local_irq_save(flags); \ - raw_cpu_add(pcp, val); \ - ret__ = raw_cpu_read(pcp); \ - raw_local_irq_restore(flags); \ - ret__; \ -}) - -# ifndef this_cpu_add_return_1 -# define this_cpu_add_return_1(pcp, val) _this_cpu_generic_add_return(pcp, val) -# endif -# ifndef this_cpu_add_return_2 -# define this_cpu_add_return_2(pcp, val) _this_cpu_generic_add_return(pcp, val) -# endif -# ifndef this_cpu_add_return_4 -# define this_cpu_add_return_4(pcp, val) _this_cpu_generic_add_return(pcp, val) -# endif -# ifndef this_cpu_add_return_8 -# define this_cpu_add_return_8(pcp, val) _this_cpu_generic_add_return(pcp, val) -# endif # define this_cpu_add_return(pcp, val) __pcpu_size_call_return2(this_cpu_add_return_, pcp, val) - #define this_cpu_sub_return(pcp, val) this_cpu_add_return(pcp, -(typeof(pcp))(val)) #define this_cpu_inc_return(pcp) this_cpu_add_return(pcp, 1) #define this_cpu_dec_return(pcp) this_cpu_add_return(pcp, -1) - -#define _this_cpu_generic_xchg(pcp, nval) \ -({ typeof(pcp) ret__; \ - unsigned long flags; \ - raw_local_irq_save(flags); \ - ret__ = raw_cpu_read(pcp); \ - raw_cpu_write(pcp, nval); \ - raw_local_irq_restore(flags); \ - ret__; \ -}) - -# ifndef this_cpu_xchg_1 -# define this_cpu_xchg_1(pcp, nval) _this_cpu_generic_xchg(pcp, nval) -# endif -# ifndef this_cpu_xchg_2 -# define this_cpu_xchg_2(pcp, nval) _this_cpu_generic_xchg(pcp, nval) -# endif -# ifndef this_cpu_xchg_4 -# define this_cpu_xchg_4(pcp, nval) _this_cpu_generic_xchg(pcp, nval) -# endif -# ifndef this_cpu_xchg_8 -# define this_cpu_xchg_8(pcp, nval) _this_cpu_generic_xchg(pcp, nval) -# endif # define this_cpu_xchg(pcp, nval) \ __pcpu_size_call_return2(this_cpu_xchg_, (pcp), nval) - -#define _this_cpu_generic_cmpxchg(pcp, oval, nval) \ -({ \ - typeof(pcp) ret__; \ - unsigned long flags; \ - raw_local_irq_save(flags); \ - ret__ = raw_cpu_read(pcp); \ - if (ret__ == (oval)) \ - raw_cpu_write(pcp, nval); \ - raw_local_irq_restore(flags); \ - ret__; \ -}) - -# ifndef this_cpu_cmpxchg_1 -# define this_cpu_cmpxchg_1(pcp, oval, nval) _this_cpu_generic_cmpxchg(pcp, oval, nval) -# endif -# ifndef this_cpu_cmpxchg_2 -# define this_cpu_cmpxchg_2(pcp, oval, nval) _this_cpu_generic_cmpxchg(pcp, oval, nval) -# endif -# ifndef this_cpu_cmpxchg_4 -# define this_cpu_cmpxchg_4(pcp, oval, nval) _this_cpu_generic_cmpxchg(pcp, oval, nval) -# endif -# ifndef this_cpu_cmpxchg_8 -# define this_cpu_cmpxchg_8(pcp, oval, nval) _this_cpu_generic_cmpxchg(pcp, oval, nval) -# endif # define this_cpu_cmpxchg(pcp, oval, nval) \ __pcpu_size_call_return2(this_cpu_cmpxchg_, pcp, oval, nval) @@ -412,33 +258,6 @@ do { \ * very limited hardware support for these operations, 
so only certain * sizes may work. */ -#define _this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ -({ \ - int ret__; \ - unsigned long flags; \ - raw_local_irq_save(flags); \ - ret__ = raw_cpu_generic_cmpxchg_double(pcp1, pcp2, \ - oval1, oval2, nval1, nval2); \ - raw_local_irq_restore(flags); \ - ret__; \ -}) - -# ifndef this_cpu_cmpxchg_double_1 -# define this_cpu_cmpxchg_double_1(pcp1, pcp2, oval1, oval2, nval1, nval2) \ - _this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) -# endif -# ifndef this_cpu_cmpxchg_double_2 -# define this_cpu_cmpxchg_double_2(pcp1, pcp2, oval1, oval2, nval1, nval2) \ - _this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) -# endif -# ifndef this_cpu_cmpxchg_double_4 -# define this_cpu_cmpxchg_double_4(pcp1, pcp2, oval1, oval2, nval1, nval2) \ - _this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) -# endif -# ifndef this_cpu_cmpxchg_double_8 -# define this_cpu_cmpxchg_double_8(pcp1, pcp2, oval1, oval2, nval1, nval2) \ - _this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) -# endif # define this_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ __pcpu_double_call_return_bool(this_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2)) @@ -452,186 +271,23 @@ do { \ * or an interrupt occurred and the same percpu variable was modified from * the interrupt context. */ -# ifndef raw_cpu_read_1 -# define raw_cpu_read_1(pcp) (*raw_cpu_ptr(&(pcp))) -# endif -# ifndef raw_cpu_read_2 -# define raw_cpu_read_2(pcp) (*raw_cpu_ptr(&(pcp))) -# endif -# ifndef raw_cpu_read_4 -# define raw_cpu_read_4(pcp) (*raw_cpu_ptr(&(pcp))) -# endif -# ifndef raw_cpu_read_8 -# define raw_cpu_read_8(pcp) (*raw_cpu_ptr(&(pcp))) -# endif # define raw_cpu_read(pcp) __pcpu_size_call_return(raw_cpu_read_, (pcp)) - -#define raw_cpu_generic_to_op(pcp, val, op) \ -do { \ - *raw_cpu_ptr(&(pcp)) op val; \ -} while (0) - -# ifndef raw_cpu_write_1 -# define raw_cpu_write_1(pcp, val) raw_cpu_generic_to_op((pcp), (val), =) -# endif -# ifndef raw_cpu_write_2 -# define raw_cpu_write_2(pcp, val) raw_cpu_generic_to_op((pcp), (val), =) -# endif -# ifndef raw_cpu_write_4 -# define raw_cpu_write_4(pcp, val) raw_cpu_generic_to_op((pcp), (val), =) -# endif -# ifndef raw_cpu_write_8 -# define raw_cpu_write_8(pcp, val) raw_cpu_generic_to_op((pcp), (val), =) -# endif # define raw_cpu_write(pcp, val) __pcpu_size_call(raw_cpu_write_, (pcp), (val)) - -# ifndef raw_cpu_add_1 -# define raw_cpu_add_1(pcp, val) raw_cpu_generic_to_op((pcp), (val), +=) -# endif -# ifndef raw_cpu_add_2 -# define raw_cpu_add_2(pcp, val) raw_cpu_generic_to_op((pcp), (val), +=) -# endif -# ifndef raw_cpu_add_4 -# define raw_cpu_add_4(pcp, val) raw_cpu_generic_to_op((pcp), (val), +=) -# endif -# ifndef raw_cpu_add_8 -# define raw_cpu_add_8(pcp, val) raw_cpu_generic_to_op((pcp), (val), +=) -# endif # define raw_cpu_add(pcp, val) __pcpu_size_call(raw_cpu_add_, (pcp), (val)) - # define raw_cpu_sub(pcp, val) raw_cpu_add((pcp), -(val)) - # define raw_cpu_inc(pcp) raw_cpu_add((pcp), 1) - # define raw_cpu_dec(pcp) raw_cpu_sub((pcp), 1) - -# ifndef raw_cpu_and_1 -# define raw_cpu_and_1(pcp, val) raw_cpu_generic_to_op((pcp), (val), &=) -# endif -# ifndef raw_cpu_and_2 -# define raw_cpu_and_2(pcp, val) raw_cpu_generic_to_op((pcp), (val), &=) -# endif -# ifndef raw_cpu_and_4 -# define raw_cpu_and_4(pcp, val) raw_cpu_generic_to_op((pcp), (val), &=) -# endif -# ifndef raw_cpu_and_8 -# define raw_cpu_and_8(pcp, val) 
raw_cpu_generic_to_op((pcp), (val), &=) -# endif # define raw_cpu_and(pcp, val) __pcpu_size_call(raw_cpu_and_, (pcp), (val)) - -# ifndef raw_cpu_or_1 -# define raw_cpu_or_1(pcp, val) raw_cpu_generic_to_op((pcp), (val), |=) -# endif -# ifndef raw_cpu_or_2 -# define raw_cpu_or_2(pcp, val) raw_cpu_generic_to_op((pcp), (val), |=) -# endif -# ifndef raw_cpu_or_4 -# define raw_cpu_or_4(pcp, val) raw_cpu_generic_to_op((pcp), (val), |=) -# endif -# ifndef raw_cpu_or_8 -# define raw_cpu_or_8(pcp, val) raw_cpu_generic_to_op((pcp), (val), |=) -# endif # define raw_cpu_or(pcp, val) __pcpu_size_call(raw_cpu_or_, (pcp), (val)) - -#define raw_cpu_generic_add_return(pcp, val) \ -({ \ - raw_cpu_add(pcp, val); \ - raw_cpu_read(pcp); \ -}) - -# ifndef raw_cpu_add_return_1 -# define raw_cpu_add_return_1(pcp, val) raw_cpu_generic_add_return(pcp, val) -# endif -# ifndef raw_cpu_add_return_2 -# define raw_cpu_add_return_2(pcp, val) raw_cpu_generic_add_return(pcp, val) -# endif -# ifndef raw_cpu_add_return_4 -# define raw_cpu_add_return_4(pcp, val) raw_cpu_generic_add_return(pcp, val) -# endif -# ifndef raw_cpu_add_return_8 -# define raw_cpu_add_return_8(pcp, val) raw_cpu_generic_add_return(pcp, val) -# endif # define raw_cpu_add_return(pcp, val) \ __pcpu_size_call_return2(raw_cpu_add_return_, pcp, val) - #define raw_cpu_sub_return(pcp, val) raw_cpu_add_return(pcp, -(typeof(pcp))(val)) #define raw_cpu_inc_return(pcp) raw_cpu_add_return(pcp, 1) #define raw_cpu_dec_return(pcp) raw_cpu_add_return(pcp, -1) - -#define raw_cpu_generic_xchg(pcp, nval) \ -({ typeof(pcp) ret__; \ - ret__ = raw_cpu_read(pcp); \ - raw_cpu_write(pcp, nval); \ - ret__; \ -}) - -# ifndef raw_cpu_xchg_1 -# define raw_cpu_xchg_1(pcp, nval) raw_cpu_generic_xchg(pcp, nval) -# endif -# ifndef raw_cpu_xchg_2 -# define raw_cpu_xchg_2(pcp, nval) raw_cpu_generic_xchg(pcp, nval) -# endif -# ifndef raw_cpu_xchg_4 -# define raw_cpu_xchg_4(pcp, nval) raw_cpu_generic_xchg(pcp, nval) -# endif -# ifndef raw_cpu_xchg_8 -# define raw_cpu_xchg_8(pcp, nval) raw_cpu_generic_xchg(pcp, nval) -# endif # define raw_cpu_xchg(pcp, nval) \ __pcpu_size_call_return2(raw_cpu_xchg_, (pcp), nval) - -#define raw_cpu_generic_cmpxchg(pcp, oval, nval) \ -({ \ - typeof(pcp) ret__; \ - ret__ = raw_cpu_read(pcp); \ - if (ret__ == (oval)) \ - raw_cpu_write(pcp, nval); \ - ret__; \ -}) - -# ifndef raw_cpu_cmpxchg_1 -# define raw_cpu_cmpxchg_1(pcp, oval, nval) raw_cpu_generic_cmpxchg(pcp, oval, nval) -# endif -# ifndef raw_cpu_cmpxchg_2 -# define raw_cpu_cmpxchg_2(pcp, oval, nval) raw_cpu_generic_cmpxchg(pcp, oval, nval) -# endif -# ifndef raw_cpu_cmpxchg_4 -# define raw_cpu_cmpxchg_4(pcp, oval, nval) raw_cpu_generic_cmpxchg(pcp, oval, nval) -# endif -# ifndef raw_cpu_cmpxchg_8 -# define raw_cpu_cmpxchg_8(pcp, oval, nval) raw_cpu_generic_cmpxchg(pcp, oval, nval) -# endif # define raw_cpu_cmpxchg(pcp, oval, nval) \ __pcpu_size_call_return2(raw_cpu_cmpxchg_, pcp, oval, nval) - -#define raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ -({ \ - int __ret = 0; \ - if (raw_cpu_read(pcp1) == (oval1) && \ - raw_cpu_read(pcp2) == (oval2)) { \ - raw_cpu_write(pcp1, (nval1)); \ - raw_cpu_write(pcp2, (nval2)); \ - __ret = 1; \ - } \ - (__ret); \ -}) - -# ifndef raw_cpu_cmpxchg_double_1 -# define raw_cpu_cmpxchg_double_1(pcp1, pcp2, oval1, oval2, nval1, nval2) \ - raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) -# endif -# ifndef raw_cpu_cmpxchg_double_2 -# define raw_cpu_cmpxchg_double_2(pcp1, pcp2, oval1, oval2, nval1, nval2) \ - 
raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) -# endif -# ifndef raw_cpu_cmpxchg_double_4 -# define raw_cpu_cmpxchg_double_4(pcp1, pcp2, oval1, oval2, nval1, nval2) \ - raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) -# endif -# ifndef raw_cpu_cmpxchg_double_8 -# define raw_cpu_cmpxchg_double_8(pcp1, pcp2, oval1, oval2, nval1, nval2) \ - raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) -# endif # define raw_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ __pcpu_double_call_return_bool(raw_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2)) -- cgit v1.2.3-59-g8ed1b From a32f8d8eda8bd49017ac5f88e2b859f1f582557f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 17 Jun 2014 19:12:39 -0400 Subject: percpu: move {raw|this}_cpu_*() definitions to include/linux/percpu-defs.h We're in the process of moving all percpu accessors and operations to include/linux/percpu-defs.h so that they're available to arch headers without having to include full include/linux/percpu.h which may cause cyclic inclusion dependency. This patch moves {raw|this}_cpu_*() definitions from include/linux/percpu.h to include/linux/percpu-defs.h. The code is moved mostly verbatim; however, raw_cpu_*() are placed above this_cpu_*() which is more conventional as the raw operations may be used to defined other variants. This is pure reorganization. Signed-off-by: Tejun Heo Acked-by: Christoph Lameter --- include/linux/percpu-defs.h | 209 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/percpu.h | 208 ------------------------------------------- 2 files changed, 209 insertions(+), 208 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index 94cd90afadac..6710eb9555fa 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -270,5 +270,214 @@ preempt_enable(); \ } while (0) +/* + * Branching function to split up a function into a set of functions that + * are called for different scalar sizes of the objects handled. + */ + +extern void __bad_size_call_parameter(void); + +#ifdef CONFIG_DEBUG_PREEMPT +extern void __this_cpu_preempt_check(const char *op); +#else +static inline void __this_cpu_preempt_check(const char *op) { } +#endif + +#define __pcpu_size_call_return(stem, variable) \ +({ typeof(variable) pscr_ret__; \ + __verify_pcpu_ptr(&(variable)); \ + switch(sizeof(variable)) { \ + case 1: pscr_ret__ = stem##1(variable);break; \ + case 2: pscr_ret__ = stem##2(variable);break; \ + case 4: pscr_ret__ = stem##4(variable);break; \ + case 8: pscr_ret__ = stem##8(variable);break; \ + default: \ + __bad_size_call_parameter();break; \ + } \ + pscr_ret__; \ +}) + +#define __pcpu_size_call_return2(stem, variable, ...) \ +({ \ + typeof(variable) pscr2_ret__; \ + __verify_pcpu_ptr(&(variable)); \ + switch(sizeof(variable)) { \ + case 1: pscr2_ret__ = stem##1(variable, __VA_ARGS__); break; \ + case 2: pscr2_ret__ = stem##2(variable, __VA_ARGS__); break; \ + case 4: pscr2_ret__ = stem##4(variable, __VA_ARGS__); break; \ + case 8: pscr2_ret__ = stem##8(variable, __VA_ARGS__); break; \ + default: \ + __bad_size_call_parameter(); break; \ + } \ + pscr2_ret__; \ +}) + +/* + * Special handling for cmpxchg_double. cmpxchg_double is passed two + * percpu variables. The first has to be aligned to a double word + * boundary and the second has to follow directly thereafter. 
+ * We enforce this on all architectures even if they don't support + * a double cmpxchg instruction, since it's a cheap requirement, and it + * avoids breaking the requirement for architectures with the instruction. + */ +#define __pcpu_double_call_return_bool(stem, pcp1, pcp2, ...) \ +({ \ + bool pdcrb_ret__; \ + __verify_pcpu_ptr(&pcp1); \ + BUILD_BUG_ON(sizeof(pcp1) != sizeof(pcp2)); \ + VM_BUG_ON((unsigned long)(&pcp1) % (2 * sizeof(pcp1))); \ + VM_BUG_ON((unsigned long)(&pcp2) != \ + (unsigned long)(&pcp1) + sizeof(pcp1)); \ + switch(sizeof(pcp1)) { \ + case 1: pdcrb_ret__ = stem##1(pcp1, pcp2, __VA_ARGS__); break; \ + case 2: pdcrb_ret__ = stem##2(pcp1, pcp2, __VA_ARGS__); break; \ + case 4: pdcrb_ret__ = stem##4(pcp1, pcp2, __VA_ARGS__); break; \ + case 8: pdcrb_ret__ = stem##8(pcp1, pcp2, __VA_ARGS__); break; \ + default: \ + __bad_size_call_parameter(); break; \ + } \ + pdcrb_ret__; \ +}) + +#define __pcpu_size_call(stem, variable, ...) \ +do { \ + __verify_pcpu_ptr(&(variable)); \ + switch(sizeof(variable)) { \ + case 1: stem##1(variable, __VA_ARGS__);break; \ + case 2: stem##2(variable, __VA_ARGS__);break; \ + case 4: stem##4(variable, __VA_ARGS__);break; \ + case 8: stem##8(variable, __VA_ARGS__);break; \ + default: \ + __bad_size_call_parameter();break; \ + } \ +} while (0) + +/* + * this_cpu operations (C) 2008-2013 Christoph Lameter + * + * Optimized manipulation for memory allocated through the per cpu + * allocator or for addresses of per cpu variables. + * + * These operation guarantee exclusivity of access for other operations + * on the *same* processor. The assumption is that per cpu data is only + * accessed by a single processor instance (the current one). + * + * The arch code can provide optimized implementation by defining macros + * for certain scalar sizes. F.e. provide this_cpu_add_2() to provide per + * cpu atomic operations for 2 byte sized RMW actions. If arch code does + * not provide operations for a scalar size then the fallback in the + * generic code will be used. + */ + +/* + * Generic percpu operations for contexts where we do not want to do + * any checks for preemptiosn. + * + * If there is no other protection through preempt disable and/or + * disabling interupts then one of these RMW operations can show unexpected + * behavior because the execution thread was rescheduled on another processor + * or an interrupt occurred and the same percpu variable was modified from + * the interrupt context. 
+ */ +# define raw_cpu_read(pcp) __pcpu_size_call_return(raw_cpu_read_, (pcp)) +# define raw_cpu_write(pcp, val) __pcpu_size_call(raw_cpu_write_, (pcp), (val)) +# define raw_cpu_add(pcp, val) __pcpu_size_call(raw_cpu_add_, (pcp), (val)) +# define raw_cpu_sub(pcp, val) raw_cpu_add((pcp), -(val)) +# define raw_cpu_inc(pcp) raw_cpu_add((pcp), 1) +# define raw_cpu_dec(pcp) raw_cpu_sub((pcp), 1) +# define raw_cpu_and(pcp, val) __pcpu_size_call(raw_cpu_and_, (pcp), (val)) +# define raw_cpu_or(pcp, val) __pcpu_size_call(raw_cpu_or_, (pcp), (val)) +# define raw_cpu_add_return(pcp, val) \ + __pcpu_size_call_return2(raw_cpu_add_return_, pcp, val) +#define raw_cpu_sub_return(pcp, val) raw_cpu_add_return(pcp, -(typeof(pcp))(val)) +#define raw_cpu_inc_return(pcp) raw_cpu_add_return(pcp, 1) +#define raw_cpu_dec_return(pcp) raw_cpu_add_return(pcp, -1) +# define raw_cpu_xchg(pcp, nval) \ + __pcpu_size_call_return2(raw_cpu_xchg_, (pcp), nval) +# define raw_cpu_cmpxchg(pcp, oval, nval) \ + __pcpu_size_call_return2(raw_cpu_cmpxchg_, pcp, oval, nval) +# define raw_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ + __pcpu_double_call_return_bool(raw_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2)) + +/* + * Generic percpu operations for context that are safe from preemption/interrupts. + */ +# define __this_cpu_read(pcp) \ + (__this_cpu_preempt_check("read"),__pcpu_size_call_return(raw_cpu_read_, (pcp))) + +# define __this_cpu_write(pcp, val) \ +do { __this_cpu_preempt_check("write"); \ + __pcpu_size_call(raw_cpu_write_, (pcp), (val)); \ +} while (0) + +# define __this_cpu_add(pcp, val) \ +do { __this_cpu_preempt_check("add"); \ + __pcpu_size_call(raw_cpu_add_, (pcp), (val)); \ +} while (0) + +# define __this_cpu_sub(pcp, val) __this_cpu_add((pcp), -(typeof(pcp))(val)) +# define __this_cpu_inc(pcp) __this_cpu_add((pcp), 1) +# define __this_cpu_dec(pcp) __this_cpu_sub((pcp), 1) + +# define __this_cpu_and(pcp, val) \ +do { __this_cpu_preempt_check("and"); \ + __pcpu_size_call(raw_cpu_and_, (pcp), (val)); \ +} while (0) + +# define __this_cpu_or(pcp, val) \ +do { __this_cpu_preempt_check("or"); \ + __pcpu_size_call(raw_cpu_or_, (pcp), (val)); \ +} while (0) + +# define __this_cpu_add_return(pcp, val) \ + (__this_cpu_preempt_check("add_return"),__pcpu_size_call_return2(raw_cpu_add_return_, pcp, val)) + +#define __this_cpu_sub_return(pcp, val) __this_cpu_add_return(pcp, -(typeof(pcp))(val)) +#define __this_cpu_inc_return(pcp) __this_cpu_add_return(pcp, 1) +#define __this_cpu_dec_return(pcp) __this_cpu_add_return(pcp, -1) + +# define __this_cpu_xchg(pcp, nval) \ + (__this_cpu_preempt_check("xchg"),__pcpu_size_call_return2(raw_cpu_xchg_, (pcp), nval)) + +# define __this_cpu_cmpxchg(pcp, oval, nval) \ + (__this_cpu_preempt_check("cmpxchg"),__pcpu_size_call_return2(raw_cpu_cmpxchg_, pcp, oval, nval)) + +# define __this_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ + (__this_cpu_preempt_check("cmpxchg_double"),__pcpu_double_call_return_bool(raw_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2))) + +/* + * this_cpu_*() operations are used for accesses that must be done in a + * preemption safe way since we know that the context is not preempt + * safe. Interrupts may occur. If the interrupt modifies the variable too + * then RMW actions will not be reliable. 
+ */ +# define this_cpu_read(pcp) __pcpu_size_call_return(this_cpu_read_, (pcp)) +# define this_cpu_write(pcp, val) __pcpu_size_call(this_cpu_write_, (pcp), (val)) +# define this_cpu_add(pcp, val) __pcpu_size_call(this_cpu_add_, (pcp), (val)) +# define this_cpu_sub(pcp, val) this_cpu_add((pcp), -(typeof(pcp))(val)) +# define this_cpu_inc(pcp) this_cpu_add((pcp), 1) +# define this_cpu_dec(pcp) this_cpu_sub((pcp), 1) +# define this_cpu_and(pcp, val) __pcpu_size_call(this_cpu_and_, (pcp), (val)) +# define this_cpu_or(pcp, val) __pcpu_size_call(this_cpu_or_, (pcp), (val)) +# define this_cpu_add_return(pcp, val) __pcpu_size_call_return2(this_cpu_add_return_, pcp, val) +#define this_cpu_sub_return(pcp, val) this_cpu_add_return(pcp, -(typeof(pcp))(val)) +#define this_cpu_inc_return(pcp) this_cpu_add_return(pcp, 1) +#define this_cpu_dec_return(pcp) this_cpu_add_return(pcp, -1) +# define this_cpu_xchg(pcp, nval) \ + __pcpu_size_call_return2(this_cpu_xchg_, (pcp), nval) +# define this_cpu_cmpxchg(pcp, oval, nval) \ + __pcpu_size_call_return2(this_cpu_cmpxchg_, pcp, oval, nval) + +/* + * cmpxchg_double replaces two adjacent scalars at once. The first + * two parameters are per cpu variables which have to be of the same + * size. A truth value is returned to indicate success or failure + * (since a double register result is difficult to handle). There is + * very limited hardware support for these operations, so only certain + * sizes may work. + */ +# define this_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ + __pcpu_double_call_return_bool(this_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2)) + #endif /* __ASSEMBLY__ */ #endif /* _LINUX_PERCPU_DEFS_H */ diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 20b953532596..6f61b61b7996 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -129,212 +129,4 @@ extern phys_addr_t per_cpu_ptr_to_phys(void *addr); #define alloc_percpu(type) \ (typeof(type) __percpu *)__alloc_percpu(sizeof(type), __alignof__(type)) -/* - * Branching function to split up a function into a set of functions that - * are called for different scalar sizes of the objects handled. - */ - -extern void __bad_size_call_parameter(void); - -#ifdef CONFIG_DEBUG_PREEMPT -extern void __this_cpu_preempt_check(const char *op); -#else -static inline void __this_cpu_preempt_check(const char *op) { } -#endif - -#define __pcpu_size_call_return(stem, variable) \ -({ typeof(variable) pscr_ret__; \ - __verify_pcpu_ptr(&(variable)); \ - switch(sizeof(variable)) { \ - case 1: pscr_ret__ = stem##1(variable);break; \ - case 2: pscr_ret__ = stem##2(variable);break; \ - case 4: pscr_ret__ = stem##4(variable);break; \ - case 8: pscr_ret__ = stem##8(variable);break; \ - default: \ - __bad_size_call_parameter();break; \ - } \ - pscr_ret__; \ -}) - -#define __pcpu_size_call_return2(stem, variable, ...) \ -({ \ - typeof(variable) pscr2_ret__; \ - __verify_pcpu_ptr(&(variable)); \ - switch(sizeof(variable)) { \ - case 1: pscr2_ret__ = stem##1(variable, __VA_ARGS__); break; \ - case 2: pscr2_ret__ = stem##2(variable, __VA_ARGS__); break; \ - case 4: pscr2_ret__ = stem##4(variable, __VA_ARGS__); break; \ - case 8: pscr2_ret__ = stem##8(variable, __VA_ARGS__); break; \ - default: \ - __bad_size_call_parameter(); break; \ - } \ - pscr2_ret__; \ -}) - -/* - * Special handling for cmpxchg_double. cmpxchg_double is passed two - * percpu variables. The first has to be aligned to a double word - * boundary and the second has to follow directly thereafter. 
- * We enforce this on all architectures even if they don't support - * a double cmpxchg instruction, since it's a cheap requirement, and it - * avoids breaking the requirement for architectures with the instruction. - */ -#define __pcpu_double_call_return_bool(stem, pcp1, pcp2, ...) \ -({ \ - bool pdcrb_ret__; \ - __verify_pcpu_ptr(&pcp1); \ - BUILD_BUG_ON(sizeof(pcp1) != sizeof(pcp2)); \ - VM_BUG_ON((unsigned long)(&pcp1) % (2 * sizeof(pcp1))); \ - VM_BUG_ON((unsigned long)(&pcp2) != \ - (unsigned long)(&pcp1) + sizeof(pcp1)); \ - switch(sizeof(pcp1)) { \ - case 1: pdcrb_ret__ = stem##1(pcp1, pcp2, __VA_ARGS__); break; \ - case 2: pdcrb_ret__ = stem##2(pcp1, pcp2, __VA_ARGS__); break; \ - case 4: pdcrb_ret__ = stem##4(pcp1, pcp2, __VA_ARGS__); break; \ - case 8: pdcrb_ret__ = stem##8(pcp1, pcp2, __VA_ARGS__); break; \ - default: \ - __bad_size_call_parameter(); break; \ - } \ - pdcrb_ret__; \ -}) - -#define __pcpu_size_call(stem, variable, ...) \ -do { \ - __verify_pcpu_ptr(&(variable)); \ - switch(sizeof(variable)) { \ - case 1: stem##1(variable, __VA_ARGS__);break; \ - case 2: stem##2(variable, __VA_ARGS__);break; \ - case 4: stem##4(variable, __VA_ARGS__);break; \ - case 8: stem##8(variable, __VA_ARGS__);break; \ - default: \ - __bad_size_call_parameter();break; \ - } \ -} while (0) - -/* - * this_cpu operations (C) 2008-2013 Christoph Lameter - * - * Optimized manipulation for memory allocated through the per cpu - * allocator or for addresses of per cpu variables. - * - * These operation guarantee exclusivity of access for other operations - * on the *same* processor. The assumption is that per cpu data is only - * accessed by a single processor instance (the current one). - * - * The first group is used for accesses that must be done in a - * preemption safe way since we know that the context is not preempt - * safe. Interrupts may occur. If the interrupt modifies the variable - * too then RMW actions will not be reliable. - * - * The arch code can provide optimized implementation by defining macros - * for certain scalar sizes. F.e. provide this_cpu_add_2() to provide per - * cpu atomic operations for 2 byte sized RMW actions. If arch code does - * not provide operations for a scalar size then the fallback in the - * generic code will be used. - */ - -# define this_cpu_read(pcp) __pcpu_size_call_return(this_cpu_read_, (pcp)) -# define this_cpu_write(pcp, val) __pcpu_size_call(this_cpu_write_, (pcp), (val)) -# define this_cpu_add(pcp, val) __pcpu_size_call(this_cpu_add_, (pcp), (val)) -# define this_cpu_sub(pcp, val) this_cpu_add((pcp), -(typeof(pcp))(val)) -# define this_cpu_inc(pcp) this_cpu_add((pcp), 1) -# define this_cpu_dec(pcp) this_cpu_sub((pcp), 1) -# define this_cpu_and(pcp, val) __pcpu_size_call(this_cpu_and_, (pcp), (val)) -# define this_cpu_or(pcp, val) __pcpu_size_call(this_cpu_or_, (pcp), (val)) -# define this_cpu_add_return(pcp, val) __pcpu_size_call_return2(this_cpu_add_return_, pcp, val) -#define this_cpu_sub_return(pcp, val) this_cpu_add_return(pcp, -(typeof(pcp))(val)) -#define this_cpu_inc_return(pcp) this_cpu_add_return(pcp, 1) -#define this_cpu_dec_return(pcp) this_cpu_add_return(pcp, -1) -# define this_cpu_xchg(pcp, nval) \ - __pcpu_size_call_return2(this_cpu_xchg_, (pcp), nval) -# define this_cpu_cmpxchg(pcp, oval, nval) \ - __pcpu_size_call_return2(this_cpu_cmpxchg_, pcp, oval, nval) - -/* - * cmpxchg_double replaces two adjacent scalars at once. The first - * two parameters are per cpu variables which have to be of the same - * size. 
A truth value is returned to indicate success or failure - * (since a double register result is difficult to handle). There is - * very limited hardware support for these operations, so only certain - * sizes may work. - */ -# define this_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ - __pcpu_double_call_return_bool(this_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2)) - -/* - * Generic percpu operations for contexts where we do not want to do - * any checks for preemptiosn. - * - * If there is no other protection through preempt disable and/or - * disabling interupts then one of these RMW operations can show unexpected - * behavior because the execution thread was rescheduled on another processor - * or an interrupt occurred and the same percpu variable was modified from - * the interrupt context. - */ -# define raw_cpu_read(pcp) __pcpu_size_call_return(raw_cpu_read_, (pcp)) -# define raw_cpu_write(pcp, val) __pcpu_size_call(raw_cpu_write_, (pcp), (val)) -# define raw_cpu_add(pcp, val) __pcpu_size_call(raw_cpu_add_, (pcp), (val)) -# define raw_cpu_sub(pcp, val) raw_cpu_add((pcp), -(val)) -# define raw_cpu_inc(pcp) raw_cpu_add((pcp), 1) -# define raw_cpu_dec(pcp) raw_cpu_sub((pcp), 1) -# define raw_cpu_and(pcp, val) __pcpu_size_call(raw_cpu_and_, (pcp), (val)) -# define raw_cpu_or(pcp, val) __pcpu_size_call(raw_cpu_or_, (pcp), (val)) -# define raw_cpu_add_return(pcp, val) \ - __pcpu_size_call_return2(raw_cpu_add_return_, pcp, val) -#define raw_cpu_sub_return(pcp, val) raw_cpu_add_return(pcp, -(typeof(pcp))(val)) -#define raw_cpu_inc_return(pcp) raw_cpu_add_return(pcp, 1) -#define raw_cpu_dec_return(pcp) raw_cpu_add_return(pcp, -1) -# define raw_cpu_xchg(pcp, nval) \ - __pcpu_size_call_return2(raw_cpu_xchg_, (pcp), nval) -# define raw_cpu_cmpxchg(pcp, oval, nval) \ - __pcpu_size_call_return2(raw_cpu_cmpxchg_, pcp, oval, nval) -# define raw_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ - __pcpu_double_call_return_bool(raw_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2)) - -/* - * Generic percpu operations for context that are safe from preemption/interrupts. 
- */ -# define __this_cpu_read(pcp) \ - (__this_cpu_preempt_check("read"),__pcpu_size_call_return(raw_cpu_read_, (pcp))) - -# define __this_cpu_write(pcp, val) \ -do { __this_cpu_preempt_check("write"); \ - __pcpu_size_call(raw_cpu_write_, (pcp), (val)); \ -} while (0) - -# define __this_cpu_add(pcp, val) \ -do { __this_cpu_preempt_check("add"); \ - __pcpu_size_call(raw_cpu_add_, (pcp), (val)); \ -} while (0) - -# define __this_cpu_sub(pcp, val) __this_cpu_add((pcp), -(typeof(pcp))(val)) -# define __this_cpu_inc(pcp) __this_cpu_add((pcp), 1) -# define __this_cpu_dec(pcp) __this_cpu_sub((pcp), 1) - -# define __this_cpu_and(pcp, val) \ -do { __this_cpu_preempt_check("and"); \ - __pcpu_size_call(raw_cpu_and_, (pcp), (val)); \ -} while (0) - -# define __this_cpu_or(pcp, val) \ -do { __this_cpu_preempt_check("or"); \ - __pcpu_size_call(raw_cpu_or_, (pcp), (val)); \ -} while (0) - -# define __this_cpu_add_return(pcp, val) \ - (__this_cpu_preempt_check("add_return"),__pcpu_size_call_return2(raw_cpu_add_return_, pcp, val)) - -#define __this_cpu_sub_return(pcp, val) __this_cpu_add_return(pcp, -(typeof(pcp))(val)) -#define __this_cpu_inc_return(pcp) __this_cpu_add_return(pcp, 1) -#define __this_cpu_dec_return(pcp) __this_cpu_add_return(pcp, -1) - -# define __this_cpu_xchg(pcp, nval) \ - (__this_cpu_preempt_check("xchg"),__pcpu_size_call_return2(raw_cpu_xchg_, (pcp), nval)) - -# define __this_cpu_cmpxchg(pcp, oval, nval) \ - (__this_cpu_preempt_check("cmpxchg"),__pcpu_size_call_return2(raw_cpu_cmpxchg_, pcp, oval, nval)) - -# define __this_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ - (__this_cpu_preempt_check("cmpxchg_double"),__pcpu_double_call_return_bool(raw_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2))) - #endif /* __LINUX_PERCPU_H */ -- cgit v1.2.3-59-g8ed1b From 9c28278a24c01c0073fb89e53c1d2a605ab9587d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 17 Jun 2014 19:12:39 -0400 Subject: percpu: reorder macros in percpu header files * In include/asm-generic/percpu.h, collect {raw|_this}_cpu_generic*() macros into one place. They were dispersed through {raw|this}_cpu_*_N() definitions and the visiual inconsistency was making following the code unnecessarily difficult. * In include/linux/percpu-defs.h, move __verify_pcpu_ptr() later in the file so that it's right above accessor definitions where it's actually used. This is pure reorganization. 
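The __verify_pcpu_ptr() macro moved here relies on a "check the type without evaluating the argument" idiom; a plain-C sketch of that idiom, minus the __percpu address-space annotation that only sparse enforces, and using invented names (pcpu_counter, verify_counter_ptr) plus GNU/C23 typeof as the kernel does, might look like:

#include <stdio.h>

struct pcpu_counter { long val; };

/*
 * Compile-time check that @ptr is (convertible to) a const struct
 * pcpu_counter *. Like __verify_pcpu_ptr(), @ptr is never evaluated,
 * only its type is inspected; the "+ 0" decays an array argument to a
 * pointer before typeof() sees it.
 */
#define verify_counter_ptr(ptr) do {					\
	const struct pcpu_counter *__dummy = (typeof((ptr) + 0))NULL;	\
	(void)__dummy;							\
} while (0)

int main(void)
{
	struct pcpu_counter c = { 42 };
	struct pcpu_counter *cp = &c;

	verify_counter_ptr(cp);		/* compiles cleanly: types match */
	/* verify_counter_ptr(&c.val); */	/* long * would draw a compiler diagnostic */
	printf("%ld\n", cp->val);
	return 0;
}

Because only the type of the argument is used, the check costs nothing at run time and is safe even for expressions with side effects.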
Signed-off-by: Tejun Heo Acked-by: Christoph Lameter --- include/asm-generic/percpu.h | 198 +++++++++++++++++++++---------------------- include/linux/percpu-defs.h | 26 +++--- 2 files changed, 112 insertions(+), 112 deletions(-) (limited to 'include/linux') diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index 932ce602128f..2300d989087b 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -65,6 +65,105 @@ extern void setup_per_cpu_areas(void); #define PER_CPU_DEF_ATTRIBUTES #endif +#define raw_cpu_generic_to_op(pcp, val, op) \ +do { \ + *raw_cpu_ptr(&(pcp)) op val; \ +} while (0) + +#define raw_cpu_generic_add_return(pcp, val) \ +({ \ + raw_cpu_add(pcp, val); \ + raw_cpu_read(pcp); \ +}) + +#define raw_cpu_generic_xchg(pcp, nval) \ +({ typeof(pcp) ret__; \ + ret__ = raw_cpu_read(pcp); \ + raw_cpu_write(pcp, nval); \ + ret__; \ +}) + +#define raw_cpu_generic_cmpxchg(pcp, oval, nval) \ +({ \ + typeof(pcp) ret__; \ + ret__ = raw_cpu_read(pcp); \ + if (ret__ == (oval)) \ + raw_cpu_write(pcp, nval); \ + ret__; \ +}) + +#define raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ +({ \ + int __ret = 0; \ + if (raw_cpu_read(pcp1) == (oval1) && \ + raw_cpu_read(pcp2) == (oval2)) { \ + raw_cpu_write(pcp1, (nval1)); \ + raw_cpu_write(pcp2, (nval2)); \ + __ret = 1; \ + } \ + (__ret); \ +}) + +#define _this_cpu_generic_read(pcp) \ +({ typeof(pcp) ret__; \ + preempt_disable(); \ + ret__ = *this_cpu_ptr(&(pcp)); \ + preempt_enable(); \ + ret__; \ +}) + +#define _this_cpu_generic_to_op(pcp, val, op) \ +do { \ + unsigned long flags; \ + raw_local_irq_save(flags); \ + *raw_cpu_ptr(&(pcp)) op val; \ + raw_local_irq_restore(flags); \ +} while (0) + +#define _this_cpu_generic_add_return(pcp, val) \ +({ \ + typeof(pcp) ret__; \ + unsigned long flags; \ + raw_local_irq_save(flags); \ + raw_cpu_add(pcp, val); \ + ret__ = raw_cpu_read(pcp); \ + raw_local_irq_restore(flags); \ + ret__; \ +}) + +#define _this_cpu_generic_xchg(pcp, nval) \ +({ typeof(pcp) ret__; \ + unsigned long flags; \ + raw_local_irq_save(flags); \ + ret__ = raw_cpu_read(pcp); \ + raw_cpu_write(pcp, nval); \ + raw_local_irq_restore(flags); \ + ret__; \ +}) + +#define _this_cpu_generic_cmpxchg(pcp, oval, nval) \ +({ \ + typeof(pcp) ret__; \ + unsigned long flags; \ + raw_local_irq_save(flags); \ + ret__ = raw_cpu_read(pcp); \ + if (ret__ == (oval)) \ + raw_cpu_write(pcp, nval); \ + raw_local_irq_restore(flags); \ + ret__; \ +}) + +#define _this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ +({ \ + int ret__; \ + unsigned long flags; \ + raw_local_irq_save(flags); \ + ret__ = raw_cpu_generic_cmpxchg_double(pcp1, pcp2, \ + oval1, oval2, nval1, nval2); \ + raw_local_irq_restore(flags); \ + ret__; \ +}) + # ifndef raw_cpu_read_1 # define raw_cpu_read_1(pcp) (*raw_cpu_ptr(&(pcp))) # endif @@ -78,11 +177,6 @@ extern void setup_per_cpu_areas(void); # define raw_cpu_read_8(pcp) (*raw_cpu_ptr(&(pcp))) # endif -#define raw_cpu_generic_to_op(pcp, val, op) \ -do { \ - *raw_cpu_ptr(&(pcp)) op val; \ -} while (0) - # ifndef raw_cpu_write_1 # define raw_cpu_write_1(pcp, val) raw_cpu_generic_to_op((pcp), (val), =) # endif @@ -135,12 +229,6 @@ do { \ # define raw_cpu_or_8(pcp, val) raw_cpu_generic_to_op((pcp), (val), |=) # endif -#define raw_cpu_generic_add_return(pcp, val) \ -({ \ - raw_cpu_add(pcp, val); \ - raw_cpu_read(pcp); \ -}) - # ifndef raw_cpu_add_return_1 # define raw_cpu_add_return_1(pcp, val) raw_cpu_generic_add_return(pcp, val) # endif @@ -154,13 +242,6 @@ 
do { \ # define raw_cpu_add_return_8(pcp, val) raw_cpu_generic_add_return(pcp, val) # endif -#define raw_cpu_generic_xchg(pcp, nval) \ -({ typeof(pcp) ret__; \ - ret__ = raw_cpu_read(pcp); \ - raw_cpu_write(pcp, nval); \ - ret__; \ -}) - # ifndef raw_cpu_xchg_1 # define raw_cpu_xchg_1(pcp, nval) raw_cpu_generic_xchg(pcp, nval) # endif @@ -174,15 +255,6 @@ do { \ # define raw_cpu_xchg_8(pcp, nval) raw_cpu_generic_xchg(pcp, nval) # endif -#define raw_cpu_generic_cmpxchg(pcp, oval, nval) \ -({ \ - typeof(pcp) ret__; \ - ret__ = raw_cpu_read(pcp); \ - if (ret__ == (oval)) \ - raw_cpu_write(pcp, nval); \ - ret__; \ -}) - # ifndef raw_cpu_cmpxchg_1 # define raw_cpu_cmpxchg_1(pcp, oval, nval) raw_cpu_generic_cmpxchg(pcp, oval, nval) # endif @@ -196,18 +268,6 @@ do { \ # define raw_cpu_cmpxchg_8(pcp, oval, nval) raw_cpu_generic_cmpxchg(pcp, oval, nval) # endif -#define raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ -({ \ - int __ret = 0; \ - if (raw_cpu_read(pcp1) == (oval1) && \ - raw_cpu_read(pcp2) == (oval2)) { \ - raw_cpu_write(pcp1, (nval1)); \ - raw_cpu_write(pcp2, (nval2)); \ - __ret = 1; \ - } \ - (__ret); \ -}) - # ifndef raw_cpu_cmpxchg_double_1 # define raw_cpu_cmpxchg_double_1(pcp1, pcp2, oval1, oval2, nval1, nval2) \ raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) @@ -225,14 +285,6 @@ do { \ raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) # endif -#define _this_cpu_generic_read(pcp) \ -({ typeof(pcp) ret__; \ - preempt_disable(); \ - ret__ = *this_cpu_ptr(&(pcp)); \ - preempt_enable(); \ - ret__; \ -}) - # ifndef this_cpu_read_1 # define this_cpu_read_1(pcp) _this_cpu_generic_read(pcp) # endif @@ -246,14 +298,6 @@ do { \ # define this_cpu_read_8(pcp) _this_cpu_generic_read(pcp) # endif -#define _this_cpu_generic_to_op(pcp, val, op) \ -do { \ - unsigned long flags; \ - raw_local_irq_save(flags); \ - *raw_cpu_ptr(&(pcp)) op val; \ - raw_local_irq_restore(flags); \ -} while (0) - # ifndef this_cpu_write_1 # define this_cpu_write_1(pcp, val) _this_cpu_generic_to_op((pcp), (val), =) # endif @@ -306,17 +350,6 @@ do { \ # define this_cpu_or_8(pcp, val) _this_cpu_generic_to_op((pcp), (val), |=) # endif -#define _this_cpu_generic_add_return(pcp, val) \ -({ \ - typeof(pcp) ret__; \ - unsigned long flags; \ - raw_local_irq_save(flags); \ - raw_cpu_add(pcp, val); \ - ret__ = raw_cpu_read(pcp); \ - raw_local_irq_restore(flags); \ - ret__; \ -}) - # ifndef this_cpu_add_return_1 # define this_cpu_add_return_1(pcp, val) _this_cpu_generic_add_return(pcp, val) # endif @@ -330,16 +363,6 @@ do { \ # define this_cpu_add_return_8(pcp, val) _this_cpu_generic_add_return(pcp, val) # endif -#define _this_cpu_generic_xchg(pcp, nval) \ -({ typeof(pcp) ret__; \ - unsigned long flags; \ - raw_local_irq_save(flags); \ - ret__ = raw_cpu_read(pcp); \ - raw_cpu_write(pcp, nval); \ - raw_local_irq_restore(flags); \ - ret__; \ -}) - # ifndef this_cpu_xchg_1 # define this_cpu_xchg_1(pcp, nval) _this_cpu_generic_xchg(pcp, nval) # endif @@ -353,18 +376,6 @@ do { \ # define this_cpu_xchg_8(pcp, nval) _this_cpu_generic_xchg(pcp, nval) # endif -#define _this_cpu_generic_cmpxchg(pcp, oval, nval) \ -({ \ - typeof(pcp) ret__; \ - unsigned long flags; \ - raw_local_irq_save(flags); \ - ret__ = raw_cpu_read(pcp); \ - if (ret__ == (oval)) \ - raw_cpu_write(pcp, nval); \ - raw_local_irq_restore(flags); \ - ret__; \ -}) - # ifndef this_cpu_cmpxchg_1 # define this_cpu_cmpxchg_1(pcp, oval, nval) _this_cpu_generic_cmpxchg(pcp, oval, nval) # endif @@ -378,17 +389,6 
@@ do { \ # define this_cpu_cmpxchg_8(pcp, oval, nval) _this_cpu_generic_cmpxchg(pcp, oval, nval) # endif -#define _this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ -({ \ - int ret__; \ - unsigned long flags; \ - raw_local_irq_save(flags); \ - ret__ = raw_cpu_generic_cmpxchg_double(pcp1, pcp2, \ - oval1, oval2, nval1, nval2); \ - raw_local_irq_restore(flags); \ - ret__; \ -}) - # ifndef this_cpu_cmpxchg_double_1 # define this_cpu_cmpxchg_double_1(pcp1, pcp2, oval1, oval2, nval1, nval2) \ _this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index 6710eb9555fa..fd0b9ee19ec8 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -52,19 +52,6 @@ #define __PCPU_DUMMY_ATTRS \ __attribute__((section(".discard"), unused)) -/* - * Macro which verifies @ptr is a percpu pointer without evaluating - * @ptr. This is to be used in percpu accessors to verify that the - * input parameter is a percpu pointer. - * - * + 0 is required in order to convert the pointer type from a - * potential array type to a pointer to a single item of the array. - */ -#define __verify_pcpu_ptr(ptr) do { \ - const void __percpu *__vpp_verify = (typeof((ptr) + 0))NULL; \ - (void)__vpp_verify; \ -} while (0) - /* * s390 and alpha modules require percpu variables to be defined as * weak to force the compiler to generate GOT based external @@ -203,6 +190,19 @@ */ #ifndef __ASSEMBLY__ +/* + * Macro which verifies @ptr is a percpu pointer without evaluating + * @ptr. This is to be used in percpu accessors to verify that the + * input parameter is a percpu pointer. + * + * + 0 is required in order to convert the pointer type from a + * potential array type to a pointer to a single item of the array. + */ +#define __verify_pcpu_ptr(ptr) do { \ + const void __percpu *__vpp_verify = (typeof((ptr) + 0))NULL; \ + (void)__vpp_verify; \ +} while (0) + #ifdef CONFIG_SMP /* -- cgit v1.2.3-59-g8ed1b From cadb1c4db2d33e0a818f645cd1963a479dab91e2 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 17 Jun 2014 19:12:39 -0400 Subject: percpu: use raw_cpu_*() to define __this_cpu_*() __this_cpu_*() operations are the same as raw_cpu_*() operations except for the added __this_cpu_preempt_check(). Curiously, these were defined using __pcu_size_call_*() instead of being layered on top of raw_cpu_*(). Let's layer them so that __this_cpu_*() are defined in terms of raw_cpu_*(). It's simpler and less error-prone this way. This patch doesn't introduce any functional difference. Signed-off-by: Tejun Heo Acked-by: Christoph Lameter --- include/linux/percpu-defs.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index fd0b9ee19ec8..215917e9a176 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -403,16 +403,16 @@ do { \ * Generic percpu operations for context that are safe from preemption/interrupts. 
*/ # define __this_cpu_read(pcp) \ - (__this_cpu_preempt_check("read"),__pcpu_size_call_return(raw_cpu_read_, (pcp))) + (__this_cpu_preempt_check("read"),raw_cpu_read(pcp)) # define __this_cpu_write(pcp, val) \ do { __this_cpu_preempt_check("write"); \ - __pcpu_size_call(raw_cpu_write_, (pcp), (val)); \ + raw_cpu_write(pcp, val); \ } while (0) # define __this_cpu_add(pcp, val) \ do { __this_cpu_preempt_check("add"); \ - __pcpu_size_call(raw_cpu_add_, (pcp), (val)); \ + raw_cpu_add(pcp, val); \ } while (0) # define __this_cpu_sub(pcp, val) __this_cpu_add((pcp), -(typeof(pcp))(val)) @@ -421,29 +421,29 @@ do { __this_cpu_preempt_check("add"); \ # define __this_cpu_and(pcp, val) \ do { __this_cpu_preempt_check("and"); \ - __pcpu_size_call(raw_cpu_and_, (pcp), (val)); \ + raw_cpu_and(pcp, val); \ } while (0) # define __this_cpu_or(pcp, val) \ do { __this_cpu_preempt_check("or"); \ - __pcpu_size_call(raw_cpu_or_, (pcp), (val)); \ + raw_cpu_or(pcp, val); \ } while (0) # define __this_cpu_add_return(pcp, val) \ - (__this_cpu_preempt_check("add_return"),__pcpu_size_call_return2(raw_cpu_add_return_, pcp, val)) + (__this_cpu_preempt_check("add_return"),raw_cpu_add_return(pcp, val)) #define __this_cpu_sub_return(pcp, val) __this_cpu_add_return(pcp, -(typeof(pcp))(val)) #define __this_cpu_inc_return(pcp) __this_cpu_add_return(pcp, 1) #define __this_cpu_dec_return(pcp) __this_cpu_add_return(pcp, -1) # define __this_cpu_xchg(pcp, nval) \ - (__this_cpu_preempt_check("xchg"),__pcpu_size_call_return2(raw_cpu_xchg_, (pcp), nval)) + (__this_cpu_preempt_check("xchg"),raw_cpu_xchg(pcp, nval)) # define __this_cpu_cmpxchg(pcp, oval, nval) \ - (__this_cpu_preempt_check("cmpxchg"),__pcpu_size_call_return2(raw_cpu_cmpxchg_, pcp, oval, nval)) + (__this_cpu_preempt_check("cmpxchg"),raw_cpu_cmpxchg(pcp, oval, nval)) # define __this_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ - (__this_cpu_preempt_check("cmpxchg_double"),__pcpu_double_call_return_bool(raw_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2))) + (__this_cpu_preempt_check("cmpxchg_double"),raw_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)) /* * this_cpu_*() operations are used for accesses that must be done in a -- cgit v1.2.3-59-g8ed1b From eba117889ac444bea6e8270049cbaeed48169889 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 17 Jun 2014 19:12:40 -0400 Subject: percpu: preffity percpu header files percpu macros are difficult to read. It's partly because they're fairly complex but also because they simply lack visual and conventional consistency to an unusual degree. The preceding patches tried to organize macro definitions consistently by their roles. This patch makes the following cosmetic changes to improve overall readability. * Use consistent convention for multi-line macro definitions - "do {" or "({" are now put on their own lines and the line continuing '\' are all put on the same column. * Temp variables used inside macro are consistently given "__" prefix. * When a macro argument is passed to another macro or a function, putting extra parenthses around it doesn't help anything. Don't put them. * _this_cpu_generic_*() are renamed to this_cpu_generic_*() so that they're consistent with raw_cpu_generic_*(). * Reorganize raw_cpu_*() and this_cpu_*() definitions so that trivial wrappers are collected in one place after actual operation definitions. * Other misc cleanups including reorganizing comments. All changes in this patch are cosmetic and cause no functional difference. 
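One of the conventions this patch applies, the "__" prefix on temporaries inside statement-expression macros, guards against a silent shadowing bug; a userspace sketch of the hazard, assuming GNU statement expressions and typeof as in the kernel and using the invented names xchg_bad/xchg_ok, is:

#include <stdio.h>

/* Temp named like an ordinary variable: collides if the caller passes "ret" */
#define xchg_bad(ptr, nval) ({				\
	typeof(*(ptr)) ret = *(ptr);			\
	*(ptr) = (nval);	/* "nval" may expand to the freshly shadowed "ret" */ \
	ret;						\
})

/* "__"-prefixed temp, per the convention applied here: collision is unlikely */
#define xchg_ok(ptr, nval) ({				\
	typeof(*(ptr)) __ret = *(ptr);			\
	*(ptr) = (nval);				\
	__ret;						\
})

int main(void)
{
	int slot = 1, ret = 5, old;

	old = xchg_bad(&slot, ret);	/* stores the shadowing temp: slot stays 1 */
	printf("bad: slot=%d old=%d\n", slot, old);

	slot = 1;
	old = xchg_ok(&slot, ret);	/* stores the caller's ret: slot becomes 5 */
	printf("ok:  slot=%d old=%d\n", slot, old);
	return 0;
}

Both calls still return the correct old value, so the broken store in the first variant is easy to miss in testing; that is why a temp-name prefix callers are unlikely to use is worth standardizing across these headers.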
Signed-off-by: Tejun Heo Acked-by: Christoph Lameter --- include/asm-generic/percpu.h | 581 ++++++++++++++++++++++--------------------- include/linux/percpu-defs.h | 253 ++++++++++--------- 2 files changed, 435 insertions(+), 399 deletions(-) (limited to 'include/linux') diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index 2300d989087b..4d9f233c4ba8 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -77,333 +77,344 @@ do { \ }) #define raw_cpu_generic_xchg(pcp, nval) \ -({ typeof(pcp) ret__; \ - ret__ = raw_cpu_read(pcp); \ +({ \ + typeof(pcp) __ret; \ + __ret = raw_cpu_read(pcp); \ raw_cpu_write(pcp, nval); \ - ret__; \ + __ret; \ }) #define raw_cpu_generic_cmpxchg(pcp, oval, nval) \ ({ \ - typeof(pcp) ret__; \ - ret__ = raw_cpu_read(pcp); \ - if (ret__ == (oval)) \ + typeof(pcp) __ret; \ + __ret = raw_cpu_read(pcp); \ + if (__ret == (oval)) \ raw_cpu_write(pcp, nval); \ - ret__; \ + __ret; \ }) -#define raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ +#define raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ ({ \ int __ret = 0; \ if (raw_cpu_read(pcp1) == (oval1) && \ raw_cpu_read(pcp2) == (oval2)) { \ - raw_cpu_write(pcp1, (nval1)); \ - raw_cpu_write(pcp2, (nval2)); \ + raw_cpu_write(pcp1, nval1); \ + raw_cpu_write(pcp2, nval2); \ __ret = 1; \ } \ (__ret); \ }) -#define _this_cpu_generic_read(pcp) \ -({ typeof(pcp) ret__; \ +#define this_cpu_generic_read(pcp) \ +({ \ + typeof(pcp) __ret; \ preempt_disable(); \ - ret__ = *this_cpu_ptr(&(pcp)); \ + __ret = *this_cpu_ptr(&(pcp)); \ preempt_enable(); \ - ret__; \ + __ret; \ }) -#define _this_cpu_generic_to_op(pcp, val, op) \ +#define this_cpu_generic_to_op(pcp, val, op) \ do { \ - unsigned long flags; \ - raw_local_irq_save(flags); \ + unsigned long __flags; \ + raw_local_irq_save(__flags); \ *raw_cpu_ptr(&(pcp)) op val; \ - raw_local_irq_restore(flags); \ + raw_local_irq_restore(__flags); \ } while (0) -#define _this_cpu_generic_add_return(pcp, val) \ +#define this_cpu_generic_add_return(pcp, val) \ ({ \ - typeof(pcp) ret__; \ - unsigned long flags; \ - raw_local_irq_save(flags); \ - raw_cpu_add(pcp, val); \ - ret__ = raw_cpu_read(pcp); \ - raw_local_irq_restore(flags); \ - ret__; \ + typeof(pcp) __ret; \ + unsigned long __flags; \ + raw_local_irq_save(__flags); \ + raw_cpu_add(pcp, val); \ + __ret = raw_cpu_read(pcp); \ + raw_local_irq_restore(__flags); \ + __ret; \ }) -#define _this_cpu_generic_xchg(pcp, nval) \ -({ typeof(pcp) ret__; \ - unsigned long flags; \ - raw_local_irq_save(flags); \ - ret__ = raw_cpu_read(pcp); \ +#define this_cpu_generic_xchg(pcp, nval) \ +({ \ + typeof(pcp) __ret; \ + unsigned long __flags; \ + raw_local_irq_save(__flags); \ + __ret = raw_cpu_read(pcp); \ raw_cpu_write(pcp, nval); \ - raw_local_irq_restore(flags); \ - ret__; \ + raw_local_irq_restore(__flags); \ + __ret; \ }) -#define _this_cpu_generic_cmpxchg(pcp, oval, nval) \ +#define this_cpu_generic_cmpxchg(pcp, oval, nval) \ ({ \ - typeof(pcp) ret__; \ - unsigned long flags; \ - raw_local_irq_save(flags); \ - ret__ = raw_cpu_read(pcp); \ - if (ret__ == (oval)) \ + typeof(pcp) __ret; \ + unsigned long __flags; \ + raw_local_irq_save(__flags); \ + __ret = raw_cpu_read(pcp); \ + if (__ret == (oval)) \ raw_cpu_write(pcp, nval); \ - raw_local_irq_restore(flags); \ - ret__; \ + raw_local_irq_restore(__flags); \ + __ret; \ }) -#define _this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ +#define this_cpu_generic_cmpxchg_double(pcp1, pcp2, 
oval1, oval2, nval1, nval2) \ ({ \ - int ret__; \ - unsigned long flags; \ - raw_local_irq_save(flags); \ - ret__ = raw_cpu_generic_cmpxchg_double(pcp1, pcp2, \ + int __ret; \ + unsigned long __flags; \ + raw_local_irq_save(__flags); \ + __ret = raw_cpu_generic_cmpxchg_double(pcp1, pcp2, \ oval1, oval2, nval1, nval2); \ - raw_local_irq_restore(flags); \ - ret__; \ + raw_local_irq_restore(__flags); \ + __ret; \ }) -# ifndef raw_cpu_read_1 -# define raw_cpu_read_1(pcp) (*raw_cpu_ptr(&(pcp))) -# endif -# ifndef raw_cpu_read_2 -# define raw_cpu_read_2(pcp) (*raw_cpu_ptr(&(pcp))) -# endif -# ifndef raw_cpu_read_4 -# define raw_cpu_read_4(pcp) (*raw_cpu_ptr(&(pcp))) -# endif -# ifndef raw_cpu_read_8 -# define raw_cpu_read_8(pcp) (*raw_cpu_ptr(&(pcp))) -# endif - -# ifndef raw_cpu_write_1 -# define raw_cpu_write_1(pcp, val) raw_cpu_generic_to_op((pcp), (val), =) -# endif -# ifndef raw_cpu_write_2 -# define raw_cpu_write_2(pcp, val) raw_cpu_generic_to_op((pcp), (val), =) -# endif -# ifndef raw_cpu_write_4 -# define raw_cpu_write_4(pcp, val) raw_cpu_generic_to_op((pcp), (val), =) -# endif -# ifndef raw_cpu_write_8 -# define raw_cpu_write_8(pcp, val) raw_cpu_generic_to_op((pcp), (val), =) -# endif - -# ifndef raw_cpu_add_1 -# define raw_cpu_add_1(pcp, val) raw_cpu_generic_to_op((pcp), (val), +=) -# endif -# ifndef raw_cpu_add_2 -# define raw_cpu_add_2(pcp, val) raw_cpu_generic_to_op((pcp), (val), +=) -# endif -# ifndef raw_cpu_add_4 -# define raw_cpu_add_4(pcp, val) raw_cpu_generic_to_op((pcp), (val), +=) -# endif -# ifndef raw_cpu_add_8 -# define raw_cpu_add_8(pcp, val) raw_cpu_generic_to_op((pcp), (val), +=) -# endif - -# ifndef raw_cpu_and_1 -# define raw_cpu_and_1(pcp, val) raw_cpu_generic_to_op((pcp), (val), &=) -# endif -# ifndef raw_cpu_and_2 -# define raw_cpu_and_2(pcp, val) raw_cpu_generic_to_op((pcp), (val), &=) -# endif -# ifndef raw_cpu_and_4 -# define raw_cpu_and_4(pcp, val) raw_cpu_generic_to_op((pcp), (val), &=) -# endif -# ifndef raw_cpu_and_8 -# define raw_cpu_and_8(pcp, val) raw_cpu_generic_to_op((pcp), (val), &=) -# endif - -# ifndef raw_cpu_or_1 -# define raw_cpu_or_1(pcp, val) raw_cpu_generic_to_op((pcp), (val), |=) -# endif -# ifndef raw_cpu_or_2 -# define raw_cpu_or_2(pcp, val) raw_cpu_generic_to_op((pcp), (val), |=) -# endif -# ifndef raw_cpu_or_4 -# define raw_cpu_or_4(pcp, val) raw_cpu_generic_to_op((pcp), (val), |=) -# endif -# ifndef raw_cpu_or_8 -# define raw_cpu_or_8(pcp, val) raw_cpu_generic_to_op((pcp), (val), |=) -# endif - -# ifndef raw_cpu_add_return_1 -# define raw_cpu_add_return_1(pcp, val) raw_cpu_generic_add_return(pcp, val) -# endif -# ifndef raw_cpu_add_return_2 -# define raw_cpu_add_return_2(pcp, val) raw_cpu_generic_add_return(pcp, val) -# endif -# ifndef raw_cpu_add_return_4 -# define raw_cpu_add_return_4(pcp, val) raw_cpu_generic_add_return(pcp, val) -# endif -# ifndef raw_cpu_add_return_8 -# define raw_cpu_add_return_8(pcp, val) raw_cpu_generic_add_return(pcp, val) -# endif - -# ifndef raw_cpu_xchg_1 -# define raw_cpu_xchg_1(pcp, nval) raw_cpu_generic_xchg(pcp, nval) -# endif -# ifndef raw_cpu_xchg_2 -# define raw_cpu_xchg_2(pcp, nval) raw_cpu_generic_xchg(pcp, nval) -# endif -# ifndef raw_cpu_xchg_4 -# define raw_cpu_xchg_4(pcp, nval) raw_cpu_generic_xchg(pcp, nval) -# endif -# ifndef raw_cpu_xchg_8 -# define raw_cpu_xchg_8(pcp, nval) raw_cpu_generic_xchg(pcp, nval) -# endif - -# ifndef raw_cpu_cmpxchg_1 -# define raw_cpu_cmpxchg_1(pcp, oval, nval) raw_cpu_generic_cmpxchg(pcp, oval, nval) -# endif -# ifndef raw_cpu_cmpxchg_2 -# define 
raw_cpu_cmpxchg_2(pcp, oval, nval) raw_cpu_generic_cmpxchg(pcp, oval, nval) -# endif -# ifndef raw_cpu_cmpxchg_4 -# define raw_cpu_cmpxchg_4(pcp, oval, nval) raw_cpu_generic_cmpxchg(pcp, oval, nval) -# endif -# ifndef raw_cpu_cmpxchg_8 -# define raw_cpu_cmpxchg_8(pcp, oval, nval) raw_cpu_generic_cmpxchg(pcp, oval, nval) -# endif - -# ifndef raw_cpu_cmpxchg_double_1 -# define raw_cpu_cmpxchg_double_1(pcp1, pcp2, oval1, oval2, nval1, nval2) \ +#ifndef raw_cpu_read_1 +#define raw_cpu_read_1(pcp) (*raw_cpu_ptr(&(pcp))) +#endif +#ifndef raw_cpu_read_2 +#define raw_cpu_read_2(pcp) (*raw_cpu_ptr(&(pcp))) +#endif +#ifndef raw_cpu_read_4 +#define raw_cpu_read_4(pcp) (*raw_cpu_ptr(&(pcp))) +#endif +#ifndef raw_cpu_read_8 +#define raw_cpu_read_8(pcp) (*raw_cpu_ptr(&(pcp))) +#endif + +#ifndef raw_cpu_write_1 +#define raw_cpu_write_1(pcp, val) raw_cpu_generic_to_op(pcp, val, =) +#endif +#ifndef raw_cpu_write_2 +#define raw_cpu_write_2(pcp, val) raw_cpu_generic_to_op(pcp, val, =) +#endif +#ifndef raw_cpu_write_4 +#define raw_cpu_write_4(pcp, val) raw_cpu_generic_to_op(pcp, val, =) +#endif +#ifndef raw_cpu_write_8 +#define raw_cpu_write_8(pcp, val) raw_cpu_generic_to_op(pcp, val, =) +#endif + +#ifndef raw_cpu_add_1 +#define raw_cpu_add_1(pcp, val) raw_cpu_generic_to_op(pcp, val, +=) +#endif +#ifndef raw_cpu_add_2 +#define raw_cpu_add_2(pcp, val) raw_cpu_generic_to_op(pcp, val, +=) +#endif +#ifndef raw_cpu_add_4 +#define raw_cpu_add_4(pcp, val) raw_cpu_generic_to_op(pcp, val, +=) +#endif +#ifndef raw_cpu_add_8 +#define raw_cpu_add_8(pcp, val) raw_cpu_generic_to_op(pcp, val, +=) +#endif + +#ifndef raw_cpu_and_1 +#define raw_cpu_and_1(pcp, val) raw_cpu_generic_to_op(pcp, val, &=) +#endif +#ifndef raw_cpu_and_2 +#define raw_cpu_and_2(pcp, val) raw_cpu_generic_to_op(pcp, val, &=) +#endif +#ifndef raw_cpu_and_4 +#define raw_cpu_and_4(pcp, val) raw_cpu_generic_to_op(pcp, val, &=) +#endif +#ifndef raw_cpu_and_8 +#define raw_cpu_and_8(pcp, val) raw_cpu_generic_to_op(pcp, val, &=) +#endif + +#ifndef raw_cpu_or_1 +#define raw_cpu_or_1(pcp, val) raw_cpu_generic_to_op(pcp, val, |=) +#endif +#ifndef raw_cpu_or_2 +#define raw_cpu_or_2(pcp, val) raw_cpu_generic_to_op(pcp, val, |=) +#endif +#ifndef raw_cpu_or_4 +#define raw_cpu_or_4(pcp, val) raw_cpu_generic_to_op(pcp, val, |=) +#endif +#ifndef raw_cpu_or_8 +#define raw_cpu_or_8(pcp, val) raw_cpu_generic_to_op(pcp, val, |=) +#endif + +#ifndef raw_cpu_add_return_1 +#define raw_cpu_add_return_1(pcp, val) raw_cpu_generic_add_return(pcp, val) +#endif +#ifndef raw_cpu_add_return_2 +#define raw_cpu_add_return_2(pcp, val) raw_cpu_generic_add_return(pcp, val) +#endif +#ifndef raw_cpu_add_return_4 +#define raw_cpu_add_return_4(pcp, val) raw_cpu_generic_add_return(pcp, val) +#endif +#ifndef raw_cpu_add_return_8 +#define raw_cpu_add_return_8(pcp, val) raw_cpu_generic_add_return(pcp, val) +#endif + +#ifndef raw_cpu_xchg_1 +#define raw_cpu_xchg_1(pcp, nval) raw_cpu_generic_xchg(pcp, nval) +#endif +#ifndef raw_cpu_xchg_2 +#define raw_cpu_xchg_2(pcp, nval) raw_cpu_generic_xchg(pcp, nval) +#endif +#ifndef raw_cpu_xchg_4 +#define raw_cpu_xchg_4(pcp, nval) raw_cpu_generic_xchg(pcp, nval) +#endif +#ifndef raw_cpu_xchg_8 +#define raw_cpu_xchg_8(pcp, nval) raw_cpu_generic_xchg(pcp, nval) +#endif + +#ifndef raw_cpu_cmpxchg_1 +#define raw_cpu_cmpxchg_1(pcp, oval, nval) \ + raw_cpu_generic_cmpxchg(pcp, oval, nval) +#endif +#ifndef raw_cpu_cmpxchg_2 +#define raw_cpu_cmpxchg_2(pcp, oval, nval) \ + raw_cpu_generic_cmpxchg(pcp, oval, nval) +#endif +#ifndef raw_cpu_cmpxchg_4 +#define 
raw_cpu_cmpxchg_4(pcp, oval, nval) \ + raw_cpu_generic_cmpxchg(pcp, oval, nval) +#endif +#ifndef raw_cpu_cmpxchg_8 +#define raw_cpu_cmpxchg_8(pcp, oval, nval) \ + raw_cpu_generic_cmpxchg(pcp, oval, nval) +#endif + +#ifndef raw_cpu_cmpxchg_double_1 +#define raw_cpu_cmpxchg_double_1(pcp1, pcp2, oval1, oval2, nval1, nval2) \ raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) -# endif -# ifndef raw_cpu_cmpxchg_double_2 -# define raw_cpu_cmpxchg_double_2(pcp1, pcp2, oval1, oval2, nval1, nval2) \ +#endif +#ifndef raw_cpu_cmpxchg_double_2 +#define raw_cpu_cmpxchg_double_2(pcp1, pcp2, oval1, oval2, nval1, nval2) \ raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) -# endif -# ifndef raw_cpu_cmpxchg_double_4 -# define raw_cpu_cmpxchg_double_4(pcp1, pcp2, oval1, oval2, nval1, nval2) \ +#endif +#ifndef raw_cpu_cmpxchg_double_4 +#define raw_cpu_cmpxchg_double_4(pcp1, pcp2, oval1, oval2, nval1, nval2) \ raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) -# endif -# ifndef raw_cpu_cmpxchg_double_8 -# define raw_cpu_cmpxchg_double_8(pcp1, pcp2, oval1, oval2, nval1, nval2) \ +#endif +#ifndef raw_cpu_cmpxchg_double_8 +#define raw_cpu_cmpxchg_double_8(pcp1, pcp2, oval1, oval2, nval1, nval2) \ raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) -# endif - -# ifndef this_cpu_read_1 -# define this_cpu_read_1(pcp) _this_cpu_generic_read(pcp) -# endif -# ifndef this_cpu_read_2 -# define this_cpu_read_2(pcp) _this_cpu_generic_read(pcp) -# endif -# ifndef this_cpu_read_4 -# define this_cpu_read_4(pcp) _this_cpu_generic_read(pcp) -# endif -# ifndef this_cpu_read_8 -# define this_cpu_read_8(pcp) _this_cpu_generic_read(pcp) -# endif - -# ifndef this_cpu_write_1 -# define this_cpu_write_1(pcp, val) _this_cpu_generic_to_op((pcp), (val), =) -# endif -# ifndef this_cpu_write_2 -# define this_cpu_write_2(pcp, val) _this_cpu_generic_to_op((pcp), (val), =) -# endif -# ifndef this_cpu_write_4 -# define this_cpu_write_4(pcp, val) _this_cpu_generic_to_op((pcp), (val), =) -# endif -# ifndef this_cpu_write_8 -# define this_cpu_write_8(pcp, val) _this_cpu_generic_to_op((pcp), (val), =) -# endif - -# ifndef this_cpu_add_1 -# define this_cpu_add_1(pcp, val) _this_cpu_generic_to_op((pcp), (val), +=) -# endif -# ifndef this_cpu_add_2 -# define this_cpu_add_2(pcp, val) _this_cpu_generic_to_op((pcp), (val), +=) -# endif -# ifndef this_cpu_add_4 -# define this_cpu_add_4(pcp, val) _this_cpu_generic_to_op((pcp), (val), +=) -# endif -# ifndef this_cpu_add_8 -# define this_cpu_add_8(pcp, val) _this_cpu_generic_to_op((pcp), (val), +=) -# endif - -# ifndef this_cpu_and_1 -# define this_cpu_and_1(pcp, val) _this_cpu_generic_to_op((pcp), (val), &=) -# endif -# ifndef this_cpu_and_2 -# define this_cpu_and_2(pcp, val) _this_cpu_generic_to_op((pcp), (val), &=) -# endif -# ifndef this_cpu_and_4 -# define this_cpu_and_4(pcp, val) _this_cpu_generic_to_op((pcp), (val), &=) -# endif -# ifndef this_cpu_and_8 -# define this_cpu_and_8(pcp, val) _this_cpu_generic_to_op((pcp), (val), &=) -# endif - -# ifndef this_cpu_or_1 -# define this_cpu_or_1(pcp, val) _this_cpu_generic_to_op((pcp), (val), |=) -# endif -# ifndef this_cpu_or_2 -# define this_cpu_or_2(pcp, val) _this_cpu_generic_to_op((pcp), (val), |=) -# endif -# ifndef this_cpu_or_4 -# define this_cpu_or_4(pcp, val) _this_cpu_generic_to_op((pcp), (val), |=) -# endif -# ifndef this_cpu_or_8 -# define this_cpu_or_8(pcp, val) _this_cpu_generic_to_op((pcp), (val), |=) -# endif - -# ifndef this_cpu_add_return_1 -# define 
this_cpu_add_return_1(pcp, val) _this_cpu_generic_add_return(pcp, val) -# endif -# ifndef this_cpu_add_return_2 -# define this_cpu_add_return_2(pcp, val) _this_cpu_generic_add_return(pcp, val) -# endif -# ifndef this_cpu_add_return_4 -# define this_cpu_add_return_4(pcp, val) _this_cpu_generic_add_return(pcp, val) -# endif -# ifndef this_cpu_add_return_8 -# define this_cpu_add_return_8(pcp, val) _this_cpu_generic_add_return(pcp, val) -# endif - -# ifndef this_cpu_xchg_1 -# define this_cpu_xchg_1(pcp, nval) _this_cpu_generic_xchg(pcp, nval) -# endif -# ifndef this_cpu_xchg_2 -# define this_cpu_xchg_2(pcp, nval) _this_cpu_generic_xchg(pcp, nval) -# endif -# ifndef this_cpu_xchg_4 -# define this_cpu_xchg_4(pcp, nval) _this_cpu_generic_xchg(pcp, nval) -# endif -# ifndef this_cpu_xchg_8 -# define this_cpu_xchg_8(pcp, nval) _this_cpu_generic_xchg(pcp, nval) -# endif - -# ifndef this_cpu_cmpxchg_1 -# define this_cpu_cmpxchg_1(pcp, oval, nval) _this_cpu_generic_cmpxchg(pcp, oval, nval) -# endif -# ifndef this_cpu_cmpxchg_2 -# define this_cpu_cmpxchg_2(pcp, oval, nval) _this_cpu_generic_cmpxchg(pcp, oval, nval) -# endif -# ifndef this_cpu_cmpxchg_4 -# define this_cpu_cmpxchg_4(pcp, oval, nval) _this_cpu_generic_cmpxchg(pcp, oval, nval) -# endif -# ifndef this_cpu_cmpxchg_8 -# define this_cpu_cmpxchg_8(pcp, oval, nval) _this_cpu_generic_cmpxchg(pcp, oval, nval) -# endif - -# ifndef this_cpu_cmpxchg_double_1 -# define this_cpu_cmpxchg_double_1(pcp1, pcp2, oval1, oval2, nval1, nval2) \ - _this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) -# endif -# ifndef this_cpu_cmpxchg_double_2 -# define this_cpu_cmpxchg_double_2(pcp1, pcp2, oval1, oval2, nval1, nval2) \ - _this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) -# endif -# ifndef this_cpu_cmpxchg_double_4 -# define this_cpu_cmpxchg_double_4(pcp1, pcp2, oval1, oval2, nval1, nval2) \ - _this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) -# endif -# ifndef this_cpu_cmpxchg_double_8 -# define this_cpu_cmpxchg_double_8(pcp1, pcp2, oval1, oval2, nval1, nval2) \ - _this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) -# endif +#endif + +#ifndef this_cpu_read_1 +#define this_cpu_read_1(pcp) this_cpu_generic_read(pcp) +#endif +#ifndef this_cpu_read_2 +#define this_cpu_read_2(pcp) this_cpu_generic_read(pcp) +#endif +#ifndef this_cpu_read_4 +#define this_cpu_read_4(pcp) this_cpu_generic_read(pcp) +#endif +#ifndef this_cpu_read_8 +#define this_cpu_read_8(pcp) this_cpu_generic_read(pcp) +#endif + +#ifndef this_cpu_write_1 +#define this_cpu_write_1(pcp, val) this_cpu_generic_to_op(pcp, val, =) +#endif +#ifndef this_cpu_write_2 +#define this_cpu_write_2(pcp, val) this_cpu_generic_to_op(pcp, val, =) +#endif +#ifndef this_cpu_write_4 +#define this_cpu_write_4(pcp, val) this_cpu_generic_to_op(pcp, val, =) +#endif +#ifndef this_cpu_write_8 +#define this_cpu_write_8(pcp, val) this_cpu_generic_to_op(pcp, val, =) +#endif + +#ifndef this_cpu_add_1 +#define this_cpu_add_1(pcp, val) this_cpu_generic_to_op(pcp, val, +=) +#endif +#ifndef this_cpu_add_2 +#define this_cpu_add_2(pcp, val) this_cpu_generic_to_op(pcp, val, +=) +#endif +#ifndef this_cpu_add_4 +#define this_cpu_add_4(pcp, val) this_cpu_generic_to_op(pcp, val, +=) +#endif +#ifndef this_cpu_add_8 +#define this_cpu_add_8(pcp, val) this_cpu_generic_to_op(pcp, val, +=) +#endif + +#ifndef this_cpu_and_1 +#define this_cpu_and_1(pcp, val) this_cpu_generic_to_op(pcp, val, &=) +#endif +#ifndef this_cpu_and_2 +#define this_cpu_and_2(pcp, val) 
this_cpu_generic_to_op(pcp, val, &=) +#endif +#ifndef this_cpu_and_4 +#define this_cpu_and_4(pcp, val) this_cpu_generic_to_op(pcp, val, &=) +#endif +#ifndef this_cpu_and_8 +#define this_cpu_and_8(pcp, val) this_cpu_generic_to_op(pcp, val, &=) +#endif + +#ifndef this_cpu_or_1 +#define this_cpu_or_1(pcp, val) this_cpu_generic_to_op(pcp, val, |=) +#endif +#ifndef this_cpu_or_2 +#define this_cpu_or_2(pcp, val) this_cpu_generic_to_op(pcp, val, |=) +#endif +#ifndef this_cpu_or_4 +#define this_cpu_or_4(pcp, val) this_cpu_generic_to_op(pcp, val, |=) +#endif +#ifndef this_cpu_or_8 +#define this_cpu_or_8(pcp, val) this_cpu_generic_to_op(pcp, val, |=) +#endif + +#ifndef this_cpu_add_return_1 +#define this_cpu_add_return_1(pcp, val) this_cpu_generic_add_return(pcp, val) +#endif +#ifndef this_cpu_add_return_2 +#define this_cpu_add_return_2(pcp, val) this_cpu_generic_add_return(pcp, val) +#endif +#ifndef this_cpu_add_return_4 +#define this_cpu_add_return_4(pcp, val) this_cpu_generic_add_return(pcp, val) +#endif +#ifndef this_cpu_add_return_8 +#define this_cpu_add_return_8(pcp, val) this_cpu_generic_add_return(pcp, val) +#endif + +#ifndef this_cpu_xchg_1 +#define this_cpu_xchg_1(pcp, nval) this_cpu_generic_xchg(pcp, nval) +#endif +#ifndef this_cpu_xchg_2 +#define this_cpu_xchg_2(pcp, nval) this_cpu_generic_xchg(pcp, nval) +#endif +#ifndef this_cpu_xchg_4 +#define this_cpu_xchg_4(pcp, nval) this_cpu_generic_xchg(pcp, nval) +#endif +#ifndef this_cpu_xchg_8 +#define this_cpu_xchg_8(pcp, nval) this_cpu_generic_xchg(pcp, nval) +#endif + +#ifndef this_cpu_cmpxchg_1 +#define this_cpu_cmpxchg_1(pcp, oval, nval) \ + this_cpu_generic_cmpxchg(pcp, oval, nval) +#endif +#ifndef this_cpu_cmpxchg_2 +#define this_cpu_cmpxchg_2(pcp, oval, nval) \ + this_cpu_generic_cmpxchg(pcp, oval, nval) +#endif +#ifndef this_cpu_cmpxchg_4 +#define this_cpu_cmpxchg_4(pcp, oval, nval) \ + this_cpu_generic_cmpxchg(pcp, oval, nval) +#endif +#ifndef this_cpu_cmpxchg_8 +#define this_cpu_cmpxchg_8(pcp, oval, nval) \ + this_cpu_generic_cmpxchg(pcp, oval, nval) +#endif + +#ifndef this_cpu_cmpxchg_double_1 +#define this_cpu_cmpxchg_double_1(pcp1, pcp2, oval1, oval2, nval1, nval2) \ + this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) +#endif +#ifndef this_cpu_cmpxchg_double_2 +#define this_cpu_cmpxchg_double_2(pcp1, pcp2, oval1, oval2, nval1, nval2) \ + this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) +#endif +#ifndef this_cpu_cmpxchg_double_4 +#define this_cpu_cmpxchg_double_4(pcp1, pcp2, oval1, oval2, nval1, nval2) \ + this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) +#endif +#ifndef this_cpu_cmpxchg_double_8 +#define this_cpu_cmpxchg_double_8(pcp1, pcp2, oval1, oval2, nval1, nval2) \ + this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) +#endif #endif /* _ASM_GENERIC_PERCPU_H_ */ diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index 215917e9a176..d8bb6e001c6a 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -198,7 +198,8 @@ * + 0 is required in order to convert the pointer type from a * potential array type to a pointer to a single item of the array. */ -#define __verify_pcpu_ptr(ptr) do { \ +#define __verify_pcpu_ptr(ptr) \ +do { \ const void __percpu *__vpp_verify = (typeof((ptr) + 0))NULL; \ (void)__vpp_verify; \ } while (0) @@ -210,12 +211,13 @@ * to prevent the compiler from making incorrect assumptions about the * pointer value. The weird cast keeps both GCC and sparse happy. 
*/ -#define SHIFT_PERCPU_PTR(__p, __offset) ({ \ - __verify_pcpu_ptr((__p)); \ +#define SHIFT_PERCPU_PTR(__p, __offset) \ +({ \ + __verify_pcpu_ptr(__p); \ RELOC_HIDE((typeof(*(__p)) __kernel __force *)(__p), (__offset)); \ }) -#define per_cpu_ptr(ptr, cpu) SHIFT_PERCPU_PTR((ptr), per_cpu_offset((cpu))) +#define per_cpu_ptr(ptr, cpu) SHIFT_PERCPU_PTR(ptr, per_cpu_offset(cpu)) #define raw_cpu_ptr(ptr) arch_raw_cpu_ptr(ptr) #ifdef CONFIG_DEBUG_PREEMPT @@ -226,12 +228,13 @@ #else /* CONFIG_SMP */ -#define VERIFY_PERCPU_PTR(__p) ({ \ - __verify_pcpu_ptr((__p)); \ - (typeof(*(__p)) __kernel __force *)(__p); \ +#define VERIFY_PERCPU_PTR(__p) \ +({ \ + __verify_pcpu_ptr(__p); \ + (typeof(*(__p)) __kernel __force *)(__p); \ }) -#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); VERIFY_PERCPU_PTR((ptr)); }) +#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); VERIFY_PERCPU_PTR(ptr); }) #define raw_cpu_ptr(ptr) per_cpu_ptr(ptr, 0) #define this_cpu_ptr(ptr) raw_cpu_ptr(ptr) @@ -248,26 +251,32 @@ * Must be an lvalue. Since @var must be a simple identifier, * we force a syntax error here if it isn't. */ -#define get_cpu_var(var) (*({ \ - preempt_disable(); \ - this_cpu_ptr(&var); })) +#define get_cpu_var(var) \ +(*({ \ + preempt_disable(); \ + this_cpu_ptr(&var); \ +})) /* * The weird & is necessary because sparse considers (void)(var) to be * a direct dereference of percpu variable (var). */ -#define put_cpu_var(var) do { \ - (void)&(var); \ - preempt_enable(); \ +#define put_cpu_var(var) \ +do { \ + (void)&(var); \ + preempt_enable(); \ } while (0) -#define get_cpu_ptr(var) ({ \ - preempt_disable(); \ - this_cpu_ptr(var); }) +#define get_cpu_ptr(var) \ +({ \ + preempt_disable(); \ + this_cpu_ptr(var); \ +}) -#define put_cpu_ptr(var) do { \ - (void)(var); \ - preempt_enable(); \ +#define put_cpu_ptr(var) \ +do { \ + (void)(var); \ + preempt_enable(); \ } while (0) /* @@ -284,15 +293,16 @@ static inline void __this_cpu_preempt_check(const char *op) { } #endif #define __pcpu_size_call_return(stem, variable) \ -({ typeof(variable) pscr_ret__; \ +({ \ + typeof(variable) pscr_ret__; \ __verify_pcpu_ptr(&(variable)); \ switch(sizeof(variable)) { \ - case 1: pscr_ret__ = stem##1(variable);break; \ - case 2: pscr_ret__ = stem##2(variable);break; \ - case 4: pscr_ret__ = stem##4(variable);break; \ - case 8: pscr_ret__ = stem##8(variable);break; \ + case 1: pscr_ret__ = stem##1(variable); break; \ + case 2: pscr_ret__ = stem##2(variable); break; \ + case 4: pscr_ret__ = stem##4(variable); break; \ + case 8: pscr_ret__ = stem##8(variable); break; \ default: \ - __bad_size_call_parameter();break; \ + __bad_size_call_parameter(); break; \ } \ pscr_ret__; \ }) @@ -323,11 +333,11 @@ static inline void __this_cpu_preempt_check(const char *op) { } #define __pcpu_double_call_return_bool(stem, pcp1, pcp2, ...) \ ({ \ bool pdcrb_ret__; \ - __verify_pcpu_ptr(&pcp1); \ + __verify_pcpu_ptr(&(pcp1)); \ BUILD_BUG_ON(sizeof(pcp1) != sizeof(pcp2)); \ - VM_BUG_ON((unsigned long)(&pcp1) % (2 * sizeof(pcp1))); \ - VM_BUG_ON((unsigned long)(&pcp2) != \ - (unsigned long)(&pcp1) + sizeof(pcp1)); \ + VM_BUG_ON((unsigned long)(&(pcp1)) % (2 * sizeof(pcp1))); \ + VM_BUG_ON((unsigned long)(&(pcp2)) != \ + (unsigned long)(&(pcp1)) + sizeof(pcp1)); \ switch(sizeof(pcp1)) { \ case 1: pdcrb_ret__ = stem##1(pcp1, pcp2, __VA_ARGS__); break; \ case 2: pdcrb_ret__ = stem##2(pcp1, pcp2, __VA_ARGS__); break; \ @@ -367,117 +377,132 @@ do { \ * cpu atomic operations for 2 byte sized RMW actions. 
If arch code does * not provide operations for a scalar size then the fallback in the * generic code will be used. + * + * cmpxchg_double replaces two adjacent scalars at once. The first two + * parameters are per cpu variables which have to be of the same size. A + * truth value is returned to indicate success or failure (since a double + * register result is difficult to handle). There is very limited hardware + * support for these operations, so only certain sizes may work. */ /* - * Generic percpu operations for contexts where we do not want to do - * any checks for preemptiosn. + * Operations for contexts where we do not want to do any checks for + * preemptions. Unless strictly necessary, always use [__]this_cpu_*() + * instead. * - * If there is no other protection through preempt disable and/or - * disabling interupts then one of these RMW operations can show unexpected - * behavior because the execution thread was rescheduled on another processor - * or an interrupt occurred and the same percpu variable was modified from - * the interrupt context. + * If there is no other protection through preempt disable and/or disabling + * interupts then one of these RMW operations can show unexpected behavior + * because the execution thread was rescheduled on another processor or an + * interrupt occurred and the same percpu variable was modified from the + * interrupt context. */ -# define raw_cpu_read(pcp) __pcpu_size_call_return(raw_cpu_read_, (pcp)) -# define raw_cpu_write(pcp, val) __pcpu_size_call(raw_cpu_write_, (pcp), (val)) -# define raw_cpu_add(pcp, val) __pcpu_size_call(raw_cpu_add_, (pcp), (val)) -# define raw_cpu_sub(pcp, val) raw_cpu_add((pcp), -(val)) -# define raw_cpu_inc(pcp) raw_cpu_add((pcp), 1) -# define raw_cpu_dec(pcp) raw_cpu_sub((pcp), 1) -# define raw_cpu_and(pcp, val) __pcpu_size_call(raw_cpu_and_, (pcp), (val)) -# define raw_cpu_or(pcp, val) __pcpu_size_call(raw_cpu_or_, (pcp), (val)) -# define raw_cpu_add_return(pcp, val) \ - __pcpu_size_call_return2(raw_cpu_add_return_, pcp, val) -#define raw_cpu_sub_return(pcp, val) raw_cpu_add_return(pcp, -(typeof(pcp))(val)) -#define raw_cpu_inc_return(pcp) raw_cpu_add_return(pcp, 1) -#define raw_cpu_dec_return(pcp) raw_cpu_add_return(pcp, -1) -# define raw_cpu_xchg(pcp, nval) \ - __pcpu_size_call_return2(raw_cpu_xchg_, (pcp), nval) -# define raw_cpu_cmpxchg(pcp, oval, nval) \ +#define raw_cpu_read(pcp) __pcpu_size_call_return(raw_cpu_read_, pcp) +#define raw_cpu_write(pcp, val) __pcpu_size_call(raw_cpu_write_, pcp, val) +#define raw_cpu_add(pcp, val) __pcpu_size_call(raw_cpu_add_, pcp, val) +#define raw_cpu_and(pcp, val) __pcpu_size_call(raw_cpu_and_, pcp, val) +#define raw_cpu_or(pcp, val) __pcpu_size_call(raw_cpu_or_, pcp, val) +#define raw_cpu_add_return(pcp, val) __pcpu_size_call_return2(raw_cpu_add_return_, pcp, val) +#define raw_cpu_xchg(pcp, nval) __pcpu_size_call_return2(raw_cpu_xchg_, pcp, nval) +#define raw_cpu_cmpxchg(pcp, oval, nval) \ __pcpu_size_call_return2(raw_cpu_cmpxchg_, pcp, oval, nval) -# define raw_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ - __pcpu_double_call_return_bool(raw_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2)) +#define raw_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ + __pcpu_double_call_return_bool(raw_cpu_cmpxchg_double_, pcp1, pcp2, oval1, oval2, nval1, nval2) + +#define raw_cpu_sub(pcp, val) raw_cpu_add(pcp, -(val)) +#define raw_cpu_inc(pcp) raw_cpu_add(pcp, 1) +#define raw_cpu_dec(pcp) raw_cpu_sub(pcp, 1) +#define 
raw_cpu_sub_return(pcp, val) raw_cpu_add_return(pcp, -(typeof(pcp))(val)) +#define raw_cpu_inc_return(pcp) raw_cpu_add_return(pcp, 1) +#define raw_cpu_dec_return(pcp) raw_cpu_add_return(pcp, -1) /* - * Generic percpu operations for context that are safe from preemption/interrupts. + * Operations for contexts that are safe from preemption/interrupts. These + * operations verify that preemption is disabled. */ -# define __this_cpu_read(pcp) \ - (__this_cpu_preempt_check("read"),raw_cpu_read(pcp)) +#define __this_cpu_read(pcp) \ +({ \ + __this_cpu_preempt_check("read"); \ + raw_cpu_read(pcp); \ +}) -# define __this_cpu_write(pcp, val) \ -do { __this_cpu_preempt_check("write"); \ - raw_cpu_write(pcp, val); \ -} while (0) +#define __this_cpu_write(pcp, val) \ +({ \ + __this_cpu_preempt_check("write"); \ + raw_cpu_write(pcp, val); \ +}) -# define __this_cpu_add(pcp, val) \ -do { __this_cpu_preempt_check("add"); \ +#define __this_cpu_add(pcp, val) \ +({ \ + __this_cpu_preempt_check("add"); \ raw_cpu_add(pcp, val); \ -} while (0) - -# define __this_cpu_sub(pcp, val) __this_cpu_add((pcp), -(typeof(pcp))(val)) -# define __this_cpu_inc(pcp) __this_cpu_add((pcp), 1) -# define __this_cpu_dec(pcp) __this_cpu_sub((pcp), 1) +}) -# define __this_cpu_and(pcp, val) \ -do { __this_cpu_preempt_check("and"); \ +#define __this_cpu_and(pcp, val) \ +({ \ + __this_cpu_preempt_check("and"); \ raw_cpu_and(pcp, val); \ -} while (0) +}) -# define __this_cpu_or(pcp, val) \ -do { __this_cpu_preempt_check("or"); \ +#define __this_cpu_or(pcp, val) \ +({ \ + __this_cpu_preempt_check("or"); \ raw_cpu_or(pcp, val); \ -} while (0) +}) -# define __this_cpu_add_return(pcp, val) \ - (__this_cpu_preempt_check("add_return"),raw_cpu_add_return(pcp, val)) +#define __this_cpu_add_return(pcp, val) \ +({ \ + __this_cpu_preempt_check("add_return"); \ + raw_cpu_add_return(pcp, val); \ +}) -#define __this_cpu_sub_return(pcp, val) __this_cpu_add_return(pcp, -(typeof(pcp))(val)) -#define __this_cpu_inc_return(pcp) __this_cpu_add_return(pcp, 1) -#define __this_cpu_dec_return(pcp) __this_cpu_add_return(pcp, -1) +#define __this_cpu_xchg(pcp, nval) \ +({ \ + __this_cpu_preempt_check("xchg"); \ + raw_cpu_xchg(pcp, nval); \ +}) -# define __this_cpu_xchg(pcp, nval) \ - (__this_cpu_preempt_check("xchg"),raw_cpu_xchg(pcp, nval)) +#define __this_cpu_cmpxchg(pcp, oval, nval) \ +({ \ + __this_cpu_preempt_check("cmpxchg"); \ + raw_cpu_cmpxchg(pcp, oval, nval); \ +}) -# define __this_cpu_cmpxchg(pcp, oval, nval) \ - (__this_cpu_preempt_check("cmpxchg"),raw_cpu_cmpxchg(pcp, oval, nval)) +#define __this_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ +({ __this_cpu_preempt_check("cmpxchg_double"); \ + raw_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2); \ +}) -# define __this_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ - (__this_cpu_preempt_check("cmpxchg_double"),raw_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)) +#define __this_cpu_sub(pcp, val) __this_cpu_add(pcp, -(typeof(pcp))(val)) +#define __this_cpu_inc(pcp) __this_cpu_add(pcp, 1) +#define __this_cpu_dec(pcp) __this_cpu_sub(pcp, 1) +#define __this_cpu_sub_return(pcp, val) __this_cpu_add_return(pcp, -(typeof(pcp))(val)) +#define __this_cpu_inc_return(pcp) __this_cpu_add_return(pcp, 1) +#define __this_cpu_dec_return(pcp) __this_cpu_add_return(pcp, -1) /* - * this_cpu_*() operations are used for accesses that must be done in a - * preemption safe way since we know that the context is not preempt - * safe. Interrupts may occur. 
If the interrupt modifies the variable too - * then RMW actions will not be reliable. + * Operations with implied preemption protection. These operations can be + * used without worrying about preemption. Note that interrupts may still + * occur while an operation is in progress and if the interrupt modifies + * the variable too then RMW actions may not be reliable. */ -# define this_cpu_read(pcp) __pcpu_size_call_return(this_cpu_read_, (pcp)) -# define this_cpu_write(pcp, val) __pcpu_size_call(this_cpu_write_, (pcp), (val)) -# define this_cpu_add(pcp, val) __pcpu_size_call(this_cpu_add_, (pcp), (val)) -# define this_cpu_sub(pcp, val) this_cpu_add((pcp), -(typeof(pcp))(val)) -# define this_cpu_inc(pcp) this_cpu_add((pcp), 1) -# define this_cpu_dec(pcp) this_cpu_sub((pcp), 1) -# define this_cpu_and(pcp, val) __pcpu_size_call(this_cpu_and_, (pcp), (val)) -# define this_cpu_or(pcp, val) __pcpu_size_call(this_cpu_or_, (pcp), (val)) -# define this_cpu_add_return(pcp, val) __pcpu_size_call_return2(this_cpu_add_return_, pcp, val) +#define this_cpu_read(pcp) __pcpu_size_call_return(this_cpu_read_, pcp) +#define this_cpu_write(pcp, val) __pcpu_size_call(this_cpu_write_, pcp, val) +#define this_cpu_add(pcp, val) __pcpu_size_call(this_cpu_add_, pcp, val) +#define this_cpu_and(pcp, val) __pcpu_size_call(this_cpu_and_, pcp, val) +#define this_cpu_or(pcp, val) __pcpu_size_call(this_cpu_or_, pcp, val) +#define this_cpu_add_return(pcp, val) __pcpu_size_call_return2(this_cpu_add_return_, pcp, val) +#define this_cpu_xchg(pcp, nval) __pcpu_size_call_return2(this_cpu_xchg_, pcp, nval) +#define this_cpu_cmpxchg(pcp, oval, nval) \ + __pcpu_size_call_return2(this_cpu_cmpxchg_, pcp, oval, nval) +#define this_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ + __pcpu_double_call_return_bool(this_cpu_cmpxchg_double_, pcp1, pcp2, oval1, oval2, nval1, nval2) + +#define this_cpu_sub(pcp, val) this_cpu_add(pcp, -(typeof(pcp))(val)) +#define this_cpu_inc(pcp) this_cpu_add(pcp, 1) +#define this_cpu_dec(pcp) this_cpu_sub(pcp, 1) #define this_cpu_sub_return(pcp, val) this_cpu_add_return(pcp, -(typeof(pcp))(val)) #define this_cpu_inc_return(pcp) this_cpu_add_return(pcp, 1) #define this_cpu_dec_return(pcp) this_cpu_add_return(pcp, -1) -# define this_cpu_xchg(pcp, nval) \ - __pcpu_size_call_return2(this_cpu_xchg_, (pcp), nval) -# define this_cpu_cmpxchg(pcp, oval, nval) \ - __pcpu_size_call_return2(this_cpu_cmpxchg_, pcp, oval, nval) - -/* - * cmpxchg_double replaces two adjacent scalars at once. The first - * two parameters are per cpu variables which have to be of the same - * size. A truth value is returned to indicate success or failure - * (since a double register result is difficult to handle). There is - * very limited hardware support for these operations, so only certain - * sizes may work. - */ -# define this_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ - __pcpu_double_call_return_bool(this_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2)) #endif /* __ASSEMBLY__ */ #endif /* _LINUX_PERCPU_DEFS_H */ -- cgit v1.2.3-59-g8ed1b From 6fbc07bbe2b5a898532f970c5a397f8789ace0d5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 17 Jun 2014 19:12:40 -0400 Subject: percpu: invoke __verify_pcpu_ptr() from the generic part of accessors and operations __verify_pcpu_ptr() is used to verify that a specified parameter is actually an percpu pointer by percpu accessor and operation implementations. 
Currently, where it's called isn't clearly defined and we just ensure that it's invoked at least once for all accessors and operations. The lack of clarity on when it should be called isn't nice and given that this is a completely generic issue, there's no reason to make archs worry about it. This patch updates __verify_pcpu_ptr() invocations such that it's always invoked from the final generic wrapper once per access or operation. As this is already the case for {raw|this}_cpu_*() definitions through __pcpu_size_*(), only the {raw|per|this}_cpu_ptr() accessors need to be updated. This change makes it unnecessary for archs to worry about __verify_pcpu_ptr(). x86's arch_raw_cpu_ptr() is updated accordingly. Signed-off-by: Tejun Heo Cc: Christoph Lameter Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" --- arch/x86/include/asm/percpu.h | 1 - include/linux/percpu-defs.h | 29 +++++++++++++++++++++-------- 2 files changed, 21 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 9bc23f18a6fa..fd472181a1d0 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -55,7 +55,6 @@ #define arch_raw_cpu_ptr(ptr) \ ({ \ unsigned long tcp_ptr__; \ - __verify_pcpu_ptr(ptr); \ asm volatile("add " __percpu_arg(1) ", %0" \ : "=r" (tcp_ptr__) \ : "m" (this_cpu_off), "0" (ptr)); \ diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index d8bb6e001c6a..c93fff16776c 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -191,9 +191,12 @@ #ifndef __ASSEMBLY__ /* - * Macro which verifies @ptr is a percpu pointer without evaluating - * @ptr. This is to be used in percpu accessors to verify that the - * input parameter is a percpu pointer. + * __verify_pcpu_ptr() verifies @ptr is a percpu pointer without evaluating + * @ptr and is invoked once before a percpu area is accessed by all + * accessors and operations. This is performed in the generic part of + * percpu and arch overrides don't need to worry about it; however, if an + * arch wants to implement an arch-specific percpu accessor or operation, + * it may use __verify_pcpu_ptr() to verify the parameters. * * + 0 is required in order to convert the pointer type from a * potential array type to a pointer to a single item of the array. @@ -212,16 +215,26 @@ do { \ * pointer value. The weird cast keeps both GCC and sparse happy. 
*/ #define SHIFT_PERCPU_PTR(__p, __offset) \ + RELOC_HIDE((typeof(*(__p)) __kernel __force *)(__p), (__offset)) + +#define per_cpu_ptr(ptr, cpu) \ ({ \ - __verify_pcpu_ptr(__p); \ - RELOC_HIDE((typeof(*(__p)) __kernel __force *)(__p), (__offset)); \ + __verify_pcpu_ptr(ptr); \ + SHIFT_PERCPU_PTR((ptr), per_cpu_offset((cpu))); \ }) -#define per_cpu_ptr(ptr, cpu) SHIFT_PERCPU_PTR(ptr, per_cpu_offset(cpu)) -#define raw_cpu_ptr(ptr) arch_raw_cpu_ptr(ptr) +#define raw_cpu_ptr(ptr) \ +({ \ + __verify_pcpu_ptr(ptr); \ + arch_raw_cpu_ptr(ptr); \ +}) #ifdef CONFIG_DEBUG_PREEMPT -#define this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, my_cpu_offset) +#define this_cpu_ptr(ptr) \ +({ \ + __verify_pcpu_ptr(ptr); \ + SHIFT_PERCPU_PTR(ptr, my_cpu_offset); \ +}) #else #define this_cpu_ptr(ptr) raw_cpu_ptr(ptr) #endif -- cgit v1.2.3-59-g8ed1b From 22fdcf02f6e80d64a927f702dd9d631a927d87d4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 5 Jun 2014 11:31:01 -0400 Subject: lockdep: Revert lockdep check in raw_seqcount_begin() This commit reverts the addition of lockdep checking to raw_seqcount_begin for the following reasons: 1) It violates the naming convention that raw_* functions should not do lockdep checks (a convention that is also followed by the other raw_*_seqcount_begin functions). 2) raw_seqcount_begin does not spin, so it can only be part of an ABBA deadlock in very special circumstances (for instance if a lock is held across the entire raw_seqcount_begin()+read_seqcount_retry() loop while also being taken inside the write_seqcount protected area). 3) It is causing false positives with some existing callers, and there is no non-lockdep alternative for those callers to use. None of the three existing callers (__d_lookup_rcu, netdev_get_name, and the NFS state code) appear to use the function in a manner that is ABBA deadlock prone. Fixes: 1ca7d67cf5d5: seqcount: Add lockdep functionality to seqcount/seqlock Signed-off-by: Trond Myklebust Signed-off-by: Peter Zijlstra Cc: John Stultz Cc: "David S. Miller" Cc: Al Viro Cc: Waiman Long Cc: Stephen Boyd Cc: Linus Torvalds Link: http://lkml.kernel.org/r/CAHQdGtRR6SvEhXiqWo24hoUh9AU9cL82Z8Z-d8-7u951F_d+5g@mail.gmail.com Signed-off-by: Ingo Molnar --- include/linux/seqlock.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 535f158977b9..8cf350325dc6 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -164,8 +164,6 @@ static inline unsigned read_seqcount_begin(const seqcount_t *s) static inline unsigned raw_seqcount_begin(const seqcount_t *s) { unsigned ret = ACCESS_ONCE(s->sequence); - - seqcount_lockdep_reader_access(s); smp_rmb(); return ret & ~1; } -- cgit v1.2.3-59-g8ed1b From 7d2a01b87f1682fde87461864e6682031bfaa0a9 Mon Sep 17 00:00:00 2001 From: Andreas Noever Date: Tue, 3 Jun 2014 22:04:09 +0200 Subject: PCI: Add pci_fixup_suspend_late quirk pass Add pci_fixup_suspend_late as a new pci_fixup_pass. The pass is called from suspend_noirq and poweroff_noirq. Using the same pass for suspend and hibernate is consistent with resume_early which is called by resume_noirq and restore_noirq. The new quirk pass is required for Thunderbolt support on Apple hardware. 
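As a sketch of how a quirk author would hook into the new pass (the function body, the register offset and the vendor/device IDs below are placeholders for illustration, not part of this change):

    static void quirk_example_suspend_late(struct pci_dev *dev)
    {
            /* Invoked from suspend_noirq/poweroff_noirq through the new
             * pci_fixup_suspend_late pass, after the driver's own late
             * suspend hooks have run; the config write is a made-up
             * example of device-specific state that must be poked last. */
            pci_write_config_dword(dev, 0x80, 0);
    }
    DECLARE_PCI_FIXUP_SUSPEND_LATE(PCI_VENDOR_ID_APPLE, PCI_ANY_ID,
                                   quirk_example_suspend_late);

The class-based variant, DECLARE_PCI_FIXUP_CLASS_SUSPEND_LATE(), works the same way but matches on PCI class code instead of a fixed vendor/device pair.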
Signed-off-by: Andreas Noever Acked-by: Bjorn Helgaas Signed-off-by: Greg Kroah-Hartman --- drivers/pci/pci-driver.c | 18 ++++++++++++++---- drivers/pci/quirks.c | 7 +++++++ include/asm-generic/vmlinux.lds.h | 3 +++ include/linux/pci.h | 12 +++++++++++- 4 files changed, 35 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 3f8e3dbcaa7c..d04c5adafc16 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -582,7 +582,7 @@ static int pci_legacy_suspend_late(struct device *dev, pm_message_t state) WARN_ONCE(pci_dev->current_state != prev, "PCI PM: Device state not saved by %pF\n", drv->suspend_late); - return 0; + goto Fixup; } } @@ -591,6 +591,9 @@ static int pci_legacy_suspend_late(struct device *dev, pm_message_t state) pci_pm_set_unknown_state(pci_dev); +Fixup: + pci_fixup_device(pci_fixup_suspend_late, pci_dev); + return 0; } @@ -734,7 +737,7 @@ static int pci_pm_suspend_noirq(struct device *dev) if (!pm) { pci_save_state(pci_dev); - return 0; + goto Fixup; } if (pm->suspend_noirq) { @@ -751,7 +754,7 @@ static int pci_pm_suspend_noirq(struct device *dev) WARN_ONCE(pci_dev->current_state != prev, "PCI PM: State of device not saved by %pF\n", pm->suspend_noirq); - return 0; + goto Fixup; } } @@ -775,6 +778,9 @@ static int pci_pm_suspend_noirq(struct device *dev) if (pci_dev->class == PCI_CLASS_SERIAL_USB_EHCI) pci_write_config_word(pci_dev, PCI_COMMAND, 0); +Fixup: + pci_fixup_device(pci_fixup_suspend_late, pci_dev); + return 0; } @@ -999,8 +1005,10 @@ static int pci_pm_poweroff_noirq(struct device *dev) if (pci_has_legacy_pm_support(to_pci_dev(dev))) return pci_legacy_suspend_late(dev, PMSG_HIBERNATE); - if (!drv || !drv->pm) + if (!drv || !drv->pm) { + pci_fixup_device(pci_fixup_suspend_late, pci_dev); return 0; + } if (drv->pm->poweroff_noirq) { int error; @@ -1021,6 +1029,8 @@ static int pci_pm_poweroff_noirq(struct device *dev) if (pci_dev->class == PCI_CLASS_SERIAL_USB_EHCI) pci_write_config_word(pci_dev, PCI_COMMAND, 0); + pci_fixup_device(pci_fixup_suspend_late, pci_dev); + if (pcibios_pm_ops.poweroff_noirq) return pcibios_pm_ops.poweroff_noirq(dev); diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index d0f69269eb6c..03266af20d5f 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -3018,6 +3018,8 @@ extern struct pci_fixup __start_pci_fixups_resume_early[]; extern struct pci_fixup __end_pci_fixups_resume_early[]; extern struct pci_fixup __start_pci_fixups_suspend[]; extern struct pci_fixup __end_pci_fixups_suspend[]; +extern struct pci_fixup __start_pci_fixups_suspend_late[]; +extern struct pci_fixup __end_pci_fixups_suspend_late[]; static bool pci_apply_fixup_final_quirks; @@ -3063,6 +3065,11 @@ void pci_fixup_device(enum pci_fixup_pass pass, struct pci_dev *dev) end = __end_pci_fixups_suspend; break; + case pci_fixup_suspend_late: + start = __start_pci_fixups_suspend_late; + end = __end_pci_fixups_suspend_late; + break; + default: /* stupid compiler warning, you would think with an enum... 
*/ return; diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 471ba48c7ae4..47cd98656f9d 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -268,6 +268,9 @@ VMLINUX_SYMBOL(__start_pci_fixups_suspend) = .; \ *(.pci_fixup_suspend) \ VMLINUX_SYMBOL(__end_pci_fixups_suspend) = .; \ + VMLINUX_SYMBOL(__start_pci_fixups_suspend_late) = .; \ + *(.pci_fixup_suspend_late) \ + VMLINUX_SYMBOL(__end_pci_fixups_suspend_late) = .; \ } \ \ /* Built-in firmware blobs */ \ diff --git a/include/linux/pci.h b/include/linux/pci.h index 466bcd111d85..295d3a9d8ffe 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1477,8 +1477,9 @@ enum pci_fixup_pass { pci_fixup_final, /* Final phase of device fixups */ pci_fixup_enable, /* pci_enable_device() time */ pci_fixup_resume, /* pci_device_resume() */ - pci_fixup_suspend, /* pci_device_suspend */ + pci_fixup_suspend, /* pci_device_suspend() */ pci_fixup_resume_early, /* pci_device_resume_early() */ + pci_fixup_suspend_late, /* pci_device_suspend_late() */ }; /* Anonymous variables would be nice... */ @@ -1519,6 +1520,11 @@ enum pci_fixup_pass { DECLARE_PCI_FIXUP_SECTION(.pci_fixup_suspend, \ suspend##hook, vendor, device, class, \ class_shift, hook) +#define DECLARE_PCI_FIXUP_CLASS_SUSPEND_LATE(vendor, device, class, \ + class_shift, hook) \ + DECLARE_PCI_FIXUP_SECTION(.pci_fixup_suspend_late, \ + suspend_late##hook, vendor, device, \ + class, class_shift, hook) #define DECLARE_PCI_FIXUP_EARLY(vendor, device, hook) \ DECLARE_PCI_FIXUP_SECTION(.pci_fixup_early, \ @@ -1544,6 +1550,10 @@ enum pci_fixup_pass { DECLARE_PCI_FIXUP_SECTION(.pci_fixup_suspend, \ suspend##hook, vendor, device, \ PCI_ANY_ID, 0, hook) +#define DECLARE_PCI_FIXUP_SUSPEND_LATE(vendor, device, hook) \ + DECLARE_PCI_FIXUP_SECTION(.pci_fixup_suspend_late, \ + suspend_late##hook, vendor, device, \ + PCI_ANY_ID, 0, hook) #ifdef CONFIG_PCI_QUIRKS void pci_fixup_device(enum pci_fixup_pass pass, struct pci_dev *dev); -- cgit v1.2.3-59-g8ed1b From c9d53c0f2d23c792e4b9cf1551b63de4516f839e Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 11 Jun 2014 14:00:05 +0900 Subject: devres: remove devm_request_and_ioremap() devm_request_and_ioremap() was obsoleted by the commit 7509657 ("lib: devres: Introduce devm_ioremap_resource()") and has been deprecated for a long time. So, let's remove this function. In addition, all usages of devm_request_and_ioremap() are also removed. 
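For reference, the conversion applied to the remaining callers follows the pattern below (a sketch with placeholder names such as "pdev" and "base"; the error handling changes because devm_ioremap_resource() returns an ERR_PTR()-encoded error rather than NULL):

    struct resource *res;
    void __iomem *base;

    res = platform_get_resource(pdev, IORESOURCE_MEM, 0);

    /* old, removed API: returned NULL on failure */
    base = devm_request_and_ioremap(&pdev->dev, res);
    if (!base)
            return -ENOMEM;

    /* replacement: returns an ERR_PTR() value on failure */
    base = devm_ioremap_resource(&pdev->dev, res);
    if (IS_ERR(base))
            return PTR_ERR(base);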
Signed-off-by: Jingoo Han Signed-off-by: Greg Kroah-Hartman --- Documentation/driver-model/devres.txt | 1 - drivers/bus/brcmstb_gisb.c | 6 +- drivers/gpu/drm/armada/armada_crtc.c | 8 +- include/linux/device.h | 2 - lib/devres.c | 28 ------- scripts/coccinelle/api/devm_ioremap_resource.cocci | 90 ---------------------- 6 files changed, 6 insertions(+), 129 deletions(-) delete mode 100644 scripts/coccinelle/api/devm_ioremap_resource.cocci (limited to 'include/linux') diff --git a/Documentation/driver-model/devres.txt b/Documentation/driver-model/devres.txt index 1525e30483fd..de6bc8c325e9 100644 --- a/Documentation/driver-model/devres.txt +++ b/Documentation/driver-model/devres.txt @@ -278,7 +278,6 @@ IOMAP devm_ioremap_nocache() devm_iounmap() devm_ioremap_resource() : checks resource, requests memory region, ioremaps - devm_request_and_ioremap() : obsoleted by devm_ioremap_resource() pcim_iomap() pcim_iounmap() pcim_iomap_table() : array of mapped addresses indexed by BAR diff --git a/drivers/bus/brcmstb_gisb.c b/drivers/bus/brcmstb_gisb.c index 6159b7752a64..f2cd6a2d40b4 100644 --- a/drivers/bus/brcmstb_gisb.c +++ b/drivers/bus/brcmstb_gisb.c @@ -212,9 +212,9 @@ static int brcmstb_gisb_arb_probe(struct platform_device *pdev) mutex_init(&gdev->lock); INIT_LIST_HEAD(&gdev->next); - gdev->base = devm_request_and_ioremap(&pdev->dev, r); - if (!gdev->base) - return -ENOMEM; + gdev->base = devm_ioremap_resource(&pdev->dev, r); + if (IS_ERR(gdev->base)) + return PTR_ERR(gdev->base); err = devm_request_irq(&pdev->dev, timeout_irq, brcmstb_gisb_timeout_handler, 0, pdev->name, diff --git a/drivers/gpu/drm/armada/armada_crtc.c b/drivers/gpu/drm/armada/armada_crtc.c index 81c34f949dfc..3aedf9e993e6 100644 --- a/drivers/gpu/drm/armada/armada_crtc.c +++ b/drivers/gpu/drm/armada/armada_crtc.c @@ -1039,11 +1039,9 @@ int armada_drm_crtc_create(struct drm_device *dev, unsigned num, if (ret) return ret; - base = devm_request_and_ioremap(dev->dev, res); - if (!base) { - DRM_ERROR("failed to ioremap register\n"); - return -ENOMEM; - } + base = devm_ioremap_resource(dev->dev, res); + if (IS_ERR(base)) + return PTR_ERR(base); dcrtc = kzalloc(sizeof(*dcrtc), GFP_KERNEL); if (!dcrtc) { diff --git a/include/linux/device.h b/include/linux/device.h index af424acd393d..921fa0a74df6 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -631,8 +631,6 @@ extern unsigned long devm_get_free_pages(struct device *dev, extern void devm_free_pages(struct device *dev, unsigned long addr); void __iomem *devm_ioremap_resource(struct device *dev, struct resource *res); -void __iomem *devm_request_and_ioremap(struct device *dev, - struct resource *res); /* allows to add/remove a custom action to devres stack */ int devm_add_action(struct device *dev, void (*action)(void *), void *data); diff --git a/lib/devres.c b/lib/devres.c index f562bf6ff71d..6a4aee8a3a7e 100644 --- a/lib/devres.c +++ b/lib/devres.c @@ -142,34 +142,6 @@ void __iomem *devm_ioremap_resource(struct device *dev, struct resource *res) } EXPORT_SYMBOL(devm_ioremap_resource); -/** - * devm_request_and_ioremap() - Check, request region, and ioremap resource - * @dev: Generic device to handle the resource for - * @res: resource to be handled - * - * Takes all necessary steps to ioremap a mem resource. Uses managed device, so - * everything is undone on driver detach. Checks arguments, so you can feed - * it the result from e.g. platform_get_resource() directly. Returns the - * remapped pointer or NULL on error. 
Usage example: - * - * res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - * base = devm_request_and_ioremap(&pdev->dev, res); - * if (!base) - * return -EADDRNOTAVAIL; - */ -void __iomem *devm_request_and_ioremap(struct device *dev, - struct resource *res) -{ - void __iomem *dest_ptr; - - dest_ptr = devm_ioremap_resource(dev, res); - if (IS_ERR(dest_ptr)) - return NULL; - - return dest_ptr; -} -EXPORT_SYMBOL(devm_request_and_ioremap); - #ifdef CONFIG_HAS_IOPORT_MAP /* * Generic iomap devres diff --git a/scripts/coccinelle/api/devm_ioremap_resource.cocci b/scripts/coccinelle/api/devm_ioremap_resource.cocci deleted file mode 100644 index 495daa3dbf77..000000000000 --- a/scripts/coccinelle/api/devm_ioremap_resource.cocci +++ /dev/null @@ -1,90 +0,0 @@ -virtual patch -virtual report - -@depends on patch@ -expression base, dev, res; -@@ - --base = devm_request_and_ioremap(dev, res); -+base = devm_ioremap_resource(dev, res); - ... - if ( --base == NULL -+IS_ERR(base) - || ...) { -<... -- return ...; -+ return PTR_ERR(base); -...> - } - -@depends on patch@ -expression e, E, ret; -identifier l; -@@ - - e = devm_ioremap_resource(...); - ... - if (IS_ERR(e) || ...) { - ... when any -- ret = E; -+ ret = PTR_ERR(e); - ... -( - return ret; -| - goto l; -) - } - -@depends on patch@ -expression e; -@@ - - e = devm_ioremap_resource(...); - ... - if (IS_ERR(e) || ...) { - ... -- \(dev_dbg\|dev_err\|pr_debug\|pr_err\|DRM_ERROR\)(...); - ... - } - -@depends on patch@ -expression e; -identifier l; -@@ - - e = devm_ioremap_resource(...); - ... - if (IS_ERR(e) || ...) --{ -( - return ...; -| - goto l; -) --} - -@r depends on report@ -expression e; -identifier l; -position p1; -@@ - -*e = devm_request_and_ioremap@p1(...); - ... - if (e == NULL || ...) { - ... -( - return ...; -| - goto l; -) - } - -@script:python depends on r@ -p1 << r.p1; -@@ - -msg = "ERROR: deprecated devm_request_and_ioremap() API used on line %s" % (p1[0].line) -coccilib.report.print_report(p1[0], msg) -- cgit v1.2.3-59-g8ed1b From 3a494e710367c0a233d86bcde9853781859fc008 Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Thu, 19 Jun 2014 18:34:36 -0700 Subject: hyperv: Add handler for RNDIS_STATUS_NETWORK_CHANGE event The RNDIS_STATUS_NETWORK_CHANGE event is received after the Hyper-V host sleep or hibernation. We refresh network at this time. MS-TFS: 135162 Signed-off-by: Haiyang Zhang Reviewed-by: K. Y. Srinivasan Signed-off-by: David S. 
Miller --- drivers/net/hyperv/hyperv_net.h | 3 ++- drivers/net/hyperv/netvsc_drv.c | 30 ++++++++++++++++++++++++++---- drivers/net/hyperv/rndis_filter.c | 21 +-------------------- include/linux/rndis.h | 1 + 4 files changed, 30 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 6cc37c15e0bf..24441ae832d1 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -170,6 +170,7 @@ struct rndis_device { enum rndis_device_state state; bool link_state; + bool link_change; atomic_t new_req_id; spinlock_t request_lock; @@ -185,7 +186,7 @@ int netvsc_device_remove(struct hv_device *device); int netvsc_send(struct hv_device *device, struct hv_netvsc_packet *packet); void netvsc_linkstatus_callback(struct hv_device *device_obj, - unsigned int status); + struct rndis_message *resp); int netvsc_recv_callback(struct hv_device *device_obj, struct hv_netvsc_packet *packet, struct ndis_tcp_ip_checksum_info *csum_info); diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 4fd71b75e666..9b27ca8c1d39 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -579,8 +579,9 @@ drop: * netvsc_linkstatus_callback - Link up/down notification */ void netvsc_linkstatus_callback(struct hv_device *device_obj, - unsigned int status) + struct rndis_message *resp) { + struct rndis_indicate_status *indicate = &resp->msg.indicate_status; struct net_device *net; struct net_device_context *ndev_ctx; struct netvsc_device *net_device; @@ -589,7 +590,19 @@ void netvsc_linkstatus_callback(struct hv_device *device_obj, net_device = hv_get_drvdata(device_obj); rdev = net_device->extension; - rdev->link_state = status != 1; + switch (indicate->status) { + case RNDIS_STATUS_MEDIA_CONNECT: + rdev->link_state = false; + break; + case RNDIS_STATUS_MEDIA_DISCONNECT: + rdev->link_state = true; + break; + case RNDIS_STATUS_NETWORK_CHANGE: + rdev->link_change = true; + break; + default: + return; + } net = net_device->ndev; @@ -597,7 +610,7 @@ void netvsc_linkstatus_callback(struct hv_device *device_obj, return; ndev_ctx = netdev_priv(net); - if (status == 1) { + if (!rdev->link_state) { schedule_delayed_work(&ndev_ctx->dwork, 0); schedule_delayed_work(&ndev_ctx->dwork, msecs_to_jiffies(20)); } else { @@ -767,7 +780,9 @@ static void netvsc_link_change(struct work_struct *w) struct net_device *net; struct netvsc_device *net_device; struct rndis_device *rdev; - bool notify; + bool notify, refresh = false; + char *argv[] = { "/etc/init.d/network", "restart", NULL }; + char *envp[] = { "HOME=/", "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL }; rtnl_lock(); @@ -782,10 +797,17 @@ static void netvsc_link_change(struct work_struct *w) } else { netif_carrier_on(net); notify = true; + if (rdev->link_change) { + rdev->link_change = false; + refresh = true; + } } rtnl_unlock(); + if (refresh) + call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC); + if (notify) netdev_notify_peers(net); } diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 99c527adae5b..2b86f0b6f6d1 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -320,25 +320,6 @@ static void rndis_filter_receive_response(struct rndis_device *dev, } } -static void rndis_filter_receive_indicate_status(struct rndis_device *dev, - struct rndis_message *resp) -{ - struct rndis_indicate_status *indicate = - &resp->msg.indicate_status; - - if 
(indicate->status == RNDIS_STATUS_MEDIA_CONNECT) { - netvsc_linkstatus_callback( - dev->net_dev->dev, 1); - } else if (indicate->status == RNDIS_STATUS_MEDIA_DISCONNECT) { - netvsc_linkstatus_callback( - dev->net_dev->dev, 0); - } else { - /* - * TODO: - */ - } -} - /* * Get the Per-Packet-Info with the specified type * return NULL if not found. @@ -464,7 +445,7 @@ int rndis_filter_receive(struct hv_device *dev, case RNDIS_MSG_INDICATE: /* notification msgs */ - rndis_filter_receive_indicate_status(rndis_dev, rndis_msg); + netvsc_linkstatus_callback(dev, rndis_msg); break; default: netdev_err(ndev, diff --git a/include/linux/rndis.h b/include/linux/rndis.h index 0c8dc7195cdb..93c0a64aefa6 100644 --- a/include/linux/rndis.h +++ b/include/linux/rndis.h @@ -65,6 +65,7 @@ #define RNDIS_STATUS_MEDIA_SPECIFIC_INDICATION 0x40010012 #define RNDIS_STATUS_WW_INDICATION RDIA_SPECIFIC_INDICATION #define RNDIS_STATUS_LINK_SPEED_CHANGE 0x40010013L +#define RNDIS_STATUS_NETWORK_CHANGE 0x40010018 #define RNDIS_STATUS_NOT_RESETTABLE 0x80010001 #define RNDIS_STATUS_SOFT_ERRORS 0x80010003 -- cgit v1.2.3-59-g8ed1b From c4027faf1dcfc325663464b3f97847358b172c0b Mon Sep 17 00:00:00 2001 From: Bo Shen Date: Wed, 11 Jun 2014 18:14:39 +0800 Subject: ASoC: atmel-ssc: distinguish whether SSC supports fslen ext Add compatible string to distinguish whether SSC supports frame sync length extension. Signed-off-by: Bo Shen Acked-by: Nicolas Ferre Signed-off-by: Mark Brown --- drivers/misc/atmel-ssc.c | 13 +++++++++++++ include/linux/atmel-ssc.h | 1 + 2 files changed, 14 insertions(+) (limited to 'include/linux') diff --git a/drivers/misc/atmel-ssc.c b/drivers/misc/atmel-ssc.c index 22de13727641..60843a275abd 100644 --- a/drivers/misc/atmel-ssc.c +++ b/drivers/misc/atmel-ssc.c @@ -83,16 +83,26 @@ EXPORT_SYMBOL(ssc_free); static struct atmel_ssc_platform_data at91rm9200_config = { .use_dma = 0, + .has_fslen_ext = 0, +}; + +static struct atmel_ssc_platform_data at91sam9rl_config = { + .use_dma = 0, + .has_fslen_ext = 1, }; static struct atmel_ssc_platform_data at91sam9g45_config = { .use_dma = 1, + .has_fslen_ext = 1, }; static const struct platform_device_id atmel_ssc_devtypes[] = { { .name = "at91rm9200_ssc", .driver_data = (unsigned long) &at91rm9200_config, + }, { + .name = "at91sam9rl_ssc", + .driver_data = (unsigned long) &at91sam9rl_config, }, { .name = "at91sam9g45_ssc", .driver_data = (unsigned long) &at91sam9g45_config, @@ -106,6 +116,9 @@ static const struct of_device_id atmel_ssc_dt_ids[] = { { .compatible = "atmel,at91rm9200-ssc", .data = &at91rm9200_config, + }, { + .compatible = "atmel,at91sam9rl-ssc", + .data = &at91sam9rl_config, }, { .compatible = "atmel,at91sam9g45-ssc", .data = &at91sam9g45_config, diff --git a/include/linux/atmel-ssc.h b/include/linux/atmel-ssc.h index 571a12ebb018..e8dd40873d55 100644 --- a/include/linux/atmel-ssc.h +++ b/include/linux/atmel-ssc.h @@ -7,6 +7,7 @@ struct atmel_ssc_platform_data { int use_dma; + int has_fslen_ext; }; struct ssc_device { -- cgit v1.2.3-59-g8ed1b From dfaf535665faa4b5aba4b59633f6b724a467c96e Mon Sep 17 00:00:00 2001 From: Bo Shen Date: Wed, 11 Jun 2014 18:14:40 +0800 Subject: ASoC: atmel_ssc_dai: enable fslen extension feature When SSC work as master, it will generate the frame sync signal. On old SoCs, it only supports frame sync length less or equal to 16bits, on newer SoCs, it supports frame sync length extension, which can support frame size larger than 16 bits. So, add this to make it supports playback 24/32 bits audio clips. 
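On parts with the extension, the generated frame sync length is FSLEN + 16 * FSLEN_EXT + 1 bit clocks, which is what lets frames wider than 16 bits work when the SSC is the frame sync master. A sketch of the encoding used by the patch below (the helper name is illustrative only; the driver computes these values inline):

    /* split a frame sync of 'bits' bit clocks into FSLEN/FSLEN_EXT */
    static void ssc_encode_fslen(int bits, int *fslen, int *fslen_ext)
    {
            *fslen_ext = (bits - 1) / 16;   /* 16 bits -> 0, 24/32 bits -> 1 */
            *fslen     = (bits - 1) % 16;   /* 16 -> 15, 24 -> 7, 32 -> 15 */
    }

So a 24-bit frame programs FSLEN = 7 with FSLEN_EXT = 1, and a 32-bit frame programs FSLEN = 15 with FSLEN_EXT = 1.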
Signed-off-by: Bo Shen Acked-by: Nicolas Ferre Signed-off-by: Mark Brown --- include/linux/atmel-ssc.h | 12 ++++++++++++ sound/soc/atmel/atmel_ssc_dai.c | 34 ++++++++++++++++++---------------- 2 files changed, 30 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/atmel-ssc.h b/include/linux/atmel-ssc.h index e8dd40873d55..7c0f6549898b 100644 --- a/include/linux/atmel-ssc.h +++ b/include/linux/atmel-ssc.h @@ -72,6 +72,12 @@ void ssc_free(struct ssc_device *ssc); #define SSC_RFMR_DATNB_OFFSET 8 #define SSC_RFMR_FSEDGE_SIZE 1 #define SSC_RFMR_FSEDGE_OFFSET 24 +/* + * The FSLEN_EXT exist on at91sam9rl, at91sam9g10, + * at91sam9g20, and at91sam9g45 and newer SoCs + */ +#define SSC_RFMR_FSLEN_EXT_SIZE 4 +#define SSC_RFMR_FSLEN_EXT_OFFSET 28 #define SSC_RFMR_FSLEN_SIZE 4 #define SSC_RFMR_FSLEN_OFFSET 16 #define SSC_RFMR_FSOS_SIZE 4 @@ -110,6 +116,12 @@ void ssc_free(struct ssc_device *ssc); #define SSC_TFMR_FSDEN_OFFSET 23 #define SSC_TFMR_FSEDGE_SIZE 1 #define SSC_TFMR_FSEDGE_OFFSET 24 +/* + * The FSLEN_EXT exist on at91sam9rl, at91sam9g10, + * at91sam9g20, and at91sam9g45 and newer SoCs + */ +#define SSC_TFMR_FSLEN_EXT_SIZE 4 +#define SSC_TFMR_FSLEN_EXT_OFFSET 28 #define SSC_TFMR_FSLEN_SIZE 4 #define SSC_TFMR_FSLEN_OFFSET 16 #define SSC_TFMR_FSOS_SIZE 3 diff --git a/sound/soc/atmel/atmel_ssc_dai.c b/sound/soc/atmel/atmel_ssc_dai.c index de433cfd044c..f403f399808a 100644 --- a/sound/soc/atmel/atmel_ssc_dai.c +++ b/sound/soc/atmel/atmel_ssc_dai.c @@ -347,6 +347,7 @@ static int atmel_ssc_hw_params(struct snd_pcm_substream *substream, u32 tfmr, rfmr, tcmr, rcmr; int start_event; int ret; + int fslen, fslen_ext; /* * Currently, there is only one set of dma params for @@ -387,18 +388,6 @@ static int atmel_ssc_hw_params(struct snd_pcm_substream *substream, return -EINVAL; } - /* - * The SSC only supports up to 16-bit samples in I2S format, due - * to the size of the Frame Mode Register FSLEN field. - */ - if ((ssc_p->daifmt & SND_SOC_DAIFMT_FORMAT_MASK) == SND_SOC_DAIFMT_I2S - && bits > 16) { - printk(KERN_WARNING - "atmel_ssc_dai: sample size %d " - "is too large for I2S\n", bits); - return -EINVAL; - } - /* * Compute SSC register settings. */ @@ -413,6 +402,17 @@ static int atmel_ssc_hw_params(struct snd_pcm_substream *substream, * from the MCK divider, and the BCLK signal * is output on the SSC TK line. 
*/ + + if (bits > 16 && !ssc->pdata->has_fslen_ext) { + dev_err(dai->dev, + "sample size %d is too large for SSC device\n", + bits); + return -EINVAL; + } + + fslen_ext = (bits - 1) / 16; + fslen = (bits - 1) % 16; + rcmr = SSC_BF(RCMR_PERIOD, ssc_p->rcmr_period) | SSC_BF(RCMR_STTDLY, START_DELAY) | SSC_BF(RCMR_START, SSC_START_FALLING_RF) @@ -420,9 +420,10 @@ static int atmel_ssc_hw_params(struct snd_pcm_substream *substream, | SSC_BF(RCMR_CKO, SSC_CKO_NONE) | SSC_BF(RCMR_CKS, SSC_CKS_DIV); - rfmr = SSC_BF(RFMR_FSEDGE, SSC_FSEDGE_POSITIVE) + rfmr = SSC_BF(RFMR_FSLEN_EXT, fslen_ext) + | SSC_BF(RFMR_FSEDGE, SSC_FSEDGE_POSITIVE) | SSC_BF(RFMR_FSOS, SSC_FSOS_NEGATIVE) - | SSC_BF(RFMR_FSLEN, (bits - 1)) + | SSC_BF(RFMR_FSLEN, fslen) | SSC_BF(RFMR_DATNB, (channels - 1)) | SSC_BIT(RFMR_MSBF) | SSC_BF(RFMR_LOOP, 0) @@ -435,10 +436,11 @@ static int atmel_ssc_hw_params(struct snd_pcm_substream *substream, | SSC_BF(TCMR_CKO, SSC_CKO_CONTINUOUS) | SSC_BF(TCMR_CKS, SSC_CKS_DIV); - tfmr = SSC_BF(TFMR_FSEDGE, SSC_FSEDGE_POSITIVE) + tfmr = SSC_BF(TFMR_FSLEN_EXT, fslen_ext) + | SSC_BF(TFMR_FSEDGE, SSC_FSEDGE_POSITIVE) | SSC_BF(TFMR_FSDEN, 0) | SSC_BF(TFMR_FSOS, SSC_FSOS_NEGATIVE) - | SSC_BF(TFMR_FSLEN, (bits - 1)) + | SSC_BF(TFMR_FSLEN, fslen) | SSC_BF(TFMR_DATNB, (channels - 1)) | SSC_BIT(TFMR_MSBF) | SSC_BF(TFMR_DATDEF, 0) -- cgit v1.2.3-59-g8ed1b From c051b21f71d1ffdfd7ad406a1ef5ede5e5f974c5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 22 May 2014 03:25:50 +0000 Subject: rtmutex: Confine deadlock logic to futex The deadlock logic is only required for futexes. Remove the extra arguments for the public functions and also for the futex specific ones which get always called with deadlock detection enabled. Signed-off-by: Thomas Gleixner Reviewed-by: Steven Rostedt --- include/linux/rtmutex.h | 6 ++--- kernel/futex.c | 10 +++---- kernel/locking/rtmutex.c | 59 +++++++++++++++++++++-------------------- kernel/locking/rtmutex_common.h | 7 +++-- 4 files changed, 40 insertions(+), 42 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h index 3aed8d737e1a..1abba5ce2a2f 100644 --- a/include/linux/rtmutex.h +++ b/include/linux/rtmutex.h @@ -90,11 +90,9 @@ extern void __rt_mutex_init(struct rt_mutex *lock, const char *name); extern void rt_mutex_destroy(struct rt_mutex *lock); extern void rt_mutex_lock(struct rt_mutex *lock); -extern int rt_mutex_lock_interruptible(struct rt_mutex *lock, - int detect_deadlock); +extern int rt_mutex_lock_interruptible(struct rt_mutex *lock); extern int rt_mutex_timed_lock(struct rt_mutex *lock, - struct hrtimer_sleeper *timeout, - int detect_deadlock); + struct hrtimer_sleeper *timeout); extern int rt_mutex_trylock(struct rt_mutex *lock); diff --git a/kernel/futex.c b/kernel/futex.c index b632b5f3f094..e5c6c404be1d 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1718,7 +1718,7 @@ retry_private: this->pi_state = pi_state; ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex, this->rt_waiter, - this->task, 1); + this->task); if (ret == 1) { /* We got the lock. */ requeue_pi_wake_futex(this, &key2, hb2); @@ -2337,9 +2337,9 @@ retry_private: /* * Block on the PI mutex: */ - if (!trylock) - ret = rt_mutex_timed_lock(&q.pi_state->pi_mutex, to, 1); - else { + if (!trylock) { + ret = rt_mutex_timed_futex_lock(&q.pi_state->pi_mutex, to); + } else { ret = rt_mutex_trylock(&q.pi_state->pi_mutex); /* Fixup the trylock return value: */ ret = ret ? 
0 : -EWOULDBLOCK; @@ -2669,7 +2669,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, */ WARN_ON(!q.pi_state); pi_mutex = &q.pi_state->pi_mutex; - ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter, 1); + ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter); debug_rt_mutex_free_waiter(&rt_waiter); spin_lock(q.lock_ptr); diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index 1e8fdabb19de..32906482edd1 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -1228,16 +1228,15 @@ rt_mutex_slowunlock(struct rt_mutex *lock) */ static inline int rt_mutex_fastlock(struct rt_mutex *lock, int state, - int detect_deadlock, int (*slowfn)(struct rt_mutex *lock, int state, struct hrtimer_sleeper *timeout, int detect_deadlock)) { - if (!detect_deadlock && likely(rt_mutex_cmpxchg(lock, NULL, current))) { + if (likely(rt_mutex_cmpxchg(lock, NULL, current))) { rt_mutex_deadlock_account_lock(lock, current); return 0; } else - return slowfn(lock, state, NULL, detect_deadlock); + return slowfn(lock, state, NULL, 0); } static inline int @@ -1284,54 +1283,59 @@ void __sched rt_mutex_lock(struct rt_mutex *lock) { might_sleep(); - rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, 0, rt_mutex_slowlock); + rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, rt_mutex_slowlock); } EXPORT_SYMBOL_GPL(rt_mutex_lock); /** * rt_mutex_lock_interruptible - lock a rt_mutex interruptible * - * @lock: the rt_mutex to be locked - * @detect_deadlock: deadlock detection on/off + * @lock: the rt_mutex to be locked * * Returns: - * 0 on success - * -EINTR when interrupted by a signal - * -EDEADLK when the lock would deadlock (when deadlock detection is on) + * 0 on success + * -EINTR when interrupted by a signal */ -int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock, - int detect_deadlock) +int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock) { might_sleep(); - return rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, - detect_deadlock, rt_mutex_slowlock); + return rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, rt_mutex_slowlock); } EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible); +/* + * Futex variant with full deadlock detection. 
+ */ +int rt_mutex_timed_futex_lock(struct rt_mutex *lock, + struct hrtimer_sleeper *timeout) +{ + might_sleep(); + + return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout, 1, + rt_mutex_slowlock); +} + /** * rt_mutex_timed_lock - lock a rt_mutex interruptible * the timeout structure is provided * by the caller * - * @lock: the rt_mutex to be locked + * @lock: the rt_mutex to be locked * @timeout: timeout structure or NULL (no timeout) - * @detect_deadlock: deadlock detection on/off * * Returns: - * 0 on success - * -EINTR when interrupted by a signal + * 0 on success + * -EINTR when interrupted by a signal * -ETIMEDOUT when the timeout expired - * -EDEADLK when the lock would deadlock (when deadlock detection is on) */ int -rt_mutex_timed_lock(struct rt_mutex *lock, struct hrtimer_sleeper *timeout, - int detect_deadlock) +rt_mutex_timed_lock(struct rt_mutex *lock, struct hrtimer_sleeper *timeout) { might_sleep(); - return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout, - detect_deadlock, rt_mutex_slowlock); + return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout, 0, + rt_mutex_slowlock); } EXPORT_SYMBOL_GPL(rt_mutex_timed_lock); @@ -1437,7 +1441,6 @@ void rt_mutex_proxy_unlock(struct rt_mutex *lock, * @lock: the rt_mutex to take * @waiter: the pre-initialized rt_mutex_waiter * @task: the task to prepare - * @detect_deadlock: perform deadlock detection (1) or not (0) * * Returns: * 0 - task blocked on lock @@ -1448,7 +1451,7 @@ void rt_mutex_proxy_unlock(struct rt_mutex *lock, */ int rt_mutex_start_proxy_lock(struct rt_mutex *lock, struct rt_mutex_waiter *waiter, - struct task_struct *task, int detect_deadlock) + struct task_struct *task) { int ret; @@ -1506,22 +1509,20 @@ struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock) * rt_mutex_finish_proxy_lock() - Complete lock acquisition * @lock: the rt_mutex we were woken on * @to: the timeout, null if none. hrtimer should already have - * been started. + * been started. * @waiter: the pre-initialized rt_mutex_waiter - * @detect_deadlock: perform deadlock detection (1) or not (0) * * Complete the lock acquisition started our behalf by another thread. 
* * Returns: * 0 - success - * <0 - error, one of -EINTR, -ETIMEDOUT, or -EDEADLK + * <0 - error, one of -EINTR, -ETIMEDOUT * * Special API call for PI-futex requeue support */ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock, struct hrtimer_sleeper *to, - struct rt_mutex_waiter *waiter, - int detect_deadlock) + struct rt_mutex_waiter *waiter) { int ret; diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h index 7431a9c86f35..cd3ec209d0c8 100644 --- a/kernel/locking/rtmutex_common.h +++ b/kernel/locking/rtmutex_common.h @@ -111,12 +111,11 @@ extern void rt_mutex_proxy_unlock(struct rt_mutex *lock, struct task_struct *proxy_owner); extern int rt_mutex_start_proxy_lock(struct rt_mutex *lock, struct rt_mutex_waiter *waiter, - struct task_struct *task, - int detect_deadlock); + struct task_struct *task); extern int rt_mutex_finish_proxy_lock(struct rt_mutex *lock, struct hrtimer_sleeper *to, - struct rt_mutex_waiter *waiter, - int detect_deadlock); + struct rt_mutex_waiter *waiter); +extern int rt_mutex_timed_futex_lock(struct rt_mutex *l, struct hrtimer_sleeper *to); #ifdef CONFIG_DEBUG_RT_MUTEXES # include "rtmutex-debug.h" -- cgit v1.2.3-59-g8ed1b From 43a775916d63d1c822107b39987192ca5ced445c Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 9 Jun 2014 16:20:05 +0800 Subject: genirq: Export irq_domain_disassociate() to architecture interrupt drivers Export irq_domain_disassociate() to architecture interrupt drivers, so it could be used to handle legacy IRQ descriptors on x86. Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: Tony Luck Cc: Joerg Roedel Cc: Paul Gortmaker Cc: Greg Kroah-Hartman Cc: Benjamin Herrenschmidt Cc: Grant Likely Cc: Rafael J. Wysocki Cc: Bjorn Helgaas Cc: Randy Dunlap Cc: Yinghai Lu Link: http://lkml.kernel.org/r/1402302011-23642-37-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- include/linux/irqdomain.h | 2 ++ kernel/irq/irqdomain.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index c983ed18c332..b0f9d16e48f6 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -172,6 +172,8 @@ extern int irq_domain_associate(struct irq_domain *domain, unsigned int irq, extern void irq_domain_associate_many(struct irq_domain *domain, unsigned int irq_base, irq_hw_number_t hwirq_base, int count); +extern void irq_domain_disassociate(struct irq_domain *domain, + unsigned int irq); extern unsigned int irq_create_mapping(struct irq_domain *host, irq_hw_number_t hwirq); diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index eb5e10e32e05..6534ff6ce02e 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -231,7 +231,7 @@ void irq_set_default_host(struct irq_domain *domain) } EXPORT_SYMBOL_GPL(irq_set_default_host); -static void irq_domain_disassociate(struct irq_domain *domain, unsigned int irq) +void irq_domain_disassociate(struct irq_domain *domain, unsigned int irq) { struct irq_data *irq_data = irq_get_irq_data(irq); irq_hw_number_t hwirq; -- cgit v1.2.3-59-g8ed1b From 03187c72db60e20354aca6802bc5cc3e42c1d6e1 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Sun, 8 Jun 2014 22:47:10 +0800 Subject: regulator: ab8500: Remove ab8500_regulator_debug_init/exit() CONFIG_REGULATOR_AB8500_DEBUG is always not defined. ab8500_regulator_debug_init() is not called at all now, ab8500_regulator_debug_exit() simply return 0, thus remove them. 
Signed-off-by: Axel Lin Reviewed-by: Linus Walleij Signed-off-by: Mark Brown --- drivers/regulator/ab8500.c | 13 ------------- include/linux/regulator/ab8500.h | 14 -------------- 2 files changed, 27 deletions(-) (limited to 'include/linux') diff --git a/drivers/regulator/ab8500.c b/drivers/regulator/ab8500.c index 88da8626a57a..1fda14e12ea8 100644 --- a/drivers/regulator/ab8500.c +++ b/drivers/regulator/ab8500.c @@ -3071,21 +3071,8 @@ static int ab8500_regulator_probe(struct platform_device *pdev) return 0; } -static int ab8500_regulator_remove(struct platform_device *pdev) -{ - int err; - - /* remove regulator debug */ - err = ab8500_regulator_debug_exit(pdev); - if (err) - return err; - - return 0; -} - static struct platform_driver ab8500_regulator_driver = { .probe = ab8500_regulator_probe, - .remove = ab8500_regulator_remove, .driver = { .name = "ab8500-regulator", .owner = THIS_MODULE, diff --git a/include/linux/regulator/ab8500.h b/include/linux/regulator/ab8500.h index 75307447cef9..d8ecefaf63ca 100644 --- a/include/linux/regulator/ab8500.h +++ b/include/linux/regulator/ab8500.h @@ -322,18 +322,4 @@ struct ab8500_regulator_platform_data { struct regulator_init_data *ext_regulator; }; -#ifdef CONFIG_REGULATOR_AB8500_DEBUG -int ab8500_regulator_debug_init(struct platform_device *pdev); -int ab8500_regulator_debug_exit(struct platform_device *pdev); -#else -static inline int ab8500_regulator_debug_init(struct platform_device *pdev) -{ - return 0; -} -static inline int ab8500_regulator_debug_exit(struct platform_device *pdev) -{ - return 0; -} -#endif - #endif -- cgit v1.2.3-59-g8ed1b From 0da6bc8cc3417a5e452efb886ff2c61e72b743d6 Mon Sep 17 00:00:00 2001 From: Varka Bhadram Date: Fri, 20 Jun 2014 17:47:15 +0530 Subject: ieee802154: cc2520: adds driver for TI CC2520 radio This patch adds the driver support for the cc2520 radio. Driver support: - Tx and Rx of IEEE-802.15.4 packets - Energy Detection on channel - Setting the Channel for the radio. [b/w 11 - 26 channels] - Start and Stop the radio - h/w address filtering Signed-off-by: Varka Bhadram Signed-off-by: David S. Miller --- drivers/net/ieee802154/cc2520.c | 1039 +++++++++++++++++++++++++++++++++++++++ include/linux/spi/cc2520.h | 26 + 2 files changed, 1065 insertions(+) create mode 100644 drivers/net/ieee802154/cc2520.c create mode 100644 include/linux/spi/cc2520.h (limited to 'include/linux') diff --git a/drivers/net/ieee802154/cc2520.c b/drivers/net/ieee802154/cc2520.c new file mode 100644 index 000000000000..8a5ac7ab2300 --- /dev/null +++ b/drivers/net/ieee802154/cc2520.c @@ -0,0 +1,1039 @@ +/* Driver for TI CC2520 802.15.4 Wireless-PAN Networking controller + * + * Copyright (C) 2014 Varka Bhadram + * Md.Jamal Mohiuddin + * P Sowjanya + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#define SPI_COMMAND_BUFFER 3 +#define HIGH 1 +#define LOW 0 +#define STATE_IDLE 0 +#define RSSI_VALID 0 +#define RSSI_OFFSET 78 + +#define CC2520_RAM_SIZE 640 +#define CC2520_FIFO_SIZE 128 + +#define CC2520RAM_TXFIFO 0x100 +#define CC2520RAM_RXFIFO 0x180 +#define CC2520RAM_IEEEADDR 0x3EA +#define CC2520RAM_PANID 0x3F2 +#define CC2520RAM_SHORTADDR 0x3F4 + +#define CC2520_FREG_MASK 0x3F + +/* status byte values */ +#define CC2520_STATUS_XOSC32M_STABLE (1 << 7) +#define CC2520_STATUS_RSSI_VALID (1 << 6) +#define CC2520_STATUS_TX_UNDERFLOW (1 << 3) + +/* IEEE-802.15.4 defined constants (2.4 GHz logical channels) */ +#define CC2520_MINCHANNEL 11 +#define CC2520_MAXCHANNEL 26 +#define CC2520_CHANNEL_SPACING 5 + +/* command strobes */ +#define CC2520_CMD_SNOP 0x00 +#define CC2520_CMD_IBUFLD 0x02 +#define CC2520_CMD_SIBUFEX 0x03 +#define CC2520_CMD_SSAMPLECCA 0x04 +#define CC2520_CMD_SRES 0x0f +#define CC2520_CMD_MEMORY_MASK 0x0f +#define CC2520_CMD_MEMORY_READ 0x10 +#define CC2520_CMD_MEMORY_WRITE 0x20 +#define CC2520_CMD_RXBUF 0x30 +#define CC2520_CMD_RXBUFCP 0x38 +#define CC2520_CMD_RXBUFMOV 0x32 +#define CC2520_CMD_TXBUF 0x3A +#define CC2520_CMD_TXBUFCP 0x3E +#define CC2520_CMD_RANDOM 0x3C +#define CC2520_CMD_SXOSCON 0x40 +#define CC2520_CMD_STXCAL 0x41 +#define CC2520_CMD_SRXON 0x42 +#define CC2520_CMD_STXON 0x43 +#define CC2520_CMD_STXONCCA 0x44 +#define CC2520_CMD_SRFOFF 0x45 +#define CC2520_CMD_SXOSCOFF 0x46 +#define CC2520_CMD_SFLUSHRX 0x47 +#define CC2520_CMD_SFLUSHTX 0x48 +#define CC2520_CMD_SACK 0x49 +#define CC2520_CMD_SACKPEND 0x4A +#define CC2520_CMD_SNACK 0x4B +#define CC2520_CMD_SRXMASKBITSET 0x4C +#define CC2520_CMD_SRXMASKBITCLR 0x4D +#define CC2520_CMD_RXMASKAND 0x4E +#define CC2520_CMD_RXMASKOR 0x4F +#define CC2520_CMD_MEMCP 0x50 +#define CC2520_CMD_MEMCPR 0x52 +#define CC2520_CMD_MEMXCP 0x54 +#define CC2520_CMD_MEMXWR 0x56 +#define CC2520_CMD_BCLR 0x58 +#define CC2520_CMD_BSET 0x59 +#define CC2520_CMD_CTR_UCTR 0x60 +#define CC2520_CMD_CBCMAC 0x64 +#define CC2520_CMD_UCBCMAC 0x66 +#define CC2520_CMD_CCM 0x68 +#define CC2520_CMD_UCCM 0x6A +#define CC2520_CMD_ECB 0x70 +#define CC2520_CMD_ECBO 0x72 +#define CC2520_CMD_ECBX 0x74 +#define CC2520_CMD_INC 0x78 +#define CC2520_CMD_ABORT 0x7F +#define CC2520_CMD_REGISTER_READ 0x80 +#define CC2520_CMD_REGISTER_WRITE 0xC0 + +/* status registers */ +#define CC2520_CHIPID 0x40 +#define CC2520_VERSION 0x42 +#define CC2520_EXTCLOCK 0x44 +#define CC2520_MDMCTRL0 0x46 +#define CC2520_MDMCTRL1 0x47 +#define CC2520_FREQEST 0x48 +#define CC2520_RXCTRL 0x4A +#define CC2520_FSCTRL 0x4C +#define CC2520_FSCAL0 0x4E +#define CC2520_FSCAL1 0x4F +#define CC2520_FSCAL2 0x50 +#define CC2520_FSCAL3 0x51 +#define CC2520_AGCCTRL0 0x52 +#define CC2520_AGCCTRL1 0x53 +#define CC2520_AGCCTRL2 0x54 +#define CC2520_AGCCTRL3 0x55 +#define CC2520_ADCTEST0 0x56 +#define CC2520_ADCTEST1 0x57 +#define CC2520_ADCTEST2 0x58 +#define CC2520_MDMTEST0 0x5A +#define CC2520_MDMTEST1 0x5B +#define CC2520_DACTEST0 0x5C +#define CC2520_DACTEST1 0x5D +#define CC2520_ATEST 0x5E +#define CC2520_DACTEST2 0x5F +#define CC2520_PTEST0 0x60 +#define CC2520_PTEST1 0x61 +#define CC2520_RESERVED 0x62 +#define CC2520_DPUBIST 0x7A +#define CC2520_ACTBIST 0x7C +#define CC2520_RAMBIST 0x7E + +/* frame registers */ +#define CC2520_FRMFILT0 0x00 +#define CC2520_FRMFILT1 0x01 +#define CC2520_SRCMATCH 0x02 +#define CC2520_SRCSHORTEN0 0x04 
+#define CC2520_SRCSHORTEN1 0x05 +#define CC2520_SRCSHORTEN2 0x06 +#define CC2520_SRCEXTEN0 0x08 +#define CC2520_SRCEXTEN1 0x09 +#define CC2520_SRCEXTEN2 0x0A +#define CC2520_FRMCTRL0 0x0C +#define CC2520_FRMCTRL1 0x0D +#define CC2520_RXENABLE0 0x0E +#define CC2520_RXENABLE1 0x0F +#define CC2520_EXCFLAG0 0x10 +#define CC2520_EXCFLAG1 0x11 +#define CC2520_EXCFLAG2 0x12 +#define CC2520_EXCMASKA0 0x14 +#define CC2520_EXCMASKA1 0x15 +#define CC2520_EXCMASKA2 0x16 +#define CC2520_EXCMASKB0 0x18 +#define CC2520_EXCMASKB1 0x19 +#define CC2520_EXCMASKB2 0x1A +#define CC2520_EXCBINDX0 0x1C +#define CC2520_EXCBINDX1 0x1D +#define CC2520_EXCBINDY0 0x1E +#define CC2520_EXCBINDY1 0x1F +#define CC2520_GPIOCTRL0 0x20 +#define CC2520_GPIOCTRL1 0x21 +#define CC2520_GPIOCTRL2 0x22 +#define CC2520_GPIOCTRL3 0x23 +#define CC2520_GPIOCTRL4 0x24 +#define CC2520_GPIOCTRL5 0x25 +#define CC2520_GPIOPOLARITY 0x26 +#define CC2520_GPIOCTRL 0x28 +#define CC2520_DPUCON 0x2A +#define CC2520_DPUSTAT 0x2C +#define CC2520_FREQCTRL 0x2E +#define CC2520_FREQTUNE 0x2F +#define CC2520_TXPOWER 0x30 +#define CC2520_TXCTRL 0x31 +#define CC2520_FSMSTAT0 0x32 +#define CC2520_FSMSTAT1 0x33 +#define CC2520_FIFOPCTRL 0x34 +#define CC2520_FSMCTRL 0x35 +#define CC2520_CCACTRL0 0x36 +#define CC2520_CCACTRL1 0x37 +#define CC2520_RSSI 0x38 +#define CC2520_RSSISTAT 0x39 +#define CC2520_RXFIRST 0x3C +#define CC2520_RXFIFOCNT 0x3E +#define CC2520_TXFIFOCNT 0x3F + +/* Driver private information */ +struct cc2520_private { + struct spi_device *spi; /* SPI device structure */ + struct ieee802154_dev *dev; /* IEEE-802.15.4 device */ + u8 *buf; /* SPI TX/Rx data buffer */ + struct mutex buffer_mutex; /* SPI buffer mutex */ + bool is_tx; /* Flag for sync b/w Tx and Rx */ + int fifo_pin; /* FIFO GPIO pin number */ + struct work_struct fifop_irqwork;/* Workqueue for FIFOP */ + spinlock_t lock; /* Lock for is_tx*/ + struct completion tx_complete; /* Work completion for Tx */ +}; + +/* Generic Functions */ +static int +cc2520_cmd_strobe(struct cc2520_private *priv, u8 cmd) +{ + int ret; + u8 status = 0xff; + struct spi_message msg; + struct spi_transfer xfer = { + .len = 0, + .tx_buf = priv->buf, + .rx_buf = priv->buf, + }; + + spi_message_init(&msg); + spi_message_add_tail(&xfer, &msg); + + mutex_lock(&priv->buffer_mutex); + priv->buf[xfer.len++] = cmd; + dev_vdbg(&priv->spi->dev, + "command strobe buf[0] = %02x\n", + priv->buf[0]); + + ret = spi_sync(priv->spi, &msg); + if (!ret) + status = priv->buf[0]; + dev_vdbg(&priv->spi->dev, + "buf[0] = %02x\n", priv->buf[0]); + mutex_unlock(&priv->buffer_mutex); + + return ret; +} + +static int +cc2520_get_status(struct cc2520_private *priv, u8 *status) +{ + int ret; + struct spi_message msg; + struct spi_transfer xfer = { + .len = 0, + .tx_buf = priv->buf, + .rx_buf = priv->buf, + }; + + spi_message_init(&msg); + spi_message_add_tail(&xfer, &msg); + + mutex_lock(&priv->buffer_mutex); + priv->buf[xfer.len++] = CC2520_CMD_SNOP; + dev_vdbg(&priv->spi->dev, + "get status command buf[0] = %02x\n", priv->buf[0]); + + ret = spi_sync(priv->spi, &msg); + if (!ret) + *status = priv->buf[0]; + dev_vdbg(&priv->spi->dev, + "buf[0] = %02x\n", priv->buf[0]); + mutex_unlock(&priv->buffer_mutex); + + return ret; +} + +static int +cc2520_write_register(struct cc2520_private *priv, u8 reg, u8 value) +{ + int status; + struct spi_message msg; + struct spi_transfer xfer = { + .len = 0, + .tx_buf = priv->buf, + .rx_buf = priv->buf, + }; + + spi_message_init(&msg); + spi_message_add_tail(&xfer, &msg); + + 
mutex_lock(&priv->buffer_mutex); + + if (reg <= CC2520_FREG_MASK) { + priv->buf[xfer.len++] = CC2520_CMD_REGISTER_WRITE | reg; + priv->buf[xfer.len++] = value; + } else { + priv->buf[xfer.len++] = CC2520_CMD_MEMORY_WRITE; + priv->buf[xfer.len++] = reg; + priv->buf[xfer.len++] = value; + } + status = spi_sync(priv->spi, &msg); + if (msg.status) + status = msg.status; + + mutex_unlock(&priv->buffer_mutex); + + return status; +} + +static int +cc2520_write_ram(struct cc2520_private *priv, u16 reg, u8 len, u8 *data) +{ + int status; + struct spi_message msg; + struct spi_transfer xfer_head = { + .len = 0, + .tx_buf = priv->buf, + .rx_buf = priv->buf, + }; + + struct spi_transfer xfer_buf = { + .len = len, + .tx_buf = data, + }; + + mutex_lock(&priv->buffer_mutex); + priv->buf[xfer_head.len++] = (CC2520_CMD_MEMORY_WRITE | + ((reg >> 8) & 0xff)); + priv->buf[xfer_head.len++] = reg & 0xff; + + spi_message_init(&msg); + spi_message_add_tail(&xfer_head, &msg); + spi_message_add_tail(&xfer_buf, &msg); + + status = spi_sync(priv->spi, &msg); + dev_dbg(&priv->spi->dev, "spi status = %d\n", status); + if (msg.status) + status = msg.status; + + mutex_unlock(&priv->buffer_mutex); + return status; +} + +static int +cc2520_read_register(struct cc2520_private *priv, u8 reg, u8 *data) +{ + int status; + struct spi_message msg; + struct spi_transfer xfer1 = { + .len = 0, + .tx_buf = priv->buf, + .rx_buf = priv->buf, + }; + + struct spi_transfer xfer2 = { + .len = 1, + .rx_buf = data, + }; + + spi_message_init(&msg); + spi_message_add_tail(&xfer1, &msg); + spi_message_add_tail(&xfer2, &msg); + + mutex_lock(&priv->buffer_mutex); + priv->buf[xfer1.len++] = CC2520_CMD_MEMORY_READ; + priv->buf[xfer1.len++] = reg; + + status = spi_sync(priv->spi, &msg); + dev_dbg(&priv->spi->dev, + "spi status = %d\n", status); + if (msg.status) + status = msg.status; + + mutex_unlock(&priv->buffer_mutex); + + return status; +} + +static int +cc2520_write_txfifo(struct cc2520_private *priv, u8 *data, u8 len) +{ + int status; + + /* length byte must include FCS even + * if it is calculated in the hardware + */ + int len_byte = len + 2; + + struct spi_message msg; + + struct spi_transfer xfer_head = { + .len = 0, + .tx_buf = priv->buf, + .rx_buf = priv->buf, + }; + struct spi_transfer xfer_len = { + .len = 1, + .tx_buf = &len_byte, + }; + struct spi_transfer xfer_buf = { + .len = len, + .tx_buf = data, + }; + + spi_message_init(&msg); + spi_message_add_tail(&xfer_head, &msg); + spi_message_add_tail(&xfer_len, &msg); + spi_message_add_tail(&xfer_buf, &msg); + + mutex_lock(&priv->buffer_mutex); + priv->buf[xfer_head.len++] = CC2520_CMD_TXBUF; + dev_vdbg(&priv->spi->dev, + "TX_FIFO cmd buf[0] = %02x\n", priv->buf[0]); + + status = spi_sync(priv->spi, &msg); + dev_vdbg(&priv->spi->dev, "status = %d\n", status); + if (msg.status) + status = msg.status; + dev_vdbg(&priv->spi->dev, "status = %d\n", status); + dev_vdbg(&priv->spi->dev, "buf[0] = %02x\n", priv->buf[0]); + mutex_unlock(&priv->buffer_mutex); + + return status; +} + +static int +cc2520_read_rxfifo(struct cc2520_private *priv, u8 *data, u8 len, u8 *lqi) +{ + int status; + struct spi_message msg; + + struct spi_transfer xfer_head = { + .len = 0, + .tx_buf = priv->buf, + .rx_buf = priv->buf, + }; + struct spi_transfer xfer_buf = { + .len = len, + .rx_buf = data, + }; + + spi_message_init(&msg); + spi_message_add_tail(&xfer_head, &msg); + spi_message_add_tail(&xfer_buf, &msg); + + mutex_lock(&priv->buffer_mutex); + priv->buf[xfer_head.len++] = CC2520_CMD_RXBUF; + + 
dev_vdbg(&priv->spi->dev, "read rxfifo buf[0] = %02x\n", priv->buf[0]); + dev_vdbg(&priv->spi->dev, "buf[1] = %02x\n", priv->buf[1]); + + status = spi_sync(priv->spi, &msg); + dev_vdbg(&priv->spi->dev, "status = %d\n", status); + if (msg.status) + status = msg.status; + dev_vdbg(&priv->spi->dev, "status = %d\n", status); + dev_vdbg(&priv->spi->dev, + "return status buf[0] = %02x\n", priv->buf[0]); + dev_vdbg(&priv->spi->dev, "length buf[1] = %02x\n", priv->buf[1]); + + mutex_unlock(&priv->buffer_mutex); + + return status; +} + +static int cc2520_start(struct ieee802154_dev *dev) +{ + return cc2520_cmd_strobe(dev->priv, CC2520_CMD_SRXON); +} + +static void cc2520_stop(struct ieee802154_dev *dev) +{ + cc2520_cmd_strobe(dev->priv, CC2520_CMD_SRFOFF); +} + +static int +cc2520_tx(struct ieee802154_dev *dev, struct sk_buff *skb) +{ + struct cc2520_private *priv = dev->priv; + unsigned long flags; + int rc; + u8 status = 0; + + rc = cc2520_cmd_strobe(priv, CC2520_CMD_SFLUSHTX); + if (rc) + goto err_tx; + + rc = cc2520_write_txfifo(priv, skb->data, skb->len); + if (rc) + goto err_tx; + + rc = cc2520_get_status(priv, &status); + if (rc) + goto err_tx; + + if (status & CC2520_STATUS_TX_UNDERFLOW) { + dev_err(&priv->spi->dev, "cc2520 tx underflow exception\n"); + goto err_tx; + } + + spin_lock_irqsave(&priv->lock, flags); + BUG_ON(priv->is_tx); + priv->is_tx = 1; + spin_unlock_irqrestore(&priv->lock, flags); + + rc = cc2520_cmd_strobe(priv, CC2520_CMD_STXONCCA); + if (rc) + goto err; + + rc = wait_for_completion_interruptible(&priv->tx_complete); + if (rc < 0) + goto err; + + cc2520_cmd_strobe(priv, CC2520_CMD_SFLUSHTX); + cc2520_cmd_strobe(priv, CC2520_CMD_SRXON); + + return rc; +err: + spin_lock_irqsave(&priv->lock, flags); + priv->is_tx = 0; + spin_unlock_irqrestore(&priv->lock, flags); +err_tx: + return rc; +} + + +static int cc2520_rx(struct cc2520_private *priv) +{ + u8 len = 0, lqi = 0, bytes = 1; + struct sk_buff *skb; + + cc2520_read_rxfifo(priv, &len, bytes, &lqi); + + if (len < 2 || len > IEEE802154_MTU) + return -EINVAL; + + skb = alloc_skb(len, GFP_KERNEL); + if (!skb) + return -ENOMEM; + + if (cc2520_read_rxfifo(priv, skb_put(skb, len), len, &lqi)) { + dev_dbg(&priv->spi->dev, "frame reception failed\n"); + kfree_skb(skb); + return -EINVAL; + } + + skb_trim(skb, skb->len - 2); + + ieee802154_rx_irqsafe(priv->dev, skb, lqi); + + dev_vdbg(&priv->spi->dev, "RXFIFO: %x %x\n", len, lqi); + + return 0; +} + +static int +cc2520_ed(struct ieee802154_dev *dev, u8 *level) +{ + struct cc2520_private *priv = dev->priv; + u8 status = 0xff; + u8 rssi; + int ret; + + ret = cc2520_read_register(priv , CC2520_RSSISTAT, &status); + if (ret) + return ret; + + if (status != RSSI_VALID) + return -EINVAL; + + ret = cc2520_read_register(priv , CC2520_RSSI, &rssi); + if (ret) + return ret; + + /* level = RSSI(rssi) - OFFSET [dBm] : offset is 76dBm */ + *level = rssi - RSSI_OFFSET; + + return 0; +} + +static int +cc2520_set_channel(struct ieee802154_dev *dev, int page, int channel) +{ + struct cc2520_private *priv = dev->priv; + int ret; + + might_sleep(); + dev_dbg(&priv->spi->dev, "trying to set channel\n"); + + BUG_ON(page != 0); + BUG_ON(channel < CC2520_MINCHANNEL); + BUG_ON(channel > CC2520_MAXCHANNEL); + + ret = cc2520_write_register(priv, CC2520_FREQCTRL, + 11 + 5*(channel - 11)); + + return ret; +} + +static int +cc2520_filter(struct ieee802154_dev *dev, + struct ieee802154_hw_addr_filt *filt, unsigned long changed) +{ + struct cc2520_private *priv = dev->priv; + + if (changed & 
IEEE802515_AFILT_PANID_CHANGED) { + u16 panid = le16_to_cpu(filt->pan_id); + + dev_vdbg(&priv->spi->dev, + "cc2520_filter called for pan id\n"); + cc2520_write_ram(priv, CC2520RAM_PANID, + sizeof(panid), (u8 *)&panid); + } + + if (changed & IEEE802515_AFILT_IEEEADDR_CHANGED) { + dev_vdbg(&priv->spi->dev, + "cc2520_filter called for IEEE addr\n"); + cc2520_write_ram(priv, CC2520RAM_IEEEADDR, + sizeof(filt->ieee_addr), + (u8 *)&filt->ieee_addr); + } + + if (changed & IEEE802515_AFILT_SADDR_CHANGED) { + u16 addr = le16_to_cpu(filt->short_addr); + + dev_vdbg(&priv->spi->dev, + "cc2520_filter called for saddr\n"); + cc2520_write_ram(priv, CC2520RAM_SHORTADDR, + sizeof(addr), (u8 *)&addr); + } + + if (changed & IEEE802515_AFILT_PANC_CHANGED) { + dev_vdbg(&priv->spi->dev, + "cc2520_filter called for panc change\n"); + if (filt->pan_coord) + cc2520_write_register(priv, CC2520_FRMFILT0, 0x02); + else + cc2520_write_register(priv, CC2520_FRMFILT0, 0x00); + } + + return 0; +} + +static struct ieee802154_ops cc2520_ops = { + .owner = THIS_MODULE, + .start = cc2520_start, + .stop = cc2520_stop, + .xmit = cc2520_tx, + .ed = cc2520_ed, + .set_channel = cc2520_set_channel, + .set_hw_addr_filt = cc2520_filter, +}; + +static int cc2520_register(struct cc2520_private *priv) +{ + int ret = -ENOMEM; + + priv->dev = ieee802154_alloc_device(sizeof(*priv), &cc2520_ops); + if (!priv->dev) + goto err_ret; + + priv->dev->priv = priv; + priv->dev->parent = &priv->spi->dev; + priv->dev->extra_tx_headroom = 0; + + /* We do support only 2.4 Ghz */ + priv->dev->phy->channels_supported[0] = 0x7FFF800; + priv->dev->flags = IEEE802154_HW_OMIT_CKSUM | IEEE802154_HW_AACK; + + dev_vdbg(&priv->spi->dev, "registered cc2520\n"); + ret = ieee802154_register_device(priv->dev); + if (ret) + goto err_free_device; + + return 0; + +err_free_device: + ieee802154_free_device(priv->dev); +err_ret: + return ret; +} + +static void cc2520_fifop_irqwork(struct work_struct *work) +{ + struct cc2520_private *priv + = container_of(work, struct cc2520_private, fifop_irqwork); + + dev_dbg(&priv->spi->dev, "fifop interrupt received\n"); + + if (gpio_get_value(priv->fifo_pin)) + cc2520_rx(priv); + else + dev_dbg(&priv->spi->dev, "rxfifo overflow\n"); + + cc2520_cmd_strobe(priv, CC2520_CMD_SFLUSHRX); + cc2520_cmd_strobe(priv, CC2520_CMD_SFLUSHRX); +} + +static irqreturn_t cc2520_fifop_isr(int irq, void *data) +{ + struct cc2520_private *priv = data; + + schedule_work(&priv->fifop_irqwork); + + return IRQ_HANDLED; +} + +static irqreturn_t cc2520_sfd_isr(int irq, void *data) +{ + struct cc2520_private *priv = data; + unsigned long flags; + + spin_lock_irqsave(&priv->lock, flags); + if (priv->is_tx) { + priv->is_tx = 0; + spin_unlock_irqrestore(&priv->lock, flags); + dev_dbg(&priv->spi->dev, "SFD for TX\n"); + complete(&priv->tx_complete); + } else { + spin_unlock_irqrestore(&priv->lock, flags); + dev_dbg(&priv->spi->dev, "SFD for RX\n"); + } + + return IRQ_HANDLED; +} + +static int cc2520_hw_init(struct cc2520_private *priv) +{ + u8 status = 0, state = 0xff; + int ret; + int timeout = 100; + + ret = cc2520_read_register(priv, CC2520_FSMSTAT1, &state); + if (ret) + goto err_ret; + + if (state != STATE_IDLE) + return -EINVAL; + + do { + ret = cc2520_get_status(priv, &status); + if (ret) + goto err_ret; + + if (timeout-- <= 0) { + dev_err(&priv->spi->dev, "oscillator start failed!\n"); + return ret; + } + udelay(1); + } while (!(status & CC2520_STATUS_XOSC32M_STABLE)); + + dev_vdbg(&priv->spi->dev, "oscillator brought up\n"); + + /* Registers default 
value: section 28.1 in Datasheet */ + ret = cc2520_write_register(priv, CC2520_TXPOWER, 0xF7); + if (ret) + goto err_ret; + + ret = cc2520_write_register(priv, CC2520_CCACTRL0, 0x1A); + if (ret) + goto err_ret; + + ret = cc2520_write_register(priv, CC2520_MDMCTRL0, 0x85); + if (ret) + goto err_ret; + + ret = cc2520_write_register(priv, CC2520_MDMCTRL1, 0x14); + if (ret) + goto err_ret; + + ret = cc2520_write_register(priv, CC2520_RXCTRL, 0x3f); + if (ret) + goto err_ret; + + ret = cc2520_write_register(priv, CC2520_FSCTRL, 0x5a); + if (ret) + goto err_ret; + + ret = cc2520_write_register(priv, CC2520_FSCAL1, 0x2b); + if (ret) + goto err_ret; + + ret = cc2520_write_register(priv, CC2520_AGCCTRL1, 0x11); + if (ret) + goto err_ret; + + ret = cc2520_write_register(priv, CC2520_ADCTEST0, 0x10); + if (ret) + goto err_ret; + + ret = cc2520_write_register(priv, CC2520_ADCTEST1, 0x0e); + if (ret) + goto err_ret; + + ret = cc2520_write_register(priv, CC2520_ADCTEST2, 0x03); + if (ret) + goto err_ret; + + ret = cc2520_write_register(priv, CC2520_FRMCTRL0, 0x60); + if (ret) + goto err_ret; + + ret = cc2520_write_register(priv, CC2520_FRMCTRL1, 0x03); + if (ret) + goto err_ret; + + ret = cc2520_write_register(priv, CC2520_FRMFILT0, 0x00); + if (ret) + goto err_ret; + + ret = cc2520_write_register(priv, CC2520_FIFOPCTRL, 127); + if (ret) + goto err_ret; + + return 0; + +err_ret: + return ret; +} + +static struct cc2520_platform_data * +cc2520_get_platform_data(struct spi_device *spi) +{ + struct cc2520_platform_data *pdata; + struct device_node *np = spi->dev.of_node; + struct cc2520_private *priv = spi_get_drvdata(spi); + + if (!np) + return spi->dev.platform_data; + + pdata = devm_kzalloc(&spi->dev, sizeof(*pdata), GFP_KERNEL); + if (!pdata) + goto done; + + pdata->fifo = of_get_named_gpio(np, "fifo-gpio", 0); + priv->fifo_pin = pdata->fifo; + + pdata->fifop = of_get_named_gpio(np, "fifop-gpio", 0); + + pdata->sfd = of_get_named_gpio(np, "sfd-gpio", 0); + pdata->cca = of_get_named_gpio(np, "cca-gpio", 0); + pdata->vreg = of_get_named_gpio(np, "vreg-gpio", 0); + pdata->reset = of_get_named_gpio(np, "reset-gpio", 0); + + spi->dev.platform_data = pdata; + +done: + return pdata; +} + +static int cc2520_probe(struct spi_device *spi) +{ + struct cc2520_private *priv; + struct pinctrl *pinctrl; + struct cc2520_platform_data *pdata; + int ret; + + priv = devm_kzalloc(&spi->dev, + sizeof(struct cc2520_private), GFP_KERNEL); + if (!priv) { + ret = -ENOMEM; + goto err_ret; + } + + spi_set_drvdata(spi, priv); + + pinctrl = devm_pinctrl_get_select_default(&spi->dev); + if (IS_ERR(pinctrl)) + dev_warn(&spi->dev, + "pinctrl pins are not configured"); + + pdata = cc2520_get_platform_data(spi); + if (!pdata) { + dev_err(&spi->dev, "no platform data\n"); + return -EINVAL; + } + + priv->spi = spi; + + priv->buf = devm_kzalloc(&spi->dev, + SPI_COMMAND_BUFFER, GFP_KERNEL); + if (!priv->buf) { + ret = -ENOMEM; + goto err_ret; + } + + mutex_init(&priv->buffer_mutex); + INIT_WORK(&priv->fifop_irqwork, cc2520_fifop_irqwork); + spin_lock_init(&priv->lock); + init_completion(&priv->tx_complete); + + /* Request all the gpio's */ + if (!gpio_is_valid(pdata->fifo)) { + dev_err(&spi->dev, "fifo gpio is not valid\n"); + ret = -EINVAL; + goto err_hw_init; + } + + ret = devm_gpio_request_one(&spi->dev, pdata->fifo, + GPIOF_IN, "fifo"); + if (ret) + goto err_hw_init; + + if (!gpio_is_valid(pdata->cca)) { + dev_err(&spi->dev, "cca gpio is not valid\n"); + ret = -EINVAL; + goto err_hw_init; + } + + ret = devm_gpio_request_one(&spi->dev, 
pdata->cca, + GPIOF_IN, "cca"); + if (ret) + goto err_hw_init; + + if (!gpio_is_valid(pdata->fifop)) { + dev_err(&spi->dev, "fifop gpio is not valid\n"); + ret = -EINVAL; + goto err_hw_init; + } + + ret = devm_gpio_request_one(&spi->dev, pdata->fifop, + GPIOF_IN, "fifop"); + if (ret) + goto err_hw_init; + + if (!gpio_is_valid(pdata->sfd)) { + dev_err(&spi->dev, "sfd gpio is not valid\n"); + ret = -EINVAL; + goto err_hw_init; + } + + ret = devm_gpio_request_one(&spi->dev, pdata->sfd, + GPIOF_IN, "sfd"); + if (ret) + goto err_hw_init; + + if (!gpio_is_valid(pdata->reset)) { + dev_err(&spi->dev, "reset gpio is not valid\n"); + ret = -EINVAL; + goto err_hw_init; + } + + ret = devm_gpio_request_one(&spi->dev, pdata->reset, + GPIOF_OUT_INIT_LOW, "reset"); + if (ret) + goto err_hw_init; + + if (!gpio_is_valid(pdata->vreg)) { + dev_err(&spi->dev, "vreg gpio is not valid\n"); + ret = -EINVAL; + goto err_hw_init; + } + + ret = devm_gpio_request_one(&spi->dev, pdata->vreg, + GPIOF_OUT_INIT_LOW, "vreg"); + if (ret) + goto err_hw_init; + + + gpio_set_value(pdata->vreg, HIGH); + usleep_range(100, 150); + + gpio_set_value(pdata->reset, HIGH); + usleep_range(200, 250); + + ret = cc2520_hw_init(priv); + if (ret) + goto err_hw_init; + + /* Set up fifop interrupt */ + ret = devm_request_irq(&spi->dev, + gpio_to_irq(pdata->fifop), + cc2520_fifop_isr, + IRQF_TRIGGER_RISING, + dev_name(&spi->dev), + priv); + if (ret) { + dev_err(&spi->dev, "could not get fifop irq\n"); + goto err_hw_init; + } + + /* Set up sfd interrupt */ + ret = devm_request_irq(&spi->dev, + gpio_to_irq(pdata->sfd), + cc2520_sfd_isr, + IRQF_TRIGGER_FALLING, + dev_name(&spi->dev), + priv); + if (ret) { + dev_err(&spi->dev, "could not get sfd irq\n"); + goto err_hw_init; + } + + ret = cc2520_register(priv); + if (ret) + goto err_hw_init; + + return 0; + +err_hw_init: + mutex_destroy(&priv->buffer_mutex); + flush_work(&priv->fifop_irqwork); + +err_ret: + return ret; +} + +static int cc2520_remove(struct spi_device *spi) +{ + struct cc2520_private *priv = spi_get_drvdata(spi); + + mutex_destroy(&priv->buffer_mutex); + flush_work(&priv->fifop_irqwork); + + ieee802154_unregister_device(priv->dev); + ieee802154_free_device(priv->dev); + + return 0; +} + +static const struct spi_device_id cc2520_ids[] = { + {"cc2520", }, + {}, +}; +MODULE_DEVICE_TABLE(spi, cc2520_ids); + +static const struct of_device_id cc2520_of_ids[] = { + {.compatible = "ti,cc2520", }, + {}, +}; +MODULE_DEVICE_TABLE(of, cc2520_of_ids); + +/* SPI driver structure */ +static struct spi_driver cc2520_driver = { + .driver = { + .name = "cc2520", + .bus = &spi_bus_type, + .owner = THIS_MODULE, + .of_match_table = of_match_ptr(cc2520_of_ids), + }, + .id_table = cc2520_ids, + .probe = cc2520_probe, + .remove = cc2520_remove, +}; +module_spi_driver(cc2520_driver); + +MODULE_AUTHOR("Varka Bhadram "); +MODULE_DESCRIPTION("CC2520 Transceiver Driver"); +MODULE_LICENSE("GPL v2"); diff --git a/include/linux/spi/cc2520.h b/include/linux/spi/cc2520.h new file mode 100644 index 000000000000..85b8ee67e937 --- /dev/null +++ b/include/linux/spi/cc2520.h @@ -0,0 +1,26 @@ +/* Header file for cc2520 radio driver + * + * Copyright (C) 2014 Varka Bhadram + * Md.Jamal Mohiuddin + * P Sowjanya + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + */ + +#ifndef __CC2520_H +#define __CC2520_H + +struct cc2520_platform_data { + int fifo; + int fifop; + int cca; + int sfd; + int reset; + int vreg; +}; + +#endif -- cgit v1.2.3-59-g8ed1b From cddd02489f52ccf635ed65931214729a23b93cd6 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Sun, 22 Jun 2014 01:29:15 +0200 Subject: hrtimer: Store cpu-number in struct hrtimer_cpu_base In lowres mode, hrtimers are serviced by the tick instead of a clock event. Now it works well as long as the tick stays periodic but we must also make sure that the hrtimers are serviced in dynticks mode. Part of that job consist in kicking a dynticks hrtimer target in order to make it reconsider the next tick to schedule to correctly handle the hrtimer's expiring time. And that part isn't handled by the hrtimers subsystem. To prepare for fixing this, we need __hrtimer_start_range_ns() to be able to resolve the CPU target associated to a hrtimer's object 'cpu_base' so that the kick can be centralized there. So lets store it in the 'struct hrtimer_cpu_base' to resolve the CPU without overhead. It is set once at CPU's online notification. Signed-off-by: Viresh Kumar Signed-off-by: Frederic Weisbecker Link: http://lkml.kernel.org/r/1403393357-2070-4-git-send-email-fweisbec@gmail.com Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 2 ++ kernel/time/hrtimer.c | 1 + 2 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index e7a8d3fa91d5..bb4ffff31c69 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -165,6 +165,7 @@ enum hrtimer_base_type { * struct hrtimer_cpu_base - the per cpu clock bases * @lock: lock protecting the base and associated clock bases * and timers + * @cpu: cpu number * @active_bases: Bitfield to mark bases with active timers * @clock_was_set: Indicates that clock was set from irq context. * @expires_next: absolute time of the next event which was scheduled @@ -179,6 +180,7 @@ enum hrtimer_base_type { */ struct hrtimer_cpu_base { raw_spinlock_t lock; + unsigned int cpu; unsigned int active_bases; unsigned int clock_was_set; #ifdef CONFIG_HIGH_RES_TIMERS diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 3ab28993f6e0..0e32d4e7583f 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -1680,6 +1680,7 @@ static void init_hrtimers_cpu(int cpu) timerqueue_init_head(&cpu_base->clock_base[i].active); } + cpu_base->cpu = cpu; hrtimer_init_hres(cpu_base); } -- cgit v1.2.3-59-g8ed1b From 027d7c2a26ad637f14c72f401dd8da0bb6df20c8 Mon Sep 17 00:00:00 2001 From: Keerthy Date: Wed, 18 Jun 2014 15:28:54 +0530 Subject: mfd: palmas: Add tps65917 specific definitions and enums Add tps65917 specific definitions and enums. 
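For context (not part of the patch), the *_MASK / *_SHIFT pairs added below are intended to be combined in the usual read-modify-write fashion; a hypothetical sketch using only the new definitions, with the actual register access left to the caller:

    /*
     * Hypothetical illustration: place a VSEL value into an SMPS1
     * voltage register value using the TPS65917 mask/shift pairs
     * defined in this patch.
     */
    static inline unsigned int tps65917_smps1_encode_vsel(unsigned int reg_val,
                                                          unsigned int vsel)
    {
            reg_val &= ~TPS65917_SMPS1_VOLTAGE_VSEL_MASK;
            reg_val |= (vsel << TPS65917_SMPS1_VOLTAGE_VSEL_SHIFT) &
                       TPS65917_SMPS1_VOLTAGE_VSEL_MASK;
            return reg_val;
    }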
Acked-by: Lee Jones Signed-off-by: Keerthy Signed-off-by: Mark Brown --- include/linux/mfd/palmas.h | 793 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 793 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/palmas.h b/include/linux/mfd/palmas.h index 3420e09e2e20..f760a07ab76e 100644 --- a/include/linux/mfd/palmas.h +++ b/include/linux/mfd/palmas.h @@ -30,6 +30,8 @@ #define PALMAS_CHIP_ID 0xC035 #define PALMAS_CHIP_CHARGER_ID 0xC036 +#define TPS65917_RESERVED -1 + #define is_palmas(a) (((a) == PALMAS_CHIP_OLD_ID) || \ ((a) == PALMAS_CHIP_ID)) #define is_palmas_charger(a) ((a) == PALMAS_CHIP_CHARGER_ID) @@ -184,6 +186,27 @@ enum palmas_regulators { PALMAS_NUM_REGS, }; +enum tps65917_regulators { + /* SMPS regulators */ + TPS65917_REG_SMPS1, + TPS65917_REG_SMPS2, + TPS65917_REG_SMPS3, + TPS65917_REG_SMPS4, + TPS65917_REG_SMPS5, + /* LDO regulators */ + TPS65917_REG_LDO1, + TPS65917_REG_LDO2, + TPS65917_REG_LDO3, + TPS65917_REG_LDO4, + TPS65917_REG_LDO5, + TPS65917_REG_REGEN1, + TPS65917_REG_REGEN2, + TPS65917_REG_REGEN3, + + /* Total number of regulators */ + TPS65917_NUM_REGS, +}; + /* External controll signal name */ enum { PALMAS_EXT_CONTROL_ENABLE1 = 0x1, @@ -228,6 +251,24 @@ enum palmas_external_requestor_id { PALMAS_EXTERNAL_REQSTR_ID_MAX, }; +enum tps65917_external_requestor_id { + TPS65917_EXTERNAL_REQSTR_ID_REGEN1, + TPS65917_EXTERNAL_REQSTR_ID_REGEN2, + TPS65917_EXTERNAL_REQSTR_ID_REGEN3, + TPS65917_EXTERNAL_REQSTR_ID_SMPS1, + TPS65917_EXTERNAL_REQSTR_ID_SMPS2, + TPS65917_EXTERNAL_REQSTR_ID_SMPS3, + TPS65917_EXTERNAL_REQSTR_ID_SMPS4, + TPS65917_EXTERNAL_REQSTR_ID_SMPS5, + TPS65917_EXTERNAL_REQSTR_ID_LDO1, + TPS65917_EXTERNAL_REQSTR_ID_LDO2, + TPS65917_EXTERNAL_REQSTR_ID_LDO3, + TPS65917_EXTERNAL_REQSTR_ID_LDO4, + TPS65917_EXTERNAL_REQSTR_ID_LDO5, + /* Last entry */ + TPS65917_EXTERNAL_REQSTR_ID_MAX, +}; + struct palmas_pmic_platform_data { /* An array of pointers to regulator init data indexed by regulator * ID @@ -349,6 +390,48 @@ struct palmas_gpadc_result { #define PALMAS_MAX_CHANNELS 16 +/* Define the tps65917 IRQ numbers */ +enum tps65917_irqs { + /* INT1 registers */ + TPS65917_RESERVED1, + TPS65917_PWRON_IRQ, + TPS65917_LONG_PRESS_KEY_IRQ, + TPS65917_RESERVED2, + TPS65917_PWRDOWN_IRQ, + TPS65917_HOTDIE_IRQ, + TPS65917_VSYS_MON_IRQ, + TPS65917_RESERVED3, + /* INT2 registers */ + TPS65917_RESERVED4, + TPS65917_OTP_ERROR_IRQ, + TPS65917_WDT_IRQ, + TPS65917_RESERVED5, + TPS65917_RESET_IN_IRQ, + TPS65917_FSD_IRQ, + TPS65917_SHORT_IRQ, + TPS65917_RESERVED6, + /* INT3 registers */ + TPS65917_GPADC_AUTO_0_IRQ, + TPS65917_GPADC_AUTO_1_IRQ, + TPS65917_GPADC_EOC_SW_IRQ, + TPS65917_RESREVED6, + TPS65917_RESERVED7, + TPS65917_RESERVED8, + TPS65917_RESERVED9, + TPS65917_VBUS_IRQ, + /* INT4 registers */ + TPS65917_GPIO_0_IRQ, + TPS65917_GPIO_1_IRQ, + TPS65917_GPIO_2_IRQ, + TPS65917_GPIO_3_IRQ, + TPS65917_GPIO_4_IRQ, + TPS65917_GPIO_5_IRQ, + TPS65917_GPIO_6_IRQ, + TPS65917_RESERVED10, + /* Total Number IRQs */ + TPS65917_NUM_IRQ, +}; + /* Define the palmas IRQ numbers */ enum palmas_irqs { /* INT1 registers */ @@ -400,6 +483,7 @@ struct palmas_pmic { int smps123; int smps457; + int smps12; int range[PALMAS_REG_SMPS10_OUT1]; unsigned int ramp_delay[PALMAS_REG_SMPS10_OUT1]; @@ -2871,6 +2955,715 @@ enum usb_irq_events { #define PALMAS_GPADC_TRIM15 0x0E #define PALMAS_GPADC_TRIM16 0x0F +/* TPS65917 Interrupt registers */ + +/* Registers for function INTERRUPT */ +#define TPS65917_INT1_STATUS 0x00 +#define TPS65917_INT1_MASK 0x01 +#define 
TPS65917_INT1_LINE_STATE 0x02 +#define TPS65917_INT2_STATUS 0x05 +#define TPS65917_INT2_MASK 0x06 +#define TPS65917_INT2_LINE_STATE 0x07 +#define TPS65917_INT3_STATUS 0x0A +#define TPS65917_INT3_MASK 0x0B +#define TPS65917_INT3_LINE_STATE 0x0C +#define TPS65917_INT4_STATUS 0x0F +#define TPS65917_INT4_MASK 0x10 +#define TPS65917_INT4_LINE_STATE 0x11 +#define TPS65917_INT4_EDGE_DETECT1 0x12 +#define TPS65917_INT4_EDGE_DETECT2 0x13 +#define TPS65917_INT_CTRL 0x14 + +/* Bit definitions for INT1_STATUS */ +#define TPS65917_INT1_STATUS_VSYS_MON 0x40 +#define TPS65917_INT1_STATUS_VSYS_MON_SHIFT 0x06 +#define TPS65917_INT1_STATUS_HOTDIE 0x20 +#define TPS65917_INT1_STATUS_HOTDIE_SHIFT 0x05 +#define TPS65917_INT1_STATUS_PWRDOWN 0x10 +#define TPS65917_INT1_STATUS_PWRDOWN_SHIFT 0x04 +#define TPS65917_INT1_STATUS_LONG_PRESS_KEY 0x04 +#define TPS65917_INT1_STATUS_LONG_PRESS_KEY_SHIFT 0x02 +#define TPS65917_INT1_STATUS_PWRON 0x02 +#define TPS65917_INT1_STATUS_PWRON_SHIFT 0x01 + +/* Bit definitions for INT1_MASK */ +#define TPS65917_INT1_MASK_VSYS_MON 0x40 +#define TPS65917_INT1_MASK_VSYS_MON_SHIFT 0x06 +#define TPS65917_INT1_MASK_HOTDIE 0x20 +#define TPS65917_INT1_MASK_HOTDIE_SHIFT 0x05 +#define TPS65917_INT1_MASK_PWRDOWN 0x10 +#define TPS65917_INT1_MASK_PWRDOWN_SHIFT 0x04 +#define TPS65917_INT1_MASK_LONG_PRESS_KEY 0x04 +#define TPS65917_INT1_MASK_LONG_PRESS_KEY_SHIFT 0x02 +#define TPS65917_INT1_MASK_PWRON 0x02 +#define TPS65917_INT1_MASK_PWRON_SHIFT 0x01 + +/* Bit definitions for INT1_LINE_STATE */ +#define TPS65917_INT1_LINE_STATE_VSYS_MON 0x40 +#define TPS65917_INT1_LINE_STATE_VSYS_MON_SHIFT 0x06 +#define TPS65917_INT1_LINE_STATE_HOTDIE 0x20 +#define TPS65917_INT1_LINE_STATE_HOTDIE_SHIFT 0x05 +#define TPS65917_INT1_LINE_STATE_PWRDOWN 0x10 +#define TPS65917_INT1_LINE_STATE_PWRDOWN_SHIFT 0x04 +#define TPS65917_INT1_LINE_STATE_LONG_PRESS_KEY 0x04 +#define TPS65917_INT1_LINE_STATE_LONG_PRESS_KEY_SHIFT 0x02 +#define TPS65917_INT1_LINE_STATE_PWRON 0x02 +#define TPS65917_INT1_LINE_STATE_PWRON_SHIFT 0x01 + +/* Bit definitions for INT2_STATUS */ +#define TPS65917_INT2_STATUS_SHORT 0x40 +#define TPS65917_INT2_STATUS_SHORT_SHIFT 0x06 +#define TPS65917_INT2_STATUS_FSD 0x20 +#define TPS65917_INT2_STATUS_FSD_SHIFT 0x05 +#define TPS65917_INT2_STATUS_RESET_IN 0x10 +#define TPS65917_INT2_STATUS_RESET_IN_SHIFT 0x04 +#define TPS65917_INT2_STATUS_WDT 0x04 +#define TPS65917_INT2_STATUS_WDT_SHIFT 0x02 +#define TPS65917_INT2_STATUS_OTP_ERROR 0x02 +#define TPS65917_INT2_STATUS_OTP_ERROR_SHIFT 0x01 + +/* Bit definitions for INT2_MASK */ +#define TPS65917_INT2_MASK_SHORT 0x40 +#define TPS65917_INT2_MASK_SHORT_SHIFT 0x06 +#define TPS65917_INT2_MASK_FSD 0x20 +#define TPS65917_INT2_MASK_FSD_SHIFT 0x05 +#define TPS65917_INT2_MASK_RESET_IN 0x10 +#define TPS65917_INT2_MASK_RESET_IN_SHIFT 0x04 +#define TPS65917_INT2_MASK_WDT 0x04 +#define TPS65917_INT2_MASK_WDT_SHIFT 0x02 +#define TPS65917_INT2_MASK_OTP_ERROR_TIMER 0x02 +#define TPS65917_INT2_MASK_OTP_ERROR_SHIFT 0x01 + +/* Bit definitions for INT2_LINE_STATE */ +#define TPS65917_INT2_LINE_STATE_SHORT 0x40 +#define TPS65917_INT2_LINE_STATE_SHORT_SHIFT 0x06 +#define TPS65917_INT2_LINE_STATE_FSD 0x20 +#define TPS65917_INT2_LINE_STATE_FSD_SHIFT 0x05 +#define TPS65917_INT2_LINE_STATE_RESET_IN 0x10 +#define TPS65917_INT2_LINE_STATE_RESET_IN_SHIFT 0x04 +#define TPS65917_INT2_LINE_STATE_WDT 0x04 +#define TPS65917_INT2_LINE_STATE_WDT_SHIFT 0x02 +#define TPS65917_INT2_LINE_STATE_OTP_ERROR 0x02 +#define TPS65917_INT2_LINE_STATE_OTP_ERROR_SHIFT 0x01 + +/* Bit definitions for INT3_STATUS */ 
+#define TPS65917_INT3_STATUS_VBUS 0x80 +#define TPS65917_INT3_STATUS_VBUS_SHIFT 0x07 +#define TPS65917_INT3_STATUS_GPADC_EOC_SW 0x04 +#define TPS65917_INT3_STATUS_GPADC_EOC_SW_SHIFT 0x02 +#define TPS65917_INT3_STATUS_GPADC_AUTO_1 0x02 +#define TPS65917_INT3_STATUS_GPADC_AUTO_1_SHIFT 0x01 +#define TPS65917_INT3_STATUS_GPADC_AUTO_0 0x01 +#define TPS65917_INT3_STATUS_GPADC_AUTO_0_SHIFT 0x00 + +/* Bit definitions for INT3_MASK */ +#define TPS65917_INT3_MASK_VBUS 0x80 +#define TPS65917_INT3_MASK_VBUS_SHIFT 0x07 +#define TPS65917_INT3_MASK_GPADC_EOC_SW 0x04 +#define TPS65917_INT3_MASK_GPADC_EOC_SW_SHIFT 0x02 +#define TPS65917_INT3_MASK_GPADC_AUTO_1 0x02 +#define TPS65917_INT3_MASK_GPADC_AUTO_1_SHIFT 0x01 +#define TPS65917_INT3_MASK_GPADC_AUTO_0 0x01 +#define TPS65917_INT3_MASK_GPADC_AUTO_0_SHIFT 0x00 + +/* Bit definitions for INT3_LINE_STATE */ +#define TPS65917_INT3_LINE_STATE_VBUS 0x80 +#define TPS65917_INT3_LINE_STATE_VBUS_SHIFT 0x07 +#define TPS65917_INT3_LINE_STATE_GPADC_EOC_SW 0x04 +#define TPS65917_INT3_LINE_STATE_GPADC_EOC_SW_SHIFT 0x02 +#define TPS65917_INT3_LINE_STATE_GPADC_AUTO_1 0x02 +#define TPS65917_INT3_LINE_STATE_GPADC_AUTO_1_SHIFT 0x01 +#define TPS65917_INT3_LINE_STATE_GPADC_AUTO_0 0x01 +#define TPS65917_INT3_LINE_STATE_GPADC_AUTO_0_SHIFT 0x00 + +/* Bit definitions for INT4_STATUS */ +#define TPS65917_INT4_STATUS_GPIO_6 0x40 +#define TPS65917_INT4_STATUS_GPIO_6_SHIFT 0x06 +#define TPS65917_INT4_STATUS_GPIO_5 0x20 +#define TPS65917_INT4_STATUS_GPIO_5_SHIFT 0x05 +#define TPS65917_INT4_STATUS_GPIO_4 0x10 +#define TPS65917_INT4_STATUS_GPIO_4_SHIFT 0x04 +#define TPS65917_INT4_STATUS_GPIO_3 0x08 +#define TPS65917_INT4_STATUS_GPIO_3_SHIFT 0x03 +#define TPS65917_INT4_STATUS_GPIO_2 0x04 +#define TPS65917_INT4_STATUS_GPIO_2_SHIFT 0x02 +#define TPS65917_INT4_STATUS_GPIO_1 0x02 +#define TPS65917_INT4_STATUS_GPIO_1_SHIFT 0x01 +#define TPS65917_INT4_STATUS_GPIO_0 0x01 +#define TPS65917_INT4_STATUS_GPIO_0_SHIFT 0x00 + +/* Bit definitions for INT4_MASK */ +#define TPS65917_INT4_MASK_GPIO_6 0x40 +#define TPS65917_INT4_MASK_GPIO_6_SHIFT 0x06 +#define TPS65917_INT4_MASK_GPIO_5 0x20 +#define TPS65917_INT4_MASK_GPIO_5_SHIFT 0x05 +#define TPS65917_INT4_MASK_GPIO_4 0x10 +#define TPS65917_INT4_MASK_GPIO_4_SHIFT 0x04 +#define TPS65917_INT4_MASK_GPIO_3 0x08 +#define TPS65917_INT4_MASK_GPIO_3_SHIFT 0x03 +#define TPS65917_INT4_MASK_GPIO_2 0x04 +#define TPS65917_INT4_MASK_GPIO_2_SHIFT 0x02 +#define TPS65917_INT4_MASK_GPIO_1 0x02 +#define TPS65917_INT4_MASK_GPIO_1_SHIFT 0x01 +#define TPS65917_INT4_MASK_GPIO_0 0x01 +#define TPS65917_INT4_MASK_GPIO_0_SHIFT 0x00 + +/* Bit definitions for INT4_LINE_STATE */ +#define TPS65917_INT4_LINE_STATE_GPIO_6 0x40 +#define TPS65917_INT4_LINE_STATE_GPIO_6_SHIFT 0x06 +#define TPS65917_INT4_LINE_STATE_GPIO_5 0x20 +#define TPS65917_INT4_LINE_STATE_GPIO_5_SHIFT 0x05 +#define TPS65917_INT4_LINE_STATE_GPIO_4 0x10 +#define TPS65917_INT4_LINE_STATE_GPIO_4_SHIFT 0x04 +#define TPS65917_INT4_LINE_STATE_GPIO_3 0x08 +#define TPS65917_INT4_LINE_STATE_GPIO_3_SHIFT 0x03 +#define TPS65917_INT4_LINE_STATE_GPIO_2 0x04 +#define TPS65917_INT4_LINE_STATE_GPIO_2_SHIFT 0x02 +#define TPS65917_INT4_LINE_STATE_GPIO_1 0x02 +#define TPS65917_INT4_LINE_STATE_GPIO_1_SHIFT 0x01 +#define TPS65917_INT4_LINE_STATE_GPIO_0 0x01 +#define TPS65917_INT4_LINE_STATE_GPIO_0_SHIFT 0x00 + +/* Bit definitions for INT4_EDGE_DETECT1 */ +#define TPS65917_INT4_EDGE_DETECT1_GPIO_3_RISING 0x80 +#define TPS65917_INT4_EDGE_DETECT1_GPIO_3_RISING_SHIFT 0x07 +#define TPS65917_INT4_EDGE_DETECT1_GPIO_3_FALLING 0x40 +#define 
TPS65917_INT4_EDGE_DETECT1_GPIO_3_FALLING_SHIFT 0x06 +#define TPS65917_INT4_EDGE_DETECT1_GPIO_2_RISING 0x20 +#define TPS65917_INT4_EDGE_DETECT1_GPIO_2_RISING_SHIFT 0x05 +#define TPS65917_INT4_EDGE_DETECT1_GPIO_2_FALLING 0x10 +#define TPS65917_INT4_EDGE_DETECT1_GPIO_2_FALLING_SHIFT 0x04 +#define TPS65917_INT4_EDGE_DETECT1_GPIO_1_RISING 0x08 +#define TPS65917_INT4_EDGE_DETECT1_GPIO_1_RISING_SHIFT 0x03 +#define TPS65917_INT4_EDGE_DETECT1_GPIO_1_FALLING 0x04 +#define TPS65917_INT4_EDGE_DETECT1_GPIO_1_FALLING_SHIFT 0x02 +#define TPS65917_INT4_EDGE_DETECT1_GPIO_0_RISING 0x02 +#define TPS65917_INT4_EDGE_DETECT1_GPIO_0_RISING_SHIFT 0x01 +#define TPS65917_INT4_EDGE_DETECT1_GPIO_0_FALLING 0x01 +#define TPS65917_INT4_EDGE_DETECT1_GPIO_0_FALLING_SHIFT 0x00 + +/* Bit definitions for INT4_EDGE_DETECT2 */ +#define TPS65917_INT4_EDGE_DETECT2_GPIO_6_RISING 0x20 +#define TPS65917_INT4_EDGE_DETECT2_GPIO_6_RISING_SHIFT 0x05 +#define TPS65917_INT4_EDGE_DETECT2_GPIO_6_FALLING 0x10 +#define TPS65917_INT4_EDGE_DETECT2_GPIO_6_FALLING_SHIFT 0x04 +#define TPS65917_INT4_EDGE_DETECT2_GPIO_5_RISING 0x08 +#define TPS65917_INT4_EDGE_DETECT2_GPIO_5_RISING_SHIFT 0x03 +#define TPS65917_INT4_EDGE_DETECT2_GPIO_5_FALLING 0x04 +#define TPS65917_INT4_EDGE_DETECT2_GPIO_5_FALLING_SHIFT 0x02 +#define TPS65917_INT4_EDGE_DETECT2_GPIO_4_RISING 0x02 +#define TPS65917_INT4_EDGE_DETECT2_GPIO_4_RISING_SHIFT 0x01 +#define TPS65917_INT4_EDGE_DETECT2_GPIO_4_FALLING 0x01 +#define TPS65917_INT4_EDGE_DETECT2_GPIO_4_FALLING_SHIFT 0x00 + +/* Bit definitions for INT_CTRL */ +#define TPS65917_INT_CTRL_INT_PENDING 0x04 +#define TPS65917_INT_CTRL_INT_PENDING_SHIFT 0x02 +#define TPS65917_INT_CTRL_INT_CLEAR 0x01 +#define TPS65917_INT_CTRL_INT_CLEAR_SHIFT 0x00 + +/* TPS65917 SMPS Registers */ + +/* Registers for function SMPS */ +#define TPS65917_SMPS1_CTRL 0x00 +#define TPS65917_SMPS1_FORCE 0x02 +#define TPS65917_SMPS1_VOLTAGE 0x03 +#define TPS65917_SMPS2_CTRL 0x04 +#define TPS65917_SMPS2_FORCE 0x06 +#define TPS65917_SMPS2_VOLTAGE 0x07 +#define TPS65917_SMPS3_CTRL 0x0C +#define TPS65917_SMPS3_FORCE 0x0E +#define TPS65917_SMPS3_VOLTAGE 0x0F +#define TPS65917_SMPS4_CTRL 0x10 +#define TPS65917_SMPS4_VOLTAGE 0x13 +#define TPS65917_SMPS5_CTRL 0x18 +#define TPS65917_SMPS5_VOLTAGE 0x1B +#define TPS65917_SMPS_CTRL 0x24 +#define TPS65917_SMPS_PD_CTRL 0x25 +#define TPS65917_SMPS_THERMAL_EN 0x27 +#define TPS65917_SMPS_THERMAL_STATUS 0x28 +#define TPS65917_SMPS_SHORT_STATUS 0x29 +#define TPS65917_SMPS_NEGATIVE_CURRENT_LIMIT_EN 0x2A +#define TPS65917_SMPS_POWERGOOD_MASK1 0x2B +#define TPS65917_SMPS_POWERGOOD_MASK2 0x2C + +/* Bit definitions for SMPS1_CTRL */ +#define TPS65917_SMPS1_CTRL_WR_S 0x80 +#define TPS65917_SMPS1_CTRL_WR_S_SHIFT 0x07 +#define TPS65917_SMPS1_CTRL_ROOF_FLOOR_EN 0x40 +#define TPS65917_SMPS1_CTRL_ROOF_FLOOR_EN_SHIFT 0x06 +#define TPS65917_SMPS1_CTRL_STATUS_MASK 0x30 +#define TPS65917_SMPS1_CTRL_STATUS_SHIFT 0x04 +#define TPS65917_SMPS1_CTRL_MODE_SLEEP_MASK 0x0C +#define TPS65917_SMPS1_CTRL_MODE_SLEEP_SHIFT 0x02 +#define TPS65917_SMPS1_CTRL_MODE_ACTIVE_MASK 0x03 +#define TPS65917_SMPS1_CTRL_MODE_ACTIVE_SHIFT 0x00 + +/* Bit definitions for SMPS1_FORCE */ +#define TPS65917_SMPS1_FORCE_CMD 0x80 +#define TPS65917_SMPS1_FORCE_CMD_SHIFT 0x07 +#define TPS65917_SMPS1_FORCE_VSEL_MASK 0x7F +#define TPS65917_SMPS1_FORCE_VSEL_SHIFT 0x00 + +/* Bit definitions for SMPS1_VOLTAGE */ +#define TPS65917_SMPS1_VOLTAGE_RANGE 0x80 +#define TPS65917_SMPS1_VOLTAGE_RANGE_SHIFT 0x07 +#define TPS65917_SMPS1_VOLTAGE_VSEL_MASK 0x7F +#define TPS65917_SMPS1_VOLTAGE_VSEL_SHIFT 
0x00 + +/* Bit definitions for SMPS2_CTRL */ +#define TPS65917_SMPS2_CTRL_WR_S 0x80 +#define TPS65917_SMPS2_CTRL_WR_S_SHIFT 0x07 +#define TPS65917_SMPS2_CTRL_ROOF_FLOOR_EN 0x40 +#define TPS65917_SMPS2_CTRL_ROOF_FLOOR_EN_SHIFT 0x06 +#define TPS65917_SMPS2_CTRL_STATUS_MASK 0x30 +#define TPS65917_SMPS2_CTRL_STATUS_SHIFT 0x04 +#define TPS65917_SMPS2_CTRL_MODE_SLEEP_MASK 0x0C +#define TPS65917_SMPS2_CTRL_MODE_SLEEP_SHIFT 0x02 +#define TPS65917_SMPS2_CTRL_MODE_ACTIVE_MASK 0x03 +#define TPS65917_SMPS2_CTRL_MODE_ACTIVE_SHIFT 0x00 + +/* Bit definitions for SMPS2_FORCE */ +#define TPS65917_SMPS2_FORCE_CMD 0x80 +#define TPS65917_SMPS2_FORCE_CMD_SHIFT 0x07 +#define TPS65917_SMPS2_FORCE_VSEL_MASK 0x7F +#define TPS65917_SMPS2_FORCE_VSEL_SHIFT 0x00 + +/* Bit definitions for SMPS2_VOLTAGE */ +#define TPS65917_SMPS2_VOLTAGE_RANGE 0x80 +#define TPS65917_SMPS2_VOLTAGE_RANGE_SHIFT 0x07 +#define TPS65917_SMPS2_VOLTAGE_VSEL_MASK 0x7F +#define TPS65917_SMPS2_VOLTAGE_VSEL_SHIFT 0x00 + +/* Bit definitions for SMPS3_CTRL */ +#define TPS65917_SMPS3_CTRL_WR_S 0x80 +#define TPS65917_SMPS3_CTRL_WR_S_SHIFT 0x07 +#define TPS65917_SMPS3_CTRL_ROOF_FLOOR_EN 0x40 +#define TPS65917_SMPS3_CTRL_ROOF_FLOOR_EN_SHIFT 0x06 +#define TPS65917_SMPS3_CTRL_STATUS_MASK 0x30 +#define TPS65917_SMPS3_CTRL_STATUS_SHIFT 0x04 +#define TPS65917_SMPS3_CTRL_MODE_SLEEP_MASK 0x0C +#define TPS65917_SMPS3_CTRL_MODE_SLEEP_SHIFT 0x02 +#define TPS65917_SMPS3_CTRL_MODE_ACTIVE_MASK 0x03 +#define TPS65917_SMPS3_CTRL_MODE_ACTIVE_SHIFT 0x00 + +/* Bit definitions for SMPS3_FORCE */ +#define TPS65917_SMPS3_FORCE_CMD 0x80 +#define TPS65917_SMPS3_FORCE_CMD_SHIFT 0x07 +#define TPS65917_SMPS3_FORCE_VSEL_MASK 0x7F +#define TPS65917_SMPS3_FORCE_VSEL_SHIFT 0x00 + +/* Bit definitions for SMPS3_VOLTAGE */ +#define TPS65917_SMPS3_VOLTAGE_RANGE 0x80 +#define TPS65917_SMPS3_VOLTAGE_RANGE_SHIFT 0x07 +#define TPS65917_SMPS3_VOLTAGE_VSEL_MASK 0x7F +#define TPS65917_SMPS3_VOLTAGE_VSEL_SHIFT 0x00 + +/* Bit definitions for SMPS4_CTRL */ +#define TPS65917_SMPS4_CTRL_WR_S 0x80 +#define TPS65917_SMPS4_CTRL_WR_S_SHIFT 0x07 +#define TPS65917_SMPS4_CTRL_ROOF_FLOOR_EN 0x40 +#define TPS65917_SMPS4_CTRL_ROOF_FLOOR_EN_SHIFT 0x06 +#define TPS65917_SMPS4_CTRL_STATUS_MASK 0x30 +#define TPS65917_SMPS4_CTRL_STATUS_SHIFT 0x04 +#define TPS65917_SMPS4_CTRL_MODE_SLEEP_MASK 0x0C +#define TPS65917_SMPS4_CTRL_MODE_SLEEP_SHIFT 0x02 +#define TPS65917_SMPS4_CTRL_MODE_ACTIVE_MASK 0x03 +#define TPS65917_SMPS4_CTRL_MODE_ACTIVE_SHIFT 0x00 + +/* Bit definitions for SMPS4_VOLTAGE */ +#define TPS65917_SMPS4_VOLTAGE_RANGE 0x80 +#define TPS65917_SMPS4_VOLTAGE_RANGE_SHIFT 0x07 +#define TPS65917_SMPS4_VOLTAGE_VSEL_MASK 0x7F +#define TPS65917_SMPS4_VOLTAGE_VSEL_SHIFT 0x00 + +/* Bit definitions for SMPS5_CTRL */ +#define TPS65917_SMPS5_CTRL_WR_S 0x80 +#define TPS65917_SMPS5_CTRL_WR_S_SHIFT 0x07 +#define TPS65917_SMPS5_CTRL_ROOF_FLOOR_EN 0x40 +#define TPS65917_SMPS5_CTRL_ROOF_FLOOR_EN_SHIFT 0x06 +#define TPS65917_SMPS5_CTRL_STATUS_MASK 0x30 +#define TPS65917_SMPS5_CTRL_STATUS_SHIFT 0x04 +#define TPS65917_SMPS5_CTRL_MODE_SLEEP_MASK 0x0C +#define TPS65917_SMPS5_CTRL_MODE_SLEEP_SHIFT 0x02 +#define TPS65917_SMPS5_CTRL_MODE_ACTIVE_MASK 0x03 +#define TPS65917_SMPS5_CTRL_MODE_ACTIVE_SHIFT 0x00 + +/* Bit definitions for SMPS5_VOLTAGE */ +#define TPS65917_SMPS5_VOLTAGE_RANGE 0x80 +#define TPS65917_SMPS5_VOLTAGE_RANGE_SHIFT 0x07 +#define TPS65917_SMPS5_VOLTAGE_VSEL_MASK 0x7F +#define TPS65917_SMPS5_VOLTAGE_VSEL_SHIFT 0x00 + +/* Bit definitions for SMPS_CTRL */ +#define TPS65917_SMPS_CTRL_SMPS1_SMPS12_EN 0x10 +#define 
TPS65917_SMPS_CTRL_SMPS1_SMPS12_EN_SHIFT 0x04 +#define TPS65917_SMPS_CTRL_SMPS12_PHASE_CTRL 0x03 +#define TPS65917_SMPS_CTRL_SMPS12_PHASE_CTRL_SHIFT 0x00 + +/* Bit definitions for SMPS_PD_CTRL */ +#define TPS65917_SMPS_PD_CTRL_SMPS5 0x40 +#define TPS65917_SMPS_PD_CTRL_SMPS5_SHIFT 0x06 +#define TPS65917_SMPS_PD_CTRL_SMPS4 0x10 +#define TPS65917_SMPS_PD_CTRL_SMPS4_SHIFT 0x04 +#define TPS65917_SMPS_PD_CTRL_SMPS3 0x08 +#define TPS65917_SMPS_PD_CTRL_SMPS3_SHIFT 0x03 +#define TPS65917_SMPS_PD_CTRL_SMPS2 0x02 +#define TPS65917_SMPS_PD_CTRL_SMPS2_SHIFT 0x01 +#define TPS65917_SMPS_PD_CTRL_SMPS1 0x01 +#define TPS65917_SMPS_PD_CTRL_SMPS1_SHIFT 0x00 + +/* Bit definitions for SMPS_THERMAL_EN */ +#define TPS65917_SMPS_THERMAL_EN_SMPS5 0x40 +#define TPS65917_SMPS_THERMAL_EN_SMPS5_SHIFT 0x06 +#define TPS65917_SMPS_THERMAL_EN_SMPS3 0x08 +#define TPS65917_SMPS_THERMAL_EN_SMPS3_SHIFT 0x03 +#define TPS65917_SMPS_THERMAL_EN_SMPS12 0x01 +#define TPS65917_SMPS_THERMAL_EN_SMPS12_SHIFT 0x00 + +/* Bit definitions for SMPS_THERMAL_STATUS */ +#define TPS65917_SMPS_THERMAL_STATUS_SMPS5 0x40 +#define TPS65917_SMPS_THERMAL_STATUS_SMPS5_SHIFT 0x06 +#define TPS65917_SMPS_THERMAL_STATUS_SMPS3 0x08 +#define TPS65917_SMPS_THERMAL_STATUS_SMPS3_SHIFT 0x03 +#define TPS65917_SMPS_THERMAL_STATUS_SMPS12 0x01 +#define TPS65917_SMPS_THERMAL_STATUS_SMPS12_SHIFT 0x00 + +/* Bit definitions for SMPS_SHORT_STATUS */ +#define TPS65917_SMPS_SHORT_STATUS_SMPS5 0x40 +#define TPS65917_SMPS_SHORT_STATUS_SMPS5_SHIFT 0x06 +#define TPS65917_SMPS_SHORT_STATUS_SMPS4 0x10 +#define TPS65917_SMPS_SHORT_STATUS_SMPS4_SHIFT 0x04 +#define TPS65917_SMPS_SHORT_STATUS_SMPS3 0x08 +#define TPS65917_SMPS_SHORT_STATUS_SMPS3_SHIFT 0x03 +#define TPS65917_SMPS_SHORT_STATUS_SMPS2 0x02 +#define TPS65917_SMPS_SHORT_STATUS_SMPS2_SHIFT 0x01 +#define TPS65917_SMPS_SHORT_STATUS_SMPS1 0x01 +#define TPS65917_SMPS_SHORT_STATUS_SMPS1_SHIFT 0x00 + +/* Bit definitions for SMPS_NEGATIVE_CURRENT_LIMIT_EN */ +#define TPS65917_SMPS_NEGATIVE_CURRENT_LIMIT_EN_SMPS5 0x40 +#define TPS65917_SMPS_NEGATIVE_CURRENT_LIMIT_EN_SMPS5_SHIFT 0x06 +#define TPS65917_SMPS_NEGATIVE_CURRENT_LIMIT_EN_SMPS4 0x10 +#define TPS65917_SMPS_NEGATIVE_CURRENT_LIMIT_EN_SMPS4_SHIFT 0x04 +#define TPS65917_SMPS_NEGATIVE_CURRENT_LIMIT_EN_SMPS3 0x08 +#define TPS65917_SMPS_NEGATIVE_CURRENT_LIMIT_EN_SMPS3_SHIFT 0x03 +#define TPS65917_SMPS_NEGATIVE_CURRENT_LIMIT_EN_SMPS2 0x02 +#define TPS65917_SMPS_NEGATIVE_CURRENT_LIMIT_EN_SMPS2_SHIFT 0x01 +#define TPS65917_SMPS_NEGATIVE_CURRENT_LIMIT_EN_SMPS1 0x01 +#define TPS65917_SMPS_NEGATIVE_CURRENT_LIMIT_EN_SMPS1_SHIFT 0x00 + +/* Bit definitions for SMPS_POWERGOOD_MASK1 */ +#define TPS65917_SMPS_POWERGOOD_MASK1_SMPS5 0x40 +#define TPS65917_SMPS_POWERGOOD_MASK1_SMPS5_SHIFT 0x06 +#define TPS65917_SMPS_POWERGOOD_MASK1_SMPS4 0x10 +#define TPS65917_SMPS_POWERGOOD_MASK1_SMPS4_SHIFT 0x04 +#define TPS65917_SMPS_POWERGOOD_MASK1_SMPS3 0x08 +#define TPS65917_SMPS_POWERGOOD_MASK1_SMPS3_SHIFT 0x03 +#define TPS65917_SMPS_POWERGOOD_MASK1_SMPS2 0x02 +#define TPS65917_SMPS_POWERGOOD_MASK1_SMPS2_SHIFT 0x01 +#define TPS65917_SMPS_POWERGOOD_MASK1_SMPS1 0x01 +#define TPS65917_SMPS_POWERGOOD_MASK1_SMPS1_SHIFT 0x00 + +/* Bit definitions for SMPS_POWERGOOD_MASK2 */ +#define TPS65917_SMPS_POWERGOOD_MASK2_POWERGOOD_TYPE_SELECT 0x80 +#define TPS65917_SMPS_POWERGOOD_MASK2_POWERGOOD_TYPE_SELECT_SHIFT 0x07 +#define TPS65917_SMPS_POWERGOOD_MASK2_OVC_ALARM_SHIFT 0x10 +#define TPS65917_SMPS_POWERGOOD_MASK2_OVC_ALARM 0x04 + +/* Bit definitions for SMPS_PLL_CTRL */ + +#define 
TPS65917_SMPS_PLL_CTRL_PLL_EN_PLL_BYPASS_SHIFT 0x08 +#define TPS65917_SMPS_PLL_CTRL_PLL_PLL_EN_BYPASS 0x03 +#define TPS65917_SMPS_PLL_CTRL_PLL_PLL_BYPASS_CLK_SHIFT 0x04 +#define TPS65917_SMPS_PLL_CTRL_PLL_PLL_BYPASS_CLK 0x02 + +/* Registers for function LDO */ +#define TPS65917_LDO1_CTRL 0x00 +#define TPS65917_LDO1_VOLTAGE 0x01 +#define TPS65917_LDO2_CTRL 0x02 +#define TPS65917_LDO2_VOLTAGE 0x03 +#define TPS65917_LDO3_CTRL 0x04 +#define TPS65917_LDO3_VOLTAGE 0x05 +#define TPS65917_LDO4_CTRL 0x0E +#define TPS65917_LDO4_VOLTAGE 0x0F +#define TPS65917_LDO5_CTRL 0x12 +#define TPS65917_LDO5_VOLTAGE 0x13 +#define TPS65917_LDO_PD_CTRL1 0x1B +#define TPS65917_LDO_PD_CTRL2 0x1C +#define TPS65917_LDO_SHORT_STATUS1 0x1D +#define TPS65917_LDO_SHORT_STATUS2 0x1E +#define TPS65917_LDO_PD_CTRL3 0x2D +#define TPS65917_LDO_SHORT_STATUS3 0x2E + +/* Bit definitions for LDO1_CTRL */ +#define TPS65917_LDO1_CTRL_WR_S 0x80 +#define TPS65917_LDO1_CTRL_WR_S_SHIFT 0x07 +#define TPS65917_LDO1_CTRL_BYPASS_EN 0x40 +#define TPS65917_LDO1_CTRL_BYPASS_EN_SHIFT 0x06 +#define TPS65917_LDO1_CTRL_STATUS 0x10 +#define TPS65917_LDO1_CTRL_STATUS_SHIFT 0x04 +#define TPS65917_LDO1_CTRL_MODE_SLEEP 0x04 +#define TPS65917_LDO1_CTRL_MODE_SLEEP_SHIFT 0x02 +#define TPS65917_LDO1_CTRL_MODE_ACTIVE 0x01 +#define TPS65917_LDO1_CTRL_MODE_ACTIVE_SHIFT 0x00 + +/* Bit definitions for LDO1_VOLTAGE */ +#define TPS65917_LDO1_VOLTAGE_VSEL_MASK 0x2F +#define TPS65917_LDO1_VOLTAGE_VSEL_SHIFT 0x00 + +/* Bit definitions for LDO2_CTRL */ +#define TPS65917_LDO2_CTRL_WR_S 0x80 +#define TPS65917_LDO2_CTRL_WR_S_SHIFT 0x07 +#define TPS65917_LDO2_CTRL_BYPASS_EN 0x40 +#define TPS65917_LDO2_CTRL_BYPASS_EN_SHIFT 0x06 +#define TPS65917_LDO2_CTRL_STATUS 0x10 +#define TPS65917_LDO2_CTRL_STATUS_SHIFT 0x04 +#define TPS65917_LDO2_CTRL_MODE_SLEEP 0x04 +#define TPS65917_LDO2_CTRL_MODE_SLEEP_SHIFT 0x02 +#define TPS65917_LDO2_CTRL_MODE_ACTIVE 0x01 +#define TPS65917_LDO2_CTRL_MODE_ACTIVE_SHIFT 0x00 + +/* Bit definitions for LDO2_VOLTAGE */ +#define TPS65917_LDO2_VOLTAGE_VSEL_MASK 0x2F +#define TPS65917_LDO2_VOLTAGE_VSEL_SHIFT 0x00 + +/* Bit definitions for LDO3_CTRL */ +#define TPS65917_LDO3_CTRL_WR_S 0x80 +#define TPS65917_LDO3_CTRL_WR_S_SHIFT 0x07 +#define TPS65917_LDO3_CTRL_STATUS 0x10 +#define TPS65917_LDO3_CTRL_STATUS_SHIFT 0x04 +#define TPS65917_LDO3_CTRL_MODE_SLEEP 0x04 +#define TPS65917_LDO3_CTRL_MODE_SLEEP_SHIFT 0x02 +#define TPS65917_LDO3_CTRL_MODE_ACTIVE 0x01 +#define TPS65917_LDO3_CTRL_MODE_ACTIVE_SHIFT 0x00 + +/* Bit definitions for LDO3_VOLTAGE */ +#define TPS65917_LDO3_VOLTAGE_VSEL_MASK 0x2F +#define TPS65917_LDO3_VOLTAGE_VSEL_SHIFT 0x00 + +/* Bit definitions for LDO4_CTRL */ +#define TPS65917_LDO4_CTRL_WR_S 0x80 +#define TPS65917_LDO4_CTRL_WR_S_SHIFT 0x07 +#define TPS65917_LDO4_CTRL_STATUS 0x10 +#define TPS65917_LDO4_CTRL_STATUS_SHIFT 0x04 +#define TPS65917_LDO4_CTRL_MODE_SLEEP 0x04 +#define TPS65917_LDO4_CTRL_MODE_SLEEP_SHIFT 0x02 +#define TPS65917_LDO4_CTRL_MODE_ACTIVE 0x01 +#define TPS65917_LDO4_CTRL_MODE_ACTIVE_SHIFT 0x00 + +/* Bit definitions for LDO4_VOLTAGE */ +#define TPS65917_LDO4_VOLTAGE_VSEL_MASK 0x2F +#define TPS65917_LDO4_VOLTAGE_VSEL_SHIFT 0x00 + +/* Bit definitions for LDO5_CTRL */ +#define TPS65917_LDO5_CTRL_WR_S 0x80 +#define TPS65917_LDO5_CTRL_WR_S_SHIFT 0x07 +#define TPS65917_LDO5_CTRL_STATUS 0x10 +#define TPS65917_LDO5_CTRL_STATUS_SHIFT 0x04 +#define TPS65917_LDO5_CTRL_MODE_SLEEP 0x04 +#define TPS65917_LDO5_CTRL_MODE_SLEEP_SHIFT 0x02 +#define TPS65917_LDO5_CTRL_MODE_ACTIVE 0x01 +#define TPS65917_LDO5_CTRL_MODE_ACTIVE_SHIFT 0x00 + +/* 
Bit definitions for LDO5_VOLTAGE */ +#define TPS65917_LDO5_VOLTAGE_VSEL_MASK 0x2F +#define TPS65917_LDO5_VOLTAGE_VSEL_SHIFT 0x00 + +/* Bit definitions for LDO_PD_CTRL1 */ +#define TPS65917_LDO_PD_CTRL1_LDO4 0x80 +#define TPS65917_LDO_PD_CTRL1_LDO4_SHIFT 0x07 +#define TPS65917_LDO_PD_CTRL1_LDO2 0x02 +#define TPS65917_LDO_PD_CTRL1_LDO2_SHIFT 0x01 +#define TPS65917_LDO_PD_CTRL1_LDO1 0x01 +#define TPS65917_LDO_PD_CTRL1_LDO1_SHIFT 0x00 + +/* Bit definitions for LDO_PD_CTRL2 */ +#define TPS65917_LDO_PD_CTRL2_LDO3 0x04 +#define TPS65917_LDO_PD_CTRL2_LDO3_SHIFT 0x02 +#define TPS65917_LDO_PD_CTRL2_LDO5 0x02 +#define TPS65917_LDO_PD_CTRL2_LDO5_SHIFT 0x01 + +/* Bit definitions for LDO_PD_CTRL3 */ +#define TPS65917_LDO_PD_CTRL2_LDOVANA 0x80 +#define TPS65917_LDO_PD_CTRL2_LDOVANA_SHIFT 0x07 + +/* Bit definitions for LDO_SHORT_STATUS1 */ +#define TPS65917_LDO_SHORT_STATUS1_LDO4 0x80 +#define TPS65917_LDO_SHORT_STATUS1_LDO4_SHIFT 0x07 +#define TPS65917_LDO_SHORT_STATUS1_LDO2 0x02 +#define TPS65917_LDO_SHORT_STATUS1_LDO2_SHIFT 0x01 +#define TPS65917_LDO_SHORT_STATUS1_LDO1 0x01 +#define TPS65917_LDO_SHORT_STATUS1_LDO1_SHIFT 0x00 + +/* Bit definitions for LDO_SHORT_STATUS2 */ +#define TPS65917_LDO_SHORT_STATUS2_LDO3 0x04 +#define TPS65917_LDO_SHORT_STATUS2_LDO3_SHIFT 0x02 +#define TPS65917_LDO_SHORT_STATUS2_LDO5 0x02 +#define TPS65917_LDO_SHORT_STATUS2_LDO5_SHIFT 0x01 + +/* Bit definitions for LDO_SHORT_STATUS2 */ +#define TPS65917_LDO_SHORT_STATUS2_LDOVANA 0x80 +#define TPS65917_LDO_SHORT_STATUS2_LDOVANA_SHIFT 0x07 + +/* Bit definitions for REGEN1_CTRL */ +#define TPS65917_REGEN1_CTRL_STATUS 0x10 +#define TPS65917_REGEN1_CTRL_STATUS_SHIFT 0x04 +#define TPS65917_REGEN1_CTRL_MODE_SLEEP 0x04 +#define TPS65917_REGEN1_CTRL_MODE_SLEEP_SHIFT 0x02 +#define TPS65917_REGEN1_CTRL_MODE_ACTIVE 0x01 +#define TPS65917_REGEN1_CTRL_MODE_ACTIVE_SHIFT 0x00 + +/* Bit definitions for PLLEN_CTRL */ +#define TPS65917_PLLEN_CTRL_STATUS 0x10 +#define TPS65917_PLLEN_CTRL_STATUS_SHIFT 0x04 +#define TPS65917_PLLEN_CTRL_MODE_SLEEP 0x04 +#define TPS65917_PLLEN_CTRL_MODE_SLEEP_SHIFT 0x02 +#define TPS65917_PLLEN_CTRL_MODE_ACTIVE 0x01 +#define TPS65917_PLLEN_CTRL_MODE_ACTIVE_SHIFT 0x00 + +/* Bit definitions for REGEN2_CTRL */ +#define TPS65917_REGEN2_CTRL_STATUS 0x10 +#define TPS65917_REGEN2_CTRL_STATUS_SHIFT 0x04 +#define TPS65917_REGEN2_CTRL_MODE_SLEEP 0x04 +#define TPS65917_REGEN2_CTRL_MODE_SLEEP_SHIFT 0x02 +#define TPS65917_REGEN2_CTRL_MODE_ACTIVE 0x01 +#define TPS65917_REGEN2_CTRL_MODE_ACTIVE_SHIFT 0x00 + +/* Bit definitions for NSLEEP_RES_ASSIGN */ +#define TPS65917_NSLEEP_RES_ASSIGN_PLL_EN 0x08 +#define TPS65917_NSLEEP_RES_ASSIGN_PLL_EN_SHIFT 0x03 +#define TPS65917_NSLEEP_RES_ASSIGN_REGEN3 0x04 +#define TPS65917_NSLEEP_RES_ASSIGN_REGEN3_SHIFT 0x02 +#define TPS65917_NSLEEP_RES_ASSIGN_REGEN2 0x02 +#define TPS65917_NSLEEP_RES_ASSIGN_REGEN2_SHIFT 0x01 +#define TPS65917_NSLEEP_RES_ASSIGN_REGEN1 0x01 +#define TPS65917_NSLEEP_RES_ASSIGN_REGEN1_SHIFT 0x00 + +/* Bit definitions for NSLEEP_SMPS_ASSIGN */ +#define TPS65917_NSLEEP_SMPS_ASSIGN_SMPS5 0x40 +#define TPS65917_NSLEEP_SMPS_ASSIGN_SMPS5_SHIFT 0x06 +#define TPS65917_NSLEEP_SMPS_ASSIGN_SMPS4 0x10 +#define TPS65917_NSLEEP_SMPS_ASSIGN_SMPS4_SHIFT 0x04 +#define TPS65917_NSLEEP_SMPS_ASSIGN_SMPS3 0x08 +#define TPS65917_NSLEEP_SMPS_ASSIGN_SMPS3_SHIFT 0x03 +#define TPS65917_NSLEEP_SMPS_ASSIGN_SMPS2 0x02 +#define TPS65917_NSLEEP_SMPS_ASSIGN_SMPS2_SHIFT 0x01 +#define TPS65917_NSLEEP_SMPS_ASSIGN_SMPS1 0x01 +#define TPS65917_NSLEEP_SMPS_ASSIGN_SMPS1_SHIFT 0x00 + +/* Bit definitions for 
NSLEEP_LDO_ASSIGN1 */ +#define TPS65917_NSLEEP_LDO_ASSIGN1_LDO4 0x80 +#define TPS65917_NSLEEP_LDO_ASSIGN1_LDO4_SHIFT 0x07 +#define TPS65917_NSLEEP_LDO_ASSIGN1_LDO2 0x02 +#define TPS65917_NSLEEP_LDO_ASSIGN1_LDO2_SHIFT 0x01 +#define TPS65917_NSLEEP_LDO_ASSIGN1_LDO1 0x01 +#define TPS65917_NSLEEP_LDO_ASSIGN1_LDO1_SHIFT 0x00 + +/* Bit definitions for NSLEEP_LDO_ASSIGN2 */ +#define TPS65917_NSLEEP_LDO_ASSIGN2_LDO3 0x04 +#define TPS65917_NSLEEP_LDO_ASSIGN2_LDO3_SHIFT 0x02 +#define TPS65917_NSLEEP_LDO_ASSIGN2_LDO5 0x02 +#define TPS65917_NSLEEP_LDO_ASSIGN2_LDO5_SHIFT 0x01 + +/* Bit definitions for ENABLE1_RES_ASSIGN */ +#define TPS65917_ENABLE1_RES_ASSIGN_PLLEN 0x08 +#define TPS65917_ENABLE1_RES_ASSIGN_PLLEN_SHIFT 0x03 +#define TPS65917_ENABLE1_RES_ASSIGN_REGEN3 0x04 +#define TPS65917_ENABLE1_RES_ASSIGN_REGEN3_SHIFT 0x02 +#define TPS65917_ENABLE1_RES_ASSIGN_REGEN2 0x02 +#define TPS65917_ENABLE1_RES_ASSIGN_REGEN2_SHIFT 0x01 +#define TPS65917_ENABLE1_RES_ASSIGN_REGEN1 0x01 +#define TPS65917_ENABLE1_RES_ASSIGN_REGEN1_SHIFT 0x00 + +/* Bit definitions for ENABLE1_SMPS_ASSIGN */ +#define TPS65917_ENABLE1_SMPS_ASSIGN_SMPS5 0x40 +#define TPS65917_ENABLE1_SMPS_ASSIGN_SMPS5_SHIFT 0x06 +#define TPS65917_ENABLE1_SMPS_ASSIGN_SMPS4 0x10 +#define TPS65917_ENABLE1_SMPS_ASSIGN_SMPS4_SHIFT 0x04 +#define TPS65917_ENABLE1_SMPS_ASSIGN_SMPS3 0x08 +#define TPS65917_ENABLE1_SMPS_ASSIGN_SMPS3_SHIFT 0x03 +#define TPS65917_ENABLE1_SMPS_ASSIGN_SMPS2 0x02 +#define TPS65917_ENABLE1_SMPS_ASSIGN_SMPS2_SHIFT 0x01 +#define TPS65917_ENABLE1_SMPS_ASSIGN_SMPS1 0x01 +#define TPS65917_ENABLE1_SMPS_ASSIGN_SMPS1_SHIFT 0x00 + +/* Bit definitions for ENABLE1_LDO_ASSIGN1 */ +#define TPS65917_ENABLE1_LDO_ASSIGN1_LDO4 0x80 +#define TPS65917_ENABLE1_LDO_ASSIGN1_LDO4_SHIFT 0x07 +#define TPS65917_ENABLE1_LDO_ASSIGN1_LDO2 0x02 +#define TPS65917_ENABLE1_LDO_ASSIGN1_LDO2_SHIFT 0x01 +#define TPS65917_ENABLE1_LDO_ASSIGN1_LDO1 0x01 +#define TPS65917_ENABLE1_LDO_ASSIGN1_LDO1_SHIFT 0x00 + +/* Bit definitions for ENABLE1_LDO_ASSIGN2 */ +#define TPS65917_ENABLE1_LDO_ASSIGN2_LDO3 0x04 +#define TPS65917_ENABLE1_LDO_ASSIGN2_LDO3_SHIFT 0x02 +#define TPS65917_ENABLE1_LDO_ASSIGN2_LDO5 0x02 +#define TPS65917_ENABLE1_LDO_ASSIGN2_LDO5_SHIFT 0x01 + +/* Bit definitions for ENABLE2_RES_ASSIGN */ +#define TPS65917_ENABLE2_RES_ASSIGN_PLLEN 0x08 +#define TPS65917_ENABLE2_RES_ASSIGN_PLLEN_SHIFT 0x03 +#define TPS65917_ENABLE2_RES_ASSIGN_REGEN3 0x04 +#define TPS65917_ENABLE2_RES_ASSIGN_REGEN3_SHIFT 0x02 +#define TPS65917_ENABLE2_RES_ASSIGN_REGEN2 0x02 +#define TPS65917_ENABLE2_RES_ASSIGN_REGEN2_SHIFT 0x01 +#define TPS65917_ENABLE2_RES_ASSIGN_REGEN1 0x01 +#define TPS65917_ENABLE2_RES_ASSIGN_REGEN1_SHIFT 0x00 + +/* Bit definitions for ENABLE2_SMPS_ASSIGN */ +#define TPS65917_ENABLE2_SMPS_ASSIGN_SMPS5 0x40 +#define TPS65917_ENABLE2_SMPS_ASSIGN_SMPS5_SHIFT 0x06 +#define TPS65917_ENABLE2_SMPS_ASSIGN_SMPS4 0x10 +#define TPS65917_ENABLE2_SMPS_ASSIGN_SMPS4_SHIFT 0x04 +#define TPS65917_ENABLE2_SMPS_ASSIGN_SMPS3 0x08 +#define TPS65917_ENABLE2_SMPS_ASSIGN_SMPS3_SHIFT 0x03 +#define TPS65917_ENABLE2_SMPS_ASSIGN_SMPS2 0x02 +#define TPS65917_ENABLE2_SMPS_ASSIGN_SMPS2_SHIFT 0x01 +#define TPS65917_ENABLE2_SMPS_ASSIGN_SMPS1 0x01 +#define TPS65917_ENABLE2_SMPS_ASSIGN_SMPS1_SHIFT 0x00 + +/* Bit definitions for ENABLE2_LDO_ASSIGN1 */ +#define TPS65917_ENABLE2_LDO_ASSIGN1_LDO4 0x80 +#define TPS65917_ENABLE2_LDO_ASSIGN1_LDO4_SHIFT 0x07 +#define TPS65917_ENABLE2_LDO_ASSIGN1_LDO2 0x02 +#define TPS65917_ENABLE2_LDO_ASSIGN1_LDO2_SHIFT 0x01 +#define TPS65917_ENABLE2_LDO_ASSIGN1_LDO1 0x01 +#define 
TPS65917_ENABLE2_LDO_ASSIGN1_LDO1_SHIFT 0x00 + +/* Bit definitions for ENABLE2_LDO_ASSIGN2 */ +#define TPS65917_ENABLE2_LDO_ASSIGN2_LDO3 0x04 +#define TPS65917_ENABLE2_LDO_ASSIGN2_LDO3_SHIFT 0x02 +#define TPS65917_ENABLE2_LDO_ASSIGN2_LDO5 0x02 +#define TPS65917_ENABLE2_LDO_ASSIGN2_LDO5_SHIFT 0x01 + +/* Bit definitions for REGEN3_CTRL */ +#define TPS65917_REGEN3_CTRL_STATUS 0x10 +#define TPS65917_REGEN3_CTRL_STATUS_SHIFT 0x04 +#define TPS65917_REGEN3_CTRL_MODE_SLEEP 0x04 +#define TPS65917_REGEN3_CTRL_MODE_SLEEP_SHIFT 0x02 +#define TPS65917_REGEN3_CTRL_MODE_ACTIVE 0x01 +#define TPS65917_REGEN3_CTRL_MODE_ACTIVE_SHIFT 0x00 + +/* Registers for function RESOURCE */ +#define TPS65917_REGEN1_CTRL 0x2 +#define TPS65917_PLLEN_CTRL 0x3 +#define TPS65917_NSLEEP_RES_ASSIGN 0x6 +#define TPS65917_NSLEEP_SMPS_ASSIGN 0x7 +#define TPS65917_NSLEEP_LDO_ASSIGN1 0x8 +#define TPS65917_NSLEEP_LDO_ASSIGN2 0x9 +#define TPS65917_ENABLE1_RES_ASSIGN 0xA +#define TPS65917_ENABLE1_SMPS_ASSIGN 0xB +#define TPS65917_ENABLE1_LDO_ASSIGN1 0xC +#define TPS65917_ENABLE1_LDO_ASSIGN2 0xD +#define TPS65917_ENABLE2_RES_ASSIGN 0xE +#define TPS65917_ENABLE2_SMPS_ASSIGN 0xF +#define TPS65917_ENABLE2_LDO_ASSIGN1 0x10 +#define TPS65917_ENABLE2_LDO_ASSIGN2 0x11 +#define TPS65917_REGEN2_CTRL 0x12 +#define TPS65917_REGEN3_CTRL 0x13 + static inline int palmas_read(struct palmas *palmas, unsigned int base, unsigned int reg, unsigned int *val) { -- cgit v1.2.3-59-g8ed1b From 9f057dc1d01383e6975e515ba8e661d2aeb6bdfc Mon Sep 17 00:00:00 2001 From: Keerthy Date: Wed, 18 Jun 2014 15:28:56 +0530 Subject: regulator: palmas: Shift the reg_info structure definition to the header file Shift the reg_info structure definition to the header file. Acked-by: Lee Jones Signed-off-by: Keerthy Signed-off-by: Mark Brown --- drivers/regulator/palmas-regulator.c | 9 --------- include/linux/mfd/palmas.h | 9 +++++++++ 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/drivers/regulator/palmas-regulator.c b/drivers/regulator/palmas-regulator.c index b982f0ff4e01..1cf462f90df5 100644 --- a/drivers/regulator/palmas-regulator.c +++ b/drivers/regulator/palmas-regulator.c @@ -27,15 +27,6 @@ #include #include -struct regs_info { - char *name; - char *sname; - u8 vsel_addr; - u8 ctrl_addr; - u8 tstep_addr; - int sleep_id; -}; - static const struct regulator_linear_range smps_low_ranges[] = { REGULATOR_LINEAR_RANGE(0, 0x0, 0x0, 0), REGULATOR_LINEAR_RANGE(500000, 0x1, 0x6, 0), diff --git a/include/linux/mfd/palmas.h b/include/linux/mfd/palmas.h index f760a07ab76e..c123666c70f7 100644 --- a/include/linux/mfd/palmas.h +++ b/include/linux/mfd/palmas.h @@ -88,6 +88,15 @@ struct palmas { u8 pwm_muxed; }; +struct regs_info { + char *name; + char *sname; + u8 vsel_addr; + u8 ctrl_addr; + u8 tstep_addr; + int sleep_id; +}; + struct palmas_gpadc_platform_data { /* Channel 3 current source is only enabled during conversion */ int ch3_current; -- cgit v1.2.3-59-g8ed1b From 7ec70c73c9e93556fd19bb6bdfbbd089d9db438b Mon Sep 17 00:00:00 2001 From: Keerthy Date: Wed, 18 Jun 2014 15:28:57 +0530 Subject: mfd: palmas: shift the palmas_sleep_requestor_info structure definition to the header file shift the palmas_sleep_requestor_info structure definition to the header file. 
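For illustration only (not part of this patch): once the definition is visible from
palmas.h, other Palmas drivers can interpret a requestor entry directly. The helper
below is a hypothetical sketch; only the field names match the structure moved here.

        #include <linux/bitops.h>
        #include <linux/mfd/palmas.h>

        /* Map one sleep-requestor entry to the assignment register and bit it
         * controls. assign_base is the first NSLEEP/ENABLEx assignment register
         * of the block being configured (hypothetical parameter). */
        static void example_requestor_to_reg(const struct palmas_sleep_requestor_info *info,
                                             unsigned int assign_base,
                                             unsigned int *reg, unsigned int *mask)
        {
                *reg  = assign_base + info->reg_offset; /* which assignment register */
                *mask = BIT(info->bit_pos);             /* which bit inside it */
        }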
Acked-by: Lee Jones Signed-off-by: Keerthy Signed-off-by: Mark Brown --- drivers/mfd/palmas.c | 10 ---------- include/linux/mfd/palmas.h | 10 ++++++++++ 2 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/palmas.c b/drivers/mfd/palmas.c index 6e1786187dd8..c12759d1bd7c 100644 --- a/drivers/mfd/palmas.c +++ b/drivers/mfd/palmas.c @@ -25,16 +25,6 @@ #include #include -#define PALMAS_EXT_REQ (PALMAS_EXT_CONTROL_ENABLE1 | \ - PALMAS_EXT_CONTROL_ENABLE2 | \ - PALMAS_EXT_CONTROL_NSLEEP) - -struct palmas_sleep_requestor_info { - int id; - int reg_offset; - int bit_pos; -}; - #define EXTERNAL_REQUESTOR(_id, _offset, _pos) \ [PALMAS_EXTERNAL_REQSTR_ID_##_id] = { \ .id = PALMAS_EXTERNAL_REQSTR_ID_##_id, \ diff --git a/include/linux/mfd/palmas.h b/include/linux/mfd/palmas.h index c123666c70f7..0136e583f7f8 100644 --- a/include/linux/mfd/palmas.h +++ b/include/linux/mfd/palmas.h @@ -88,6 +88,16 @@ struct palmas { u8 pwm_muxed; }; +#define PALMAS_EXT_REQ (PALMAS_EXT_CONTROL_ENABLE1 | \ + PALMAS_EXT_CONTROL_ENABLE2 | \ + PALMAS_EXT_CONTROL_NSLEEP) + +struct palmas_sleep_requestor_info { + int id; + int reg_offset; + int bit_pos; +}; + struct regs_info { char *name; char *sname; -- cgit v1.2.3-59-g8ed1b From fe40b173441e4519347395825d15d2c5386494c8 Mon Sep 17 00:00:00 2001 From: Keerthy Date: Wed, 18 Jun 2014 15:28:58 +0530 Subject: regulator: palmas: Add palmas_pmic_driver_data structure Add palmas_pmic_driver_data structure. Signed-off-by: Keerthy Signed-off-by: Mark Brown --- include/linux/mfd/palmas.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/palmas.h b/include/linux/mfd/palmas.h index 0136e583f7f8..1a045ba32c13 100644 --- a/include/linux/mfd/palmas.h +++ b/include/linux/mfd/palmas.h @@ -53,6 +53,8 @@ struct palmas_pmic; struct palmas_gpadc; struct palmas_resource; struct palmas_usb; +struct palmas_pmic_driver_data; +struct palmas_pmic_platform_data; enum palmas_usb_state { PALMAS_USB_STATE_DISCONNECT, @@ -76,6 +78,8 @@ struct palmas { struct mutex irq_lock; struct regmap_irq_chip_data *irq_data; + struct palmas_pmic_driver_data *pmic_ddata; + /* Child Devices */ struct palmas_pmic *pmic; struct palmas_gpadc *gpadc; @@ -107,6 +111,27 @@ struct regs_info { int sleep_id; }; +struct palmas_pmic_driver_data { + int smps_start; + int smps_end; + int ldo_begin; + int ldo_end; + int max_reg; + struct regs_info *palmas_regs_info; + struct of_regulator_match *palmas_matches; + struct palmas_sleep_requestor_info *sleep_req_info; + int (*smps_register)(struct palmas_pmic *pmic, + struct palmas_pmic_driver_data *ddata, + struct palmas_pmic_platform_data *pdata, + const char *pdev_name, + struct regulator_config config); + int (*ldo_register)(struct palmas_pmic *pmic, + struct palmas_pmic_driver_data *ddata, + struct palmas_pmic_platform_data *pdata, + const char *pdev_name, + struct regulator_config config); +}; + struct palmas_gpadc_platform_data { /* Channel 3 current source is only enabled during conversion */ int ch3_current; -- cgit v1.2.3-59-g8ed1b From c887f0d3a03283cb6fe2c32aae62229bebd3fa32 Mon Sep 17 00:00:00 2001 From: Arik Nemtsov Date: Wed, 11 Jun 2014 17:18:25 +0300 Subject: mac80211: add API to request TDLS operation from userspace Write a mac80211 to the cfg80211 API for requesting a userspace TDLS operation. Define TDLS specific reason codes that can be used here. 
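For illustration only (not part of this patch), a minimal sketch of how a low-level
driver might use the new call; the function name and the way the peer address is
obtained are hypothetical, the API and constants are the ones added below.

        #include <net/mac80211.h>

        /* Ask userspace (e.g. wpa_supplicant) to tear down a TDLS session whose
         * peer has become unreachable. vif and peer_addr come from driver state. */
        static void example_teardown_dead_peer(struct ieee80211_vif *vif,
                                               const u8 *peer_addr)
        {
                ieee80211_tdls_oper_request(vif, peer_addr,
                                            NL80211_TDLS_TEARDOWN,
                                            WLAN_REASON_TDLS_TEARDOWN_UNREACHABLE,
                                            GFP_KERNEL);
        }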
Signed-off-by: Arik Nemtsov Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 3 +++ include/net/mac80211.h | 13 +++++++++++++ net/mac80211/tdls.c | 17 +++++++++++++++++ 3 files changed, 33 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 6bff13f74050..75d17e15da33 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1621,6 +1621,9 @@ enum ieee80211_reasoncode { WLAN_REASON_INVALID_RSN_IE_CAP = 22, WLAN_REASON_IEEE8021X_FAILED = 23, WLAN_REASON_CIPHER_SUITE_REJECTED = 24, + /* TDLS (802.11z) */ + WLAN_REASON_TDLS_TEARDOWN_UNREACHABLE = 25, + WLAN_REASON_TDLS_TEARDOWN_UNSPECIFIED = 26, /* 802.11e */ WLAN_REASON_DISASSOC_UNSPECIFIED_QOS = 32, WLAN_REASON_DISASSOC_QAP_NO_BANDWIDTH = 33, diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 421b6ecb4b2c..8d876dc8b299 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -4815,4 +4815,17 @@ int ieee80211_parse_p2p_noa(const struct ieee80211_p2p_noa_attr *attr, */ void ieee80211_update_p2p_noa(struct ieee80211_noa_data *data, u32 tsf); +/** + * ieee80211_tdls_oper - request userspace to perform a TDLS operation + * @vif: virtual interface + * @peer: the peer's destination address + * @oper: the requested TDLS operation + * @reason_code: reason code for the operation, valid for TDLS teardown + * @gfp: allocation flags + * + * See cfg80211_tdls_oper_request(). + */ +void ieee80211_tdls_oper_request(struct ieee80211_vif *vif, const u8 *peer, + enum nl80211_tdls_operation oper, + u16 reason_code, gfp_t gfp); #endif /* MAC80211_H */ diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c index 0ba7e4c029c8..6f3a3ad0cb7c 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c @@ -8,6 +8,7 @@ */ #include +#include #include "ieee80211_i.h" /* give usermode some time for retries in setting up the TDLS session */ @@ -514,3 +515,19 @@ int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev, mutex_unlock(&local->mtx); return ret; } + +void ieee80211_tdls_oper_request(struct ieee80211_vif *vif, const u8 *peer, + enum nl80211_tdls_operation oper, + u16 reason_code, gfp_t gfp) +{ + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + + if (vif->type != NL80211_IFTYPE_STATION || !vif->bss_conf.assoc) { + sdata_err(sdata, "Discarding TDLS oper %d - not STA or disconnected\n", + oper); + return; + } + + cfg80211_tdls_oper_request(sdata->dev, peer, oper, reason_code, gfp); +} +EXPORT_SYMBOL(ieee80211_tdls_oper_request); -- cgit v1.2.3-59-g8ed1b From f15a5cf912f05b572d1f9f3772fba019643f4837 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Tue, 10 Jun 2014 18:29:39 +0800 Subject: SUNRPC/NFSD: Change to type of bool for rq_usedeferral and rq_splice_ok rq_usedeferral and rq_splice_ok are used as 0 and 1, just defined to bool. Signed-off-by: Kinglong Mee Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 4 ++-- include/linux/sunrpc/svc.h | 4 ++-- net/sunrpc/auth_gss/svcauth_gss.c | 2 +- net/sunrpc/svc.c | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index baa3803f0811..be6734060d2a 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1298,7 +1298,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, * Don't use the deferral mechanism for NFSv4; compounds make it * too hard to avoid non-idempotency problems. 
*/ - rqstp->rq_usedeferral = 0; + rqstp->rq_usedeferral = false; /* * According to RFC3010, this takes precedence over all other errors. @@ -1417,7 +1417,7 @@ encode_op: BUG_ON(cstate->replay_owner); out: /* Reset deferral mechanism for RPC deferrals */ - rqstp->rq_usedeferral = 1; + rqstp->rq_usedeferral = true; dprintk("nfsv4 compound returned %d\n", ntohl(status)); return status; } diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 1bc7cd05b22e..cf61ecd148e0 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -236,7 +236,7 @@ struct svc_rqst { struct svc_cred rq_cred; /* auth info */ void * rq_xprt_ctxt; /* transport specific context ptr */ struct svc_deferred_req*rq_deferred; /* deferred request we are replaying */ - int rq_usedeferral; /* use deferral */ + bool rq_usedeferral; /* use deferral */ size_t rq_xprt_hlen; /* xprt header len */ struct xdr_buf rq_arg; @@ -277,7 +277,7 @@ struct svc_rqst { struct auth_domain * rq_gssclient; /* "gss/"-style peer info */ int rq_cachetype; struct svc_cacherep * rq_cacherep; /* cache info */ - int rq_splice_ok; /* turned off in gss privacy + bool rq_splice_ok; /* turned off in gss privacy * to prevent encrypting page * cache pages */ wait_queue_head_t rq_wait; /* synchronization */ diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 4ce5eccec1f6..c548ab213f76 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -886,7 +886,7 @@ unwrap_priv_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gs u32 priv_len, maj_stat; int pad, saved_len, remaining_len, offset; - rqstp->rq_splice_ok = 0; + rqstp->rq_splice_ok = false; priv_len = svc_getnl(&buf->head[0]); if (rqstp->rq_deferred) { diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 5de6801cd924..1db5007ddbce 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1086,9 +1086,9 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) goto err_short_len; /* Will be turned off only in gss privacy case: */ - rqstp->rq_splice_ok = 1; + rqstp->rq_splice_ok = true; /* Will be turned off only when NFSv4 Sessions are used */ - rqstp->rq_usedeferral = 1; + rqstp->rq_usedeferral = true; rqstp->rq_dropme = false; /* Setup reply header */ -- cgit v1.2.3-59-g8ed1b From 078bc005651cfb134135c5f6eca48a997afb4014 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 19 Jun 2014 21:34:38 +0000 Subject: irqchip: spear_shirq: Move private structs to source No point in having them in a separate header file. Make the init functions static. 
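For illustration only (not part of this patch), a sketch of how the register
description is typically consumed once it lives next to the code that uses it.
The helper itself is hypothetical; the fields match the structures moved below,
and the reset_to_clear handling follows the structure's own comment.

        #include <linux/io.h>

        static void example_shirq_ack(struct spear_shirq *shirq, u32 bit)
        {
                u32 val = readl(shirq->base + shirq->regs.clear_reg);

                if (shirq->regs.reset_to_clear)
                        val &= ~bit;    /* clearing the bit acks the interrupt */
                else
                        val |= bit;     /* setting the bit acks the interrupt */
                writel(val, shirq->base + shirq->regs.clear_reg);
        }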
Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20140619212713.038658058@linutronix.de Acked-by: Viresh Kumar Signed-off-by: Jason Cooper --- drivers/irqchip/spear-shirq.c | 52 ++++++++++++++++++++++++++---- include/linux/irqchip/spear-shirq.h | 64 ------------------------------------- 2 files changed, 45 insertions(+), 71 deletions(-) delete mode 100644 include/linux/irqchip/spear-shirq.h (limited to 'include/linux') diff --git a/drivers/irqchip/spear-shirq.c b/drivers/irqchip/spear-shirq.c index 93f2196e8a08..441e39f08135 100644 --- a/drivers/irqchip/spear-shirq.c +++ b/drivers/irqchip/spear-shirq.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include @@ -27,6 +26,45 @@ #include "irqchip.h" +/* + * struct shirq_regs: shared irq register configuration + * + * enb_reg: enable register offset + * reset_to_enb: val 1 indicates, we need to clear bit for enabling interrupt + * status_reg: status register offset + * status_reg_mask: status register valid mask + * clear_reg: clear register offset + * reset_to_clear: val 1 indicates, we need to clear bit for clearing interrupt + */ +struct shirq_regs { + u32 enb_reg; + u32 reset_to_enb; + u32 status_reg; + u32 clear_reg; + u32 reset_to_clear; +}; + +/* + * struct spear_shirq: shared irq structure + * + * irq: hardware irq number + * irq_base: base irq in linux domain + * irq_nr: no. of shared interrupts in a particular block + * irq_bit_off: starting bit offset in the status register + * invalid_irq: irq group is currently disabled + * base: base address of shared irq register + * regs: register configuration for shared irq block + */ +struct spear_shirq { + u32 irq; + u32 irq_base; + u32 irq_nr; + u32 irq_bit_off; + int invalid_irq; + void __iomem *base; + struct shirq_regs regs; +}; + static DEFINE_SPINLOCK(lock); /* spear300 shared irq registers offsets and masks */ @@ -296,24 +334,24 @@ err_unmap: return -ENXIO; } -int __init spear300_shirq_of_init(struct device_node *np, - struct device_node *parent) +static int __init spear300_shirq_of_init(struct device_node *np, + struct device_node *parent) { return shirq_init(spear300_shirq_blocks, ARRAY_SIZE(spear300_shirq_blocks), np); } IRQCHIP_DECLARE(spear300_shirq, "st,spear300-shirq", spear300_shirq_of_init); -int __init spear310_shirq_of_init(struct device_node *np, - struct device_node *parent) +static int __init spear310_shirq_of_init(struct device_node *np, + struct device_node *parent) { return shirq_init(spear310_shirq_blocks, ARRAY_SIZE(spear310_shirq_blocks), np); } IRQCHIP_DECLARE(spear310_shirq, "st,spear310-shirq", spear310_shirq_of_init); -int __init spear320_shirq_of_init(struct device_node *np, - struct device_node *parent) +static int __init spear320_shirq_of_init(struct device_node *np, + struct device_node *parent) { return shirq_init(spear320_shirq_blocks, ARRAY_SIZE(spear320_shirq_blocks), np); diff --git a/include/linux/irqchip/spear-shirq.h b/include/linux/irqchip/spear-shirq.h deleted file mode 100644 index c8be16d213a3..000000000000 --- a/include/linux/irqchip/spear-shirq.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - * SPEAr platform shared irq layer header file - * - * Copyright (C) 2009-2012 ST Microelectronics - * Viresh Kumar - * - * This file is licensed under the terms of the GNU General Public - * License version 2. This program is licensed "as is" without any - * warranty of any kind, whether express or implied. 
- */ - -#ifndef __SPEAR_SHIRQ_H -#define __SPEAR_SHIRQ_H - -#include -#include - -/* - * struct shirq_regs: shared irq register configuration - * - * enb_reg: enable register offset - * reset_to_enb: val 1 indicates, we need to clear bit for enabling interrupt - * status_reg: status register offset - * status_reg_mask: status register valid mask - * clear_reg: clear register offset - * reset_to_clear: val 1 indicates, we need to clear bit for clearing interrupt - */ -struct shirq_regs { - u32 enb_reg; - u32 reset_to_enb; - u32 status_reg; - u32 clear_reg; - u32 reset_to_clear; -}; - -/* - * struct spear_shirq: shared irq structure - * - * irq: hardware irq number - * irq_base: base irq in linux domain - * irq_nr: no. of shared interrupts in a particular block - * irq_bit_off: starting bit offset in the status register - * invalid_irq: irq group is currently disabled - * base: base address of shared irq register - * regs: register configuration for shared irq block - */ -struct spear_shirq { - u32 irq; - u32 irq_base; - u32 irq_nr; - u32 irq_bit_off; - int invalid_irq; - void __iomem *base; - struct shirq_regs regs; -}; - -int __init spear300_shirq_of_init(struct device_node *np, - struct device_node *parent); -int __init spear310_shirq_of_init(struct device_node *np, - struct device_node *parent); -int __init spear320_shirq_of_init(struct device_node *np, - struct device_node *parent); - -#endif /* __SPEAR_SHIRQ_H */ -- cgit v1.2.3-59-g8ed1b From 1e7f3a485922211b6e4a082ebc6bf05810b0b6ea Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Mon, 9 Jun 2014 11:48:33 -0400 Subject: nfs: move nfs_pgio_data and remove nfs_rw_header nfs_rw_header was used to allocate an nfs_pgio_header along with an nfs_pgio_data, because a _header would need at least one _data. Now there is only ever one nfs_pgio_data for each nfs_pgio_header -- move it to nfs_pgio_header and get rid of nfs_rw_header. 
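For orientation (a paraphrase of the hunks below, not new code), the allocation
path in a pageio caller collapses from two objects to one. do_setup() is a
hypothetical caller and error handling is elided; the function names are the
ones introduced or kept by this patch.

        static int do_setup(struct nfs_pageio_descriptor *desc)
        {
                struct nfs_pgio_header *hdr;

                hdr = nfs_pgio_header_alloc(desc->pg_rw_ops);   /* one allocation */
                if (!hdr)
                        return -ENOMEM;
                nfs_pgheader_init(desc, hdr, nfs_pgio_header_free);
                return nfs_generic_pgio(desc, hdr);     /* fills the embedded hdr->data */
        }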
Reviewed-by: Christoph Hellwig Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 8 ++--- fs/nfs/internal.h | 6 ++-- fs/nfs/pagelist.c | 94 +++++++++++++++--------------------------------- fs/nfs/pnfs.c | 24 ++++++------- fs/nfs/read.c | 6 ++-- fs/nfs/write.c | 10 +++--- include/linux/nfs_page.h | 4 +-- include/linux/nfs_xdr.h | 38 +++++++++----------- 8 files changed, 71 insertions(+), 119 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 8f98138cbc43..179de67ca907 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -148,8 +148,8 @@ static void nfs_direct_set_hdr_verf(struct nfs_direct_req *dreq, { struct nfs_writeverf *verfp; - verfp = nfs_direct_select_verf(dreq, hdr->data->ds_clp, - hdr->data->ds_idx); + verfp = nfs_direct_select_verf(dreq, hdr->data.ds_clp, + hdr->data.ds_idx); WARN_ON_ONCE(verfp->committed >= 0); memcpy(verfp, &hdr->verf, sizeof(struct nfs_writeverf)); WARN_ON_ONCE(verfp->committed < 0); @@ -169,8 +169,8 @@ static int nfs_direct_set_or_cmp_hdr_verf(struct nfs_direct_req *dreq, { struct nfs_writeverf *verfp; - verfp = nfs_direct_select_verf(dreq, hdr->data->ds_clp, - hdr->data->ds_idx); + verfp = nfs_direct_select_verf(dreq, hdr->data.ds_clp, + hdr->data.ds_idx); if (verfp->committed < 0) { nfs_direct_set_hdr_verf(dreq, hdr); return 0; diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 82ddbf46660e..5cda049c8f9b 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -238,9 +238,9 @@ void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos); int nfs_iocounter_wait(struct nfs_io_counter *c); extern const struct nfs_pageio_ops nfs_pgio_rw_ops; -struct nfs_rw_header *nfs_rw_header_alloc(const struct nfs_rw_ops *); -void nfs_rw_header_free(struct nfs_pgio_header *); -void nfs_pgio_data_release(struct nfs_pgio_data *); +struct nfs_pgio_header *nfs_pgio_header_alloc(const struct nfs_rw_ops *); +void nfs_pgio_header_free(struct nfs_pgio_header *); +void nfs_pgio_data_destroy(struct nfs_pgio_data *); int nfs_generic_pgio(struct nfs_pageio_descriptor *, struct nfs_pgio_header *); int nfs_initiate_pgio(struct rpc_clnt *, struct nfs_pgio_data *, const struct rpc_call_ops *, int, int); diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index b6ee3a6ee96d..e4cde476562f 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -452,95 +452,61 @@ size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, } EXPORT_SYMBOL_GPL(nfs_generic_pg_test); -static inline struct nfs_rw_header *NFS_RW_HEADER(struct nfs_pgio_header *hdr) +struct nfs_pgio_header *nfs_pgio_header_alloc(const struct nfs_rw_ops *ops) { - return container_of(hdr, struct nfs_rw_header, header); -} - -/** - * nfs_rw_header_alloc - Allocate a header for a read or write - * @ops: Read or write function vector - */ -struct nfs_rw_header *nfs_rw_header_alloc(const struct nfs_rw_ops *ops) -{ - struct nfs_rw_header *header = ops->rw_alloc_header(); - - if (header) { - struct nfs_pgio_header *hdr = &header->header; + struct nfs_pgio_header *hdr = ops->rw_alloc_header(); + if (hdr) { INIT_LIST_HEAD(&hdr->pages); spin_lock_init(&hdr->lock); atomic_set(&hdr->refcnt, 0); hdr->rw_ops = ops; } - return header; + return hdr; } -EXPORT_SYMBOL_GPL(nfs_rw_header_alloc); +EXPORT_SYMBOL_GPL(nfs_pgio_header_alloc); /* - * nfs_rw_header_free - Free a read or write header + * nfs_pgio_header_free - Free a read or write header * @hdr: The header to free */ -void nfs_rw_header_free(struct nfs_pgio_header *hdr) +void 
nfs_pgio_header_free(struct nfs_pgio_header *hdr) { - hdr->rw_ops->rw_free_header(NFS_RW_HEADER(hdr)); + hdr->rw_ops->rw_free_header(hdr); } -EXPORT_SYMBOL_GPL(nfs_rw_header_free); +EXPORT_SYMBOL_GPL(nfs_pgio_header_free); /** * nfs_pgio_data_alloc - Allocate pageio data * @hdr: The header making a request * @pagecount: Number of pages to create */ -static struct nfs_pgio_data *nfs_pgio_data_alloc(struct nfs_pgio_header *hdr, - unsigned int pagecount) +static bool nfs_pgio_data_init(struct nfs_pgio_header *hdr, + unsigned int pagecount) { - struct nfs_pgio_data *data, *prealloc; - - prealloc = &NFS_RW_HEADER(hdr)->rpc_data; - if (prealloc->header == NULL) - data = prealloc; - else - data = kzalloc(sizeof(*data), GFP_KERNEL); - if (!data) - goto out; - - if (nfs_pgarray_set(&data->pages, pagecount)) { - data->header = hdr; + if (nfs_pgarray_set(&hdr->data.pages, pagecount)) { + hdr->data.header = hdr; atomic_inc(&hdr->refcnt); - } else { - if (data != prealloc) - kfree(data); - data = NULL; + return true; } -out: - return data; + return false; } /** - * nfs_pgio_data_release - Properly free pageio data - * @data: The data to release + * nfs_pgio_data_destroy - Properly free pageio data + * @data: The data to destroy */ -void nfs_pgio_data_release(struct nfs_pgio_data *data) +void nfs_pgio_data_destroy(struct nfs_pgio_data *data) { struct nfs_pgio_header *hdr = data->header; - struct nfs_rw_header *pageio_header = NFS_RW_HEADER(hdr); put_nfs_open_context(data->args.context); if (data->pages.pagevec != data->pages.page_array) kfree(data->pages.pagevec); - if (data == &pageio_header->rpc_data) { - data->header = NULL; - data = NULL; - } if (atomic_dec_and_test(&hdr->refcnt)) hdr->completion_ops->completion(hdr); - /* Note: we only free the rpc_task after callbacks are done. 
- * See the comment in rpc_free_task() for why - */ - kfree(data); } -EXPORT_SYMBOL_GPL(nfs_pgio_data_release); +EXPORT_SYMBOL_GPL(nfs_pgio_data_destroy); /** * nfs_pgio_rpcsetup - Set up arguments for a pageio call @@ -655,8 +621,7 @@ static int nfs_pgio_error(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr) { set_bit(NFS_IOHDR_REDO, &hdr->flags); - nfs_pgio_data_release(hdr->data); - hdr->data = NULL; + nfs_pgio_data_destroy(&hdr->data); desc->pg_completion_ops->error_cleanup(&desc->pg_list); return -ENOMEM; } @@ -670,7 +635,7 @@ static void nfs_pgio_release(void *calldata) struct nfs_pgio_data *data = calldata; if (data->header->rw_ops->rw_release) data->header->rw_ops->rw_release(data); - nfs_pgio_data_release(data); + nfs_pgio_data_destroy(data); } /** @@ -746,11 +711,11 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc, struct list_head *head = &desc->pg_list; struct nfs_commit_info cinfo; - data = nfs_pgio_data_alloc(hdr, nfs_page_array_len(desc->pg_base, - desc->pg_count)); - if (!data) + if (!nfs_pgio_data_init(hdr, nfs_page_array_len(desc->pg_base, + desc->pg_count))) return nfs_pgio_error(desc, hdr); + data = &hdr->data; nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq); pages = data->pages.pagevec; while (!list_empty(head)) { @@ -766,7 +731,6 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc, /* Set up the argument struct */ nfs_pgio_rpcsetup(data, desc->pg_count, 0, desc->pg_ioflags, &cinfo); - hdr->data = data; desc->pg_rpc_callops = &nfs_pgio_common_ops; return 0; } @@ -774,22 +738,20 @@ EXPORT_SYMBOL_GPL(nfs_generic_pgio); static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc) { - struct nfs_rw_header *rw_hdr; struct nfs_pgio_header *hdr; int ret; - rw_hdr = nfs_rw_header_alloc(desc->pg_rw_ops); - if (!rw_hdr) { + hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); + if (!hdr) { desc->pg_completion_ops->error_cleanup(&desc->pg_list); return -ENOMEM; } - hdr = &rw_hdr->header; - nfs_pgheader_init(desc, hdr, nfs_rw_header_free); + nfs_pgheader_init(desc, hdr, nfs_pgio_header_free); atomic_inc(&hdr->refcnt); ret = nfs_generic_pgio(desc, hdr); if (ret == 0) ret = nfs_initiate_pgio(NFS_CLIENT(hdr->inode), - hdr->data, desc->pg_rpc_callops, + &hdr->data, desc->pg_rpc_callops, desc->pg_ioflags, 0); if (atomic_dec_and_test(&hdr->refcnt)) hdr->completion_ops->completion(hdr); diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 6fdcd233d6f7..067104cce181 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1546,7 +1546,7 @@ pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, nfs_pageio_reset_write_mds(desc); desc->pg_recoalesce = 1; } - nfs_pgio_data_release(data); + nfs_pgio_data_destroy(data); } static enum pnfs_try_status @@ -1575,7 +1575,7 @@ static void pnfs_do_write(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr, int how) { - struct nfs_pgio_data *data = hdr->data; + struct nfs_pgio_data *data = &hdr->data; const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; struct pnfs_layout_segment *lseg = desc->pg_lseg; enum pnfs_try_status trypnfs; @@ -1590,25 +1590,23 @@ pnfs_do_write(struct nfs_pageio_descriptor *desc, static void pnfs_writehdr_free(struct nfs_pgio_header *hdr) { pnfs_put_lseg(hdr->lseg); - nfs_rw_header_free(hdr); + nfs_pgio_header_free(hdr); } EXPORT_SYMBOL_GPL(pnfs_writehdr_free); int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) { - struct nfs_rw_header *whdr; struct nfs_pgio_header *hdr; int ret; - whdr = nfs_rw_header_alloc(desc->pg_rw_ops); - if (!whdr) { + hdr = 
nfs_pgio_header_alloc(desc->pg_rw_ops); + if (!hdr) { desc->pg_completion_ops->error_cleanup(&desc->pg_list); pnfs_put_lseg(desc->pg_lseg); desc->pg_lseg = NULL; return -ENOMEM; } - hdr = &whdr->header; nfs_pgheader_init(desc, hdr, pnfs_writehdr_free); hdr->lseg = pnfs_get_lseg(desc->pg_lseg); atomic_inc(&hdr->refcnt); @@ -1696,7 +1694,7 @@ pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, nfs_pageio_reset_read_mds(desc); desc->pg_recoalesce = 1; } - nfs_pgio_data_release(data); + nfs_pgio_data_destroy(data); } /* @@ -1727,7 +1725,7 @@ pnfs_try_to_read_data(struct nfs_pgio_data *rdata, static void pnfs_do_read(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr) { - struct nfs_pgio_data *data = hdr->data; + struct nfs_pgio_data *data = &hdr->data; const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; struct pnfs_layout_segment *lseg = desc->pg_lseg; enum pnfs_try_status trypnfs; @@ -1742,26 +1740,24 @@ pnfs_do_read(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr) static void pnfs_readhdr_free(struct nfs_pgio_header *hdr) { pnfs_put_lseg(hdr->lseg); - nfs_rw_header_free(hdr); + nfs_pgio_header_free(hdr); } EXPORT_SYMBOL_GPL(pnfs_readhdr_free); int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) { - struct nfs_rw_header *rhdr; struct nfs_pgio_header *hdr; int ret; - rhdr = nfs_rw_header_alloc(desc->pg_rw_ops); - if (!rhdr) { + hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); + if (!hdr) { desc->pg_completion_ops->error_cleanup(&desc->pg_list); ret = -ENOMEM; pnfs_put_lseg(desc->pg_lseg); desc->pg_lseg = NULL; return ret; } - hdr = &rhdr->header; nfs_pgheader_init(desc, hdr, pnfs_readhdr_free); hdr->lseg = pnfs_get_lseg(desc->pg_lseg); atomic_inc(&hdr->refcnt); diff --git a/fs/nfs/read.c b/fs/nfs/read.c index e818a475ca64..d9df4ab3737b 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -33,12 +33,12 @@ static const struct nfs_rw_ops nfs_rw_read_ops; static struct kmem_cache *nfs_rdata_cachep; -static struct nfs_rw_header *nfs_readhdr_alloc(void) +static struct nfs_pgio_header *nfs_readhdr_alloc(void) { return kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL); } -static void nfs_readhdr_free(struct nfs_rw_header *rhdr) +static void nfs_readhdr_free(struct nfs_pgio_header *rhdr) { kmem_cache_free(nfs_rdata_cachep, rhdr); } @@ -404,7 +404,7 @@ out: int __init nfs_init_readpagecache(void) { nfs_rdata_cachep = kmem_cache_create("nfs_read_data", - sizeof(struct nfs_rw_header), + sizeof(struct nfs_pgio_header), 0, SLAB_HWCACHE_ALIGN, NULL); if (nfs_rdata_cachep == NULL) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 98ff061ccaf3..d694952f0071 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -70,18 +70,18 @@ void nfs_commit_free(struct nfs_commit_data *p) } EXPORT_SYMBOL_GPL(nfs_commit_free); -static struct nfs_rw_header *nfs_writehdr_alloc(void) +static struct nfs_pgio_header *nfs_writehdr_alloc(void) { - struct nfs_rw_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO); + struct nfs_pgio_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO); if (p) memset(p, 0, sizeof(*p)); return p; } -static void nfs_writehdr_free(struct nfs_rw_header *whdr) +static void nfs_writehdr_free(struct nfs_pgio_header *hdr) { - mempool_free(whdr, nfs_wdata_mempool); + mempool_free(hdr, nfs_wdata_mempool); } static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error) @@ -1655,7 +1655,7 @@ int nfs_migrate_page(struct address_space *mapping, struct page *newpage, int __init nfs_init_writepagecache(void) { nfs_wdata_cachep = 
kmem_cache_create("nfs_write_data", - sizeof(struct nfs_rw_header), + sizeof(struct nfs_pgio_header), 0, SLAB_HWCACHE_ALIGN, NULL); if (nfs_wdata_cachep == NULL) diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 7d9096d95d4a..43592651cd5a 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -62,8 +62,8 @@ struct nfs_pageio_ops { struct nfs_rw_ops { const fmode_t rw_mode; - struct nfs_rw_header *(*rw_alloc_header)(void); - void (*rw_free_header)(struct nfs_rw_header *); + struct nfs_pgio_header *(*rw_alloc_header)(void); + void (*rw_free_header)(struct nfs_pgio_header *); void (*rw_release)(struct nfs_pgio_data *); int (*rw_done)(struct rpc_task *, struct nfs_pgio_data *, struct inode *); void (*rw_result)(struct rpc_task *, struct nfs_pgio_data *); diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 9a1396e70310..e1c9437e8aac 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1257,13 +1257,27 @@ enum { NFS_IOHDR_NEED_RESCHED, }; -struct nfs_pgio_data; +struct nfs_pgio_data { + struct nfs_pgio_header *header; + struct list_head list; + struct rpc_task task; + struct nfs_fattr fattr; + struct nfs_writeverf verf; /* Used for writes */ + struct nfs_pgio_args args; /* argument struct */ + struct nfs_pgio_res res; /* result struct */ + unsigned long timestamp; /* For lease renewal */ + int (*pgio_done_cb)(struct rpc_task *task, struct nfs_pgio_data *data); + __u64 mds_offset; /* Filelayout dense stripe */ + struct nfs_page_array pages; + struct nfs_client *ds_clp; /* pNFS data server */ + int ds_idx; /* ds index if ds_clp is set */ +}; struct nfs_pgio_header { struct inode *inode; struct rpc_cred *cred; struct list_head pages; - struct nfs_pgio_data *data; + struct nfs_pgio_data data; atomic_t refcnt; struct nfs_page *req; struct nfs_writeverf verf; /* Used for writes */ @@ -1283,26 +1297,6 @@ struct nfs_pgio_header { unsigned long flags; }; -struct nfs_pgio_data { - struct nfs_pgio_header *header; - struct rpc_task task; - struct nfs_fattr fattr; - struct nfs_writeverf verf; /* Used for writes */ - struct nfs_pgio_args args; /* argument struct */ - struct nfs_pgio_res res; /* result struct */ - unsigned long timestamp; /* For lease renewal */ - int (*pgio_done_cb) (struct rpc_task *task, struct nfs_pgio_data *data); - __u64 mds_offset; /* Filelayout dense stripe */ - struct nfs_page_array pages; - struct nfs_client *ds_clp; /* pNFS data server */ - int ds_idx; /* ds index if ds_clp is set */ -}; - -struct nfs_rw_header { - struct nfs_pgio_header header; - struct nfs_pgio_data rpc_data; -}; - struct nfs_mds_commit_info { atomic_t rpcs_out; unsigned long ncommit; -- cgit v1.2.3-59-g8ed1b From 823b0c9d9800e712374cda89ac3565bd29f6701b Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Mon, 9 Jun 2014 11:48:34 -0400 Subject: nfs: rename members of nfs_pgio_data Rename "verf" to "writeverf" and "pages" to "page_array" to prepare for merge of nfs_pgio_data and nfs_pgio_header. 
Reviewed-by: Christoph Hellwig Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/blocklayout/blocklayout.c | 17 ++++++++++------- fs/nfs/objlayout/objlayout.c | 4 ++-- fs/nfs/pagelist.c | 12 ++++++------ fs/nfs/write.c | 9 +++++---- include/linux/nfs_xdr.h | 4 ++-- 5 files changed, 25 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 9b431f44fad9..36b01cef849e 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -258,7 +258,8 @@ bl_read_pagelist(struct nfs_pgio_data *rdata) const bool is_dio = (header->dreq != NULL); dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__, - rdata->pages.npages, f_offset, (unsigned int)rdata->args.count); + rdata->page_array.npages, f_offset, + (unsigned int)rdata->args.count); par = alloc_parallel(rdata); if (!par) @@ -268,7 +269,7 @@ bl_read_pagelist(struct nfs_pgio_data *rdata) isect = (sector_t) (f_offset >> SECTOR_SHIFT); /* Code assumes extents are page-aligned */ - for (i = pg_index; i < rdata->pages.npages; i++) { + for (i = pg_index; i < rdata->page_array.npages; i++) { if (!extent_length) { /* We've used up the previous extent */ bl_put_extent(be); @@ -317,7 +318,8 @@ bl_read_pagelist(struct nfs_pgio_data *rdata) struct pnfs_block_extent *be_read; be_read = (hole && cow_read) ? cow_read : be; - bio = do_add_page_to_bio(bio, rdata->pages.npages - i, + bio = do_add_page_to_bio(bio, + rdata->page_array.npages - i, READ, isect, pages[i], be_read, bl_end_io_read, par, @@ -446,7 +448,7 @@ static void bl_end_par_io_write(void *data, int num_se) } wdata->task.tk_status = wdata->header->pnfs_error; - wdata->verf.committed = NFS_FILE_SYNC; + wdata->writeverf.committed = NFS_FILE_SYNC; INIT_WORK(&wdata->task.u.tk_work, bl_write_cleanup); schedule_work(&wdata->task.u.tk_work); } @@ -699,7 +701,7 @@ bl_write_pagelist(struct nfs_pgio_data *wdata, int sync) dprintk("pnfsblock nonblock aligned DIO writes. Resend MDS\n"); goto out_mds; } - /* At this point, wdata->pages is a (sequential) list of nfs_pages. + /* At this point, wdata->page_aray is a (sequential) list of nfs_pages. * We want to write each, and if there is an error set pnfs_error * to have it redone using nfs. 
*/ @@ -791,7 +793,7 @@ next_page: /* Middle pages */ pg_index = wdata->args.pgbase >> PAGE_CACHE_SHIFT; - for (i = pg_index; i < wdata->pages.npages; i++) { + for (i = pg_index; i < wdata->page_array.npages; i++) { if (!extent_length) { /* We've used up the previous extent */ bl_put_extent(be); @@ -862,7 +864,8 @@ next_page: } - bio = do_add_page_to_bio(bio, wdata->pages.npages - i, WRITE, + bio = do_add_page_to_bio(bio, wdata->page_array.npages - i, + WRITE, isect, pages[i], be, bl_end_io_write, par, pg_offset, pg_len); diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c index 765d3f54e986..31fed91a8bac 100644 --- a/fs/nfs/objlayout/objlayout.c +++ b/fs/nfs/objlayout/objlayout.c @@ -329,7 +329,7 @@ objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync) oir->status = wdata->task.tk_status = status; if (status >= 0) { wdata->res.count = status; - wdata->verf.committed = oir->committed; + wdata->writeverf.committed = oir->committed; } else { wdata->header->pnfs_error = status; } @@ -337,7 +337,7 @@ objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync) /* must not use oir after this point */ dprintk("%s: Return status %zd committed %d sync=%d\n", __func__, - status, wdata->verf.committed, sync); + status, wdata->writeverf.committed, sync); if (sync) pnfs_ld_write_done(wdata); diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index e4cde476562f..5e70918f6c95 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -484,7 +484,7 @@ EXPORT_SYMBOL_GPL(nfs_pgio_header_free); static bool nfs_pgio_data_init(struct nfs_pgio_header *hdr, unsigned int pagecount) { - if (nfs_pgarray_set(&hdr->data.pages, pagecount)) { + if (nfs_pgarray_set(&hdr->data.page_array, pagecount)) { hdr->data.header = hdr; atomic_inc(&hdr->refcnt); return true; @@ -501,8 +501,8 @@ void nfs_pgio_data_destroy(struct nfs_pgio_data *data) struct nfs_pgio_header *hdr = data->header; put_nfs_open_context(data->args.context); - if (data->pages.pagevec != data->pages.page_array) - kfree(data->pages.pagevec); + if (data->page_array.pagevec != data->page_array.page_array) + kfree(data->page_array.pagevec); if (atomic_dec_and_test(&hdr->refcnt)) hdr->completion_ops->completion(hdr); } @@ -530,7 +530,7 @@ static void nfs_pgio_rpcsetup(struct nfs_pgio_data *data, /* pnfs_set_layoutcommit needs this */ data->mds_offset = data->args.offset; data->args.pgbase = req->wb_pgbase + offset; - data->args.pages = data->pages.pagevec; + data->args.pages = data->page_array.pagevec; data->args.count = count; data->args.context = get_nfs_open_context(req->wb_context); data->args.lock_context = req->wb_lock_context; @@ -548,7 +548,7 @@ static void nfs_pgio_rpcsetup(struct nfs_pgio_data *data, data->res.fattr = &data->fattr; data->res.count = count; data->res.eof = 0; - data->res.verf = &data->verf; + data->res.verf = &data->writeverf; nfs_fattr_init(&data->fattr); } @@ -717,7 +717,7 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc, data = &hdr->data; nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq); - pages = data->pages.pagevec; + pages = data->page_array.pagevec; while (!list_empty(head)) { req = nfs_list_entry(head->next); nfs_list_remove_request(req); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index d694952f0071..6afe0f679420 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -598,9 +598,9 @@ nfs_clear_request_commit(struct nfs_page *req) static inline int nfs_write_need_commit(struct nfs_pgio_data *data) { - if (data->verf.committed == NFS_DATA_SYNC) + if 
(data->writeverf.committed == NFS_DATA_SYNC) return data->header->lseg == NULL; - return data->verf.committed != NFS_FILE_SYNC; + return data->writeverf.committed != NFS_FILE_SYNC; } #else @@ -1095,8 +1095,9 @@ static void nfs_writeback_release_common(struct nfs_pgio_data *data) if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) ; /* Do nothing */ else if (!test_and_set_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) - memcpy(&hdr->verf, &data->verf, sizeof(hdr->verf)); - else if (memcmp(&hdr->verf, &data->verf, sizeof(hdr->verf))) + memcpy(&hdr->verf, &data->writeverf, sizeof(hdr->verf)); + else if (memcmp(&hdr->verf, &data->writeverf, + sizeof(hdr->verf))) set_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags); spin_unlock(&hdr->lock); } diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index e1c9437e8aac..bb18dba1aefe 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1262,13 +1262,13 @@ struct nfs_pgio_data { struct list_head list; struct rpc_task task; struct nfs_fattr fattr; - struct nfs_writeverf verf; /* Used for writes */ + struct nfs_writeverf writeverf; /* Used for writes */ struct nfs_pgio_args args; /* argument struct */ struct nfs_pgio_res res; /* result struct */ unsigned long timestamp; /* For lease renewal */ int (*pgio_done_cb)(struct rpc_task *task, struct nfs_pgio_data *data); __u64 mds_offset; /* Filelayout dense stripe */ - struct nfs_page_array pages; + struct nfs_page_array page_array; struct nfs_client *ds_clp; /* pNFS data server */ int ds_idx; /* ds index if ds_clp is set */ }; -- cgit v1.2.3-59-g8ed1b From d45f60c67848b9f19160692581d78e5b4757a000 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Mon, 9 Jun 2014 11:48:35 -0400 Subject: nfs: merge nfs_pgio_data into _header struct nfs_pgio_data only exists as a member of nfs_pgio_header, but is passed around everywhere, because there used to be multiple _data structs per _header. Many of these functions then use the _data to find a pointer to the _header. This patch cleans this up by merging the nfs_pgio_data structure into nfs_pgio_header and passing nfs_pgio_header around instead. 
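For illustration (paraphrased from the hunks below, not verbatim), the shape of the
change at a typical call site; do_io() is a hypothetical placeholder, the field
accesses are the ones the converted code actually uses.

        /* Before: callers got the nfs_pgio_data and chased ->header */
        static enum pnfs_try_status old_read(struct nfs_pgio_data *rdata)
        {
                struct nfs_pgio_header *hdr = rdata->header;

                return do_io(hdr->inode, rdata->args.offset, rdata->args.count);
        }

        /* After: callers get the header and read args/res/page_array off it */
        static enum pnfs_try_status new_read(struct nfs_pgio_header *hdr)
        {
                return do_io(hdr->inode, hdr->args.offset, hdr->args.count);
        }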
Reviewed-by: Christoph Hellwig Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/blocklayout/blocklayout.c | 98 +++++++++++----------- fs/nfs/direct.c | 8 +- fs/nfs/filelayout/filelayout.c | 170 +++++++++++++++++++-------------------- fs/nfs/internal.h | 6 +- fs/nfs/nfs3proc.c | 21 ++--- fs/nfs/nfs4_fs.h | 6 +- fs/nfs/nfs4proc.c | 105 ++++++++++++------------ fs/nfs/nfs4trace.h | 28 +++---- fs/nfs/objlayout/objio_osd.c | 24 +++--- fs/nfs/objlayout/objlayout.c | 81 +++++++++---------- fs/nfs/objlayout/objlayout.h | 8 +- fs/nfs/pagelist.c | 120 +++++++++++++-------------- fs/nfs/pnfs.c | 80 ++++++++---------- fs/nfs/pnfs.h | 10 +-- fs/nfs/proc.c | 27 ++++--- fs/nfs/read.c | 42 +++++----- fs/nfs/write.c | 56 ++++++------- include/linux/nfs_page.h | 9 ++- include/linux/nfs_xdr.h | 43 +++++----- 19 files changed, 460 insertions(+), 482 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 36b01cef849e..c3ccfe440390 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -210,8 +210,7 @@ static void bl_end_io_read(struct bio *bio, int err) SetPageUptodate(bvec->bv_page); if (err) { - struct nfs_pgio_data *rdata = par->data; - struct nfs_pgio_header *header = rdata->header; + struct nfs_pgio_header *header = par->data; if (!header->pnfs_error) header->pnfs_error = -EIO; @@ -224,44 +223,44 @@ static void bl_end_io_read(struct bio *bio, int err) static void bl_read_cleanup(struct work_struct *work) { struct rpc_task *task; - struct nfs_pgio_data *rdata; + struct nfs_pgio_header *hdr; dprintk("%s enter\n", __func__); task = container_of(work, struct rpc_task, u.tk_work); - rdata = container_of(task, struct nfs_pgio_data, task); - pnfs_ld_read_done(rdata); + hdr = container_of(task, struct nfs_pgio_header, task); + pnfs_ld_read_done(hdr); } static void bl_end_par_io_read(void *data, int unused) { - struct nfs_pgio_data *rdata = data; + struct nfs_pgio_header *hdr = data; - rdata->task.tk_status = rdata->header->pnfs_error; - INIT_WORK(&rdata->task.u.tk_work, bl_read_cleanup); - schedule_work(&rdata->task.u.tk_work); + hdr->task.tk_status = hdr->pnfs_error; + INIT_WORK(&hdr->task.u.tk_work, bl_read_cleanup); + schedule_work(&hdr->task.u.tk_work); } static enum pnfs_try_status -bl_read_pagelist(struct nfs_pgio_data *rdata) +bl_read_pagelist(struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *header = rdata->header; + struct nfs_pgio_header *header = hdr; int i, hole; struct bio *bio = NULL; struct pnfs_block_extent *be = NULL, *cow_read = NULL; sector_t isect, extent_length = 0; struct parallel_io *par; - loff_t f_offset = rdata->args.offset; - size_t bytes_left = rdata->args.count; + loff_t f_offset = hdr->args.offset; + size_t bytes_left = hdr->args.count; unsigned int pg_offset, pg_len; - struct page **pages = rdata->args.pages; - int pg_index = rdata->args.pgbase >> PAGE_CACHE_SHIFT; + struct page **pages = hdr->args.pages; + int pg_index = hdr->args.pgbase >> PAGE_CACHE_SHIFT; const bool is_dio = (header->dreq != NULL); dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__, - rdata->page_array.npages, f_offset, - (unsigned int)rdata->args.count); + hdr->page_array.npages, f_offset, + (unsigned int)hdr->args.count); - par = alloc_parallel(rdata); + par = alloc_parallel(hdr); if (!par) goto use_mds; par->pnfs_callback = bl_end_par_io_read; @@ -269,7 +268,7 @@ bl_read_pagelist(struct nfs_pgio_data *rdata) isect = (sector_t) (f_offset >> SECTOR_SHIFT); 
/* Code assumes extents are page-aligned */ - for (i = pg_index; i < rdata->page_array.npages; i++) { + for (i = pg_index; i < hdr->page_array.npages; i++) { if (!extent_length) { /* We've used up the previous extent */ bl_put_extent(be); @@ -319,7 +318,7 @@ bl_read_pagelist(struct nfs_pgio_data *rdata) be_read = (hole && cow_read) ? cow_read : be; bio = do_add_page_to_bio(bio, - rdata->page_array.npages - i, + hdr->page_array.npages - i, READ, isect, pages[i], be_read, bl_end_io_read, par, @@ -334,10 +333,10 @@ bl_read_pagelist(struct nfs_pgio_data *rdata) extent_length -= PAGE_CACHE_SECTORS; } if ((isect << SECTOR_SHIFT) >= header->inode->i_size) { - rdata->res.eof = 1; - rdata->res.count = header->inode->i_size - rdata->args.offset; + hdr->res.eof = 1; + hdr->res.count = header->inode->i_size - hdr->args.offset; } else { - rdata->res.count = (isect << SECTOR_SHIFT) - rdata->args.offset; + hdr->res.count = (isect << SECTOR_SHIFT) - hdr->args.offset; } out: bl_put_extent(be); @@ -392,8 +391,7 @@ static void bl_end_io_write_zero(struct bio *bio, int err) } if (unlikely(err)) { - struct nfs_pgio_data *data = par->data; - struct nfs_pgio_header *header = data->header; + struct nfs_pgio_header *header = par->data; if (!header->pnfs_error) header->pnfs_error = -EIO; @@ -407,8 +405,7 @@ static void bl_end_io_write(struct bio *bio, int err) { struct parallel_io *par = bio->bi_private; const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); - struct nfs_pgio_data *data = par->data; - struct nfs_pgio_header *header = data->header; + struct nfs_pgio_header *header = par->data; if (!uptodate) { if (!header->pnfs_error) @@ -425,32 +422,32 @@ static void bl_end_io_write(struct bio *bio, int err) static void bl_write_cleanup(struct work_struct *work) { struct rpc_task *task; - struct nfs_pgio_data *wdata; + struct nfs_pgio_header *hdr; dprintk("%s enter\n", __func__); task = container_of(work, struct rpc_task, u.tk_work); - wdata = container_of(task, struct nfs_pgio_data, task); - if (likely(!wdata->header->pnfs_error)) { + hdr = container_of(task, struct nfs_pgio_header, task); + if (likely(!hdr->pnfs_error)) { /* Marks for LAYOUTCOMMIT */ - mark_extents_written(BLK_LSEG2EXT(wdata->header->lseg), - wdata->args.offset, wdata->args.count); + mark_extents_written(BLK_LSEG2EXT(hdr->lseg), + hdr->args.offset, hdr->args.count); } - pnfs_ld_write_done(wdata); + pnfs_ld_write_done(hdr); } /* Called when last of bios associated with a bl_write_pagelist call finishes */ static void bl_end_par_io_write(void *data, int num_se) { - struct nfs_pgio_data *wdata = data; + struct nfs_pgio_header *hdr = data; - if (unlikely(wdata->header->pnfs_error)) { - bl_free_short_extents(&BLK_LSEG2EXT(wdata->header->lseg)->bl_inval, + if (unlikely(hdr->pnfs_error)) { + bl_free_short_extents(&BLK_LSEG2EXT(hdr->lseg)->bl_inval, num_se); } - wdata->task.tk_status = wdata->header->pnfs_error; - wdata->writeverf.committed = NFS_FILE_SYNC; - INIT_WORK(&wdata->task.u.tk_work, bl_write_cleanup); - schedule_work(&wdata->task.u.tk_work); + hdr->task.tk_status = hdr->pnfs_error; + hdr->writeverf.committed = NFS_FILE_SYNC; + INIT_WORK(&hdr->task.u.tk_work, bl_write_cleanup); + schedule_work(&hdr->task.u.tk_work); } /* FIXME STUB - mark intersection of layout and page as bad, so is not @@ -675,18 +672,17 @@ check_page: } static enum pnfs_try_status -bl_write_pagelist(struct nfs_pgio_data *wdata, int sync) +bl_write_pagelist(struct nfs_pgio_header *header, int sync) { - struct nfs_pgio_header *header = wdata->header; int i, ret, npg_zero, 
pg_index, last = 0; struct bio *bio = NULL; struct pnfs_block_extent *be = NULL, *cow_read = NULL; sector_t isect, last_isect = 0, extent_length = 0; struct parallel_io *par = NULL; - loff_t offset = wdata->args.offset; - size_t count = wdata->args.count; + loff_t offset = header->args.offset; + size_t count = header->args.count; unsigned int pg_offset, pg_len, saved_len; - struct page **pages = wdata->args.pages; + struct page **pages = header->args.pages; struct page *page; pgoff_t index; u64 temp; @@ -701,11 +697,11 @@ bl_write_pagelist(struct nfs_pgio_data *wdata, int sync) dprintk("pnfsblock nonblock aligned DIO writes. Resend MDS\n"); goto out_mds; } - /* At this point, wdata->page_aray is a (sequential) list of nfs_pages. + /* At this point, header->page_aray is a (sequential) list of nfs_pages. * We want to write each, and if there is an error set pnfs_error * to have it redone using nfs. */ - par = alloc_parallel(wdata); + par = alloc_parallel(header); if (!par) goto out_mds; par->pnfs_callback = bl_end_par_io_write; @@ -792,8 +788,8 @@ next_page: bio = bl_submit_bio(WRITE, bio); /* Middle pages */ - pg_index = wdata->args.pgbase >> PAGE_CACHE_SHIFT; - for (i = pg_index; i < wdata->page_array.npages; i++) { + pg_index = header->args.pgbase >> PAGE_CACHE_SHIFT; + for (i = pg_index; i < header->page_array.npages; i++) { if (!extent_length) { /* We've used up the previous extent */ bl_put_extent(be); @@ -864,7 +860,7 @@ next_page: } - bio = do_add_page_to_bio(bio, wdata->page_array.npages - i, + bio = do_add_page_to_bio(bio, header->page_array.npages - i, WRITE, isect, pages[i], be, bl_end_io_write, par, @@ -893,7 +889,7 @@ next_page: } write_done: - wdata->res.count = wdata->args.count; + header->res.count = header->args.count; out: bl_put_extent(be); bl_put_extent(cow_read); diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 179de67ca907..6c4c867ee04c 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -148,8 +148,8 @@ static void nfs_direct_set_hdr_verf(struct nfs_direct_req *dreq, { struct nfs_writeverf *verfp; - verfp = nfs_direct_select_verf(dreq, hdr->data.ds_clp, - hdr->data.ds_idx); + verfp = nfs_direct_select_verf(dreq, hdr->ds_clp, + hdr->ds_idx); WARN_ON_ONCE(verfp->committed >= 0); memcpy(verfp, &hdr->verf, sizeof(struct nfs_writeverf)); WARN_ON_ONCE(verfp->committed < 0); @@ -169,8 +169,8 @@ static int nfs_direct_set_or_cmp_hdr_verf(struct nfs_direct_req *dreq, { struct nfs_writeverf *verfp; - verfp = nfs_direct_select_verf(dreq, hdr->data.ds_clp, - hdr->data.ds_idx); + verfp = nfs_direct_select_verf(dreq, hdr->ds_clp, + hdr->ds_idx); if (verfp->committed < 0) { nfs_direct_set_hdr_verf(dreq, hdr); return 0; diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index d2eba1c13b7e..537e7f7a0b48 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -84,19 +84,18 @@ filelayout_get_dserver_offset(struct pnfs_layout_segment *lseg, loff_t offset) BUG(); } -static void filelayout_reset_write(struct nfs_pgio_data *data) +static void filelayout_reset_write(struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *hdr = data->header; - struct rpc_task *task = &data->task; + struct rpc_task *task = &hdr->task; if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { dprintk("%s Reset task %5u for i/o through MDS " "(req %s/%llu, %u bytes @ offset %llu)\n", __func__, - data->task.tk_pid, + hdr->task.tk_pid, hdr->inode->i_sb->s_id, (unsigned long long)NFS_FILEID(hdr->inode), - data->args.count, - (unsigned long 
long)data->args.offset); + hdr->args.count, + (unsigned long long)hdr->args.offset); task->tk_status = pnfs_write_done_resend_to_mds(hdr->inode, &hdr->pages, @@ -105,19 +104,18 @@ static void filelayout_reset_write(struct nfs_pgio_data *data) } } -static void filelayout_reset_read(struct nfs_pgio_data *data) +static void filelayout_reset_read(struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *hdr = data->header; - struct rpc_task *task = &data->task; + struct rpc_task *task = &hdr->task; if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { dprintk("%s Reset task %5u for i/o through MDS " "(req %s/%llu, %u bytes @ offset %llu)\n", __func__, - data->task.tk_pid, + hdr->task.tk_pid, hdr->inode->i_sb->s_id, (unsigned long long)NFS_FILEID(hdr->inode), - data->args.count, - (unsigned long long)data->args.offset); + hdr->args.count, + (unsigned long long)hdr->args.offset); task->tk_status = pnfs_read_done_resend_to_mds(hdr->inode, &hdr->pages, @@ -243,18 +241,17 @@ wait_on_recovery: /* NFS_PROTO call done callback routines */ static int filelayout_read_done_cb(struct rpc_task *task, - struct nfs_pgio_data *data) + struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *hdr = data->header; int err; - trace_nfs4_pnfs_read(data, task->tk_status); - err = filelayout_async_handle_error(task, data->args.context->state, - data->ds_clp, hdr->lseg); + trace_nfs4_pnfs_read(hdr, task->tk_status); + err = filelayout_async_handle_error(task, hdr->args.context->state, + hdr->ds_clp, hdr->lseg); switch (err) { case -NFS4ERR_RESET_TO_MDS: - filelayout_reset_read(data); + filelayout_reset_read(hdr); return task->tk_status; case -EAGAIN: rpc_restart_call_prepare(task); @@ -270,15 +267,14 @@ static int filelayout_read_done_cb(struct rpc_task *task, * rfc5661 is not clear about which credential should be used. 
*/ static void -filelayout_set_layoutcommit(struct nfs_pgio_data *wdata) +filelayout_set_layoutcommit(struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *hdr = wdata->header; if (FILELAYOUT_LSEG(hdr->lseg)->commit_through_mds || - wdata->res.verf->committed == NFS_FILE_SYNC) + hdr->res.verf->committed == NFS_FILE_SYNC) return; - pnfs_set_layoutcommit(wdata); + pnfs_set_layoutcommit(hdr); dprintk("%s inode %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino, (unsigned long) NFS_I(hdr->inode)->layout->plh_lwb); } @@ -305,83 +301,82 @@ filelayout_reset_to_mds(struct pnfs_layout_segment *lseg) */ static void filelayout_read_prepare(struct rpc_task *task, void *data) { - struct nfs_pgio_data *rdata = data; + struct nfs_pgio_header *hdr = data; - if (unlikely(test_bit(NFS_CONTEXT_BAD, &rdata->args.context->flags))) { + if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) { rpc_exit(task, -EIO); return; } - if (filelayout_reset_to_mds(rdata->header->lseg)) { + if (filelayout_reset_to_mds(hdr->lseg)) { dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid); - filelayout_reset_read(rdata); + filelayout_reset_read(hdr); rpc_exit(task, 0); return; } - rdata->pgio_done_cb = filelayout_read_done_cb; + hdr->pgio_done_cb = filelayout_read_done_cb; - if (nfs41_setup_sequence(rdata->ds_clp->cl_session, - &rdata->args.seq_args, - &rdata->res.seq_res, + if (nfs41_setup_sequence(hdr->ds_clp->cl_session, + &hdr->args.seq_args, + &hdr->res.seq_res, task)) return; - if (nfs4_set_rw_stateid(&rdata->args.stateid, rdata->args.context, - rdata->args.lock_context, FMODE_READ) == -EIO) + if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context, + hdr->args.lock_context, FMODE_READ) == -EIO) rpc_exit(task, -EIO); /* lost lock, terminate I/O */ } static void filelayout_read_call_done(struct rpc_task *task, void *data) { - struct nfs_pgio_data *rdata = data; + struct nfs_pgio_header *hdr = data; dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status); - if (test_bit(NFS_IOHDR_REDO, &rdata->header->flags) && + if (test_bit(NFS_IOHDR_REDO, &hdr->flags) && task->tk_status == 0) { - nfs41_sequence_done(task, &rdata->res.seq_res); + nfs41_sequence_done(task, &hdr->res.seq_res); return; } /* Note this may cause RPC to be resent */ - rdata->header->mds_ops->rpc_call_done(task, data); + hdr->mds_ops->rpc_call_done(task, data); } static void filelayout_read_count_stats(struct rpc_task *task, void *data) { - struct nfs_pgio_data *rdata = data; + struct nfs_pgio_header *hdr = data; - rpc_count_iostats(task, NFS_SERVER(rdata->header->inode)->client->cl_metrics); + rpc_count_iostats(task, NFS_SERVER(hdr->inode)->client->cl_metrics); } static void filelayout_read_release(void *data) { - struct nfs_pgio_data *rdata = data; - struct pnfs_layout_hdr *lo = rdata->header->lseg->pls_layout; + struct nfs_pgio_header *hdr = data; + struct pnfs_layout_hdr *lo = hdr->lseg->pls_layout; filelayout_fenceme(lo->plh_inode, lo); - nfs_put_client(rdata->ds_clp); - rdata->header->mds_ops->rpc_release(data); + nfs_put_client(hdr->ds_clp); + hdr->mds_ops->rpc_release(data); } static int filelayout_write_done_cb(struct rpc_task *task, - struct nfs_pgio_data *data) + struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *hdr = data->header; int err; - trace_nfs4_pnfs_write(data, task->tk_status); - err = filelayout_async_handle_error(task, data->args.context->state, - data->ds_clp, hdr->lseg); + trace_nfs4_pnfs_write(hdr, task->tk_status); + err = filelayout_async_handle_error(task, hdr->args.context->state, + 
hdr->ds_clp, hdr->lseg); switch (err) { case -NFS4ERR_RESET_TO_MDS: - filelayout_reset_write(data); + filelayout_reset_write(hdr); return task->tk_status; case -EAGAIN: rpc_restart_call_prepare(task); return -EAGAIN; } - filelayout_set_layoutcommit(data); + filelayout_set_layoutcommit(hdr); return 0; } @@ -419,57 +414,57 @@ static int filelayout_commit_done_cb(struct rpc_task *task, static void filelayout_write_prepare(struct rpc_task *task, void *data) { - struct nfs_pgio_data *wdata = data; + struct nfs_pgio_header *hdr = data; - if (unlikely(test_bit(NFS_CONTEXT_BAD, &wdata->args.context->flags))) { + if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) { rpc_exit(task, -EIO); return; } - if (filelayout_reset_to_mds(wdata->header->lseg)) { + if (filelayout_reset_to_mds(hdr->lseg)) { dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid); - filelayout_reset_write(wdata); + filelayout_reset_write(hdr); rpc_exit(task, 0); return; } - if (nfs41_setup_sequence(wdata->ds_clp->cl_session, - &wdata->args.seq_args, - &wdata->res.seq_res, + if (nfs41_setup_sequence(hdr->ds_clp->cl_session, + &hdr->args.seq_args, + &hdr->res.seq_res, task)) return; - if (nfs4_set_rw_stateid(&wdata->args.stateid, wdata->args.context, - wdata->args.lock_context, FMODE_WRITE) == -EIO) + if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context, + hdr->args.lock_context, FMODE_WRITE) == -EIO) rpc_exit(task, -EIO); /* lost lock, terminate I/O */ } static void filelayout_write_call_done(struct rpc_task *task, void *data) { - struct nfs_pgio_data *wdata = data; + struct nfs_pgio_header *hdr = data; - if (test_bit(NFS_IOHDR_REDO, &wdata->header->flags) && + if (test_bit(NFS_IOHDR_REDO, &hdr->flags) && task->tk_status == 0) { - nfs41_sequence_done(task, &wdata->res.seq_res); + nfs41_sequence_done(task, &hdr->res.seq_res); return; } /* Note this may cause RPC to be resent */ - wdata->header->mds_ops->rpc_call_done(task, data); + hdr->mds_ops->rpc_call_done(task, data); } static void filelayout_write_count_stats(struct rpc_task *task, void *data) { - struct nfs_pgio_data *wdata = data; + struct nfs_pgio_header *hdr = data; - rpc_count_iostats(task, NFS_SERVER(wdata->header->inode)->client->cl_metrics); + rpc_count_iostats(task, NFS_SERVER(hdr->inode)->client->cl_metrics); } static void filelayout_write_release(void *data) { - struct nfs_pgio_data *wdata = data; - struct pnfs_layout_hdr *lo = wdata->header->lseg->pls_layout; + struct nfs_pgio_header *hdr = data; + struct pnfs_layout_hdr *lo = hdr->lseg->pls_layout; filelayout_fenceme(lo->plh_inode, lo); - nfs_put_client(wdata->ds_clp); - wdata->header->mds_ops->rpc_release(data); + nfs_put_client(hdr->ds_clp); + hdr->mds_ops->rpc_release(data); } static void filelayout_commit_prepare(struct rpc_task *task, void *data) @@ -529,19 +524,18 @@ static const struct rpc_call_ops filelayout_commit_call_ops = { }; static enum pnfs_try_status -filelayout_read_pagelist(struct nfs_pgio_data *data) +filelayout_read_pagelist(struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *hdr = data->header; struct pnfs_layout_segment *lseg = hdr->lseg; struct nfs4_pnfs_ds *ds; struct rpc_clnt *ds_clnt; - loff_t offset = data->args.offset; + loff_t offset = hdr->args.offset; u32 j, idx; struct nfs_fh *fh; dprintk("--> %s ino %lu pgbase %u req %Zu@%llu\n", __func__, hdr->inode->i_ino, - data->args.pgbase, (size_t)data->args.count, offset); + hdr->args.pgbase, (size_t)hdr->args.count, offset); /* Retrieve the correct rpc_client for the byte range */ j = 
nfs4_fl_calc_j_index(lseg, offset); @@ -559,30 +553,29 @@ filelayout_read_pagelist(struct nfs_pgio_data *data) /* No multipath support. Use first DS */ atomic_inc(&ds->ds_clp->cl_count); - data->ds_clp = ds->ds_clp; - data->ds_idx = idx; + hdr->ds_clp = ds->ds_clp; + hdr->ds_idx = idx; fh = nfs4_fl_select_ds_fh(lseg, j); if (fh) - data->args.fh = fh; + hdr->args.fh = fh; - data->args.offset = filelayout_get_dserver_offset(lseg, offset); - data->mds_offset = offset; + hdr->args.offset = filelayout_get_dserver_offset(lseg, offset); + hdr->mds_offset = offset; /* Perform an asynchronous read to ds */ - nfs_initiate_pgio(ds_clnt, data, + nfs_initiate_pgio(ds_clnt, hdr, &filelayout_read_call_ops, 0, RPC_TASK_SOFTCONN); return PNFS_ATTEMPTED; } /* Perform async writes. */ static enum pnfs_try_status -filelayout_write_pagelist(struct nfs_pgio_data *data, int sync) +filelayout_write_pagelist(struct nfs_pgio_header *hdr, int sync) { - struct nfs_pgio_header *hdr = data->header; struct pnfs_layout_segment *lseg = hdr->lseg; struct nfs4_pnfs_ds *ds; struct rpc_clnt *ds_clnt; - loff_t offset = data->args.offset; + loff_t offset = hdr->args.offset; u32 j, idx; struct nfs_fh *fh; @@ -598,21 +591,20 @@ filelayout_write_pagelist(struct nfs_pgio_data *data, int sync) return PNFS_NOT_ATTEMPTED; dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s cl_count %d\n", - __func__, hdr->inode->i_ino, sync, (size_t) data->args.count, + __func__, hdr->inode->i_ino, sync, (size_t) hdr->args.count, offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count)); - data->pgio_done_cb = filelayout_write_done_cb; + hdr->pgio_done_cb = filelayout_write_done_cb; atomic_inc(&ds->ds_clp->cl_count); - data->ds_clp = ds->ds_clp; - data->ds_idx = idx; + hdr->ds_clp = ds->ds_clp; + hdr->ds_idx = idx; fh = nfs4_fl_select_ds_fh(lseg, j); if (fh) - data->args.fh = fh; - - data->args.offset = filelayout_get_dserver_offset(lseg, offset); + hdr->args.fh = fh; + hdr->args.offset = filelayout_get_dserver_offset(lseg, offset); /* Perform an asynchronous write */ - nfs_initiate_pgio(ds_clnt, data, + nfs_initiate_pgio(ds_clnt, hdr, &filelayout_write_call_ops, sync, RPC_TASK_SOFTCONN); return PNFS_ATTEMPTED; diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 5cda049c8f9b..3f3aedd2e8c9 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -240,9 +240,9 @@ int nfs_iocounter_wait(struct nfs_io_counter *c); extern const struct nfs_pageio_ops nfs_pgio_rw_ops; struct nfs_pgio_header *nfs_pgio_header_alloc(const struct nfs_rw_ops *); void nfs_pgio_header_free(struct nfs_pgio_header *); -void nfs_pgio_data_destroy(struct nfs_pgio_data *); +void nfs_pgio_data_destroy(struct nfs_pgio_header *); int nfs_generic_pgio(struct nfs_pageio_descriptor *, struct nfs_pgio_header *); -int nfs_initiate_pgio(struct rpc_clnt *, struct nfs_pgio_data *, +int nfs_initiate_pgio(struct rpc_clnt *, struct nfs_pgio_header *, const struct rpc_call_ops *, int, int); static inline void nfs_iocounter_init(struct nfs_io_counter *c) @@ -481,7 +481,7 @@ static inline void nfs_inode_dio_wait(struct inode *inode) extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq); /* nfs4proc.c */ -extern void __nfs4_read_done_cb(struct nfs_pgio_data *); +extern void __nfs4_read_done_cb(struct nfs_pgio_header *); extern struct nfs_client *nfs4_init_client(struct nfs_client *clp, const struct rpc_timeout *timeparms, const char *ip_addr); diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index e7daa42bbc86..854959db0e5d 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ 
-795,41 +795,44 @@ nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, return status; } -static int nfs3_read_done(struct rpc_task *task, struct nfs_pgio_data *data) +static int nfs3_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr) { - struct inode *inode = data->header->inode; + struct inode *inode = hdr->inode; if (nfs3_async_handle_jukebox(task, inode)) return -EAGAIN; nfs_invalidate_atime(inode); - nfs_refresh_inode(inode, &data->fattr); + nfs_refresh_inode(inode, &hdr->fattr); return 0; } -static void nfs3_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message *msg) +static void nfs3_proc_read_setup(struct nfs_pgio_header *hdr, + struct rpc_message *msg) { msg->rpc_proc = &nfs3_procedures[NFS3PROC_READ]; } -static int nfs3_proc_pgio_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data) +static int nfs3_proc_pgio_rpc_prepare(struct rpc_task *task, + struct nfs_pgio_header *hdr) { rpc_call_start(task); return 0; } -static int nfs3_write_done(struct rpc_task *task, struct nfs_pgio_data *data) +static int nfs3_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr) { - struct inode *inode = data->header->inode; + struct inode *inode = hdr->inode; if (nfs3_async_handle_jukebox(task, inode)) return -EAGAIN; if (task->tk_status >= 0) - nfs_post_op_update_inode_force_wcc(inode, data->res.fattr); + nfs_post_op_update_inode_force_wcc(inode, hdr->res.fattr); return 0; } -static void nfs3_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message *msg) +static void nfs3_proc_write_setup(struct nfs_pgio_header *hdr, + struct rpc_message *msg) { msg->rpc_proc = &nfs3_procedures[NFS3PROC_WRITE]; } diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index ba2affa51941..b8ea4a26998c 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -337,11 +337,11 @@ nfs4_state_protect(struct nfs_client *clp, unsigned long sp4_mode, */ static inline void nfs4_state_protect_write(struct nfs_client *clp, struct rpc_clnt **clntp, - struct rpc_message *msg, struct nfs_pgio_data *wdata) + struct rpc_message *msg, struct nfs_pgio_header *hdr) { if (_nfs4_state_protect(clp, NFS_SP4_MACH_CRED_WRITE, clntp, msg) && !test_bit(NFS_SP4_MACH_CRED_COMMIT, &clp->cl_sp4_flags)) - wdata->args.stable = NFS_FILE_SYNC; + hdr->args.stable = NFS_FILE_SYNC; } #else /* CONFIG_NFS_v4_1 */ static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server) @@ -369,7 +369,7 @@ nfs4_state_protect(struct nfs_client *clp, unsigned long sp4_flags, static inline void nfs4_state_protect_write(struct nfs_client *clp, struct rpc_clnt **clntp, - struct rpc_message *msg, struct nfs_pgio_data *wdata) + struct rpc_message *msg, struct nfs_pgio_header *hdr) { } #endif /* CONFIG_NFS_V4_1 */ diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 4bf3d97cc5a0..b0e5705599bf 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4033,24 +4033,25 @@ static bool nfs4_error_stateid_expired(int err) return false; } -void __nfs4_read_done_cb(struct nfs_pgio_data *data) +void __nfs4_read_done_cb(struct nfs_pgio_header *hdr) { - nfs_invalidate_atime(data->header->inode); + nfs_invalidate_atime(hdr->inode); } -static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_pgio_data *data) +static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_pgio_header *hdr) { - struct nfs_server *server = NFS_SERVER(data->header->inode); + struct nfs_server *server = NFS_SERVER(hdr->inode); - trace_nfs4_read(data, task->tk_status); - if (nfs4_async_handle_error(task, server, 
data->args.context->state) == -EAGAIN) { + trace_nfs4_read(hdr, task->tk_status); + if (nfs4_async_handle_error(task, server, + hdr->args.context->state) == -EAGAIN) { rpc_restart_call_prepare(task); return -EAGAIN; } - __nfs4_read_done_cb(data); + __nfs4_read_done_cb(hdr); if (task->tk_status > 0) - renew_lease(server, data->timestamp); + renew_lease(server, hdr->timestamp); return 0; } @@ -4068,54 +4069,59 @@ static bool nfs4_read_stateid_changed(struct rpc_task *task, return true; } -static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_data *data) +static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr) { dprintk("--> %s\n", __func__); - if (!nfs4_sequence_done(task, &data->res.seq_res)) + if (!nfs4_sequence_done(task, &hdr->res.seq_res)) return -EAGAIN; - if (nfs4_read_stateid_changed(task, &data->args)) + if (nfs4_read_stateid_changed(task, &hdr->args)) return -EAGAIN; - return data->pgio_done_cb ? data->pgio_done_cb(task, data) : - nfs4_read_done_cb(task, data); + return hdr->pgio_done_cb ? hdr->pgio_done_cb(task, hdr) : + nfs4_read_done_cb(task, hdr); } -static void nfs4_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message *msg) +static void nfs4_proc_read_setup(struct nfs_pgio_header *hdr, + struct rpc_message *msg) { - data->timestamp = jiffies; - data->pgio_done_cb = nfs4_read_done_cb; + hdr->timestamp = jiffies; + hdr->pgio_done_cb = nfs4_read_done_cb; msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ]; - nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0); + nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 0); } -static int nfs4_proc_pgio_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data) +static int nfs4_proc_pgio_rpc_prepare(struct rpc_task *task, + struct nfs_pgio_header *hdr) { - if (nfs4_setup_sequence(NFS_SERVER(data->header->inode), - &data->args.seq_args, - &data->res.seq_res, + if (nfs4_setup_sequence(NFS_SERVER(hdr->inode), + &hdr->args.seq_args, + &hdr->res.seq_res, task)) return 0; - if (nfs4_set_rw_stateid(&data->args.stateid, data->args.context, - data->args.lock_context, data->header->rw_ops->rw_mode) == -EIO) + if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context, + hdr->args.lock_context, + hdr->rw_ops->rw_mode) == -EIO) return -EIO; - if (unlikely(test_bit(NFS_CONTEXT_BAD, &data->args.context->flags))) + if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) return -EIO; return 0; } -static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_pgio_data *data) +static int nfs4_write_done_cb(struct rpc_task *task, + struct nfs_pgio_header *hdr) { - struct inode *inode = data->header->inode; + struct inode *inode = hdr->inode; - trace_nfs4_write(data, task->tk_status); - if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) { + trace_nfs4_write(hdr, task->tk_status); + if (nfs4_async_handle_error(task, NFS_SERVER(inode), + hdr->args.context->state) == -EAGAIN) { rpc_restart_call_prepare(task); return -EAGAIN; } if (task->tk_status >= 0) { - renew_lease(NFS_SERVER(inode), data->timestamp); - nfs_post_op_update_inode_force_wcc(inode, &data->fattr); + renew_lease(NFS_SERVER(inode), hdr->timestamp); + nfs_post_op_update_inode_force_wcc(inode, &hdr->fattr); } return 0; } @@ -4134,23 +4140,21 @@ static bool nfs4_write_stateid_changed(struct rpc_task *task, return true; } -static int nfs4_write_done(struct rpc_task *task, struct nfs_pgio_data *data) +static int nfs4_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr) 
{ - if (!nfs4_sequence_done(task, &data->res.seq_res)) + if (!nfs4_sequence_done(task, &hdr->res.seq_res)) return -EAGAIN; - if (nfs4_write_stateid_changed(task, &data->args)) + if (nfs4_write_stateid_changed(task, &hdr->args)) return -EAGAIN; - return data->pgio_done_cb ? data->pgio_done_cb(task, data) : - nfs4_write_done_cb(task, data); + return hdr->pgio_done_cb ? hdr->pgio_done_cb(task, hdr) : + nfs4_write_done_cb(task, hdr); } static -bool nfs4_write_need_cache_consistency_data(const struct nfs_pgio_data *data) +bool nfs4_write_need_cache_consistency_data(struct nfs_pgio_header *hdr) { - const struct nfs_pgio_header *hdr = data->header; - /* Don't request attributes for pNFS or O_DIRECT writes */ - if (data->ds_clp != NULL || hdr->dreq != NULL) + if (hdr->ds_clp != NULL || hdr->dreq != NULL) return false; /* Otherwise, request attributes if and only if we don't hold * a delegation @@ -4158,23 +4162,24 @@ bool nfs4_write_need_cache_consistency_data(const struct nfs_pgio_data *data) return nfs4_have_delegation(hdr->inode, FMODE_READ) == 0; } -static void nfs4_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message *msg) +static void nfs4_proc_write_setup(struct nfs_pgio_header *hdr, + struct rpc_message *msg) { - struct nfs_server *server = NFS_SERVER(data->header->inode); + struct nfs_server *server = NFS_SERVER(hdr->inode); - if (!nfs4_write_need_cache_consistency_data(data)) { - data->args.bitmask = NULL; - data->res.fattr = NULL; + if (!nfs4_write_need_cache_consistency_data(hdr)) { + hdr->args.bitmask = NULL; + hdr->res.fattr = NULL; } else - data->args.bitmask = server->cache_consistency_bitmask; + hdr->args.bitmask = server->cache_consistency_bitmask; - if (!data->pgio_done_cb) - data->pgio_done_cb = nfs4_write_done_cb; - data->res.server = server; - data->timestamp = jiffies; + if (!hdr->pgio_done_cb) + hdr->pgio_done_cb = nfs4_write_done_cb; + hdr->res.server = server; + hdr->timestamp = jiffies; msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE]; - nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); + nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 1); } static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data) diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index 0a744f3a86f6..1c32adbe728d 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -932,11 +932,11 @@ DEFINE_NFS4_IDMAP_EVENT(nfs4_map_gid_to_group); DECLARE_EVENT_CLASS(nfs4_read_event, TP_PROTO( - const struct nfs_pgio_data *data, + const struct nfs_pgio_header *hdr, int error ), - TP_ARGS(data, error), + TP_ARGS(hdr, error), TP_STRUCT__entry( __field(dev_t, dev) @@ -948,12 +948,12 @@ DECLARE_EVENT_CLASS(nfs4_read_event, ), TP_fast_assign( - const struct inode *inode = data->header->inode; + const struct inode *inode = hdr->inode; __entry->dev = inode->i_sb->s_dev; __entry->fileid = NFS_FILEID(inode); __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); - __entry->offset = data->args.offset; - __entry->count = data->args.count; + __entry->offset = hdr->args.offset; + __entry->count = hdr->args.count; __entry->error = error; ), @@ -972,10 +972,10 @@ DECLARE_EVENT_CLASS(nfs4_read_event, #define DEFINE_NFS4_READ_EVENT(name) \ DEFINE_EVENT(nfs4_read_event, name, \ TP_PROTO( \ - const struct nfs_pgio_data *data, \ + const struct nfs_pgio_header *hdr, \ int error \ ), \ - TP_ARGS(data, error)) + TP_ARGS(hdr, error)) DEFINE_NFS4_READ_EVENT(nfs4_read); #ifdef CONFIG_NFS_V4_1 DEFINE_NFS4_READ_EVENT(nfs4_pnfs_read); @@ -983,11 +983,11 @@ 
DEFINE_NFS4_READ_EVENT(nfs4_pnfs_read); DECLARE_EVENT_CLASS(nfs4_write_event, TP_PROTO( - const struct nfs_pgio_data *data, + const struct nfs_pgio_header *hdr, int error ), - TP_ARGS(data, error), + TP_ARGS(hdr, error), TP_STRUCT__entry( __field(dev_t, dev) @@ -999,12 +999,12 @@ DECLARE_EVENT_CLASS(nfs4_write_event, ), TP_fast_assign( - const struct inode *inode = data->header->inode; + const struct inode *inode = hdr->inode; __entry->dev = inode->i_sb->s_dev; __entry->fileid = NFS_FILEID(inode); __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); - __entry->offset = data->args.offset; - __entry->count = data->args.count; + __entry->offset = hdr->args.offset; + __entry->count = hdr->args.count; __entry->error = error; ), @@ -1024,10 +1024,10 @@ DECLARE_EVENT_CLASS(nfs4_write_event, #define DEFINE_NFS4_WRITE_EVENT(name) \ DEFINE_EVENT(nfs4_write_event, name, \ TP_PROTO( \ - const struct nfs_pgio_data *data, \ + const struct nfs_pgio_header *hdr, \ int error \ ), \ - TP_ARGS(data, error)) + TP_ARGS(hdr, error)) DEFINE_NFS4_WRITE_EVENT(nfs4_write); #ifdef CONFIG_NFS_V4_1 DEFINE_NFS4_WRITE_EVENT(nfs4_pnfs_write); diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index 611320753db2..ae05278b3761 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -439,22 +439,21 @@ static void _read_done(struct ore_io_state *ios, void *private) objlayout_read_done(&objios->oir, status, objios->sync); } -int objio_read_pagelist(struct nfs_pgio_data *rdata) +int objio_read_pagelist(struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *hdr = rdata->header; struct objio_state *objios; int ret; ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, true, - hdr->lseg, rdata->args.pages, rdata->args.pgbase, - rdata->args.offset, rdata->args.count, rdata, + hdr->lseg, hdr->args.pages, hdr->args.pgbase, + hdr->args.offset, hdr->args.count, hdr, GFP_KERNEL, &objios); if (unlikely(ret)) return ret; objios->ios->done = _read_done; dprintk("%s: offset=0x%llx length=0x%x\n", __func__, - rdata->args.offset, rdata->args.count); + hdr->args.offset, hdr->args.count); ret = ore_read(objios->ios); if (unlikely(ret)) objio_free_result(&objios->oir); @@ -487,11 +486,11 @@ static void _write_done(struct ore_io_state *ios, void *private) static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate) { struct objio_state *objios = priv; - struct nfs_pgio_data *wdata = objios->oir.rpcdata; - struct address_space *mapping = wdata->header->inode->i_mapping; + struct nfs_pgio_header *hdr = objios->oir.rpcdata; + struct address_space *mapping = hdr->inode->i_mapping; pgoff_t index = offset / PAGE_SIZE; struct page *page; - loff_t i_size = i_size_read(wdata->header->inode); + loff_t i_size = i_size_read(hdr->inode); if (offset >= i_size) { *uptodate = true; @@ -531,15 +530,14 @@ static const struct _ore_r4w_op _r4w_op = { .put_page = &__r4w_put_page, }; -int objio_write_pagelist(struct nfs_pgio_data *wdata, int how) +int objio_write_pagelist(struct nfs_pgio_header *hdr, int how) { - struct nfs_pgio_header *hdr = wdata->header; struct objio_state *objios; int ret; ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, false, - hdr->lseg, wdata->args.pages, wdata->args.pgbase, - wdata->args.offset, wdata->args.count, wdata, GFP_NOFS, + hdr->lseg, hdr->args.pages, hdr->args.pgbase, + hdr->args.offset, hdr->args.count, hdr, GFP_NOFS, &objios); if (unlikely(ret)) return ret; @@ -551,7 +549,7 @@ int objio_write_pagelist(struct nfs_pgio_data *wdata, int how) objios->ios->done = 
_write_done; dprintk("%s: offset=0x%llx length=0x%x\n", __func__, - wdata->args.offset, wdata->args.count); + hdr->args.offset, hdr->args.count); ret = ore_write(objios->ios); if (unlikely(ret)) { objio_free_result(&objios->oir); diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c index 31fed91a8bac..86312787cee6 100644 --- a/fs/nfs/objlayout/objlayout.c +++ b/fs/nfs/objlayout/objlayout.c @@ -229,36 +229,36 @@ objlayout_io_set_result(struct objlayout_io_res *oir, unsigned index, static void _rpc_read_complete(struct work_struct *work) { struct rpc_task *task; - struct nfs_pgio_data *rdata; + struct nfs_pgio_header *hdr; dprintk("%s enter\n", __func__); task = container_of(work, struct rpc_task, u.tk_work); - rdata = container_of(task, struct nfs_pgio_data, task); + hdr = container_of(task, struct nfs_pgio_header, task); - pnfs_ld_read_done(rdata); + pnfs_ld_read_done(hdr); } void objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync) { - struct nfs_pgio_data *rdata = oir->rpcdata; + struct nfs_pgio_header *hdr = oir->rpcdata; - oir->status = rdata->task.tk_status = status; + oir->status = hdr->task.tk_status = status; if (status >= 0) - rdata->res.count = status; + hdr->res.count = status; else - rdata->header->pnfs_error = status; + hdr->pnfs_error = status; objlayout_iodone(oir); /* must not use oir after this point */ dprintk("%s: Return status=%zd eof=%d sync=%d\n", __func__, - status, rdata->res.eof, sync); + status, hdr->res.eof, sync); if (sync) - pnfs_ld_read_done(rdata); + pnfs_ld_read_done(hdr); else { - INIT_WORK(&rdata->task.u.tk_work, _rpc_read_complete); - schedule_work(&rdata->task.u.tk_work); + INIT_WORK(&hdr->task.u.tk_work, _rpc_read_complete); + schedule_work(&hdr->task.u.tk_work); } } @@ -266,12 +266,11 @@ objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync) * Perform sync or async reads. 
*/ enum pnfs_try_status -objlayout_read_pagelist(struct nfs_pgio_data *rdata) +objlayout_read_pagelist(struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *hdr = rdata->header; struct inode *inode = hdr->inode; - loff_t offset = rdata->args.offset; - size_t count = rdata->args.count; + loff_t offset = hdr->args.offset; + size_t count = hdr->args.count; int err; loff_t eof; @@ -279,23 +278,23 @@ objlayout_read_pagelist(struct nfs_pgio_data *rdata) if (unlikely(offset + count > eof)) { if (offset >= eof) { err = 0; - rdata->res.count = 0; - rdata->res.eof = 1; + hdr->res.count = 0; + hdr->res.eof = 1; /*FIXME: do we need to call pnfs_ld_read_done() */ goto out; } count = eof - offset; } - rdata->res.eof = (offset + count) >= eof; - _fix_verify_io_params(hdr->lseg, &rdata->args.pages, - &rdata->args.pgbase, - rdata->args.offset, rdata->args.count); + hdr->res.eof = (offset + count) >= eof; + _fix_verify_io_params(hdr->lseg, &hdr->args.pages, + &hdr->args.pgbase, + hdr->args.offset, hdr->args.count); dprintk("%s: inode(%lx) offset 0x%llx count 0x%Zx eof=%d\n", - __func__, inode->i_ino, offset, count, rdata->res.eof); + __func__, inode->i_ino, offset, count, hdr->res.eof); - err = objio_read_pagelist(rdata); + err = objio_read_pagelist(hdr); out: if (unlikely(err)) { hdr->pnfs_error = err; @@ -312,38 +311,38 @@ objlayout_read_pagelist(struct nfs_pgio_data *rdata) static void _rpc_write_complete(struct work_struct *work) { struct rpc_task *task; - struct nfs_pgio_data *wdata; + struct nfs_pgio_header *hdr; dprintk("%s enter\n", __func__); task = container_of(work, struct rpc_task, u.tk_work); - wdata = container_of(task, struct nfs_pgio_data, task); + hdr = container_of(task, struct nfs_pgio_header, task); - pnfs_ld_write_done(wdata); + pnfs_ld_write_done(hdr); } void objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync) { - struct nfs_pgio_data *wdata = oir->rpcdata; + struct nfs_pgio_header *hdr = oir->rpcdata; - oir->status = wdata->task.tk_status = status; + oir->status = hdr->task.tk_status = status; if (status >= 0) { - wdata->res.count = status; - wdata->writeverf.committed = oir->committed; + hdr->res.count = status; + hdr->writeverf.committed = oir->committed; } else { - wdata->header->pnfs_error = status; + hdr->pnfs_error = status; } objlayout_iodone(oir); /* must not use oir after this point */ dprintk("%s: Return status %zd committed %d sync=%d\n", __func__, - status, wdata->writeverf.committed, sync); + status, hdr->writeverf.committed, sync); if (sync) - pnfs_ld_write_done(wdata); + pnfs_ld_write_done(hdr); else { - INIT_WORK(&wdata->task.u.tk_work, _rpc_write_complete); - schedule_work(&wdata->task.u.tk_work); + INIT_WORK(&hdr->task.u.tk_work, _rpc_write_complete); + schedule_work(&hdr->task.u.tk_work); } } @@ -351,17 +350,15 @@ objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync) * Perform sync or async writes. 
*/ enum pnfs_try_status -objlayout_write_pagelist(struct nfs_pgio_data *wdata, - int how) +objlayout_write_pagelist(struct nfs_pgio_header *hdr, int how) { - struct nfs_pgio_header *hdr = wdata->header; int err; - _fix_verify_io_params(hdr->lseg, &wdata->args.pages, - &wdata->args.pgbase, - wdata->args.offset, wdata->args.count); + _fix_verify_io_params(hdr->lseg, &hdr->args.pages, + &hdr->args.pgbase, + hdr->args.offset, hdr->args.count); - err = objio_write_pagelist(wdata, how); + err = objio_write_pagelist(hdr, how); if (unlikely(err)) { hdr->pnfs_error = err; dprintk("%s: Returned Error %d\n", __func__, err); diff --git a/fs/nfs/objlayout/objlayout.h b/fs/nfs/objlayout/objlayout.h index 01e041029a6c..fd13f1d2f136 100644 --- a/fs/nfs/objlayout/objlayout.h +++ b/fs/nfs/objlayout/objlayout.h @@ -119,8 +119,8 @@ extern void objio_free_lseg(struct pnfs_layout_segment *lseg); */ extern void objio_free_result(struct objlayout_io_res *oir); -extern int objio_read_pagelist(struct nfs_pgio_data *rdata); -extern int objio_write_pagelist(struct nfs_pgio_data *wdata, int how); +extern int objio_read_pagelist(struct nfs_pgio_header *rdata); +extern int objio_write_pagelist(struct nfs_pgio_header *wdata, int how); /* * callback API @@ -168,10 +168,10 @@ extern struct pnfs_layout_segment *objlayout_alloc_lseg( extern void objlayout_free_lseg(struct pnfs_layout_segment *); extern enum pnfs_try_status objlayout_read_pagelist( - struct nfs_pgio_data *); + struct nfs_pgio_header *); extern enum pnfs_try_status objlayout_write_pagelist( - struct nfs_pgio_data *, + struct nfs_pgio_header *, int how); extern void objlayout_encode_layoutcommit( diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 5e70918f6c95..ecb3d4cdbc85 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -484,8 +484,7 @@ EXPORT_SYMBOL_GPL(nfs_pgio_header_free); static bool nfs_pgio_data_init(struct nfs_pgio_header *hdr, unsigned int pagecount) { - if (nfs_pgarray_set(&hdr->data.page_array, pagecount)) { - hdr->data.header = hdr; + if (nfs_pgarray_set(&hdr->page_array, pagecount)) { atomic_inc(&hdr->refcnt); return true; } @@ -493,16 +492,14 @@ static bool nfs_pgio_data_init(struct nfs_pgio_header *hdr, } /** - * nfs_pgio_data_destroy - Properly free pageio data - * @data: The data to destroy + * nfs_pgio_data_destroy - Properly release pageio data + * @hdr: The header with data to destroy */ -void nfs_pgio_data_destroy(struct nfs_pgio_data *data) +void nfs_pgio_data_destroy(struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *hdr = data->header; - - put_nfs_open_context(data->args.context); - if (data->page_array.pagevec != data->page_array.page_array) - kfree(data->page_array.pagevec); + put_nfs_open_context(hdr->args.context); + if (hdr->page_array.pagevec != hdr->page_array.page_array) + kfree(hdr->page_array.pagevec); if (atomic_dec_and_test(&hdr->refcnt)) hdr->completion_ops->completion(hdr); } @@ -510,31 +507,31 @@ EXPORT_SYMBOL_GPL(nfs_pgio_data_destroy); /** * nfs_pgio_rpcsetup - Set up arguments for a pageio call - * @data: The pageio data + * @hdr: The pageio hdr * @count: Number of bytes to read * @offset: Initial offset * @how: How to commit data (writes only) * @cinfo: Commit information for the call (writes only) */ -static void nfs_pgio_rpcsetup(struct nfs_pgio_data *data, +static void nfs_pgio_rpcsetup(struct nfs_pgio_header *hdr, unsigned int count, unsigned int offset, int how, struct nfs_commit_info *cinfo) { - struct nfs_page *req = data->header->req; + struct nfs_page *req = hdr->req; /* Set up the RPC 
argument and reply structs - * NB: take care not to mess about with data->commit et al. */ + * NB: take care not to mess about with hdr->commit et al. */ - data->args.fh = NFS_FH(data->header->inode); - data->args.offset = req_offset(req) + offset; + hdr->args.fh = NFS_FH(hdr->inode); + hdr->args.offset = req_offset(req) + offset; /* pnfs_set_layoutcommit needs this */ - data->mds_offset = data->args.offset; - data->args.pgbase = req->wb_pgbase + offset; - data->args.pages = data->page_array.pagevec; - data->args.count = count; - data->args.context = get_nfs_open_context(req->wb_context); - data->args.lock_context = req->wb_lock_context; - data->args.stable = NFS_UNSTABLE; + hdr->mds_offset = hdr->args.offset; + hdr->args.pgbase = req->wb_pgbase + offset; + hdr->args.pages = hdr->page_array.pagevec; + hdr->args.count = count; + hdr->args.context = get_nfs_open_context(req->wb_context); + hdr->args.lock_context = req->wb_lock_context; + hdr->args.stable = NFS_UNSTABLE; switch (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) { case 0: break; @@ -542,59 +539,60 @@ static void nfs_pgio_rpcsetup(struct nfs_pgio_data *data, if (nfs_reqs_to_commit(cinfo)) break; default: - data->args.stable = NFS_FILE_SYNC; + hdr->args.stable = NFS_FILE_SYNC; } - data->res.fattr = &data->fattr; - data->res.count = count; - data->res.eof = 0; - data->res.verf = &data->writeverf; - nfs_fattr_init(&data->fattr); + hdr->res.fattr = &hdr->fattr; + hdr->res.count = count; + hdr->res.eof = 0; + hdr->res.verf = &hdr->writeverf; + nfs_fattr_init(&hdr->fattr); } /** - * nfs_pgio_prepare - Prepare pageio data to go over the wire + * nfs_pgio_prepare - Prepare pageio hdr to go over the wire * @task: The current task - * @calldata: pageio data to prepare + * @calldata: pageio header to prepare */ static void nfs_pgio_prepare(struct rpc_task *task, void *calldata) { - struct nfs_pgio_data *data = calldata; + struct nfs_pgio_header *hdr = calldata; int err; - err = NFS_PROTO(data->header->inode)->pgio_rpc_prepare(task, data); + err = NFS_PROTO(hdr->inode)->pgio_rpc_prepare(task, hdr); if (err) rpc_exit(task, err); } -int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_data *data, +int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_header *hdr, const struct rpc_call_ops *call_ops, int how, int flags) { + struct inode *inode = hdr->inode; struct rpc_task *task; struct rpc_message msg = { - .rpc_argp = &data->args, - .rpc_resp = &data->res, - .rpc_cred = data->header->cred, + .rpc_argp = &hdr->args, + .rpc_resp = &hdr->res, + .rpc_cred = hdr->cred, }; struct rpc_task_setup task_setup_data = { .rpc_client = clnt, - .task = &data->task, + .task = &hdr->task, .rpc_message = &msg, .callback_ops = call_ops, - .callback_data = data, + .callback_data = hdr, .workqueue = nfsiod_workqueue, .flags = RPC_TASK_ASYNC | flags, }; int ret = 0; - data->header->rw_ops->rw_initiate(data, &msg, &task_setup_data, how); + hdr->rw_ops->rw_initiate(hdr, &msg, &task_setup_data, how); dprintk("NFS: %5u initiated pgio call " "(req %s/%llu, %u bytes @ offset %llu)\n", - data->task.tk_pid, - data->header->inode->i_sb->s_id, - (unsigned long long)NFS_FILEID(data->header->inode), - data->args.count, - (unsigned long long)data->args.offset); + hdr->task.tk_pid, + inode->i_sb->s_id, + (unsigned long long)NFS_FILEID(inode), + hdr->args.count, + (unsigned long long)hdr->args.offset); task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) { @@ -621,21 +619,21 @@ static int nfs_pgio_error(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header 
*hdr) { set_bit(NFS_IOHDR_REDO, &hdr->flags); - nfs_pgio_data_destroy(&hdr->data); + nfs_pgio_data_destroy(hdr); desc->pg_completion_ops->error_cleanup(&desc->pg_list); return -ENOMEM; } /** * nfs_pgio_release - Release pageio data - * @calldata: The pageio data to release + * @calldata: The pageio header to release */ static void nfs_pgio_release(void *calldata) { - struct nfs_pgio_data *data = calldata; - if (data->header->rw_ops->rw_release) - data->header->rw_ops->rw_release(data); - nfs_pgio_data_destroy(data); + struct nfs_pgio_header *hdr = calldata; + if (hdr->rw_ops->rw_release) + hdr->rw_ops->rw_release(hdr); + nfs_pgio_data_destroy(hdr); } /** @@ -676,22 +674,22 @@ EXPORT_SYMBOL_GPL(nfs_pageio_init); /** * nfs_pgio_result - Basic pageio error handling * @task: The task that ran - * @calldata: Pageio data to check + * @calldata: Pageio header to check */ static void nfs_pgio_result(struct rpc_task *task, void *calldata) { - struct nfs_pgio_data *data = calldata; - struct inode *inode = data->header->inode; + struct nfs_pgio_header *hdr = calldata; + struct inode *inode = hdr->inode; dprintk("NFS: %s: %5u, (status %d)\n", __func__, task->tk_pid, task->tk_status); - if (data->header->rw_ops->rw_done(task, data, inode) != 0) + if (hdr->rw_ops->rw_done(task, hdr, inode) != 0) return; if (task->tk_status < 0) - nfs_set_pgio_error(data->header, task->tk_status, data->args.offset); + nfs_set_pgio_error(hdr, task->tk_status, hdr->args.offset); else - data->header->rw_ops->rw_result(task, data); + hdr->rw_ops->rw_result(task, hdr); } /* @@ -707,7 +705,6 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc, { struct nfs_page *req; struct page **pages; - struct nfs_pgio_data *data; struct list_head *head = &desc->pg_list; struct nfs_commit_info cinfo; @@ -715,9 +712,8 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc, desc->pg_count))) return nfs_pgio_error(desc, hdr); - data = &hdr->data; nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq); - pages = data->page_array.pagevec; + pages = hdr->page_array.pagevec; while (!list_empty(head)) { req = nfs_list_entry(head->next); nfs_list_remove_request(req); @@ -730,7 +726,7 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc, desc->pg_ioflags &= ~FLUSH_COND_STABLE; /* Set up the argument struct */ - nfs_pgio_rpcsetup(data, desc->pg_count, 0, desc->pg_ioflags, &cinfo); + nfs_pgio_rpcsetup(hdr, desc->pg_count, 0, desc->pg_ioflags, &cinfo); desc->pg_rpc_callops = &nfs_pgio_common_ops; return 0; } @@ -751,7 +747,7 @@ static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc) ret = nfs_generic_pgio(desc, hdr); if (ret == 0) ret = nfs_initiate_pgio(NFS_CLIENT(hdr->inode), - &hdr->data, desc->pg_rpc_callops, + hdr, desc->pg_rpc_callops, desc->pg_ioflags, 0); if (atomic_dec_and_test(&hdr->refcnt)) hdr->completion_ops->completion(hdr); diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 067104cce181..ecc911347750 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1502,9 +1502,8 @@ int pnfs_write_done_resend_to_mds(struct inode *inode, } EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds); -static void pnfs_ld_handle_write_error(struct nfs_pgio_data *data) +static void pnfs_ld_handle_write_error(struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *hdr = data->header; dprintk("pnfs write error = %d\n", hdr->pnfs_error); if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags & @@ -1512,7 +1511,7 @@ static void pnfs_ld_handle_write_error(struct nfs_pgio_data *data) pnfs_return_layout(hdr->inode); } if (!test_and_set_bit(NFS_IOHDR_REDO, 
&hdr->flags)) - data->task.tk_status = pnfs_write_done_resend_to_mds(hdr->inode, + hdr->task.tk_status = pnfs_write_done_resend_to_mds(hdr->inode, &hdr->pages, hdr->completion_ops, hdr->dreq); @@ -1521,41 +1520,36 @@ static void pnfs_ld_handle_write_error(struct nfs_pgio_data *data) /* * Called by non rpc-based layout drivers */ -void pnfs_ld_write_done(struct nfs_pgio_data *data) +void pnfs_ld_write_done(struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *hdr = data->header; - - trace_nfs4_pnfs_write(data, hdr->pnfs_error); + trace_nfs4_pnfs_write(hdr, hdr->pnfs_error); if (!hdr->pnfs_error) { - pnfs_set_layoutcommit(data); - hdr->mds_ops->rpc_call_done(&data->task, data); + pnfs_set_layoutcommit(hdr); + hdr->mds_ops->rpc_call_done(&hdr->task, hdr); } else - pnfs_ld_handle_write_error(data); - hdr->mds_ops->rpc_release(data); + pnfs_ld_handle_write_error(hdr); + hdr->mds_ops->rpc_release(hdr); } EXPORT_SYMBOL_GPL(pnfs_ld_write_done); static void pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, - struct nfs_pgio_data *data) + struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *hdr = data->header; - if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { list_splice_tail_init(&hdr->pages, &desc->pg_list); nfs_pageio_reset_write_mds(desc); desc->pg_recoalesce = 1; } - nfs_pgio_data_destroy(data); + nfs_pgio_data_destroy(hdr); } static enum pnfs_try_status -pnfs_try_to_write_data(struct nfs_pgio_data *wdata, +pnfs_try_to_write_data(struct nfs_pgio_header *hdr, const struct rpc_call_ops *call_ops, struct pnfs_layout_segment *lseg, int how) { - struct nfs_pgio_header *hdr = wdata->header; struct inode *inode = hdr->inode; enum pnfs_try_status trypnfs; struct nfs_server *nfss = NFS_SERVER(inode); @@ -1563,8 +1557,8 @@ pnfs_try_to_write_data(struct nfs_pgio_data *wdata, hdr->mds_ops = call_ops; dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__, - inode->i_ino, wdata->args.count, wdata->args.offset, how); - trypnfs = nfss->pnfs_curr_ld->write_pagelist(wdata, how); + inode->i_ino, hdr->args.count, hdr->args.offset, how); + trypnfs = nfss->pnfs_curr_ld->write_pagelist(hdr, how); if (trypnfs != PNFS_NOT_ATTEMPTED) nfs_inc_stats(inode, NFSIOS_PNFS_WRITE); dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs); @@ -1575,15 +1569,14 @@ static void pnfs_do_write(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr, int how) { - struct nfs_pgio_data *data = &hdr->data; const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; struct pnfs_layout_segment *lseg = desc->pg_lseg; enum pnfs_try_status trypnfs; desc->pg_lseg = NULL; - trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how); + trypnfs = pnfs_try_to_write_data(hdr, call_ops, lseg, how); if (trypnfs == PNFS_NOT_ATTEMPTED) - pnfs_write_through_mds(desc, data); + pnfs_write_through_mds(desc, hdr); pnfs_put_lseg(lseg); } @@ -1650,17 +1643,15 @@ int pnfs_read_done_resend_to_mds(struct inode *inode, } EXPORT_SYMBOL_GPL(pnfs_read_done_resend_to_mds); -static void pnfs_ld_handle_read_error(struct nfs_pgio_data *data) +static void pnfs_ld_handle_read_error(struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *hdr = data->header; - dprintk("pnfs read error = %d\n", hdr->pnfs_error); if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags & PNFS_LAYOUTRET_ON_ERROR) { pnfs_return_layout(hdr->inode); } if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) - data->task.tk_status = pnfs_read_done_resend_to_mds(hdr->inode, + hdr->task.tk_status = pnfs_read_done_resend_to_mds(hdr->inode, &hdr->pages, hdr->completion_ops, 
hdr->dreq); @@ -1669,43 +1660,38 @@ static void pnfs_ld_handle_read_error(struct nfs_pgio_data *data) /* * Called by non rpc-based layout drivers */ -void pnfs_ld_read_done(struct nfs_pgio_data *data) +void pnfs_ld_read_done(struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *hdr = data->header; - - trace_nfs4_pnfs_read(data, hdr->pnfs_error); + trace_nfs4_pnfs_read(hdr, hdr->pnfs_error); if (likely(!hdr->pnfs_error)) { - __nfs4_read_done_cb(data); - hdr->mds_ops->rpc_call_done(&data->task, data); + __nfs4_read_done_cb(hdr); + hdr->mds_ops->rpc_call_done(&hdr->task, hdr); } else - pnfs_ld_handle_read_error(data); - hdr->mds_ops->rpc_release(data); + pnfs_ld_handle_read_error(hdr); + hdr->mds_ops->rpc_release(hdr); } EXPORT_SYMBOL_GPL(pnfs_ld_read_done); static void pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, - struct nfs_pgio_data *data) + struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *hdr = data->header; - if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { list_splice_tail_init(&hdr->pages, &desc->pg_list); nfs_pageio_reset_read_mds(desc); desc->pg_recoalesce = 1; } - nfs_pgio_data_destroy(data); + nfs_pgio_data_destroy(hdr); } /* * Call the appropriate parallel I/O subsystem read function. */ static enum pnfs_try_status -pnfs_try_to_read_data(struct nfs_pgio_data *rdata, +pnfs_try_to_read_data(struct nfs_pgio_header *hdr, const struct rpc_call_ops *call_ops, struct pnfs_layout_segment *lseg) { - struct nfs_pgio_header *hdr = rdata->header; struct inode *inode = hdr->inode; struct nfs_server *nfss = NFS_SERVER(inode); enum pnfs_try_status trypnfs; @@ -1713,9 +1699,9 @@ pnfs_try_to_read_data(struct nfs_pgio_data *rdata, hdr->mds_ops = call_ops; dprintk("%s: Reading ino:%lu %u@%llu\n", - __func__, inode->i_ino, rdata->args.count, rdata->args.offset); + __func__, inode->i_ino, hdr->args.count, hdr->args.offset); - trypnfs = nfss->pnfs_curr_ld->read_pagelist(rdata); + trypnfs = nfss->pnfs_curr_ld->read_pagelist(hdr); if (trypnfs != PNFS_NOT_ATTEMPTED) nfs_inc_stats(inode, NFSIOS_PNFS_READ); dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs); @@ -1725,15 +1711,14 @@ pnfs_try_to_read_data(struct nfs_pgio_data *rdata, static void pnfs_do_read(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr) { - struct nfs_pgio_data *data = &hdr->data; const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; struct pnfs_layout_segment *lseg = desc->pg_lseg; enum pnfs_try_status trypnfs; desc->pg_lseg = NULL; - trypnfs = pnfs_try_to_read_data(data, call_ops, lseg); + trypnfs = pnfs_try_to_read_data(hdr, call_ops, lseg); if (trypnfs == PNFS_NOT_ATTEMPTED) - pnfs_read_through_mds(desc, data); + pnfs_read_through_mds(desc, hdr); pnfs_put_lseg(lseg); } @@ -1816,12 +1801,11 @@ void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg) EXPORT_SYMBOL_GPL(pnfs_set_lo_fail); void -pnfs_set_layoutcommit(struct nfs_pgio_data *wdata) +pnfs_set_layoutcommit(struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *hdr = wdata->header; struct inode *inode = hdr->inode; struct nfs_inode *nfsi = NFS_I(inode); - loff_t end_pos = wdata->mds_offset + wdata->res.count; + loff_t end_pos = hdr->mds_offset + hdr->res.count; bool mark_as_dirty = false; spin_lock(&inode->i_lock); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 4fb309a2b4c4..a4a58be94064 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -113,8 +113,8 @@ struct pnfs_layoutdriver_type { * Return PNFS_ATTEMPTED to indicate the layout code has attempted * I/O, else return PNFS_NOT_ATTEMPTED to fall back to normal NFS */ - enum 
pnfs_try_status (*read_pagelist) (struct nfs_pgio_data *nfs_data); - enum pnfs_try_status (*write_pagelist) (struct nfs_pgio_data *nfs_data, int how); + enum pnfs_try_status (*read_pagelist)(struct nfs_pgio_header *); + enum pnfs_try_status (*write_pagelist)(struct nfs_pgio_header *, int); void (*free_deviceid_node) (struct nfs4_deviceid_node *); @@ -213,13 +213,13 @@ bool pnfs_roc(struct inode *ino); void pnfs_roc_release(struct inode *ino); void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task); -void pnfs_set_layoutcommit(struct nfs_pgio_data *wdata); +void pnfs_set_layoutcommit(struct nfs_pgio_header *); void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data); int pnfs_layoutcommit_inode(struct inode *inode, bool sync); int _pnfs_return_layout(struct inode *); int pnfs_commit_and_return_layout(struct inode *); -void pnfs_ld_write_done(struct nfs_pgio_data *); -void pnfs_ld_read_done(struct nfs_pgio_data *); +void pnfs_ld_write_done(struct nfs_pgio_header *); +void pnfs_ld_read_done(struct nfs_pgio_header *); struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, loff_t pos, diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index c171ce1a8a30..b09cc23d6f43 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -578,46 +578,49 @@ nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, return 0; } -static int nfs_read_done(struct rpc_task *task, struct nfs_pgio_data *data) +static int nfs_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr) { - struct inode *inode = data->header->inode; + struct inode *inode = hdr->inode; nfs_invalidate_atime(inode); if (task->tk_status >= 0) { - nfs_refresh_inode(inode, data->res.fattr); + nfs_refresh_inode(inode, hdr->res.fattr); /* Emulate the eof flag, which isn't normally needed in NFSv2 * as it is guaranteed to always return the file attributes */ - if (data->args.offset + data->res.count >= data->res.fattr->size) - data->res.eof = 1; + if (hdr->args.offset + hdr->res.count >= hdr->res.fattr->size) + hdr->res.eof = 1; } return 0; } -static void nfs_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message *msg) +static void nfs_proc_read_setup(struct nfs_pgio_header *hdr, + struct rpc_message *msg) { msg->rpc_proc = &nfs_procedures[NFSPROC_READ]; } -static int nfs_proc_pgio_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data) +static int nfs_proc_pgio_rpc_prepare(struct rpc_task *task, + struct nfs_pgio_header *hdr) { rpc_call_start(task); return 0; } -static int nfs_write_done(struct rpc_task *task, struct nfs_pgio_data *data) +static int nfs_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr) { - struct inode *inode = data->header->inode; + struct inode *inode = hdr->inode; if (task->tk_status >= 0) - nfs_post_op_update_inode_force_wcc(inode, data->res.fattr); + nfs_post_op_update_inode_force_wcc(inode, hdr->res.fattr); return 0; } -static void nfs_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message *msg) +static void nfs_proc_write_setup(struct nfs_pgio_header *hdr, + struct rpc_message *msg) { /* Note: NFSv2 ignores @stable and always uses NFS_FILE_SYNC */ - data->args.stable = NFS_FILE_SYNC; + hdr->args.stable = NFS_FILE_SYNC; msg->rpc_proc = &nfs_procedures[NFSPROC_WRITE]; } diff --git a/fs/nfs/read.c b/fs/nfs/read.c index d9df4ab3737b..b1532b73fea3 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -172,14 +172,15 @@ out: hdr->release(hdr); } -static void 
nfs_initiate_read(struct nfs_pgio_data *data, struct rpc_message *msg, +static void nfs_initiate_read(struct nfs_pgio_header *hdr, + struct rpc_message *msg, struct rpc_task_setup *task_setup_data, int how) { - struct inode *inode = data->header->inode; + struct inode *inode = hdr->inode; int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0; task_setup_data->flags |= swap_flags; - NFS_PROTO(inode)->read_setup(data, msg); + NFS_PROTO(inode)->read_setup(hdr, msg); } static void @@ -203,14 +204,15 @@ static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops = { * This is the callback from RPC telling us whether a reply was * received or some error occurred (timeout or socket shutdown). */ -static int nfs_readpage_done(struct rpc_task *task, struct nfs_pgio_data *data, +static int nfs_readpage_done(struct rpc_task *task, + struct nfs_pgio_header *hdr, struct inode *inode) { - int status = NFS_PROTO(inode)->read_done(task, data); + int status = NFS_PROTO(inode)->read_done(task, hdr); if (status != 0) return status; - nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, data->res.count); + nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, hdr->res.count); if (task->tk_status == -ESTALE) { set_bit(NFS_INO_STALE, &NFS_I(inode)->flags); @@ -219,34 +221,34 @@ static int nfs_readpage_done(struct rpc_task *task, struct nfs_pgio_data *data, return 0; } -static void nfs_readpage_retry(struct rpc_task *task, struct nfs_pgio_data *data) +static void nfs_readpage_retry(struct rpc_task *task, + struct nfs_pgio_header *hdr) { - struct nfs_pgio_args *argp = &data->args; - struct nfs_pgio_res *resp = &data->res; + struct nfs_pgio_args *argp = &hdr->args; + struct nfs_pgio_res *resp = &hdr->res; /* This is a short read! */ - nfs_inc_stats(data->header->inode, NFSIOS_SHORTREAD); + nfs_inc_stats(hdr->inode, NFSIOS_SHORTREAD); /* Has the server at least made some progress? 
*/ if (resp->count == 0) { - nfs_set_pgio_error(data->header, -EIO, argp->offset); + nfs_set_pgio_error(hdr, -EIO, argp->offset); return; } - /* Yes, so retry the read at the end of the data */ - data->mds_offset += resp->count; + /* Yes, so retry the read at the end of the hdr */ + hdr->mds_offset += resp->count; argp->offset += resp->count; argp->pgbase += resp->count; argp->count -= resp->count; rpc_restart_call_prepare(task); } -static void nfs_readpage_result(struct rpc_task *task, struct nfs_pgio_data *data) +static void nfs_readpage_result(struct rpc_task *task, + struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *hdr = data->header; - - if (data->res.eof) { + if (hdr->res.eof) { loff_t bound; - bound = data->args.offset + data->res.count; + bound = hdr->args.offset + hdr->res.count; spin_lock(&hdr->lock); if (bound < hdr->io_start + hdr->good_bytes) { set_bit(NFS_IOHDR_EOF, &hdr->flags); @@ -254,8 +256,8 @@ static void nfs_readpage_result(struct rpc_task *task, struct nfs_pgio_data *dat hdr->good_bytes = bound - hdr->io_start; } spin_unlock(&hdr->lock); - } else if (data->res.count != data->args.count) - nfs_readpage_retry(task, data); + } else if (hdr->res.count != hdr->args.count) + nfs_readpage_retry(task, hdr); } /* diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 6afe0f679420..6a2d0986a3a3 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -596,11 +596,11 @@ nfs_clear_request_commit(struct nfs_page *req) } static inline -int nfs_write_need_commit(struct nfs_pgio_data *data) +int nfs_write_need_commit(struct nfs_pgio_header *hdr) { - if (data->writeverf.committed == NFS_DATA_SYNC) - return data->header->lseg == NULL; - return data->writeverf.committed != NFS_FILE_SYNC; + if (hdr->writeverf.committed == NFS_DATA_SYNC) + return hdr->lseg == NULL; + return hdr->writeverf.committed != NFS_FILE_SYNC; } #else @@ -627,7 +627,7 @@ nfs_clear_request_commit(struct nfs_page *req) } static inline -int nfs_write_need_commit(struct nfs_pgio_data *data) +int nfs_write_need_commit(struct nfs_pgio_header *hdr) { return 0; } @@ -1013,17 +1013,18 @@ static int flush_task_priority(int how) return RPC_PRIORITY_NORMAL; } -static void nfs_initiate_write(struct nfs_pgio_data *data, struct rpc_message *msg, +static void nfs_initiate_write(struct nfs_pgio_header *hdr, + struct rpc_message *msg, struct rpc_task_setup *task_setup_data, int how) { - struct inode *inode = data->header->inode; + struct inode *inode = hdr->inode; int priority = flush_task_priority(how); task_setup_data->priority = priority; - NFS_PROTO(inode)->write_setup(data, msg); + NFS_PROTO(inode)->write_setup(hdr, msg); nfs4_state_protect_write(NFS_SERVER(inode)->nfs_client, - &task_setup_data->rpc_client, msg, data); + &task_setup_data->rpc_client, msg, hdr); } /* If a nfs_flush_* function fails, it should remove reqs from @head and @@ -1085,19 +1086,17 @@ void nfs_commit_prepare(struct rpc_task *task, void *calldata) NFS_PROTO(data->inode)->commit_rpc_prepare(task, data); } -static void nfs_writeback_release_common(struct nfs_pgio_data *data) +static void nfs_writeback_release_common(struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *hdr = data->header; - int status = data->task.tk_status; + int status = hdr->task.tk_status; - if ((status >= 0) && nfs_write_need_commit(data)) { + if ((status >= 0) && nfs_write_need_commit(hdr)) { spin_lock(&hdr->lock); if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) ; /* Do nothing */ else if (!test_and_set_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) - memcpy(&hdr->verf, 
&data->writeverf, sizeof(hdr->verf)); - else if (memcmp(&hdr->verf, &data->writeverf, - sizeof(hdr->verf))) + memcpy(&hdr->verf, &hdr->writeverf, sizeof(hdr->verf)); + else if (memcmp(&hdr->verf, &hdr->writeverf, sizeof(hdr->verf))) set_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags); spin_unlock(&hdr->lock); } @@ -1131,7 +1130,8 @@ static int nfs_should_remove_suid(const struct inode *inode) /* * This function is called when the WRITE call is complete. */ -static int nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data, +static int nfs_writeback_done(struct rpc_task *task, + struct nfs_pgio_header *hdr, struct inode *inode) { int status; @@ -1143,13 +1143,14 @@ static int nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data, * another writer had changed the file, but some applications * depend on tighter cache coherency when writing. */ - status = NFS_PROTO(inode)->write_done(task, data); + status = NFS_PROTO(inode)->write_done(task, hdr); if (status != 0) return status; - nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, data->res.count); + nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, hdr->res.count); #if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) - if (data->res.verf->committed < data->args.stable && task->tk_status >= 0) { + if (hdr->res.verf->committed < hdr->args.stable && + task->tk_status >= 0) { /* We tried a write call, but the server did not * commit data to stable storage even though we * requested it. @@ -1165,7 +1166,7 @@ static int nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data, dprintk("NFS: faulty NFS server %s:" " (committed = %d) != (stable = %d)\n", NFS_SERVER(inode)->nfs_client->cl_hostname, - data->res.verf->committed, data->args.stable); + hdr->res.verf->committed, hdr->args.stable); complain = jiffies + 300 * HZ; } } @@ -1180,16 +1181,17 @@ static int nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data, /* * This function is called when the WRITE call is complete. */ -static void nfs_writeback_result(struct rpc_task *task, struct nfs_pgio_data *data) +static void nfs_writeback_result(struct rpc_task *task, + struct nfs_pgio_header *hdr) { - struct nfs_pgio_args *argp = &data->args; - struct nfs_pgio_res *resp = &data->res; + struct nfs_pgio_args *argp = &hdr->args; + struct nfs_pgio_res *resp = &hdr->res; if (resp->count < argp->count) { static unsigned long complain; /* This a short write! */ - nfs_inc_stats(data->header->inode, NFSIOS_SHORTWRITE); + nfs_inc_stats(hdr->inode, NFSIOS_SHORTWRITE); /* Has the server at least made some progress? */ if (resp->count == 0) { @@ -1199,14 +1201,14 @@ static void nfs_writeback_result(struct rpc_task *task, struct nfs_pgio_data *da argp->count); complain = jiffies + 300 * HZ; } - nfs_set_pgio_error(data->header, -EIO, argp->offset); + nfs_set_pgio_error(hdr, -EIO, argp->offset); task->tk_status = -EIO; return; } /* Was this an NFSv2 write or an NFSv3 stable write? 
*/ if (resp->verf->committed != NFS_UNSTABLE) { /* Resend from where the server left off */ - data->mds_offset += resp->count; + hdr->mds_offset += resp->count; argp->offset += resp->count; argp->pgbase += resp->count; argp->count -= resp->count; diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 43592651cd5a..d0fae7b78252 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -64,10 +64,11 @@ struct nfs_rw_ops { const fmode_t rw_mode; struct nfs_pgio_header *(*rw_alloc_header)(void); void (*rw_free_header)(struct nfs_pgio_header *); - void (*rw_release)(struct nfs_pgio_data *); - int (*rw_done)(struct rpc_task *, struct nfs_pgio_data *, struct inode *); - void (*rw_result)(struct rpc_task *, struct nfs_pgio_data *); - void (*rw_initiate)(struct nfs_pgio_data *, struct rpc_message *, + void (*rw_release)(struct nfs_pgio_header *); + int (*rw_done)(struct rpc_task *, struct nfs_pgio_header *, + struct inode *); + void (*rw_result)(struct rpc_task *, struct nfs_pgio_header *); + void (*rw_initiate)(struct nfs_pgio_header *, struct rpc_message *, struct rpc_task_setup *, int); }; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index bb18dba1aefe..efeaf7690b51 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1257,27 +1257,10 @@ enum { NFS_IOHDR_NEED_RESCHED, }; -struct nfs_pgio_data { - struct nfs_pgio_header *header; - struct list_head list; - struct rpc_task task; - struct nfs_fattr fattr; - struct nfs_writeverf writeverf; /* Used for writes */ - struct nfs_pgio_args args; /* argument struct */ - struct nfs_pgio_res res; /* result struct */ - unsigned long timestamp; /* For lease renewal */ - int (*pgio_done_cb)(struct rpc_task *task, struct nfs_pgio_data *data); - __u64 mds_offset; /* Filelayout dense stripe */ - struct nfs_page_array page_array; - struct nfs_client *ds_clp; /* pNFS data server */ - int ds_idx; /* ds index if ds_clp is set */ -}; - struct nfs_pgio_header { struct inode *inode; struct rpc_cred *cred; struct list_head pages; - struct nfs_pgio_data data; atomic_t refcnt; struct nfs_page *req; struct nfs_writeverf verf; /* Used for writes */ @@ -1295,6 +1278,21 @@ struct nfs_pgio_header { int error; /* merge with pnfs_error */ unsigned long good_bytes; /* boundary of good data */ unsigned long flags; + + /* + * rpc data + */ + struct rpc_task task; + struct nfs_fattr fattr; + struct nfs_writeverf writeverf; /* Used for writes */ + struct nfs_pgio_args args; /* argument struct */ + struct nfs_pgio_res res; /* result struct */ + unsigned long timestamp; /* For lease renewal */ + int (*pgio_done_cb)(struct rpc_task *, struct nfs_pgio_header *); + __u64 mds_offset; /* Filelayout dense stripe */ + struct nfs_page_array page_array; + struct nfs_client *ds_clp; /* pNFS data server */ + int ds_idx; /* ds index if ds_clp is set */ }; struct nfs_mds_commit_info { @@ -1426,11 +1424,12 @@ struct nfs_rpc_ops { struct nfs_pathconf *); int (*set_capabilities)(struct nfs_server *, struct nfs_fh *); int (*decode_dirent)(struct xdr_stream *, struct nfs_entry *, int); - int (*pgio_rpc_prepare)(struct rpc_task *, struct nfs_pgio_data *); - void (*read_setup) (struct nfs_pgio_data *, struct rpc_message *); - int (*read_done) (struct rpc_task *, struct nfs_pgio_data *); - void (*write_setup) (struct nfs_pgio_data *, struct rpc_message *); - int (*write_done) (struct rpc_task *, struct nfs_pgio_data *); + int (*pgio_rpc_prepare)(struct rpc_task *, + struct nfs_pgio_header *); + void (*read_setup)(struct nfs_pgio_header *, 
struct rpc_message *); + int (*read_done)(struct rpc_task *, struct nfs_pgio_header *); + void (*write_setup)(struct nfs_pgio_header *, struct rpc_message *); + int (*write_done)(struct rpc_task *, struct nfs_pgio_header *); void (*commit_setup) (struct nfs_commit_data *, struct rpc_message *); void (*commit_rpc_prepare)(struct rpc_task *, struct nfs_commit_data *); int (*commit_done) (struct rpc_task *, struct nfs_commit_data *); -- cgit v1.2.3-59-g8ed1b From c65e6254ca4db1584c5bf5f228ee26556477a9fd Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Mon, 9 Jun 2014 11:48:36 -0400 Subject: nfs: remove unused writeverf code Remove duplicate writeverf structure from merge of nfs_pgio_header and nfs_pgio_data and remove writeverf related flags and logic to handle more than one RPC per nfs_pgio_header. Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/blocklayout/blocklayout.c | 2 +- fs/nfs/direct.c | 25 ++++++++----------------- fs/nfs/internal.h | 1 + fs/nfs/objlayout/objlayout.c | 4 ++-- fs/nfs/pagelist.c | 2 +- fs/nfs/write.c | 27 +++++---------------------- include/linux/nfs_xdr.h | 3 --- 7 files changed, 18 insertions(+), 46 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index c3ccfe440390..04ac32b339f8 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -445,7 +445,7 @@ static void bl_end_par_io_write(void *data, int num_se) } hdr->task.tk_status = hdr->pnfs_error; - hdr->writeverf.committed = NFS_FILE_SYNC; + hdr->verf.committed = NFS_FILE_SYNC; INIT_WORK(&hdr->task.u.tk_work, bl_write_cleanup); schedule_work(&hdr->task.u.tk_work); } diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 6c4c867ee04c..2a3293a5dda0 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -715,7 +715,7 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) { struct nfs_direct_req *dreq = hdr->dreq; struct nfs_commit_info cinfo; - int bit = -1; + bool request_commit = false; struct nfs_page *req = nfs_list_entry(hdr->pages.next); if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) @@ -729,27 +729,20 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) dreq->flags = 0; dreq->error = hdr->error; } - if (dreq->error != 0) - bit = NFS_IOHDR_ERROR; - else { + if (dreq->error == 0) { dreq->count += hdr->good_bytes; - if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) { - dreq->flags = NFS_ODIRECT_RESCHED_WRITES; - bit = NFS_IOHDR_NEED_RESCHED; - } else if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) { + if (nfs_write_need_commit(hdr)) { if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) - bit = NFS_IOHDR_NEED_RESCHED; + request_commit = true; else if (dreq->flags == 0) { nfs_direct_set_hdr_verf(dreq, hdr); - bit = NFS_IOHDR_NEED_COMMIT; + request_commit = true; dreq->flags = NFS_ODIRECT_DO_COMMIT; } else if (dreq->flags == NFS_ODIRECT_DO_COMMIT) { - if (nfs_direct_set_or_cmp_hdr_verf(dreq, hdr)) { + request_commit = true; + if (nfs_direct_set_or_cmp_hdr_verf(dreq, hdr)) dreq->flags = NFS_ODIRECT_RESCHED_WRITES; - bit = NFS_IOHDR_NEED_RESCHED; - } else - bit = NFS_IOHDR_NEED_COMMIT; } } } @@ -760,9 +753,7 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) req = nfs_list_entry(hdr->pages.next); nfs_list_remove_request(req); - switch (bit) { - case NFS_IOHDR_NEED_RESCHED: - case NFS_IOHDR_NEED_COMMIT: + if (request_commit) { kref_get(&req->wb_kref); nfs_mark_request_commit(req, hdr->lseg, &cinfo); do_destroy = false; diff --git 
a/fs/nfs/internal.h b/fs/nfs/internal.h index 3f3aedd2e8c9..da36257628c5 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -441,6 +441,7 @@ int nfs_scan_commit(struct inode *inode, struct list_head *dst, void nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, struct nfs_commit_info *cinfo); +int nfs_write_need_commit(struct nfs_pgio_header *); int nfs_generic_commit_list(struct inode *inode, struct list_head *head, int how, struct nfs_commit_info *cinfo); void nfs_retry_commit(struct list_head *page_list, diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c index 86312787cee6..697a16d11fac 100644 --- a/fs/nfs/objlayout/objlayout.c +++ b/fs/nfs/objlayout/objlayout.c @@ -328,7 +328,7 @@ objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync) oir->status = hdr->task.tk_status = status; if (status >= 0) { hdr->res.count = status; - hdr->writeverf.committed = oir->committed; + hdr->verf.committed = oir->committed; } else { hdr->pnfs_error = status; } @@ -336,7 +336,7 @@ objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync) /* must not use oir after this point */ dprintk("%s: Return status %zd committed %d sync=%d\n", __func__, - status, hdr->writeverf.committed, sync); + status, hdr->verf.committed, sync); if (sync) pnfs_ld_write_done(hdr); diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index ecb3d4cdbc85..7dd0d5f101a4 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -545,7 +545,7 @@ static void nfs_pgio_rpcsetup(struct nfs_pgio_header *hdr, hdr->res.fattr = &hdr->fattr; hdr->res.count = count; hdr->res.eof = 0; - hdr->res.verf = &hdr->writeverf; + hdr->res.verf = &hdr->verf; nfs_fattr_init(&hdr->fattr); } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 6a2d0986a3a3..8534ee5c207d 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -595,12 +595,11 @@ nfs_clear_request_commit(struct nfs_page *req) } } -static inline int nfs_write_need_commit(struct nfs_pgio_header *hdr) { - if (hdr->writeverf.committed == NFS_DATA_SYNC) + if (hdr->verf.committed == NFS_DATA_SYNC) return hdr->lseg == NULL; - return hdr->writeverf.committed != NFS_FILE_SYNC; + return hdr->verf.committed != NFS_FILE_SYNC; } #else @@ -626,7 +625,6 @@ nfs_clear_request_commit(struct nfs_page *req) { } -static inline int nfs_write_need_commit(struct nfs_pgio_header *hdr) { return 0; @@ -654,11 +652,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr) nfs_context_set_write_error(req->wb_context, hdr->error); goto remove_req; } - if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) { - nfs_mark_request_dirty(req); - goto next; - } - if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) { + if (nfs_write_need_commit(hdr)) { memcpy(&req->wb_verf, &hdr->verf.verifier, sizeof(req->wb_verf)); nfs_mark_request_commit(req, hdr->lseg, &cinfo); goto next; @@ -668,7 +662,7 @@ remove_req: next: nfs_unlock_request(req); nfs_end_page_writeback(req); - do_destroy = !test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags); + do_destroy = !nfs_write_need_commit(hdr); nfs_release_request(req); } out: @@ -1088,18 +1082,7 @@ void nfs_commit_prepare(struct rpc_task *task, void *calldata) static void nfs_writeback_release_common(struct nfs_pgio_header *hdr) { - int status = hdr->task.tk_status; - - if ((status >= 0) && nfs_write_need_commit(hdr)) { - spin_lock(&hdr->lock); - if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) - ; /* Do nothing */ - else if (!test_and_set_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) - memcpy(&hdr->verf, 
&hdr->writeverf, sizeof(hdr->verf)); - else if (memcmp(&hdr->verf, &hdr->writeverf, sizeof(hdr->verf))) - set_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags); - spin_unlock(&hdr->lock); - } + /* do nothing! */ } /* diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index efeaf7690b51..e1b7b3b7c40f 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1253,8 +1253,6 @@ enum { NFS_IOHDR_ERROR = 0, NFS_IOHDR_EOF, NFS_IOHDR_REDO, - NFS_IOHDR_NEED_COMMIT, - NFS_IOHDR_NEED_RESCHED, }; struct nfs_pgio_header { @@ -1284,7 +1282,6 @@ struct nfs_pgio_header { */ struct rpc_task task; struct nfs_fattr fattr; - struct nfs_writeverf writeverf; /* Used for writes */ struct nfs_pgio_args args; /* argument struct */ struct nfs_pgio_res res; /* result struct */ unsigned long timestamp; /* For lease renewal */ -- cgit v1.2.3-59-g8ed1b From 4714fb51fd03a14d8c73001438283e7f7b752f1e Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Mon, 9 Jun 2014 11:48:37 -0400 Subject: nfs: remove pgio_header refcount, related cleanup The refcounting on nfs_pgio_header was related to there being (possibly) more than one nfs_pgio_data. Now that nfs_pgio_data has been merged into nfs_pgio_header, there is no reason to do this ref counting. Just call the completion callback on nfs_pgio_release/nfs_pgio_error. Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 36 +++++++++++------------------------- fs/nfs/pnfs.c | 6 ------ include/linux/nfs_xdr.h | 1 - 3 files changed, 11 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 7dd0d5f101a4..580fc0c982e6 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -459,7 +459,6 @@ struct nfs_pgio_header *nfs_pgio_header_alloc(const struct nfs_rw_ops *ops) if (hdr) { INIT_LIST_HEAD(&hdr->pages); spin_lock_init(&hdr->lock); - atomic_set(&hdr->refcnt, 0); hdr->rw_ops = ops; } return hdr; @@ -477,31 +476,18 @@ void nfs_pgio_header_free(struct nfs_pgio_header *hdr) EXPORT_SYMBOL_GPL(nfs_pgio_header_free); /** - * nfs_pgio_data_alloc - Allocate pageio data - * @hdr: The header making a request - * @pagecount: Number of pages to create - */ -static bool nfs_pgio_data_init(struct nfs_pgio_header *hdr, - unsigned int pagecount) -{ - if (nfs_pgarray_set(&hdr->page_array, pagecount)) { - atomic_inc(&hdr->refcnt); - return true; - } - return false; -} - -/** - * nfs_pgio_data_destroy - Properly release pageio data - * @hdr: The header with data to destroy + * nfs_pgio_data_destroy - make @hdr suitable for reuse + * + * Frees memory and releases refs from nfs_generic_pgio, so that it may + * be called again. 
+ * + * @hdr: A header that has had nfs_generic_pgio called */ void nfs_pgio_data_destroy(struct nfs_pgio_header *hdr) { put_nfs_open_context(hdr->args.context); if (hdr->page_array.pagevec != hdr->page_array.page_array) kfree(hdr->page_array.pagevec); - if (atomic_dec_and_test(&hdr->refcnt)) - hdr->completion_ops->completion(hdr); } EXPORT_SYMBOL_GPL(nfs_pgio_data_destroy); @@ -620,6 +606,7 @@ static int nfs_pgio_error(struct nfs_pageio_descriptor *desc, { set_bit(NFS_IOHDR_REDO, &hdr->flags); nfs_pgio_data_destroy(hdr); + hdr->completion_ops->completion(hdr); desc->pg_completion_ops->error_cleanup(&desc->pg_list); return -ENOMEM; } @@ -634,6 +621,7 @@ static void nfs_pgio_release(void *calldata) if (hdr->rw_ops->rw_release) hdr->rw_ops->rw_release(hdr); nfs_pgio_data_destroy(hdr); + hdr->completion_ops->completion(hdr); } /** @@ -707,9 +695,10 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc, struct page **pages; struct list_head *head = &desc->pg_list; struct nfs_commit_info cinfo; + unsigned int pagecount; - if (!nfs_pgio_data_init(hdr, nfs_page_array_len(desc->pg_base, - desc->pg_count))) + pagecount = nfs_page_array_len(desc->pg_base, desc->pg_count); + if (!nfs_pgarray_set(&hdr->page_array, pagecount)) return nfs_pgio_error(desc, hdr); nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq); @@ -743,14 +732,11 @@ static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc) return -ENOMEM; } nfs_pgheader_init(desc, hdr, nfs_pgio_header_free); - atomic_inc(&hdr->refcnt); ret = nfs_generic_pgio(desc, hdr); if (ret == 0) ret = nfs_initiate_pgio(NFS_CLIENT(hdr->inode), hdr, desc->pg_rpc_callops, desc->pg_ioflags, 0); - if (atomic_dec_and_test(&hdr->refcnt)) - hdr->completion_ops->completion(hdr); return ret; } diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index ecc911347750..ecbed4632d11 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1602,15 +1602,12 @@ pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) } nfs_pgheader_init(desc, hdr, pnfs_writehdr_free); hdr->lseg = pnfs_get_lseg(desc->pg_lseg); - atomic_inc(&hdr->refcnt); ret = nfs_generic_pgio(desc, hdr); if (ret != 0) { pnfs_put_lseg(desc->pg_lseg); desc->pg_lseg = NULL; } else pnfs_do_write(desc, hdr, desc->pg_ioflags); - if (atomic_dec_and_test(&hdr->refcnt)) - hdr->completion_ops->completion(hdr); return ret; } EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages); @@ -1745,15 +1742,12 @@ pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) } nfs_pgheader_init(desc, hdr, pnfs_readhdr_free); hdr->lseg = pnfs_get_lseg(desc->pg_lseg); - atomic_inc(&hdr->refcnt); ret = nfs_generic_pgio(desc, hdr); if (ret != 0) { pnfs_put_lseg(desc->pg_lseg); desc->pg_lseg = NULL; } else pnfs_do_read(desc, hdr); - if (atomic_dec_and_test(&hdr->refcnt)) - hdr->completion_ops->completion(hdr); return ret; } EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages); diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index e1b7b3b7c40f..81cbbf313272 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1259,7 +1259,6 @@ struct nfs_pgio_header { struct inode *inode; struct rpc_cred *cred; struct list_head pages; - atomic_t refcnt; struct nfs_page *req; struct nfs_writeverf verf; /* Used for writes */ struct pnfs_layout_segment *lseg; -- cgit v1.2.3-59-g8ed1b From 53113ad35e4b9ce82d949c7c67c7b666fad5d907 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Mon, 9 Jun 2014 11:48:38 -0400 Subject: pnfs: clean up *_resend_to_mds Clean up pnfs_read_done_resend_to_mds and pnfs_write_done_resend_to_mds: - instead 
of passing all arguments from a nfs_pgio_header, just pass the header - share the common code Reviewed-by: Christoph Hellwig Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/filelayout/filelayout.c | 10 ++----- fs/nfs/pagelist.c | 32 +++++++++++++++++++++ fs/nfs/pnfs.c | 63 ++++++------------------------------------ fs/nfs/pnfs.h | 8 ++---- include/linux/nfs_page.h | 2 ++ 5 files changed, 47 insertions(+), 68 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index 537e7f7a0b48..504d58a51d35 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -97,10 +97,7 @@ static void filelayout_reset_write(struct nfs_pgio_header *hdr) hdr->args.count, (unsigned long long)hdr->args.offset); - task->tk_status = pnfs_write_done_resend_to_mds(hdr->inode, - &hdr->pages, - hdr->completion_ops, - hdr->dreq); + task->tk_status = pnfs_write_done_resend_to_mds(hdr); } } @@ -117,10 +114,7 @@ static void filelayout_reset_read(struct nfs_pgio_header *hdr) hdr->args.count, (unsigned long long)hdr->args.offset); - task->tk_status = pnfs_read_done_resend_to_mds(hdr->inode, - &hdr->pages, - hdr->completion_ops, - hdr->dreq); + task->tk_status = pnfs_read_done_resend_to_mds(hdr); } } diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 580fc0c982e6..9c6c55359394 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -949,6 +949,38 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, } EXPORT_SYMBOL_GPL(nfs_pageio_add_request); +/* + * nfs_pageio_resend - Transfer requests to new descriptor and resend + * @hdr - the pgio header to move request from + * @desc - the pageio descriptor to add requests to + * + * Try to move each request (nfs_page) from @hdr to @desc then attempt + * to send them. + * + * Returns 0 on success and < 0 on error. + */ +int nfs_pageio_resend(struct nfs_pageio_descriptor *desc, + struct nfs_pgio_header *hdr) +{ + LIST_HEAD(failed); + + desc->pg_dreq = hdr->dreq; + while (!list_empty(&hdr->pages)) { + struct nfs_page *req = nfs_list_entry(hdr->pages.next); + + nfs_list_remove_request(req); + if (!nfs_pageio_add_request(desc, req)) + nfs_list_add_request(req, &failed); + } + nfs_pageio_complete(desc); + if (!list_empty(&failed)) { + list_move(&failed, &hdr->pages); + return -EIO; + } + return 0; +} +EXPORT_SYMBOL_GPL(nfs_pageio_resend); + /** * nfs_pageio_complete - Complete I/O on an nfs_pageio_descriptor * @desc: pointer to io descriptor diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index ecbed4632d11..83ff8a05485a 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1470,35 +1470,14 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, } EXPORT_SYMBOL_GPL(pnfs_generic_pg_test); -int pnfs_write_done_resend_to_mds(struct inode *inode, - struct list_head *head, - const struct nfs_pgio_completion_ops *compl_ops, - struct nfs_direct_req *dreq) +int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *hdr) { struct nfs_pageio_descriptor pgio; - LIST_HEAD(failed); /* Resend all requests through the MDS */ - nfs_pageio_init_write(&pgio, inode, FLUSH_STABLE, true, compl_ops); - pgio.pg_dreq = dreq; - while (!list_empty(head)) { - struct nfs_page *req = nfs_list_entry(head->next); - - nfs_list_remove_request(req); - if (!nfs_pageio_add_request(&pgio, req)) - nfs_list_add_request(req, &failed); - } - nfs_pageio_complete(&pgio); - - if (!list_empty(&failed)) { - /* For some reason our attempt to resend pages. 
Mark the - * overall send request as having failed, and let - * nfs_writeback_release_full deal with the error. - */ - list_move(&failed, head); - return -EIO; - } - return 0; + nfs_pageio_init_write(&pgio, hdr->inode, FLUSH_STABLE, true, + hdr->completion_ops); + return nfs_pageio_resend(&pgio, hdr); } EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds); @@ -1511,10 +1490,7 @@ static void pnfs_ld_handle_write_error(struct nfs_pgio_header *hdr) pnfs_return_layout(hdr->inode); } if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) - hdr->task.tk_status = pnfs_write_done_resend_to_mds(hdr->inode, - &hdr->pages, - hdr->completion_ops, - hdr->dreq); + hdr->task.tk_status = pnfs_write_done_resend_to_mds(hdr); } /* @@ -1612,31 +1588,13 @@ pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) } EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages); -int pnfs_read_done_resend_to_mds(struct inode *inode, - struct list_head *head, - const struct nfs_pgio_completion_ops *compl_ops, - struct nfs_direct_req *dreq) +int pnfs_read_done_resend_to_mds(struct nfs_pgio_header *hdr) { struct nfs_pageio_descriptor pgio; - LIST_HEAD(failed); /* Resend all requests through the MDS */ - nfs_pageio_init_read(&pgio, inode, true, compl_ops); - pgio.pg_dreq = dreq; - while (!list_empty(head)) { - struct nfs_page *req = nfs_list_entry(head->next); - - nfs_list_remove_request(req); - if (!nfs_pageio_add_request(&pgio, req)) - nfs_list_add_request(req, &failed); - } - nfs_pageio_complete(&pgio); - - if (!list_empty(&failed)) { - list_move(&failed, head); - return -EIO; - } - return 0; + nfs_pageio_init_read(&pgio, hdr->inode, true, hdr->completion_ops); + return nfs_pageio_resend(&pgio, hdr); } EXPORT_SYMBOL_GPL(pnfs_read_done_resend_to_mds); @@ -1648,10 +1606,7 @@ static void pnfs_ld_handle_read_error(struct nfs_pgio_header *hdr) pnfs_return_layout(hdr->inode); } if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) - hdr->task.tk_status = pnfs_read_done_resend_to_mds(hdr->inode, - &hdr->pages, - hdr->completion_ops, - hdr->dreq); + hdr->task.tk_status = pnfs_read_done_resend_to_mds(hdr); } /* diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index a4a58be94064..27ddecd3847f 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -228,12 +228,8 @@ struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino, gfp_t gfp_flags); void nfs4_deviceid_mark_client_invalid(struct nfs_client *clp); -int pnfs_read_done_resend_to_mds(struct inode *inode, struct list_head *head, - const struct nfs_pgio_completion_ops *compl_ops, - struct nfs_direct_req *dreq); -int pnfs_write_done_resend_to_mds(struct inode *inode, struct list_head *head, - const struct nfs_pgio_completion_ops *compl_ops, - struct nfs_direct_req *dreq); +int pnfs_read_done_resend_to_mds(struct nfs_pgio_header *); +int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *); struct nfs4_threshold *pnfs_mdsthreshold_alloc(void); /* nfs4_deviceid_flags */ diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index d0fae7b78252..4b48548e700e 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -112,6 +112,8 @@ extern void nfs_pageio_init(struct nfs_pageio_descriptor *desc, int how); extern int nfs_pageio_add_request(struct nfs_pageio_descriptor *, struct nfs_page *); +extern int nfs_pageio_resend(struct nfs_pageio_descriptor *, + struct nfs_pgio_header *); extern void nfs_pageio_complete(struct nfs_pageio_descriptor *desc); extern void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *, pgoff_t); extern size_t nfs_generic_pg_test(struct 
nfs_pageio_descriptor *desc, -- cgit v1.2.3-59-g8ed1b From 3760cd20402d4c131e1994c968ecb055fa0f74bc Mon Sep 17 00:00:00 2001 From: "Chen, Gong" Date: Wed, 11 Jun 2014 13:59:45 -0700 Subject: CPER: Adjust code flow of some functions Some codes can be reorganzied as a common function for other usages. Signed-off-by: Chen, Gong Signed-off-by: Tony Luck --- drivers/firmware/efi/cper.c | 155 ++++++++++++++++++++++++++++---------------- include/linux/cper.h | 9 +++ 2 files changed, 109 insertions(+), 55 deletions(-) (limited to 'include/linux') diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c index 1491dd4f08f9..ac33a9fed341 100644 --- a/drivers/firmware/efi/cper.c +++ b/drivers/firmware/efi/cper.c @@ -34,6 +34,9 @@ #include #define INDENT_SP " " + +static char rcd_decode_str[CPER_REC_LEN]; + /* * CPER record ID need to be unique even after reboot, because record * ID is used as index for ERST storage, while CPER records from @@ -50,18 +53,19 @@ u64 cper_next_record_id(void) } EXPORT_SYMBOL_GPL(cper_next_record_id); -static const char *cper_severity_strs[] = { +static const char * const severity_strs[] = { "recoverable", "fatal", "corrected", "info", }; -static const char *cper_severity_str(unsigned int severity) +const char *cper_severity_str(unsigned int severity) { - return severity < ARRAY_SIZE(cper_severity_strs) ? - cper_severity_strs[severity] : "unknown"; + return severity < ARRAY_SIZE(severity_strs) ? + severity_strs[severity] : "unknown"; } +EXPORT_SYMBOL_GPL(cper_severity_str); /* * cper_print_bits - print strings for set bits @@ -100,32 +104,32 @@ void cper_print_bits(const char *pfx, unsigned int bits, printk("%s\n", buf); } -static const char * const cper_proc_type_strs[] = { +static const char * const proc_type_strs[] = { "IA32/X64", "IA64", }; -static const char * const cper_proc_isa_strs[] = { +static const char * const proc_isa_strs[] = { "IA32", "IA64", "X64", }; -static const char * const cper_proc_error_type_strs[] = { +static const char * const proc_error_type_strs[] = { "cache error", "TLB error", "bus error", "micro-architectural error", }; -static const char * const cper_proc_op_strs[] = { +static const char * const proc_op_strs[] = { "unknown or generic", "data read", "data write", "instruction execution", }; -static const char * const cper_proc_flag_strs[] = { +static const char * const proc_flag_strs[] = { "restartable", "precise IP", "overflow", @@ -137,26 +141,26 @@ static void cper_print_proc_generic(const char *pfx, { if (proc->validation_bits & CPER_PROC_VALID_TYPE) printk("%s""processor_type: %d, %s\n", pfx, proc->proc_type, - proc->proc_type < ARRAY_SIZE(cper_proc_type_strs) ? - cper_proc_type_strs[proc->proc_type] : "unknown"); + proc->proc_type < ARRAY_SIZE(proc_type_strs) ? + proc_type_strs[proc->proc_type] : "unknown"); if (proc->validation_bits & CPER_PROC_VALID_ISA) printk("%s""processor_isa: %d, %s\n", pfx, proc->proc_isa, - proc->proc_isa < ARRAY_SIZE(cper_proc_isa_strs) ? - cper_proc_isa_strs[proc->proc_isa] : "unknown"); + proc->proc_isa < ARRAY_SIZE(proc_isa_strs) ? 
+ proc_isa_strs[proc->proc_isa] : "unknown"); if (proc->validation_bits & CPER_PROC_VALID_ERROR_TYPE) { printk("%s""error_type: 0x%02x\n", pfx, proc->proc_error_type); cper_print_bits(pfx, proc->proc_error_type, - cper_proc_error_type_strs, - ARRAY_SIZE(cper_proc_error_type_strs)); + proc_error_type_strs, + ARRAY_SIZE(proc_error_type_strs)); } if (proc->validation_bits & CPER_PROC_VALID_OPERATION) printk("%s""operation: %d, %s\n", pfx, proc->operation, - proc->operation < ARRAY_SIZE(cper_proc_op_strs) ? - cper_proc_op_strs[proc->operation] : "unknown"); + proc->operation < ARRAY_SIZE(proc_op_strs) ? + proc_op_strs[proc->operation] : "unknown"); if (proc->validation_bits & CPER_PROC_VALID_FLAGS) { printk("%s""flags: 0x%02x\n", pfx, proc->flags); - cper_print_bits(pfx, proc->flags, cper_proc_flag_strs, - ARRAY_SIZE(cper_proc_flag_strs)); + cper_print_bits(pfx, proc->flags, proc_flag_strs, + ARRAY_SIZE(proc_flag_strs)); } if (proc->validation_bits & CPER_PROC_VALID_LEVEL) printk("%s""level: %d\n", pfx, proc->level); @@ -177,7 +181,7 @@ static void cper_print_proc_generic(const char *pfx, printk("%s""IP: 0x%016llx\n", pfx, proc->ip); } -static const char *cper_mem_err_type_strs[] = { +static const char * const mem_err_type_strs[] = { "unknown", "no error", "single-bit ECC", @@ -196,58 +200,99 @@ static const char *cper_mem_err_type_strs[] = { "physical memory map-out event", }; -static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem) +const char *cper_mem_err_type_str(unsigned int etype) { - if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS) - printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status); - if (mem->validation_bits & CPER_MEM_VALID_PA) - printk("%s""physical_address: 0x%016llx\n", - pfx, mem->physical_addr); - if (mem->validation_bits & CPER_MEM_VALID_PA_MASK) - printk("%s""physical_address_mask: 0x%016llx\n", - pfx, mem->physical_addr_mask); + return etype < ARRAY_SIZE(mem_err_type_strs) ? 
+ mem_err_type_strs[etype] : "unknown"; +} +EXPORT_SYMBOL_GPL(cper_mem_err_type_str); + +static int cper_mem_err_location(const struct cper_sec_mem_err *mem, char *msg) +{ + u32 len, n; + + if (!msg) + return 0; + + n = 0; + len = CPER_REC_LEN - 1; if (mem->validation_bits & CPER_MEM_VALID_NODE) - pr_debug("node: %d\n", mem->node); + n += scnprintf(msg + n, len - n, "node: %d ", mem->node); if (mem->validation_bits & CPER_MEM_VALID_CARD) - pr_debug("card: %d\n", mem->card); + n += scnprintf(msg + n, len - n, "card: %d ", mem->card); if (mem->validation_bits & CPER_MEM_VALID_MODULE) - pr_debug("module: %d\n", mem->module); + n += scnprintf(msg + n, len - n, "module: %d ", mem->module); if (mem->validation_bits & CPER_MEM_VALID_RANK_NUMBER) - pr_debug("rank: %d\n", mem->rank); + n += scnprintf(msg + n, len - n, "rank: %d ", mem->rank); if (mem->validation_bits & CPER_MEM_VALID_BANK) - pr_debug("bank: %d\n", mem->bank); + n += scnprintf(msg + n, len - n, "bank: %d ", mem->bank); if (mem->validation_bits & CPER_MEM_VALID_DEVICE) - pr_debug("device: %d\n", mem->device); + n += scnprintf(msg + n, len - n, "device: %d ", mem->device); if (mem->validation_bits & CPER_MEM_VALID_ROW) - pr_debug("row: %d\n", mem->row); + n += scnprintf(msg + n, len - n, "row: %d ", mem->row); if (mem->validation_bits & CPER_MEM_VALID_COLUMN) - pr_debug("column: %d\n", mem->column); + n += scnprintf(msg + n, len - n, "column: %d ", mem->column); if (mem->validation_bits & CPER_MEM_VALID_BIT_POSITION) - pr_debug("bit_position: %d\n", mem->bit_pos); + n += scnprintf(msg + n, len - n, "bit_position: %d ", + mem->bit_pos); if (mem->validation_bits & CPER_MEM_VALID_REQUESTOR_ID) - pr_debug("requestor_id: 0x%016llx\n", mem->requestor_id); + n += scnprintf(msg + n, len - n, "requestor_id: 0x%016llx ", + mem->requestor_id); if (mem->validation_bits & CPER_MEM_VALID_RESPONDER_ID) - pr_debug("responder_id: 0x%016llx\n", mem->responder_id); + n += scnprintf(msg + n, len - n, "responder_id: 0x%016llx ", + mem->responder_id); if (mem->validation_bits & CPER_MEM_VALID_TARGET_ID) - pr_debug("target_id: 0x%016llx\n", mem->target_id); + scnprintf(msg + n, len - n, "target_id: 0x%016llx ", + mem->target_id); + + msg[n] = '\0'; + return n; +} + +static int cper_dimm_err_location(const struct cper_sec_mem_err *mem, char *msg) +{ + u32 len, n; + const char *bank = NULL, *device = NULL; + + if (!msg || !(mem->validation_bits & CPER_MEM_VALID_MODULE_HANDLE)) + return 0; + + n = 0; + len = CPER_REC_LEN - 1; + dmi_memdev_name(mem->mem_dev_handle, &bank, &device); + if (bank && device) + n = snprintf(msg, len, "DIMM location: %s %s ", bank, device); + else + n = snprintf(msg, len, + "DIMM location: not present. DMI handle: 0x%.4x ", + mem->mem_dev_handle); + + msg[n] = '\0'; + return n; +} + +static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem) +{ + if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS) + printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status); + if (mem->validation_bits & CPER_MEM_VALID_PA) + printk("%s""physical_address: 0x%016llx\n", + pfx, mem->physical_addr); + if (mem->validation_bits & CPER_MEM_VALID_PA_MASK) + printk("%s""physical_address_mask: 0x%016llx\n", + pfx, mem->physical_addr_mask); + if (cper_mem_err_location(mem, rcd_decode_str)) + printk("%s%s\n", pfx, rcd_decode_str); if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) { u8 etype = mem->error_type; printk("%s""error_type: %d, %s\n", pfx, etype, - etype < ARRAY_SIZE(cper_mem_err_type_strs) ? 
- cper_mem_err_type_strs[etype] : "unknown"); - } - if (mem->validation_bits & CPER_MEM_VALID_MODULE_HANDLE) { - const char *bank = NULL, *device = NULL; - dmi_memdev_name(mem->mem_dev_handle, &bank, &device); - if (bank != NULL && device != NULL) - printk("%s""DIMM location: %s %s", pfx, bank, device); - else - printk("%s""DIMM DMI handle: 0x%.4x", - pfx, mem->mem_dev_handle); + cper_mem_err_type_str(etype)); } + if (cper_dimm_err_location(mem, rcd_decode_str)) + printk("%s%s\n", pfx, rcd_decode_str); } -static const char *cper_pcie_port_type_strs[] = { +static const char * const pcie_port_type_strs[] = { "PCIe end point", "legacy PCI end point", "unknown", @@ -266,8 +311,8 @@ static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie, { if (pcie->validation_bits & CPER_PCIE_VALID_PORT_TYPE) printk("%s""port_type: %d, %s\n", pfx, pcie->port_type, - pcie->port_type < ARRAY_SIZE(cper_pcie_port_type_strs) ? - cper_pcie_port_type_strs[pcie->port_type] : "unknown"); + pcie->port_type < ARRAY_SIZE(pcie_port_type_strs) ? + pcie_port_type_strs[pcie->port_type] : "unknown"); if (pcie->validation_bits & CPER_PCIE_VALID_VERSION) printk("%s""version: %d.%d\n", pfx, pcie->version.major, pcie->version.minor); diff --git a/include/linux/cper.h b/include/linux/cper.h index 2fc0ec3d89cc..ed088b9c1298 100644 --- a/include/linux/cper.h +++ b/include/linux/cper.h @@ -35,6 +35,13 @@ */ #define CPER_RECORD_REV 0x0100 +/* + * CPER record length contains the CPER fields which are relevant for further + * handling of a memory error in userspace (we don't carry all the fields + * defined in the UEFI spec because some of them don't make any sense.) + * Currently, a length of 256 should be more than enough. + */ +#define CPER_REC_LEN 256 /* * Severity difinition for error_severity in struct cper_record_header * and section_severity in struct cper_section_descriptor @@ -395,6 +402,8 @@ struct cper_sec_pcie { #pragma pack() u64 cper_next_record_id(void); +const char *cper_severity_str(unsigned int); +const char *cper_mem_err_type_str(unsigned int); void cper_print_bits(const char *prefix, unsigned int bits, const char * const strs[], unsigned int strs_size); -- cgit v1.2.3-59-g8ed1b From d963cd95bea93b7db9390a71d1e2cabbb3b2c3ea Mon Sep 17 00:00:00 2001 From: "Chen, Gong" Date: Wed, 11 Jun 2014 14:02:20 -0700 Subject: RAS, debugfs: Add debugfs interface for RAS subsystem Implement a new debugfs interface for RAS susbsystem. A file named daemon_active is added there accordingly. This file is used to track if user space daemon accesses perf/trace interface or not. One can track which daemon opens it via "lsof /path/to/debugfs/ras/daemon_active". 
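A rough sketch of the intended in-kernel usage (illustration only, not part of this patch; the reporter function below is hypothetical, only ras_userspace_consumers() comes from this series): an error reporter can stay quiet on the console while a daemon holds daemon_active open and consume the trace events instead.

	#include <linux/printk.h>
	#include <linux/ras.h>

	/* Hypothetical reporter: skip console decoding while a daemon listens. */
	static void example_report_error(const char *msg)
	{
		if (ras_userspace_consumers())
			return;		/* daemon picks it up via tracing */

		pr_err("RAS: %s\n", msg);
	}
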
Signed-off-by: Chen, Gong Link: http://lkml.kernel.org/r/1402475691-30045-5-git-send-email-gong.chen@linux.intel.com Signed-off-by: Borislav Petkov Signed-off-by: Tony Luck --- drivers/ras/Makefile | 2 +- drivers/ras/debugfs.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++++++ drivers/ras/ras.c | 14 +++++++++++++ include/linux/ras.h | 14 +++++++++++++ 4 files changed, 85 insertions(+), 1 deletion(-) create mode 100644 drivers/ras/debugfs.c create mode 100644 include/linux/ras.h (limited to 'include/linux') diff --git a/drivers/ras/Makefile b/drivers/ras/Makefile index 223e806fa5bf..d7f73341ced3 100644 --- a/drivers/ras/Makefile +++ b/drivers/ras/Makefile @@ -1 +1 @@ -obj-$(CONFIG_RAS) += ras.o +obj-$(CONFIG_RAS) += ras.o debugfs.o diff --git a/drivers/ras/debugfs.c b/drivers/ras/debugfs.c new file mode 100644 index 000000000000..0322acf67ea5 --- /dev/null +++ b/drivers/ras/debugfs.c @@ -0,0 +1,56 @@ +#include + +static struct dentry *ras_debugfs_dir; + +static atomic_t trace_count = ATOMIC_INIT(0); + +int ras_userspace_consumers(void) +{ + return atomic_read(&trace_count); +} +EXPORT_SYMBOL_GPL(ras_userspace_consumers); + +static int trace_show(struct seq_file *m, void *v) +{ + return atomic_read(&trace_count); +} + +static int trace_open(struct inode *inode, struct file *file) +{ + atomic_inc(&trace_count); + return single_open(file, trace_show, NULL); +} + +static int trace_release(struct inode *inode, struct file *file) +{ + atomic_dec(&trace_count); + return single_release(inode, file); +} + +static const struct file_operations trace_fops = { + .open = trace_open, + .read = seq_read, + .llseek = seq_lseek, + .release = trace_release, +}; + +int __init ras_add_daemon_trace(void) +{ + struct dentry *fentry; + + if (!ras_debugfs_dir) + return -ENOENT; + + fentry = debugfs_create_file("daemon_active", S_IRUSR, ras_debugfs_dir, + NULL, &trace_fops); + if (!fentry) + return -ENODEV; + + return 0; + +} + +void __init ras_debugfs_init(void) +{ + ras_debugfs_dir = debugfs_create_dir("ras", NULL); +} diff --git a/drivers/ras/ras.c b/drivers/ras/ras.c index b0c6ed1d8e77..4cac43a1e25c 100644 --- a/drivers/ras/ras.c +++ b/drivers/ras/ras.c @@ -5,8 +5,22 @@ * Chen, Gong */ +#include +#include + #define CREATE_TRACE_POINTS #define TRACE_INCLUDE_PATH ../../include/ras #include +static int __init ras_init(void) +{ + int rc = 0; + + ras_debugfs_init(); + rc = ras_add_daemon_trace(); + + return rc; +} +subsys_initcall(ras_init); + EXPORT_TRACEPOINT_SYMBOL_GPL(mc_event); diff --git a/include/linux/ras.h b/include/linux/ras.h new file mode 100644 index 000000000000..2aceeafd6fe5 --- /dev/null +++ b/include/linux/ras.h @@ -0,0 +1,14 @@ +#ifndef __RAS_H__ +#define __RAS_H__ + +#ifdef CONFIG_DEBUG_FS +int ras_userspace_consumers(void); +void ras_debugfs_init(void); +int ras_add_daemon_trace(void); +#else +static inline int ras_userspace_consumers(void) { return 0; } +static inline void ras_debugfs_init(void) { return; } +static inline int ras_add_daemon_trace(void) { return 0; } +#endif + +#endif -- cgit v1.2.3-59-g8ed1b From 2dfb7d51a61d7ca91b131c8db612f27d9390f2d5 Mon Sep 17 00:00:00 2001 From: "Chen, Gong" Date: Tue, 17 Jun 2014 22:33:07 -0400 Subject: trace, RAS: Add eMCA trace event interface Add trace interface to elaborate all H/W error related information. 
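For illustration only (not part of this change; the wrapper and its caller are hypothetical, but the call matches the TRACE_EVENT prototype added by this patch): another firmware-first error source that already carries a CPER memory error section could report through the same event, e.g. when no FRU information is available.

	#include <linux/cper.h>
	#include <linux/uuid.h>
	#include <ras/ras_event.h>

	/* Hypothetical caller without FRU details. */
	static void example_log_mem_error(struct cper_sec_mem_err *mem, u8 sev)
	{
		static u32 seq;

		trace_extlog_mem_event(mem, ++seq, &NULL_UUID_LE, "", sev);
	}
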
Signed-off-by: Chen, Gong Acked-by: Borislav Petkov Signed-off-by: Tony Luck --- drivers/acpi/Kconfig | 4 ++- drivers/acpi/acpi_extlog.c | 27 ++++++++++++++++--- drivers/firmware/efi/cper.c | 45 ++++++++++++++++++++++++++++--- drivers/ras/ras.c | 3 +++ include/linux/cper.h | 23 ++++++++++++++++ include/ras/ras_event.h | 64 +++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 158 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index a34a22841002..206942b8d105 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -370,6 +370,7 @@ config ACPI_EXTLOG tristate "Extended Error Log support" depends on X86_MCE && X86_LOCAL_APIC select UEFI_CPER + select RAS default n help Certain usages such as Predictive Failure Analysis (PFA) require @@ -384,6 +385,7 @@ config ACPI_EXTLOG Enhanced MCA Logging allows firmware to provide additional error information to system software, synchronous with MCE or CMCI. This - driver adds support for that functionality. + driver adds support for that functionality with corresponding + tracepoint which carries that information to userspace. endif # ACPI diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c index 185334114d71..e61da957f30f 100644 --- a/drivers/acpi/acpi_extlog.c +++ b/drivers/acpi/acpi_extlog.c @@ -16,6 +16,7 @@ #include #include "apei/apei-internal.h" +#include #define EXT_ELOG_ENTRY_MASK GENMASK_ULL(51, 0) /* elog entry address mask */ @@ -137,8 +138,12 @@ static int extlog_print(struct notifier_block *nb, unsigned long val, struct mce *mce = (struct mce *)data; int bank = mce->bank; int cpu = mce->extcpu; - struct acpi_generic_status *estatus; - int rc; + struct acpi_generic_status *estatus, *tmp; + struct acpi_generic_data *gdata; + const uuid_le *fru_id = &NULL_UUID_LE; + char *fru_text = ""; + uuid_le *sec_type; + static u32 err_seq; estatus = extlog_elog_entry_check(cpu, bank); if (estatus == NULL) @@ -148,7 +153,23 @@ static int extlog_print(struct notifier_block *nb, unsigned long val, /* clear record status to enable BIOS to update it again */ estatus->block_status = 0; - rc = print_extlog_rcd(NULL, (struct acpi_generic_status *)elog_buf, cpu); + tmp = (struct acpi_generic_status *)elog_buf; + print_extlog_rcd(NULL, tmp, cpu); + + /* log event via trace */ + err_seq++; + gdata = (struct acpi_generic_data *)(tmp + 1); + if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID) + fru_id = (uuid_le *)gdata->fru_id; + if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT) + fru_text = gdata->fru_text; + sec_type = (uuid_le *)gdata->section_type; + if (!uuid_le_cmp(*sec_type, CPER_SEC_PLATFORM_MEM)) { + struct cper_sec_mem_err *mem = (void *)(gdata + 1); + if (gdata->error_data_length >= sizeof(*mem)) + trace_extlog_mem_event(mem, err_seq, fru_id, fru_text, + (u8)gdata->error_severity); + } return NOTIFY_STOP; } diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c index ac33a9fed341..437e6fd47311 100644 --- a/drivers/firmware/efi/cper.c +++ b/drivers/firmware/efi/cper.c @@ -207,7 +207,7 @@ const char *cper_mem_err_type_str(unsigned int etype) } EXPORT_SYMBOL_GPL(cper_mem_err_type_str); -static int cper_mem_err_location(const struct cper_sec_mem_err *mem, char *msg) +static int cper_mem_err_location(struct cper_mem_err_compact *mem, char *msg) { u32 len, n; @@ -249,7 +249,7 @@ static int cper_mem_err_location(const struct cper_sec_mem_err *mem, char *msg) return n; } -static int cper_dimm_err_location(const struct cper_sec_mem_err *mem, 
char *msg) +static int cper_dimm_err_location(struct cper_mem_err_compact *mem, char *msg) { u32 len, n; const char *bank = NULL, *device = NULL; @@ -271,8 +271,44 @@ static int cper_dimm_err_location(const struct cper_sec_mem_err *mem, char *msg) return n; } +void cper_mem_err_pack(const struct cper_sec_mem_err *mem, + struct cper_mem_err_compact *cmem) +{ + cmem->validation_bits = mem->validation_bits; + cmem->node = mem->node; + cmem->card = mem->card; + cmem->module = mem->module; + cmem->bank = mem->bank; + cmem->device = mem->device; + cmem->row = mem->row; + cmem->column = mem->column; + cmem->bit_pos = mem->bit_pos; + cmem->requestor_id = mem->requestor_id; + cmem->responder_id = mem->responder_id; + cmem->target_id = mem->target_id; + cmem->rank = mem->rank; + cmem->mem_array_handle = mem->mem_array_handle; + cmem->mem_dev_handle = mem->mem_dev_handle; +} + +const char *cper_mem_err_unpack(struct trace_seq *p, + struct cper_mem_err_compact *cmem) +{ + const char *ret = p->buffer + p->len; + + if (cper_mem_err_location(cmem, rcd_decode_str)) + trace_seq_printf(p, "%s", rcd_decode_str); + if (cper_dimm_err_location(cmem, rcd_decode_str)) + trace_seq_printf(p, "%s", rcd_decode_str); + trace_seq_putc(p, '\0'); + + return ret; +} + static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem) { + struct cper_mem_err_compact cmem; + if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS) printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status); if (mem->validation_bits & CPER_MEM_VALID_PA) @@ -281,14 +317,15 @@ static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem) if (mem->validation_bits & CPER_MEM_VALID_PA_MASK) printk("%s""physical_address_mask: 0x%016llx\n", pfx, mem->physical_addr_mask); - if (cper_mem_err_location(mem, rcd_decode_str)) + cper_mem_err_pack(mem, &cmem); + if (cper_mem_err_location(&cmem, rcd_decode_str)) printk("%s%s\n", pfx, rcd_decode_str); if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) { u8 etype = mem->error_type; printk("%s""error_type: %d, %s\n", pfx, etype, cper_mem_err_type_str(etype)); } - if (cper_dimm_err_location(mem, rcd_decode_str)) + if (cper_dimm_err_location(&cmem, rcd_decode_str)) printk("%s%s\n", pfx, rcd_decode_str); } diff --git a/drivers/ras/ras.c b/drivers/ras/ras.c index 4cac43a1e25c..b67dd362b7b6 100644 --- a/drivers/ras/ras.c +++ b/drivers/ras/ras.c @@ -23,4 +23,7 @@ static int __init ras_init(void) } subsys_initcall(ras_init); +#if defined(CONFIG_ACPI_EXTLOG) || defined(CONFIG_ACPI_EXTLOG_MODULE) +EXPORT_TRACEPOINT_SYMBOL_GPL(extlog_mem_event); +#endif EXPORT_TRACEPOINT_SYMBOL_GPL(mc_event); diff --git a/include/linux/cper.h b/include/linux/cper.h index ed088b9c1298..76abba4b238e 100644 --- a/include/linux/cper.h +++ b/include/linux/cper.h @@ -22,6 +22,7 @@ #define LINUX_CPER_H #include +#include /* CPER record signature and the size */ #define CPER_SIG_RECORD "CPER" @@ -363,6 +364,24 @@ struct cper_sec_mem_err { __u16 mem_dev_handle; /* module handle in UEFI 2.4 */ }; +struct cper_mem_err_compact { + __u64 validation_bits; + __u16 node; + __u16 card; + __u16 module; + __u16 bank; + __u16 device; + __u16 row; + __u16 column; + __u16 bit_pos; + __u64 requestor_id; + __u64 responder_id; + __u64 target_id; + __u16 rank; + __u16 mem_array_handle; + __u16 mem_dev_handle; +}; + struct cper_sec_pcie { __u64 validation_bits; __u32 port_type; @@ -406,5 +425,9 @@ const char *cper_severity_str(unsigned int); const char *cper_mem_err_type_str(unsigned int); void cper_print_bits(const char 
*prefix, unsigned int bits, const char * const strs[], unsigned int strs_size); +void cper_mem_err_pack(const struct cper_sec_mem_err *, + struct cper_mem_err_compact *); +const char *cper_mem_err_unpack(struct trace_seq *, + struct cper_mem_err_compact *); #endif diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h index acbcbb88eaaa..47da53c27ffa 100644 --- a/include/ras/ras_event.h +++ b/include/ras/ras_event.h @@ -9,6 +9,70 @@ #include #include #include +#include + +/* + * MCE Extended Error Log trace event + * + * These events are generated when hardware detects a corrected or + * uncorrected event. + */ + +/* memory trace event */ + +#if defined(CONFIG_ACPI_EXTLOG) || defined(CONFIG_ACPI_EXTLOG_MODULE) +TRACE_EVENT(extlog_mem_event, + TP_PROTO(struct cper_sec_mem_err *mem, + u32 err_seq, + const uuid_le *fru_id, + const char *fru_text, + u8 sev), + + TP_ARGS(mem, err_seq, fru_id, fru_text, sev), + + TP_STRUCT__entry( + __field(u32, err_seq) + __field(u8, etype) + __field(u8, sev) + __field(u64, pa) + __field(u8, pa_mask_lsb) + __field_struct(uuid_le, fru_id) + __string(fru_text, fru_text) + __field_struct(struct cper_mem_err_compact, data) + ), + + TP_fast_assign( + __entry->err_seq = err_seq; + if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) + __entry->etype = mem->error_type; + else + __entry->etype = ~0; + __entry->sev = sev; + if (mem->validation_bits & CPER_MEM_VALID_PA) + __entry->pa = mem->physical_addr; + else + __entry->pa = ~0ull; + + if (mem->validation_bits & CPER_MEM_VALID_PA_MASK) + __entry->pa_mask_lsb = (u8)__ffs64(mem->physical_addr_mask); + else + __entry->pa_mask_lsb = ~0; + __entry->fru_id = *fru_id; + __assign_str(fru_text, fru_text); + cper_mem_err_pack(mem, &__entry->data); + ), + + TP_printk("{%d} %s error: %s physical addr: %016llx (mask lsb: %x) %sFRU: %pUl %.20s", + __entry->err_seq, + cper_severity_str(__entry->sev), + cper_mem_err_type_str(__entry->etype), + __entry->pa, + __entry->pa_mask_lsb, + cper_mem_err_unpack(p, &__entry->data), + &__entry->fru_id, + __get_str(fru_text)) +); +#endif /* * Hardware Events Report -- cgit v1.2.3-59-g8ed1b From d0d480cce8f522b37c2c1de38230fc9ad15fa506 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 25 Jun 2014 10:08:44 -0700 Subject: leds: add led-class attribute-group support Allow led-class devices to be created with optional attribute groups. This is needed in order to allow led drivers to create custom device attributes in a race-free manner. 
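A minimal usage sketch (assumptions: the "pattern" attribute, its show routine and the probe helper are hypothetical; only the new ->groups field and led_classdev_register() are real interfaces): a driver points led_cdev->groups at its attribute groups before registering, so the custom attributes are created together with the device and user space can never observe the device without them.

	#include <linux/device.h>
	#include <linux/leds.h>

	static ssize_t pattern_show(struct device *dev,
				    struct device_attribute *attr, char *buf)
	{
		return sprintf(buf, "none\n");	/* placeholder value */
	}
	static DEVICE_ATTR_RO(pattern);

	static struct attribute *example_led_attrs[] = {
		&dev_attr_pattern.attr,
		NULL
	};
	ATTRIBUTE_GROUPS(example_led);

	static int example_led_probe(struct device *parent,
				     struct led_classdev *cdev)
	{
		cdev->groups = example_led_groups;
		return led_classdev_register(parent, cdev);
	}
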
Signed-off-by: Johan Hovold Signed-off-by: Bryan Wu --- drivers/leds/led-class.c | 5 +++-- include/linux/leds.h | 2 ++ 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/leds/led-class.c b/drivers/leds/led-class.c index f37d63cf726b..aa29198fca3e 100644 --- a/drivers/leds/led-class.c +++ b/drivers/leds/led-class.c @@ -210,8 +210,9 @@ static const struct dev_pm_ops leds_class_dev_pm_ops = { */ int led_classdev_register(struct device *parent, struct led_classdev *led_cdev) { - led_cdev->dev = device_create(leds_class, parent, 0, led_cdev, - "%s", led_cdev->name); + led_cdev->dev = device_create_with_groups(leds_class, parent, 0, + led_cdev, led_cdev->groups, + "%s", led_cdev->name); if (IS_ERR(led_cdev->dev)) return PTR_ERR(led_cdev->dev); diff --git a/include/linux/leds.h b/include/linux/leds.h index 0287ab296689..e43686472197 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -63,6 +63,8 @@ struct led_classdev { unsigned long *delay_off); struct device *dev; + const struct attribute_group **groups; + struct list_head node; /* LED Device list */ const char *default_trigger; /* Trigger to use */ -- cgit v1.2.3-59-g8ed1b From 6d514b4e7737ad75a7e7e0a3f7dde45d46341691 Mon Sep 17 00:00:00 2001 From: George Spelvin Date: Mon, 23 Jun 2014 15:11:54 +0200 Subject: lib: crc32: Greatly shrink CRC combining code There's no need for a full 32x32 matrix, when rows before the last are just shifted copies of the rows after them. There's still room for improvement (especially on X86 processors with CRC32 and PCLMUL instructions), but this is a large step in the right direction [which is in particular useful for its current user, namely SCTP checksumming over multiple skb frags[] entries, i.e. in IPVS balancing when other CRC32 offloads are not available]. The internal primitive is now called crc32_generic_shift and takes one less argument; the XOR with crc2 is done in inline wrappers. Signed-off-by: George Spelvin Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/crc32.h | 14 ++++- lib/crc32.c | 147 ++++++++++++++++++++++++-------------------------- 2 files changed, 82 insertions(+), 79 deletions(-) (limited to 'include/linux') diff --git a/include/linux/crc32.h b/include/linux/crc32.h index 7d275c4fc011..edf34e876e40 100644 --- a/include/linux/crc32.h +++ b/include/linux/crc32.h @@ -29,7 +29,12 @@ extern u32 crc32_be(u32 crc, unsigned char const *p, size_t len); * with the same initializer as crc1, and crc2 seed was 0. See * also crc32_combine_test(). */ -extern u32 crc32_le_combine(u32 crc1, u32 crc2, size_t len2); +u32 __attribute_const__ crc32_le_shift(u32 crc, size_t len); + +static inline u32 crc32_le_combine(u32 crc1, u32 crc2, size_t len2) +{ + return crc32_le_shift(crc1, len2) ^ crc2; +} extern u32 __crc32c_le(u32 crc, unsigned char const *p, size_t len); @@ -51,7 +56,12 @@ extern u32 __crc32c_le(u32 crc, unsigned char const *p, size_t len); * seeded with the same initializer as crc1, and crc2 seed * was 0. See also crc32c_combine_test(). 
*/ -extern u32 __crc32c_le_combine(u32 crc1, u32 crc2, size_t len2); +u32 __attribute_const__ __crc32c_le_shift(u32 crc, size_t len); + +static inline u32 __crc32c_le_combine(u32 crc1, u32 crc2, size_t len2) +{ + return __crc32c_le_shift(crc1, len2) ^ crc2; +} #define crc32(seed, data, length) crc32_le(seed, (unsigned char const *)(data), length) diff --git a/lib/crc32.c b/lib/crc32.c index 21a7b2135af6..9af30ff334c5 100644 --- a/lib/crc32.c +++ b/lib/crc32.c @@ -50,30 +50,6 @@ MODULE_AUTHOR("Matt Domsch "); MODULE_DESCRIPTION("Various CRC32 calculations"); MODULE_LICENSE("GPL"); -#define GF2_DIM 32 - -static u32 gf2_matrix_times(u32 *mat, u32 vec) -{ - u32 sum = 0; - - while (vec) { - if (vec & 1) - sum ^= *mat; - vec >>= 1; - mat++; - } - - return sum; -} - -static void gf2_matrix_square(u32 *square, u32 *mat) -{ - int i; - - for (i = 0; i < GF2_DIM; i++) - square[i] = gf2_matrix_times(mat, mat[i]); -} - #if CRC_LE_BITS > 8 || CRC_BE_BITS > 8 /* implements slicing-by-4 or slicing-by-8 algorithm */ @@ -155,51 +131,6 @@ crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 (*tab)[256]) } #endif -/* For conditions of distribution and use, see copyright notice in zlib.h */ -static u32 crc32_generic_combine(u32 crc1, u32 crc2, size_t len2, - u32 polynomial) -{ - u32 even[GF2_DIM]; /* Even-power-of-two zeros operator */ - u32 odd[GF2_DIM]; /* Odd-power-of-two zeros operator */ - u32 row; - int i; - - if (len2 <= 0) - return crc1; - - /* Put operator for one zero bit in odd */ - odd[0] = polynomial; - row = 1; - for (i = 1; i < GF2_DIM; i++) { - odd[i] = row; - row <<= 1; - } - - gf2_matrix_square(even, odd); /* Put operator for two zero bits in even */ - gf2_matrix_square(odd, even); /* Put operator for four zero bits in odd */ - - /* Apply len2 zeros to crc1 (first square will put the operator for one - * zero byte, eight zero bits, in even). - */ - do { - /* Apply zeros operator for this bit of len2 */ - gf2_matrix_square(even, odd); - if (len2 & 1) - crc1 = gf2_matrix_times(even, crc1); - len2 >>= 1; - /* If no more bits set, then done */ - if (len2 == 0) - break; - /* Another iteration of the loop with odd and even swapped */ - gf2_matrix_square(odd, even); - if (len2 & 1) - crc1 = gf2_matrix_times(odd, crc1); - len2 >>= 1; - } while (len2 != 0); - - crc1 ^= crc2; - return crc1; -} /** * crc32_le_generic() - Calculate bitwise little-endian Ethernet AUTODIN II @@ -271,19 +202,81 @@ u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len) (const u32 (*)[256])crc32ctable_le, CRC32C_POLY_LE); } #endif -u32 __pure crc32_le_combine(u32 crc1, u32 crc2, size_t len2) +EXPORT_SYMBOL(crc32_le); +EXPORT_SYMBOL(__crc32c_le); + +/* + * This multiplies the polynomials x and y modulo the given modulus. + * This follows the "little-endian" CRC convention that the lsbit + * represents the highest power of x, and the msbit represents x^0. + */ +static u32 __attribute_const__ gf2_multiply(u32 x, u32 y, u32 modulus) { - return crc32_generic_combine(crc1, crc2, len2, CRCPOLY_LE); + u32 product = x & 1 ? y : 0; + int i; + + for (i = 0; i < 31; i++) { + product = (product >> 1) ^ (product & 1 ? modulus : 0); + x >>= 1; + product ^= x & 1 ? y : 0; + } + + return product; } -u32 __pure __crc32c_le_combine(u32 crc1, u32 crc2, size_t len2) +/** + * crc32_generic_shift - Append len 0 bytes to crc, in logarithmic time + * @crc: The original little-endian CRC (i.e. lsbit is x^31 coefficient) + * @len: The number of bytes. 
@crc is multiplied by x^(8*@len) + * @polynomial: The modulus used to reduce the result to 32 bits. + * + * It's possible to parallelize CRC computations by computing a CRC + * over separate ranges of a buffer, then summing them. + * This shifts the given CRC by 8*len bits (i.e. produces the same effect + * as appending len bytes of zero to the data), in time proportional + * to log(len). + */ +static u32 __attribute_const__ crc32_generic_shift(u32 crc, size_t len, + u32 polynomial) { - return crc32_generic_combine(crc1, crc2, len2, CRC32C_POLY_LE); + u32 power = polynomial; /* CRC of x^32 */ + int i; + + /* Shift up to 32 bits in the simple linear way */ + for (i = 0; i < 8 * (int)(len & 3); i++) + crc = (crc >> 1) ^ (crc & 1 ? polynomial : 0); + + len >>= 2; + if (!len) + return crc; + + for (;;) { + /* "power" is x^(2^i), modulo the polynomial */ + if (len & 1) + crc = gf2_multiply(crc, power, polynomial); + + len >>= 1; + if (!len) + break; + + /* Square power, advancing to x^(2^(i+1)) */ + power = gf2_multiply(power, power, polynomial); + } + + return crc; } -EXPORT_SYMBOL(crc32_le); -EXPORT_SYMBOL(crc32_le_combine); -EXPORT_SYMBOL(__crc32c_le); -EXPORT_SYMBOL(__crc32c_le_combine); + +u32 __attribute_const__ crc32_le_shift(u32 crc, size_t len) +{ + return crc32_generic_shift(crc, len, CRCPOLY_LE); +} + +u32 __attribute_const__ __crc32c_le_shift(u32 crc, size_t len) +{ + return crc32_generic_shift(crc, len, CRC32C_POLY_LE); +} +EXPORT_SYMBOL(crc32_le_shift); +EXPORT_SYMBOL(__crc32c_le_shift); /** * crc32_be_generic() - Calculate bitwise big-endian Ethernet AUTODIN II CRC32 -- cgit v1.2.3-59-g8ed1b From d8f1c4778e957273c3b5b6e045d8d3af38484ca8 Mon Sep 17 00:00:00 2001 From: George Spelvin Date: Mon, 23 Jun 2014 15:11:56 +0200 Subject: lib: crc32: Add some additional __pure annotations In case they help the compiler. Signed-off-by: George Spelvin Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/crc32.h | 6 +++--- lib/crc32.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/crc32.h b/include/linux/crc32.h index edf34e876e40..9e8a032c1788 100644 --- a/include/linux/crc32.h +++ b/include/linux/crc32.h @@ -8,8 +8,8 @@ #include #include -extern u32 crc32_le(u32 crc, unsigned char const *p, size_t len); -extern u32 crc32_be(u32 crc, unsigned char const *p, size_t len); +u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len); +u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len); /** * crc32_le_combine - Combine two crc32 check values into one. For two @@ -36,7 +36,7 @@ static inline u32 crc32_le_combine(u32 crc1, u32 crc2, size_t len2) return crc32_le_shift(crc1, len2) ^ crc2; } -extern u32 __crc32c_le(u32 crc, unsigned char const *p, size_t len); +u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len); /** * __crc32c_le_combine - Combine two crc32c check values into one. 
For two diff --git a/lib/crc32.c b/lib/crc32.c index af938ab12468..9a907d489d95 100644 --- a/lib/crc32.c +++ b/lib/crc32.c @@ -53,7 +53,7 @@ MODULE_LICENSE("GPL"); #if CRC_LE_BITS > 8 || CRC_BE_BITS > 8 /* implements slicing-by-4 or slicing-by-8 algorithm */ -static inline u32 +static inline u32 __pure crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 (*tab)[256]) { # ifdef __LITTLE_ENDIAN -- cgit v1.2.3-59-g8ed1b From a69f5edb8ba20c87c5f7c96ec40581f9f51f2910 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 24 Jun 2014 11:20:48 -0700 Subject: mac_pton: Use bool not int return Use bool instead of int as the return type. All uses are tested with !. Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- include/linux/kernel.h | 2 +- lib/net_utils.c | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 4c52907a6d8b..a9e2268ecccb 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -501,7 +501,7 @@ static inline char * __deprecated pack_hex_byte(char *buf, u8 byte) extern int hex_to_bin(char ch); extern int __must_check hex2bin(u8 *dst, const char *src, size_t count); -int mac_pton(const char *s, u8 *mac); +bool mac_pton(const char *s, u8 *mac); /* * General tracing related utility functions - trace_printk(), diff --git a/lib/net_utils.c b/lib/net_utils.c index 2e3c52c8d050..148fc6e99ef6 100644 --- a/lib/net_utils.c +++ b/lib/net_utils.c @@ -3,24 +3,24 @@ #include #include -int mac_pton(const char *s, u8 *mac) +bool mac_pton(const char *s, u8 *mac) { int i; /* XX:XX:XX:XX:XX:XX */ if (strlen(s) < 3 * ETH_ALEN - 1) - return 0; + return false; /* Don't dirty result unless string is valid MAC. */ for (i = 0; i < ETH_ALEN; i++) { if (!isxdigit(s[i * 3]) || !isxdigit(s[i * 3 + 1])) - return 0; + return false; if (i != ETH_ALEN - 1 && s[i * 3 + 2] != ':') - return 0; + return false; } for (i = 0; i < ETH_ALEN; i++) { mac[i] = (hex_to_bin(s[i * 3]) << 4) | hex_to_bin(s[i * 3 + 1]); } - return 1; + return true; } EXPORT_SYMBOL(mac_pton); -- cgit v1.2.3-59-g8ed1b From 3482f2c52b77bf6596e24aae82e204a0603eba66 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Thu, 27 Mar 2014 17:18:55 -0700 Subject: of: Create of_console_check() for selecting a console specified in /chosen The devicetree has a binding for specifying the console device in the /chosen node, but the kernel doesn't use it consistently. This change adds an API for testing if a device node is a console, and adds a preferred console entry if it is. At the same time this patch removes the of_device_is_stdout_path() API since it is unused. Signed-off-by: Grant Likely Tested-by: Sascha Hauer --- drivers/of/base.c | 23 +++++++++++++---------- include/linux/of.h | 6 +++--- 2 files changed, 16 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/drivers/of/base.c b/drivers/of/base.c index b9864806e9b8..df9b2bb7bb27 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -17,6 +17,7 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ +#include #include #include #include @@ -2180,20 +2181,22 @@ const char *of_prop_next_string(struct property *prop, const char *cur) EXPORT_SYMBOL_GPL(of_prop_next_string); /** - * of_device_is_stdout_path - check if a device node matches the - * linux,stdout-path property - * - * Check if this device node matches the linux,stdout-path property - * in the chosen node. 
return true if yes, false otherwise. + * of_console_check() - Test and setup console for DT setup + * @dn - Pointer to device node + * @name - Name to use for preferred console without index. ex. "ttyS" + * @index - Index to use for preferred console. + * + * Check if the given device node matches the stdout-path property in the + * /chosen node. If it does then register it as the preferred console and return + * TRUE. Otherwise return FALSE. */ -int of_device_is_stdout_path(struct device_node *dn) +bool of_console_check(struct device_node *dn, char *name, int index) { - if (!of_stdout) + if (!dn || dn != of_stdout || console_set_on_cmdline) return false; - - return of_stdout == dn; + return add_preferred_console(name, index, NULL); } -EXPORT_SYMBOL_GPL(of_device_is_stdout_path); +EXPORT_SYMBOL_GPL(of_console_check); /** * of_find_next_cache_node - Find a node's subsidiary cache diff --git a/include/linux/of.h b/include/linux/of.h index 196b34c1ef4e..9d9734056e39 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -352,7 +352,7 @@ const __be32 *of_prop_next_u32(struct property *prop, const __be32 *cur, */ const char *of_prop_next_string(struct property *prop, const char *cur); -int of_device_is_stdout_path(struct device_node *dn); +bool of_console_check(struct device_node *dn, char *name, int index); #else /* CONFIG_OF */ @@ -564,9 +564,9 @@ static inline int of_machine_is_compatible(const char *compat) return 0; } -static inline int of_device_is_stdout_path(struct device_node *dn) +static inline bool of_console_check(const struct device_node *dn, const char *name, int index) { - return 0; + return false; } static inline const __be32 *of_prop_next_u32(struct property *prop, -- cgit v1.2.3-59-g8ed1b From a752ee56ad84bf9a35b8323af1ad22b03c1df2c4 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Fri, 28 Mar 2014 08:12:18 -0700 Subject: tty: Update hypervisor tty drivers to use core stdout parsing code. The evh_bytechan, hvc_opal and hvc_vio drivers all open code the parsing of the stdout node in the device tree. This patch simplifies the driver by removing the duplicated functionality. 
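For orientation, a condensed sketch of the pattern the converted drivers now follow (the "foo" and "acme,hv-console" names are invented; the real drivers additionally read their "reg", "interrupts" or handle properties from the node):

#include <linux/of.h>

/*
 * A hypervisor console driver can now locate its boot console by checking
 * the core-parsed of_stdout instead of re-parsing /chosen itself.
 */
static int foo_console_init(void)
{
        struct device_node *np = of_stdout;     /* filled in by of_alias_scan() */

        if (!np || !of_device_is_compatible(np, "acme,hv-console"))
                return 0;       /* stdout is not our device */

        /* map registers/IRQs from np and register the console here */
        return 1;
}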
Signed-off-by: Grant Likely --- drivers/of/base.c | 5 ++++- drivers/tty/ehv_bytechan.c | 43 ++++--------------------------------------- drivers/tty/hvc/hvc_opal.c | 15 +++------------ drivers/tty/hvc/hvc_vio.c | 29 ++++++++++------------------- include/linux/of.h | 1 + 5 files changed, 22 insertions(+), 71 deletions(-) (limited to 'include/linux') diff --git a/drivers/of/base.c b/drivers/of/base.c index df9b2bb7bb27..e4f95ba0a3eb 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -36,7 +36,7 @@ struct device_node *of_allnodes; EXPORT_SYMBOL(of_allnodes); struct device_node *of_chosen; struct device_node *of_aliases; -static struct device_node *of_stdout; +struct device_node *of_stdout; static struct kset *of_kset; @@ -2063,9 +2063,12 @@ void of_alias_scan(void * (*dt_alloc)(u64 size, u64 align)) of_chosen = of_find_node_by_path("/chosen@0"); if (of_chosen) { + /* linux,stdout-path and /aliases/stdout are for legacy compatibility */ const char *name = of_get_property(of_chosen, "stdout-path", NULL); if (!name) name = of_get_property(of_chosen, "linux,stdout-path", NULL); + if (IS_ENABLED(CONFIG_PPC) && !name) + name = of_get_property(of_aliases, "stdout", NULL); if (name) of_stdout = of_find_node_by_path(name); } diff --git a/drivers/tty/ehv_bytechan.c b/drivers/tty/ehv_bytechan.c index 0419b69e270f..4f485e88f60c 100644 --- a/drivers/tty/ehv_bytechan.c +++ b/drivers/tty/ehv_bytechan.c @@ -108,55 +108,23 @@ static void disable_tx_interrupt(struct ehv_bc_data *bc) * * The byte channel to be used for the console is specified via a "stdout" * property in the /chosen node. - * - * For compatible with legacy device trees, we also look for a "stdout" alias. */ static int find_console_handle(void) { - struct device_node *np, *np2; + struct device_node *np = of_stdout; const char *sprop = NULL; const uint32_t *iprop; - np = of_find_node_by_path("/chosen"); - if (np) - sprop = of_get_property(np, "stdout-path", NULL); - - if (!np || !sprop) { - of_node_put(np); - np = of_find_node_by_name(NULL, "aliases"); - if (np) - sprop = of_get_property(np, "stdout", NULL); - } - - if (!sprop) { - of_node_put(np); - return 0; - } - /* We don't care what the aliased node is actually called. We only * care if it's compatible with "epapr,hv-byte-channel", because that - * indicates that it's a byte channel node. We use a temporary - * variable, 'np2', because we can't release 'np' until we're done with - * 'sprop'. + * indicates that it's a byte channel node. */ - np2 = of_find_node_by_path(sprop); - of_node_put(np); - np = np2; - if (!np) { - pr_warning("ehv-bc: stdout node '%s' does not exist\n", sprop); - return 0; - } - - /* Is it a byte channel? 
*/ - if (!of_device_is_compatible(np, "epapr,hv-byte-channel")) { - of_node_put(np); + if (!np || !of_device_is_compatible(np, "epapr,hv-byte-channel")) return 0; - } stdout_irq = irq_of_parse_and_map(np, 0); if (stdout_irq == NO_IRQ) { - pr_err("ehv-bc: no 'interrupts' property in %s node\n", sprop); - of_node_put(np); + pr_err("ehv-bc: no 'interrupts' property in %s node\n", np->full_name); return 0; } @@ -167,12 +135,9 @@ static int find_console_handle(void) if (!iprop) { pr_err("ehv-bc: no 'hv-handle' property in %s node\n", np->name); - of_node_put(np); return 0; } stdout_bc = be32_to_cpu(*iprop); - - of_node_put(np); return 1; } diff --git a/drivers/tty/hvc/hvc_opal.c b/drivers/tty/hvc/hvc_opal.c index a585079b4b38..a2cc5f834c63 100644 --- a/drivers/tty/hvc/hvc_opal.c +++ b/drivers/tty/hvc/hvc_opal.c @@ -342,22 +342,13 @@ static void udbg_init_opal_common(void) void __init hvc_opal_init_early(void) { - struct device_node *stdout_node = NULL; + struct device_node *stdout_node = of_node_get(of_stdout); const __be32 *termno; - const char *name = NULL; const struct hv_ops *ops; u32 index; - /* find the boot console from /chosen/stdout */ - if (of_chosen) - name = of_get_property(of_chosen, "linux,stdout-path", NULL); - if (name) { - stdout_node = of_find_node_by_path(name); - if (!stdout_node) { - pr_err("hvc_opal: Failed to locate default console!\n"); - return; - } - } else { + /* If the console wasn't in /chosen, try /ibm,opal */ + if (!stdout_node) { struct device_node *opal, *np; /* Current OPAL takeover doesn't provide the stdout diff --git a/drivers/tty/hvc/hvc_vio.c b/drivers/tty/hvc/hvc_vio.c index b594abfbf21e..5618b5fc7500 100644 --- a/drivers/tty/hvc/hvc_vio.c +++ b/drivers/tty/hvc/hvc_vio.c @@ -404,42 +404,35 @@ module_exit(hvc_vio_exit); void __init hvc_vio_init_early(void) { - struct device_node *stdout_node; const __be32 *termno; const char *name; const struct hv_ops *ops; /* find the boot console from /chosen/stdout */ - if (!of_chosen) + if (!of_stdout) return; - name = of_get_property(of_chosen, "linux,stdout-path", NULL); - if (name == NULL) - return; - stdout_node = of_find_node_by_path(name); - if (!stdout_node) - return; - name = of_get_property(stdout_node, "name", NULL); + name = of_get_property(of_stdout, "name", NULL); if (!name) { printk(KERN_WARNING "stdout node missing 'name' property!\n"); - goto out; + return; } /* Check if it's a virtual terminal */ if (strncmp(name, "vty", 3) != 0) - goto out; - termno = of_get_property(stdout_node, "reg", NULL); + return; + termno = of_get_property(of_stdout, "reg", NULL); if (termno == NULL) - goto out; + return; hvterm_priv0.termno = of_read_number(termno, 1); spin_lock_init(&hvterm_priv0.buf_lock); hvterm_privs[0] = &hvterm_priv0; /* Check the protocol */ - if (of_device_is_compatible(stdout_node, "hvterm1")) { + if (of_device_is_compatible(of_stdout, "hvterm1")) { hvterm_priv0.proto = HV_PROTOCOL_RAW; ops = &hvterm_raw_ops; } - else if (of_device_is_compatible(stdout_node, "hvterm-protocol")) { + else if (of_device_is_compatible(of_stdout, "hvterm-protocol")) { hvterm_priv0.proto = HV_PROTOCOL_HVSI; ops = &hvterm_hvsi_ops; hvsilib_init(&hvterm_priv0.hvsi, hvc_get_chars, hvc_put_chars, @@ -447,7 +440,7 @@ void __init hvc_vio_init_early(void) /* HVSI, perform the handshake now */ hvsilib_establish(&hvterm_priv0.hvsi); } else - goto out; + return; udbg_putc = udbg_hvc_putc; udbg_getc = udbg_hvc_getc; udbg_getc_poll = udbg_hvc_getc_poll; @@ -456,14 +449,12 @@ void __init hvc_vio_init_early(void) * backend for HVSI, only 
do udbg */ if (hvterm_priv0.proto == HV_PROTOCOL_HVSI) - goto out; + return; #endif /* Check whether the user has requested a different console. */ if (!strstr(cmd_line, "console=")) add_preferred_console("hvc", 0, NULL); hvc_instantiate(0, 0, ops); -out: - of_node_put(stdout_node); } /* call this from early_init() for a working debug console on diff --git a/include/linux/of.h b/include/linux/of.h index 9d9734056e39..f0d256273c83 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -113,6 +113,7 @@ static inline void of_node_put(struct device_node *node) { } extern struct device_node *of_allnodes; extern struct device_node *of_chosen; extern struct device_node *of_aliases; +extern struct device_node *of_stdout; extern raw_spinlock_t devtree_lock; static inline bool of_have_populated_dt(void) -- cgit v1.2.3-59-g8ed1b From 7b8278358cc2b453ca6e75eedb3741cdb7e97236 Mon Sep 17 00:00:00 2001 From: Aristeu Rozanski Date: Wed, 18 Jun 2014 11:05:01 -0300 Subject: edac: add DDR4 and RDDR4 Haswell memory controller can make use of DDR4 and Registered DDR4 Cc: tony.luck@intel.com Signed-off-by: Aristeu Rozanski Signed-off-by: Mauro Carvalho Chehab --- drivers/edac/edac_mc_sysfs.c | 4 +++- include/linux/edac.h | 5 +++++ 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c index 01fae8289cf0..a6cd36100663 100644 --- a/drivers/edac/edac_mc_sysfs.c +++ b/drivers/edac/edac_mc_sysfs.c @@ -108,7 +108,9 @@ static const char * const mem_types[] = { [MEM_RDDR2] = "Registered-DDR2", [MEM_XDR] = "XDR", [MEM_DDR3] = "Unbuffered-DDR3", - [MEM_RDDR3] = "Registered-DDR3" + [MEM_RDDR3] = "Registered-DDR3", + [MEM_DDR4] = "Unbuffered-DDR4", + [MEM_RDDR4] = "Registered-DDR4" }; static const char * const dev_types[] = { diff --git a/include/linux/edac.h b/include/linux/edac.h index 8e6c20af11a2..e1e68da6f35c 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -194,6 +194,9 @@ static inline char *mc_event_error_type(const unsigned int err_type) * @MEM_DDR3: DDR3 RAM * @MEM_RDDR3: Registered DDR3 RAM * This is a variant of the DDR3 memories. + * @MEM_DDR4: DDR4 RAM + * @MEM_RDDR4: Registered DDR4 RAM + * This is a variant of the DDR4 memories. */ enum mem_type { MEM_EMPTY = 0, @@ -213,6 +216,8 @@ enum mem_type { MEM_XDR, MEM_DDR3, MEM_RDDR3, + MEM_DDR4, + MEM_RDDR4, }; #define MEM_FLAG_EMPTY BIT(MEM_EMPTY) -- cgit v1.2.3-59-g8ed1b From 11c32d7b6274cb0f554943d65bd4a126c4a86dcd Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 22 May 2014 23:25:14 +0200 Subject: video: move Versatile CLCD helpers This moves the Versatile-specific helper code and panel database down into the drivers/video folder next to the CLCD driver itself, preserving the config symbol but also moving the header to platform data. This is necessary to rid the Integrator of this final inclusion dependency and get us one less user of the plat-versatile folder. 
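For board code the change is mechanical: only the include path moves. A sketch of the intended usage with the relocated header (the "foo" names are placeholders; SZ_1M matches the framebuffer size the Versatile boards already use):

#include <linux/amba/clcd.h>
#include <linux/platform_data/video-clcd-versatile.h>  /* was <plat/clcd.h> */
#include <linux/sizes.h>

static int foo_clcd_setup(struct clcd_fb *fb)
{
        fb->panel = versatile_clcd_get_panel("VGA");
        if (!fb->panel)
                return -EINVAL;

        return versatile_clcd_setup_dma(fb, SZ_1M);
}

static struct clcd_board foo_clcd_data = {
        .name   = "foo",
        .caps   = CLCD_CAP_5551 | CLCD_CAP_565 | CLCD_CAP_888,
        .check  = clcdfb_check,
        .decode = clcdfb_decode,
        .setup  = foo_clcd_setup,
        .mmap   = versatile_clcd_mmap_dma,
        .remove = versatile_clcd_remove_dma,
};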
Cc: Arnd Bergmann Cc: Jean-Christophe Plagniol-Villard Cc: linux-fbdev@vger.kernel.org Cc: Russell King Acked-by: Tomi Valkeinen Signed-off-by: Linus Walleij --- arch/arm/mach-integrator/Kconfig | 1 - arch/arm/mach-integrator/integrator_cp.c | 3 +- arch/arm/mach-realview/core.c | 2 +- arch/arm/mach-versatile/core.c | 2 +- arch/arm/mach-vexpress/ct-ca9x4.c | 3 +- arch/arm/plat-versatile/Kconfig | 3 - arch/arm/plat-versatile/Makefile | 1 - arch/arm/plat-versatile/clcd.c | 182 --------------------- arch/arm/plat-versatile/include/plat/clcd.h | 9 - drivers/video/fbdev/Kconfig | 7 + drivers/video/fbdev/Makefile | 1 + drivers/video/fbdev/amba-clcd-versatile.c | 182 +++++++++++++++++++++ include/linux/platform_data/video-clcd-versatile.h | 9 + 13 files changed, 203 insertions(+), 202 deletions(-) delete mode 100644 arch/arm/plat-versatile/clcd.c delete mode 100644 arch/arm/plat-versatile/include/plat/clcd.h create mode 100644 drivers/video/fbdev/amba-clcd-versatile.c create mode 100644 include/linux/platform_data/video-clcd-versatile.h (limited to 'include/linux') diff --git a/arch/arm/mach-integrator/Kconfig b/arch/arm/mach-integrator/Kconfig index 64f8e2564a37..c455e974bbfe 100644 --- a/arch/arm/mach-integrator/Kconfig +++ b/arch/arm/mach-integrator/Kconfig @@ -17,7 +17,6 @@ config ARCH_INTEGRATOR_CP bool "Support Integrator/CP platform" select ARCH_CINTEGRATOR select ARM_TIMER_SP804 - select PLAT_VERSATILE_CLCD select SERIAL_AMBA_PL011 if TTY select SERIAL_AMBA_PL011_CONSOLE if TTY select SOC_BUS diff --git a/arch/arm/mach-integrator/integrator_cp.c b/arch/arm/mach-integrator/integrator_cp.c index a938242b0c95..0228165d2d64 100644 --- a/arch/arm/mach-integrator/integrator_cp.c +++ b/arch/arm/mach-integrator/integrator_cp.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -36,8 +37,6 @@ #include #include -#include - #include "hardware.h" #include "cm.h" #include "common.h" diff --git a/arch/arm/mach-realview/core.c b/arch/arm/mach-realview/core.c index 8c1b39a0caa0..850e506926df 100644 --- a/arch/arm/mach-realview/core.c +++ b/arch/arm/mach-realview/core.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -48,7 +49,6 @@ #include #include -#include #include #include "core.h" diff --git a/arch/arm/mach-versatile/core.c b/arch/arm/mach-versatile/core.c index be83ba25f81b..08fb8c89f414 100644 --- a/arch/arm/mach-versatile/core.c +++ b/arch/arm/mach-versatile/core.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -53,7 +54,6 @@ #include #include -#include #include #include "core.h" diff --git a/arch/arm/mach-vexpress/ct-ca9x4.c b/arch/arm/mach-vexpress/ct-ca9x4.c index 86150d7a2e7d..27bea049380a 100644 --- a/arch/arm/mach-vexpress/ct-ca9x4.c +++ b/arch/arm/mach-vexpress/ct-ca9x4.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -29,8 +30,6 @@ #include #include -#include - static struct map_desc ct_ca9x4_io_desc[] __initdata = { { .virtual = V2T_PERIPH, diff --git a/arch/arm/plat-versatile/Kconfig b/arch/arm/plat-versatile/Kconfig index fce41e93b6a4..a301ca2c7d00 100644 --- a/arch/arm/plat-versatile/Kconfig +++ b/arch/arm/plat-versatile/Kconfig @@ -3,9 +3,6 @@ if PLAT_VERSATILE config PLAT_VERSATILE_CLOCK bool -config PLAT_VERSATILE_CLCD - bool - config PLAT_VERSATILE_SCHED_CLOCK def_bool y diff --git a/arch/arm/plat-versatile/Makefile b/arch/arm/plat-versatile/Makefile index 2e0c472958ae..03c4900ac3f4 100644 --- a/arch/arm/plat-versatile/Makefile +++ 
b/arch/arm/plat-versatile/Makefile @@ -1,6 +1,5 @@ ccflags-$(CONFIG_ARCH_MULTIPLATFORM) := -I$(srctree)/$(src)/include obj-$(CONFIG_PLAT_VERSATILE_CLOCK) += clock.o -obj-$(CONFIG_PLAT_VERSATILE_CLCD) += clcd.o obj-$(CONFIG_PLAT_VERSATILE_SCHED_CLOCK) += sched-clock.o obj-$(CONFIG_SMP) += headsmp.o platsmp.o diff --git a/arch/arm/plat-versatile/clcd.c b/arch/arm/plat-versatile/clcd.c deleted file mode 100644 index 6628cc27efc5..000000000000 --- a/arch/arm/plat-versatile/clcd.c +++ /dev/null @@ -1,182 +0,0 @@ -#include -#include -#include -#include -#include - -static struct clcd_panel vga = { - .mode = { - .name = "VGA", - .refresh = 60, - .xres = 640, - .yres = 480, - .pixclock = 39721, - .left_margin = 40, - .right_margin = 24, - .upper_margin = 32, - .lower_margin = 11, - .hsync_len = 96, - .vsync_len = 2, - .sync = 0, - .vmode = FB_VMODE_NONINTERLACED, - }, - .width = -1, - .height = -1, - .tim2 = TIM2_BCD | TIM2_IPC, - .cntl = CNTL_LCDTFT | CNTL_BGR | CNTL_LCDVCOMP(1), - .caps = CLCD_CAP_5551 | CLCD_CAP_565 | CLCD_CAP_888, - .bpp = 16, -}; - -static struct clcd_panel xvga = { - .mode = { - .name = "XVGA", - .refresh = 60, - .xres = 1024, - .yres = 768, - .pixclock = 15748, - .left_margin = 152, - .right_margin = 48, - .upper_margin = 23, - .lower_margin = 3, - .hsync_len = 104, - .vsync_len = 4, - .sync = 0, - .vmode = FB_VMODE_NONINTERLACED, - }, - .width = -1, - .height = -1, - .tim2 = TIM2_BCD | TIM2_IPC, - .cntl = CNTL_LCDTFT | CNTL_BGR | CNTL_LCDVCOMP(1), - .caps = CLCD_CAP_5551 | CLCD_CAP_565 | CLCD_CAP_888, - .bpp = 16, -}; - -/* Sanyo TM38QV67A02A - 3.8 inch QVGA (320x240) Color TFT */ -static struct clcd_panel sanyo_tm38qv67a02a = { - .mode = { - .name = "Sanyo TM38QV67A02A", - .refresh = 116, - .xres = 320, - .yres = 240, - .pixclock = 100000, - .left_margin = 6, - .right_margin = 6, - .upper_margin = 5, - .lower_margin = 5, - .hsync_len = 6, - .vsync_len = 6, - .sync = 0, - .vmode = FB_VMODE_NONINTERLACED, - }, - .width = -1, - .height = -1, - .tim2 = TIM2_BCD, - .cntl = CNTL_LCDTFT | CNTL_BGR | CNTL_LCDVCOMP(1), - .caps = CLCD_CAP_5551, - .bpp = 16, -}; - -static struct clcd_panel sanyo_2_5_in = { - .mode = { - .name = "Sanyo QVGA Portrait", - .refresh = 116, - .xres = 240, - .yres = 320, - .pixclock = 100000, - .left_margin = 20, - .right_margin = 10, - .upper_margin = 2, - .lower_margin = 2, - .hsync_len = 10, - .vsync_len = 2, - .sync = FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT, - .vmode = FB_VMODE_NONINTERLACED, - }, - .width = -1, - .height = -1, - .tim2 = TIM2_IVS | TIM2_IHS | TIM2_IPC, - .cntl = CNTL_LCDTFT | CNTL_BGR | CNTL_LCDVCOMP(1), - .caps = CLCD_CAP_5551, - .bpp = 16, -}; - -/* Epson L2F50113T00 - 2.2 inch 176x220 Color TFT */ -static struct clcd_panel epson_l2f50113t00 = { - .mode = { - .name = "Epson L2F50113T00", - .refresh = 390, - .xres = 176, - .yres = 220, - .pixclock = 62500, - .left_margin = 3, - .right_margin = 2, - .upper_margin = 1, - .lower_margin = 0, - .hsync_len = 3, - .vsync_len = 2, - .sync = 0, - .vmode = FB_VMODE_NONINTERLACED, - }, - .width = -1, - .height = -1, - .tim2 = TIM2_BCD | TIM2_IPC, - .cntl = CNTL_LCDTFT | CNTL_BGR | CNTL_LCDVCOMP(1), - .caps = CLCD_CAP_5551, - .bpp = 16, -}; - -static struct clcd_panel *panels[] = { - &vga, - &xvga, - &sanyo_tm38qv67a02a, - &sanyo_2_5_in, - &epson_l2f50113t00, -}; - -struct clcd_panel *versatile_clcd_get_panel(const char *name) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(panels); i++) - if (strcmp(panels[i]->mode.name, name) == 0) - break; - - if (i < ARRAY_SIZE(panels)) - return panels[i]; - 
- pr_err("CLCD: couldn't get parameters for panel %s\n", name); - - return NULL; -} - -int versatile_clcd_setup_dma(struct clcd_fb *fb, unsigned long framesize) -{ - dma_addr_t dma; - - fb->fb.screen_base = dma_alloc_writecombine(&fb->dev->dev, framesize, - &dma, GFP_KERNEL); - if (!fb->fb.screen_base) { - pr_err("CLCD: unable to map framebuffer\n"); - return -ENOMEM; - } - - fb->fb.fix.smem_start = dma; - fb->fb.fix.smem_len = framesize; - - return 0; -} - -int versatile_clcd_mmap_dma(struct clcd_fb *fb, struct vm_area_struct *vma) -{ - return dma_mmap_writecombine(&fb->dev->dev, vma, - fb->fb.screen_base, - fb->fb.fix.smem_start, - fb->fb.fix.smem_len); -} - -void versatile_clcd_remove_dma(struct clcd_fb *fb) -{ - dma_free_writecombine(&fb->dev->dev, fb->fb.fix.smem_len, - fb->fb.screen_base, fb->fb.fix.smem_start); -} diff --git a/arch/arm/plat-versatile/include/plat/clcd.h b/arch/arm/plat-versatile/include/plat/clcd.h deleted file mode 100644 index 6bb6a1d2019b..000000000000 --- a/arch/arm/plat-versatile/include/plat/clcd.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef PLAT_CLCD_H -#define PLAT_CLCD_H - -struct clcd_panel *versatile_clcd_get_panel(const char *); -int versatile_clcd_setup_dma(struct clcd_fb *, unsigned long); -int versatile_clcd_mmap_dma(struct clcd_fb *, struct vm_area_struct *); -void versatile_clcd_remove_dma(struct clcd_fb *); - -#endif diff --git a/drivers/video/fbdev/Kconfig b/drivers/video/fbdev/Kconfig index 59c98bfd5a8a..5edc7a054e03 100644 --- a/drivers/video/fbdev/Kconfig +++ b/drivers/video/fbdev/Kconfig @@ -290,6 +290,13 @@ config FB_ARMCLCD here and read . The module will be called amba-clcd. +# Helper logic selected only by the ARM Versatile platform family. +config PLAT_VERSATILE_CLCD + depends on FB_ARMCLCD + depends on (PLAT_VERSATILE || ARCH_INTEGRATOR) + default y + bool + config FB_ACORN bool "Acorn VIDC support" depends on (FB = y) && ARM && ARCH_ACORN diff --git a/drivers/video/fbdev/Makefile b/drivers/video/fbdev/Makefile index 0284f2a12538..0b2090d2e52e 100644 --- a/drivers/video/fbdev/Makefile +++ b/drivers/video/fbdev/Makefile @@ -78,6 +78,7 @@ obj-$(CONFIG_FB_ATMEL) += atmel_lcdfb.o obj-$(CONFIG_FB_PVR2) += pvr2fb.o obj-$(CONFIG_FB_VOODOO1) += sstfb.o obj-$(CONFIG_FB_ARMCLCD) += amba-clcd.o +obj-$(CONFIG_PLAT_VERSATILE_CLCD) += amba-clcd-versatile.o obj-$(CONFIG_FB_GOLDFISH) += goldfishfb.o obj-$(CONFIG_FB_68328) += 68328fb.o obj-$(CONFIG_FB_GBE) += gbefb.o diff --git a/drivers/video/fbdev/amba-clcd-versatile.c b/drivers/video/fbdev/amba-clcd-versatile.c new file mode 100644 index 000000000000..7a8afcd4573e --- /dev/null +++ b/drivers/video/fbdev/amba-clcd-versatile.c @@ -0,0 +1,182 @@ +#include +#include +#include +#include +#include + +static struct clcd_panel vga = { + .mode = { + .name = "VGA", + .refresh = 60, + .xres = 640, + .yres = 480, + .pixclock = 39721, + .left_margin = 40, + .right_margin = 24, + .upper_margin = 32, + .lower_margin = 11, + .hsync_len = 96, + .vsync_len = 2, + .sync = 0, + .vmode = FB_VMODE_NONINTERLACED, + }, + .width = -1, + .height = -1, + .tim2 = TIM2_BCD | TIM2_IPC, + .cntl = CNTL_LCDTFT | CNTL_BGR | CNTL_LCDVCOMP(1), + .caps = CLCD_CAP_5551 | CLCD_CAP_565 | CLCD_CAP_888, + .bpp = 16, +}; + +static struct clcd_panel xvga = { + .mode = { + .name = "XVGA", + .refresh = 60, + .xres = 1024, + .yres = 768, + .pixclock = 15748, + .left_margin = 152, + .right_margin = 48, + .upper_margin = 23, + .lower_margin = 3, + .hsync_len = 104, + .vsync_len = 4, + .sync = 0, + .vmode = FB_VMODE_NONINTERLACED, + }, + .width = -1, + 
.height = -1, + .tim2 = TIM2_BCD | TIM2_IPC, + .cntl = CNTL_LCDTFT | CNTL_BGR | CNTL_LCDVCOMP(1), + .caps = CLCD_CAP_5551 | CLCD_CAP_565 | CLCD_CAP_888, + .bpp = 16, +}; + +/* Sanyo TM38QV67A02A - 3.8 inch QVGA (320x240) Color TFT */ +static struct clcd_panel sanyo_tm38qv67a02a = { + .mode = { + .name = "Sanyo TM38QV67A02A", + .refresh = 116, + .xres = 320, + .yres = 240, + .pixclock = 100000, + .left_margin = 6, + .right_margin = 6, + .upper_margin = 5, + .lower_margin = 5, + .hsync_len = 6, + .vsync_len = 6, + .sync = 0, + .vmode = FB_VMODE_NONINTERLACED, + }, + .width = -1, + .height = -1, + .tim2 = TIM2_BCD, + .cntl = CNTL_LCDTFT | CNTL_BGR | CNTL_LCDVCOMP(1), + .caps = CLCD_CAP_5551, + .bpp = 16, +}; + +static struct clcd_panel sanyo_2_5_in = { + .mode = { + .name = "Sanyo QVGA Portrait", + .refresh = 116, + .xres = 240, + .yres = 320, + .pixclock = 100000, + .left_margin = 20, + .right_margin = 10, + .upper_margin = 2, + .lower_margin = 2, + .hsync_len = 10, + .vsync_len = 2, + .sync = FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT, + .vmode = FB_VMODE_NONINTERLACED, + }, + .width = -1, + .height = -1, + .tim2 = TIM2_IVS | TIM2_IHS | TIM2_IPC, + .cntl = CNTL_LCDTFT | CNTL_BGR | CNTL_LCDVCOMP(1), + .caps = CLCD_CAP_5551, + .bpp = 16, +}; + +/* Epson L2F50113T00 - 2.2 inch 176x220 Color TFT */ +static struct clcd_panel epson_l2f50113t00 = { + .mode = { + .name = "Epson L2F50113T00", + .refresh = 390, + .xres = 176, + .yres = 220, + .pixclock = 62500, + .left_margin = 3, + .right_margin = 2, + .upper_margin = 1, + .lower_margin = 0, + .hsync_len = 3, + .vsync_len = 2, + .sync = 0, + .vmode = FB_VMODE_NONINTERLACED, + }, + .width = -1, + .height = -1, + .tim2 = TIM2_BCD | TIM2_IPC, + .cntl = CNTL_LCDTFT | CNTL_BGR | CNTL_LCDVCOMP(1), + .caps = CLCD_CAP_5551, + .bpp = 16, +}; + +static struct clcd_panel *panels[] = { + &vga, + &xvga, + &sanyo_tm38qv67a02a, + &sanyo_2_5_in, + &epson_l2f50113t00, +}; + +struct clcd_panel *versatile_clcd_get_panel(const char *name) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(panels); i++) + if (strcmp(panels[i]->mode.name, name) == 0) + break; + + if (i < ARRAY_SIZE(panels)) + return panels[i]; + + pr_err("CLCD: couldn't get parameters for panel %s\n", name); + + return NULL; +} + +int versatile_clcd_setup_dma(struct clcd_fb *fb, unsigned long framesize) +{ + dma_addr_t dma; + + fb->fb.screen_base = dma_alloc_writecombine(&fb->dev->dev, framesize, + &dma, GFP_KERNEL); + if (!fb->fb.screen_base) { + pr_err("CLCD: unable to map framebuffer\n"); + return -ENOMEM; + } + + fb->fb.fix.smem_start = dma; + fb->fb.fix.smem_len = framesize; + + return 0; +} + +int versatile_clcd_mmap_dma(struct clcd_fb *fb, struct vm_area_struct *vma) +{ + return dma_mmap_writecombine(&fb->dev->dev, vma, + fb->fb.screen_base, + fb->fb.fix.smem_start, + fb->fb.fix.smem_len); +} + +void versatile_clcd_remove_dma(struct clcd_fb *fb) +{ + dma_free_writecombine(&fb->dev->dev, fb->fb.fix.smem_len, + fb->fb.screen_base, fb->fb.fix.smem_start); +} diff --git a/include/linux/platform_data/video-clcd-versatile.h b/include/linux/platform_data/video-clcd-versatile.h new file mode 100644 index 000000000000..6bb6a1d2019b --- /dev/null +++ b/include/linux/platform_data/video-clcd-versatile.h @@ -0,0 +1,9 @@ +#ifndef PLAT_CLCD_H +#define PLAT_CLCD_H + +struct clcd_panel *versatile_clcd_get_panel(const char *); +int versatile_clcd_setup_dma(struct clcd_fb *, unsigned long); +int versatile_clcd_mmap_dma(struct clcd_fb *, struct vm_area_struct *); +void versatile_clcd_remove_dma(struct clcd_fb *); + 
+#endif -- cgit v1.2.3-59-g8ed1b From 5d98e61d337c181f199a6cb864569cc4e116ef4c Mon Sep 17 00:00:00 2001 From: Lan Tianyu Date: Tue, 20 May 2014 20:59:23 +0800 Subject: I2C/ACPI: Add i2c ACPI operation region support ACPI 5.0 spec(5.5.2.4.5) defines GenericSerialBus(i2c, spi, uart) operation region. It allows ACPI aml code able to access such kind of devices to implement some ACPI standard method. ACPI Spec defines some access attribute to associate with i2c protocol. AttribQuick Read/Write Quick Protocol AttribSendReceive Send/Receive Byte Protocol AttribByte Read/Write Byte Protocol AttribWord Read/Write Word Protocol AttribBlock Read/Write Block Protocol AttribBytes Read/Write N-Bytes Protocol AttribProcessCall Process Call Protocol AttribBlockProcessCall Write Block-Read Block Process Call Protocol AttribRawBytes Raw Read/Write N-BytesProtocol AttribRawProcessBytes Raw Process Call Protocol On the Asus T100TA, Bios use GenericSerialBus operation region to access i2c device to get battery info. Sample code From Asus T100TA Scope (_SB.I2C1) { Name (UMPC, ResourceTemplate () { I2cSerialBus (0x0066, ControllerInitiated, 0x00061A80, AddressingMode7Bit, "\\_SB.I2C1", 0x00, ResourceConsumer, , ) }) ... OperationRegion (DVUM, GenericSerialBus, Zero, 0x0100) Field (DVUM, BufferAcc, NoLock, Preserve) { Connection (UMPC), Offset (0x81), AccessAs (BufferAcc, AttribBytes (0x3E)), FGC0, 8 } ... } Device (BATC) { Name (_HID, EisaId ("PNP0C0A")) // _HID: Hardware ID Name (_UID, One) // _UID: Unique ID ... Method (_BST, 0, NotSerialized) // _BST: Battery Status { If (LEqual (AVBL, One)) { Store (FGC0, BFFG) If (LNotEqual (STAT, One)) { ShiftRight (CHST, 0x04, Local0) And (Local0, 0x03, Local0) If (LOr (LEqual (Local0, One), LEqual (Local0, 0x02))) { Store (0x02, Local1) } ... } The i2c operation region is defined under I2C1 scope. _BST method under battery device BATC read battery status from the field "FCG0". The request would be sent to i2c operation region handler. This patch is to add i2c ACPI operation region support. Due to there are only "Byte" and "Bytes" protocol access on the Asus T100TA, other protocols have not been tested. About RawBytes and RawProcessBytes protocol, they needs specific drivers to interpret reference data from AML code according ACPI 5.0 SPEC(5.5.2.4.5.3.9 and 5.5.2.4.5.3.10). So far, not found such case and will add when find real case. Signed-off-by: Lan Tianyu Reviewed-by: Mika Westerberg Signed-off-by: Wolfram Sang --- drivers/i2c/Makefile | 5 +- drivers/i2c/i2c-acpi.c | 273 +++++++++++++++++++++++++++++++++++++++++++++++++ drivers/i2c/i2c-core.c | 2 + include/linux/acpi.h | 11 ++ include/linux/i2c.h | 10 ++ 5 files changed, 300 insertions(+), 1 deletion(-) create mode 100644 drivers/i2c/i2c-acpi.c (limited to 'include/linux') diff --git a/drivers/i2c/Makefile b/drivers/i2c/Makefile index 1722f50f2473..80db3073aa84 100644 --- a/drivers/i2c/Makefile +++ b/drivers/i2c/Makefile @@ -2,8 +2,11 @@ # Makefile for the i2c core. 
# +i2ccore-y := i2c-core.o +i2ccore-$(CONFIG_ACPI) += i2c-acpi.o + obj-$(CONFIG_I2C_BOARDINFO) += i2c-boardinfo.o -obj-$(CONFIG_I2C) += i2c-core.o +obj-$(CONFIG_I2C) += i2ccore.o obj-$(CONFIG_I2C_SMBUS) += i2c-smbus.o obj-$(CONFIG_I2C_CHARDEV) += i2c-dev.o obj-$(CONFIG_I2C_MUX) += i2c-mux.o diff --git a/drivers/i2c/i2c-acpi.c b/drivers/i2c/i2c-acpi.c new file mode 100644 index 000000000000..f7f4c89c09b3 --- /dev/null +++ b/drivers/i2c/i2c-acpi.c @@ -0,0 +1,273 @@ +/* + * I2C ACPI code + * + * Copyright (C) 2014 Intel Corp + * + * Author: Lan Tianyu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + */ +#define pr_fmt(fmt) "I2C/ACPI : " fmt + +#include +#include +#include +#include +#include + +struct acpi_i2c_handler_data { + struct acpi_connection_info info; + struct i2c_adapter *adapter; +}; + +struct gsb_buffer { + u8 status; + u8 len; + union { + u16 wdata; + u8 bdata; + u8 data[0]; + }; +} __packed; + +static int acpi_gsb_i2c_read_bytes(struct i2c_client *client, + u8 cmd, u8 *data, u8 data_len) +{ + + struct i2c_msg msgs[2]; + int ret; + u8 *buffer; + + buffer = kzalloc(data_len, GFP_KERNEL); + if (!buffer) + return AE_NO_MEMORY; + + msgs[0].addr = client->addr; + msgs[0].flags = client->flags; + msgs[0].len = 1; + msgs[0].buf = &cmd; + + msgs[1].addr = client->addr; + msgs[1].flags = client->flags | I2C_M_RD; + msgs[1].len = data_len; + msgs[1].buf = buffer; + + ret = i2c_transfer(client->adapter, msgs, ARRAY_SIZE(msgs)); + if (ret < 0) + dev_err(&client->adapter->dev, "i2c read failed\n"); + else + memcpy(data, buffer, data_len); + + kfree(buffer); + return ret; +} + +static int acpi_gsb_i2c_write_bytes(struct i2c_client *client, + u8 cmd, u8 *data, u8 data_len) +{ + + struct i2c_msg msgs[1]; + u8 *buffer; + int ret = AE_OK; + + buffer = kzalloc(data_len + 1, GFP_KERNEL); + if (!buffer) + return AE_NO_MEMORY; + + buffer[0] = cmd; + memcpy(buffer + 1, data, data_len); + + msgs[0].addr = client->addr; + msgs[0].flags = client->flags; + msgs[0].len = data_len + 1; + msgs[0].buf = buffer; + + ret = i2c_transfer(client->adapter, msgs, ARRAY_SIZE(msgs)); + if (ret < 0) + dev_err(&client->adapter->dev, "i2c write failed\n"); + + kfree(buffer); + return ret; +} + +static acpi_status +acpi_i2c_space_handler(u32 function, acpi_physical_address command, + u32 bits, u64 *value64, + void *handler_context, void *region_context) +{ + struct gsb_buffer *gsb = (struct gsb_buffer *)value64; + struct acpi_i2c_handler_data *data = handler_context; + struct acpi_connection_info *info = &data->info; + struct acpi_resource_i2c_serialbus *sb; + struct i2c_adapter *adapter = data->adapter; + struct i2c_client client; + struct acpi_resource *ares; + u32 accessor_type = function >> 16; + u8 action = function & ACPI_IO_MASK; + acpi_status ret = AE_OK; + int status; + + ret = acpi_buffer_to_resource(info->connection, info->length, &ares); + if (ACPI_FAILURE(ret)) + return ret; + + if (!value64 || ares->type != ACPI_RESOURCE_TYPE_SERIAL_BUS) { + ret = AE_BAD_PARAMETER; + goto err; + } + + sb = &ares->data.i2c_serial_bus; + if (sb->type != ACPI_RESOURCE_SERIAL_TYPE_I2C) { + ret = AE_BAD_PARAMETER; + goto err; + } + + 
memset(&client, 0, sizeof(client)); + client.adapter = adapter; + client.addr = sb->slave_address; + client.flags = 0; + + if (sb->access_mode == ACPI_I2C_10BIT_MODE) + client.flags |= I2C_CLIENT_TEN; + + switch (accessor_type) { + case ACPI_GSB_ACCESS_ATTRIB_SEND_RCV: + if (action == ACPI_READ) { + status = i2c_smbus_read_byte(&client); + if (status >= 0) { + gsb->bdata = status; + status = 0; + } + } else { + status = i2c_smbus_write_byte(&client, gsb->bdata); + } + break; + + case ACPI_GSB_ACCESS_ATTRIB_BYTE: + if (action == ACPI_READ) { + status = i2c_smbus_read_byte_data(&client, command); + if (status >= 0) { + gsb->bdata = status; + status = 0; + } + } else { + status = i2c_smbus_write_byte_data(&client, command, + gsb->bdata); + } + break; + + case ACPI_GSB_ACCESS_ATTRIB_WORD: + if (action == ACPI_READ) { + status = i2c_smbus_read_word_data(&client, command); + if (status >= 0) { + gsb->wdata = status; + status = 0; + } + } else { + status = i2c_smbus_write_word_data(&client, command, + gsb->wdata); + } + break; + + case ACPI_GSB_ACCESS_ATTRIB_BLOCK: + if (action == ACPI_READ) { + status = i2c_smbus_read_block_data(&client, command, + gsb->data); + if (status >= 0) { + gsb->len = status; + status = 0; + } + } else { + status = i2c_smbus_write_block_data(&client, command, + gsb->len, gsb->data); + } + break; + + case ACPI_GSB_ACCESS_ATTRIB_MULTIBYTE: + if (action == ACPI_READ) { + status = acpi_gsb_i2c_read_bytes(&client, command, + gsb->data, info->access_length); + if (status > 0) + status = 0; + } else { + status = acpi_gsb_i2c_write_bytes(&client, command, + gsb->data, info->access_length); + } + break; + + default: + pr_info("protocol(0x%02x) is not supported.\n", accessor_type); + ret = AE_BAD_PARAMETER; + goto err; + } + + gsb->status = status; + + err: + ACPI_FREE(ares); + return ret; +} + + +int acpi_i2c_install_space_handler(struct i2c_adapter *adapter) +{ + acpi_handle handle = ACPI_HANDLE(adapter->dev.parent); + struct acpi_i2c_handler_data *data; + acpi_status status; + + if (!handle) + return -ENODEV; + + data = kzalloc(sizeof(struct acpi_i2c_handler_data), + GFP_KERNEL); + if (!data) + return -ENOMEM; + + data->adapter = adapter; + status = acpi_bus_attach_private_data(handle, (void *)data); + if (ACPI_FAILURE(status)) { + kfree(data); + return -ENOMEM; + } + + status = acpi_install_address_space_handler(handle, + ACPI_ADR_SPACE_GSBUS, + &acpi_i2c_space_handler, + NULL, + data); + if (ACPI_FAILURE(status)) { + dev_err(&adapter->dev, "Error installing i2c space handler\n"); + acpi_bus_detach_private_data(handle); + kfree(data); + return -ENOMEM; + } + + return 0; +} + +void acpi_i2c_remove_space_handler(struct i2c_adapter *adapter) +{ + acpi_handle handle = ACPI_HANDLE(adapter->dev.parent); + struct acpi_i2c_handler_data *data; + acpi_status status; + + if (!handle) + return; + + acpi_remove_address_space_handler(handle, + ACPI_ADR_SPACE_GSBUS, + &acpi_i2c_space_handler); + + status = acpi_bus_get_private_data(handle, (void **)&data); + if (ACPI_SUCCESS(status)) + kfree(data); + + acpi_bus_detach_private_data(handle); +} diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c index 7c7f4b856bad..e25cb84cb297 100644 --- a/drivers/i2c/i2c-core.c +++ b/drivers/i2c/i2c-core.c @@ -1293,6 +1293,7 @@ exit_recovery: /* create pre-declared device nodes */ of_i2c_register_devices(adap); acpi_i2c_register_devices(adap); + acpi_i2c_install_space_handler(adap); if (adap->nr < __i2c_first_dynamic_bus_num) i2c_scan_static_board_info(adap); @@ -1466,6 +1467,7 @@ void 
i2c_del_adapter(struct i2c_adapter *adap) return; } + acpi_i2c_remove_space_handler(adap); /* Tell drivers about this removal */ mutex_lock(&core_lock); bus_for_each_drv(&i2c_bus_type, NULL, adap, diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 358c01b971db..40718e91e171 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -364,6 +364,17 @@ extern bool osc_sb_apei_support_acked; #define OSC_PCI_EXPRESS_CAPABILITY_CONTROL 0x00000010 #define OSC_PCI_CONTROL_MASKS 0x0000001f +#define ACPI_GSB_ACCESS_ATTRIB_QUICK 0x00000002 +#define ACPI_GSB_ACCESS_ATTRIB_SEND_RCV 0x00000004 +#define ACPI_GSB_ACCESS_ATTRIB_BYTE 0x00000006 +#define ACPI_GSB_ACCESS_ATTRIB_WORD 0x00000008 +#define ACPI_GSB_ACCESS_ATTRIB_BLOCK 0x0000000A +#define ACPI_GSB_ACCESS_ATTRIB_MULTIBYTE 0x0000000B +#define ACPI_GSB_ACCESS_ATTRIB_WORD_CALL 0x0000000C +#define ACPI_GSB_ACCESS_ATTRIB_BLOCK_CALL 0x0000000D +#define ACPI_GSB_ACCESS_ATTRIB_RAW_BYTES 0x0000000E +#define ACPI_GSB_ACCESS_ATTRIB_RAW_PROCESS 0x0000000F + extern acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 *mask, u32 req); diff --git a/include/linux/i2c.h b/include/linux/i2c.h index b556e0ab946f..f7a939a2cb56 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -577,4 +577,14 @@ static inline struct i2c_adapter *of_find_i2c_adapter_by_node(struct device_node } #endif /* CONFIG_OF */ +#ifdef CONFIG_ACPI +int acpi_i2c_install_space_handler(struct i2c_adapter *adapter); +void acpi_i2c_remove_space_handler(struct i2c_adapter *adapter); +#else +static inline void acpi_i2c_remove_space_handler(struct i2c_adapter *adapter) +{ } +static inline int acpi_i2c_install_space_handler(struct i2c_adapter *adapter) +{ return 0; } +#endif + #endif /* _LINUX_I2C_H */ -- cgit v1.2.3-59-g8ed1b From da3c6647ee08711c7edc28d7fea4ad69fc5ffcca Mon Sep 17 00:00:00 2001 From: Lan Tianyu Date: Tue, 20 May 2014 20:59:24 +0800 Subject: I2C/ACPI: Clean up I2C ACPI code and Add CONFIG_I2C_ACPI config Clean up ACPI related code in the i2c core and add CONFIG_I2C_ACPI to enable I2C ACPI code. Current there is a race between removing I2C ACPI operation region and ACPI AML code accessing. So make i2c core built-in if CONFIG_I2C_ACPI is set. Reviewed-by: Mika Westerberg Signed-off-by: Lan Tianyu Signed-off-by: Wolfram Sang --- drivers/i2c/Kconfig | 18 +++++++++- drivers/i2c/Makefile | 2 +- drivers/i2c/i2c-acpi.c | 89 ++++++++++++++++++++++++++++++++++++++++++++++ drivers/i2c/i2c-core.c | 95 -------------------------------------------------- include/linux/i2c.h | 4 ++- 5 files changed, 110 insertions(+), 98 deletions(-) (limited to 'include/linux') diff --git a/drivers/i2c/Kconfig b/drivers/i2c/Kconfig index 7b7ea320a258..3e3b680dc007 100644 --- a/drivers/i2c/Kconfig +++ b/drivers/i2c/Kconfig @@ -2,7 +2,9 @@ # I2C subsystem configuration # -menuconfig I2C +menu "I2C support" + +config I2C tristate "I2C support" select RT_MUTEXES ---help--- @@ -21,6 +23,18 @@ menuconfig I2C This I2C support can also be built as a module. If so, the module will be called i2c-core. +config I2C_ACPI + bool "I2C ACPI support" + select I2C + depends on ACPI + default y + help + Say Y here if you want to enable ACPI I2C support. This includes support + for automatic enumeration of I2C slave devices and support for ACPI I2C + Operation Regions. Operation Regions allow firmware (BIOS) code to + access I2C slave devices, such as smart batteries through an I2C host + controller driver. + if I2C config I2C_BOARDINFO @@ -124,3 +138,5 @@ config I2C_DEBUG_BUS on. 
endif # I2C + +endmenu diff --git a/drivers/i2c/Makefile b/drivers/i2c/Makefile index 80db3073aa84..a1f590cbb435 100644 --- a/drivers/i2c/Makefile +++ b/drivers/i2c/Makefile @@ -3,7 +3,7 @@ # i2ccore-y := i2c-core.o -i2ccore-$(CONFIG_ACPI) += i2c-acpi.o +i2ccore-$(CONFIG_I2C_ACPI) += i2c-acpi.o obj-$(CONFIG_I2C_BOARDINFO) += i2c-boardinfo.o obj-$(CONFIG_I2C) += i2ccore.o diff --git a/drivers/i2c/i2c-acpi.c b/drivers/i2c/i2c-acpi.c index f7f4c89c09b3..e8b61967334b 100644 --- a/drivers/i2c/i2c-acpi.c +++ b/drivers/i2c/i2c-acpi.c @@ -37,6 +37,95 @@ struct gsb_buffer { }; } __packed; +static int acpi_i2c_add_resource(struct acpi_resource *ares, void *data) +{ + struct i2c_board_info *info = data; + + if (ares->type == ACPI_RESOURCE_TYPE_SERIAL_BUS) { + struct acpi_resource_i2c_serialbus *sb; + + sb = &ares->data.i2c_serial_bus; + if (sb->type == ACPI_RESOURCE_SERIAL_TYPE_I2C) { + info->addr = sb->slave_address; + if (sb->access_mode == ACPI_I2C_10BIT_MODE) + info->flags |= I2C_CLIENT_TEN; + } + } else if (info->irq < 0) { + struct resource r; + + if (acpi_dev_resource_interrupt(ares, 0, &r)) + info->irq = r.start; + } + + /* Tell the ACPI core to skip this resource */ + return 1; +} + +static acpi_status acpi_i2c_add_device(acpi_handle handle, u32 level, + void *data, void **return_value) +{ + struct i2c_adapter *adapter = data; + struct list_head resource_list; + struct i2c_board_info info; + struct acpi_device *adev; + int ret; + + if (acpi_bus_get_device(handle, &adev)) + return AE_OK; + if (acpi_bus_get_status(adev) || !adev->status.present) + return AE_OK; + + memset(&info, 0, sizeof(info)); + info.acpi_node.companion = adev; + info.irq = -1; + + INIT_LIST_HEAD(&resource_list); + ret = acpi_dev_get_resources(adev, &resource_list, + acpi_i2c_add_resource, &info); + acpi_dev_free_resource_list(&resource_list); + + if (ret < 0 || !info.addr) + return AE_OK; + + adev->power.flags.ignore_parent = true; + strlcpy(info.type, dev_name(&adev->dev), sizeof(info.type)); + if (!i2c_new_device(adapter, &info)) { + adev->power.flags.ignore_parent = false; + dev_err(&adapter->dev, + "failed to add I2C device %s from ACPI\n", + dev_name(&adev->dev)); + } + + return AE_OK; +} + +/** + * acpi_i2c_register_devices - enumerate I2C slave devices behind adapter + * @adap: pointer to adapter + * + * Enumerate all I2C slave devices behind this adapter by walking the ACPI + * namespace. When a device is found it will be added to the Linux device + * model and bound to the corresponding ACPI handle. 
+ */ +void acpi_i2c_register_devices(struct i2c_adapter *adap) +{ + acpi_handle handle; + acpi_status status; + + if (!adap->dev.parent) + return; + + handle = ACPI_HANDLE(adap->dev.parent); + if (!handle) + return; + + status = acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, 1, + acpi_i2c_add_device, NULL, + adap, NULL); + if (ACPI_FAILURE(status)) + dev_warn(&adap->dev, "failed to enumerate I2C slaves\n"); +} + static int acpi_gsb_i2c_read_bytes(struct i2c_client *client, u8 cmd, u8 *data, u8 data_len) { diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c index e25cb84cb297..4ccff114b147 100644 --- a/drivers/i2c/i2c-core.c +++ b/drivers/i2c/i2c-core.c @@ -1092,101 +1092,6 @@ EXPORT_SYMBOL(of_find_i2c_adapter_by_node); static void of_i2c_register_devices(struct i2c_adapter *adap) { } #endif /* CONFIG_OF */ -/* ACPI support code */ - -#if IS_ENABLED(CONFIG_ACPI) -static int acpi_i2c_add_resource(struct acpi_resource *ares, void *data) -{ - struct i2c_board_info *info = data; - - if (ares->type == ACPI_RESOURCE_TYPE_SERIAL_BUS) { - struct acpi_resource_i2c_serialbus *sb; - - sb = &ares->data.i2c_serial_bus; - if (sb->type == ACPI_RESOURCE_SERIAL_TYPE_I2C) { - info->addr = sb->slave_address; - if (sb->access_mode == ACPI_I2C_10BIT_MODE) - info->flags |= I2C_CLIENT_TEN; - } - } else if (info->irq < 0) { - struct resource r; - - if (acpi_dev_resource_interrupt(ares, 0, &r)) - info->irq = r.start; - } - - /* Tell the ACPI core to skip this resource */ - return 1; -} - -static acpi_status acpi_i2c_add_device(acpi_handle handle, u32 level, - void *data, void **return_value) -{ - struct i2c_adapter *adapter = data; - struct list_head resource_list; - struct i2c_board_info info; - struct acpi_device *adev; - int ret; - - if (acpi_bus_get_device(handle, &adev)) - return AE_OK; - if (acpi_bus_get_status(adev) || !adev->status.present) - return AE_OK; - - memset(&info, 0, sizeof(info)); - info.acpi_node.companion = adev; - info.irq = -1; - - INIT_LIST_HEAD(&resource_list); - ret = acpi_dev_get_resources(adev, &resource_list, - acpi_i2c_add_resource, &info); - acpi_dev_free_resource_list(&resource_list); - - if (ret < 0 || !info.addr) - return AE_OK; - - adev->power.flags.ignore_parent = true; - strlcpy(info.type, dev_name(&adev->dev), sizeof(info.type)); - if (!i2c_new_device(adapter, &info)) { - adev->power.flags.ignore_parent = false; - dev_err(&adapter->dev, - "failed to add I2C device %s from ACPI\n", - dev_name(&adev->dev)); - } - - return AE_OK; -} - -/** - * acpi_i2c_register_devices - enumerate I2C slave devices behind adapter - * @adap: pointer to adapter - * - * Enumerate all I2C slave devices behind this adapter by walking the ACPI - * namespace. When a device is found it will be added to the Linux device - * model and bound to the corresponding ACPI handle. 
- */ -static void acpi_i2c_register_devices(struct i2c_adapter *adap) -{ - acpi_handle handle; - acpi_status status; - - if (!adap->dev.parent) - return; - - handle = ACPI_HANDLE(adap->dev.parent); - if (!handle) - return; - - status = acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, 1, - acpi_i2c_add_device, NULL, - adap, NULL); - if (ACPI_FAILURE(status)) - dev_warn(&adap->dev, "failed to enumerate I2C slaves\n"); -} -#else -static inline void acpi_i2c_register_devices(struct i2c_adapter *adap) {} -#endif /* CONFIG_ACPI */ - static int i2c_do_add_adapter(struct i2c_driver *driver, struct i2c_adapter *adap) { diff --git a/include/linux/i2c.h b/include/linux/i2c.h index f7a939a2cb56..ea507665896c 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -577,10 +577,12 @@ static inline struct i2c_adapter *of_find_i2c_adapter_by_node(struct device_node } #endif /* CONFIG_OF */ -#ifdef CONFIG_ACPI +#ifdef CONFIG_I2C_ACPI int acpi_i2c_install_space_handler(struct i2c_adapter *adapter); void acpi_i2c_remove_space_handler(struct i2c_adapter *adapter); +void acpi_i2c_register_devices(struct i2c_adapter *adap); #else +static inline void acpi_i2c_register_devices(struct i2c_adapter *adap) { } static inline void acpi_i2c_remove_space_handler(struct i2c_adapter *adapter) { } static inline int acpi_i2c_install_space_handler(struct i2c_adapter *adapter) -- cgit v1.2.3-59-g8ed1b From 476eab8251641ea2ae4666ca8a1436ebc2b8e9c3 Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Wed, 25 Jun 2014 17:09:52 +0300 Subject: net: remove inet6_reqsk_alloc Since pktops is only used for IPv6 only and opts is used for IPv4 only, we can move these fields into a union and this allows us to drop the inet6_reqsk_alloc function as after this change it becomes equivalent with inet_reqsk_alloc. This patch also fixes a kmemcheck issue in the IPv6 stack: the flags field was not annotated after a request_sock was allocated. Signed-off-by: Octavian Purdila Signed-off-by: David S. 
Miller --- include/linux/ipv6.h | 10 ---------- include/net/inet_sock.h | 6 ++++-- net/dccp/ipv6.c | 2 +- net/ipv6/syncookies.c | 2 +- net/ipv6/tcp_ipv6.c | 2 +- 5 files changed, 7 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 2faef339d8f2..c811300b0b0c 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -256,16 +256,6 @@ static inline struct ipv6_pinfo * inet6_sk(const struct sock *__sk) return inet_sk(__sk)->pinet6; } -static inline struct request_sock *inet6_reqsk_alloc(struct request_sock_ops *ops) -{ - struct request_sock *req = reqsk_alloc(ops); - - if (req) - inet_rsk(req)->pktopts = NULL; - - return req; -} - static inline struct raw6_sock *raw6_sk(const struct sock *sk) { return (struct raw6_sock *)sk; diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index b1edf17bec01..a829b77523cf 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -88,8 +88,10 @@ struct inet_request_sock { acked : 1, no_srccheck: 1; kmemcheck_bitfield_end(flags); - struct ip_options_rcu *opt; - struct sk_buff *pktopts; + union { + struct ip_options_rcu *opt; + struct sk_buff *pktopts; + }; u32 ir_mark; }; diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 4db3c2a1679c..04cb17d4b0ce 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -386,7 +386,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) goto drop; - req = inet6_reqsk_alloc(&dccp6_request_sock_ops); + req = inet_reqsk_alloc(&dccp6_request_sock_ops); if (req == NULL) goto drop; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index a822b880689b..83cea1d39466 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -187,7 +187,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) goto out; ret = NULL; - req = inet6_reqsk_alloc(&tcp6_request_sock_ops); + req = inet_reqsk_alloc(&tcp6_request_sock_ops); if (!req) goto out; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index a962455471ba..5e2d7e655c0f 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1010,7 +1010,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) goto drop; } - req = inet6_reqsk_alloc(&tcp6_request_sock_ops); + req = inet_reqsk_alloc(&tcp6_request_sock_ops); if (req == NULL) goto drop; -- cgit v1.2.3-59-g8ed1b From 16bea70aa7302b6f3bf3502d5a0efb4ea2ce4712 Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Wed, 25 Jun 2014 17:09:53 +0300 Subject: tcp: add init_req method to tcp_request_sock_ops Move the specific IPv4/IPv6 intializations to a new method in tcp_request_sock_ops in preparation for unifying tcp_v4_conn_request and tcp_v6_conn_request. Signed-off-by: Octavian Purdila Signed-off-by: David S. Miller --- include/linux/tcp.h | 3 --- include/net/tcp.h | 2 ++ net/ipv4/tcp_ipv4.c | 29 ++++++++++++++++------------ net/ipv6/tcp_ipv6.c | 55 +++++++++++++++++++++++++++++++---------------------- 4 files changed, 51 insertions(+), 38 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index a0513210798f..fa5258f322e7 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -111,10 +111,7 @@ struct tcp_request_sock_ops; struct tcp_request_sock { struct inet_request_sock req; -#ifdef CONFIG_TCP_MD5SIG - /* Only used by TCP MD5 Signature so far. 
*/ const struct tcp_request_sock_ops *af_specific; -#endif struct sock *listener; /* needed for TFO */ u32 rcv_isn; u32 snt_isn; diff --git a/include/net/tcp.h b/include/net/tcp.h index 39e47c4e4f19..7ad8ce296c3b 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1613,6 +1613,8 @@ struct tcp_request_sock_ops { const struct request_sock *req, const struct sk_buff *skb); #endif + void (*init_req)(struct request_sock *req, struct sock *sk, + struct sk_buff *skb); }; int tcpv4_offload_init(void); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 145f6402c560..f86a86b30d20 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1237,6 +1237,17 @@ static bool tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb) #endif +static void tcp_v4_init_req(struct request_sock *req, struct sock *sk, + struct sk_buff *skb) +{ + struct inet_request_sock *ireq = inet_rsk(req); + + ireq->ir_loc_addr = ip_hdr(skb)->daddr; + ireq->ir_rmt_addr = ip_hdr(skb)->saddr; + ireq->no_srccheck = inet_sk(sk)->transparent; + ireq->opt = tcp_v4_save_options(skb); +} + struct request_sock_ops tcp_request_sock_ops __read_mostly = { .family = PF_INET, .obj_size = sizeof(struct tcp_request_sock), @@ -1247,26 +1258,26 @@ struct request_sock_ops tcp_request_sock_ops __read_mostly = { .syn_ack_timeout = tcp_syn_ack_timeout, }; -#ifdef CONFIG_TCP_MD5SIG static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { +#ifdef CONFIG_TCP_MD5SIG .md5_lookup = tcp_v4_reqsk_md5_lookup, .calc_md5_hash = tcp_v4_md5_hash_skb, -}; #endif + .init_req = tcp_v4_init_req, +}; int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) { struct tcp_options_received tmp_opt; struct request_sock *req; - struct inet_request_sock *ireq; struct tcp_sock *tp = tcp_sk(sk); struct dst_entry *dst = NULL; __be32 saddr = ip_hdr(skb)->saddr; - __be32 daddr = ip_hdr(skb)->daddr; __u32 isn = TCP_SKB_CB(skb)->when; bool want_cookie = false, fastopen; struct flowi4 fl4; struct tcp_fastopen_cookie foc = { .len = -1 }; + const struct tcp_request_sock_ops *af_ops; int err; /* Never answer to SYNs send to broadcast or multicast */ @@ -1298,9 +1309,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) if (!req) goto drop; -#ifdef CONFIG_TCP_MD5SIG - tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops; -#endif + af_ops = tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops; tcp_clear_options(&tmp_opt); tmp_opt.mss_clamp = TCP_MSS_DEFAULT; @@ -1313,11 +1322,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; tcp_openreq_init(req, &tmp_opt, skb, sk); - ireq = inet_rsk(req); - ireq->ir_loc_addr = daddr; - ireq->ir_rmt_addr = saddr; - ireq->no_srccheck = inet_sk(sk)->transparent; - ireq->opt = tcp_v4_save_options(skb); + af_ops->init_req(req, sk, skb); if (security_inet_conn_request(sk, skb, req)) goto drop_and_free; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 5e2d7e655c0f..87a360c3eba9 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -720,6 +720,31 @@ static int tcp_v6_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb) } #endif +static void tcp_v6_init_req(struct request_sock *req, struct sock *sk, + struct sk_buff *skb) +{ + struct inet_request_sock *ireq = inet_rsk(req); + struct ipv6_pinfo *np = inet6_sk(sk); + + ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; + ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; + + ireq->ir_iif = sk->sk_bound_dev_if; + + /* So that link locals have meaning */ + if 
(!sk->sk_bound_dev_if && + ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) + ireq->ir_iif = inet6_iif(skb); + + if (!TCP_SKB_CB(skb)->when && + (ipv6_opt_accepted(sk, skb) || np->rxopt.bits.rxinfo || + np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim || + np->rxopt.bits.rxohlim || np->repflow)) { + atomic_inc(&skb->users); + ireq->pktopts = skb; + } +} + struct request_sock_ops tcp6_request_sock_ops __read_mostly = { .family = AF_INET6, .obj_size = sizeof(struct tcp6_request_sock), @@ -730,12 +755,13 @@ struct request_sock_ops tcp6_request_sock_ops __read_mostly = { .syn_ack_timeout = tcp_syn_ack_timeout, }; -#ifdef CONFIG_TCP_MD5SIG static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { +#ifdef CONFIG_TCP_MD5SIG .md5_lookup = tcp_v6_reqsk_md5_lookup, .calc_md5_hash = tcp_v6_md5_hash_skb, -}; #endif + .init_req = tcp_v6_init_req, +}; static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, @@ -983,13 +1009,13 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) struct tcp_options_received tmp_opt; struct request_sock *req; struct inet_request_sock *ireq; - struct ipv6_pinfo *np = inet6_sk(sk); struct tcp_sock *tp = tcp_sk(sk); __u32 isn = TCP_SKB_CB(skb)->when; struct dst_entry *dst = NULL; struct tcp_fastopen_cookie foc = { .len = -1 }; bool want_cookie = false, fastopen; struct flowi6 fl6; + const struct tcp_request_sock_ops *af_ops; int err; if (skb->protocol == htons(ETH_P_IP)) @@ -1014,9 +1040,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) if (req == NULL) goto drop; -#ifdef CONFIG_TCP_MD5SIG - tcp_rsk(req)->af_specific = &tcp_request_sock_ipv6_ops; -#endif + af_ops = tcp_rsk(req)->af_specific = &tcp_request_sock_ipv6_ops; tcp_clear_options(&tmp_opt); tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); @@ -1030,27 +1054,12 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) tcp_openreq_init(req, &tmp_opt, skb, sk); ireq = inet_rsk(req); - ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; - ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; + af_ops->init_req(req, sk, skb); + if (!want_cookie || tmp_opt.tstamp_ok) TCP_ECN_create_request(req, skb, sock_net(sk)); - ireq->ir_iif = sk->sk_bound_dev_if; - - /* So that link locals have meaning */ - if (!sk->sk_bound_dev_if && - ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) - ireq->ir_iif = inet6_iif(skb); - if (!isn) { - if (ipv6_opt_accepted(sk, skb) || - np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || - np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim || - np->repflow) { - atomic_inc(&skb->users); - ireq->pktopts = skb; - } - if (want_cookie) { isn = cookie_v6_init_sequence(sk, skb, &req->mss); req->cookie_ts = tmp_opt.tstamp_ok; -- cgit v1.2.3-59-g8ed1b From d630dc4c9adb41e5bd1e06df2dbeaf622469ddd5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 28 Jun 2014 08:10:12 -0400 Subject: percpu-refcount: one bit is enough for REF_STATUS percpu-refcount currently reserves two lowest bits of its percpu pointer to indicate its state; however, only one bit is used for PCPU_REF_DEAD. Simplify it by removing PCPU_STATUS_BITS/MASK and testing PCPU_REF_DEAD directly. This also allows the compiler to choose a more efficient instruction depending on the architecture. 
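(Editor's aside, not part of the patch: the gain comes from tagging a word-aligned pointer with a single status bit, which turns the old two-bit REF_STATUS() mask-and-compare into a plain bit test. A minimal user-space sketch of that pointer-tagging idea follows; the PCPU_REF_DEAD name mirrors the kernel macro, everything else is illustrative only.)

#include <stdio.h>
#include <stdlib.h>

#define PCPU_REF_DEAD 1UL  /* status flag kept in bit 0 of the pointer */

int main(void)
{
    /* malloc() returns at least word-aligned memory, so bit 0 is free */
    unsigned int *count = malloc(sizeof(*count));
    unsigned long ptr = (unsigned long)count;

    ptr |= PCPU_REF_DEAD;             /* mark the ref as dead */

    if (ptr & PCPU_REF_DEAD)          /* single-bit test, no mask-and-compare pair */
        printf("ref is dead\n");

    /* mask the flag back out to recover the real pointer */
    free((void *)(ptr & ~PCPU_REF_DEAD));
    return 0;
}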
Signed-off-by: Tejun Heo Cc: Kent Overstreet Cc: Christoph Lameter --- include/linux/percpu-refcount.h | 4 +--- lib/percpu-refcount.c | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h index 5d8920e23073..bfdeb0d48e21 100644 --- a/include/linux/percpu-refcount.h +++ b/include/linux/percpu-refcount.h @@ -88,12 +88,10 @@ static inline void percpu_ref_kill(struct percpu_ref *ref) return percpu_ref_kill_and_confirm(ref, NULL); } -#define PCPU_STATUS_BITS 2 -#define PCPU_STATUS_MASK ((1 << PCPU_STATUS_BITS) - 1) #define PCPU_REF_PTR 0 #define PCPU_REF_DEAD 1 -#define REF_STATUS(count) (((unsigned long) count) & PCPU_STATUS_MASK) +#define REF_STATUS(count) (((unsigned long) count) & PCPU_REF_DEAD) /** * percpu_ref_get - increment a percpu refcount diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c index 963b7034a51b..17bce2bccc14 100644 --- a/lib/percpu-refcount.c +++ b/lib/percpu-refcount.c @@ -96,7 +96,7 @@ static void percpu_ref_kill_rcu(struct rcu_head *rcu) /* Mask out PCPU_REF_DEAD */ pcpu_count = (unsigned __percpu *) - (((unsigned long) pcpu_count) & ~PCPU_STATUS_MASK); + (((unsigned long) pcpu_count) & ~PCPU_REF_DEAD); for_each_possible_cpu(cpu) count += *per_cpu_ptr(pcpu_count, cpu); -- cgit v1.2.3-59-g8ed1b From eae7975ddf031b3084f4a5f7d88f698aefad96fb Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 28 Jun 2014 08:10:13 -0400 Subject: percpu-refcount: add helpers for ->percpu_count accesses * All four percpu_ref_*() operations implemented in the header file perform the same operation to determine whether the percpu_ref is alive and extract the percpu pointer. Factor out the common logic into __pcpu_ref_alive(). This doesn't change the generated code. * There are a couple places in percpu-refcount.c which masks out PCPU_REF_DEAD to obtain the percpu pointer. Factor it out into pcpu_count_ptr(). * The above changes make the WARN_ON_ONCE() conditional at the top of percpu_ref_kill_and_confirm() the only user of REF_STATUS(). Test PCPU_REF_DEAD directly and remove REF_STATUS(). This patch doesn't introduce any functional change. Signed-off-by: Tejun Heo Cc: Kent Overstreet Cc: Christoph Lameter --- include/linux/percpu-refcount.h | 35 +++++++++++++++++++++-------------- lib/percpu-refcount.c | 17 +++++++++-------- 2 files changed, 30 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h index bfdeb0d48e21..b62a4ee6d6ad 100644 --- a/include/linux/percpu-refcount.h +++ b/include/linux/percpu-refcount.h @@ -88,10 +88,25 @@ static inline void percpu_ref_kill(struct percpu_ref *ref) return percpu_ref_kill_and_confirm(ref, NULL); } -#define PCPU_REF_PTR 0 #define PCPU_REF_DEAD 1 -#define REF_STATUS(count) (((unsigned long) count) & PCPU_REF_DEAD) +/* + * Internal helper. Don't use outside percpu-refcount proper. The + * function doesn't return the pointer and let the caller test it for NULL + * because doing so forces the compiler to generate two conditional + * branches as it can't assume that @ref->pcpu_count is not NULL. 
+ */ +static inline bool __pcpu_ref_alive(struct percpu_ref *ref, + unsigned __percpu **pcpu_countp) +{ + unsigned long pcpu_ptr = (unsigned long)ACCESS_ONCE(ref->pcpu_count); + + if (unlikely(pcpu_ptr & PCPU_REF_DEAD)) + return false; + + *pcpu_countp = (unsigned __percpu *)pcpu_ptr; + return true; +} /** * percpu_ref_get - increment a percpu refcount @@ -105,9 +120,7 @@ static inline void percpu_ref_get(struct percpu_ref *ref) rcu_read_lock_sched(); - pcpu_count = ACCESS_ONCE(ref->pcpu_count); - - if (likely(REF_STATUS(pcpu_count) == PCPU_REF_PTR)) + if (__pcpu_ref_alive(ref, &pcpu_count)) this_cpu_inc(*pcpu_count); else atomic_inc(&ref->count); @@ -131,9 +144,7 @@ static inline bool percpu_ref_tryget(struct percpu_ref *ref) rcu_read_lock_sched(); - pcpu_count = ACCESS_ONCE(ref->pcpu_count); - - if (likely(REF_STATUS(pcpu_count) == PCPU_REF_PTR)) { + if (__pcpu_ref_alive(ref, &pcpu_count)) { this_cpu_inc(*pcpu_count); ret = true; } else { @@ -166,9 +177,7 @@ static inline bool percpu_ref_tryget_live(struct percpu_ref *ref) rcu_read_lock_sched(); - pcpu_count = ACCESS_ONCE(ref->pcpu_count); - - if (likely(REF_STATUS(pcpu_count) == PCPU_REF_PTR)) { + if (__pcpu_ref_alive(ref, &pcpu_count)) { this_cpu_inc(*pcpu_count); ret = true; } @@ -191,9 +200,7 @@ static inline void percpu_ref_put(struct percpu_ref *ref) rcu_read_lock_sched(); - pcpu_count = ACCESS_ONCE(ref->pcpu_count); - - if (likely(REF_STATUS(pcpu_count) == PCPU_REF_PTR)) + if (__pcpu_ref_alive(ref, &pcpu_count)) this_cpu_dec(*pcpu_count); else if (unlikely(atomic_dec_and_test(&ref->count))) ref->release(ref); diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c index 17bce2bccc14..087f1a04f9bc 100644 --- a/lib/percpu-refcount.c +++ b/lib/percpu-refcount.c @@ -31,6 +31,11 @@ #define PCPU_COUNT_BIAS (1U << 31) +static unsigned __percpu *pcpu_count_ptr(struct percpu_ref *ref) +{ + return (unsigned __percpu *)((unsigned long)ref->pcpu_count & ~PCPU_REF_DEAD); +} + /** * percpu_ref_init - initialize a percpu refcount * @ref: percpu_ref to initialize @@ -74,7 +79,7 @@ EXPORT_SYMBOL_GPL(percpu_ref_init); */ void percpu_ref_cancel_init(struct percpu_ref *ref) { - unsigned __percpu *pcpu_count = ref->pcpu_count; + unsigned __percpu *pcpu_count = pcpu_count_ptr(ref); int cpu; WARN_ON_ONCE(atomic_read(&ref->count) != 1 + PCPU_COUNT_BIAS); @@ -82,7 +87,7 @@ void percpu_ref_cancel_init(struct percpu_ref *ref) if (pcpu_count) { for_each_possible_cpu(cpu) WARN_ON_ONCE(*per_cpu_ptr(pcpu_count, cpu)); - free_percpu(ref->pcpu_count); + free_percpu(pcpu_count); } } EXPORT_SYMBOL_GPL(percpu_ref_cancel_init); @@ -90,14 +95,10 @@ EXPORT_SYMBOL_GPL(percpu_ref_cancel_init); static void percpu_ref_kill_rcu(struct rcu_head *rcu) { struct percpu_ref *ref = container_of(rcu, struct percpu_ref, rcu); - unsigned __percpu *pcpu_count = ref->pcpu_count; + unsigned __percpu *pcpu_count = pcpu_count_ptr(ref); unsigned count = 0; int cpu; - /* Mask out PCPU_REF_DEAD */ - pcpu_count = (unsigned __percpu *) - (((unsigned long) pcpu_count) & ~PCPU_REF_DEAD); - for_each_possible_cpu(cpu) count += *per_cpu_ptr(pcpu_count, cpu); @@ -152,7 +153,7 @@ static void percpu_ref_kill_rcu(struct rcu_head *rcu) void percpu_ref_kill_and_confirm(struct percpu_ref *ref, percpu_ref_func_t *confirm_kill) { - WARN_ONCE(REF_STATUS(ref->pcpu_count) == PCPU_REF_DEAD, + WARN_ONCE((unsigned long)ref->pcpu_count & PCPU_REF_DEAD, "percpu_ref_kill() called more than once!\n"); ref->pcpu_count = (unsigned __percpu *) -- cgit v1.2.3-59-g8ed1b From 7d742075120deb831c7b94c268ca20d409e91d60 
Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 28 Jun 2014 08:10:13 -0400 Subject: percpu-refcount: use unsigned long for pcpu_count pointer percpu_ref->pcpu_count is a percpu pointer with a status flag in its lowest bit. As such, it always goes through arithmetic operations which is very cumbersome to do on a pointer. It has to be first casted to unsigned long and then back. Let's just make the field unsigned long so that we can skip the first casts. While at it, rename it to pcpu_counter_ptr to clarify that it's a pointer value. Signed-off-by: Tejun Heo Cc: Kent Overstreet Cc: Christoph Lameter --- include/linux/percpu-refcount.h | 4 ++-- lib/percpu-refcount.c | 11 +++++------ 2 files changed, 7 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h index b62a4ee6d6ad..6f8cd4c0546c 100644 --- a/include/linux/percpu-refcount.h +++ b/include/linux/percpu-refcount.h @@ -61,7 +61,7 @@ struct percpu_ref { * hack because we need to keep the pointer around for * percpu_ref_kill_rcu()) */ - unsigned __percpu *pcpu_count; + unsigned long pcpu_count_ptr; percpu_ref_func_t *release; percpu_ref_func_t *confirm_kill; struct rcu_head rcu; @@ -99,7 +99,7 @@ static inline void percpu_ref_kill(struct percpu_ref *ref) static inline bool __pcpu_ref_alive(struct percpu_ref *ref, unsigned __percpu **pcpu_countp) { - unsigned long pcpu_ptr = (unsigned long)ACCESS_ONCE(ref->pcpu_count); + unsigned long pcpu_ptr = ACCESS_ONCE(ref->pcpu_count_ptr); if (unlikely(pcpu_ptr & PCPU_REF_DEAD)) return false; diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c index 087f1a04f9bc..94e5b624de64 100644 --- a/lib/percpu-refcount.c +++ b/lib/percpu-refcount.c @@ -33,7 +33,7 @@ static unsigned __percpu *pcpu_count_ptr(struct percpu_ref *ref) { - return (unsigned __percpu *)((unsigned long)ref->pcpu_count & ~PCPU_REF_DEAD); + return (unsigned __percpu *)(ref->pcpu_count_ptr & ~PCPU_REF_DEAD); } /** @@ -51,8 +51,8 @@ int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release) { atomic_set(&ref->count, 1 + PCPU_COUNT_BIAS); - ref->pcpu_count = alloc_percpu(unsigned); - if (!ref->pcpu_count) + ref->pcpu_count_ptr = (unsigned long)alloc_percpu(unsigned); + if (!ref->pcpu_count_ptr) return -ENOMEM; ref->release = release; @@ -153,11 +153,10 @@ static void percpu_ref_kill_rcu(struct rcu_head *rcu) void percpu_ref_kill_and_confirm(struct percpu_ref *ref, percpu_ref_func_t *confirm_kill) { - WARN_ONCE((unsigned long)ref->pcpu_count & PCPU_REF_DEAD, + WARN_ONCE(ref->pcpu_count_ptr & PCPU_REF_DEAD, "percpu_ref_kill() called more than once!\n"); - ref->pcpu_count = (unsigned __percpu *) - (((unsigned long) ref->pcpu_count)|PCPU_REF_DEAD); + ref->pcpu_count_ptr |= PCPU_REF_DEAD; ref->confirm_kill = confirm_kill; call_rcu_sched(&ref->rcu, percpu_ref_kill_rcu); -- cgit v1.2.3-59-g8ed1b From 9a1049da9bd2cd83fe11d46433e603c193aa9c71 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 28 Jun 2014 08:10:14 -0400 Subject: percpu-refcount: require percpu_ref to be exited explicitly Currently, a percpu_ref undoes percpu_ref_init() automatically by freeing the allocated percpu area when the percpu_ref is killed. While seemingly convenient, this has the following niggles. * It's impossible to re-init a released reference counter without going through re-allocation. * In the similar vein, it's impossible to initialize a percpu_ref count with static percpu variables. 
* We need and have an explicit destructor anyway for failure paths - percpu_ref_cancel_init(). This patch removes the automatic percpu counter freeing in percpu_ref_kill_rcu() and repurposes percpu_ref_cancel_init() into a generic destructor now named percpu_ref_exit(). percpu_ref_destroy() is considered but it gets confusing with percpu_ref_kill() while "exit" clearly indicates that it's the counterpart of percpu_ref_init(). All percpu_ref_cancel_init() users are updated to invoke percpu_ref_exit() instead and explicit percpu_ref_exit() calls are added to the destruction path of all percpu_ref users. Signed-off-by: Tejun Heo Acked-by: Benjamin LaHaise Cc: Kent Overstreet Cc: Christoph Lameter Cc: Benjamin LaHaise Cc: Nicholas A. Bellinger Cc: Li Zefan --- drivers/target/target_core_tpg.c | 4 +++- fs/aio.c | 6 ++++-- include/linux/percpu-refcount.h | 6 ++---- kernel/cgroup.c | 8 +++++--- lib/percpu-refcount.c | 33 ++++++++++----------------------- 5 files changed, 24 insertions(+), 33 deletions(-) (limited to 'include/linux') diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c index c036595b17cf..fddfae61222f 100644 --- a/drivers/target/target_core_tpg.c +++ b/drivers/target/target_core_tpg.c @@ -825,7 +825,7 @@ int core_tpg_add_lun( ret = core_dev_export(dev, tpg, lun); if (ret < 0) { - percpu_ref_cancel_init(&lun->lun_ref); + percpu_ref_exit(&lun->lun_ref); return ret; } @@ -880,5 +880,7 @@ int core_tpg_post_dellun( lun->lun_status = TRANSPORT_LUN_STATUS_FREE; spin_unlock(&tpg->tpg_lun_lock); + percpu_ref_exit(&lun->lun_ref); + return 0; } diff --git a/fs/aio.c b/fs/aio.c index 5e0d7f9cb693..ea1bc2e8f4f3 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -506,6 +506,8 @@ static void free_ioctx(struct work_struct *work) aio_free_ring(ctx); free_percpu(ctx->cpu); + percpu_ref_exit(&ctx->reqs); + percpu_ref_exit(&ctx->users); kmem_cache_free(kioctx_cachep, ctx); } @@ -715,8 +717,8 @@ err_ctx: err: mutex_unlock(&ctx->ring_lock); free_percpu(ctx->cpu); - percpu_ref_cancel_init(&ctx->reqs); - percpu_ref_cancel_init(&ctx->users); + percpu_ref_exit(&ctx->reqs); + percpu_ref_exit(&ctx->users); kmem_cache_free(kioctx_cachep, ctx); pr_debug("error allocating ioctx %d\n", err); return ERR_PTR(err); diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h index 6f8cd4c0546c..0ddd2839ca84 100644 --- a/include/linux/percpu-refcount.h +++ b/include/linux/percpu-refcount.h @@ -57,9 +57,7 @@ struct percpu_ref { atomic_t count; /* * The low bit of the pointer indicates whether the ref is in percpu - * mode; if set, then get/put will manipulate the atomic_t (this is a - * hack because we need to keep the pointer around for - * percpu_ref_kill_rcu()) + * mode; if set, then get/put will manipulate the atomic_t. 
*/ unsigned long pcpu_count_ptr; percpu_ref_func_t *release; @@ -69,7 +67,7 @@ struct percpu_ref { int __must_check percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release); -void percpu_ref_cancel_init(struct percpu_ref *ref); +void percpu_ref_exit(struct percpu_ref *ref); void percpu_ref_kill_and_confirm(struct percpu_ref *ref, percpu_ref_func_t *confirm_kill); diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 7868fc3c0bc5..c06aa5e257a8 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1638,7 +1638,7 @@ destroy_root: exit_root_id: cgroup_exit_root_id(root); cancel_ref: - percpu_ref_cancel_init(&root_cgrp->self.refcnt); + percpu_ref_exit(&root_cgrp->self.refcnt); out: free_cgrp_cset_links(&tmp_links); return ret; @@ -4133,6 +4133,8 @@ static void css_free_work_fn(struct work_struct *work) container_of(work, struct cgroup_subsys_state, destroy_work); struct cgroup *cgrp = css->cgroup; + percpu_ref_exit(&css->refcnt); + if (css->ss) { /* css free path */ if (css->parent) @@ -4330,7 +4332,7 @@ err_list_del: err_free_id: cgroup_idr_remove(&ss->css_idr, css->id); err_free_percpu_ref: - percpu_ref_cancel_init(&css->refcnt); + percpu_ref_exit(&css->refcnt); err_free_css: call_rcu(&css->rcu_head, css_free_rcu_fn); return err; @@ -4441,7 +4443,7 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, out_free_id: cgroup_idr_remove(&root->cgroup_idr, cgrp->id); out_cancel_ref: - percpu_ref_cancel_init(&cgrp->self.refcnt); + percpu_ref_exit(&cgrp->self.refcnt); out_free_cgrp: kfree(cgrp); out_unlock: diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c index 94e5b624de64..ac4299120087 100644 --- a/lib/percpu-refcount.c +++ b/lib/percpu-refcount.c @@ -61,36 +61,25 @@ int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release) EXPORT_SYMBOL_GPL(percpu_ref_init); /** - * percpu_ref_cancel_init - cancel percpu_ref_init() - * @ref: percpu_ref to cancel init for + * percpu_ref_exit - undo percpu_ref_init() + * @ref: percpu_ref to exit * - * Once a percpu_ref is initialized, its destruction is initiated by - * percpu_ref_kill() and completes asynchronously, which can be painful to - * do when destroying a half-constructed object in init failure path. - * - * This function destroys @ref without invoking @ref->release and the - * memory area containing it can be freed immediately on return. To - * prevent accidental misuse, it's required that @ref has finished - * percpu_ref_init(), whether successful or not, but never used. - * - * The weird name and usage restriction are to prevent people from using - * this function by mistake for normal shutdown instead of - * percpu_ref_kill(). + * This function exits @ref. The caller is responsible for ensuring that + * @ref is no longer in active use. The usual places to invoke this + * function from are the @ref->release() callback or in init failure path + * where percpu_ref_init() succeeded but other parts of the initialization + * of the embedding object failed. 
*/ -void percpu_ref_cancel_init(struct percpu_ref *ref) +void percpu_ref_exit(struct percpu_ref *ref) { unsigned __percpu *pcpu_count = pcpu_count_ptr(ref); - int cpu; - - WARN_ON_ONCE(atomic_read(&ref->count) != 1 + PCPU_COUNT_BIAS); if (pcpu_count) { - for_each_possible_cpu(cpu) - WARN_ON_ONCE(*per_cpu_ptr(pcpu_count, cpu)); free_percpu(pcpu_count); + ref->pcpu_count_ptr = PCPU_REF_DEAD; } } -EXPORT_SYMBOL_GPL(percpu_ref_cancel_init); +EXPORT_SYMBOL_GPL(percpu_ref_exit); static void percpu_ref_kill_rcu(struct rcu_head *rcu) { @@ -102,8 +91,6 @@ static void percpu_ref_kill_rcu(struct rcu_head *rcu) for_each_possible_cpu(cpu) count += *per_cpu_ptr(pcpu_count, cpu); - free_percpu(pcpu_count); - pr_debug("global %i pcpu %i", atomic_read(&ref->count), (int) count); /* -- cgit v1.2.3-59-g8ed1b From 2d7227828e1475c7b272e55bd70c4cec8eea219a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 28 Jun 2014 08:10:14 -0400 Subject: percpu-refcount: implement percpu_ref_reinit() and percpu_ref_is_zero() Now that explicit invocation of percpu_ref_exit() is necessary to free the percpu counter, we can implement percpu_ref_reinit() which reinitializes a released percpu_ref. This can be used implement scalable gating switch which can be drained and then re-opened without worrying about memory allocation failures. percpu_ref_is_zero() is added to be used in a sanity check in percpu_ref_exit(). As this function will be useful for other purposes too, make it a public interface. v2: Use smp_read_barrier_depends() instead of smp_load_acquire(). We only need data dep barrier and smp_load_acquire() is stronger and heavier on some archs. Spotted by Lai Jiangshan. Signed-off-by: Tejun Heo Cc: Kent Overstreet Cc: Christoph Lameter Cc: Lai Jiangshan --- include/linux/percpu-refcount.h | 19 +++++++++++++++++++ lib/percpu-refcount.c | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+) (limited to 'include/linux') diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h index 0ddd2839ca84..3dfbf237cd8f 100644 --- a/include/linux/percpu-refcount.h +++ b/include/linux/percpu-refcount.h @@ -67,6 +67,7 @@ struct percpu_ref { int __must_check percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release); +void percpu_ref_reinit(struct percpu_ref *ref); void percpu_ref_exit(struct percpu_ref *ref); void percpu_ref_kill_and_confirm(struct percpu_ref *ref, percpu_ref_func_t *confirm_kill); @@ -99,6 +100,9 @@ static inline bool __pcpu_ref_alive(struct percpu_ref *ref, { unsigned long pcpu_ptr = ACCESS_ONCE(ref->pcpu_count_ptr); + /* paired with smp_store_release() in percpu_ref_reinit() */ + smp_read_barrier_depends(); + if (unlikely(pcpu_ptr & PCPU_REF_DEAD)) return false; @@ -206,4 +210,19 @@ static inline void percpu_ref_put(struct percpu_ref *ref) rcu_read_unlock_sched(); } +/** + * percpu_ref_is_zero - test whether a percpu refcount reached zero + * @ref: percpu_ref to test + * + * Returns %true if @ref reached zero. 
+ */ +static inline bool percpu_ref_is_zero(struct percpu_ref *ref) +{ + unsigned __percpu *pcpu_count; + + if (__pcpu_ref_alive(ref, &pcpu_count)) + return false; + return !atomic_read(&ref->count); +} + #endif diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c index ac4299120087..fe5a3342e960 100644 --- a/lib/percpu-refcount.c +++ b/lib/percpu-refcount.c @@ -60,6 +60,41 @@ int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release) } EXPORT_SYMBOL_GPL(percpu_ref_init); +/** + * percpu_ref_reinit - re-initialize a percpu refcount + * @ref: perpcu_ref to re-initialize + * + * Re-initialize @ref so that it's in the same state as when it finished + * percpu_ref_init(). @ref must have been initialized successfully, killed + * and reached 0 but not exited. + * + * Note that percpu_ref_tryget[_live]() are safe to perform on @ref while + * this function is in progress. + */ +void percpu_ref_reinit(struct percpu_ref *ref) +{ + unsigned __percpu *pcpu_count = pcpu_count_ptr(ref); + int cpu; + + BUG_ON(!pcpu_count); + WARN_ON(!percpu_ref_is_zero(ref)); + + atomic_set(&ref->count, 1 + PCPU_COUNT_BIAS); + + /* + * Restore per-cpu operation. smp_store_release() is paired with + * smp_read_barrier_depends() in __pcpu_ref_alive() and guarantees + * that the zeroing is visible to all percpu accesses which can see + * the following PCPU_REF_DEAD clearing. + */ + for_each_possible_cpu(cpu) + *per_cpu_ptr(pcpu_count, cpu) = 0; + + smp_store_release(&ref->pcpu_count_ptr, + ref->pcpu_count_ptr & ~PCPU_REF_DEAD); +} +EXPORT_SYMBOL_GPL(percpu_ref_reinit); + /** * percpu_ref_exit - undo percpu_ref_init() * @ref: percpu_ref to exit -- cgit v1.2.3-59-g8ed1b From cf2cb0b27116883c23761e974acba5f3bd719d21 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 7 May 2014 12:42:28 -0400 Subject: ftrace: Use macros for numbers in ftrace rec shift bits As new flags will be added to the ftrace dynamic record, and since the flags field is also a counter, converting the numbers used to do the shifting and masking into a set of macros where we only need to deal with the max bit count of the counter and the number of bits for the flags will prevent mistakes in the future. Dealing with only two numbers is much easier than updating all the macros that deal with shifting and masking. Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 404a686a3644..e4e7df422021 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -322,8 +322,11 @@ enum { FTRACE_FL_REGS_EN = (1UL << 31) }; -#define FTRACE_FL_MASK (0x7UL << 29) -#define FTRACE_REF_MAX ((1UL << 29) - 1) +#define FTRACE_REF_MAX_SHIFT 29 +#define FTRACE_FL_BITS 3 +#define FTRACE_FL_MASKED_BITS ((1UL << FTRACE_FL_BITS) - 1) +#define FTRACE_FL_MASK (FTRACE_FL_MASKED_BITS << FTRACE_REF_MAX_SHIFT) +#define FTRACE_REF_MAX ((1UL << FTRACE_REF_MAX_SHIFT) - 1) struct dyn_ftrace { unsigned long ip; /* address of mcount call-site */ -- cgit v1.2.3-59-g8ed1b From 0376bde11be5b87c9fd7d6813ac5fd7e1798b1bf Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 7 May 2014 13:46:45 -0400 Subject: ftrace: Add ftrace_rec_counter() macro to simplify the code The ftrace dynamic record has a flags element that also has a counter. Instead of hard coding "rec->flags & ~FTRACE_FL_MASK" all over the place. Use a macro instead. 
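(Editor's illustration, not from the patch: dyn_ftrace::flags packs a 29-bit reference counter in the low bits and the flag bits in the top three bits, and ftrace_rec_count() simply masks the flag bits away. A stand-alone user-space sketch of that packing, reusing the macro values from the previous patch on a bare unsigned long instead of a struct dyn_ftrace:)

#include <stdio.h>

#define FTRACE_REF_MAX_SHIFT  29
#define FTRACE_FL_BITS        3
#define FTRACE_FL_MASKED_BITS ((1UL << FTRACE_FL_BITS) - 1)
#define FTRACE_FL_MASK        (FTRACE_FL_MASKED_BITS << FTRACE_REF_MAX_SHIFT)
#define FTRACE_REF_MAX        ((1UL << FTRACE_REF_MAX_SHIFT) - 1)

/* counterpart of ftrace_rec_count(), operating on a plain flags word */
#define rec_count(flags)      ((flags) & ~FTRACE_FL_MASK)

int main(void)
{
    unsigned long flags = 0;

    flags += 3;            /* three ftrace_ops reference this record */
    flags |= 1UL << 31;    /* plus one of the flag bits kept in the top three bits */

    printf("count = %lu (max %lu)\n", rec_count(flags), FTRACE_REF_MAX);
    return 0;
}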
Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 2 ++ kernel/trace/ftrace.c | 12 ++++++------ 2 files changed, 8 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index e4e7df422021..e5baa6b2c93f 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -328,6 +328,8 @@ enum { #define FTRACE_FL_MASK (FTRACE_FL_MASKED_BITS << FTRACE_REF_MAX_SHIFT) #define FTRACE_REF_MAX ((1UL << FTRACE_REF_MAX_SHIFT) - 1) +#define ftrace_rec_count(rec) ((rec)->flags & ~FTRACE_FL_MASK) + struct dyn_ftrace { unsigned long ip; /* address of mcount call-site */ unsigned long flags; diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index b867c647e5bc..a58d840305c3 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1592,7 +1592,7 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops, if (inc) { rec->flags++; - if (FTRACE_WARN_ON((rec->flags & ~FTRACE_FL_MASK) == FTRACE_REF_MAX)) + if (FTRACE_WARN_ON(ftrace_rec_count(rec) == FTRACE_REF_MAX)) return; /* * If any ops wants regs saved for this function @@ -1601,7 +1601,7 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops, if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) rec->flags |= FTRACE_FL_REGS; } else { - if (FTRACE_WARN_ON((rec->flags & ~FTRACE_FL_MASK) == 0)) + if (FTRACE_WARN_ON(ftrace_rec_count(rec) == 0)) return; rec->flags--; /* @@ -1610,7 +1610,7 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops, * still any ops for this record that wants regs. * If not, we can stop recording them. */ - if ((rec->flags & ~FTRACE_FL_MASK) > 0 && + if (ftrace_rec_count(rec) > 0 && rec->flags & FTRACE_FL_REGS && ops->flags & FTRACE_OPS_FL_SAVE_REGS) { if (!test_rec_ops_needs_regs(rec)) @@ -1700,7 +1700,7 @@ static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update) * If we are disabling calls, then disable all records that * are enabled. */ - if (enable && (rec->flags & ~FTRACE_FL_MASK)) + if (enable && ftrace_rec_count(rec)) flag = FTRACE_FL_ENABLED; /* @@ -1746,7 +1746,7 @@ static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update) if (update) { /* If there's no more users, clear all flags */ - if (!(rec->flags & ~FTRACE_FL_MASK)) + if (!ftrace_rec_count(rec)) rec->flags = 0; else /* Just disable the record (keep REGS state) */ @@ -2685,7 +2685,7 @@ static int t_show(struct seq_file *m, void *v) seq_printf(m, "%ps", (void *)rec->ip); if (iter->flags & FTRACE_ITER_ENABLED) seq_printf(m, " (%ld)%s", - rec->flags & ~FTRACE_FL_MASK, + ftrace_rec_count(rec), rec->flags & FTRACE_FL_REGS ? " R" : ""); seq_printf(m, "\n"); -- cgit v1.2.3-59-g8ed1b From a1a6cc1d2ea9e3adf81faab87b834bc903856207 Mon Sep 17 00:00:00 2001 From: Kukjin Kim Date: Tue, 1 Jul 2014 06:32:23 +0900 Subject: ata: pata_samsung_cf: removes s5pc100 related ata codes This patch removes support for the S5PC100 ATA controller because the S5PC100 SoC is no longer supported in mainline.
Signed-off-by: Kukjin Kim Signed-off-by: Tejun Heo --- drivers/ata/pata_samsung_cf.c | 13 ------------- include/linux/platform_data/ata-samsung_cf.h | 1 - 2 files changed, 14 deletions(-) (limited to 'include/linux') diff --git a/drivers/ata/pata_samsung_cf.c b/drivers/ata/pata_samsung_cf.c index fb528831fb92..2578fc16960a 100644 --- a/drivers/ata/pata_samsung_cf.c +++ b/drivers/ata/pata_samsung_cf.c @@ -54,7 +54,6 @@ enum s3c_cpu_type { TYPE_S3C64XX, - TYPE_S5PC100, TYPE_S5PV210, }; @@ -476,10 +475,6 @@ static void pata_s3c_hwinit(struct s3c_ide_info *info, writel(0x1b, info->ide_addr + S3C_ATA_IRQ_MSK); break; - case TYPE_S5PC100: - pata_s3c_cfg_mode(info->sfr_addr); - /* FALLTHROUGH */ - case TYPE_S5PV210: /* Configure as little endian */ pata_s3c_set_endian(info->ide_addr, 0); @@ -549,11 +544,6 @@ static int __init pata_s3c_probe(struct platform_device *pdev) info->sfr_addr = info->ide_addr + 0x1800; info->ide_addr += 0x1900; info->fifo_status_reg = 0x94; - } else if (cpu_type == TYPE_S5PC100) { - ap->ops = &pata_s5p_port_ops; - info->sfr_addr = info->ide_addr + 0x1800; - info->ide_addr += 0x1900; - info->fifo_status_reg = 0x84; } else { ap->ops = &pata_s5p_port_ops; info->fifo_status_reg = 0x84; @@ -652,9 +642,6 @@ static struct platform_device_id pata_s3c_driver_ids[] = { { .name = "s3c64xx-pata", .driver_data = TYPE_S3C64XX, }, { - .name = "s5pc100-pata", - .driver_data = TYPE_S5PC100, }, { .name = "s5pv210-pata", .driver_data = TYPE_S5PV210, diff --git a/include/linux/platform_data/ata-samsung_cf.h b/include/linux/platform_data/ata-samsung_cf.h index c2049e3d7444..748e71642c4a 100644 --- a/include/linux/platform_data/ata-samsung_cf.h +++ b/include/linux/platform_data/ata-samsung_cf.h @@ -29,7 +29,6 @@ extern void s3c_ide_set_platdata(struct s3c_ide_platdata *pdata); /* architecture-specific IDE configuration */ extern void s3c64xx_ide_setup_gpio(void); -extern void s5pc100_ide_setup_gpio(void); extern void s5pv210_ide_setup_gpio(void); #endif /*__ATA_SAMSUNG_CF_H */ -- cgit v1.2.3-59-g8ed1b From adc82f77bee3487651f8ad253fb1c8a7bf4ec658 Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Delgado Date: Tue, 20 May 2014 18:30:03 +0200 Subject: usb: gadget: net2280: Add support for PLX USB338X This patch adds support for the PLX USB3380 and USB3382. This driver is based on the driver from the manufacturer. Since the USB338X is register compatible with the NET2280, I thought that it would be better to include this hardware in the net2280 driver. The manufacturer's driver only supported the USB338X, did not follow the kernel coding style and contained some trivial errors. This patch tries to address these issues. This patch has only been tested on USB338x hardware, but the merge has been done trying not to affect the behaviour of the NET2280. Tested-by: Alan Stern Signed-off-by: Ricardo Ribalda Delgado Signed-off-by: Felipe Balbi --- drivers/usb/gadget/Kconfig | 10 +- drivers/usb/gadget/net2280.c | 1119 ++++++++++++++++++++++++++++++++++++++---- drivers/usb/gadget/net2280.h | 121 ++++- include/linux/usb/usb338x.h | 199 ++++++++ 4 files changed, 1351 insertions(+), 98 deletions(-) create mode 100644 include/linux/usb/usb338x.h (limited to 'include/linux') diff --git a/drivers/usb/gadget/Kconfig b/drivers/usb/gadget/Kconfig index ba18e9c110cc..49e434ec527d 100644 --- a/drivers/usb/gadget/Kconfig +++ b/drivers/usb/gadget/Kconfig @@ -409,7 +409,7 @@ config USB_NET2272_DMA If unsure, say "N" here. The driver works fine in PIO mode.
config USB_NET2280 - tristate "NetChip 228x" + tristate "NetChip 228x / PLX USB338x" depends on PCI help NetChip 2280 / 2282 is a PCI based USB peripheral controller which @@ -419,6 +419,14 @@ config USB_NET2280 (for control transfers) and several endpoints with dedicated functions. + PLX 3380 / 3382 is a PCIe based USB peripheral controller which + supports full, high speed USB 2.0 and super speed USB 3.0 + data transfers. + + It has eight configurable endpoints, as well as endpoint zero + (for control transfers) and several endpoints with dedicated + functions. + Say "y" to link the driver statically, or "m" to build a dynamically linked module called "net2280" and force all gadget drivers to also be dynamically linked. diff --git a/drivers/usb/gadget/net2280.c b/drivers/usb/gadget/net2280.c index 300b3a71383b..8112d9140a90 100644 --- a/drivers/usb/gadget/net2280.c +++ b/drivers/usb/gadget/net2280.c @@ -18,6 +18,9 @@ * hint to completely eliminate some IRQs, if a later IRQ is guaranteed * and DMA chaining is enabled. * + * MSI is enabled by default. The legacy IRQ is used if MSI couldn't + * be enabled. + * * Note that almost all the errata workarounds here are only needed for * rev1 chips. Rev1a silicon (0110) fixes almost all of them. */ @@ -25,10 +28,14 @@ /* * Copyright (C) 2003 David Brownell * Copyright (C) 2003-2005 PLX Technology, Inc. + * Copyright (C) 2014 Ricardo Ribalda - Qtechnology/AS * * Modified Seth Levy 2005 PLX Technology, Inc. to provide compatibility * with 2282 chip * + * Modified Ricardo Ribalda Qtechnology AS to provide compatibility + * with usb 338x chip. Based on PLX driver + * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -61,9 +68,8 @@ #include #include - -#define DRIVER_DESC "PLX NET228x USB Peripheral Controller" -#define DRIVER_VERSION "2005 Sept 27" +#define DRIVER_DESC "PLX NET228x/USB338x USB Peripheral Controller" +#define DRIVER_VERSION "2005 Sept 27/v3.0" #define EP_DONTUSE 13 /* nonzero */ @@ -73,11 +79,12 @@ static const char driver_name [] = "net2280"; static const char driver_desc [] = DRIVER_DESC; +static const u32 ep_bit[9] = { 0, 17, 2, 19, 4, 1, 18, 3, 20 }; static const char ep0name [] = "ep0"; static const char *const ep_name [] = { ep0name, "ep-a", "ep-b", "ep-c", "ep-d", - "ep-e", "ep-f", + "ep-e", "ep-f", "ep-g", "ep-h", }; /* use_dma -- general goodness, fewer interrupts, less cpu load (vs PIO) @@ -90,11 +97,12 @@ static const char *const ep_name [] = { */ static bool use_dma = 1; static bool use_dma_chaining = 0; +static bool use_msi = 1; /* "modprobe net2280 use_dma=n" etc */ module_param (use_dma, bool, S_IRUGO); module_param (use_dma_chaining, bool, S_IRUGO); - +module_param(use_msi, bool, S_IRUGO); /* mode 0 == ep-{a,b,c,d} 1K fifo each * mode 1 == ep-{a,b} 2K fifo each, ep-{c,d} unavailable @@ -140,6 +148,18 @@ static char *type_string (u8 bmAttributes) #define dma_done_ie cpu_to_le32 (1 << DMA_DONE_INTERRUPT_ENABLE) /*-------------------------------------------------------------------------*/ +static inline void enable_pciirqenb(struct net2280_ep *ep) +{ + u32 tmp = readl(&ep->dev->regs->pciirqenb0); + + if (ep->dev->pdev->vendor == 0x17cc) + tmp |= 1 << ep->num; + else + tmp |= 1 << ep_bit[ep->num]; + writel(tmp, &ep->dev->regs->pciirqenb0); + + return; +} static int net2280_enable (struct usb_ep *_ep, const struct usb_endpoint_descriptor *desc) @@ -148,6 +168,7 @@ 
net2280_enable (struct usb_ep *_ep, const struct usb_endpoint_descriptor *desc) struct net2280_ep *ep; u32 max, tmp; unsigned long flags; + static const u32 ep_key[9] = { 1, 0, 1, 0, 1, 1, 0, 1, 0 }; ep = container_of (_ep, struct net2280_ep, ep); if (!_ep || !desc || ep->desc || _ep->name == ep0name @@ -161,9 +182,17 @@ net2280_enable (struct usb_ep *_ep, const struct usb_endpoint_descriptor *desc) if ((desc->bEndpointAddress & 0x0f) == EP_DONTUSE) return -EDOM; + if (dev->pdev->vendor == 0x10b5) { + if ((desc->bEndpointAddress & 0x0f) >= 0x0c) + return -EDOM; + ep->is_in = !!usb_endpoint_dir_in(desc); + if (dev->enhanced_mode && ep->is_in && ep_key[ep->num]) + return -EINVAL; + } + /* sanity check ep-e/ep-f since their fifos are small */ max = usb_endpoint_maxp (desc) & 0x1fff; - if (ep->num > 4 && max > 64) + if (ep->num > 4 && max > 64 && (dev->pdev->vendor == 0x17cc)) return -ERANGE; spin_lock_irqsave (&dev->lock, flags); @@ -176,7 +205,7 @@ net2280_enable (struct usb_ep *_ep, const struct usb_endpoint_descriptor *desc) ep->out_overflow = 0; /* set speed-dependent max packet; may kick in high bandwidth */ - set_idx_reg (dev->regs, REG_EP_MAXPKT (dev, ep->num), max); + set_max_speed(ep, max); /* FIFO lines can't go to different packets. PIO is ok, so * use it instead of troublesome (non-bulk) multi-packet DMA. @@ -199,23 +228,43 @@ net2280_enable (struct usb_ep *_ep, const struct usb_endpoint_descriptor *desc) &ep->regs->ep_rsp); } else if (tmp == USB_ENDPOINT_XFER_BULK) { /* catch some particularly blatant driver bugs */ - if ((dev->gadget.speed == USB_SPEED_HIGH - && max != 512) - || (dev->gadget.speed == USB_SPEED_FULL - && max > 64)) { - spin_unlock_irqrestore (&dev->lock, flags); + if ((dev->gadget.speed == USB_SPEED_SUPER && max != 1024) || + (dev->gadget.speed == USB_SPEED_HIGH && max != 512) || + (dev->gadget.speed == USB_SPEED_FULL && max > 64)) { + spin_unlock_irqrestore(&dev->lock, flags); return -ERANGE; } } ep->is_iso = (tmp == USB_ENDPOINT_XFER_ISOC) ? 
1 : 0; - tmp <<= ENDPOINT_TYPE; - tmp |= desc->bEndpointAddress; - tmp |= (4 << ENDPOINT_BYTE_COUNT); /* default full fifo lines */ - tmp |= 1 << ENDPOINT_ENABLE; - wmb (); + /* Enable this endpoint */ + if (dev->pdev->vendor == 0x17cc) { + tmp <<= ENDPOINT_TYPE; + tmp |= desc->bEndpointAddress; + /* default full fifo lines */ + tmp |= (4 << ENDPOINT_BYTE_COUNT); + tmp |= 1 << ENDPOINT_ENABLE; + ep->is_in = (tmp & USB_DIR_IN) != 0; + } else { + /* In Legacy mode, only OUT endpoints are used */ + if (dev->enhanced_mode && ep->is_in) { + tmp <<= IN_ENDPOINT_TYPE; + tmp |= (1 << IN_ENDPOINT_ENABLE); + /* Not applicable to Legacy */ + tmp |= (1 << ENDPOINT_DIRECTION); + } else { + tmp <<= OUT_ENDPOINT_TYPE; + tmp |= (1 << OUT_ENDPOINT_ENABLE); + tmp |= (ep->is_in << ENDPOINT_DIRECTION); + } + + tmp |= usb_endpoint_num(desc); + tmp |= (ep->ep.maxburst << MAX_BURST_SIZE); + } + + /* Make sure all the registers are written before ep_rsp*/ + wmb(); /* for OUT transfers, block the rx fifo until a read is posted */ - ep->is_in = (tmp & USB_DIR_IN) != 0; if (!ep->is_in) writel ((1 << SET_NAK_OUT_PACKETS), &ep->regs->ep_rsp); else if (dev->pdev->device != 0x2280) { @@ -226,12 +275,11 @@ net2280_enable (struct usb_ep *_ep, const struct usb_endpoint_descriptor *desc) | (1 << CLEAR_NAK_OUT_PACKETS_MODE), &ep->regs->ep_rsp); } - writel (tmp, &ep->regs->ep_cfg); + writel(tmp, &ep->cfg->ep_cfg); /* enable irqs */ if (!ep->dma) { /* pio, per-packet */ - tmp = (1 << ep->num) | readl (&dev->regs->pciirqenb0); - writel (tmp, &dev->regs->pciirqenb0); + enable_pciirqenb(ep); tmp = (1 << DATA_PACKET_RECEIVED_INTERRUPT_ENABLE) | (1 << DATA_PACKET_TRANSMITTED_INTERRUPT_ENABLE); @@ -251,8 +299,7 @@ net2280_enable (struct usb_ep *_ep, const struct usb_endpoint_descriptor *desc) tmp = (1 << SHORT_PACKET_TRANSFERRED_INTERRUPT_ENABLE); writel (tmp, &ep->regs->ep_irqenb); - tmp = (1 << ep->num) | readl (&dev->regs->pciirqenb0); - writel (tmp, &dev->regs->pciirqenb0); + enable_pciirqenb(ep); } } @@ -286,7 +333,8 @@ static int handshake (u32 __iomem *ptr, u32 mask, u32 done, int usec) static const struct usb_ep_ops net2280_ep_ops; -static void ep_reset (struct net2280_regs __iomem *regs, struct net2280_ep *ep) +static void ep_reset_228x(struct net2280_regs __iomem *regs, + struct net2280_ep *ep) { u32 tmp; @@ -361,6 +409,55 @@ static void ep_reset (struct net2280_regs __iomem *regs, struct net2280_ep *ep) /* fifo size is handled separately */ } +static void ep_reset_338x(struct net2280_regs __iomem *regs, + struct net2280_ep *ep) +{ + u32 tmp, dmastat; + + ep->desc = NULL; + INIT_LIST_HEAD(&ep->queue); + + usb_ep_set_maxpacket_limit(&ep->ep, ~0); + ep->ep.ops = &net2280_ep_ops; + + /* disable the dma, irqs, endpoint... 
*/ + if (ep->dma) { + writel(0, &ep->dma->dmactl); + writel((1 << DMA_ABORT_DONE_INTERRUPT) | + (1 << DMA_PAUSE_DONE_INTERRUPT) | + (1 << DMA_SCATTER_GATHER_DONE_INTERRUPT) | + (1 << DMA_TRANSACTION_DONE_INTERRUPT) + /* | (1 << DMA_ABORT) */ + , &ep->dma->dmastat); + + dmastat = readl(&ep->dma->dmastat); + if (dmastat == 0x5002) { + WARNING(ep->dev, "The dmastat return = %x!!\n", + dmastat); + writel(0x5a, &ep->dma->dmastat); + } + + tmp = readl(®s->pciirqenb0); + tmp &= ~(1 << ep_bit[ep->num]); + writel(tmp, ®s->pciirqenb0); + } else { + if (ep->num < 5) { + tmp = readl(®s->pciirqenb1); + tmp &= ~(1 << (8 + ep->num)); /* completion */ + writel(tmp, ®s->pciirqenb1); + } + } + writel(0, &ep->regs->ep_irqenb); + + writel((1 << SHORT_PACKET_OUT_DONE_INTERRUPT) | + (1 << SHORT_PACKET_TRANSFERRED_INTERRUPT) | + (1 << FIFO_OVERFLOW) | + (1 << DATA_PACKET_RECEIVED_INTERRUPT) | + (1 << DATA_PACKET_TRANSMITTED_INTERRUPT) | + (1 << DATA_OUT_PING_TOKEN_INTERRUPT) | + (1 << DATA_IN_TOKEN_INTERRUPT), &ep->regs->ep_stat); +} + static void nuke (struct net2280_ep *); static int net2280_disable (struct usb_ep *_ep) @@ -374,13 +471,17 @@ static int net2280_disable (struct usb_ep *_ep) spin_lock_irqsave (&ep->dev->lock, flags); nuke (ep); - ep_reset (ep->dev->regs, ep); + + if (ep->dev->pdev->vendor == 0x10b5) + ep_reset_338x(ep->dev->regs, ep); + else + ep_reset_228x(ep->dev->regs, ep); VDEBUG (ep->dev, "disabled %s %s\n", ep->dma ? "dma" : "pio", _ep->name); /* synch memory views with the device */ - (void) readl (&ep->regs->ep_cfg); + (void)readl(&ep->cfg->ep_cfg); if (use_dma && !ep->dma && ep->num >= 1 && ep->num <= 4) ep->dma = &ep->dev->dma [ep->num - 1]; @@ -698,6 +799,8 @@ static void start_queue (struct net2280_ep *ep, u32 dmactl, u32 td_dma) writel (readl (&dma->dmastat), &dma->dmastat); writel (td_dma, &dma->dmadesc); + if (ep->dev->pdev->vendor == 0x10b5) + dmactl |= (0x01 << DMA_REQUEST_OUTSTANDING); writel (dmactl, &dma->dmactl); /* erratum 0116 workaround part 3: pci arbiter away from net2280 */ @@ -772,6 +875,21 @@ static void start_dma (struct net2280_ep *ep, struct net2280_request *req) start_queue (ep, tmp, req->td_dma); } +static inline void resume_dma(struct net2280_ep *ep) +{ + writel(readl(&ep->dma->dmactl) | (1 << DMA_ENABLE), &ep->dma->dmactl); + + ep->dma_started = true; +} + +static inline void ep_stop_dma(struct net2280_ep *ep) +{ + writel(readl(&ep->dma->dmactl) & ~(1 << DMA_ENABLE), &ep->dma->dmactl); + spin_stop_dma(ep->dma); + + ep->dma_started = false; +} + static inline void queue_dma (struct net2280_ep *ep, struct net2280_request *req, int valid) { @@ -874,8 +992,23 @@ net2280_queue (struct usb_ep *_ep, struct usb_request *_req, gfp_t gfp_flags) /* kickstart this i/o queue? 
*/ if (list_empty (&ep->queue) && !ep->stopped) { + /* DMA request while EP halted */ + if (ep->dma && + (readl(&ep->regs->ep_rsp) & (1 << CLEAR_ENDPOINT_HALT)) && + (dev->pdev->vendor == 0x10b5)) { + int valid = 1; + if (ep->is_in) { + int expect; + expect = likely(req->req.zero || + ((req->req.length % + ep->ep.maxpacket) != 0)); + if (expect != ep->in_fifo_validate) + valid = 0; + } + queue_dma(ep, req, valid); + } /* use DMA if the endpoint supports it, else pio */ - if (ep->dma) + else if (ep->dma) start_dma (ep, req); else { /* maybe there's no control data, just status ack */ @@ -993,6 +1126,8 @@ static void scan_dma_completions (struct net2280_ep *ep) } else if (!ep->is_in && (req->req.length % ep->ep.maxpacket) != 0) { tmp = readl (&ep->regs->ep_stat); + if (ep->dev->pdev->vendor == 0x10b5) + return dma_done(ep, req, tmp, 0); /* AVOID TROUBLE HERE by not issuing short reads from * your gadget driver. That helps avoids errata 0121, @@ -1079,7 +1214,7 @@ static void restart_dma (struct net2280_ep *ep) start_queue (ep, dmactl, req->td_dma); } -static void abort_dma (struct net2280_ep *ep) +static void abort_dma_228x(struct net2280_ep *ep) { /* abort the current transfer */ if (likely (!list_empty (&ep->queue))) { @@ -1091,6 +1226,19 @@ static void abort_dma (struct net2280_ep *ep) scan_dma_completions (ep); } +static void abort_dma_338x(struct net2280_ep *ep) +{ + writel((1 << DMA_ABORT), &ep->dma->dmastat); + spin_stop_dma(ep->dma); +} + +static void abort_dma(struct net2280_ep *ep) +{ + if (ep->dev->pdev->vendor == 0x17cc) + return abort_dma_228x(ep); + return abort_dma_338x(ep); +} + /* dequeue ALL requests */ static void nuke (struct net2280_ep *ep) { @@ -1244,6 +1392,9 @@ net2280_set_halt_and_wedge(struct usb_ep *_ep, int value, int wedged) ep->wedged = 1; } else { clear_halt (ep); + if (ep->dev->pdev->vendor == 0x10b5 && + !list_empty(&ep->queue) && ep->td_dma) + restart_dma(ep); ep->wedged = 0; } (void) readl (&ep->regs->ep_rsp); @@ -1367,10 +1518,13 @@ static int net2280_set_selfpowered (struct usb_gadget *_gadget, int value) spin_lock_irqsave (&dev->lock, flags); tmp = readl (&dev->usb->usbctl); - if (value) + if (value) { tmp |= (1 << SELF_POWERED_STATUS); - else + dev->selfpowered = 1; + } else { tmp &= ~(1 << SELF_POWERED_STATUS); + dev->selfpowered = 0; + } writel (tmp, &dev->usb->usbctl); spin_unlock_irqrestore (&dev->lock, flags); @@ -1504,14 +1658,14 @@ static ssize_t registers_show(struct device *_dev, /* DMA Control Registers */ /* Configurable EP Control Registers */ - for (i = 0; i < 7; i++) { + for (i = 0; i < dev->n_ep; i++) { struct net2280_ep *ep; ep = &dev->ep [i]; if (i && !ep->desc) continue; - t1 = readl (&ep->regs->ep_cfg); + t1 = readl(&ep->cfg->ep_cfg); t2 = readl (&ep->regs->ep_rsp) & 0xff; t = scnprintf (next, size, "\n%s\tcfg %05x rsp (%02x) %s%s%s%s%s%s%s%s" @@ -1571,7 +1725,7 @@ static ssize_t registers_show(struct device *_dev, t = scnprintf (next, size, "\nirqs: "); size -= t; next += t; - for (i = 0; i < 7; i++) { + for (i = 0; i < dev->n_ep; i++) { struct net2280_ep *ep; ep = &dev->ep [i]; @@ -1606,7 +1760,7 @@ static ssize_t queues_show(struct device *_dev, struct device_attribute *attr, size = PAGE_SIZE; spin_lock_irqsave (&dev->lock, flags); - for (i = 0; i < 7; i++) { + for (i = 0; i < dev->n_ep; i++) { struct net2280_ep *ep = &dev->ep [i]; struct net2280_request *req; int t; @@ -1735,6 +1889,121 @@ static void set_fifo_mode (struct net2280 *dev, int mode) list_add_tail (&dev->ep [6].ep.ep_list, &dev->gadget.ep_list); } +static void 
defect7374_disable_data_eps(struct net2280 *dev) +{ + /* + * For Defect 7374, disable data EPs (and more): + * - This phase undoes the earlier phase of the Defect 7374 workaround, + * returing ep regs back to normal. + */ + struct net2280_ep *ep; + int i; + unsigned char ep_sel; + u32 tmp_reg; + + for (i = 1; i < 5; i++) { + ep = &dev->ep[i]; + writel(0, &ep->cfg->ep_cfg); + } + + /* CSROUT, CSRIN, PCIOUT, PCIIN, STATIN, RCIN */ + for (i = 0; i < 6; i++) + writel(0, &dev->dep[i].dep_cfg); + + for (ep_sel = 0; ep_sel <= 21; ep_sel++) { + /* Select an endpoint for subsequent operations: */ + tmp_reg = readl(&dev->plregs->pl_ep_ctrl); + writel(((tmp_reg & ~0x1f) | ep_sel), &dev->plregs->pl_ep_ctrl); + + if (ep_sel < 2 || (ep_sel > 9 && ep_sel < 14) || + ep_sel == 18 || ep_sel == 20) + continue; + + /* Change settings on some selected endpoints */ + tmp_reg = readl(&dev->plregs->pl_ep_cfg_4); + tmp_reg &= ~(1 << NON_CTRL_IN_TOLERATE_BAD_DIR); + writel(tmp_reg, &dev->plregs->pl_ep_cfg_4); + tmp_reg = readl(&dev->plregs->pl_ep_ctrl); + tmp_reg |= (1 << EP_INITIALIZED); + writel(tmp_reg, &dev->plregs->pl_ep_ctrl); + } +} + +static void defect7374_enable_data_eps_zero(struct net2280 *dev) +{ + u32 tmp = 0, tmp_reg; + u32 fsmvalue, scratch; + int i; + unsigned char ep_sel; + + scratch = get_idx_reg(dev->regs, SCRATCH); + fsmvalue = scratch & (0xf << DEFECT7374_FSM_FIELD); + scratch &= ~(0xf << DEFECT7374_FSM_FIELD); + + /*See if firmware needs to set up for workaround*/ + if (fsmvalue != DEFECT7374_FSM_SS_CONTROL_READ) { + WARNING(dev, "Operate Defect 7374 workaround soft this time"); + WARNING(dev, "It will operate on cold-reboot and SS connect"); + + /*GPEPs:*/ + tmp = ((0 << ENDPOINT_NUMBER) | (1 << ENDPOINT_DIRECTION) | + (2 << OUT_ENDPOINT_TYPE) | (2 << IN_ENDPOINT_TYPE) | + ((dev->enhanced_mode) ? + 1 << OUT_ENDPOINT_ENABLE : 1 << ENDPOINT_ENABLE) | + (1 << IN_ENDPOINT_ENABLE)); + + for (i = 1; i < 5; i++) + writel(tmp, &dev->ep[i].cfg->ep_cfg); + + /* CSRIN, PCIIN, STATIN, RCIN*/ + tmp = ((0 << ENDPOINT_NUMBER) | (1 << ENDPOINT_ENABLE)); + writel(tmp, &dev->dep[1].dep_cfg); + writel(tmp, &dev->dep[3].dep_cfg); + writel(tmp, &dev->dep[4].dep_cfg); + writel(tmp, &dev->dep[5].dep_cfg); + + /*Implemented for development and debug. 
+ * Can be refined/tuned later.*/ + for (ep_sel = 0; ep_sel <= 21; ep_sel++) { + /* Select an endpoint for subsequent operations: */ + tmp_reg = readl(&dev->plregs->pl_ep_ctrl); + writel(((tmp_reg & ~0x1f) | ep_sel), + &dev->plregs->pl_ep_ctrl); + + if (ep_sel == 1) { + tmp = + (readl(&dev->plregs->pl_ep_ctrl) | + (1 << CLEAR_ACK_ERROR_CODE) | 0); + writel(tmp, &dev->plregs->pl_ep_ctrl); + continue; + } + + if (ep_sel == 0 || (ep_sel > 9 && ep_sel < 14) || + ep_sel == 18 || ep_sel == 20) + continue; + + tmp = (readl(&dev->plregs->pl_ep_cfg_4) | + (1 << NON_CTRL_IN_TOLERATE_BAD_DIR) | 0); + writel(tmp, &dev->plregs->pl_ep_cfg_4); + + tmp = readl(&dev->plregs->pl_ep_ctrl) & + ~(1 << EP_INITIALIZED); + writel(tmp, &dev->plregs->pl_ep_ctrl); + + } + + /* Set FSM to focus on the first Control Read: + * - Tip: Connection speed is known upon the first + * setup request.*/ + scratch |= DEFECT7374_FSM_WAITING_FOR_CONTROL_READ; + set_idx_reg(dev->regs, SCRATCH, scratch); + + } else{ + WARNING(dev, "Defect 7374 workaround soft will NOT operate"); + WARNING(dev, "It will operate on cold-reboot and SS connect"); + } +} + /* keeping it simple: * - one bus driver, initted first; * - one function driver, initted second @@ -1744,7 +2013,7 @@ static void set_fifo_mode (struct net2280 *dev, int mode) * perhaps to bind specific drivers to specific devices. */ -static void usb_reset (struct net2280 *dev) +static void usb_reset_228x(struct net2280 *dev) { u32 tmp; @@ -1760,11 +2029,11 @@ static void usb_reset (struct net2280 *dev) /* clear old dma and irq state */ for (tmp = 0; tmp < 4; tmp++) { - struct net2280_ep *ep = &dev->ep [tmp + 1]; - + struct net2280_ep *ep = &dev->ep[tmp + 1]; if (ep->dma) - abort_dma (ep); + abort_dma(ep); } + writel (~0, &dev->regs->irqstat0), writel (~(1 << SUSPEND_REQUEST_INTERRUPT), &dev->regs->irqstat1), @@ -1780,7 +2049,67 @@ static void usb_reset (struct net2280 *dev) set_fifo_mode (dev, (fifo_mode <= 2) ? fifo_mode : 0); } -static void usb_reinit (struct net2280 *dev) +static void usb_reset_338x(struct net2280 *dev) +{ + u32 tmp; + u32 fsmvalue; + + dev->gadget.speed = USB_SPEED_UNKNOWN; + (void)readl(&dev->usb->usbctl); + + net2280_led_init(dev); + + fsmvalue = get_idx_reg(dev->regs, SCRATCH) & + (0xf << DEFECT7374_FSM_FIELD); + + /* See if firmware needs to set up for workaround: */ + if (fsmvalue != DEFECT7374_FSM_SS_CONTROL_READ) { + INFO(dev, "%s: Defect 7374 FsmValue 0x%08x\n", __func__, + fsmvalue); + } else { + /* disable automatic responses, and irqs */ + writel(0, &dev->usb->stdrsp); + writel(0, &dev->regs->pciirqenb0); + writel(0, &dev->regs->pciirqenb1); + } + + /* clear old dma and irq state */ + for (tmp = 0; tmp < 4; tmp++) { + struct net2280_ep *ep = &dev->ep[tmp + 1]; + + if (ep->dma) + abort_dma(ep); + } + + writel(~0, &dev->regs->irqstat0), writel(~0, &dev->regs->irqstat1); + + if (fsmvalue == DEFECT7374_FSM_SS_CONTROL_READ) { + /* reset, and enable pci */ + tmp = readl(&dev->regs->devinit) | + (1 << PCI_ENABLE) | + (1 << FIFO_SOFT_RESET) | + (1 << USB_SOFT_RESET) | + (1 << M8051_RESET); + + writel(tmp, &dev->regs->devinit); + } + + /* always ep-{1,2,3,4} ... 
maybe not ep-3 or ep-4 */ + INIT_LIST_HEAD(&dev->gadget.ep_list); + + for (tmp = 1; tmp < dev->n_ep; tmp++) + list_add_tail(&dev->ep[tmp].ep.ep_list, &dev->gadget.ep_list); + +} + +static void usb_reset(struct net2280 *dev) +{ + if (dev->pdev->vendor == 0x17cc) + return usb_reset_228x(dev); + return usb_reset_338x(dev); +} + +static void usb_reinit_228x(struct net2280 *dev) { u32 tmp; int init_dma; @@ -1803,7 +2132,8 @@ static void usb_reinit (struct net2280 *dev) } else ep->fifo_size = 64; ep->regs = &dev->epregs [tmp]; - ep_reset (dev->regs, ep); + ep->cfg = &dev->epregs[tmp]; + ep_reset_228x(dev->regs, ep); } usb_ep_set_maxpacket_limit(&dev->ep [0].ep, 64); usb_ep_set_maxpacket_limit(&dev->ep [5].ep, 64); @@ -1820,7 +2150,122 @@ static void usb_reinit (struct net2280 *dev) writel (EP_DONTUSE, &dev->dep [tmp].dep_cfg); } -static void ep0_start (struct net2280 *dev) +static void usb_reinit_338x(struct net2280 *dev) +{ + int init_dma; + int i; + u32 tmp, val; + u32 fsmvalue; + static const u32 ne[9] = { 0, 1, 2, 3, 4, 1, 2, 3, 4 }; + static const u32 ep_reg_addr[9] = { 0x00, 0xC0, 0x00, 0xC0, 0x00, + 0x00, 0xC0, 0x00, 0xC0 }; + + /* use_dma changes are ignored till next device re-init */ + init_dma = use_dma; + + /* basic endpoint init */ + for (i = 0; i < dev->n_ep; i++) { + struct net2280_ep *ep = &dev->ep[i]; + + ep->ep.name = ep_name[i]; + ep->dev = dev; + ep->num = i; + + if (i > 0 && i <= 4 && init_dma) + ep->dma = &dev->dma[i - 1]; + + if (dev->enhanced_mode) { + ep->cfg = &dev->epregs[ne[i]]; + ep->regs = (struct net2280_ep_regs __iomem *) + (((void *)&dev->epregs[ne[i]]) + + ep_reg_addr[i]); + ep->fiforegs = &dev->fiforegs[i]; + } else { + ep->cfg = &dev->epregs[i]; + ep->regs = &dev->epregs[i]; + ep->fiforegs = &dev->fiforegs[i]; + } + + ep->fifo_size = (i != 0) ? 2048 : 512; + + ep_reset_338x(dev->regs, ep); + } + usb_ep_set_maxpacket_limit(&dev->ep[0].ep, 512); + + dev->gadget.ep0 = &dev->ep[0].ep; + dev->ep[0].stopped = 0; + + /* Link layer set up */ + fsmvalue = get_idx_reg(dev->regs, SCRATCH) & + (0xf << DEFECT7374_FSM_FIELD); + + /* See if driver needs to set up for workaround: */ + if (fsmvalue != DEFECT7374_FSM_SS_CONTROL_READ) + INFO(dev, "%s: Defect 7374 FsmValue %08x\n", + __func__, fsmvalue); + else { + tmp = readl(&dev->usb_ext->usbctl2) & + ~((1 << U1_ENABLE) | (1 << U2_ENABLE) | (1 << LTM_ENABLE)); + writel(tmp, &dev->usb_ext->usbctl2); + } + + /* Hardware Defect and Workaround */ + val = readl(&dev->ll_lfps_regs->ll_lfps_5); + val &= ~(0xf << TIMER_LFPS_6US); + val |= 0x5 << TIMER_LFPS_6US; + writel(val, &dev->ll_lfps_regs->ll_lfps_5); + + val = readl(&dev->ll_lfps_regs->ll_lfps_6); + val &= ~(0xffff << TIMER_LFPS_80US); + val |= 0x0100 << TIMER_LFPS_80US; + writel(val, &dev->ll_lfps_regs->ll_lfps_6); + + /* + * AA_AB Errata. Issue 4. Workaround for SuperSpeed USB + * Hot Reset Exit Handshake may Fail in Specific Case using + * Default Register Settings. Workaround for Enumeration test. + */ + val = readl(&dev->ll_tsn_regs->ll_tsn_counters_2); + val &= ~(0x1f << HOT_TX_NORESET_TS2); + val |= 0x10 << HOT_TX_NORESET_TS2; + writel(val, &dev->ll_tsn_regs->ll_tsn_counters_2); + + val = readl(&dev->ll_tsn_regs->ll_tsn_counters_3); + val &= ~(0x1f << HOT_RX_RESET_TS2); + val |= 0x3 << HOT_RX_RESET_TS2; + writel(val, &dev->ll_tsn_regs->ll_tsn_counters_3); + + /* + * Set Recovery Idle to Recover bit: + * - On SS connections, setting Recovery Idle to Recover Fmw improves + * link robustness with various hosts and hubs. 
+ * - It is safe to set for all connection speeds; all chip revisions. + * - R-M-W to leave other bits undisturbed. + * - Reference PLX TT-7372 + */ + val = readl(&dev->ll_chicken_reg->ll_tsn_chicken_bit); + val |= (1 << RECOVERY_IDLE_TO_RECOVER_FMW); + writel(val, &dev->ll_chicken_reg->ll_tsn_chicken_bit); + + INIT_LIST_HEAD(&dev->gadget.ep0->ep_list); + + /* disable dedicated endpoints */ + writel(0x0D, &dev->dep[0].dep_cfg); + writel(0x0D, &dev->dep[1].dep_cfg); + writel(0x0E, &dev->dep[2].dep_cfg); + writel(0x0E, &dev->dep[3].dep_cfg); + writel(0x0F, &dev->dep[4].dep_cfg); + writel(0x0C, &dev->dep[5].dep_cfg); +} + +static void usb_reinit(struct net2280 *dev) +{ + if (dev->pdev->vendor == 0x17cc) + return usb_reinit_228x(dev); + return usb_reinit_338x(dev); +} + +static void ep0_start_228x(struct net2280 *dev) { writel ( (1 << CLEAR_EP_HIDE_STATUS_PHASE) | (1 << CLEAR_NAK_OUT_PACKETS) @@ -1863,6 +2308,61 @@ static void ep0_start (struct net2280 *dev) (void) readl (&dev->usb->usbctl); } +static void ep0_start_338x(struct net2280 *dev) +{ + u32 fsmvalue; + + fsmvalue = get_idx_reg(dev->regs, SCRATCH) & + (0xf << DEFECT7374_FSM_FIELD); + + if (fsmvalue != DEFECT7374_FSM_SS_CONTROL_READ) + INFO(dev, "%s: Defect 7374 FsmValue %08x\n", __func__, + fsmvalue); + else + writel((1 << CLEAR_NAK_OUT_PACKETS_MODE) | + (1 << SET_EP_HIDE_STATUS_PHASE), + &dev->epregs[0].ep_rsp); + + /* + * hardware optionally handles a bunch of standard requests + * that the API hides from drivers anyway. have it do so. + * endpoint status/features are handled in software, to + * help pass tests for some dubious behavior. + */ + writel((1 << SET_ISOCHRONOUS_DELAY) | + (1 << SET_SEL) | + (1 << SET_TEST_MODE) | + (1 << SET_ADDRESS) | + (1 << GET_INTERFACE_STATUS) | + (1 << GET_DEVICE_STATUS), + &dev->usb->stdrsp); + dev->wakeup_enable = 1; + writel((1 << USB_ROOT_PORT_WAKEUP_ENABLE) | + (dev->softconnect << USB_DETECT_ENABLE) | + (1 << DEVICE_REMOTE_WAKEUP_ENABLE), + &dev->usb->usbctl); + + /* enable irqs so we can see ep0 and general operation */ + writel((1 << SETUP_PACKET_INTERRUPT_ENABLE) | + (1 << ENDPOINT_0_INTERRUPT_ENABLE) + , &dev->regs->pciirqenb0); + writel((1 << PCI_INTERRUPT_ENABLE) | + (1 << ROOT_PORT_RESET_INTERRUPT_ENABLE) | + (1 << SUSPEND_REQUEST_CHANGE_INTERRUPT_ENABLE) | + (1 << VBUS_INTERRUPT_ENABLE), + &dev->regs->pciirqenb1); + + /* don't leave any writes posted */ + (void)readl(&dev->usb->usbctl); +} + +static void ep0_start(struct net2280 *dev) +{ + if (dev->pdev->vendor == 0x17cc) + return ep0_start_228x(dev); + return ep0_start_338x(dev); +} + /* when a driver is successfully registered, it will receive * control requests including set_configuration(), which enables * non-control requests. then usb traffic follows until a @@ -1886,7 +2386,7 @@ static int net2280_start(struct usb_gadget *_gadget, dev = container_of (_gadget, struct net2280, gadget); - for (i = 0; i < 7; i++) + for (i = 0; i < dev->n_ep; i++) dev->ep [i].irqs = 0; /* hook up the driver ... */ @@ -1900,13 +2400,17 @@ static int net2280_start(struct usb_gadget *_gadget, if (retval) goto err_func; /* Enable force-full-speed testing mode, if desired */ - if (full_speed) + if (full_speed && dev->pdev->vendor == 0x17cc) writel(1 << FORCE_FULL_SPEED_MODE, &dev->usb->xcvrdiag); /* ... then enable host detection and ep0; and we're ready * for set_configuration as well as eventual disconnect. 
*/ net2280_led_active (dev, 1); + + if (dev->pdev->vendor == 0x10b5) + defect7374_enable_data_eps_zero(dev); + ep0_start (dev); DEBUG (dev, "%s ready, usbctl %08x stdrsp %08x\n", @@ -1937,7 +2441,7 @@ stop_activity (struct net2280 *dev, struct usb_gadget_driver *driver) * and kill any outstanding requests. */ usb_reset (dev); - for (i = 0; i < 7; i++) + for (i = 0; i < dev->n_ep; i++) nuke (&dev->ep [i]); /* report disconnect; the driver is already quiesced */ @@ -1967,7 +2471,8 @@ static int net2280_stop(struct usb_gadget *_gadget, net2280_led_active (dev, 0); /* Disable full-speed test mode */ - writel(0, &dev->usb->xcvrdiag); + if (dev->pdev->vendor == 0x17cc) + writel(0, &dev->usb->xcvrdiag); device_remove_file (&dev->pdev->dev, &dev_attr_function); device_remove_file (&dev->pdev->dev, &dev_attr_queues); @@ -2219,6 +2724,350 @@ get_ep_by_addr (struct net2280 *dev, u16 wIndex) return NULL; } +static void defect7374_workaround(struct net2280 *dev, struct usb_ctrlrequest r) +{ + u32 scratch, fsmvalue; + u32 ack_wait_timeout, state; + + /* Workaround for Defect 7374 (U1/U2 erroneously rejected): */ + scratch = get_idx_reg(dev->regs, SCRATCH); + fsmvalue = scratch & (0xf << DEFECT7374_FSM_FIELD); + scratch &= ~(0xf << DEFECT7374_FSM_FIELD); + + if (!((fsmvalue == DEFECT7374_FSM_WAITING_FOR_CONTROL_READ) && + (r.bRequestType & USB_DIR_IN))) + return; + + /* This is the first Control Read for this connection: */ + if (!(readl(&dev->usb->usbstat) & (1 << SUPER_SPEED_MODE))) { + /* + * Connection is NOT SS: + * - Connection must be FS or HS. + * - This FSM state should allow workaround software to + * run after the next USB connection. + */ + scratch |= DEFECT7374_FSM_NON_SS_CONTROL_READ; + goto restore_data_eps; + } + + /* Connection is SS: */ + for (ack_wait_timeout = 0; + ack_wait_timeout < DEFECT_7374_NUMBEROF_MAX_WAIT_LOOPS; + ack_wait_timeout++) { + + state = readl(&dev->plregs->pl_ep_status_1) + & (0xff << STATE); + if ((state >= (ACK_GOOD_NORMAL << STATE)) && + (state <= (ACK_GOOD_MORE_ACKS_TO_COME << STATE))) { + scratch |= DEFECT7374_FSM_SS_CONTROL_READ; + break; + } + + /* + * We have not yet received host's Data Phase ACK + * - Wait and try again. + */ + udelay(DEFECT_7374_PROCESSOR_WAIT_TIME); + + continue; + } + + + if (ack_wait_timeout >= DEFECT_7374_NUMBEROF_MAX_WAIT_LOOPS) { + ERROR(dev, "FAIL: Defect 7374 workaround waited but failed " + "to detect SS host's data phase ACK."); + ERROR(dev, "PL_EP_STATUS_1(23:16):.Expected from 0x11 to 0x16" + "got 0x%2.2x.\n", state >> STATE); + } else { + WARNING(dev, "INFO: Defect 7374 workaround waited about\n" + "%duSec for Control Read Data Phase ACK\n", + DEFECT_7374_PROCESSOR_WAIT_TIME * ack_wait_timeout); + } + +restore_data_eps: + /* + * Restore data EPs to their pre-workaround settings (disabled, + * initialized, and other details). + */ + defect7374_disable_data_eps(dev); + + set_idx_reg(dev->regs, SCRATCH, scratch); + + return; +} + +static void ep_stall(struct net2280_ep *ep, int stall) +{ + struct net2280 *dev = ep->dev; + u32 val; + static const u32 ep_pl[9] = { 0, 3, 4, 7, 8, 2, 5, 6, 9 }; + + if (stall) { + writel((1 << SET_ENDPOINT_HALT) | + /* (1 << SET_NAK_PACKETS) | */ + (1 << CLEAR_CONTROL_STATUS_PHASE_HANDSHAKE), + &ep->regs->ep_rsp); + ep->is_halt = 1; + } else { + if (dev->gadget.speed == USB_SPEED_SUPER) { + /* + * Workaround for SS SeqNum not cleared via + * Endpoint Halt (Clear) bit. 
select endpoint + */ + val = readl(&dev->plregs->pl_ep_ctrl); + val = (val & ~0x1f) | ep_pl[ep->num]; + writel(val, &dev->plregs->pl_ep_ctrl); + + val |= (1 << SEQUENCE_NUMBER_RESET); + writel(val, &dev->plregs->pl_ep_ctrl); + } + val = readl(&ep->regs->ep_rsp); + val |= (1 << CLEAR_ENDPOINT_HALT) | + (1 << CLEAR_ENDPOINT_TOGGLE); + writel(val + /* | (1 << CLEAR_NAK_PACKETS)*/ + , &ep->regs->ep_rsp); + ep->is_halt = 0; + val = readl(&ep->regs->ep_rsp); + } +} + +static void ep_stdrsp(struct net2280_ep *ep, int value, int wedged) +{ + /* set/clear, then synch memory views with the device */ + if (value) { + ep->stopped = 1; + if (ep->num == 0) + ep->dev->protocol_stall = 1; + else { + if (ep->dma) + ep_stop_dma(ep); + ep_stall(ep, true); + } + + if (wedged) + ep->wedged = 1; + } else { + ep->stopped = 0; + ep->wedged = 0; + + ep_stall(ep, false); + + /* Flush the queue */ + if (!list_empty(&ep->queue)) { + struct net2280_request *req = + list_entry(ep->queue.next, struct net2280_request, + queue); + if (ep->dma) + resume_dma(ep); + else { + if (ep->is_in) + write_fifo(ep, &req->req); + else { + if (read_fifo(ep, req)) + done(ep, req, 0); + } + } + } + } +} + +static void handle_stat0_irqs_superspeed(struct net2280 *dev, + struct net2280_ep *ep, struct usb_ctrlrequest r) +{ + int tmp = 0; + +#define w_value le16_to_cpu(r.wValue) +#define w_index le16_to_cpu(r.wIndex) +#define w_length le16_to_cpu(r.wLength) + + switch (r.bRequest) { + struct net2280_ep *e; + u16 status; + + case USB_REQ_SET_CONFIGURATION: + dev->addressed_state = !w_value; + goto usb3_delegate; + + case USB_REQ_GET_STATUS: + switch (r.bRequestType) { + case (USB_DIR_IN | USB_TYPE_STANDARD | USB_RECIP_DEVICE): + status = dev->wakeup_enable ? 0x02 : 0x00; + if (dev->selfpowered) + status |= 1 << 0; + status |= (dev->u1_enable << 2 | dev->u2_enable << 3 | + dev->ltm_enable << 4); + writel(0, &dev->epregs[0].ep_irqenb); + set_fifo_bytecount(ep, sizeof(status)); + writel((__force u32) status, &dev->epregs[0].ep_data); + allow_status_338x(ep); + break; + + case (USB_DIR_IN | USB_TYPE_STANDARD | USB_RECIP_ENDPOINT): + e = get_ep_by_addr(dev, w_index); + if (!e) + goto do_stall3; + status = readl(&e->regs->ep_rsp) & + (1 << CLEAR_ENDPOINT_HALT); + writel(0, &dev->epregs[0].ep_irqenb); + set_fifo_bytecount(ep, sizeof(status)); + writel((__force u32) status, &dev->epregs[0].ep_data); + allow_status_338x(ep); + break; + + default: + goto usb3_delegate; + } + break; + + case USB_REQ_CLEAR_FEATURE: + switch (r.bRequestType) { + case (USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_DEVICE): + if (!dev->addressed_state) { + switch (w_value) { + case USB_DEVICE_U1_ENABLE: + dev->u1_enable = 0; + writel(readl(&dev->usb_ext->usbctl2) & + ~(1 << U1_ENABLE), + &dev->usb_ext->usbctl2); + allow_status_338x(ep); + goto next_endpoints3; + + case USB_DEVICE_U2_ENABLE: + dev->u2_enable = 0; + writel(readl(&dev->usb_ext->usbctl2) & + ~(1 << U2_ENABLE), + &dev->usb_ext->usbctl2); + allow_status_338x(ep); + goto next_endpoints3; + + case USB_DEVICE_LTM_ENABLE: + dev->ltm_enable = 0; + writel(readl(&dev->usb_ext->usbctl2) & + ~(1 << LTM_ENABLE), + &dev->usb_ext->usbctl2); + allow_status_338x(ep); + goto next_endpoints3; + + default: + break; + } + } + if (w_value == USB_DEVICE_REMOTE_WAKEUP) { + dev->wakeup_enable = 0; + writel(readl(&dev->usb->usbctl) & + ~(1 << DEVICE_REMOTE_WAKEUP_ENABLE), + &dev->usb->usbctl); + allow_status_338x(ep); + break; + } + goto usb3_delegate; + + case (USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_ENDPOINT): + e = 
get_ep_by_addr(dev, w_index); + if (!e) + goto do_stall3; + if (w_value != USB_ENDPOINT_HALT) + goto do_stall3; + VDEBUG(dev, "%s clear halt\n", e->ep.name); + ep_stall(e, false); + if (!list_empty(&e->queue) && e->td_dma) + restart_dma(e); + allow_status(ep); + ep->stopped = 1; + break; + + default: + goto usb3_delegate; + } + break; + case USB_REQ_SET_FEATURE: + switch (r.bRequestType) { + case (USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_DEVICE): + if (!dev->addressed_state) { + switch (w_value) { + case USB_DEVICE_U1_ENABLE: + dev->u1_enable = 1; + writel(readl(&dev->usb_ext->usbctl2) | + (1 << U1_ENABLE), + &dev->usb_ext->usbctl2); + allow_status_338x(ep); + goto next_endpoints3; + + case USB_DEVICE_U2_ENABLE: + dev->u2_enable = 1; + writel(readl(&dev->usb_ext->usbctl2) | + (1 << U2_ENABLE), + &dev->usb_ext->usbctl2); + allow_status_338x(ep); + goto next_endpoints3; + + case USB_DEVICE_LTM_ENABLE: + dev->ltm_enable = 1; + writel(readl(&dev->usb_ext->usbctl2) | + (1 << LTM_ENABLE), + &dev->usb_ext->usbctl2); + allow_status_338x(ep); + goto next_endpoints3; + default: + break; + } + } + + if (w_value == USB_DEVICE_REMOTE_WAKEUP) { + dev->wakeup_enable = 1; + writel(readl(&dev->usb->usbctl) | + (1 << DEVICE_REMOTE_WAKEUP_ENABLE), + &dev->usb->usbctl); + allow_status_338x(ep); + break; + } + goto usb3_delegate; + + case (USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_ENDPOINT): + e = get_ep_by_addr(dev, w_index); + if (!e || (w_value != USB_ENDPOINT_HALT)) + goto do_stall3; + ep_stdrsp(e, true, false); + allow_status_338x(ep); + break; + + default: + goto usb3_delegate; + } + + break; + default: + +usb3_delegate: + VDEBUG(dev, "setup %02x.%02x v%04x i%04x l%04x ep_cfg %08x\n", + r.bRequestType, r.bRequest, + w_value, w_index, w_length, + readl(&ep->cfg->ep_cfg)); + + ep->responded = 0; + spin_unlock(&dev->lock); + tmp = dev->driver->setup(&dev->gadget, &r); + spin_lock(&dev->lock); + } +do_stall3: + if (tmp < 0) { + VDEBUG(dev, "req %02x.%02x protocol STALL; stat %d\n", + r.bRequestType, r.bRequest, tmp); + dev->protocol_stall = 1; + /* TD 9.9 Halt Endpoint test. 
TD 9.22 Set feature test */ + ep_stall(ep, true); + } + +next_endpoints3: + +#undef w_value +#undef w_index +#undef w_length + + return; +} + static void handle_stat0_irqs (struct net2280 *dev, u32 stat) { struct net2280_ep *ep; @@ -2240,10 +3089,20 @@ static void handle_stat0_irqs (struct net2280 *dev, u32 stat) struct net2280_request *req; if (dev->gadget.speed == USB_SPEED_UNKNOWN) { - if (readl (&dev->usb->usbstat) & (1 << HIGH_SPEED)) + u32 val = readl(&dev->usb->usbstat); + if (val & (1 << SUPER_SPEED)) { + dev->gadget.speed = USB_SPEED_SUPER; + usb_ep_set_maxpacket_limit(&dev->ep[0].ep, + EP0_SS_MAX_PACKET_SIZE); + } else if (val & (1 << HIGH_SPEED)) { dev->gadget.speed = USB_SPEED_HIGH; - else + usb_ep_set_maxpacket_limit(&dev->ep[0].ep, + EP0_HS_MAX_PACKET_SIZE); + } else { dev->gadget.speed = USB_SPEED_FULL; + usb_ep_set_maxpacket_limit(&dev->ep[0].ep, + EP0_HS_MAX_PACKET_SIZE); + } net2280_led_speed (dev, dev->gadget.speed); DEBUG(dev, "%s\n", usb_speed_string(dev->gadget.speed)); } @@ -2261,32 +3120,38 @@ static void handle_stat0_irqs (struct net2280 *dev, u32 stat) } ep->stopped = 0; dev->protocol_stall = 0; - - if (ep->dev->pdev->device == 0x2280) - tmp = (1 << FIFO_OVERFLOW) - | (1 << FIFO_UNDERFLOW); - else - tmp = 0; - - writel (tmp | (1 << TIMEOUT) - | (1 << USB_STALL_SENT) - | (1 << USB_IN_NAK_SENT) - | (1 << USB_IN_ACK_RCVD) - | (1 << USB_OUT_PING_NAK_SENT) - | (1 << USB_OUT_ACK_SENT) - | (1 << SHORT_PACKET_OUT_DONE_INTERRUPT) - | (1 << SHORT_PACKET_TRANSFERRED_INTERRUPT) - | (1 << DATA_PACKET_RECEIVED_INTERRUPT) - | (1 << DATA_PACKET_TRANSMITTED_INTERRUPT) - | (1 << DATA_OUT_PING_TOKEN_INTERRUPT) - | (1 << DATA_IN_TOKEN_INTERRUPT) - , &ep->regs->ep_stat); - u.raw [0] = readl (&dev->usb->setup0123); - u.raw [1] = readl (&dev->usb->setup4567); + if (dev->pdev->vendor == 0x10b5) + ep->is_halt = 0; + else{ + if (ep->dev->pdev->device == 0x2280) + tmp = (1 << FIFO_OVERFLOW) | + (1 << FIFO_UNDERFLOW); + else + tmp = 0; + + writel(tmp | (1 << TIMEOUT) | + (1 << USB_STALL_SENT) | + (1 << USB_IN_NAK_SENT) | + (1 << USB_IN_ACK_RCVD) | + (1 << USB_OUT_PING_NAK_SENT) | + (1 << USB_OUT_ACK_SENT) | + (1 << SHORT_PACKET_OUT_DONE_INTERRUPT) | + (1 << SHORT_PACKET_TRANSFERRED_INTERRUPT) | + (1 << DATA_PACKET_RECEIVED_INTERRUPT) | + (1 << DATA_PACKET_TRANSMITTED_INTERRUPT) | + (1 << DATA_OUT_PING_TOKEN_INTERRUPT) | + (1 << DATA_IN_TOKEN_INTERRUPT) + , &ep->regs->ep_stat); + } + u.raw[0] = readl(&dev->usb->setup0123); + u.raw[1] = readl(&dev->usb->setup4567); cpu_to_le32s (&u.raw [0]); cpu_to_le32s (&u.raw [1]); + if (dev->pdev->vendor == 0x10b5) + defect7374_workaround(dev, u.r); + tmp = 0; #define w_value le16_to_cpu(u.r.wValue) @@ -2318,6 +3183,12 @@ static void handle_stat0_irqs (struct net2280 *dev, u32 stat) * everything else goes uplevel to the gadget code. 
*/ ep->responded = 1; + + if (dev->gadget.speed == USB_SPEED_SUPER) { + handle_stat0_irqs_superspeed(dev, ep, u.r); + goto next_endpoints; + } + switch (u.r.bRequest) { case USB_REQ_GET_STATUS: { struct net2280_ep *e; @@ -2360,8 +3231,11 @@ static void handle_stat0_irqs (struct net2280 *dev, u32 stat) VDEBUG(dev, "%s wedged, halt not cleared\n", ep->ep.name); } else { - VDEBUG(dev, "%s clear halt\n", ep->ep.name); + VDEBUG(dev, "%s clear halt\n", e->ep.name); clear_halt(e); + if (ep->dev->pdev->vendor == 0x10b5 && + !list_empty(&e->queue) && e->td_dma) + restart_dma(e); } allow_status (ep); goto next_endpoints; @@ -2381,6 +3255,8 @@ static void handle_stat0_irqs (struct net2280 *dev, u32 stat) if (e->ep.name == ep0name) goto do_stall; set_halt (e); + if (dev->pdev->vendor == 0x10b5 && e->dma) + abort_dma(e); allow_status (ep); VDEBUG (dev, "%s set halt\n", ep->ep.name); goto next_endpoints; @@ -2392,7 +3268,7 @@ delegate: "ep_cfg %08x\n", u.r.bRequestType, u.r.bRequest, w_value, w_index, w_length, - readl (&ep->regs->ep_cfg)); + readl(&ep->cfg->ep_cfg)); ep->responded = 0; spin_unlock (&dev->lock); tmp = dev->driver->setup (&dev->gadget, &u.r); @@ -2455,7 +3331,7 @@ static void handle_stat1_irqs (struct net2280 *dev, u32 stat) /* after disconnect there's nothing else to do! */ tmp = (1 << VBUS_INTERRUPT) | (1 << ROOT_PORT_RESET_INTERRUPT); - mask = (1 << HIGH_SPEED) | (1 << FULL_SPEED); + mask = (1 << SUPER_SPEED) | (1 << HIGH_SPEED) | (1 << FULL_SPEED); /* VBUS disconnect is indicated by VBUS_PIN and VBUS_INTERRUPT set. * Root Port Reset is indicated by ROOT_PORT_RESET_INTERRUPT set and @@ -2546,12 +3422,19 @@ static void handle_stat1_irqs (struct net2280 *dev, u32 stat) tmp = readl (&dma->dmastat); writel (tmp, &dma->dmastat); + /* dma sync*/ + if (dev->pdev->vendor == 0x10b5) { + u32 r_dmacount = readl(&dma->dmacount); + if (!ep->is_in && (r_dmacount & 0x00FFFFFF) && + (tmp & (1 << DMA_TRANSACTION_DONE_INTERRUPT))) + continue; + } + /* chaining should stop on abort, short OUT from fifo, * or (stat0 codepath) short OUT transfer. */ if (!use_dma_chaining) { - if ((tmp & (1 << DMA_TRANSACTION_DONE_INTERRUPT)) - == 0) { + if (!(tmp & (1 << DMA_TRANSACTION_DONE_INTERRUPT))) { DEBUG (ep->dev, "%s no xact done? 
%08x\n", ep->ep.name, tmp); continue; @@ -2625,7 +3508,8 @@ static irqreturn_t net2280_irq (int irq, void *_dev) struct net2280 *dev = _dev; /* shared interrupt, not ours */ - if (!(readl(&dev->regs->irqstat0) & (1 << INTA_ASSERTED))) + if (dev->pdev->vendor == 0x17cc && + (!(readl(&dev->regs->irqstat0) & (1 << INTA_ASSERTED)))) return IRQ_NONE; spin_lock (&dev->lock); @@ -2636,6 +3520,13 @@ static irqreturn_t net2280_irq (int irq, void *_dev) /* control requests and PIO */ handle_stat0_irqs (dev, readl (&dev->regs->irqstat0)); + if (dev->pdev->vendor == 0x10b5) { + /* re-enable interrupt to trigger any possible new interrupt */ + u32 pciirqenb1 = readl(&dev->regs->pciirqenb1); + writel(pciirqenb1 & 0x7FFFFFFF, &dev->regs->pciirqenb1); + writel(pciirqenb1, &dev->regs->pciirqenb1); + } + spin_unlock (&dev->lock); return IRQ_HANDLED; @@ -2674,6 +3565,8 @@ static void net2280_remove (struct pci_dev *pdev) } if (dev->got_irq) free_irq (pdev->irq, dev); + if (use_msi && dev->pdev->vendor == 0x10b5) + pci_disable_msi(pdev); if (dev->regs) iounmap (dev->regs); if (dev->region) @@ -2708,7 +3601,8 @@ static int net2280_probe (struct pci_dev *pdev, const struct pci_device_id *id) spin_lock_init (&dev->lock); dev->pdev = pdev; dev->gadget.ops = &net2280_ops; - dev->gadget.max_speed = USB_SPEED_HIGH; + dev->gadget.max_speed = (dev->pdev->vendor == 0x10b5) ? + USB_SPEED_SUPER : USB_SPEED_HIGH; /* the "gadget" abstracts/virtualizes the controller */ dev->gadget.name = driver_name; @@ -2750,8 +3644,39 @@ static int net2280_probe (struct pci_dev *pdev, const struct pci_device_id *id) dev->dep = (struct net2280_dep_regs __iomem *) (base + 0x0200); dev->epregs = (struct net2280_ep_regs __iomem *) (base + 0x0300); - /* put into initial config, link up all endpoints */ - writel (0, &dev->usb->usbctl); + if (dev->pdev->vendor == 0x10b5) { + u32 fsmvalue; + u32 usbstat; + dev->usb_ext = (struct usb338x_usb_ext_regs __iomem *) + (base + 0x00b4); + dev->fiforegs = (struct usb338x_fifo_regs __iomem *) + (base + 0x0500); + dev->llregs = (struct usb338x_ll_regs __iomem *) + (base + 0x0700); + dev->ll_lfps_regs = (struct usb338x_ll_lfps_regs __iomem *) + (base + 0x0748); + dev->ll_tsn_regs = (struct usb338x_ll_tsn_regs __iomem *) + (base + 0x077c); + dev->ll_chicken_reg = (struct usb338x_ll_chi_regs __iomem *) + (base + 0x079c); + dev->plregs = (struct usb338x_pl_regs __iomem *) + (base + 0x0800); + usbstat = readl(&dev->usb->usbstat); + dev->enhanced_mode = (usbstat & (1 << 11)) ? 1 : 0; + dev->n_ep = (dev->enhanced_mode) ? 
9 : 5; + /* put into initial config, link up all endpoints */ + fsmvalue = get_idx_reg(dev->regs, SCRATCH) & + (0xf << DEFECT7374_FSM_FIELD); + /* See if firmware needs to set up for workaround: */ + if (fsmvalue == DEFECT7374_FSM_SS_CONTROL_READ) + writel(0, &dev->usb->usbctl); + } else{ + dev->enhanced_mode = 0; + dev->n_ep = 7; + /* put into initial config, link up all endpoints */ + writel(0, &dev->usb->usbctl); + } + usb_reset (dev); usb_reinit (dev); @@ -2762,6 +3687,10 @@ static int net2280_probe (struct pci_dev *pdev, const struct pci_device_id *id) goto done; } + if (use_msi && dev->pdev->vendor == 0x10b5) + if (pci_enable_msi(pdev)) + ERROR(dev, "Failed to enable MSI mode\n"); + if (request_irq (pdev->irq, net2280_irq, IRQF_SHARED, driver_name, dev) != 0) { ERROR (dev, "request interrupt %d failed\n", pdev->irq); @@ -2797,7 +3726,8 @@ static int net2280_probe (struct pci_dev *pdev, const struct pci_device_id *id) } /* enable lower-overhead pci memory bursts during DMA */ - writel ( (1 << DMA_MEMORY_WRITE_AND_INVALIDATE_ENABLE) + if (dev->pdev->vendor == 0x17cc) + writel((1 << DMA_MEMORY_WRITE_AND_INVALIDATE_ENABLE) // 256 write retries may not be enough... // | (1 << PCI_RETRY_ABORT_ENABLE) | (1 << DMA_READ_MULTIPLE_ENABLE) @@ -2814,10 +3744,10 @@ static int net2280_probe (struct pci_dev *pdev, const struct pci_device_id *id) INFO (dev, "%s\n", driver_desc); INFO (dev, "irq %d, pci mem %p, chip rev %04x\n", pdev->irq, base, dev->chiprev); - INFO (dev, "version: " DRIVER_VERSION "; dma %s\n", - use_dma - ? (use_dma_chaining ? "chaining" : "enabled") - : "disabled"); + INFO(dev, "version: " DRIVER_VERSION "; dma %s %s\n", + use_dma ? (use_dma_chaining ? "chaining" : "enabled") + : "disabled", + dev->enhanced_mode ? "enhanced mode" : "legacy mode"); retval = device_create_file (&pdev->dev, &dev_attr_registers); if (retval) goto done; @@ -2849,7 +3779,8 @@ static void net2280_shutdown (struct pci_dev *pdev) writel (0, &dev->usb->usbctl); /* Disable full-speed test mode */ - writel(0, &dev->usb->xcvrdiag); + if (dev->pdev->vendor == 0x17cc) + writel(0, &dev->usb->xcvrdiag); } @@ -2869,8 +3800,24 @@ static const struct pci_device_id pci_ids [] = { { .device = 0x2282, .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, - -}, { /* end: all zeroes */ } +}, + { + .class = ((PCI_CLASS_SERIAL_USB << 8) | 0xfe), + .class_mask = ~0, + .vendor = 0x10b5, + .device = 0x3380, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + }, + { + .class = ((PCI_CLASS_SERIAL_USB << 8) | 0xfe), + .class_mask = ~0, + .vendor = 0x10b5, + .device = 0x3382, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + }, +{ /* end: all zeroes */ } }; MODULE_DEVICE_TABLE (pci, pci_ids); diff --git a/drivers/usb/gadget/net2280.h b/drivers/usb/gadget/net2280.h index a844be0d683a..a257516abbd6 100644 --- a/drivers/usb/gadget/net2280.h +++ b/drivers/usb/gadget/net2280.h @@ -6,6 +6,7 @@ /* * Copyright (C) 2002 NetChip Technology, Inc. 
(http://www.netchip.com) * Copyright (C) 2003 David Brownell + * Copyright (C) 2014 Ricardo Ribalda - Qtechnology/AS * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -14,6 +15,7 @@ */ #include +#include /*-------------------------------------------------------------------------*/ @@ -59,13 +61,14 @@ set_idx_reg (struct net2280_regs __iomem *regs, u32 index, u32 value) #define CHIPREV_1 0x0100 #define CHIPREV_1A 0x0110 -#ifdef __KERNEL__ +/* DEFECT 7374 */ +#define DEFECT_7374_NUMBEROF_MAX_WAIT_LOOPS 200 +#define DEFECT_7374_PROCESSOR_WAIT_TIME 10 -/* ep a-f highspeed and fullspeed maxpacket, addresses - * computed from ep->num - */ -#define REG_EP_MAXPKT(dev,num) (((num) + 1) * 0x10 + \ - (((dev)->gadget.speed == USB_SPEED_HIGH) ? 0 : 1)) +/* ep0 max packet size */ +#define EP0_SS_MAX_PACKET_SIZE 0x200 +#define EP0_HS_MAX_PACKET_SIZE 0x40 +#ifdef __KERNEL__ /*-------------------------------------------------------------------------*/ @@ -85,12 +88,15 @@ struct net2280_dma { struct net2280_ep { struct usb_ep ep; + struct net2280_ep_regs __iomem *cfg; struct net2280_ep_regs __iomem *regs; struct net2280_dma_regs __iomem *dma; struct net2280_dma *dummy; + struct usb338x_fifo_regs __iomem *fiforegs; dma_addr_t td_dma; /* of dummy */ struct net2280 *dev; unsigned long irqs; + unsigned is_halt:1, dma_started:1; /* analogous to a host-side qh */ struct list_head queue; @@ -116,10 +122,19 @@ static inline void allow_status (struct net2280_ep *ep) ep->stopped = 1; } -/* count (<= 4) bytes in the next fifo write will be valid */ -static inline void set_fifo_bytecount (struct net2280_ep *ep, unsigned count) +static void allow_status_338x(struct net2280_ep *ep) { - writeb (count, 2 + (u8 __iomem *) &ep->regs->ep_cfg); + /* + * Control Status Phase Handshake was set by the chip when the setup + * packet arrived. While set, the chip automatically NAKs the host's + * Status Phase tokens. + */ + writel(1 << CLEAR_CONTROL_STATUS_PHASE_HANDSHAKE, &ep->regs->ep_rsp); + + ep->stopped = 1; + + /* TD 9.9 Halt Endpoint test. TD 9.22 set feature test. */ + ep->responded = 0; } struct net2280_request { @@ -135,23 +150,38 @@ struct net2280 { /* each pci device provides one gadget, several endpoints */ struct usb_gadget gadget; spinlock_t lock; - struct net2280_ep ep [7]; + struct net2280_ep ep[9]; struct usb_gadget_driver *driver; unsigned enabled : 1, protocol_stall : 1, softconnect : 1, got_irq : 1, - region : 1; + region:1, + u1_enable:1, + u2_enable:1, + ltm_enable:1, + wakeup_enable:1, + selfpowered:1, + addressed_state:1; u16 chiprev; + int enhanced_mode; + int n_ep; /* pci state used to access those endpoints */ struct pci_dev *pdev; struct net2280_regs __iomem *regs; struct net2280_usb_regs __iomem *usb; + struct usb338x_usb_ext_regs __iomem *usb_ext; struct net2280_pci_regs __iomem *pci; struct net2280_dma_regs __iomem *dma; struct net2280_dep_regs __iomem *dep; struct net2280_ep_regs __iomem *epregs; + struct usb338x_fifo_regs __iomem *fiforegs; + struct usb338x_ll_regs __iomem *llregs; + struct usb338x_ll_lfps_regs __iomem *ll_lfps_regs; + struct usb338x_ll_tsn_regs __iomem *ll_tsn_regs; + struct usb338x_ll_chi_regs __iomem *ll_chicken_reg; + struct usb338x_pl_regs __iomem *plregs; struct pci_pool *requests; // statistics... 
@@ -179,6 +209,43 @@ static inline void clear_halt (struct net2280_ep *ep) , &ep->regs->ep_rsp); } +/* + * FSM value for Defect 7374 (U1U2 Test) is managed in + * chip's SCRATCH register: + */ +#define DEFECT7374_FSM_FIELD 28 + +/* Waiting for Control Read: + * - A transition to this state indicates a fresh USB connection, + * before the first Setup Packet. The connection speed is not + * known. Firmware is waiting for the first Control Read. + * - Starting state: This state can be thought of as the FSM's typical + * starting state. + * - Tip: Upon the first SS Control Read the FSM never + * returns to this state. + */ +#define DEFECT7374_FSM_WAITING_FOR_CONTROL_READ (1 << DEFECT7374_FSM_FIELD) + +/* Non-SS Control Read: + * - A transition to this state indicates detection of the first HS + * or FS Control Read. + * - Tip: Upon the first SS Control Read the FSM never + * returns to this state. + */ +#define DEFECT7374_FSM_NON_SS_CONTROL_READ (2 << DEFECT7374_FSM_FIELD) + +/* SS Control Read: + * - A transition to this state indicates detection of the + * first SS Control Read. + * - This state indicates workaround completion. Workarounds no longer + * need to be applied (as long as the chip remains powered up). + * - Tip: Once in this state the FSM state does not change (until + * the chip's power is lost and restored). + * - This can be thought of as the final state of the FSM; + * the FSM 'locks-up' in this state until the chip loses power. + */ +#define DEFECT7374_FSM_SS_CONTROL_READ (3 << DEFECT7374_FSM_FIELD) + #ifdef USE_RDK_LEDS static inline void net2280_led_init (struct net2280 *dev) @@ -198,6 +265,9 @@ void net2280_led_speed (struct net2280 *dev, enum usb_device_speed speed) { u32 val = readl (&dev->regs->gpioctl); switch (speed) { + case USB_SPEED_SUPER: /* green + red */ + val |= (1 << GPIO0_DATA) | (1 << GPIO1_DATA); + break; case USB_SPEED_HIGH: /* green */ val &= ~(1 << GPIO0_DATA); val |= (1 << GPIO1_DATA); @@ -271,6 +341,17 @@ static inline void net2280_led_shutdown (struct net2280 *dev) /*-------------------------------------------------------------------------*/ +static inline void set_fifo_bytecount(struct net2280_ep *ep, unsigned count) +{ + if (ep->dev->pdev->vendor == 0x17cc) + writeb(count, 2 + (u8 __iomem *) &ep->regs->ep_cfg); + else{ + u32 tmp = readl(&ep->cfg->ep_cfg) & + (~(0x07 << EP_FIFO_BYTE_COUNT)); + writel(tmp | (count << EP_FIFO_BYTE_COUNT), &ep->cfg->ep_cfg); + } +} + static inline void start_out_naking (struct net2280_ep *ep) { /* NOTE: hardware races lurk here, and PING protocol issues */ @@ -305,4 +386,22 @@ static inline void stop_out_naking (struct net2280_ep *ep) writel ((1 << CLEAR_NAK_OUT_PACKETS), &ep->regs->ep_rsp); } + +static inline void set_max_speed(struct net2280_ep *ep, u32 max) +{ + u32 reg; + static const u32 ep_enhanced[9] = { 0x10, 0x60, 0x30, 0x80, + 0x50, 0x20, 0x70, 0x40, 0x90 }; + + if (ep->dev->enhanced_mode) + reg = ep_enhanced[ep->num]; + else{ + reg = (ep->num + 1) * 0x10; + if (ep->dev->gadget.speed != USB_SPEED_HIGH) + reg += 1; + } + + set_idx_reg(ep->dev->regs, reg, max); +} + #endif /* __KERNEL__ */ diff --git a/include/linux/usb/usb338x.h b/include/linux/usb/usb338x.h new file mode 100644 index 000000000000..f92eb635b9d3 --- /dev/null +++ b/include/linux/usb/usb338x.h @@ -0,0 +1,199 @@ +/* + * USB 338x super/high/full speed USB device controller. + * Unlike many such controllers, this one talks PCI. + * + * Copyright (C) 2002 NetChip Technology, Inc. 
(http://www.netchip.com) + * Copyright (C) 2003 David Brownell + * Copyright (C) 2014 Ricardo Ribalda - Qtechnology/AS + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef __LINUX_USB_USB338X_H +#define __LINUX_USB_USB338X_H + +#include + +/* + * Extra defined bits for net2280 registers + */ +#define SCRATCH 0x0b + +#define DEFECT7374_FSM_FIELD 28 +#define SUPER_SPEED 8 +#define DMA_REQUEST_OUTSTANDING 5 +#define DMA_PAUSE_DONE_INTERRUPT 26 +#define SET_ISOCHRONOUS_DELAY 24 +#define SET_SEL 22 +#define SUPER_SPEED_MODE 8 + +/*ep_cfg*/ +#define MAX_BURST_SIZE 24 +#define EP_FIFO_BYTE_COUNT 16 +#define IN_ENDPOINT_ENABLE 14 +#define IN_ENDPOINT_TYPE 12 +#define OUT_ENDPOINT_ENABLE 10 +#define OUT_ENDPOINT_TYPE 8 + +struct usb338x_usb_ext_regs { + u32 usbclass; +#define DEVICE_PROTOCOL 16 +#define DEVICE_SUB_CLASS 8 +#define DEVICE_CLASS 0 + u32 ss_sel; +#define U2_SYSTEM_EXIT_LATENCY 8 +#define U1_SYSTEM_EXIT_LATENCY 0 + u32 ss_del; +#define U2_DEVICE_EXIT_LATENCY 8 +#define U1_DEVICE_EXIT_LATENCY 0 + u32 usb2lpm; +#define USB_L1_LPM_HIRD 2 +#define USB_L1_LPM_REMOTE_WAKE 1 +#define USB_L1_LPM_SUPPORT 0 + u32 usb3belt; +#define BELT_MULTIPLIER 10 +#define BEST_EFFORT_LATENCY_TOLERANCE 0 + u32 usbctl2; +#define LTM_ENABLE 7 +#define U2_ENABLE 6 +#define U1_ENABLE 5 +#define FUNCTION_SUSPEND 4 +#define USB3_CORE_ENABLE 3 +#define USB2_CORE_ENABLE 2 +#define SERIAL_NUMBER_STRING_ENABLE 0 + u32 in_timeout; +#define GPEP3_TIMEOUT 19 +#define GPEP2_TIMEOUT 18 +#define GPEP1_TIMEOUT 17 +#define GPEP0_TIMEOUT 16 +#define GPEP3_TIMEOUT_VALUE 13 +#define GPEP3_TIMEOUT_ENABLE 12 +#define GPEP2_TIMEOUT_VALUE 9 +#define GPEP2_TIMEOUT_ENABLE 8 +#define GPEP1_TIMEOUT_VALUE 5 +#define GPEP1_TIMEOUT_ENABLE 4 +#define GPEP0_TIMEOUT_VALUE 1 +#define GPEP0_TIMEOUT_ENABLE 0 + u32 isodelay; +#define ISOCHRONOUS_DELAY 0 +} __packed; + +struct usb338x_fifo_regs { + /* offset 0x0500, 0x0520, 0x0540, 0x0560, 0x0580 */ + u32 ep_fifo_size_base; +#define IN_FIFO_BASE_ADDRESS 22 +#define IN_FIFO_SIZE 16 +#define OUT_FIFO_BASE_ADDRESS 6 +#define OUT_FIFO_SIZE 0 + u32 ep_fifo_out_wrptr; + u32 ep_fifo_out_rdptr; + u32 ep_fifo_in_wrptr; + u32 ep_fifo_in_rdptr; + u32 unused[3]; +} __packed; + + +/* Link layer */ +struct usb338x_ll_regs { + /* offset 0x700 */ + u32 ll_ltssm_ctrl1; + u32 ll_ltssm_ctrl2; + u32 ll_ltssm_ctrl3; + u32 unused[2]; + u32 ll_general_ctrl0; + u32 ll_general_ctrl1; +#define PM_U3_AUTO_EXIT 29 +#define PM_U2_AUTO_EXIT 28 +#define PM_U1_AUTO_EXIT 27 +#define PM_FORCE_U2_ENTRY 26 +#define PM_FORCE_U1_ENTRY 25 +#define PM_LGO_COLLISION_SEND_LAU 24 +#define PM_DIR_LINK_REJECT 23 +#define PM_FORCE_LINK_ACCEPT 22 +#define PM_DIR_ENTRY_U3 20 +#define PM_DIR_ENTRY_U2 19 +#define PM_DIR_ENTRY_U1 18 +#define PM_U2_ENABLE 17 +#define PM_U1_ENABLE 16 +#define SKP_THRESHOLD_ADJUST_FMW 8 +#define RESEND_DPP_ON_LRTY_FMW 7 +#define DL_BIT_VALUE_FMW 6 +#define FORCE_DL_BIT 5 + u32 ll_general_ctrl2; +#define SELECT_INVERT_LANE_POLARITY 7 +#define FORCE_INVERT_LANE_POLARITY 6 + u32 ll_general_ctrl3; + u32 ll_general_ctrl4; + u32 ll_error_gen; +} 
__packed; + +struct usb338x_ll_lfps_regs { + /* offset 0x748 */ + u32 ll_lfps_5; +#define TIMER_LFPS_6US 16 + u32 ll_lfps_6; +#define TIMER_LFPS_80US 0 +} __packed; + +struct usb338x_ll_tsn_regs { + /* offset 0x77C */ + u32 ll_tsn_counters_2; +#define HOT_TX_NORESET_TS2 24 + u32 ll_tsn_counters_3; +#define HOT_RX_RESET_TS2 0 +} __packed; + +struct usb338x_ll_chi_regs { + /* offset 0x79C */ + u32 ll_tsn_chicken_bit; +#define RECOVERY_IDLE_TO_RECOVER_FMW 3 +} __packed; + +/* protocol layer */ +struct usb338x_pl_regs { + /* offset 0x800 */ + u32 pl_reg_1; + u32 pl_reg_2; + u32 pl_reg_3; + u32 pl_reg_4; + u32 pl_ep_ctrl; + /* Protocol Layer Endpoint Control*/ +#define PL_EP_CTRL 0x810 +#define ENDPOINT_SELECT 0 + /* [4:0] */ +#define EP_INITIALIZED 16 +#define SEQUENCE_NUMBER_RESET 17 +#define CLEAR_ACK_ERROR_CODE 20 + u32 pl_reg_6; + u32 pl_reg_7; + u32 pl_reg_8; + u32 pl_ep_status_1; + /* Protocol Layer Endpoint Status 1*/ +#define PL_EP_STATUS_1 0x820 +#define STATE 16 +#define ACK_GOOD_NORMAL 0x11 +#define ACK_GOOD_MORE_ACKS_TO_COME 0x16 + u32 pl_ep_status_2; + u32 pl_ep_status_3; + /* Protocol Layer Endpoint Status 3*/ +#define PL_EP_STATUS_3 0x828 +#define SEQUENCE_NUMBER 0 + u32 pl_ep_status_4; + /* Protocol Layer Endpoint Status 4*/ +#define PL_EP_STATUS_4 0x82c + u32 pl_ep_cfg_4; + /* Protocol Layer Endpoint Configuration 4*/ +#define PL_EP_CFG_4 0x830 +#define NON_CTRL_IN_TOLERATE_BAD_DIR 6 +} __packed; + +#endif /* __LINUX_USB_USB338X_H */ -- cgit v1.2.3-59-g8ed1b From b0d0ce8b6b91a0f6f99045b6019fc4c824634fb4 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 30 Jun 2014 12:10:24 +0200 Subject: spi: sh-msiof: Add DMA support Add DMA support to the MSIOF driver using platform data. As MSIOF DMA is limited to 32-bit words (requiring byte/wordswapping for smaller wordsizes), and the group length is limited to 256 words, DMA is performed on two fixed pages, allocated and mapped at driver initialization time. Performance figures (in Mbps) on r8a7791/koelsch at different SPI clock frequencies for 1024-byte and 4096-byte transfers: 1024 bytes 4096 bytes - 3.25 MHz: PIO 2.1, DMA 2.6 | PIO 2.8, DMA 3.1 - 6.5 MHz: PIO 3.2, DMA 4.4 | PIO 5.0, DMA 5.9 - 13 MHz: PIO 4.2, DMA 6.6 | PIO 8.2, DMA 10.7 - 26 MHz: PIO 5.9, DMA 10.4 | PIO 12.4, DMA 18.4 Note that DMA is only faster than PIO for transfers that exceed the FIFO size (typically 64 words / 256 bytes). Also note that large transfers (larger than the group length for DMA, or larger than the FIFO size for PIO), should use cs-gpio (with the appropriate pinmux setup), as the hardware chipselect will be deasserted in between chunks. 
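To make the 32-bit packing requirement above concrete, here is a simplified sketch (not part of the patch, and ignoring the unaligned-buffer handling the real helpers add) of how sub-32-bit transfer words are converted for the DMA engine: 8-bit data is byte-swapped within each 32-bit group, 16-bit data is halfword-swapped, and 32-bit data is copied unchanged.

    #include <linux/swab.h>	/* swab32(), swahw32() */

    /* Illustrative only; mirrors copy_bswap32()/copy_wswap32()/copy_plain32()
     * in the diff below, assuming both buffers are 32-bit aligned. */
    static void pack_for_dma(u32 *dst, const u32 *src,
    			 unsigned int words, unsigned int bits)
    {
    	while (words--) {
    		u32 v = *src++;

    		if (bits <= 8)
    			v = swab32(v);		/* reverse the four bytes */
    		else if (bits <= 16)
    			v = swahw32(v);		/* swap the two 16-bit halves */
    		*dst++ = v;			/* 32-bit words pass through */
    	}
    }

This is also why the transfer path checks the chunk length before taking the DMA route: an 8-bit transfer must be a multiple of 4 bytes and a 16-bit transfer a multiple of 2 bytes, otherwise the driver falls back to PIO for that transfer.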
Signed-off-by: Geert Uytterhoeven Signed-off-by: Mark Brown --- drivers/spi/spi-sh-msiof.c | 348 ++++++++++++++++++++++++++++++++++++++++++- include/linux/spi/sh_msiof.h | 2 + 2 files changed, 343 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/spi/spi-sh-msiof.c b/drivers/spi/spi-sh-msiof.c index 824f44e6bd88..9922ed3a4441 100644 --- a/drivers/spi/spi-sh-msiof.c +++ b/drivers/spi/spi-sh-msiof.c @@ -2,6 +2,7 @@ * SuperH MSIOF SPI Master Interface * * Copyright (c) 2009 Magnus Damm + * Copyright (C) 2014 Glider bvba * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -13,6 +14,8 @@ #include #include #include +#include +#include #include #include #include @@ -23,6 +26,7 @@ #include #include #include +#include #include #include @@ -37,6 +41,7 @@ struct sh_msiof_chipdata { }; struct sh_msiof_spi_priv { + struct spi_master *master; void __iomem *mapbase; struct clk *clk; struct platform_device *pdev; @@ -45,6 +50,10 @@ struct sh_msiof_spi_priv { struct completion done; int tx_fifo_size; int rx_fifo_size; + void *tx_dma_page; + void *rx_dma_page; + dma_addr_t tx_dma_addr; + dma_addr_t rx_dma_addr; }; #define TMDR1 0x00 /* Transmit Mode Register 1 */ @@ -84,6 +93,8 @@ struct sh_msiof_spi_priv { #define MDR2_WDLEN1(i) (((i) - 1) << 16) /* Word Count (1-64/256 (SH, A1))) */ #define MDR2_GRPMASK1 0x00000001 /* Group Output Mask 1 (SH, A1) */ +#define MAX_WDLEN 256U + /* TSCR and RSCR */ #define SCR_BRPS_MASK 0x1f00 /* Prescaler Setting (1-32) */ #define SCR_BRPS(i) (((i) - 1) << 8) @@ -282,8 +293,6 @@ static void sh_msiof_spi_set_pin_regs(struct sh_msiof_spi_priv *p, * 1 0 11 11 0 0 * 1 1 11 11 1 1 */ - sh_msiof_write(p, FCTR, 0); - tmp = MDR1_SYNCMD_SPI | 1 << MDR1_FLD_SHIFT | MDR1_XXSTP; tmp |= !cs_high << MDR1_SYNCAC_SHIFT; tmp |= lsb_first << MDR1_BITLSB_SHIFT; @@ -319,8 +328,6 @@ static void sh_msiof_spi_set_mode_regs(struct sh_msiof_spi_priv *p, if (rx_buf) sh_msiof_write(p, RMDR2, dr2); - - sh_msiof_write(p, IER, STR_TEOF | STR_REOF); } static void sh_msiof_reset_str(struct sh_msiof_spi_priv *p) @@ -563,8 +570,12 @@ static int sh_msiof_spi_txrx_once(struct sh_msiof_spi_priv *p, /* the fifo contents need shifting */ fifo_shift = 32 - bits; + /* default FIFO watermarks for PIO */ + sh_msiof_write(p, FCTR, 0); + /* setup msiof transfer mode registers */ sh_msiof_spi_set_mode_regs(p, tx_buf, rx_buf, bits, words); + sh_msiof_write(p, IER, IER_TEOFE | IER_REOFE); /* write tx fifo */ if (tx_buf) @@ -609,11 +620,170 @@ stop_ier: return ret; } +static void sh_msiof_dma_complete(void *arg) +{ + struct sh_msiof_spi_priv *p = arg; + + sh_msiof_write(p, IER, 0); + complete(&p->done); +} + +static int sh_msiof_dma_once(struct sh_msiof_spi_priv *p, const void *tx, + void *rx, unsigned int len) +{ + u32 ier_bits = 0; + struct dma_async_tx_descriptor *desc_tx = NULL, *desc_rx = NULL; + dma_cookie_t cookie; + int ret; + + /* 1 stage FIFO watermarks for DMA */ + sh_msiof_write(p, FCTR, FCTR_TFWM_1 | FCTR_RFWM_1); + + /* setup msiof transfer mode registers (32-bit words) */ + sh_msiof_spi_set_mode_regs(p, tx, rx, 32, len / 4); + + if (tx) { + ier_bits |= IER_TDREQE | IER_TDMAE; + dma_sync_single_for_device(&p->pdev->dev, p->tx_dma_addr, len, + DMA_TO_DEVICE); + desc_tx = dmaengine_prep_slave_single(p->master->dma_tx, + p->tx_dma_addr, len, DMA_TO_DEVICE, + DMA_PREP_INTERRUPT | DMA_CTRL_ACK); + if (!desc_tx) + return -EIO; + } + + if (rx) { + ier_bits |= IER_RDREQE | IER_RDMAE; + desc_rx = 
dmaengine_prep_slave_single(p->master->dma_rx, + p->rx_dma_addr, len, DMA_FROM_DEVICE, + DMA_PREP_INTERRUPT | DMA_CTRL_ACK); + if (!desc_rx) + return -EIO; + } + sh_msiof_write(p, IER, ier_bits); + + reinit_completion(&p->done); + + if (rx) { + desc_rx->callback = sh_msiof_dma_complete; + desc_rx->callback_param = p; + cookie = dmaengine_submit(desc_rx); + if (dma_submit_error(cookie)) { + ret = cookie; + goto stop_ier; + } + dma_async_issue_pending(p->master->dma_rx); + } + + if (tx) { + if (rx) { + /* No callback */ + desc_tx->callback = NULL; + } else { + desc_tx->callback = sh_msiof_dma_complete; + desc_tx->callback_param = p; + } + cookie = dmaengine_submit(desc_tx); + if (dma_submit_error(cookie)) { + ret = cookie; + goto stop_rx; + } + dma_async_issue_pending(p->master->dma_tx); + } + + ret = sh_msiof_spi_start(p, rx); + if (ret) { + dev_err(&p->pdev->dev, "failed to start hardware\n"); + goto stop_tx; + } + + /* wait for tx fifo to be emptied / rx fifo to be filled */ + ret = wait_for_completion_timeout(&p->done, HZ); + if (!ret) { + dev_err(&p->pdev->dev, "DMA timeout\n"); + ret = -ETIMEDOUT; + goto stop_reset; + } + + /* clear status bits */ + sh_msiof_reset_str(p); + + ret = sh_msiof_spi_stop(p, rx); + if (ret) { + dev_err(&p->pdev->dev, "failed to shut down hardware\n"); + return ret; + } + + if (rx) + dma_sync_single_for_cpu(&p->pdev->dev, p->rx_dma_addr, len, + DMA_FROM_DEVICE); + + return 0; + +stop_reset: + sh_msiof_reset_str(p); + sh_msiof_spi_stop(p, rx); +stop_tx: + if (tx) + dmaengine_terminate_all(p->master->dma_tx); +stop_rx: + if (rx) + dmaengine_terminate_all(p->master->dma_rx); +stop_ier: + sh_msiof_write(p, IER, 0); + return ret; +} + +static void copy_bswap32(u32 *dst, const u32 *src, unsigned int words) +{ + /* src or dst can be unaligned, but not both */ + if ((unsigned long)src & 3) { + while (words--) { + *dst++ = swab32(get_unaligned(src)); + src++; + } + } else if ((unsigned long)dst & 3) { + while (words--) { + put_unaligned(swab32(*src++), dst); + dst++; + } + } else { + while (words--) + *dst++ = swab32(*src++); + } +} + +static void copy_wswap32(u32 *dst, const u32 *src, unsigned int words) +{ + /* src or dst can be unaligned, but not both */ + if ((unsigned long)src & 3) { + while (words--) { + *dst++ = swahw32(get_unaligned(src)); + src++; + } + } else if ((unsigned long)dst & 3) { + while (words--) { + put_unaligned(swahw32(*src++), dst); + dst++; + } + } else { + while (words--) + *dst++ = swahw32(*src++); + } +} + +static void copy_plain32(u32 *dst, const u32 *src, unsigned int words) +{ + memcpy(dst, src, words * 4); +} + static int sh_msiof_transfer_one(struct spi_master *master, struct spi_device *spi, struct spi_transfer *t) { struct sh_msiof_spi_priv *p = spi_master_get_devdata(master); + void (*copy32)(u32 *, const u32 *, unsigned int); void (*tx_fifo)(struct sh_msiof_spi_priv *, const void *, int, int); void (*rx_fifo)(struct sh_msiof_spi_priv *, void *, int, int); const void *tx_buf = t->tx_buf; @@ -624,7 +794,48 @@ static int sh_msiof_transfer_one(struct spi_master *master, unsigned int words; int n; bool swab; + int ret; + + /* setup clocks (clock already enabled in chipselect()) */ + sh_msiof_spi_set_clk_regs(p, clk_get_rate(p->clk), t->speed_hz); + + while (master->dma_tx && len > 15) { + /* + * DMA supports 32-bit words only, hence pack 8-bit and 16-bit + * words, with byte resp. word swapping. 
+ */ + unsigned int l = min(len, MAX_WDLEN * 4); + + if (bits <= 8) { + if (l & 3) + break; + copy32 = copy_bswap32; + } else if (bits <= 16) { + if (l & 1) + break; + copy32 = copy_wswap32; + } else { + copy32 = copy_plain32; + } + + if (tx_buf) + copy32(p->tx_dma_page, tx_buf, l / 4); + ret = sh_msiof_dma_once(p, tx_buf, rx_buf, l); + if (ret) + return ret; + + if (rx_buf) { + copy32(rx_buf, p->rx_dma_page, l / 4); + rx_buf += l; + } + if (tx_buf) + tx_buf += l; + + len -= l; + if (!len) + return 0; + } if (bits <= 8 && len > 15 && !(len & 3)) { bits = 32; @@ -673,9 +884,6 @@ static int sh_msiof_transfer_one(struct spi_master *master, rx_fifo = sh_msiof_spi_read_fifo_32; } - /* setup clocks (clock already enabled in chipselect()) */ - sh_msiof_spi_set_clk_regs(p, clk_get_rate(p->clk), t->speed_hz); - /* transfer in fifo sized chunks */ words = len / bytes_per_word; @@ -745,6 +953,123 @@ static struct sh_msiof_spi_info *sh_msiof_spi_parse_dt(struct device *dev) } #endif +static struct dma_chan *sh_msiof_request_dma_chan(struct device *dev, + enum dma_transfer_direction dir, unsigned int id, dma_addr_t port_addr) +{ + dma_cap_mask_t mask; + struct dma_chan *chan; + struct dma_slave_config cfg; + int ret; + + dma_cap_zero(mask); + dma_cap_set(DMA_SLAVE, mask); + + chan = dma_request_channel(mask, shdma_chan_filter, + (void *)(unsigned long)id); + if (!chan) { + dev_warn(dev, "dma_request_channel failed\n"); + return NULL; + } + + memset(&cfg, 0, sizeof(cfg)); + cfg.slave_id = id; + cfg.direction = dir; + if (dir == DMA_MEM_TO_DEV) + cfg.dst_addr = port_addr; + else + cfg.src_addr = port_addr; + + ret = dmaengine_slave_config(chan, &cfg); + if (ret) { + dev_warn(dev, "dmaengine_slave_config failed %d\n", ret); + dma_release_channel(chan); + return NULL; + } + + return chan; +} + +static int sh_msiof_request_dma(struct sh_msiof_spi_priv *p) +{ + struct platform_device *pdev = p->pdev; + struct device *dev = &pdev->dev; + const struct sh_msiof_spi_info *info = dev_get_platdata(dev); + const struct resource *res; + struct spi_master *master; + + if (!info || !info->dma_tx_id || !info->dma_rx_id) + return 0; /* The driver assumes no error */ + + /* The DMA engine uses the second register set, if present */ + res = platform_get_resource(pdev, IORESOURCE_MEM, 1); + if (!res) + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + + master = p->master; + master->dma_tx = sh_msiof_request_dma_chan(dev, DMA_MEM_TO_DEV, + info->dma_tx_id, + res->start + TFDR); + if (!master->dma_tx) + return -ENODEV; + + master->dma_rx = sh_msiof_request_dma_chan(dev, DMA_DEV_TO_MEM, + info->dma_rx_id, + res->start + RFDR); + if (!master->dma_rx) + goto free_tx_chan; + + p->tx_dma_page = (void *)__get_free_page(GFP_KERNEL | GFP_DMA); + if (!p->tx_dma_page) + goto free_rx_chan; + + p->rx_dma_page = (void *)__get_free_page(GFP_KERNEL | GFP_DMA); + if (!p->rx_dma_page) + goto free_tx_page; + + p->tx_dma_addr = dma_map_single(dev, p->tx_dma_page, PAGE_SIZE, + DMA_TO_DEVICE); + if (dma_mapping_error(dev, p->tx_dma_addr)) + goto free_rx_page; + + p->rx_dma_addr = dma_map_single(dev, p->rx_dma_page, PAGE_SIZE, + DMA_FROM_DEVICE); + if (dma_mapping_error(dev, p->rx_dma_addr)) + goto unmap_tx_page; + + dev_info(dev, "DMA available"); + return 0; + +unmap_tx_page: + dma_unmap_single(dev, p->tx_dma_addr, PAGE_SIZE, DMA_TO_DEVICE); +free_rx_page: + free_page((unsigned long)p->rx_dma_page); +free_tx_page: + free_page((unsigned long)p->tx_dma_page); +free_rx_chan: + dma_release_channel(master->dma_rx); +free_tx_chan: + 
dma_release_channel(master->dma_tx); + master->dma_tx = NULL; + return -ENODEV; +} + +static void sh_msiof_release_dma(struct sh_msiof_spi_priv *p) +{ + struct spi_master *master = p->master; + struct device *dev; + + if (!master->dma_tx) + return; + + dev = &p->pdev->dev; + dma_unmap_single(dev, p->rx_dma_addr, PAGE_SIZE, DMA_FROM_DEVICE); + dma_unmap_single(dev, p->tx_dma_addr, PAGE_SIZE, DMA_TO_DEVICE); + free_page((unsigned long)p->rx_dma_page); + free_page((unsigned long)p->tx_dma_page); + dma_release_channel(master->dma_rx); + dma_release_channel(master->dma_tx); +} + static int sh_msiof_spi_probe(struct platform_device *pdev) { struct resource *r; @@ -763,6 +1088,7 @@ static int sh_msiof_spi_probe(struct platform_device *pdev) p = spi_master_get_devdata(master); platform_set_drvdata(pdev, p); + p->master = master; of_id = of_match_device(sh_msiof_match, &pdev->dev); if (of_id) { @@ -833,6 +1159,10 @@ static int sh_msiof_spi_probe(struct platform_device *pdev) master->auto_runtime_pm = true; master->transfer_one = sh_msiof_transfer_one; + ret = sh_msiof_request_dma(p); + if (ret < 0) + dev_warn(&pdev->dev, "DMA not available, using PIO\n"); + ret = devm_spi_register_master(&pdev->dev, master); if (ret < 0) { dev_err(&pdev->dev, "spi_register_master error.\n"); @@ -842,6 +1172,7 @@ static int sh_msiof_spi_probe(struct platform_device *pdev) return 0; err2: + sh_msiof_release_dma(p); pm_runtime_disable(&pdev->dev); err1: spi_master_put(master); @@ -850,6 +1181,9 @@ static int sh_msiof_spi_probe(struct platform_device *pdev) static int sh_msiof_spi_remove(struct platform_device *pdev) { + struct sh_msiof_spi_priv *p = platform_get_drvdata(pdev); + + sh_msiof_release_dma(p); pm_runtime_disable(&pdev->dev); return 0; } diff --git a/include/linux/spi/sh_msiof.h b/include/linux/spi/sh_msiof.h index 2e8db3d2d2e5..88a14d81c49e 100644 --- a/include/linux/spi/sh_msiof.h +++ b/include/linux/spi/sh_msiof.h @@ -5,6 +5,8 @@ struct sh_msiof_spi_info { int tx_fifo_override; int rx_fifo_override; u16 num_chipselect; + unsigned int dma_tx_id; + unsigned int dma_rx_id; }; #endif /* __SPI_SH_MSIOF_H__ */ -- cgit v1.2.3-59-g8ed1b From 981409b25e2a99409b26daa67293ca1cfd5ea0a0 Mon Sep 17 00:00:00 2001 From: Archit Taneja Date: Fri, 16 Nov 2012 14:46:04 +0530 Subject: fbdev: arm has __raw I/O accessors, use them in fb.h This removes the sparse warnings on arm platforms: warning: cast removes address space of expression Signed-off-by: Archit Taneja Signed-off-by: Tomi Valkeinen Cc: H Hartley Sweeten Cc: Alexander Shiyan Cc: Russell King --- include/linux/fb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fb.h b/include/linux/fb.h index b6bfda99add3..09bb7a18d287 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -553,7 +553,7 @@ static inline struct apertures_struct *alloc_apertures(unsigned int max_num) { #define fb_memcpy_fromfb sbus_memcpy_fromio #define fb_memcpy_tofb sbus_memcpy_toio -#elif defined(__i386__) || defined(__alpha__) || defined(__x86_64__) || defined(__hppa__) || defined(__sh__) || defined(__powerpc__) || defined(__avr32__) || defined(__bfin__) +#elif defined(__i386__) || defined(__alpha__) || defined(__x86_64__) || defined(__hppa__) || defined(__sh__) || defined(__powerpc__) || defined(__avr32__) || defined(__bfin__) || defined(__arm__) #define fb_readb __raw_readb #define fb_readw __raw_readw -- cgit v1.2.3-59-g8ed1b From 79922b8009c074e30d3a97f5a24519f11814ad03 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" 
Date: Tue, 6 May 2014 21:56:17 -0400 Subject: ftrace: Optimize function graph to be called directly Function graph tracing is a bit different than the function tracers, as it is processed after either the ftrace_caller or ftrace_regs_caller and we only have one place to modify the jump to ftrace_graph_caller, the jump needs to happen after the restore of registeres. The function graph tracer is dependent on the function tracer, where even if the function graph tracing is going on by itself, the save and restore of registers is still done for function tracing regardless of if function tracing is happening, before it calls the function graph code. If there's no function tracing happening, it is possible to just call the function graph tracer directly, and avoid the wasted effort to save and restore regs for function tracing. This requires adding new flags to the dyn_ftrace records: FTRACE_FL_TRAMP FTRACE_FL_TRAMP_EN The first is set if the count for the record is one, and the ftrace_ops associated to that record has its own trampoline. That way the mcount code can call that trampoline directly. In the future, trampolines can be added to arbitrary ftrace_ops, where you can have two or more ftrace_ops registered to ftrace (like kprobes and perf) and if they are not tracing the same functions, then instead of doing a loop to check all registered ftrace_ops against their hashes, just call the ftrace_ops trampoline directly, which would call the registered ftrace_ops function directly. Without this patch perf showed: 0.05% hackbench [kernel.kallsyms] [k] ftrace_caller 0.05% hackbench [kernel.kallsyms] [k] arch_local_irq_save 0.05% hackbench [kernel.kallsyms] [k] native_sched_clock 0.04% hackbench [kernel.kallsyms] [k] __buffer_unlock_commit 0.04% hackbench [kernel.kallsyms] [k] preempt_trace 0.04% hackbench [kernel.kallsyms] [k] prepare_ftrace_return 0.04% hackbench [kernel.kallsyms] [k] __this_cpu_preempt_check 0.04% hackbench [kernel.kallsyms] [k] ftrace_graph_caller See that the ftrace_caller took up more time than the ftrace_graph_caller did. With this patch: 0.05% hackbench [kernel.kallsyms] [k] __buffer_unlock_commit 0.04% hackbench [kernel.kallsyms] [k] call_filter_check_discard 0.04% hackbench [kernel.kallsyms] [k] ftrace_graph_caller 0.04% hackbench [kernel.kallsyms] [k] sched_clock The ftrace_caller is no where to be found and ftrace_graph_caller still takes up the same percentage. 
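In outline, the call-site selection this change enables can be sketched as below (an illustrative helper only; the real logic is ftrace_get_addr_new()/ftrace_get_addr_curr() in the diff that follows, and ops_trampoline_for() is a hypothetical stand-in for the tramp_hash lookup):

    /* Sketch: choose what an mcount call site should jump to. */
    static unsigned long pick_call_target(struct dyn_ftrace *rec)
    {
    	/* Exactly one callback traces this function and its ftrace_ops
    	 * provides its own trampoline: jump straight to that trampoline,
    	 * skipping the generic save/restore in ftrace_caller or
    	 * ftrace_regs_caller. */
    	if (rec->flags & FTRACE_FL_TRAMP)
    		return ops_trampoline_for(rec);	/* hypothetical lookup */

    	/* Some registered callback wants pt_regs: use the
    	 * register-saving entry point. */
    	if (rec->flags & FTRACE_FL_REGS)
    		return (unsigned long)FTRACE_REGS_ADDR;

    	/* Default: the generic caller that iterates the ftrace_ops list. */
    	return (unsigned long)FTRACE_ADDR;
    }

When function graph tracing is registered on its own, global_ops.trampoline is set to FTRACE_GRAPH_ADDR, so traced sites call ftrace_graph_caller directly; that is the effect visible in the perf output above.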
Signed-off-by: Steven Rostedt --- arch/x86/kernel/mcount_64.S | 5 + include/linux/ftrace.h | 19 +++- kernel/trace/ftrace.c | 242 ++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 254 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/mcount_64.S b/arch/x86/kernel/mcount_64.S index c050a0153168..6b4e3c3b3d74 100644 --- a/arch/x86/kernel/mcount_64.S +++ b/arch/x86/kernel/mcount_64.S @@ -182,6 +182,10 @@ END(function_hook) ENTRY(ftrace_graph_caller) MCOUNT_SAVE_FRAME + /* Check if tracing was disabled (quick check) */ + cmpl $0, function_trace_stop + jne fgraph_skip + #ifdef CC_USING_FENTRY leaq SS+16(%rsp), %rdi movq $0, %rdx /* No framepointers needed */ @@ -194,6 +198,7 @@ ENTRY(ftrace_graph_caller) call prepare_ftrace_return +fgraph_skip: MCOUNT_RESTORE_FRAME retq diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index e5baa6b2c93f..11e18fd58b1a 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -118,12 +118,15 @@ struct ftrace_ops { ftrace_func_t func; struct ftrace_ops *next; unsigned long flags; - int __percpu *disabled; void *private; + int __percpu *disabled; #ifdef CONFIG_DYNAMIC_FTRACE + int trampolines; struct ftrace_hash *notrace_hash; struct ftrace_hash *filter_hash; + struct ftrace_hash *tramp_hash; struct mutex regex_lock; + unsigned long trampoline; #endif }; @@ -317,13 +320,15 @@ extern int ftrace_nr_registered_ops(void); * from tracing that function. */ enum { - FTRACE_FL_ENABLED = (1UL << 29), + FTRACE_FL_ENABLED = (1UL << 31), FTRACE_FL_REGS = (1UL << 30), - FTRACE_FL_REGS_EN = (1UL << 31) + FTRACE_FL_REGS_EN = (1UL << 29), + FTRACE_FL_TRAMP = (1UL << 28), + FTRACE_FL_TRAMP_EN = (1UL << 27), }; -#define FTRACE_REF_MAX_SHIFT 29 -#define FTRACE_FL_BITS 3 +#define FTRACE_REF_MAX_SHIFT 27 +#define FTRACE_FL_BITS 5 #define FTRACE_FL_MASKED_BITS ((1UL << FTRACE_FL_BITS) - 1) #define FTRACE_FL_MASK (FTRACE_FL_MASKED_BITS << FTRACE_REF_MAX_SHIFT) #define FTRACE_REF_MAX ((1UL << FTRACE_REF_MAX_SHIFT) - 1) @@ -436,6 +441,10 @@ void ftrace_modify_all_code(int command); #define FTRACE_ADDR ((unsigned long)ftrace_caller) #endif +#ifndef FTRACE_GRAPH_ADDR +#define FTRACE_GRAPH_ADDR ((unsigned long)ftrace_graph_caller) +#endif + #ifndef FTRACE_REGS_ADDR #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS # define FTRACE_REGS_ADDR ((unsigned long)ftrace_regs_caller) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index a58d840305c3..5d15eb8146a7 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1042,6 +1042,8 @@ static struct pid * const ftrace_swapper_pid = &init_struct_pid; #ifdef CONFIG_DYNAMIC_FTRACE +static struct ftrace_ops *removed_ops; + #ifndef CONFIG_FTRACE_MCOUNT_RECORD # error Dynamic ftrace depends on MCOUNT_RECORD #endif @@ -1512,6 +1514,33 @@ static bool test_rec_ops_needs_regs(struct dyn_ftrace *rec) return keep_regs; } +static void ftrace_remove_tramp(struct ftrace_ops *ops, + struct dyn_ftrace *rec) +{ + struct ftrace_func_entry *entry; + + entry = ftrace_lookup_ip(ops->tramp_hash, rec->ip); + if (!entry) + return; + + /* + * The tramp_hash entry will be removed at time + * of update. 
+ */ + ops->trampolines--; + rec->flags &= ~FTRACE_FL_TRAMP; +} + +static void ftrace_clear_tramps(struct dyn_ftrace *rec) +{ + struct ftrace_ops *op; + + do_for_each_ftrace_op(op, ftrace_ops_list) { + if (op->trampolines) + ftrace_remove_tramp(op, rec); + } while_for_each_ftrace_op(op); +} + static void __ftrace_hash_rec_update(struct ftrace_ops *ops, int filter_hash, bool inc) @@ -1594,6 +1623,28 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops, rec->flags++; if (FTRACE_WARN_ON(ftrace_rec_count(rec) == FTRACE_REF_MAX)) return; + + /* + * If there's only a single callback registered to a + * function, and the ops has a trampoline registered + * for it, then we can call it directly. + */ + if (ftrace_rec_count(rec) == 1 && ops->trampoline) { + rec->flags |= FTRACE_FL_TRAMP; + ops->trampolines++; + } else { + /* + * If we are adding another function callback + * to this function, and the previous had a + * trampoline used, then we need to go back to + * the default trampoline. + */ + rec->flags &= ~FTRACE_FL_TRAMP; + + /* remove trampolines from any ops for this rec */ + ftrace_clear_tramps(rec); + } + /* * If any ops wants regs saved for this function * then all ops will get saved regs. @@ -1604,6 +1655,10 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops, if (FTRACE_WARN_ON(ftrace_rec_count(rec) == 0)) return; rec->flags--; + + if (ops->trampoline && !ftrace_rec_count(rec)) + ftrace_remove_tramp(ops, rec); + /* * If the rec had REGS enabled and the ops that is * being removed had REGS set, then see if there is @@ -1616,6 +1671,11 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops, if (!test_rec_ops_needs_regs(rec)) rec->flags &= ~FTRACE_FL_REGS; } + + /* + * flags will be cleared in ftrace_check_record() + * if rec count is zero. + */ } count++; /* Shortcut, if we handled all records, we are done. */ @@ -1704,13 +1764,19 @@ static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update) flag = FTRACE_FL_ENABLED; /* - * If enabling and the REGS flag does not match the REGS_EN, then - * do not ignore this record. Set flags to fail the compare against - * ENABLED. + * If enabling and the REGS flag does not match the REGS_EN, or + * the TRAMP flag doesn't match the TRAMP_EN, then do not ignore + * this record. Set flags to fail the compare against ENABLED. */ - if (flag && - (!(rec->flags & FTRACE_FL_REGS) != !(rec->flags & FTRACE_FL_REGS_EN))) - flag |= FTRACE_FL_REGS; + if (flag) { + if (!(rec->flags & FTRACE_FL_REGS) != + !(rec->flags & FTRACE_FL_REGS_EN)) + flag |= FTRACE_FL_REGS; + + if (!(rec->flags & FTRACE_FL_TRAMP) != + !(rec->flags & FTRACE_FL_TRAMP_EN)) + flag |= FTRACE_FL_TRAMP; + } /* If the state of this record hasn't changed, then do nothing */ if ((rec->flags & FTRACE_FL_ENABLED) == flag) @@ -1728,6 +1794,12 @@ static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update) else rec->flags &= ~FTRACE_FL_REGS_EN; } + if (flag & FTRACE_FL_TRAMP) { + if (rec->flags & FTRACE_FL_TRAMP) + rec->flags |= FTRACE_FL_TRAMP_EN; + else + rec->flags &= ~FTRACE_FL_TRAMP_EN; + } } /* @@ -1736,7 +1808,7 @@ static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update) * Otherwise, * return UPDATE_MODIFY_CALL to tell the caller to convert * from the save regs, to a non-save regs function or - * vice versa. + * vice versa, or from a trampoline call. 
*/ if (flag & FTRACE_FL_ENABLED) return FTRACE_UPDATE_MAKE_CALL; @@ -1783,6 +1855,43 @@ int ftrace_test_record(struct dyn_ftrace *rec, int enable) return ftrace_check_record(rec, enable, 0); } +static struct ftrace_ops * +ftrace_find_tramp_ops_curr(struct dyn_ftrace *rec) +{ + struct ftrace_ops *op; + + /* Removed ops need to be tested first */ + if (removed_ops && removed_ops->tramp_hash) { + if (ftrace_lookup_ip(removed_ops->tramp_hash, rec->ip)) + return removed_ops; + } + + do_for_each_ftrace_op(op, ftrace_ops_list) { + if (!op->tramp_hash) + continue; + + if (ftrace_lookup_ip(op->tramp_hash, rec->ip)) + return op; + + } while_for_each_ftrace_op(op); + + return NULL; +} + +static struct ftrace_ops * +ftrace_find_tramp_ops_new(struct dyn_ftrace *rec) +{ + struct ftrace_ops *op; + + do_for_each_ftrace_op(op, ftrace_ops_list) { + /* pass rec in as regs to have non-NULL val */ + if (ftrace_ops_test(op, rec->ip, rec)) + return op; + } while_for_each_ftrace_op(op); + + return NULL; +} + /** * ftrace_get_addr_new - Get the call address to set to * @rec: The ftrace record descriptor @@ -1795,6 +1904,20 @@ int ftrace_test_record(struct dyn_ftrace *rec, int enable) */ unsigned long ftrace_get_addr_new(struct dyn_ftrace *rec) { + struct ftrace_ops *ops; + + /* Trampolines take precedence over regs */ + if (rec->flags & FTRACE_FL_TRAMP) { + ops = ftrace_find_tramp_ops_new(rec); + if (FTRACE_WARN_ON(!ops || !ops->trampoline)) { + pr_warning("Bad trampoline accounting at: %p (%pS)\n", + (void *)rec->ip, (void *)rec->ip); + /* Ftrace is shutting down, return anything */ + return (unsigned long)FTRACE_ADDR; + } + return ops->trampoline; + } + if (rec->flags & FTRACE_FL_REGS) return (unsigned long)FTRACE_REGS_ADDR; else @@ -1813,6 +1936,20 @@ unsigned long ftrace_get_addr_new(struct dyn_ftrace *rec) */ unsigned long ftrace_get_addr_curr(struct dyn_ftrace *rec) { + struct ftrace_ops *ops; + + /* Trampolines take precedence over regs */ + if (rec->flags & FTRACE_FL_TRAMP_EN) { + ops = ftrace_find_tramp_ops_curr(rec); + if (FTRACE_WARN_ON(!ops)) { + pr_warning("Bad trampoline accounting at: %p (%pS)\n", + (void *)rec->ip, (void *)rec->ip); + /* Ftrace is shutting down, return anything */ + return (unsigned long)FTRACE_ADDR; + } + return ops->trampoline; + } + if (rec->flags & FTRACE_FL_REGS_EN) return (unsigned long)FTRACE_REGS_ADDR; else @@ -2055,6 +2192,78 @@ void __weak arch_ftrace_update_code(int command) ftrace_run_stop_machine(command); } +static int ftrace_save_ops_tramp_hash(struct ftrace_ops *ops) +{ + struct ftrace_page *pg; + struct dyn_ftrace *rec; + int size, bits; + int ret; + + size = ops->trampolines; + bits = 0; + /* + * Make the hash size about 1/2 the # found + */ + for (size /= 2; size; size >>= 1) + bits++; + + ops->tramp_hash = alloc_ftrace_hash(bits); + /* + * TODO: a failed allocation is going to screw up + * the accounting of what needs to be modified + * and not. For now, we kill ftrace if we fail + * to allocate here. But there are ways around this, + * but that will take a little more work. 
+ */ + if (!ops->tramp_hash) + return -ENOMEM; + + do_for_each_ftrace_rec(pg, rec) { + if (ftrace_rec_count(rec) == 1 && + ftrace_ops_test(ops, rec->ip, rec)) { + + /* This record had better have a trampoline */ + if (FTRACE_WARN_ON(!(rec->flags & FTRACE_FL_TRAMP_EN))) + return -1; + + ret = add_hash_entry(ops->tramp_hash, rec->ip); + if (ret < 0) + return ret; + } + } while_for_each_ftrace_rec(); + + return 0; +} + +static int ftrace_save_tramp_hashes(void) +{ + struct ftrace_ops *op; + int ret; + + /* + * Now that any trampoline is being used, we need to save the + * hashes for the ops that have them. This allows the mapping + * back from the record to the ops that has the trampoline to + * know what code is being replaced. Modifying code must always + * verify what it is changing. + */ + do_for_each_ftrace_op(op, ftrace_ops_list) { + + /* The tramp_hash is recreated each time. */ + free_ftrace_hash(op->tramp_hash); + op->tramp_hash = NULL; + + if (op->trampolines) { + ret = ftrace_save_ops_tramp_hash(op); + if (ret) + return ret; + } + + } while_for_each_ftrace_op(op); + + return 0; +} + static void ftrace_run_update_code(int command) { int ret; @@ -2081,6 +2290,9 @@ static void ftrace_run_update_code(int command) ret = ftrace_arch_code_modify_post_process(); FTRACE_WARN_ON(ret); + + ret = ftrace_save_tramp_hashes(); + FTRACE_WARN_ON(ret); } static ftrace_func_t saved_ftrace_func; @@ -2171,8 +2383,16 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int command) return 0; } + /* + * If the ops uses a trampoline, then it needs to be + * tested first on update. + */ + removed_ops = ops; + ftrace_run_update_code(command); + removed_ops = NULL; + /* * Dynamic ops may be freed, we must make sure that all * callers are done before leaving this function. @@ -5116,6 +5336,11 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc, /* Function graph doesn't use the .func field of global_ops */ global_ops.flags |= FTRACE_OPS_FL_STUB; +#ifdef CONFIG_DYNAMIC_FTRACE + /* Optimize function graph calling (if implemented by arch) */ + global_ops.trampoline = FTRACE_GRAPH_ADDR; +#endif + ret = ftrace_startup(&global_ops, FTRACE_START_FUNC_RET); out: @@ -5136,6 +5361,9 @@ void unregister_ftrace_graph(void) __ftrace_graph_entry = ftrace_graph_entry_stub; ftrace_shutdown(&global_ops, FTRACE_STOP_FUNC_RET); global_ops.flags &= ~FTRACE_OPS_FL_STUB; +#ifdef CONFIG_DYNAMIC_FTRACE + global_ops.trampoline = 0; +#endif unregister_pm_notifier(&ftrace_suspend_notifier); unregister_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL); -- cgit v1.2.3-59-g8ed1b From 12306276fabcb746a14979e96f43a13c724dec49 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Fri, 20 Jun 2014 13:38:54 -0400 Subject: tracing: Move the trace_seq_* functions into its own trace_seq.c file The trace_seq_*() functions are a nice utility that allows users to manipulate buffers with printf() like formats. It has its own trace_seq.h header in include/linux and should be in its own file. Being tied with trace_output.c is rather awkward. 
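For readers who have not used the interface, a minimal usage sketch follows. This is illustrative only and not part of the patch; example_show() and the static buffer are invented for the example, and the point is simply the printf()-like flow the commit message describes:

#include <linux/trace_seq.h>
#include <linux/seq_file.h>

/*
 * Illustrative sketch: build a line of text with the trace_seq
 * helpers and hand it to a seq_file, the way the tracer output
 * path does. struct trace_seq embeds a PAGE_SIZE buffer, so it
 * is kept off the kernel stack here (serialization against
 * concurrent callers is the caller's problem in this sketch).
 */
static struct trace_seq example_seq;

static void example_show(struct seq_file *m, int cpu, unsigned long ip)
{
	trace_seq_init(&example_seq);

	trace_seq_printf(&example_seq, "cpu=%d func=%pS", cpu, (void *)ip);
	trace_seq_putc(&example_seq, '\n');

	/* Copies the accumulated buffer into the seq_file on success. */
	trace_print_seq(m, &example_seq);
}
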
Signed-off-by: Steven Rostedt --- include/linux/trace_seq.h | 2 + kernel/trace/Makefile | 1 + kernel/trace/trace.c | 24 ---- kernel/trace/trace_output.c | 268 --------------------------------------- kernel/trace/trace_output.h | 3 - kernel/trace/trace_seq.c | 303 ++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 306 insertions(+), 295 deletions(-) create mode 100644 kernel/trace/trace_seq.c (limited to 'include/linux') diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h index 136116924d8d..66ea365acf01 100644 --- a/include/linux/trace_seq.h +++ b/include/linux/trace_seq.h @@ -25,6 +25,8 @@ trace_seq_init(struct trace_seq *s) s->full = 0; } +#define MAX_MEMHEX_BYTES 8 + /* * Currently only defined when tracing is enabled. */ diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 2611613f14f1..67d6369ddf83 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -28,6 +28,7 @@ obj-$(CONFIG_RING_BUFFER_BENCHMARK) += ring_buffer_benchmark.o obj-$(CONFIG_TRACING) += trace.o obj-$(CONFIG_TRACING) += trace_output.o +obj-$(CONFIG_TRACING) += trace_seq.o obj-$(CONFIG_TRACING) += trace_stat.o obj-$(CONFIG_TRACING) += trace_printk.o obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 384ede311717..eeb233cbac4f 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -923,30 +923,6 @@ out: return ret; } -ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt) -{ - int len; - int ret; - - if (!cnt) - return 0; - - if (s->len <= s->readpos) - return -EBUSY; - - len = s->len - s->readpos; - if (cnt > len) - cnt = len; - ret = copy_to_user(ubuf, s->buffer + s->readpos, cnt); - if (ret == cnt) - return -EFAULT; - - cnt -= ret; - - s->readpos += cnt; - return cnt; -} - static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt) { int len; diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index f3dad80c20b2..b8930f79a04b 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -20,23 +20,6 @@ static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly; static int next_event_type = __TRACE_LAST_TYPE + 1; -int trace_print_seq(struct seq_file *m, struct trace_seq *s) -{ - int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len; - int ret; - - ret = seq_write(m, s->buffer, len); - - /* - * Only reset this buffer if we successfully wrote to the - * seq_file buffer. - */ - if (!ret) - trace_seq_init(s); - - return ret; -} - enum print_line_t trace_print_bputs_msg_only(struct trace_iterator *iter) { struct trace_seq *s = &iter->seq; @@ -85,257 +68,6 @@ enum print_line_t trace_print_printk_msg_only(struct trace_iterator *iter) return TRACE_TYPE_HANDLED; } -/** - * trace_seq_printf - sequence printing of trace information - * @s: trace sequence descriptor - * @fmt: printf format string - * - * It returns 0 if the trace oversizes the buffer's free - * space, 1 otherwise. - * - * The tracer may use either sequence operations or its own - * copy to user routines. To simplify formating of a trace - * trace_seq_printf is used to store strings into a special - * buffer (@s). Then the output may be either used by - * the sequencer or pulled into another buffer. - */ -int -trace_seq_printf(struct trace_seq *s, const char *fmt, ...) 
-{ - int len = (PAGE_SIZE - 1) - s->len; - va_list ap; - int ret; - - if (s->full || !len) - return 0; - - va_start(ap, fmt); - ret = vsnprintf(s->buffer + s->len, len, fmt, ap); - va_end(ap); - - /* If we can't write it all, don't bother writing anything */ - if (ret >= len) { - s->full = 1; - return 0; - } - - s->len += ret; - - return 1; -} -EXPORT_SYMBOL_GPL(trace_seq_printf); - -/** - * trace_seq_bitmask - put a list of longs as a bitmask print output - * @s: trace sequence descriptor - * @maskp: points to an array of unsigned longs that represent a bitmask - * @nmaskbits: The number of bits that are valid in @maskp - * - * It returns 0 if the trace oversizes the buffer's free - * space, 1 otherwise. - * - * Writes a ASCII representation of a bitmask string into @s. - */ -int -trace_seq_bitmask(struct trace_seq *s, const unsigned long *maskp, - int nmaskbits) -{ - int len = (PAGE_SIZE - 1) - s->len; - int ret; - - if (s->full || !len) - return 0; - - ret = bitmap_scnprintf(s->buffer, len, maskp, nmaskbits); - s->len += ret; - - return 1; -} -EXPORT_SYMBOL_GPL(trace_seq_bitmask); - -/** - * trace_seq_vprintf - sequence printing of trace information - * @s: trace sequence descriptor - * @fmt: printf format string - * - * The tracer may use either sequence operations or its own - * copy to user routines. To simplify formating of a trace - * trace_seq_printf is used to store strings into a special - * buffer (@s). Then the output may be either used by - * the sequencer or pulled into another buffer. - */ -int -trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args) -{ - int len = (PAGE_SIZE - 1) - s->len; - int ret; - - if (s->full || !len) - return 0; - - ret = vsnprintf(s->buffer + s->len, len, fmt, args); - - /* If we can't write it all, don't bother writing anything */ - if (ret >= len) { - s->full = 1; - return 0; - } - - s->len += ret; - - return len; -} -EXPORT_SYMBOL_GPL(trace_seq_vprintf); - -int trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary) -{ - int len = (PAGE_SIZE - 1) - s->len; - int ret; - - if (s->full || !len) - return 0; - - ret = bstr_printf(s->buffer + s->len, len, fmt, binary); - - /* If we can't write it all, don't bother writing anything */ - if (ret >= len) { - s->full = 1; - return 0; - } - - s->len += ret; - - return len; -} - -/** - * trace_seq_puts - trace sequence printing of simple string - * @s: trace sequence descriptor - * @str: simple string to record - * - * The tracer may use either the sequence operations or its own - * copy to user routines. This function records a simple string - * into a special buffer (@s) for later retrieval by a sequencer - * or other mechanism. 
- */ -int trace_seq_puts(struct trace_seq *s, const char *str) -{ - int len = strlen(str); - - if (s->full) - return 0; - - if (len > ((PAGE_SIZE - 1) - s->len)) { - s->full = 1; - return 0; - } - - memcpy(s->buffer + s->len, str, len); - s->len += len; - - return len; -} - -int trace_seq_putc(struct trace_seq *s, unsigned char c) -{ - if (s->full) - return 0; - - if (s->len >= (PAGE_SIZE - 1)) { - s->full = 1; - return 0; - } - - s->buffer[s->len++] = c; - - return 1; -} -EXPORT_SYMBOL(trace_seq_putc); - -int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len) -{ - if (s->full) - return 0; - - if (len > ((PAGE_SIZE - 1) - s->len)) { - s->full = 1; - return 0; - } - - memcpy(s->buffer + s->len, mem, len); - s->len += len; - - return len; -} - -int trace_seq_putmem_hex(struct trace_seq *s, const void *mem, size_t len) -{ - unsigned char hex[HEX_CHARS]; - const unsigned char *data = mem; - int i, j; - - if (s->full) - return 0; - -#ifdef __BIG_ENDIAN - for (i = 0, j = 0; i < len; i++) { -#else - for (i = len-1, j = 0; i >= 0; i--) { -#endif - hex[j++] = hex_asc_hi(data[i]); - hex[j++] = hex_asc_lo(data[i]); - } - hex[j++] = ' '; - - return trace_seq_putmem(s, hex, j); -} - -void *trace_seq_reserve(struct trace_seq *s, size_t len) -{ - void *ret; - - if (s->full) - return NULL; - - if (len > ((PAGE_SIZE - 1) - s->len)) { - s->full = 1; - return NULL; - } - - ret = s->buffer + s->len; - s->len += len; - - return ret; -} - -int trace_seq_path(struct trace_seq *s, const struct path *path) -{ - unsigned char *p; - - if (s->full) - return 0; - - if (s->len >= (PAGE_SIZE - 1)) { - s->full = 1; - return 0; - } - - p = d_path(path, s->buffer + s->len, PAGE_SIZE - s->len); - if (!IS_ERR(p)) { - p = mangle_path(s->buffer + s->len, p, "\n"); - if (p) { - s->len = p - s->buffer; - return 1; - } - } else { - s->buffer[s->len++] = '?'; - return 1; - } - - s->full = 1; - return 0; -} - const char * ftrace_print_flags_seq(struct trace_seq *p, const char *delim, unsigned long flags, diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h index 127a9d8c8357..bf7daf2237ed 100644 --- a/kernel/trace/trace_output.h +++ b/kernel/trace/trace_output.h @@ -35,9 +35,6 @@ trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry); extern int __unregister_ftrace_event(struct trace_event *event); extern struct rw_semaphore trace_event_sem; -#define MAX_MEMHEX_BYTES 8 -#define HEX_CHARS (MAX_MEMHEX_BYTES*2 + 1) - #define SEQ_PUT_FIELD_RET(s, x) \ do { \ if (!trace_seq_putmem(s, &(x), sizeof(x))) \ diff --git a/kernel/trace/trace_seq.c b/kernel/trace/trace_seq.c new file mode 100644 index 000000000000..5ba99c6cf834 --- /dev/null +++ b/kernel/trace/trace_seq.c @@ -0,0 +1,303 @@ +/* + * trace_seq.c + * + * Copyright (C) 2008-2014 Red Hat Inc, Steven Rostedt + * + */ +#include +#include +#include + +int trace_print_seq(struct seq_file *m, struct trace_seq *s) +{ + int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len; + int ret; + + ret = seq_write(m, s->buffer, len); + + /* + * Only reset this buffer if we successfully wrote to the + * seq_file buffer. + */ + if (!ret) + trace_seq_init(s); + + return ret; +} + +/** + * trace_seq_printf - sequence printing of trace information + * @s: trace sequence descriptor + * @fmt: printf format string + * + * It returns 0 if the trace oversizes the buffer's free + * space, 1 otherwise. + * + * The tracer may use either sequence operations or its own + * copy to user routines. 
To simplify formating of a trace + * trace_seq_printf is used to store strings into a special + * buffer (@s). Then the output may be either used by + * the sequencer or pulled into another buffer. + */ +int +trace_seq_printf(struct trace_seq *s, const char *fmt, ...) +{ + int len = (PAGE_SIZE - 1) - s->len; + va_list ap; + int ret; + + if (s->full || !len) + return 0; + + va_start(ap, fmt); + ret = vsnprintf(s->buffer + s->len, len, fmt, ap); + va_end(ap); + + /* If we can't write it all, don't bother writing anything */ + if (ret >= len) { + s->full = 1; + return 0; + } + + s->len += ret; + + return 1; +} +EXPORT_SYMBOL_GPL(trace_seq_printf); + +/** + * trace_seq_bitmask - put a list of longs as a bitmask print output + * @s: trace sequence descriptor + * @maskp: points to an array of unsigned longs that represent a bitmask + * @nmaskbits: The number of bits that are valid in @maskp + * + * It returns 0 if the trace oversizes the buffer's free + * space, 1 otherwise. + * + * Writes a ASCII representation of a bitmask string into @s. + */ +int +trace_seq_bitmask(struct trace_seq *s, const unsigned long *maskp, + int nmaskbits) +{ + int len = (PAGE_SIZE - 1) - s->len; + int ret; + + if (s->full || !len) + return 0; + + ret = bitmap_scnprintf(s->buffer, len, maskp, nmaskbits); + s->len += ret; + + return 1; +} +EXPORT_SYMBOL_GPL(trace_seq_bitmask); + +/** + * trace_seq_vprintf - sequence printing of trace information + * @s: trace sequence descriptor + * @fmt: printf format string + * + * The tracer may use either sequence operations or its own + * copy to user routines. To simplify formating of a trace + * trace_seq_printf is used to store strings into a special + * buffer (@s). Then the output may be either used by + * the sequencer or pulled into another buffer. + */ +int +trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args) +{ + int len = (PAGE_SIZE - 1) - s->len; + int ret; + + if (s->full || !len) + return 0; + + ret = vsnprintf(s->buffer + s->len, len, fmt, args); + + /* If we can't write it all, don't bother writing anything */ + if (ret >= len) { + s->full = 1; + return 0; + } + + s->len += ret; + + return len; +} +EXPORT_SYMBOL_GPL(trace_seq_vprintf); + +int trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary) +{ + int len = (PAGE_SIZE - 1) - s->len; + int ret; + + if (s->full || !len) + return 0; + + ret = bstr_printf(s->buffer + s->len, len, fmt, binary); + + /* If we can't write it all, don't bother writing anything */ + if (ret >= len) { + s->full = 1; + return 0; + } + + s->len += ret; + + return len; +} + +/** + * trace_seq_puts - trace sequence printing of simple string + * @s: trace sequence descriptor + * @str: simple string to record + * + * The tracer may use either the sequence operations or its own + * copy to user routines. This function records a simple string + * into a special buffer (@s) for later retrieval by a sequencer + * or other mechanism. 
+ */ +int trace_seq_puts(struct trace_seq *s, const char *str) +{ + int len = strlen(str); + + if (s->full) + return 0; + + if (len > ((PAGE_SIZE - 1) - s->len)) { + s->full = 1; + return 0; + } + + memcpy(s->buffer + s->len, str, len); + s->len += len; + + return len; +} + +int trace_seq_putc(struct trace_seq *s, unsigned char c) +{ + if (s->full) + return 0; + + if (s->len >= (PAGE_SIZE - 1)) { + s->full = 1; + return 0; + } + + s->buffer[s->len++] = c; + + return 1; +} +EXPORT_SYMBOL(trace_seq_putc); + +int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len) +{ + if (s->full) + return 0; + + if (len > ((PAGE_SIZE - 1) - s->len)) { + s->full = 1; + return 0; + } + + memcpy(s->buffer + s->len, mem, len); + s->len += len; + + return len; +} + +#define HEX_CHARS (MAX_MEMHEX_BYTES*2 + 1) + +int trace_seq_putmem_hex(struct trace_seq *s, const void *mem, size_t len) +{ + unsigned char hex[HEX_CHARS]; + const unsigned char *data = mem; + int i, j; + + if (s->full) + return 0; + +#ifdef __BIG_ENDIAN + for (i = 0, j = 0; i < len; i++) { +#else + for (i = len-1, j = 0; i >= 0; i--) { +#endif + hex[j++] = hex_asc_hi(data[i]); + hex[j++] = hex_asc_lo(data[i]); + } + hex[j++] = ' '; + + return trace_seq_putmem(s, hex, j); +} + +void *trace_seq_reserve(struct trace_seq *s, size_t len) +{ + void *ret; + + if (s->full) + return NULL; + + if (len > ((PAGE_SIZE - 1) - s->len)) { + s->full = 1; + return NULL; + } + + ret = s->buffer + s->len; + s->len += len; + + return ret; +} + +int trace_seq_path(struct trace_seq *s, const struct path *path) +{ + unsigned char *p; + + if (s->full) + return 0; + + if (s->len >= (PAGE_SIZE - 1)) { + s->full = 1; + return 0; + } + + p = d_path(path, s->buffer + s->len, PAGE_SIZE - s->len); + if (!IS_ERR(p)) { + p = mangle_path(s->buffer + s->len, p, "\n"); + if (p) { + s->len = p - s->buffer; + return 1; + } + } else { + s->buffer[s->len++] = '?'; + return 1; + } + + s->full = 1; + return 0; +} + +ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt) +{ + int len; + int ret; + + if (!cnt) + return 0; + + if (s->len <= s->readpos) + return -EBUSY; + + len = s->len - s->readpos; + if (cnt > len) + cnt = len; + ret = copy_to_user(ubuf, s->buffer + s->readpos, cnt); + if (ret == cnt) + return -EFAULT; + + cnt -= ret; + + s->readpos += cnt; + return cnt; +} -- cgit v1.2.3-59-g8ed1b From 36aabfff50b6a03bcfd2c3cfbd7b83eb0a9ce0c1 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Fri, 20 Jun 2014 17:38:01 -0400 Subject: tracing: Clean up trace_seq.c For using trace_seq_*() functions in NMI context, I posted a patch to move it to the lib/ directory. This caused Andrew Morton to take a look at the code. He went through and gave a lot of comments about missing kernel doc, inconsistent types for the save variable, mix match of EXPORT_SYMBOL_GPL() and EXPORT_SYMBOL() as well as missing EXPORT_SYMBOL*()s. There were a few comments about the way variables were being compared (int vs uint). All these were good review comments and should be implemented regardless of if trace_seq.c should be moved to lib/ or not. 
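To make those review points concrete, the shape the functions converge on after this patch is roughly the following. trace_seq_example() is a hypothetical helper invented purely to illustrate the conventions (kernel-doc for every parameter and the return value, unsigned int lengths, and a matching EXPORT_SYMBOL_GPL()), and it assumes placement in trace_seq.c next to the TRACE_SEQ_BUF_LEFT() macro introduced below:

/**
 * trace_seq_example - hypothetical helper used only to show the conventions
 * @s: trace sequence descriptor
 * @len: number of bytes the caller intends to write
 *
 * Lengths are plain unsigned int because the buffer is bounded by
 * PAGE_SIZE, every parameter and the return value are documented,
 * and the export type matches the rest of the file.
 *
 * Returns 1 if @len bytes still fit in the buffer, 0 otherwise.
 */
int trace_seq_example(struct trace_seq *s, unsigned int len)
{
	if (s->full || len > TRACE_SEQ_BUF_LEFT(s))
		return 0;

	return 1;
}
EXPORT_SYMBOL_GPL(trace_seq_example);
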
Signed-off-by: Steven Rostedt --- include/linux/trace_seq.h | 20 ++--- kernel/trace/trace_seq.c | 207 +++++++++++++++++++++++++++++++++++++++------- 2 files changed, 185 insertions(+), 42 deletions(-) (limited to 'include/linux') diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h index 66ea365acf01..1f05317f51c4 100644 --- a/include/linux/trace_seq.h +++ b/include/linux/trace_seq.h @@ -38,14 +38,14 @@ int trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args); extern int trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary); extern int trace_print_seq(struct seq_file *m, struct trace_seq *s); -extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, - size_t cnt); +extern int trace_seq_to_user(struct trace_seq *s, char __user *ubuf, + int cnt); extern int trace_seq_puts(struct trace_seq *s, const char *str); extern int trace_seq_putc(struct trace_seq *s, unsigned char c); -extern int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len); +extern int trace_seq_putmem(struct trace_seq *s, const void *mem, unsigned int len); extern int trace_seq_putmem_hex(struct trace_seq *s, const void *mem, - size_t len); -extern void *trace_seq_reserve(struct trace_seq *s, size_t len); + unsigned int len); +extern void *trace_seq_reserve(struct trace_seq *s, unsigned int len); extern int trace_seq_path(struct trace_seq *s, const struct path *path); extern int trace_seq_bitmask(struct trace_seq *s, const unsigned long *maskp, @@ -73,8 +73,8 @@ static inline int trace_print_seq(struct seq_file *m, struct trace_seq *s) { return 0; } -static inline ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, - size_t cnt) +static inline int trace_seq_to_user(struct trace_seq *s, char __user *ubuf, + int cnt) { return 0; } @@ -87,16 +87,16 @@ static inline int trace_seq_putc(struct trace_seq *s, unsigned char c) return 0; } static inline int -trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len) +trace_seq_putmem(struct trace_seq *s, const void *mem, unsigned int len) { return 0; } static inline int trace_seq_putmem_hex(struct trace_seq *s, const void *mem, - size_t len) + unsigned int len) { return 0; } -static inline void *trace_seq_reserve(struct trace_seq *s, size_t len) +static inline void *trace_seq_reserve(struct trace_seq *s, unsigned int len) { return NULL; } diff --git a/kernel/trace/trace_seq.c b/kernel/trace/trace_seq.c index 5ba99c6cf834..0fabca773e51 100644 --- a/kernel/trace/trace_seq.c +++ b/kernel/trace/trace_seq.c @@ -3,21 +3,55 @@ * * Copyright (C) 2008-2014 Red Hat Inc, Steven Rostedt * + * The trace_seq is a handy tool that allows you to pass a descriptor around + * to a buffer that other functions can write to. It is similar to the + * seq_file functionality but has some differences. + * + * To use it, the trace_seq must be initialized with trace_seq_init(). + * This will set up the counters within the descriptor. You can call + * trace_seq_init() more than once to reset the trace_seq to start + * from scratch. + * + * The buffer size is currently PAGE_SIZE, although it may become dynamic + * in the future. + * + * A write to the buffer will either succed or fail. That is, unlike + * sprintf() there will not be a partial write (well it may write into + * the buffer but it wont update the pointers). This allows users to + * try to write something into the trace_seq buffer and if it fails + * they can flush it and try again. 
+ * */ #include #include #include +/* How much buffer is left on the trace_seq? */ +#define TRACE_SEQ_BUF_LEFT(s) ((PAGE_SIZE - 1) - (s)->len) + +/* How much buffer is written? */ +#define TRACE_SEQ_BUF_USED(s) min((s)->len, (unsigned int)(PAGE_SIZE - 1)) + +/** + * trace_print_seq - move the contents of trace_seq into a seq_file + * @m: the seq_file descriptor that is the destination + * @s: the trace_seq descriptor that is the source. + * + * Returns 0 on success and non zero on error. If it succeeds to + * write to the seq_file it will reset the trace_seq, otherwise + * it does not modify the trace_seq to let the caller try again. + */ int trace_print_seq(struct seq_file *m, struct trace_seq *s) { - int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len; + unsigned int len = TRACE_SEQ_BUF_USED(s); int ret; ret = seq_write(m, s->buffer, len); /* * Only reset this buffer if we successfully wrote to the - * seq_file buffer. + * seq_file buffer. This lets the caller try again or + * do something else with the contents. */ if (!ret) trace_seq_init(s); @@ -30,19 +64,20 @@ int trace_print_seq(struct seq_file *m, struct trace_seq *s) * @s: trace sequence descriptor * @fmt: printf format string * - * It returns 0 if the trace oversizes the buffer's free - * space, 1 otherwise. - * * The tracer may use either sequence operations or its own * copy to user routines. To simplify formating of a trace - * trace_seq_printf is used to store strings into a special + * trace_seq_printf() is used to store strings into a special * buffer (@s). Then the output may be either used by * the sequencer or pulled into another buffer. + * + * Returns 1 if we successfully written all the contents to + * the buffer. + * Returns 0 if we the length to write is bigger than the + * reserved buffer space. In this case, nothing gets written. */ -int -trace_seq_printf(struct trace_seq *s, const char *fmt, ...) +int trace_seq_printf(struct trace_seq *s, const char *fmt, ...) { - int len = (PAGE_SIZE - 1) - s->len; + unsigned int len = TRACE_SEQ_BUF_LEFT(s); va_list ap; int ret; @@ -66,21 +101,22 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...) EXPORT_SYMBOL_GPL(trace_seq_printf); /** - * trace_seq_bitmask - put a list of longs as a bitmask print output + * trace_seq_bitmask - write a bitmask array in its ASCII representation * @s: trace sequence descriptor * @maskp: points to an array of unsigned longs that represent a bitmask * @nmaskbits: The number of bits that are valid in @maskp * - * It returns 0 if the trace oversizes the buffer's free - * space, 1 otherwise. - * * Writes a ASCII representation of a bitmask string into @s. + * + * Returns 1 if we successfully written all the contents to + * the buffer. + * Returns 0 if we the length to write is bigger than the + * reserved buffer space. In this case, nothing gets written. */ -int -trace_seq_bitmask(struct trace_seq *s, const unsigned long *maskp, - int nmaskbits) +int trace_seq_bitmask(struct trace_seq *s, const unsigned long *maskp, + int nmaskbits) { - int len = (PAGE_SIZE - 1) - s->len; + unsigned int len = TRACE_SEQ_BUF_LEFT(s); int ret; if (s->full || !len) @@ -103,11 +139,12 @@ EXPORT_SYMBOL_GPL(trace_seq_bitmask); * trace_seq_printf is used to store strings into a special * buffer (@s). Then the output may be either used by * the sequencer or pulled into another buffer. + * + * Returns how much it wrote to the buffer. 
*/ -int -trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args) +int trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args) { - int len = (PAGE_SIZE - 1) - s->len; + unsigned int len = TRACE_SEQ_BUF_LEFT(s); int ret; if (s->full || !len) @@ -127,9 +164,26 @@ trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args) } EXPORT_SYMBOL_GPL(trace_seq_vprintf); +/** + * trace_seq_bprintf - Write the printf string from binary arguments + * @s: trace sequence descriptor + * @fmt: The format string for the @binary arguments + * @binary: The binary arguments for @fmt. + * + * When recording in a fast path, a printf may be recorded with just + * saving the format and the arguments as they were passed to the + * function, instead of wasting cycles converting the arguments into + * ASCII characters. Instead, the arguments are saved in a 32 bit + * word array that is defined by the format string constraints. + * + * This function will take the format and the binary array and finish + * the conversion into the ASCII string within the buffer. + * + * Returns how much it wrote to the buffer. + */ int trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary) { - int len = (PAGE_SIZE - 1) - s->len; + unsigned int len = TRACE_SEQ_BUF_LEFT(s); int ret; if (s->full || !len) @@ -147,6 +201,7 @@ int trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary) return len; } +EXPORT_SYMBOL_GPL(trace_seq_bprintf); /** * trace_seq_puts - trace sequence printing of simple string @@ -157,15 +212,17 @@ int trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary) * copy to user routines. This function records a simple string * into a special buffer (@s) for later retrieval by a sequencer * or other mechanism. + * + * Returns how much it wrote to the buffer. */ int trace_seq_puts(struct trace_seq *s, const char *str) { - int len = strlen(str); + unsigned int len = strlen(str); if (s->full) return 0; - if (len > ((PAGE_SIZE - 1) - s->len)) { + if (len > TRACE_SEQ_BUF_LEFT(s)) { s->full = 1; return 0; } @@ -175,13 +232,26 @@ int trace_seq_puts(struct trace_seq *s, const char *str) return len; } +EXPORT_SYMBOL_GPL(trace_seq_puts); +/** + * trace_seq_putc - trace sequence printing of simple character + * @s: trace sequence descriptor + * @c: simple character to record + * + * The tracer may use either the sequence operations or its own + * copy to user routines. This function records a simple charater + * into a special buffer (@s) for later retrieval by a sequencer + * or other mechanism. + * + * Returns how much it wrote to the buffer. + */ int trace_seq_putc(struct trace_seq *s, unsigned char c) { if (s->full) return 0; - if (s->len >= (PAGE_SIZE - 1)) { + if (TRACE_SEQ_BUF_LEFT(s) < 1) { s->full = 1; return 0; } @@ -190,14 +260,26 @@ int trace_seq_putc(struct trace_seq *s, unsigned char c) return 1; } -EXPORT_SYMBOL(trace_seq_putc); +EXPORT_SYMBOL_GPL(trace_seq_putc); -int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len) +/** + * trace_seq_putmem - write raw data into the trace_seq buffer + * @s: trace sequence descriptor + * @mem: The raw memory to copy into the buffer + * @len: The length of the raw memory to copy (in bytes) + * + * There may be cases where raw memory needs to be written into the + * buffer and a strcpy() would not work. Using this function allows + * for such cases. + * + * Returns how much it wrote to the buffer. 
+ */ +int trace_seq_putmem(struct trace_seq *s, const void *mem, unsigned int len) { if (s->full) return 0; - if (len > ((PAGE_SIZE - 1) - s->len)) { + if (len > TRACE_SEQ_BUF_LEFT(s)) { s->full = 1; return 0; } @@ -207,10 +289,24 @@ int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len) return len; } +EXPORT_SYMBOL_GPL(trace_seq_putmem); #define HEX_CHARS (MAX_MEMHEX_BYTES*2 + 1) -int trace_seq_putmem_hex(struct trace_seq *s, const void *mem, size_t len) +/** + * trace_seq_putmem_hex - write raw memory into the buffer in ASCII hex + * @s: trace sequence descriptor + * @mem: The raw memory to write its hex ASCII representation of + * @len: The length of the raw memory to copy (in bytes) + * + * This is similar to trace_seq_putmem() except instead of just copying the + * raw memory into the buffer it writes its ASCII representation of it + * in hex characters. + * + * Returns how much it wrote to the buffer. + */ +int trace_seq_putmem_hex(struct trace_seq *s, const void *mem, + unsigned int len) { unsigned char hex[HEX_CHARS]; const unsigned char *data = mem; @@ -231,15 +327,27 @@ int trace_seq_putmem_hex(struct trace_seq *s, const void *mem, size_t len) return trace_seq_putmem(s, hex, j); } +EXPORT_SYMBOL_GPL(trace_seq_putmem_hex); -void *trace_seq_reserve(struct trace_seq *s, size_t len) +/** + * trace_seq_reserve - reserve space on the sequence buffer + * @s: trace sequence descriptor + * @len: The amount to reserver. + * + * If for some reason there is a need to save some space on the + * buffer to fill in later, this function is used for that purpose. + * The given length will be reserved and the pointer to that + * location on the buffer is returned, unless there is not enough + * buffer left to hold the given length then NULL is returned. + */ +void *trace_seq_reserve(struct trace_seq *s, unsigned int len) { void *ret; if (s->full) return NULL; - if (len > ((PAGE_SIZE - 1) - s->len)) { + if (len > TRACE_SEQ_BUF_LEFT(s)) { s->full = 1; return NULL; } @@ -249,7 +357,20 @@ void *trace_seq_reserve(struct trace_seq *s, size_t len) return ret; } +EXPORT_SYMBOL_GPL(trace_seq_reserve); +/** + * trace_seq_path - copy a path into the sequence buffer + * @s: trace sequence descriptor + * @path: path to write into the sequence buffer. + * + * Write a path name into the sequence buffer. + * + * Returns 1 if we successfully written all the contents to + * the buffer. + * Returns 0 if we the length to write is bigger than the + * reserved buffer space. In this case, nothing gets written. + */ int trace_seq_path(struct trace_seq *s, const struct path *path) { unsigned char *p; @@ -257,7 +378,7 @@ int trace_seq_path(struct trace_seq *s, const struct path *path) if (s->full) return 0; - if (s->len >= (PAGE_SIZE - 1)) { + if (TRACE_SEQ_BUF_LEFT(s) < 1) { s->full = 1; return 0; } @@ -277,8 +398,29 @@ int trace_seq_path(struct trace_seq *s, const struct path *path) s->full = 1; return 0; } +EXPORT_SYMBOL_GPL(trace_seq_path); -ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt) +/** + * trace_seq_to_user - copy the squence buffer to user space + * @s: trace sequence descriptor + * @ubuf: The userspace memory location to copy to + * @cnt: The amount to copy + * + * Copies the sequence buffer into the userspace memory pointed to + * by @ubuf. It starts from the last read position (@s->readpos) + * and writes up to @cnt characters or till it reaches the end of + * the content in the buffer (@s->len), which ever comes first. 
+ * + * On success, it returns a positive number of the number of bytes + * it copied. + * + * On failure it returns -EBUSY if all of the content in the + * sequence has been already read, which includes nothing in the + * sequenc (@s->len == @s->readpos). + * + * Returns -EFAULT if the copy to userspace fails. + */ +int trace_seq_to_user(struct trace_seq *s, char __user *ubuf, int cnt) { int len; int ret; @@ -301,3 +443,4 @@ ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt) s->readpos += cnt; return cnt; } +EXPORT_SYMBOL_GPL(trace_seq_to_user); -- cgit v1.2.3-59-g8ed1b From 6d2289f3faa71dcc5bba15c7aeba4f31c185b6df Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Fri, 20 Jun 2014 23:31:26 -0400 Subject: tracing: Make trace_seq_putmem_hex() more robust Currently trace_seq_putmem_hex() can only take as a parameter a pointer to something that is 8 bytes or less, otherwise it will overflow the buffer. This is protected by a macro that encompasses the call to trace_seq_putmem_hex() that has a BUILD_BUG_ON() for the variable before it is passed in. This is not very robust and if trace_seq_putmem_hex() ever gets used outside that macro it will cause issues. Instead of only being able to produce a hex output of memory that is for a single word, change it to be more robust and allow any size input. Signed-off-by: Steven Rostedt --- include/linux/trace_seq.h | 2 -- kernel/trace/trace_output.h | 1 - kernel/trace/trace_seq.c | 26 +++++++++++++++++++------- 3 files changed, 19 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h index 1f05317f51c4..8283762ab7ef 100644 --- a/include/linux/trace_seq.h +++ b/include/linux/trace_seq.h @@ -25,8 +25,6 @@ trace_seq_init(struct trace_seq *s) s->full = 0; } -#define MAX_MEMHEX_BYTES 8 - /* * Currently only defined when tracing is enabled. 
*/ diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h index bf7daf2237ed..80b25b585a70 100644 --- a/kernel/trace/trace_output.h +++ b/kernel/trace/trace_output.h @@ -43,7 +43,6 @@ do { \ #define SEQ_PUT_HEX_FIELD_RET(s, x) \ do { \ - BUILD_BUG_ON(sizeof(x) > MAX_MEMHEX_BYTES); \ if (!trace_seq_putmem_hex(s, &(x), sizeof(x))) \ return TRACE_TYPE_PARTIAL_LINE; \ } while (0) diff --git a/kernel/trace/trace_seq.c b/kernel/trace/trace_seq.c index 0fabca773e51..88c0f80f0a1f 100644 --- a/kernel/trace/trace_seq.c +++ b/kernel/trace/trace_seq.c @@ -291,6 +291,7 @@ int trace_seq_putmem(struct trace_seq *s, const void *mem, unsigned int len) } EXPORT_SYMBOL_GPL(trace_seq_putmem); +#define MAX_MEMHEX_BYTES 8U #define HEX_CHARS (MAX_MEMHEX_BYTES*2 + 1) /** @@ -310,22 +311,33 @@ int trace_seq_putmem_hex(struct trace_seq *s, const void *mem, { unsigned char hex[HEX_CHARS]; const unsigned char *data = mem; + unsigned int start_len; int i, j; + int cnt = 0; if (s->full) return 0; + while (len) { + start_len = min(len, HEX_CHARS - 1); #ifdef __BIG_ENDIAN - for (i = 0, j = 0; i < len; i++) { + for (i = 0, j = 0; i < start_len; i++) { #else - for (i = len-1, j = 0; i >= 0; i--) { + for (i = start_len-1, j = 0; i >= 0; i--) { #endif - hex[j++] = hex_asc_hi(data[i]); - hex[j++] = hex_asc_lo(data[i]); - } - hex[j++] = ' '; + hex[j++] = hex_asc_hi(data[i]); + hex[j++] = hex_asc_lo(data[i]); + } + if (WARN_ON_ONCE(j == 0 || j/2 > len)) + break; + + /* j increments twice per loop */ + len -= j / 2; + hex[j++] = ' '; - return trace_seq_putmem(s, hex, j); + cnt += trace_seq_putmem(s, hex, j); + } + return cnt; } EXPORT_SYMBOL_GPL(trace_seq_putmem_hex); -- cgit v1.2.3-59-g8ed1b From 9096032fbcdcdb80b76f1046346499e20417988e Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 23 Jun 2014 16:42:07 -0400 Subject: tracing: Remove trace_seq_reserve() trace_seq_reserve() has no users in the kernel, it just wastes space. Remove it. 
Cc: Eduard - Gabriel Munteanu Signed-off-by: Steven Rostedt --- include/linux/trace_seq.h | 5 ----- kernel/trace/trace_seq.c | 30 ------------------------------ 2 files changed, 35 deletions(-) (limited to 'include/linux') diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h index 8283762ab7ef..dd85753e1bb0 100644 --- a/include/linux/trace_seq.h +++ b/include/linux/trace_seq.h @@ -43,7 +43,6 @@ extern int trace_seq_putc(struct trace_seq *s, unsigned char c); extern int trace_seq_putmem(struct trace_seq *s, const void *mem, unsigned int len); extern int trace_seq_putmem_hex(struct trace_seq *s, const void *mem, unsigned int len); -extern void *trace_seq_reserve(struct trace_seq *s, unsigned int len); extern int trace_seq_path(struct trace_seq *s, const struct path *path); extern int trace_seq_bitmask(struct trace_seq *s, const unsigned long *maskp, @@ -94,10 +93,6 @@ static inline int trace_seq_putmem_hex(struct trace_seq *s, const void *mem, { return 0; } -static inline void *trace_seq_reserve(struct trace_seq *s, unsigned int len) -{ - return NULL; -} static inline int trace_seq_path(struct trace_seq *s, const struct path *path) { return 0; diff --git a/kernel/trace/trace_seq.c b/kernel/trace/trace_seq.c index 88c0f80f0a1f..1f24ed99dca2 100644 --- a/kernel/trace/trace_seq.c +++ b/kernel/trace/trace_seq.c @@ -341,36 +341,6 @@ int trace_seq_putmem_hex(struct trace_seq *s, const void *mem, } EXPORT_SYMBOL_GPL(trace_seq_putmem_hex); -/** - * trace_seq_reserve - reserve space on the sequence buffer - * @s: trace sequence descriptor - * @len: The amount to reserver. - * - * If for some reason there is a need to save some space on the - * buffer to fill in later, this function is used for that purpose. - * The given length will be reserved and the pointer to that - * location on the buffer is returned, unless there is not enough - * buffer left to hold the given length then NULL is returned. - */ -void *trace_seq_reserve(struct trace_seq *s, unsigned int len) -{ - void *ret; - - if (s->full) - return NULL; - - if (len > TRACE_SEQ_BUF_LEFT(s)) { - s->full = 1; - return NULL; - } - - ret = s->buffer + s->len; - s->len += len; - - return ret; -} -EXPORT_SYMBOL_GPL(trace_seq_reserve); - /** * trace_seq_path - copy a path into the sequence buffer * @s: trace sequence descriptor -- cgit v1.2.3-59-g8ed1b From 7b039cb4c5a90d8ea576b17e096f7334457aeb57 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Thu, 26 Jun 2014 09:42:41 -0400 Subject: tracing: Add trace_seq_buffer_ptr() helper function There's several locations in the kernel that open code the calculation of the next location in the trace_seq buffer. This is usually done with p->buffer + p->len Instead of having this open coded, supply a helper function in the header to do it for them. This function is called trace_seq_buffer_ptr(). 
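As a concrete illustration of the conversion, a hypothetical trace-event print helper (invented for this example, but following the same return-pointer/printf/NUL-terminate pattern as the scsi_trace.c callers in the diff below) changes like this:

#include <linux/trace_seq.h>

/*
 * Hypothetical print helper: returns the position in the trace_seq
 * buffer where this call starts writing, so the event macro can use
 * the formatted text as a ready-made string.
 */
static const char *example_trace_flags(struct trace_seq *p, unsigned char flags)
{
	const char *ret = trace_seq_buffer_ptr(p);	/* was: p->buffer + p->len */

	trace_seq_printf(p, "flags=0x%x", flags);
	trace_seq_putc(p, 0);	/* NUL-terminate the string for the caller */

	return ret;
}
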
Link: http://lkml.kernel.org/p/20140626220129.452783019@goodmis.org Acked-by: Paolo Bonzini Signed-off-by: Steven Rostedt --- arch/x86/kvm/mmutrace.h | 2 +- drivers/scsi/scsi_trace.c | 16 ++++++++-------- include/linux/trace_seq.h | 15 +++++++++++++++ kernel/trace/trace_output.c | 14 +++++++------- 4 files changed, 31 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h index 9d2e0ffcb190..2e5652b62fd6 100644 --- a/arch/x86/kvm/mmutrace.h +++ b/arch/x86/kvm/mmutrace.h @@ -22,7 +22,7 @@ __entry->unsync = sp->unsync; #define KVM_MMU_PAGE_PRINTK() ({ \ - const char *ret = p->buffer + p->len; \ + const char *ret = trace_seq_buffer_ptr(p); \ static const char *access_str[] = { \ "---", "--x", "w--", "w-x", "-u-", "-ux", "wu-", "wux" \ }; \ diff --git a/drivers/scsi/scsi_trace.c b/drivers/scsi/scsi_trace.c index 2bea4f0b684a..503594e5f76d 100644 --- a/drivers/scsi/scsi_trace.c +++ b/drivers/scsi/scsi_trace.c @@ -28,7 +28,7 @@ scsi_trace_misc(struct trace_seq *, unsigned char *, int); static const char * scsi_trace_rw6(struct trace_seq *p, unsigned char *cdb, int len) { - const char *ret = p->buffer + p->len; + const char *ret = trace_seq_buffer_ptr(p); sector_t lba = 0, txlen = 0; lba |= ((cdb[1] & 0x1F) << 16); @@ -46,7 +46,7 @@ scsi_trace_rw6(struct trace_seq *p, unsigned char *cdb, int len) static const char * scsi_trace_rw10(struct trace_seq *p, unsigned char *cdb, int len) { - const char *ret = p->buffer + p->len; + const char *ret = trace_seq_buffer_ptr(p); sector_t lba = 0, txlen = 0; lba |= (cdb[2] << 24); @@ -71,7 +71,7 @@ scsi_trace_rw10(struct trace_seq *p, unsigned char *cdb, int len) static const char * scsi_trace_rw12(struct trace_seq *p, unsigned char *cdb, int len) { - const char *ret = p->buffer + p->len; + const char *ret = trace_seq_buffer_ptr(p); sector_t lba = 0, txlen = 0; lba |= (cdb[2] << 24); @@ -94,7 +94,7 @@ scsi_trace_rw12(struct trace_seq *p, unsigned char *cdb, int len) static const char * scsi_trace_rw16(struct trace_seq *p, unsigned char *cdb, int len) { - const char *ret = p->buffer + p->len; + const char *ret = trace_seq_buffer_ptr(p); sector_t lba = 0, txlen = 0; lba |= ((u64)cdb[2] << 56); @@ -125,7 +125,7 @@ scsi_trace_rw16(struct trace_seq *p, unsigned char *cdb, int len) static const char * scsi_trace_rw32(struct trace_seq *p, unsigned char *cdb, int len) { - const char *ret = p->buffer + p->len, *cmd; + const char *ret = trace_seq_buffer_ptr(p), *cmd; sector_t lba = 0, txlen = 0; u32 ei_lbrt = 0; @@ -180,7 +180,7 @@ out: static const char * scsi_trace_unmap(struct trace_seq *p, unsigned char *cdb, int len) { - const char *ret = p->buffer + p->len; + const char *ret = trace_seq_buffer_ptr(p); unsigned int regions = cdb[7] << 8 | cdb[8]; trace_seq_printf(p, "regions=%u", (regions - 8) / 16); @@ -192,7 +192,7 @@ scsi_trace_unmap(struct trace_seq *p, unsigned char *cdb, int len) static const char * scsi_trace_service_action_in(struct trace_seq *p, unsigned char *cdb, int len) { - const char *ret = p->buffer + p->len, *cmd; + const char *ret = trace_seq_buffer_ptr(p), *cmd; sector_t lba = 0; u32 alloc_len = 0; @@ -247,7 +247,7 @@ scsi_trace_varlen(struct trace_seq *p, unsigned char *cdb, int len) static const char * scsi_trace_misc(struct trace_seq *p, unsigned char *cdb, int len) { - const char *ret = p->buffer + p->len; + const char *ret = trace_seq_buffer_ptr(p); trace_seq_printf(p, "-"); trace_seq_putc(p, 0); diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h index 
dd85753e1bb0..ea6c9dea79e3 100644 --- a/include/linux/trace_seq.h +++ b/include/linux/trace_seq.h @@ -25,6 +25,21 @@ trace_seq_init(struct trace_seq *s) s->full = 0; } +/** + * trace_seq_buffer_ptr - return pointer to next location in buffer + * @s: trace sequence descriptor + * + * Returns the pointer to the buffer where the next write to + * the buffer will happen. This is useful to save the location + * that is about to be written to and then return the result + * of that write. + */ +static inline unsigned char * +trace_seq_buffer_ptr(struct trace_seq *s) +{ + return s->buffer + s->len; +} + /* * Currently only defined when tracing is enabled. */ diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index b8930f79a04b..c6977d5a9b12 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -75,7 +75,7 @@ ftrace_print_flags_seq(struct trace_seq *p, const char *delim, { unsigned long mask; const char *str; - const char *ret = p->buffer + p->len; + const char *ret = trace_seq_buffer_ptr(p); int i, first = 1; for (i = 0; flag_array[i].name && flags; i++) { @@ -111,7 +111,7 @@ ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val, const struct trace_print_flags *symbol_array) { int i; - const char *ret = p->buffer + p->len; + const char *ret = trace_seq_buffer_ptr(p); for (i = 0; symbol_array[i].name; i++) { @@ -122,7 +122,7 @@ ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val, break; } - if (ret == (const char *)(p->buffer + p->len)) + if (ret == (const char *)(trace_seq_buffer_ptr(p))) trace_seq_printf(p, "0x%lx", val); trace_seq_putc(p, 0); @@ -137,7 +137,7 @@ ftrace_print_symbols_seq_u64(struct trace_seq *p, unsigned long long val, const struct trace_print_flags_u64 *symbol_array) { int i; - const char *ret = p->buffer + p->len; + const char *ret = trace_seq_buffer_ptr(p); for (i = 0; symbol_array[i].name; i++) { @@ -148,7 +148,7 @@ ftrace_print_symbols_seq_u64(struct trace_seq *p, unsigned long long val, break; } - if (ret == (const char *)(p->buffer + p->len)) + if (ret == (const char *)(trace_seq_buffer_ptr(p))) trace_seq_printf(p, "0x%llx", val); trace_seq_putc(p, 0); @@ -162,7 +162,7 @@ const char * ftrace_print_bitmask_seq(struct trace_seq *p, void *bitmask_ptr, unsigned int bitmask_size) { - const char *ret = p->buffer + p->len; + const char *ret = trace_seq_buffer_ptr(p); trace_seq_bitmask(p, bitmask_ptr, bitmask_size * 8); trace_seq_putc(p, 0); @@ -175,7 +175,7 @@ const char * ftrace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int buf_len) { int i; - const char *ret = p->buffer + p->len; + const char *ret = trace_seq_buffer_ptr(p); for (i = 0; i < buf_len; i++) trace_seq_printf(p, "%s%2.2x", i == 0 ? "" : " ", buf[i]); -- cgit v1.2.3-59-g8ed1b From 780db2071ac4d167ee4154ad9c96088f1bba044b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 1 Jul 2014 10:31:13 -0600 Subject: blk-mq: decouble blk-mq freezing from generic bypassing blk_mq freezing is entangled with generic bypassing which bypasses blkcg and io scheduler and lets IO requests fall through the block layer to the drivers in FIFO order. This allows forward progress on IOs with the advanced features disabled so that those features can be configured or altered without worrying about stalling IO which may lead to deadlock through memory allocation. However, generic bypassing doesn't quite fit blk-mq. 
blk-mq currently doesn't make use of blkcg or ioscheds and it maps bypssing to freezing, which blocks request processing and drains all the in-flight ones. This causes problems as bypassing assumes that request processing is online. blk-mq works around this by conditionally allowing request processing for the problem case - during queue initialization. Another weirdity is that except for during queue cleanup, bypassing started on the generic side prevents blk-mq from processing new requests but doesn't drain the in-flight ones. This shouldn't break anything but again highlights that something isn't quite right here. The root cause is conflating blk-mq freezing and generic bypassing which are two different mechanisms. The only intersecting purpose that they serve is during queue cleanup. Let's properly separate blk-mq freezing from generic bypassing and simply use it where necessary. * request_queue->mq_freeze_depth is added and blk_mq_[un]freeze_queue() now operate on this counter instead of ->bypass_depth. The replacement for QUEUE_FLAG_BYPASS isn't added but the counter is tested directly. This will be further updated by later changes. * blk_mq_drain_queue() is dropped and "__" prefix is dropped from blk_mq_freeze_queue(). Queue cleanup path now calls blk_mq_freeze_queue() directly. * blk_queue_enter()'s fast path condition is simplified to simply check @q->mq_freeze_depth. Previously, the condition was !blk_queue_dying(q) && (!blk_queue_bypass(q) || !blk_queue_init_done(q)) mq_freeze_depth is incremented right after dying is set and blk_queue_init_done() exception isn't necessary as blk-mq doesn't start frozen, which only leaves the blk_queue_bypass() test which can be replaced by @q->mq_freeze_depth test. This change simplifies the code and reduces confusion in the area. Signed-off-by: Tejun Heo Cc: Jens Axboe Cc: Nicholas A. Bellinger Signed-off-by: Jens Axboe --- block/blk-core.c | 2 +- block/blk-mq.c | 17 ++++++----------- block/blk-mq.h | 2 +- include/linux/blkdev.h | 1 + 4 files changed, 9 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/block/blk-core.c b/block/blk-core.c index 0d0bdd65b2d7..c359d72e9d76 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -514,7 +514,7 @@ void blk_cleanup_queue(struct request_queue *q) * prevent that q->request_fn() gets invoked after draining finished. */ if (q->mq_ops) { - blk_mq_drain_queue(q); + blk_mq_freeze_queue(q); spin_lock_irq(lock); } else { spin_lock_irq(lock); diff --git a/block/blk-mq.c b/block/blk-mq.c index f4bdddd7ed99..1e324a123d40 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -84,15 +84,14 @@ static int blk_mq_queue_enter(struct request_queue *q) smp_mb(); /* we have problems freezing the queue if it's initializing */ - if (!blk_queue_dying(q) && - (!blk_queue_bypass(q) || !blk_queue_init_done(q))) + if (!q->mq_freeze_depth) return 0; __percpu_counter_add(&q->mq_usage_counter, -1, 1000000); spin_lock_irq(q->queue_lock); ret = wait_event_interruptible_lock_irq(q->mq_freeze_wq, - !blk_queue_bypass(q) || blk_queue_dying(q), + !q->mq_freeze_depth || blk_queue_dying(q), *q->queue_lock); /* inc usage with lock hold to avoid freeze_queue runs here */ if (!ret && !blk_queue_dying(q)) @@ -129,11 +128,10 @@ void blk_mq_drain_queue(struct request_queue *q) * Guarantee no request is in use, so we can change any data structure of * the queue afterward. 
*/ -static void blk_mq_freeze_queue(struct request_queue *q) +void blk_mq_freeze_queue(struct request_queue *q) { spin_lock_irq(q->queue_lock); - q->bypass_depth++; - queue_flag_set(QUEUE_FLAG_BYPASS, q); + q->mq_freeze_depth++; spin_unlock_irq(q->queue_lock); blk_mq_drain_queue(q); @@ -144,11 +142,8 @@ static void blk_mq_unfreeze_queue(struct request_queue *q) bool wake = false; spin_lock_irq(q->queue_lock); - if (!--q->bypass_depth) { - queue_flag_clear(QUEUE_FLAG_BYPASS, q); - wake = true; - } - WARN_ON_ONCE(q->bypass_depth < 0); + wake = !--q->mq_freeze_depth; + WARN_ON_ONCE(q->mq_freeze_depth < 0); spin_unlock_irq(q->queue_lock); if (wake) wake_up_all(&q->mq_freeze_wq); diff --git a/block/blk-mq.h b/block/blk-mq.h index 26460884c6cd..ca4964a6295d 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -28,7 +28,7 @@ struct blk_mq_ctx { void __blk_mq_complete_request(struct request *rq); void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); void blk_mq_init_flush(struct request_queue *q); -void blk_mq_drain_queue(struct request_queue *q); +void blk_mq_freeze_queue(struct request_queue *q); void blk_mq_free_queue(struct request_queue *q); void blk_mq_clone_flush_request(struct request *flush_rq, struct request *orig_rq); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 8699bcf5f099..c8f344ff74fe 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -470,6 +470,7 @@ struct request_queue { struct mutex sysfs_lock; int bypass_depth; + int mq_freeze_depth; #if defined(CONFIG_BLK_DEV_BSG) bsg_job_fn *bsg_job_fn; -- cgit v1.2.3-59-g8ed1b From add703fda981b9719d37f371498b9f129acbd997 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 1 Jul 2014 10:34:38 -0600 Subject: blk-mq: use percpu_ref for mq usage count Currently, blk-mq uses a percpu_counter to keep track of how many usages are in flight. The percpu_counter is drained while freezing to ensure that no usage is left in-flight after freezing is complete. blk_mq_queue_enter/exit() and blk_mq_[un]freeze_queue() implement this per-cpu gating mechanism. This type of code has relatively high chance of subtle bugs which are extremely difficult to trigger and it's way too hairy to be open coded in blk-mq. percpu_ref can serve the same purpose after the recent changes. This patch replaces the open-coded per-cpu usage counting and draining mechanism with percpu_ref. blk_mq_queue_enter() performs tryget_live on the ref and exit() performs put. blk_mq_freeze_queue() kills the ref and waits until the reference count reaches zero. blk_mq_unfreeze_queue() revives the ref and wakes up the waiters. Signed-off-by: Tejun Heo Cc: Jens Axboe Cc: Nicholas A. 
Bellinger Cc: Kent Overstreet Signed-off-by: Jens Axboe --- block/blk-mq.c | 68 +++++++++++++++++++++----------------------------- include/linux/blkdev.h | 3 ++- 2 files changed, 31 insertions(+), 40 deletions(-) (limited to 'include/linux') diff --git a/block/blk-mq.c b/block/blk-mq.c index 22682fb4be65..5189cb1e478a 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -78,34 +78,32 @@ static void blk_mq_hctx_clear_pending(struct blk_mq_hw_ctx *hctx, static int blk_mq_queue_enter(struct request_queue *q) { - int ret; - - __percpu_counter_add(&q->mq_usage_counter, 1, 1000000); - smp_mb(); - - /* we have problems freezing the queue if it's initializing */ - if (!q->mq_freeze_depth) - return 0; - - __percpu_counter_add(&q->mq_usage_counter, -1, 1000000); + while (true) { + int ret; - spin_lock_irq(q->queue_lock); - ret = wait_event_interruptible_lock_irq(q->mq_freeze_wq, - !q->mq_freeze_depth || blk_queue_dying(q), - *q->queue_lock); - /* inc usage with lock hold to avoid freeze_queue runs here */ - if (!ret && !blk_queue_dying(q)) - __percpu_counter_add(&q->mq_usage_counter, 1, 1000000); - else if (blk_queue_dying(q)) - ret = -ENODEV; - spin_unlock_irq(q->queue_lock); + if (percpu_ref_tryget_live(&q->mq_usage_counter)) + return 0; - return ret; + ret = wait_event_interruptible(q->mq_freeze_wq, + !q->mq_freeze_depth || blk_queue_dying(q)); + if (blk_queue_dying(q)) + return -ENODEV; + if (ret) + return ret; + } } static void blk_mq_queue_exit(struct request_queue *q) { - __percpu_counter_add(&q->mq_usage_counter, -1, 1000000); + percpu_ref_put(&q->mq_usage_counter); +} + +static void blk_mq_usage_counter_release(struct percpu_ref *ref) +{ + struct request_queue *q = + container_of(ref, struct request_queue, mq_usage_counter); + + wake_up_all(&q->mq_freeze_wq); } /* @@ -118,18 +116,9 @@ void blk_mq_freeze_queue(struct request_queue *q) q->mq_freeze_depth++; spin_unlock_irq(q->queue_lock); - while (true) { - s64 count; - - spin_lock_irq(q->queue_lock); - count = percpu_counter_sum(&q->mq_usage_counter); - spin_unlock_irq(q->queue_lock); - - if (count == 0) - break; - blk_mq_start_hw_queues(q); - msleep(10); - } + percpu_ref_kill(&q->mq_usage_counter); + blk_mq_run_queues(q, false); + wait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->mq_usage_counter)); } static void blk_mq_unfreeze_queue(struct request_queue *q) @@ -140,8 +129,10 @@ static void blk_mq_unfreeze_queue(struct request_queue *q) wake = !--q->mq_freeze_depth; WARN_ON_ONCE(q->mq_freeze_depth < 0); spin_unlock_irq(q->queue_lock); - if (wake) + if (wake) { + percpu_ref_reinit(&q->mq_usage_counter); wake_up_all(&q->mq_freeze_wq); + } } bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx) @@ -1785,7 +1776,7 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set) if (!q) goto err_hctxs; - if (percpu_counter_init(&q->mq_usage_counter, 0)) + if (percpu_ref_init(&q->mq_usage_counter, blk_mq_usage_counter_release)) goto err_map; setup_timer(&q->timeout, blk_mq_rq_timer, (unsigned long) q); @@ -1878,7 +1869,7 @@ void blk_mq_free_queue(struct request_queue *q) blk_mq_exit_hw_queues(q, set, set->nr_hw_queues); blk_mq_free_hw_queues(q, set); - percpu_counter_destroy(&q->mq_usage_counter); + percpu_ref_exit(&q->mq_usage_counter); free_percpu(q->queue_ctx); kfree(q->queue_hw_ctx); @@ -2037,8 +2028,7 @@ static int __init blk_mq_init(void) { blk_mq_cpu_init(); - /* Must be called after percpu_counter_hotcpu_callback() */ - hotcpu_notifier(blk_mq_queue_reinit_notify, -10); + hotcpu_notifier(blk_mq_queue_reinit_notify, 0); return 0; } diff 
--git a/include/linux/blkdev.h b/include/linux/blkdev.h index c8f344ff74fe..518b46555b80 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -21,6 +21,7 @@ #include #include #include +#include #include @@ -484,7 +485,7 @@ struct request_queue { #endif struct rcu_head rcu_head; wait_queue_head_t mq_freeze_wq; - struct percpu_counter mq_usage_counter; + struct percpu_ref mq_usage_counter; struct list_head all_q_node; struct blk_mq_tag_set *tag_set; -- cgit v1.2.3-59-g8ed1b From cbcd1054a1fd2aa980fc11ff28e436fc4aaa2d54 Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Tue, 1 Jul 2014 10:36:47 -0600 Subject: bio-integrity: add "bip_max_vcnt" into struct bio_integrity_payload Commit 08778795 ("block: Fix nr_vecs for inline integrity vectors") from Martin introduces the function bip_integrity_vecs(get the useful vectors) to fix the issue about nr_vecs for inline integrity vectors that reported by David Milburn. But it seems that bip_integrity_vecs() will return the wrong number if the bio is not based on any bio_set for some reason(bio->bi_pool == NULL), because in that case, the bip_inline_vecs[0] is malloced directly. So here we add the bip_max_vcnt to record the count of vector slots, and cleanup the function bip_integrity_vecs(). Signed-off-by: Gu Zheng Cc: Martin K. Petersen Cc: Kent Overstreet Signed-off-by: Jens Axboe --- block/bio-integrity.c | 12 +++--------- include/linux/bio.h | 1 + 2 files changed, 4 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/block/bio-integrity.c b/block/bio-integrity.c index 9e241063a616..bc423f7b02da 100644 --- a/block/bio-integrity.c +++ b/block/bio-integrity.c @@ -70,8 +70,10 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, bs->bvec_integrity_pool); if (!bip->bip_vec) goto err; + bip->bip_max_vcnt = bvec_nr_vecs(idx); } else { bip->bip_vec = bip->bip_inline_vecs; + bip->bip_max_vcnt = inline_vecs; } bip->bip_slab = idx; @@ -114,14 +116,6 @@ void bio_integrity_free(struct bio *bio) } EXPORT_SYMBOL(bio_integrity_free); -static inline unsigned int bip_integrity_vecs(struct bio_integrity_payload *bip) -{ - if (bip->bip_slab == BIO_POOL_NONE) - return BIP_INLINE_VECS; - - return bvec_nr_vecs(bip->bip_slab); -} - /** * bio_integrity_add_page - Attach integrity metadata * @bio: bio to update @@ -137,7 +131,7 @@ int bio_integrity_add_page(struct bio *bio, struct page *page, struct bio_integrity_payload *bip = bio->bi_integrity; struct bio_vec *iv; - if (bip->bip_vcnt >= bip_integrity_vecs(bip)) { + if (bip->bip_vcnt >= bip->bip_max_vcnt) { printk(KERN_ERR "%s: bip_vec full\n", __func__); return 0; } diff --git a/include/linux/bio.h b/include/linux/bio.h index d2633ee099d9..b39e5000ff58 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -308,6 +308,7 @@ struct bio_integrity_payload { unsigned short bip_slab; /* slab the bip came from */ unsigned short bip_vcnt; /* # of integrity bio_vecs */ + unsigned short bip_max_vcnt; /* integrity bio_vec slots */ unsigned bip_owns_buf:1; /* should free bip_buf */ struct work_struct bip_work; /* I/O completion */ -- cgit v1.2.3-59-g8ed1b From d93331965729850303f6111381c1a4a9e9b8ae5a Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Wed, 25 Jun 2014 14:44:53 -0700 Subject: ipv6: Allow accepting RA from local IP addresses. This can be used in virtual networking applications, and may have other uses as well. The option is disabled by default. 
A specific use case is setting up virtual routers, bridges, and hosts on a single OS without the use of network namespaces or virtual machines. With proper use of ip rules, routing tables, veth interface pairs and/or other virtual interfaces, and applications that can bind to interfaces and/or IP addresses, it is possibly to create one or more virtual routers with multiple hosts attached. The host interfaces can act as IPv6 systems, with radvd running on the ports in the virtual routers. With the option provided in this patch enabled, those hosts can now properly obtain IPv6 addresses from the radvd. Signed-off-by: Ben Greear Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 12 ++++++++++++ include/linux/ipv6.h | 1 + include/uapi/linux/ipv6.h | 1 + include/uapi/linux/sysctl.h | 1 + kernel/sysctl_binary.c | 1 + net/ipv6/addrconf.c | 10 ++++++++++ net/ipv6/ndisc.c | 21 +++++++++++++-------- 7 files changed, 39 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index ab42c95f9985..10e216c6e05e 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -1210,6 +1210,18 @@ accept_ra_defrtr - BOOLEAN Functional default: enabled if accept_ra is enabled. disabled if accept_ra is disabled. +accept_ra_from_local - BOOLEAN + Accept RA with source-address that is found on local machine + if the RA is otherwise proper and able to be accepted. + Default is to NOT accept these as it may be an un-intended + network loop. + + Functional default: + enabled if accept_ra_from_local is enabled + on a specific interface. + disabled if accept_ra_from_local is disabled + on a specific interface. + accept_ra_pinfo - BOOLEAN Learn Prefix Information in Router Advertisement. 
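For reference, the new knob follows the usual per-interface IPv6 sysctl layout, so once the patch is applied it can be flipped at run time through /proc/sys/net/ipv6/conf/<ifname>/accept_ra_from_local (the interface name here is a placeholder), with the documented default of 0 preserving the existing behaviour of ignoring RAs sourced from a local address.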
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index c811300b0b0c..b0f2452f1d58 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -39,6 +39,7 @@ struct ipv6_devconf { #endif __s32 proxy_ndp; __s32 accept_source_route; + __s32 accept_ra_from_local; #ifdef CONFIG_IPV6_OPTIMISTIC_DAD __s32 optimistic_dad; #endif diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index 593b0e32d956..efa2666f4b8a 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -163,6 +163,7 @@ enum { DEVCONF_MLDV1_UNSOLICITED_REPORT_INTERVAL, DEVCONF_MLDV2_UNSOLICITED_REPORT_INTERVAL, DEVCONF_SUPPRESS_FRAG_NDISC, + DEVCONF_ACCEPT_RA_FROM_LOCAL, DEVCONF_MAX }; diff --git a/include/uapi/linux/sysctl.h b/include/uapi/linux/sysctl.h index 6d6721341f49..43aaba1cc037 100644 --- a/include/uapi/linux/sysctl.h +++ b/include/uapi/linux/sysctl.h @@ -568,6 +568,7 @@ enum { NET_IPV6_ACCEPT_RA_RT_INFO_MAX_PLEN=22, NET_IPV6_PROXY_NDP=23, NET_IPV6_ACCEPT_SOURCE_ROUTE=25, + NET_IPV6_ACCEPT_RA_FROM_LOCAL=26, __NET_IPV6_MAX }; diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c index 653cbbd9e7ad..e4ba9a5a5ccb 100644 --- a/kernel/sysctl_binary.c +++ b/kernel/sysctl_binary.c @@ -522,6 +522,7 @@ static const struct bin_table bin_net_ipv6_conf_var_table[] = { { CTL_INT, NET_IPV6_ACCEPT_RA_RT_INFO_MAX_PLEN, "accept_ra_rt_info_max_plen" }, { CTL_INT, NET_IPV6_PROXY_NDP, "proxy_ndp" }, { CTL_INT, NET_IPV6_ACCEPT_SOURCE_ROUTE, "accept_source_route" }, + { CTL_INT, NET_IPV6_ACCEPT_RA_FROM_LOCAL, "accept_ra_from_local" }, {} }; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 5667b3003af9..358edd2272ac 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -186,6 +186,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .max_desync_factor = MAX_DESYNC_FACTOR, .max_addresses = IPV6_MAX_ADDRESSES, .accept_ra_defrtr = 1, + .accept_ra_from_local = 0, .accept_ra_pinfo = 1, #ifdef CONFIG_IPV6_ROUTER_PREF .accept_ra_rtr_pref = 1, @@ -222,6 +223,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .max_desync_factor = MAX_DESYNC_FACTOR, .max_addresses = IPV6_MAX_ADDRESSES, .accept_ra_defrtr = 1, + .accept_ra_from_local = 0, .accept_ra_pinfo = 1, #ifdef CONFIG_IPV6_ROUTER_PREF .accept_ra_rtr_pref = 1, @@ -4321,6 +4323,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_FORCE_TLLAO] = cnf->force_tllao; array[DEVCONF_NDISC_NOTIFY] = cnf->ndisc_notify; array[DEVCONF_SUPPRESS_FRAG_NDISC] = cnf->suppress_frag_ndisc; + array[DEVCONF_ACCEPT_RA_FROM_LOCAL] = cnf->accept_ra_from_local; } static inline size_t inet6_ifla6_size(void) @@ -5167,6 +5170,13 @@ static struct addrconf_sysctl_table .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "accept_ra_from_local", + .data = &ipv6_devconf.accept_ra_from_local, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { /* sentinel */ } diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 736c11c6d266..a845e3d2057e 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1148,11 +1148,15 @@ static void ndisc_router_discovery(struct sk_buff *skb) goto skip_defrtr; } - if (ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr, - NULL, 0)) { + /* Do not accept RA with source-addr found on local machine unless + * accept_ra_from_local is set to true. 
+ */ + if (!(in6_dev->cnf.accept_ra_from_local || + ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr, + NULL, 0))) { ND_PRINTK(2, info, - "RA: %s, chk_addr failed for dev: %s\n", - __func__, skb->dev->name); + "RA from local address detected on dev: %s: default router ignored\n", + skb->dev->name); goto skip_defrtr; } @@ -1290,11 +1294,12 @@ skip_linkparms: } #ifdef CONFIG_IPV6_ROUTE_INFO - if (ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr, - NULL, 0)) { + if (!(in6_dev->cnf.accept_ra_from_local || + ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr, + NULL, 0))) { ND_PRINTK(2, info, - "RA: %s, chk-addr (route info) is false for dev: %s\n", - __func__, skb->dev->name); + "RA from local address detected on dev: %s: router info ignored.\n", + skb->dev->name); goto skip_routeinfo; } -- cgit v1.2.3-59-g8ed1b From 9fe516ba3fb29b6f6a752ffd93342fdee500ec01 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 27 Jun 2014 08:36:16 -0700 Subject: inet: move ipv6only in sock_common When an UDP application switches from AF_INET to AF_INET6 sockets, we have a small performance degradation for IPv4 communications because of extra cache line misses to access ipv6only information. This can also be noticed for TCP listeners, as ipv6_only_sock() is also used from __inet_lookup_listener()->compute_score() This is magnified when SO_REUSEPORT is used. Move ipv6only into struct sock_common so that it is available at no extra cost in lookups. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/ipv6.h | 10 +++++----- include/net/inet_timewait_sock.h | 3 ++- include/net/sock.h | 4 +++- net/dccp/minisocks.c | 4 +--- net/ipv4/tcp_minisocks.c | 2 +- net/ipv6/af_inet6.c | 6 +++--- net/ipv6/ipv6_sockglue.c | 4 ++-- net/ipv6/udp.c | 3 +-- 8 files changed, 18 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index b0f2452f1d58..5dc68c3ebcbd 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -194,7 +194,7 @@ struct ipv6_pinfo { sndflow:1, repflow:1, pmtudisc:3, - ipv6only:1, + padding:1, /* 1 bit hole */ srcprefs:3, /* 001: prefer temporary address * 010: prefer public address * 100: prefer care-of address @@ -273,8 +273,8 @@ static inline void inet_sk_copy_descendant(struct sock *sk_to, __inet_sk_copy_descendant(sk_to, sk_from, ancestor_size); } -#define __ipv6_only_sock(sk) (inet6_sk(sk)->ipv6only) -#define ipv6_only_sock(sk) ((sk)->sk_family == PF_INET6 && __ipv6_only_sock(sk)) +#define __ipv6_only_sock(sk) (sk->sk_ipv6only) +#define ipv6_only_sock(sk) (__ipv6_only_sock(sk)) #define ipv6_sk_rxinfo(sk) ((sk)->sk_family == PF_INET6 && \ inet6_sk(sk)->rxopt.bits.rxinfo) @@ -287,8 +287,8 @@ static inline const struct in6_addr *inet6_rcv_saddr(const struct sock *sk) static inline int inet_v6_ipv6only(const struct sock *sk) { - return likely(sk->sk_state != TCP_TIME_WAIT) ? 
- ipv6_only_sock(sk) : inet_twsk(sk)->tw_ipv6only; + /* ipv6only field is at same position for timewait and other sockets */ + return ipv6_only_sock(sk); } #else #define __ipv6_only_sock(sk) 0 diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 61474ea02152..6c566034e26d 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -108,6 +108,7 @@ struct inet_timewait_sock { #define tw_family __tw_common.skc_family #define tw_state __tw_common.skc_state #define tw_reuse __tw_common.skc_reuse +#define tw_ipv6only __tw_common.skc_ipv6only #define tw_bound_dev_if __tw_common.skc_bound_dev_if #define tw_node __tw_common.skc_nulls_node #define tw_bind_node __tw_common.skc_bind_node @@ -131,7 +132,7 @@ struct inet_timewait_sock { __be16 tw_sport; kmemcheck_bitfield_begin(flags); /* And these are ours. */ - unsigned int tw_ipv6only : 1, + unsigned int tw_pad0 : 1, /* 1 bit hole */ tw_transparent : 1, tw_flowlabel : 20, tw_pad : 2, /* 2 bits hole */ diff --git a/include/net/sock.h b/include/net/sock.h index 173cae485de1..8d4c9473e7d7 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -181,7 +181,8 @@ struct sock_common { unsigned short skc_family; volatile unsigned char skc_state; unsigned char skc_reuse:4; - unsigned char skc_reuseport:4; + unsigned char skc_reuseport:1; + unsigned char skc_ipv6only:1; int skc_bound_dev_if; union { struct hlist_node skc_bind_node; @@ -317,6 +318,7 @@ struct sock { #define sk_state __sk_common.skc_state #define sk_reuse __sk_common.skc_reuse #define sk_reuseport __sk_common.skc_reuseport +#define sk_ipv6only __sk_common.skc_ipv6only #define sk_bound_dev_if __sk_common.skc_bound_dev_if #define sk_bind_node __sk_common.skc_bind_node #define sk_prot __sk_common.skc_prot diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index c69eb9c4fbb8..b50dc436db1f 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -55,11 +55,9 @@ void dccp_time_wait(struct sock *sk, int state, int timeo) const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1); #if IS_ENABLED(CONFIG_IPV6) if (tw->tw_family == PF_INET6) { - const struct ipv6_pinfo *np = inet6_sk(sk); - tw->tw_v6_daddr = sk->sk_v6_daddr; tw->tw_v6_rcv_saddr = sk->sk_v6_rcv_saddr; - tw->tw_ipv6only = np->ipv6only; + tw->tw_ipv6only = sk->sk_ipv6only; } #endif /* Linkage updates. */ diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index e68e0d4af6c9..1649988bd1b6 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -298,7 +298,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) tw->tw_v6_rcv_saddr = sk->sk_v6_rcv_saddr; tw->tw_tclass = np->tclass; tw->tw_flowlabel = np->flow_label >> 12; - tw->tw_ipv6only = np->ipv6only; + tw->tw_ipv6only = sk->sk_ipv6only; } #endif diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 7cb4392690dd..a426cd7099bb 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -197,7 +197,7 @@ lookup_protocol: np->mcast_hops = IPV6_DEFAULT_MCASTHOPS; np->mc_loop = 1; np->pmtudisc = IPV6_PMTUDISC_WANT; - np->ipv6only = net->ipv6.sysctl.bindv6only; + sk->sk_ipv6only = net->ipv6.sysctl.bindv6only; /* Init the ipv4 part of the socket since we can have sockets * using v6 API for ipv4. 
@@ -294,7 +294,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) /* Binding to v4-mapped address on a v6-only socket * makes no sense */ - if (np->ipv6only) { + if (sk->sk_ipv6only) { err = -EINVAL; goto out; } @@ -371,7 +371,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (addr_type != IPV6_ADDR_ANY) { sk->sk_userlocks |= SOCK_BINDADDR_LOCK; if (addr_type != IPV6_ADDR_MAPPED) - np->ipv6only = 1; + sk->sk_ipv6only = 1; } if (snum) sk->sk_userlocks |= SOCK_BINDPORT_LOCK; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index edb58aff4ae7..cc34f65179e4 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -235,7 +235,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, if (optlen < sizeof(int) || inet_sk(sk)->inet_num) goto e_inval; - np->ipv6only = valbool; + sk->sk_ipv6only = valbool; retv = 0; break; @@ -1058,7 +1058,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, } case IPV6_V6ONLY: - val = np->ipv6only; + val = sk->sk_ipv6only; break; case IPV6_RECVPKTINFO: diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 95c834799288..c2bd28fd43e4 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -79,7 +79,6 @@ static unsigned int udp6_ehashfn(struct net *net, int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) { const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2); - int sk_ipv6only = ipv6_only_sock(sk); int sk2_ipv6only = inet_v6_ipv6only(sk2); int addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr); int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED; @@ -95,7 +94,7 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) return 1; if (addr_type == IPV6_ADDR_ANY && - !(sk_ipv6only && addr_type2 == IPV6_ADDR_MAPPED)) + !(ipv6_only_sock(sk) && addr_type2 == IPV6_ADDR_MAPPED)) return 1; if (sk2_rcv_saddr6 && -- cgit v1.2.3-59-g8ed1b From 628627bfd943c077c65489acd8b23c7bb14eb0e2 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Tue, 4 Mar 2014 18:50:53 +0100 Subject: clocksource: shmobile: Remove unused sh_timer_config members The name, channel_offset, timer_bit, clockevent_rating and clocksource_rating members are unused. Remove them. Signed-off-by: Laurent Pinchart Tested-by: Simon Horman --- include/linux/sh_timer.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sh_timer.h b/include/linux/sh_timer.h index 8e1e036d6d45..64638b058076 100644 --- a/include/linux/sh_timer.h +++ b/include/linux/sh_timer.h @@ -2,11 +2,6 @@ #define __SH_TIMER_H__ struct sh_timer_config { - char *name; - long channel_offset; - int timer_bit; - unsigned long clockevent_rating; - unsigned long clocksource_rating; unsigned int channels_mask; }; -- cgit v1.2.3-59-g8ed1b From fb2b3c9f68574738c70b9df5fc2bea40f91dd8be Mon Sep 17 00:00:00 2001 From: Peter De Schrijver Date: Thu, 26 Jun 2014 18:00:53 +0300 Subject: clk: define and export clk_debugs_add_file Define and export a new function clk_debugs_add_file which adds a file to a existing clock's debugfs directory. This can be used by clock providers to add debugfs entries which are not related to a specific clock type. Examples include the ability to measure the rate of a clock. It can also be used by modules to create new debugfs entries. This is useful if you want to expose features for testing which can potentially cause system instability such as allowing to change a clock's rate from userspace. 
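As an illustration of the intended use, a clock provider could attach an extra read-only debugfs file to an already-registered clock roughly as follows; the rate-measurement helper, file name and file_operations are invented for this sketch and are not part of the patch, and note that the declaration is only available under CONFIG_DEBUG_FS.

unsigned long my_measure_rate(void);	/* hypothetical helper */

static ssize_t my_rate_read(struct file *file, char __user *buf,
			    size_t count, loff_t *ppos)
{
	char tmp[24];
	int len = scnprintf(tmp, sizeof(tmp), "%lu\n", my_measure_rate());

	return simple_read_from_buffer(buf, count, ppos, tmp, len);
}

static const struct file_operations my_rate_fops = {
	.owner = THIS_MODULE,
	.read  = my_rate_read,
};

/* called by the provider once the clock has been registered */
static void my_clk_add_measure_file(struct clk *clk)
{
	clk_debugfs_add_file(clk, "measured_rate", 0444, NULL, &my_rate_fops);
}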
Signed-off-by: Peter De Schrijver Signed-off-by: Mike Turquette --- drivers/clk/clk.c | 12 ++++++++++++ include/linux/clk-provider.h | 5 +++++ 2 files changed, 17 insertions(+) (limited to 'include/linux') diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 958967d141ee..9ad397050471 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -358,6 +358,18 @@ static void clk_debug_unregister(struct clk *clk) debugfs_remove_recursive(clk->dentry); } +struct dentry *clk_debugfs_add_file(struct clk *clk, char *name, umode_t mode, + void *data, const struct file_operations *fops) +{ + struct dentry *d = NULL; + + if (clk->dentry) + d = debugfs_create_file(name, mode, clk->dentry, data, fops); + + return d; +} +EXPORT_SYMBOL_GPL(clk_debugfs_add_file); + /** * clk_debug_init - lazily create the debugfs clk tree visualization * diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 0c287dbbb144..411dd7eb2653 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -619,5 +619,10 @@ static inline void clk_writel(u32 val, u32 __iomem *reg) #endif /* platform dependent I/O accessors */ +#ifdef CONFIG_DEBUG_FS +struct dentry *clk_debugfs_add_file(struct clk *clk, char *name, umode_t mode, + void *data, const struct file_operations *fops); +#endif + #endif /* CONFIG_COMMON_CLK */ #endif /* CLK_PROVIDER_H */ -- cgit v1.2.3-59-g8ed1b From 6955b58254c2bcee8a7b55ce06468a645dc98ec5 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 19 Apr 2014 11:18:01 +0100 Subject: component: add support for component match array Add support for generating a set of component matches at master probe time, and submitting them to the component layer. This allows the component layer to perform the matches internally without needing to call into the master driver, and allows for further restructuring of the component helper. Acked-by: Laurent Pinchart Signed-off-by: Russell King --- drivers/base/component.c | 120 ++++++++++++++++++++++++++++++++++++++++++++-- include/linux/component.h | 7 +++ 2 files changed, 124 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/component.c b/drivers/base/component.c index 55813e91bf0d..b4236daed4fa 100644 --- a/drivers/base/component.c +++ b/drivers/base/component.c @@ -18,6 +18,15 @@ #include #include +struct component_match { + size_t alloc; + size_t num; + struct { + void *data; + int (*fn)(struct device *, void *); + } compare[0]; +}; + struct master { struct list_head node; struct list_head components; @@ -25,6 +34,7 @@ struct master { const struct component_master_ops *ops; struct device *dev; + struct component_match *match; }; struct component { @@ -96,6 +106,34 @@ int component_master_add_child(struct master *master, } EXPORT_SYMBOL_GPL(component_master_add_child); +static int find_components(struct master *master) +{ + struct component_match *match = master->match; + size_t i; + int ret = 0; + + if (!match) { + /* + * Search the list of components, looking for components that + * belong to this master, and attach them to the master. + */ + return master->ops->add_components(master->dev, master); + } + + /* + * Scan the array of match functions and attach + * any components which are found to this master. 
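To see the new interface end to end, a DT-based master driver would typically build the match array in its probe routine and hand it over in one call. Everything named "my_*" below, and compare_of(), is illustrative; only the component_match_add() and component_master_add_with_match() calls come from this patch.

#include <linux/component.h>
#include <linux/of.h>
#include <linux/platform_device.h>

static int compare_of(struct device *dev, void *data)
{
	return dev->of_node == data;
}

static const struct component_master_ops my_master_ops = {
	/* .bind / .unbind callbacks elided for brevity */
};

static int my_master_probe(struct platform_device *pdev)
{
	struct component_match *match = NULL;
	struct device_node *child;

	/* one match entry per child node described in the device tree */
	for_each_available_child_of_node(pdev->dev.of_node, child)
		component_match_add(&pdev->dev, &match, compare_of, child);

	/* the component layer now performs the matching internally */
	return component_master_add_with_match(&pdev->dev, &my_master_ops,
					       match);
}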
+ */ + for (i = 0; i < match->num; i++) { + ret = component_master_add_child(master, + match->compare[i].fn, + match->compare[i].data); + if (ret) + break; + } + return ret; +} + /* Detach all attached components from this master */ static void master_remove_components(struct master *master) { @@ -128,7 +166,7 @@ static int try_to_bring_up_master(struct master *master, * Search the list of components, looking for components that * belong to this master, and attach them to the master. */ - if (master->ops->add_components(master->dev, master)) { + if (find_components(master)) { /* Failed to find all components */ ret = 0; goto out; @@ -186,18 +224,87 @@ static void take_down_master(struct master *master) master_remove_components(master); } -int component_master_add(struct device *dev, - const struct component_master_ops *ops) +static size_t component_match_size(size_t num) +{ + return offsetof(struct component_match, compare[num]); +} + +static struct component_match *component_match_realloc(struct device *dev, + struct component_match *match, size_t num) +{ + struct component_match *new; + + if (match && match->alloc == num) + return match; + + new = devm_kmalloc(dev, component_match_size(num), GFP_KERNEL); + if (!new) + return ERR_PTR(-ENOMEM); + + if (match) { + memcpy(new, match, component_match_size(min(match->num, num))); + devm_kfree(dev, match); + } else { + new->num = 0; + } + + new->alloc = num; + + return new; +} + +/* + * Add a component to be matched. + * + * The match array is first created or extended if necessary. + */ +void component_match_add(struct device *dev, struct component_match **matchptr, + int (*compare)(struct device *, void *), void *compare_data) +{ + struct component_match *match = *matchptr; + + if (IS_ERR(match)) + return; + + if (!match || match->num == match->alloc) { + size_t new_size = match ? match->alloc + 16 : 15; + + match = component_match_realloc(dev, match, new_size); + + *matchptr = match; + + if (IS_ERR(match)) + return; + } + + match->compare[match->num].fn = compare; + match->compare[match->num].data = compare_data; + match->num++; +} +EXPORT_SYMBOL(component_match_add); + +int component_master_add_with_match(struct device *dev, + const struct component_master_ops *ops, + struct component_match *match) { struct master *master; int ret; + if (ops->add_components && match) + return -EINVAL; + + /* Reallocate the match array for its true size */ + match = component_match_realloc(dev, match, match->num); + if (IS_ERR(match)) + return PTR_ERR(match); + master = kzalloc(sizeof(*master), GFP_KERNEL); if (!master) return -ENOMEM; master->dev = dev; master->ops = ops; + master->match = match; INIT_LIST_HEAD(&master->components); /* Add to the list of available masters. */ @@ -215,6 +322,13 @@ int component_master_add(struct device *dev, return ret < 0 ? 
ret : 0; } +EXPORT_SYMBOL_GPL(component_master_add_with_match); + +int component_master_add(struct device *dev, + const struct component_master_ops *ops) +{ + return component_master_add_with_match(dev, ops, NULL); +} EXPORT_SYMBOL_GPL(component_master_add); void component_master_del(struct device *dev, diff --git a/include/linux/component.h b/include/linux/component.h index 68870182ca1e..c00dcc302611 100644 --- a/include/linux/component.h +++ b/include/linux/component.h @@ -29,4 +29,11 @@ void component_master_del(struct device *, int component_master_add_child(struct master *master, int (*compare)(struct device *, void *), void *compare_data); +struct component_match; + +int component_master_add_with_match(struct device *, + const struct component_master_ops *, struct component_match *); +void component_match_add(struct device *, struct component_match **, + int (*compare)(struct device *, void *), void *compare_data); + #endif -- cgit v1.2.3-59-g8ed1b From e7cf34ef396d6ceaa776661830d4ce332da650f0 Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Mon, 30 Jun 2014 10:57:35 -0500 Subject: regulator: palmas: Rename reg_info to palmas_reg_info reg_info is a generic term which might cause conflict at a later point in time. To prevent such a thing from occuring in future, rename to palmas_reg_info. Signed-off-by: Nishanth Menon Acked-by: Lee Jones Tested-by: Stephen Warren Tested-by: Keerthy Signed-off-by: Mark Brown --- drivers/regulator/palmas-regulator.c | 4 ++-- include/linux/mfd/palmas.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/regulator/palmas-regulator.c b/drivers/regulator/palmas-regulator.c index 91f60fa91d4b..05f11b96e17f 100644 --- a/drivers/regulator/palmas-regulator.c +++ b/drivers/regulator/palmas-regulator.c @@ -41,7 +41,7 @@ static const struct regulator_linear_range smps_high_ranges[] = { REGULATOR_LINEAR_RANGE(3300000, 0x7A, 0x7f, 0), }; -static struct regs_info palmas_regs_info[] = { +static struct palmas_regs_info palmas_regs_info[] = { { .name = "SMPS12", .sname = "smps1-in", @@ -227,7 +227,7 @@ static struct regs_info palmas_regs_info[] = { }, }; -static struct regs_info tps65917_regs_info[] = { +static struct palmas_regs_info tps65917_regs_info[] = { { .name = "SMPS1", .sname = "smps1-in", diff --git a/include/linux/mfd/palmas.h b/include/linux/mfd/palmas.h index 1a045ba32c13..fb0390a1a498 100644 --- a/include/linux/mfd/palmas.h +++ b/include/linux/mfd/palmas.h @@ -102,7 +102,7 @@ struct palmas_sleep_requestor_info { int bit_pos; }; -struct regs_info { +struct palmas_regs_info { char *name; char *sname; u8 vsel_addr; @@ -117,7 +117,7 @@ struct palmas_pmic_driver_data { int ldo_begin; int ldo_end; int max_reg; - struct regs_info *palmas_regs_info; + struct palmas_regs_info *palmas_regs_info; struct of_regulator_match *palmas_matches; struct palmas_sleep_requestor_info *sleep_req_info; int (*smps_register)(struct palmas_pmic *pmic, -- cgit v1.2.3-59-g8ed1b From 8b37e1bef5a6b60e949e28a4db3006e4b00bd758 Mon Sep 17 00:00:00 2001 From: Vincent Donnefort Date: Sat, 14 Jun 2014 02:21:40 -0700 Subject: leds: convert blink timer to workqueue This patch converts the blink timer from led-core to workqueue which is more suitable for this kind of non-priority operations. Moreover, timer may lead to errors when a LED setting function use a scheduling function such as pinctrl which is using mutex. 
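The rationale can be illustrated with the generic conversion pattern below (the structure and names are made up, not taken from the patch): a timer callback runs in softirq context and must not sleep, whereas a work item runs in process context and may take the mutex that a pinctrl-backed brightness setter needs.

struct my_dev {
	struct delayed_work work;	/* was: struct timer_list */
	struct mutex lock;
};

static void my_work_fn(struct work_struct *ws)
{
	struct my_dev *d = container_of(ws, struct my_dev, work.work);

	mutex_lock(&d->lock);	/* legal here, forbidden in a timer callback */
	/* ... program the hardware ... */
	mutex_unlock(&d->lock);
}

/* setup:   INIT_DELAYED_WORK(&d->work, my_work_fn);                         was setup_timer()    */
/* (re)arm: queue_delayed_work(system_wq, &d->work, msecs_to_jiffies(10));   was mod_timer()      */
/* cancel:  cancel_delayed_work_sync(&d->work);                              was del_timer_sync() */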
Signed-off-by: Vincent Donnefort Signed-off-by: Bryan Wu --- drivers/leds/led-class.c | 14 +++++++------- drivers/leds/led-core.c | 11 ++++++----- include/linux/leds.h | 3 +-- 3 files changed, 14 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/drivers/leds/led-class.c b/drivers/leds/led-class.c index aa29198fca3e..129729d35478 100644 --- a/drivers/leds/led-class.c +++ b/drivers/leds/led-class.c @@ -15,10 +15,10 @@ #include #include #include -#include #include #include #include +#include #include "leds.h" static struct class *leds_class; @@ -97,9 +97,10 @@ static const struct attribute_group *led_groups[] = { NULL, }; -static void led_timer_function(unsigned long data) +static void led_work_function(struct work_struct *ws) { - struct led_classdev *led_cdev = (void *)data; + struct led_classdev *led_cdev = + container_of(ws, struct led_classdev, blink_work.work); unsigned long brightness; unsigned long delay; @@ -143,7 +144,8 @@ static void led_timer_function(unsigned long data) } } - mod_timer(&led_cdev->blink_timer, jiffies + msecs_to_jiffies(delay)); + queue_delayed_work(system_wq, &led_cdev->blink_work, + msecs_to_jiffies(delay)); } static void set_brightness_delayed(struct work_struct *ws) @@ -231,9 +233,7 @@ int led_classdev_register(struct device *parent, struct led_classdev *led_cdev) INIT_WORK(&led_cdev->set_brightness_work, set_brightness_delayed); - init_timer(&led_cdev->blink_timer); - led_cdev->blink_timer.function = led_timer_function; - led_cdev->blink_timer.data = (unsigned long)led_cdev; + INIT_DELAYED_WORK(&led_cdev->blink_work, led_work_function); #ifdef CONFIG_LEDS_TRIGGERS led_trigger_set_default(led_cdev); diff --git a/drivers/leds/led-core.c b/drivers/leds/led-core.c index 71b40d3bf776..4bb116867b88 100644 --- a/drivers/leds/led-core.c +++ b/drivers/leds/led-core.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "leds.h" DECLARE_RWSEM(leds_list_lock); @@ -51,7 +52,7 @@ static void led_set_software_blink(struct led_classdev *led_cdev, return; } - mod_timer(&led_cdev->blink_timer, jiffies + 1); + queue_delayed_work(system_wq, &led_cdev->blink_work, 1); } @@ -75,7 +76,7 @@ void led_blink_set(struct led_classdev *led_cdev, unsigned long *delay_on, unsigned long *delay_off) { - del_timer_sync(&led_cdev->blink_timer); + cancel_delayed_work_sync(&led_cdev->blink_work); led_cdev->flags &= ~LED_BLINK_ONESHOT; led_cdev->flags &= ~LED_BLINK_ONESHOT_STOP; @@ -90,7 +91,7 @@ void led_blink_set_oneshot(struct led_classdev *led_cdev, int invert) { if ((led_cdev->flags & LED_BLINK_ONESHOT) && - timer_pending(&led_cdev->blink_timer)) + delayed_work_pending(&led_cdev->blink_work)) return; led_cdev->flags |= LED_BLINK_ONESHOT; @@ -107,7 +108,7 @@ EXPORT_SYMBOL(led_blink_set_oneshot); void led_stop_software_blink(struct led_classdev *led_cdev) { - del_timer_sync(&led_cdev->blink_timer); + cancel_delayed_work_sync(&led_cdev->blink_work); led_cdev->blink_delay_on = 0; led_cdev->blink_delay_off = 0; } @@ -116,7 +117,7 @@ EXPORT_SYMBOL_GPL(led_stop_software_blink); void led_set_brightness(struct led_classdev *led_cdev, enum led_brightness brightness) { - /* delay brightness setting if need to stop soft-blink timer */ + /* delay brightness setting if need to stop soft-blink work */ if (led_cdev->blink_delay_on || led_cdev->blink_delay_off) { led_cdev->delayed_set_value = brightness; schedule_work(&led_cdev->set_brightness_work); diff --git a/include/linux/leds.h b/include/linux/leds.h index e43686472197..6a599dce7f9d 100644 --- a/include/linux/leds.h +++ 
b/include/linux/leds.h @@ -15,7 +15,6 @@ #include #include #include -#include #include struct device; @@ -69,7 +68,7 @@ struct led_classdev { const char *default_trigger; /* Trigger to use */ unsigned long blink_delay_on, blink_delay_off; - struct timer_list blink_timer; + struct delayed_work blink_work; int blink_brightness; struct work_struct set_brightness_work; -- cgit v1.2.3-59-g8ed1b From 9e33002fd1791bcab626b19301670484c1cb6d50 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 19 Jun 2014 17:22:44 +1000 Subject: PCI: Make resetting secondary bus logic common Commit d92a208d086 ("powerpc/pci: Mask linkDown on resetting PCI bus") implemented same logic (resetting PCI secondary bus by bridge's config register PCI_BRIDGE_CTL_BUS_RESET) in PCI core and arch-dependent code. To avoid the duplication, move the logic to pci_reset_secondary_bus(). That commit did not declare the pcibios_reset_secondary_bus() interface in linux/include/pci.h. Add the declaration. No functional change. Signed-off-by: Gavin Shan Signed-off-by: Bjorn Helgaas --- drivers/pci/pci.c | 7 ++++++- include/linux/pci.h | 2 ++ 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 63a54a340863..758f1d88f28d 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -3193,7 +3193,7 @@ static int pci_pm_reset(struct pci_dev *dev, int probe) return 0; } -void __weak pcibios_reset_secondary_bus(struct pci_dev *dev) +void pci_reset_secondary_bus(struct pci_dev *dev) { u16 ctrl; @@ -3219,6 +3219,11 @@ void __weak pcibios_reset_secondary_bus(struct pci_dev *dev) ssleep(1); } +void __weak pcibios_reset_secondary_bus(struct pci_dev *dev) +{ + pci_reset_secondary_bus(dev); +} + /** * pci_reset_bridge_secondary_bus - Reset the secondary bus on a PCI bridge. * @dev: Bridge device diff --git a/include/linux/pci.h b/include/linux/pci.h index 466bcd111d85..340529d399b2 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -978,6 +978,8 @@ int pci_try_reset_slot(struct pci_slot *slot); int pci_probe_reset_bus(struct pci_bus *bus); int pci_reset_bus(struct pci_bus *bus); int pci_try_reset_bus(struct pci_bus *bus); +void pci_reset_secondary_bus(struct pci_dev *dev); +void pcibios_reset_secondary_bus(struct pci_dev *dev); void pci_reset_bridge_secondary_bus(struct pci_dev *dev); void pci_update_resource(struct pci_dev *dev, int resno); int __must_check pci_assign_resource(struct pci_dev *dev, int i); -- cgit v1.2.3-59-g8ed1b From 31ea5d4dfe21fb50276dcd70ce268e58d57eccb4 Mon Sep 17 00:00:00 2001 From: Yijing Wang Date: Thu, 19 Jun 2014 16:30:30 +0800 Subject: PCI/MSI: Cache Multiple Message Capable in struct msi_desc The Multiple Message Capable field in the MSI Message Control register indicates how many vectors the device supports. This field is read-only, so cache it in msi_desc to avoid reading it repeatedly. Since we cache the extracted field (not the entire Message Control register), we can use msi_mask() instead of msi_capable_mask(), which is then unused, so remove it. 
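As a worked example of what is being cached: a function whose Multiple Message Capable field reads 3 supports 2^3 = 8 vectors, so msi_mask(entry->msi_attrib.multi_cap) evaluates to (1 << (1 << 3)) - 1 = 0xff, i.e. all eight per-vector mask bits, and the Message Control register no longer has to be re-read on the restore and shutdown paths.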
[bhelgaas: fix whitespace, changelog] Signed-off-by: Yijing Wang Signed-off-by: Bjorn Helgaas --- drivers/pci/msi.c | 15 +++++---------- include/linux/msi.h | 3 ++- 2 files changed, 7 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 9c6995043fff..50a7e4e96da7 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -167,11 +167,6 @@ static inline __attribute_const__ u32 msi_mask(unsigned x) return (1 << (1 << x)) - 1; } -static inline __attribute_const__ u32 msi_capable_mask(u16 control) -{ - return msi_mask((control >> 1) & 7); -} - /* * PCI 2.3 does not specify mask bits for each MSI interrupt. Attempting to * mask all MSI interrupts by clearing the MSI enable bit does not work @@ -454,7 +449,8 @@ static void __pci_restore_msi_state(struct pci_dev *dev) arch_restore_msi_irqs(dev); pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control); - msi_mask_irq(entry, msi_capable_mask(control), entry->masked); + msi_mask_irq(entry, msi_mask(entry->msi_attrib.multi_cap), + entry->masked); control &= ~PCI_MSI_FLAGS_QSIZE; control |= (entry->msi_attrib.multiple << 4) | PCI_MSI_FLAGS_ENABLE; pci_write_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, control); @@ -617,6 +613,7 @@ static int msi_capability_init(struct pci_dev *dev, int nvec) entry->msi_attrib.maskbit = !!(control & PCI_MSI_FLAGS_MASKBIT); entry->msi_attrib.default_irq = dev->irq; /* Save IOAPIC IRQ */ entry->msi_attrib.pos = dev->msi_cap; + entry->msi_attrib.multi_cap = (control & PCI_MSI_FLAGS_QMASK) >> 1; if (control & PCI_MSI_FLAGS_64BIT) entry->mask_pos = dev->msi_cap + PCI_MSI_MASK_64; @@ -625,7 +622,7 @@ static int msi_capability_init(struct pci_dev *dev, int nvec) /* All MSIs are unmasked by default, Mask them all */ if (entry->msi_attrib.maskbit) pci_read_config_dword(dev, entry->mask_pos, &entry->masked); - mask = msi_capable_mask(control); + mask = msi_mask(entry->msi_attrib.multi_cap); msi_mask_irq(entry, mask, mask); list_add_tail(&entry->list, &dev->msi_list); @@ -870,7 +867,6 @@ void pci_msi_shutdown(struct pci_dev *dev) { struct msi_desc *desc; u32 mask; - u16 ctrl; if (!pci_msi_enable || !dev || !dev->msi_enabled) return; @@ -883,8 +879,7 @@ void pci_msi_shutdown(struct pci_dev *dev) dev->msi_enabled = 0; /* Return the device with MSI unmasked as initial states */ - pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &ctrl); - mask = msi_capable_mask(ctrl); + mask = msi_mask(desc->msi_attrib.multi_cap); /* Keep cached state to be restored */ arch_msi_mask_irq(desc, mask, ~mask); diff --git a/include/linux/msi.h b/include/linux/msi.h index 92a2f991262a..8103f32f6d87 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -25,7 +25,8 @@ void write_msi_msg(unsigned int irq, struct msi_msg *msg); struct msi_desc { struct { __u8 is_msix : 1; - __u8 multiple: 3; /* log2 number of messages */ + __u8 multiple: 3; /* log2 num of messages allocated */ + __u8 multi_cap : 3; /* log2 num of messages supported */ __u8 maskbit : 1; /* mask-pending bit supported ? */ __u8 is_64 : 1; /* Address size: 0=32bit 1=64bit */ __u8 pos; /* Location of the msi capability */ -- cgit v1.2.3-59-g8ed1b From 104a1c13ac66e40cf8c6ae74d76ff14ff24b9b01 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Thu, 3 Jul 2014 09:51:18 -0600 Subject: iommu/core: Create central IOMMU group lookup/creation interface Currently each IOMMU driver that supports IOMMU groups has its own code for discovering the base device used in grouping. 
This code is generally not specific to the IOMMU hardware, but to the bus of the devices managed by the IOMMU. We can therefore create a common interface for supporting devices on different buses. Signed-off-by: Alex Williamson Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 182 ++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/iommu.h | 1 + 2 files changed, 183 insertions(+) (limited to 'include/linux') diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index e5555fcfe703..d061c8677a81 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -29,6 +29,7 @@ #include #include #include +#include #include static struct kset *iommu_group_kset; @@ -514,6 +515,187 @@ int iommu_group_id(struct iommu_group *group) } EXPORT_SYMBOL_GPL(iommu_group_id); +/* + * To consider a PCI device isolated, we require ACS to support Source + * Validation, Request Redirection, Completer Redirection, and Upstream + * Forwarding. This effectively means that devices cannot spoof their + * requester ID, requests and completions cannot be redirected, and all + * transactions are forwarded upstream, even as it passes through a + * bridge where the target device is downstream. + */ +#define REQ_ACS_FLAGS (PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF) + +struct group_for_pci_data { + struct pci_dev *pdev; + struct iommu_group *group; +}; + +/* + * DMA alias iterator callback, return the last seen device. Stop and return + * the IOMMU group if we find one along the way. + */ +static int get_pci_alias_or_group(struct pci_dev *pdev, u16 alias, void *opaque) +{ + struct group_for_pci_data *data = opaque; + + data->pdev = pdev; + data->group = iommu_group_get(&pdev->dev); + + return data->group != NULL; +} + +/* + * Use standard PCI bus topology, isolation features, and DMA alias quirks + * to find or create an IOMMU group for a device. + */ +static struct iommu_group *iommu_group_get_for_pci_dev(struct pci_dev *pdev) +{ + struct group_for_pci_data data; + struct pci_bus *bus; + struct iommu_group *group = NULL; + struct pci_dev *tmp; + + /* + * Find the upstream DMA alias for the device. A device must not + * be aliased due to topology in order to have its own IOMMU group. + * If we find an alias along the way that already belongs to a + * group, use it. + */ + if (pci_for_each_dma_alias(pdev, get_pci_alias_or_group, &data)) + return data.group; + + pdev = data.pdev; + + /* + * Continue upstream from the point of minimum IOMMU granularity + * due to aliases to the point where devices are protected from + * peer-to-peer DMA by PCI ACS. Again, if we find an existing + * group, use it. + */ + for (bus = pdev->bus; !pci_is_root_bus(bus); bus = bus->parent) { + if (!bus->self) + continue; + + if (pci_acs_path_enabled(bus->self, NULL, REQ_ACS_FLAGS)) + break; + + pdev = bus->self; + + group = iommu_group_get(&pdev->dev); + if (group) + return group; + } + + /* + * Next we need to consider DMA alias quirks. If one device aliases + * to another, they should be grouped together. It's theoretically + * possible that aliases could create chains of devices where each + * device aliases another device. If we then factor in multifunction + * ACS grouping requirements, each alias could incorporate a new slot + * with multiple functions, each with aliases. This is all extremely + * unlikely as DMA alias quirks are typically only used for PCIe + * devices where we usually have a single slot per bus. 
Furthermore, + * the alias quirk is usually to another function within the slot + * (and ACS multifunction is not supported) or to a different slot + * that doesn't physically exist. The likely scenario is therefore + * that everything on the bus gets grouped together. To reduce the + * problem space, share the IOMMU group for all devices on the bus + * if a DMA alias quirk is present on the bus. + */ + tmp = NULL; + for_each_pci_dev(tmp) { + if (tmp->bus != pdev->bus || + !(tmp->dev_flags & PCI_DEV_FLAGS_DMA_ALIAS_DEVFN)) + continue; + + pci_dev_put(tmp); + tmp = NULL; + + /* We have an alias quirk, search for an existing group */ + for_each_pci_dev(tmp) { + struct iommu_group *group_tmp; + + if (tmp->bus != pdev->bus) + continue; + + group_tmp = iommu_group_get(&tmp->dev); + if (!group) { + group = group_tmp; + continue; + } + + if (group_tmp) { + WARN_ON(group != group_tmp); + iommu_group_put(group_tmp); + } + } + + return group ? group : iommu_group_alloc(); + } + + /* + * Non-multifunction devices or multifunction devices supporting + * ACS get their own group. + */ + if (!pdev->multifunction || pci_acs_enabled(pdev, REQ_ACS_FLAGS)) + return iommu_group_alloc(); + + /* + * Multifunction devices not supporting ACS share a group with other + * similar devices in the same slot. + */ + tmp = NULL; + for_each_pci_dev(tmp) { + if (tmp == pdev || tmp->bus != pdev->bus || + PCI_SLOT(tmp->devfn) != PCI_SLOT(pdev->devfn) || + pci_acs_enabled(tmp, REQ_ACS_FLAGS)) + continue; + + group = iommu_group_get(&tmp->dev); + if (group) { + pci_dev_put(tmp); + return group; + } + } + + /* No shared group found, allocate new */ + return iommu_group_alloc(); +} + +/** + * iommu_group_get_for_dev - Find or create the IOMMU group for a device + * @dev: target device + * + * This function is intended to be called by IOMMU drivers and extended to + * support common, bus-defined algorithms when determining or creating the + * IOMMU group for a device. On success, the caller will hold a reference + * to the returned IOMMU group, which will already include the provided + * device. The reference should be released with iommu_group_put(). 
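For context, a minimal sketch of how an IOMMU driver's ->add_device() callback is meant to consume this helper (the driver name is invented; the Intel VT-d patch later in this series follows exactly this pattern):

static int my_iommu_add_device(struct device *dev)
{
	struct iommu_group *group;

	group = iommu_group_get_for_dev(dev);
	if (IS_ERR(group))
		return PTR_ERR(group);

	/* dev is already a member of the group; just drop our reference */
	iommu_group_put(group);
	return 0;
}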
+ */ +struct iommu_group *iommu_group_get_for_dev(struct device *dev) +{ + struct iommu_group *group = ERR_PTR(-EIO); + int ret; + + group = iommu_group_get(dev); + if (group) + return group; + + if (dev_is_pci(dev)) + group = iommu_group_get_for_pci_dev(to_pci_dev(dev)); + + if (IS_ERR(group)) + return group; + + ret = iommu_group_add_device(group, dev); + if (ret) { + iommu_group_put(group); + return ERR_PTR(ret); + } + + return group; +} + static int add_iommu_group(struct device *dev, void *data) { struct iommu_ops *ops = data; diff --git a/include/linux/iommu.h b/include/linux/iommu.h index b96a5b2136e4..a2e5843b0a22 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -181,6 +181,7 @@ extern int iommu_group_register_notifier(struct iommu_group *group, extern int iommu_group_unregister_notifier(struct iommu_group *group, struct notifier_block *nb); extern int iommu_group_id(struct iommu_group *group); +extern struct iommu_group *iommu_group_get_for_dev(struct device *dev); extern int iommu_domain_get_attr(struct iommu_domain *domain, enum iommu_attr, void *data); -- cgit v1.2.3-59-g8ed1b From c61959ecbbc6bf9034e65c8e8ef03fa9d1066f05 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Thu, 12 Jun 2014 16:12:24 -0600 Subject: iommu: Add sysfs support for IOMMUs IOMMUs currently have no common representation to userspace, most seem to have no representation at all aside from a few printks on bootup. There are however features of IOMMUs that are useful to know about. For instance the IOMMU might support superpages, making use of processor large/huge pages more important in a device assignment scenario. It's also useful to create cross links between devices and IOMMU hardware units, so that users might be able to load balance their devices to avoid thrashing a single hardware unit. This patch adds a device create and destroy interface as well as device linking, making it very lightweight for an IOMMU driver to add basic support. IOMMU drivers can provide additional attributes automatically by using an attribute_group. The attributes exposed are expected to be relatively device specific, the means to retrieve them certainly are, so there are currently no common attributes for the new class created here. Signed-off-by: Alex Williamson Signed-off-by: Joerg Roedel --- Documentation/ABI/testing/sysfs-class-iommu | 17 ++++ drivers/iommu/Makefile | 1 + drivers/iommu/iommu-sysfs.c | 133 ++++++++++++++++++++++++++++ include/linux/iommu.h | 26 ++++++ 4 files changed, 177 insertions(+) create mode 100644 Documentation/ABI/testing/sysfs-class-iommu create mode 100644 drivers/iommu/iommu-sysfs.c (limited to 'include/linux') diff --git a/Documentation/ABI/testing/sysfs-class-iommu b/Documentation/ABI/testing/sysfs-class-iommu new file mode 100644 index 000000000000..6d0a1b4be82d --- /dev/null +++ b/Documentation/ABI/testing/sysfs-class-iommu @@ -0,0 +1,17 @@ +What: /sys/class/iommu//devices/ +Date: June 2014 +KernelVersion: 3.17 +Contact: Alex Williamson +Description: + IOMMU drivers are able to link devices managed by a + given IOMMU here to allow association of IOMMU to + device. + +What: /sys/devices/.../iommu +Date: June 2014 +KernelVersion: 3.17 +Contact: Alex Williamson +Description: + IOMMU drivers are able to link the IOMMU for a + given device here to allow association of device to + IOMMU. 
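A sketch of the driver-side usage this enables (every "my_*" identifier, including struct my_iommu and its sysfs_dev member, is invented; only the iommu_device_create(), iommu_device_link() and iommu_device_unlink() signatures come from this patch):

static struct attribute *my_iommu_attrs[] = {
	/* device-specific attributes would be listed here */
	NULL,
};

static const struct attribute_group my_iommu_attr_group = {
	.name  = "my-iommu",
	.attrs = my_iommu_attrs,
};

static const struct attribute_group *my_iommu_groups[] = {
	&my_iommu_attr_group,
	NULL,
};

static int my_iommu_probe(struct my_iommu *iommu, int id)
{
	iommu->sysfs_dev = iommu_device_create(NULL, iommu, my_iommu_groups,
					       "my-iommu%d", id);
	if (IS_ERR(iommu->sysfs_dev))
		return PTR_ERR(iommu->sysfs_dev);
	return 0;
}

/* and, from the driver's add/remove device callbacks:   */
/*   iommu_device_link(iommu->sysfs_dev, dev);           */
/*   iommu_device_unlink(iommu->sysfs_dev, dev);         */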
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile index 8893bad048e0..7788ebd1f6e2 100644 --- a/drivers/iommu/Makefile +++ b/drivers/iommu/Makefile @@ -1,5 +1,6 @@ obj-$(CONFIG_IOMMU_API) += iommu.o obj-$(CONFIG_IOMMU_API) += iommu-traces.o +obj-$(CONFIG_IOMMU_API) += iommu-sysfs.o obj-$(CONFIG_OF_IOMMU) += of_iommu.o obj-$(CONFIG_MSM_IOMMU) += msm_iommu.o msm_iommu_dev.o obj-$(CONFIG_AMD_IOMMU) += amd_iommu.o amd_iommu_init.o diff --git a/drivers/iommu/iommu-sysfs.c b/drivers/iommu/iommu-sysfs.c new file mode 100644 index 000000000000..d6939234a009 --- /dev/null +++ b/drivers/iommu/iommu-sysfs.c @@ -0,0 +1,133 @@ +/* + * IOMMU sysfs class support + * + * Copyright (C) 2014 Red Hat, Inc. All rights reserved. + * Author: Alex Williamson + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include + +/* + * We provide a common class "devices" group which initially has no attributes. + * As devices are added to the IOMMU, we'll add links to the group. + */ +static struct attribute *devices_attr[] = { + NULL, +}; + +static const struct attribute_group iommu_devices_attr_group = { + .name = "devices", + .attrs = devices_attr, +}; + +static const struct attribute_group *iommu_dev_groups[] = { + &iommu_devices_attr_group, + NULL, +}; + +static void iommu_release_device(struct device *dev) +{ + kfree(dev); +} + +static struct class iommu_class = { + .name = "iommu", + .dev_release = iommu_release_device, + .dev_groups = iommu_dev_groups, +}; + +static int __init iommu_dev_init(void) +{ + return class_register(&iommu_class); +} +postcore_initcall(iommu_dev_init); + +/* + * Create an IOMMU device and return a pointer to it. IOMMU specific + * attributes can be provided as an attribute group, allowing a unique + * namespace per IOMMU type. + */ +struct device *iommu_device_create(struct device *parent, void *drvdata, + const struct attribute_group **groups, + const char *fmt, ...) +{ + struct device *dev; + va_list vargs; + int ret; + + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) + return ERR_PTR(-ENOMEM); + + device_initialize(dev); + + dev->class = &iommu_class; + dev->parent = parent; + dev->groups = groups; + dev_set_drvdata(dev, drvdata); + + va_start(vargs, fmt); + ret = kobject_set_name_vargs(&dev->kobj, fmt, vargs); + va_end(vargs); + if (ret) + goto error; + + ret = device_add(dev); + if (ret) + goto error; + + return dev; + +error: + put_device(dev); + return ERR_PTR(ret); +} + +void iommu_device_destroy(struct device *dev) +{ + if (!dev || IS_ERR(dev)) + return; + + device_unregister(dev); +} + +/* + * IOMMU drivers can indicate a device is managed by a given IOMMU using + * this interface. A link to the device will be created in the "devices" + * directory of the IOMMU device in sysfs and an "iommu" link will be + * created under the linked device, pointing back at the IOMMU device. 
+ */ +int iommu_device_link(struct device *dev, struct device *link) +{ + int ret; + + if (!dev || IS_ERR(dev)) + return -ENODEV; + + ret = sysfs_add_link_to_group(&dev->kobj, "devices", + &link->kobj, dev_name(link)); + if (ret) + return ret; + + ret = sysfs_create_link_nowarn(&link->kobj, &dev->kobj, "iommu"); + if (ret) + sysfs_remove_link_from_group(&dev->kobj, "devices", + dev_name(link)); + + return ret; +} + +void iommu_device_unlink(struct device *dev, struct device *link) +{ + if (!dev || IS_ERR(dev)) + return; + + sysfs_remove_link(&link->kobj, "iommu"); + sysfs_remove_link_from_group(&dev->kobj, "devices", dev_name(link)); +} diff --git a/include/linux/iommu.h b/include/linux/iommu.h index a2e5843b0a22..7fd16e3d1f25 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -187,6 +187,12 @@ extern int iommu_domain_get_attr(struct iommu_domain *domain, enum iommu_attr, void *data); extern int iommu_domain_set_attr(struct iommu_domain *domain, enum iommu_attr, void *data); +struct device *iommu_device_create(struct device *parent, void *drvdata, + const struct attribute_group **groups, + const char *fmt, ...); +void iommu_device_destroy(struct device *dev); +int iommu_device_link(struct device *dev, struct device *link); +void iommu_device_unlink(struct device *dev, struct device *link); /* Window handling function prototypes */ extern int iommu_domain_window_enable(struct iommu_domain *domain, u32 wnd_nr, @@ -397,6 +403,26 @@ static inline int iommu_domain_set_attr(struct iommu_domain *domain, return -EINVAL; } +struct device *iommu_device_create(struct device *parent, void *drvdata, + const struct attribute_group **groups, + const char *fmt, ...) +{ + return ERR_PTR(-ENODEV); +} + +void iommu_device_destroy(struct device *dev) +{ +} + +int iommu_device_link(struct device *dev, struct device *link) +{ + return -EINVAL; +} + +void iommu_device_unlink(struct device *dev, struct device *link) +{ +} + #endif /* CONFIG_IOMMU_API */ #endif /* __LINUX_IOMMU_H */ -- cgit v1.2.3-59-g8ed1b From a5459cfece880e82778a60e6290ad6c0dd688a06 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Thu, 12 Jun 2014 16:12:31 -0600 Subject: iommu/vt-d: Make use of IOMMU sysfs support Register our DRHD IOMMUs, cross link devices, and provide a base set of attributes for the IOMMU. Note that IRQ remapping support parses the DMAR table very early in boot, well before the iommu_class can reasonably be setup, so our registration is split between intel_iommu_init(), which occurs later, and alloc_iommu(), which typically occurs much earlier, but may happen at any time later with IOMMU hot-add support. On a typical desktop system, this provides the following (pruned): $ find /sys | grep dmar /sys/devices/virtual/iommu/dmar0 /sys/devices/virtual/iommu/dmar0/devices /sys/devices/virtual/iommu/dmar0/devices/0000:00:02.0 /sys/devices/virtual/iommu/dmar0/intel-iommu /sys/devices/virtual/iommu/dmar0/intel-iommu/cap /sys/devices/virtual/iommu/dmar0/intel-iommu/ecap /sys/devices/virtual/iommu/dmar0/intel-iommu/address /sys/devices/virtual/iommu/dmar0/intel-iommu/version /sys/devices/virtual/iommu/dmar1 /sys/devices/virtual/iommu/dmar1/devices /sys/devices/virtual/iommu/dmar1/devices/0000:00:00.0 /sys/devices/virtual/iommu/dmar1/devices/0000:00:01.0 /sys/devices/virtual/iommu/dmar1/devices/0000:00:16.0 /sys/devices/virtual/iommu/dmar1/devices/0000:00:1a.0 /sys/devices/virtual/iommu/dmar1/devices/0000:00:1b.0 /sys/devices/virtual/iommu/dmar1/devices/0000:00:1c.0 ... 
/sys/devices/virtual/iommu/dmar1/intel-iommu /sys/devices/virtual/iommu/dmar1/intel-iommu/cap /sys/devices/virtual/iommu/dmar1/intel-iommu/ecap /sys/devices/virtual/iommu/dmar1/intel-iommu/address /sys/devices/virtual/iommu/dmar1/intel-iommu/version /sys/class/iommu/dmar0 /sys/class/iommu/dmar1 (devices also link back to the dmar units) This makes address, version, capabilities, and extended capabilities available, just like printed on boot. I've tried not to duplicate data that can be found in the DMAR table, with the exception of the address, which provides an easy way to associate the sysfs device with a DRHD entry in the DMAR. It's tempting to add scopes and RMRR data here, but the full DMAR table is already exposed under /sys/firmware/ and therefore already provides a way for userspace to learn such details. Signed-off-by: Alex Williamson Signed-off-by: Joerg Roedel --- .../ABI/testing/sysfs-class-iommu-intel-iommu | 32 +++++++++ drivers/iommu/dmar.c | 9 +++ drivers/iommu/intel-iommu.c | 77 +++++++++++++++++++++- include/linux/intel-iommu.h | 3 + 4 files changed, 120 insertions(+), 1 deletion(-) create mode 100644 Documentation/ABI/testing/sysfs-class-iommu-intel-iommu (limited to 'include/linux') diff --git a/Documentation/ABI/testing/sysfs-class-iommu-intel-iommu b/Documentation/ABI/testing/sysfs-class-iommu-intel-iommu new file mode 100644 index 000000000000..258cc246d98e --- /dev/null +++ b/Documentation/ABI/testing/sysfs-class-iommu-intel-iommu @@ -0,0 +1,32 @@ +What: /sys/class/iommu//intel-iommu/address +Date: June 2014 +KernelVersion: 3.17 +Contact: Alex Williamson +Description: + Physical address of the VT-d DRHD for this IOMMU. + Format: %llx. This allows association of a sysfs + intel-iommu with a DMAR DRHD table entry. + +What: /sys/class/iommu//intel-iommu/cap +Date: June 2014 +KernelVersion: 3.17 +Contact: Alex Williamson +Description: + The cached hardware capability register value + of this DRHD unit. Format: %llx. + +What: /sys/class/iommu//intel-iommu/ecap +Date: June 2014 +KernelVersion: 3.17 +Contact: Alex Williamson +Description: + The cached hardware extended capability register + value of this DRHD unit. Format: %llx. + +What: /sys/class/iommu//intel-iommu/version +Date: June 2014 +KernelVersion: 3.17 +Contact: Alex Williamson +Description: + The architecture version as reported from the + VT-d VER_REG. 
Format: %d:%d, major:minor diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index 9a4f05e5b23f..6744e2d4ff6f 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include @@ -980,6 +981,12 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd) raw_spin_lock_init(&iommu->register_lock); drhd->iommu = iommu; + + if (intel_iommu_enabled) + iommu->iommu_dev = iommu_device_create(NULL, iommu, + intel_iommu_groups, + iommu->name); + return 0; err_unmap: @@ -991,6 +998,8 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd) static void free_iommu(struct intel_iommu *iommu) { + iommu_device_destroy(iommu->iommu_dev); + if (iommu->irq) { free_irq(iommu->irq, iommu); irq_set_handler_data(iommu->irq, NULL); diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index f38ec7ab7673..f9e5f84ee952 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -3944,6 +3944,63 @@ static struct notifier_block intel_iommu_memory_nb = { .priority = 0 }; + +static ssize_t intel_iommu_show_version(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct intel_iommu *iommu = dev_get_drvdata(dev); + u32 ver = readl(iommu->reg + DMAR_VER_REG); + return sprintf(buf, "%d:%d\n", + DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver)); +} +static DEVICE_ATTR(version, S_IRUGO, intel_iommu_show_version, NULL); + +static ssize_t intel_iommu_show_address(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct intel_iommu *iommu = dev_get_drvdata(dev); + return sprintf(buf, "%llx\n", iommu->reg_phys); +} +static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL); + +static ssize_t intel_iommu_show_cap(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct intel_iommu *iommu = dev_get_drvdata(dev); + return sprintf(buf, "%llx\n", iommu->cap); +} +static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL); + +static ssize_t intel_iommu_show_ecap(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct intel_iommu *iommu = dev_get_drvdata(dev); + return sprintf(buf, "%llx\n", iommu->ecap); +} +static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL); + +static struct attribute *intel_iommu_attrs[] = { + &dev_attr_version.attr, + &dev_attr_address.attr, + &dev_attr_cap.attr, + &dev_attr_ecap.attr, + NULL, +}; + +static struct attribute_group intel_iommu_group = { + .name = "intel-iommu", + .attrs = intel_iommu_attrs, +}; + +const struct attribute_group *intel_iommu_groups[] = { + &intel_iommu_group, + NULL, +}; + int __init intel_iommu_init(void) { int ret = -ENODEV; @@ -4015,6 +4072,11 @@ int __init intel_iommu_init(void) init_iommu_pm_ops(); + for_each_active_iommu(iommu, drhd) + iommu->iommu_dev = iommu_device_create(NULL, iommu, + intel_iommu_groups, + iommu->name); + bus_set_iommu(&pci_bus_type, &intel_iommu_ops); bus_register_notifier(&pci_bus_type, &device_nb); if (si_domain && !hw_pass_through) @@ -4358,12 +4420,16 @@ static int intel_iommu_domain_has_cap(struct iommu_domain *domain, static int intel_iommu_add_device(struct device *dev) { + struct intel_iommu *iommu; struct iommu_group *group; u8 bus, devfn; - if (!device_to_iommu(dev, &bus, &devfn)) + iommu = device_to_iommu(dev, &bus, &devfn); + if (!iommu) return -ENODEV; + iommu_device_link(iommu->iommu_dev, dev); + group = iommu_group_get_for_dev(dev); if (IS_ERR(group)) @@ -4375,7 +4441,16 @@ static int intel_iommu_add_device(struct device *dev) static void 
intel_iommu_remove_device(struct device *dev) { + struct intel_iommu *iommu; + u8 bus, devfn; + + iommu = device_to_iommu(dev, &bus, &devfn); + if (!iommu) + return; + iommu_group_remove_device(dev); + + iommu_device_unlink(iommu->iommu_dev, dev); } static struct iommu_ops intel_iommu_ops = { diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 0a2da5188217..a65208a8fe18 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -336,6 +336,7 @@ struct intel_iommu { #ifdef CONFIG_IRQ_REMAP struct ir_table *ir_table; /* Interrupt remapping info */ #endif + struct device *iommu_dev; /* IOMMU-sysfs device */ int node; }; @@ -365,4 +366,6 @@ extern int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu); extern int dmar_ir_support(void); +extern const struct attribute_group *intel_iommu_groups[]; + #endif -- cgit v1.2.3-59-g8ed1b From 93c6ee94c140eefb6f9d5b6e2ad1acc2e138e44c Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Thu, 3 Jul 2014 07:51:52 +0300 Subject: dma: Support for 3 bytes word size Add DMA_SLAVE_BUSWIDTH_3_BYTES to dma_slave_buswidth for engines and users to select 3 bytes as bus width. For example eDMA can be configured to use 3bytes mode and in audio we have formats stored on 3bytes in memory (_XXX_3LE) where this new bus width can be used. Signed-off-by: Peter Ujfalusi Acked-by: Vinod Koul Acked-by: Takashi Iwai Signed-off-by: Mark Brown --- include/linux/dmaengine.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index d2c5cc7c583c..3d1c2aa51530 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -299,6 +299,7 @@ enum dma_slave_buswidth { DMA_SLAVE_BUSWIDTH_UNDEFINED = 0, DMA_SLAVE_BUSWIDTH_1_BYTE = 1, DMA_SLAVE_BUSWIDTH_2_BYTES = 2, + DMA_SLAVE_BUSWIDTH_3_BYTES = 3, DMA_SLAVE_BUSWIDTH_4_BYTES = 4, DMA_SLAVE_BUSWIDTH_8_BYTES = 8, }; -- cgit v1.2.3-59-g8ed1b From b22f6434cf48af001330e370e9d781aeb668f98c Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 27 Jun 2014 09:03:12 +0200 Subject: iommu: Constify struct iommu_ops This structure is read-only data and should never be modified. 
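For illustration only, a minimal sketch of what the constification means for an IOMMU driver: the ops table can now live in .rodata and is passed around as a const pointer. The foo_iommu names are hypothetical; the callback names match the domain_init/attach_dev style ops used by the drivers touched in this patch.

#include <linux/iommu.h>
#include <linux/platform_device.h>

static int foo_iommu_domain_init(struct iommu_domain *domain)
{
        /* allocate page tables and stash them in domain->priv */
        return 0;
}

static void foo_iommu_domain_destroy(struct iommu_domain *domain)
{
        /* free whatever domain_init() allocated */
}

/* Read-only after build time, so declare it const and let it land in .rodata */
static const struct iommu_ops foo_iommu_ops = {
        .domain_init    = foo_iommu_domain_init,
        .domain_destroy = foo_iommu_domain_destroy,
};

static int __init foo_iommu_init(void)
{
        /* bus_set_iommu() now takes a const struct iommu_ops * */
        return bus_set_iommu(&platform_bus_type, &foo_iommu_ops);
}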
Signed-off-by: Thierry Reding Signed-off-by: Greg Kroah-Hartman Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 4 ++-- drivers/iommu/arm-smmu.c | 2 +- drivers/iommu/exynos-iommu.c | 2 +- drivers/iommu/fsl_pamu_domain.c | 2 +- drivers/iommu/intel-iommu.c | 4 ++-- drivers/iommu/iommu.c | 19 ++++++++++++++----- drivers/iommu/ipmmu-vmsa.c | 2 +- drivers/iommu/msm_iommu.c | 2 +- drivers/iommu/omap-iommu.c | 2 +- drivers/iommu/shmobile-iommu.c | 2 +- drivers/iommu/tegra-gart.c | 2 +- drivers/iommu/tegra-smmu.c | 2 +- include/linux/device.h | 2 +- include/linux/iommu.h | 4 ++-- 14 files changed, 30 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index c8f87a6c9b90..18405314168b 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -80,7 +80,7 @@ LIST_HEAD(hpet_map); */ static struct protection_domain *pt_domain; -static struct iommu_ops amd_iommu_ops; +static const struct iommu_ops amd_iommu_ops; static ATOMIC_NOTIFIER_HEAD(ppr_notifier); int amd_iommu_max_glx_val = -1; @@ -3395,7 +3395,7 @@ static int amd_iommu_domain_has_cap(struct iommu_domain *domain, return 0; } -static struct iommu_ops amd_iommu_ops = { +static const struct iommu_ops amd_iommu_ops = { .domain_init = amd_iommu_domain_init, .domain_destroy = amd_iommu_domain_destroy, .attach_dev = amd_iommu_attach_device, diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 1599354e974d..67727294e6b5 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -1609,7 +1609,7 @@ static void arm_smmu_remove_device(struct device *dev) iommu_group_remove_device(dev); } -static struct iommu_ops arm_smmu_ops = { +static const struct iommu_ops arm_smmu_ops = { .domain_init = arm_smmu_domain_init, .domain_destroy = arm_smmu_domain_destroy, .attach_dev = arm_smmu_attach_dev, diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index 99054d2c040d..d037e87a1fe5 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -1170,7 +1170,7 @@ static void exynos_iommu_remove_device(struct device *dev) iommu_group_remove_device(dev); } -static struct iommu_ops exynos_iommu_ops = { +static const struct iommu_ops exynos_iommu_ops = { .domain_init = exynos_iommu_domain_init, .domain_destroy = exynos_iommu_domain_destroy, .attach_dev = exynos_iommu_attach_device, diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c index d02d668c6ab1..fd5e9146aee6 100644 --- a/drivers/iommu/fsl_pamu_domain.c +++ b/drivers/iommu/fsl_pamu_domain.c @@ -1076,7 +1076,7 @@ static u32 fsl_pamu_get_windows(struct iommu_domain *domain) return dma_domain->win_cnt; } -static struct iommu_ops fsl_pamu_ops = { +static const struct iommu_ops fsl_pamu_ops = { .domain_init = fsl_pamu_domain_init, .domain_destroy = fsl_pamu_domain_destroy, .attach_dev = fsl_pamu_attach_device, diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index f9e5f84ee952..41d9a2dcab2e 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -450,7 +450,7 @@ EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped); static DEFINE_SPINLOCK(device_domain_lock); static LIST_HEAD(device_domain_list); -static struct iommu_ops intel_iommu_ops; +static const struct iommu_ops intel_iommu_ops; static int __init intel_iommu_setup(char *str) { @@ -4453,7 +4453,7 @@ static void intel_iommu_remove_device(struct device *dev) iommu_device_unlink(iommu->iommu_dev, dev); } -static struct iommu_ops 
intel_iommu_ops = { +static const struct iommu_ops intel_iommu_ops = { .domain_init = intel_iommu_domain_init, .domain_destroy = intel_iommu_domain_destroy, .attach_dev = intel_iommu_attach_device, diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index d061c8677a81..169836020208 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -36,6 +36,10 @@ static struct kset *iommu_group_kset; static struct ida iommu_group_ida; static struct mutex iommu_group_mutex; +struct iommu_callback_data { + const struct iommu_ops *ops; +}; + struct iommu_group { struct kobject kobj; struct kobject *devices_kobj; @@ -698,7 +702,8 @@ struct iommu_group *iommu_group_get_for_dev(struct device *dev) static int add_iommu_group(struct device *dev, void *data) { - struct iommu_ops *ops = data; + struct iommu_callback_data *cb = data; + const struct iommu_ops *ops = cb->ops; if (!ops->add_device) return -ENODEV; @@ -714,7 +719,7 @@ static int iommu_bus_notifier(struct notifier_block *nb, unsigned long action, void *data) { struct device *dev = data; - struct iommu_ops *ops = dev->bus->iommu_ops; + const struct iommu_ops *ops = dev->bus->iommu_ops; struct iommu_group *group; unsigned long group_action = 0; @@ -767,10 +772,14 @@ static struct notifier_block iommu_bus_nb = { .notifier_call = iommu_bus_notifier, }; -static void iommu_bus_init(struct bus_type *bus, struct iommu_ops *ops) +static void iommu_bus_init(struct bus_type *bus, const struct iommu_ops *ops) { + struct iommu_callback_data cb = { + .ops = ops, + }; + bus_register_notifier(bus, &iommu_bus_nb); - bus_for_each_dev(bus, NULL, ops, add_iommu_group); + bus_for_each_dev(bus, NULL, &cb, add_iommu_group); } /** @@ -786,7 +795,7 @@ static void iommu_bus_init(struct bus_type *bus, struct iommu_ops *ops) * is set up. With this function the iommu-driver can set the iommu-ops * afterwards. 
*/ -int bus_set_iommu(struct bus_type *bus, struct iommu_ops *ops) +int bus_set_iommu(struct bus_type *bus, const struct iommu_ops *ops) { if (bus->iommu_ops != NULL) return -EBUSY; diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 53cde086e83b..7dab5cbcc775 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -1120,7 +1120,7 @@ static void ipmmu_remove_device(struct device *dev) dev->archdata.iommu = NULL; } -static struct iommu_ops ipmmu_ops = { +static const struct iommu_ops ipmmu_ops = { .domain_init = ipmmu_domain_init, .domain_destroy = ipmmu_domain_destroy, .attach_dev = ipmmu_attach_device, diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c index f5ff657f49fa..49f41d6e02f1 100644 --- a/drivers/iommu/msm_iommu.c +++ b/drivers/iommu/msm_iommu.c @@ -674,7 +674,7 @@ fail: return 0; } -static struct iommu_ops msm_iommu_ops = { +static const struct iommu_ops msm_iommu_ops = { .domain_init = msm_iommu_domain_init, .domain_destroy = msm_iommu_domain_destroy, .attach_dev = msm_iommu_attach_dev, diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index 895af06a667f..031b2476bcc4 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -1291,7 +1291,7 @@ static void omap_iommu_remove_device(struct device *dev) kfree(arch_data); } -static struct iommu_ops omap_iommu_ops = { +static const struct iommu_ops omap_iommu_ops = { .domain_init = omap_iommu_domain_init, .domain_destroy = omap_iommu_domain_destroy, .attach_dev = omap_iommu_attach_dev, diff --git a/drivers/iommu/shmobile-iommu.c b/drivers/iommu/shmobile-iommu.c index 464acda0bbc4..1333e6fb3405 100644 --- a/drivers/iommu/shmobile-iommu.c +++ b/drivers/iommu/shmobile-iommu.c @@ -354,7 +354,7 @@ static int shmobile_iommu_add_device(struct device *dev) return 0; } -static struct iommu_ops shmobile_iommu_ops = { +static const struct iommu_ops shmobile_iommu_ops = { .domain_init = shmobile_iommu_domain_init, .domain_destroy = shmobile_iommu_domain_destroy, .attach_dev = shmobile_iommu_attach_device, diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c index dba1a9fd5070..b10a8ecede8e 100644 --- a/drivers/iommu/tegra-gart.c +++ b/drivers/iommu/tegra-gart.c @@ -309,7 +309,7 @@ static int gart_iommu_domain_has_cap(struct iommu_domain *domain, return 0; } -static struct iommu_ops gart_iommu_ops = { +static const struct iommu_ops gart_iommu_ops = { .domain_init = gart_iommu_domain_init, .domain_destroy = gart_iommu_domain_destroy, .attach_dev = gart_iommu_attach_dev, diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index 605b5b46a903..792da5ea6d12 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -947,7 +947,7 @@ static void smmu_iommu_domain_destroy(struct iommu_domain *domain) dev_dbg(smmu->dev, "smmu_as@%p\n", as); } -static struct iommu_ops smmu_iommu_ops = { +static const struct iommu_ops smmu_iommu_ops = { .domain_init = smmu_iommu_domain_init, .domain_destroy = smmu_iommu_domain_destroy, .attach_dev = smmu_iommu_attach_dev, diff --git a/include/linux/device.h b/include/linux/device.h index af424acd393d..75f5bcb8930b 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -124,7 +124,7 @@ struct bus_type { const struct dev_pm_ops *pm; - struct iommu_ops *iommu_ops; + const struct iommu_ops *iommu_ops; struct subsys_private *p; struct lock_class_key lock_key; diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 7fd16e3d1f25..c7097d7f024c 100644 --- 
a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -50,7 +50,7 @@ struct iommu_domain_geometry { }; struct iommu_domain { - struct iommu_ops *ops; + const struct iommu_ops *ops; void *priv; iommu_fault_handler_t handler; void *handler_token; @@ -140,7 +140,7 @@ struct iommu_ops { #define IOMMU_GROUP_NOTIFY_UNBIND_DRIVER 5 /* Pre Driver unbind */ #define IOMMU_GROUP_NOTIFY_UNBOUND_DRIVER 6 /* Post Driver unbind */ -extern int bus_set_iommu(struct bus_type *bus, struct iommu_ops *ops); +extern int bus_set_iommu(struct bus_type *bus, const struct iommu_ops *ops); extern bool iommu_present(struct bus_type *bus); extern struct iommu_domain *iommu_domain_alloc(struct bus_type *bus); extern struct iommu_group *iommu_group_get_by_id(int id); -- cgit v1.2.3-59-g8ed1b From 2d239c9e92087d5f4f667371ae350db9f76e3191 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 22 Jun 2014 20:59:00 +0100 Subject: iio:st sensors: remove custom sampling frequence attribute in favour of core support. This allows in kernel client drivers to access this Signed-off-by: Jonathan Cameron Cc: Denis Ciocca Reviewed-by: Hartmut Knaack --- drivers/iio/accel/st_accel_core.c | 12 ++++++++-- drivers/iio/common/st_sensors/st_sensors_core.c | 29 ------------------------- drivers/iio/gyro/st_gyro_core.c | 12 ++++++++-- drivers/iio/magnetometer/st_magn_core.c | 12 ++++++++-- drivers/iio/pressure/st_pressure_core.c | 27 +++++++++++++++++++++-- include/linux/iio/common/st_sensors.h | 12 +--------- 6 files changed, 56 insertions(+), 48 deletions(-) (limited to 'include/linux') diff --git a/drivers/iio/accel/st_accel_core.c b/drivers/iio/accel/st_accel_core.c index a2abf7c2ce3b..087864854c61 100644 --- a/drivers/iio/accel/st_accel_core.c +++ b/drivers/iio/accel/st_accel_core.c @@ -393,6 +393,9 @@ static int st_accel_read_raw(struct iio_dev *indio_dev, *val = 0; *val2 = adata->current_fullscale->gain; return IIO_VAL_INT_PLUS_MICRO; + case IIO_CHAN_INFO_SAMP_FREQ: + *val = adata->odr; + return IIO_VAL_INT; default: return -EINVAL; } @@ -410,6 +413,13 @@ static int st_accel_write_raw(struct iio_dev *indio_dev, case IIO_CHAN_INFO_SCALE: err = st_sensors_set_fullscale_by_gain(indio_dev, val2); break; + case IIO_CHAN_INFO_SAMP_FREQ: + if (val2) + return -EINVAL; + mutex_lock(&indio_dev->mlock); + err = st_sensors_set_odr(indio_dev, val); + mutex_unlock(&indio_dev->mlock); + return err; default: return -EINVAL; } @@ -417,14 +427,12 @@ static int st_accel_write_raw(struct iio_dev *indio_dev, return err; } -static ST_SENSOR_DEV_ATTR_SAMP_FREQ(); static ST_SENSORS_DEV_ATTR_SAMP_FREQ_AVAIL(); static ST_SENSORS_DEV_ATTR_SCALE_AVAIL(in_accel_scale_available); static struct attribute *st_accel_attributes[] = { &iio_dev_attr_sampling_frequency_available.dev_attr.attr, &iio_dev_attr_in_accel_scale_available.dev_attr.attr, - &iio_dev_attr_sampling_frequency.dev_attr.attr, NULL, }; diff --git a/drivers/iio/common/st_sensors/st_sensors_core.c b/drivers/iio/common/st_sensors/st_sensors_core.c index e8b932fed70e..30fb6407fad0 100644 --- a/drivers/iio/common/st_sensors/st_sensors_core.c +++ b/drivers/iio/common/st_sensors/st_sensors_core.c @@ -463,35 +463,6 @@ read_wai_error: } EXPORT_SYMBOL(st_sensors_check_device_support); -ssize_t st_sensors_sysfs_get_sampling_frequency(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct st_sensor_data *adata = iio_priv(dev_get_drvdata(dev)); - - return sprintf(buf, "%d\n", adata->odr); -} -EXPORT_SYMBOL(st_sensors_sysfs_get_sampling_frequency); - -ssize_t 
st_sensors_sysfs_set_sampling_frequency(struct device *dev, - struct device_attribute *attr, const char *buf, size_t size) -{ - int err; - unsigned int odr; - struct iio_dev *indio_dev = dev_get_drvdata(dev); - - err = kstrtoint(buf, 10, &odr); - if (err < 0) - goto conversion_error; - - mutex_lock(&indio_dev->mlock); - err = st_sensors_set_odr(indio_dev, odr); - mutex_unlock(&indio_dev->mlock); - -conversion_error: - return err < 0 ? err : size; -} -EXPORT_SYMBOL(st_sensors_sysfs_set_sampling_frequency); - ssize_t st_sensors_sysfs_sampling_frequency_avail(struct device *dev, struct device_attribute *attr, char *buf) { diff --git a/drivers/iio/gyro/st_gyro_core.c b/drivers/iio/gyro/st_gyro_core.c index ed74a9069989..f156fc6c5c6c 100644 --- a/drivers/iio/gyro/st_gyro_core.c +++ b/drivers/iio/gyro/st_gyro_core.c @@ -245,6 +245,9 @@ static int st_gyro_read_raw(struct iio_dev *indio_dev, *val = 0; *val2 = gdata->current_fullscale->gain; return IIO_VAL_INT_PLUS_MICRO; + case IIO_CHAN_INFO_SAMP_FREQ: + *val = gdata->odr; + return IIO_VAL_INT; default: return -EINVAL; } @@ -262,6 +265,13 @@ static int st_gyro_write_raw(struct iio_dev *indio_dev, case IIO_CHAN_INFO_SCALE: err = st_sensors_set_fullscale_by_gain(indio_dev, val2); break; + case IIO_CHAN_INFO_SAMP_FREQ: + if (val2) + return -EINVAL; + mutex_lock(&indio_dev->mlock); + err = st_sensors_set_odr(indio_dev, val); + mutex_unlock(&indio_dev->mlock); + return err; default: err = -EINVAL; } @@ -269,14 +279,12 @@ static int st_gyro_write_raw(struct iio_dev *indio_dev, return err; } -static ST_SENSOR_DEV_ATTR_SAMP_FREQ(); static ST_SENSORS_DEV_ATTR_SAMP_FREQ_AVAIL(); static ST_SENSORS_DEV_ATTR_SCALE_AVAIL(in_anglvel_scale_available); static struct attribute *st_gyro_attributes[] = { &iio_dev_attr_sampling_frequency_available.dev_attr.attr, &iio_dev_attr_in_anglvel_scale_available.dev_attr.attr, - &iio_dev_attr_sampling_frequency.dev_attr.attr, NULL, }; diff --git a/drivers/iio/magnetometer/st_magn_core.c b/drivers/iio/magnetometer/st_magn_core.c index 240a21dd0c61..a4b64130ac2f 100644 --- a/drivers/iio/magnetometer/st_magn_core.c +++ b/drivers/iio/magnetometer/st_magn_core.c @@ -299,6 +299,9 @@ static int st_magn_read_raw(struct iio_dev *indio_dev, else *val2 = mdata->current_fullscale->gain; return IIO_VAL_INT_PLUS_MICRO; + case IIO_CHAN_INFO_SAMP_FREQ: + *val = mdata->odr; + return IIO_VAL_INT; default: return -EINVAL; } @@ -316,6 +319,13 @@ static int st_magn_write_raw(struct iio_dev *indio_dev, case IIO_CHAN_INFO_SCALE: err = st_sensors_set_fullscale_by_gain(indio_dev, val2); break; + case IIO_CHAN_INFO_SAMP_FREQ: + if (val2) + return -EINVAL; + mutex_lock(&indio_dev->mlock); + err = st_sensors_set_odr(indio_dev, val); + mutex_unlock(&indio_dev->mlock); + return err; default: err = -EINVAL; } @@ -323,14 +333,12 @@ static int st_magn_write_raw(struct iio_dev *indio_dev, return err; } -static ST_SENSOR_DEV_ATTR_SAMP_FREQ(); static ST_SENSORS_DEV_ATTR_SAMP_FREQ_AVAIL(); static ST_SENSORS_DEV_ATTR_SCALE_AVAIL(in_magn_scale_available); static struct attribute *st_magn_attributes[] = { &iio_dev_attr_sampling_frequency_available.dev_attr.attr, &iio_dev_attr_in_magn_scale_available.dev_attr.attr, - &iio_dev_attr_sampling_frequency.dev_attr.attr, NULL, }; diff --git a/drivers/iio/pressure/st_pressure_core.c b/drivers/iio/pressure/st_pressure_core.c index cd7e01f3a93b..473d914ef470 100644 --- a/drivers/iio/pressure/st_pressure_core.c +++ b/drivers/iio/pressure/st_pressure_core.c @@ -307,6 +307,27 @@ static const struct st_sensors st_press_sensors[] = 
{ }, }; +static int st_press_write_raw(struct iio_dev *indio_dev, + struct iio_chan_spec const *ch, + int val, + int val2, + long mask) +{ + int err; + + switch (mask) { + case IIO_CHAN_INFO_SAMP_FREQ: + if (val2) + return -EINVAL; + mutex_lock(&indio_dev->mlock); + err = st_sensors_set_odr(indio_dev, val); + mutex_unlock(&indio_dev->mlock); + return err; + default: + return -EINVAL; + } +} + static int st_press_read_raw(struct iio_dev *indio_dev, struct iio_chan_spec const *ch, int *val, int *val2, long mask) @@ -349,6 +370,9 @@ static int st_press_read_raw(struct iio_dev *indio_dev, } return IIO_VAL_FRACTIONAL; + case IIO_CHAN_INFO_SAMP_FREQ: + *val = pdata->odr; + return IIO_VAL_INT; default: return -EINVAL; } @@ -357,12 +381,10 @@ read_error: return err; } -static ST_SENSOR_DEV_ATTR_SAMP_FREQ(); static ST_SENSORS_DEV_ATTR_SAMP_FREQ_AVAIL(); static struct attribute *st_press_attributes[] = { &iio_dev_attr_sampling_frequency_available.dev_attr.attr, - &iio_dev_attr_sampling_frequency.dev_attr.attr, NULL, }; @@ -374,6 +396,7 @@ static const struct iio_info press_info = { .driver_module = THIS_MODULE, .attrs = &st_press_attribute_group, .read_raw = &st_press_read_raw, + .write_raw = &st_press_write_raw, }; #ifdef CONFIG_IIO_TRIGGER diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h index 96f51f0e0096..d8257ab60bac 100644 --- a/include/linux/iio/common/st_sensors.h +++ b/include/linux/iio/common/st_sensors.h @@ -47,6 +47,7 @@ .type = device_type, \ .modified = mod, \ .info_mask_separate = mask, \ + .info_mask_shared_by_all = BIT(IIO_CHAN_INFO_SAMP_FREQ), \ .scan_index = index, \ .channel2 = ch2, \ .address = addr, \ @@ -59,11 +60,6 @@ }, \ } -#define ST_SENSOR_DEV_ATTR_SAMP_FREQ() \ - IIO_DEV_ATTR_SAMP_FREQ(S_IWUSR | S_IRUGO, \ - st_sensors_sysfs_get_sampling_frequency, \ - st_sensors_sysfs_set_sampling_frequency) - #define ST_SENSORS_DEV_ATTR_SAMP_FREQ_AVAIL() \ IIO_DEV_ATTR_SAMP_FREQ_AVAIL( \ st_sensors_sysfs_sampling_frequency_avail) @@ -285,12 +281,6 @@ int st_sensors_read_info_raw(struct iio_dev *indio_dev, int st_sensors_check_device_support(struct iio_dev *indio_dev, int num_sensors_list, const struct st_sensors *sensors); -ssize_t st_sensors_sysfs_get_sampling_frequency(struct device *dev, - struct device_attribute *attr, char *buf); - -ssize_t st_sensors_sysfs_set_sampling_frequency(struct device *dev, - struct device_attribute *attr, const char *buf, size_t size); - ssize_t st_sensors_sysfs_sampling_frequency_avail(struct device *dev, struct device_attribute *attr, char *buf); -- cgit v1.2.3-59-g8ed1b From 82695ef549b5299d3d9e088d6648289bda8ef3d8 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 22 Jun 2014 20:59:00 +0100 Subject: iio: adis: Switch sampling frequency attr to core support. By using the info_mask_shared_by_all element of the channel spec, acce to the sampling frequency becomes available to in kernel users of the driver. It also shortens and simplifies the code. This particular conversion was made more complicated by the shared library and the fact that a number of the drivers do not actually have support for setting or reading the sampling frequency. The hardware, in those cases investigated supports it. It's just never been implemented. 
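As a condensed sketch of the pattern the conversions below follow, a hypothetical foo driver advertises its sampling frequency through the IIO core instead of a private sysfs attribute; foo_get_odr()/foo_set_odr() are stand-ins for whatever register accesses the real driver performs.

#include <linux/module.h>
#include <linux/iio/iio.h>

/* Stand-ins for the real register accessors (hypothetical) */
static int foo_get_odr(struct iio_dev *indio_dev)
{
        return 100;
}

static int foo_set_odr(struct iio_dev *indio_dev, int odr)
{
        return 0;
}

static const struct iio_chan_spec foo_channels[] = {
        {
                .type = IIO_VOLTAGE,
                .info_mask_separate = BIT(IIO_CHAN_INFO_RAW),
                /* one sampling frequency for the whole device, handled in
                 * read_raw()/write_raw() rather than a custom attribute */
                .info_mask_shared_by_all = BIT(IIO_CHAN_INFO_SAMP_FREQ),
        },
};

static int foo_read_raw(struct iio_dev *indio_dev,
                        struct iio_chan_spec const *chan,
                        int *val, int *val2, long mask)
{
        switch (mask) {
        case IIO_CHAN_INFO_SAMP_FREQ:
                *val = foo_get_odr(indio_dev);
                return IIO_VAL_INT;
        default:
                return -EINVAL;
        }
}

static int foo_write_raw(struct iio_dev *indio_dev,
                         struct iio_chan_spec const *chan,
                         int val, int val2, long mask)
{
        switch (mask) {
        case IIO_CHAN_INFO_SAMP_FREQ:
                if (val2)
                        return -EINVAL;
                return foo_set_odr(indio_dev, val);
        default:
                return -EINVAL;
        }
}

static const struct iio_info foo_info = {
        .driver_module = THIS_MODULE,
        .read_raw = foo_read_raw,
        .write_raw = foo_write_raw,
};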
Signed-off-by: Jonathan Cameron Reviewed-by: Hartmut Knaack Acked-by: Lars-Peter Clausen --- drivers/iio/gyro/adis16260.c | 124 +++++++++++------------------ drivers/staging/iio/accel/adis16201_core.c | 14 ++-- drivers/staging/iio/accel/adis16203_core.c | 11 +-- drivers/staging/iio/accel/adis16204_core.c | 12 +-- drivers/staging/iio/accel/adis16209_core.c | 18 +++-- drivers/staging/iio/accel/adis16240_core.c | 12 +-- include/linux/iio/imu/adis.h | 33 ++++---- 7 files changed, 99 insertions(+), 125 deletions(-) (limited to 'include/linux') diff --git a/drivers/iio/gyro/adis16260.c b/drivers/iio/gyro/adis16260.c index 22b6fb80fa1a..75fe0edd3d0f 100644 --- a/drivers/iio/gyro/adis16260.c +++ b/drivers/iio/gyro/adis16260.c @@ -101,65 +101,6 @@ #define ADIS16260_SCAN_TEMP 3 #define ADIS16260_SCAN_ANGL 4 -static ssize_t adis16260_read_frequency(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct iio_dev *indio_dev = dev_to_iio_dev(dev); - struct adis *adis = iio_priv(indio_dev); - int ret, len = 0; - u16 t; - int sps; - ret = adis_read_reg_16(adis, ADIS16260_SMPL_PRD, &t); - if (ret) - return ret; - - if (spi_get_device_id(adis->spi)->driver_data) /* If an adis16251 */ - sps = (t & ADIS16260_SMPL_PRD_TIME_BASE) ? 8 : 256; - else - sps = (t & ADIS16260_SMPL_PRD_TIME_BASE) ? 66 : 2048; - sps /= (t & ADIS16260_SMPL_PRD_DIV_MASK) + 1; - len = sprintf(buf, "%d\n", sps); - return len; -} - -static ssize_t adis16260_write_frequency(struct device *dev, - struct device_attribute *attr, - const char *buf, - size_t len) -{ - struct iio_dev *indio_dev = dev_to_iio_dev(dev); - struct adis *adis = iio_priv(indio_dev); - unsigned int val; - int ret; - u8 t; - - ret = kstrtouint(buf, 10, &val); - if (ret) - return ret; - - mutex_lock(&indio_dev->mlock); - if (spi_get_device_id(adis->spi)->driver_data) - t = 256 / val; - else - t = 2048 / val; - - if (t > ADIS16260_SMPL_PRD_DIV_MASK) - t = ADIS16260_SMPL_PRD_DIV_MASK; - else if (t > 0) - t--; - - if (t >= 0x0A) - adis->spi->max_speed_hz = ADIS16260_SPI_SLOW; - else - adis->spi->max_speed_hz = ADIS16260_SPI_FAST; - ret = adis_write_reg_8(adis, ADIS16260_SMPL_PRD, t); - - mutex_unlock(&indio_dev->mlock); - - return ret ? 
ret : len; -} - /* Power down the device */ static int adis16260_stop_device(struct iio_dev *indio_dev) { @@ -174,18 +115,19 @@ static int adis16260_stop_device(struct iio_dev *indio_dev) return ret; } -static IIO_DEV_ATTR_SAMP_FREQ(S_IWUSR | S_IRUGO, - adis16260_read_frequency, - adis16260_write_frequency); - static const struct iio_chan_spec adis16260_channels[] = { ADIS_GYRO_CHAN(X, ADIS16260_GYRO_OUT, ADIS16260_SCAN_GYRO, BIT(IIO_CHAN_INFO_CALIBBIAS) | - BIT(IIO_CHAN_INFO_CALIBSCALE), 14), - ADIS_INCLI_CHAN(X, ADIS16260_ANGL_OUT, ADIS16260_SCAN_ANGL, 0, 14), - ADIS_TEMP_CHAN(ADIS16260_TEMP_OUT, ADIS16260_SCAN_TEMP, 12), - ADIS_SUPPLY_CHAN(ADIS16260_SUPPLY_OUT, ADIS16260_SCAN_SUPPLY, 12), - ADIS_AUX_ADC_CHAN(ADIS16260_AUX_ADC, ADIS16260_SCAN_AUX_ADC, 12), + BIT(IIO_CHAN_INFO_CALIBSCALE), + BIT(IIO_CHAN_INFO_SAMP_FREQ), 14), + ADIS_INCLI_CHAN(X, ADIS16260_ANGL_OUT, ADIS16260_SCAN_ANGL, 0, + BIT(IIO_CHAN_INFO_SAMP_FREQ), 14), + ADIS_TEMP_CHAN(ADIS16260_TEMP_OUT, ADIS16260_SCAN_TEMP, + BIT(IIO_CHAN_INFO_SAMP_FREQ), 12), + ADIS_SUPPLY_CHAN(ADIS16260_SUPPLY_OUT, ADIS16260_SCAN_SUPPLY, + BIT(IIO_CHAN_INFO_SAMP_FREQ), 12), + ADIS_AUX_ADC_CHAN(ADIS16260_AUX_ADC, ADIS16260_SCAN_AUX_ADC, + BIT(IIO_CHAN_INFO_SAMP_FREQ), 12), IIO_CHAN_SOFT_TIMESTAMP(5), }; @@ -258,6 +200,20 @@ static int adis16260_read_raw(struct iio_dev *indio_dev, *val = val16; return IIO_VAL_INT; + case IIO_CHAN_INFO_SAMP_FREQ: + ret = adis_read_reg_16(adis, ADIS16260_SMPL_PRD, &val16); + if (ret) + return ret; + + if (spi_get_device_id(adis->spi)->driver_data) + /* If an adis16251 */ + *val = (val16 & ADIS16260_SMPL_PRD_TIME_BASE) ? + 8 : 256; + else + *val = (val16 & ADIS16260_SMPL_PRD_TIME_BASE) ? + 66 : 2048; + *val /= (val16 & ADIS16260_SMPL_PRD_DIV_MASK) + 1; + return IIO_VAL_INT; } return -EINVAL; } @@ -269,7 +225,9 @@ static int adis16260_write_raw(struct iio_dev *indio_dev, long mask) { struct adis *adis = iio_priv(indio_dev); + int ret; u8 addr; + u8 t; switch (mask) { case IIO_CHAN_INFO_CALIBBIAS: @@ -284,21 +242,31 @@ static int adis16260_write_raw(struct iio_dev *indio_dev, addr = adis16260_addresses[chan->scan_index][1]; return adis_write_reg_16(adis, addr, val); + case IIO_CHAN_INFO_SAMP_FREQ: + mutex_lock(&indio_dev->mlock); + if (spi_get_device_id(adis->spi)->driver_data) + t = 256 / val; + else + t = 2048 / val; + + if (t > ADIS16260_SMPL_PRD_DIV_MASK) + t = ADIS16260_SMPL_PRD_DIV_MASK; + else if (t > 0) + t--; + + if (t >= 0x0A) + adis->spi->max_speed_hz = ADIS16260_SPI_SLOW; + else + adis->spi->max_speed_hz = ADIS16260_SPI_FAST; + ret = adis_write_reg_8(adis, ADIS16260_SMPL_PRD, t); + + mutex_unlock(&indio_dev->mlock); + return ret; } return -EINVAL; } -static struct attribute *adis16260_attributes[] = { - &iio_dev_attr_sampling_frequency.dev_attr.attr, - NULL -}; - -static const struct attribute_group adis16260_attribute_group = { - .attrs = adis16260_attributes, -}; - static const struct iio_info adis16260_info = { - .attrs = &adis16260_attribute_group, .read_raw = &adis16260_read_raw, .write_raw = &adis16260_write_raw, .update_scan_mode = adis_update_scan_mode, diff --git a/drivers/staging/iio/accel/adis16201_core.c b/drivers/staging/iio/accel/adis16201_core.c index 2105576fa77c..50ba1fa7f98a 100644 --- a/drivers/staging/iio/accel/adis16201_core.c +++ b/drivers/staging/iio/accel/adis16201_core.c @@ -131,17 +131,17 @@ static int adis16201_write_raw(struct iio_dev *indio_dev, } static const struct iio_chan_spec adis16201_channels[] = { - ADIS_SUPPLY_CHAN(ADIS16201_SUPPLY_OUT, ADIS16201_SCAN_SUPPLY, 12), - 
ADIS_TEMP_CHAN(ADIS16201_TEMP_OUT, ADIS16201_SCAN_TEMP, 12), + ADIS_SUPPLY_CHAN(ADIS16201_SUPPLY_OUT, ADIS16201_SCAN_SUPPLY, 0, 12), + ADIS_TEMP_CHAN(ADIS16201_TEMP_OUT, ADIS16201_SCAN_TEMP, 0, 12), ADIS_ACCEL_CHAN(X, ADIS16201_XACCL_OUT, ADIS16201_SCAN_ACC_X, - BIT(IIO_CHAN_INFO_CALIBBIAS), 14), + BIT(IIO_CHAN_INFO_CALIBBIAS), 0, 14), ADIS_ACCEL_CHAN(Y, ADIS16201_YACCL_OUT, ADIS16201_SCAN_ACC_Y, - BIT(IIO_CHAN_INFO_CALIBBIAS), 14), - ADIS_AUX_ADC_CHAN(ADIS16201_AUX_ADC, ADIS16201_SCAN_AUX_ADC, 12), + BIT(IIO_CHAN_INFO_CALIBBIAS), 0, 14), + ADIS_AUX_ADC_CHAN(ADIS16201_AUX_ADC, ADIS16201_SCAN_AUX_ADC, 0, 12), ADIS_INCLI_CHAN(X, ADIS16201_XINCL_OUT, ADIS16201_SCAN_INCLI_X, - BIT(IIO_CHAN_INFO_CALIBBIAS), 14), + BIT(IIO_CHAN_INFO_CALIBBIAS), 0, 14), ADIS_INCLI_CHAN(X, ADIS16201_YINCL_OUT, ADIS16201_SCAN_INCLI_Y, - BIT(IIO_CHAN_INFO_CALIBBIAS), 14), + BIT(IIO_CHAN_INFO_CALIBBIAS), 0, 14), IIO_CHAN_SOFT_TIMESTAMP(7) }; diff --git a/drivers/staging/iio/accel/adis16203_core.c b/drivers/staging/iio/accel/adis16203_core.c index 409a28ed9043..f472137b0069 100644 --- a/drivers/staging/iio/accel/adis16203_core.c +++ b/drivers/staging/iio/accel/adis16203_core.c @@ -99,13 +99,14 @@ static int adis16203_read_raw(struct iio_dev *indio_dev, } static const struct iio_chan_spec adis16203_channels[] = { - ADIS_SUPPLY_CHAN(ADIS16203_SUPPLY_OUT, ADIS16203_SCAN_SUPPLY, 12), - ADIS_AUX_ADC_CHAN(ADIS16203_AUX_ADC, ADIS16203_SCAN_AUX_ADC, 12), + ADIS_SUPPLY_CHAN(ADIS16203_SUPPLY_OUT, ADIS16203_SCAN_SUPPLY, 0, 12), + ADIS_AUX_ADC_CHAN(ADIS16203_AUX_ADC, ADIS16203_SCAN_AUX_ADC, 0, 12), ADIS_INCLI_CHAN(X, ADIS16203_XINCL_OUT, ADIS16203_SCAN_INCLI_X, - BIT(IIO_CHAN_INFO_CALIBBIAS), 14), + BIT(IIO_CHAN_INFO_CALIBBIAS), 0, 14), /* Fixme: Not what it appears to be - see data sheet */ - ADIS_INCLI_CHAN(Y, ADIS16203_YINCL_OUT, ADIS16203_SCAN_INCLI_Y, 0, 14), - ADIS_TEMP_CHAN(ADIS16203_TEMP_OUT, ADIS16203_SCAN_TEMP, 12), + ADIS_INCLI_CHAN(Y, ADIS16203_YINCL_OUT, ADIS16203_SCAN_INCLI_Y, + 0, 0, 14), + ADIS_TEMP_CHAN(ADIS16203_TEMP_OUT, ADIS16203_SCAN_TEMP, 0, 12), IIO_CHAN_SOFT_TIMESTAMP(5), }; diff --git a/drivers/staging/iio/accel/adis16204_core.c b/drivers/staging/iio/accel/adis16204_core.c index b8ea76857cd6..19eaebc77d7a 100644 --- a/drivers/staging/iio/accel/adis16204_core.c +++ b/drivers/staging/iio/accel/adis16204_core.c @@ -136,15 +136,15 @@ static int adis16204_write_raw(struct iio_dev *indio_dev, } static const struct iio_chan_spec adis16204_channels[] = { - ADIS_SUPPLY_CHAN(ADIS16204_SUPPLY_OUT, ADIS16204_SCAN_SUPPLY, 12), - ADIS_AUX_ADC_CHAN(ADIS16204_AUX_ADC, ADIS16204_SCAN_AUX_ADC, 12), - ADIS_TEMP_CHAN(ADIS16204_TEMP_OUT, ADIS16204_SCAN_TEMP, 12), + ADIS_SUPPLY_CHAN(ADIS16204_SUPPLY_OUT, ADIS16204_SCAN_SUPPLY, 0, 12), + ADIS_AUX_ADC_CHAN(ADIS16204_AUX_ADC, ADIS16204_SCAN_AUX_ADC, 0, 12), + ADIS_TEMP_CHAN(ADIS16204_TEMP_OUT, ADIS16204_SCAN_TEMP, 0, 12), ADIS_ACCEL_CHAN(X, ADIS16204_XACCL_OUT, ADIS16204_SCAN_ACC_X, - BIT(IIO_CHAN_INFO_CALIBBIAS) | BIT(IIO_CHAN_INFO_PEAK), 14), + BIT(IIO_CHAN_INFO_CALIBBIAS) | BIT(IIO_CHAN_INFO_PEAK), 0, 14), ADIS_ACCEL_CHAN(Y, ADIS16204_YACCL_OUT, ADIS16204_SCAN_ACC_Y, - BIT(IIO_CHAN_INFO_CALIBBIAS) | BIT(IIO_CHAN_INFO_PEAK), 14), + BIT(IIO_CHAN_INFO_CALIBBIAS) | BIT(IIO_CHAN_INFO_PEAK), 0, 14), ADIS_ACCEL_CHAN(ROOT_SUM_SQUARED_X_Y, ADIS16204_XY_RSS_OUT, - ADIS16204_SCAN_ACC_XY, BIT(IIO_CHAN_INFO_PEAK), 14), + ADIS16204_SCAN_ACC_XY, BIT(IIO_CHAN_INFO_PEAK), 0, 14), IIO_CHAN_SOFT_TIMESTAMP(5), }; diff --git a/drivers/staging/iio/accel/adis16209_core.c 
b/drivers/staging/iio/accel/adis16209_core.c index 4492e51d8886..374dc6edbcf5 100644 --- a/drivers/staging/iio/accel/adis16209_core.c +++ b/drivers/staging/iio/accel/adis16209_core.c @@ -130,16 +130,18 @@ static int adis16209_read_raw(struct iio_dev *indio_dev, } static const struct iio_chan_spec adis16209_channels[] = { - ADIS_SUPPLY_CHAN(ADIS16209_SUPPLY_OUT, ADIS16209_SCAN_SUPPLY, 14), - ADIS_TEMP_CHAN(ADIS16209_TEMP_OUT, ADIS16209_SCAN_TEMP, 12), + ADIS_SUPPLY_CHAN(ADIS16209_SUPPLY_OUT, ADIS16209_SCAN_SUPPLY, 0, 14), + ADIS_TEMP_CHAN(ADIS16209_TEMP_OUT, ADIS16209_SCAN_TEMP, 0, 12), ADIS_ACCEL_CHAN(X, ADIS16209_XACCL_OUT, ADIS16209_SCAN_ACC_X, - BIT(IIO_CHAN_INFO_CALIBBIAS), 14), + BIT(IIO_CHAN_INFO_CALIBBIAS), 0, 14), ADIS_ACCEL_CHAN(Y, ADIS16209_YACCL_OUT, ADIS16209_SCAN_ACC_Y, - BIT(IIO_CHAN_INFO_CALIBBIAS), 14), - ADIS_AUX_ADC_CHAN(ADIS16209_AUX_ADC, ADIS16209_SCAN_AUX_ADC, 12), - ADIS_INCLI_CHAN(X, ADIS16209_XINCL_OUT, ADIS16209_SCAN_INCLI_X, 0, 14), - ADIS_INCLI_CHAN(Y, ADIS16209_YINCL_OUT, ADIS16209_SCAN_INCLI_Y, 0, 14), - ADIS_ROT_CHAN(X, ADIS16209_ROT_OUT, ADIS16209_SCAN_ROT, 0, 14), + BIT(IIO_CHAN_INFO_CALIBBIAS), 0, 14), + ADIS_AUX_ADC_CHAN(ADIS16209_AUX_ADC, ADIS16209_SCAN_AUX_ADC, 0, 12), + ADIS_INCLI_CHAN(X, ADIS16209_XINCL_OUT, ADIS16209_SCAN_INCLI_X, + 0, 0, 14), + ADIS_INCLI_CHAN(Y, ADIS16209_YINCL_OUT, ADIS16209_SCAN_INCLI_Y, + 0, 0, 14), + ADIS_ROT_CHAN(X, ADIS16209_ROT_OUT, ADIS16209_SCAN_ROT, 0, 0, 14), IIO_CHAN_SOFT_TIMESTAMP(8) }; diff --git a/drivers/staging/iio/accel/adis16240_core.c b/drivers/staging/iio/accel/adis16240_core.c index 3a303a03d028..74ace2a8769d 100644 --- a/drivers/staging/iio/accel/adis16240_core.c +++ b/drivers/staging/iio/accel/adis16240_core.c @@ -173,15 +173,15 @@ static int adis16240_write_raw(struct iio_dev *indio_dev, } static const struct iio_chan_spec adis16240_channels[] = { - ADIS_SUPPLY_CHAN(ADIS16240_SUPPLY_OUT, ADIS16240_SCAN_SUPPLY, 10), - ADIS_AUX_ADC_CHAN(ADIS16240_AUX_ADC, ADIS16240_SCAN_AUX_ADC, 10), + ADIS_SUPPLY_CHAN(ADIS16240_SUPPLY_OUT, ADIS16240_SCAN_SUPPLY, 0, 10), + ADIS_AUX_ADC_CHAN(ADIS16240_AUX_ADC, ADIS16240_SCAN_AUX_ADC, 0, 10), ADIS_ACCEL_CHAN(X, ADIS16240_XACCL_OUT, ADIS16240_SCAN_ACC_X, - BIT(IIO_CHAN_INFO_CALIBBIAS) | BIT(IIO_CHAN_INFO_PEAK), 10), + BIT(IIO_CHAN_INFO_CALIBBIAS) | BIT(IIO_CHAN_INFO_PEAK), 0, 10), ADIS_ACCEL_CHAN(Y, ADIS16240_YACCL_OUT, ADIS16240_SCAN_ACC_Y, - BIT(IIO_CHAN_INFO_CALIBBIAS) | BIT(IIO_CHAN_INFO_PEAK), 10), + BIT(IIO_CHAN_INFO_CALIBBIAS) | BIT(IIO_CHAN_INFO_PEAK), 0, 10), ADIS_ACCEL_CHAN(Z, ADIS16240_ZACCL_OUT, ADIS16240_SCAN_ACC_Z, - BIT(IIO_CHAN_INFO_CALIBBIAS) | BIT(IIO_CHAN_INFO_PEAK), 10), - ADIS_TEMP_CHAN(ADIS16240_TEMP_OUT, ADIS16240_SCAN_TEMP, 10), + BIT(IIO_CHAN_INFO_CALIBBIAS) | BIT(IIO_CHAN_INFO_PEAK), 0, 10), + ADIS_TEMP_CHAN(ADIS16240_TEMP_OUT, ADIS16240_SCAN_TEMP, 0, 10), IIO_CHAN_SOFT_TIMESTAMP(6) }; diff --git a/include/linux/iio/imu/adis.h b/include/linux/iio/imu/adis.h index b665dc7f017b..fa2d01ef8f55 100644 --- a/include/linux/iio/imu/adis.h +++ b/include/linux/iio/imu/adis.h @@ -157,13 +157,14 @@ int adis_single_conversion(struct iio_dev *indio_dev, const struct iio_chan_spec *chan, unsigned int error_mask, int *val); -#define ADIS_VOLTAGE_CHAN(addr, si, chan, name, bits) { \ +#define ADIS_VOLTAGE_CHAN(addr, si, chan, name, info_all, bits) { \ .type = IIO_VOLTAGE, \ .indexed = 1, \ .channel = (chan), \ .extend_name = name, \ .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | \ BIT(IIO_CHAN_INFO_SCALE), \ + .info_mask_shared_by_all = info_all, \ .address = (addr), \ 
.scan_index = (si), \ .scan_type = { \ @@ -174,19 +175,20 @@ int adis_single_conversion(struct iio_dev *indio_dev, }, \ } -#define ADIS_SUPPLY_CHAN(addr, si, bits) \ - ADIS_VOLTAGE_CHAN(addr, si, 0, "supply", bits) +#define ADIS_SUPPLY_CHAN(addr, si, info_all, bits) \ + ADIS_VOLTAGE_CHAN(addr, si, 0, "supply", info_all, bits) -#define ADIS_AUX_ADC_CHAN(addr, si, bits) \ - ADIS_VOLTAGE_CHAN(addr, si, 1, NULL, bits) +#define ADIS_AUX_ADC_CHAN(addr, si, info_all, bits) \ + ADIS_VOLTAGE_CHAN(addr, si, 1, NULL, info_all, bits) -#define ADIS_TEMP_CHAN(addr, si, bits) { \ +#define ADIS_TEMP_CHAN(addr, si, info_all, bits) { \ .type = IIO_TEMP, \ .indexed = 1, \ .channel = 0, \ .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | \ BIT(IIO_CHAN_INFO_SCALE) | \ BIT(IIO_CHAN_INFO_OFFSET), \ + .info_mask_shared_by_all = info_all, \ .address = (addr), \ .scan_index = (si), \ .scan_type = { \ @@ -197,13 +199,14 @@ int adis_single_conversion(struct iio_dev *indio_dev, }, \ } -#define ADIS_MOD_CHAN(_type, mod, addr, si, info_sep, bits) { \ +#define ADIS_MOD_CHAN(_type, mod, addr, si, info_sep, info_all, bits) { \ .type = (_type), \ .modified = 1, \ .channel2 = IIO_MOD_ ## mod, \ .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | \ info_sep, \ .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE), \ + .info_mask_shared_by_all = info_all, \ .address = (addr), \ .scan_index = (si), \ .scan_type = { \ @@ -214,17 +217,17 @@ int adis_single_conversion(struct iio_dev *indio_dev, }, \ } -#define ADIS_ACCEL_CHAN(mod, addr, si, info_sep, bits) \ - ADIS_MOD_CHAN(IIO_ACCEL, mod, addr, si, info_sep, bits) +#define ADIS_ACCEL_CHAN(mod, addr, si, info_sep, info_all, bits) \ + ADIS_MOD_CHAN(IIO_ACCEL, mod, addr, si, info_sep, info_all, bits) -#define ADIS_GYRO_CHAN(mod, addr, si, info_sep, bits) \ - ADIS_MOD_CHAN(IIO_ANGL_VEL, mod, addr, si, info_sep, bits) +#define ADIS_GYRO_CHAN(mod, addr, si, info_sep, info_all, bits) \ + ADIS_MOD_CHAN(IIO_ANGL_VEL, mod, addr, si, info_sep, info_all, bits) -#define ADIS_INCLI_CHAN(mod, addr, si, info_sep, bits) \ - ADIS_MOD_CHAN(IIO_INCLI, mod, addr, si, info_sep, bits) +#define ADIS_INCLI_CHAN(mod, addr, si, info_sep, info_all, bits) \ + ADIS_MOD_CHAN(IIO_INCLI, mod, addr, si, info_sep, info_all, bits) -#define ADIS_ROT_CHAN(mod, addr, si, info_sep, bits) \ - ADIS_MOD_CHAN(IIO_ROT, mod, addr, si, info_sep, bits) +#define ADIS_ROT_CHAN(mod, addr, si, info_sep, info_all, bits) \ + ADIS_MOD_CHAN(IIO_ROT, mod, addr, si, info_sep, info_all, bits) #ifdef CONFIG_IIO_ADIS_LIB_BUFFER -- cgit v1.2.3-59-g8ed1b From 75f353b61342b5847c7f6d8499fd6301dce09845 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Tue, 24 Jun 2014 16:13:47 +0100 Subject: of/platform: Fix of_platform_device_destroy iteration of devices of_platform_destroy does not work properly, since the tree population test was iterating on all devices having as its parent the given platform device. The check was intended to check whether any other platform or amba devices created by of_platform_populate were still populated, but instead checked for every kind of device. This is wrong, since platform devices typically create a subsystem regular device and set themselves as parents. Instead, go ahead and call the unregister functions for any devices created with of_platform_populate. The driver core will take care of unbinding drivers, and drivers are responsible for getting rid of any child devices that weren't created by of_platform_populate. 
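For context, a minimal usage sketch (hypothetical foo_soc driver, not part of this patch) of the populate/depopulate pair after this change; note that of_platform_depopulate() no longer returns a value to check.

#include <linux/module.h>
#include <linux/of.h>
#include <linux/of_platform.h>
#include <linux/platform_device.h>

static int foo_soc_probe(struct platform_device *pdev)
{
        /* Create platform/amba devices for the child nodes of this device */
        return of_platform_populate(pdev->dev.of_node, NULL, NULL, &pdev->dev);
}

static int foo_soc_remove(struct platform_device *pdev)
{
        /*
         * Unregisters every device that of_platform_populate() created,
         * recursing into child buses via the new OF_POPULATED_BUS flag.
         */
        of_platform_depopulate(&pdev->dev);
        return 0;
}

static const struct of_device_id foo_soc_match[] = {
        { .compatible = "vendor,foo-soc" },     /* hypothetical */
        { }
};

static struct platform_driver foo_soc_driver = {
        .probe  = foo_soc_probe,
        .remove = foo_soc_remove,
        .driver = {
                .name = "foo-soc",
                .of_match_table = foo_soc_match,
        },
};
module_platform_driver(foo_soc_driver);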
Signed-off-by: Grant Likely Signed-off-by: Pantelis Antoniou --- drivers/of/platform.c | 32 +++++++++----------------------- include/linux/of.h | 1 + include/linux/of_platform.h | 7 ++----- 3 files changed, 12 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/drivers/of/platform.c b/drivers/of/platform.c index 500436f9be7f..0197725e033a 100644 --- a/drivers/of/platform.c +++ b/drivers/of/platform.c @@ -422,6 +422,7 @@ static int of_platform_bus_create(struct device_node *bus, break; } } + of_node_set_flag(bus, OF_POPULATED_BUS); return rc; } @@ -508,19 +509,13 @@ EXPORT_SYMBOL_GPL(of_platform_populate); static int of_platform_device_destroy(struct device *dev, void *data) { - bool *children_left = data; - /* Do not touch devices not populated from the device tree */ - if (!dev->of_node || !of_node_check_flag(dev->of_node, OF_POPULATED)) { - *children_left = true; + if (!dev->of_node || !of_node_check_flag(dev->of_node, OF_POPULATED)) return 0; - } - /* Recurse, but don't touch this device if it has any children left */ - if (of_platform_depopulate(dev) != 0) { - *children_left = true; - return 0; - } + /* Recurse for any nodes that were treated as busses */ + if (of_node_check_flag(dev->of_node, OF_POPULATED_BUS)) + device_for_each_child(dev, NULL, of_platform_device_destroy); if (dev->bus == &platform_bus_type) platform_device_unregister(to_platform_device(dev)); @@ -528,19 +523,15 @@ static int of_platform_device_destroy(struct device *dev, void *data) else if (dev->bus == &amba_bustype) amba_device_unregister(to_amba_device(dev)); #endif - else { - *children_left = true; - return 0; - } of_node_clear_flag(dev->of_node, OF_POPULATED); - + of_node_clear_flag(dev->of_node, OF_POPULATED_BUS); return 0; } /** * of_platform_depopulate() - Remove devices populated from device tree - * @parent: device which childred will be removed + * @parent: device which children will be removed * * Complementary to of_platform_populate(), this function removes children * of the given device (and, recurrently, their children) that have been @@ -550,14 +541,9 @@ static int of_platform_device_destroy(struct device *dev, void *data) * Returns 0 when all children devices have been removed or * -EBUSY when some children remained. */ -int of_platform_depopulate(struct device *parent) +void of_platform_depopulate(struct device *parent) { - bool children_left = false; - - device_for_each_child(parent, &children_left, - of_platform_device_destroy); - - return children_left ? 
-EBUSY : 0; + device_for_each_child(parent, NULL, of_platform_device_destroy); } EXPORT_SYMBOL_GPL(of_platform_depopulate); diff --git a/include/linux/of.h b/include/linux/of.h index 196b34c1ef4e..abf829a1f150 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -204,6 +204,7 @@ static inline unsigned long of_read_ulong(const __be32 *cell, int size) #define OF_DYNAMIC 1 /* node and properties were allocated via kmalloc */ #define OF_DETACHED 2 /* node has been detached from the device tree */ #define OF_POPULATED 3 /* device already created for the node */ +#define OF_POPULATED_BUS 4 /* of_platform_populate recursed to children of this node */ #define OF_IS_DYNAMIC(x) test_bit(OF_DYNAMIC, &x->_flags) #define OF_MARK_DYNAMIC(x) set_bit(OF_DYNAMIC, &x->_flags) diff --git a/include/linux/of_platform.h b/include/linux/of_platform.h index d96e1badbee0..c2b0627a2317 100644 --- a/include/linux/of_platform.h +++ b/include/linux/of_platform.h @@ -72,7 +72,7 @@ extern int of_platform_populate(struct device_node *root, const struct of_device_id *matches, const struct of_dev_auxdata *lookup, struct device *parent); -extern int of_platform_depopulate(struct device *parent); +extern void of_platform_depopulate(struct device *parent); #else static inline int of_platform_populate(struct device_node *root, const struct of_device_id *matches, @@ -81,10 +81,7 @@ static inline int of_platform_populate(struct device_node *root, { return -ENODEV; } -static inline int of_platform_depopulate(struct device *parent) -{ - return -ENODEV; -} +static inline void of_platform_depopulate(struct device *parent) { } #endif #endif /* _LINUX_OF_PLATFORM_H */ -- cgit v1.2.3-59-g8ed1b From 022ee6c558fc933679e151f00f84332974147fa2 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 26 Jun 2014 12:09:05 +0200 Subject: efi/x86: Move UEFI Runtime Services wrappers to generic code In order for other archs (such as arm64) to be able to reuse the virtual mode function call wrappers, move them to drivers/firmware/efi/runtime-wrappers.c. Signed-off-by: Ard Biesheuvel Signed-off-by: Matt Fleming --- arch/x86/Kconfig | 1 + arch/x86/platform/efi/efi.c | 144 +--------------------------- drivers/firmware/efi/Kconfig | 3 + drivers/firmware/efi/Makefile | 1 + drivers/firmware/efi/runtime-wrappers.c | 161 ++++++++++++++++++++++++++++++++ include/linux/efi.h | 2 + 6 files changed, 170 insertions(+), 142 deletions(-) create mode 100644 drivers/firmware/efi/runtime-wrappers.c (limited to 'include/linux') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index fcefdda5136d..801ed36c2e49 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1522,6 +1522,7 @@ config EFI bool "EFI runtime service support" depends on ACPI select UCS2_STRING + select EFI_RUNTIME_WRAPPERS ---help--- This enables the kernel to use EFI runtime services that are available (such as the EFI variable services). 
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index f8524434bf65..135812b593cc 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -88,130 +88,6 @@ static int __init setup_add_efi_memmap(char *arg) } early_param("add_efi_memmap", setup_add_efi_memmap); -static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc) -{ - unsigned long flags; - efi_status_t status; - - spin_lock_irqsave(&rtc_lock, flags); - status = efi_call_virt(get_time, tm, tc); - spin_unlock_irqrestore(&rtc_lock, flags); - return status; -} - -static efi_status_t virt_efi_set_time(efi_time_t *tm) -{ - unsigned long flags; - efi_status_t status; - - spin_lock_irqsave(&rtc_lock, flags); - status = efi_call_virt(set_time, tm); - spin_unlock_irqrestore(&rtc_lock, flags); - return status; -} - -static efi_status_t virt_efi_get_wakeup_time(efi_bool_t *enabled, - efi_bool_t *pending, - efi_time_t *tm) -{ - unsigned long flags; - efi_status_t status; - - spin_lock_irqsave(&rtc_lock, flags); - status = efi_call_virt(get_wakeup_time, enabled, pending, tm); - spin_unlock_irqrestore(&rtc_lock, flags); - return status; -} - -static efi_status_t virt_efi_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm) -{ - unsigned long flags; - efi_status_t status; - - spin_lock_irqsave(&rtc_lock, flags); - status = efi_call_virt(set_wakeup_time, enabled, tm); - spin_unlock_irqrestore(&rtc_lock, flags); - return status; -} - -static efi_status_t virt_efi_get_variable(efi_char16_t *name, - efi_guid_t *vendor, - u32 *attr, - unsigned long *data_size, - void *data) -{ - return efi_call_virt(get_variable, - name, vendor, attr, - data_size, data); -} - -static efi_status_t virt_efi_get_next_variable(unsigned long *name_size, - efi_char16_t *name, - efi_guid_t *vendor) -{ - return efi_call_virt(get_next_variable, - name_size, name, vendor); -} - -static efi_status_t virt_efi_set_variable(efi_char16_t *name, - efi_guid_t *vendor, - u32 attr, - unsigned long data_size, - void *data) -{ - return efi_call_virt(set_variable, - name, vendor, attr, - data_size, data); -} - -static efi_status_t virt_efi_query_variable_info(u32 attr, - u64 *storage_space, - u64 *remaining_space, - u64 *max_variable_size) -{ - if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION) - return EFI_UNSUPPORTED; - - return efi_call_virt(query_variable_info, attr, storage_space, - remaining_space, max_variable_size); -} - -static efi_status_t virt_efi_get_next_high_mono_count(u32 *count) -{ - return efi_call_virt(get_next_high_mono_count, count); -} - -static void virt_efi_reset_system(int reset_type, - efi_status_t status, - unsigned long data_size, - efi_char16_t *data) -{ - __efi_call_virt(reset_system, reset_type, status, - data_size, data); -} - -static efi_status_t virt_efi_update_capsule(efi_capsule_header_t **capsules, - unsigned long count, - unsigned long sg_list) -{ - if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION) - return EFI_UNSUPPORTED; - - return efi_call_virt(update_capsule, capsules, count, sg_list); -} - -static efi_status_t virt_efi_query_capsule_caps(efi_capsule_header_t **capsules, - unsigned long count, - u64 *max_size, - int *reset_type) -{ - if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION) - return EFI_UNSUPPORTED; - - return efi_call_virt(query_capsule_caps, capsules, count, max_size, - reset_type); -} - static efi_status_t __init phys_efi_set_virtual_address_map( unsigned long memory_map_size, unsigned long descriptor_size, @@ -721,22 +597,6 @@ void __init 
old_map_region(efi_memory_desc_t *md) (unsigned long long)md->phys_addr); } -static void native_runtime_setup(void) -{ - efi.get_time = virt_efi_get_time; - efi.set_time = virt_efi_set_time; - efi.get_wakeup_time = virt_efi_get_wakeup_time; - efi.set_wakeup_time = virt_efi_set_wakeup_time; - efi.get_variable = virt_efi_get_variable; - efi.get_next_variable = virt_efi_get_next_variable; - efi.set_variable = virt_efi_set_variable; - efi.get_next_high_mono_count = virt_efi_get_next_high_mono_count; - efi.reset_system = virt_efi_reset_system; - efi.query_variable_info = virt_efi_query_variable_info; - efi.update_capsule = virt_efi_update_capsule; - efi.query_capsule_caps = virt_efi_query_capsule_caps; -} - /* Merge contiguous regions of the same type and attribute */ static void __init efi_merge_regions(void) { @@ -923,7 +783,7 @@ static void __init kexec_enter_virtual_mode(void) */ efi.runtime_version = efi_systab.hdr.revision; - native_runtime_setup(); + efi_native_runtime_setup(); efi.set_virtual_address_map = NULL; @@ -1012,7 +872,7 @@ static void __init __efi_enter_virtual_mode(void) efi.runtime_version = efi_systab.hdr.revision; if (efi_is_native()) - native_runtime_setup(); + efi_native_runtime_setup(); else efi_thunk_runtime_setup(); diff --git a/drivers/firmware/efi/Kconfig b/drivers/firmware/efi/Kconfig index d420ae2d3413..588dc47e7075 100644 --- a/drivers/firmware/efi/Kconfig +++ b/drivers/firmware/efi/Kconfig @@ -54,6 +54,9 @@ config EFI_PARAMS_FROM_FDT the EFI runtime support gets system table address, memory map address, and other parameters from the device tree. +config EFI_RUNTIME_WRAPPERS + bool + endmenu config UEFI_CPER diff --git a/drivers/firmware/efi/Makefile b/drivers/firmware/efi/Makefile index 9553496b0f43..e1096539eedb 100644 --- a/drivers/firmware/efi/Makefile +++ b/drivers/firmware/efi/Makefile @@ -6,3 +6,4 @@ obj-$(CONFIG_EFI_VARS) += efivars.o obj-$(CONFIG_EFI_VARS_PSTORE) += efi-pstore.o obj-$(CONFIG_UEFI_CPER) += cper.o obj-$(CONFIG_EFI_RUNTIME_MAP) += runtime-map.o +obj-$(CONFIG_EFI_RUNTIME_WRAPPERS) += runtime-wrappers.o diff --git a/drivers/firmware/efi/runtime-wrappers.c b/drivers/firmware/efi/runtime-wrappers.c new file mode 100644 index 000000000000..10daa4bbb258 --- /dev/null +++ b/drivers/firmware/efi/runtime-wrappers.c @@ -0,0 +1,161 @@ +/* + * runtime-wrappers.c - Runtime Services function call wrappers + * + * Copyright (C) 2014 Linaro Ltd. + * + * Split off from arch/x86/platform/efi/efi.c + * + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999 Walt Drummond + * Copyright (C) 1999-2002 Hewlett-Packard Co. + * Copyright (C) 2005-2008 Intel Co. + * Copyright (C) 2013 SuSE Labs + * + * This file is released under the GPLv2. + */ + +#include +#include /* spinlock_t */ +#include + +/* + * As per commit ef68c8f87ed1 ("x86: Serialize EFI time accesses on rtc_lock"), + * the EFI specification requires that callers of the time related runtime + * functions serialize with other CMOS accesses in the kernel, as the EFI time + * functions may choose to also use the legacy CMOS RTC. 
+ */ +__weak DEFINE_SPINLOCK(rtc_lock); + +static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc) +{ + unsigned long flags; + efi_status_t status; + + spin_lock_irqsave(&rtc_lock, flags); + status = efi_call_virt(get_time, tm, tc); + spin_unlock_irqrestore(&rtc_lock, flags); + return status; +} + +static efi_status_t virt_efi_set_time(efi_time_t *tm) +{ + unsigned long flags; + efi_status_t status; + + spin_lock_irqsave(&rtc_lock, flags); + status = efi_call_virt(set_time, tm); + spin_unlock_irqrestore(&rtc_lock, flags); + return status; +} + +static efi_status_t virt_efi_get_wakeup_time(efi_bool_t *enabled, + efi_bool_t *pending, + efi_time_t *tm) +{ + unsigned long flags; + efi_status_t status; + + spin_lock_irqsave(&rtc_lock, flags); + status = efi_call_virt(get_wakeup_time, enabled, pending, tm); + spin_unlock_irqrestore(&rtc_lock, flags); + return status; +} + +static efi_status_t virt_efi_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm) +{ + unsigned long flags; + efi_status_t status; + + spin_lock_irqsave(&rtc_lock, flags); + status = efi_call_virt(set_wakeup_time, enabled, tm); + spin_unlock_irqrestore(&rtc_lock, flags); + return status; +} + +static efi_status_t virt_efi_get_variable(efi_char16_t *name, + efi_guid_t *vendor, + u32 *attr, + unsigned long *data_size, + void *data) +{ + return efi_call_virt(get_variable, name, vendor, attr, data_size, data); +} + +static efi_status_t virt_efi_get_next_variable(unsigned long *name_size, + efi_char16_t *name, + efi_guid_t *vendor) +{ + return efi_call_virt(get_next_variable, name_size, name, vendor); +} + +static efi_status_t virt_efi_set_variable(efi_char16_t *name, + efi_guid_t *vendor, + u32 attr, + unsigned long data_size, + void *data) +{ + return efi_call_virt(set_variable, name, vendor, attr, data_size, data); +} + +static efi_status_t virt_efi_query_variable_info(u32 attr, + u64 *storage_space, + u64 *remaining_space, + u64 *max_variable_size) +{ + if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION) + return EFI_UNSUPPORTED; + + return efi_call_virt(query_variable_info, attr, storage_space, + remaining_space, max_variable_size); +} + +static efi_status_t virt_efi_get_next_high_mono_count(u32 *count) +{ + return efi_call_virt(get_next_high_mono_count, count); +} + +static void virt_efi_reset_system(int reset_type, + efi_status_t status, + unsigned long data_size, + efi_char16_t *data) +{ + __efi_call_virt(reset_system, reset_type, status, data_size, data); +} + +static efi_status_t virt_efi_update_capsule(efi_capsule_header_t **capsules, + unsigned long count, + unsigned long sg_list) +{ + if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION) + return EFI_UNSUPPORTED; + + return efi_call_virt(update_capsule, capsules, count, sg_list); +} + +static efi_status_t virt_efi_query_capsule_caps(efi_capsule_header_t **capsules, + unsigned long count, + u64 *max_size, + int *reset_type) +{ + if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION) + return EFI_UNSUPPORTED; + + return efi_call_virt(query_capsule_caps, capsules, count, max_size, + reset_type); +} + +void efi_native_runtime_setup(void) +{ + efi.get_time = virt_efi_get_time; + efi.set_time = virt_efi_set_time; + efi.get_wakeup_time = virt_efi_get_wakeup_time; + efi.set_wakeup_time = virt_efi_set_wakeup_time; + efi.get_variable = virt_efi_get_variable; + efi.get_next_variable = virt_efi_get_next_variable; + efi.set_variable = virt_efi_set_variable; + efi.get_next_high_mono_count = virt_efi_get_next_high_mono_count; + efi.reset_system = 
virt_efi_reset_system; + efi.query_variable_info = virt_efi_query_variable_info; + efi.update_capsule = virt_efi_update_capsule; + efi.query_capsule_caps = virt_efi_query_capsule_caps; +} diff --git a/include/linux/efi.h b/include/linux/efi.h index 41bbf8ba4ba8..0ceb816bdfc2 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -521,6 +521,8 @@ typedef efi_status_t efi_query_capsule_caps_t(efi_capsule_header_t **capsules, int *reset_type); typedef efi_status_t efi_query_variable_store_t(u32 attributes, unsigned long size); +void efi_native_runtime_setup(void); + /* * EFI Configuration Table and GUID definitions */ -- cgit v1.2.3-59-g8ed1b From bd669475d14e3279a7f96ed917a82df5da6ad52d Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 2 Jul 2014 14:54:42 +0200 Subject: efi: efistub: Refactor stub components In order to move from the #include "../../../xxxxx.c" anti-pattern used by both the x86 and arm64 versions of the stub to a static library linked into either the kernel proper (arm64) or a separate boot executable (x86), there is some prepatory work required. This patch does the following: - move forward declarations of functions shared between the arch specific and the generic parts of the stub to include/linux/efi.h - move forward declarations of functions shared between various .c files of the generic stub code to a new local header file called "efistub.h" - add #includes to all .c files which were formerly relying on the #includor to include the correct header files - remove all static modifiers from functions which will need to be externally visible once we move to a static library Signed-off-by: Ard Biesheuvel Signed-off-by: Matt Fleming --- arch/arm64/kernel/efi-stub.c | 29 ++++--------- arch/x86/boot/compressed/eboot.c | 13 +++--- drivers/firmware/efi/arm-stub.c | 32 +++++++++------ drivers/firmware/efi/efi-stub-helper.c | 74 +++++++++++++++++----------------- drivers/firmware/efi/efistub.h | 42 +++++++++++++++++++ drivers/firmware/efi/fdt.c | 20 +++++---- include/linux/efi.h | 42 +++++++++++++++++++ 7 files changed, 164 insertions(+), 88 deletions(-) create mode 100644 drivers/firmware/efi/efistub.h (limited to 'include/linux') diff --git a/arch/arm64/kernel/efi-stub.c b/arch/arm64/kernel/efi-stub.c index 23cbde4324b1..e4999021b07d 100644 --- a/arch/arm64/kernel/efi-stub.c +++ b/arch/arm64/kernel/efi-stub.c @@ -11,36 +11,21 @@ */ #include #include -#include #include -static void efi_char16_printk(efi_system_table_t *sys_table_arg, - efi_char16_t *str); - -static efi_status_t efi_open_volume(efi_system_table_t *sys_table, - void *__image, void **__fh); -static efi_status_t efi_file_close(void *handle); - -static efi_status_t -efi_file_read(void *handle, unsigned long *size, void *addr); - -static efi_status_t -efi_file_size(efi_system_table_t *sys_table, void *__fh, - efi_char16_t *filename_16, void **handle, u64 *file_sz); - /* Include shared EFI stub code */ #include "../../../drivers/firmware/efi/efi-stub-helper.c" #include "../../../drivers/firmware/efi/fdt.c" #include "../../../drivers/firmware/efi/arm-stub.c" -static efi_status_t handle_kernel_image(efi_system_table_t *sys_table, - unsigned long *image_addr, - unsigned long *image_size, - unsigned long *reserve_addr, - unsigned long *reserve_size, - unsigned long dram_base, - efi_loaded_image_t *image) +efi_status_t handle_kernel_image(efi_system_table_t *sys_table, + unsigned long *image_addr, + unsigned long *image_size, + unsigned long *reserve_addr, + unsigned long *reserve_size, + unsigned long dram_base, 
+ efi_loaded_image_t *image) { efi_status_t status; unsigned long kernel_size, kernel_memsize = 0; diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index c066bc4e3051..916bbdd7dd28 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -45,8 +45,7 @@ static void setup_boot_services##bits(struct efi_config *c) \ BOOT_SERVICES(32); BOOT_SERVICES(64); -static void efi_printk(efi_system_table_t *, char *); -static void efi_char16_printk(efi_system_table_t *, efi_char16_t *); +void efi_char16_printk(efi_system_table_t *, efi_char16_t *); static efi_status_t __file_size32(void *__fh, efi_char16_t *filename_16, @@ -153,7 +152,7 @@ grow: return status; } -static efi_status_t +efi_status_t efi_file_size(efi_system_table_t *sys_table, void *__fh, efi_char16_t *filename_16, void **handle, u64 *file_sz) { @@ -163,7 +162,7 @@ efi_file_size(efi_system_table_t *sys_table, void *__fh, return __file_size32(__fh, filename_16, handle, file_sz); } -static inline efi_status_t +efi_status_t efi_file_read(void *handle, unsigned long *size, void *addr) { unsigned long func; @@ -181,7 +180,7 @@ efi_file_read(void *handle, unsigned long *size, void *addr) } } -static inline efi_status_t efi_file_close(void *handle) +efi_status_t efi_file_close(void *handle) { if (efi_early->is64) { efi_file_handle_64_t *fh = handle; @@ -246,7 +245,7 @@ static inline efi_status_t __open_volume64(void *__image, void **__fh) return status; } -static inline efi_status_t +efi_status_t efi_open_volume(efi_system_table_t *sys_table, void *__image, void **__fh) { if (efi_early->is64) @@ -255,7 +254,7 @@ efi_open_volume(efi_system_table_t *sys_table, void *__image, void **__fh) return __open_volume32(__image, __fh); } -static void efi_char16_printk(efi_system_table_t *table, efi_char16_t *str) +void efi_char16_printk(efi_system_table_t *table, efi_char16_t *str) { unsigned long output_string; size_t offset; diff --git a/drivers/firmware/efi/arm-stub.c b/drivers/firmware/efi/arm-stub.c index 41114ce03b01..480339b6b110 100644 --- a/drivers/firmware/efi/arm-stub.c +++ b/drivers/firmware/efi/arm-stub.c @@ -12,6 +12,11 @@ * */ +#include +#include + +#include "efistub.h" + static int __init efi_secureboot_enabled(efi_system_table_t *sys_table_arg) { static efi_guid_t const var_guid __initconst = EFI_GLOBAL_VARIABLE_GUID; @@ -36,8 +41,8 @@ static int __init efi_secureboot_enabled(efi_system_table_t *sys_table_arg) } } -static efi_status_t efi_open_volume(efi_system_table_t *sys_table_arg, - void *__image, void **__fh) +efi_status_t efi_open_volume(efi_system_table_t *sys_table_arg, + void *__image, void **__fh) { efi_file_io_interface_t *io; efi_loaded_image_t *image = __image; @@ -60,14 +65,15 @@ static efi_status_t efi_open_volume(efi_system_table_t *sys_table_arg, *__fh = fh; return status; } -static efi_status_t efi_file_close(void *handle) + +efi_status_t efi_file_close(void *handle) { efi_file_handle_t *fh = handle; return fh->close(handle); } -static efi_status_t +efi_status_t efi_file_read(void *handle, unsigned long *size, void *addr) { efi_file_handle_t *fh = handle; @@ -76,7 +82,7 @@ efi_file_read(void *handle, unsigned long *size, void *addr) } -static efi_status_t +efi_status_t efi_file_size(efi_system_table_t *sys_table_arg, void *__fh, efi_char16_t *filename_16, void **handle, u64 *file_sz) { @@ -129,7 +135,7 @@ grow: -static void efi_char16_printk(efi_system_table_t *sys_table_arg, +void efi_char16_printk(efi_system_table_t *sys_table_arg, efi_char16_t *str) { struct 
efi_simple_text_output_protocol *out; @@ -145,13 +151,13 @@ static void efi_char16_printk(efi_system_table_t *sys_table_arg, * must be reserved. On failure it is required to free all * all allocations it has made. */ -static efi_status_t handle_kernel_image(efi_system_table_t *sys_table, - unsigned long *image_addr, - unsigned long *image_size, - unsigned long *reserve_addr, - unsigned long *reserve_size, - unsigned long dram_base, - efi_loaded_image_t *image); +efi_status_t handle_kernel_image(efi_system_table_t *sys_table, + unsigned long *image_addr, + unsigned long *image_size, + unsigned long *reserve_addr, + unsigned long *reserve_size, + unsigned long dram_base, + efi_loaded_image_t *image); /* * EFI entry point for the arm/arm64 EFI stubs. This is the entrypoint * that is described in the PE/COFF header. Most of the code is the same diff --git a/drivers/firmware/efi/efi-stub-helper.c b/drivers/firmware/efi/efi-stub-helper.c index eb6d4be9e722..32d5cca30f49 100644 --- a/drivers/firmware/efi/efi-stub-helper.c +++ b/drivers/firmware/efi/efi-stub-helper.c @@ -9,18 +9,20 @@ * under the terms of the GNU General Public License version 2. * */ -#define EFI_READ_CHUNK_SIZE (1024 * 1024) -/* error code which can't be mistaken for valid address */ -#define EFI_ERROR (~0UL) +#include +#include + +#include "efistub.h" +#define EFI_READ_CHUNK_SIZE (1024 * 1024) struct file_info { efi_file_handle_t *handle; u64 size; }; -static void efi_printk(efi_system_table_t *sys_table_arg, char *str) +void efi_printk(efi_system_table_t *sys_table_arg, char *str) { char *s8; @@ -37,16 +39,12 @@ static void efi_printk(efi_system_table_t *sys_table_arg, char *str) } } -#define pr_efi(sys_table, msg) efi_printk(sys_table, "EFI stub: "msg) -#define pr_efi_err(sys_table, msg) efi_printk(sys_table, "EFI stub: ERROR: "msg) - - -static efi_status_t efi_get_memory_map(efi_system_table_t *sys_table_arg, - efi_memory_desc_t **map, - unsigned long *map_size, - unsigned long *desc_size, - u32 *desc_ver, - unsigned long *key_ptr) +efi_status_t efi_get_memory_map(efi_system_table_t *sys_table_arg, + efi_memory_desc_t **map, + unsigned long *map_size, + unsigned long *desc_size, + u32 *desc_ver, + unsigned long *key_ptr) { efi_memory_desc_t *m = NULL; efi_status_t status; @@ -88,7 +86,7 @@ fail: } -static unsigned long __init get_dram_base(efi_system_table_t *sys_table_arg) +unsigned long __init get_dram_base(efi_system_table_t *sys_table_arg) { efi_status_t status; unsigned long map_size; @@ -116,9 +114,9 @@ static unsigned long __init get_dram_base(efi_system_table_t *sys_table_arg) /* * Allocate at the highest possible address that is not above 'max'. */ -static efi_status_t efi_high_alloc(efi_system_table_t *sys_table_arg, - unsigned long size, unsigned long align, - unsigned long *addr, unsigned long max) +efi_status_t efi_high_alloc(efi_system_table_t *sys_table_arg, + unsigned long size, unsigned long align, + unsigned long *addr, unsigned long max) { unsigned long map_size, desc_size; efi_memory_desc_t *map; @@ -202,9 +200,9 @@ fail: /* * Allocate at the lowest possible address. 
*/ -static efi_status_t efi_low_alloc(efi_system_table_t *sys_table_arg, - unsigned long size, unsigned long align, - unsigned long *addr) +efi_status_t efi_low_alloc(efi_system_table_t *sys_table_arg, + unsigned long size, unsigned long align, + unsigned long *addr) { unsigned long map_size, desc_size; efi_memory_desc_t *map; @@ -271,8 +269,8 @@ fail: return status; } -static void efi_free(efi_system_table_t *sys_table_arg, unsigned long size, - unsigned long addr) +void efi_free(efi_system_table_t *sys_table_arg, unsigned long size, + unsigned long addr) { unsigned long nr_pages; @@ -290,12 +288,12 @@ static void efi_free(efi_system_table_t *sys_table_arg, unsigned long size, * We only support loading a file from the same filesystem as * the kernel image. */ -static efi_status_t handle_cmdline_files(efi_system_table_t *sys_table_arg, - efi_loaded_image_t *image, - char *cmd_line, char *option_string, - unsigned long max_addr, - unsigned long *load_addr, - unsigned long *load_size) +efi_status_t handle_cmdline_files(efi_system_table_t *sys_table_arg, + efi_loaded_image_t *image, + char *cmd_line, char *option_string, + unsigned long max_addr, + unsigned long *load_addr, + unsigned long *load_size) { struct file_info *files; unsigned long file_addr; @@ -477,12 +475,12 @@ fail: * address is not available the lowest available address will * be used. */ -static efi_status_t efi_relocate_kernel(efi_system_table_t *sys_table_arg, - unsigned long *image_addr, - unsigned long image_size, - unsigned long alloc_size, - unsigned long preferred_addr, - unsigned long alignment) +efi_status_t efi_relocate_kernel(efi_system_table_t *sys_table_arg, + unsigned long *image_addr, + unsigned long image_size, + unsigned long alloc_size, + unsigned long preferred_addr, + unsigned long alignment) { unsigned long cur_image_addr; unsigned long new_addr = 0; @@ -589,9 +587,9 @@ static u8 *efi_utf16_to_utf8(u8 *dst, const u16 *src, int n) * Size of memory allocated return in *cmd_line_len. * Returns NULL on error. 
*/ -static char *efi_convert_cmdline(efi_system_table_t *sys_table_arg, - efi_loaded_image_t *image, - int *cmd_line_len) +char *efi_convert_cmdline(efi_system_table_t *sys_table_arg, + efi_loaded_image_t *image, + int *cmd_line_len) { const u16 *s2; u8 *s1 = NULL; diff --git a/drivers/firmware/efi/efistub.h b/drivers/firmware/efi/efistub.h new file mode 100644 index 000000000000..304ab295ca1a --- /dev/null +++ b/drivers/firmware/efi/efistub.h @@ -0,0 +1,42 @@ + +#ifndef _DRIVERS_FIRMWARE_EFI_EFISTUB_H +#define _DRIVERS_FIRMWARE_EFI_EFISTUB_H + +/* error code which can't be mistaken for valid address */ +#define EFI_ERROR (~0UL) + +void efi_char16_printk(efi_system_table_t *, efi_char16_t *); + +efi_status_t efi_open_volume(efi_system_table_t *sys_table_arg, void *__image, + void **__fh); + +efi_status_t efi_file_size(efi_system_table_t *sys_table_arg, void *__fh, + efi_char16_t *filename_16, void **handle, + u64 *file_sz); + +efi_status_t efi_file_read(void *handle, unsigned long *size, void *addr); + +efi_status_t efi_file_close(void *handle); + +unsigned long get_dram_base(efi_system_table_t *sys_table_arg); + +efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt, + unsigned long orig_fdt_size, + void *fdt, int new_fdt_size, char *cmdline_ptr, + u64 initrd_addr, u64 initrd_size, + efi_memory_desc_t *memory_map, + unsigned long map_size, unsigned long desc_size, + u32 desc_ver); + +efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table, + void *handle, + unsigned long *new_fdt_addr, + unsigned long max_addr, + u64 initrd_addr, u64 initrd_size, + char *cmdline_ptr, + unsigned long fdt_addr, + unsigned long fdt_size); + +void *get_fdt(efi_system_table_t *sys_table); + +#endif diff --git a/drivers/firmware/efi/fdt.c b/drivers/firmware/efi/fdt.c index 3aec36d7aae9..86d2934840e2 100644 --- a/drivers/firmware/efi/fdt.c +++ b/drivers/firmware/efi/fdt.c @@ -10,13 +10,17 @@ * */ -static efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt, - unsigned long orig_fdt_size, - void *fdt, int new_fdt_size, char *cmdline_ptr, - u64 initrd_addr, u64 initrd_size, - efi_memory_desc_t *memory_map, - unsigned long map_size, unsigned long desc_size, - u32 desc_ver) +#include +#include +#include + +efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt, + unsigned long orig_fdt_size, + void *fdt, int new_fdt_size, char *cmdline_ptr, + u64 initrd_addr, u64 initrd_size, + efi_memory_desc_t *memory_map, + unsigned long map_size, unsigned long desc_size, + u32 desc_ver) { int node, prev; int status; @@ -255,7 +259,7 @@ fail: return EFI_LOAD_ERROR; } -static void *get_fdt(efi_system_table_t *sys_table) +void *get_fdt(efi_system_table_t *sys_table) { efi_guid_t fdt_guid = DEVICE_TREE_GUID; efi_config_table_t *tables; diff --git a/include/linux/efi.h b/include/linux/efi.h index 0ceb816bdfc2..3a64f2f85821 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1163,4 +1163,46 @@ static inline void efi_runtime_map_setup(void *map, int nr_entries, u32 desc_size) {} #endif +/* prototypes shared between arch specific and generic stub code */ + +#define pr_efi(sys_table, msg) efi_printk(sys_table, "EFI stub: "msg) +#define pr_efi_err(sys_table, msg) efi_printk(sys_table, "EFI stub: ERROR: "msg) + +void efi_printk(efi_system_table_t *sys_table_arg, char *str); + +void efi_free(efi_system_table_t *sys_table_arg, unsigned long size, + unsigned long addr); + +char *efi_convert_cmdline(efi_system_table_t *sys_table_arg, + efi_loaded_image_t *image, int 
*cmd_line_len); + +efi_status_t efi_get_memory_map(efi_system_table_t *sys_table_arg, + efi_memory_desc_t **map, + unsigned long *map_size, + unsigned long *desc_size, + u32 *desc_ver, + unsigned long *key_ptr); + +efi_status_t efi_low_alloc(efi_system_table_t *sys_table_arg, + unsigned long size, unsigned long align, + unsigned long *addr); + +efi_status_t efi_high_alloc(efi_system_table_t *sys_table_arg, + unsigned long size, unsigned long align, + unsigned long *addr, unsigned long max); + +efi_status_t efi_relocate_kernel(efi_system_table_t *sys_table_arg, + unsigned long *image_addr, + unsigned long image_size, + unsigned long alloc_size, + unsigned long preferred_addr, + unsigned long alignment); + +efi_status_t handle_cmdline_files(efi_system_table_t *sys_table_arg, + efi_loaded_image_t *image, + char *cmd_line, char *option_string, + unsigned long max_addr, + unsigned long *load_addr, + unsigned long *load_size); + #endif /* _LINUX_EFI_H */ -- cgit v1.2.3-59-g8ed1b From f473832fece16611520bf54ad52b16c3f6db0a94 Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Sat, 5 Jul 2014 01:10:41 +0200 Subject: bcma: add driver for PCIe Gen 2 core MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New Broadcom PCIe devices (802.11ac ones?) use Gen2 and have to be initialized differently. Signed-off-by: Rafał Miłecki Signed-off-by: John W. Linville --- drivers/bcma/Makefile | 1 + drivers/bcma/driver_pcie2.c | 175 +++++++++++++++++++++++++++++++++ drivers/bcma/main.c | 8 ++ include/linux/bcma/bcma.h | 2 + include/linux/bcma/bcma_driver_pcie2.h | 158 +++++++++++++++++++++++++++++ 5 files changed, 344 insertions(+) create mode 100644 drivers/bcma/driver_pcie2.c create mode 100644 include/linux/bcma/bcma_driver_pcie2.h (limited to 'include/linux') diff --git a/drivers/bcma/Makefile b/drivers/bcma/Makefile index 734b32f09c0a..91290f7f61b8 100644 --- a/drivers/bcma/Makefile +++ b/drivers/bcma/Makefile @@ -3,6 +3,7 @@ bcma-y += driver_chipcommon.o driver_chipcommon_pmu.o bcma-$(CONFIG_BCMA_SFLASH) += driver_chipcommon_sflash.o bcma-$(CONFIG_BCMA_NFLASH) += driver_chipcommon_nflash.o bcma-y += driver_pci.o +bcma-y += driver_pcie2.o bcma-$(CONFIG_BCMA_DRIVER_PCI_HOSTMODE) += driver_pci_host.o bcma-$(CONFIG_BCMA_DRIVER_MIPS) += driver_mips.o bcma-$(CONFIG_BCMA_DRIVER_GMAC_CMN) += driver_gmac_cmn.o diff --git a/drivers/bcma/driver_pcie2.c b/drivers/bcma/driver_pcie2.c new file mode 100644 index 000000000000..e4be537b0c66 --- /dev/null +++ b/drivers/bcma/driver_pcie2.c @@ -0,0 +1,175 @@ +/* + * Broadcom specific AMBA + * PCIe Gen 2 Core + * + * Copyright 2014, Broadcom Corporation + * Copyright 2014, Rafał Miłecki + * + * Licensed under the GNU/GPL. See COPYING for details. + */ + +#include "bcma_private.h" +#include + +/************************************************** + * R/W ops. + **************************************************/ + +#if 0 +static u32 bcma_core_pcie2_cfg_read(struct bcma_drv_pcie2 *pcie2, u32 addr) +{ + pcie2_write32(pcie2, BCMA_CORE_PCIE2_CONFIGINDADDR, addr); + pcie2_read32(pcie2, BCMA_CORE_PCIE2_CONFIGINDADDR); + return pcie2_read32(pcie2, BCMA_CORE_PCIE2_CONFIGINDDATA); +} +#endif + +static void bcma_core_pcie2_cfg_write(struct bcma_drv_pcie2 *pcie2, u32 addr, + u32 val) +{ + pcie2_write32(pcie2, BCMA_CORE_PCIE2_CONFIGINDADDR, addr); + pcie2_write32(pcie2, BCMA_CORE_PCIE2_CONFIGINDDATA, val); +} + +/************************************************** + * Init. 
+ **************************************************/ + +static u32 bcma_core_pcie2_war_delay_perst_enab(struct bcma_drv_pcie2 *pcie2, + bool enable) +{ + u32 val; + + /* restore back to default */ + val = pcie2_read32(pcie2, BCMA_CORE_PCIE2_CLK_CONTROL); + val |= PCIE2_CLKC_DLYPERST; + val &= ~PCIE2_CLKC_DISSPROMLD; + if (enable) { + val &= ~PCIE2_CLKC_DLYPERST; + val |= PCIE2_CLKC_DISSPROMLD; + } + pcie2_write32(pcie2, (BCMA_CORE_PCIE2_CLK_CONTROL), val); + /* flush */ + return pcie2_read32(pcie2, BCMA_CORE_PCIE2_CLK_CONTROL); +} + +static void bcma_core_pcie2_set_ltr_vals(struct bcma_drv_pcie2 *pcie2) +{ + /* LTR0 */ + pcie2_write32(pcie2, BCMA_CORE_PCIE2_CONFIGINDADDR, 0x844); + pcie2_write32(pcie2, BCMA_CORE_PCIE2_CONFIGINDDATA, 0x883c883c); + /* LTR1 */ + pcie2_write32(pcie2, BCMA_CORE_PCIE2_CONFIGINDADDR, 0x848); + pcie2_write32(pcie2, BCMA_CORE_PCIE2_CONFIGINDDATA, 0x88648864); + /* LTR2 */ + pcie2_write32(pcie2, BCMA_CORE_PCIE2_CONFIGINDADDR, 0x84C); + pcie2_write32(pcie2, BCMA_CORE_PCIE2_CONFIGINDDATA, 0x90039003); +} + +static void bcma_core_pcie2_hw_ltr_war(struct bcma_drv_pcie2 *pcie2) +{ + u8 core_rev = pcie2->core->id.rev; + u32 devstsctr2; + + if (core_rev < 2 || core_rev == 10 || core_rev > 13) + return; + + pcie2_write32(pcie2, BCMA_CORE_PCIE2_CONFIGINDADDR, + PCIE2_CAP_DEVSTSCTRL2_OFFSET); + devstsctr2 = pcie2_read32(pcie2, BCMA_CORE_PCIE2_CONFIGINDDATA); + if (devstsctr2 & PCIE2_CAP_DEVSTSCTRL2_LTRENAB) { + /* force the right LTR values */ + bcma_core_pcie2_set_ltr_vals(pcie2); + + /* TODO: + si_core_wrapperreg(pcie2, 3, 0x60, 0x8080, 0); */ + + /* enable the LTR */ + devstsctr2 |= PCIE2_CAP_DEVSTSCTRL2_LTRENAB; + pcie2_write32(pcie2, BCMA_CORE_PCIE2_CONFIGINDADDR, + PCIE2_CAP_DEVSTSCTRL2_OFFSET); + pcie2_write32(pcie2, BCMA_CORE_PCIE2_CONFIGINDDATA, devstsctr2); + + /* set the LTR state to be active */ + pcie2_write32(pcie2, BCMA_CORE_PCIE2_LTR_STATE, + PCIE2_LTR_ACTIVE); + usleep_range(1000, 2000); + + /* set the LTR state to be sleep */ + pcie2_write32(pcie2, BCMA_CORE_PCIE2_LTR_STATE, + PCIE2_LTR_SLEEP); + usleep_range(1000, 2000); + } +} + +static void pciedev_crwlpciegen2(struct bcma_drv_pcie2 *pcie2) +{ + u8 core_rev = pcie2->core->id.rev; + bool pciewar160, pciewar162; + + pciewar160 = core_rev == 7 || core_rev == 9 || core_rev == 11; + pciewar162 = core_rev == 5 || core_rev == 7 || core_rev == 8 || + core_rev == 9 || core_rev == 11; + + if (!pciewar160 && !pciewar162) + return; + +/* TODO */ +#if 0 + pcie2_set32(pcie2, BCMA_CORE_PCIE2_CLK_CONTROL, + PCIE_DISABLE_L1CLK_GATING); +#if 0 + pcie2_write32(pcie2, BCMA_CORE_PCIE2_CONFIGINDADDR, + PCIEGEN2_COE_PVT_TL_CTRL_0); + pcie2_mask32(pcie2, BCMA_CORE_PCIE2_CONFIGINDDATA, + ~(1 << COE_PVT_TL_CTRL_0_PM_DIS_L1_REENTRY_BIT)); +#endif +#endif +} + +static void pciedev_crwlpciegen2_180(struct bcma_drv_pcie2 *pcie2) +{ + pcie2_write32(pcie2, BCMA_CORE_PCIE2_CONFIGINDADDR, PCIE2_PMCR_REFUP); + pcie2_set32(pcie2, BCMA_CORE_PCIE2_CONFIGINDDATA, 0x1f); +} + +static void pciedev_crwlpciegen2_182(struct bcma_drv_pcie2 *pcie2) +{ + pcie2_write32(pcie2, BCMA_CORE_PCIE2_CONFIGINDADDR, PCIE2_SBMBX); + pcie2_write32(pcie2, BCMA_CORE_PCIE2_CONFIGINDDATA, 1 << 0); +} + +static void pciedev_reg_pm_clk_period(struct bcma_drv_pcie2 *pcie2) +{ + struct bcma_drv_cc *drv_cc = &pcie2->core->bus->drv_cc; + u8 core_rev = pcie2->core->id.rev; + u32 alp_khz, pm_value; + + if (core_rev <= 13) { + alp_khz = bcma_pmu_get_alp_clock(drv_cc) / 1000; + pm_value = (1000000 * 2) / alp_khz; + pcie2_write32(pcie2, BCMA_CORE_PCIE2_CONFIGINDADDR, + 
PCIE2_PVT_REG_PM_CLK_PERIOD); + pcie2_write32(pcie2, BCMA_CORE_PCIE2_CONFIGINDDATA, pm_value); + } +} + +void bcma_core_pcie2_init(struct bcma_drv_pcie2 *pcie2) +{ + struct bcma_chipinfo *ci = &pcie2->core->bus->chipinfo; + u32 tmp; + + tmp = pcie2_read32(pcie2, BCMA_CORE_PCIE2_SPROM(54)); + if ((tmp & 0xe) >> 1 == 2) + bcma_core_pcie2_cfg_write(pcie2, 0x4e0, 0x17); + + /* TODO: Do we need pcie_reqsize? */ + + if (ci->id == BCMA_CHIP_ID_BCM4360 && ci->rev > 3) + bcma_core_pcie2_war_delay_perst_enab(pcie2, true); + bcma_core_pcie2_hw_ltr_war(pcie2); + pciedev_crwlpciegen2(pcie2); + pciedev_reg_pm_clk_period(pcie2); + pciedev_crwlpciegen2_180(pcie2); + pciedev_crwlpciegen2_182(pcie2); +} diff --git a/drivers/bcma/main.c b/drivers/bcma/main.c index 34ea4c588d36..0ff8d58831ef 100644 --- a/drivers/bcma/main.c +++ b/drivers/bcma/main.c @@ -132,6 +132,7 @@ static int bcma_register_cores(struct bcma_bus *bus) case BCMA_CORE_CHIPCOMMON: case BCMA_CORE_PCI: case BCMA_CORE_PCIE: + case BCMA_CORE_PCIE2: case BCMA_CORE_MIPS_74K: case BCMA_CORE_4706_MAC_GBIT_COMMON: continue; @@ -281,6 +282,13 @@ int bcma_bus_register(struct bcma_bus *bus) bcma_core_pci_init(&bus->drv_pci[1]); } + /* Init PCIe Gen 2 core */ + core = bcma_find_core_unit(bus, BCMA_CORE_PCIE2, 0); + if (core) { + bus->drv_pcie2.core = core; + bcma_core_pcie2_init(&bus->drv_pcie2); + } + /* Init GBIT MAC COMMON core */ core = bcma_find_core(bus, BCMA_CORE_4706_MAC_GBIT_COMMON); if (core) { diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h index 0b3bb16c705a..452286a38b2b 100644 --- a/include/linux/bcma/bcma.h +++ b/include/linux/bcma/bcma.h @@ -6,6 +6,7 @@ #include #include +#include #include #include #include /* SPROM sharing */ @@ -333,6 +334,7 @@ struct bcma_bus { struct bcma_drv_cc drv_cc; struct bcma_drv_pci drv_pci[2]; + struct bcma_drv_pcie2 drv_pcie2; struct bcma_drv_mips drv_mips; struct bcma_drv_gmac_cmn drv_gmac_cmn; diff --git a/include/linux/bcma/bcma_driver_pcie2.h b/include/linux/bcma/bcma_driver_pcie2.h new file mode 100644 index 000000000000..5988b05781c3 --- /dev/null +++ b/include/linux/bcma/bcma_driver_pcie2.h @@ -0,0 +1,158 @@ +#ifndef LINUX_BCMA_DRIVER_PCIE2_H_ +#define LINUX_BCMA_DRIVER_PCIE2_H_ + +#define BCMA_CORE_PCIE2_CLK_CONTROL 0x0000 +#define PCIE2_CLKC_RST_OE 0x0001 /* When set, drives PCI_RESET out to pin */ +#define PCIE2_CLKC_RST 0x0002 /* Value driven out to pin */ +#define PCIE2_CLKC_SPERST 0x0004 /* SurvivePeRst */ +#define PCIE2_CLKC_DISABLE_L1CLK_GATING 0x0010 +#define PCIE2_CLKC_DLYPERST 0x0100 /* Delay PeRst to CoE Core */ +#define PCIE2_CLKC_DISSPROMLD 0x0200 /* DisableSpromLoadOnPerst */ +#define PCIE2_CLKC_WAKE_MODE_L2 0x1000 /* Wake on L2 */ +#define BCMA_CORE_PCIE2_RC_PM_CONTROL 0x0004 +#define BCMA_CORE_PCIE2_RC_PM_STATUS 0x0008 +#define BCMA_CORE_PCIE2_EP_PM_CONTROL 0x000C +#define BCMA_CORE_PCIE2_EP_PM_STATUS 0x0010 +#define BCMA_CORE_PCIE2_EP_LTR_CONTROL 0x0014 +#define BCMA_CORE_PCIE2_EP_LTR_STATUS 0x0018 +#define BCMA_CORE_PCIE2_EP_OBFF_STATUS 0x001C +#define BCMA_CORE_PCIE2_PCIE_ERR_STATUS 0x0020 +#define BCMA_CORE_PCIE2_RC_AXI_CONFIG 0x0100 +#define BCMA_CORE_PCIE2_EP_AXI_CONFIG 0x0104 +#define BCMA_CORE_PCIE2_RXDEBUG_STATUS0 0x0108 +#define BCMA_CORE_PCIE2_RXDEBUG_CONTROL0 0x010C +#define BCMA_CORE_PCIE2_CONFIGINDADDR 0x0120 +#define BCMA_CORE_PCIE2_CONFIGINDDATA 0x0124 +#define BCMA_CORE_PCIE2_MDIOCONTROL 0x0128 +#define BCMA_CORE_PCIE2_MDIOWRDATA 0x012C +#define BCMA_CORE_PCIE2_MDIORDDATA 0x0130 +#define BCMA_CORE_PCIE2_DATAINTF 0x0180 +#define 
BCMA_CORE_PCIE2_D2H_INTRLAZY_0 0x0188 +#define BCMA_CORE_PCIE2_H2D_INTRLAZY_0 0x018c +#define BCMA_CORE_PCIE2_H2D_INTSTAT_0 0x0190 +#define BCMA_CORE_PCIE2_H2D_INTMASK_0 0x0194 +#define BCMA_CORE_PCIE2_D2H_INTSTAT_0 0x0198 +#define BCMA_CORE_PCIE2_D2H_INTMASK_0 0x019c +#define BCMA_CORE_PCIE2_LTR_STATE 0x01A0 /* Latency Tolerance Reporting */ +#define PCIE2_LTR_ACTIVE 2 +#define PCIE2_LTR_ACTIVE_IDLE 1 +#define PCIE2_LTR_SLEEP 0 +#define PCIE2_LTR_FINAL_MASK 0x300 +#define PCIE2_LTR_FINAL_SHIFT 8 +#define BCMA_CORE_PCIE2_PWR_INT_STATUS 0x01A4 +#define BCMA_CORE_PCIE2_PWR_INT_MASK 0x01A8 +#define BCMA_CORE_PCIE2_CFG_ADDR 0x01F8 +#define BCMA_CORE_PCIE2_CFG_DATA 0x01FC +#define BCMA_CORE_PCIE2_SYS_EQ_PAGE 0x0200 +#define BCMA_CORE_PCIE2_SYS_MSI_PAGE 0x0204 +#define BCMA_CORE_PCIE2_SYS_MSI_INTREN 0x0208 +#define BCMA_CORE_PCIE2_SYS_MSI_CTRL0 0x0210 +#define BCMA_CORE_PCIE2_SYS_MSI_CTRL1 0x0214 +#define BCMA_CORE_PCIE2_SYS_MSI_CTRL2 0x0218 +#define BCMA_CORE_PCIE2_SYS_MSI_CTRL3 0x021C +#define BCMA_CORE_PCIE2_SYS_MSI_CTRL4 0x0220 +#define BCMA_CORE_PCIE2_SYS_MSI_CTRL5 0x0224 +#define BCMA_CORE_PCIE2_SYS_EQ_HEAD0 0x0250 +#define BCMA_CORE_PCIE2_SYS_EQ_TAIL0 0x0254 +#define BCMA_CORE_PCIE2_SYS_EQ_HEAD1 0x0258 +#define BCMA_CORE_PCIE2_SYS_EQ_TAIL1 0x025C +#define BCMA_CORE_PCIE2_SYS_EQ_HEAD2 0x0260 +#define BCMA_CORE_PCIE2_SYS_EQ_TAIL2 0x0264 +#define BCMA_CORE_PCIE2_SYS_EQ_HEAD3 0x0268 +#define BCMA_CORE_PCIE2_SYS_EQ_TAIL3 0x026C +#define BCMA_CORE_PCIE2_SYS_EQ_HEAD4 0x0270 +#define BCMA_CORE_PCIE2_SYS_EQ_TAIL4 0x0274 +#define BCMA_CORE_PCIE2_SYS_EQ_HEAD5 0x0278 +#define BCMA_CORE_PCIE2_SYS_EQ_TAIL5 0x027C +#define BCMA_CORE_PCIE2_SYS_RC_INTX_EN 0x0330 +#define BCMA_CORE_PCIE2_SYS_RC_INTX_CSR 0x0334 +#define BCMA_CORE_PCIE2_SYS_MSI_REQ 0x0340 +#define BCMA_CORE_PCIE2_SYS_HOST_INTR_EN 0x0344 +#define BCMA_CORE_PCIE2_SYS_HOST_INTR_CSR 0x0348 +#define BCMA_CORE_PCIE2_SYS_HOST_INTR0 0x0350 +#define BCMA_CORE_PCIE2_SYS_HOST_INTR1 0x0354 +#define BCMA_CORE_PCIE2_SYS_HOST_INTR2 0x0358 +#define BCMA_CORE_PCIE2_SYS_HOST_INTR3 0x035C +#define BCMA_CORE_PCIE2_SYS_EP_INT_EN0 0x0360 +#define BCMA_CORE_PCIE2_SYS_EP_INT_EN1 0x0364 +#define BCMA_CORE_PCIE2_SYS_EP_INT_CSR0 0x0370 +#define BCMA_CORE_PCIE2_SYS_EP_INT_CSR1 0x0374 +#define BCMA_CORE_PCIE2_SPROM(wordoffset) (0x0800 + ((wordoffset) * 2)) +#define BCMA_CORE_PCIE2_FUNC0_IMAP0_0 0x0C00 +#define BCMA_CORE_PCIE2_FUNC0_IMAP0_1 0x0C04 +#define BCMA_CORE_PCIE2_FUNC0_IMAP0_2 0x0C08 +#define BCMA_CORE_PCIE2_FUNC0_IMAP0_3 0x0C0C +#define BCMA_CORE_PCIE2_FUNC0_IMAP0_4 0x0C10 +#define BCMA_CORE_PCIE2_FUNC0_IMAP0_5 0x0C14 +#define BCMA_CORE_PCIE2_FUNC0_IMAP0_6 0x0C18 +#define BCMA_CORE_PCIE2_FUNC0_IMAP0_7 0x0C1C +#define BCMA_CORE_PCIE2_FUNC1_IMAP0_0 0x0C20 +#define BCMA_CORE_PCIE2_FUNC1_IMAP0_1 0x0C24 +#define BCMA_CORE_PCIE2_FUNC1_IMAP0_2 0x0C28 +#define BCMA_CORE_PCIE2_FUNC1_IMAP0_3 0x0C2C +#define BCMA_CORE_PCIE2_FUNC1_IMAP0_4 0x0C30 +#define BCMA_CORE_PCIE2_FUNC1_IMAP0_5 0x0C34 +#define BCMA_CORE_PCIE2_FUNC1_IMAP0_6 0x0C38 +#define BCMA_CORE_PCIE2_FUNC1_IMAP0_7 0x0C3C +#define BCMA_CORE_PCIE2_FUNC0_IMAP1 0x0C80 +#define BCMA_CORE_PCIE2_FUNC1_IMAP1 0x0C88 +#define BCMA_CORE_PCIE2_FUNC0_IMAP2 0x0CC0 +#define BCMA_CORE_PCIE2_FUNC1_IMAP2 0x0CC8 +#define BCMA_CORE_PCIE2_IARR0_LOWER 0x0D00 +#define BCMA_CORE_PCIE2_IARR0_UPPER 0x0D04 +#define BCMA_CORE_PCIE2_IARR1_LOWER 0x0D08 +#define BCMA_CORE_PCIE2_IARR1_UPPER 0x0D0C +#define BCMA_CORE_PCIE2_IARR2_LOWER 0x0D10 +#define BCMA_CORE_PCIE2_IARR2_UPPER 0x0D14 +#define BCMA_CORE_PCIE2_OARR0 0x0D20 +#define 
BCMA_CORE_PCIE2_OARR1 0x0D28 +#define BCMA_CORE_PCIE2_OARR2 0x0D30 +#define BCMA_CORE_PCIE2_OMAP0_LOWER 0x0D40 +#define BCMA_CORE_PCIE2_OMAP0_UPPER 0x0D44 +#define BCMA_CORE_PCIE2_OMAP1_LOWER 0x0D48 +#define BCMA_CORE_PCIE2_OMAP1_UPPER 0x0D4C +#define BCMA_CORE_PCIE2_OMAP2_LOWER 0x0D50 +#define BCMA_CORE_PCIE2_OMAP2_UPPER 0x0D54 +#define BCMA_CORE_PCIE2_FUNC1_IARR1_SIZE 0x0D58 +#define BCMA_CORE_PCIE2_FUNC1_IARR2_SIZE 0x0D5C +#define BCMA_CORE_PCIE2_MEM_CONTROL 0x0F00 +#define BCMA_CORE_PCIE2_MEM_ECC_ERRLOG0 0x0F04 +#define BCMA_CORE_PCIE2_MEM_ECC_ERRLOG1 0x0F08 +#define BCMA_CORE_PCIE2_LINK_STATUS 0x0F0C +#define BCMA_CORE_PCIE2_STRAP_STATUS 0x0F10 +#define BCMA_CORE_PCIE2_RESET_STATUS 0x0F14 +#define BCMA_CORE_PCIE2_RESETEN_IN_LINKDOWN 0x0F18 +#define BCMA_CORE_PCIE2_MISC_INTR_EN 0x0F1C +#define BCMA_CORE_PCIE2_TX_DEBUG_CFG 0x0F20 +#define BCMA_CORE_PCIE2_MISC_CONFIG 0x0F24 +#define BCMA_CORE_PCIE2_MISC_STATUS 0x0F28 +#define BCMA_CORE_PCIE2_INTR_EN 0x0F30 +#define BCMA_CORE_PCIE2_INTR_CLEAR 0x0F34 +#define BCMA_CORE_PCIE2_INTR_STATUS 0x0F38 + +/* PCIE gen2 config regs */ +#define PCIE2_INTSTATUS 0x090 +#define PCIE2_INTMASK 0x094 +#define PCIE2_SBMBX 0x098 + +#define PCIE2_PMCR_REFUP 0x1814 /* Trefup time */ + +#define PCIE2_CAP_DEVSTSCTRL2_OFFSET 0xD4 +#define PCIE2_CAP_DEVSTSCTRL2_LTRENAB 0x400 +#define PCIE2_PVT_REG_PM_CLK_PERIOD 0x184c + +struct bcma_drv_pcie2 { + struct bcma_device *core; +}; + +#define pcie2_read16(pcie2, offset) bcma_read16((pcie2)->core, offset) +#define pcie2_read32(pcie2, offset) bcma_read32((pcie2)->core, offset) +#define pcie2_write16(pcie2, offset, val) bcma_write16((pcie2)->core, offset, val) +#define pcie2_write32(pcie2, offset, val) bcma_write32((pcie2)->core, offset, val) + +#define pcie2_set32(pcie2, offset, set) bcma_set32((pcie2)->core, offset, set) +#define pcie2_mask32(pcie2, offset, mask) bcma_mask32((pcie2)->core, offset, mask) + +void bcma_core_pcie2_init(struct bcma_drv_pcie2 *pcie2); + +#endif /* LINUX_BCMA_DRIVER_PCIE2_H_ */ -- cgit v1.2.3-59-g8ed1b From ae5c6c6d7bcadfbedefb5fc8ff0ebe2bfa83a0a1 Mon Sep 17 00:00:00 2001 From: Stefan Sørensen Date: Fri, 27 Jun 2014 11:59:10 +0200 Subject: ptp: Classify ptp over ip over vlan packets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This extends the ptp bpf to also match ptp over ip over vlan packets. The ptp classes are changed to orthogonal bitfields representing version, transport and vlan values to simplify matching. Signed-off-by: Stefan Sørensen Signed-off-by: David S. 
Miller --- drivers/net/ethernet/ti/cpts.c | 24 ++++++++-------- drivers/net/phy/dp83640.c | 46 ++++++++++++++---------------- include/linux/ptp_classify.h | 5 ++-- net/core/ptp_classifier.c | 64 ++++++++++++++++++++++++++++++++++++++---- 4 files changed, 92 insertions(+), 47 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c index 6b56f85951e5..ab92f67da035 100644 --- a/drivers/net/ethernet/ti/cpts.c +++ b/drivers/net/ethernet/ti/cpts.c @@ -256,23 +256,21 @@ static int cpts_match(struct sk_buff *skb, unsigned int ptp_class, u16 ts_seqid, u8 ts_msgtype) { u16 *seqid; - unsigned int offset; + unsigned int offset = 0; u8 *msgtype, *data = skb->data; - switch (ptp_class) { - case PTP_CLASS_V1_IPV4: - case PTP_CLASS_V2_IPV4: - offset = ETH_HLEN + IPV4_HLEN(data) + UDP_HLEN; - break; - case PTP_CLASS_V1_IPV6: - case PTP_CLASS_V2_IPV6: - offset = OFF_PTP6; + if (ptp_class & PTP_CLASS_VLAN) + offset += VLAN_HLEN; + + switch (ptp_class & PTP_CLASS_PMASK) { + case PTP_CLASS_IPV4: + offset += ETH_HLEN + IPV4_HLEN(data) + UDP_HLEN; break; - case PTP_CLASS_V2_L2: - offset = ETH_HLEN; + case PTP_CLASS_IPV6: + offset += ETH_HLEN + IP6_HLEN + UDP_HLEN; break; - case PTP_CLASS_V2_VLAN: - offset = ETH_HLEN + VLAN_HLEN; + case PTP_CLASS_L2: + offset += ETH_HLEN; break; default: return 0; diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c index 293ad064905d..53bd1af68422 100644 --- a/drivers/net/phy/dp83640.c +++ b/drivers/net/phy/dp83640.c @@ -856,20 +856,18 @@ static int is_sync(struct sk_buff *skb, int type) u8 *data = skb->data, *msgtype; unsigned int offset = 0; - switch (type) { - case PTP_CLASS_V1_IPV4: - case PTP_CLASS_V2_IPV4: - offset = ETH_HLEN + IPV4_HLEN(data) + UDP_HLEN; - break; - case PTP_CLASS_V1_IPV6: - case PTP_CLASS_V2_IPV6: - offset = OFF_PTP6; + if (type & PTP_CLASS_VLAN) + offset += VLAN_HLEN; + + switch (type & PTP_CLASS_PMASK) { + case PTP_CLASS_IPV4: + offset += ETH_HLEN + IPV4_HLEN(data) + UDP_HLEN; break; - case PTP_CLASS_V2_L2: - offset = ETH_HLEN; + case PTP_CLASS_IPV6: + offset += ETH_HLEN + IP6_HLEN + UDP_HLEN; break; - case PTP_CLASS_V2_VLAN: - offset = ETH_HLEN + VLAN_HLEN; + case PTP_CLASS_L2: + offset += ETH_HLEN; break; default: return 0; @@ -889,25 +887,23 @@ static int is_sync(struct sk_buff *skb, int type) static int match(struct sk_buff *skb, unsigned int type, struct rxts *rxts) { u16 *seqid; - unsigned int offset; + unsigned int offset = 0; u8 *msgtype, *data = skb_mac_header(skb); /* check sequenceID, messageType, 12 bit hash of offset 20-29 */ - switch (type) { - case PTP_CLASS_V1_IPV4: - case PTP_CLASS_V2_IPV4: - offset = ETH_HLEN + IPV4_HLEN(data) + UDP_HLEN; - break; - case PTP_CLASS_V1_IPV6: - case PTP_CLASS_V2_IPV6: - offset = OFF_PTP6; + if (type & PTP_CLASS_VLAN) + offset += VLAN_HLEN; + + switch (type & PTP_CLASS_PMASK) { + case PTP_CLASS_IPV4: + offset += ETH_HLEN + IPV4_HLEN(data) + UDP_HLEN; break; - case PTP_CLASS_V2_L2: - offset = ETH_HLEN; + case PTP_CLASS_IPV6: + offset += ETH_HLEN + IP6_HLEN + UDP_HLEN; break; - case PTP_CLASS_V2_VLAN: - offset = ETH_HLEN + VLAN_HLEN; + case PTP_CLASS_L2: + offset += ETH_HLEN; break; default: return 0; diff --git a/include/linux/ptp_classify.h b/include/linux/ptp_classify.h index 7dfed71d76a6..159c987b1853 100644 --- a/include/linux/ptp_classify.h +++ b/include/linux/ptp_classify.h @@ -33,8 +33,8 @@ #define PTP_CLASS_IPV4 0x10 /* event in an IPV4 UDP packet */ #define PTP_CLASS_IPV6 0x20 /* event in an IPV6 UDP packet */ #define 
PTP_CLASS_L2 0x30 /* event in a L2 packet */ -#define PTP_CLASS_VLAN 0x40 /* event in a VLAN tagged L2 packet */ -#define PTP_CLASS_PMASK 0xf0 /* mask for the packet type field */ +#define PTP_CLASS_PMASK 0x30 /* mask for the packet type field */ +#define PTP_CLASS_VLAN 0x40 /* event in a VLAN tagged packet */ #define PTP_CLASS_V1_IPV4 (PTP_CLASS_V1 | PTP_CLASS_IPV4) #define PTP_CLASS_V1_IPV6 (PTP_CLASS_V1 | PTP_CLASS_IPV6) /* probably DNE */ @@ -54,7 +54,6 @@ #define IP6_HLEN 40 #define UDP_HLEN 8 #define OFF_IHL 14 -#define OFF_PTP6 (ETH_HLEN + IP6_HLEN + UDP_HLEN) #define IPV4_HLEN(data) (((struct iphdr *)(data + OFF_IHL))->ihl << 2) #if defined(CONFIG_NET_PTP_CLASSIFY) diff --git a/net/core/ptp_classifier.c b/net/core/ptp_classifier.c index d3027a73fd4b..12ab7b4be609 100644 --- a/net/core/ptp_classifier.c +++ b/net/core/ptp_classifier.c @@ -52,14 +52,43 @@ * test_8021q: * jneq #0x8100, test_ieee1588 ; ETH_P_8021Q ? * ldh [16] ; load inner type - * jneq #0x88f7, drop_ieee1588 ; ETH_P_1588 ? + * jneq #0x88f7, test_8021q_ipv4 ; ETH_P_1588 ? * ldb [18] ; load payload * and #0x8 ; as we don't have ports here, test * jneq #0x0, drop_ieee1588 ; for PTP_GEN_BIT and drop these * ldh [18] ; reload payload * and #0xf ; mask PTP_CLASS_VMASK - * or #0x40 ; PTP_CLASS_V2_VLAN + * or #0x70 ; PTP_CLASS_VLAN|PTP_CLASS_L2 + * ret a ; return PTP class + * + * ; PTP over UDP over IPv4 over 802.1Q over Ethernet + * test_8021q_ipv4: + * jneq #0x800, test_8021q_ipv6 ; ETH_P_IP ? + * ldb [27] ; load proto + * jneq #17, drop_8021q_ipv4 ; IPPROTO_UDP ? + * ldh [24] ; load frag offset field + * jset #0x1fff, drop_8021q_ipv4; don't allow fragments + * ldxb 4*([18]&0xf) ; load IP header len + * ldh [x + 20] ; load UDP dst port + * jneq #319, drop_8021q_ipv4 ; is port PTP_EV_PORT ? + * ldh [x + 26] ; load payload + * and #0xf ; mask PTP_CLASS_VMASK + * or #0x50 ; PTP_CLASS_VLAN|PTP_CLASS_IPV4 + * ret a ; return PTP class + * drop_8021q_ipv4: ret #0x0 ; PTP_CLASS_NONE + * + * ; PTP over UDP over IPv6 over 802.1Q over Ethernet + * test_8021q_ipv6: + * jneq #0x86dd, drop_8021q_ipv6 ; ETH_P_IPV6 ? + * ldb [24] ; load proto + * jneq #17, drop_8021q_ipv6 ; IPPROTO_UDP ? + * ldh [60] ; load UDP dst port + * jneq #319, drop_8021q_ipv6 ; is port PTP_EV_PORT ? 
+ * ldh [66] ; load payload + * and #0xf ; mask PTP_CLASS_VMASK + * or #0x60 ; PTP_CLASS_VLAN|PTP_CLASS_IPV6 * ret a ; return PTP class + * drop_8021q_ipv6: ret #0x0 ; PTP_CLASS_NONE * * ; PTP over Ethernet * test_ieee1588: @@ -113,16 +142,39 @@ void __init ptp_classifier_init(void) { 0x44, 0, 0, 0x00000020 }, { 0x16, 0, 0, 0x00000000 }, { 0x06, 0, 0, 0x00000000 }, - { 0x15, 0, 9, 0x00008100 }, + { 0x15, 0, 32, 0x00008100 }, { 0x28, 0, 0, 0x00000010 }, - { 0x15, 0, 15, 0x000088f7 }, + { 0x15, 0, 7, 0x000088f7 }, { 0x30, 0, 0, 0x00000012 }, { 0x54, 0, 0, 0x00000008 }, - { 0x15, 0, 12, 0x00000000 }, + { 0x15, 0, 35, 0x00000000 }, { 0x28, 0, 0, 0x00000012 }, { 0x54, 0, 0, 0x0000000f }, - { 0x44, 0, 0, 0x00000040 }, + { 0x44, 0, 0, 0x00000070 }, + { 0x16, 0, 0, 0x00000000 }, + { 0x15, 0, 12, 0x00000800 }, + { 0x30, 0, 0, 0x0000001b }, + { 0x15, 0, 9, 0x00000011 }, + { 0x28, 0, 0, 0x00000018 }, + { 0x45, 7, 0, 0x00001fff }, + { 0xb1, 0, 0, 0x00000012 }, + { 0x48, 0, 0, 0x00000014 }, + { 0x15, 0, 4, 0x0000013f }, + { 0x48, 0, 0, 0x0000001a }, + { 0x54, 0, 0, 0x0000000f }, + { 0x44, 0, 0, 0x00000050 }, + { 0x16, 0, 0, 0x00000000 }, + { 0x06, 0, 0, 0x00000000 }, + { 0x15, 0, 8, 0x000086dd }, + { 0x30, 0, 0, 0x00000018 }, + { 0x15, 0, 6, 0x00000011 }, + { 0x28, 0, 0, 0x0000003c }, + { 0x15, 0, 4, 0x0000013f }, + { 0x28, 0, 0, 0x00000042 }, + { 0x54, 0, 0, 0x0000000f }, + { 0x44, 0, 0, 0x00000060 }, { 0x16, 0, 0, 0x00000000 }, + { 0x06, 0, 0, 0x00000000 }, { 0x15, 0, 7, 0x000088f7 }, { 0x30, 0, 0, 0x0000000e }, { 0x54, 0, 0, 0x00000008 }, -- cgit v1.2.3-59-g8ed1b From 0e001614e849b68cff94cda8db8b550569d3dba6 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Tue, 1 Jul 2014 21:32:27 -0700 Subject: net: Call skb_get_hash in get_xps_queue and __skb_tx_hash Call standard function to get a packet hash instead of taking this from skb->sk->sk_hash or only using skb->protocol. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- include/linux/skbuff.h | 2 +- net/core/flow_dissector.c | 29 +++++------------------------ 3 files changed, 7 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 66f9a04ec270..8b43a28ee0bc 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2486,7 +2486,7 @@ static inline int netif_set_xps_queue(struct net_device *dev, * as a distribution range limit for the returned value. 
*/ static inline u16 skb_tx_hash(const struct net_device *dev, - const struct sk_buff *skb) + struct sk_buff *skb) { return __skb_tx_hash(dev, skb, dev->real_num_tx_queues); } diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index ec89301ada41..b297af70ac30 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3005,7 +3005,7 @@ static inline bool skb_rx_queue_recorded(const struct sk_buff *skb) return skb->queue_mapping != 0; } -u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb, +u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb, unsigned int num_tx_queues); static inline struct sec_path *skb_sec_path(struct sk_buff *skb) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 2ff8cd4dfc5f..62d1cb624f53 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -196,12 +196,6 @@ static __always_inline u32 __flow_hash_3words(u32 a, u32 b, u32 c) return jhash_3words(a, b, c, hashrnd); } -static __always_inline u32 __flow_hash_1word(u32 a) -{ - __flow_hash_secret_init(); - return jhash_1word(a, hashrnd); -} - static inline u32 __flow_hash_from_keys(struct flow_keys *keys) { u32 hash; @@ -253,7 +247,7 @@ EXPORT_SYMBOL(__skb_get_hash); * Returns a Tx hash based on the given packet descriptor a Tx queues' number * to be used as a distribution range. */ -u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb, +u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb, unsigned int num_tx_queues) { u32 hash; @@ -273,13 +267,7 @@ u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb, qcount = dev->tc_to_txq[tc].count; } - if (skb->sk && skb->sk->sk_hash) - hash = skb->sk->sk_hash; - else - hash = (__force u16) skb->protocol; - hash = __flow_hash_1word(hash); - - return (u16) (((u64) hash * qcount) >> 32) + qoffset; + return (u16) (((u64)skb_get_hash(skb) * qcount) >> 32) + qoffset; } EXPORT_SYMBOL(__skb_tx_hash); @@ -351,17 +339,10 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb) if (map) { if (map->len == 1) queue_index = map->queues[0]; - else { - u32 hash; - if (skb->sk && skb->sk->sk_hash) - hash = skb->sk->sk_hash; - else - hash = (__force u16) skb->protocol ^ - skb->hash; - hash = __flow_hash_1word(hash); + else queue_index = map->queues[ - ((u64)hash * map->len) >> 32]; - } + ((u64)skb_get_hash(skb) * map->len) >> 32]; + if (unlikely(queue_index >= dev->real_num_tx_queues)) queue_index = -1; } -- cgit v1.2.3-59-g8ed1b From cb1ce2ef387b01686469487edd45994872d52d73 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Tue, 1 Jul 2014 21:33:10 -0700 Subject: ipv6: Implement automatic flow label generation on transmit Automatically generate flow labels for IPv6 packets on transmit. The flow label is computed based on skb_get_hash. The flow label will only automatically be set when it is zero otherwise (i.e. flow label manager hasn't set one). This supports the transmit side functionality of RFC 6438. Added an IPv6 sysctl auto_flowlabels to enable/disable this behavior system wide, and added IPV6_AUTOFLOWLABEL socket option to enable this functionality per socket. By default, auto flowlabels are disabled to avoid possible conflicts with flow label manager, however if this feature proves useful we may want to enable it by default. It should also be noted that FreeBSD has already implemented automatic flow labels (including the sysctl and socket option). In FreeBSD, automatic flow labels default to enabled. 
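For illustration, a userspace consumer could opt in per socket along these lines (a minimal sketch, not part of this patch; IPV6_AUTOFLOWLABEL may need to be defined locally as 64, the value added to include/uapi/linux/in6.h here, until libc headers pick it up):

    #include <sys/socket.h>
    #include <netinet/in.h>
    #include <stdio.h>

    #ifndef IPV6_AUTOFLOWLABEL
    #define IPV6_AUTOFLOWLABEL 64   /* value introduced by this patch */
    #endif

    /* Sketch: request automatic flow labels on a single IPv6 socket. */
    static int enable_autoflowlabel(int fd)
    {
            int on = 1;

            if (setsockopt(fd, IPPROTO_IPV6, IPV6_AUTOFLOWLABEL,
                           &on, sizeof(on)) < 0) {
                    perror("setsockopt(IPV6_AUTOFLOWLABEL)");
                    return -1;
            }
            return 0;
    }

System wide the same behaviour can be enabled with the new sysctl, e.g. sysctl -w net.ipv6.auto_flowlabels=1.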
Performance impact: Running super_netperf with 200 flows for TCP_RR and UDP_RR for IPv6. Note that in UDP case, __skb_get_hash will be called for every packet with explains slight regression. In the TCP case the hash is saved in the socket so there is no regression. Automatic flow labels disabled: TCP_RR: 86.53% CPU utilization 127/195/322 90/95/99% latencies 1.40498e+06 tps UDP_RR: 90.70% CPU utilization 118/168/243 90/95/99% latencies 1.50309e+06 tps Automatic flow labels enabled: TCP_RR: 85.90% CPU utilization 128/199/337 90/95/99% latencies 1.40051e+06 UDP_RR 92.61% CPU utilization 115/164/236 90/95/99% latencies 1.4687e+06 Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 9 +++++++++ include/linux/ipv6.h | 3 ++- include/net/ipv6.h | 20 ++++++++++++++++++++ include/net/netns/ipv6.h | 1 + include/uapi/linux/in6.h | 1 + net/ipv6/af_inet6.c | 1 + net/ipv6/ip6_gre.c | 7 +++++-- net/ipv6/ip6_output.c | 7 +++++-- net/ipv6/ip6_tunnel.c | 3 ++- net/ipv6/ipv6_sockglue.c | 8 ++++++++ net/ipv6/sysctl_net_ipv6.c | 8 ++++++++ 11 files changed, 62 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 10e216c6e05e..f35bfe43bf7a 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -1132,6 +1132,15 @@ flowlabel_consistency - BOOLEAN FALSE: disabled Default: TRUE +auto_flowlabels - BOOLEAN + Automatically generate flow labels based based on a flow hash + of the packet. This allows intermediate devices, such as routers, + to idenfify packet flows for mechanisms like Equal Cost Multipath + Routing (see RFC 6438). + TRUE: enabled + FALSE: disabled + Default: false + anycast_src_echo_reply - BOOLEAN Controls the use of anycast addresses as source addresses for ICMPv6 echo reply diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 5dc68c3ebcbd..ff560537dd61 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -199,7 +199,8 @@ struct ipv6_pinfo { * 010: prefer public address * 100: prefer care-of address */ - dontfrag:1; + dontfrag:1, + autoflowlabel:1; __u8 min_hopcount; __u8 tclass; __be32 rcv_flowinfo; diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 2aa86e1135a1..4308f2ada8b3 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -699,6 +699,26 @@ static inline void ip6_set_txhash(struct sock *sk) sk->sk_txhash = flow_hash_from_keys(&keys); } +static inline __be32 ip6_make_flowlabel(struct net *net, struct sk_buff *skb, + __be32 flowlabel, bool autolabel) +{ + if (!flowlabel && (autolabel || net->ipv6.sysctl.auto_flowlabels)) { + __be32 hash; + + hash = skb_get_hash(skb); + + /* Since this is being sent on the wire obfuscate hash a bit + * to minimize possbility that any useful information to an + * attacker is leaked. Only lower 20 bits are relevant. 
+ */ + hash ^= hash >> 12; + + flowlabel = hash & IPV6_FLOWLABEL_MASK; + } + + return flowlabel; +} + /* * Header manipulation */ diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 19d3446e59d2..eade27adecf3 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -28,6 +28,7 @@ struct netns_sysctl_ipv6 { int ip6_rt_mtu_expires; int ip6_rt_min_advmss; int flowlabel_consistency; + int auto_flowlabels; int icmpv6_time; int anycast_src_echo_reply; int fwmark_reflect; diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h index 0d8e0f0342dc..22b7a69619d8 100644 --- a/include/uapi/linux/in6.h +++ b/include/uapi/linux/in6.h @@ -233,6 +233,7 @@ struct in6_flowlabel_req { #if 0 /* not yet */ #define IPV6_USE_MIN_MTU 63 #endif +#define IPV6_AUTOFLOWLABEL 64 /* * Netfilter (1) diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index a426cd7099bb..2daa3a133e49 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -765,6 +765,7 @@ static int __net_init inet6_net_init(struct net *net) net->ipv6.sysctl.bindv6only = 0; net->ipv6.sysctl.icmpv6_time = 1*HZ; net->ipv6.sysctl.flowlabel_consistency = 1; + net->ipv6.sysctl.auto_flowlabels = 0; atomic_set(&net->ipv6.rt_genid, 0); err = ipv6_init_mibs(net); diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 3873181ed856..365b2b6f3942 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -723,7 +723,8 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, * Push down and install the IP header. */ ipv6h = ipv6_hdr(skb); - ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), fl6->flowlabel); + ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), + ip6_make_flowlabel(net, skb, fl6->flowlabel, false)); ipv6h->hop_limit = tunnel->parms.hop_limit; ipv6h->nexthdr = proto; ipv6h->saddr = fl6->saddr; @@ -1174,7 +1175,9 @@ static int ip6gre_header(struct sk_buff *skb, struct net_device *dev, struct ipv6hdr *ipv6h = (struct ipv6hdr *)skb_push(skb, t->hlen); __be16 *p = (__be16 *)(ipv6h+1); - ip6_flow_hdr(ipv6h, 0, t->fl.u.ip6.flowlabel); + ip6_flow_hdr(ipv6h, 0, + ip6_make_flowlabel(dev_net(dev), skb, + t->fl.u.ip6.flowlabel, false)); ipv6h->hop_limit = t->parms.hop_limit; ipv6h->nexthdr = NEXTHDR_GRE; ipv6h->saddr = t->parms.laddr; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index cb9df0eb4023..fa83bdd4c3dd 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -205,7 +205,8 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, if (hlimit < 0) hlimit = ip6_dst_hoplimit(dst); - ip6_flow_hdr(hdr, tclass, fl6->flowlabel); + ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel, + np->autoflowlabel)); hdr->payload_len = htons(seg_len); hdr->nexthdr = proto; @@ -1569,7 +1570,9 @@ int ip6_push_pending_frames(struct sock *sk) skb_reset_network_header(skb); hdr = ipv6_hdr(skb); - ip6_flow_hdr(hdr, np->cork.tclass, fl6->flowlabel); + ip6_flow_hdr(hdr, np->cork.tclass, + ip6_make_flowlabel(net, skb, fl6->flowlabel, + np->autoflowlabel)); hdr->hop_limit = np->cork.hop_limit; hdr->nexthdr = proto; hdr->saddr = fl6->saddr; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index afa082458360..51a1eb185ea7 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1046,7 +1046,8 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, skb_push(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); ipv6h = ipv6_hdr(skb); - ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), fl6->flowlabel); + ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, 
dsfield), + ip6_make_flowlabel(net, skb, fl6->flowlabel, false)); ipv6h->hop_limit = t->parms.hop_limit; ipv6h->nexthdr = proto; ipv6h->saddr = fl6->saddr; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index cc34f65179e4..b50b9e54cf53 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -834,6 +834,10 @@ pref_skip_coa: np->dontfrag = valbool; retv = 0; break; + case IPV6_AUTOFLOWLABEL: + np->autoflowlabel = valbool; + retv = 0; + break; } release_sock(sk); @@ -1273,6 +1277,10 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, val = np->dontfrag; break; + case IPV6_AUTOFLOWLABEL: + val = np->autoflowlabel; + break; + default: return -ENOPROTOOPT; } diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 058f3eca2e53..5bf7b61f8ae8 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -38,6 +38,13 @@ static struct ctl_table ipv6_table_template[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "auto_flowlabels", + .data = &init_net.ipv6.sysctl.auto_flowlabels, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { .procname = "fwmark_reflect", .data = &init_net.ipv6.sysctl.fwmark_reflect, @@ -74,6 +81,7 @@ static int __net_init ipv6_sysctl_net_init(struct net *net) ipv6_table[0].data = &net->ipv6.sysctl.bindv6only; ipv6_table[1].data = &net->ipv6.sysctl.anycast_src_echo_reply; ipv6_table[2].data = &net->ipv6.sysctl.flowlabel_consistency; + ipv6_table[3].data = &net->ipv6.sysctl.auto_flowlabels; ipv6_route_table = ipv6_route_sysctl_init(net); if (!ipv6_route_table) -- cgit v1.2.3-59-g8ed1b From a3b18ddb9cc1056eea24e3edc1828cfb3fd0726f Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Tue, 1 Jul 2014 21:33:17 -0700 Subject: net: Only do flow_dissector hash computation once per packet Add sw_hash flag to skbuff to indicate that skb->hash was computed from flow_dissector. This flag is checked in skb_get_hash to avoid repeatedly trying to compute the hash (ie. in the case that no L4 hash can be computed). Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/skbuff.h | 9 +++++++-- net/core/flow_dissector.c | 2 ++ 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index b297af70ac30..890fb3307dd6 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -455,6 +455,7 @@ static inline u32 skb_mstamp_us_delta(const struct skb_mstamp *t1, * @ooo_okay: allow the mapping of a socket to a queue to be changed * @l4_hash: indicate hash is a canonical 4-tuple hash over transport * ports. 
+ * @sw_hash: indicates hash was computed in software stack * @wifi_acked_valid: wifi_acked was set * @wifi_acked: whether frame was acked on wifi or not * @no_fcs: Request NIC to treat last 4 bytes as Ethernet FCS @@ -562,6 +563,7 @@ struct sk_buff { __u8 pfmemalloc:1; __u8 ooo_okay:1; __u8 l4_hash:1; + __u8 sw_hash:1; __u8 wifi_acked_valid:1; __u8 wifi_acked:1; __u8 no_fcs:1; @@ -575,7 +577,7 @@ struct sk_buff { __u8 encap_hdr_csum:1; __u8 csum_valid:1; __u8 csum_complete_sw:1; - /* 3/5 bit hole (depending on ndisc_nodetype presence) */ + /* 2/4 bit hole (depending on ndisc_nodetype presence) */ kmemcheck_bitfield_end(flags2); #if defined CONFIG_NET_DMA || defined CONFIG_NET_RX_BUSY_POLL @@ -830,13 +832,14 @@ static inline void skb_set_hash(struct sk_buff *skb, __u32 hash, enum pkt_hash_types type) { skb->l4_hash = (type == PKT_HASH_TYPE_L4); + skb->sw_hash = 0; skb->hash = hash; } void __skb_get_hash(struct sk_buff *skb); static inline __u32 skb_get_hash(struct sk_buff *skb) { - if (!skb->l4_hash) + if (!skb->l4_hash && !skb->sw_hash) __skb_get_hash(skb); return skb->hash; @@ -850,6 +853,7 @@ static inline __u32 skb_get_hash_raw(const struct sk_buff *skb) static inline void skb_clear_hash(struct sk_buff *skb) { skb->hash = 0; + skb->sw_hash = 0; skb->l4_hash = 0; } @@ -862,6 +866,7 @@ static inline void skb_clear_hash_if_not_l4(struct sk_buff *skb) static inline void skb_copy_hash(struct sk_buff *to, const struct sk_buff *from) { to->hash = from->hash; + to->sw_hash = from->sw_hash; to->l4_hash = from->l4_hash; }; diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index c5f3912dad4c..5f362c1d0332 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -256,6 +256,8 @@ void __skb_get_hash(struct sk_buff *skb) if (keys.ports) skb->l4_hash = 1; + skb->sw_hash = 1; + skb->hash = __flow_hash_from_keys(&keys); } EXPORT_SYMBOL(__skb_get_hash); -- cgit v1.2.3-59-g8ed1b From db55b62cab8d458aba4e7a81dc4dd98b45c03d85 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Wed, 2 Jul 2014 14:12:01 +0200 Subject: net: arcnet: Remove "#define bool int" The header file include/linux/arcdevice.h #defines bool to int, if bool is not already #defined. However, the files which use that header file seem to rely on that #define (unconditionally) being in effect: the prototypes for the functions arcrimi_reset, com20020_reset, com90io_reset, com90xx_reset (whose addresses are assigned to the hw.reset member of struct arcnet_local) use int explicitly. Moreover, that #define is an accident waiting to happen (scenario: inclusion of arcdevice.h followed by inclusion of some header which declares function prototypes using bool). Also, #include must appear before #include (the compiler wouldn't like "typedef _Bool int"). Since none of the files using arcdevice.h declare variables of type "bool", the patch is actually quite simple, unlike the commit message. Signed-off-by: Rasmus Villemoes Signed-off-by: David S. Miller --- include/linux/arcdevice.h | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/arcdevice.h b/include/linux/arcdevice.h index 7216b0daf544..df0356220730 100644 --- a/include/linux/arcdevice.h +++ b/include/linux/arcdevice.h @@ -22,10 +22,6 @@ #ifdef __KERNEL__ #include -#ifndef bool -#define bool int -#endif - /* * RECON_THRESHOLD is the maximum number of RECON messages to receive * within one minute before printing a "cabling problem" warning. 
The @@ -285,9 +281,9 @@ struct arcnet_local { unsigned long first_recon; /* time of "first" RECON message to count */ unsigned long last_recon; /* time of most recent RECON */ int num_recons; /* number of RECONs between first and last. */ - bool network_down; /* do we think the network is down? */ + int network_down; /* do we think the network is down? */ - bool excnak_pending; /* We just got an excesive nak interrupt */ + int excnak_pending; /* We just got an excesive nak interrupt */ struct { uint16_t sequence; /* sequence number (incs with each packet) */ @@ -305,7 +301,7 @@ struct arcnet_local { void (*command) (struct net_device * dev, int cmd); int (*status) (struct net_device * dev); void (*intmask) (struct net_device * dev, int mask); - bool (*reset) (struct net_device * dev, bool really_reset); + int (*reset) (struct net_device * dev, int really_reset); void (*open) (struct net_device * dev); void (*close) (struct net_device * dev); -- cgit v1.2.3-59-g8ed1b From e09f8ea560490e941139d23b4c278d3e6e2c871a Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Mon, 7 Jul 2014 14:31:36 -0600 Subject: iommu: Fix IOMMU sysfs stubs 0-day kernel build testing reports: arch/x86/kvm/x86.o: In function `iommu_device_destroy': >> (.text+0x7a0a): multiple definition of `iommu_device_destroy' arch/x86/kvm/../../../virt/kvm/vfio.o:vfio.c:(.text+0x490): first defined here arch/x86/kvm/x86.o: In function `iommu_device_link': >> (.text+0x7a15): multiple definition of `iommu_device_link' arch/x86/kvm/../../../virt/kvm/vfio.o:vfio.c:(.text+0x49b): first defined here arch/x86/kvm/x86.o: In function `iommu_device_unlink': >> (.text+0x7a25): multiple definition of `iommu_device_unlink' arch/x86/kvm/../../../virt/kvm/vfio.o:vfio.c:(.text+0x4ab): first defined here arch/x86/kvm/x86.o: In function `iommu_device_create': >> (.text+0x79f8): multiple definition of `iommu_device_create' arch/x86/kvm/../../../virt/kvm/vfio.o:vfio.c:(.text+0x47e): first defined here These are due to failing to define the stubs as static inline. Fix. Signed-off-by: Alex Williamson Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index c7097d7f024c..20f9a527922a 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -403,23 +403,24 @@ static inline int iommu_domain_set_attr(struct iommu_domain *domain, return -EINVAL; } -struct device *iommu_device_create(struct device *parent, void *drvdata, - const struct attribute_group **groups, - const char *fmt, ...) +static inline struct device *iommu_device_create(struct device *parent, + void *drvdata, + const struct attribute_group **groups, + const char *fmt, ...) 
{ return ERR_PTR(-ENODEV); } -void iommu_device_destroy(struct device *dev) +static inline void iommu_device_destroy(struct device *dev) { } -int iommu_device_link(struct device *dev, struct device *link) +static inline int iommu_device_link(struct device *dev, struct device *link) { return -EINVAL; } -void iommu_device_unlink(struct device *dev, struct device *link) +static inline void iommu_device_unlink(struct device *dev, struct device *link) { } -- cgit v1.2.3-59-g8ed1b From 1d0326b13bc9ecab5c784415165e6f78fb06ae5b Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 20 Jun 2014 14:14:41 +0400 Subject: libceph: rename ceph_osd_request::r_linger_osd to r_linger_osd_item So that: req->r_osd_item --> osd->o_requests list req->r_linger_osd_item --> osd->o_linger_requests list Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder --- include/linux/ceph/osd_client.h | 2 +- net/ceph/osd_client.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 94ec69672164..7490a03ac163 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -117,7 +117,7 @@ struct ceph_osd_request { struct list_head r_req_lru_item; struct list_head r_osd_item; struct list_head r_linger_item; - struct list_head r_linger_osd; + struct list_head r_linger_osd_item; struct ceph_osd *r_osd; struct ceph_pg r_pgid; int r_pg_osds[CEPH_PG_MAX_SIZE]; diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 05be0c181695..d5d2be3bd113 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -364,7 +364,7 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, RB_CLEAR_NODE(&req->r_node); INIT_LIST_HEAD(&req->r_unsafe_item); INIT_LIST_HEAD(&req->r_linger_item); - INIT_LIST_HEAD(&req->r_linger_osd); + INIT_LIST_HEAD(&req->r_linger_osd_item); INIT_LIST_HEAD(&req->r_req_lru_item); INIT_LIST_HEAD(&req->r_osd_item); @@ -916,7 +916,7 @@ static void __kick_osd_requests(struct ceph_osd_client *osdc, * list at the end to keep things in tid order. */ list_for_each_entry_safe(req, nreq, &osd->o_linger_requests, - r_linger_osd) { + r_linger_osd_item) { /* * reregister request prior to unregistering linger so * that r_osd is preserved. @@ -1218,7 +1218,7 @@ static void __register_linger_request(struct ceph_osd_client *osdc, ceph_osdc_get_request(req); list_add_tail(&req->r_linger_item, &osdc->req_linger); if (req->r_osd) - list_add_tail(&req->r_linger_osd, + list_add_tail(&req->r_linger_osd_item, &req->r_osd->o_linger_requests); } @@ -1228,7 +1228,7 @@ static void __unregister_linger_request(struct ceph_osd_client *osdc, dout("__unregister_linger_request %p\n", req); list_del_init(&req->r_linger_item); if (req->r_osd) { - list_del_init(&req->r_linger_osd); + list_del_init(&req->r_linger_osd_item); if (list_empty(&req->r_osd->o_requests) && list_empty(&req->r_osd->o_linger_requests)) { -- cgit v1.2.3-59-g8ed1b From 0215e44bb390a968d01404aa2f35af56f9b55fc8 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 20 Jun 2014 14:14:41 +0400 Subject: libceph: move and add dout()s to ceph_msg_{get,put}() Add dout()s to ceph_msg_{get,put}(). Also move them to .c and turn kref release callback into a static function. 
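The conversion follows the common kref idiom, sketched below with hypothetical names (illustrative only, not code from this patch): keeping the release callback static to the .c file means callers only ever see the get/put wrappers.

    #include <linux/kref.h>
    #include <linux/slab.h>

    struct foo {
            struct kref kref;
            /* payload ... */
    };

    /* Release callback stays private to the implementation file. */
    static void foo_release(struct kref *kref)
    {
            struct foo *f = container_of(kref, struct foo, kref);

            kfree(f);
    }

    struct foo *foo_get(struct foo *f)
    {
            kref_get(&f->kref);
            return f;
    }

    void foo_put(struct foo *f)
    {
            kref_put(&f->kref, foo_release);
    }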
Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder --- include/linux/ceph/messenger.h | 14 ++------------ net/ceph/messenger.c | 31 ++++++++++++++++++++++--------- 2 files changed, 24 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index d21f2dba0731..40ae58e3e9db 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -285,19 +285,9 @@ extern void ceph_msg_data_add_bio(struct ceph_msg *msg, struct bio *bio, extern struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags, bool can_fail); -extern void ceph_msg_kfree(struct ceph_msg *m); - -static inline struct ceph_msg *ceph_msg_get(struct ceph_msg *msg) -{ - kref_get(&msg->kref); - return msg; -} -extern void ceph_msg_last_put(struct kref *kref); -static inline void ceph_msg_put(struct ceph_msg *msg) -{ - kref_put(&msg->kref, ceph_msg_last_put); -} +extern struct ceph_msg *ceph_msg_get(struct ceph_msg *msg); +extern void ceph_msg_put(struct ceph_msg *msg); extern void ceph_msg_dump(struct ceph_msg *msg); diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 1948d592aa54..8bffa5b90fef 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -3269,24 +3269,21 @@ static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip) /* * Free a generically kmalloc'd message. */ -void ceph_msg_kfree(struct ceph_msg *m) +static void ceph_msg_free(struct ceph_msg *m) { - dout("msg_kfree %p\n", m); + dout("%s %p\n", __func__, m); ceph_kvfree(m->front.iov_base); kmem_cache_free(ceph_msg_cache, m); } -/* - * Drop a msg ref. Destroy as needed. - */ -void ceph_msg_last_put(struct kref *kref) +static void ceph_msg_release(struct kref *kref) { struct ceph_msg *m = container_of(kref, struct ceph_msg, kref); LIST_HEAD(data); struct list_head *links; struct list_head *next; - dout("ceph_msg_put last one on %p\n", m); + dout("%s %p\n", __func__, m); WARN_ON(!list_empty(&m->list_head)); /* drop middle, data, if any */ @@ -3308,9 +3305,25 @@ void ceph_msg_last_put(struct kref *kref) if (m->pool) ceph_msgpool_put(m->pool, m); else - ceph_msg_kfree(m); + ceph_msg_free(m); +} + +struct ceph_msg *ceph_msg_get(struct ceph_msg *msg) +{ + dout("%s %p (was %d)\n", __func__, msg, + atomic_read(&msg->kref.refcount)); + kref_get(&msg->kref); + return msg; +} +EXPORT_SYMBOL(ceph_msg_get); + +void ceph_msg_put(struct ceph_msg *msg) +{ + dout("%s %p (was %d)\n", __func__, msg, + atomic_read(&msg->kref.refcount)); + kref_put(&msg->kref, ceph_msg_release); } -EXPORT_SYMBOL(ceph_msg_last_put); +EXPORT_SYMBOL(ceph_msg_put); void ceph_msg_dump(struct ceph_msg *msg) { -- cgit v1.2.3-59-g8ed1b From 9e94af202afd961da39f82b55ba83edd4ad30e98 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 20 Jun 2014 14:14:42 +0400 Subject: libceph: move and add dout()s to ceph_osdc_request_{get,put}() Add dout()s to ceph_osdc_request_{get,put}(). Also move them to .c and turn kref release callback into a static function. 
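For orientation, the pattern behind this change (and the matching ceph_msg_{get,put} change before it) is sketched below with a generic, hypothetical "foo" object - it is not libceph code, only a distillation of why the release callback can become static once the put helper moves out of the header:

	/*
	 * Generic sketch only (hypothetical "foo" object): once the put
	 * helper is out-of-line, the kref release callback no longer needs
	 * external linkage, and the put can log the refcount.
	 */
	#include <linux/kernel.h>
	#include <linux/kref.h>
	#include <linux/slab.h>

	struct foo {
		struct kref kref;
	};

	/* The header now only declares: void foo_put(struct foo *f); */

	static void foo_release(struct kref *kref)	/* can finally be static */
	{
		kfree(container_of(kref, struct foo, kref));
	}

	void foo_put(struct foo *f)
	{
		pr_debug("%s %p (was %d)\n", __func__, f,
			 atomic_read(&f->kref.refcount));
		kref_put(&f->kref, foo_release);
	}

The exact libceph version of this is in the diff below.
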
Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder --- include/linux/ceph/osd_client.h | 11 ++--------- net/ceph/osd_client.c | 26 ++++++++++++++++++++++---- 2 files changed, 24 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 7490a03ac163..a8d5652f589d 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -328,15 +328,8 @@ extern void ceph_osdc_set_request_linger(struct ceph_osd_client *osdc, extern void ceph_osdc_unregister_linger_request(struct ceph_osd_client *osdc, struct ceph_osd_request *req); -static inline void ceph_osdc_get_request(struct ceph_osd_request *req) -{ - kref_get(&req->r_kref); -} -extern void ceph_osdc_release_request(struct kref *kref); -static inline void ceph_osdc_put_request(struct ceph_osd_request *req) -{ - kref_put(&req->r_kref, ceph_osdc_release_request); -} +extern void ceph_osdc_get_request(struct ceph_osd_request *req); +extern void ceph_osdc_put_request(struct ceph_osd_request *req); extern int ceph_osdc_start_request(struct ceph_osd_client *osdc, struct ceph_osd_request *req, diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 6202923b41ff..7406046212dc 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -297,12 +297,15 @@ static void osd_req_op_data_release(struct ceph_osd_request *osd_req, /* * requests */ -void ceph_osdc_release_request(struct kref *kref) +static void ceph_osdc_release_request(struct kref *kref) { - struct ceph_osd_request *req; + struct ceph_osd_request *req = container_of(kref, + struct ceph_osd_request, r_kref); unsigned int which; - req = container_of(kref, struct ceph_osd_request, r_kref); + dout("%s %p (r_request %p r_reply %p)\n", __func__, req, + req->r_request, req->r_reply); + if (req->r_request) ceph_msg_put(req->r_request); if (req->r_reply) { @@ -320,7 +323,22 @@ void ceph_osdc_release_request(struct kref *kref) kmem_cache_free(ceph_osd_request_cache, req); } -EXPORT_SYMBOL(ceph_osdc_release_request); + +void ceph_osdc_get_request(struct ceph_osd_request *req) +{ + dout("%s %p (was %d)\n", __func__, req, + atomic_read(&req->r_kref.refcount)); + kref_get(&req->r_kref); +} +EXPORT_SYMBOL(ceph_osdc_get_request); + +void ceph_osdc_put_request(struct ceph_osd_request *req) +{ + dout("%s %p (was %d)\n", __func__, req, + atomic_read(&req->r_kref.refcount)); + kref_put(&req->r_kref, ceph_osdc_release_request); +} +EXPORT_SYMBOL(ceph_osdc_put_request); struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, struct ceph_snap_context *snapc, -- cgit v1.2.3-59-g8ed1b From c9f9b93ddfd76498fe36d9f550bd26533a4ee6bf Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 19 Jun 2014 11:38:13 +0400 Subject: libceph: introduce ceph_osdc_cancel_request() Introduce ceph_osdc_cancel_request() intended for canceling requests from the higher layers (rbd and cephfs). Because higher layers are in charge and are supposed to know what and when they are canceling, the request is not completed, only unref'ed and removed from the libceph data structures. __cancel_request() is no longer called before __unregister_request(), because __unregister_request() unconditionally revokes r_request and there is no point in trying to do it twice. 
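As an illustration of that intended higher-layer usage, a caller that started a request and later decides to abandon it might do roughly the following (the surrounding helper is hypothetical and not part of this patch; only the ceph_osdc_* calls come from libceph):

	#include <linux/ceph/osd_client.h>

	/* Hypothetical caller-side helper, e.g. in rbd/cephfs-style code. */
	static void my_abort_osd_request(struct ceph_osd_request *req)
	{
		/*
		 * Unhook the request from libceph without running its
		 * completion callbacks - the caller knows it is canceling.
		 */
		ceph_osdc_cancel_request(req);

		/* Drop the caller's reference once it is done with it. */
		ceph_osdc_put_request(req);
	}
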
Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder --- include/linux/ceph/osd_client.h | 1 + net/ceph/osd_client.c | 31 +++++++++++++++++++++++++------ 2 files changed, 26 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index a8d5652f589d..de09cad7b7c7 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -334,6 +334,7 @@ extern void ceph_osdc_put_request(struct ceph_osd_request *req); extern int ceph_osdc_start_request(struct ceph_osd_client *osdc, struct ceph_osd_request *req, bool nofail); +extern void ceph_osdc_cancel_request(struct ceph_osd_request *req); extern int ceph_osdc_wait_request(struct ceph_osd_client *osdc, struct ceph_osd_request *req); extern void ceph_osdc_sync(struct ceph_osd_client *osdc); diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 89d7d8861d80..6c1ccf5590a3 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -2467,6 +2467,25 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc, } EXPORT_SYMBOL(ceph_osdc_start_request); +/* + * Unregister a registered request. The request is not completed (i.e. + * no callbacks or wakeups) - higher layers are supposed to know what + * they are canceling. + */ +void ceph_osdc_cancel_request(struct ceph_osd_request *req) +{ + struct ceph_osd_client *osdc = req->r_osdc; + + mutex_lock(&osdc->request_mutex); + if (req->r_linger) + __unregister_linger_request(osdc, req); + __unregister_request(osdc, req); + mutex_unlock(&osdc->request_mutex); + + dout("%s %p tid %llu canceled\n", __func__, req, req->r_tid); +} +EXPORT_SYMBOL(ceph_osdc_cancel_request); + /* * wait for a request to complete */ @@ -2475,18 +2494,18 @@ int ceph_osdc_wait_request(struct ceph_osd_client *osdc, { int rc; + dout("%s %p tid %llu\n", __func__, req, req->r_tid); + rc = wait_for_completion_interruptible(&req->r_completion); if (rc < 0) { - mutex_lock(&osdc->request_mutex); - __cancel_request(req); - __unregister_request(osdc, req); - mutex_unlock(&osdc->request_mutex); + dout("%s %p tid %llu interrupted\n", __func__, req, req->r_tid); + ceph_osdc_cancel_request(req); complete_request(req); - dout("wait_request tid %llu canceled/timed out\n", req->r_tid); return rc; } - dout("wait_request tid %llu result %d\n", req->r_tid, req->r_result); + dout("%s %p tid %llu result %d\n", __func__, req, req->r_tid, + req->r_result); return req->r_result; } EXPORT_SYMBOL(ceph_osdc_wait_request); -- cgit v1.2.3-59-g8ed1b From 2d05f082cbc73b837011225b165d64d25b47c940 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 24 Jun 2014 16:21:45 +0400 Subject: libceph: nuke ceph_osdc_unregister_linger_request() Remove now unused ceph_osdc_unregister_linger_request(). 
Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder --- include/linux/ceph/osd_client.h | 2 -- net/ceph/osd_client.c | 10 ---------- 2 files changed, 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index de09cad7b7c7..03aeb27fcc69 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -325,8 +325,6 @@ extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *, extern void ceph_osdc_set_request_linger(struct ceph_osd_client *osdc, struct ceph_osd_request *req); -extern void ceph_osdc_unregister_linger_request(struct ceph_osd_client *osdc, - struct ceph_osd_request *req); extern void ceph_osdc_get_request(struct ceph_osd_request *req); extern void ceph_osdc_put_request(struct ceph_osd_request *req); diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 6c1ccf5590a3..30f6faf3584f 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1281,16 +1281,6 @@ static void __unregister_linger_request(struct ceph_osd_client *osdc, ceph_osdc_put_request(req); } -void ceph_osdc_unregister_linger_request(struct ceph_osd_client *osdc, - struct ceph_osd_request *req) -{ - mutex_lock(&osdc->request_mutex); - if (req->r_linger) - __unregister_linger_request(osdc, req); - mutex_unlock(&osdc->request_mutex); -} -EXPORT_SYMBOL(ceph_osdc_unregister_linger_request); - void ceph_osdc_set_request_linger(struct ceph_osd_client *osdc, struct ceph_osd_request *req) { -- cgit v1.2.3-59-g8ed1b From 2e3fadbf730fd0d13c891d5e555af3e7f39ca3f4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 1 Jul 2014 16:40:19 +0100 Subject: PKCS#7: Implement a parser [RFC 2315] Implement a parser for a PKCS#7 signed-data message as described in part of RFC 2315. Signed-off-by: David Howells Acked-by: Vivek Goyal Reviewed-by: Kees Cook --- crypto/asymmetric_keys/Kconfig | 9 + crypto/asymmetric_keys/Makefile | 13 ++ crypto/asymmetric_keys/pkcs7.asn1 | 127 +++++++++++ crypto/asymmetric_keys/pkcs7_parser.c | 396 ++++++++++++++++++++++++++++++++++ crypto/asymmetric_keys/pkcs7_parser.h | 61 ++++++ include/crypto/pkcs7.h | 23 ++ include/linux/oid_registry.h | 1 + 7 files changed, 630 insertions(+) create mode 100644 crypto/asymmetric_keys/pkcs7.asn1 create mode 100644 crypto/asymmetric_keys/pkcs7_parser.c create mode 100644 crypto/asymmetric_keys/pkcs7_parser.h create mode 100644 include/crypto/pkcs7.h (limited to 'include/linux') diff --git a/crypto/asymmetric_keys/Kconfig b/crypto/asymmetric_keys/Kconfig index 03a6eb95ab50..a7cec9dd6154 100644 --- a/crypto/asymmetric_keys/Kconfig +++ b/crypto/asymmetric_keys/Kconfig @@ -37,4 +37,13 @@ config X509_CERTIFICATE_PARSER data and provides the ability to instantiate a crypto key from a public key packet found inside the certificate. +config PKCS7_MESSAGE_PARSER + tristate "PKCS#7 message parser" + depends on X509_CERTIFICATE_PARSER + select ASN1 + select OID_REGISTRY + help + This option provides support for parsing PKCS#7 format messages for + signature data and provides the ability to verify the signature. 
+ endif # ASYMMETRIC_KEY_TYPE diff --git a/crypto/asymmetric_keys/Makefile b/crypto/asymmetric_keys/Makefile index 0727204aab68..59d8cade5cfe 100644 --- a/crypto/asymmetric_keys/Makefile +++ b/crypto/asymmetric_keys/Makefile @@ -25,3 +25,16 @@ $(obj)/x509_rsakey-asn1.o: $(obj)/x509_rsakey-asn1.c $(obj)/x509_rsakey-asn1.h clean-files += x509-asn1.c x509-asn1.h clean-files += x509_rsakey-asn1.c x509_rsakey-asn1.h + +# +# PKCS#7 message handling +# +obj-$(CONFIG_PKCS7_MESSAGE_PARSER) += pkcs7_message.o +pkcs7_message-y := \ + pkcs7-asn1.o \ + pkcs7_parser.o + +$(obj)/pkcs7_parser.o: $(obj)/pkcs7-asn1.h +$(obj)/pkcs7-asn1.o: $(obj)/pkcs7-asn1.c $(obj)/pkcs7-asn1.h + +clean-files += pkcs7-asn1.c pkcs7-asn1.h diff --git a/crypto/asymmetric_keys/pkcs7.asn1 b/crypto/asymmetric_keys/pkcs7.asn1 new file mode 100644 index 000000000000..a5a14ef28c86 --- /dev/null +++ b/crypto/asymmetric_keys/pkcs7.asn1 @@ -0,0 +1,127 @@ +PKCS7ContentInfo ::= SEQUENCE { + contentType ContentType, + content [0] EXPLICIT SignedData OPTIONAL +} + +ContentType ::= OBJECT IDENTIFIER ({ pkcs7_note_OID }) + +SignedData ::= SEQUENCE { + version INTEGER, + digestAlgorithms DigestAlgorithmIdentifiers, + contentInfo ContentInfo, + certificates CHOICE { + certSet [0] IMPLICIT ExtendedCertificatesAndCertificates, + certSequence [2] IMPLICIT Certificates + } OPTIONAL ({ pkcs7_note_certificate_list }), + crls CHOICE { + crlSet [1] IMPLICIT CertificateRevocationLists, + crlSequence [3] IMPLICIT CRLSequence + } OPTIONAL, + signerInfos SignerInfos +} + +ContentInfo ::= SEQUENCE { + contentType ContentType, + content [0] EXPLICIT Data OPTIONAL +} + +Data ::= ANY ({ pkcs7_note_data }) + +DigestAlgorithmIdentifiers ::= CHOICE { + daSet SET OF DigestAlgorithmIdentifier, + daSequence SEQUENCE OF DigestAlgorithmIdentifier +} + +DigestAlgorithmIdentifier ::= SEQUENCE { + algorithm OBJECT IDENTIFIER ({ pkcs7_note_OID }), + parameters ANY OPTIONAL +} + +-- +-- Certificates and certificate lists +-- +ExtendedCertificatesAndCertificates ::= SET OF ExtendedCertificateOrCertificate + +ExtendedCertificateOrCertificate ::= CHOICE { + certificate Certificate, -- X.509 + extendedCertificate [0] IMPLICIT ExtendedCertificate -- PKCS#6 +} + +ExtendedCertificate ::= Certificate -- cheating + +Certificates ::= SEQUENCE OF Certificate + +CertificateRevocationLists ::= SET OF CertificateList + +CertificateList ::= SEQUENCE OF Certificate -- This may be defined incorrectly + +CRLSequence ::= SEQUENCE OF CertificateList + +Certificate ::= ANY ({ pkcs7_extract_cert }) -- X.509 + +-- +-- Signer information +-- +SignerInfos ::= CHOICE { + siSet SET OF SignerInfo, + siSequence SEQUENCE OF SignerInfo +} + +SignerInfo ::= SEQUENCE { + version INTEGER, + issuerAndSerialNumber IssuerAndSerialNumber, + digestAlgorithm DigestAlgorithmIdentifier ({ pkcs7_sig_note_digest_algo }), + authenticatedAttributes CHOICE { + aaSet [0] IMPLICIT SetOfAuthenticatedAttribute + ({ pkcs7_sig_note_set_of_authattrs }), + aaSequence [2] EXPLICIT SEQUENCE OF AuthenticatedAttribute + -- Explicit because easier to compute digest on + -- sequence of attributes and then reuse encoded + -- sequence in aaSequence. 
+ } OPTIONAL, + digestEncryptionAlgorithm + DigestEncryptionAlgorithmIdentifier ({ pkcs7_sig_note_pkey_algo }), + encryptedDigest EncryptedDigest, + unauthenticatedAttributes CHOICE { + uaSet [1] IMPLICIT SET OF UnauthenticatedAttribute, + uaSequence [3] IMPLICIT SEQUENCE OF UnauthenticatedAttribute + } OPTIONAL +} ({ pkcs7_note_signed_info }) + +IssuerAndSerialNumber ::= SEQUENCE { + issuer Name ({ pkcs7_sig_note_issuer }), + serialNumber CertificateSerialNumber ({ pkcs7_sig_note_serial }) +} + +CertificateSerialNumber ::= INTEGER + +SetOfAuthenticatedAttribute ::= SET OF AuthenticatedAttribute + +AuthenticatedAttribute ::= SEQUENCE { + type OBJECT IDENTIFIER ({ pkcs7_note_OID }), + values SET OF ANY ({ pkcs7_sig_note_authenticated_attr }) +} + +UnauthenticatedAttribute ::= SEQUENCE { + type OBJECT IDENTIFIER ({ pkcs7_note_OID }), + values SET OF ANY +} + +DigestEncryptionAlgorithmIdentifier ::= SEQUENCE { + algorithm OBJECT IDENTIFIER ({ pkcs7_note_OID }), + parameters ANY OPTIONAL +} + +EncryptedDigest ::= OCTET STRING ({ pkcs7_sig_note_signature }) + +--- +--- X.500 Name +--- +Name ::= SEQUENCE OF RelativeDistinguishedName + +RelativeDistinguishedName ::= SET OF AttributeValueAssertion + +AttributeValueAssertion ::= SEQUENCE { + attributeType OBJECT IDENTIFIER ({ pkcs7_note_OID }), + attributeValue ANY +} diff --git a/crypto/asymmetric_keys/pkcs7_parser.c b/crypto/asymmetric_keys/pkcs7_parser.c new file mode 100644 index 000000000000..42e56aa7d277 --- /dev/null +++ b/crypto/asymmetric_keys/pkcs7_parser.c @@ -0,0 +1,396 @@ +/* PKCS#7 parser + * + * Copyright (C) 2012 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. 
+ */ + +#define pr_fmt(fmt) "PKCS7: "fmt +#include +#include +#include +#include +#include +#include "public_key.h" +#include "pkcs7_parser.h" +#include "pkcs7-asn1.h" + +struct pkcs7_parse_context { + struct pkcs7_message *msg; /* Message being constructed */ + struct pkcs7_signed_info *sinfo; /* SignedInfo being constructed */ + struct pkcs7_signed_info **ppsinfo; + struct x509_certificate *certs; /* Certificate cache */ + struct x509_certificate **ppcerts; + unsigned long data; /* Start of data */ + enum OID last_oid; /* Last OID encountered */ + unsigned x509_index; + unsigned sinfo_index; +}; + +/** + * pkcs7_free_message - Free a PKCS#7 message + * @pkcs7: The PKCS#7 message to free + */ +void pkcs7_free_message(struct pkcs7_message *pkcs7) +{ + struct x509_certificate *cert; + struct pkcs7_signed_info *sinfo; + + if (pkcs7) { + while (pkcs7->certs) { + cert = pkcs7->certs; + pkcs7->certs = cert->next; + x509_free_certificate(cert); + } + while (pkcs7->crl) { + cert = pkcs7->crl; + pkcs7->crl = cert->next; + x509_free_certificate(cert); + } + while (pkcs7->signed_infos) { + sinfo = pkcs7->signed_infos; + pkcs7->signed_infos = sinfo->next; + mpi_free(sinfo->sig.mpi[0]); + kfree(sinfo->sig.digest); + kfree(sinfo); + } + kfree(pkcs7); + } +} +EXPORT_SYMBOL_GPL(pkcs7_free_message); + +/** + * pkcs7_parse_message - Parse a PKCS#7 message + * @data: The raw binary ASN.1 encoded message to be parsed + * @datalen: The size of the encoded message + */ +struct pkcs7_message *pkcs7_parse_message(const void *data, size_t datalen) +{ + struct pkcs7_parse_context *ctx; + struct pkcs7_message *msg; + long ret; + + ret = -ENOMEM; + msg = kzalloc(sizeof(struct pkcs7_message), GFP_KERNEL); + if (!msg) + goto error_no_sig; + ctx = kzalloc(sizeof(struct pkcs7_parse_context), GFP_KERNEL); + if (!ctx) + goto error_no_ctx; + ctx->sinfo = kzalloc(sizeof(struct pkcs7_signed_info), GFP_KERNEL); + if (!ctx->sinfo) + goto error_no_sinfo; + + ctx->msg = msg; + ctx->data = (unsigned long)data; + ctx->ppcerts = &ctx->certs; + ctx->ppsinfo = &ctx->msg->signed_infos; + + /* Attempt to decode the signature */ + ret = asn1_ber_decoder(&pkcs7_decoder, ctx, data, datalen); + if (ret < 0) + goto error_decode; + + while (ctx->certs) { + struct x509_certificate *cert = ctx->certs; + ctx->certs = cert->next; + x509_free_certificate(cert); + } + mpi_free(ctx->sinfo->sig.mpi[0]); + kfree(ctx->sinfo->sig.digest); + kfree(ctx->sinfo); + kfree(ctx); + return msg; + +error_decode: + mpi_free(ctx->sinfo->sig.mpi[0]); + kfree(ctx->sinfo->sig.digest); + kfree(ctx->sinfo); +error_no_sinfo: + kfree(ctx); +error_no_ctx: + pkcs7_free_message(msg); +error_no_sig: + return ERR_PTR(ret); +} +EXPORT_SYMBOL_GPL(pkcs7_parse_message); + +/** + * pkcs7_get_content_data - Get access to the PKCS#7 content + * @pkcs7: The preparsed PKCS#7 message to access + * @_data: Place to return a pointer to the data + * @_data_len: Place to return the data length + * @want_wrapper: True if the ASN.1 object header should be included in the data + * + * Get access to the data content of the PKCS#7 message, including, optionally, + * the header of the ASN.1 object that contains it. Returns -ENODATA if the + * data object was missing from the message. + */ +int pkcs7_get_content_data(const struct pkcs7_message *pkcs7, + const void **_data, size_t *_data_len, + bool want_wrapper) +{ + size_t wrapper; + + if (!pkcs7->data) + return -ENODATA; + + wrapper = want_wrapper ? 
pkcs7->data_hdrlen : 0; + *_data = pkcs7->data - wrapper; + *_data_len = pkcs7->data_len + wrapper; + return 0; +} +EXPORT_SYMBOL_GPL(pkcs7_get_content_data); + +/* + * Note an OID when we find one for later processing when we know how + * to interpret it. + */ +int pkcs7_note_OID(void *context, size_t hdrlen, + unsigned char tag, + const void *value, size_t vlen) +{ + struct pkcs7_parse_context *ctx = context; + + ctx->last_oid = look_up_OID(value, vlen); + if (ctx->last_oid == OID__NR) { + char buffer[50]; + sprint_oid(value, vlen, buffer, sizeof(buffer)); + printk("PKCS7: Unknown OID: [%lu] %s\n", + (unsigned long)value - ctx->data, buffer); + } + return 0; +} + +/* + * Note the digest algorithm for the signature. + */ +int pkcs7_sig_note_digest_algo(void *context, size_t hdrlen, + unsigned char tag, + const void *value, size_t vlen) +{ + struct pkcs7_parse_context *ctx = context; + + switch (ctx->last_oid) { + case OID_md4: + ctx->sinfo->sig.pkey_hash_algo = HASH_ALGO_MD4; + break; + case OID_md5: + ctx->sinfo->sig.pkey_hash_algo = HASH_ALGO_MD5; + break; + case OID_sha1: + ctx->sinfo->sig.pkey_hash_algo = HASH_ALGO_SHA1; + break; + case OID_sha256: + ctx->sinfo->sig.pkey_hash_algo = HASH_ALGO_SHA256; + break; + default: + printk("Unsupported digest algo: %u\n", ctx->last_oid); + return -ENOPKG; + } + return 0; +} + +/* + * Note the public key algorithm for the signature. + */ +int pkcs7_sig_note_pkey_algo(void *context, size_t hdrlen, + unsigned char tag, + const void *value, size_t vlen) +{ + struct pkcs7_parse_context *ctx = context; + + switch (ctx->last_oid) { + case OID_rsaEncryption: + ctx->sinfo->sig.pkey_algo = PKEY_ALGO_RSA; + break; + default: + printk("Unsupported pkey algo: %u\n", ctx->last_oid); + return -ENOPKG; + } + return 0; +} + +/* + * Extract a certificate and store it in the context. + */ +int pkcs7_extract_cert(void *context, size_t hdrlen, + unsigned char tag, + const void *value, size_t vlen) +{ + struct pkcs7_parse_context *ctx = context; + struct x509_certificate *x509; + + if (tag != ((ASN1_UNIV << 6) | ASN1_CONS_BIT | ASN1_SEQ)) { + pr_debug("Cert began with tag %02x at %lu\n", + tag, (unsigned long)ctx - ctx->data); + return -EBADMSG; + } + + /* We have to correct for the header so that the X.509 parser can start + * from the beginning. Note that since X.509 stipulates DER, there + * probably shouldn't be an EOC trailer - but it is in PKCS#7 (which + * stipulates BER). + */ + value -= hdrlen; + vlen += hdrlen; + + if (((u8*)value)[1] == 0x80) + vlen += 2; /* Indefinite length - there should be an EOC */ + + x509 = x509_cert_parse(value, vlen); + if (IS_ERR(x509)) + return PTR_ERR(x509); + + pr_debug("Got cert for %s\n", x509->subject); + pr_debug("- fingerprint %s\n", x509->fingerprint); + + x509->index = ++ctx->x509_index; + *ctx->ppcerts = x509; + ctx->ppcerts = &x509->next; + return 0; +} + +/* + * Save the certificate list + */ +int pkcs7_note_certificate_list(void *context, size_t hdrlen, + unsigned char tag, + const void *value, size_t vlen) +{ + struct pkcs7_parse_context *ctx = context; + + pr_devel("Got cert list (%02x)\n", tag); + + *ctx->ppcerts = ctx->msg->certs; + ctx->msg->certs = ctx->certs; + ctx->certs = NULL; + ctx->ppcerts = &ctx->certs; + return 0; +} + +/* + * Extract the data from the message and store that and its content type OID in + * the context. 
+ */ +int pkcs7_note_data(void *context, size_t hdrlen, + unsigned char tag, + const void *value, size_t vlen) +{ + struct pkcs7_parse_context *ctx = context; + + pr_debug("Got data\n"); + + ctx->msg->data = value; + ctx->msg->data_len = vlen; + ctx->msg->data_hdrlen = hdrlen; + ctx->msg->data_type = ctx->last_oid; + return 0; +} + +/* + * Parse authenticated attributes + */ +int pkcs7_sig_note_authenticated_attr(void *context, size_t hdrlen, + unsigned char tag, + const void *value, size_t vlen) +{ + struct pkcs7_parse_context *ctx = context; + + pr_devel("AuthAttr: %02x %zu [%*ph]\n", tag, vlen, (unsigned)vlen, value); + + switch (ctx->last_oid) { + case OID_messageDigest: + if (tag != ASN1_OTS) + return -EBADMSG; + ctx->sinfo->msgdigest = value; + ctx->sinfo->msgdigest_len = vlen; + return 0; + default: + return 0; + } +} + +/* + * Note the set of auth attributes for digestion purposes [RFC2315 9.3] + */ +int pkcs7_sig_note_set_of_authattrs(void *context, size_t hdrlen, + unsigned char tag, + const void *value, size_t vlen) +{ + struct pkcs7_parse_context *ctx = context; + + /* We need to switch the 'CONT 0' to a 'SET OF' when we digest */ + ctx->sinfo->authattrs = value - (hdrlen - 1); + ctx->sinfo->authattrs_len = vlen + (hdrlen - 1); + return 0; +} + +/* + * Note the issuing certificate serial number + */ +int pkcs7_sig_note_serial(void *context, size_t hdrlen, + unsigned char tag, + const void *value, size_t vlen) +{ + struct pkcs7_parse_context *ctx = context; + ctx->sinfo->raw_serial = value; + ctx->sinfo->raw_serial_size = vlen; + return 0; +} + +/* + * Note the issuer's name + */ +int pkcs7_sig_note_issuer(void *context, size_t hdrlen, + unsigned char tag, + const void *value, size_t vlen) +{ + struct pkcs7_parse_context *ctx = context; + ctx->sinfo->raw_issuer = value; + ctx->sinfo->raw_issuer_size = vlen; + return 0; +} + +/* + * Note the signature data + */ +int pkcs7_sig_note_signature(void *context, size_t hdrlen, + unsigned char tag, + const void *value, size_t vlen) +{ + struct pkcs7_parse_context *ctx = context; + MPI mpi; + + BUG_ON(ctx->sinfo->sig.pkey_algo != PKEY_ALGO_RSA); + + mpi = mpi_read_raw_data(value, vlen); + if (!mpi) + return -ENOMEM; + + ctx->sinfo->sig.mpi[0] = mpi; + ctx->sinfo->sig.nr_mpi = 1; + return 0; +} + +/* + * Note a signature information block + */ +int pkcs7_note_signed_info(void *context, size_t hdrlen, + unsigned char tag, + const void *value, size_t vlen) +{ + struct pkcs7_parse_context *ctx = context; + + ctx->sinfo->index = ++ctx->sinfo_index; + *ctx->ppsinfo = ctx->sinfo; + ctx->ppsinfo = &ctx->sinfo->next; + ctx->sinfo = kzalloc(sizeof(struct pkcs7_signed_info), GFP_KERNEL); + if (!ctx->sinfo) + return -ENOMEM; + return 0; +} diff --git a/crypto/asymmetric_keys/pkcs7_parser.h b/crypto/asymmetric_keys/pkcs7_parser.h new file mode 100644 index 000000000000..d25f4d15370f --- /dev/null +++ b/crypto/asymmetric_keys/pkcs7_parser.h @@ -0,0 +1,61 @@ +/* PKCS#7 crypto data parser internal definitions + * + * Copyright (C) 2012 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include +#include +#include "x509_parser.h" + +#define kenter(FMT, ...) \ + pr_devel("==> %s("FMT")\n", __func__, ##__VA_ARGS__) +#define kleave(FMT, ...) 
\ + pr_devel("<== %s()"FMT"\n", __func__, ##__VA_ARGS__) + +struct pkcs7_signed_info { + struct pkcs7_signed_info *next; + struct x509_certificate *signer; /* Signing certificate (in msg->certs) */ + unsigned index; + bool trusted; + + /* Message digest - the digest of the Content Data (or NULL) */ + const void *msgdigest; + unsigned msgdigest_len; + + /* Authenticated Attribute data (or NULL) */ + unsigned authattrs_len; + const void *authattrs; + + /* Issuing cert serial number and issuer's name */ + const void *raw_serial; + unsigned raw_serial_size; + unsigned raw_issuer_size; + const void *raw_issuer; + + /* Message signature. + * + * This contains the generated digest of _either_ the Content Data or + * the Authenticated Attributes [RFC2315 9.3]. If the latter, one of + * the attributes contains the digest of the the Content Data within + * it. + */ + struct public_key_signature sig; +}; + +struct pkcs7_message { + struct x509_certificate *certs; /* Certificate list */ + struct x509_certificate *crl; /* Revocation list */ + struct pkcs7_signed_info *signed_infos; + + /* Content Data (or NULL) */ + enum OID data_type; /* Type of Data */ + size_t data_len; /* Length of Data */ + size_t data_hdrlen; /* Length of Data ASN.1 header */ + const void *data; /* Content Data (or 0) */ +}; diff --git a/include/crypto/pkcs7.h b/include/crypto/pkcs7.h new file mode 100644 index 000000000000..8f2628fbdfce --- /dev/null +++ b/include/crypto/pkcs7.h @@ -0,0 +1,23 @@ +/* PKCS#7 crypto data parser + * + * Copyright (C) 2012 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +struct pkcs7_message; + +/* + * pkcs7_parser.c + */ +extern struct pkcs7_message *pkcs7_parse_message(const void *data, + size_t datalen); +extern void pkcs7_free_message(struct pkcs7_message *pkcs7); + +extern int pkcs7_get_content_data(const struct pkcs7_message *pkcs7, + const void **_data, size_t *_datalen, + bool want_wrapper); diff --git a/include/linux/oid_registry.h b/include/linux/oid_registry.h index 6926db724258..edeff85cb1e8 100644 --- a/include/linux/oid_registry.h +++ b/include/linux/oid_registry.h @@ -55,6 +55,7 @@ enum OID { OID_certAuthInfoAccess, /* 1.3.6.1.5.5.7.1.1 */ OID_msOutlookExpress, /* 1.3.6.1.4.1.311.16.4 */ OID_sha1, /* 1.3.14.3.2.26 */ + OID_sha256, /* 2.16.840.1.101.3.4.2.1 */ /* Distinguished Name attribute IDs [RFC 2256] */ OID_commonName, /* 2.5.4.3 */ -- cgit v1.2.3-59-g8ed1b From e941759c74a44d6ac2eed21bb0a38b21fe4559e2 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Tue, 1 Jul 2014 12:57:14 +0200 Subject: fence: dma-buf cross-device synchronization (v18) A fence can be attached to a buffer which is being filled or consumed by hw, to allow userspace to pass the buffer without waiting to another device. For example, userspace can call page_flip ioctl to display the next frame of graphics after kicking the GPU but while the GPU is still rendering. The display device sharing the buffer with the GPU would attach a callback to get notified when the GPU's rendering-complete IRQ fires, to update the scan-out address of the display, without having to wake up userspace. A driver must allocate a fence context for each execution ring that can run in parallel. 
The function for this takes an argument with how many contexts to allocate:
  + fence_context_alloc()

A fence is a transient, one-shot deal. It is allocated and attached to one or more dma-buf's. When the one that attached it is done with the pending operation, it can signal the fence:
  + fence_signal()

To get a rough approximation of whether a fence has fired, call:
  + fence_is_signaled()

The dma-buf-mgr handles tracking, and waiting on, the fences associated with a dma-buf. The one pending on the fence can add an async callback:
  + fence_add_callback()

The callback can optionally be cancelled with:
  + fence_remove_callback()

To wait synchronously, optionally with a timeout:
  + fence_wait()
  + fence_wait_timeout()

When emitting a fence, call:
  + trace_fence_emit()

To annotate that a fence is blocking on another fence, call:
  + trace_fence_annotate_wait_on(fence, on_fence)

A default software-only implementation is provided, which can be used by drivers attaching a fence to a buffer when they have no other means for hw sync. But a memory-backed fence is also envisioned, because it is common that GPUs can write to, or poll on, some memory location for synchronization. For example:

  fence = custom_get_fence(...);
  if ((seqno_fence = to_seqno_fence(fence)) != NULL) {
    dma_buf *fence_buf = seqno_fence->sync_buf;
    get_dma_buf(fence_buf);

    ... tell the hw the memory location to wait on ...
    custom_wait_on(fence_buf, seqno_fence->seqno_ofs, fence->seqno);
  } else {
    /* fall back to sw sync */
    fence_add_callback(fence, my_cb);
  }

On SoC platforms, if some other hw mechanism is provided for synchronizing between IP blocks, it could be supported as an alternate implementation with its own fence ops in a similar way.

The enable_signaling callback is used to provide sw signaling in case a CPU waiter is requested or no compatible hardware signaling could be used. The intention is to provide a userspace interface (presumably via eventfd) later, to be used in conjunction with dma-buf's mmap support for sw access to buffers (or for userspace apps that would prefer to do their own synchronization).

v1: Original

v2: After discussion w/ danvet and mlankhorst on #dri-devel, we decided that dma-fence didn't need to care about the sw->hw signaling path (it can be handled the same as the sw->sw case), and therefore the fence->ops can be simplified, with more handled in the core. So remove the signal, add_callback, cancel_callback, and wait ops, and replace with a simple enable_signaling() op which can be used to inform a fence supporting hw->hw signaling that one or more devices which do not support hw signaling are waiting (and therefore it should enable an irq or do whatever is necessary in order that the CPU is notified when the fence is passed).

v3: Fix locking fail in attach_fence() and get_fence().

v4: Remove tie-in w/ dma-buf. After discussion w/ danvet and mlankhorst we decided that we need to be able to attach one fence to N dma-buf's, so using the list_head in the dma-fence struct would be problematic.

v5: [ Maarten Lankhorst ] Updated for dma-bikeshed-fence and dma-buf-manager.

v6: [ Maarten Lankhorst ] I removed dma_fence_cancel_callback and some comments about checking if the fence fired or not. This is broken by design. waitqueue_active during destruction is now fatal, since the signaller should be holding a reference in enable_signaling until it has signalled the fence. Pass the original dma_fence_cb along, and call __remove_wait in the dma_fence_callback handler, so that no cleanup needs to be performed.

v7: [ Maarten Lankhorst ] Set cb->func and only enable sw signaling if the fence wasn't signaled yet, for example for hardware fences that may choose to signal blindly.

v8: [ Maarten Lankhorst ] Tons of tiny fixes, moved __dma_fence_init to the header and fixed the include mess. dma-fence.h now includes dma-buf.h. All members are now initialized, so kmalloc can be used for allocating a dma-fence. More documentation added.

v9: Change compiler bitfields to flags, change return type of enable_signaling to bool. Rework dma_fence_wait. Added dma_fence_is_signaled and dma_fence_wait_timeout. s/dma// and change exports to non-GPL. Added fence_is_signaled and fence_enable_sw_signaling calls, add ability to override the default wait operation.

v10: Remove event_queue, use a custom list, export try_to_wake_up from the scheduler. Remove the fence lock and use a global spinlock instead; this should hopefully remove all the locking headaches I was having on trying to implement this. enable_signaling is called with this lock held.

v11: Use atomic ops for flags, lifting the need for some spin_lock_irqsaves. However I kept the guarantee that after fence_signal returns, it is guaranteed that enable_signaling has either been called to completion, or will not be called any more.

Add contexts and seqno to the base fence implementation. This allows you to wait for fewer fences, by testing for seqno + signaled, and then only waiting on the later fence.

Add FENCE_TRACE, FENCE_WARN, and FENCE_ERR. This makes debugging easier. A CONFIG_DEBUG_FENCE option will be added to turn off the FENCE_TRACE spam, and another option can turn it off at runtime.

v12: Add CONFIG_FENCE_TRACE. Add missing documentation for the fence->context and fence->seqno members.

v13: Fixup CONFIG_FENCE_TRACE kconfig description. Move fence_context_alloc to fence. Simplify fence_later. Kill the priv member of fence_cb.

v14: Remove the priv argument from fence_add_callback, oops!

v15: Remove priv from the documentation. Explicitly include linux/atomic.h.

v16: Add trace events. Import changes required by android syncpoints.

v17: Use wake_up_state instead of try_to_wake_up. (Colin Cross) Fix up commit description for seqno_fence. (Rob Clark)

v18: Rename release_fence to fence_release. Move to drivers/dma-buf/. Rename __fence_is_signaled and __fence_signal to *_locked. Rename __fence_init to fence_init. Make fence_default_wait return a signed long, and fix the wait ops too.
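As a rough illustration of the API summarized above, a driver-side fence could be wired up as in the sketch below. The ring structure, names and irq path are hypothetical; only the fence_* calls and struct fence_ops layout come from this patch:

	#include <linux/fence.h>
	#include <linux/slab.h>
	#include <linux/spinlock.h>

	struct my_ring {
		spinlock_t fence_lock;	/* irqsafe lock shared by this ring's fences */
		unsigned context;	/* from fence_context_alloc() */
		unsigned seqno;		/* last emitted seqno on this ring */
	};

	static const char *my_fence_driver_name(struct fence *f)
	{
		return "my_driver";
	}

	static const char *my_fence_timeline_name(struct fence *f)
	{
		return "my_ring";
	}

	static bool my_fence_enable_signaling(struct fence *f)
	{
		/* e.g. enable the ring's completion irq; true = will signal later */
		return true;
	}

	static const struct fence_ops my_fence_ops = {
		.get_driver_name = my_fence_driver_name,
		.get_timeline_name = my_fence_timeline_name,
		.enable_signaling = my_fence_enable_signaling,
		.wait = fence_default_wait,
	};

	static void my_ring_init(struct my_ring *ring)
	{
		spin_lock_init(&ring->fence_lock);
		ring->context = fence_context_alloc(1);	/* one context per ring */
		ring->seqno = 0;
	}

	/* Emit a fence for work just queued on the ring; kmalloc is fine (v8). */
	static struct fence *my_ring_emit_fence(struct my_ring *ring)
	{
		struct fence *f = kmalloc(sizeof(*f), GFP_KERNEL);

		if (!f)
			return NULL;
		fence_init(f, &my_fence_ops, &ring->fence_lock,
			   ring->context, ++ring->seqno);
		return f;
	}

	/*
	 * Called when the hw reports completion: wakes waiters, runs callbacks,
	 * then drops the creator's reference.  Other users take their own
	 * reference with fence_get() and drop it with fence_put().
	 */
	static void my_ring_complete(struct fence *f)
	{
		fence_signal(f);
		fence_put(f);
	}
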
Signed-off-by: Maarten Lankhorst Signed-off-by: Thierry Reding #use smp_mb__before_atomic() Acked-by: Sumit Semwal Acked-by: Daniel Vetter Reviewed-by: Rob Clark Signed-off-by: Greg Kroah-Hartman --- Documentation/DocBook/device-drivers.tmpl | 2 + MAINTAINERS | 2 +- drivers/base/Kconfig | 9 + drivers/dma-buf/Makefile | 2 +- drivers/dma-buf/fence.c | 431 ++++++++++++++++++++++++++++++ include/linux/fence.h | 343 ++++++++++++++++++++++++ include/trace/events/fence.h | 128 +++++++++ 7 files changed, 915 insertions(+), 2 deletions(-) create mode 100644 drivers/dma-buf/fence.c create mode 100644 include/linux/fence.h create mode 100644 include/trace/events/fence.h (limited to 'include/linux') diff --git a/Documentation/DocBook/device-drivers.tmpl b/Documentation/DocBook/device-drivers.tmpl index ac61ebd92875..e634657efb52 100644 --- a/Documentation/DocBook/device-drivers.tmpl +++ b/Documentation/DocBook/device-drivers.tmpl @@ -129,6 +129,8 @@ X!Edrivers/base/interface.c Device Drivers DMA Management !Edrivers/dma-buf/dma-buf.c +!Edrivers/dma-buf/fence.c +!Iinclude/linux/fence.h !Iinclude/linux/reservation.h !Edrivers/base/dma-coherent.c !Edrivers/base/dma-mapping.c diff --git a/MAINTAINERS b/MAINTAINERS index 2eefee768d46..65c8f534b22f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2901,7 +2901,7 @@ L: linux-media@vger.kernel.org L: dri-devel@lists.freedesktop.org L: linaro-mm-sig@lists.linaro.org F: drivers/dma-buf/ -F: include/linux/dma-buf* include/linux/reservation.h +F: include/linux/dma-buf* include/linux/reservation.h include/linux/fence.h F: Documentation/dma-buf-sharing.txt T: git git://git.linaro.org/people/sumitsemwal/linux-dma-buf.git diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig index 23b8726962af..00e13ce5cbbd 100644 --- a/drivers/base/Kconfig +++ b/drivers/base/Kconfig @@ -208,6 +208,15 @@ config DMA_SHARED_BUFFER APIs extension; the file's descriptor can then be passed on to other driver. +config FENCE_TRACE + bool "Enable verbose FENCE_TRACE messages" + depends on DMA_SHARED_BUFFER + help + Enable the FENCE_TRACE printks. This will add extra + spam to the console log, but will make it easier to diagnose + lockup related problems for dma-buffers shared across multiple + devices. + config DMA_CMA bool "DMA Contiguous Memory Allocator" depends on HAVE_DMA_CONTIGUOUS && CMA diff --git a/drivers/dma-buf/Makefile b/drivers/dma-buf/Makefile index 4a4f4c9bacd0..d7825bfe630e 100644 --- a/drivers/dma-buf/Makefile +++ b/drivers/dma-buf/Makefile @@ -1 +1 @@ -obj-y := dma-buf.o reservation.o +obj-y := dma-buf.o fence.o reservation.o diff --git a/drivers/dma-buf/fence.c b/drivers/dma-buf/fence.c new file mode 100644 index 000000000000..948bf00d955e --- /dev/null +++ b/drivers/dma-buf/fence.c @@ -0,0 +1,431 @@ +/* + * Fence mechanism for dma-buf and to allow for asynchronous dma access + * + * Copyright (C) 2012 Canonical Ltd + * Copyright (C) 2012 Texas Instruments + * + * Authors: + * Rob Clark + * Maarten Lankhorst + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ */ + +#include +#include +#include +#include + +#define CREATE_TRACE_POINTS +#include + +EXPORT_TRACEPOINT_SYMBOL(fence_annotate_wait_on); +EXPORT_TRACEPOINT_SYMBOL(fence_emit); + +/** + * fence context counter: each execution context should have its own + * fence context, this allows checking if fences belong to the same + * context or not. One device can have multiple separate contexts, + * and they're used if some engine can run independently of another. + */ +static atomic_t fence_context_counter = ATOMIC_INIT(0); + +/** + * fence_context_alloc - allocate an array of fence contexts + * @num: [in] amount of contexts to allocate + * + * This function will return the first index of the number of fences allocated. + * The fence context is used for setting fence->context to a unique number. + */ +unsigned fence_context_alloc(unsigned num) +{ + BUG_ON(!num); + return atomic_add_return(num, &fence_context_counter) - num; +} +EXPORT_SYMBOL(fence_context_alloc); + +/** + * fence_signal_locked - signal completion of a fence + * @fence: the fence to signal + * + * Signal completion for software callbacks on a fence, this will unblock + * fence_wait() calls and run all the callbacks added with + * fence_add_callback(). Can be called multiple times, but since a fence + * can only go from unsignaled to signaled state, it will only be effective + * the first time. + * + * Unlike fence_signal, this function must be called with fence->lock held. + */ +int fence_signal_locked(struct fence *fence) +{ + struct fence_cb *cur, *tmp; + int ret = 0; + + if (WARN_ON(!fence)) + return -EINVAL; + + if (!ktime_to_ns(fence->timestamp)) { + fence->timestamp = ktime_get(); + smp_mb__before_atomic(); + } + + if (test_and_set_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags)) { + ret = -EINVAL; + + /* + * we might have raced with the unlocked fence_signal, + * still run through all callbacks + */ + } else + trace_fence_signaled(fence); + + list_for_each_entry_safe(cur, tmp, &fence->cb_list, node) { + list_del_init(&cur->node); + cur->func(fence, cur); + } + return ret; +} +EXPORT_SYMBOL(fence_signal_locked); + +/** + * fence_signal - signal completion of a fence + * @fence: the fence to signal + * + * Signal completion for software callbacks on a fence, this will unblock + * fence_wait() calls and run all the callbacks added with + * fence_add_callback(). Can be called multiple times, but since a fence + * can only go from unsignaled to signaled state, it will only be effective + * the first time. 
+ */ +int fence_signal(struct fence *fence) +{ + unsigned long flags; + + if (!fence) + return -EINVAL; + + if (!ktime_to_ns(fence->timestamp)) { + fence->timestamp = ktime_get(); + smp_mb__before_atomic(); + } + + if (test_and_set_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags)) + return -EINVAL; + + trace_fence_signaled(fence); + + if (test_bit(FENCE_FLAG_ENABLE_SIGNAL_BIT, &fence->flags)) { + struct fence_cb *cur, *tmp; + + spin_lock_irqsave(fence->lock, flags); + list_for_each_entry_safe(cur, tmp, &fence->cb_list, node) { + list_del_init(&cur->node); + cur->func(fence, cur); + } + spin_unlock_irqrestore(fence->lock, flags); + } + return 0; +} +EXPORT_SYMBOL(fence_signal); + +/** + * fence_wait_timeout - sleep until the fence gets signaled + * or until timeout elapses + * @fence: [in] the fence to wait on + * @intr: [in] if true, do an interruptible wait + * @timeout: [in] timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT + * + * Returns -ERESTARTSYS if interrupted, 0 if the wait timed out, or the + * remaining timeout in jiffies on success. Other error values may be + * returned on custom implementations. + * + * Performs a synchronous wait on this fence. It is assumed the caller + * directly or indirectly (buf-mgr between reservation and committing) + * holds a reference to the fence, otherwise the fence might be + * freed before return, resulting in undefined behavior. + */ +signed long +fence_wait_timeout(struct fence *fence, bool intr, signed long timeout) +{ + signed long ret; + + if (WARN_ON(timeout < 0)) + return -EINVAL; + + trace_fence_wait_start(fence); + ret = fence->ops->wait(fence, intr, timeout); + trace_fence_wait_end(fence); + return ret; +} +EXPORT_SYMBOL(fence_wait_timeout); + +void fence_release(struct kref *kref) +{ + struct fence *fence = + container_of(kref, struct fence, refcount); + + trace_fence_destroy(fence); + + BUG_ON(!list_empty(&fence->cb_list)); + + if (fence->ops->release) + fence->ops->release(fence); + else + fence_free(fence); +} +EXPORT_SYMBOL(fence_release); + +void fence_free(struct fence *fence) +{ + kfree(fence); +} +EXPORT_SYMBOL(fence_free); + +/** + * fence_enable_sw_signaling - enable signaling on fence + * @fence: [in] the fence to enable + * + * this will request for sw signaling to be enabled, to make the fence + * complete as soon as possible + */ +void fence_enable_sw_signaling(struct fence *fence) +{ + unsigned long flags; + + if (!test_and_set_bit(FENCE_FLAG_ENABLE_SIGNAL_BIT, &fence->flags) && + !test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags)) { + trace_fence_enable_signal(fence); + + spin_lock_irqsave(fence->lock, flags); + + if (!fence->ops->enable_signaling(fence)) + fence_signal_locked(fence); + + spin_unlock_irqrestore(fence->lock, flags); + } +} +EXPORT_SYMBOL(fence_enable_sw_signaling); + +/** + * fence_add_callback - add a callback to be called when the fence + * is signaled + * @fence: [in] the fence to wait on + * @cb: [in] the callback to register + * @func: [in] the function to call + * + * cb will be initialized by fence_add_callback, no initialization + * by the caller is required. Any number of callbacks can be registered + * to a fence, but a callback can only be registered to one fence at a time. + * + * Note that the callback can be called from an atomic context. If + * fence is already signaled, this function will return -ENOENT (and + * *not* call the callback) + * + * Add a software callback to the fence. 
Same restrictions apply to + * refcount as it does to fence_wait, however the caller doesn't need to + * keep a refcount to fence afterwards: when software access is enabled, + * the creator of the fence is required to keep the fence alive until + * after it signals with fence_signal. The callback itself can be called + * from irq context. + * + */ +int fence_add_callback(struct fence *fence, struct fence_cb *cb, + fence_func_t func) +{ + unsigned long flags; + int ret = 0; + bool was_set; + + if (WARN_ON(!fence || !func)) + return -EINVAL; + + if (test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags)) { + INIT_LIST_HEAD(&cb->node); + return -ENOENT; + } + + spin_lock_irqsave(fence->lock, flags); + + was_set = test_and_set_bit(FENCE_FLAG_ENABLE_SIGNAL_BIT, &fence->flags); + + if (test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags)) + ret = -ENOENT; + else if (!was_set) { + trace_fence_enable_signal(fence); + + if (!fence->ops->enable_signaling(fence)) { + fence_signal_locked(fence); + ret = -ENOENT; + } + } + + if (!ret) { + cb->func = func; + list_add_tail(&cb->node, &fence->cb_list); + } else + INIT_LIST_HEAD(&cb->node); + spin_unlock_irqrestore(fence->lock, flags); + + return ret; +} +EXPORT_SYMBOL(fence_add_callback); + +/** + * fence_remove_callback - remove a callback from the signaling list + * @fence: [in] the fence to wait on + * @cb: [in] the callback to remove + * + * Remove a previously queued callback from the fence. This function returns + * true if the callback is succesfully removed, or false if the fence has + * already been signaled. + * + * *WARNING*: + * Cancelling a callback should only be done if you really know what you're + * doing, since deadlocks and race conditions could occur all too easily. For + * this reason, it should only ever be done on hardware lockup recovery, + * with a reference held to the fence. + */ +bool +fence_remove_callback(struct fence *fence, struct fence_cb *cb) +{ + unsigned long flags; + bool ret; + + spin_lock_irqsave(fence->lock, flags); + + ret = !list_empty(&cb->node); + if (ret) + list_del_init(&cb->node); + + spin_unlock_irqrestore(fence->lock, flags); + + return ret; +} +EXPORT_SYMBOL(fence_remove_callback); + +struct default_wait_cb { + struct fence_cb base; + struct task_struct *task; +}; + +static void +fence_default_wait_cb(struct fence *fence, struct fence_cb *cb) +{ + struct default_wait_cb *wait = + container_of(cb, struct default_wait_cb, base); + + wake_up_state(wait->task, TASK_NORMAL); +} + +/** + * fence_default_wait - default sleep until the fence gets signaled + * or until timeout elapses + * @fence: [in] the fence to wait on + * @intr: [in] if true, do an interruptible wait + * @timeout: [in] timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT + * + * Returns -ERESTARTSYS if interrupted, 0 if the wait timed out, or the + * remaining timeout in jiffies on success. 
+ */ +signed long +fence_default_wait(struct fence *fence, bool intr, signed long timeout) +{ + struct default_wait_cb cb; + unsigned long flags; + signed long ret = timeout; + bool was_set; + + if (test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags)) + return timeout; + + spin_lock_irqsave(fence->lock, flags); + + if (intr && signal_pending(current)) { + ret = -ERESTARTSYS; + goto out; + } + + was_set = test_and_set_bit(FENCE_FLAG_ENABLE_SIGNAL_BIT, &fence->flags); + + if (test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags)) + goto out; + + if (!was_set) { + trace_fence_enable_signal(fence); + + if (!fence->ops->enable_signaling(fence)) { + fence_signal_locked(fence); + goto out; + } + } + + cb.base.func = fence_default_wait_cb; + cb.task = current; + list_add(&cb.base.node, &fence->cb_list); + + while (!test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags) && ret > 0) { + if (intr) + __set_current_state(TASK_INTERRUPTIBLE); + else + __set_current_state(TASK_UNINTERRUPTIBLE); + spin_unlock_irqrestore(fence->lock, flags); + + ret = schedule_timeout(ret); + + spin_lock_irqsave(fence->lock, flags); + if (ret > 0 && intr && signal_pending(current)) + ret = -ERESTARTSYS; + } + + if (!list_empty(&cb.base.node)) + list_del(&cb.base.node); + __set_current_state(TASK_RUNNING); + +out: + spin_unlock_irqrestore(fence->lock, flags); + return ret; +} +EXPORT_SYMBOL(fence_default_wait); + +/** + * fence_init - Initialize a custom fence. + * @fence: [in] the fence to initialize + * @ops: [in] the fence_ops for operations on this fence + * @lock: [in] the irqsafe spinlock to use for locking this fence + * @context: [in] the execution context this fence is run on + * @seqno: [in] a linear increasing sequence number for this context + * + * Initializes an allocated fence, the caller doesn't have to keep its + * refcount after committing with this fence, but it will need to hold a + * refcount again if fence_ops.enable_signaling gets called. This can + * be used for other implementing other types of fence. + * + * context and seqno are used for easy comparison between fences, allowing + * to check which fence is later by simply using fence_later. + */ +void +fence_init(struct fence *fence, const struct fence_ops *ops, + spinlock_t *lock, unsigned context, unsigned seqno) +{ + BUG_ON(!lock); + BUG_ON(!ops || !ops->wait || !ops->enable_signaling || + !ops->get_driver_name || !ops->get_timeline_name); + + kref_init(&fence->refcount); + fence->ops = ops; + INIT_LIST_HEAD(&fence->cb_list); + fence->lock = lock; + fence->context = context; + fence->seqno = seqno; + fence->flags = 0UL; + + trace_fence_init(fence); +} +EXPORT_SYMBOL(fence_init); diff --git a/include/linux/fence.h b/include/linux/fence.h new file mode 100644 index 000000000000..b935cc650123 --- /dev/null +++ b/include/linux/fence.h @@ -0,0 +1,343 @@ +/* + * Fence mechanism for dma-buf to allow for asynchronous dma access + * + * Copyright (C) 2012 Canonical Ltd + * Copyright (C) 2012 Texas Instruments + * + * Authors: + * Rob Clark + * Maarten Lankhorst + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ */ + +#ifndef __LINUX_FENCE_H +#define __LINUX_FENCE_H + +#include +#include +#include +#include +#include +#include +#include + +struct fence; +struct fence_ops; +struct fence_cb; + +/** + * struct fence - software synchronization primitive + * @refcount: refcount for this fence + * @ops: fence_ops associated with this fence + * @cb_list: list of all callbacks to call + * @lock: spin_lock_irqsave used for locking + * @context: execution context this fence belongs to, returned by + * fence_context_alloc() + * @seqno: the sequence number of this fence inside the execution context, + * can be compared to decide which fence would be signaled later. + * @flags: A mask of FENCE_FLAG_* defined below + * @timestamp: Timestamp when the fence was signaled. + * @status: Optional, only valid if < 0, must be set before calling + * fence_signal, indicates that the fence has completed with an error. + * + * the flags member must be manipulated and read using the appropriate + * atomic ops (bit_*), so taking the spinlock will not be needed most + * of the time. + * + * FENCE_FLAG_SIGNALED_BIT - fence is already signaled + * FENCE_FLAG_ENABLE_SIGNAL_BIT - enable_signaling might have been called* + * FENCE_FLAG_USER_BITS - start of the unused bits, can be used by the + * implementer of the fence for its own purposes. Can be used in different + * ways by different fence implementers, so do not rely on this. + * + * *) Since atomic bitops are used, this is not guaranteed to be the case. + * Particularly, if the bit was set, but fence_signal was called right + * before this bit was set, it would have been able to set the + * FENCE_FLAG_SIGNALED_BIT, before enable_signaling was called. + * Adding a check for FENCE_FLAG_SIGNALED_BIT after setting + * FENCE_FLAG_ENABLE_SIGNAL_BIT closes this race, and makes sure that + * after fence_signal was called, any enable_signaling call will have either + * been completed, or never called at all. + */ +struct fence { + struct kref refcount; + const struct fence_ops *ops; + struct list_head cb_list; + spinlock_t *lock; + unsigned context, seqno; + unsigned long flags; + ktime_t timestamp; + int status; +}; + +enum fence_flag_bits { + FENCE_FLAG_SIGNALED_BIT, + FENCE_FLAG_ENABLE_SIGNAL_BIT, + FENCE_FLAG_USER_BITS, /* must always be last member */ +}; + +typedef void (*fence_func_t)(struct fence *fence, struct fence_cb *cb); + +/** + * struct fence_cb - callback for fence_add_callback + * @node: used by fence_add_callback to append this struct to fence::cb_list + * @func: fence_func_t to call + * + * This struct will be initialized by fence_add_callback, additional + * data can be passed along by embedding fence_cb in another struct. + */ +struct fence_cb { + struct list_head node; + fence_func_t func; +}; + +/** + * struct fence_ops - operations implemented for fence + * @get_driver_name: returns the driver name. + * @get_timeline_name: return the name of the context this fence belongs to. + * @enable_signaling: enable software signaling of fence. + * @signaled: [optional] peek whether the fence is signaled, can be null. + * @wait: custom wait implementation, or fence_default_wait. + * @release: [optional] called on destruction of fence, can be null + * @fill_driver_data: [optional] callback to fill in free-form debug info + * Returns amount of bytes filled, or -errno. 
+ * @fence_value_str: [optional] fills in the value of the fence as a string + * @timeline_value_str: [optional] fills in the current value of the timeline + * as a string + * + * Notes on enable_signaling: + * For fence implementations that have the capability for hw->hw + * signaling, they can implement this op to enable the necessary + * irqs, or insert commands into cmdstream, etc. This is called + * in the first wait() or add_callback() path to let the fence + * implementation know that there is another driver waiting on + * the signal (ie. hw->sw case). + * + * This function can be called called from atomic context, but not + * from irq context, so normal spinlocks can be used. + * + * A return value of false indicates the fence already passed, + * or some failure occured that made it impossible to enable + * signaling. True indicates succesful enabling. + * + * fence->status may be set in enable_signaling, but only when false is + * returned. + * + * Calling fence_signal before enable_signaling is called allows + * for a tiny race window in which enable_signaling is called during, + * before, or after fence_signal. To fight this, it is recommended + * that before enable_signaling returns true an extra reference is + * taken on the fence, to be released when the fence is signaled. + * This will mean fence_signal will still be called twice, but + * the second time will be a noop since it was already signaled. + * + * Notes on signaled: + * May set fence->status if returning true. + * + * Notes on wait: + * Must not be NULL, set to fence_default_wait for default implementation. + * the fence_default_wait implementation should work for any fence, as long + * as enable_signaling works correctly. + * + * Must return -ERESTARTSYS if the wait is intr = true and the wait was + * interrupted, and remaining jiffies if fence has signaled, or 0 if wait + * timed out. Can also return other error values on custom implementations, + * which should be treated as if the fence is signaled. For example a hardware + * lockup could be reported like that. + * + * Notes on release: + * Can be NULL, this function allows additional commands to run on + * destruction of the fence. Can be called from irq context. + * If pointer is set to NULL, kfree will get called instead. + */ + +struct fence_ops { + const char * (*get_driver_name)(struct fence *fence); + const char * (*get_timeline_name)(struct fence *fence); + bool (*enable_signaling)(struct fence *fence); + bool (*signaled)(struct fence *fence); + signed long (*wait)(struct fence *fence, bool intr, signed long timeout); + void (*release)(struct fence *fence); + + int (*fill_driver_data)(struct fence *fence, void *data, int size); + void (*fence_value_str)(struct fence *fence, char *str, int size); + void (*timeline_value_str)(struct fence *fence, char *str, int size); +}; + +void fence_init(struct fence *fence, const struct fence_ops *ops, + spinlock_t *lock, unsigned context, unsigned seqno); + +void fence_release(struct kref *kref); +void fence_free(struct fence *fence); + +/** + * fence_get - increases refcount of the fence + * @fence: [in] fence to increase refcount of + * + * Returns the same fence, with refcount increased by 1. 
+ */ +static inline struct fence *fence_get(struct fence *fence) +{ + if (fence) + kref_get(&fence->refcount); + return fence; +} + +/** + * fence_put - decreases refcount of the fence + * @fence: [in] fence to reduce refcount of + */ +static inline void fence_put(struct fence *fence) +{ + if (fence) + kref_put(&fence->refcount, fence_release); +} + +int fence_signal(struct fence *fence); +int fence_signal_locked(struct fence *fence); +signed long fence_default_wait(struct fence *fence, bool intr, signed long timeout); +int fence_add_callback(struct fence *fence, struct fence_cb *cb, + fence_func_t func); +bool fence_remove_callback(struct fence *fence, struct fence_cb *cb); +void fence_enable_sw_signaling(struct fence *fence); + +/** + * fence_is_signaled_locked - Return an indication if the fence is signaled yet. + * @fence: [in] the fence to check + * + * Returns true if the fence was already signaled, false if not. Since this + * function doesn't enable signaling, it is not guaranteed to ever return + * true if fence_add_callback, fence_wait or fence_enable_sw_signaling + * haven't been called before. + * + * This function requires fence->lock to be held. + */ +static inline bool +fence_is_signaled_locked(struct fence *fence) +{ + if (test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags)) + return true; + + if (fence->ops->signaled && fence->ops->signaled(fence)) { + fence_signal_locked(fence); + return true; + } + + return false; +} + +/** + * fence_is_signaled - Return an indication if the fence is signaled yet. + * @fence: [in] the fence to check + * + * Returns true if the fence was already signaled, false if not. Since this + * function doesn't enable signaling, it is not guaranteed to ever return + * true if fence_add_callback, fence_wait or fence_enable_sw_signaling + * haven't been called before. + * + * It's recommended for seqno fences to call fence_signal when the + * operation is complete, it makes it possible to prevent issues from + * wraparound between time of issue and time of use by checking the return + * value of this function before calling hardware-specific wait instructions. + */ +static inline bool +fence_is_signaled(struct fence *fence) +{ + if (test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags)) + return true; + + if (fence->ops->signaled && fence->ops->signaled(fence)) { + fence_signal(fence); + return true; + } + + return false; +} + +/** + * fence_later - return the chronologically later fence + * @f1: [in] the first fence from the same context + * @f2: [in] the second fence from the same context + * + * Returns NULL if both fences are signaled, otherwise the fence that would be + * signaled last. Both fences must be from the same context, since a seqno is + * not re-used across contexts. + */ +static inline struct fence *fence_later(struct fence *f1, struct fence *f2) +{ + if (WARN_ON(f1->context != f2->context)) + return NULL; + + /* + * can't check just FENCE_FLAG_SIGNALED_BIT here, it may never have been + * set if enable_signaling wasn't called, and enabling that here is + * overkill. + */ + if (f2->seqno - f1->seqno <= INT_MAX) + return fence_is_signaled(f2) ? NULL : f2; + else + return fence_is_signaled(f1) ? 
NULL : f1; +} + +signed long fence_wait_timeout(struct fence *, bool intr, signed long timeout); + + +/** + * fence_wait - sleep until the fence gets signaled + * @fence: [in] the fence to wait on + * @intr: [in] if true, do an interruptible wait + * + * This function will return -ERESTARTSYS if interrupted by a signal, + * or 0 if the fence was signaled. Other error values may be + * returned on custom implementations. + * + * Performs a synchronous wait on this fence. It is assumed the caller + * directly or indirectly holds a reference to the fence, otherwise the + * fence might be freed before return, resulting in undefined behavior. + */ +static inline signed long fence_wait(struct fence *fence, bool intr) +{ + signed long ret; + + /* Since fence_wait_timeout cannot timeout with + * MAX_SCHEDULE_TIMEOUT, only valid return values are + * -ERESTARTSYS and MAX_SCHEDULE_TIMEOUT. + */ + ret = fence_wait_timeout(fence, intr, MAX_SCHEDULE_TIMEOUT); + + return ret < 0 ? ret : 0; +} + +unsigned fence_context_alloc(unsigned num); + +#define FENCE_TRACE(f, fmt, args...) \ + do { \ + struct fence *__ff = (f); \ + if (config_enabled(CONFIG_FENCE_TRACE)) \ + pr_info("f %u#%u: " fmt, \ + __ff->context, __ff->seqno, ##args); \ + } while (0) + +#define FENCE_WARN(f, fmt, args...) \ + do { \ + struct fence *__ff = (f); \ + pr_warn("f %u#%u: " fmt, __ff->context, __ff->seqno, \ + ##args); \ + } while (0) + +#define FENCE_ERR(f, fmt, args...) \ + do { \ + struct fence *__ff = (f); \ + pr_err("f %u#%u: " fmt, __ff->context, __ff->seqno, \ + ##args); \ + } while (0) + +#endif /* __LINUX_FENCE_H */ diff --git a/include/trace/events/fence.h b/include/trace/events/fence.h new file mode 100644 index 000000000000..98feb1b82896 --- /dev/null +++ b/include/trace/events/fence.h @@ -0,0 +1,128 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM fence + +#if !defined(_TRACE_FENCE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_FENCE_H + +#include + +struct fence; + +TRACE_EVENT(fence_annotate_wait_on, + + /* fence: the fence waiting on f1, f1: the fence to be waited on. 
*/ + TP_PROTO(struct fence *fence, struct fence *f1), + + TP_ARGS(fence, f1), + + TP_STRUCT__entry( + __string(driver, fence->ops->get_driver_name(fence)) + __string(timeline, fence->ops->get_driver_name(fence)) + __field(unsigned int, context) + __field(unsigned int, seqno) + + __string(waiting_driver, f1->ops->get_driver_name(f1)) + __string(waiting_timeline, f1->ops->get_timeline_name(f1)) + __field(unsigned int, waiting_context) + __field(unsigned int, waiting_seqno) + ), + + TP_fast_assign( + __assign_str(driver, fence->ops->get_driver_name(fence)) + __assign_str(timeline, fence->ops->get_timeline_name(fence)) + __entry->context = fence->context; + __entry->seqno = fence->seqno; + + __assign_str(waiting_driver, f1->ops->get_driver_name(f1)) + __assign_str(waiting_timeline, f1->ops->get_timeline_name(f1)) + __entry->waiting_context = f1->context; + __entry->waiting_seqno = f1->seqno; + + ), + + TP_printk("driver=%s timeline=%s context=%u seqno=%u " \ + "waits on driver=%s timeline=%s context=%u seqno=%u", + __get_str(driver), __get_str(timeline), __entry->context, + __entry->seqno, + __get_str(waiting_driver), __get_str(waiting_timeline), + __entry->waiting_context, __entry->waiting_seqno) +); + +DECLARE_EVENT_CLASS(fence, + + TP_PROTO(struct fence *fence), + + TP_ARGS(fence), + + TP_STRUCT__entry( + __string(driver, fence->ops->get_driver_name(fence)) + __string(timeline, fence->ops->get_timeline_name(fence)) + __field(unsigned int, context) + __field(unsigned int, seqno) + ), + + TP_fast_assign( + __assign_str(driver, fence->ops->get_driver_name(fence)) + __assign_str(timeline, fence->ops->get_timeline_name(fence)) + __entry->context = fence->context; + __entry->seqno = fence->seqno; + ), + + TP_printk("driver=%s timeline=%s context=%u seqno=%u", + __get_str(driver), __get_str(timeline), __entry->context, + __entry->seqno) +); + +DEFINE_EVENT(fence, fence_emit, + + TP_PROTO(struct fence *fence), + + TP_ARGS(fence) +); + +DEFINE_EVENT(fence, fence_init, + + TP_PROTO(struct fence *fence), + + TP_ARGS(fence) +); + +DEFINE_EVENT(fence, fence_destroy, + + TP_PROTO(struct fence *fence), + + TP_ARGS(fence) +); + +DEFINE_EVENT(fence, fence_enable_signal, + + TP_PROTO(struct fence *fence), + + TP_ARGS(fence) +); + +DEFINE_EVENT(fence, fence_signaled, + + TP_PROTO(struct fence *fence), + + TP_ARGS(fence) +); + +DEFINE_EVENT(fence, fence_wait_start, + + TP_PROTO(struct fence *fence), + + TP_ARGS(fence) +); + +DEFINE_EVENT(fence, fence_wait_end, + + TP_PROTO(struct fence *fence), + + TP_ARGS(fence) +); + +#endif /* _TRACE_FENCE_H */ + +/* This part must be outside protection */ +#include -- cgit v1.2.3-59-g8ed1b From 606b23ad609c71cfb37eeb972ea4c901034edd3c Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Tue, 1 Jul 2014 12:57:20 +0200 Subject: seqno-fence: Hardware dma-buf implementation of fencing (v6) This type of fence can be used with hardware synchronization for simple hardware that can block execution until the condition (dma_buf[offset] - value) >= 0 has been met when WAIT_GEQUAL is used, or (dma_buf[offset] != 0) has been met when WAIT_NONZERO is set. A software fallback still has to be provided in case the fence is used with a device that doesn't support this mechanism. It is useful to expose this for graphics cards that have an op to support this. Some cards like i915 can export those, but don't have an option to wait, so they need the software fallback. I extended the original patch by Rob Clark. 
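As a hedged illustration of the wait condition described above (this is not code from the patch): the seqno_fence fields and SEQNO_FENCE_WAIT_* values are the ones introduced by the new header further down, while read_sync_slot() is a hypothetical helper standing in for however a driver maps sync_buf and reads the 32-bit word at seqno_ofs.

    #include <linux/seqno-fence.h>	/* added by this patch */

    /*
     * Illustrative sketch only: evaluate the seqno_fence completion
     * condition. read_sync_slot() is a made-up helper that reads the
     * u32 stored at seqno_ofs inside sync_buf; the struct fields and
     * enum values come from the new linux/seqno-fence.h.
     */
    static bool seqno_fence_condition_met(struct seqno_fence *f)
    {
            u32 val = read_sync_slot(f->sync_buf, f->seqno_ofs); /* hypothetical */

            switch (f->condition) {
            case SEQNO_FENCE_WAIT_GEQUAL:
                    /* signed difference so seqno wraparound still compares correctly */
                    return (s32)(val - f->base.seqno) >= 0;
            case SEQNO_FENCE_WAIT_NONZERO:
                    return val != 0;
            default:
                    return false;
            }
    }

The same comparison is what capable hardware is expected to perform against the sync_buf contents; the software fallback mentioned above simply evaluates it on the CPU once signaling has been enabled.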
v1: Original v2: Renamed from bikeshed to seqno, moved into dma-fence.c since not much was left of the file. Lots of documentation added. v3: Use fence_ops instead of custom callbacks. Moved to own file to avoid circular dependency between dma-buf.h and fence.h v4: Add spinlock pointer to seqno_fence_init v5: Add condition member to allow wait for != 0. Fix small style errors pointed out by checkpatch. v6: Move to a separate file. Fix up api changes in fences. Signed-off-by: Maarten Lankhorst Acked-by: Sumit Semwal Acked-by: Daniel Vetter Reviewed-by: Rob Clark #v4 Signed-off-by: Greg Kroah-Hartman --- Documentation/DocBook/device-drivers.tmpl | 2 + MAINTAINERS | 2 +- drivers/dma-buf/Makefile | 2 +- drivers/dma-buf/seqno-fence.c | 73 +++++++++++++++++++ include/linux/seqno-fence.h | 116 ++++++++++++++++++++++++++++++ 5 files changed, 193 insertions(+), 2 deletions(-) create mode 100644 drivers/dma-buf/seqno-fence.c create mode 100644 include/linux/seqno-fence.h (limited to 'include/linux') diff --git a/Documentation/DocBook/device-drivers.tmpl b/Documentation/DocBook/device-drivers.tmpl index e634657efb52..ed0ef00cd7bc 100644 --- a/Documentation/DocBook/device-drivers.tmpl +++ b/Documentation/DocBook/device-drivers.tmpl @@ -130,7 +130,9 @@ X!Edrivers/base/interface.c Device Drivers DMA Management !Edrivers/dma-buf/dma-buf.c !Edrivers/dma-buf/fence.c +!Edrivers/dma-buf/seqno-fence.c !Iinclude/linux/fence.h +!Iinclude/linux/seqno-fence.h !Iinclude/linux/reservation.h !Edrivers/base/dma-coherent.c !Edrivers/base/dma-mapping.c diff --git a/MAINTAINERS b/MAINTAINERS index 65c8f534b22f..7c97777dd1b3 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2901,7 +2901,7 @@ L: linux-media@vger.kernel.org L: dri-devel@lists.freedesktop.org L: linaro-mm-sig@lists.linaro.org F: drivers/dma-buf/ -F: include/linux/dma-buf* include/linux/reservation.h include/linux/fence.h +F: include/linux/dma-buf* include/linux/reservation.h include/linux/*fence.h F: Documentation/dma-buf-sharing.txt T: git git://git.linaro.org/people/sumitsemwal/linux-dma-buf.git diff --git a/drivers/dma-buf/Makefile b/drivers/dma-buf/Makefile index d7825bfe630e..57a675f90cd0 100644 --- a/drivers/dma-buf/Makefile +++ b/drivers/dma-buf/Makefile @@ -1 +1 @@ -obj-y := dma-buf.o fence.o reservation.o +obj-y := dma-buf.o fence.o reservation.o seqno-fence.o diff --git a/drivers/dma-buf/seqno-fence.c b/drivers/dma-buf/seqno-fence.c new file mode 100644 index 000000000000..7d12a39a4b57 --- /dev/null +++ b/drivers/dma-buf/seqno-fence.c @@ -0,0 +1,73 @@ +/* + * seqno-fence, using a dma-buf to synchronize fencing + * + * Copyright (C) 2012 Texas Instruments + * Copyright (C) 2012-2014 Canonical Ltd + * Authors: + * Rob Clark + * Maarten Lankhorst + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ */ + +#include +#include +#include + +static const char *seqno_fence_get_driver_name(struct fence *fence) +{ + struct seqno_fence *seqno_fence = to_seqno_fence(fence); + return seqno_fence->ops->get_driver_name(fence); +} + +static const char *seqno_fence_get_timeline_name(struct fence *fence) +{ + struct seqno_fence *seqno_fence = to_seqno_fence(fence); + return seqno_fence->ops->get_timeline_name(fence); +} + +static bool seqno_enable_signaling(struct fence *fence) +{ + struct seqno_fence *seqno_fence = to_seqno_fence(fence); + return seqno_fence->ops->enable_signaling(fence); +} + +static bool seqno_signaled(struct fence *fence) +{ + struct seqno_fence *seqno_fence = to_seqno_fence(fence); + return seqno_fence->ops->signaled && seqno_fence->ops->signaled(fence); +} + +static void seqno_release(struct fence *fence) +{ + struct seqno_fence *f = to_seqno_fence(fence); + + dma_buf_put(f->sync_buf); + if (f->ops->release) + f->ops->release(fence); + else + fence_free(&f->base); +} + +static signed long seqno_wait(struct fence *fence, bool intr, signed long timeout) +{ + struct seqno_fence *f = to_seqno_fence(fence); + return f->ops->wait(fence, intr, timeout); +} + +const struct fence_ops seqno_fence_ops = { + .get_driver_name = seqno_fence_get_driver_name, + .get_timeline_name = seqno_fence_get_timeline_name, + .enable_signaling = seqno_enable_signaling, + .signaled = seqno_signaled, + .wait = seqno_wait, + .release = seqno_release, +}; +EXPORT_SYMBOL(seqno_fence_ops); diff --git a/include/linux/seqno-fence.h b/include/linux/seqno-fence.h new file mode 100644 index 000000000000..3d6003de4b0d --- /dev/null +++ b/include/linux/seqno-fence.h @@ -0,0 +1,116 @@ +/* + * seqno-fence, using a dma-buf to synchronize fencing + * + * Copyright (C) 2012 Texas Instruments + * Copyright (C) 2012 Canonical Ltd + * Authors: + * Rob Clark + * Maarten Lankhorst + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef __LINUX_SEQNO_FENCE_H +#define __LINUX_SEQNO_FENCE_H + +#include +#include + +enum seqno_fence_condition { + SEQNO_FENCE_WAIT_GEQUAL, + SEQNO_FENCE_WAIT_NONZERO +}; + +struct seqno_fence { + struct fence base; + + const struct fence_ops *ops; + struct dma_buf *sync_buf; + uint32_t seqno_ofs; + enum seqno_fence_condition condition; +}; + +extern const struct fence_ops seqno_fence_ops; + +/** + * to_seqno_fence - cast a fence to a seqno_fence + * @fence: fence to cast to a seqno_fence + * + * Returns NULL if the fence is not a seqno_fence, + * or the seqno_fence otherwise. 
+ */ +static inline struct seqno_fence * +to_seqno_fence(struct fence *fence) +{ + if (fence->ops != &seqno_fence_ops) + return NULL; + return container_of(fence, struct seqno_fence, base); +} + +/** + * seqno_fence_init - initialize a seqno fence + * @fence: seqno_fence to initialize + * @lock: pointer to spinlock to use for fence + * @sync_buf: buffer containing the memory location to signal on + * @context: the execution context this fence is a part of + * @seqno_ofs: the offset within @sync_buf + * @seqno: the sequence # to signal on + * @ops: the fence_ops for operations on this seqno fence + * + * This function initializes a struct seqno_fence with passed parameters, + * and takes a reference on sync_buf which is released on fence destruction. + * + * A seqno_fence is a dma_fence which can complete in software when + * enable_signaling is called, but it also completes when + * (s32)((sync_buf)[seqno_ofs] - seqno) >= 0 is true + * + * The seqno_fence will take a refcount on the sync_buf until it's + * destroyed, but actual lifetime of sync_buf may be longer if one of the + * callers take a reference to it. + * + * Certain hardware have instructions to insert this type of wait condition + * in the command stream, so no intervention from software would be needed. + * This type of fence can be destroyed before completed, however a reference + * on the sync_buf dma-buf can be taken. It is encouraged to re-use the same + * dma-buf for sync_buf, since mapping or unmapping the sync_buf to the + * device's vm can be expensive. + * + * It is recommended for creators of seqno_fence to call fence_signal + * before destruction. This will prevent possible issues from wraparound at + * time of issue vs time of check, since users can check fence_is_signaled + * before submitting instructions for the hardware to wait on the fence. + * However, when ops.enable_signaling is not called, it doesn't have to be + * done as soon as possible, just before there's any real danger of seqno + * wraparound. + */ +static inline void +seqno_fence_init(struct seqno_fence *fence, spinlock_t *lock, + struct dma_buf *sync_buf, uint32_t context, + uint32_t seqno_ofs, uint32_t seqno, + enum seqno_fence_condition cond, + const struct fence_ops *ops) +{ + BUG_ON(!fence || !sync_buf || !ops); + BUG_ON(!ops->wait || !ops->enable_signaling || + !ops->get_driver_name || !ops->get_timeline_name); + + /* + * ops is used in fence_init for get_driver_name, so needs to be + * initialized first + */ + fence->ops = ops; + fence_init(&fence->base, &seqno_fence_ops, lock, context, seqno); + get_dma_buf(sync_buf); + fence->sync_buf = sync_buf; + fence->seqno_ofs = seqno_ofs; + fence->condition = cond; +} + +#endif /* __LINUX_SEQNO_FENCE_H */ -- cgit v1.2.3-59-g8ed1b From 3aac4502fd3f80dcf7e65dbf6edd8676893c1f46 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Tue, 1 Jul 2014 12:57:26 +0200 Subject: dma-buf: use reservation objects MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This allows reservation objects to be used in dma-buf. it's required for implementing polling support on the fences that belong to a dma-buf. 
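As a hedged sketch of what the exporter-facing change means in practice (my_obj, my_obj_resv() and my_dmabuf_ops are made-up placeholder names; only the extended dma_buf_export() call itself comes from this patch):

    #include <linux/dma-buf.h>
    #include <linux/reservation.h>

    /*
     * Sketch of an exporter after this change: hand dma-buf the object's
     * own reservation_object, or pass NULL and dma-buf allocates one
     * together with the struct dma_buf. Placeholder names throughout.
     */
    static struct dma_buf *my_prime_export(struct my_obj *obj, int flags)
    {
            /* may legitimately be NULL, as most exporters in this series do */
            struct reservation_object *resv = my_obj_resv(obj);

            return dma_buf_export(obj, &my_dmabuf_ops, obj->size, flags, resv);
    }

Drivers that already track fences on their own reservation object (nouveau and radeon in this series) plumb it through the new gem_prime_res_obj hook; everyone else passes NULL and gets a reservation object embedded directly behind the struct dma_buf.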
Signed-off-by: Maarten Lankhorst Acked-by: Mauro Carvalho Chehab #drivers/media/v4l2-core/ Acked-by: Thomas Hellstrom #drivers/gpu/drm/ttm Acked-by: Sumit Semwal Acked-by: Daniel Vetter Signed-off-by: Vincent Stehlé #drivers/gpu/drm/armada/ Signed-off-by: Greg Kroah-Hartman --- drivers/dma-buf/dma-buf.c | 22 ++++++++++++++++++++-- drivers/gpu/drm/armada/armada_gem.c | 2 +- drivers/gpu/drm/drm_prime.c | 8 +++++++- drivers/gpu/drm/exynos/exynos_drm_dmabuf.c | 2 +- drivers/gpu/drm/i915/i915_gem_dmabuf.c | 3 ++- drivers/gpu/drm/nouveau/nouveau_drm.c | 1 + drivers/gpu/drm/nouveau/nouveau_gem.h | 1 + drivers/gpu/drm/nouveau/nouveau_prime.c | 7 +++++++ drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c | 2 +- drivers/gpu/drm/radeon/radeon_drv.c | 2 ++ drivers/gpu/drm/radeon/radeon_prime.c | 8 ++++++++ drivers/gpu/drm/tegra/gem.c | 2 +- drivers/gpu/drm/ttm/ttm_object.c | 2 +- drivers/media/v4l2-core/videobuf2-dma-contig.c | 2 +- drivers/staging/android/ion/ion.c | 3 ++- include/drm/drmP.h | 3 +++ include/linux/dma-buf.h | 9 ++++++--- 17 files changed, 65 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index 840c7fa80983..cd40ca22911f 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -25,10 +25,12 @@ #include #include #include +#include #include #include #include #include +#include static inline int is_dma_buf_file(struct file *); @@ -56,6 +58,9 @@ static int dma_buf_release(struct inode *inode, struct file *file) list_del(&dmabuf->list_node); mutex_unlock(&db_list.lock); + if (dmabuf->resv == (struct reservation_object *)&dmabuf[1]) + reservation_object_fini(dmabuf->resv); + kfree(dmabuf); return 0; } @@ -128,6 +133,7 @@ static inline int is_dma_buf_file(struct file *file) * @size: [in] Size of the buffer * @flags: [in] mode flags for the file. * @exp_name: [in] name of the exporting module - useful for debugging. + * @resv: [in] reservation-object, NULL to allocate default one. * * Returns, on success, a newly created dma_buf object, which wraps the * supplied private data and operations for dma_buf_ops. 
On either missing @@ -135,10 +141,17 @@ static inline int is_dma_buf_file(struct file *file) * */ struct dma_buf *dma_buf_export_named(void *priv, const struct dma_buf_ops *ops, - size_t size, int flags, const char *exp_name) + size_t size, int flags, const char *exp_name, + struct reservation_object *resv) { struct dma_buf *dmabuf; struct file *file; + size_t alloc_size = sizeof(struct dma_buf); + if (!resv) + alloc_size += sizeof(struct reservation_object); + else + /* prevent &dma_buf[1] == dma_buf->resv */ + alloc_size += 1; if (WARN_ON(!priv || !ops || !ops->map_dma_buf @@ -150,7 +163,7 @@ struct dma_buf *dma_buf_export_named(void *priv, const struct dma_buf_ops *ops, return ERR_PTR(-EINVAL); } - dmabuf = kzalloc(sizeof(struct dma_buf), GFP_KERNEL); + dmabuf = kzalloc(alloc_size, GFP_KERNEL); if (dmabuf == NULL) return ERR_PTR(-ENOMEM); @@ -158,6 +171,11 @@ struct dma_buf *dma_buf_export_named(void *priv, const struct dma_buf_ops *ops, dmabuf->ops = ops; dmabuf->size = size; dmabuf->exp_name = exp_name; + if (!resv) { + resv = (struct reservation_object *)&dmabuf[1]; + reservation_object_init(resv); + } + dmabuf->resv = resv; file = anon_inode_getfile("dmabuf", &dma_buf_fops, dmabuf, flags); if (IS_ERR(file)) { diff --git a/drivers/gpu/drm/armada/armada_gem.c b/drivers/gpu/drm/armada/armada_gem.c index bb9b642d8485..7496f55611a5 100644 --- a/drivers/gpu/drm/armada/armada_gem.c +++ b/drivers/gpu/drm/armada/armada_gem.c @@ -539,7 +539,7 @@ armada_gem_prime_export(struct drm_device *dev, struct drm_gem_object *obj, int flags) { return dma_buf_export(obj, &armada_gem_prime_dmabuf_ops, obj->size, - O_RDWR); + O_RDWR, NULL); } struct drm_gem_object * diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c index 304ca8cacbc4..99d578bad17e 100644 --- a/drivers/gpu/drm/drm_prime.c +++ b/drivers/gpu/drm/drm_prime.c @@ -336,7 +336,13 @@ static const struct dma_buf_ops drm_gem_prime_dmabuf_ops = { struct dma_buf *drm_gem_prime_export(struct drm_device *dev, struct drm_gem_object *obj, int flags) { - return dma_buf_export(obj, &drm_gem_prime_dmabuf_ops, obj->size, flags); + struct reservation_object *robj = NULL; + + if (dev->driver->gem_prime_res_obj) + robj = dev->driver->gem_prime_res_obj(obj); + + return dma_buf_export(obj, &drm_gem_prime_dmabuf_ops, obj->size, + flags, robj); } EXPORT_SYMBOL(drm_gem_prime_export); diff --git a/drivers/gpu/drm/exynos/exynos_drm_dmabuf.c b/drivers/gpu/drm/exynos/exynos_drm_dmabuf.c index 2a3ad24276f8..60192ed544f0 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_dmabuf.c +++ b/drivers/gpu/drm/exynos/exynos_drm_dmabuf.c @@ -187,7 +187,7 @@ struct dma_buf *exynos_dmabuf_prime_export(struct drm_device *drm_dev, struct exynos_drm_gem_obj *exynos_gem_obj = to_exynos_gem_obj(obj); return dma_buf_export(obj, &exynos_dmabuf_ops, - exynos_gem_obj->base.size, flags); + exynos_gem_obj->base.size, flags, NULL); } struct drm_gem_object *exynos_dmabuf_prime_import(struct drm_device *drm_dev, diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c index 580aa42443ed..82a1f4b57778 100644 --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c @@ -237,7 +237,8 @@ struct dma_buf *i915_gem_prime_export(struct drm_device *dev, return ERR_PTR(ret); } - return dma_buf_export(gem_obj, &i915_dmabuf_ops, gem_obj->size, flags); + return dma_buf_export(gem_obj, &i915_dmabuf_ops, gem_obj->size, flags, + NULL); } static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj) diff --git 
a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index ddd83756b9a2..e8ae68a9aaf1 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -844,6 +844,7 @@ driver = { .gem_prime_export = drm_gem_prime_export, .gem_prime_import = drm_gem_prime_import, .gem_prime_pin = nouveau_gem_prime_pin, + .gem_prime_res_obj = nouveau_gem_prime_res_obj, .gem_prime_unpin = nouveau_gem_prime_unpin, .gem_prime_get_sg_table = nouveau_gem_prime_get_sg_table, .gem_prime_import_sg_table = nouveau_gem_prime_import_sg_table, diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.h b/drivers/gpu/drm/nouveau/nouveau_gem.h index 7caca057bc38..ddab762d81fe 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.h +++ b/drivers/gpu/drm/nouveau/nouveau_gem.h @@ -35,6 +35,7 @@ extern int nouveau_gem_ioctl_info(struct drm_device *, void *, struct drm_file *); extern int nouveau_gem_prime_pin(struct drm_gem_object *); +struct reservation_object *nouveau_gem_prime_res_obj(struct drm_gem_object *); extern void nouveau_gem_prime_unpin(struct drm_gem_object *); extern struct sg_table *nouveau_gem_prime_get_sg_table(struct drm_gem_object *); extern struct drm_gem_object *nouveau_gem_prime_import_sg_table( diff --git a/drivers/gpu/drm/nouveau/nouveau_prime.c b/drivers/gpu/drm/nouveau/nouveau_prime.c index 51a2cb102b44..1f51008e4d26 100644 --- a/drivers/gpu/drm/nouveau/nouveau_prime.c +++ b/drivers/gpu/drm/nouveau/nouveau_prime.c @@ -102,3 +102,10 @@ void nouveau_gem_prime_unpin(struct drm_gem_object *obj) nouveau_bo_unpin(nvbo); } + +struct reservation_object *nouveau_gem_prime_res_obj(struct drm_gem_object *obj) +{ + struct nouveau_bo *nvbo = nouveau_gem_object(obj); + + return nvbo->bo.resv; +} diff --git a/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c b/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c index 4fcca8d42796..a2dbfb1737b4 100644 --- a/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c +++ b/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c @@ -171,7 +171,7 @@ static struct dma_buf_ops omap_dmabuf_ops = { struct dma_buf *omap_gem_prime_export(struct drm_device *dev, struct drm_gem_object *obj, int flags) { - return dma_buf_export(obj, &omap_dmabuf_ops, obj->size, flags); + return dma_buf_export(obj, &omap_dmabuf_ops, obj->size, flags, NULL); } struct drm_gem_object *omap_gem_prime_import(struct drm_device *dev, diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index cb1421369e3a..5fa62a113db6 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -132,6 +132,7 @@ struct drm_gem_object *radeon_gem_prime_import_sg_table(struct drm_device *dev, struct sg_table *sg); int radeon_gem_prime_pin(struct drm_gem_object *obj); void radeon_gem_prime_unpin(struct drm_gem_object *obj); +struct reservation_object *radeon_gem_prime_res_obj(struct drm_gem_object *); void *radeon_gem_prime_vmap(struct drm_gem_object *obj); void radeon_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr); extern long radeon_kms_compat_ioctl(struct file *filp, unsigned int cmd, @@ -566,6 +567,7 @@ static struct drm_driver kms_driver = { .gem_prime_import = drm_gem_prime_import, .gem_prime_pin = radeon_gem_prime_pin, .gem_prime_unpin = radeon_gem_prime_unpin, + .gem_prime_res_obj = radeon_gem_prime_res_obj, .gem_prime_get_sg_table = radeon_gem_prime_get_sg_table, .gem_prime_import_sg_table = radeon_gem_prime_import_sg_table, .gem_prime_vmap = radeon_gem_prime_vmap, diff --git a/drivers/gpu/drm/radeon/radeon_prime.c 
b/drivers/gpu/drm/radeon/radeon_prime.c index 20074560fc25..28d71070c389 100644 --- a/drivers/gpu/drm/radeon/radeon_prime.c +++ b/drivers/gpu/drm/radeon/radeon_prime.c @@ -103,3 +103,11 @@ void radeon_gem_prime_unpin(struct drm_gem_object *obj) radeon_bo_unpin(bo); radeon_bo_unreserve(bo); } + + +struct reservation_object *radeon_gem_prime_res_obj(struct drm_gem_object *obj) +{ + struct radeon_bo *bo = gem_to_radeon_bo(obj); + + return bo->tbo.resv; +} diff --git a/drivers/gpu/drm/tegra/gem.c b/drivers/gpu/drm/tegra/gem.c index aa85b7b26f10..78cc8143760a 100644 --- a/drivers/gpu/drm/tegra/gem.c +++ b/drivers/gpu/drm/tegra/gem.c @@ -420,7 +420,7 @@ struct dma_buf *tegra_gem_prime_export(struct drm_device *drm, int flags) { return dma_buf_export(gem, &tegra_gem_prime_dmabuf_ops, gem->size, - flags); + flags, NULL); } struct drm_gem_object *tegra_gem_prime_import(struct drm_device *drm, diff --git a/drivers/gpu/drm/ttm/ttm_object.c b/drivers/gpu/drm/ttm/ttm_object.c index d2a053352789..12c87110db3a 100644 --- a/drivers/gpu/drm/ttm/ttm_object.c +++ b/drivers/gpu/drm/ttm/ttm_object.c @@ -695,7 +695,7 @@ int ttm_prime_handle_to_fd(struct ttm_object_file *tfile, } dma_buf = dma_buf_export(prime, &tdev->ops, - prime->size, flags); + prime->size, flags, NULL); if (IS_ERR(dma_buf)) { ret = PTR_ERR(dma_buf); ttm_mem_global_free(tdev->mem_glob, diff --git a/drivers/media/v4l2-core/videobuf2-dma-contig.c b/drivers/media/v4l2-core/videobuf2-dma-contig.c index 880be0782dd9..c4e4dfa8123a 100644 --- a/drivers/media/v4l2-core/videobuf2-dma-contig.c +++ b/drivers/media/v4l2-core/videobuf2-dma-contig.c @@ -404,7 +404,7 @@ static struct dma_buf *vb2_dc_get_dmabuf(void *buf_priv, unsigned long flags) if (WARN_ON(!buf->sgt_base)) return NULL; - dbuf = dma_buf_export(buf, &vb2_dc_dmabuf_ops, buf->size, flags); + dbuf = dma_buf_export(buf, &vb2_dc_dmabuf_ops, buf->size, flags, NULL); if (IS_ERR(dbuf)) return NULL; diff --git a/drivers/staging/android/ion/ion.c b/drivers/staging/android/ion/ion.c index 389b8f67a2ec..270360912b2c 100644 --- a/drivers/staging/android/ion/ion.c +++ b/drivers/staging/android/ion/ion.c @@ -1120,7 +1120,8 @@ struct dma_buf *ion_share_dma_buf(struct ion_client *client, ion_buffer_get(buffer); mutex_unlock(&client->lock); - dmabuf = dma_buf_export(buffer, &dma_buf_ops, buffer->size, O_RDWR); + dmabuf = dma_buf_export(buffer, &dma_buf_ops, buffer->size, O_RDWR, + NULL); if (IS_ERR(dmabuf)) { ion_buffer_put(buffer); return dmabuf; diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 8af71a8e2c00..e41f17ea1f13 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -83,6 +83,7 @@ struct drm_device; struct device_node; struct videomode; +struct reservation_object; #include #include @@ -923,6 +924,8 @@ struct drm_driver { /* low-level interface used by drm_gem_prime_{import,export} */ int (*gem_prime_pin)(struct drm_gem_object *obj); void (*gem_prime_unpin)(struct drm_gem_object *obj); + struct reservation_object * (*gem_prime_res_obj)( + struct drm_gem_object *obj); struct sg_table *(*gem_prime_get_sg_table)(struct drm_gem_object *obj); struct drm_gem_object *(*gem_prime_import_sg_table)( struct drm_device *dev, size_t size, diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index f886985a28b2..fd7def2e0ae2 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -115,6 +115,7 @@ struct dma_buf_ops { * @exp_name: name of the exporter; useful for debugging. * @list_node: node for dma_buf accounting and debugging. 
* @priv: exporter specific private data for this buffer object. + * @resv: reservation object linked to this dma-buf */ struct dma_buf { size_t size; @@ -128,6 +129,7 @@ struct dma_buf { const char *exp_name; struct list_head list_node; void *priv; + struct reservation_object *resv; }; /** @@ -168,10 +170,11 @@ void dma_buf_detach(struct dma_buf *dmabuf, struct dma_buf_attachment *dmabuf_attach); struct dma_buf *dma_buf_export_named(void *priv, const struct dma_buf_ops *ops, - size_t size, int flags, const char *); + size_t size, int flags, const char *, + struct reservation_object *); -#define dma_buf_export(priv, ops, size, flags) \ - dma_buf_export_named(priv, ops, size, flags, KBUILD_MODNAME) +#define dma_buf_export(priv, ops, size, flags, resv) \ + dma_buf_export_named(priv, ops, size, flags, KBUILD_MODNAME, resv) int dma_buf_fd(struct dma_buf *dmabuf, int flags); struct dma_buf *dma_buf_get(int fd); -- cgit v1.2.3-59-g8ed1b From 3b1cae7c2c2b801b8530db07a02587bc1a41b3fe Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Mon, 30 Jun 2014 09:50:00 +0100 Subject: staging:iio:ad7291: Move out of staging The ad7291 driver is in a reasonable shape. It does not use non-standard API/ABI and there are no major style issues with the driver. So this patch moves it out of staging. There is one small warning from checkpatch which is also fixed in this patch. The patch also sorts the #include directives in alphabetical order. Signed-off-by: Lars-Peter Clausen Signed-off-by: Jonathan Cameron --- drivers/iio/adc/Kconfig | 10 + drivers/iio/adc/Makefile | 1 + drivers/iio/adc/ad7291.c | 585 +++++++++++++++++++++++++++++++++++ drivers/staging/iio/adc/Kconfig | 7 - drivers/staging/iio/adc/Makefile | 1 - drivers/staging/iio/adc/ad7291.c | 584 ---------------------------------- drivers/staging/iio/adc/ad7291.h | 12 - include/linux/platform_data/ad7291.h | 12 + 8 files changed, 608 insertions(+), 604 deletions(-) create mode 100644 drivers/iio/adc/ad7291.c delete mode 100644 drivers/staging/iio/adc/ad7291.c delete mode 100644 drivers/staging/iio/adc/ad7291.h create mode 100644 include/linux/platform_data/ad7291.h (limited to 'include/linux') diff --git a/drivers/iio/adc/Kconfig b/drivers/iio/adc/Kconfig index 20a7073f1dd6..11b048a59fde 100644 --- a/drivers/iio/adc/Kconfig +++ b/drivers/iio/adc/Kconfig @@ -20,6 +20,16 @@ config AD7266 Say yes here to build support for Analog Devices AD7265 and AD7266 ADCs. +config AD7291 + tristate "Analog Devices AD7291 ADC driver" + depends on I2C + help + Say yes here to build support for Analog Devices AD7291 + 8 Channel ADC with temperature sensor. + + To compile this driver as a module, choose M here: the + module will be called ad7291. + config AD7298 tristate "Analog Devices AD7298 ADC driver" depends on SPI diff --git a/drivers/iio/adc/Makefile b/drivers/iio/adc/Makefile index 38cf5c3f5631..ad81b512aa3d 100644 --- a/drivers/iio/adc/Makefile +++ b/drivers/iio/adc/Makefile @@ -5,6 +5,7 @@ # When adding new entries keep the list in alphabetical order obj-$(CONFIG_AD_SIGMA_DELTA) += ad_sigma_delta.o obj-$(CONFIG_AD7266) += ad7266.o +obj-$(CONFIG_AD7291) += ad7291.o obj-$(CONFIG_AD7298) += ad7298.o obj-$(CONFIG_AD7923) += ad7923.o obj-$(CONFIG_AD7476) += ad7476.o diff --git a/drivers/iio/adc/ad7291.c b/drivers/iio/adc/ad7291.c new file mode 100644 index 000000000000..4ed78b94afd8 --- /dev/null +++ b/drivers/iio/adc/ad7291.c @@ -0,0 +1,585 @@ +/* + * AD7291 8-Channel, I2C, 12-Bit SAR ADC with Temperature Sensor + * + * Copyright 2010-2011 Analog Devices Inc. 
+ * + * Licensed under the GPL-2 or later. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +/* + * Simplified handling + * + * If no events enabled - single polled channel read + * If event enabled direct reads disable unless channel + * is in the read mask. + * + * The noise-delayed bit as per datasheet suggestion is always enabled. + */ + +/* + * AD7291 registers definition + */ +#define AD7291_COMMAND 0x00 +#define AD7291_VOLTAGE 0x01 +#define AD7291_T_SENSE 0x02 +#define AD7291_T_AVERAGE 0x03 +#define AD7291_DATA_HIGH(x) ((x) * 3 + 0x4) +#define AD7291_DATA_LOW(x) ((x) * 3 + 0x5) +#define AD7291_HYST(x) ((x) * 3 + 0x6) +#define AD7291_VOLTAGE_ALERT_STATUS 0x1F +#define AD7291_T_ALERT_STATUS 0x20 + +#define AD7291_BITS 12 +#define AD7291_VOLTAGE_LIMIT_COUNT 8 + + +/* + * AD7291 command + */ +#define AD7291_AUTOCYCLE BIT(0) +#define AD7291_RESET BIT(1) +#define AD7291_ALERT_CLEAR BIT(2) +#define AD7291_ALERT_POLARITY BIT(3) +#define AD7291_EXT_REF BIT(4) +#define AD7291_NOISE_DELAY BIT(5) +#define AD7291_T_SENSE_MASK BIT(7) +#define AD7291_VOLTAGE_MASK GENMASK(15, 8) +#define AD7291_VOLTAGE_OFFSET 8 + +/* + * AD7291 value masks + */ +#define AD7291_VALUE_MASK GENMASK(11, 0) + +/* + * AD7291 alert register bits + */ +#define AD7291_T_LOW BIT(0) +#define AD7291_T_HIGH BIT(1) +#define AD7291_T_AVG_LOW BIT(2) +#define AD7291_T_AVG_HIGH BIT(3) +#define AD7291_V_LOW(x) BIT((x) * 2) +#define AD7291_V_HIGH(x) BIT((x) * 2 + 1) + + +struct ad7291_chip_info { + struct i2c_client *client; + struct regulator *reg; + u16 command; + u16 c_mask; /* Active voltage channels for events */ + struct mutex state_lock; +}; + +static int ad7291_i2c_read(struct ad7291_chip_info *chip, u8 reg, u16 *data) +{ + struct i2c_client *client = chip->client; + int ret = 0; + + ret = i2c_smbus_read_word_swapped(client, reg); + if (ret < 0) { + dev_err(&client->dev, "I2C read error\n"); + return ret; + } + + *data = ret; + + return 0; +} + +static int ad7291_i2c_write(struct ad7291_chip_info *chip, u8 reg, u16 data) +{ + return i2c_smbus_write_word_swapped(chip->client, reg, data); +} + +static irqreturn_t ad7291_event_handler(int irq, void *private) +{ + struct iio_dev *indio_dev = private; + struct ad7291_chip_info *chip = iio_priv(private); + u16 t_status, v_status; + u16 command; + int i; + s64 timestamp = iio_get_time_ns(); + + if (ad7291_i2c_read(chip, AD7291_T_ALERT_STATUS, &t_status)) + return IRQ_HANDLED; + + if (ad7291_i2c_read(chip, AD7291_VOLTAGE_ALERT_STATUS, &v_status)) + return IRQ_HANDLED; + + if (!(t_status || v_status)) + return IRQ_HANDLED; + + command = chip->command | AD7291_ALERT_CLEAR; + ad7291_i2c_write(chip, AD7291_COMMAND, command); + + command = chip->command & ~AD7291_ALERT_CLEAR; + ad7291_i2c_write(chip, AD7291_COMMAND, command); + + /* For now treat t_sense and t_sense_average the same */ + if ((t_status & AD7291_T_LOW) || (t_status & AD7291_T_AVG_LOW)) + iio_push_event(indio_dev, + IIO_UNMOD_EVENT_CODE(IIO_TEMP, + 0, + IIO_EV_TYPE_THRESH, + IIO_EV_DIR_FALLING), + timestamp); + if ((t_status & AD7291_T_HIGH) || (t_status & AD7291_T_AVG_HIGH)) + iio_push_event(indio_dev, + IIO_UNMOD_EVENT_CODE(IIO_TEMP, + 0, + IIO_EV_TYPE_THRESH, + IIO_EV_DIR_RISING), + timestamp); + + for (i = 0; i < AD7291_VOLTAGE_LIMIT_COUNT; i++) { + if (v_status & AD7291_V_LOW(i)) + iio_push_event(indio_dev, + IIO_UNMOD_EVENT_CODE(IIO_VOLTAGE, + i, + IIO_EV_TYPE_THRESH, + IIO_EV_DIR_FALLING), + timestamp); + if 
(v_status & AD7291_V_HIGH(i)) + iio_push_event(indio_dev, + IIO_UNMOD_EVENT_CODE(IIO_VOLTAGE, + i, + IIO_EV_TYPE_THRESH, + IIO_EV_DIR_RISING), + timestamp); + } + + return IRQ_HANDLED; +} + +static unsigned int ad7291_threshold_reg(const struct iio_chan_spec *chan, + enum iio_event_direction dir, + enum iio_event_info info) +{ + unsigned int offset; + + switch (chan->type) { + case IIO_VOLTAGE: + offset = chan->channel; + break; + case IIO_TEMP: + offset = AD7291_VOLTAGE_OFFSET; + break; + default: + return 0; + } + + switch (info) { + case IIO_EV_INFO_VALUE: + if (dir == IIO_EV_DIR_FALLING) + return AD7291_DATA_HIGH(offset); + else + return AD7291_DATA_LOW(offset); + case IIO_EV_INFO_HYSTERESIS: + return AD7291_HYST(offset); + default: + break; + } + return 0; +} + +static int ad7291_read_event_value(struct iio_dev *indio_dev, + const struct iio_chan_spec *chan, + enum iio_event_type type, + enum iio_event_direction dir, + enum iio_event_info info, + int *val, int *val2) +{ + struct ad7291_chip_info *chip = iio_priv(indio_dev); + int ret; + u16 uval; + + ret = ad7291_i2c_read(chip, ad7291_threshold_reg(chan, dir, info), + &uval); + if (ret < 0) + return ret; + + if (info == IIO_EV_INFO_HYSTERESIS || chan->type == IIO_VOLTAGE) + *val = uval & AD7291_VALUE_MASK; + + else + *val = sign_extend32(uval, 11); + + return IIO_VAL_INT; +} + +static int ad7291_write_event_value(struct iio_dev *indio_dev, + const struct iio_chan_spec *chan, + enum iio_event_type type, + enum iio_event_direction dir, + enum iio_event_info info, + int val, int val2) +{ + struct ad7291_chip_info *chip = iio_priv(indio_dev); + + if (info == IIO_EV_INFO_HYSTERESIS || chan->type == IIO_VOLTAGE) { + if (val > AD7291_VALUE_MASK || val < 0) + return -EINVAL; + } else { + if (val > 2047 || val < -2048) + return -EINVAL; + } + + return ad7291_i2c_write(chip, ad7291_threshold_reg(chan, dir, info), + val); +} + +static int ad7291_read_event_config(struct iio_dev *indio_dev, + const struct iio_chan_spec *chan, + enum iio_event_type type, + enum iio_event_direction dir) +{ + struct ad7291_chip_info *chip = iio_priv(indio_dev); + /* + * To be enabled the channel must simply be on. If any are enabled + * we are in continuous sampling mode + */ + + switch (chan->type) { + case IIO_VOLTAGE: + return !!(chip->c_mask & BIT(15 - chan->channel)); + case IIO_TEMP: + /* always on */ + return 1; + default: + return -EINVAL; + } + +} + +static int ad7291_write_event_config(struct iio_dev *indio_dev, + const struct iio_chan_spec *chan, + enum iio_event_type type, + enum iio_event_direction dir, + int state) +{ + int ret = 0; + struct ad7291_chip_info *chip = iio_priv(indio_dev); + unsigned int mask; + u16 regval; + + mutex_lock(&chip->state_lock); + regval = chip->command; + /* + * To be enabled the channel must simply be on. If any are enabled + * use continuous sampling mode. + * Possible to disable temp as well but that makes single read tricky. + */ + + mask = BIT(15 - chan->channel); + + switch (chan->type) { + case IIO_VOLTAGE: + if ((!state) && (chip->c_mask & mask)) + chip->c_mask &= ~mask; + else if (state && (!(chip->c_mask & mask))) + chip->c_mask |= mask; + else + break; + + regval &= ~AD7291_AUTOCYCLE; + regval |= chip->c_mask; + if (chip->c_mask) /* Enable autocycle? 
*/ + regval |= AD7291_AUTOCYCLE; + + ret = ad7291_i2c_write(chip, AD7291_COMMAND, regval); + if (ret < 0) + goto error_ret; + + chip->command = regval; + break; + default: + ret = -EINVAL; + } + +error_ret: + mutex_unlock(&chip->state_lock); + return ret; +} + +static int ad7291_read_raw(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, + int *val, + int *val2, + long mask) +{ + int ret; + struct ad7291_chip_info *chip = iio_priv(indio_dev); + u16 regval; + + switch (mask) { + case IIO_CHAN_INFO_RAW: + switch (chan->type) { + case IIO_VOLTAGE: + mutex_lock(&chip->state_lock); + /* If in autocycle mode drop through */ + if (chip->command & AD7291_AUTOCYCLE) { + mutex_unlock(&chip->state_lock); + return -EBUSY; + } + /* Enable this channel alone */ + regval = chip->command & (~AD7291_VOLTAGE_MASK); + regval |= BIT(15 - chan->channel); + ret = ad7291_i2c_write(chip, AD7291_COMMAND, regval); + if (ret < 0) { + mutex_unlock(&chip->state_lock); + return ret; + } + /* Read voltage */ + ret = i2c_smbus_read_word_swapped(chip->client, + AD7291_VOLTAGE); + if (ret < 0) { + mutex_unlock(&chip->state_lock); + return ret; + } + *val = ret & AD7291_VALUE_MASK; + mutex_unlock(&chip->state_lock); + return IIO_VAL_INT; + case IIO_TEMP: + /* Assumes tsense bit of command register always set */ + ret = i2c_smbus_read_word_swapped(chip->client, + AD7291_T_SENSE); + if (ret < 0) + return ret; + *val = sign_extend32(ret, 11); + return IIO_VAL_INT; + default: + return -EINVAL; + } + case IIO_CHAN_INFO_AVERAGE_RAW: + ret = i2c_smbus_read_word_swapped(chip->client, + AD7291_T_AVERAGE); + if (ret < 0) + return ret; + *val = sign_extend32(ret, 11); + return IIO_VAL_INT; + case IIO_CHAN_INFO_SCALE: + switch (chan->type) { + case IIO_VOLTAGE: + if (chip->reg) { + int vref; + + vref = regulator_get_voltage(chip->reg); + if (vref < 0) + return vref; + *val = vref / 1000; + } else { + *val = 2500; + } + *val2 = AD7291_BITS; + return IIO_VAL_FRACTIONAL_LOG2; + case IIO_TEMP: + /* + * One LSB of the ADC corresponds to 0.25 deg C. 
+ * The temperature reading is in 12-bit twos + * complement format + */ + *val = 250; + return IIO_VAL_INT; + default: + return -EINVAL; + } + default: + return -EINVAL; + } +} + +static const struct iio_event_spec ad7291_events[] = { + { + .type = IIO_EV_TYPE_THRESH, + .dir = IIO_EV_DIR_RISING, + .mask_separate = BIT(IIO_EV_INFO_VALUE) | + BIT(IIO_EV_INFO_ENABLE), + }, { + .type = IIO_EV_TYPE_THRESH, + .dir = IIO_EV_DIR_FALLING, + .mask_separate = BIT(IIO_EV_INFO_VALUE) | + BIT(IIO_EV_INFO_ENABLE), + }, { + .type = IIO_EV_TYPE_THRESH, + .dir = IIO_EV_DIR_EITHER, + .mask_separate = BIT(IIO_EV_INFO_HYSTERESIS), + }, +}; + +#define AD7291_VOLTAGE_CHAN(_chan) \ +{ \ + .type = IIO_VOLTAGE, \ + .info_mask_separate = BIT(IIO_CHAN_INFO_RAW), \ + .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE), \ + .indexed = 1, \ + .channel = _chan, \ + .event_spec = ad7291_events, \ + .num_event_specs = ARRAY_SIZE(ad7291_events), \ +} + +static const struct iio_chan_spec ad7291_channels[] = { + AD7291_VOLTAGE_CHAN(0), + AD7291_VOLTAGE_CHAN(1), + AD7291_VOLTAGE_CHAN(2), + AD7291_VOLTAGE_CHAN(3), + AD7291_VOLTAGE_CHAN(4), + AD7291_VOLTAGE_CHAN(5), + AD7291_VOLTAGE_CHAN(6), + AD7291_VOLTAGE_CHAN(7), + { + .type = IIO_TEMP, + .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | + BIT(IIO_CHAN_INFO_AVERAGE_RAW) | + BIT(IIO_CHAN_INFO_SCALE), + .indexed = 1, + .channel = 0, + .event_spec = ad7291_events, + .num_event_specs = ARRAY_SIZE(ad7291_events), + } +}; + +static const struct iio_info ad7291_info = { + .read_raw = &ad7291_read_raw, + .read_event_config = &ad7291_read_event_config, + .write_event_config = &ad7291_write_event_config, + .read_event_value = &ad7291_read_event_value, + .write_event_value = &ad7291_write_event_value, + .driver_module = THIS_MODULE, +}; + +static int ad7291_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct ad7291_platform_data *pdata = client->dev.platform_data; + struct ad7291_chip_info *chip; + struct iio_dev *indio_dev; + int ret = 0; + + indio_dev = devm_iio_device_alloc(&client->dev, sizeof(*chip)); + if (!indio_dev) + return -ENOMEM; + chip = iio_priv(indio_dev); + + if (pdata && pdata->use_external_ref) { + chip->reg = devm_regulator_get(&client->dev, "vref"); + if (IS_ERR(chip->reg)) + return ret; + + ret = regulator_enable(chip->reg); + if (ret) + return ret; + } + + mutex_init(&chip->state_lock); + /* this is only used for device removal purposes */ + i2c_set_clientdata(client, indio_dev); + + chip->client = client; + + chip->command = AD7291_NOISE_DELAY | + AD7291_T_SENSE_MASK | /* Tsense always enabled */ + AD7291_ALERT_POLARITY; /* set irq polarity low level */ + + if (pdata && pdata->use_external_ref) + chip->command |= AD7291_EXT_REF; + + indio_dev->name = id->name; + indio_dev->channels = ad7291_channels; + indio_dev->num_channels = ARRAY_SIZE(ad7291_channels); + + indio_dev->dev.parent = &client->dev; + indio_dev->info = &ad7291_info; + indio_dev->modes = INDIO_DIRECT_MODE; + + ret = ad7291_i2c_write(chip, AD7291_COMMAND, AD7291_RESET); + if (ret) { + ret = -EIO; + goto error_disable_reg; + } + + ret = ad7291_i2c_write(chip, AD7291_COMMAND, chip->command); + if (ret) { + ret = -EIO; + goto error_disable_reg; + } + + if (client->irq > 0) { + ret = request_threaded_irq(client->irq, + NULL, + &ad7291_event_handler, + IRQF_TRIGGER_LOW | IRQF_ONESHOT, + id->name, + indio_dev); + if (ret) + goto error_disable_reg; + } + + ret = iio_device_register(indio_dev); + if (ret) + goto error_unreg_irq; + + return 0; + +error_unreg_irq: + if (client->irq) + 
free_irq(client->irq, indio_dev); +error_disable_reg: + if (chip->reg) + regulator_disable(chip->reg); + + return ret; +} + +static int ad7291_remove(struct i2c_client *client) +{ + struct iio_dev *indio_dev = i2c_get_clientdata(client); + struct ad7291_chip_info *chip = iio_priv(indio_dev); + + iio_device_unregister(indio_dev); + + if (client->irq) + free_irq(client->irq, indio_dev); + + if (chip->reg) + regulator_disable(chip->reg); + + return 0; +} + +static const struct i2c_device_id ad7291_id[] = { + { "ad7291", 0 }, + {} +}; + +MODULE_DEVICE_TABLE(i2c, ad7291_id); + +static struct i2c_driver ad7291_driver = { + .driver = { + .name = KBUILD_MODNAME, + }, + .probe = ad7291_probe, + .remove = ad7291_remove, + .id_table = ad7291_id, +}; +module_i2c_driver(ad7291_driver); + +MODULE_AUTHOR("Sonic Zhang "); +MODULE_DESCRIPTION("Analog Devices AD7291 ADC driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/staging/iio/adc/Kconfig b/drivers/staging/iio/adc/Kconfig index b87e382ad768..75d2d1bf93d1 100644 --- a/drivers/staging/iio/adc/Kconfig +++ b/drivers/staging/iio/adc/Kconfig @@ -3,13 +3,6 @@ # menu "Analog to digital converters" -config AD7291 - tristate "Analog Devices AD7291 ADC driver" - depends on I2C - help - Say yes here to build support for Analog Devices AD7291 - 8 Channel ADC with temperature sensor. - config AD7606 tristate "Analog Devices AD7606 ADC driver" depends on GPIOLIB diff --git a/drivers/staging/iio/adc/Makefile b/drivers/staging/iio/adc/Makefile index afdcd1ff08ff..1c4277dbd318 100644 --- a/drivers/staging/iio/adc/Makefile +++ b/drivers/staging/iio/adc/Makefile @@ -8,7 +8,6 @@ ad7606-$(CONFIG_AD7606_IFACE_PARALLEL) += ad7606_par.o ad7606-$(CONFIG_AD7606_IFACE_SPI) += ad7606_spi.o obj-$(CONFIG_AD7606) += ad7606.o -obj-$(CONFIG_AD7291) += ad7291.o obj-$(CONFIG_AD7780) += ad7780.o obj-$(CONFIG_AD7816) += ad7816.o obj-$(CONFIG_AD7192) += ad7192.o diff --git a/drivers/staging/iio/adc/ad7291.c b/drivers/staging/iio/adc/ad7291.c deleted file mode 100644 index 9139958da787..000000000000 --- a/drivers/staging/iio/adc/ad7291.c +++ /dev/null @@ -1,584 +0,0 @@ -/* - * AD7291 8-Channel, I2C, 12-Bit SAR ADC with Temperature Sensor - * - * Copyright 2010-2011 Analog Devices Inc. - * - * Licensed under the GPL-2 or later. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "ad7291.h" - -/* - * Simplified handling - * - * If no events enabled - single polled channel read - * If event enabled direct reads disable unless channel - * is in the read mask. - * - * The noise-delayed bit as per datasheet suggestion is always enabled. 
- */ - -/* - * AD7291 registers definition - */ -#define AD7291_COMMAND 0x00 -#define AD7291_VOLTAGE 0x01 -#define AD7291_T_SENSE 0x02 -#define AD7291_T_AVERAGE 0x03 -#define AD7291_DATA_HIGH(x) ((x) * 3 + 0x4) -#define AD7291_DATA_LOW(x) ((x) * 3 + 0x5) -#define AD7291_HYST(x) ((x) * 3 + 0x6) -#define AD7291_VOLTAGE_ALERT_STATUS 0x1F -#define AD7291_T_ALERT_STATUS 0x20 - -#define AD7291_BITS 12 -#define AD7291_VOLTAGE_LIMIT_COUNT 8 - - -/* - * AD7291 command - */ -#define AD7291_AUTOCYCLE BIT(0) -#define AD7291_RESET BIT(1) -#define AD7291_ALERT_CLEAR BIT(2) -#define AD7291_ALERT_POLARITY BIT(3) -#define AD7291_EXT_REF BIT(4) -#define AD7291_NOISE_DELAY BIT(5) -#define AD7291_T_SENSE_MASK BIT(7) -#define AD7291_VOLTAGE_MASK GENMASK(15, 8) -#define AD7291_VOLTAGE_OFFSET 8 - -/* - * AD7291 value masks - */ -#define AD7291_VALUE_MASK GENMASK(11, 0) - -/* - * AD7291 alert register bits - */ -#define AD7291_T_LOW BIT(0) -#define AD7291_T_HIGH BIT(1) -#define AD7291_T_AVG_LOW BIT(2) -#define AD7291_T_AVG_HIGH BIT(3) -#define AD7291_V_LOW(x) BIT((x) * 2) -#define AD7291_V_HIGH(x) BIT((x) * 2 + 1) - - -struct ad7291_chip_info { - struct i2c_client *client; - struct regulator *reg; - u16 command; - u16 c_mask; /* Active voltage channels for events */ - struct mutex state_lock; -}; - -static int ad7291_i2c_read(struct ad7291_chip_info *chip, u8 reg, u16 *data) -{ - struct i2c_client *client = chip->client; - int ret = 0; - - ret = i2c_smbus_read_word_swapped(client, reg); - if (ret < 0) { - dev_err(&client->dev, "I2C read error\n"); - return ret; - } - - *data = ret; - - return 0; -} - -static int ad7291_i2c_write(struct ad7291_chip_info *chip, u8 reg, u16 data) -{ - return i2c_smbus_write_word_swapped(chip->client, reg, data); -} - -static irqreturn_t ad7291_event_handler(int irq, void *private) -{ - struct iio_dev *indio_dev = private; - struct ad7291_chip_info *chip = iio_priv(private); - u16 t_status, v_status; - u16 command; - int i; - s64 timestamp = iio_get_time_ns(); - - if (ad7291_i2c_read(chip, AD7291_T_ALERT_STATUS, &t_status)) - return IRQ_HANDLED; - - if (ad7291_i2c_read(chip, AD7291_VOLTAGE_ALERT_STATUS, &v_status)) - return IRQ_HANDLED; - - if (!(t_status || v_status)) - return IRQ_HANDLED; - - command = chip->command | AD7291_ALERT_CLEAR; - ad7291_i2c_write(chip, AD7291_COMMAND, command); - - command = chip->command & ~AD7291_ALERT_CLEAR; - ad7291_i2c_write(chip, AD7291_COMMAND, command); - - /* For now treat t_sense and t_sense_average the same */ - if ((t_status & AD7291_T_LOW) || (t_status & AD7291_T_AVG_LOW)) - iio_push_event(indio_dev, - IIO_UNMOD_EVENT_CODE(IIO_TEMP, - 0, - IIO_EV_TYPE_THRESH, - IIO_EV_DIR_FALLING), - timestamp); - if ((t_status & AD7291_T_HIGH) || (t_status & AD7291_T_AVG_HIGH)) - iio_push_event(indio_dev, - IIO_UNMOD_EVENT_CODE(IIO_TEMP, - 0, - IIO_EV_TYPE_THRESH, - IIO_EV_DIR_RISING), - timestamp); - - for (i = 0; i < AD7291_VOLTAGE_LIMIT_COUNT; i++) { - if (v_status & AD7291_V_LOW(i)) - iio_push_event(indio_dev, - IIO_UNMOD_EVENT_CODE(IIO_VOLTAGE, - i, - IIO_EV_TYPE_THRESH, - IIO_EV_DIR_FALLING), - timestamp); - if (v_status & AD7291_V_HIGH(i)) - iio_push_event(indio_dev, - IIO_UNMOD_EVENT_CODE(IIO_VOLTAGE, - i, - IIO_EV_TYPE_THRESH, - IIO_EV_DIR_RISING), - timestamp); - } - - return IRQ_HANDLED; -} - -static unsigned int ad7291_threshold_reg(const struct iio_chan_spec *chan, - enum iio_event_direction dir, - enum iio_event_info info) -{ - unsigned int offset; - - switch (chan->type) { - case IIO_VOLTAGE: - offset = chan->channel; - break; - case IIO_TEMP: 
- offset = AD7291_VOLTAGE_OFFSET; - break; - default: - return 0; - } - - switch (info) { - case IIO_EV_INFO_VALUE: - if (dir == IIO_EV_DIR_FALLING) - return AD7291_DATA_HIGH(offset); - else - return AD7291_DATA_LOW(offset); - case IIO_EV_INFO_HYSTERESIS: - return AD7291_HYST(offset); - default: - break; - } - return 0; -} - -static int ad7291_read_event_value(struct iio_dev *indio_dev, - const struct iio_chan_spec *chan, - enum iio_event_type type, - enum iio_event_direction dir, - enum iio_event_info info, - int *val, int *val2) -{ - struct ad7291_chip_info *chip = iio_priv(indio_dev); - int ret; - u16 uval; - - ret = ad7291_i2c_read(chip, ad7291_threshold_reg(chan, dir, info), - &uval); - if (ret < 0) - return ret; - - if (info == IIO_EV_INFO_HYSTERESIS || chan->type == IIO_VOLTAGE) - *val = uval & AD7291_VALUE_MASK; - - else - *val = sign_extend32(uval, 11); - - return IIO_VAL_INT; -} - -static int ad7291_write_event_value(struct iio_dev *indio_dev, - const struct iio_chan_spec *chan, - enum iio_event_type type, - enum iio_event_direction dir, - enum iio_event_info info, - int val, int val2) -{ - struct ad7291_chip_info *chip = iio_priv(indio_dev); - - if (info == IIO_EV_INFO_HYSTERESIS || chan->type == IIO_VOLTAGE) { - if (val > AD7291_VALUE_MASK || val < 0) - return -EINVAL; - } else { - if (val > 2047 || val < -2048) - return -EINVAL; - } - - return ad7291_i2c_write(chip, ad7291_threshold_reg(chan, dir, info), - val); -} - -static int ad7291_read_event_config(struct iio_dev *indio_dev, - const struct iio_chan_spec *chan, - enum iio_event_type type, - enum iio_event_direction dir) -{ - struct ad7291_chip_info *chip = iio_priv(indio_dev); - /* - * To be enabled the channel must simply be on. If any are enabled - * we are in continuous sampling mode - */ - - switch (chan->type) { - case IIO_VOLTAGE: - return !!(chip->c_mask & BIT(15 - chan->channel)); - case IIO_TEMP: - /* always on */ - return 1; - default: - return -EINVAL; - } - -} - -static int ad7291_write_event_config(struct iio_dev *indio_dev, - const struct iio_chan_spec *chan, - enum iio_event_type type, - enum iio_event_direction dir, - int state) -{ - int ret = 0; - struct ad7291_chip_info *chip = iio_priv(indio_dev); - unsigned int mask; - u16 regval; - - mutex_lock(&chip->state_lock); - regval = chip->command; - /* - * To be enabled the channel must simply be on. If any are enabled - * use continuous sampling mode. - * Possible to disable temp as well but that makes single read tricky. - */ - - mask = BIT(15 - chan->channel); - - switch (chan->type) { - case IIO_VOLTAGE: - if ((!state) && (chip->c_mask & mask)) - chip->c_mask &= ~mask; - else if (state && (!(chip->c_mask & mask))) - chip->c_mask |= mask; - else - break; - - regval &= ~AD7291_AUTOCYCLE; - regval |= chip->c_mask; - if (chip->c_mask) /* Enable autocycle? 
*/ - regval |= AD7291_AUTOCYCLE; - - ret = ad7291_i2c_write(chip, AD7291_COMMAND, regval); - if (ret < 0) - goto error_ret; - - chip->command = regval; - break; - default: - ret = -EINVAL; - } - -error_ret: - mutex_unlock(&chip->state_lock); - return ret; -} - -static int ad7291_read_raw(struct iio_dev *indio_dev, - struct iio_chan_spec const *chan, - int *val, - int *val2, - long mask) -{ - int ret; - struct ad7291_chip_info *chip = iio_priv(indio_dev); - u16 regval; - - switch (mask) { - case IIO_CHAN_INFO_RAW: - switch (chan->type) { - case IIO_VOLTAGE: - mutex_lock(&chip->state_lock); - /* If in autocycle mode drop through */ - if (chip->command & AD7291_AUTOCYCLE) { - mutex_unlock(&chip->state_lock); - return -EBUSY; - } - /* Enable this channel alone */ - regval = chip->command & (~AD7291_VOLTAGE_MASK); - regval |= BIT(15 - chan->channel); - ret = ad7291_i2c_write(chip, AD7291_COMMAND, regval); - if (ret < 0) { - mutex_unlock(&chip->state_lock); - return ret; - } - /* Read voltage */ - ret = i2c_smbus_read_word_swapped(chip->client, - AD7291_VOLTAGE); - if (ret < 0) { - mutex_unlock(&chip->state_lock); - return ret; - } - *val = ret & AD7291_VALUE_MASK; - mutex_unlock(&chip->state_lock); - return IIO_VAL_INT; - case IIO_TEMP: - /* Assumes tsense bit of command register always set */ - ret = i2c_smbus_read_word_swapped(chip->client, - AD7291_T_SENSE); - if (ret < 0) - return ret; - *val = sign_extend32(ret, 11); - return IIO_VAL_INT; - default: - return -EINVAL; - } - case IIO_CHAN_INFO_AVERAGE_RAW: - ret = i2c_smbus_read_word_swapped(chip->client, - AD7291_T_AVERAGE); - if (ret < 0) - return ret; - *val = sign_extend32(ret, 11); - return IIO_VAL_INT; - case IIO_CHAN_INFO_SCALE: - switch (chan->type) { - case IIO_VOLTAGE: - if (chip->reg) { - int vref; - vref = regulator_get_voltage(chip->reg); - if (vref < 0) - return vref; - *val = vref / 1000; - } else { - *val = 2500; - } - *val2 = AD7291_BITS; - return IIO_VAL_FRACTIONAL_LOG2; - case IIO_TEMP: - /* - * One LSB of the ADC corresponds to 0.25 deg C. 
- * The temperature reading is in 12-bit twos - * complement format - */ - *val = 250; - return IIO_VAL_INT; - default: - return -EINVAL; - } - default: - return -EINVAL; - } -} - -static const struct iio_event_spec ad7291_events[] = { - { - .type = IIO_EV_TYPE_THRESH, - .dir = IIO_EV_DIR_RISING, - .mask_separate = BIT(IIO_EV_INFO_VALUE) | - BIT(IIO_EV_INFO_ENABLE), - }, { - .type = IIO_EV_TYPE_THRESH, - .dir = IIO_EV_DIR_FALLING, - .mask_separate = BIT(IIO_EV_INFO_VALUE) | - BIT(IIO_EV_INFO_ENABLE), - }, { - .type = IIO_EV_TYPE_THRESH, - .dir = IIO_EV_DIR_EITHER, - .mask_separate = BIT(IIO_EV_INFO_HYSTERESIS), - }, -}; - -#define AD7291_VOLTAGE_CHAN(_chan) \ -{ \ - .type = IIO_VOLTAGE, \ - .info_mask_separate = BIT(IIO_CHAN_INFO_RAW), \ - .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE), \ - .indexed = 1, \ - .channel = _chan, \ - .event_spec = ad7291_events, \ - .num_event_specs = ARRAY_SIZE(ad7291_events), \ -} - -static const struct iio_chan_spec ad7291_channels[] = { - AD7291_VOLTAGE_CHAN(0), - AD7291_VOLTAGE_CHAN(1), - AD7291_VOLTAGE_CHAN(2), - AD7291_VOLTAGE_CHAN(3), - AD7291_VOLTAGE_CHAN(4), - AD7291_VOLTAGE_CHAN(5), - AD7291_VOLTAGE_CHAN(6), - AD7291_VOLTAGE_CHAN(7), - { - .type = IIO_TEMP, - .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | - BIT(IIO_CHAN_INFO_AVERAGE_RAW) | - BIT(IIO_CHAN_INFO_SCALE), - .indexed = 1, - .channel = 0, - .event_spec = ad7291_events, - .num_event_specs = ARRAY_SIZE(ad7291_events), - } -}; - -static const struct iio_info ad7291_info = { - .read_raw = &ad7291_read_raw, - .read_event_config = &ad7291_read_event_config, - .write_event_config = &ad7291_write_event_config, - .read_event_value = &ad7291_read_event_value, - .write_event_value = &ad7291_write_event_value, - .driver_module = THIS_MODULE, -}; - -static int ad7291_probe(struct i2c_client *client, - const struct i2c_device_id *id) -{ - struct ad7291_platform_data *pdata = client->dev.platform_data; - struct ad7291_chip_info *chip; - struct iio_dev *indio_dev; - int ret = 0; - - indio_dev = devm_iio_device_alloc(&client->dev, sizeof(*chip)); - if (!indio_dev) - return -ENOMEM; - chip = iio_priv(indio_dev); - - if (pdata && pdata->use_external_ref) { - chip->reg = devm_regulator_get(&client->dev, "vref"); - if (IS_ERR(chip->reg)) - return ret; - - ret = regulator_enable(chip->reg); - if (ret) - return ret; - } - - mutex_init(&chip->state_lock); - /* this is only used for device removal purposes */ - i2c_set_clientdata(client, indio_dev); - - chip->client = client; - - chip->command = AD7291_NOISE_DELAY | - AD7291_T_SENSE_MASK | /* Tsense always enabled */ - AD7291_ALERT_POLARITY; /* set irq polarity low level */ - - if (pdata && pdata->use_external_ref) - chip->command |= AD7291_EXT_REF; - - indio_dev->name = id->name; - indio_dev->channels = ad7291_channels; - indio_dev->num_channels = ARRAY_SIZE(ad7291_channels); - - indio_dev->dev.parent = &client->dev; - indio_dev->info = &ad7291_info; - indio_dev->modes = INDIO_DIRECT_MODE; - - ret = ad7291_i2c_write(chip, AD7291_COMMAND, AD7291_RESET); - if (ret) { - ret = -EIO; - goto error_disable_reg; - } - - ret = ad7291_i2c_write(chip, AD7291_COMMAND, chip->command); - if (ret) { - ret = -EIO; - goto error_disable_reg; - } - - if (client->irq > 0) { - ret = request_threaded_irq(client->irq, - NULL, - &ad7291_event_handler, - IRQF_TRIGGER_LOW | IRQF_ONESHOT, - id->name, - indio_dev); - if (ret) - goto error_disable_reg; - } - - ret = iio_device_register(indio_dev); - if (ret) - goto error_unreg_irq; - - return 0; - -error_unreg_irq: - if (client->irq) - 
free_irq(client->irq, indio_dev); -error_disable_reg: - if (chip->reg) - regulator_disable(chip->reg); - - return ret; -} - -static int ad7291_remove(struct i2c_client *client) -{ - struct iio_dev *indio_dev = i2c_get_clientdata(client); - struct ad7291_chip_info *chip = iio_priv(indio_dev); - - iio_device_unregister(indio_dev); - - if (client->irq) - free_irq(client->irq, indio_dev); - - if (chip->reg) - regulator_disable(chip->reg); - - return 0; -} - -static const struct i2c_device_id ad7291_id[] = { - { "ad7291", 0 }, - {} -}; - -MODULE_DEVICE_TABLE(i2c, ad7291_id); - -static struct i2c_driver ad7291_driver = { - .driver = { - .name = KBUILD_MODNAME, - }, - .probe = ad7291_probe, - .remove = ad7291_remove, - .id_table = ad7291_id, -}; -module_i2c_driver(ad7291_driver); - -MODULE_AUTHOR("Sonic Zhang "); -MODULE_DESCRIPTION("Analog Devices AD7291 ADC driver"); -MODULE_LICENSE("GPL v2"); diff --git a/drivers/staging/iio/adc/ad7291.h b/drivers/staging/iio/adc/ad7291.h deleted file mode 100644 index bbd89fa51188..000000000000 --- a/drivers/staging/iio/adc/ad7291.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef __IIO_AD7291_H__ -#define __IIO_AD7291_H__ - -/** - * struct ad7291_platform_data - AD7291 platform data - * @use_external_ref: Whether to use an external or internal reference voltage - */ -struct ad7291_platform_data { - bool use_external_ref; -}; - -#endif diff --git a/include/linux/platform_data/ad7291.h b/include/linux/platform_data/ad7291.h new file mode 100644 index 000000000000..bbd89fa51188 --- /dev/null +++ b/include/linux/platform_data/ad7291.h @@ -0,0 +1,12 @@ +#ifndef __IIO_AD7291_H__ +#define __IIO_AD7291_H__ + +/** + * struct ad7291_platform_data - AD7291 platform data + * @use_external_ref: Whether to use an external or internal reference voltage + */ +struct ad7291_platform_data { + bool use_external_ref; +}; + +#endif -- cgit v1.2.3-59-g8ed1b From 0ba6b8fb91fc051535c7612f6241c8197d92323b Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Tue, 1 Jul 2014 12:57:37 +0200 Subject: reservation: add support for fences to enable cross-device synchronisation Signed-off-by: Maarten Lankhorst Acked-by: Sumit Semwal Acked-by: Daniel Vetter Reviewed-by: Rob Clark Signed-off-by: Greg Kroah-Hartman --- include/linux/reservation.h | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/reservation.h b/include/linux/reservation.h index 813dae960ebd..f3f57460a205 100644 --- a/include/linux/reservation.h +++ b/include/linux/reservation.h @@ -6,7 +6,7 @@ * Copyright (C) 2012 Texas Instruments * * Authors: - * Rob Clark + * Rob Clark * Maarten Lankhorst * Thomas Hellstrom * @@ -40,22 +40,40 @@ #define _LINUX_RESERVATION_H #include +#include +#include extern struct ww_class reservation_ww_class; struct reservation_object { struct ww_mutex lock; + + struct fence *fence_excl; + struct fence **fence_shared; + u32 fence_shared_count, fence_shared_max; }; static inline void reservation_object_init(struct reservation_object *obj) { ww_mutex_init(&obj->lock, &reservation_ww_class); + + obj->fence_shared_count = obj->fence_shared_max = 0; + obj->fence_shared = NULL; + obj->fence_excl = NULL; } static inline void reservation_object_fini(struct reservation_object *obj) { + int i; + + if (obj->fence_excl) + fence_put(obj->fence_excl); + for (i = 0; i < obj->fence_shared_count; ++i) + fence_put(obj->fence_shared[i]); + kfree(obj->fence_shared); + ww_mutex_destroy(&obj->lock); } -- cgit v1.2.3-59-g8ed1b From 
9b495a5887994a6d74d5c261d012083a92b94738 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Tue, 1 Jul 2014 12:57:43 +0200 Subject: dma-buf: add poll support, v3 Thanks to Fengguang Wu for spotting a missing static cast. v2: - Kill unused variable need_shared. v3: - Clarify the BUG() in dma_buf_release some more. (Rob Clark) Signed-off-by: Maarten Lankhorst Acked-by: Sumit Semwal Acked-by: Daniel Vetter Signed-off-by: Greg Kroah-Hartman --- drivers/dma-buf/dma-buf.c | 108 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/dma-buf.h | 12 ++++++ 2 files changed, 120 insertions(+) (limited to 'include/linux') diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index cd40ca22911f..25e8c4165936 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -30,6 +30,7 @@ #include #include #include +#include #include static inline int is_dma_buf_file(struct file *); @@ -52,6 +53,16 @@ static int dma_buf_release(struct inode *inode, struct file *file) BUG_ON(dmabuf->vmapping_counter); + /* + * Any fences that a dma-buf poll can wait on should be signaled + * before releasing dma-buf. This is the responsibility of each + * driver that uses the reservation objects. + * + * If you hit this BUG() it means someone dropped their ref to the + * dma-buf while still having pending operation to the buffer. + */ + BUG_ON(dmabuf->cb_shared.active || dmabuf->cb_excl.active); + dmabuf->ops->release(dmabuf); mutex_lock(&db_list.lock); @@ -108,10 +119,103 @@ static loff_t dma_buf_llseek(struct file *file, loff_t offset, int whence) return base + offset; } +static void dma_buf_poll_cb(struct fence *fence, struct fence_cb *cb) +{ + struct dma_buf_poll_cb_t *dcb = (struct dma_buf_poll_cb_t *)cb; + unsigned long flags; + + spin_lock_irqsave(&dcb->poll->lock, flags); + wake_up_locked_poll(dcb->poll, dcb->active); + dcb->active = 0; + spin_unlock_irqrestore(&dcb->poll->lock, flags); +} + +static unsigned int dma_buf_poll(struct file *file, poll_table *poll) +{ + struct dma_buf *dmabuf; + struct reservation_object *resv; + unsigned long events; + + dmabuf = file->private_data; + if (!dmabuf || !dmabuf->resv) + return POLLERR; + + resv = dmabuf->resv; + + poll_wait(file, &dmabuf->poll, poll); + + events = poll_requested_events(poll) & (POLLIN | POLLOUT); + if (!events) + return 0; + + ww_mutex_lock(&resv->lock, NULL); + + if (resv->fence_excl && (!(events & POLLOUT) || + resv->fence_shared_count == 0)) { + struct dma_buf_poll_cb_t *dcb = &dmabuf->cb_excl; + unsigned long pevents = POLLIN; + + if (resv->fence_shared_count == 0) + pevents |= POLLOUT; + + spin_lock_irq(&dmabuf->poll.lock); + if (dcb->active) { + dcb->active |= pevents; + events &= ~pevents; + } else + dcb->active = pevents; + spin_unlock_irq(&dmabuf->poll.lock); + + if (events & pevents) { + if (!fence_add_callback(resv->fence_excl, + &dcb->cb, dma_buf_poll_cb)) + events &= ~pevents; + else + /* + * No callback queued, wake up any additional + * waiters. 
+ */ + dma_buf_poll_cb(NULL, &dcb->cb); + } + } + + if ((events & POLLOUT) && resv->fence_shared_count > 0) { + struct dma_buf_poll_cb_t *dcb = &dmabuf->cb_shared; + int i; + + /* Only queue a new callback if no event has fired yet */ + spin_lock_irq(&dmabuf->poll.lock); + if (dcb->active) + events &= ~POLLOUT; + else + dcb->active = POLLOUT; + spin_unlock_irq(&dmabuf->poll.lock); + + if (!(events & POLLOUT)) + goto out; + + for (i = 0; i < resv->fence_shared_count; ++i) + if (!fence_add_callback(resv->fence_shared[i], + &dcb->cb, dma_buf_poll_cb)) { + events &= ~POLLOUT; + break; + } + + /* No callback queued, wake up any additional waiters. */ + if (i == resv->fence_shared_count) + dma_buf_poll_cb(NULL, &dcb->cb); + } + +out: + ww_mutex_unlock(&resv->lock); + return events; +} + static const struct file_operations dma_buf_fops = { .release = dma_buf_release, .mmap = dma_buf_mmap_internal, .llseek = dma_buf_llseek, + .poll = dma_buf_poll, }; /* @@ -171,6 +275,10 @@ struct dma_buf *dma_buf_export_named(void *priv, const struct dma_buf_ops *ops, dmabuf->ops = ops; dmabuf->size = size; dmabuf->exp_name = exp_name; + init_waitqueue_head(&dmabuf->poll); + dmabuf->cb_excl.poll = dmabuf->cb_shared.poll = &dmabuf->poll; + dmabuf->cb_excl.active = dmabuf->cb_shared.active = 0; + if (!resv) { resv = (struct reservation_object *)&dmabuf[1]; reservation_object_init(resv); diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index fd7def2e0ae2..694e1fe1c4b4 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -30,6 +30,8 @@ #include #include #include +#include +#include struct device; struct dma_buf; @@ -130,6 +132,16 @@ struct dma_buf { struct list_head list_node; void *priv; struct reservation_object *resv; + + /* poll support */ + wait_queue_head_t poll; + + struct dma_buf_poll_cb_t { + struct fence_cb cb; + wait_queue_head_t *poll; + + unsigned long active; + } cb_excl, cb_shared; }; /** -- cgit v1.2.3-59-g8ed1b From 04a5faa8cbe5a8eaf152cb88959ba6360c26e702 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Tue, 1 Jul 2014 12:57:54 +0200 Subject: reservation: update api and add some helpers Move the list of shared fences to a struct, and return it in reservation_object_get_list(). Add reservation_object_get_excl to get the exclusive fence. Add reservation_object_reserve_shared(), which reserves space in the reservation_object for 1 more shared fence. reservation_object_add_shared_fence() and reservation_object_add_excl_fence() are used to assign a new fence to a reservation_object pointer, to complete a reservation. Changes since v1: - Add reservation_object_get_excl, reorder code a bit. 
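A minimal usage sketch of the updated API, assuming a caller that already manages its own fence lifetime: take the ww_mutex, reserve a shared slot, then publish the fence. This is illustrative only; error handling and the surrounding driver context are omitted.

    /* Hedged sketch: call order for the new shared-fence helpers. */
    static int example_attach_shared_fence(struct reservation_object *obj,
                                           struct fence *fence)
    {
            int ret;

            ww_mutex_lock(&obj->lock, NULL);

            /* make room for one more shared fence slot */
            ret = reservation_object_reserve_shared(obj);
            if (!ret)
                    /* publish the fence into the reserved slot */
                    reservation_object_add_shared_fence(obj, fence);

            ww_mutex_unlock(&obj->lock);
            return ret;
    }

An exclusive fence is attached the same way, but through reservation_object_add_excl_fence() and without the prior reservation step.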
Signed-off-by: Maarten Lankhorst Acked-by: Sumit Semwal Acked-by: Daniel Vetter Signed-off-by: Greg Kroah-Hartman --- Documentation/DocBook/device-drivers.tmpl | 1 + drivers/dma-buf/dma-buf.c | 35 ++++--- drivers/dma-buf/reservation.c | 156 +++++++++++++++++++++++++++++- include/linux/reservation.h | 56 +++++++++-- 4 files changed, 229 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/Documentation/DocBook/device-drivers.tmpl b/Documentation/DocBook/device-drivers.tmpl index ed0ef00cd7bc..dd3f278faa8a 100644 --- a/Documentation/DocBook/device-drivers.tmpl +++ b/Documentation/DocBook/device-drivers.tmpl @@ -133,6 +133,7 @@ X!Edrivers/base/interface.c !Edrivers/dma-buf/seqno-fence.c !Iinclude/linux/fence.h !Iinclude/linux/seqno-fence.h +!Edrivers/dma-buf/reservation.c !Iinclude/linux/reservation.h !Edrivers/base/dma-coherent.c !Edrivers/base/dma-mapping.c diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index 25e8c4165936..cb8379dfeed5 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -134,7 +134,10 @@ static unsigned int dma_buf_poll(struct file *file, poll_table *poll) { struct dma_buf *dmabuf; struct reservation_object *resv; + struct reservation_object_list *fobj; + struct fence *fence_excl; unsigned long events; + unsigned shared_count; dmabuf = file->private_data; if (!dmabuf || !dmabuf->resv) @@ -150,12 +153,18 @@ static unsigned int dma_buf_poll(struct file *file, poll_table *poll) ww_mutex_lock(&resv->lock, NULL); - if (resv->fence_excl && (!(events & POLLOUT) || - resv->fence_shared_count == 0)) { + fobj = resv->fence; + if (!fobj) + goto out; + + shared_count = fobj->shared_count; + fence_excl = resv->fence_excl; + + if (fence_excl && (!(events & POLLOUT) || shared_count == 0)) { struct dma_buf_poll_cb_t *dcb = &dmabuf->cb_excl; unsigned long pevents = POLLIN; - if (resv->fence_shared_count == 0) + if (shared_count == 0) pevents |= POLLOUT; spin_lock_irq(&dmabuf->poll.lock); @@ -167,19 +176,20 @@ static unsigned int dma_buf_poll(struct file *file, poll_table *poll) spin_unlock_irq(&dmabuf->poll.lock); if (events & pevents) { - if (!fence_add_callback(resv->fence_excl, - &dcb->cb, dma_buf_poll_cb)) + if (!fence_add_callback(fence_excl, &dcb->cb, + dma_buf_poll_cb)) { events &= ~pevents; - else + } else { /* * No callback queued, wake up any additional * waiters. */ dma_buf_poll_cb(NULL, &dcb->cb); + } } } - if ((events & POLLOUT) && resv->fence_shared_count > 0) { + if ((events & POLLOUT) && shared_count > 0) { struct dma_buf_poll_cb_t *dcb = &dmabuf->cb_shared; int i; @@ -194,15 +204,18 @@ static unsigned int dma_buf_poll(struct file *file, poll_table *poll) if (!(events & POLLOUT)) goto out; - for (i = 0; i < resv->fence_shared_count; ++i) - if (!fence_add_callback(resv->fence_shared[i], - &dcb->cb, dma_buf_poll_cb)) { + for (i = 0; i < shared_count; ++i) { + struct fence *fence = fobj->shared[i]; + + if (!fence_add_callback(fence, &dcb->cb, + dma_buf_poll_cb)) { events &= ~POLLOUT; break; } + } /* No callback queued, wake up any additional waiters. 
*/ - if (i == resv->fence_shared_count) + if (i == shared_count) dma_buf_poll_cb(NULL, &dcb->cb); } diff --git a/drivers/dma-buf/reservation.c b/drivers/dma-buf/reservation.c index a73fbf3b8e56..e6166723a9ae 100644 --- a/drivers/dma-buf/reservation.c +++ b/drivers/dma-buf/reservation.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2013 Canonical Ltd + * Copyright (C) 2012-2014 Canonical Ltd (Maarten Lankhorst) * * Based on bo.c which bears the following copyright notice, * but is dual licensed: @@ -37,3 +37,157 @@ DEFINE_WW_CLASS(reservation_ww_class); EXPORT_SYMBOL(reservation_ww_class); + +/* + * Reserve space to add a shared fence to a reservation_object, + * must be called with obj->lock held. + */ +int reservation_object_reserve_shared(struct reservation_object *obj) +{ + struct reservation_object_list *fobj, *old; + u32 max; + + old = reservation_object_get_list(obj); + + if (old && old->shared_max) { + if (old->shared_count < old->shared_max) { + /* perform an in-place update */ + kfree(obj->staged); + obj->staged = NULL; + return 0; + } else + max = old->shared_max * 2; + } else + max = 4; + + /* + * resize obj->staged or allocate if it doesn't exist, + * noop if already correct size + */ + fobj = krealloc(obj->staged, offsetof(typeof(*fobj), shared[max]), + GFP_KERNEL); + if (!fobj) + return -ENOMEM; + + obj->staged = fobj; + fobj->shared_max = max; + return 0; +} +EXPORT_SYMBOL(reservation_object_reserve_shared); + +static void +reservation_object_add_shared_inplace(struct reservation_object *obj, + struct reservation_object_list *fobj, + struct fence *fence) +{ + u32 i; + + for (i = 0; i < fobj->shared_count; ++i) { + if (fobj->shared[i]->context == fence->context) { + struct fence *old_fence = fobj->shared[i]; + + fence_get(fence); + + fobj->shared[i] = fence; + + fence_put(old_fence); + return; + } + } + + fence_get(fence); + fobj->shared[fobj->shared_count] = fence; + /* + * make the new fence visible before incrementing + * fobj->shared_count + */ + smp_wmb(); + fobj->shared_count++; +} + +static void +reservation_object_add_shared_replace(struct reservation_object *obj, + struct reservation_object_list *old, + struct reservation_object_list *fobj, + struct fence *fence) +{ + unsigned i; + + fence_get(fence); + + if (!old) { + fobj->shared[0] = fence; + fobj->shared_count = 1; + goto done; + } + + /* + * no need to bump fence refcounts, rcu_read access + * requires the use of kref_get_unless_zero, and the + * references from the old struct are carried over to + * the new. + */ + fobj->shared_count = old->shared_count; + + for (i = 0; i < old->shared_count; ++i) { + if (fence && old->shared[i]->context == fence->context) { + fence_put(old->shared[i]); + fobj->shared[i] = fence; + fence = NULL; + } else + fobj->shared[i] = old->shared[i]; + } + if (fence) + fobj->shared[fobj->shared_count++] = fence; + +done: + obj->fence = fobj; + kfree(old); +} + +/* + * Add a fence to a shared slot, obj->lock must be held, and + * reservation_object_reserve_shared_fence has been called. 
+ */ +void reservation_object_add_shared_fence(struct reservation_object *obj, + struct fence *fence) +{ + struct reservation_object_list *old, *fobj = obj->staged; + + old = reservation_object_get_list(obj); + obj->staged = NULL; + + if (!fobj) { + BUG_ON(old->shared_count == old->shared_max); + reservation_object_add_shared_inplace(obj, old, fence); + } else + reservation_object_add_shared_replace(obj, old, fobj, fence); +} +EXPORT_SYMBOL(reservation_object_add_shared_fence); + +void reservation_object_add_excl_fence(struct reservation_object *obj, + struct fence *fence) +{ + struct fence *old_fence = obj->fence_excl; + struct reservation_object_list *old; + u32 i = 0; + + old = reservation_object_get_list(obj); + if (old) { + i = old->shared_count; + old->shared_count = 0; + } + + if (fence) + fence_get(fence); + + obj->fence_excl = fence; + + /* inplace update, no shared fences */ + while (i--) + fence_put(old->shared[i]); + + if (old_fence) + fence_put(old_fence); +} +EXPORT_SYMBOL(reservation_object_add_excl_fence); diff --git a/include/linux/reservation.h b/include/linux/reservation.h index f3f57460a205..2affe67dea6e 100644 --- a/include/linux/reservation.h +++ b/include/linux/reservation.h @@ -45,36 +45,78 @@ extern struct ww_class reservation_ww_class; +struct reservation_object_list { + u32 shared_count, shared_max; + struct fence *shared[]; +}; + struct reservation_object { struct ww_mutex lock; struct fence *fence_excl; - struct fence **fence_shared; - u32 fence_shared_count, fence_shared_max; + struct reservation_object_list *fence; + struct reservation_object_list *staged; }; +#define reservation_object_assert_held(obj) \ + lockdep_assert_held(&(obj)->lock.base) + static inline void reservation_object_init(struct reservation_object *obj) { ww_mutex_init(&obj->lock, &reservation_ww_class); - obj->fence_shared_count = obj->fence_shared_max = 0; - obj->fence_shared = NULL; obj->fence_excl = NULL; + obj->fence = NULL; + obj->staged = NULL; } static inline void reservation_object_fini(struct reservation_object *obj) { int i; + struct reservation_object_list *fobj; + /* + * This object should be dead and all references must have + * been released to it. + */ if (obj->fence_excl) fence_put(obj->fence_excl); - for (i = 0; i < obj->fence_shared_count; ++i) - fence_put(obj->fence_shared[i]); - kfree(obj->fence_shared); + + fobj = obj->fence; + if (fobj) { + for (i = 0; i < fobj->shared_count; ++i) + fence_put(fobj->shared[i]); + + kfree(fobj); + } + kfree(obj->staged); ww_mutex_destroy(&obj->lock); } +static inline struct reservation_object_list * +reservation_object_get_list(struct reservation_object *obj) +{ + reservation_object_assert_held(obj); + + return obj->fence; +} + +static inline struct fence * +reservation_object_get_excl(struct reservation_object *obj) +{ + reservation_object_assert_held(obj); + + return obj->fence_excl; +} + +int reservation_object_reserve_shared(struct reservation_object *obj); +void reservation_object_add_shared_fence(struct reservation_object *obj, + struct fence *fence); + +void reservation_object_add_excl_fence(struct reservation_object *obj, + struct fence *fence); + #endif /* _LINUX_RESERVATION_H */ -- cgit v1.2.3-59-g8ed1b From 3c3b177a9369b26890ced004867fb32708e8ef5b Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Tue, 1 Jul 2014 12:58:00 +0200 Subject: reservation: add suppport for read-only access using rcu This adds some extra functions to deal with rcu. 
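The lockless helpers described below can be called without taking the ww_mutex. A hedged sketch of a read-side caller that only wants to block until all prior work on a buffer has finished (the helper name and the one-second timeout are illustrative, not from the patch):

    /* Hedged sketch: wait on every fence without holding obj->lock. */
    static int example_wait_idle(struct reservation_object *obj)
    {
            long ret;

            /* wait_all = true, interruptible, ~1s timeout */
            ret = reservation_object_wait_timeout_rcu(obj, true, true, HZ);
            if (ret < 0)
                    return ret;             /* interrupted or error */
            if (ret == 0)
                    return -ETIMEDOUT;      /* fences still pending */

            return 0;                       /* all fences signaled */
    }

A non-blocking check would use reservation_object_test_signaled_rcu(obj, true) instead.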
reservation_object_get_fences_rcu() will obtain the list of shared and exclusive fences without obtaining the ww_mutex. reservation_object_wait_timeout_rcu() will wait on all fences of the reservation_object, without obtaining the ww_mutex. reservation_object_test_signaled_rcu() will test if all fences of the reservation_object are signaled without using the ww_mutex. reservation_object_get_excl and reservation_object_get_list require the reservation object to be held, updating requires write_seqcount_begin/end. If only the exclusive fence is needed, rcu_dereference followed by fence_get_rcu can be used, if the shared fences are needed it's recommended to use the supplied functions. Signed-off-by: Maarten Lankhorst Acked-by: Sumit Semwal Acked-by: Daniel Vetter Reviewed-By: Thomas Hellstrom Signed-off-by: Greg Kroah-Hartman --- drivers/dma-buf/dma-buf.c | 47 ++++-- drivers/dma-buf/fence.c | 2 +- drivers/dma-buf/reservation.c | 336 ++++++++++++++++++++++++++++++++++++++---- include/linux/fence.h | 17 +++ include/linux/reservation.h | 52 +++++-- 5 files changed, 400 insertions(+), 54 deletions(-) (limited to 'include/linux') diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index cb8379dfeed5..f3014c448e1e 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -137,7 +137,7 @@ static unsigned int dma_buf_poll(struct file *file, poll_table *poll) struct reservation_object_list *fobj; struct fence *fence_excl; unsigned long events; - unsigned shared_count; + unsigned shared_count, seq; dmabuf = file->private_data; if (!dmabuf || !dmabuf->resv) @@ -151,14 +151,20 @@ static unsigned int dma_buf_poll(struct file *file, poll_table *poll) if (!events) return 0; - ww_mutex_lock(&resv->lock, NULL); +retry: + seq = read_seqcount_begin(&resv->seq); + rcu_read_lock(); - fobj = resv->fence; - if (!fobj) - goto out; - - shared_count = fobj->shared_count; - fence_excl = resv->fence_excl; + fobj = rcu_dereference(resv->fence); + if (fobj) + shared_count = fobj->shared_count; + else + shared_count = 0; + fence_excl = rcu_dereference(resv->fence_excl); + if (read_seqcount_retry(&resv->seq, seq)) { + rcu_read_unlock(); + goto retry; + } if (fence_excl && (!(events & POLLOUT) || shared_count == 0)) { struct dma_buf_poll_cb_t *dcb = &dmabuf->cb_excl; @@ -176,14 +182,20 @@ static unsigned int dma_buf_poll(struct file *file, poll_table *poll) spin_unlock_irq(&dmabuf->poll.lock); if (events & pevents) { - if (!fence_add_callback(fence_excl, &dcb->cb, + if (!fence_get_rcu(fence_excl)) { + /* force a recheck */ + events &= ~pevents; + dma_buf_poll_cb(NULL, &dcb->cb); + } else if (!fence_add_callback(fence_excl, &dcb->cb, dma_buf_poll_cb)) { events &= ~pevents; + fence_put(fence_excl); } else { /* * No callback queued, wake up any additional * waiters. */ + fence_put(fence_excl); dma_buf_poll_cb(NULL, &dcb->cb); } } @@ -205,13 +217,26 @@ static unsigned int dma_buf_poll(struct file *file, poll_table *poll) goto out; for (i = 0; i < shared_count; ++i) { - struct fence *fence = fobj->shared[i]; + struct fence *fence = rcu_dereference(fobj->shared[i]); + if (!fence_get_rcu(fence)) { + /* + * fence refcount dropped to zero, this means + * that fobj has been freed + * + * call dma_buf_poll_cb and force a recheck! + */ + events &= ~POLLOUT; + dma_buf_poll_cb(NULL, &dcb->cb); + break; + } if (!fence_add_callback(fence, &dcb->cb, dma_buf_poll_cb)) { + fence_put(fence); events &= ~POLLOUT; break; } + fence_put(fence); } /* No callback queued, wake up any additional waiters. 
*/ @@ -220,7 +245,7 @@ static unsigned int dma_buf_poll(struct file *file, poll_table *poll) } out: - ww_mutex_unlock(&resv->lock); + rcu_read_unlock(); return events; } diff --git a/drivers/dma-buf/fence.c b/drivers/dma-buf/fence.c index 948bf00d955e..4222cb2aa96a 100644 --- a/drivers/dma-buf/fence.c +++ b/drivers/dma-buf/fence.c @@ -184,7 +184,7 @@ EXPORT_SYMBOL(fence_release); void fence_free(struct fence *fence) { - kfree(fence); + kfree_rcu(fence, rcu); } EXPORT_SYMBOL(fence_free); diff --git a/drivers/dma-buf/reservation.c b/drivers/dma-buf/reservation.c index e6166723a9ae..3c97c8fa8d02 100644 --- a/drivers/dma-buf/reservation.c +++ b/drivers/dma-buf/reservation.c @@ -38,6 +38,11 @@ DEFINE_WW_CLASS(reservation_ww_class); EXPORT_SYMBOL(reservation_ww_class); +struct lock_class_key reservation_seqcount_class; +EXPORT_SYMBOL(reservation_seqcount_class); + +const char reservation_seqcount_string[] = "reservation_seqcount"; +EXPORT_SYMBOL(reservation_seqcount_string); /* * Reserve space to add a shared fence to a reservation_object, * must be called with obj->lock held. @@ -82,27 +87,37 @@ reservation_object_add_shared_inplace(struct reservation_object *obj, { u32 i; + fence_get(fence); + + preempt_disable(); + write_seqcount_begin(&obj->seq); + for (i = 0; i < fobj->shared_count; ++i) { - if (fobj->shared[i]->context == fence->context) { - struct fence *old_fence = fobj->shared[i]; + struct fence *old_fence; - fence_get(fence); + old_fence = rcu_dereference_protected(fobj->shared[i], + reservation_object_held(obj)); - fobj->shared[i] = fence; + if (old_fence->context == fence->context) { + /* memory barrier is added by write_seqcount_begin */ + RCU_INIT_POINTER(fobj->shared[i], fence); + write_seqcount_end(&obj->seq); + preempt_enable(); fence_put(old_fence); return; } } - fence_get(fence); - fobj->shared[fobj->shared_count] = fence; /* - * make the new fence visible before incrementing - * fobj->shared_count + * memory barrier is added by write_seqcount_begin, + * fobj->shared_count is protected by this lock too */ - smp_wmb(); + RCU_INIT_POINTER(fobj->shared[fobj->shared_count], fence); fobj->shared_count++; + + write_seqcount_end(&obj->seq); + preempt_enable(); } static void @@ -112,11 +127,12 @@ reservation_object_add_shared_replace(struct reservation_object *obj, struct fence *fence) { unsigned i; + struct fence *old_fence = NULL; fence_get(fence); if (!old) { - fobj->shared[0] = fence; + RCU_INIT_POINTER(fobj->shared[0], fence); fobj->shared_count = 1; goto done; } @@ -130,19 +146,38 @@ reservation_object_add_shared_replace(struct reservation_object *obj, fobj->shared_count = old->shared_count; for (i = 0; i < old->shared_count; ++i) { - if (fence && old->shared[i]->context == fence->context) { - fence_put(old->shared[i]); - fobj->shared[i] = fence; - fence = NULL; + struct fence *check; + + check = rcu_dereference_protected(old->shared[i], + reservation_object_held(obj)); + + if (!old_fence && check->context == fence->context) { + old_fence = check; + RCU_INIT_POINTER(fobj->shared[i], fence); } else - fobj->shared[i] = old->shared[i]; + RCU_INIT_POINTER(fobj->shared[i], check); + } + if (!old_fence) { + RCU_INIT_POINTER(fobj->shared[fobj->shared_count], fence); + fobj->shared_count++; } - if (fence) - fobj->shared[fobj->shared_count++] = fence; done: - obj->fence = fobj; - kfree(old); + preempt_disable(); + write_seqcount_begin(&obj->seq); + /* + * RCU_INIT_POINTER can be used here, + * seqcount provides the necessary barriers + */ + RCU_INIT_POINTER(obj->fence, fobj); + 
write_seqcount_end(&obj->seq); + preempt_enable(); + + if (old) + kfree_rcu(old, rcu); + + if (old_fence) + fence_put(old_fence); } /* @@ -158,7 +193,7 @@ void reservation_object_add_shared_fence(struct reservation_object *obj, obj->staged = NULL; if (!fobj) { - BUG_ON(old->shared_count == old->shared_max); + BUG_ON(old->shared_count >= old->shared_max); reservation_object_add_shared_inplace(obj, old, fence); } else reservation_object_add_shared_replace(obj, old, fobj, fence); @@ -168,26 +203,275 @@ EXPORT_SYMBOL(reservation_object_add_shared_fence); void reservation_object_add_excl_fence(struct reservation_object *obj, struct fence *fence) { - struct fence *old_fence = obj->fence_excl; + struct fence *old_fence = reservation_object_get_excl(obj); struct reservation_object_list *old; u32 i = 0; old = reservation_object_get_list(obj); - if (old) { + if (old) i = old->shared_count; - old->shared_count = 0; - } if (fence) fence_get(fence); - obj->fence_excl = fence; + preempt_disable(); + write_seqcount_begin(&obj->seq); + /* write_seqcount_begin provides the necessary memory barrier */ + RCU_INIT_POINTER(obj->fence_excl, fence); + if (old) + old->shared_count = 0; + write_seqcount_end(&obj->seq); + preempt_enable(); /* inplace update, no shared fences */ while (i--) - fence_put(old->shared[i]); + fence_put(rcu_dereference_protected(old->shared[i], + reservation_object_held(obj))); if (old_fence) fence_put(old_fence); } EXPORT_SYMBOL(reservation_object_add_excl_fence); + +int reservation_object_get_fences_rcu(struct reservation_object *obj, + struct fence **pfence_excl, + unsigned *pshared_count, + struct fence ***pshared) +{ + unsigned shared_count = 0; + unsigned retry = 1; + struct fence **shared = NULL, *fence_excl = NULL; + int ret = 0; + + while (retry) { + struct reservation_object_list *fobj; + unsigned seq; + + seq = read_seqcount_begin(&obj->seq); + + rcu_read_lock(); + + fobj = rcu_dereference(obj->fence); + if (fobj) { + struct fence **nshared; + size_t sz = sizeof(*shared) * fobj->shared_max; + + nshared = krealloc(shared, sz, + GFP_NOWAIT | __GFP_NOWARN); + if (!nshared) { + rcu_read_unlock(); + nshared = krealloc(shared, sz, GFP_KERNEL); + if (nshared) { + shared = nshared; + continue; + } + + ret = -ENOMEM; + shared_count = 0; + break; + } + shared = nshared; + memcpy(shared, fobj->shared, sz); + shared_count = fobj->shared_count; + } else + shared_count = 0; + fence_excl = rcu_dereference(obj->fence_excl); + + retry = read_seqcount_retry(&obj->seq, seq); + if (retry) + goto unlock; + + if (!fence_excl || fence_get_rcu(fence_excl)) { + unsigned i; + + for (i = 0; i < shared_count; ++i) { + if (fence_get_rcu(shared[i])) + continue; + + /* uh oh, refcount failed, abort and retry */ + while (i--) + fence_put(shared[i]); + + if (fence_excl) { + fence_put(fence_excl); + fence_excl = NULL; + } + + retry = 1; + break; + } + } else + retry = 1; + +unlock: + rcu_read_unlock(); + } + *pshared_count = shared_count; + if (shared_count) + *pshared = shared; + else { + *pshared = NULL; + kfree(shared); + } + *pfence_excl = fence_excl; + + return ret; +} +EXPORT_SYMBOL_GPL(reservation_object_get_fences_rcu); + +long reservation_object_wait_timeout_rcu(struct reservation_object *obj, + bool wait_all, bool intr, + unsigned long timeout) +{ + struct fence *fence; + unsigned seq, shared_count, i = 0; + long ret = timeout; + +retry: + fence = NULL; + shared_count = 0; + seq = read_seqcount_begin(&obj->seq); + rcu_read_lock(); + + if (wait_all) { + struct reservation_object_list *fobj = 
rcu_dereference(obj->fence); + + if (fobj) + shared_count = fobj->shared_count; + + if (read_seqcount_retry(&obj->seq, seq)) + goto unlock_retry; + + for (i = 0; i < shared_count; ++i) { + struct fence *lfence = rcu_dereference(fobj->shared[i]); + + if (test_bit(FENCE_FLAG_SIGNALED_BIT, &lfence->flags)) + continue; + + if (!fence_get_rcu(lfence)) + goto unlock_retry; + + if (fence_is_signaled(lfence)) { + fence_put(lfence); + continue; + } + + fence = lfence; + break; + } + } + + if (!shared_count) { + struct fence *fence_excl = rcu_dereference(obj->fence_excl); + + if (read_seqcount_retry(&obj->seq, seq)) + goto unlock_retry; + + if (fence_excl && + !test_bit(FENCE_FLAG_SIGNALED_BIT, &fence_excl->flags)) { + if (!fence_get_rcu(fence_excl)) + goto unlock_retry; + + if (fence_is_signaled(fence_excl)) + fence_put(fence_excl); + else + fence = fence_excl; + } + } + + rcu_read_unlock(); + if (fence) { + ret = fence_wait_timeout(fence, intr, ret); + fence_put(fence); + if (ret > 0 && wait_all && (i + 1 < shared_count)) + goto retry; + } + return ret; + +unlock_retry: + rcu_read_unlock(); + goto retry; +} +EXPORT_SYMBOL_GPL(reservation_object_wait_timeout_rcu); + + +static inline int +reservation_object_test_signaled_single(struct fence *passed_fence) +{ + struct fence *fence, *lfence = passed_fence; + int ret = 1; + + if (!test_bit(FENCE_FLAG_SIGNALED_BIT, &lfence->flags)) { + int ret; + + fence = fence_get_rcu(lfence); + if (!fence) + return -1; + + ret = !!fence_is_signaled(fence); + fence_put(fence); + } + return ret; +} + +bool reservation_object_test_signaled_rcu(struct reservation_object *obj, + bool test_all) +{ + unsigned seq, shared_count; + int ret = true; + +retry: + shared_count = 0; + seq = read_seqcount_begin(&obj->seq); + rcu_read_lock(); + + if (test_all) { + unsigned i; + + struct reservation_object_list *fobj = rcu_dereference(obj->fence); + + if (fobj) + shared_count = fobj->shared_count; + + if (read_seqcount_retry(&obj->seq, seq)) + goto unlock_retry; + + for (i = 0; i < shared_count; ++i) { + struct fence *fence = rcu_dereference(fobj->shared[i]); + + ret = reservation_object_test_signaled_single(fence); + if (ret < 0) + goto unlock_retry; + else if (!ret) + break; + } + + /* + * There could be a read_seqcount_retry here, but nothing cares + * about whether it's the old or newer fence pointers that are + * signaled. That race could still have happened after checking + * read_seqcount_retry. If you care, use ww_mutex_lock. 
+ */ + } + + if (!shared_count) { + struct fence *fence_excl = rcu_dereference(obj->fence_excl); + + if (read_seqcount_retry(&obj->seq, seq)) + goto unlock_retry; + + if (fence_excl) { + ret = reservation_object_test_signaled_single(fence_excl); + if (ret < 0) + goto unlock_retry; + } + } + + rcu_read_unlock(); + return ret; + +unlock_retry: + rcu_read_unlock(); + goto retry; +} +EXPORT_SYMBOL_GPL(reservation_object_test_signaled_rcu); diff --git a/include/linux/fence.h b/include/linux/fence.h index b935cc650123..d174585b874b 100644 --- a/include/linux/fence.h +++ b/include/linux/fence.h @@ -28,6 +28,7 @@ #include #include #include +#include struct fence; struct fence_ops; @@ -37,6 +38,7 @@ struct fence_cb; * struct fence - software synchronization primitive * @refcount: refcount for this fence * @ops: fence_ops associated with this fence + * @rcu: used for releasing fence with kfree_rcu * @cb_list: list of all callbacks to call * @lock: spin_lock_irqsave used for locking * @context: execution context this fence belongs to, returned by @@ -70,6 +72,7 @@ struct fence_cb; struct fence { struct kref refcount; const struct fence_ops *ops; + struct rcu_head rcu; struct list_head cb_list; spinlock_t *lock; unsigned context, seqno; @@ -191,6 +194,20 @@ static inline struct fence *fence_get(struct fence *fence) return fence; } +/** + * fence_get_rcu - get a fence from a reservation_object_list with rcu read lock + * @fence: [in] fence to increase refcount of + * + * Function returns NULL if no refcount could be obtained, or the fence. + */ +static inline struct fence *fence_get_rcu(struct fence *fence) +{ + if (kref_get_unless_zero(&fence->refcount)) + return fence; + else + return NULL; +} + /** * fence_put - decreases refcount of the fence * @fence: [in] fence to reduce refcount of diff --git a/include/linux/reservation.h b/include/linux/reservation.h index 2affe67dea6e..5a0b64cf68b4 100644 --- a/include/linux/reservation.h +++ b/include/linux/reservation.h @@ -42,22 +42,29 @@ #include #include #include +#include +#include extern struct ww_class reservation_ww_class; +extern struct lock_class_key reservation_seqcount_class; +extern const char reservation_seqcount_string[]; struct reservation_object_list { + struct rcu_head rcu; u32 shared_count, shared_max; - struct fence *shared[]; + struct fence __rcu *shared[]; }; struct reservation_object { struct ww_mutex lock; + seqcount_t seq; - struct fence *fence_excl; - struct reservation_object_list *fence; + struct fence __rcu *fence_excl; + struct reservation_object_list __rcu *fence; struct reservation_object_list *staged; }; +#define reservation_object_held(obj) lockdep_is_held(&(obj)->lock.base) #define reservation_object_assert_held(obj) \ lockdep_assert_held(&(obj)->lock.base) @@ -66,8 +73,9 @@ reservation_object_init(struct reservation_object *obj) { ww_mutex_init(&obj->lock, &reservation_ww_class); - obj->fence_excl = NULL; - obj->fence = NULL; + __seqcount_init(&obj->seq, reservation_seqcount_string, &reservation_seqcount_class); + RCU_INIT_POINTER(obj->fence, NULL); + RCU_INIT_POINTER(obj->fence_excl, NULL); obj->staged = NULL; } @@ -76,18 +84,20 @@ reservation_object_fini(struct reservation_object *obj) { int i; struct reservation_object_list *fobj; + struct fence *excl; /* * This object should be dead and all references must have - * been released to it. + * been released to it, so no need to be protected with rcu. 
*/ - if (obj->fence_excl) - fence_put(obj->fence_excl); + excl = rcu_dereference_protected(obj->fence_excl, 1); + if (excl) + fence_put(excl); - fobj = obj->fence; + fobj = rcu_dereference_protected(obj->fence, 1); if (fobj) { for (i = 0; i < fobj->shared_count; ++i) - fence_put(fobj->shared[i]); + fence_put(rcu_dereference_protected(fobj->shared[i], 1)); kfree(fobj); } @@ -99,17 +109,15 @@ reservation_object_fini(struct reservation_object *obj) static inline struct reservation_object_list * reservation_object_get_list(struct reservation_object *obj) { - reservation_object_assert_held(obj); - - return obj->fence; + return rcu_dereference_protected(obj->fence, + reservation_object_held(obj)); } static inline struct fence * reservation_object_get_excl(struct reservation_object *obj) { - reservation_object_assert_held(obj); - - return obj->fence_excl; + return rcu_dereference_protected(obj->fence_excl, + reservation_object_held(obj)); } int reservation_object_reserve_shared(struct reservation_object *obj); @@ -119,4 +127,16 @@ void reservation_object_add_shared_fence(struct reservation_object *obj, void reservation_object_add_excl_fence(struct reservation_object *obj, struct fence *fence); +int reservation_object_get_fences_rcu(struct reservation_object *obj, + struct fence **pfence_excl, + unsigned *pshared_count, + struct fence ***pshared); + +long reservation_object_wait_timeout_rcu(struct reservation_object *obj, + bool wait_all, bool intr, + unsigned long timeout); + +bool reservation_object_test_signaled_rcu(struct reservation_object *obj, + bool test_all); + #endif /* _LINUX_RESERVATION_H */ -- cgit v1.2.3-59-g8ed1b From 9f12fbe603f7ae346b2b46008e325f0c9a68e55d Mon Sep 17 00:00:00 2001 From: Zi Shen Lim Date: Thu, 3 Jul 2014 07:56:54 -0700 Subject: net: filter: move load_pointer() into filter.h load_pointer() is already a static inline function. Let's move it into filter.h so BPF JIT implementations can reuse this function. Since we're exporting this function, let's also rename it to bpf_load_pointer() for clarity. Signed-off-by: Zi Shen Lim Reviewed-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. 
Miller --- include/linux/filter.h | 13 +++++++++++++ net/core/filter.c | 15 +++------------ 2 files changed, 16 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index a7e3c48d73a7..b885dcb7eaca 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -6,6 +6,7 @@ #include #include +#include #include #include @@ -406,6 +407,18 @@ static inline u16 bpf_anc_helper(const struct sock_filter *ftest) } } +void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, + int k, unsigned int size); + +static inline void *bpf_load_pointer(const struct sk_buff *skb, int k, + unsigned int size, void *buffer) +{ + if (k >= 0) + return skb_header_pointer(skb, k, size, buffer); + + return bpf_internal_load_pointer_neg_helper(skb, k, size); +} + #ifdef CONFIG_BPF_JIT #include #include diff --git a/net/core/filter.c b/net/core/filter.c index 1dbf6462f766..87af1e3e56c0 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -84,15 +84,6 @@ void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, uns return NULL; } -static inline void *load_pointer(const struct sk_buff *skb, int k, - unsigned int size, void *buffer) -{ - if (k >= 0) - return skb_header_pointer(skb, k, size, buffer); - - return bpf_internal_load_pointer_neg_helper(skb, k, size); -} - /** * sk_filter - run a packet through a socket filter * @sk: sock associated with &sk_buff @@ -537,7 +528,7 @@ load_word: * BPF_R0 - 8/16/32-bit skb data converted to cpu endianness */ - ptr = load_pointer((struct sk_buff *) (unsigned long) CTX, off, 4, &tmp); + ptr = bpf_load_pointer((struct sk_buff *) (unsigned long) CTX, off, 4, &tmp); if (likely(ptr != NULL)) { BPF_R0 = get_unaligned_be32(ptr); CONT; @@ -547,7 +538,7 @@ load_word: LD_ABS_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + imm32)) */ off = IMM; load_half: - ptr = load_pointer((struct sk_buff *) (unsigned long) CTX, off, 2, &tmp); + ptr = bpf_load_pointer((struct sk_buff *) (unsigned long) CTX, off, 2, &tmp); if (likely(ptr != NULL)) { BPF_R0 = get_unaligned_be16(ptr); CONT; @@ -557,7 +548,7 @@ load_half: LD_ABS_B: /* BPF_R0 = *(u8 *) (skb->data + imm32) */ off = IMM; load_byte: - ptr = load_pointer((struct sk_buff *) (unsigned long) CTX, off, 1, &tmp); + ptr = bpf_load_pointer((struct sk_buff *) (unsigned long) CTX, off, 1, &tmp); if (likely(ptr != NULL)) { BPF_R0 = *(u8 *)ptr; CONT; -- cgit v1.2.3-59-g8ed1b From 667c24917144e34880f821486bf0a6e4d05a3a14 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 8 Jul 2014 18:02:56 -0400 Subject: cgroup: introduce cgroup->subtree_control cgroup is implementing support for subsystem dependency which would require a way to enable a subsystem even when it's not directly configured through "cgroup.subtree_control". Previously, cgroup->child_subsys_mask directly reflected "cgroup.subtree_control" and the enabled subsystems in the child cgroups. This patch adds cgroup->subtree_control which "cgroup.subtree_control" operates on. cgroup->child_subsys_mask is now calculated from cgroup->subtree_control by cgroup_refresh_child_subsys_mask(), which sets it identical to cgroup->subtree_control for now. This will allow using cgroup->child_subsys_mask for all the enabled subsystems including the implicit ones and ->subtree_control for tracking the explicitly requested ones. This patch keeps the two masks identical and doesn't introduce any behavior changes. 
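Illustrative only, not part of the patch: after this split the two masks answer different questions, which a hypothetical helper pair makes explicit.

    /* Hedged sketch: which mask to consult for what (names invented). */
    static bool example_knob_requested(struct cgroup *cgrp, int ssid)
    {
            /* explicitly written to "cgroup.subtree_control" by userland */
            return cgrp->subtree_control & (1 << ssid);
    }

    static bool example_css_enabled(struct cgroup *cgrp, int ssid)
    {
            /* effectively enabled; may later include implicit dependencies */
            return cgrp->child_subsys_mask & (1 << ssid);
    }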
Signed-off-by: Tejun Heo Acked-by: Li Zefan Acked-by: Johannes Weiner --- include/linux/cgroup.h | 8 +++++++- kernel/cgroup.c | 46 +++++++++++++++++++++++++++++----------------- 2 files changed, 36 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 8a111dd42d7a..8d52c8e5b510 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -203,7 +203,13 @@ struct cgroup { struct kernfs_node *kn; /* cgroup kernfs entry */ struct kernfs_node *populated_kn; /* kn for "cgroup.subtree_populated" */ - /* the bitmask of subsystems enabled on the child cgroups */ + /* + * The bitmask of subsystems enabled on the child cgroups. + * ->subtree_control is the one configured through + * "cgroup.subtree_control" while ->child_subsys_mask is the + * effective one which may have more subsystems enabled. + */ + unsigned int subtree_control; unsigned int child_subsys_mask; /* Private pointers for each registered subsystem */ diff --git a/kernel/cgroup.c b/kernel/cgroup.c index a46d7e2012b4..14a9d88eacf9 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1036,6 +1036,11 @@ static void cgroup_put(struct cgroup *cgrp) css_put(&cgrp->self); } +static void cgroup_refresh_child_subsys_mask(struct cgroup *cgrp) +{ + cgrp->child_subsys_mask = cgrp->subtree_control; +} + /** * cgroup_kn_unlock - unlocking helper for cgroup kernfs methods * @kn: the kernfs_node being serviced @@ -1208,12 +1213,15 @@ static int rebind_subsystems(struct cgroup_root *dst_root, unsigned int ss_mask) up_write(&css_set_rwsem); src_root->subsys_mask &= ~(1 << ssid); - src_root->cgrp.child_subsys_mask &= ~(1 << ssid); + src_root->cgrp.subtree_control &= ~(1 << ssid); + cgroup_refresh_child_subsys_mask(&src_root->cgrp); /* default hierarchy doesn't enable controllers by default */ dst_root->subsys_mask |= 1 << ssid; - if (dst_root != &cgrp_dfl_root) - dst_root->cgrp.child_subsys_mask |= 1 << ssid; + if (dst_root != &cgrp_dfl_root) { + dst_root->cgrp.subtree_control |= 1 << ssid; + cgroup_refresh_child_subsys_mask(&dst_root->cgrp); + } if (ss->bind) ss->bind(css); @@ -2454,7 +2462,7 @@ static int cgroup_controllers_show(struct seq_file *seq, void *v) { struct cgroup *cgrp = seq_css(seq)->cgroup; - cgroup_print_ss_mask(seq, cgroup_parent(cgrp)->child_subsys_mask); + cgroup_print_ss_mask(seq, cgroup_parent(cgrp)->subtree_control); return 0; } @@ -2463,7 +2471,7 @@ static int cgroup_subtree_control_show(struct seq_file *seq, void *v) { struct cgroup *cgrp = seq_css(seq)->cgroup; - cgroup_print_ss_mask(seq, cgrp->child_subsys_mask); + cgroup_print_ss_mask(seq, cgrp->subtree_control); return 0; } @@ -2608,7 +2616,7 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, for_each_subsys(ss, ssid) { if (enable & (1 << ssid)) { - if (cgrp->child_subsys_mask & (1 << ssid)) { + if (cgrp->subtree_control & (1 << ssid)) { enable &= ~(1 << ssid); continue; } @@ -2616,7 +2624,7 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, /* unavailable or not enabled on the parent? 
*/ if (!(cgrp_dfl_root.subsys_mask & (1 << ssid)) || (cgroup_parent(cgrp) && - !(cgroup_parent(cgrp)->child_subsys_mask & (1 << ssid)))) { + !(cgroup_parent(cgrp)->subtree_control & (1 << ssid)))) { ret = -ENOENT; goto out_unlock; } @@ -2644,14 +2652,14 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, return restart_syscall(); } } else if (disable & (1 << ssid)) { - if (!(cgrp->child_subsys_mask & (1 << ssid))) { + if (!(cgrp->subtree_control & (1 << ssid))) { disable &= ~(1 << ssid); continue; } /* a child has it enabled? */ cgroup_for_each_live_child(child, cgrp) { - if (child->child_subsys_mask & (1 << ssid)) { + if (child->subtree_control & (1 << ssid)) { ret = -EBUSY; goto out_unlock; } @@ -2665,7 +2673,7 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, } /* - * Except for the root, child_subsys_mask must be zero for a cgroup + * Except for the root, subtree_control must be zero for a cgroup * with tasks so that child cgroups don't compete against tasks. */ if (enable && cgroup_parent(cgrp) && !list_empty(&cgrp->cset_links)) { @@ -2673,8 +2681,9 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, goto out_unlock; } - cgrp->child_subsys_mask |= enable; - cgrp->child_subsys_mask &= ~disable; + cgrp->subtree_control |= enable; + cgrp->subtree_control &= ~disable; + cgroup_refresh_child_subsys_mask(cgrp); /* create new csses */ for_each_subsys(ss, ssid) { @@ -2713,8 +2722,9 @@ out_unlock: return ret ?: nbytes; err_undo_css: - cgrp->child_subsys_mask &= ~enable; - cgrp->child_subsys_mask |= disable; + cgrp->subtree_control &= ~enable; + cgrp->subtree_control |= disable; + cgroup_refresh_child_subsys_mask(cgrp); for_each_subsys(ss, ssid) { if (!(enable & (1 << ssid))) @@ -4428,10 +4438,12 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, /* * On the default hierarchy, a child doesn't automatically inherit - * child_subsys_mask from the parent. Each is configured manually. + * subtree_control from the parent. Each is configured manually. */ - if (!cgroup_on_dfl(cgrp)) - cgrp->child_subsys_mask = parent->child_subsys_mask; + if (!cgroup_on_dfl(cgrp)) { + cgrp->subtree_control = parent->subtree_control; + cgroup_refresh_child_subsys_mask(cgrp); + } kernfs_activate(kn); -- cgit v1.2.3-59-g8ed1b From f63070d350e3562baa6196f1043e01cd8da2509a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 8 Jul 2014 18:02:57 -0400 Subject: cgroup: make interface files visible iff enabled on cgroup->subtree_control cgroup is implementing support for subsystem dependency which would require a way to enable a subsystem even when it's not directly configured through "cgroup.subtree_control". The preceding patch distinguished cgroup->subtree_control and ->child_subsys_mask where the former is the subsystems explicitly configured by the userland and the latter is all enabled subsystems currently is equal to the former but will include subsystems implicitly enabled through dependency. Subsystems which are enabled due to dependency shouldn't be visible to userland. This patch updates cgroup_subtree_control_write() and create_css() such that interface files are not created for implicitly enabled subsytems. * @visible paramter is added to create_css(). Interface files are created only when true. * If an already implicitly enabled subsystem is turned on through "cgroup.subtree_control", the existing css should be used. css draining is skipped. 
* cgroup_subtree_control_write() computes the new target cgroup->child_subsys_mask and create/kill or show/hide csses accordingly. As the two subsystem masks are still kept identical, this patch doesn't introduce any behavior changes. Signed-off-by: Tejun Heo Acked-by: Li Zefan Acked-by: Johannes Weiner --- include/linux/cgroup.h | 2 ++ kernel/cgroup.c | 78 +++++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 66 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 8d52c8e5b510..5287f931680a 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -208,6 +208,8 @@ struct cgroup { * ->subtree_control is the one configured through * "cgroup.subtree_control" while ->child_subsys_mask is the * effective one which may have more subsystems enabled. + * Controller knobs are made available iff it's enabled in + * ->subtree_control. */ unsigned int subtree_control; unsigned int child_subsys_mask; diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 14a9d88eacf9..331fa296c7e0 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -186,7 +186,8 @@ static void cgroup_put(struct cgroup *cgrp); static int rebind_subsystems(struct cgroup_root *dst_root, unsigned int ss_mask); static int cgroup_destroy_locked(struct cgroup *cgrp); -static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss); +static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss, + bool visible); static void css_release(struct percpu_ref *ref); static void kill_css(struct cgroup_subsys_state *css); static int cgroup_addrm_files(struct cgroup *cgrp, struct cftype cfts[], @@ -2577,6 +2578,7 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, loff_t off) { unsigned int enable = 0, disable = 0; + unsigned int css_enable, css_disable, old_ctrl, new_ctrl; struct cgroup *cgrp, *child; struct cgroup_subsys *ss; char *tok; @@ -2629,6 +2631,13 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, goto out_unlock; } + /* + * @ss is already enabled through dependency and + * we'll just make it visible. Skip draining. + */ + if (cgrp->child_subsys_mask & (1 << ssid)) + continue; + /* * Because css offlining is asynchronous, userland * might try to re-enable the same controller while @@ -2681,17 +2690,39 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, goto out_unlock; } + /* + * Update subsys masks and calculate what needs to be done. More + * subsystems than specified may need to be enabled or disabled + * depending on subsystem dependencies. + */ cgrp->subtree_control |= enable; cgrp->subtree_control &= ~disable; + + old_ctrl = cgrp->child_subsys_mask; cgroup_refresh_child_subsys_mask(cgrp); + new_ctrl = cgrp->child_subsys_mask; + + css_enable = ~old_ctrl & new_ctrl; + css_disable = old_ctrl & ~new_ctrl; + enable |= css_enable; + disable |= css_disable; - /* create new csses */ + /* + * Create new csses or make the existing ones visible. A css is + * created invisible if it's being implicitly enabled through + * dependency. An invisible css is made visible when the userland + * explicitly enables it. 
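A controller that can be enabled implicitly would typically wire up the new callback; a hedged sketch with invented names, with the exact semantics spelled out in the changelog that follows:

    /* Hedged sketch: opting a controller into the reset-on-hide scheme. */
    static void example_css_reset(struct cgroup_subsys_state *css)
    {
            /* drop all limits and configuration back to the vanilla state */
    }

    struct cgroup_subsys example_cgrp_subsys = {
            /* ...usual css_alloc/css_free/attach callbacks... */
            .css_reset      = example_css_reset,
    };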
+ */ for_each_subsys(ss, ssid) { if (!(enable & (1 << ssid))) continue; cgroup_for_each_live_child(child, cgrp) { - ret = create_css(child, ss); + if (css_enable & (1 << ssid)) + ret = create_css(child, ss, + cgrp->subtree_control & (1 << ssid)); + else + ret = cgroup_populate_dir(child, 1 << ssid); if (ret) goto err_undo_css; } @@ -2706,13 +2737,21 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, if (ret) goto err_undo_css; - /* all tasks are now migrated away from the old csses, kill them */ + /* + * All tasks are migrated out of disabled csses. Kill or hide + * them. A css is hidden when the userland requests it to be + * disabled while other subsystems are still depending on it. + */ for_each_subsys(ss, ssid) { if (!(disable & (1 << ssid))) continue; - cgroup_for_each_live_child(child, cgrp) - kill_css(cgroup_css(child, ss)); + cgroup_for_each_live_child(child, cgrp) { + if (css_disable & (1 << ssid)) + kill_css(cgroup_css(child, ss)); + else + cgroup_clear_dir(child, 1 << ssid); + } } kernfs_activate(cgrp->kn); @@ -2732,8 +2771,14 @@ err_undo_css: cgroup_for_each_live_child(child, cgrp) { struct cgroup_subsys_state *css = cgroup_css(child, ss); - if (css) + + if (!css) + continue; + + if (css_enable & (1 << ssid)) kill_css(css); + else + cgroup_clear_dir(child, 1 << ssid); } } goto out_unlock; @@ -4282,12 +4327,14 @@ static void offline_css(struct cgroup_subsys_state *css) * create_css - create a cgroup_subsys_state * @cgrp: the cgroup new css will be associated with * @ss: the subsys of new css + * @visible: whether to create control knobs for the new css or not * * Create a new css associated with @cgrp - @ss pair. On success, the new - * css is online and installed in @cgrp with all interface files created. - * Returns 0 on success, -errno on failure. + * css is online and installed in @cgrp with all interface files created if + * @visible. Returns 0 on success, -errno on failure. */ -static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss) +static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss, + bool visible) { struct cgroup *parent = cgroup_parent(cgrp); struct cgroup_subsys_state *parent_css = cgroup_css(parent, ss); @@ -4311,9 +4358,11 @@ static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss) goto err_free_percpu_ref; css->id = err; - err = cgroup_populate_dir(cgrp, 1 << ss->id); - if (err) - goto err_free_id; + if (visible) { + err = cgroup_populate_dir(cgrp, 1 << ss->id); + if (err) + goto err_free_id; + } /* @css is ready to be brought online now, make it visible */ list_add_tail_rcu(&css->sibling, &parent_css->children); @@ -4430,7 +4479,8 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, /* let's create and online css's */ for_each_subsys(ss, ssid) { if (parent->child_subsys_mask & (1 << ssid)) { - ret = create_css(cgrp, ss); + ret = create_css(cgrp, ss, + parent->subtree_control & (1 << ssid)); if (ret) goto out_destroy; } -- cgit v1.2.3-59-g8ed1b From b4536f0cab2b18414e26101a2b9d484c5cbea0c0 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 8 Jul 2014 18:02:57 -0400 Subject: cgroup: implement cgroup_subsys->css_reset() cgroup is implementing support for subsystem dependency which would require a way to enable a subsystem even when it's not directly configured through "cgroup.subtree_control". The previous patches added support for explicitly and implicitly enabled subsystems and showing/hiding their interface files. 
An explicitly enabled subsystem may become implicitly enabled if it's turned off through "cgroup.subtree_control" but there are subsystems depending on it. In such cases, the subsystem, as it's turned off when seen from userland, shouldn't enforce any resource control. Also, the subsystem may be explicitly turned on later again and its interface files should be as close to the intial state as possible. This patch adds cgroup_subsys->css_reset() which is invoked when a css is hidden. The callback should disable resource control and reset the state to the vanilla state. Signed-off-by: Tejun Heo Acked-by: Li Zefan Acked-by: Johannes Weiner --- Documentation/cgroups/cgroups.txt | 14 ++++++++++++++ include/linux/cgroup.h | 1 + kernel/cgroup.c | 16 ++++++++++++---- 3 files changed, 27 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt index 821de56d1580..10c949b293e4 100644 --- a/Documentation/cgroups/cgroups.txt +++ b/Documentation/cgroups/cgroups.txt @@ -599,6 +599,20 @@ fork. If this method returns 0 (success) then this should remain valid while the caller holds cgroup_mutex and it is ensured that either attach() or cancel_attach() will be called in future. +void css_reset(struct cgroup_subsys_state *css) +(cgroup_mutex held by caller) + +An optional operation which should restore @css's configuration to the +initial state. This is currently only used on the unified hierarchy +when a subsystem is disabled on a cgroup through +"cgroup.subtree_control" but should remain enabled because other +subsystems depend on it. cgroup core makes such a css invisible by +removing the associated interface files and invokes this callback so +that the hidden subsystem can return to the initial neutral state. +This prevents unexpected resource control from a hidden css and +ensures that the configuration is in the initial state when it is made +visible again later. + void cancel_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) (cgroup_mutex held by caller) diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 5287f931680a..db99e3b923b1 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -642,6 +642,7 @@ struct cgroup_subsys { int (*css_online)(struct cgroup_subsys_state *css); void (*css_offline)(struct cgroup_subsys_state *css); void (*css_free)(struct cgroup_subsys_state *css); + void (*css_reset)(struct cgroup_subsys_state *css); int (*can_attach)(struct cgroup_subsys_state *css, struct cgroup_taskset *tset); diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 331fa296c7e0..3a6b77d7ba4a 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2740,17 +2740,25 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, /* * All tasks are migrated out of disabled csses. Kill or hide * them. A css is hidden when the userland requests it to be - * disabled while other subsystems are still depending on it. + * disabled while other subsystems are still depending on it. The + * css must not actively control resources and be in the vanilla + * state if it's made visible again later. Controllers which may + * be depended upon should provide ->css_reset() for this purpose. 
*/ for_each_subsys(ss, ssid) { if (!(disable & (1 << ssid))) continue; cgroup_for_each_live_child(child, cgrp) { - if (css_disable & (1 << ssid)) - kill_css(cgroup_css(child, ss)); - else + struct cgroup_subsys_state *css = cgroup_css(child, ss); + + if (css_disable & (1 << ssid)) { + kill_css(css); + } else { cgroup_clear_dir(child, 1 << ssid); + if (ss->css_reset) + ss->css_reset(css); + } } } -- cgit v1.2.3-59-g8ed1b From af0ba6789c8e43518635606d0af1ff475ba7471a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 8 Jul 2014 18:02:57 -0400 Subject: cgroup: implement cgroup_subsys->depends_on Currently, the blkio subsystem attributes all of writeback IOs to the root. One of the issues is that there's no way to tell who originated a writeback IO from block layer. Those IOs are usually issued asynchronously from a task which didn't have anything to do with actually generating the dirty pages. The memory subsystem, when enabled, already keeps track of the ownership of each dirty page and it's desirable for blkio to piggyback instead of adding its own per-page tag. blkio piggybacking on memory is an implementation detail which preferably should be handled automatically without requiring explicit userland action. To achieve that, this patch implements cgroup_subsys->depends_on which contains the mask of subsystems which should be enabled together when the subsystem is enabled. The previous patches already implemented the support for enabled but invisible subsystems and cgroup_subsys->depends_on can be easily implemented by updating cgroup_refresh_child_subsys_mask() so that it calculates cgroup->child_subsys_mask considering cgroup_subsys->depends_on of the explicitly enabled subsystems. Documentation/cgroups/unified-hierarchy.txt is updated to explain that subsystems may not become immediately available after being unused from userland and that dependency could be a factor in it. As subsystems may already keep residual references, this doesn't significantly change how subsystem rebinding can be used. Signed-off-by: Tejun Heo Acked-by: Li Zefan Acked-by: Johannes Weiner --- Documentation/cgroups/unified-hierarchy.txt | 23 ++++++++++++-- include/linux/cgroup.h | 9 ++++++ kernel/cgroup.c | 49 ++++++++++++++++++++++++++++- 3 files changed, 77 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/Documentation/cgroups/unified-hierarchy.txt b/Documentation/cgroups/unified-hierarchy.txt index 324b182e6000..a7a2205539a7 100644 --- a/Documentation/cgroups/unified-hierarchy.txt +++ b/Documentation/cgroups/unified-hierarchy.txt @@ -97,9 +97,26 @@ change soon. All controllers which are not bound to other hierarchies are automatically bound to unified hierarchy and show up at the root of it. Controllers which are enabled only in the root of unified -hierarchy can be bound to other hierarchies at any time. This allows -mixing unified hierarchy with the traditional multiple hierarchies in -a fully backward compatible way. +hierarchy can be bound to other hierarchies. This allows mixing +unified hierarchy with the traditional multiple hierarchies in a fully +backward compatible way. + +A controller can be moved across hierarchies only after the controller +is no longer referenced in its current hierarchy. Because per-cgroup +controller states are destroyed asynchronously and controllers may +have lingering references, a controller may not show up immediately on +the unified hierarchy after the final umount of the previous +hierarchy. 
Similarly, a controller should be fully disabled to be +moved out of the unified hierarchy and it may take some time for the +disabled controller to become available for other hierarchies; +furthermore, due to dependencies among controllers, other controllers +may need to be disabled too. + +While useful for development and manual configurations, dynamically +moving controllers between the unified and other hierarchies is +strongly discouraged for production use. It is recommended to decide +the hierarchies and controller associations before starting using the +controllers. 2-2. cgroup.subtree_control diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index db99e3b923b1..28853e771f3b 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -693,6 +693,15 @@ struct cgroup_subsys { /* base cftypes, automatically registered with subsys itself */ struct cftype *base_cftypes; + + /* + * A subsystem may depend on other subsystems. When such subsystem + * is enabled on a cgroup, the depended-upon subsystems are enabled + * together if available. Subsystems enabled due to dependency are + * not visible to userland until explicitly enabled. The following + * specifies the mask of subsystems that this one depends on. + */ + unsigned int depends_on; }; #define SUBSYS(_x) extern struct cgroup_subsys _x ## _cgrp_subsys; diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 3a6b77d7ba4a..cd02e99d5d3b 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1037,9 +1037,56 @@ static void cgroup_put(struct cgroup *cgrp) css_put(&cgrp->self); } +/** + * cgroup_refresh_child_subsys_mask - update child_subsys_mask + * @cgrp: the target cgroup + * + * On the default hierarchy, a subsystem may request other subsystems to be + * enabled together through its ->depends_on mask. In such cases, more + * subsystems than specified in "cgroup.subtree_control" may be enabled. + * + * This function determines which subsystems need to be enabled given the + * current @cgrp->subtree_control and records it in + * @cgrp->child_subsys_mask. The resulting mask is always a superset of + * @cgrp->subtree_control and follows the usual hierarchy rules. + */ static void cgroup_refresh_child_subsys_mask(struct cgroup *cgrp) { - cgrp->child_subsys_mask = cgrp->subtree_control; + struct cgroup *parent = cgroup_parent(cgrp); + unsigned int cur_ss_mask = cgrp->subtree_control; + struct cgroup_subsys *ss; + int ssid; + + lockdep_assert_held(&cgroup_mutex); + + if (!cgroup_on_dfl(cgrp)) { + cgrp->child_subsys_mask = cur_ss_mask; + return; + } + + while (true) { + unsigned int new_ss_mask = cur_ss_mask; + + for_each_subsys(ss, ssid) + if (cur_ss_mask & (1 << ssid)) + new_ss_mask |= ss->depends_on; + + /* + * Mask out subsystems which aren't available. This can + * happen only if some depended-upon subsystems were bound + * to non-default hierarchies. 
+ */ + if (parent) + new_ss_mask &= parent->child_subsys_mask; + else + new_ss_mask &= cgrp->root->subsys_mask; + + if (new_ss_mask == cur_ss_mask) + break; + cur_ss_mask = new_ss_mask; + } + + cgrp->child_subsys_mask = cur_ss_mask; } /** -- cgit v1.2.3-59-g8ed1b From 021f653791ad17e03f98aaa7fb933816ae16f161 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 30 Jun 2014 16:01:31 +0100 Subject: irqchip: gic-v3: Initial support for GICv3 The Generic Interrupt Controller (version 3) offers services that are similar to GICv2, with a number of additional features: - Affinity routing based on the CPU MPIDR (ARE) - System register for the CPU interfaces (SRE) - Support for more that 8 CPUs - Locality-specific Peripheral Interrupts (LPIs) - Interrupt Translation Services (ITS) This patch adds preliminary support for GICv3 with ARE and SRE, non-secure mode only. It relies on higher exception levels to grant ARE and SRE access. Support for LPI and ITS will be added at a later time. Cc: Thomas Gleixner Cc: Jason Cooper Reviewed-by: Zi Shen Lim Reviewed-by: Christoffer Dall Reviewed-by: Tirumalesh Chalamarla Reviewed-by: Yun Wu Reviewed-by: Zhen Lei Tested-by: Tirumalesh Chalamarla Tested-by: Radha Mohan Chintakuntla Acked-by: Radha Mohan Chintakuntla Acked-by: Catalin Marinas Signed-off-by: Marc Zyngier Reviewed-by: Mark Rutland Link: https://lkml.kernel.org/r/1404140510-5382-3-git-send-email-marc.zyngier@arm.com Signed-off-by: Jason Cooper --- arch/arm64/Kconfig | 1 + arch/arm64/kernel/head.S | 18 + arch/arm64/kernel/hyp-stub.S | 1 + drivers/irqchip/Kconfig | 5 + drivers/irqchip/Makefile | 1 + drivers/irqchip/irq-gic-v3.c | 692 +++++++++++++++++++++++++++++++++++++ include/linux/irqchip/arm-gic-v3.h | 198 +++++++++++ 7 files changed, 916 insertions(+) create mode 100644 drivers/irqchip/irq-gic-v3.c create mode 100644 include/linux/irqchip/arm-gic-v3.h (limited to 'include/linux') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 7295419165e1..be52492c2291 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -9,6 +9,7 @@ config ARM64 select ARM_AMBA select ARM_ARCH_TIMER select ARM_GIC + select ARM_GIC_V3 select BUILDTIME_EXTABLE_SORT select CLONE_BACKWARDS select COMMON_CLK diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index a96d3a6a63f6..96623502519c 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -22,6 +22,7 @@ #include #include +#include #include #include @@ -296,6 +297,23 @@ CPU_LE( bic x0, x0, #(3 << 24) ) // Clear the EE and E0E bits for EL1 msr cnthctl_el2, x0 msr cntvoff_el2, xzr // Clear virtual offset +#ifdef CONFIG_ARM_GIC_V3 + /* GICv3 system register access */ + mrs x0, id_aa64pfr0_el1 + ubfx x0, x0, #24, #4 + cmp x0, #1 + b.ne 3f + + mrs x0, ICC_SRE_EL2 + orr x0, x0, #ICC_SRE_EL2_SRE // Set ICC_SRE_EL2.SRE==1 + orr x0, x0, #ICC_SRE_EL2_ENABLE // Set ICC_SRE_EL2.Enable==1 + msr ICC_SRE_EL2, x0 + isb // Make sure SRE is now set + msr ICH_HCR_EL2, xzr // Reset ICC_HCR_EL2 to defaults + +3: +#endif + /* Populate ID registers. 
*/ mrs x0, midr_el1 mrs x1, mpidr_el1 diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S index 0959611d9ff1..a272f335c289 100644 --- a/arch/arm64/kernel/hyp-stub.S +++ b/arch/arm64/kernel/hyp-stub.S @@ -19,6 +19,7 @@ #include #include +#include #include #include diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index bbb746e35500..7f0c2a30267b 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -10,6 +10,11 @@ config ARM_GIC config GIC_NON_BANKED bool +config ARM_GIC_V3 + bool + select IRQ_DOMAIN + select MULTI_IRQ_HANDLER + config ARM_NVIC bool select IRQ_DOMAIN diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile index 9b9505c8e774..c57e642700d4 100644 --- a/drivers/irqchip/Makefile +++ b/drivers/irqchip/Makefile @@ -16,6 +16,7 @@ obj-$(CONFIG_ARCH_SUNXI) += irq-sun4i.o obj-$(CONFIG_ARCH_SUNXI) += irq-sunxi-nmi.o obj-$(CONFIG_ARCH_SPEAR3XX) += spear-shirq.o obj-$(CONFIG_ARM_GIC) += irq-gic.o irq-gic-common.o +obj-$(CONFIG_ARM_GIC_V3) += irq-gic-v3.o irq-gic-common.o obj-$(CONFIG_ARM_NVIC) += irq-nvic.o obj-$(CONFIG_ARM_VIC) += irq-vic.o obj-$(CONFIG_IMGPDC_IRQ) += irq-imgpdc.o diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c new file mode 100644 index 000000000000..81519bae0453 --- /dev/null +++ b/drivers/irqchip/irq-gic-v3.c @@ -0,0 +1,692 @@ +/* + * Copyright (C) 2013, 2014 ARM Limited, All Rights Reserved. + * Author: Marc Zyngier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +#include "irq-gic-common.h" +#include "irqchip.h" + +struct gic_chip_data { + void __iomem *dist_base; + void __iomem **redist_base; + void __percpu __iomem **rdist; + struct irq_domain *domain; + u64 redist_stride; + u32 redist_regions; + unsigned int irq_nr; +}; + +static struct gic_chip_data gic_data __read_mostly; + +#define gic_data_rdist() (this_cpu_ptr(gic_data.rdist)) +#define gic_data_rdist_rd_base() (*gic_data_rdist()) +#define gic_data_rdist_sgi_base() (gic_data_rdist_rd_base() + SZ_64K) + +/* Our default, arbitrary priority value. Linux only uses one anyway. */ +#define DEFAULT_PMR_VALUE 0xf0 + +static inline unsigned int gic_irq(struct irq_data *d) +{ + return d->hwirq; +} + +static inline int gic_irq_in_rdist(struct irq_data *d) +{ + return gic_irq(d) < 32; +} + +static inline void __iomem *gic_dist_base(struct irq_data *d) +{ + if (gic_irq_in_rdist(d)) /* SGI+PPI -> SGI_base for this CPU */ + return gic_data_rdist_sgi_base(); + + if (d->hwirq <= 1023) /* SPI -> dist_base */ + return gic_data.dist_base; + + if (d->hwirq >= 8192) + BUG(); /* LPI Detected!!! */ + + return NULL; +} + +static void gic_do_wait_for_rwp(void __iomem *base) +{ + u32 count = 1000000; /* 1s! 
*/ + + while (readl_relaxed(base + GICD_CTLR) & GICD_CTLR_RWP) { + count--; + if (!count) { + pr_err_ratelimited("RWP timeout, gone fishing\n"); + return; + } + cpu_relax(); + udelay(1); + }; +} + +/* Wait for completion of a distributor change */ +static void gic_dist_wait_for_rwp(void) +{ + gic_do_wait_for_rwp(gic_data.dist_base); +} + +/* Wait for completion of a redistributor change */ +static void gic_redist_wait_for_rwp(void) +{ + gic_do_wait_for_rwp(gic_data_rdist_rd_base()); +} + +/* Low level accessors */ +static u64 gic_read_iar(void) +{ + u64 irqstat; + + asm volatile("mrs %0, " __stringify(ICC_IAR1_EL1) : "=r" (irqstat)); + return irqstat; +} + +static void gic_write_pmr(u64 val) +{ + asm volatile("msr " __stringify(ICC_PMR_EL1) ", %0" : : "r" (val)); +} + +static void gic_write_ctlr(u64 val) +{ + asm volatile("msr " __stringify(ICC_CTLR_EL1) ", %0" : : "r" (val)); + isb(); +} + +static void gic_write_grpen1(u64 val) +{ + asm volatile("msr " __stringify(ICC_GRPEN1_EL1) ", %0" : : "r" (val)); + isb(); +} + +static void gic_write_sgi1r(u64 val) +{ + asm volatile("msr " __stringify(ICC_SGI1R_EL1) ", %0" : : "r" (val)); +} + +static void gic_enable_sre(void) +{ + u64 val; + + asm volatile("mrs %0, " __stringify(ICC_SRE_EL1) : "=r" (val)); + val |= ICC_SRE_EL1_SRE; + asm volatile("msr " __stringify(ICC_SRE_EL1) ", %0" : : "r" (val)); + isb(); + + /* + * Need to check that the SRE bit has actually been set. If + * not, it means that SRE is disabled at EL2. We're going to + * die painfully, and there is nothing we can do about it. + * + * Kindly inform the luser. + */ + asm volatile("mrs %0, " __stringify(ICC_SRE_EL1) : "=r" (val)); + if (!(val & ICC_SRE_EL1_SRE)) + pr_err("GIC: unable to set SRE (disabled at EL2), panic ahead\n"); +} + +static void gic_enable_redist(void) +{ + void __iomem *rbase; + u32 count = 1000000; /* 1s! 
*/ + u32 val; + + rbase = gic_data_rdist_rd_base(); + + /* Wake up this CPU redistributor */ + val = readl_relaxed(rbase + GICR_WAKER); + val &= ~GICR_WAKER_ProcessorSleep; + writel_relaxed(val, rbase + GICR_WAKER); + + while (readl_relaxed(rbase + GICR_WAKER) & GICR_WAKER_ChildrenAsleep) { + count--; + if (!count) { + pr_err_ratelimited("redist didn't wake up...\n"); + return; + } + cpu_relax(); + udelay(1); + }; +} + +/* + * Routines to disable, enable, EOI and route interrupts + */ +static void gic_poke_irq(struct irq_data *d, u32 offset) +{ + u32 mask = 1 << (gic_irq(d) % 32); + void (*rwp_wait)(void); + void __iomem *base; + + if (gic_irq_in_rdist(d)) { + base = gic_data_rdist_sgi_base(); + rwp_wait = gic_redist_wait_for_rwp; + } else { + base = gic_data.dist_base; + rwp_wait = gic_dist_wait_for_rwp; + } + + writel_relaxed(mask, base + offset + (gic_irq(d) / 32) * 4); + rwp_wait(); +} + +static int gic_peek_irq(struct irq_data *d, u32 offset) +{ + u32 mask = 1 << (gic_irq(d) % 32); + void __iomem *base; + + if (gic_irq_in_rdist(d)) + base = gic_data_rdist_sgi_base(); + else + base = gic_data.dist_base; + + return !!(readl_relaxed(base + offset + (gic_irq(d) / 32) * 4) & mask); +} + +static void gic_mask_irq(struct irq_data *d) +{ + gic_poke_irq(d, GICD_ICENABLER); +} + +static void gic_unmask_irq(struct irq_data *d) +{ + gic_poke_irq(d, GICD_ISENABLER); +} + +static void gic_eoi_irq(struct irq_data *d) +{ + gic_write_eoir(gic_irq(d)); +} + +static int gic_set_type(struct irq_data *d, unsigned int type) +{ + unsigned int irq = gic_irq(d); + void (*rwp_wait)(void); + void __iomem *base; + + /* Interrupt configuration for SGIs can't be changed */ + if (irq < 16) + return -EINVAL; + + if (type != IRQ_TYPE_LEVEL_HIGH && type != IRQ_TYPE_EDGE_RISING) + return -EINVAL; + + if (gic_irq_in_rdist(d)) { + base = gic_data_rdist_sgi_base(); + rwp_wait = gic_redist_wait_for_rwp; + } else { + base = gic_data.dist_base; + rwp_wait = gic_dist_wait_for_rwp; + } + + gic_configure_irq(irq, type, base, rwp_wait); + + return 0; +} + +static u64 gic_mpidr_to_affinity(u64 mpidr) +{ + u64 aff; + + aff = (MPIDR_AFFINITY_LEVEL(mpidr, 3) << 32 | + MPIDR_AFFINITY_LEVEL(mpidr, 2) << 16 | + MPIDR_AFFINITY_LEVEL(mpidr, 1) << 8 | + MPIDR_AFFINITY_LEVEL(mpidr, 0)); + + return aff; +} + +static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs) +{ + u64 irqnr; + + do { + irqnr = gic_read_iar(); + + if (likely(irqnr > 15 && irqnr < 1020)) { + u64 irq = irq_find_mapping(gic_data.domain, irqnr); + if (likely(irq)) { + handle_IRQ(irq, regs); + continue; + } + + WARN_ONCE(true, "Unexpected SPI received!\n"); + gic_write_eoir(irqnr); + } + if (irqnr < 16) { + gic_write_eoir(irqnr); +#ifdef CONFIG_SMP + handle_IPI(irqnr, regs); +#else + WARN_ONCE(true, "Unexpected SGI received!\n"); +#endif + continue; + } + } while (irqnr != ICC_IAR1_EL1_SPURIOUS); +} + +static void __init gic_dist_init(void) +{ + unsigned int i; + u64 affinity; + void __iomem *base = gic_data.dist_base; + + /* Disable the distributor */ + writel_relaxed(0, base + GICD_CTLR); + gic_dist_wait_for_rwp(); + + gic_dist_config(base, gic_data.irq_nr, gic_dist_wait_for_rwp); + + /* Enable distributor with ARE, Group1 */ + writel_relaxed(GICD_CTLR_ARE_NS | GICD_CTLR_ENABLE_G1A | GICD_CTLR_ENABLE_G1, + base + GICD_CTLR); + + /* + * Set all global interrupts to the boot CPU only. ARE must be + * enabled. 
+ */ + affinity = gic_mpidr_to_affinity(cpu_logical_map(smp_processor_id())); + for (i = 32; i < gic_data.irq_nr; i++) + writeq_relaxed(affinity, base + GICD_IROUTER + i * 8); +} + +static int gic_populate_rdist(void) +{ + u64 mpidr = cpu_logical_map(smp_processor_id()); + u64 typer; + u32 aff; + int i; + + /* + * Convert affinity to a 32bit value that can be matched to + * GICR_TYPER bits [63:32]. + */ + aff = (MPIDR_AFFINITY_LEVEL(mpidr, 3) << 24 | + MPIDR_AFFINITY_LEVEL(mpidr, 2) << 16 | + MPIDR_AFFINITY_LEVEL(mpidr, 1) << 8 | + MPIDR_AFFINITY_LEVEL(mpidr, 0)); + + for (i = 0; i < gic_data.redist_regions; i++) { + void __iomem *ptr = gic_data.redist_base[i]; + u32 reg; + + reg = readl_relaxed(ptr + GICR_PIDR2) & GIC_PIDR2_ARCH_MASK; + if (reg != GIC_PIDR2_ARCH_GICv3 && + reg != GIC_PIDR2_ARCH_GICv4) { /* We're in trouble... */ + pr_warn("No redistributor present @%p\n", ptr); + break; + } + + do { + typer = readq_relaxed(ptr + GICR_TYPER); + if ((typer >> 32) == aff) { + gic_data_rdist_rd_base() = ptr; + pr_info("CPU%d: found redistributor %llx @%p\n", + smp_processor_id(), + (unsigned long long)mpidr, ptr); + return 0; + } + + if (gic_data.redist_stride) { + ptr += gic_data.redist_stride; + } else { + ptr += SZ_64K * 2; /* Skip RD_base + SGI_base */ + if (typer & GICR_TYPER_VLPIS) + ptr += SZ_64K * 2; /* Skip VLPI_base + reserved page */ + } + } while (!(typer & GICR_TYPER_LAST)); + } + + /* We couldn't even deal with ourselves... */ + WARN(true, "CPU%d: mpidr %llx has no re-distributor!\n", + smp_processor_id(), (unsigned long long)mpidr); + return -ENODEV; +} + +static void gic_cpu_init(void) +{ + void __iomem *rbase; + + /* Register ourselves with the rest of the world */ + if (gic_populate_rdist()) + return; + + gic_enable_redist(); + + rbase = gic_data_rdist_sgi_base(); + + gic_cpu_config(rbase, gic_redist_wait_for_rwp); + + /* Enable system registers */ + gic_enable_sre(); + + /* Set priority mask register */ + gic_write_pmr(DEFAULT_PMR_VALUE); + + /* EOI deactivates interrupt too (mode 0) */ + gic_write_ctlr(ICC_CTLR_EL1_EOImode_drop_dir); + + /* ... and let's hit the road... */ + gic_write_grpen1(1); +} + +#ifdef CONFIG_SMP +static int gic_secondary_init(struct notifier_block *nfb, + unsigned long action, void *hcpu) +{ + if (action == CPU_STARTING || action == CPU_STARTING_FROZEN) + gic_cpu_init(); + return NOTIFY_OK; +} + +/* + * Notifier for enabling the GIC CPU interface. Set an arbitrarily high + * priority because the GIC needs to be up before the ARM generic timers. + */ +static struct notifier_block gic_cpu_notifier = { + .notifier_call = gic_secondary_init, + .priority = 100, +}; + +static u16 gic_compute_target_list(int *base_cpu, const struct cpumask *mask, + u64 cluster_id) +{ + int cpu = *base_cpu; + u64 mpidr = cpu_logical_map(cpu); + u16 tlist = 0; + + while (cpu < nr_cpu_ids) { + /* + * If we ever get a cluster of more than 16 CPUs, just + * scream and skip that CPU. 
+ */ + if (WARN_ON((mpidr & 0xff) >= 16)) + goto out; + + tlist |= 1 << (mpidr & 0xf); + + cpu = cpumask_next(cpu, mask); + if (cpu == nr_cpu_ids) + goto out; + + mpidr = cpu_logical_map(cpu); + + if (cluster_id != (mpidr & ~0xffUL)) { + cpu--; + goto out; + } + } +out: + *base_cpu = cpu; + return tlist; +} + +static void gic_send_sgi(u64 cluster_id, u16 tlist, unsigned int irq) +{ + u64 val; + + val = (MPIDR_AFFINITY_LEVEL(cluster_id, 3) << 48 | + MPIDR_AFFINITY_LEVEL(cluster_id, 2) << 32 | + irq << 24 | + MPIDR_AFFINITY_LEVEL(cluster_id, 1) << 16 | + tlist); + + pr_debug("CPU%d: ICC_SGI1R_EL1 %llx\n", smp_processor_id(), val); + gic_write_sgi1r(val); +} + +static void gic_raise_softirq(const struct cpumask *mask, unsigned int irq) +{ + int cpu; + + if (WARN_ON(irq >= 16)) + return; + + /* + * Ensure that stores to Normal memory are visible to the + * other CPUs before issuing the IPI. + */ + smp_wmb(); + + for_each_cpu_mask(cpu, *mask) { + u64 cluster_id = cpu_logical_map(cpu) & ~0xffUL; + u16 tlist; + + tlist = gic_compute_target_list(&cpu, mask, cluster_id); + gic_send_sgi(cluster_id, tlist, irq); + } + + /* Force the above writes to ICC_SGI1R_EL1 to be executed */ + isb(); +} + +static void gic_smp_init(void) +{ + set_smp_cross_call(gic_raise_softirq); + register_cpu_notifier(&gic_cpu_notifier); +} + +static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val, + bool force) +{ + unsigned int cpu = cpumask_any_and(mask_val, cpu_online_mask); + void __iomem *reg; + int enabled; + u64 val; + + if (gic_irq_in_rdist(d)) + return -EINVAL; + + /* If interrupt was enabled, disable it first */ + enabled = gic_peek_irq(d, GICD_ISENABLER); + if (enabled) + gic_mask_irq(d); + + reg = gic_dist_base(d) + GICD_IROUTER + (gic_irq(d) * 8); + val = gic_mpidr_to_affinity(cpu_logical_map(cpu)); + + writeq_relaxed(val, reg); + + /* + * If the interrupt was enabled, enabled it again. Otherwise, + * just wait for the distributor to have digested our changes. 
+ */ + if (enabled) + gic_unmask_irq(d); + else + gic_dist_wait_for_rwp(); + + return IRQ_SET_MASK_OK; +} +#else +#define gic_set_affinity NULL +#define gic_smp_init() do { } while(0) +#endif + +static struct irq_chip gic_chip = { + .name = "GICv3", + .irq_mask = gic_mask_irq, + .irq_unmask = gic_unmask_irq, + .irq_eoi = gic_eoi_irq, + .irq_set_type = gic_set_type, + .irq_set_affinity = gic_set_affinity, +}; + +static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq, + irq_hw_number_t hw) +{ + /* SGIs are private to the core kernel */ + if (hw < 16) + return -EPERM; + /* PPIs */ + if (hw < 32) { + irq_set_percpu_devid(irq); + irq_set_chip_and_handler(irq, &gic_chip, + handle_percpu_devid_irq); + set_irq_flags(irq, IRQF_VALID | IRQF_NOAUTOEN); + } + /* SPIs */ + if (hw >= 32 && hw < gic_data.irq_nr) { + irq_set_chip_and_handler(irq, &gic_chip, + handle_fasteoi_irq); + set_irq_flags(irq, IRQF_VALID | IRQF_PROBE); + } + irq_set_chip_data(irq, d->host_data); + return 0; +} + +static int gic_irq_domain_xlate(struct irq_domain *d, + struct device_node *controller, + const u32 *intspec, unsigned int intsize, + unsigned long *out_hwirq, unsigned int *out_type) +{ + if (d->of_node != controller) + return -EINVAL; + if (intsize < 3) + return -EINVAL; + + switch(intspec[0]) { + case 0: /* SPI */ + *out_hwirq = intspec[1] + 32; + break; + case 1: /* PPI */ + *out_hwirq = intspec[1] + 16; + break; + default: + return -EINVAL; + } + + *out_type = intspec[2] & IRQ_TYPE_SENSE_MASK; + return 0; +} + +static const struct irq_domain_ops gic_irq_domain_ops = { + .map = gic_irq_domain_map, + .xlate = gic_irq_domain_xlate, +}; + +static int __init gic_of_init(struct device_node *node, struct device_node *parent) +{ + void __iomem *dist_base; + void __iomem **redist_base; + u64 redist_stride; + u32 redist_regions; + u32 reg; + int gic_irqs; + int err; + int i; + + dist_base = of_iomap(node, 0); + if (!dist_base) { + pr_err("%s: unable to map gic dist registers\n", + node->full_name); + return -ENXIO; + } + + reg = readl_relaxed(dist_base + GICD_PIDR2) & GIC_PIDR2_ARCH_MASK; + if (reg != GIC_PIDR2_ARCH_GICv3 && reg != GIC_PIDR2_ARCH_GICv4) { + pr_err("%s: no distributor detected, giving up\n", + node->full_name); + err = -ENODEV; + goto out_unmap_dist; + } + + if (of_property_read_u32(node, "#redistributor-regions", &redist_regions)) + redist_regions = 1; + + redist_base = kzalloc(sizeof(*redist_base) * redist_regions, GFP_KERNEL); + if (!redist_base) { + err = -ENOMEM; + goto out_unmap_dist; + } + + for (i = 0; i < redist_regions; i++) { + redist_base[i] = of_iomap(node, 1 + i); + if (!redist_base[i]) { + pr_err("%s: couldn't map region %d\n", + node->full_name, i); + err = -ENODEV; + goto out_unmap_rdist; + } + } + + if (of_property_read_u64(node, "redistributor-stride", &redist_stride)) + redist_stride = 0; + + gic_data.dist_base = dist_base; + gic_data.redist_base = redist_base; + gic_data.redist_regions = redist_regions; + gic_data.redist_stride = redist_stride; + + /* + * Find out how many interrupts are supported. 
+ * The GIC only supports up to 1020 interrupt sources (SGI+PPI+SPI) + */ + gic_irqs = readl_relaxed(gic_data.dist_base + GICD_TYPER) & 0x1f; + gic_irqs = (gic_irqs + 1) * 32; + if (gic_irqs > 1020) + gic_irqs = 1020; + gic_data.irq_nr = gic_irqs; + + gic_data.domain = irq_domain_add_tree(node, &gic_irq_domain_ops, + &gic_data); + gic_data.rdist = alloc_percpu(typeof(*gic_data.rdist)); + + if (WARN_ON(!gic_data.domain) || WARN_ON(!gic_data.rdist)) { + err = -ENOMEM; + goto out_free; + } + + set_handle_irq(gic_handle_irq); + + gic_smp_init(); + gic_dist_init(); + gic_cpu_init(); + + return 0; + +out_free: + if (gic_data.domain) + irq_domain_remove(gic_data.domain); + free_percpu(gic_data.rdist); +out_unmap_rdist: + for (i = 0; i < redist_regions; i++) + if (redist_base[i]) + iounmap(redist_base[i]); + kfree(redist_base); +out_unmap_dist: + iounmap(dist_base); + return err; +} + +IRQCHIP_DECLARE(gic_v3, "arm,gic-v3", gic_of_init); diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h new file mode 100644 index 000000000000..30cb7556d43f --- /dev/null +++ b/include/linux/irqchip/arm-gic-v3.h @@ -0,0 +1,198 @@ +/* + * Copyright (C) 2013, 2014 ARM Limited, All Rights Reserved. + * Author: Marc Zyngier + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#ifndef __LINUX_IRQCHIP_ARM_GIC_V3_H +#define __LINUX_IRQCHIP_ARM_GIC_V3_H + +/* + * Distributor registers. We assume we're running non-secure, with ARE + * being set. Secure-only and non-ARE registers are not described. 
+ */ +#define GICD_CTLR 0x0000 +#define GICD_TYPER 0x0004 +#define GICD_IIDR 0x0008 +#define GICD_STATUSR 0x0010 +#define GICD_SETSPI_NSR 0x0040 +#define GICD_CLRSPI_NSR 0x0048 +#define GICD_SETSPI_SR 0x0050 +#define GICD_CLRSPI_SR 0x0058 +#define GICD_SEIR 0x0068 +#define GICD_ISENABLER 0x0100 +#define GICD_ICENABLER 0x0180 +#define GICD_ISPENDR 0x0200 +#define GICD_ICPENDR 0x0280 +#define GICD_ISACTIVER 0x0300 +#define GICD_ICACTIVER 0x0380 +#define GICD_IPRIORITYR 0x0400 +#define GICD_ICFGR 0x0C00 +#define GICD_IROUTER 0x6000 +#define GICD_PIDR2 0xFFE8 + +#define GICD_CTLR_RWP (1U << 31) +#define GICD_CTLR_ARE_NS (1U << 4) +#define GICD_CTLR_ENABLE_G1A (1U << 1) +#define GICD_CTLR_ENABLE_G1 (1U << 0) + +#define GICD_IROUTER_SPI_MODE_ONE (0U << 31) +#define GICD_IROUTER_SPI_MODE_ANY (1U << 31) + +#define GIC_PIDR2_ARCH_MASK 0xf0 +#define GIC_PIDR2_ARCH_GICv3 0x30 +#define GIC_PIDR2_ARCH_GICv4 0x40 + +/* + * Re-Distributor registers, offsets from RD_base + */ +#define GICR_CTLR GICD_CTLR +#define GICR_IIDR 0x0004 +#define GICR_TYPER 0x0008 +#define GICR_STATUSR GICD_STATUSR +#define GICR_WAKER 0x0014 +#define GICR_SETLPIR 0x0040 +#define GICR_CLRLPIR 0x0048 +#define GICR_SEIR GICD_SEIR +#define GICR_PROPBASER 0x0070 +#define GICR_PENDBASER 0x0078 +#define GICR_INVLPIR 0x00A0 +#define GICR_INVALLR 0x00B0 +#define GICR_SYNCR 0x00C0 +#define GICR_MOVLPIR 0x0100 +#define GICR_MOVALLR 0x0110 +#define GICR_PIDR2 GICD_PIDR2 + +#define GICR_WAKER_ProcessorSleep (1U << 1) +#define GICR_WAKER_ChildrenAsleep (1U << 2) + +/* + * Re-Distributor registers, offsets from SGI_base + */ +#define GICR_ISENABLER0 GICD_ISENABLER +#define GICR_ICENABLER0 GICD_ICENABLER +#define GICR_ISPENDR0 GICD_ISPENDR +#define GICR_ICPENDR0 GICD_ICPENDR +#define GICR_ISACTIVER0 GICD_ISACTIVER +#define GICR_ICACTIVER0 GICD_ICACTIVER +#define GICR_IPRIORITYR0 GICD_IPRIORITYR +#define GICR_ICFGR0 GICD_ICFGR + +#define GICR_TYPER_VLPIS (1U << 1) +#define GICR_TYPER_LAST (1U << 4) + +/* + * CPU interface registers + */ +#define ICC_CTLR_EL1_EOImode_drop_dir (0U << 1) +#define ICC_CTLR_EL1_EOImode_drop (1U << 1) +#define ICC_SRE_EL1_SRE (1U << 0) + +/* + * Hypervisor interface registers (SRE only) + */ +#define ICH_LR_VIRTUAL_ID_MASK ((1UL << 32) - 1) + +#define ICH_LR_EOI (1UL << 41) +#define ICH_LR_GROUP (1UL << 60) +#define ICH_LR_STATE (3UL << 62) +#define ICH_LR_PENDING_BIT (1UL << 62) +#define ICH_LR_ACTIVE_BIT (1UL << 63) + +#define ICH_MISR_EOI (1 << 0) +#define ICH_MISR_U (1 << 1) + +#define ICH_HCR_EN (1 << 0) +#define ICH_HCR_UIE (1 << 1) + +#define ICH_VMCR_CTLR_SHIFT 0 +#define ICH_VMCR_CTLR_MASK (0x21f << ICH_VMCR_CTLR_SHIFT) +#define ICH_VMCR_BPR1_SHIFT 18 +#define ICH_VMCR_BPR1_MASK (7 << ICH_VMCR_BPR1_SHIFT) +#define ICH_VMCR_BPR0_SHIFT 21 +#define ICH_VMCR_BPR0_MASK (7 << ICH_VMCR_BPR0_SHIFT) +#define ICH_VMCR_PMR_SHIFT 24 +#define ICH_VMCR_PMR_MASK (0xffUL << ICH_VMCR_PMR_SHIFT) + +#define ICC_EOIR1_EL1 S3_0_C12_C12_1 +#define ICC_IAR1_EL1 S3_0_C12_C12_0 +#define ICC_SGI1R_EL1 S3_0_C12_C11_5 +#define ICC_PMR_EL1 S3_0_C4_C6_0 +#define ICC_CTLR_EL1 S3_0_C12_C12_4 +#define ICC_SRE_EL1 S3_0_C12_C12_5 +#define ICC_GRPEN1_EL1 S3_0_C12_C12_7 + +#define ICC_IAR1_EL1_SPURIOUS 0x3ff + +#define ICC_SRE_EL2 S3_4_C12_C9_5 + +#define ICC_SRE_EL2_SRE (1 << 0) +#define ICC_SRE_EL2_ENABLE (1 << 3) + +/* + * System register definitions + */ +#define ICH_VSEIR_EL2 S3_4_C12_C9_4 +#define ICH_HCR_EL2 S3_4_C12_C11_0 +#define ICH_VTR_EL2 S3_4_C12_C11_1 +#define ICH_MISR_EL2 S3_4_C12_C11_2 +#define ICH_EISR_EL2 S3_4_C12_C11_3 +#define 
ICH_ELSR_EL2 S3_4_C12_C11_5 +#define ICH_VMCR_EL2 S3_4_C12_C11_7 + +#define __LR0_EL2(x) S3_4_C12_C12_ ## x +#define __LR8_EL2(x) S3_4_C12_C13_ ## x + +#define ICH_LR0_EL2 __LR0_EL2(0) +#define ICH_LR1_EL2 __LR0_EL2(1) +#define ICH_LR2_EL2 __LR0_EL2(2) +#define ICH_LR3_EL2 __LR0_EL2(3) +#define ICH_LR4_EL2 __LR0_EL2(4) +#define ICH_LR5_EL2 __LR0_EL2(5) +#define ICH_LR6_EL2 __LR0_EL2(6) +#define ICH_LR7_EL2 __LR0_EL2(7) +#define ICH_LR8_EL2 __LR8_EL2(0) +#define ICH_LR9_EL2 __LR8_EL2(1) +#define ICH_LR10_EL2 __LR8_EL2(2) +#define ICH_LR11_EL2 __LR8_EL2(3) +#define ICH_LR12_EL2 __LR8_EL2(4) +#define ICH_LR13_EL2 __LR8_EL2(5) +#define ICH_LR14_EL2 __LR8_EL2(6) +#define ICH_LR15_EL2 __LR8_EL2(7) + +#define __AP0Rx_EL2(x) S3_4_C12_C8_ ## x +#define ICH_AP0R0_EL2 __AP0Rx_EL2(0) +#define ICH_AP0R1_EL2 __AP0Rx_EL2(1) +#define ICH_AP0R2_EL2 __AP0Rx_EL2(2) +#define ICH_AP0R3_EL2 __AP0Rx_EL2(3) + +#define __AP1Rx_EL2(x) S3_4_C12_C9_ ## x +#define ICH_AP1R0_EL2 __AP1Rx_EL2(0) +#define ICH_AP1R1_EL2 __AP1Rx_EL2(1) +#define ICH_AP1R2_EL2 __AP1Rx_EL2(2) +#define ICH_AP1R3_EL2 __AP1Rx_EL2(3) + +#ifndef __ASSEMBLY__ + +#include + +static inline void gic_write_eoir(u64 irq) +{ + asm volatile("msr " __stringify(ICC_EOIR1_EL1) ", %0" : : "r" (irq)); + isb(); +} + +#endif + +#endif -- cgit v1.2.3-59-g8ed1b From 5a1379e8748a5cfa3eb068f812d61bde849ef76c Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 4 Jun 2014 17:48:15 +0200 Subject: firmware loader: allow disabling of udev as firmware loader [The patch was originally proposed by Tom Gundersen, and rewritten afterwards by me; most of changelogs below borrowed from Tom's original patch -- tiwai] Currently (at least) the dell-rbu driver selects FW_LOADER_USER_HELPER, which means that distros can't really stop loading firmware through udev without breaking other users (though some have). Ideally we would remove/disable the udev firmware helper in both the kernel and in udev, but if we were to disable it in udev and not the kernel, the result would be (seemingly) hung kernels as no one would be around to cancel firmware requests. This patch allows udev firmware loading to be disabled while still allowing non-udev firmware loading, as done by the dell-rbu driver, to continue working. This is achieved by only using the fallback mechanism when the uevent is suppressed. The patch renames the user-selectable Kconfig from FW_LOADER_USER_HELPER to FW_LOADER_USER_HELPER_FALLBACK, and the former is reverse-selected by the latter or the drivers that need userhelper like dell-rbu. Also, the "default y" is removed together with this change, since it's been deprecated in udev upstream, thus rather better to disable it nowadays. Tested with FW_LOADER_USER_HELPER=n LATTICE_ECP3_CONFIG=y DELL_RBU=y and udev without the firmware loading support, but I don't have the hardware to test the lattice/dell drivers, so additional testing would be appreciated. 
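As an illustrative aside (not part of this patch): with the user-helper made an optional fallback, drivers whose firmware is itself optional are expected to use request_firmware_direct(), which only tries the filesystem and never invokes udev. A minimal, hypothetical caller might look like the sketch below; the firmware name and surrounding driver context are made up.

        const struct firmware *fw;
        int err;

        /* Filesystem lookup only; no udev/user-helper fallback. */
        err = request_firmware_direct(&fw, "vendor/optional-ucode.bin", dev);
        if (err) {
                /* Optional firmware is missing: continue with defaults. */
                return 0;
        }
        /* ...program fw->data (fw->size bytes) into the device... */
        release_firmware(fw);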
Reviewed-by: Tom Gundersen Cc: Ming Lei Cc: Abhay Salunke Cc: Stefan Roese Cc: Arnd Bergmann Cc: Kay Sievers Tested-by: Balaji Singh Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- drivers/base/Kconfig | 10 ++++++++-- drivers/base/firmware_class.c | 15 ++++++++++----- include/linux/firmware.h | 2 +- 3 files changed, 19 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig index 00e13ce5cbbd..88500fed3c7a 100644 --- a/drivers/base/Kconfig +++ b/drivers/base/Kconfig @@ -149,15 +149,21 @@ config EXTRA_FIRMWARE_DIR some other directory containing the firmware files. config FW_LOADER_USER_HELPER + bool + +config FW_LOADER_USER_HELPER_FALLBACK bool "Fallback user-helper invocation for firmware loading" depends on FW_LOADER - default y + select FW_LOADER_USER_HELPER help This option enables / disables the invocation of user-helper (e.g. udev) for loading firmware files as a fallback after the direct file loading in kernel fails. The user-mode helper is no longer required unless you have a special firmware file that - resides in a non-standard path. + resides in a non-standard path. Moreover, the udev support has + been deprecated upstream. + + If you are unsure about this, say N here. config DEBUG_DRIVER bool "Driver Core verbose debug messages" diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c index d276e33880be..46ea5f4c3bb5 100644 --- a/drivers/base/firmware_class.c +++ b/drivers/base/firmware_class.c @@ -100,9 +100,14 @@ static inline long firmware_loading_timeout(void) #define FW_OPT_UEVENT (1U << 0) #define FW_OPT_NOWAIT (1U << 1) #ifdef CONFIG_FW_LOADER_USER_HELPER -#define FW_OPT_FALLBACK (1U << 2) +#define FW_OPT_USERHELPER (1U << 2) #else -#define FW_OPT_FALLBACK 0 +#define FW_OPT_USERHELPER 0 +#endif +#ifdef CONFIG_FW_LOADER_USER_HELPER_FALLBACK +#define FW_OPT_FALLBACK FW_OPT_USERHELPER +#else +#define FW_OPT_FALLBACK 0 #endif struct firmware_cache { @@ -1111,7 +1116,7 @@ _request_firmware(const struct firmware **firmware_p, const char *name, ret = fw_get_filesystem_firmware(device, fw->priv); if (ret) { - if (opt_flags & FW_OPT_FALLBACK) { + if (opt_flags & FW_OPT_USERHELPER) { dev_warn(device, "Direct firmware load failed with error %d\n", ret); @@ -1171,7 +1176,7 @@ request_firmware(const struct firmware **firmware_p, const char *name, } EXPORT_SYMBOL(request_firmware); -#ifdef CONFIG_FW_LOADER_USER_HELPER +#ifdef CONFIG_FW_LOADER_USER_HELPER_FALLBACK /** * request_firmware: - load firmware directly without usermode helper * @firmware_p: pointer to firmware image @@ -1277,7 +1282,7 @@ request_firmware_nowait( fw_work->context = context; fw_work->cont = cont; fw_work->opt_flags = FW_OPT_NOWAIT | FW_OPT_FALLBACK | - (uevent ? FW_OPT_UEVENT : 0); + (uevent ? FW_OPT_UEVENT : FW_OPT_USERHELPER); if (!try_module_get(module)) { kfree(fw_work); diff --git a/include/linux/firmware.h b/include/linux/firmware.h index 59529330efd6..67e5b801af0c 100644 --- a/include/linux/firmware.h +++ b/include/linux/firmware.h @@ -68,7 +68,7 @@ static inline void release_firmware(const struct firmware *fw) #endif -#ifdef CONFIG_FW_LOADER_USER_HELPER +#ifdef CONFIG_FW_LOADER_USER_HELPER_FALLBACK int request_firmware_direct(const struct firmware **fw, const char *name, struct device *device); #else -- cgit v1.2.3-59-g8ed1b From c868edf42b4db89907b467c92b7f035c8c1cb0e5 Mon Sep 17 00:00:00 2001 From: "Luis R. 
Rodriguez" Date: Wed, 2 Jul 2014 09:55:05 -0700 Subject: firmware loader: inform direct failure when udev loader is disabled Now that the udev firmware loader is optional request_firmware() will not provide any information on the kernel ring buffer if direct firmware loading failed and udev firmware loading is disabled. If no information is needed request_firmware_direct() should be used for optional firmware, at which point drivers can take on the onus over informing of any failures, if udev firmware loading is disabled though we should at the very least provide some sort of information as when the udev loader was enabled by default back in the days. With this change with a simple firmware load test module [0]: Example output without FW_LOADER_USER_HELPER_FALLBACK platform fake-dev.0: Direct firmware load for fake.bin failed with error -2 Example with FW_LOADER_USER_HELPER_FALLBACK platform fake-dev.0: Direct firmware load for fake.bin failed with error -2 platform fake-dev.0: Falling back to user helper Without this change without FW_LOADER_USER_HELPER_FALLBACK we get no output logged upon failure. Cc: Tom Gundersen Cc: Ming Lei Cc: Abhay Salunke Cc: Stefan Roese Cc: Arnd Bergmann Cc: Kay Sievers Signed-off-by: Luis R. Rodriguez Reviewed-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- drivers/base/firmware_class.c | 13 +++++++------ include/linux/firmware.h | 15 ++++++++------- 2 files changed, 15 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c index 28bc6db9fbf4..124d50ceb116 100644 --- a/drivers/base/firmware_class.c +++ b/drivers/base/firmware_class.c @@ -109,6 +109,7 @@ static inline long firmware_loading_timeout(void) #else #define FW_OPT_FALLBACK 0 #endif +#define FW_OPT_NO_WARN (1U << 3) struct firmware_cache { /* firmware_buf instance will be added into the below list */ @@ -1105,10 +1106,11 @@ _request_firmware(const struct firmware **firmware_p, const char *name, ret = fw_get_filesystem_firmware(device, fw->priv); if (ret) { - if (opt_flags & FW_OPT_USERHELPER) { + if (!(opt_flags & FW_OPT_NO_WARN)) dev_warn(device, - "Direct firmware load failed with error %d\n", - ret); + "Direct firmware load for %s failed with error %d\n", + name, ret); + if (opt_flags & FW_OPT_USERHELPER) { dev_warn(device, "Falling back to user helper\n"); ret = fw_load_from_user_helper(fw, name, device, opt_flags, timeout); @@ -1165,7 +1167,6 @@ request_firmware(const struct firmware **firmware_p, const char *name, } EXPORT_SYMBOL(request_firmware); -#ifdef CONFIG_FW_LOADER_USER_HELPER_FALLBACK /** * request_firmware: - load firmware directly without usermode helper * @firmware_p: pointer to firmware image @@ -1182,12 +1183,12 @@ int request_firmware_direct(const struct firmware **firmware_p, { int ret; __module_get(THIS_MODULE); - ret = _request_firmware(firmware_p, name, device, FW_OPT_UEVENT); + ret = _request_firmware(firmware_p, name, device, + FW_OPT_UEVENT | FW_OPT_NO_WARN); module_put(THIS_MODULE); return ret; } EXPORT_SYMBOL_GPL(request_firmware_direct); -#endif /** * release_firmware: - release the resource associated with a firmware image diff --git a/include/linux/firmware.h b/include/linux/firmware.h index 67e5b801af0c..5c41c5e75b5c 100644 --- a/include/linux/firmware.h +++ b/include/linux/firmware.h @@ -45,6 +45,8 @@ int request_firmware_nowait( struct module *module, bool uevent, const char *name, struct device *device, gfp_t gfp, void *context, void (*cont)(const struct firmware *fw, void 
*context)); +int request_firmware_direct(const struct firmware **fw, const char *name, + struct device *device); void release_firmware(const struct firmware *fw); #else @@ -66,13 +68,12 @@ static inline void release_firmware(const struct firmware *fw) { } -#endif +static inline int request_firmware_direct(const struct firmware **fw, + const char *name, + struct device *device) +{ + return -EINVAL; +} -#ifdef CONFIG_FW_LOADER_USER_HELPER_FALLBACK -int request_firmware_direct(const struct firmware **fw, const char *name, - struct device *device); -#else -#define request_firmware_direct request_firmware #endif - #endif -- cgit v1.2.3-59-g8ed1b From 3d713e0e382e6fcfb4bba1501645b66c129ad60b Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Mon, 2 Jun 2014 19:42:58 -0500 Subject: driver core: platform: add device binding path 'driver_override' Needed by platform device drivers, such as the upcoming vfio-platform driver, in order to bypass the existing OF, ACPI, id_table and name string matches, and successfully be able to be bound to any device, like so: echo vfio-platform > /sys/bus/platform/devices/fff51000.ethernet/driver_override echo fff51000.ethernet > /sys/bus/platform/devices/fff51000.ethernet/driver/unbind echo fff51000.ethernet > /sys/bus/platform/drivers_probe This mimics "PCI: Introduce new device binding path using pci_dev.driver_override", which is an interface enhancement for more deterministic PCI device binding, e.g., when in the presence of hotplug. Reviewed-by: Alex Williamson Reviewed-by: Alexander Graf Reviewed-by: Stuart Yoder Signed-off-by: Kim Phillips Signed-off-by: Greg Kroah-Hartman --- Documentation/ABI/testing/sysfs-bus-platform | 20 ++++++++++++ drivers/base/platform.c | 47 ++++++++++++++++++++++++++++ include/linux/platform_device.h | 1 + 3 files changed, 68 insertions(+) create mode 100644 Documentation/ABI/testing/sysfs-bus-platform (limited to 'include/linux') diff --git a/Documentation/ABI/testing/sysfs-bus-platform b/Documentation/ABI/testing/sysfs-bus-platform new file mode 100644 index 000000000000..5172a6124b27 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-bus-platform @@ -0,0 +1,20 @@ +What: /sys/bus/platform/devices/.../driver_override +Date: April 2014 +Contact: Kim Phillips +Description: + This file allows the driver for a device to be specified which + will override standard OF, ACPI, ID table, and name matching. + When specified, only a driver with a name matching the value + written to driver_override will have an opportunity to bind + to the device. The override is specified by writing a string + to the driver_override file (echo vfio-platform > \ + driver_override) and may be cleared with an empty string + (echo > driver_override). This returns the device to standard + matching rules binding. Writing to driver_override does not + automatically unbind the device from its current driver or make + any attempt to automatically load the specified driver. If no + driver with a matching name is currently loaded in the kernel, + the device will not bind to any driver. This also allows + devices to opt-out of driver binding using a driver_override + name such as "none". Only a single driver may be specified in + the override, there is no support for parsing delimiters. 
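As an illustrative aside (not part of this patch): besides the sysfs interface documented above, kernel code that creates a platform device could pre-set the new field before registration to force the same match; the device and driver names below are only examples.

        struct platform_device *pdev;

        pdev = platform_device_alloc("fff51000.ethernet", -1);
        if (!pdev)
                return -ENOMEM;

        /* Bind only to a driver named "vfio-platform" (see platform_match()). */
        pdev->driver_override = kstrdup("vfio-platform", GFP_KERNEL);
        if (!pdev->driver_override) {
                platform_device_put(pdev);
                return -ENOMEM;
        }

        return platform_device_add(pdev);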
diff --git a/drivers/base/platform.c b/drivers/base/platform.c index c48c4acb9b87..148f66a1d49a 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -23,6 +23,7 @@ #include #include #include +#include #include "base.h" #include "power/power.h" @@ -191,6 +192,7 @@ static void platform_device_release(struct device *dev) kfree(pa->pdev.dev.platform_data); kfree(pa->pdev.mfd_cell); kfree(pa->pdev.resource); + kfree(pa->pdev.driver_override); kfree(pa); } @@ -698,8 +700,49 @@ static ssize_t modalias_show(struct device *dev, struct device_attribute *a, } static DEVICE_ATTR_RO(modalias); +static ssize_t driver_override_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct platform_device *pdev = to_platform_device(dev); + char *driver_override, *old = pdev->driver_override, *cp; + + if (count > PATH_MAX) + return -EINVAL; + + driver_override = kstrndup(buf, count, GFP_KERNEL); + if (!driver_override) + return -ENOMEM; + + cp = strchr(driver_override, '\n'); + if (cp) + *cp = '\0'; + + if (strlen(driver_override)) { + pdev->driver_override = driver_override; + } else { + kfree(driver_override); + pdev->driver_override = NULL; + } + + kfree(old); + + return count; +} + +static ssize_t driver_override_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct platform_device *pdev = to_platform_device(dev); + + return sprintf(buf, "%s\n", pdev->driver_override); +} +static DEVICE_ATTR_RW(driver_override); + + static struct attribute *platform_dev_attrs[] = { &dev_attr_modalias.attr, + &dev_attr_driver_override.attr, NULL, }; ATTRIBUTE_GROUPS(platform_dev); @@ -755,6 +798,10 @@ static int platform_match(struct device *dev, struct device_driver *drv) struct platform_device *pdev = to_platform_device(dev); struct platform_driver *pdrv = to_platform_driver(drv); + /* When driver_override is set, only bind to the matching driver */ + if (pdev->driver_override) + return !strcmp(pdev->driver_override, drv->name); + /* Attempt an OF style match first */ if (of_driver_match_device(dev, drv)) return 1; diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index 16f6654082dd..153d303af7eb 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -28,6 +28,7 @@ struct platform_device { struct resource *resource; const struct platform_device_id *id_entry; + char *driver_override; /* Driver name to force a match */ /* MFD cell pointer */ struct mfd_cell *mfd_cell; -- cgit v1.2.3-59-g8ed1b From f941a6d9a9e0612eb807af822b0d1ac004da8175 Mon Sep 17 00:00:00 2001 From: Linus Lüssing Date: Mon, 7 Jul 2014 05:41:16 +0200 Subject: bridge: adding stubs for multicast exports MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To make users (e.g. batman-adv soon) load- and runnable even if the bridge was compiled without snooping capabilities - or even if the kernel was compiled without any bridge code at all. Signed-off-by: Linus Lüssing Signed-off-by: David S. 
Miller --- include/linux/if_bridge.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index fd22789d7b2e..e0c575ca89f7 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -36,8 +36,22 @@ extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void __use typedef int br_should_route_hook_t(struct sk_buff *skb); extern br_should_route_hook_t __rcu *br_should_route_hook; + +#if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_BRIDGE_IGMP_SNOOPING) int br_multicast_list_adjacent(struct net_device *dev, struct list_head *br_ip_list); bool br_multicast_has_querier_adjacent(struct net_device *dev, int proto); +#else +static inline int br_multicast_list_adjacent(struct net_device *dev, + struct list_head *br_ip_list) +{ + return 0; +} +static inline bool br_multicast_has_querier_adjacent(struct net_device *dev, + int proto) +{ + return false; +} +#endif #endif -- cgit v1.2.3-59-g8ed1b From c34963e21685659eb513e1c4d847f81d8a8f13f3 Mon Sep 17 00:00:00 2001 From: Linus Lüssing Date: Mon, 7 Jul 2014 05:41:17 +0200 Subject: bridge: export knowledge about the presence of IGMP/MLD queriers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With this patch other modules are able to ask the bridge whether an IGMP or MLD querier exists on the according, bridged link layer. Multicast snooping can only be performed if a valid, selected querier exists on a link. Just like the bridge only enables its multicast snooping if a querier exists, e.g. batman-adv too can only activate its multicast snooping in bridged scenarios if a querier is present. For instance this export avoids having to reimplement IGMP/MLD querier message snooping and parsing in e.g. batman-adv, when multicast optimizations for bridged scenarios are added in the future. Signed-off-by: Linus Lüssing Signed-off-by: David S. 
Miller --- include/linux/if_bridge.h | 6 ++++++ net/bridge/br_multicast.c | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index e0c575ca89f7..808dcb8cc04f 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -40,6 +40,7 @@ extern br_should_route_hook_t __rcu *br_should_route_hook; #if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_BRIDGE_IGMP_SNOOPING) int br_multicast_list_adjacent(struct net_device *dev, struct list_head *br_ip_list); +bool br_multicast_has_querier_anywhere(struct net_device *dev, int proto); bool br_multicast_has_querier_adjacent(struct net_device *dev, int proto); #else static inline int br_multicast_list_adjacent(struct net_device *dev, @@ -47,6 +48,11 @@ static inline int br_multicast_list_adjacent(struct net_device *dev, { return 0; } +static inline bool br_multicast_has_querier_anywhere(struct net_device *dev, + int proto) +{ + return false; +} static inline bool br_multicast_has_querier_adjacent(struct net_device *dev, int proto) { diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index abfa0b65a111..b4845f4b2bb4 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -2215,6 +2215,43 @@ unlock: } EXPORT_SYMBOL_GPL(br_multicast_list_adjacent); +/** + * br_multicast_has_querier_anywhere - Checks for a querier on a bridge + * @dev: The bridge port providing the bridge on which to check for a querier + * @proto: The protocol family to check for: IGMP -> ETH_P_IP, MLD -> ETH_P_IPV6 + * + * Checks whether the given interface has a bridge on top and if so returns + * true if a valid querier exists anywhere on the bridged link layer. + * Otherwise returns false. + */ +bool br_multicast_has_querier_anywhere(struct net_device *dev, int proto) +{ + struct net_bridge *br; + struct net_bridge_port *port; + struct ethhdr eth; + bool ret = false; + + rcu_read_lock(); + if (!br_port_exists(dev)) + goto unlock; + + port = br_port_get_rcu(dev); + if (!port || !port->br) + goto unlock; + + br = port->br; + + memset(ð, 0, sizeof(eth)); + eth.h_proto = htons(proto); + + ret = br_multicast_querier_exists(br, ð); + +unlock: + rcu_read_unlock(); + return ret; +} +EXPORT_SYMBOL_GPL(br_multicast_has_querier_anywhere); + /** * br_multicast_has_querier_adjacent - Checks for a querier behind a bridge port * @dev: The bridge port adjacent to which to check for a querier -- cgit v1.2.3-59-g8ed1b From 8471bb73ba10ed6788b4f1e9b8a0f9dc6bdb05b5 Mon Sep 17 00:00:00 2001 From: Ezequiel Garcia Date: Wed, 21 May 2014 19:06:12 -0300 Subject: mtd: Introduce mtd_block_isreserved() In addition to mtd_block_isbad(), which checks if a block is bad or reserved, it's needed to check if a block is reserved only (but not bad). This commit adds an MTD interface for it, in a similar fashion to mtd_block_isbad(). While here, fix mtd_block_isbad() so the out-of-bounds checking is done before the callback check. 
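As an illustrative aside (not part of this patch), a caller scanning a device can now tell the two states apart; the counters below are only an example. Because mtd_block_isbad() reports both bad and reserved blocks, the reserved check is done first.

        loff_t ofs;
        unsigned int bad = 0, reserved = 0;

        for (ofs = 0; ofs < mtd->size; ofs += mtd->erasesize) {
                if (mtd_block_isreserved(mtd, ofs))
                        reserved++;     /* reserved only, e.g. blocks holding the BBT */
                else if (mtd_block_isbad(mtd, ofs))
                        bad++;          /* genuinely bad blocks */
        }
        pr_info("%s: %u bad, %u reserved eraseblocks\n", mtd->name, bad, reserved);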
Signed-off-by: Ezequiel Garcia Tested-by: Pekon Gupta Signed-off-by: Brian Norris --- drivers/mtd/mtdcore.c | 14 ++++++++++++-- drivers/mtd/mtdpart.c | 9 +++++++++ drivers/mtd/nand/nand_base.c | 18 ++++++++++++++++++ drivers/mtd/nand/nand_bbt.c | 14 ++++++++++++++ include/linux/mtd/mtd.h | 2 ++ include/linux/mtd/nand.h | 1 + 6 files changed, 56 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c index 11857faa0d96..e4831b4159db 100644 --- a/drivers/mtd/mtdcore.c +++ b/drivers/mtd/mtdcore.c @@ -1043,12 +1043,22 @@ int mtd_is_locked(struct mtd_info *mtd, loff_t ofs, uint64_t len) } EXPORT_SYMBOL_GPL(mtd_is_locked); -int mtd_block_isbad(struct mtd_info *mtd, loff_t ofs) +int mtd_block_isreserved(struct mtd_info *mtd, loff_t ofs) { - if (!mtd->_block_isbad) + if (ofs < 0 || ofs > mtd->size) + return -EINVAL; + if (!mtd->_block_isreserved) return 0; + return mtd->_block_isreserved(mtd, ofs); +} +EXPORT_SYMBOL_GPL(mtd_block_isreserved); + +int mtd_block_isbad(struct mtd_info *mtd, loff_t ofs) +{ if (ofs < 0 || ofs > mtd->size) return -EINVAL; + if (!mtd->_block_isbad) + return 0; return mtd->_block_isbad(mtd, ofs); } EXPORT_SYMBOL_GPL(mtd_block_isbad); diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c index 1ca9aec141ff..921e8c647884 100644 --- a/drivers/mtd/mtdpart.c +++ b/drivers/mtd/mtdpart.c @@ -290,6 +290,13 @@ static void part_resume(struct mtd_info *mtd) part->master->_resume(part->master); } +static int part_block_isreserved(struct mtd_info *mtd, loff_t ofs) +{ + struct mtd_part *part = PART(mtd); + ofs += part->offset; + return part->master->_block_isreserved(part->master, ofs); +} + static int part_block_isbad(struct mtd_info *mtd, loff_t ofs) { struct mtd_part *part = PART(mtd); @@ -422,6 +429,8 @@ static struct mtd_part *allocate_partition(struct mtd_info *master, slave->mtd._unlock = part_unlock; if (master->_is_locked) slave->mtd._is_locked = part_is_locked; + if (master->_block_isreserved) + slave->mtd._block_isreserved = part_block_isreserved; if (master->_block_isbad) slave->mtd._block_isbad = part_block_isbad; if (master->_block_markbad) diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index 41167e9e991e..0c505dd1f522 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -484,6 +484,23 @@ static int nand_check_wp(struct mtd_info *mtd) return (chip->read_byte(mtd) & NAND_STATUS_WP) ? 0 : 1; } +/** + * nand_block_checkbad - [GENERIC] Check if a block is marked bad + * @mtd: MTD device structure + * @ofs: offset from device start + * + * Check if the block is mark as reserved. 
+ */ +static int nand_block_isreserved(struct mtd_info *mtd, loff_t ofs) +{ + struct nand_chip *chip = mtd->priv; + + if (!chip->bbt) + return 0; + /* Return info from the table */ + return nand_isreserved_bbt(mtd, ofs); +} + /** * nand_block_checkbad - [GENERIC] Check if a block is marked bad * @mtd: MTD device structure @@ -4111,6 +4128,7 @@ int nand_scan_tail(struct mtd_info *mtd) mtd->_unlock = NULL; mtd->_suspend = nand_suspend; mtd->_resume = nand_resume; + mtd->_block_isreserved = nand_block_isreserved; mtd->_block_isbad = nand_block_isbad; mtd->_block_markbad = nand_block_markbad; mtd->writebufsize = mtd->writesize; diff --git a/drivers/mtd/nand/nand_bbt.c b/drivers/mtd/nand/nand_bbt.c index 7f0c3b4c2a4f..443fa82cde6a 100644 --- a/drivers/mtd/nand/nand_bbt.c +++ b/drivers/mtd/nand/nand_bbt.c @@ -1310,6 +1310,20 @@ int nand_default_bbt(struct mtd_info *mtd) return nand_scan_bbt(mtd, this->badblock_pattern); } +/** + * nand_isreserved_bbt - [NAND Interface] Check if a block is reserved + * @mtd: MTD device structure + * @offs: offset in the device + */ +int nand_isreserved_bbt(struct mtd_info *mtd, loff_t offs) +{ + struct nand_chip *this = mtd->priv; + int block; + + block = (int)(offs >> this->bbt_erase_shift); + return bbt_get_entry(this, block) == BBT_BLOCK_RESERVED; +} + /** * nand_isbad_bbt - [NAND Interface] Check if a block is bad * @mtd: MTD device structure diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index a1b0b4c8fd79..031ff3a9a0bd 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -222,6 +222,7 @@ struct mtd_info { int (*_lock) (struct mtd_info *mtd, loff_t ofs, uint64_t len); int (*_unlock) (struct mtd_info *mtd, loff_t ofs, uint64_t len); int (*_is_locked) (struct mtd_info *mtd, loff_t ofs, uint64_t len); + int (*_block_isreserved) (struct mtd_info *mtd, loff_t ofs); int (*_block_isbad) (struct mtd_info *mtd, loff_t ofs); int (*_block_markbad) (struct mtd_info *mtd, loff_t ofs); int (*_suspend) (struct mtd_info *mtd); @@ -302,6 +303,7 @@ static inline void mtd_sync(struct mtd_info *mtd) int mtd_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len); int mtd_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len); int mtd_is_locked(struct mtd_info *mtd, loff_t ofs, uint64_t len); +int mtd_block_isreserved(struct mtd_info *mtd, loff_t ofs); int mtd_block_isbad(struct mtd_info *mtd, loff_t ofs); int mtd_block_markbad(struct mtd_info *mtd, loff_t ofs); diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 2f0af2891f0f..1cff329ae13d 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -810,6 +810,7 @@ extern struct nand_manufacturers nand_manuf_ids[]; extern int nand_scan_bbt(struct mtd_info *mtd, struct nand_bbt_descr *bd); extern int nand_default_bbt(struct mtd_info *mtd); extern int nand_markbad_bbt(struct mtd_info *mtd, loff_t offs); +extern int nand_isreserved_bbt(struct mtd_info *mtd, loff_t offs); extern int nand_isbad_bbt(struct mtd_info *mtd, loff_t offs, int allowbbt); extern int nand_erase_nand(struct mtd_info *mtd, struct erase_info *instr, int allowbbt); -- cgit v1.2.3-59-g8ed1b From 523ece889eeee84a381e16086b81e07a76cff8b6 Mon Sep 17 00:00:00 2001 From: Eugenia Emantayev Date: Tue, 8 Jul 2014 11:25:19 +0300 Subject: net/mlx4_en: Fix set port ratelimit for 40GE In 40GE we can't use the default bw units for set ratelimit (100 Mbps) since the max is 255*100 Mbps = 25 Gbps (not suited for 40GE), thus we need 1 Gbps units. 
But for 10GE, 1 Gbps units might be too coarse, so we use the following scheme. For a user-set ratelimit <= 25 Gbps: use 100 Mbps units * user_ratelimit (* 10). For a user-set ratelimit > 25 Gbps: use 1 Gbps units * user_ratelimit. For an unlimited ratelimit (0 Gbps): use 1 Gbps units * MAX_RATELIMIT_DEFAULT (57). Note: any value > 58 will damage the FW ratelimit computation, so we cap the value and pull anything higher down to 57. Signed-off-by: Sagi Grimberg Signed-off-by: Eugenia Emantayev Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/mlx4.h | 5 ----- drivers/net/ethernet/mellanox/mlx4/port.c | 22 +++++++++++++++++----- include/linux/mlx4/device.h | 11 +++++++++++ 3 files changed, 28 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index 1d8af7336807..13fbcd03c3e4 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -62,11 +62,6 @@ #define INIT_HCA_TPT_MW_ENABLE (1 << 7) -#define MLX4_NUM_UP 8 -#define MLX4_NUM_TC 8 -#define MLX4_RATELIMIT_UNITS 3 /* 100 Mbps */ -#define MLX4_RATELIMIT_DEFAULT 0xffff - struct mlx4_set_port_prio2tc_context { u8 prio2tc[4]; }; diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c index 7ab97174886d..5d76a60ac053 100644 --- a/drivers/net/ethernet/mellanox/mlx4/port.c +++ b/drivers/net/ethernet/mellanox/mlx4/port.c @@ -1051,14 +1051,26 @@ int mlx4_SET_PORT_SCHEDULER(struct mlx4_dev *dev, u8 port, u8 *tc_tx_bw, for (i = 0; i < MLX4_NUM_TC; i++) { struct mlx4_port_scheduler_tc_cfg_be *tc = &context->tc[i]; - u16 r = ratelimit && ratelimit[i] ? ratelimit[i] : - MLX4_RATELIMIT_DEFAULT; + u16 r; + + if (ratelimit && ratelimit[i]) { + if (ratelimit[i] <= MLX4_MAX_100M_UNITS_VAL) { + r = ratelimit[i]; + tc->max_bw_units = + htons(MLX4_RATELIMIT_100M_UNITS); + } else { + r = ratelimit[i]/10; + tc->max_bw_units = + htons(MLX4_RATELIMIT_1G_UNITS); + } + tc->max_bw_value = htons(r); + } else { + tc->max_bw_value = htons(MLX4_RATELIMIT_DEFAULT); + tc->max_bw_units = htons(MLX4_RATELIMIT_1G_UNITS); + } tc->pg = htons(pg[i]); tc->bw_precentage = htons(tc_tx_bw[i]); - - tc->max_bw_units = htons(MLX4_RATELIMIT_UNITS); - tc->max_bw_value = htons(r); } in_mod = MLX4_SET_PORT_SCHEDULER << 8 | port; diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index b12f4bbd064c..db0aef37645f 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -48,6 +48,17 @@ #define MSIX_LEGACY_SZ 4 #define MIN_MSIX_P_PORT 5 +#define MLX4_NUM_UP 8 +#define MLX4_NUM_TC 8 +#define MLX4_MAX_100M_UNITS_VAL 255 /* + * work around: can't set values + * greater then this value when + * using 100 Mbps units. + */ +#define MLX4_RATELIMIT_100M_UNITS 3 /* 100 Mbps */ +#define MLX4_RATELIMIT_1G_UNITS 4 /* 1 Gbps */ +#define MLX4_RATELIMIT_DEFAULT 0x00ff + #define MLX4_ROCE_MAX_GIDS 128 #define MLX4_ROCE_PF_GIDS 16 -- cgit v1.2.3-59-g8ed1b From 6b32fafee2bb5fcf0b3d3d04a9762d3a0212089e Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 20 Jun 2014 14:37:38 +0200 Subject: dmaengine: shdma: Add more register documentation Also add a few definitions that were missing.
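As an aside not taken from the patch, the value of documenting these bits is that a channel configuration can now be read straight off the macros; the example below merely combines existing definitions from <linux/sh_dma.h> into one hypothetical channel-control value (real CHCR layouts vary between SoCs):

#include <linux/types.h>
#include <linux/sh_dma.h>

/*
 * Illustrative only: a memory-to-peripheral setup - increment the source
 * address, keep the destination fixed, use the extended resource
 * selector, and enable the channel together with its interrupt.
 */
static const u32 example_chcr = SM_INC | DM_FIX | RS_ERS | CHCR_IE | CHCR_DE;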
Signed-off-by: Geert Uytterhoeven Signed-off-by: Simon Horman --- arch/sh/include/asm/dma-register.h | 36 +++++++++++++++++++----------------- drivers/dma/sh/shdmac.c | 12 ++++++------ include/linux/sh_dma.h | 24 +++++++++++++----------- 3 files changed, 38 insertions(+), 34 deletions(-) (limited to 'include/linux') diff --git a/arch/sh/include/asm/dma-register.h b/arch/sh/include/asm/dma-register.h index 51cd78feacff..c757b47e6b64 100644 --- a/arch/sh/include/asm/dma-register.h +++ b/arch/sh/include/asm/dma-register.h @@ -13,17 +13,17 @@ #ifndef DMA_REGISTER_H #define DMA_REGISTER_H -/* DMA register */ -#define SAR 0x00 -#define DAR 0x04 -#define TCR 0x08 -#define CHCR 0x0C -#define DMAOR 0x40 +/* DMA registers */ +#define SAR 0x00 /* Source Address Register */ +#define DAR 0x04 /* Destination Address Register */ +#define TCR 0x08 /* Transfer Count Register */ +#define CHCR 0x0C /* Channel Control Register */ +#define DMAOR 0x40 /* DMA Operation Register */ /* DMAOR definitions */ -#define DMAOR_AE 0x00000004 +#define DMAOR_AE 0x00000004 /* Address Error Flag */ #define DMAOR_NMIF 0x00000002 -#define DMAOR_DME 0x00000001 +#define DMAOR_DME 0x00000001 /* DMA Master Enable */ /* Definitions for the SuperH DMAC */ #define REQ_L 0x00000000 @@ -34,18 +34,20 @@ #define ACK_W 0x00020000 #define ACK_H 0x00000000 #define ACK_L 0x00010000 -#define DM_INC 0x00004000 -#define DM_DEC 0x00008000 -#define DM_FIX 0x0000c000 -#define SM_INC 0x00001000 -#define SM_DEC 0x00002000 -#define SM_FIX 0x00003000 +#define DM_INC 0x00004000 /* Destination addresses are incremented */ +#define DM_DEC 0x00008000 /* Destination addresses are decremented */ +#define DM_FIX 0x0000c000 /* Destination address is fixed */ +#define SM_INC 0x00001000 /* Source addresses are incremented */ +#define SM_DEC 0x00002000 /* Source addresses are decremented */ +#define SM_FIX 0x00003000 /* Source address is fixed */ #define RS_IN 0x00000200 #define RS_OUT 0x00000300 +#define RS_AUTO 0x00000400 /* Auto Request */ +#define RS_ERS 0x00000800 /* DMA extended resource selector */ #define TS_BLK 0x00000040 #define TM_BUR 0x00000020 -#define CHCR_DE 0x00000001 -#define CHCR_TE 0x00000002 -#define CHCR_IE 0x00000004 +#define CHCR_DE 0x00000001 /* DMA Enable */ +#define CHCR_TE 0x00000002 /* Transfer End Flag */ +#define CHCR_IE 0x00000004 /* Interrupt Enable */ #endif diff --git a/drivers/dma/sh/shdmac.c b/drivers/dma/sh/shdmac.c index 146d5df926db..1a6f6595c6c1 100644 --- a/drivers/dma/sh/shdmac.c +++ b/drivers/dma/sh/shdmac.c @@ -38,12 +38,12 @@ #include "../dmaengine.h" #include "shdma.h" -/* DMA register */ -#define SAR 0x00 -#define DAR 0x04 -#define TCR 0x08 -#define CHCR 0x0C -#define DMAOR 0x40 +/* DMA registers */ +#define SAR 0x00 /* Source Address Register */ +#define DAR 0x04 /* Destination Address Register */ +#define TCR 0x08 /* Transfer Count Register */ +#define CHCR 0x0C /* Channel Control Register */ +#define DMAOR 0x40 /* DMA Operation Register */ #define TEND 0x18 /* USB-DMAC */ diff --git a/include/linux/sh_dma.h b/include/linux/sh_dma.h index b7b43b82231e..56b97eed28a4 100644 --- a/include/linux/sh_dma.h +++ b/include/linux/sh_dma.h @@ -95,19 +95,21 @@ struct sh_dmae_pdata { }; /* DMAOR definitions */ -#define DMAOR_AE 0x00000004 +#define DMAOR_AE 0x00000004 /* Address Error Flag */ #define DMAOR_NMIF 0x00000002 -#define DMAOR_DME 0x00000001 +#define DMAOR_DME 0x00000001 /* DMA Master Enable */ /* Definitions for the SuperH DMAC */ -#define DM_INC 0x00004000 -#define DM_DEC 0x00008000 -#define DM_FIX 0x0000c000 
-#define SM_INC 0x00001000 -#define SM_DEC 0x00002000 -#define SM_FIX 0x00003000 -#define CHCR_DE 0x00000001 -#define CHCR_TE 0x00000002 -#define CHCR_IE 0x00000004 +#define DM_INC 0x00004000 /* Destination addresses are incremented */ +#define DM_DEC 0x00008000 /* Destination addresses are decremented */ +#define DM_FIX 0x0000c000 /* Destination address is fixed */ +#define SM_INC 0x00001000 /* Source addresses are incremented */ +#define SM_DEC 0x00002000 /* Source addresses are decremented */ +#define SM_FIX 0x00003000 /* Source address is fixed */ +#define RS_AUTO 0x00000400 /* Auto Request */ +#define RS_ERS 0x00000800 /* DMA extended resource selector */ +#define CHCR_DE 0x00000001 /* DMA Enable */ +#define CHCR_TE 0x00000002 /* Transfer End Flag */ +#define CHCR_IE 0x00000004 /* Interrupt Enable */ #endif -- cgit v1.2.3-59-g8ed1b From 3a48edc4bd68f841c07c7bc86358d2f02133f247 Mon Sep 17 00:00:00 2001 From: Tim Kryger Date: Fri, 13 Jun 2014 10:13:56 -0700 Subject: mmc: sdhci: Use mmc core regulator infrastucture Switch the common SDHCI code over to use mmc_host's regulator pointers and remove the ones in the sdhci_host structure. Additionally, use the common mmc_regulator_get_supply function to get the regulators and set the ocr_avail mask. This change sets the ocr_avail directly based upon the voltage ranges supported which ensures ocr_avail is set correctly while allowing the use of regulators that can't provide exactly 1.8v, 3.0v, or 3.3v. Signed-off-by: Tim Kryger Signed-off-by: Markus Mayer Reviewed-by: Matt Porter Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci.c | 97 ++++++++++++++++++----------------------------- include/linux/mmc/sdhci.h | 3 -- 2 files changed, 36 insertions(+), 64 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 47055f3f01b8..ee524b06db14 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -1223,6 +1223,7 @@ EXPORT_SYMBOL_GPL(sdhci_set_clock); static void sdhci_set_power(struct sdhci_host *host, unsigned char mode, unsigned short vdd) { + struct mmc_host *mmc = host->mmc; u8 pwr = 0; if (mode != MMC_POWER_OFF) { @@ -1284,9 +1285,9 @@ static void sdhci_set_power(struct sdhci_host *host, unsigned char mode, mdelay(10); } - if (host->vmmc) { + if (!IS_ERR(mmc->supply.vmmc)) { spin_unlock_irq(&host->lock); - mmc_regulator_set_ocr(host->mmc, host->vmmc, vdd); + mmc_regulator_set_ocr(host->mmc, mmc->supply.vmmc, vdd); spin_lock_irq(&host->lock); } } @@ -1440,13 +1441,15 @@ static void sdhci_do_set_ios(struct sdhci_host *host, struct mmc_ios *ios) { unsigned long flags; u8 ctrl; + struct mmc_host *mmc = host->mmc; spin_lock_irqsave(&host->lock, flags); if (host->flags & SDHCI_DEVICE_DEAD) { spin_unlock_irqrestore(&host->lock, flags); - if (host->vmmc && ios->power_mode == MMC_POWER_OFF) - mmc_regulator_set_ocr(host->mmc, host->vmmc, 0); + if (!IS_ERR(mmc->supply.vmmc) && + ios->power_mode == MMC_POWER_OFF) + mmc_regulator_set_ocr(host->mmc, mmc->supply.vmmc, 0); return; } @@ -1707,6 +1710,7 @@ static void sdhci_enable_sdio_irq(struct mmc_host *mmc, int enable) static int sdhci_do_start_signal_voltage_switch(struct sdhci_host *host, struct mmc_ios *ios) { + struct mmc_host *mmc = host->mmc; u16 ctrl; int ret; @@ -1725,8 +1729,9 @@ static int sdhci_do_start_signal_voltage_switch(struct sdhci_host *host, ctrl &= ~SDHCI_CTRL_VDD_180; sdhci_writew(host, ctrl, SDHCI_HOST_CONTROL2); - if (host->vqmmc) { - ret = regulator_set_voltage(host->vqmmc, 2700000, 3600000); + if 
(!IS_ERR(mmc->supply.vqmmc)) { + ret = regulator_set_voltage(mmc->supply.vqmmc, 2700000, + 3600000); if (ret) { pr_warning("%s: Switching to 3.3V signalling voltage " " failed\n", mmc_hostname(host->mmc)); @@ -1746,8 +1751,8 @@ static int sdhci_do_start_signal_voltage_switch(struct sdhci_host *host, return -EAGAIN; case MMC_SIGNAL_VOLTAGE_180: - if (host->vqmmc) { - ret = regulator_set_voltage(host->vqmmc, + if (!IS_ERR(mmc->supply.vqmmc)) { + ret = regulator_set_voltage(mmc->supply.vqmmc, 1700000, 1950000); if (ret) { pr_warning("%s: Switching to 1.8V signalling voltage " @@ -1776,8 +1781,9 @@ static int sdhci_do_start_signal_voltage_switch(struct sdhci_host *host, return -EAGAIN; case MMC_SIGNAL_VOLTAGE_120: - if (host->vqmmc) { - ret = regulator_set_voltage(host->vqmmc, 1100000, 1300000); + if (!IS_ERR(mmc->supply.vqmmc)) { + ret = regulator_set_voltage(mmc->supply.vqmmc, 1100000, + 1300000); if (ret) { pr_warning("%s: Switching to 1.2V signalling voltage " " failed\n", mmc_hostname(host->mmc)); @@ -2962,25 +2968,22 @@ int sdhci_add_host(struct sdhci_host *host) !(host->mmc->caps & MMC_CAP_NONREMOVABLE)) mmc->caps |= MMC_CAP_NEEDS_POLL; + /* If there are external regulators, get them */ + if (mmc_regulator_get_supply(mmc) == -EPROBE_DEFER) + return -EPROBE_DEFER; + /* If vqmmc regulator and no 1.8V signalling, then there's no UHS */ - host->vqmmc = regulator_get_optional(mmc_dev(mmc), "vqmmc"); - if (IS_ERR_OR_NULL(host->vqmmc)) { - if (PTR_ERR(host->vqmmc) < 0) { - pr_info("%s: no vqmmc regulator found\n", - mmc_hostname(mmc)); - host->vqmmc = NULL; - } - } else { - ret = regulator_enable(host->vqmmc); - if (!regulator_is_supported_voltage(host->vqmmc, 1700000, - 1950000)) + if (!IS_ERR(mmc->supply.vqmmc)) { + ret = regulator_enable(mmc->supply.vqmmc); + if (!regulator_is_supported_voltage(mmc->supply.vqmmc, 1700000, + 1950000)) caps[1] &= ~(SDHCI_SUPPORT_SDR104 | SDHCI_SUPPORT_SDR50 | SDHCI_SUPPORT_DDR50); if (ret) { pr_warn("%s: Failed to enable vqmmc regulator: %d\n", mmc_hostname(mmc), ret); - host->vqmmc = NULL; + mmc->supply.vqmmc = NULL; } } @@ -3041,34 +3044,6 @@ int sdhci_add_host(struct sdhci_host *host) ocr_avail = 0; - host->vmmc = regulator_get_optional(mmc_dev(mmc), "vmmc"); - if (IS_ERR_OR_NULL(host->vmmc)) { - if (PTR_ERR(host->vmmc) < 0) { - pr_info("%s: no vmmc regulator found\n", - mmc_hostname(mmc)); - host->vmmc = NULL; - } - } - -#ifdef CONFIG_REGULATOR - /* - * Voltage range check makes sense only if regulator reports - * any voltage value. - */ - if (host->vmmc && regulator_get_voltage(host->vmmc) > 0) { - ret = regulator_is_supported_voltage(host->vmmc, 2700000, - 3600000); - if ((ret <= 0) || (!(caps[0] & SDHCI_CAN_VDD_330))) - caps[0] &= ~SDHCI_CAN_VDD_330; - if ((ret <= 0) || (!(caps[0] & SDHCI_CAN_VDD_300))) - caps[0] &= ~SDHCI_CAN_VDD_300; - ret = regulator_is_supported_voltage(host->vmmc, 1700000, - 1950000); - if ((ret <= 0) || (!(caps[0] & SDHCI_CAN_VDD_180))) - caps[0] &= ~SDHCI_CAN_VDD_180; - } -#endif /* CONFIG_REGULATOR */ - /* * According to SD Host Controller spec v3.00, if the Host System * can afford more than 150mA, Host Driver should set XPC to 1. Also @@ -3077,8 +3052,8 @@ int sdhci_add_host(struct sdhci_host *host) * value. 
*/ max_current_caps = sdhci_readl(host, SDHCI_MAX_CURRENT); - if (!max_current_caps && host->vmmc) { - u32 curr = regulator_get_current_limit(host->vmmc); + if (!max_current_caps && !IS_ERR(mmc->supply.vmmc)) { + u32 curr = regulator_get_current_limit(mmc->supply.vmmc); if (curr > 0) { /* convert to SDHCI_MAX_CURRENT format */ @@ -3118,8 +3093,11 @@ int sdhci_add_host(struct sdhci_host *host) SDHCI_MAX_CURRENT_MULTIPLIER; } + if (mmc->ocr_avail) + ocr_avail &= mmc->ocr_avail; + if (host->ocr_mask) - ocr_avail = host->ocr_mask; + ocr_avail &= host->ocr_mask; mmc->ocr_avail = ocr_avail; mmc->ocr_avail_sdio = ocr_avail; @@ -3273,6 +3251,7 @@ EXPORT_SYMBOL_GPL(sdhci_add_host); void sdhci_remove_host(struct sdhci_host *host, int dead) { + struct mmc_host *mmc = host->mmc; unsigned long flags; if (dead) { @@ -3310,15 +3289,11 @@ void sdhci_remove_host(struct sdhci_host *host, int dead) tasklet_kill(&host->finish_tasklet); - if (host->vmmc) { - regulator_disable(host->vmmc); - regulator_put(host->vmmc); - } + if (!IS_ERR(mmc->supply.vmmc)) + regulator_disable(mmc->supply.vmmc); - if (host->vqmmc) { - regulator_disable(host->vqmmc); - regulator_put(host->vqmmc); - } + if (!IS_ERR(mmc->supply.vqmmc)) + regulator_disable(mmc->supply.vqmmc); if (host->adma_desc) dma_free_coherent(mmc_dev(host->mmc), ADMA_SIZE, diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h index 08abe9941884..09ebe57d5ce9 100644 --- a/include/linux/mmc/sdhci.h +++ b/include/linux/mmc/sdhci.h @@ -104,9 +104,6 @@ struct sdhci_host { const struct sdhci_ops *ops; /* Low level hw interface */ - struct regulator *vmmc; /* Power regulator (vmmc) */ - struct regulator *vqmmc; /* Signaling regulator (vccq) */ - /* Internal data */ struct mmc_host *mmc; /* MMC structure */ u64 dma_mask; /* custom DMA mask */ -- cgit v1.2.3-59-g8ed1b From 2cd3a2a54656f9c480b1c7272fc07635d575841b Mon Sep 17 00:00:00 2001 From: Andreas Fenkart Date: Thu, 29 May 2014 10:28:00 +0200 Subject: mmc: omap_hsmmc: Enable SDIO interrupt There have been various patches floating around for enabling the SDIO IRQ for hsmmc, but none of them ever got merged. Probably the reason for not merging the SDIO interrupt patches has been the lack of wake-up path for SDIO on some omaps that has also needed remuxing the SDIO DAT1 line to a GPIO making the patches complex. This patch adds the minimal SDIO IRQ support to hsmmc for omaps that do have the wake-up path. For those omaps, the DAT1 line need to have the wake-up enable bit set, and the wake-up interrupt is the same as for the MMC controller. This patch has been tested on am3730 es1.2 with mwifiex connected to MMC3 with mwifiex waking to Ethernet traffic from off-idle mode. Note that for omaps that do not have the SDIO wake-up path, this patch will not work for idle modes and further patches for remuxing DAT1 to GPIO are needed. Based on earlier patches [1][2] by David Vrabel , Steve Sakoman For now, only support SDIO interrupt if we are booted with a separate wake-irq configued via device tree. This is because omaps need the wake-irq for idle states, and some omaps need special quirks. And we don't want to add new legacy mux platform init code callbacks any longer as we are moving to DT based booting anyways. To use it, you need to specify the wake-irq using the interrupts-extended property. 
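For context, and not part of the patch itself, the sketch below shows how a hypothetical SDIO function driver (the my_wlan_* names are placeholders) would consume the card interrupt once the host advertises MMC_CAP_SDIO_IRQ; the handler body is deliberately left empty:

#include <linux/mmc/sdio_func.h>

/* Hypothetical handler: called by the MMC core with the host claimed. */
static void my_wlan_sdio_irq(struct sdio_func *func)
{
        /* read the function's interrupt status and kick RX processing */
}

static int my_wlan_probe(struct sdio_func *func,
                         const struct sdio_device_id *id)
{
        int ret;

        sdio_claim_host(func);
        ret = sdio_claim_irq(func, my_wlan_sdio_irq);
        sdio_release_host(func);

        return ret;
}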
[1] http://www.sakoman.com/cgi-bin/gitweb.cgi?p=linux.git;a=commitdiff_plain;h=010810d22f6f49ac03da4ba384969432e0320453 [2] http://comments.gmane.org/gmane.linux.kernel.mmc/20446 Acked-by: Balaji T K Signed-off-by: Andreas Fenkart Signed-off-by: Tony Lindgren Signed-off-by: Ulf Hansson --- .../devicetree/bindings/mmc/ti-omap-hsmmc.txt | 1 + drivers/mmc/host/omap_hsmmc.c | 201 +++++++++++++++++++-- include/linux/platform_data/mmc-omap.h | 1 + 3 files changed, 191 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/mmc/ti-omap-hsmmc.txt b/Documentation/devicetree/bindings/mmc/ti-omap-hsmmc.txt index ce8056116fb0..0233ba7951e5 100644 --- a/Documentation/devicetree/bindings/mmc/ti-omap-hsmmc.txt +++ b/Documentation/devicetree/bindings/mmc/ti-omap-hsmmc.txt @@ -12,6 +12,7 @@ Required properties: Should be "ti,omap3-hsmmc", for OMAP3 controllers Should be "ti,omap3-pre-es3-hsmmc" for OMAP3 controllers pre ES3.0 Should be "ti,omap4-hsmmc", for OMAP4 controllers + Should be "ti,am33xx-hsmmc", for AM335x controllers - ti,hwmods: Must be "mmc", n is controller instance starting 1 Optional properties: diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c index 6b7b75585926..9446010a5dd9 100644 --- a/drivers/mmc/host/omap_hsmmc.c +++ b/drivers/mmc/host/omap_hsmmc.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -36,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -106,6 +108,7 @@ #define TC_EN (1 << 1) #define BWR_EN (1 << 4) #define BRR_EN (1 << 5) +#define CIRQ_EN (1 << 8) #define ERR_EN (1 << 15) #define CTO_EN (1 << 16) #define CCRC_EN (1 << 17) @@ -140,7 +143,6 @@ #define VDD_3V0 3000000 /* 300000 uV */ #define VDD_165_195 (ffs(MMC_VDD_165_195) - 1) -#define AUTO_CMD23 (1 << 1) /* Auto CMD23 support */ /* * One controller can have multiple slots, like on some omap boards using * omap.c controller driver. 
Luckily this is not currently done on any known @@ -194,6 +196,7 @@ struct omap_hsmmc_host { u32 sysctl; u32 capa; int irq; + int wake_irq; int use_dma, dma_ch; struct dma_chan *tx_chan; struct dma_chan *rx_chan; @@ -206,6 +209,9 @@ struct omap_hsmmc_host { int req_in_progress; unsigned long clk_rate; unsigned int flags; +#define AUTO_CMD23 (1 << 0) /* Auto CMD23 support */ +#define HSMMC_SDIO_IRQ_ENABLED (1 << 1) /* SDIO irq enabled */ +#define HSMMC_WAKE_IRQ_ENABLED (1 << 2) struct omap_hsmmc_next next_data; struct omap_mmc_platform_data *pdata; }; @@ -510,27 +516,40 @@ static void omap_hsmmc_stop_clock(struct omap_hsmmc_host *host) static void omap_hsmmc_enable_irq(struct omap_hsmmc_host *host, struct mmc_command *cmd) { - unsigned int irq_mask; + u32 irq_mask = INT_EN_MASK; + unsigned long flags; if (host->use_dma) - irq_mask = INT_EN_MASK & ~(BRR_EN | BWR_EN); - else - irq_mask = INT_EN_MASK; + irq_mask &= ~(BRR_EN | BWR_EN); /* Disable timeout for erases */ if (cmd->opcode == MMC_ERASE) irq_mask &= ~DTO_EN; + spin_lock_irqsave(&host->irq_lock, flags); OMAP_HSMMC_WRITE(host->base, STAT, STAT_CLEAR); OMAP_HSMMC_WRITE(host->base, ISE, irq_mask); + + /* latch pending CIRQ, but don't signal MMC core */ + if (host->flags & HSMMC_SDIO_IRQ_ENABLED) + irq_mask |= CIRQ_EN; OMAP_HSMMC_WRITE(host->base, IE, irq_mask); + spin_unlock_irqrestore(&host->irq_lock, flags); } static void omap_hsmmc_disable_irq(struct omap_hsmmc_host *host) { - OMAP_HSMMC_WRITE(host->base, ISE, 0); - OMAP_HSMMC_WRITE(host->base, IE, 0); + u32 irq_mask = 0; + unsigned long flags; + + spin_lock_irqsave(&host->irq_lock, flags); + /* no transfer running but need to keep cirq if enabled */ + if (host->flags & HSMMC_SDIO_IRQ_ENABLED) + irq_mask |= CIRQ_EN; + OMAP_HSMMC_WRITE(host->base, ISE, irq_mask); + OMAP_HSMMC_WRITE(host->base, IE, irq_mask); OMAP_HSMMC_WRITE(host->base, STAT, STAT_CLEAR); + spin_unlock_irqrestore(&host->irq_lock, flags); } /* Calculate divisor for the given clock frequency */ @@ -681,7 +700,9 @@ static int omap_hsmmc_context_restore(struct omap_hsmmc_host *host) && time_before(jiffies, timeout)) ; - omap_hsmmc_disable_irq(host); + OMAP_HSMMC_WRITE(host->base, ISE, 0); + OMAP_HSMMC_WRITE(host->base, IE, 0); + OMAP_HSMMC_WRITE(host->base, STAT, STAT_CLEAR); /* Do not initialize card-specific things if the power is off */ if (host->power_mode == MMC_POWER_OFF) @@ -1118,8 +1139,12 @@ static irqreturn_t omap_hsmmc_irq(int irq, void *dev_id) int status; status = OMAP_HSMMC_READ(host->base, STAT); - while (status & INT_EN_MASK && host->req_in_progress) { - omap_hsmmc_do_irq(host, status); + while (status & (INT_EN_MASK | CIRQ_EN)) { + if (host->req_in_progress) + omap_hsmmc_do_irq(host, status); + + if (status & CIRQ_EN) + mmc_signal_sdio_irq(host->mmc); /* Flush posted write */ status = OMAP_HSMMC_READ(host->base, STAT); @@ -1128,6 +1153,22 @@ static irqreturn_t omap_hsmmc_irq(int irq, void *dev_id) return IRQ_HANDLED; } +static irqreturn_t omap_hsmmc_wake_irq(int irq, void *dev_id) +{ + struct omap_hsmmc_host *host = dev_id; + + /* cirq is level triggered, disable to avoid infinite loop */ + spin_lock(&host->irq_lock); + if (host->flags & HSMMC_WAKE_IRQ_ENABLED) { + disable_irq_nosync(host->wake_irq); + host->flags &= ~HSMMC_WAKE_IRQ_ENABLED; + } + spin_unlock(&host->irq_lock); + pm_request_resume(host->dev); /* no use counter */ + + return IRQ_HANDLED; +} + static void set_sd_bus_power(struct omap_hsmmc_host *host) { unsigned long i; @@ -1639,6 +1680,79 @@ static void omap_hsmmc_init_card(struct mmc_host 
*mmc, struct mmc_card *card) mmc_slot(host).init_card(card); } +static void omap_hsmmc_enable_sdio_irq(struct mmc_host *mmc, int enable) +{ + struct omap_hsmmc_host *host = mmc_priv(mmc); + u32 irq_mask; + unsigned long flags; + + spin_lock_irqsave(&host->irq_lock, flags); + + irq_mask = OMAP_HSMMC_READ(host->base, ISE); + if (enable) { + host->flags |= HSMMC_SDIO_IRQ_ENABLED; + irq_mask |= CIRQ_EN; + } else { + host->flags &= ~HSMMC_SDIO_IRQ_ENABLED; + irq_mask &= ~CIRQ_EN; + } + OMAP_HSMMC_WRITE(host->base, IE, irq_mask); + + /* + * if enable, piggy back detection on current request + * but always disable immediately + */ + if (!host->req_in_progress || !enable) + OMAP_HSMMC_WRITE(host->base, ISE, irq_mask); + + /* flush posted write */ + OMAP_HSMMC_READ(host->base, IE); + + spin_unlock_irqrestore(&host->irq_lock, flags); +} + +static int omap_hsmmc_configure_wake_irq(struct omap_hsmmc_host *host) +{ + struct mmc_host *mmc = host->mmc; + int ret; + + /* + * For omaps with wake-up path, wakeirq will be irq from pinctrl and + * for other omaps, wakeirq will be from GPIO (dat line remuxed to + * gpio). wakeirq is needed to detect sdio irq in runtime suspend state + * with functional clock disabled. + */ + if (!host->dev->of_node || !host->wake_irq) + return -ENODEV; + + /* Prevent auto-enabling of IRQ */ + irq_set_status_flags(host->wake_irq, IRQ_NOAUTOEN); + ret = devm_request_irq(host->dev, host->wake_irq, omap_hsmmc_wake_irq, + IRQF_TRIGGER_LOW | IRQF_ONESHOT, + mmc_hostname(mmc), host); + if (ret) { + dev_err(mmc_dev(host->mmc), "Unable to request wake IRQ\n"); + goto err; + } + + /* + * Some omaps don't have wake-up path from deeper idle states + * and need to remux SDIO DAT1 to GPIO for wake-up from idle. + */ + if (host->pdata->controller_flags & OMAP_HSMMC_SWAKEUP_MISSING) { + ret = -ENODEV; + devm_free_irq(host->dev, host->wake_irq, host); + goto err; + } + + return 0; + +err: + dev_warn(host->dev, "no SDIO IRQ support, falling back to polling\n"); + host->wake_irq = 0; + return ret; +} + static void omap_hsmmc_conf_bus_power(struct omap_hsmmc_host *host) { u32 hctl, capa, value; @@ -1691,7 +1805,7 @@ static const struct mmc_host_ops omap_hsmmc_ops = { .get_cd = omap_hsmmc_get_cd, .get_ro = omap_hsmmc_get_ro, .init_card = omap_hsmmc_init_card, - /* NYET -- enable_sdio_irq */ + .enable_sdio_irq = omap_hsmmc_enable_sdio_irq, }; #ifdef CONFIG_DEBUG_FS @@ -1761,6 +1875,10 @@ static const struct omap_mmc_of_data omap3_pre_es3_mmc_of_data = { static const struct omap_mmc_of_data omap4_mmc_of_data = { .reg_offset = 0x100, }; +static const struct omap_mmc_of_data am33xx_mmc_of_data = { + .reg_offset = 0x100, + .controller_flags = OMAP_HSMMC_SWAKEUP_MISSING, +}; static const struct of_device_id omap_mmc_of_match[] = { { @@ -1777,6 +1895,10 @@ static const struct of_device_id omap_mmc_of_match[] = { .compatible = "ti,omap4-hsmmc", .data = &omap4_mmc_of_data, }, + { + .compatible = "ti,am33xx-hsmmc", + .data = &am33xx_mmc_of_data, + }, {}, }; MODULE_DEVICE_TABLE(of, omap_mmc_of_match); @@ -1913,6 +2035,9 @@ static int omap_hsmmc_probe(struct platform_device *pdev) platform_set_drvdata(pdev, host); + if (pdev->dev.of_node) + host->wake_irq = irq_of_parse_and_map(pdev->dev.of_node, 1); + mmc->ops = &omap_hsmmc_ops; mmc->f_min = OMAP_MMC_MIN_CLOCK; @@ -2066,6 +2191,18 @@ static int omap_hsmmc_probe(struct platform_device *pdev) dev_warn(&pdev->dev, "pins are not configured from the driver\n"); + /* + * For now, only support SDIO interrupt if we have a separate + * wake-up interrupt configured 
from device tree. This is because + * the wake-up interrupt is needed for idle state and some + * platforms need special quirks. And we don't want to add new + * legacy mux platform init code callbacks any longer as we + * are moving to DT based booting anyways. + */ + ret = omap_hsmmc_configure_wake_irq(host); + if (!ret) + mmc->caps |= MMC_CAP_SDIO_IRQ; + omap_hsmmc_protect_card(host); mmc_add_host(mmc); @@ -2170,11 +2307,18 @@ static int omap_hsmmc_suspend(struct device *dev) pm_runtime_get_sync(host->dev); if (!(host->mmc->pm_flags & MMC_PM_KEEP_POWER)) { - omap_hsmmc_disable_irq(host); + OMAP_HSMMC_WRITE(host->base, ISE, 0); + OMAP_HSMMC_WRITE(host->base, IE, 0); + OMAP_HSMMC_WRITE(host->base, STAT, STAT_CLEAR); OMAP_HSMMC_WRITE(host->base, HCTL, OMAP_HSMMC_READ(host->base, HCTL) & ~SDBP); } + /* do not wake up due to sdio irq */ + if ((host->mmc->caps & MMC_CAP_SDIO_IRQ) && + !(host->mmc->pm_flags & MMC_PM_WAKE_SDIO_IRQ)) + disable_irq(host->wake_irq); + if (host->dbclk) clk_disable_unprepare(host->dbclk); @@ -2200,6 +2344,10 @@ static int omap_hsmmc_resume(struct device *dev) omap_hsmmc_protect_card(host); + if ((host->mmc->caps & MMC_CAP_SDIO_IRQ) && + !(host->mmc->pm_flags & MMC_PM_WAKE_SDIO_IRQ)) + enable_irq(host->wake_irq); + pm_runtime_mark_last_busy(host->dev); pm_runtime_put_autosuspend(host->dev); return 0; @@ -2215,22 +2363,51 @@ static int omap_hsmmc_resume(struct device *dev) static int omap_hsmmc_runtime_suspend(struct device *dev) { struct omap_hsmmc_host *host; + unsigned long flags; host = platform_get_drvdata(to_platform_device(dev)); omap_hsmmc_context_save(host); dev_dbg(dev, "disabled\n"); + spin_lock_irqsave(&host->irq_lock, flags); + if ((host->mmc->caps & MMC_CAP_SDIO_IRQ) && + (host->flags & HSMMC_SDIO_IRQ_ENABLED)) { + /* disable sdio irq handling to prevent race */ + OMAP_HSMMC_WRITE(host->base, ISE, 0); + OMAP_HSMMC_WRITE(host->base, IE, 0); + OMAP_HSMMC_WRITE(host->base, STAT, STAT_CLEAR); + + WARN_ON(host->flags & HSMMC_WAKE_IRQ_ENABLED); + enable_irq(host->wake_irq); + host->flags |= HSMMC_WAKE_IRQ_ENABLED; + } + spin_unlock_irqrestore(&host->irq_lock, flags); return 0; } static int omap_hsmmc_runtime_resume(struct device *dev) { struct omap_hsmmc_host *host; + unsigned long flags; host = platform_get_drvdata(to_platform_device(dev)); omap_hsmmc_context_restore(host); dev_dbg(dev, "enabled\n"); + spin_lock_irqsave(&host->irq_lock, flags); + if ((host->mmc->caps & MMC_CAP_SDIO_IRQ) && + (host->flags & HSMMC_SDIO_IRQ_ENABLED)) { + /* sdio irq flag can't change while in runtime suspend */ + if (host->flags & HSMMC_WAKE_IRQ_ENABLED) { + disable_irq_nosync(host->wake_irq); + host->flags &= ~HSMMC_WAKE_IRQ_ENABLED; + } + + OMAP_HSMMC_WRITE(host->base, STAT, STAT_CLEAR); + OMAP_HSMMC_WRITE(host->base, ISE, CIRQ_EN); + OMAP_HSMMC_WRITE(host->base, IE, CIRQ_EN); + } + spin_unlock_irqrestore(&host->irq_lock, flags); return 0; } diff --git a/include/linux/platform_data/mmc-omap.h b/include/linux/platform_data/mmc-omap.h index 2bf1b30cb5dc..51e70cf25cbc 100644 --- a/include/linux/platform_data/mmc-omap.h +++ b/include/linux/platform_data/mmc-omap.h @@ -28,6 +28,7 @@ */ #define OMAP_HSMMC_SUPPORTS_DUAL_VOLT BIT(0) #define OMAP_HSMMC_BROKEN_MULTIBLOCK_READ BIT(1) +#define OMAP_HSMMC_SWAKEUP_MISSING BIT(2) struct mmc_card; -- cgit v1.2.3-59-g8ed1b From ec38846ad59d7b780540afcec101b24139933195 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Thu, 29 May 2014 01:20:16 +0200 Subject: backlight: atmel-pwm-bl: remove obsolete driver The atmel-pwm-bl driver is now 
obsolete. It is not used by any mainlined boards and is replaced by the generic pwm_bl with the pawm-atmel driver using the generic PWM framework. Signed-off-by: Alexandre Belloni Acked-by: Hans-Christian Egtvedt Acked-by: Jingoo Han Signed-off-by: Nicolas Ferre --- drivers/video/backlight/Kconfig | 11 -- drivers/video/backlight/Makefile | 1 - drivers/video/backlight/atmel-pwm-bl.c | 223 --------------------------------- include/linux/atmel-pwm-bl.h | 43 ------- 4 files changed, 278 deletions(-) delete mode 100644 drivers/video/backlight/atmel-pwm-bl.c delete mode 100644 include/linux/atmel-pwm-bl.h (limited to 'include/linux') diff --git a/drivers/video/backlight/Kconfig b/drivers/video/backlight/Kconfig index 5d449059a556..c3c18339b8cb 100644 --- a/drivers/video/backlight/Kconfig +++ b/drivers/video/backlight/Kconfig @@ -178,17 +178,6 @@ config BACKLIGHT_ATMEL_LCDC If in doubt, it's safe to enable this option; it doesn't kick in unless the board's description says it's wired that way. -config BACKLIGHT_ATMEL_PWM - tristate "Atmel PWM backlight control" - depends on ATMEL_PWM - help - Say Y here if you want to use the PWM peripheral in Atmel AT91 and - AVR32 devices. This driver will need additional platform data to know - which PWM instance to use and how to configure it. - - To compile this driver as a module, choose M here: the module will be - called atmel-pwm-bl. - config BACKLIGHT_EP93XX tristate "Cirrus EP93xx Backlight Driver" depends on FB_EP93XX diff --git a/drivers/video/backlight/Makefile b/drivers/video/backlight/Makefile index bb820024f346..351451dbb607 100644 --- a/drivers/video/backlight/Makefile +++ b/drivers/video/backlight/Makefile @@ -25,7 +25,6 @@ obj-$(CONFIG_BACKLIGHT_ADP8860) += adp8860_bl.o obj-$(CONFIG_BACKLIGHT_ADP8870) += adp8870_bl.o obj-$(CONFIG_BACKLIGHT_APPLE) += apple_bl.o obj-$(CONFIG_BACKLIGHT_AS3711) += as3711_bl.o -obj-$(CONFIG_BACKLIGHT_ATMEL_PWM) += atmel-pwm-bl.o obj-$(CONFIG_BACKLIGHT_BD6107) += bd6107.o obj-$(CONFIG_BACKLIGHT_CARILLO_RANCH) += cr_bllcd.o obj-$(CONFIG_BACKLIGHT_CLASS_DEVICE) += backlight.o diff --git a/drivers/video/backlight/atmel-pwm-bl.c b/drivers/video/backlight/atmel-pwm-bl.c deleted file mode 100644 index 261b1a4ec3d8..000000000000 --- a/drivers/video/backlight/atmel-pwm-bl.c +++ /dev/null @@ -1,223 +0,0 @@ -/* - * Copyright (C) 2008 Atmel Corporation - * - * Backlight driver using Atmel PWM peripheral. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. 
- */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -struct atmel_pwm_bl { - const struct atmel_pwm_bl_platform_data *pdata; - struct backlight_device *bldev; - struct platform_device *pdev; - struct pwm_channel pwmc; - int gpio_on; -}; - -static void atmel_pwm_bl_set_gpio_on(struct atmel_pwm_bl *pwmbl, int on) -{ - if (!gpio_is_valid(pwmbl->gpio_on)) - return; - - gpio_set_value(pwmbl->gpio_on, on ^ pwmbl->pdata->on_active_low); -} - -static int atmel_pwm_bl_set_intensity(struct backlight_device *bd) -{ - struct atmel_pwm_bl *pwmbl = bl_get_data(bd); - int intensity = bd->props.brightness; - int pwm_duty; - - if (bd->props.power != FB_BLANK_UNBLANK) - intensity = 0; - if (bd->props.fb_blank != FB_BLANK_UNBLANK) - intensity = 0; - - if (pwmbl->pdata->pwm_active_low) - pwm_duty = pwmbl->pdata->pwm_duty_min + intensity; - else - pwm_duty = pwmbl->pdata->pwm_duty_max - intensity; - - if (pwm_duty > pwmbl->pdata->pwm_duty_max) - pwm_duty = pwmbl->pdata->pwm_duty_max; - if (pwm_duty < pwmbl->pdata->pwm_duty_min) - pwm_duty = pwmbl->pdata->pwm_duty_min; - - if (!intensity) { - atmel_pwm_bl_set_gpio_on(pwmbl, 0); - pwm_channel_writel(&pwmbl->pwmc, PWM_CUPD, pwm_duty); - pwm_channel_disable(&pwmbl->pwmc); - } else { - pwm_channel_enable(&pwmbl->pwmc); - pwm_channel_writel(&pwmbl->pwmc, PWM_CUPD, pwm_duty); - atmel_pwm_bl_set_gpio_on(pwmbl, 1); - } - - return 0; -} - -static int atmel_pwm_bl_get_intensity(struct backlight_device *bd) -{ - struct atmel_pwm_bl *pwmbl = bl_get_data(bd); - u32 cdty; - u32 intensity; - - cdty = pwm_channel_readl(&pwmbl->pwmc, PWM_CDTY); - if (pwmbl->pdata->pwm_active_low) - intensity = cdty - pwmbl->pdata->pwm_duty_min; - else - intensity = pwmbl->pdata->pwm_duty_max - cdty; - - return intensity & 0xffff; -} - -static int atmel_pwm_bl_init_pwm(struct atmel_pwm_bl *pwmbl) -{ - unsigned long pwm_rate = pwmbl->pwmc.mck; - unsigned long prescale = DIV_ROUND_UP(pwm_rate, - (pwmbl->pdata->pwm_frequency * - pwmbl->pdata->pwm_compare_max)) - 1; - - /* - * Prescale must be power of two and maximum 0xf in size because of - * hardware limit. PWM speed will be: - * PWM module clock speed / (2 ^ prescale). 
- */ - prescale = fls(prescale); - if (prescale > 0xf) - prescale = 0xf; - - pwm_channel_writel(&pwmbl->pwmc, PWM_CMR, prescale); - pwm_channel_writel(&pwmbl->pwmc, PWM_CDTY, - pwmbl->pdata->pwm_duty_min + - pwmbl->bldev->props.brightness); - pwm_channel_writel(&pwmbl->pwmc, PWM_CPRD, - pwmbl->pdata->pwm_compare_max); - - dev_info(&pwmbl->pdev->dev, "Atmel PWM backlight driver (%lu Hz)\n", - pwmbl->pwmc.mck / pwmbl->pdata->pwm_compare_max / - (1 << prescale)); - - return pwm_channel_enable(&pwmbl->pwmc); -} - -static const struct backlight_ops atmel_pwm_bl_ops = { - .get_brightness = atmel_pwm_bl_get_intensity, - .update_status = atmel_pwm_bl_set_intensity, -}; - -static int atmel_pwm_bl_probe(struct platform_device *pdev) -{ - struct backlight_properties props; - const struct atmel_pwm_bl_platform_data *pdata; - struct backlight_device *bldev; - struct atmel_pwm_bl *pwmbl; - unsigned long flags; - int retval; - - pdata = dev_get_platdata(&pdev->dev); - if (!pdata) - return -ENODEV; - - if (pdata->pwm_compare_max < pdata->pwm_duty_max || - pdata->pwm_duty_min > pdata->pwm_duty_max || - pdata->pwm_frequency == 0) - return -EINVAL; - - pwmbl = devm_kzalloc(&pdev->dev, sizeof(struct atmel_pwm_bl), - GFP_KERNEL); - if (!pwmbl) - return -ENOMEM; - - pwmbl->pdev = pdev; - pwmbl->pdata = pdata; - pwmbl->gpio_on = pdata->gpio_on; - - retval = pwm_channel_alloc(pdata->pwm_channel, &pwmbl->pwmc); - if (retval) - return retval; - - if (gpio_is_valid(pwmbl->gpio_on)) { - /* Turn display off by default. */ - if (pdata->on_active_low) - flags = GPIOF_OUT_INIT_HIGH; - else - flags = GPIOF_OUT_INIT_LOW; - - retval = devm_gpio_request_one(&pdev->dev, pwmbl->gpio_on, - flags, "gpio_atmel_pwm_bl"); - if (retval) - goto err_free_pwm; - } - - memset(&props, 0, sizeof(struct backlight_properties)); - props.type = BACKLIGHT_RAW; - props.max_brightness = pdata->pwm_duty_max - pdata->pwm_duty_min; - bldev = devm_backlight_device_register(&pdev->dev, "atmel-pwm-bl", - &pdev->dev, pwmbl, &atmel_pwm_bl_ops, - &props); - if (IS_ERR(bldev)) { - retval = PTR_ERR(bldev); - goto err_free_pwm; - } - - pwmbl->bldev = bldev; - - platform_set_drvdata(pdev, pwmbl); - - /* Power up the backlight by default at middle intesity. */ - bldev->props.power = FB_BLANK_UNBLANK; - bldev->props.brightness = bldev->props.max_brightness / 2; - - retval = atmel_pwm_bl_init_pwm(pwmbl); - if (retval) - goto err_free_pwm; - - atmel_pwm_bl_set_intensity(bldev); - - return 0; - -err_free_pwm: - pwm_channel_free(&pwmbl->pwmc); - - return retval; -} - -static int atmel_pwm_bl_remove(struct platform_device *pdev) -{ - struct atmel_pwm_bl *pwmbl = platform_get_drvdata(pdev); - - atmel_pwm_bl_set_gpio_on(pwmbl, 0); - pwm_channel_disable(&pwmbl->pwmc); - pwm_channel_free(&pwmbl->pwmc); - - return 0; -} - -static struct platform_driver atmel_pwm_bl_driver = { - .driver = { - .name = "atmel-pwm-bl", - }, - /* REVISIT add suspend() and resume() */ - .probe = atmel_pwm_bl_probe, - .remove = atmel_pwm_bl_remove, -}; - -module_platform_driver(atmel_pwm_bl_driver); - -MODULE_AUTHOR("Hans-Christian egtvedt "); -MODULE_DESCRIPTION("Atmel PWM backlight driver"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS("platform:atmel-pwm-bl"); diff --git a/include/linux/atmel-pwm-bl.h b/include/linux/atmel-pwm-bl.h deleted file mode 100644 index 0153a47806c2..000000000000 --- a/include/linux/atmel-pwm-bl.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (C) 2007 Atmel Corporation - * - * Driver for the AT32AP700X PS/2 controller (PSIF). 
- * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published - * by the Free Software Foundation. - */ - -#ifndef __INCLUDE_ATMEL_PWM_BL_H -#define __INCLUDE_ATMEL_PWM_BL_H - -/** - * struct atmel_pwm_bl_platform_data - * @pwm_channel: which PWM channel in the PWM module to use. - * @pwm_frequency: PWM frequency to generate, the driver will try to be as - * close as the prescaler allows. - * @pwm_compare_max: value to use in the PWM channel compare register. - * @pwm_duty_max: maximum duty cycle value, must be less than or equal to - * pwm_compare_max. - * @pwm_duty_min: minimum duty cycle value, must be less than pwm_duty_max. - * @pwm_active_low: set to one if the low part of the PWM signal increases the - * brightness of the backlight. - * @gpio_on: GPIO line to control the backlight on/off, set to -1 if not used. - * @on_active_low: set to one if the on/off signal is on when GPIO is low. - * - * This struct must be added to the platform device in the board code. It is - * used by the atmel-pwm-bl driver to setup the GPIO to control on/off and the - * PWM device. - */ -struct atmel_pwm_bl_platform_data { - unsigned int pwm_channel; - unsigned int pwm_frequency; - unsigned int pwm_compare_max; - unsigned int pwm_duty_max; - unsigned int pwm_duty_min; - unsigned int pwm_active_low; - int gpio_on; - unsigned int on_active_low; -}; - -#endif /* __INCLUDE_ATMEL_PWM_BL_H */ -- cgit v1.2.3-59-g8ed1b From f2a70e1fc1ccc0fcdf4ad12db7382134228fb552 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Thu, 29 May 2014 01:20:18 +0200 Subject: misc: atmel_pwm: remove obsolete driver The misc/atmel_pwm is not used by any mainlined boards and has been replaced by the pwm-driver using the generic PWM framework. Signed-off-by: Alexandre Belloni Acked-by: Greg Kroah-Hartman Signed-off-by: Nicolas Ferre --- drivers/misc/Kconfig | 10 -- drivers/misc/Makefile | 1 - drivers/misc/atmel_pwm.c | 402 ---------------------------------------------- include/linux/atmel_pwm.h | 70 -------- 4 files changed, 483 deletions(-) delete mode 100644 drivers/misc/atmel_pwm.c delete mode 100644 include/linux/atmel_pwm.h (limited to 'include/linux') diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index ee9402324a23..b841180c7c74 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -51,16 +51,6 @@ config AD525X_DPOT_SPI To compile this driver as a module, choose M here: the module will be called ad525x_dpot-spi. -config ATMEL_PWM - tristate "Atmel AT32/AT91 PWM support" - depends on HAVE_CLK - depends on AVR32 || ARCH_AT91SAM9263 || ARCH_AT91SAM9RL || ARCH_AT91SAM9G45 - help - This option enables device driver support for the PWM channels - on certain Atmel processors. Pulse Width Modulation is used for - purposes including software controlled power-efficient backlights - on LCD displays, motor control, and waveform generation. 
- config ATMEL_TCLIB bool "Atmel AT32/AT91 Timer/Counter Library" depends on (AVR32 || ARCH_AT91) diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile index d59ce1261b38..5497d026e651 100644 --- a/drivers/misc/Makefile +++ b/drivers/misc/Makefile @@ -7,7 +7,6 @@ obj-$(CONFIG_AD525X_DPOT) += ad525x_dpot.o obj-$(CONFIG_AD525X_DPOT_I2C) += ad525x_dpot-i2c.o obj-$(CONFIG_AD525X_DPOT_SPI) += ad525x_dpot-spi.o obj-$(CONFIG_INTEL_MID_PTI) += pti.o -obj-$(CONFIG_ATMEL_PWM) += atmel_pwm.o obj-$(CONFIG_ATMEL_SSC) += atmel-ssc.o obj-$(CONFIG_ATMEL_TCLIB) += atmel_tclib.o obj-$(CONFIG_BMP085) += bmp085.o diff --git a/drivers/misc/atmel_pwm.c b/drivers/misc/atmel_pwm.c deleted file mode 100644 index a6dc56e1bc58..000000000000 --- a/drivers/misc/atmel_pwm.c +++ /dev/null @@ -1,402 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include - - -/* - * This is a simple driver for the PWM controller found in various newer - * Atmel SOCs, including the AVR32 series and the AT91sam9263. - * - * Chips with current Linux ports have only 4 PWM channels, out of max 32. - * AT32UC3A and AT32UC3B chips have 7 channels (but currently no Linux). - * Docs are inconsistent about the width of the channel counter registers; - * it's at least 16 bits, but several places say 20 bits. - */ -#define PWM_NCHAN 4 /* max 32 */ - -struct pwm { - spinlock_t lock; - struct platform_device *pdev; - u32 mask; - int irq; - void __iomem *base; - struct clk *clk; - struct pwm_channel *channel[PWM_NCHAN]; - void (*handler[PWM_NCHAN])(struct pwm_channel *); -}; - - -/* global PWM controller registers */ -#define PWM_MR 0x00 -#define PWM_ENA 0x04 -#define PWM_DIS 0x08 -#define PWM_SR 0x0c -#define PWM_IER 0x10 -#define PWM_IDR 0x14 -#define PWM_IMR 0x18 -#define PWM_ISR 0x1c - -static inline void pwm_writel(const struct pwm *p, unsigned offset, u32 val) -{ - __raw_writel(val, p->base + offset); -} - -static inline u32 pwm_readl(const struct pwm *p, unsigned offset) -{ - return __raw_readl(p->base + offset); -} - -static inline void __iomem *pwmc_regs(const struct pwm *p, int index) -{ - return p->base + 0x200 + index * 0x20; -} - -static struct pwm *pwm; - -static void pwm_dumpregs(struct pwm_channel *ch, char *tag) -{ - struct device *dev = &pwm->pdev->dev; - - dev_dbg(dev, "%s: mr %08x, sr %08x, imr %08x\n", - tag, - pwm_readl(pwm, PWM_MR), - pwm_readl(pwm, PWM_SR), - pwm_readl(pwm, PWM_IMR)); - dev_dbg(dev, - "pwm ch%d - mr %08x, dty %u, prd %u, cnt %u\n", - ch->index, - pwm_channel_readl(ch, PWM_CMR), - pwm_channel_readl(ch, PWM_CDTY), - pwm_channel_readl(ch, PWM_CPRD), - pwm_channel_readl(ch, PWM_CCNT)); -} - - -/** - * pwm_channel_alloc - allocate an unused PWM channel - * @index: identifies the channel - * @ch: structure to be initialized - * - * Drivers allocate PWM channels according to the board's wiring, and - * matching board-specific setup code. Returns zero or negative errno. - */ -int pwm_channel_alloc(int index, struct pwm_channel *ch) -{ - unsigned long flags; - int status = 0; - - if (!pwm) - return -EPROBE_DEFER; - - if (!(pwm->mask & 1 << index)) - return -ENODEV; - - if (index < 0 || index >= PWM_NCHAN || !ch) - return -EINVAL; - memset(ch, 0, sizeof *ch); - - spin_lock_irqsave(&pwm->lock, flags); - if (pwm->channel[index]) - status = -EBUSY; - else { - clk_enable(pwm->clk); - - ch->regs = pwmc_regs(pwm, index); - ch->index = index; - - /* REVISIT: ap7000 seems to go 2x as fast as we expect!! 
*/ - ch->mck = clk_get_rate(pwm->clk); - - pwm->channel[index] = ch; - pwm->handler[index] = NULL; - - /* channel and irq are always disabled when we return */ - pwm_writel(pwm, PWM_DIS, 1 << index); - pwm_writel(pwm, PWM_IDR, 1 << index); - } - spin_unlock_irqrestore(&pwm->lock, flags); - return status; -} -EXPORT_SYMBOL(pwm_channel_alloc); - -static int pwmcheck(struct pwm_channel *ch) -{ - int index; - - if (!pwm) - return -ENODEV; - if (!ch) - return -EINVAL; - index = ch->index; - if (index < 0 || index >= PWM_NCHAN || pwm->channel[index] != ch) - return -EINVAL; - - return index; -} - -/** - * pwm_channel_free - release a previously allocated channel - * @ch: the channel being released - * - * The channel is completely shut down (counter and IRQ disabled), - * and made available for re-use. Returns zero, or negative errno. - */ -int pwm_channel_free(struct pwm_channel *ch) -{ - unsigned long flags; - int t; - - spin_lock_irqsave(&pwm->lock, flags); - t = pwmcheck(ch); - if (t >= 0) { - pwm->channel[t] = NULL; - pwm->handler[t] = NULL; - - /* channel and irq are always disabled when we return */ - pwm_writel(pwm, PWM_DIS, 1 << t); - pwm_writel(pwm, PWM_IDR, 1 << t); - - clk_disable(pwm->clk); - t = 0; - } - spin_unlock_irqrestore(&pwm->lock, flags); - return t; -} -EXPORT_SYMBOL(pwm_channel_free); - -int __pwm_channel_onoff(struct pwm_channel *ch, int enabled) -{ - unsigned long flags; - int t; - - /* OMITTED FUNCTIONALITY: starting several channels in synch */ - - spin_lock_irqsave(&pwm->lock, flags); - t = pwmcheck(ch); - if (t >= 0) { - pwm_writel(pwm, enabled ? PWM_ENA : PWM_DIS, 1 << t); - t = 0; - pwm_dumpregs(ch, enabled ? "enable" : "disable"); - } - spin_unlock_irqrestore(&pwm->lock, flags); - - return t; -} -EXPORT_SYMBOL(__pwm_channel_onoff); - -/** - * pwm_clk_alloc - allocate and configure CLKA or CLKB - * @prescale: from 0..10, the power of two used to divide MCK - * @div: from 1..255, the linear divisor to use - * - * Returns PWM_CPR_CLKA, PWM_CPR_CLKB, or negative errno. The allocated - * clock will run with a period of (2^prescale * div) / MCK, or twice as - * long if center aligned PWM output is used. The clock must later be - * deconfigured using pwm_clk_free(). - */ -int pwm_clk_alloc(unsigned prescale, unsigned div) -{ - unsigned long flags; - u32 mr; - u32 val = (prescale << 8) | div; - int ret = -EBUSY; - - if (prescale >= 10 || div == 0 || div > 255) - return -EINVAL; - - spin_lock_irqsave(&pwm->lock, flags); - mr = pwm_readl(pwm, PWM_MR); - if ((mr & 0xffff) == 0) { - mr |= val; - ret = PWM_CPR_CLKA; - } else if ((mr & (0xffff << 16)) == 0) { - mr |= val << 16; - ret = PWM_CPR_CLKB; - } - if (ret > 0) - pwm_writel(pwm, PWM_MR, mr); - spin_unlock_irqrestore(&pwm->lock, flags); - return ret; -} -EXPORT_SYMBOL(pwm_clk_alloc); - -/** - * pwm_clk_free - deconfigure and release CLKA or CLKB - * - * Reverses the effect of pwm_clk_alloc(). - */ -void pwm_clk_free(unsigned clk) -{ - unsigned long flags; - u32 mr; - - spin_lock_irqsave(&pwm->lock, flags); - mr = pwm_readl(pwm, PWM_MR); - if (clk == PWM_CPR_CLKA) - pwm_writel(pwm, PWM_MR, mr & ~(0xffff << 0)); - if (clk == PWM_CPR_CLKB) - pwm_writel(pwm, PWM_MR, mr & ~(0xffff << 16)); - spin_unlock_irqrestore(&pwm->lock, flags); -} -EXPORT_SYMBOL(pwm_clk_free); - -/** - * pwm_channel_handler - manage channel's IRQ handler - * @ch: the channel - * @handler: the handler to use, possibly NULL - * - * If the handler is non-null, the handler will be called after every - * period of this PWM channel. 
If the handler is null, this channel - * won't generate an IRQ. - */ -int pwm_channel_handler(struct pwm_channel *ch, - void (*handler)(struct pwm_channel *ch)) -{ - unsigned long flags; - int t; - - spin_lock_irqsave(&pwm->lock, flags); - t = pwmcheck(ch); - if (t >= 0) { - pwm->handler[t] = handler; - pwm_writel(pwm, handler ? PWM_IER : PWM_IDR, 1 << t); - t = 0; - } - spin_unlock_irqrestore(&pwm->lock, flags); - - return t; -} -EXPORT_SYMBOL(pwm_channel_handler); - -static irqreturn_t pwm_irq(int id, void *_pwm) -{ - struct pwm *p = _pwm; - irqreturn_t handled = IRQ_NONE; - u32 irqstat; - int index; - - spin_lock(&p->lock); - - /* ack irqs, then handle them */ - irqstat = pwm_readl(pwm, PWM_ISR); - - while (irqstat) { - struct pwm_channel *ch; - void (*handler)(struct pwm_channel *ch); - - index = ffs(irqstat) - 1; - irqstat &= ~(1 << index); - ch = pwm->channel[index]; - handler = pwm->handler[index]; - if (handler && ch) { - spin_unlock(&p->lock); - handler(ch); - spin_lock(&p->lock); - handled = IRQ_HANDLED; - } - } - - spin_unlock(&p->lock); - return handled; -} - -static int __init pwm_probe(struct platform_device *pdev) -{ - struct resource *r = platform_get_resource(pdev, IORESOURCE_MEM, 0); - int irq = platform_get_irq(pdev, 0); - u32 *mp = pdev->dev.platform_data; - struct pwm *p; - int status = -EIO; - - if (pwm) - return -EBUSY; - if (!r || irq < 0 || !mp || !*mp) - return -ENODEV; - if (*mp & ~((1<dev, "mask 0x%x ... more than %d channels\n", - *mp, PWM_NCHAN); - return -EINVAL; - } - - p = kzalloc(sizeof(*p), GFP_KERNEL); - if (!p) - return -ENOMEM; - - spin_lock_init(&p->lock); - p->pdev = pdev; - p->mask = *mp; - p->irq = irq; - p->base = ioremap(r->start, resource_size(r)); - if (!p->base) - goto fail; - p->clk = clk_get(&pdev->dev, "pwm_clk"); - if (IS_ERR(p->clk)) { - status = PTR_ERR(p->clk); - p->clk = NULL; - goto fail; - } - - status = request_irq(irq, pwm_irq, 0, pdev->name, p); - if (status < 0) - goto fail; - - pwm = p; - platform_set_drvdata(pdev, p); - - return 0; - -fail: - if (p->clk) - clk_put(p->clk); - if (p->base) - iounmap(p->base); - - kfree(p); - return status; -} - -static int __exit pwm_remove(struct platform_device *pdev) -{ - struct pwm *p = platform_get_drvdata(pdev); - - if (p != pwm) - return -EINVAL; - - clk_enable(pwm->clk); - pwm_writel(pwm, PWM_DIS, (1 << PWM_NCHAN) - 1); - pwm_writel(pwm, PWM_IDR, (1 << PWM_NCHAN) - 1); - clk_disable(pwm->clk); - - pwm = NULL; - - free_irq(p->irq, p); - clk_put(p->clk); - iounmap(p->base); - kfree(p); - - return 0; -} - -static struct platform_driver atmel_pwm_driver = { - .driver = { - .name = "atmel_pwm", - .owner = THIS_MODULE, - }, - .remove = __exit_p(pwm_remove), - - /* NOTE: PWM can keep running in AVR32 "idle" and "frozen" states; - * and all AT91sam9263 states, albeit at reduced clock rate if - * MCK becomes the slow clock (i.e. what Linux labels STR). 
- */ -}; - -module_platform_driver_probe(atmel_pwm_driver, pwm_probe); - -MODULE_DESCRIPTION("Driver for AT32/AT91 PWM module"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS("platform:atmel_pwm"); diff --git a/include/linux/atmel_pwm.h b/include/linux/atmel_pwm.h deleted file mode 100644 index ea04abb3db8e..000000000000 --- a/include/linux/atmel_pwm.h +++ /dev/null @@ -1,70 +0,0 @@ -#ifndef __LINUX_ATMEL_PWM_H -#define __LINUX_ATMEL_PWM_H - -/** - * struct pwm_channel - driver handle to a PWM channel - * @regs: base of this channel's registers - * @index: number of this channel (0..31) - * @mck: base clock rate, which can be prescaled and maybe subdivided - * - * Drivers initialize a pwm_channel structure using pwm_channel_alloc(). - * Then they configure its clock rate (derived from MCK), alignment, - * polarity, and duty cycle by writing directly to the channel registers, - * before enabling the channel by calling pwm_channel_enable(). - * - * After emitting a PWM signal for the desired length of time, drivers - * may then pwm_channel_disable() or pwm_channel_free(). Both of these - * disable the channel, but when it's freed the IRQ is deconfigured and - * the channel must later be re-allocated and reconfigured. - * - * Note that if the period or duty cycle need to be changed while the - * PWM channel is operating, drivers must use the PWM_CUPD double buffer - * mechanism, either polling until they change or getting implicitly - * notified through a once-per-period interrupt handler. - */ -struct pwm_channel { - void __iomem *regs; - unsigned index; - unsigned long mck; -}; - -extern int pwm_channel_alloc(int index, struct pwm_channel *ch); -extern int pwm_channel_free(struct pwm_channel *ch); - -extern int pwm_clk_alloc(unsigned prescale, unsigned div); -extern void pwm_clk_free(unsigned clk); - -extern int __pwm_channel_onoff(struct pwm_channel *ch, int enabled); - -#define pwm_channel_enable(ch) __pwm_channel_onoff((ch), 1) -#define pwm_channel_disable(ch) __pwm_channel_onoff((ch), 0) - -/* periodic interrupts, mostly for CUPD changes to period or cycle */ -extern int pwm_channel_handler(struct pwm_channel *ch, - void (*handler)(struct pwm_channel *ch)); - -/* per-channel registers (banked at pwm_channel->regs) */ -#define PWM_CMR 0x00 /* mode register */ -#define PWM_CPR_CPD (1 << 10) /* set: CUPD modifies period */ -#define PWM_CPR_CPOL (1 << 9) /* set: idle high */ -#define PWM_CPR_CALG (1 << 8) /* set: center align */ -#define PWM_CPR_CPRE (0xf << 0) /* mask: rate is mck/(2^pre) */ -#define PWM_CPR_CLKA (0xb << 0) /* rate CLKA */ -#define PWM_CPR_CLKB (0xc << 0) /* rate CLKB */ -#define PWM_CDTY 0x04 /* duty cycle (max of CPRD) */ -#define PWM_CPRD 0x08 /* period (count up from zero) */ -#define PWM_CCNT 0x0c /* counter (20 bits?) */ -#define PWM_CUPD 0x10 /* update CPRD (or CDTY) next period */ - -static inline void -pwm_channel_writel(struct pwm_channel *pwmc, unsigned offset, u32 val) -{ - __raw_writel(val, pwmc->regs + offset); -} - -static inline u32 pwm_channel_readl(struct pwm_channel *pwmc, unsigned offset) -{ - return __raw_readl(pwmc->regs + offset); -} - -#endif /* __LINUX_ATMEL_PWM_H */ -- cgit v1.2.3-59-g8ed1b From 8cd118308a8649c649533a0133af0ce731d223bb Mon Sep 17 00:00:00 2001 From: Micky Ching Date: Fri, 6 Jun 2014 15:05:44 +0800 Subject: mfd: rtsx: Add dma transfer function rtsx driver using a single function for transfer data, dma map/unmap are placed in one fix function. 
We need map/unmap dma in different place(for mmc async driver), so add three function for dma map, dma transfer and dma unmap. Signed-off-by: Micky Ching Signed-off-by: Lee Jones --- drivers/mfd/rtsx_pcr.c | 76 ++++++++++++++++++++++++++++---------------- include/linux/mfd/rtsx_pci.h | 6 ++++ 2 files changed, 54 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/rtsx_pcr.c b/drivers/mfd/rtsx_pcr.c index 1d15735f9ef9..d01b8c249231 100644 --- a/drivers/mfd/rtsx_pcr.c +++ b/drivers/mfd/rtsx_pcr.c @@ -337,40 +337,64 @@ static void rtsx_pci_add_sg_tbl(struct rtsx_pcr *pcr, int rtsx_pci_transfer_data(struct rtsx_pcr *pcr, struct scatterlist *sglist, int num_sg, bool read, int timeout) { - struct completion trans_done; - u8 dir; - int err = 0, i, count; - long timeleft; - unsigned long flags; - struct scatterlist *sg; - enum dma_data_direction dma_dir; - u32 val; - dma_addr_t addr; - unsigned int len; + int err = 0, count; dev_dbg(&(pcr->pci->dev), "--> %s: num_sg = %d\n", __func__, num_sg); + count = rtsx_pci_dma_map_sg(pcr, sglist, num_sg, read); + if (count < 1) + return -EINVAL; + dev_dbg(&(pcr->pci->dev), "DMA mapping count: %d\n", count); + + err = rtsx_pci_dma_transfer(pcr, sglist, count, read, timeout); + + rtsx_pci_dma_unmap_sg(pcr, sglist, num_sg, read); + + return err; +} +EXPORT_SYMBOL_GPL(rtsx_pci_transfer_data); + +int rtsx_pci_dma_map_sg(struct rtsx_pcr *pcr, struct scatterlist *sglist, + int num_sg, bool read) +{ + enum dma_data_direction dir = read ? DMA_FROM_DEVICE : DMA_TO_DEVICE; - /* don't transfer data during abort processing */ if (pcr->remove_pci) return -EINVAL; if ((sglist == NULL) || (num_sg <= 0)) return -EINVAL; - if (read) { - dir = DEVICE_TO_HOST; - dma_dir = DMA_FROM_DEVICE; - } else { - dir = HOST_TO_DEVICE; - dma_dir = DMA_TO_DEVICE; - } + return dma_map_sg(&(pcr->pci->dev), sglist, num_sg, dir); +} +EXPORT_SYMBOL_GPL(rtsx_pci_dma_map_sg); - count = dma_map_sg(&(pcr->pci->dev), sglist, num_sg, dma_dir); - if (count < 1) { - dev_err(&(pcr->pci->dev), "scatterlist map failed\n"); +void rtsx_pci_dma_unmap_sg(struct rtsx_pcr *pcr, struct scatterlist *sglist, + int num_sg, bool read) +{ + enum dma_data_direction dir = read ? DMA_FROM_DEVICE : DMA_TO_DEVICE; + + dma_unmap_sg(&(pcr->pci->dev), sglist, num_sg, dir); +} +EXPORT_SYMBOL_GPL(rtsx_pci_dma_unmap_sg); + +int rtsx_pci_dma_transfer(struct rtsx_pcr *pcr, struct scatterlist *sglist, + int count, bool read, int timeout) +{ + struct completion trans_done; + struct scatterlist *sg; + dma_addr_t addr; + long timeleft; + unsigned long flags; + unsigned int len; + int i, err = 0; + u32 val; + u8 dir = read ? 
DEVICE_TO_HOST : HOST_TO_DEVICE; + + if (pcr->remove_pci) + return -ENODEV; + + if ((sglist == NULL) || (count < 1)) return -EINVAL; - } - dev_dbg(&(pcr->pci->dev), "DMA mapping count: %d\n", count); val = ((u32)(dir & 0x01) << 29) | TRIG_DMA | ADMA_MODE; pcr->sgi = 0; @@ -400,12 +424,10 @@ int rtsx_pci_transfer_data(struct rtsx_pcr *pcr, struct scatterlist *sglist, } spin_lock_irqsave(&pcr->lock, flags); - if (pcr->trans_result == TRANS_RESULT_FAIL) err = -EINVAL; else if (pcr->trans_result == TRANS_NO_DEVICE) err = -ENODEV; - spin_unlock_irqrestore(&pcr->lock, flags); out: @@ -413,8 +435,6 @@ out: pcr->done = NULL; spin_unlock_irqrestore(&pcr->lock, flags); - dma_unmap_sg(&(pcr->pci->dev), sglist, num_sg, dma_dir); - if ((err < 0) && (err != -ENODEV)) rtsx_pci_stop_cmd(pcr); @@ -423,7 +443,7 @@ out: return err; } -EXPORT_SYMBOL_GPL(rtsx_pci_transfer_data); +EXPORT_SYMBOL_GPL(rtsx_pci_dma_transfer); int rtsx_pci_read_ppbuf(struct rtsx_pcr *pcr, u8 *buf, int buf_len) { diff --git a/include/linux/mfd/rtsx_pci.h b/include/linux/mfd/rtsx_pci.h index a3835976f7c6..74346d5e7899 100644 --- a/include/linux/mfd/rtsx_pci.h +++ b/include/linux/mfd/rtsx_pci.h @@ -943,6 +943,12 @@ void rtsx_pci_send_cmd_no_wait(struct rtsx_pcr *pcr); int rtsx_pci_send_cmd(struct rtsx_pcr *pcr, int timeout); int rtsx_pci_transfer_data(struct rtsx_pcr *pcr, struct scatterlist *sglist, int num_sg, bool read, int timeout); +int rtsx_pci_dma_map_sg(struct rtsx_pcr *pcr, struct scatterlist *sglist, + int num_sg, bool read); +void rtsx_pci_dma_unmap_sg(struct rtsx_pcr *pcr, struct scatterlist *sglist, + int num_sg, bool read); +int rtsx_pci_dma_transfer(struct rtsx_pcr *pcr, struct scatterlist *sglist, + int count, bool read, int timeout); int rtsx_pci_read_ppbuf(struct rtsx_pcr *pcr, u8 *buf, int buf_len); int rtsx_pci_write_ppbuf(struct rtsx_pcr *pcr, u8 *buf, int buf_len); int rtsx_pci_card_pull_ctl_enable(struct rtsx_pcr *pcr, int card); -- cgit v1.2.3-59-g8ed1b From bdbc736da636956a40435f5f41d2be6af544c3fb Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Mon, 2 Jun 2014 09:50:43 +0100 Subject: mfd: arizona: Lower ARIZONA_MAX_CORE_SUPPLIES to 2 There are no Arizona devices with 3 core supplies but we define a fix array with space for 3 core supplies. Lower the ARIZONA_MAX_CORE_SUPPLIES define to 2, to save a few bytes. Signed-off-by: Charles Keepax Signed-off-by: Lee Jones --- include/linux/mfd/arizona/core.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/arizona/core.h b/include/linux/mfd/arizona/core.h index 6d9371f88875..70854d892760 100644 --- a/include/linux/mfd/arizona/core.h +++ b/include/linux/mfd/arizona/core.h @@ -18,7 +18,7 @@ #include #include -#define ARIZONA_MAX_CORE_SUPPLIES 3 +#define ARIZONA_MAX_CORE_SUPPLIES 2 enum arizona_type { WM5102 = 1, -- cgit v1.2.3-59-g8ed1b From 10f9edaeaa30468194e1dcd0e47e59b012f4cf8b Mon Sep 17 00:00:00 2001 From: Alexander Shiyan Date: Wed, 18 Jun 2014 21:05:40 +0400 Subject: mfd: mc13xxx: Use regmap irq framework for interrupts This patch convert mc13xxx MFD driver to use regmap irq framework for interrupt registration. 
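[Illustration, not part of the patch: the regmap-irq pattern the driver is being moved to. Register addresses, interrupt indices and all names below are placeholders; the real mc13xxx values are in the hunks that follow.]

#include <linux/interrupt.h>
#include <linux/regmap.h>

static const struct regmap_irq example_irqs[] = {
	{ .reg_offset = 0, .mask = BIT(0) },	/* irq 0, lives in status reg 0 */
	{ .reg_offset = 1, .mask = BIT(2) },	/* irq 1, lives in status reg 1 */
};

static const struct regmap_irq_chip example_irq_chip = {
	.name		= "example-pmic",
	.status_base	= 0x00,		/* first status register */
	.mask_base	= 0x01,		/* first mask register */
	.ack_base	= 0x00,		/* acked by writing status back */
	.irq_reg_stride	= 3,		/* gap between register banks */
	.num_regs	= 2,
	.irqs		= example_irqs,
	.num_irqs	= ARRAY_SIZE(example_irqs),
};

static int example_irq_init(struct device *dev, struct regmap *map, int irq,
			    irq_handler_t thread_fn, void *priv)
{
	struct regmap_irq_chip_data *irq_data;
	int ret, virq;

	ret = regmap_add_irq_chip(map, irq, IRQF_ONESHOT, 0,
				  &example_irq_chip, &irq_data);
	if (ret)
		return ret;

	/* consumers look up Linux irq numbers through the new irq domain */
	virq = regmap_irq_get_virq(irq_data, 1);
	return devm_request_threaded_irq(dev, virq, NULL, thread_fn, 0,
					 "example-irq1", priv);
}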
Signed-off-by: Alexander Shiyan Signed-off-by: Lee Jones --- drivers/mfd/Kconfig | 1 + drivers/mfd/mc13xxx-core.c | 310 ++++++-------------------------------------- drivers/mfd/mc13xxx.h | 11 +- include/linux/mfd/mc13783.h | 1 - include/linux/mfd/mc13xxx.h | 23 +++- 5 files changed, 65 insertions(+), 281 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index 207c433074af..defe58d65940 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -187,6 +187,7 @@ config MFD_MC13XXX tristate depends on (SPI_MASTER || I2C) select MFD_CORE + select REGMAP_IRQ help Enable support for the Freescale MC13783 and MC13892 PMICs. This driver provides common support for accessing the device, diff --git a/drivers/mfd/mc13xxx-core.c b/drivers/mfd/mc13xxx-core.c index acf5dd712eb2..2b6bc868cd3d 100644 --- a/drivers/mfd/mc13xxx-core.c +++ b/drivers/mfd/mc13xxx-core.c @@ -10,106 +10,18 @@ * Free Software Foundation. */ -#include #include -#include -#include -#include -#include -#include #include #include -#include +#include +#include #include "mc13xxx.h" #define MC13XXX_IRQSTAT0 0 -#define MC13XXX_IRQSTAT0_ADCDONEI (1 << 0) -#define MC13XXX_IRQSTAT0_ADCBISDONEI (1 << 1) -#define MC13XXX_IRQSTAT0_TSI (1 << 2) -#define MC13783_IRQSTAT0_WHIGHI (1 << 3) -#define MC13783_IRQSTAT0_WLOWI (1 << 4) -#define MC13XXX_IRQSTAT0_CHGDETI (1 << 6) -#define MC13783_IRQSTAT0_CHGOVI (1 << 7) -#define MC13XXX_IRQSTAT0_CHGREVI (1 << 8) -#define MC13XXX_IRQSTAT0_CHGSHORTI (1 << 9) -#define MC13XXX_IRQSTAT0_CCCVI (1 << 10) -#define MC13XXX_IRQSTAT0_CHGCURRI (1 << 11) -#define MC13XXX_IRQSTAT0_BPONI (1 << 12) -#define MC13XXX_IRQSTAT0_LOBATLI (1 << 13) -#define MC13XXX_IRQSTAT0_LOBATHI (1 << 14) -#define MC13783_IRQSTAT0_UDPI (1 << 15) -#define MC13783_IRQSTAT0_USBI (1 << 16) -#define MC13783_IRQSTAT0_IDI (1 << 19) -#define MC13783_IRQSTAT0_SE1I (1 << 21) -#define MC13783_IRQSTAT0_CKDETI (1 << 22) -#define MC13783_IRQSTAT0_UDMI (1 << 23) - #define MC13XXX_IRQMASK0 1 -#define MC13XXX_IRQMASK0_ADCDONEM MC13XXX_IRQSTAT0_ADCDONEI -#define MC13XXX_IRQMASK0_ADCBISDONEM MC13XXX_IRQSTAT0_ADCBISDONEI -#define MC13XXX_IRQMASK0_TSM MC13XXX_IRQSTAT0_TSI -#define MC13783_IRQMASK0_WHIGHM MC13783_IRQSTAT0_WHIGHI -#define MC13783_IRQMASK0_WLOWM MC13783_IRQSTAT0_WLOWI -#define MC13XXX_IRQMASK0_CHGDETM MC13XXX_IRQSTAT0_CHGDETI -#define MC13783_IRQMASK0_CHGOVM MC13783_IRQSTAT0_CHGOVI -#define MC13XXX_IRQMASK0_CHGREVM MC13XXX_IRQSTAT0_CHGREVI -#define MC13XXX_IRQMASK0_CHGSHORTM MC13XXX_IRQSTAT0_CHGSHORTI -#define MC13XXX_IRQMASK0_CCCVM MC13XXX_IRQSTAT0_CCCVI -#define MC13XXX_IRQMASK0_CHGCURRM MC13XXX_IRQSTAT0_CHGCURRI -#define MC13XXX_IRQMASK0_BPONM MC13XXX_IRQSTAT0_BPONI -#define MC13XXX_IRQMASK0_LOBATLM MC13XXX_IRQSTAT0_LOBATLI -#define MC13XXX_IRQMASK0_LOBATHM MC13XXX_IRQSTAT0_LOBATHI -#define MC13783_IRQMASK0_UDPM MC13783_IRQSTAT0_UDPI -#define MC13783_IRQMASK0_USBM MC13783_IRQSTAT0_USBI -#define MC13783_IRQMASK0_IDM MC13783_IRQSTAT0_IDI -#define MC13783_IRQMASK0_SE1M MC13783_IRQSTAT0_SE1I -#define MC13783_IRQMASK0_CKDETM MC13783_IRQSTAT0_CKDETI -#define MC13783_IRQMASK0_UDMM MC13783_IRQSTAT0_UDMI - #define MC13XXX_IRQSTAT1 3 -#define MC13XXX_IRQSTAT1_1HZI (1 << 0) -#define MC13XXX_IRQSTAT1_TODAI (1 << 1) -#define MC13783_IRQSTAT1_ONOFD1I (1 << 3) -#define MC13783_IRQSTAT1_ONOFD2I (1 << 4) -#define MC13783_IRQSTAT1_ONOFD3I (1 << 5) -#define MC13XXX_IRQSTAT1_SYSRSTI (1 << 6) -#define MC13XXX_IRQSTAT1_RTCRSTI (1 << 7) -#define MC13XXX_IRQSTAT1_PCI (1 << 8) -#define MC13XXX_IRQSTAT1_WARMI (1 << 9) 
-#define MC13XXX_IRQSTAT1_MEMHLDI (1 << 10) -#define MC13783_IRQSTAT1_PWRRDYI (1 << 11) -#define MC13XXX_IRQSTAT1_THWARNLI (1 << 12) -#define MC13XXX_IRQSTAT1_THWARNHI (1 << 13) -#define MC13XXX_IRQSTAT1_CLKI (1 << 14) -#define MC13783_IRQSTAT1_SEMAFI (1 << 15) -#define MC13783_IRQSTAT1_MC2BI (1 << 17) -#define MC13783_IRQSTAT1_HSDETI (1 << 18) -#define MC13783_IRQSTAT1_HSLI (1 << 19) -#define MC13783_IRQSTAT1_ALSPTHI (1 << 20) -#define MC13783_IRQSTAT1_AHSSHORTI (1 << 21) - #define MC13XXX_IRQMASK1 4 -#define MC13XXX_IRQMASK1_1HZM MC13XXX_IRQSTAT1_1HZI -#define MC13XXX_IRQMASK1_TODAM MC13XXX_IRQSTAT1_TODAI -#define MC13783_IRQMASK1_ONOFD1M MC13783_IRQSTAT1_ONOFD1I -#define MC13783_IRQMASK1_ONOFD2M MC13783_IRQSTAT1_ONOFD2I -#define MC13783_IRQMASK1_ONOFD3M MC13783_IRQSTAT1_ONOFD3I -#define MC13XXX_IRQMASK1_SYSRSTM MC13XXX_IRQSTAT1_SYSRSTI -#define MC13XXX_IRQMASK1_RTCRSTM MC13XXX_IRQSTAT1_RTCRSTI -#define MC13XXX_IRQMASK1_PCM MC13XXX_IRQSTAT1_PCI -#define MC13XXX_IRQMASK1_WARMM MC13XXX_IRQSTAT1_WARMI -#define MC13XXX_IRQMASK1_MEMHLDM MC13XXX_IRQSTAT1_MEMHLDI -#define MC13783_IRQMASK1_PWRRDYM MC13783_IRQSTAT1_PWRRDYI -#define MC13XXX_IRQMASK1_THWARNLM MC13XXX_IRQSTAT1_THWARNLI -#define MC13XXX_IRQMASK1_THWARNHM MC13XXX_IRQSTAT1_THWARNHI -#define MC13XXX_IRQMASK1_CLKM MC13XXX_IRQSTAT1_CLKI -#define MC13783_IRQMASK1_SEMAFM MC13783_IRQSTAT1_SEMAFI -#define MC13783_IRQMASK1_MC2BM MC13783_IRQSTAT1_MC2BI -#define MC13783_IRQMASK1_HSDETM MC13783_IRQSTAT1_HSDETI -#define MC13783_IRQMASK1_HSLM MC13783_IRQSTAT1_HSLI -#define MC13783_IRQMASK1_ALSPTHM MC13783_IRQSTAT1_ALSPTHI -#define MC13783_IRQMASK1_AHSSHORTM MC13783_IRQSTAT1_AHSSHORTI #define MC13XXX_REVISION 7 #define MC13XXX_REVISION_REVMETAL (0x07 << 0) @@ -189,45 +101,21 @@ EXPORT_SYMBOL(mc13xxx_reg_rmw); int mc13xxx_irq_mask(struct mc13xxx *mc13xxx, int irq) { - int ret; - unsigned int offmask = irq < 24 ? MC13XXX_IRQMASK0 : MC13XXX_IRQMASK1; - u32 irqbit = 1 << (irq < 24 ? irq : irq - 24); - u32 mask; - - if (irq < 0 || irq >= MC13XXX_NUM_IRQ) - return -EINVAL; - - ret = mc13xxx_reg_read(mc13xxx, offmask, &mask); - if (ret) - return ret; + int virq = regmap_irq_get_virq(mc13xxx->irq_data, irq); - if (mask & irqbit) - /* already masked */ - return 0; + disable_irq_nosync(virq); - return mc13xxx_reg_write(mc13xxx, offmask, mask | irqbit); + return 0; } EXPORT_SYMBOL(mc13xxx_irq_mask); int mc13xxx_irq_unmask(struct mc13xxx *mc13xxx, int irq) { - int ret; - unsigned int offmask = irq < 24 ? MC13XXX_IRQMASK0 : MC13XXX_IRQMASK1; - u32 irqbit = 1 << (irq < 24 ? irq : irq - 24); - u32 mask; - - if (irq < 0 || irq >= MC13XXX_NUM_IRQ) - return -EINVAL; + int virq = regmap_irq_get_virq(mc13xxx->irq_data, irq); - ret = mc13xxx_reg_read(mc13xxx, offmask, &mask); - if (ret) - return ret; + enable_irq(virq); - if (!(mask & irqbit)) - /* already unmasked */ - return 0; - - return mc13xxx_reg_write(mc13xxx, offmask, mask & ~irqbit); + return 0; } EXPORT_SYMBOL(mc13xxx_irq_unmask); @@ -239,7 +127,7 @@ int mc13xxx_irq_status(struct mc13xxx *mc13xxx, int irq, unsigned int offstat = irq < 24 ? MC13XXX_IRQSTAT0 : MC13XXX_IRQSTAT1; u32 irqbit = 1 << (irq < 24 ? irq : irq - 24); - if (irq < 0 || irq >= MC13XXX_NUM_IRQ) + if (irq < 0 || irq >= ARRAY_SIZE(mc13xxx->irqs)) return -EINVAL; if (enabled) { @@ -266,147 +154,26 @@ int mc13xxx_irq_status(struct mc13xxx *mc13xxx, int irq, } EXPORT_SYMBOL(mc13xxx_irq_status); -int mc13xxx_irq_ack(struct mc13xxx *mc13xxx, int irq) -{ - unsigned int offstat = irq < 24 ? 
MC13XXX_IRQSTAT0 : MC13XXX_IRQSTAT1; - unsigned int val = 1 << (irq < 24 ? irq : irq - 24); - - BUG_ON(irq < 0 || irq >= MC13XXX_NUM_IRQ); - - return mc13xxx_reg_write(mc13xxx, offstat, val); -} -EXPORT_SYMBOL(mc13xxx_irq_ack); - -int mc13xxx_irq_request_nounmask(struct mc13xxx *mc13xxx, int irq, - irq_handler_t handler, const char *name, void *dev) -{ - BUG_ON(!mutex_is_locked(&mc13xxx->lock)); - BUG_ON(!handler); - - if (irq < 0 || irq >= MC13XXX_NUM_IRQ) - return -EINVAL; - - if (mc13xxx->irqhandler[irq]) - return -EBUSY; - - mc13xxx->irqhandler[irq] = handler; - mc13xxx->irqdata[irq] = dev; - - return 0; -} -EXPORT_SYMBOL(mc13xxx_irq_request_nounmask); - int mc13xxx_irq_request(struct mc13xxx *mc13xxx, int irq, irq_handler_t handler, const char *name, void *dev) { - int ret; + int virq = regmap_irq_get_virq(mc13xxx->irq_data, irq); - ret = mc13xxx_irq_request_nounmask(mc13xxx, irq, handler, name, dev); - if (ret) - return ret; - - ret = mc13xxx_irq_unmask(mc13xxx, irq); - if (ret) { - mc13xxx->irqhandler[irq] = NULL; - mc13xxx->irqdata[irq] = NULL; - return ret; - } - - return 0; + return devm_request_threaded_irq(mc13xxx->dev, virq, NULL, handler, + 0, name, dev); } EXPORT_SYMBOL(mc13xxx_irq_request); int mc13xxx_irq_free(struct mc13xxx *mc13xxx, int irq, void *dev) { - int ret; - BUG_ON(!mutex_is_locked(&mc13xxx->lock)); + int virq = regmap_irq_get_virq(mc13xxx->irq_data, irq); - if (irq < 0 || irq >= MC13XXX_NUM_IRQ || !mc13xxx->irqhandler[irq] || - mc13xxx->irqdata[irq] != dev) - return -EINVAL; - - ret = mc13xxx_irq_mask(mc13xxx, irq); - if (ret) - return ret; - - mc13xxx->irqhandler[irq] = NULL; - mc13xxx->irqdata[irq] = NULL; + devm_free_irq(mc13xxx->dev, virq, dev); return 0; } EXPORT_SYMBOL(mc13xxx_irq_free); -static inline irqreturn_t mc13xxx_irqhandler(struct mc13xxx *mc13xxx, int irq) -{ - return mc13xxx->irqhandler[irq](irq, mc13xxx->irqdata[irq]); -} - -/* - * returns: number of handled irqs or negative error - * locking: holds mc13xxx->lock - */ -static int mc13xxx_irq_handle(struct mc13xxx *mc13xxx, - unsigned int offstat, unsigned int offmask, int baseirq) -{ - u32 stat, mask; - int ret = mc13xxx_reg_read(mc13xxx, offstat, &stat); - int num_handled = 0; - - if (ret) - return ret; - - ret = mc13xxx_reg_read(mc13xxx, offmask, &mask); - if (ret) - return ret; - - while (stat & ~mask) { - int irq = __ffs(stat & ~mask); - - stat &= ~(1 << irq); - - if (likely(mc13xxx->irqhandler[baseirq + irq])) { - irqreturn_t handled; - - handled = mc13xxx_irqhandler(mc13xxx, baseirq + irq); - if (handled == IRQ_HANDLED) - num_handled++; - } else { - dev_err(mc13xxx->dev, - "BUG: irq %u but no handler\n", - baseirq + irq); - - mask |= 1 << irq; - - ret = mc13xxx_reg_write(mc13xxx, offmask, mask); - } - } - - return num_handled; -} - -static irqreturn_t mc13xxx_irq_thread(int irq, void *data) -{ - struct mc13xxx *mc13xxx = data; - irqreturn_t ret; - int handled = 0; - - mc13xxx_lock(mc13xxx); - - ret = mc13xxx_irq_handle(mc13xxx, MC13XXX_IRQSTAT0, - MC13XXX_IRQMASK0, 0); - if (ret > 0) - handled = 1; - - ret = mc13xxx_irq_handle(mc13xxx, MC13XXX_IRQSTAT1, - MC13XXX_IRQMASK1, 24); - if (ret > 0) - handled = 1; - - mc13xxx_unlock(mc13xxx); - - return IRQ_RETVAL(handled); -} - #define maskval(reg, mask) (((reg) & (mask)) >> __ffs(mask)) static void mc13xxx_print_revision(struct mc13xxx *mc13xxx, u32 revision) { @@ -475,8 +242,6 @@ static irqreturn_t mc13xxx_handler_adcdone(int irq, void *data) { struct mc13xxx_adcdone_data *adcdone_data = data; - mc13xxx_irq_ack(adcdone_data->mc13xxx, irq); - 
complete_all(&adcdone_data->done); return IRQ_HANDLED; @@ -544,7 +309,6 @@ int mc13xxx_adc_do_conversion(struct mc13xxx *mc13xxx, unsigned int mode, dev_dbg(mc13xxx->dev, "%s: request irq\n", __func__); mc13xxx_irq_request(mc13xxx, MC13XXX_IRQ_ADCDONE, mc13xxx_handler_adcdone, __func__, &adcdone_data); - mc13xxx_irq_ack(mc13xxx, MC13XXX_IRQ_ADCDONE); mc13xxx_reg_write(mc13xxx, MC13XXX_ADC0, adc0); mc13xxx_reg_write(mc13xxx, MC13XXX_ADC1, adc1); @@ -599,7 +363,8 @@ static int mc13xxx_add_subdevice_pdata(struct mc13xxx *mc13xxx, if (!cell.name) return -ENOMEM; - return mfd_add_devices(mc13xxx->dev, -1, &cell, 1, NULL, 0, NULL); + return mfd_add_devices(mc13xxx->dev, -1, &cell, 1, NULL, 0, + regmap_irq_get_domain(mc13xxx->irq_data)); } static int mc13xxx_add_subdevice(struct mc13xxx *mc13xxx, const char *format) @@ -640,8 +405,8 @@ int mc13xxx_common_init(struct device *dev) { struct mc13xxx_platform_data *pdata = dev_get_platdata(dev); struct mc13xxx *mc13xxx = dev_get_drvdata(dev); - int ret; u32 revision; + int i, ret; mc13xxx->dev = dev; @@ -651,31 +416,32 @@ int mc13xxx_common_init(struct device *dev) mc13xxx->variant->print_revision(mc13xxx, revision); - /* mask all irqs */ - ret = mc13xxx_reg_write(mc13xxx, MC13XXX_IRQMASK0, 0x00ffffff); - if (ret) - return ret; + for (i = 0; i < ARRAY_SIZE(mc13xxx->irqs); i++) { + mc13xxx->irqs[i].reg_offset = i / MC13XXX_IRQ_PER_REG; + mc13xxx->irqs[i].mask = BIT(i % MC13XXX_IRQ_PER_REG); + } - ret = mc13xxx_reg_write(mc13xxx, MC13XXX_IRQMASK1, 0x00ffffff); + mc13xxx->irq_chip.name = dev_name(dev); + mc13xxx->irq_chip.status_base = MC13XXX_IRQSTAT0; + mc13xxx->irq_chip.mask_base = MC13XXX_IRQMASK0; + mc13xxx->irq_chip.ack_base = MC13XXX_IRQSTAT0; + mc13xxx->irq_chip.irq_reg_stride = MC13XXX_IRQSTAT1 - MC13XXX_IRQSTAT0; + mc13xxx->irq_chip.init_ack_masked = true; + mc13xxx->irq_chip.use_ack = true; + mc13xxx->irq_chip.num_regs = MC13XXX_IRQ_REG_CNT; + mc13xxx->irq_chip.irqs = mc13xxx->irqs; + mc13xxx->irq_chip.num_irqs = ARRAY_SIZE(mc13xxx->irqs); + + ret = regmap_add_irq_chip(mc13xxx->regmap, mc13xxx->irq, IRQF_ONESHOT, + 0, &mc13xxx->irq_chip, &mc13xxx->irq_data); if (ret) return ret; mutex_init(&mc13xxx->lock); - ret = request_threaded_irq(mc13xxx->irq, NULL, mc13xxx_irq_thread, - IRQF_ONESHOT | IRQF_TRIGGER_HIGH, "mc13xxx", mc13xxx); - if (ret) - return ret; - if (mc13xxx_probe_flags_dt(mc13xxx) < 0 && pdata) mc13xxx->flags = pdata->flags; - if (mc13xxx->flags & MC13XXX_USE_ADC) - mc13xxx_add_subdevice(mc13xxx, "%s-adc"); - - if (mc13xxx->flags & MC13XXX_USE_RTC) - mc13xxx_add_subdevice(mc13xxx, "%s-rtc"); - if (pdata) { mc13xxx_add_subdevice_pdata(mc13xxx, "%s-regulator", &pdata->regulators, sizeof(pdata->regulators)); @@ -699,6 +465,12 @@ int mc13xxx_common_init(struct device *dev) mc13xxx_add_subdevice(mc13xxx, "%s-ts"); } + if (mc13xxx->flags & MC13XXX_USE_ADC) + mc13xxx_add_subdevice(mc13xxx, "%s-adc"); + + if (mc13xxx->flags & MC13XXX_USE_RTC) + mc13xxx_add_subdevice(mc13xxx, "%s-rtc"); + return 0; } EXPORT_SYMBOL_GPL(mc13xxx_common_init); @@ -707,8 +479,8 @@ int mc13xxx_common_exit(struct device *dev) { struct mc13xxx *mc13xxx = dev_get_drvdata(dev); - free_irq(mc13xxx->irq, mc13xxx); mfd_remove_devices(dev); + regmap_del_irq_chip(mc13xxx->irq, mc13xxx->irq_data); mutex_destroy(&mc13xxx->lock); return 0; diff --git a/drivers/mfd/mc13xxx.h b/drivers/mfd/mc13xxx.h index ae7f1659f5d1..33677d1dcf66 100644 --- a/drivers/mfd/mc13xxx.h +++ b/drivers/mfd/mc13xxx.h @@ -13,7 +13,9 @@ #include #include -#define MC13XXX_NUMREGS 0x3f +#define 
MC13XXX_NUMREGS 0x3f +#define MC13XXX_IRQ_REG_CNT 2 +#define MC13XXX_IRQ_PER_REG 24 struct mc13xxx; @@ -33,13 +35,14 @@ struct mc13xxx { struct device *dev; const struct mc13xxx_variant *variant; + struct regmap_irq irqs[MC13XXX_IRQ_PER_REG * MC13XXX_IRQ_REG_CNT]; + struct regmap_irq_chip irq_chip; + struct regmap_irq_chip_data *irq_data; + struct mutex lock; int irq; int flags; - irq_handler_t irqhandler[MC13XXX_NUM_IRQ]; - void *irqdata[MC13XXX_NUM_IRQ]; - int adcflags; }; diff --git a/include/linux/mfd/mc13783.h b/include/linux/mfd/mc13783.h index a8eeda773a7b..4ff6137d8d67 100644 --- a/include/linux/mfd/mc13783.h +++ b/include/linux/mfd/mc13783.h @@ -86,6 +86,5 @@ #define MC13783_IRQ_HSL 43 #define MC13783_IRQ_ALSPTH 44 #define MC13783_IRQ_AHSSHORT 45 -#define MC13783_NUM_IRQ MC13XXX_NUM_IRQ #endif /* ifndef __LINUX_MFD_MC13783_H */ diff --git a/include/linux/mfd/mc13xxx.h b/include/linux/mfd/mc13xxx.h index d63b1d309106..638222e43e48 100644 --- a/include/linux/mfd/mc13xxx.h +++ b/include/linux/mfd/mc13xxx.h @@ -23,15 +23,10 @@ int mc13xxx_reg_rmw(struct mc13xxx *mc13xxx, unsigned int offset, int mc13xxx_irq_request(struct mc13xxx *mc13xxx, int irq, irq_handler_t handler, const char *name, void *dev); -int mc13xxx_irq_request_nounmask(struct mc13xxx *mc13xxx, int irq, - irq_handler_t handler, const char *name, void *dev); int mc13xxx_irq_free(struct mc13xxx *mc13xxx, int irq, void *dev); -int mc13xxx_irq_mask(struct mc13xxx *mc13xxx, int irq); -int mc13xxx_irq_unmask(struct mc13xxx *mc13xxx, int irq); int mc13xxx_irq_status(struct mc13xxx *mc13xxx, int irq, int *enabled, int *pending); -int mc13xxx_irq_ack(struct mc13xxx *mc13xxx, int irq); int mc13xxx_get_flags(struct mc13xxx *mc13xxx); @@ -39,6 +34,22 @@ int mc13xxx_adc_do_conversion(struct mc13xxx *mc13xxx, unsigned int mode, unsigned int channel, u8 ato, bool atox, unsigned int *sample); +/* Deprecated calls */ +static inline int mc13xxx_irq_ack(struct mc13xxx *mc13xxx, int irq) +{ + return 0; +} + +static inline int mc13xxx_irq_request_nounmask(struct mc13xxx *mc13xxx, int irq, + irq_handler_t handler, + const char *name, void *dev) +{ + return mc13xxx_irq_request(mc13xxx, irq, handler, name, dev); +} + +int mc13xxx_irq_mask(struct mc13xxx *mc13xxx, int irq); +int mc13xxx_irq_unmask(struct mc13xxx *mc13xxx, int irq); + #define MC13783_AUDIO_RX0 36 #define MC13783_AUDIO_RX1 37 #define MC13783_AUDIO_TX 38 @@ -68,8 +79,6 @@ int mc13xxx_adc_do_conversion(struct mc13xxx *mc13xxx, #define MC13XXX_IRQ_THWARNH 37 #define MC13XXX_IRQ_CLK 38 -#define MC13XXX_NUM_IRQ 46 - struct regulator_init_data; struct mc13xxx_regulator_init_data { -- cgit v1.2.3-59-g8ed1b From 54e8827d5f0e66d152ef63e7958030ef4880cd85 Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Wed, 25 Jun 2014 16:14:44 +0900 Subject: mfd: sec-core: Add support for S2MPU02 device Add support for Samsung S2MPU02 PMIC device to the MFD sec-core driver. The S2MPU02 device includes PMIC/RTC/Clock devices. 
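[Condensed illustration drawn from the hunks below, not new code: supporting the new PMIC mostly amounts to one more mfd_cell array plus a matching regmap irq chip, both selected by device type in the existing probe paths.]

static const struct mfd_cell s2mpu02_devs[] = {
	{ .name = "s2mpu02-pmic", },
	{ .name = "s2mpu02-rtc", },
	{
		.name = "s2mpu02-clk",
		.of_compatible = "samsung,s2mpu02-clk",
	}
};

/* sec_pmic_probe() picks this array for the S2MPU02 device type and hands
 * it to mfd_add_devices(); sec_irq_init() likewise picks s2mpu02_irq_chip
 * and registers it through regmap_add_irq_chip(). */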
Signed-off-by: Chanwoo Choi Reviewed-by: Krzysztof Kozlowski Signed-off-by: Lee Jones --- drivers/mfd/sec-core.c | 46 ++++++--- drivers/mfd/sec-irq.c | 110 +++++++++++++++++--- include/linux/mfd/samsung/core.h | 1 + include/linux/mfd/samsung/irq.h | 24 +++++ include/linux/mfd/samsung/s2mpu02.h | 201 ++++++++++++++++++++++++++++++++++++ 5 files changed, 352 insertions(+), 30 deletions(-) create mode 100644 include/linux/mfd/samsung/s2mpu02.h (limited to 'include/linux') diff --git a/drivers/mfd/sec-core.c b/drivers/mfd/sec-core.c index be06d0abbf19..15ba847b3d23 100644 --- a/drivers/mfd/sec-core.c +++ b/drivers/mfd/sec-core.c @@ -89,6 +89,15 @@ static const struct mfd_cell s2mpa01_devs[] = { }, }; +static const struct mfd_cell s2mpu02_devs[] = { + { .name = "s2mpu02-pmic", }, + { .name = "s2mpu02-rtc", }, + { + .name = "s2mpu02-clk", + .of_compatible = "samsung,s2mpu02-clk", + } +}; + #ifdef CONFIG_OF static const struct of_device_id sec_dt_match[] = { { .compatible = "samsung,s5m8767-pmic", @@ -102,6 +111,9 @@ static const struct of_device_id sec_dt_match[] = { }, { .compatible = "samsung,s2mpa01-pmic", .data = (void *)S2MPA01, + }, { + .compatible = "samsung,s2mpu02-pmic", + .data = (void *)S2MPU02, }, { /* Sentinel */ }, @@ -250,9 +262,10 @@ static int sec_pmic_probe(struct i2c_client *i2c, { struct sec_platform_data *pdata = dev_get_platdata(&i2c->dev); const struct regmap_config *regmap; + const struct mfd_cell *sec_devs; struct sec_pmic_dev *sec_pmic; unsigned long device_type; - int ret; + int ret, num_sec_devs; sec_pmic = devm_kzalloc(&i2c->dev, sizeof(struct sec_pmic_dev), GFP_KERNEL); @@ -319,34 +332,39 @@ static int sec_pmic_probe(struct i2c_client *i2c, switch (sec_pmic->device_type) { case S5M8751X: - ret = mfd_add_devices(sec_pmic->dev, -1, s5m8751_devs, - ARRAY_SIZE(s5m8751_devs), NULL, 0, NULL); + sec_devs = s5m8751_devs; + num_sec_devs = ARRAY_SIZE(s5m8751_devs); break; case S5M8763X: - ret = mfd_add_devices(sec_pmic->dev, -1, s5m8763_devs, - ARRAY_SIZE(s5m8763_devs), NULL, 0, NULL); + sec_devs = s5m8763_devs; + num_sec_devs = ARRAY_SIZE(s5m8763_devs); break; case S5M8767X: - ret = mfd_add_devices(sec_pmic->dev, -1, s5m8767_devs, - ARRAY_SIZE(s5m8767_devs), NULL, 0, NULL); + sec_devs = s5m8767_devs; + num_sec_devs = ARRAY_SIZE(s5m8767_devs); break; case S2MPA01: - ret = mfd_add_devices(sec_pmic->dev, -1, s2mpa01_devs, - ARRAY_SIZE(s2mpa01_devs), NULL, 0, NULL); + sec_devs = s2mpa01_devs; + num_sec_devs = ARRAY_SIZE(s2mpa01_devs); break; case S2MPS11X: - ret = mfd_add_devices(sec_pmic->dev, -1, s2mps11_devs, - ARRAY_SIZE(s2mps11_devs), NULL, 0, NULL); + sec_devs = s2mps11_devs; + num_sec_devs = ARRAY_SIZE(s2mps11_devs); break; case S2MPS14X: - ret = mfd_add_devices(sec_pmic->dev, -1, s2mps14_devs, - ARRAY_SIZE(s2mps14_devs), NULL, 0, NULL); + sec_devs = s2mps14_devs; + num_sec_devs = ARRAY_SIZE(s2mps14_devs); + break; + case S2MPU02: + sec_devs = s2mpu02_devs; + num_sec_devs = ARRAY_SIZE(s2mpu02_devs); break; default: /* If this happens the probe function is problem */ BUG(); } - + ret = mfd_add_devices(sec_pmic->dev, -1, sec_devs, num_sec_devs, NULL, + 0, NULL); if (ret) goto err_mfd; diff --git a/drivers/mfd/sec-irq.c b/drivers/mfd/sec-irq.c index 654e2c1dbf7a..f9a57869e3ec 100644 --- a/drivers/mfd/sec-irq.c +++ b/drivers/mfd/sec-irq.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -161,6 +162,77 @@ static const struct regmap_irq s2mps14_irqs[] = { }, }; +static const struct regmap_irq s2mpu02_irqs[] = { + [S2MPU02_IRQ_PWRONF] = { + .reg_offset 
= 0, + .mask = S2MPS11_IRQ_PWRONF_MASK, + }, + [S2MPU02_IRQ_PWRONR] = { + .reg_offset = 0, + .mask = S2MPS11_IRQ_PWRONR_MASK, + }, + [S2MPU02_IRQ_JIGONBF] = { + .reg_offset = 0, + .mask = S2MPS11_IRQ_JIGONBF_MASK, + }, + [S2MPU02_IRQ_JIGONBR] = { + .reg_offset = 0, + .mask = S2MPS11_IRQ_JIGONBR_MASK, + }, + [S2MPU02_IRQ_ACOKBF] = { + .reg_offset = 0, + .mask = S2MPS11_IRQ_ACOKBF_MASK, + }, + [S2MPU02_IRQ_ACOKBR] = { + .reg_offset = 0, + .mask = S2MPS11_IRQ_ACOKBR_MASK, + }, + [S2MPU02_IRQ_PWRON1S] = { + .reg_offset = 0, + .mask = S2MPS11_IRQ_PWRON1S_MASK, + }, + [S2MPU02_IRQ_MRB] = { + .reg_offset = 0, + .mask = S2MPS11_IRQ_MRB_MASK, + }, + [S2MPU02_IRQ_RTC60S] = { + .reg_offset = 1, + .mask = S2MPS11_IRQ_RTC60S_MASK, + }, + [S2MPU02_IRQ_RTCA1] = { + .reg_offset = 1, + .mask = S2MPS11_IRQ_RTCA1_MASK, + }, + [S2MPU02_IRQ_RTCA0] = { + .reg_offset = 1, + .mask = S2MPS11_IRQ_RTCA0_MASK, + }, + [S2MPU02_IRQ_SMPL] = { + .reg_offset = 1, + .mask = S2MPS11_IRQ_SMPL_MASK, + }, + [S2MPU02_IRQ_RTC1S] = { + .reg_offset = 1, + .mask = S2MPS11_IRQ_RTC1S_MASK, + }, + [S2MPU02_IRQ_WTSR] = { + .reg_offset = 1, + .mask = S2MPS11_IRQ_WTSR_MASK, + }, + [S2MPU02_IRQ_INT120C] = { + .reg_offset = 2, + .mask = S2MPS11_IRQ_INT120C_MASK, + }, + [S2MPU02_IRQ_INT140C] = { + .reg_offset = 2, + .mask = S2MPS11_IRQ_INT140C_MASK, + }, + [S2MPU02_IRQ_TSD] = { + .reg_offset = 2, + .mask = S2MPS14_IRQ_TSD_MASK, + }, +}; + static const struct regmap_irq s5m8767_irqs[] = { [S5M8767_IRQ_PWRR] = { .reg_offset = 0, @@ -327,6 +399,16 @@ static const struct regmap_irq_chip s2mps14_irq_chip = { .ack_base = S2MPS14_REG_INT1, }; +static const struct regmap_irq_chip s2mpu02_irq_chip = { + .name = "s2mpu02", + .irqs = s2mpu02_irqs, + .num_irqs = ARRAY_SIZE(s2mpu02_irqs), + .num_regs = 3, + .status_base = S2MPU02_REG_INT1, + .mask_base = S2MPU02_REG_INT1M, + .ack_base = S2MPU02_REG_INT1, +}; + static const struct regmap_irq_chip s5m8767_irq_chip = { .name = "s5m8767", .irqs = s5m8767_irqs, @@ -351,6 +433,7 @@ int sec_irq_init(struct sec_pmic_dev *sec_pmic) { int ret = 0; int type = sec_pmic->device_type; + const struct regmap_irq_chip *sec_irq_chip; if (!sec_pmic->irq) { dev_warn(sec_pmic->dev, @@ -361,28 +444,19 @@ int sec_irq_init(struct sec_pmic_dev *sec_pmic) switch (type) { case S5M8763X: - ret = regmap_add_irq_chip(sec_pmic->regmap_pmic, sec_pmic->irq, - IRQF_TRIGGER_FALLING | IRQF_ONESHOT, - sec_pmic->irq_base, &s5m8763_irq_chip, - &sec_pmic->irq_data); + sec_irq_chip = &s5m8763_irq_chip; break; case S5M8767X: - ret = regmap_add_irq_chip(sec_pmic->regmap_pmic, sec_pmic->irq, - IRQF_TRIGGER_FALLING | IRQF_ONESHOT, - sec_pmic->irq_base, &s5m8767_irq_chip, - &sec_pmic->irq_data); + sec_irq_chip = &s5m8767_irq_chip; break; case S2MPS11X: - ret = regmap_add_irq_chip(sec_pmic->regmap_pmic, sec_pmic->irq, - IRQF_TRIGGER_FALLING | IRQF_ONESHOT, - sec_pmic->irq_base, &s2mps11_irq_chip, - &sec_pmic->irq_data); + sec_irq_chip = &s2mps11_irq_chip; break; case S2MPS14X: - ret = regmap_add_irq_chip(sec_pmic->regmap_pmic, sec_pmic->irq, - IRQF_TRIGGER_FALLING | IRQF_ONESHOT, - sec_pmic->irq_base, &s2mps14_irq_chip, - &sec_pmic->irq_data); + sec_irq_chip = &s2mps14_irq_chip; + break; + case S2MPU02: + sec_irq_chip = &s2mpu02_irq_chip; break; default: dev_err(sec_pmic->dev, "Unknown device type %lu\n", @@ -390,6 +464,10 @@ int sec_irq_init(struct sec_pmic_dev *sec_pmic) return -EINVAL; } + ret = regmap_add_irq_chip(sec_pmic->regmap_pmic, sec_pmic->irq, + IRQF_TRIGGER_FALLING | IRQF_ONESHOT, + sec_pmic->irq_base, sec_irq_chip, + 
&sec_pmic->irq_data); if (ret != 0) { dev_err(sec_pmic->dev, "Failed to register IRQ chip: %d\n", ret); return ret; diff --git a/include/linux/mfd/samsung/core.h b/include/linux/mfd/samsung/core.h index 47d84242940b..b5f73de81aad 100644 --- a/include/linux/mfd/samsung/core.h +++ b/include/linux/mfd/samsung/core.h @@ -21,6 +21,7 @@ enum sec_device_type { S2MPA01, S2MPS11X, S2MPS14X, + S2MPU02, }; /** diff --git a/include/linux/mfd/samsung/irq.h b/include/linux/mfd/samsung/irq.h index 1224f447356b..f35af7361b60 100644 --- a/include/linux/mfd/samsung/irq.h +++ b/include/linux/mfd/samsung/irq.h @@ -129,6 +129,30 @@ enum s2mps14_irq { S2MPS14_IRQ_NR, }; +enum s2mpu02_irq { + S2MPU02_IRQ_PWRONF, + S2MPU02_IRQ_PWRONR, + S2MPU02_IRQ_JIGONBF, + S2MPU02_IRQ_JIGONBR, + S2MPU02_IRQ_ACOKBF, + S2MPU02_IRQ_ACOKBR, + S2MPU02_IRQ_PWRON1S, + S2MPU02_IRQ_MRB, + + S2MPU02_IRQ_RTC60S, + S2MPU02_IRQ_RTCA1, + S2MPU02_IRQ_RTCA0, + S2MPU02_IRQ_SMPL, + S2MPU02_IRQ_RTC1S, + S2MPU02_IRQ_WTSR, + + S2MPU02_IRQ_INT120C, + S2MPU02_IRQ_INT140C, + S2MPU02_IRQ_TSD, + + S2MPU02_IRQ_NR, +}; + /* Masks for interrupts are the same as in s2mps11 */ #define S2MPS14_IRQ_TSD_MASK (1 << 2) diff --git a/include/linux/mfd/samsung/s2mpu02.h b/include/linux/mfd/samsung/s2mpu02.h new file mode 100644 index 000000000000..47ae9bc583a7 --- /dev/null +++ b/include/linux/mfd/samsung/s2mpu02.h @@ -0,0 +1,201 @@ +/* + * s2mpu02.h + * + * Copyright (c) 2014 Samsung Electronics Co., Ltd + * http://www.samsung.com + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#ifndef __LINUX_MFD_S2MPU02_H +#define __LINUX_MFD_S2MPU02_H + +/* S2MPU02 registers */ +enum S2MPU02_reg { + S2MPU02_REG_ID, + S2MPU02_REG_INT1, + S2MPU02_REG_INT2, + S2MPU02_REG_INT3, + S2MPU02_REG_INT1M, + S2MPU02_REG_INT2M, + S2MPU02_REG_INT3M, + S2MPU02_REG_ST1, + S2MPU02_REG_ST2, + S2MPU02_REG_PWRONSRC, + S2MPU02_REG_OFFSRC, + S2MPU02_REG_BU_CHG, + S2MPU02_REG_RTCCTRL, + S2MPU02_REG_PMCTRL1, + S2MPU02_REG_RSVD1, + S2MPU02_REG_RSVD2, + S2MPU02_REG_RSVD3, + S2MPU02_REG_RSVD4, + S2MPU02_REG_RSVD5, + S2MPU02_REG_RSVD6, + S2MPU02_REG_RSVD7, + S2MPU02_REG_WRSTEN, + S2MPU02_REG_RSVD8, + S2MPU02_REG_RSVD9, + S2MPU02_REG_RSVD10, + S2MPU02_REG_B1CTRL1, + S2MPU02_REG_B1CTRL2, + S2MPU02_REG_B2CTRL1, + S2MPU02_REG_B2CTRL2, + S2MPU02_REG_B3CTRL1, + S2MPU02_REG_B3CTRL2, + S2MPU02_REG_B4CTRL1, + S2MPU02_REG_B4CTRL2, + S2MPU02_REG_B5CTRL1, + S2MPU02_REG_B5CTRL2, + S2MPU02_REG_B5CTRL3, + S2MPU02_REG_B5CTRL4, + S2MPU02_REG_B5CTRL5, + S2MPU02_REG_B6CTRL1, + S2MPU02_REG_B6CTRL2, + S2MPU02_REG_B7CTRL1, + S2MPU02_REG_B7CTRL2, + S2MPU02_REG_RAMP1, + S2MPU02_REG_RAMP2, + S2MPU02_REG_L1CTRL, + S2MPU02_REG_L2CTRL1, + S2MPU02_REG_L2CTRL2, + S2MPU02_REG_L2CTRL3, + S2MPU02_REG_L2CTRL4, + S2MPU02_REG_L3CTRL, + S2MPU02_REG_L4CTRL, + S2MPU02_REG_L5CTRL, + S2MPU02_REG_L6CTRL, + S2MPU02_REG_L7CTRL, + S2MPU02_REG_L8CTRL, + S2MPU02_REG_L9CTRL, + S2MPU02_REG_L10CTRL, + S2MPU02_REG_L11CTRL, + S2MPU02_REG_L12CTRL, + S2MPU02_REG_L13CTRL, + S2MPU02_REG_L14CTRL, + S2MPU02_REG_L15CTRL, + S2MPU02_REG_L16CTRL, + S2MPU02_REG_L17CTRL, + S2MPU02_REG_L18CTRL, + S2MPU02_REG_L19CTRL, + S2MPU02_REG_L20CTRL, + S2MPU02_REG_L21CTRL, + S2MPU02_REG_L22CTRL, + S2MPU02_REG_L23CTRL, + S2MPU02_REG_L24CTRL, + S2MPU02_REG_L25CTRL, + S2MPU02_REG_L26CTRL, + S2MPU02_REG_L27CTRL, + S2MPU02_REG_L28CTRL, + S2MPU02_REG_LDODSCH1, + S2MPU02_REG_LDODSCH2, + S2MPU02_REG_LDODSCH3, + S2MPU02_REG_LDODSCH4, + S2MPU02_REG_SELMIF, + S2MPU02_REG_RSVD11, + S2MPU02_REG_RSVD12, + S2MPU02_REG_RSVD13, + S2MPU02_REG_DVSSEL, + S2MPU02_REG_DVSPTR, + S2MPU02_REG_DVSDATA, +}; + +/* S2MPU02 regulator ids */ +enum S2MPU02_regulators { + S2MPU02_LDO1, + S2MPU02_LDO2, + S2MPU02_LDO3, + S2MPU02_LDO4, + S2MPU02_LDO5, + S2MPU02_LDO6, + S2MPU02_LDO7, + S2MPU02_LDO8, + S2MPU02_LDO9, + S2MPU02_LDO10, + S2MPU02_LDO11, + S2MPU02_LDO12, + S2MPU02_LDO13, + S2MPU02_LDO14, + S2MPU02_LDO15, + S2MPU02_LDO16, + S2MPU02_LDO17, + S2MPU02_LDO18, + S2MPU02_LDO19, + S2MPU02_LDO20, + S2MPU02_LDO21, + S2MPU02_LDO22, + S2MPU02_LDO23, + S2MPU02_LDO24, + S2MPU02_LDO25, + S2MPU02_LDO26, + S2MPU02_LDO27, + S2MPU02_LDO28, + S2MPU02_BUCK1, + S2MPU02_BUCK2, + S2MPU02_BUCK3, + S2MPU02_BUCK4, + S2MPU02_BUCK5, + S2MPU02_BUCK6, + S2MPU02_BUCK7, + + S2MPU02_REGULATOR_MAX, +}; + +/* Regulator constraints for BUCKx */ +#define S2MPU02_BUCK1234_MIN_600MV 600000 +#define S2MPU02_BUCK5_MIN_1081_25MV 1081250 +#define S2MPU02_BUCK6_MIN_1700MV 1700000 +#define S2MPU02_BUCK7_MIN_900MV 900000 + +#define S2MPU02_BUCK1234_STEP_6_25MV 6250 +#define S2MPU02_BUCK5_STEP_6_25MV 6250 +#define S2MPU02_BUCK6_STEP_2_50MV 2500 +#define S2MPU02_BUCK7_STEP_6_25MV 6250 + +#define S2MPU02_BUCK1234_START_SEL 0x00 +#define S2MPU02_BUCK5_START_SEL 0x4D +#define S2MPU02_BUCK6_START_SEL 0x28 +#define S2MPU02_BUCK7_START_SEL 0x30 + +#define S2MPU02_BUCK_RAMP_DELAY 12500 + +/* Regulator constraints for different types of LDOx */ +#define S2MPU02_LDO_MIN_900MV 900000 +#define S2MPU02_LDO_MIN_1050MV 1050000 +#define S2MPU02_LDO_MIN_1600MV 1600000 +#define S2MPU02_LDO_STEP_12_5MV 12500 +#define S2MPU02_LDO_STEP_25MV 25000 +#define 
S2MPU02_LDO_STEP_50MV 50000 + +#define S2MPU02_LDO_GROUP1_START_SEL 0x8 +#define S2MPU02_LDO_GROUP2_START_SEL 0xA +#define S2MPU02_LDO_GROUP3_START_SEL 0x10 + +#define S2MPU02_LDO_VSEL_MASK 0x3F +#define S2MPU02_BUCK_VSEL_MASK 0xFF +#define S2MPU02_ENABLE_MASK (0x03 << S2MPU02_ENABLE_SHIFT) +#define S2MPU02_ENABLE_SHIFT 6 + +/* On/Off controlled by PWREN */ +#define S2MPU02_ENABLE_SUSPEND (0x01 << S2MPU02_ENABLE_SHIFT) +#define S2MPU02_DISABLE_SUSPEND (0x11 << S2MPU02_ENABLE_SHIFT) +#define S2MPU02_LDO_N_VOLTAGES (S2MPU02_LDO_VSEL_MASK + 1) +#define S2MPU02_BUCK_N_VOLTAGES (S2MPU02_BUCK_VSEL_MASK + 1) + +/* RAMP delay for BUCK1234*/ +#define S2MPU02_BUCK1_RAMP_SHIFT 6 +#define S2MPU02_BUCK2_RAMP_SHIFT 4 +#define S2MPU02_BUCK3_RAMP_SHIFT 2 +#define S2MPU02_BUCK4_RAMP_SHIFT 0 +#define S2MPU02_BUCK1234_RAMP_MASK 0x3 + +#endif /* __LINUX_MFD_S2MPU02_H */ -- cgit v1.2.3-59-g8ed1b From ee98662ec914a23ab826b0c83797aa9414f737bc Mon Sep 17 00:00:00 2001 From: Bill Richardson Date: Wed, 18 Jun 2014 11:13:58 -0700 Subject: mfd: cros_ec: Fix the comment on cros_ec_remove() This comment was incorrect, so update it. Signed-off-by: Bill Richardson Signed-off-by: Simon Glass Signed-off-by: Doug Anderson Signed-off-by: Lee Jones --- include/linux/mfd/cros_ec.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h index 887ef4f7bef7..7e9fe6e98d2f 100644 --- a/include/linux/mfd/cros_ec.h +++ b/include/linux/mfd/cros_ec.h @@ -148,8 +148,7 @@ int cros_ec_prepare_tx(struct cros_ec_device *ec_dev, /** * cros_ec_remove - Remove a ChromeOS EC * - * Call this to deregister a ChromeOS EC. After this you should call - * cros_ec_free(). + * Call this to deregister a ChromeOS EC, then clean up any private data. * * @ec_dev: Device to register * @return 0 if ok, -ve on error -- cgit v1.2.3-59-g8ed1b From 2ce701ae4e351d9407ec0b30f5f9dd56b6de4292 Mon Sep 17 00:00:00 2001 From: Bill Richardson Date: Wed, 18 Jun 2014 11:13:59 -0700 Subject: mfd: cros_ec: Allow static din/dout buffers with cros_ec_register() The lower-level driver may want to provide its own buffers. If so, there's no need to allocate new ones. This already happens to work just fine (since we check for size of 0 and use devm allocation), but it's good to document it. [dianders: Resolved conflicts; documented that no code changes needed on mainline] Signed-off-by: Bill Richardson Signed-off-by: Doug Anderson Reviewed-by: Simon Glass Signed-off-by: Lee Jones --- include/linux/mfd/cros_ec.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h index 7e9fe6e98d2f..2ee3190b691c 100644 --- a/include/linux/mfd/cros_ec.h +++ b/include/linux/mfd/cros_ec.h @@ -68,8 +68,8 @@ struct cros_ec_msg { * We use this alignment to keep ARM and x86 happy. Probably word * alignment would be OK, there might be a small performance advantage * to using dword. - * @din_size: size of din buffer - * @dout_size: size of dout buffer + * @din_size: size of din buffer to allocate (zero to use static din) + * @dout_size: size of dout buffer to allocate (zero to use static dout) * @command_send: send a command * @command_recv: receive a command * @ec_name: name of EC device (e.g. 
'chromeos-ec') -- cgit v1.2.3-59-g8ed1b From 7e6cb5b4dbbc4b1d98289c88d0bc4092cac328be Mon Sep 17 00:00:00 2001 From: Bill Richardson Date: Wed, 18 Jun 2014 11:14:00 -0700 Subject: mfd: cros_ec: Tweak struct cros_ec_device for clarity The members of struct cros_ec_device were improperly commented, and intermixed the private and public sections. This is just cleanup to make it more obvious what goes with what. [dianders: left lock in the structure but gave it the name that will eventually be used.] Signed-off-by: Bill Richardson Signed-off-by: Doug Anderson Signed-off-by: Lee Jones --- drivers/mfd/cros_ec.c | 2 +- drivers/mfd/cros_ec_i2c.c | 4 +-- drivers/mfd/cros_ec_spi.c | 10 +++---- include/linux/mfd/cros_ec.h | 65 ++++++++++++++++++++++++--------------------- 4 files changed, 43 insertions(+), 38 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/cros_ec.c b/drivers/mfd/cros_ec.c index 38fe9bf0d169..04e053c71cc6 100644 --- a/drivers/mfd/cros_ec.c +++ b/drivers/mfd/cros_ec.c @@ -57,7 +57,7 @@ static int cros_ec_command_sendrecv(struct cros_ec_device *ec_dev, msg.in_buf = in_buf; msg.in_len = in_len; - return ec_dev->command_xfer(ec_dev, &msg); + return ec_dev->cmd_xfer(ec_dev, &msg); } static int cros_ec_command_recv(struct cros_ec_device *ec_dev, diff --git a/drivers/mfd/cros_ec_i2c.c b/drivers/mfd/cros_ec_i2c.c index 4f71be99a183..777e529abb16 100644 --- a/drivers/mfd/cros_ec_i2c.c +++ b/drivers/mfd/cros_ec_i2c.c @@ -29,7 +29,7 @@ static inline struct cros_ec_device *to_ec_dev(struct device *dev) return i2c_get_clientdata(client); } -static int cros_ec_command_xfer(struct cros_ec_device *ec_dev, +static int cros_ec_cmd_xfer_i2c(struct cros_ec_device *ec_dev, struct cros_ec_msg *msg) { struct i2c_client *client = ec_dev->priv; @@ -136,7 +136,7 @@ static int cros_ec_i2c_probe(struct i2c_client *client, ec_dev->dev = dev; ec_dev->priv = client; ec_dev->irq = client->irq; - ec_dev->command_xfer = cros_ec_command_xfer; + ec_dev->cmd_xfer = cros_ec_cmd_xfer_i2c; ec_dev->ec_name = client->name; ec_dev->phys_name = client->adapter->name; ec_dev->parent = &client->dev; diff --git a/drivers/mfd/cros_ec_spi.c b/drivers/mfd/cros_ec_spi.c index 1fcc65ecad0e..6c3075fb5dc7 100644 --- a/drivers/mfd/cros_ec_spi.c +++ b/drivers/mfd/cros_ec_spi.c @@ -73,7 +73,7 @@ * if no record * @end_of_msg_delay: used to set the delay_usecs on the spi_transfer that * is sent when we want to turn off CS at the end of a transaction. 
- * @lock: mutex to ensure only one user of cros_ec_command_spi_xfer at a time + * @lock: mutex to ensure only one user of cros_ec_cmd_xfer_spi at a time */ struct cros_ec_spi { struct spi_device *spi; @@ -210,13 +210,13 @@ static int cros_ec_spi_receive_response(struct cros_ec_device *ec_dev, } /** - * cros_ec_command_spi_xfer - Transfer a message over SPI and receive the reply + * cros_ec_cmd_xfer_spi - Transfer a message over SPI and receive the reply * * @ec_dev: ChromeOS EC device * @ec_msg: Message to transfer */ -static int cros_ec_command_spi_xfer(struct cros_ec_device *ec_dev, - struct cros_ec_msg *ec_msg) +static int cros_ec_cmd_xfer_spi(struct cros_ec_device *ec_dev, + struct cros_ec_msg *ec_msg) { struct cros_ec_spi *ec_spi = ec_dev->priv; struct spi_transfer trans; @@ -368,7 +368,7 @@ static int cros_ec_spi_probe(struct spi_device *spi) ec_dev->dev = dev; ec_dev->priv = ec_spi; ec_dev->irq = spi->irq; - ec_dev->command_xfer = cros_ec_command_spi_xfer; + ec_dev->cmd_xfer = cros_ec_cmd_xfer_spi; ec_dev->ec_name = ec_spi->spi->modalias; ec_dev->phys_name = dev_name(&ec_spi->spi->dev); ec_dev->parent = &ec_spi->spi->dev; diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h index 2ee3190b691c..79a35857cc9e 100644 --- a/include/linux/mfd/cros_ec.h +++ b/include/linux/mfd/cros_ec.h @@ -16,7 +16,9 @@ #ifndef __LINUX_MFD_CROS_EC_H #define __LINUX_MFD_CROS_EC_H +#include #include +#include /* * Command interface between EC and AP, for LPC, I2C and SPI interfaces. @@ -55,34 +57,53 @@ struct cros_ec_msg { /** * struct cros_ec_device - Information about a ChromeOS EC device * + * @ec_name: name of EC device (e.g. 'chromeos-ec') + * @phys_name: name of physical comms layer (e.g. 'i2c-4') + * @dev: Device pointer + * @was_wake_device: true if this device was set to wake the system from + * sleep at the last suspend + * @event_notifier: interrupt event notifier for transport devices + * @command_send: send a command + * @command_recv: receive a response + * @command_sendrecv: send a command and receive a response + * * @name: Name of this EC interface * @priv: Private data * @irq: Interrupt to use - * @din: input buffer (from EC) - * @dout: output buffer (to EC) + * @din: input buffer (for data from EC) + * @dout: output buffer (for data to EC) * \note * These two buffers will always be dword-aligned and include enough * space for up to 7 word-alignment bytes also, so we can ensure that * the body of the message is always dword-aligned (64-bit). - * * We use this alignment to keep ARM and x86 happy. Probably word * alignment would be OK, there might be a small performance advantage * to using dword. * @din_size: size of din buffer to allocate (zero to use static din) * @dout_size: size of dout buffer to allocate (zero to use static dout) - * @command_send: send a command - * @command_recv: receive a command - * @ec_name: name of EC device (e.g. 'chromeos-ec') - * @phys_name: name of physical comms layer (e.g. 'i2c-4') * @parent: pointer to parent device (e.g. 
i2c or spi device) - * @dev: Device pointer - * dev_lock: Lock to prevent concurrent access * @wake_enabled: true if this device can wake the system from sleep - * @was_wake_device: true if this device was set to wake the system from - * sleep at the last suspend - * @event_notifier: interrupt event notifier for transport devices + * @lock: one transaction at a time + * @cmd_xfer: low-level channel to the EC */ struct cros_ec_device { + + /* These are used by other drivers that want to talk to the EC */ + const char *ec_name; + const char *phys_name; + struct device *dev; + bool was_wake_device; + struct class *cros_class; + struct blocking_notifier_head event_notifier; + int (*command_send)(struct cros_ec_device *ec, + uint16_t cmd, void *out_buf, int out_len); + int (*command_recv)(struct cros_ec_device *ec, + uint16_t cmd, void *in_buf, int in_len); + int (*command_sendrecv)(struct cros_ec_device *ec, + uint16_t cmd, void *out_buf, int out_len, + void *in_buf, int in_len); + + /* These are used to implement the platform-specific interface */ const char *name; void *priv; int irq; @@ -90,26 +111,10 @@ struct cros_ec_device { uint8_t *dout; int din_size; int dout_size; - int (*command_send)(struct cros_ec_device *ec, - uint16_t cmd, void *out_buf, int out_len); - int (*command_recv)(struct cros_ec_device *ec, - uint16_t cmd, void *in_buf, int in_len); - int (*command_sendrecv)(struct cros_ec_device *ec, - uint16_t cmd, void *out_buf, int out_len, - void *in_buf, int in_len); - int (*command_xfer)(struct cros_ec_device *ec, - struct cros_ec_msg *msg); - - const char *ec_name; - const char *phys_name; struct device *parent; - - /* These are --private-- fields - do not assign */ - struct device *dev; - struct mutex dev_lock; bool wake_enabled; - bool was_wake_device; - struct blocking_notifier_head event_notifier; + struct mutex lock; + int (*cmd_xfer)(struct cros_ec_device *ec, struct cros_ec_msg *msg); }; /** -- cgit v1.2.3-59-g8ed1b From 5d4773e27e8ab37491767a6ef99ffd7100fe6341 Mon Sep 17 00:00:00 2001 From: Bill Richardson Date: Wed, 18 Jun 2014 11:14:02 -0700 Subject: mfd: cros_ec: Use struct cros_ec_command to communicate with the EC This is some internal structure reorganization / renaming to prepare for future patches that will add a userspace API to cros_ec. There should be no visible changes. 
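[Editorial summary of the renaming, taken from the include/linux/mfd/cros_ec.h hunk further down; no new API is introduced here.]

/* struct cros_ec_msg (old)  ->  struct cros_ec_command (new)
 *
 *	cmd		->	command
 *	out_buf		->	outdata
 *	out_len		->	outsize
 *	in_buf		->	indata
 *	in_len		->	insize
 *	(none)		->	result	(EC status code, kept separate from
 *					 transport-level errors)
 *
 * The cmd_xfer() hook now takes a struct cros_ec_command directly.
 */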
Signed-off-by: Bill Richardson Signed-off-by: Doug Anderson Signed-off-by: Lee Jones --- drivers/mfd/cros_ec.c | 28 ++++++++++++++-------------- drivers/mfd/cros_ec_i2c.c | 24 ++++++++++++------------ drivers/mfd/cros_ec_spi.c | 16 ++++++++-------- include/linux/mfd/cros_ec.h | 35 ++++++++++++++++++----------------- 4 files changed, 52 insertions(+), 51 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/cros_ec.c b/drivers/mfd/cros_ec.c index 04e053c71cc6..2e86c282f0b4 100644 --- a/drivers/mfd/cros_ec.c +++ b/drivers/mfd/cros_ec.c @@ -25,22 +25,22 @@ #include int cros_ec_prepare_tx(struct cros_ec_device *ec_dev, - struct cros_ec_msg *msg) + struct cros_ec_command *msg) { uint8_t *out; int csum, i; - BUG_ON(msg->out_len > EC_PROTO2_MAX_PARAM_SIZE); + BUG_ON(msg->outsize > EC_PROTO2_MAX_PARAM_SIZE); out = ec_dev->dout; out[0] = EC_CMD_VERSION0 + msg->version; - out[1] = msg->cmd; - out[2] = msg->out_len; + out[1] = msg->command; + out[2] = msg->outsize; csum = out[0] + out[1] + out[2]; - for (i = 0; i < msg->out_len; i++) - csum += out[EC_MSG_TX_HEADER_BYTES + i] = msg->out_buf[i]; - out[EC_MSG_TX_HEADER_BYTES + msg->out_len] = (uint8_t)(csum & 0xff); + for (i = 0; i < msg->outsize; i++) + csum += out[EC_MSG_TX_HEADER_BYTES + i] = msg->outdata[i]; + out[EC_MSG_TX_HEADER_BYTES + msg->outsize] = (uint8_t)(csum & 0xff); - return EC_MSG_TX_PROTO_BYTES + msg->out_len; + return EC_MSG_TX_PROTO_BYTES + msg->outsize; } EXPORT_SYMBOL(cros_ec_prepare_tx); @@ -48,14 +48,14 @@ static int cros_ec_command_sendrecv(struct cros_ec_device *ec_dev, uint16_t cmd, void *out_buf, int out_len, void *in_buf, int in_len) { - struct cros_ec_msg msg; + struct cros_ec_command msg; msg.version = cmd >> 8; - msg.cmd = cmd & 0xff; - msg.out_buf = out_buf; - msg.out_len = out_len; - msg.in_buf = in_buf; - msg.in_len = in_len; + msg.command = cmd & 0xff; + msg.outdata = out_buf; + msg.outsize = out_len; + msg.indata = in_buf; + msg.insize = in_len; return ec_dev->cmd_xfer(ec_dev, &msg); } diff --git a/drivers/mfd/cros_ec_i2c.c b/drivers/mfd/cros_ec_i2c.c index 777e529abb16..37ed12f99560 100644 --- a/drivers/mfd/cros_ec_i2c.c +++ b/drivers/mfd/cros_ec_i2c.c @@ -30,7 +30,7 @@ static inline struct cros_ec_device *to_ec_dev(struct device *dev) } static int cros_ec_cmd_xfer_i2c(struct cros_ec_device *ec_dev, - struct cros_ec_msg *msg) + struct cros_ec_command *msg) { struct i2c_client *client = ec_dev->priv; int ret = -ENOMEM; @@ -50,7 +50,7 @@ static int cros_ec_cmd_xfer_i2c(struct cros_ec_device *ec_dev, * allocate larger packet (one byte for checksum, one byte for * length, and one for result code) */ - packet_len = msg->in_len + 3; + packet_len = msg->insize + 3; in_buf = kzalloc(packet_len, GFP_KERNEL); if (!in_buf) goto done; @@ -61,7 +61,7 @@ static int cros_ec_cmd_xfer_i2c(struct cros_ec_device *ec_dev, * allocate larger packet (one byte for checksum, one for * command code, one for length, and one for command version) */ - packet_len = msg->out_len + 4; + packet_len = msg->outsize + 4; out_buf = kzalloc(packet_len, GFP_KERNEL); if (!out_buf) goto done; @@ -69,16 +69,16 @@ static int cros_ec_cmd_xfer_i2c(struct cros_ec_device *ec_dev, i2c_msg[0].buf = (char *)out_buf; out_buf[0] = EC_CMD_VERSION0 + msg->version; - out_buf[1] = msg->cmd; - out_buf[2] = msg->out_len; + out_buf[1] = msg->command; + out_buf[2] = msg->outsize; /* copy message payload and compute checksum */ sum = out_buf[0] + out_buf[1] + out_buf[2]; - for (i = 0; i < msg->out_len; i++) { - out_buf[3 + i] = msg->out_buf[i]; + for (i = 0; i < 
msg->outsize; i++) { + out_buf[3 + i] = msg->outdata[i]; sum += out_buf[3 + i]; } - out_buf[3 + msg->out_len] = sum; + out_buf[3 + msg->outsize] = sum; /* send command to EC and read answer */ ret = i2c_transfer(client->adapter, i2c_msg, 2); @@ -94,20 +94,20 @@ static int cros_ec_cmd_xfer_i2c(struct cros_ec_device *ec_dev, /* check response error code */ if (i2c_msg[1].buf[0]) { dev_warn(ec_dev->dev, "command 0x%02x returned an error %d\n", - msg->cmd, i2c_msg[1].buf[0]); + msg->command, i2c_msg[1].buf[0]); ret = -EINVAL; goto done; } /* copy response packet payload and compute checksum */ sum = in_buf[0] + in_buf[1]; - for (i = 0; i < msg->in_len; i++) { - msg->in_buf[i] = in_buf[2 + i]; + for (i = 0; i < msg->insize; i++) { + msg->indata[i] = in_buf[2 + i]; sum += in_buf[2 + i]; } dev_dbg(ec_dev->dev, "packet: %*ph, sum = %02x\n", i2c_msg[1].len, in_buf, sum); - if (sum != in_buf[2 + msg->in_len]) { + if (sum != in_buf[2 + msg->insize]) { dev_err(ec_dev->dev, "bad packet checksum\n"); ret = -EBADMSG; goto done; diff --git a/drivers/mfd/cros_ec_spi.c b/drivers/mfd/cros_ec_spi.c index 9d45d88813b8..bef7735ecfde 100644 --- a/drivers/mfd/cros_ec_spi.c +++ b/drivers/mfd/cros_ec_spi.c @@ -216,7 +216,7 @@ static int cros_ec_spi_receive_response(struct cros_ec_device *ec_dev, * @ec_msg: Message to transfer */ static int cros_ec_cmd_xfer_spi(struct cros_ec_device *ec_dev, - struct cros_ec_msg *ec_msg) + struct cros_ec_command *ec_msg) { struct cros_ec_spi *ec_spi = ec_dev->priv; struct spi_transfer trans; @@ -261,7 +261,7 @@ static int cros_ec_cmd_xfer_spi(struct cros_ec_device *ec_dev, /* Get the response */ if (!ret) { ret = cros_ec_spi_receive_response(ec_dev, - ec_msg->in_len + EC_MSG_TX_PROTO_BYTES); + ec_msg->insize + EC_MSG_TX_PROTO_BYTES); } else { dev_err(ec_dev->dev, "spi transfer failed: %d\n", ret); } @@ -290,21 +290,21 @@ static int cros_ec_cmd_xfer_spi(struct cros_ec_device *ec_dev, if (ptr[0]) { if (ptr[0] == EC_RES_IN_PROGRESS) { dev_dbg(ec_dev->dev, "command 0x%02x in progress\n", - ec_msg->cmd); + ec_msg->command); ret = -EAGAIN; goto exit; } dev_warn(ec_dev->dev, "command 0x%02x returned an error %d\n", - ec_msg->cmd, ptr[0]); + ec_msg->command, ptr[0]); debug_packet(ec_dev->dev, "in_err", ptr, len); ret = -EINVAL; goto exit; } len = ptr[1]; sum = ptr[0] + ptr[1]; - if (len > ec_msg->in_len) { + if (len > ec_msg->insize) { dev_err(ec_dev->dev, "packet too long (%d bytes, expected %d)", - len, ec_msg->in_len); + len, ec_msg->insize); ret = -ENOSPC; goto exit; } @@ -312,8 +312,8 @@ static int cros_ec_cmd_xfer_spi(struct cros_ec_device *ec_dev, /* copy response packet payload and compute checksum */ for (i = 0; i < len; i++) { sum += ptr[i + 2]; - if (ec_msg->in_len) - ec_msg->in_buf[i] = ptr[i + 2]; + if (ec_msg->insize) + ec_msg->indata[i] = ptr[i + 2]; } sum &= 0xff; diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h index 79a35857cc9e..f27c03766069 100644 --- a/include/linux/mfd/cros_ec.h +++ b/include/linux/mfd/cros_ec.h @@ -35,23 +35,23 @@ enum { EC_MSG_TX_PROTO_BYTES, }; -/** - * struct cros_ec_msg - A message sent to the EC, and its reply - * +/* * @version: Command version number (often 0) - * @cmd: Command to send (EC_CMD_...) - * @out_buf: Outgoing payload (to EC) - * @outlen: Outgoing length - * @in_buf: Incoming payload (from EC) - * @in_len: Incoming length + * @command: Command to send (EC_CMD_...) 
+ * @outdata: Outgoing data to EC + * @outsize: Outgoing length in bytes + * @indata: Where to put the incoming data from EC + * @insize: Incoming length in bytes (filled in by EC) + * @result: EC's response to the command (separate from communication failure) */ -struct cros_ec_msg { - u8 version; - u8 cmd; - uint8_t *out_buf; - int out_len; - uint8_t *in_buf; - int in_len; +struct cros_ec_command { + uint32_t version; + uint32_t command; + uint8_t *outdata; + uint32_t outsize; + uint8_t *indata; + uint32_t insize; + uint32_t result; }; /** @@ -114,7 +114,8 @@ struct cros_ec_device { struct device *parent; bool wake_enabled; struct mutex lock; - int (*cmd_xfer)(struct cros_ec_device *ec, struct cros_ec_msg *msg); + int (*cmd_xfer)(struct cros_ec_device *ec, + struct cros_ec_command *msg); }; /** @@ -148,7 +149,7 @@ int cros_ec_resume(struct cros_ec_device *ec_dev); * @msg: Message to write */ int cros_ec_prepare_tx(struct cros_ec_device *ec_dev, - struct cros_ec_msg *msg); + struct cros_ec_command *msg); /** * cros_ec_remove - Remove a ChromeOS EC -- cgit v1.2.3-59-g8ed1b From 533cec8f34778de10412dfabac991cf458ebf3c9 Mon Sep 17 00:00:00 2001 From: Bill Richardson Date: Wed, 18 Jun 2014 11:14:03 -0700 Subject: mfd: cros_ec: cleanup: remove unused fields from struct cros_ec_device struct cros_ec_device has a superfluous "name" field. We can get all the debugging info we need from the existing ec_name and phys_name fields, so let's take out the extra field. The printout also has sufficient info in it without explicitly adding the transport. Before this change: cros-ec-spi spi2.0: Chrome EC (SPI) After this change: cros-ec-spi spi2.0: Chrome EC device registered Signed-off-by: Bill Richardson Signed-off-by: Doug Anderson Reviewed-by: Simon Glass Signed-off-by: Lee Jones --- drivers/mfd/cros_ec.c | 2 +- drivers/mfd/cros_ec_i2c.c | 1 - drivers/mfd/cros_ec_spi.c | 1 - include/linux/mfd/cros_ec.h | 2 -- 4 files changed, 1 insertion(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/cros_ec.c b/drivers/mfd/cros_ec.c index 2e86c282f0b4..49ed8c340868 100644 --- a/drivers/mfd/cros_ec.c +++ b/drivers/mfd/cros_ec.c @@ -140,7 +140,7 @@ int cros_ec_register(struct cros_ec_device *ec_dev) goto fail_mfd; } - dev_info(dev, "Chrome EC (%s)\n", ec_dev->name); + dev_info(dev, "Chrome EC device registered\n"); return 0; diff --git a/drivers/mfd/cros_ec_i2c.c b/drivers/mfd/cros_ec_i2c.c index 37ed12f99560..5bb32f5550b3 100644 --- a/drivers/mfd/cros_ec_i2c.c +++ b/drivers/mfd/cros_ec_i2c.c @@ -132,7 +132,6 @@ static int cros_ec_i2c_probe(struct i2c_client *client, return -ENOMEM; i2c_set_clientdata(client, ec_dev); - ec_dev->name = "I2C"; ec_dev->dev = dev; ec_dev->priv = client; ec_dev->irq = client->irq; diff --git a/drivers/mfd/cros_ec_spi.c b/drivers/mfd/cros_ec_spi.c index bef7735ecfde..6e929b5f3bd3 100644 --- a/drivers/mfd/cros_ec_spi.c +++ b/drivers/mfd/cros_ec_spi.c @@ -370,7 +370,6 @@ static int cros_ec_spi_probe(struct spi_device *spi) cros_ec_spi_dt_probe(ec_spi, dev); spi_set_drvdata(spi, ec_dev); - ec_dev->name = "SPI"; ec_dev->dev = dev; ec_dev->priv = ec_spi; ec_dev->irq = spi->irq; diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h index f27c03766069..2b0c5982dbc1 100644 --- a/include/linux/mfd/cros_ec.h +++ b/include/linux/mfd/cros_ec.h @@ -67,7 +67,6 @@ struct cros_ec_command { * @command_recv: receive a response * @command_sendrecv: send a command and receive a response * - * @name: Name of this EC interface * @priv: Private data * @irq: Interrupt to 
use * @din: input buffer (for data from EC) @@ -104,7 +103,6 @@ struct cros_ec_device { void *in_buf, int in_len); /* These are used to implement the platform-specific interface */ - const char *name; void *priv; int irq; uint8_t *din; -- cgit v1.2.3-59-g8ed1b From 5799f95a373a2752e5c732f531a6f40fe458b818 Mon Sep 17 00:00:00 2001 From: Bill Richardson Date: Wed, 18 Jun 2014 11:14:04 -0700 Subject: mfd: cros_ec: cleanup: Remove EC wrapper functions Remove the three wrapper functions that talk to the EC without passing all the desired arguments and just use the underlying communication function that passes everything in a struct intead. This is internal code refactoring only. Nothing should change. Signed-off-by: Bill Richardson Signed-off-by: Doug Anderson Reviewed-by: Simon Glass Acked-by: Wolfram Sang Acked-by: Dmitry Torokhov Signed-off-by: Lee Jones --- drivers/i2c/busses/i2c-cros-ec-tunnel.c | 15 +++++++++++---- drivers/input/keyboard/cros_ec_keyb.c | 12 ++++++++++-- drivers/mfd/cros_ec.c | 32 -------------------------------- include/linux/mfd/cros_ec.h | 19 ++++++------------- 4 files changed, 27 insertions(+), 51 deletions(-) (limited to 'include/linux') diff --git a/drivers/i2c/busses/i2c-cros-ec-tunnel.c b/drivers/i2c/busses/i2c-cros-ec-tunnel.c index 8e7a71487bb1..dd07818d03d0 100644 --- a/drivers/i2c/busses/i2c-cros-ec-tunnel.c +++ b/drivers/i2c/busses/i2c-cros-ec-tunnel.c @@ -183,6 +183,7 @@ static int ec_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg i2c_msgs[], u8 *request = NULL; u8 *response = NULL; int result; + struct cros_ec_command msg; request_len = ec_i2c_count_message(i2c_msgs, num); if (request_len < 0) { @@ -218,9 +219,15 @@ static int ec_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg i2c_msgs[], } ec_i2c_construct_message(request, i2c_msgs, num, bus_num); - result = bus->ec->command_sendrecv(bus->ec, EC_CMD_I2C_PASSTHRU, - request, request_len, - response, response_len); + + msg.version = 0; + msg.command = EC_CMD_I2C_PASSTHRU; + msg.outdata = request; + msg.outsize = request_len; + msg.indata = response; + msg.insize = response_len; + + result = bus->ec->cmd_xfer(bus->ec, &msg); if (result) goto exit; @@ -258,7 +265,7 @@ static int ec_i2c_probe(struct platform_device *pdev) u32 remote_bus; int err; - if (!ec->command_sendrecv) { + if (!ec->cmd_xfer) { dev_err(dev, "Missing sendrecv\n"); return -EINVAL; } diff --git a/drivers/input/keyboard/cros_ec_keyb.c b/drivers/input/keyboard/cros_ec_keyb.c index 408379669d3c..b8341ab99f55 100644 --- a/drivers/input/keyboard/cros_ec_keyb.c +++ b/drivers/input/keyboard/cros_ec_keyb.c @@ -191,8 +191,16 @@ static void cros_ec_keyb_close(struct input_dev *dev) static int cros_ec_keyb_get_state(struct cros_ec_keyb *ckdev, uint8_t *kb_state) { - return ckdev->ec->command_recv(ckdev->ec, EC_CMD_MKBP_STATE, - kb_state, ckdev->cols); + struct cros_ec_command msg = { + .version = 0, + .command = EC_CMD_MKBP_STATE, + .outdata = NULL, + .outsize = 0, + .indata = kb_state, + .insize = ckdev->cols, + }; + + return ckdev->ec->cmd_xfer(ckdev->ec, &msg); } static int cros_ec_keyb_work(struct notifier_block *nb, diff --git a/drivers/mfd/cros_ec.c b/drivers/mfd/cros_ec.c index 49ed8c340868..4851ed2fbe31 100644 --- a/drivers/mfd/cros_ec.c +++ b/drivers/mfd/cros_ec.c @@ -44,34 +44,6 @@ int cros_ec_prepare_tx(struct cros_ec_device *ec_dev, } EXPORT_SYMBOL(cros_ec_prepare_tx); -static int cros_ec_command_sendrecv(struct cros_ec_device *ec_dev, - uint16_t cmd, void *out_buf, int out_len, - void *in_buf, int in_len) -{ - struct cros_ec_command 
msg; - - msg.version = cmd >> 8; - msg.command = cmd & 0xff; - msg.outdata = out_buf; - msg.outsize = out_len; - msg.indata = in_buf; - msg.insize = in_len; - - return ec_dev->cmd_xfer(ec_dev, &msg); -} - -static int cros_ec_command_recv(struct cros_ec_device *ec_dev, - uint16_t cmd, void *buf, int buf_len) -{ - return cros_ec_command_sendrecv(ec_dev, cmd, NULL, 0, buf, buf_len); -} - -static int cros_ec_command_send(struct cros_ec_device *ec_dev, - uint16_t cmd, void *buf, int buf_len) -{ - return cros_ec_command_sendrecv(ec_dev, cmd, buf, buf_len, NULL, 0); -} - static irqreturn_t ec_irq_thread(int irq, void *data) { struct cros_ec_device *ec_dev = data; @@ -104,10 +76,6 @@ int cros_ec_register(struct cros_ec_device *ec_dev) BLOCKING_INIT_NOTIFIER_HEAD(&ec_dev->event_notifier); - ec_dev->command_send = cros_ec_command_send; - ec_dev->command_recv = cros_ec_command_recv; - ec_dev->command_sendrecv = cros_ec_command_sendrecv; - if (ec_dev->din_size) { ec_dev->din = devm_kzalloc(dev, ec_dev->din_size, GFP_KERNEL); if (!ec_dev->din) diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h index 2b0c5982dbc1..60c088055f3a 100644 --- a/include/linux/mfd/cros_ec.h +++ b/include/linux/mfd/cros_ec.h @@ -63,9 +63,10 @@ struct cros_ec_command { * @was_wake_device: true if this device was set to wake the system from * sleep at the last suspend * @event_notifier: interrupt event notifier for transport devices - * @command_send: send a command - * @command_recv: receive a response - * @command_sendrecv: send a command and receive a response + * @cmd_xfer: send command to EC and get response + * Returns 0 if the communication succeeded, but that doesn't mean the EC + * was happy with the command it got. Caller should check msg.result for + * the EC's result code. * * @priv: Private data * @irq: Interrupt to use @@ -83,7 +84,6 @@ struct cros_ec_command { * @parent: pointer to parent device (e.g. i2c or spi device) * @wake_enabled: true if this device can wake the system from sleep * @lock: one transaction at a time - * @cmd_xfer: low-level channel to the EC */ struct cros_ec_device { @@ -94,13 +94,8 @@ struct cros_ec_device { bool was_wake_device; struct class *cros_class; struct blocking_notifier_head event_notifier; - int (*command_send)(struct cros_ec_device *ec, - uint16_t cmd, void *out_buf, int out_len); - int (*command_recv)(struct cros_ec_device *ec, - uint16_t cmd, void *in_buf, int in_len); - int (*command_sendrecv)(struct cros_ec_device *ec, - uint16_t cmd, void *out_buf, int out_len, - void *in_buf, int in_len); + int (*cmd_xfer)(struct cros_ec_device *ec, + struct cros_ec_command *msg); /* These are used to implement the platform-specific interface */ void *priv; @@ -112,8 +107,6 @@ struct cros_ec_device { struct device *parent; bool wake_enabled; struct mutex lock; - int (*cmd_xfer)(struct cros_ec_device *ec, - struct cros_ec_command *msg); }; /** -- cgit v1.2.3-59-g8ed1b From 6db07b6336589ff480528173e41f8f6af3f0097f Mon Sep 17 00:00:00 2001 From: Bill Richardson Date: Wed, 18 Jun 2014 11:14:05 -0700 Subject: mfd: cros_ec: Check result code from EC messages Just because the host was able to talk to the EC doesn't mean that the EC was happy with what it was told. Errors in communincation are not the same as error messages from the EC itself. This change lets the EC report its errors separately. 
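For illustration only (this snippet is not part of the patch; the helper name and command choice are hypothetical), a caller now has to look at the two failure classes separately: a negative return from cmd_xfer() means the host could not talk to the EC at all, while msg.result carries the EC's own verdict on the command it received.

  #include <linux/errno.h>
  #include <linux/mfd/cros_ec.h>

  /* Hypothetical caller sketch, not part of this patch. */
  static int example_query_ec(struct cros_ec_device *ec, uint8_t *buf, int len)
  {
  	struct cros_ec_command msg = {
  		.version = 0,
  		.command = EC_CMD_GET_VERSION,	/* illustrative command */
  		.outdata = NULL,
  		.outsize = 0,
  		.indata = buf,
  		.insize = len,
  	};
  	int ret;

  	ret = ec->cmd_xfer(ec, &msg);
  	if (ret < 0)
  		return ret;			/* host<->EC communication failed */

  	if (msg.result != EC_RES_SUCCESS)
  		return -EPROTO;			/* EC received but rejected the command */

  	return 0;
  }
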
[dianders: Added common function to cros_ec.c] Signed-off-by: Bill Richardson Signed-off-by: Doug Anderson Signed-off-by: Lee Jones --- drivers/mfd/cros_ec.c | 18 ++++++++++++++++++ drivers/mfd/cros_ec_i2c.c | 8 +++----- drivers/mfd/cros_ec_spi.c | 19 ++++++------------- include/linux/mfd/cros_ec.h | 12 ++++++++++++ 4 files changed, 39 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/cros_ec.c b/drivers/mfd/cros_ec.c index 4851ed2fbe31..83e30c663578 100644 --- a/drivers/mfd/cros_ec.c +++ b/drivers/mfd/cros_ec.c @@ -44,6 +44,24 @@ int cros_ec_prepare_tx(struct cros_ec_device *ec_dev, } EXPORT_SYMBOL(cros_ec_prepare_tx); +int cros_ec_check_result(struct cros_ec_device *ec_dev, + struct cros_ec_command *msg) +{ + switch (msg->result) { + case EC_RES_SUCCESS: + return 0; + case EC_RES_IN_PROGRESS: + dev_dbg(ec_dev->dev, "command 0x%02x in progress\n", + msg->command); + return -EAGAIN; + default: + dev_dbg(ec_dev->dev, "command 0x%02x returned %d\n", + msg->command, msg->result); + return 0; + } +} +EXPORT_SYMBOL(cros_ec_check_result); + static irqreturn_t ec_irq_thread(int irq, void *data) { struct cros_ec_device *ec_dev = data; diff --git a/drivers/mfd/cros_ec_i2c.c b/drivers/mfd/cros_ec_i2c.c index 5bb32f5550b3..189e7d1d7742 100644 --- a/drivers/mfd/cros_ec_i2c.c +++ b/drivers/mfd/cros_ec_i2c.c @@ -92,12 +92,10 @@ static int cros_ec_cmd_xfer_i2c(struct cros_ec_device *ec_dev, } /* check response error code */ - if (i2c_msg[1].buf[0]) { - dev_warn(ec_dev->dev, "command 0x%02x returned an error %d\n", - msg->command, i2c_msg[1].buf[0]); - ret = -EINVAL; + msg->result = i2c_msg[1].buf[0]; + ret = cros_ec_check_result(ec_dev, msg); + if (ret) goto done; - } /* copy response packet payload and compute checksum */ sum = in_buf[0] + in_buf[1]; diff --git a/drivers/mfd/cros_ec_spi.c b/drivers/mfd/cros_ec_spi.c index 6e929b5f3bd3..da1da05cd546 100644 --- a/drivers/mfd/cros_ec_spi.c +++ b/drivers/mfd/cros_ec_spi.c @@ -285,21 +285,14 @@ static int cros_ec_cmd_xfer_spi(struct cros_ec_device *ec_dev, goto exit; } - /* check response error code */ ptr = ec_dev->din; - if (ptr[0]) { - if (ptr[0] == EC_RES_IN_PROGRESS) { - dev_dbg(ec_dev->dev, "command 0x%02x in progress\n", - ec_msg->command); - ret = -EAGAIN; - goto exit; - } - dev_warn(ec_dev->dev, "command 0x%02x returned an error %d\n", - ec_msg->command, ptr[0]); - debug_packet(ec_dev->dev, "in_err", ptr, len); - ret = -EINVAL; + + /* check response error code */ + ec_msg->result = ptr[0]; + ret = cros_ec_check_result(ec_dev, ec_msg); + if (ret) goto exit; - } + len = ptr[1]; sum = ptr[0] + ptr[1]; if (len > ec_msg->insize) { diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h index 60c088055f3a..1f79f162abe4 100644 --- a/include/linux/mfd/cros_ec.h +++ b/include/linux/mfd/cros_ec.h @@ -142,6 +142,18 @@ int cros_ec_resume(struct cros_ec_device *ec_dev); int cros_ec_prepare_tx(struct cros_ec_device *ec_dev, struct cros_ec_command *msg); +/** + * cros_ec_check_result - Check ec_msg->result + * + * This is used by ChromeOS EC drivers to check the ec_msg->result for + * errors and to warn about them. 
+ * + * @ec_dev: EC device + * @msg: Message to check + */ +int cros_ec_check_result(struct cros_ec_device *ec_dev, + struct cros_ec_command *msg); + /** * cros_ec_remove - Remove a ChromeOS EC * -- cgit v1.2.3-59-g8ed1b From 12ebc8a50bc54e3a6fe207861fc6793181f9c2dc Mon Sep 17 00:00:00 2001 From: Bill Richardson Date: Wed, 18 Jun 2014 11:14:06 -0700 Subject: mfd: cros_ec: ec_dev->cmd_xfer() returns number of bytes received from EC When communicating with the EC, the cmd_xfer() function should return the number of bytes it received from the EC, or negative on error. Signed-off-by: Bill Richardson Signed-off-by: Doug Anderson Reviewed-by: Simon Glass Acked-by: Wolfram Sang Signed-off-by: Lee Jones --- drivers/i2c/busses/i2c-cros-ec-tunnel.c | 2 +- drivers/mfd/cros_ec_i2c.c | 2 +- drivers/mfd/cros_ec_spi.c | 2 +- include/linux/mfd/cros_ec.h | 8 ++++---- 4 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/i2c/busses/i2c-cros-ec-tunnel.c b/drivers/i2c/busses/i2c-cros-ec-tunnel.c index dd07818d03d0..05e033c98115 100644 --- a/drivers/i2c/busses/i2c-cros-ec-tunnel.c +++ b/drivers/i2c/busses/i2c-cros-ec-tunnel.c @@ -228,7 +228,7 @@ static int ec_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg i2c_msgs[], msg.insize = response_len; result = bus->ec->cmd_xfer(bus->ec, &msg); - if (result) + if (result < 0) goto exit; result = ec_i2c_parse_response(response, i2c_msgs, &num); diff --git a/drivers/mfd/cros_ec_i2c.c b/drivers/mfd/cros_ec_i2c.c index 189e7d1d7742..fd7a546d3478 100644 --- a/drivers/mfd/cros_ec_i2c.c +++ b/drivers/mfd/cros_ec_i2c.c @@ -111,7 +111,7 @@ static int cros_ec_cmd_xfer_i2c(struct cros_ec_device *ec_dev, goto done; } - ret = 0; + ret = i2c_msg[1].buf[1]; done: kfree(in_buf); kfree(out_buf); diff --git a/drivers/mfd/cros_ec_spi.c b/drivers/mfd/cros_ec_spi.c index da1da05cd546..ac52e3653e90 100644 --- a/drivers/mfd/cros_ec_spi.c +++ b/drivers/mfd/cros_ec_spi.c @@ -320,7 +320,7 @@ static int cros_ec_cmd_xfer_spi(struct cros_ec_device *ec_dev, goto exit; } - ret = 0; + ret = len; exit: mutex_unlock(&ec_spi->lock); return ret; diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h index 1f79f162abe4..0ebf26fddbbb 100644 --- a/include/linux/mfd/cros_ec.h +++ b/include/linux/mfd/cros_ec.h @@ -41,7 +41,7 @@ enum { * @outdata: Outgoing data to EC * @outsize: Outgoing length in bytes * @indata: Where to put the incoming data from EC - * @insize: Incoming length in bytes (filled in by EC) + * @insize: Max number of bytes to accept from EC * @result: EC's response to the command (separate from communication failure) */ struct cros_ec_command { @@ -64,9 +64,9 @@ struct cros_ec_command { * sleep at the last suspend * @event_notifier: interrupt event notifier for transport devices * @cmd_xfer: send command to EC and get response - * Returns 0 if the communication succeeded, but that doesn't mean the EC - * was happy with the command it got. Caller should check msg.result for - * the EC's result code. + * Returns the number of bytes received if the communication succeeded, but + * that doesn't mean the EC was happy with the command. The caller + * should check msg.result for the EC's result code. 
* * @priv: Private data * @irq: Interrupt to use -- cgit v1.2.3-59-g8ed1b From d1fd345e2087f0362c92bd3b0a1cea7fe636ac3a Mon Sep 17 00:00:00 2001 From: Andrew Bresticker Date: Wed, 18 Jun 2014 11:14:07 -0700 Subject: mfd: cros_ec: Move EC interrupt to cros_ec_keyb If we receive EC interrupts after the cros_ec driver has probed, but before the cros_ec_keyb driver has probed, the cros_ec IRQ handler will not run the cros_ec_keyb notifier and the EC will leave the IRQ line asserted. The cros_ec IRQ handler then returns IRQ_HANDLED and the resulting flood of interrupts causes the machine to hang. Since the EC interrupt is currently only used for the keyboard, move the setup and handling of the EC interrupt to the cros_ec_keyb driver. Signed-off-by: Andrew Bresticker Signed-off-by: Doug Anderson Acked-by: Dmitry Torokhov Signed-off-by: Lee Jones --- drivers/input/keyboard/cros_ec_keyb.c | 58 ++++++++++++++++++++--------------- drivers/mfd/cros_ec.c | 35 +-------------------- include/linux/mfd/cros_ec.h | 2 -- 3 files changed, 34 insertions(+), 61 deletions(-) (limited to 'include/linux') diff --git a/drivers/input/keyboard/cros_ec_keyb.c b/drivers/input/keyboard/cros_ec_keyb.c index b8341ab99f55..791781ade4e7 100644 --- a/drivers/input/keyboard/cros_ec_keyb.c +++ b/drivers/input/keyboard/cros_ec_keyb.c @@ -24,8 +24,8 @@ #include #include #include +#include #include -#include #include #include #include @@ -42,7 +42,6 @@ * @dev: Device pointer * @idev: Input device * @ec: Top level ChromeOS device to use to talk to EC - * @event_notifier: interrupt event notifier for transport devices */ struct cros_ec_keyb { unsigned int rows; @@ -55,7 +54,6 @@ struct cros_ec_keyb { struct device *dev; struct input_dev *idev; struct cros_ec_device *ec; - struct notifier_block notifier; }; @@ -173,22 +171,6 @@ static void cros_ec_keyb_process(struct cros_ec_keyb *ckdev, input_sync(ckdev->idev); } -static int cros_ec_keyb_open(struct input_dev *dev) -{ - struct cros_ec_keyb *ckdev = input_get_drvdata(dev); - - return blocking_notifier_chain_register(&ckdev->ec->event_notifier, - &ckdev->notifier); -} - -static void cros_ec_keyb_close(struct input_dev *dev) -{ - struct cros_ec_keyb *ckdev = input_get_drvdata(dev); - - blocking_notifier_chain_unregister(&ckdev->ec->event_notifier, - &ckdev->notifier); -} - static int cros_ec_keyb_get_state(struct cros_ec_keyb *ckdev, uint8_t *kb_state) { struct cros_ec_command msg = { @@ -203,19 +185,41 @@ static int cros_ec_keyb_get_state(struct cros_ec_keyb *ckdev, uint8_t *kb_state) return ckdev->ec->cmd_xfer(ckdev->ec, &msg); } -static int cros_ec_keyb_work(struct notifier_block *nb, - unsigned long state, void *_notify) +static irqreturn_t cros_ec_keyb_irq(int irq, void *data) { + struct cros_ec_keyb *ckdev = data; + struct cros_ec_device *ec = ckdev->ec; int ret; - struct cros_ec_keyb *ckdev = container_of(nb, struct cros_ec_keyb, - notifier); uint8_t kb_state[ckdev->cols]; + if (device_may_wakeup(ec->dev)) + pm_wakeup_event(ec->dev, 0); + ret = cros_ec_keyb_get_state(ckdev, kb_state); if (ret >= 0) cros_ec_keyb_process(ckdev, kb_state, ret); + else + dev_err(ec->dev, "failed to get keyboard state: %d\n", ret); - return NOTIFY_DONE; + return IRQ_HANDLED; +} + +static int cros_ec_keyb_open(struct input_dev *dev) +{ + struct cros_ec_keyb *ckdev = input_get_drvdata(dev); + struct cros_ec_device *ec = ckdev->ec; + + return request_threaded_irq(ec->irq, NULL, cros_ec_keyb_irq, + IRQF_TRIGGER_LOW | IRQF_ONESHOT, + "cros_ec_keyb", ckdev); +} + +static void cros_ec_keyb_close(struct 
input_dev *dev) +{ + struct cros_ec_keyb *ckdev = input_get_drvdata(dev); + struct cros_ec_device *ec = ckdev->ec; + + free_irq(ec->irq, ckdev); } static int cros_ec_keyb_probe(struct platform_device *pdev) @@ -246,8 +250,12 @@ static int cros_ec_keyb_probe(struct platform_device *pdev) if (!idev) return -ENOMEM; + if (!ec->irq) { + dev_err(dev, "no EC IRQ specified\n"); + return -EINVAL; + } + ckdev->ec = ec; - ckdev->notifier.notifier_call = cros_ec_keyb_work; ckdev->dev = dev; dev_set_drvdata(&pdev->dev, ckdev); diff --git a/drivers/mfd/cros_ec.c b/drivers/mfd/cros_ec.c index 83e30c663578..4873f9c50452 100644 --- a/drivers/mfd/cros_ec.c +++ b/drivers/mfd/cros_ec.c @@ -62,18 +62,6 @@ int cros_ec_check_result(struct cros_ec_device *ec_dev, } EXPORT_SYMBOL(cros_ec_check_result); -static irqreturn_t ec_irq_thread(int irq, void *data) -{ - struct cros_ec_device *ec_dev = data; - - if (device_may_wakeup(ec_dev->dev)) - pm_wakeup_event(ec_dev->dev, 0); - - blocking_notifier_call_chain(&ec_dev->event_notifier, 1, ec_dev); - - return IRQ_HANDLED; -} - static const struct mfd_cell cros_devs[] = { { .name = "cros-ec-keyb", @@ -92,8 +80,6 @@ int cros_ec_register(struct cros_ec_device *ec_dev) struct device *dev = ec_dev->dev; int err = 0; - BLOCKING_INIT_NOTIFIER_HEAD(&ec_dev->event_notifier); - if (ec_dev->din_size) { ec_dev->din = devm_kzalloc(dev, ec_dev->din_size, GFP_KERNEL); if (!ec_dev->din) @@ -105,42 +91,23 @@ int cros_ec_register(struct cros_ec_device *ec_dev) return -ENOMEM; } - if (!ec_dev->irq) { - dev_dbg(dev, "no valid IRQ: %d\n", ec_dev->irq); - return err; - } - - err = request_threaded_irq(ec_dev->irq, NULL, ec_irq_thread, - IRQF_TRIGGER_LOW | IRQF_ONESHOT, - "chromeos-ec", ec_dev); - if (err) { - dev_err(dev, "request irq %d: error %d\n", ec_dev->irq, err); - return err; - } - err = mfd_add_devices(dev, 0, cros_devs, ARRAY_SIZE(cros_devs), NULL, ec_dev->irq, NULL); if (err) { dev_err(dev, "failed to add mfd devices\n"); - goto fail_mfd; + return err; } dev_info(dev, "Chrome EC device registered\n"); return 0; - -fail_mfd: - free_irq(ec_dev->irq, ec_dev); - - return err; } EXPORT_SYMBOL(cros_ec_register); int cros_ec_remove(struct cros_ec_device *ec_dev) { mfd_remove_devices(ec_dev->dev); - free_irq(ec_dev->irq, ec_dev); return 0; } diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h index 0ebf26fddbbb..fcbe9d129a9d 100644 --- a/include/linux/mfd/cros_ec.h +++ b/include/linux/mfd/cros_ec.h @@ -62,7 +62,6 @@ struct cros_ec_command { * @dev: Device pointer * @was_wake_device: true if this device was set to wake the system from * sleep at the last suspend - * @event_notifier: interrupt event notifier for transport devices * @cmd_xfer: send command to EC and get response * Returns the number of bytes received if the communication succeeded, but * that doesn't mean the EC was happy with the command. The caller @@ -93,7 +92,6 @@ struct cros_ec_device { struct device *dev; bool was_wake_device; struct class *cros_class; - struct blocking_notifier_head event_notifier; int (*cmd_xfer)(struct cros_ec_device *ec, struct cros_ec_command *msg); -- cgit v1.2.3-59-g8ed1b From 9c87e0f10e281f782312e7b6aa202f2d434c84bf Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 1 Jul 2014 16:02:51 +0100 Subject: Provide PE binary definitions Provide some PE binary structural and constant definitions as taken from the pesign package sources. 
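As a rough sketch of how these definitions chain together (hypothetical helper, not part of this patch): an image starts with an MZ stub whose peaddr field points at the PE header, and each header is identified by its magic value.

  #include <linux/errno.h>
  #include <linux/pe.h>

  /* Hypothetical sketch: walk from the MZ stub to the PE header of an image. */
  static int example_find_pe_hdr(const void *image, size_t len,
  			       const struct pe_hdr **pe_out)
  {
  	const struct mz_hdr *mz = image;
  	const struct pe_hdr *pe;

  	if (len < sizeof(*mz) || mz->magic != MZ_MAGIC)
  		return -EINVAL;
  	if (mz->peaddr > len - sizeof(*pe))
  		return -EINVAL;

  	pe = image + mz->peaddr;
  	if (pe->magic != PE_MAGIC)
  		return -EINVAL;

  	*pe_out = pe;
  	return 0;
  }
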
Signed-off-by: David Howells Acked-by: Vivek Goyal Reviewed-by: Kees Cook --- include/linux/pe.h | 448 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 448 insertions(+) create mode 100644 include/linux/pe.h (limited to 'include/linux') diff --git a/include/linux/pe.h b/include/linux/pe.h new file mode 100644 index 000000000000..e170b95e763b --- /dev/null +++ b/include/linux/pe.h @@ -0,0 +1,448 @@ +/* + * Copyright 2011 Red Hat, Inc. + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * Author(s): Peter Jones + */ +#ifndef __LINUX_PE_H +#define __LINUX_PE_H + +#include + +#define MZ_MAGIC 0x5a4d /* "MZ" */ + +struct mz_hdr { + uint16_t magic; /* MZ_MAGIC */ + uint16_t lbsize; /* size of last used block */ + uint16_t blocks; /* pages in file, 0x3 */ + uint16_t relocs; /* relocations */ + uint16_t hdrsize; /* header size in "paragraphs" */ + uint16_t min_extra_pps; /* .bss */ + uint16_t max_extra_pps; /* runtime limit for the arena size */ + uint16_t ss; /* relative stack segment */ + uint16_t sp; /* initial %sp register */ + uint16_t checksum; /* word checksum */ + uint16_t ip; /* initial %ip register */ + uint16_t cs; /* initial %cs relative to load segment */ + uint16_t reloc_table_offset; /* offset of the first relocation */ + uint16_t overlay_num; /* overlay number. set to 0. 
*/ + uint16_t reserved0[4]; /* reserved */ + uint16_t oem_id; /* oem identifier */ + uint16_t oem_info; /* oem specific */ + uint16_t reserved1[10]; /* reserved */ + uint32_t peaddr; /* address of pe header */ + char message[64]; /* message to print */ +}; + +struct mz_reloc { + uint16_t offset; + uint16_t segment; +}; + +#define PE_MAGIC 0x00004550 /* "PE\0\0" */ +#define PE_OPT_MAGIC_PE32 0x010b +#define PE_OPT_MAGIC_PE32_ROM 0x0107 +#define PE_OPT_MAGIC_PE32PLUS 0x020b + +/* machine type */ +#define IMAGE_FILE_MACHINE_UNKNOWN 0x0000 +#define IMAGE_FILE_MACHINE_AM33 0x01d3 +#define IMAGE_FILE_MACHINE_AMD64 0x8664 +#define IMAGE_FILE_MACHINE_ARM 0x01c0 +#define IMAGE_FILE_MACHINE_ARMV7 0x01c4 +#define IMAGE_FILE_MACHINE_EBC 0x0ebc +#define IMAGE_FILE_MACHINE_I386 0x014c +#define IMAGE_FILE_MACHINE_IA64 0x0200 +#define IMAGE_FILE_MACHINE_M32R 0x9041 +#define IMAGE_FILE_MACHINE_MIPS16 0x0266 +#define IMAGE_FILE_MACHINE_MIPSFPU 0x0366 +#define IMAGE_FILE_MACHINE_MIPSFPU16 0x0466 +#define IMAGE_FILE_MACHINE_POWERPC 0x01f0 +#define IMAGE_FILE_MACHINE_POWERPCFP 0x01f1 +#define IMAGE_FILE_MACHINE_R4000 0x0166 +#define IMAGE_FILE_MACHINE_SH3 0x01a2 +#define IMAGE_FILE_MACHINE_SH3DSP 0x01a3 +#define IMAGE_FILE_MACHINE_SH3E 0x01a4 +#define IMAGE_FILE_MACHINE_SH4 0x01a6 +#define IMAGE_FILE_MACHINE_SH5 0x01a8 +#define IMAGE_FILE_MACHINE_THUMB 0x01c2 +#define IMAGE_FILE_MACHINE_WCEMIPSV2 0x0169 + +/* flags */ +#define IMAGE_FILE_RELOCS_STRIPPED 0x0001 +#define IMAGE_FILE_EXECUTABLE_IMAGE 0x0002 +#define IMAGE_FILE_LINE_NUMS_STRIPPED 0x0004 +#define IMAGE_FILE_LOCAL_SYMS_STRIPPED 0x0008 +#define IMAGE_FILE_AGGRESSIVE_WS_TRIM 0x0010 +#define IMAGE_FILE_LARGE_ADDRESS_AWARE 0x0020 +#define IMAGE_FILE_16BIT_MACHINE 0x0040 +#define IMAGE_FILE_BYTES_REVERSED_LO 0x0080 +#define IMAGE_FILE_32BIT_MACHINE 0x0100 +#define IMAGE_FILE_DEBUG_STRIPPED 0x0200 +#define IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP 0x0400 +#define IMAGE_FILE_NET_RUN_FROM_SWAP 0x0800 +#define IMAGE_FILE_SYSTEM 0x1000 +#define IMAGE_FILE_DLL 0x2000 +#define IMAGE_FILE_UP_SYSTEM_ONLY 0x4000 +#define IMAGE_FILE_BYTES_REVERSED_HI 0x8000 + +struct pe_hdr { + uint32_t magic; /* PE magic */ + uint16_t machine; /* machine type */ + uint16_t sections; /* number of sections */ + uint32_t timestamp; /* time_t */ + uint32_t symbol_table; /* symbol table offset */ + uint32_t symbols; /* number of symbols */ + uint16_t opt_hdr_size; /* size of optional header */ + uint16_t flags; /* flags */ +}; + +#define IMAGE_FILE_OPT_ROM_MAGIC 0x107 +#define IMAGE_FILE_OPT_PE32_MAGIC 0x10b +#define IMAGE_FILE_OPT_PE32_PLUS_MAGIC 0x20b + +#define IMAGE_SUBSYSTEM_UNKNOWN 0 +#define IMAGE_SUBSYSTEM_NATIVE 1 +#define IMAGE_SUBSYSTEM_WINDOWS_GUI 2 +#define IMAGE_SUBSYSTEM_WINDOWS_CUI 3 +#define IMAGE_SUBSYSTEM_POSIX_CUI 7 +#define IMAGE_SUBSYSTEM_WINDOWS_CE_GUI 9 +#define IMAGE_SUBSYSTEM_EFI_APPLICATION 10 +#define IMAGE_SUBSYSTEM_EFI_BOOT_SERVICE_DRIVER 11 +#define IMAGE_SUBSYSTEM_EFI_RUNTIME_DRIVER 12 +#define IMAGE_SUBSYSTEM_EFI_ROM_IMAGE 13 +#define IMAGE_SUBSYSTEM_XBOX 14 + +#define IMAGE_DLL_CHARACTERISTICS_DYNAMIC_BASE 0x0040 +#define IMAGE_DLL_CHARACTERISTICS_FORCE_INTEGRITY 0x0080 +#define IMAGE_DLL_CHARACTERISTICS_NX_COMPAT 0x0100 +#define IMAGE_DLLCHARACTERISTICS_NO_ISOLATION 0x0200 +#define IMAGE_DLLCHARACTERISTICS_NO_SEH 0x0400 +#define IMAGE_DLLCHARACTERISTICS_NO_BIND 0x0800 +#define IMAGE_DLLCHARACTERISTICS_WDM_DRIVER 0x2000 +#define IMAGE_DLLCHARACTERISTICS_TERMINAL_SERVER_AWARE 0x8000 + +/* the fact that pe32 isn't padded where pe32+ is 64-bit means union won't + 
* work right. vomit. */ +struct pe32_opt_hdr { + /* "standard" header */ + uint16_t magic; /* file type */ + uint8_t ld_major; /* linker major version */ + uint8_t ld_minor; /* linker minor version */ + uint32_t text_size; /* size of text section(s) */ + uint32_t data_size; /* size of data section(s) */ + uint32_t bss_size; /* size of bss section(s) */ + uint32_t entry_point; /* file offset of entry point */ + uint32_t code_base; /* relative code addr in ram */ + uint32_t data_base; /* relative data addr in ram */ + /* "windows" header */ + uint32_t image_base; /* preferred load address */ + uint32_t section_align; /* alignment in bytes */ + uint32_t file_align; /* file alignment in bytes */ + uint16_t os_major; /* major OS version */ + uint16_t os_minor; /* minor OS version */ + uint16_t image_major; /* major image version */ + uint16_t image_minor; /* minor image version */ + uint16_t subsys_major; /* major subsystem version */ + uint16_t subsys_minor; /* minor subsystem version */ + uint32_t win32_version; /* reserved, must be 0 */ + uint32_t image_size; /* image size */ + uint32_t header_size; /* header size rounded up to + file_align */ + uint32_t csum; /* checksum */ + uint16_t subsys; /* subsystem */ + uint16_t dll_flags; /* more flags! */ + uint32_t stack_size_req;/* amt of stack requested */ + uint32_t stack_size; /* amt of stack required */ + uint32_t heap_size_req; /* amt of heap requested */ + uint32_t heap_size; /* amt of heap required */ + uint32_t loader_flags; /* reserved, must be 0 */ + uint32_t data_dirs; /* number of data dir entries */ +}; + +struct pe32plus_opt_hdr { + uint16_t magic; /* file type */ + uint8_t ld_major; /* linker major version */ + uint8_t ld_minor; /* linker minor version */ + uint32_t text_size; /* size of text section(s) */ + uint32_t data_size; /* size of data section(s) */ + uint32_t bss_size; /* size of bss section(s) */ + uint32_t entry_point; /* file offset of entry point */ + uint32_t code_base; /* relative code addr in ram */ + /* "windows" header */ + uint64_t image_base; /* preferred load address */ + uint32_t section_align; /* alignment in bytes */ + uint32_t file_align; /* file alignment in bytes */ + uint16_t os_major; /* major OS version */ + uint16_t os_minor; /* minor OS version */ + uint16_t image_major; /* major image version */ + uint16_t image_minor; /* minor image version */ + uint16_t subsys_major; /* major subsystem version */ + uint16_t subsys_minor; /* minor subsystem version */ + uint32_t win32_version; /* reserved, must be 0 */ + uint32_t image_size; /* image size */ + uint32_t header_size; /* header size rounded up to + file_align */ + uint32_t csum; /* checksum */ + uint16_t subsys; /* subsystem */ + uint16_t dll_flags; /* more flags! 
*/ + uint64_t stack_size_req;/* amt of stack requested */ + uint64_t stack_size; /* amt of stack required */ + uint64_t heap_size_req; /* amt of heap requested */ + uint64_t heap_size; /* amt of heap required */ + uint32_t loader_flags; /* reserved, must be 0 */ + uint32_t data_dirs; /* number of data dir entries */ +}; + +struct data_dirent { + uint32_t virtual_address; /* relative to load address */ + uint32_t size; +}; + +struct data_directory { + struct data_dirent exports; /* .edata */ + struct data_dirent imports; /* .idata */ + struct data_dirent resources; /* .rsrc */ + struct data_dirent exceptions; /* .pdata */ + struct data_dirent certs; /* certs */ + struct data_dirent base_relocations; /* .reloc */ + struct data_dirent debug; /* .debug */ + struct data_dirent arch; /* reservered */ + struct data_dirent global_ptr; /* global pointer reg. Size=0 */ + struct data_dirent tls; /* .tls */ + struct data_dirent load_config; /* load configuration structure */ + struct data_dirent bound_imports; /* no idea */ + struct data_dirent import_addrs; /* import address table */ + struct data_dirent delay_imports; /* delay-load import table */ + struct data_dirent clr_runtime_hdr; /* .cor (object only) */ + struct data_dirent reserved; +}; + +struct section_header { + char name[8]; /* name or "/12\0" string tbl offset */ + uint32_t virtual_size; /* size of loaded section in ram */ + uint32_t virtual_address; /* relative virtual address */ + uint32_t raw_data_size; /* size of the section */ + uint32_t data_addr; /* file pointer to first page of sec */ + uint32_t relocs; /* file pointer to relocation entries */ + uint32_t line_numbers; /* line numbers! */ + uint16_t num_relocs; /* number of relocations */ + uint16_t num_lin_numbers; /* srsly. */ + uint32_t flags; +}; + +/* they actually defined 0x00000000 as well, but I think we'll skip that one. 
*/ +#define IMAGE_SCN_RESERVED_0 0x00000001 +#define IMAGE_SCN_RESERVED_1 0x00000002 +#define IMAGE_SCN_RESERVED_2 0x00000004 +#define IMAGE_SCN_TYPE_NO_PAD 0x00000008 /* don't pad - obsolete */ +#define IMAGE_SCN_RESERVED_3 0x00000010 +#define IMAGE_SCN_CNT_CODE 0x00000020 /* .text */ +#define IMAGE_SCN_CNT_INITIALIZED_DATA 0x00000040 /* .data */ +#define IMAGE_SCN_CNT_UNINITIALIZED_DATA 0x00000080 /* .bss */ +#define IMAGE_SCN_LNK_OTHER 0x00000100 /* reserved */ +#define IMAGE_SCN_LNK_INFO 0x00000200 /* .drectve comments */ +#define IMAGE_SCN_RESERVED_4 0x00000400 +#define IMAGE_SCN_LNK_REMOVE 0x00000800 /* .o only - scn to be rm'd*/ +#define IMAGE_SCN_LNK_COMDAT 0x00001000 /* .o only - COMDAT data */ +#define IMAGE_SCN_RESERVED_5 0x00002000 /* spec omits this */ +#define IMAGE_SCN_RESERVED_6 0x00004000 /* spec omits this */ +#define IMAGE_SCN_GPREL 0x00008000 /* global pointer referenced data */ +/* spec lists 0x20000 twice, I suspect they meant 0x10000 for one of them */ +#define IMAGE_SCN_MEM_PURGEABLE 0x00010000 /* reserved for "future" use */ +#define IMAGE_SCN_16BIT 0x00020000 /* reserved for "future" use */ +#define IMAGE_SCN_LOCKED 0x00040000 /* reserved for "future" use */ +#define IMAGE_SCN_PRELOAD 0x00080000 /* reserved for "future" use */ +/* and here they just stuck a 1-byte integer in the middle of a bitfield */ +#define IMAGE_SCN_ALIGN_1BYTES 0x00100000 /* it does what it says on the box */ +#define IMAGE_SCN_ALIGN_2BYTES 0x00200000 +#define IMAGE_SCN_ALIGN_4BYTES 0x00300000 +#define IMAGE_SCN_ALIGN_8BYTES 0x00400000 +#define IMAGE_SCN_ALIGN_16BYTES 0x00500000 +#define IMAGE_SCN_ALIGN_32BYTES 0x00600000 +#define IMAGE_SCN_ALIGN_64BYTES 0x00700000 +#define IMAGE_SCN_ALIGN_128BYTES 0x00800000 +#define IMAGE_SCN_ALIGN_256BYTES 0x00900000 +#define IMAGE_SCN_ALIGN_512BYTES 0x00a00000 +#define IMAGE_SCN_ALIGN_1024BYTES 0x00b00000 +#define IMAGE_SCN_ALIGN_2048BYTES 0x00c00000 +#define IMAGE_SCN_ALIGN_4096BYTES 0x00d00000 +#define IMAGE_SCN_ALIGN_8192BYTES 0x00e00000 +#define IMAGE_SCN_LNK_NRELOC_OVFL 0x01000000 /* extended relocations */ +#define IMAGE_SCN_MEM_DISCARDABLE 0x02000000 /* scn can be discarded */ +#define IMAGE_SCN_MEM_NOT_CACHED 0x04000000 /* cannot be cached */ +#define IMAGE_SCN_MEM_NOT_PAGED 0x08000000 /* not pageable */ +#define IMAGE_SCN_MEM_SHARED 0x10000000 /* can be shared */ +#define IMAGE_SCN_MEM_EXECUTE 0x20000000 /* can be executed as code */ +#define IMAGE_SCN_MEM_READ 0x40000000 /* readable */ +#define IMAGE_SCN_MEM_WRITE 0x80000000 /* writeable */ + +enum x64_coff_reloc_type { + IMAGE_REL_AMD64_ABSOLUTE = 0, + IMAGE_REL_AMD64_ADDR64, + IMAGE_REL_AMD64_ADDR32, + IMAGE_REL_AMD64_ADDR32N, + IMAGE_REL_AMD64_REL32, + IMAGE_REL_AMD64_REL32_1, + IMAGE_REL_AMD64_REL32_2, + IMAGE_REL_AMD64_REL32_3, + IMAGE_REL_AMD64_REL32_4, + IMAGE_REL_AMD64_REL32_5, + IMAGE_REL_AMD64_SECTION, + IMAGE_REL_AMD64_SECREL, + IMAGE_REL_AMD64_SECREL7, + IMAGE_REL_AMD64_TOKEN, + IMAGE_REL_AMD64_SREL32, + IMAGE_REL_AMD64_PAIR, + IMAGE_REL_AMD64_SSPAN32, +}; + +enum arm_coff_reloc_type { + IMAGE_REL_ARM_ABSOLUTE, + IMAGE_REL_ARM_ADDR32, + IMAGE_REL_ARM_ADDR32N, + IMAGE_REL_ARM_BRANCH2, + IMAGE_REL_ARM_BRANCH1, + IMAGE_REL_ARM_SECTION, + IMAGE_REL_ARM_SECREL, +}; + +enum sh_coff_reloc_type { + IMAGE_REL_SH3_ABSOLUTE, + IMAGE_REL_SH3_DIRECT16, + IMAGE_REL_SH3_DIRECT32, + IMAGE_REL_SH3_DIRECT8, + IMAGE_REL_SH3_DIRECT8_WORD, + IMAGE_REL_SH3_DIRECT8_LONG, + IMAGE_REL_SH3_DIRECT4, + IMAGE_REL_SH3_DIRECT4_WORD, + IMAGE_REL_SH3_DIRECT4_LONG, + IMAGE_REL_SH3_PCREL8_WORD, + 
IMAGE_REL_SH3_PCREL8_LONG, + IMAGE_REL_SH3_PCREL12_WORD, + IMAGE_REL_SH3_STARTOF_SECTION, + IMAGE_REL_SH3_SIZEOF_SECTION, + IMAGE_REL_SH3_SECTION, + IMAGE_REL_SH3_SECREL, + IMAGE_REL_SH3_DIRECT32_NB, + IMAGE_REL_SH3_GPREL4_LONG, + IMAGE_REL_SH3_TOKEN, + IMAGE_REL_SHM_PCRELPT, + IMAGE_REL_SHM_REFLO, + IMAGE_REL_SHM_REFHALF, + IMAGE_REL_SHM_RELLO, + IMAGE_REL_SHM_RELHALF, + IMAGE_REL_SHM_PAIR, + IMAGE_REL_SHM_NOMODE, +}; + +enum ppc_coff_reloc_type { + IMAGE_REL_PPC_ABSOLUTE, + IMAGE_REL_PPC_ADDR64, + IMAGE_REL_PPC_ADDR32, + IMAGE_REL_PPC_ADDR24, + IMAGE_REL_PPC_ADDR16, + IMAGE_REL_PPC_ADDR14, + IMAGE_REL_PPC_REL24, + IMAGE_REL_PPC_REL14, + IMAGE_REL_PPC_ADDR32N, + IMAGE_REL_PPC_SECREL, + IMAGE_REL_PPC_SECTION, + IMAGE_REL_PPC_SECREL16, + IMAGE_REL_PPC_REFHI, + IMAGE_REL_PPC_REFLO, + IMAGE_REL_PPC_PAIR, + IMAGE_REL_PPC_SECRELLO, + IMAGE_REL_PPC_GPREL, + IMAGE_REL_PPC_TOKEN, +}; + +enum x86_coff_reloc_type { + IMAGE_REL_I386_ABSOLUTE, + IMAGE_REL_I386_DIR16, + IMAGE_REL_I386_REL16, + IMAGE_REL_I386_DIR32, + IMAGE_REL_I386_DIR32NB, + IMAGE_REL_I386_SEG12, + IMAGE_REL_I386_SECTION, + IMAGE_REL_I386_SECREL, + IMAGE_REL_I386_TOKEN, + IMAGE_REL_I386_SECREL7, + IMAGE_REL_I386_REL32, +}; + +enum ia64_coff_reloc_type { + IMAGE_REL_IA64_ABSOLUTE, + IMAGE_REL_IA64_IMM14, + IMAGE_REL_IA64_IMM22, + IMAGE_REL_IA64_IMM64, + IMAGE_REL_IA64_DIR32, + IMAGE_REL_IA64_DIR64, + IMAGE_REL_IA64_PCREL21B, + IMAGE_REL_IA64_PCREL21M, + IMAGE_REL_IA64_PCREL21F, + IMAGE_REL_IA64_GPREL22, + IMAGE_REL_IA64_LTOFF22, + IMAGE_REL_IA64_SECTION, + IMAGE_REL_IA64_SECREL22, + IMAGE_REL_IA64_SECREL64I, + IMAGE_REL_IA64_SECREL32, + IMAGE_REL_IA64_DIR32NB, + IMAGE_REL_IA64_SREL14, + IMAGE_REL_IA64_SREL22, + IMAGE_REL_IA64_SREL32, + IMAGE_REL_IA64_UREL32, + IMAGE_REL_IA64_PCREL60X, + IMAGE_REL_IA64_PCREL60B, + IMAGE_REL_IA64_PCREL60F, + IMAGE_REL_IA64_PCREL60I, + IMAGE_REL_IA64_PCREL60M, + IMAGE_REL_IA64_IMMGPREL6, + IMAGE_REL_IA64_TOKEN, + IMAGE_REL_IA64_GPREL32, + IMAGE_REL_IA64_ADDEND, +}; + +struct coff_reloc { + uint32_t virtual_address; + uint32_t symbol_table_index; + union { + enum x64_coff_reloc_type x64_type; + enum arm_coff_reloc_type arm_type; + enum sh_coff_reloc_type sh_type; + enum ppc_coff_reloc_type ppc_type; + enum x86_coff_reloc_type x86_type; + enum ia64_coff_reloc_type ia64_type; + uint16_t data; + }; +}; + +/* + * Definitions for the contents of the certs data block + */ +#define WIN_CERT_TYPE_PKCS_SIGNED_DATA 0x0002 +#define WIN_CERT_TYPE_EFI_OKCS115 0x0EF0 +#define WIN_CERT_TYPE_EFI_GUID 0x0EF1 + +#define WIN_CERT_REVISION_1_0 0x0100 +#define WIN_CERT_REVISION_2_0 0x0200 + +struct win_certificate { + uint32_t length; + uint16_t revision; + uint16_t cert_type; +}; + +#endif /* __LINUX_PE_H */ -- cgit v1.2.3-59-g8ed1b From 26d1164be37f1145a96af15f294122876d8e5c77 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 1 Jul 2014 16:02:51 +0100 Subject: pefile: Parse a PE binary to find a key and a signature contained therein Parse a PE binary to find a key and a signature contained therein. Later patches will check the signature and add the key if the signature checks out. 
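A minimal sketch of how the new interface is intended to be called (hypothetical caller, not part of this patch; note that at this point in the series the function still ends in -ENOANO because verification is not yet wired up, and the trust policy shown here is only an example):

  #include <linux/errno.h>
  #include <linux/key.h>
  #include <linux/verify_pefile.h>

  /* Hypothetical caller sketch for the interface added by this patch. */
  static int example_check_pe_image(const void *pebuf, unsigned int pelen,
  				  struct key *trusted_keyring)
  {
  	bool trusted = false;
  	int ret;

  	ret = verify_pefile_signature(pebuf, pelen, trusted_keyring, &trusted);
  	if (ret < 0)
  		return ret;		/* e.g. -ELIBBAD, -EKEYREJECTED, -ENOKEY */

  	return trusted ? 0 : -EKEYREJECTED;
  }
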
Signed-off-by: David Howells Acked-by: Vivek Goyal Reviewed-by: Kees Cook --- crypto/asymmetric_keys/Kconfig | 9 ++ crypto/asymmetric_keys/Makefile | 8 ++ crypto/asymmetric_keys/verify_pefile.c | 163 +++++++++++++++++++++++++++++++++ crypto/asymmetric_keys/verify_pefile.h | 37 ++++++++ include/linux/verify_pefile.h | 18 ++++ 5 files changed, 235 insertions(+) create mode 100644 crypto/asymmetric_keys/verify_pefile.c create mode 100644 crypto/asymmetric_keys/verify_pefile.h create mode 100644 include/linux/verify_pefile.h (limited to 'include/linux') diff --git a/crypto/asymmetric_keys/Kconfig b/crypto/asymmetric_keys/Kconfig index 14cac2860afa..ca41be5631c7 100644 --- a/crypto/asymmetric_keys/Kconfig +++ b/crypto/asymmetric_keys/Kconfig @@ -59,4 +59,13 @@ config PKCS7_TEST_KEY This is intended for testing the PKCS#7 parser. +config SIGNED_PE_FILE_VERIFICATION + bool "Support for PE file signature verification" + depends on PKCS7_MESSAGE_PARSER=y + select ASN1 + select OID_REGISTRY + help + This option provides support for verifying the signature(s) on a + signed PE binary. + endif # ASYMMETRIC_KEY_TYPE diff --git a/crypto/asymmetric_keys/Makefile b/crypto/asymmetric_keys/Makefile index 92d0e9af24d5..3e4de5297015 100644 --- a/crypto/asymmetric_keys/Makefile +++ b/crypto/asymmetric_keys/Makefile @@ -47,3 +47,11 @@ clean-files += pkcs7-asn1.c pkcs7-asn1.h obj-$(CONFIG_PKCS7_TEST_KEY) += pkcs7_test_key.o pkcs7_test_key-y := \ pkcs7_key_type.o + +# +# Signed PE binary-wrapped key handling +# +obj-$(CONFIG_SIGNED_PE_FILE_VERIFICATION) += verify_signed_pefile.o + +verify_signed_pefile-y := \ + verify_pefile.o diff --git a/crypto/asymmetric_keys/verify_pefile.c b/crypto/asymmetric_keys/verify_pefile.c new file mode 100644 index 000000000000..aec7c509404e --- /dev/null +++ b/crypto/asymmetric_keys/verify_pefile.c @@ -0,0 +1,163 @@ +/* Parse a signed PE binary + * + * Copyright (C) 2014 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#define pr_fmt(fmt) "PEFILE: "fmt +#include +#include +#include +#include +#include +#include +#include +#include "verify_pefile.h" + +/* + * Parse a PE binary. 
+ */ +static int pefile_parse_binary(const void *pebuf, unsigned int pelen, + struct pefile_context *ctx) +{ + const struct mz_hdr *mz = pebuf; + const struct pe_hdr *pe; + const struct pe32_opt_hdr *pe32; + const struct pe32plus_opt_hdr *pe64; + const struct data_directory *ddir; + const struct data_dirent *dde; + const struct section_header *secs, *sec; + size_t cursor, datalen = pelen; + + kenter(""); + +#define chkaddr(base, x, s) \ + do { \ + if ((x) < base || (s) >= datalen || (x) > datalen - (s)) \ + return -ELIBBAD; \ + } while (0) + + chkaddr(0, 0, sizeof(*mz)); + if (mz->magic != MZ_MAGIC) + return -ELIBBAD; + cursor = sizeof(*mz); + + chkaddr(cursor, mz->peaddr, sizeof(*pe)); + pe = pebuf + mz->peaddr; + if (pe->magic != PE_MAGIC) + return -ELIBBAD; + cursor = mz->peaddr + sizeof(*pe); + + chkaddr(0, cursor, sizeof(pe32->magic)); + pe32 = pebuf + cursor; + pe64 = pebuf + cursor; + + switch (pe32->magic) { + case PE_OPT_MAGIC_PE32: + chkaddr(0, cursor, sizeof(*pe32)); + ctx->image_checksum_offset = + (unsigned long)&pe32->csum - (unsigned long)pebuf; + ctx->header_size = pe32->header_size; + cursor += sizeof(*pe32); + ctx->n_data_dirents = pe32->data_dirs; + break; + + case PE_OPT_MAGIC_PE32PLUS: + chkaddr(0, cursor, sizeof(*pe64)); + ctx->image_checksum_offset = + (unsigned long)&pe64->csum - (unsigned long)pebuf; + ctx->header_size = pe64->header_size; + cursor += sizeof(*pe64); + ctx->n_data_dirents = pe64->data_dirs; + break; + + default: + pr_debug("Unknown PEOPT magic = %04hx\n", pe32->magic); + return -ELIBBAD; + } + + pr_debug("checksum @ %x\n", ctx->image_checksum_offset); + pr_debug("header size = %x\n", ctx->header_size); + + if (cursor >= ctx->header_size || ctx->header_size >= datalen) + return -ELIBBAD; + + if (ctx->n_data_dirents > (ctx->header_size - cursor) / sizeof(*dde)) + return -ELIBBAD; + + ddir = pebuf + cursor; + cursor += sizeof(*dde) * ctx->n_data_dirents; + + ctx->cert_dirent_offset = + (unsigned long)&ddir->certs - (unsigned long)pebuf; + ctx->certs_size = ddir->certs.size; + + if (!ddir->certs.virtual_address || !ddir->certs.size) { + pr_debug("Unsigned PE binary\n"); + return -EKEYREJECTED; + } + + chkaddr(ctx->header_size, ddir->certs.virtual_address, + ddir->certs.size); + ctx->sig_offset = ddir->certs.virtual_address; + ctx->sig_len = ddir->certs.size; + pr_debug("cert = %x @%x [%*ph]\n", + ctx->sig_len, ctx->sig_offset, + ctx->sig_len, pebuf + ctx->sig_offset); + + ctx->n_sections = pe->sections; + if (ctx->n_sections > (ctx->header_size - cursor) / sizeof(*sec)) + return -ELIBBAD; + ctx->secs = secs = pebuf + cursor; + + return 0; +} + +/** + * verify_pefile_signature - Verify the signature on a PE binary image + * @pebuf: Buffer containing the PE binary image + * @pelen: Length of the binary image + * @trust_keyring: Signing certificates to use as starting points + * @_trusted: Set to true if trustworth, false otherwise + * + * Validate that the certificate chain inside the PKCS#7 message inside the PE + * binary image intersects keys we already know and trust. + * + * Returns, in order of descending priority: + * + * (*) -ELIBBAD if the image cannot be parsed, or: + * + * (*) -EKEYREJECTED if a signature failed to match for which we have a valid + * key, or: + * + * (*) 0 if at least one signature chain intersects with the keys in the trust + * keyring, or: + * + * (*) -ENOPKG if a suitable crypto module couldn't be found for a check on a + * chain. 
+ * + * (*) -ENOKEY if we couldn't find a match for any of the signature chains in + * the message. + * + * May also return -ENOMEM. + */ +int verify_pefile_signature(const void *pebuf, unsigned pelen, + struct key *trusted_keyring, bool *_trusted) +{ + struct pefile_context ctx; + int ret; + + kenter(""); + + memset(&ctx, 0, sizeof(ctx)); + ret = pefile_parse_binary(pebuf, pelen, &ctx); + if (ret < 0) + return ret; + + return -ENOANO; // Not yet complete +} diff --git a/crypto/asymmetric_keys/verify_pefile.h b/crypto/asymmetric_keys/verify_pefile.h new file mode 100644 index 000000000000..e165d23458d4 --- /dev/null +++ b/crypto/asymmetric_keys/verify_pefile.h @@ -0,0 +1,37 @@ +/* PE Binary parser bits + * + * Copyright (C) 2014 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include +#include +#include + +struct pefile_context { + unsigned header_size; + unsigned image_checksum_offset; + unsigned cert_dirent_offset; + unsigned n_data_dirents; + unsigned n_sections; + unsigned certs_size; + unsigned sig_offset; + unsigned sig_len; + const struct section_header *secs; + struct pkcs7_message *pkcs7; + + /* PKCS#7 MS Individual Code Signing content */ + const void *digest; /* Digest */ + unsigned digest_len; /* Digest length */ + enum hash_algo digest_algo; /* Digest algorithm */ +}; + +#define kenter(FMT, ...) \ + pr_devel("==> %s("FMT")\n", __func__, ##__VA_ARGS__) +#define kleave(FMT, ...) \ + pr_devel("<== %s()"FMT"\n", __func__, ##__VA_ARGS__) diff --git a/include/linux/verify_pefile.h b/include/linux/verify_pefile.h new file mode 100644 index 000000000000..ac34819214f9 --- /dev/null +++ b/include/linux/verify_pefile.h @@ -0,0 +1,18 @@ +/* Signed PE file verification + * + * Copyright (C) 2014 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#ifndef _LINUX_VERIFY_PEFILE_H +#define _LINUX_VERIFY_PEFILE_H + +extern int verify_pefile_signature(const void *pebuf, unsigned pelen, + struct key *trusted_keyring, bool *_trusted); + +#endif /* _LINUX_VERIFY_PEFILE_H */ -- cgit v1.2.3-59-g8ed1b From 4c0b4b1d1ae0cbc86f150e2905a1c3d2a17b7c1e Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 1 Jul 2014 16:02:52 +0100 Subject: pefile: Parse the "Microsoft individual code signing" data blob The PKCS#7 certificate should contain a "Microsoft individual code signing" data blob as its signed content. This blob contains a digest of the signed content of the PE binary and the OID of the digest algorithm used (typically SHA256). 
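For orientation only (not part of this patch): the fields that mscode_parse() records in the pefile context are what the eventual verification step will compare against a digest computed over the PE image. A sketch of that comparison, with the hashing itself omitted, might look like this; the function name is hypothetical.

  #include <linux/errno.h>
  #include <linux/string.h>
  #include "verify_pefile.h"

  /* Hypothetical sketch: check a computed image digest against the blob's. */
  static int example_compare_digest(const struct pefile_context *ctx,
  				  const u8 *computed, unsigned int computed_len)
  {
  	if (ctx->digest_len != computed_len)
  		return -EBADMSG;	/* wrong length for the named algorithm */
  	if (memcmp(ctx->digest, computed, computed_len) != 0)
  		return -EKEYREJECTED;	/* image does not match the signed digest */

  	return 0;
  }
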
Signed-off-by: David Howells Acked-by: Vivek Goyal Reviewed-by: Kees Cook --- crypto/asymmetric_keys/Makefile | 9 ++- crypto/asymmetric_keys/mscode.asn1 | 28 ++++++++ crypto/asymmetric_keys/mscode_parser.c | 120 +++++++++++++++++++++++++++++++++ crypto/asymmetric_keys/verify_pefile.c | 7 ++ crypto/asymmetric_keys/verify_pefile.h | 5 ++ include/linux/oid_registry.h | 7 +- 6 files changed, 174 insertions(+), 2 deletions(-) create mode 100644 crypto/asymmetric_keys/mscode.asn1 create mode 100644 crypto/asymmetric_keys/mscode_parser.c (limited to 'include/linux') diff --git a/crypto/asymmetric_keys/Makefile b/crypto/asymmetric_keys/Makefile index 3e4de5297015..e47fcd9ac5e8 100644 --- a/crypto/asymmetric_keys/Makefile +++ b/crypto/asymmetric_keys/Makefile @@ -54,4 +54,11 @@ pkcs7_test_key-y := \ obj-$(CONFIG_SIGNED_PE_FILE_VERIFICATION) += verify_signed_pefile.o verify_signed_pefile-y := \ - verify_pefile.o + verify_pefile.o \ + mscode_parser.o \ + mscode-asn1.o + +$(obj)/mscode_parser.o: $(obj)/mscode-asn1.h $(obj)/mscode-asn1.h +$(obj)/mscode-asn1.o: $(obj)/mscode-asn1.c $(obj)/mscode-asn1.h + +clean-files += mscode-asn1.c mscode-asn1.h diff --git a/crypto/asymmetric_keys/mscode.asn1 b/crypto/asymmetric_keys/mscode.asn1 new file mode 100644 index 000000000000..6d09ba48c41c --- /dev/null +++ b/crypto/asymmetric_keys/mscode.asn1 @@ -0,0 +1,28 @@ +--- Microsoft individual code signing data blob parser +--- +--- Copyright (C) 2012 Red Hat, Inc. All Rights Reserved. +--- Written by David Howells (dhowells@redhat.com) +--- +--- This program is free software; you can redistribute it and/or +--- modify it under the terms of the GNU General Public Licence +--- as published by the Free Software Foundation; either version +--- 2 of the Licence, or (at your option) any later version. +--- + +MSCode ::= SEQUENCE { + type SEQUENCE { + contentType ContentType, + parameters ANY + }, + content SEQUENCE { + digestAlgorithm DigestAlgorithmIdentifier, + digest OCTET STRING ({ mscode_note_digest }) + } +} + +ContentType ::= OBJECT IDENTIFIER ({ mscode_note_content_type }) + +DigestAlgorithmIdentifier ::= SEQUENCE { + algorithm OBJECT IDENTIFIER ({ mscode_note_digest_algo }), + parameters ANY OPTIONAL +} diff --git a/crypto/asymmetric_keys/mscode_parser.c b/crypto/asymmetric_keys/mscode_parser.c new file mode 100644 index 000000000000..09336c32b3d4 --- /dev/null +++ b/crypto/asymmetric_keys/mscode_parser.c @@ -0,0 +1,120 @@ +/* Parse a Microsoft Individual Code Signing blob + * + * Copyright (C) 2014 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. 
+ */ + +#define pr_fmt(fmt) "MSCODE: "fmt +#include +#include +#include +#include +#include +#include "verify_pefile.h" +#include "mscode-asn1.h" + +/* + * Parse a Microsoft Individual Code Signing blob + */ +int mscode_parse(struct pefile_context *ctx) +{ + const void *content_data; + size_t data_len; + int ret; + + ret = pkcs7_get_content_data(ctx->pkcs7, &content_data, &data_len, 1); + + if (ret) { + pr_debug("PKCS#7 message does not contain data\n"); + return ret; + } + + pr_devel("Data: %zu [%*ph]\n", data_len, (unsigned)(data_len), + content_data); + + return asn1_ber_decoder(&mscode_decoder, ctx, content_data, data_len); +} + +/* + * Check the content type OID + */ +int mscode_note_content_type(void *context, size_t hdrlen, + unsigned char tag, + const void *value, size_t vlen) +{ + enum OID oid; + + oid = look_up_OID(value, vlen); + if (oid == OID__NR) { + char buffer[50]; + + sprint_oid(value, vlen, buffer, sizeof(buffer)); + pr_err("Unknown OID: %s\n", buffer); + return -EBADMSG; + } + + if (oid != OID_msIndividualSPKeyPurpose) { + pr_err("Unexpected content type OID %u\n", oid); + return -EBADMSG; + } + + return 0; +} + +/* + * Note the digest algorithm OID + */ +int mscode_note_digest_algo(void *context, size_t hdrlen, + unsigned char tag, + const void *value, size_t vlen) +{ + struct pefile_context *ctx = context; + char buffer[50]; + enum OID oid; + + oid = look_up_OID(value, vlen); + switch (oid) { + case OID_md4: + ctx->digest_algo = HASH_ALGO_MD4; + break; + case OID_md5: + ctx->digest_algo = HASH_ALGO_MD5; + break; + case OID_sha1: + ctx->digest_algo = HASH_ALGO_SHA1; + break; + case OID_sha256: + ctx->digest_algo = HASH_ALGO_SHA256; + break; + + case OID__NR: + sprint_oid(value, vlen, buffer, sizeof(buffer)); + pr_err("Unknown OID: %s\n", buffer); + return -EBADMSG; + + default: + pr_err("Unsupported content type: %u\n", oid); + return -ENOPKG; + } + + return 0; +} + +/* + * Note the digest we're guaranteeing with this certificate + */ +int mscode_note_digest(void *context, size_t hdrlen, + unsigned char tag, + const void *value, size_t vlen) +{ + struct pefile_context *ctx = context; + + ctx->digest = value; + ctx->digest_len = vlen; + return 0; +} diff --git a/crypto/asymmetric_keys/verify_pefile.c b/crypto/asymmetric_keys/verify_pefile.c index 13f3b44b5046..b975918e82d2 100644 --- a/crypto/asymmetric_keys/verify_pefile.c +++ b/crypto/asymmetric_keys/verify_pefile.c @@ -245,6 +245,13 @@ int verify_pefile_signature(const void *pebuf, unsigned pelen, goto error; } + ret = mscode_parse(&ctx); + if (ret < 0) + goto error; + + pr_debug("Digest: %u [%*ph]\n", + ctx.digest_len, ctx.digest_len, ctx.digest); + ret = -ENOANO; // Not yet complete error: diff --git a/crypto/asymmetric_keys/verify_pefile.h b/crypto/asymmetric_keys/verify_pefile.h index e165d23458d4..55d5f7ebc45a 100644 --- a/crypto/asymmetric_keys/verify_pefile.h +++ b/crypto/asymmetric_keys/verify_pefile.h @@ -35,3 +35,8 @@ struct pefile_context { pr_devel("==> %s("FMT")\n", __func__, ##__VA_ARGS__) #define kleave(FMT, ...) 
\ pr_devel("<== %s()"FMT"\n", __func__, ##__VA_ARGS__) + +/* + * mscode_parser.c + */ +extern int mscode_parse(struct pefile_context *ctx); diff --git a/include/linux/oid_registry.h b/include/linux/oid_registry.h index edeff85cb1e8..c2bbf672b84e 100644 --- a/include/linux/oid_registry.h +++ b/include/linux/oid_registry.h @@ -52,8 +52,13 @@ enum OID { OID_md4, /* 1.2.840.113549.2.4 */ OID_md5, /* 1.2.840.113549.2.5 */ - OID_certAuthInfoAccess, /* 1.3.6.1.5.5.7.1.1 */ + /* Microsoft Authenticode & Software Publishing */ + OID_msIndirectData, /* 1.3.6.1.4.1.311.2.1.4 */ + OID_msPeImageDataObjId, /* 1.3.6.1.4.1.311.2.1.15 */ + OID_msIndividualSPKeyPurpose, /* 1.3.6.1.4.1.311.2.1.21 */ OID_msOutlookExpress, /* 1.3.6.1.4.1.311.16.4 */ + + OID_certAuthInfoAccess, /* 1.3.6.1.5.5.7.1.1 */ OID_sha1, /* 1.3.14.3.2.26 */ OID_sha256, /* 2.16.840.1.101.3.4.2.1 */ -- cgit v1.2.3-59-g8ed1b From 51499aa912e0d6f5b3497e2b1121309f579e78ed Mon Sep 17 00:00:00 2001 From: Alexey Skidanov Date: Tue, 8 Jul 2014 17:30:17 +0300 Subject: iommu/amd: Moving PPR fault flags macros definitions Any kernel source registering the invalid PPR calback may include the header file with PPR fault flags macros definitions. Thus we move them to include/linux/amd-iommu.h Signed-off-by: Alexey Skidanov Signed-off-by: Oded Gabbay Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu_types.h | 6 ------ include/linux/amd-iommu.h | 7 +++++++ 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index f1a5abf11acf..e5f1cd4728ed 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -390,12 +390,6 @@ struct amd_iommu_fault { }; -#define PPR_FAULT_EXEC (1 << 1) -#define PPR_FAULT_READ (1 << 2) -#define PPR_FAULT_WRITE (1 << 5) -#define PPR_FAULT_USER (1 << 6) -#define PPR_FAULT_RSVD (1 << 7) -#define PPR_FAULT_GN (1 << 8) struct iommu_domain; diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h index 15f6b9edd0b1..2b08e79f5100 100644 --- a/include/linux/amd-iommu.h +++ b/include/linux/amd-iommu.h @@ -119,6 +119,13 @@ typedef int (*amd_iommu_invalid_ppr_cb)(struct pci_dev *pdev, extern int amd_iommu_set_invalid_ppr_cb(struct pci_dev *pdev, amd_iommu_invalid_ppr_cb cb); +#define PPR_FAULT_EXEC (1 << 1) +#define PPR_FAULT_READ (1 << 2) +#define PPR_FAULT_WRITE (1 << 5) +#define PPR_FAULT_USER (1 << 6) +#define PPR_FAULT_RSVD (1 << 7) +#define PPR_FAULT_GN (1 << 8) + /** * amd_iommu_device_info() - Get information about IOMMUv2 support of a * PCI device -- cgit v1.2.3-59-g8ed1b From 7450e90bbb8d834c190cc8100d1cc41888358c7c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 9 Jul 2014 10:08:07 -0400 Subject: cgroup: remove CGRP_ROOT_OPTION_MASK cgroup_root->flags only contains CGRP_ROOT_* flags and there's no reason to mask the flags. Remove CGRP_ROOT_OPTION_MASK. This doesn't cause any behavior differences. 
Signed-off-by: Tejun Heo Acked-by: Li Zefan --- include/linux/cgroup.h | 3 --- kernel/cgroup.c | 7 +++---- 2 files changed, 3 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 28853e771f3b..c4901c19668b 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -320,9 +320,6 @@ enum { CGRP_ROOT_NOPREFIX = (1 << 1), /* mounted subsystems have no named prefix */ CGRP_ROOT_XATTR = (1 << 2), /* supports extended attributes */ - - /* mount options live below bit 16 */ - CGRP_ROOT_OPTION_MASK = (1 << 16) - 1, }; /* diff --git a/kernel/cgroup.c b/kernel/cgroup.c index cd02e99d5d3b..5411fffa4b70 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1490,11 +1490,10 @@ static int cgroup_remount(struct kernfs_root *kf_root, int *flags, char *data) removed_mask = root->subsys_mask & ~opts.subsys_mask; /* Don't allow flags or name to change at remount */ - if (((opts.flags ^ root->flags) & CGRP_ROOT_OPTION_MASK) || + if ((opts.flags ^ root->flags) || (opts.name && strcmp(opts.name, root->name))) { pr_err("option or name mismatch, new: 0x%x \"%s\", old: 0x%x \"%s\"\n", - opts.flags & CGRP_ROOT_OPTION_MASK, opts.name ?: "", - root->flags & CGRP_ROOT_OPTION_MASK, root->name); + opts.flags, opts.name ?: "", root->flags, root->name); ret = -EINVAL; goto out_unlock; } @@ -1762,7 +1761,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, goto out_unlock; } - if ((root->flags ^ opts.flags) & CGRP_ROOT_OPTION_MASK) { + if (root->flags ^ opts.flags) { if ((root->flags | opts.flags) & CGRP_ROOT_SANE_BEHAVIOR) { pr_err("sane_behavior: new mount options should match the existing superblock\n"); ret = -EINVAL; -- cgit v1.2.3-59-g8ed1b From aa6ec29bee8692ce232132f1a1ea2a1f9196610e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 9 Jul 2014 10:08:08 -0400 Subject: cgroup: remove sane_behavior support on non-default hierarchies sane_behavior has been used as a development vehicle for the default unified hierarchy. Now that the default hierarchy is in place, the flag became redundant and confusing as its usage is allowed on all hierarchies. There are gonna be either the default hierarchy or legacy ones. Let's make that clear by removing sane_behavior support on non-default hierarchies. This patch replaces cgroup_sane_behavior() with cgroup_on_dfl(). The comment on top of CGRP_ROOT_SANE_BEHAVIOR is moved to on top of cgroup_on_dfl() with sane_behavior specific part dropped. On the default and legacy hierarchies w/o sane_behavior, this shouldn't cause any behavior differences. Signed-off-by: Tejun Heo Acked-by: Vivek Goyal Acked-by: Li Zefan Cc: Johannes Weiner Cc: Michal Hocko --- block/blk-throttle.c | 6 +-- include/linux/cgroup.h | 125 +++++++++++++++++++++---------------------------- kernel/cgroup.c | 19 ++++---- kernel/cpuset.c | 33 ++++++------- mm/memcontrol.c | 7 +-- 5 files changed, 86 insertions(+), 104 deletions(-) (limited to 'include/linux') diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 3fdb21a390c1..9273d0969ebd 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -412,13 +412,13 @@ static void throtl_pd_init(struct blkcg_gq *blkg) int rw; /* - * If sane_hierarchy is enabled, we switch to properly hierarchical + * If on the default hierarchy, we switch to properly hierarchical * behavior where limits on a given throtl_grp are applied to the * whole subtree rather than just the group itself. e.g. 
If 16M * read_bps limit is set on the root group, the whole system can't * exceed 16M for the device. * - * If sane_hierarchy is not enabled, the broken flat hierarchy + * If not on the default hierarchy, the broken flat hierarchy * behavior is retained where all throtl_grps are treated as if * they're all separate root groups right below throtl_data. * Limits of a group don't interact with limits of other groups @@ -426,7 +426,7 @@ static void throtl_pd_init(struct blkcg_gq *blkg) */ parent_sq = &td->service_queue; - if (cgroup_sane_behavior(blkg->blkcg->css.cgroup) && blkg->parent) + if (cgroup_on_dfl(blkg->blkcg->css.cgroup) && blkg->parent) parent_sq = &blkg_to_tg(blkg->parent)->service_queue; throtl_service_queue_init(&tg->service_queue, parent_sq); diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index c4901c19668b..7bb274487c89 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -256,68 +256,7 @@ struct cgroup { /* cgroup_root->flags */ enum { - /* - * Unfortunately, cgroup core and various controllers are riddled - * with idiosyncrasies and pointless options. The following flag, - * when set, will force sane behavior - some options are forced on, - * others are disallowed, and some controllers will change their - * hierarchical or other behaviors. - * - * The set of behaviors affected by this flag are still being - * determined and developed and the mount option for this flag is - * prefixed with __DEVEL__. The prefix will be dropped once we - * reach the point where all behaviors are compatible with the - * planned unified hierarchy, which will automatically turn on this - * flag. - * - * The followings are the behaviors currently affected this flag. - * - * - Mount options "noprefix", "xattr", "clone_children", - * "release_agent" and "name" are disallowed. - * - * - When mounting an existing superblock, mount options should - * match. - * - * - Remount is disallowed. - * - * - rename(2) is disallowed. - * - * - "tasks" is removed. Everything should be at process - * granularity. Use "cgroup.procs" instead. - * - * - "cgroup.procs" is not sorted. pids will be unique unless they - * got recycled inbetween reads. - * - * - "release_agent" and "notify_on_release" are removed. - * Replacement notification mechanism will be implemented. - * - * - "cgroup.clone_children" is removed. - * - * - "cgroup.subtree_populated" is available. Its value is 0 if - * the cgroup and its descendants contain no task; otherwise, 1. - * The file also generates kernfs notification which can be - * monitored through poll and [di]notify when the value of the - * file changes. - * - * - If mount is requested with sane_behavior but without any - * subsystem, the default unified hierarchy is mounted. - * - * - cpuset: tasks will be kept in empty cpusets when hotplug happens - * and take masks of ancestors with non-empty cpus/mems, instead of - * being moved to an ancestor. - * - * - cpuset: a task can be moved into an empty cpuset, and again it - * takes masks of ancestors. - * - * - memcg: use_hierarchy is on by default and the cgroup file for - * the flag is not created. - * - * - blkcg: blk-throttle becomes properly hierarchical. - * - * - debug: disallowed on the default hierarchy. 
- */ - CGRP_ROOT_SANE_BEHAVIOR = (1 << 0), - + CGRP_ROOT_SANE_BEHAVIOR = (1 << 0), /* __DEVEL__sane_behavior specified */ CGRP_ROOT_NOPREFIX = (1 << 1), /* mounted subsystems have no named prefix */ CGRP_ROOT_XATTR = (1 << 2), /* supports extended attributes */ }; @@ -531,20 +470,64 @@ struct cftype { extern struct cgroup_root cgrp_dfl_root; extern struct css_set init_css_set; +/** + * cgroup_on_dfl - test whether a cgroup is on the default hierarchy + * @cgrp: the cgroup of interest + * + * The default hierarchy is the v2 interface of cgroup and this function + * can be used to test whether a cgroup is on the default hierarchy for + * cases where a subsystem should behave differnetly depending on the + * interface version. + * + * The set of behaviors which change on the default hierarchy are still + * being determined and the mount option is prefixed with __DEVEL__. + * + * List of changed behaviors: + * + * - Mount options "noprefix", "xattr", "clone_children", "release_agent" + * and "name" are disallowed. + * + * - When mounting an existing superblock, mount options should match. + * + * - Remount is disallowed. + * + * - rename(2) is disallowed. + * + * - "tasks" is removed. Everything should be at process granularity. Use + * "cgroup.procs" instead. + * + * - "cgroup.procs" is not sorted. pids will be unique unless they got + * recycled inbetween reads. + * + * - "release_agent" and "notify_on_release" are removed. Replacement + * notification mechanism will be implemented. + * + * - "cgroup.clone_children" is removed. + * + * - "cgroup.subtree_populated" is available. Its value is 0 if the cgroup + * and its descendants contain no task; otherwise, 1. The file also + * generates kernfs notification which can be monitored through poll and + * [di]notify when the value of the file changes. + * + * - cpuset: tasks will be kept in empty cpusets when hotplug happens and + * take masks of ancestors with non-empty cpus/mems, instead of being + * moved to an ancestor. + * + * - cpuset: a task can be moved into an empty cpuset, and again it takes + * masks of ancestors. + * + * - memcg: use_hierarchy is on by default and the cgroup file for the flag + * is not created. + * + * - blkcg: blk-throttle becomes properly hierarchical. + * + * - debug: disallowed on the default hierarchy. + */ static inline bool cgroup_on_dfl(const struct cgroup *cgrp) { return cgrp->root == &cgrp_dfl_root; } -/* - * See the comment above CGRP_ROOT_SANE_BEHAVIOR for details. This - * function can be called as long as @cgrp is accessible. - */ -static inline bool cgroup_sane_behavior(const struct cgroup *cgrp) -{ - return cgrp->root->flags & CGRP_ROOT_SANE_BEHAVIOR; -} - /* no synchronization, the result can only be used as a hint */ static inline bool cgroup_has_tasks(struct cgroup *cgrp) { diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 0ea54af6b133..fb07c6d43aff 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1470,8 +1470,8 @@ static int cgroup_remount(struct kernfs_root *kf_root, int *flags, char *data) struct cgroup_sb_opts opts; unsigned int added_mask, removed_mask; - if (root->flags & CGRP_ROOT_SANE_BEHAVIOR) { - pr_err("sane_behavior: remount is not allowed\n"); + if (root == &cgrp_dfl_root) { + pr_err("remount is not allowed\n"); return -EINVAL; } @@ -2943,9 +2943,9 @@ static int cgroup_rename(struct kernfs_node *kn, struct kernfs_node *new_parent, /* * This isn't a proper migration and its usefulness is very - * limited. Disallow if sane_behavior. + * limited. 
Disallow on the default hierarchy. */ - if (cgroup_sane_behavior(cgrp)) + if (cgroup_on_dfl(cgrp)) return -EPERM; /* @@ -3031,7 +3031,7 @@ static int cgroup_addrm_files(struct cgroup *cgrp, struct cftype cfts[], /* does cft->flags tell us to skip this file on @cgrp? */ if ((cft->flags & CFTYPE_ONLY_ON_DFL) && !cgroup_on_dfl(cgrp)) continue; - if ((cft->flags & CFTYPE_INSANE) && cgroup_sane_behavior(cgrp)) + if ((cft->flags & CFTYPE_INSANE) && cgroup_on_dfl(cgrp)) continue; if ((cft->flags & CFTYPE_NOT_ON_ROOT) && !cgroup_parent(cgrp)) continue; @@ -3764,8 +3764,9 @@ after: * * All this extra complexity was caused by the original implementation * committing to an entirely unnecessary property. In the long term, we - * want to do away with it. Explicitly scramble sort order if - * sane_behavior so that no such expectation exists in the new interface. + * want to do away with it. Explicitly scramble sort order if on the + * default hierarchy so that no such expectation exists in the new + * interface. * * Scrambling is done by swapping every two consecutive bits, which is * non-identity one-to-one mapping which disturbs sort order sufficiently. @@ -3780,7 +3781,7 @@ static pid_t pid_fry(pid_t pid) static pid_t cgroup_pid_fry(struct cgroup *cgrp, pid_t pid) { - if (cgroup_sane_behavior(cgrp)) + if (cgroup_on_dfl(cgrp)) return pid_fry(pid); else return pid; @@ -3883,7 +3884,7 @@ static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type, css_task_iter_end(&it); length = n; /* now sort & (if procs) strip out duplicates */ - if (cgroup_sane_behavior(cgrp)) + if (cgroup_on_dfl(cgrp)) sort(array, length, sizeof(pid_t), fried_cmppid, NULL); else sort(array, length, sizeof(pid_t), cmppid, NULL); diff --git a/kernel/cpuset.c b/kernel/cpuset.c index f6b33c696224..f9d4807c869f 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -1383,12 +1383,9 @@ static int cpuset_can_attach(struct cgroup_subsys_state *css, mutex_lock(&cpuset_mutex); - /* - * We allow to move tasks into an empty cpuset if sane_behavior - * flag is set. - */ + /* allow moving tasks into an empty cpuset if on default hierarchy */ ret = -ENOSPC; - if (!cgroup_sane_behavior(css->cgroup) && + if (!cgroup_on_dfl(css->cgroup) && (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))) goto out_unlock; @@ -2030,7 +2027,7 @@ static void cpuset_hotplug_update_tasks(struct cpuset *cs) static cpumask_t off_cpus; static nodemask_t off_mems; bool is_empty; - bool sane = cgroup_sane_behavior(cs->css.cgroup); + bool on_dfl = cgroup_on_dfl(cs->css.cgroup); retry: wait_event(cpuset_attach_wq, cs->attach_in_progress == 0); @@ -2054,12 +2051,12 @@ retry: mutex_unlock(&callback_mutex); /* - * If sane_behavior flag is set, we need to update tasks' cpumask - * for empty cpuset to take on ancestor's cpumask. Otherwise, don't - * call update_tasks_cpumask() if the cpuset becomes empty, as - * the tasks in it will be migrated to an ancestor. + * If on_dfl, we need to update tasks' cpumask for empty cpuset to + * take on ancestor's cpumask. Otherwise, don't call + * update_tasks_cpumask() if the cpuset becomes empty, as the tasks + * in it will be migrated to an ancestor. 
*/ - if ((sane && cpumask_empty(cs->cpus_allowed)) || + if ((on_dfl && cpumask_empty(cs->cpus_allowed)) || (!cpumask_empty(&off_cpus) && !cpumask_empty(cs->cpus_allowed))) update_tasks_cpumask(cs); @@ -2068,12 +2065,12 @@ retry: mutex_unlock(&callback_mutex); /* - * If sane_behavior flag is set, we need to update tasks' nodemask - * for empty cpuset to take on ancestor's nodemask. Otherwise, don't - * call update_tasks_nodemask() if the cpuset becomes empty, as - * the tasks in it will be migratd to an ancestor. + * If on_dfl, we need to update tasks' nodemask for empty cpuset to + * take on ancestor's nodemask. Otherwise, don't call + * update_tasks_nodemask() if the cpuset becomes empty, as the + * tasks in it will be migratd to an ancestor. */ - if ((sane && nodes_empty(cs->mems_allowed)) || + if ((on_dfl && nodes_empty(cs->mems_allowed)) || (!nodes_empty(off_mems) && !nodes_empty(cs->mems_allowed))) update_tasks_nodemask(cs); @@ -2083,13 +2080,13 @@ retry: mutex_unlock(&cpuset_mutex); /* - * If sane_behavior flag is set, we'll keep tasks in empty cpusets. + * If on_dfl, we'll keep tasks in empty cpusets. * * Otherwise move tasks to the nearest ancestor with execution * resources. This is full cgroup operation which will * also call back into cpuset. Should be done outside any lock. */ - if (!sane && is_empty) + if (!on_dfl && is_empty) remove_tasks_in_empty_cpuset(cs); } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index db536e90c8ee..a2a4bd69a7ae 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -7024,16 +7024,17 @@ static void mem_cgroup_move_task(struct cgroup_subsys_state *css, /* * Cgroup retains root cgroups across [un]mount cycles making it necessary - * to verify sane_behavior flag on each mount attempt. + * to verify whether we're attached to the default hierarchy on each mount + * attempt. */ static void mem_cgroup_bind(struct cgroup_subsys_state *root_css) { /* - * use_hierarchy is forced with sane_behavior. cgroup core + * use_hierarchy is forced on the default hierarchy. cgroup core * guarantees that @root doesn't have any children, so turning it * on for the root memcg is enough. 
*/ - if (cgroup_sane_behavior(root_css->cgroup)) + if (cgroup_on_dfl(root_css->cgroup)) mem_cgroup_from_css(root_css)->use_hierarchy = true; } -- cgit v1.2.3-59-g8ed1b From 01a0f4aaaefff9f57bb17e6cc514c84ba43a7335 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Wed, 2 Jul 2014 14:34:13 +0100 Subject: mfd: tps65910: Rid data size incompatibility warn when building for 64bit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extinguishes: ../drivers/mfd/tps65910.c: In function ‘tps65910_parse_dt’: ../drivers/mfd/tps65910.c:404:14: warning: cast from pointer to integer of different size Signed-off-by: Lee Jones --- drivers/mfd/tps65910.c | 10 +++++----- include/linux/mfd/tps65910.h | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/tps65910.c b/drivers/mfd/tps65910.c index f9e42ea1cb1a..f243e75d28f3 100644 --- a/drivers/mfd/tps65910.c +++ b/drivers/mfd/tps65910.c @@ -387,7 +387,7 @@ static const struct of_device_id tps65910_of_match[] = { MODULE_DEVICE_TABLE(of, tps65910_of_match); static struct tps65910_board *tps65910_parse_dt(struct i2c_client *client, - int *chip_id) + unsigned long *chip_id) { struct device_node *np = client->dev.of_node; struct tps65910_board *board_info; @@ -401,7 +401,7 @@ static struct tps65910_board *tps65910_parse_dt(struct i2c_client *client, return NULL; } - *chip_id = (int)match->data; + *chip_id = (unsigned long)match->data; board_info = devm_kzalloc(&client->dev, sizeof(*board_info), GFP_KERNEL); @@ -431,7 +431,7 @@ static struct tps65910_board *tps65910_parse_dt(struct i2c_client *client, #else static inline struct tps65910_board *tps65910_parse_dt(struct i2c_client *client, - int *chip_id) + unsigned long *chip_id) { return NULL; } @@ -453,14 +453,14 @@ static void tps65910_power_off(void) } static int tps65910_i2c_probe(struct i2c_client *i2c, - const struct i2c_device_id *id) + const struct i2c_device_id *id) { struct tps65910 *tps65910; struct tps65910_board *pmic_plat_data; struct tps65910_board *of_pmic_plat_data = NULL; struct tps65910_platform_data *init_data; + unsigned long chip_id = id->driver_data; int ret = 0; - int chip_id = id->driver_data; pmic_plat_data = dev_get_platdata(&i2c->dev); diff --git a/include/linux/mfd/tps65910.h b/include/linux/mfd/tps65910.h index 16c2335c2856..6483a6fdce59 100644 --- a/include/linux/mfd/tps65910.h +++ b/include/linux/mfd/tps65910.h @@ -892,7 +892,7 @@ struct tps65910 { struct device *dev; struct i2c_client *i2c_client; struct regmap *regmap; - unsigned int id; + unsigned long id; /* Client devices */ struct tps65910_pmic *pmic; -- cgit v1.2.3-59-g8ed1b From f27bc4873fa8b75cc1eba7b641eda7375dc72ccf Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 4 May 2014 15:38:38 -0700 Subject: rcu: Document deadlock-avoidance information for rcu_read_unlock() Reported-by: Oleg Nesterov Signed-off-by: Paul E. McKenney Reviewed-by: Lai Jiangshan --- include/linux/rcupdate.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 6a94cc8b1ca0..c56ad15204ec 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -858,6 +858,34 @@ static inline void rcu_read_lock(void) /** * rcu_read_unlock() - marks the end of an RCU read-side critical section. * + * In most situations, rcu_read_unlock() is immune from deadlock. 
+ * However, in kernels built with CONFIG_RCU_BOOST, rcu_read_unlock() + * is responsible for deboosting, which it does via rt_mutex_unlock(). + * Unfortunately, this function acquires the scheduler's runqueue and + * priority-inheritance spinlocks. This means that deadlock could result + * if the caller of rcu_read_unlock() already holds one of these locks or + * any lock that is ever acquired while holding them. + * + * That said, RCU readers are never priority boosted unless they were + * preempted. Therefore, one way to avoid deadlock is to make sure + * that preemption never happens within any RCU read-side critical + * section whose outermost rcu_read_unlock() is called with one of + * rt_mutex_unlock()'s locks held. Such preemption can be avoided in + * a number of ways, for example, by invoking preempt_disable() before + * critical section's outermost rcu_read_lock(). + * + * Given that the set of locks acquired by rt_mutex_unlock() might change + * at any time, a somewhat more future-proofed approach is to make sure + * that that preemption never happens within any RCU read-side critical + * section whose outermost rcu_read_unlock() is called with irqs disabled. + * This approach relies on the fact that rt_mutex_unlock() currently only + * acquires irq-disabled locks. + * + * The second of these two approaches is best in most situations, + * however, the first approach can also be useful, at least to those + * developers willing to keep abreast of the set of locks acquired by + * rt_mutex_unlock(). + * * See rcu_read_lock() for more information. */ static inline void rcu_read_unlock(void) -- cgit v1.2.3-59-g8ed1b From ab74fdfd4e11ec040f21cf87edc14fc9f62cc934 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 4 May 2014 15:41:21 -0700 Subject: rcu: Handle obsolete references to TINY_PREEMPT_RCU Signed-off-by: Paul E. McKenney Reviewed-by: Lai Jiangshan --- include/linux/rcupdate.h | 17 ++++++++--------- init/Kconfig | 2 +- 2 files changed, 9 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index c56ad15204ec..d231aa17b1d7 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -826,15 +826,14 @@ static inline void rcu_preempt_sleep_check(void) * read-side critical section that would block in a !PREEMPT kernel. * But if you want the full story, read on! * - * In non-preemptible RCU implementations (TREE_RCU and TINY_RCU), it - * is illegal to block while in an RCU read-side critical section. In - * preemptible RCU implementations (TREE_PREEMPT_RCU and TINY_PREEMPT_RCU) - * in CONFIG_PREEMPT kernel builds, RCU read-side critical sections may - * be preempted, but explicit blocking is illegal. Finally, in preemptible - * RCU implementations in real-time (with -rt patchset) kernel builds, - * RCU read-side critical sections may be preempted and they may also - * block, but only when acquiring spinlocks that are subject to priority - * inheritance. + * In non-preemptible RCU implementations (TREE_RCU and TINY_RCU), + * it is illegal to block while in an RCU read-side critical section. + * In preemptible RCU implementations (TREE_PREEMPT_RCU) in CONFIG_PREEMPT + * kernel builds, RCU read-side critical sections may be preempted, + * but explicit blocking is illegal. 
Finally, in preemptible RCU + * implementations in real-time (with -rt patchset) kernel builds, RCU + * read-side critical sections may be preempted and they may also block, but + * only when acquiring spinlocks that are subject to priority inheritance. */ static inline void rcu_read_lock(void) { diff --git a/init/Kconfig b/init/Kconfig index 9d76b99af1b9..977b37806e95 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -505,7 +505,7 @@ config PREEMPT_RCU def_bool TREE_PREEMPT_RCU help This option enables preemptible-RCU code that is common between - the TREE_PREEMPT_RCU and TINY_PREEMPT_RCU implementations. + TREE_PREEMPT_RCU and, in the old days, TINY_PREEMPT_RCU. config RCU_STALL_COMMON def_bool ( TREE_RCU || TREE_PREEMPT_RCU || RCU_TRACE ) -- cgit v1.2.3-59-g8ed1b From abaa93d9e1de2c29297e69ddba8ddd38f15064cf Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 12 Jun 2014 13:30:25 -0700 Subject: rcu: Simplify priority boosting by putting rt_mutex in rcu_node RCU priority boosting currently checks for boosting via a pointer in task_struct. However, this is not needed: As Oleg noted, if the rt_mutex is placed in the rcu_node instead of on the booster's stack, the boostee can simply check it see if it owns the lock. This commit makes this change, shrinking task_struct by one pointer and the kernel by thirteen lines. Suggested-by: Oleg Nesterov Signed-off-by: Paul E. McKenney --- include/linux/init_task.h | 9 +-------- include/linux/sched.h | 6 ------ kernel/rcu/tree.h | 3 +++ kernel/rcu/tree_plugin.h | 25 +++++++++++-------------- 4 files changed, 15 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 6df7f9fe0d01..2bb4c4f3531a 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -102,12 +102,6 @@ extern struct group_info init_groups; #define INIT_IDS #endif -#ifdef CONFIG_RCU_BOOST -#define INIT_TASK_RCU_BOOST() \ - .rcu_boost_mutex = NULL, -#else -#define INIT_TASK_RCU_BOOST() -#endif #ifdef CONFIG_TREE_PREEMPT_RCU #define INIT_TASK_RCU_TREE_PREEMPT() \ .rcu_blocked_node = NULL, @@ -119,8 +113,7 @@ extern struct group_info init_groups; .rcu_read_lock_nesting = 0, \ .rcu_read_unlock_special = 0, \ .rcu_node_entry = LIST_HEAD_INIT(tsk.rcu_node_entry), \ - INIT_TASK_RCU_TREE_PREEMPT() \ - INIT_TASK_RCU_BOOST() + INIT_TASK_RCU_TREE_PREEMPT() #else #define INIT_TASK_RCU_PREEMPT(tsk) #endif diff --git a/include/linux/sched.h b/include/linux/sched.h index 306f4f0c987a..3cfbc05e66e6 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1270,9 +1270,6 @@ struct task_struct { #ifdef CONFIG_TREE_PREEMPT_RCU struct rcu_node *rcu_blocked_node; #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ -#ifdef CONFIG_RCU_BOOST - struct rt_mutex *rcu_boost_mutex; -#endif /* #ifdef CONFIG_RCU_BOOST */ #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) struct sched_info sched_info; @@ -2009,9 +2006,6 @@ static inline void rcu_copy_process(struct task_struct *p) #ifdef CONFIG_TREE_PREEMPT_RCU p->rcu_blocked_node = NULL; #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ -#ifdef CONFIG_RCU_BOOST - p->rcu_boost_mutex = NULL; -#endif /* #ifdef CONFIG_RCU_BOOST */ INIT_LIST_HEAD(&p->rcu_node_entry); } diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 3eeb919e26a2..60fb0eaa2d16 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -177,6 +177,9 @@ struct rcu_node { /* to carry out the boosting is fully */ /* released with no future boostee accesses */ /* before that rt_mutex is 
re-initialized. */ + struct rt_mutex boost_mtx; + /* Used only for the priority-boosting */ + /* side effect, not as a lock. */ unsigned long boost_time; /* When to start boosting (jiffies). */ struct task_struct *boost_kthread_task; diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 9c811879d31e..719587af7b10 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -33,6 +33,7 @@ #define RCU_KTHREAD_PRIO 1 #ifdef CONFIG_RCU_BOOST +#include "../locking/rtmutex_common.h" #define RCU_BOOST_PRIO CONFIG_RCU_BOOST_PRIO #else #define RCU_BOOST_PRIO RCU_KTHREAD_PRIO @@ -336,7 +337,7 @@ void rcu_read_unlock_special(struct task_struct *t) unsigned long flags; struct list_head *np; #ifdef CONFIG_RCU_BOOST - struct rt_mutex *rbmp = NULL; + bool drop_boost_mutex = false; #endif /* #ifdef CONFIG_RCU_BOOST */ struct rcu_node *rnp; int special; @@ -398,11 +399,8 @@ void rcu_read_unlock_special(struct task_struct *t) #ifdef CONFIG_RCU_BOOST if (&t->rcu_node_entry == rnp->boost_tasks) rnp->boost_tasks = np; - /* Snapshot/clear ->rcu_boost_mutex with rcu_node lock held. */ - if (t->rcu_boost_mutex) { - rbmp = t->rcu_boost_mutex; - t->rcu_boost_mutex = NULL; - } + /* Snapshot ->boost_mtx ownership with rcu_node lock held. */ + drop_boost_mutex = rt_mutex_owner(&rnp->boost_mtx) == t; #endif /* #ifdef CONFIG_RCU_BOOST */ /* @@ -427,8 +425,8 @@ void rcu_read_unlock_special(struct task_struct *t) #ifdef CONFIG_RCU_BOOST /* Unboost if we were boosted. */ - if (rbmp) { - rt_mutex_unlock(rbmp); + if (drop_boost_mutex) { + rt_mutex_unlock(&rnp->boost_mtx); complete(&rnp->boost_completion); } #endif /* #ifdef CONFIG_RCU_BOOST */ @@ -1151,7 +1149,6 @@ static void rcu_wake_cond(struct task_struct *t, int status) static int rcu_boost(struct rcu_node *rnp) { unsigned long flags; - struct rt_mutex mtx; struct task_struct *t; struct list_head *tb; @@ -1202,14 +1199,14 @@ static int rcu_boost(struct rcu_node *rnp) * section. */ t = container_of(tb, struct task_struct, rcu_node_entry); - rt_mutex_init_proxy_locked(&mtx, t); - t->rcu_boost_mutex = &mtx; + rt_mutex_init_proxy_locked(&rnp->boost_mtx, t); init_completion(&rnp->boost_completion); raw_spin_unlock_irqrestore(&rnp->lock, flags); - rt_mutex_lock(&mtx); /* Side effect: boosts task t's priority. */ - rt_mutex_unlock(&mtx); /* Keep lockdep happy. */ + /* Lock only for side effect: boosts task t's priority. */ + rt_mutex_lock(&rnp->boost_mtx); + rt_mutex_unlock(&rnp->boost_mtx); /* Then keep lockdep happy. */ - /* Wait until boostee is done accessing mtx before reinitializing. */ + /* Wait for boostee to be done w/boost_mtx before reinitializing. */ wait_for_completion(&rnp->boost_completion); return ACCESS_ONCE(rnp->exp_tasks) != NULL || -- cgit v1.2.3-59-g8ed1b From c0f489d2c6fec8994c642c2ec925eb858727dc7b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 4 Jun 2014 13:46:03 -0700 Subject: rcu: Bind grace-period kthreads to non-NO_HZ_FULL CPUs Binding the grace-period kthreads to the timekeeping CPU resulted in significant performance decreases for some workloads. For more detail, see: https://lkml.org/lkml/2014/6/3/395 for benchmark numbers https://lkml.org/lkml/2014/6/4/218 for CPU statistics It turns out that it is necessary to bind the grace-period kthreads to the timekeeping CPU only when all but CPU 0 is a nohz_full CPU on the one hand or if CONFIG_NO_HZ_FULL_SYSIDLE=y on the other. In other cases, it suffices to bind the grace-period kthreads to the set of non-nohz_full CPUs. 
This commit therefore creates a tick_nohz_not_full_mask that is the complement of tick_nohz_full_mask, and then binds the grace-period kthread to the set of CPUs indicated by this new mask, which covers the CONFIG_NO_HZ_FULL_SYSIDLE=n case. The CONFIG_NO_HZ_FULL_SYSIDLE=y case still binds the grace-period kthreads to the timekeeping CPU. This commit also includes the tick_nohz_full_enabled() check suggested by Frederic Weisbecker. Reported-by: Jet Chen Signed-off-by: Paul E. McKenney [ paulmck: Created housekeeping_affine() and housekeeping_mask per fweisbec feedback. ] --- include/linux/tick.h | 20 ++++++++++++++++++++ kernel/rcu/tree_plugin.h | 14 +++++++++----- kernel/time/tick-sched.c | 10 ++++++++++ 3 files changed, 39 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tick.h b/include/linux/tick.h index b84773cb9f4c..06cc093ab7ad 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -12,6 +12,7 @@ #include #include #include +#include #ifdef CONFIG_GENERIC_CLOCKEVENTS @@ -162,6 +163,7 @@ static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; } #ifdef CONFIG_NO_HZ_FULL extern bool tick_nohz_full_running; extern cpumask_var_t tick_nohz_full_mask; +extern cpumask_var_t housekeeping_mask; static inline bool tick_nohz_full_enabled(void) { @@ -194,6 +196,24 @@ static inline void tick_nohz_full_kick_all(void) { } static inline void __tick_nohz_task_switch(struct task_struct *tsk) { } #endif +static inline bool is_housekeeping_cpu(int cpu) +{ +#ifdef CONFIG_NO_HZ_FULL + if (tick_nohz_full_enabled()) + return cpumask_test_cpu(cpu, housekeeping_mask); +#endif + return true; +} + +static inline void housekeeping_affine(struct task_struct *t) +{ +#ifdef CONFIG_NO_HZ_FULL + if (tick_nohz_full_enabled()) + set_cpus_allowed_ptr(t, housekeeping_mask); + +#endif +} + static inline void tick_nohz_full_check(void) { if (tick_nohz_full_enabled()) diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 719587af7b10..b39ba7239bd6 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -2846,12 +2846,16 @@ static bool rcu_nohz_full_cpu(struct rcu_state *rsp) */ static void rcu_bind_gp_kthread(void) { -#ifdef CONFIG_NO_HZ_FULL - int cpu = tick_do_timer_cpu; + int __maybe_unused cpu; - if (cpu < 0 || cpu >= nr_cpu_ids) + if (!tick_nohz_full_enabled()) return; - if (raw_smp_processor_id() != cpu) +#ifdef CONFIG_NO_HZ_FULL_SYSIDLE + cpu = tick_do_timer_cpu; + if (cpu >= 0 && cpu < nr_cpu_ids && raw_smp_processor_id() != cpu) set_cpus_allowed_ptr(current, cpumask_of(cpu)); -#endif /* #ifdef CONFIG_NO_HZ_FULL */ +#else /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */ + if (!is_housekeeping_cpu(raw_smp_processor_id())) + housekeeping_affine(current); +#endif /* #else #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */ } diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 6558b7ac112d..f784d83e29f1 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -154,6 +154,7 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs) #ifdef CONFIG_NO_HZ_FULL cpumask_var_t tick_nohz_full_mask; +cpumask_var_t housekeeping_mask; bool tick_nohz_full_running; static bool can_stop_full_tick(void) @@ -281,6 +282,7 @@ static int __init tick_nohz_full_setup(char *str) int cpu; alloc_bootmem_cpumask_var(&tick_nohz_full_mask); + alloc_bootmem_cpumask_var(&housekeeping_mask); if (cpulist_parse(str, tick_nohz_full_mask) < 0) { pr_warning("NOHZ: Incorrect nohz_full cpumask\n"); return 1; @@ -291,6 +293,8 @@ static 
int __init tick_nohz_full_setup(char *str) pr_warning("NO_HZ: Clearing %d from nohz_full range for timekeeping\n", cpu); cpumask_clear_cpu(cpu, tick_nohz_full_mask); } + cpumask_andnot(housekeeping_mask, + cpu_possible_mask, tick_nohz_full_mask); tick_nohz_full_running = true; return 1; @@ -332,9 +336,15 @@ static int tick_nohz_init_all(void) pr_err("NO_HZ: Can't allocate full dynticks cpumask\n"); return err; } + if (!alloc_cpumask_var(&housekeeping_mask, GFP_KERNEL)) { + pr_err("NO_HZ: Can't allocate not-full dynticks cpumask\n"); + return err; + } err = 0; cpumask_setall(tick_nohz_full_mask); cpumask_clear_cpu(smp_processor_id(), tick_nohz_full_mask); + cpumask_clear(housekeeping_mask); + cpumask_set_cpu(smp_processor_id(), housekeeping_mask); tick_nohz_full_running = true; #endif return err; -- cgit v1.2.3-59-g8ed1b From 20f6fdd01c2c0de9cc1109083222edded24c5350 Mon Sep 17 00:00:00 2001 From: Pratyush Anand Date: Fri, 4 Jul 2014 17:01:25 +0300 Subject: xhci: Platform: Set xhci lpm support quirk based on platform data If an xhci platform supports USB3 LPM capability then enable XHCI_LPM_SUPPORT quirk flag. Signed-off-by: Pratyush Anand Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/usb/usb-xhci.txt | 3 ++- drivers/usb/host/xhci-plat.c | 6 +++++ include/linux/usb/xhci_pdriver.h | 27 ++++++++++++++++++++++ 3 files changed, 35 insertions(+), 1 deletion(-) create mode 100644 include/linux/usb/xhci_pdriver.h (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/usb/usb-xhci.txt b/Documentation/devicetree/bindings/usb/usb-xhci.txt index 5a79377c6a96..86f67f0886bc 100644 --- a/Documentation/devicetree/bindings/usb/usb-xhci.txt +++ b/Documentation/devicetree/bindings/usb/usb-xhci.txt @@ -9,8 +9,9 @@ Required properties: register set for the device. - interrupts: one XHCI interrupt should be described here. -Optional property: +Optional properties: - clocks: reference to a clock + - usb3-lpm-capable: determines if platform is USB3 LPM capable Example: usb@f0931000 { diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c index a4ccd0eb793e..b17459d3fcc8 100644 --- a/drivers/usb/host/xhci-plat.c +++ b/drivers/usb/host/xhci-plat.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "xhci.h" #include "xhci-mvebu.h" @@ -97,6 +98,8 @@ static const struct hc_driver xhci_plat_xhci_driver = { static int xhci_plat_probe(struct platform_device *pdev) { + struct device_node *node = pdev->dev.of_node; + struct usb_xhci_pdata *pdata = dev_get_platdata(&pdev->dev); const struct hc_driver *driver; struct xhci_hcd *xhci; struct resource *res; @@ -185,6 +188,9 @@ static int xhci_plat_probe(struct platform_device *pdev) goto dealloc_usb2_hcd; } + if ((node && of_property_read_bool(node, "usb3-lpm-capable")) || + (pdata && pdata->usb3_lpm_capable)) + xhci->quirks |= XHCI_LPM_SUPPORT; /* * Set the xHCI pointer before xhci_plat_setup() (aka hcd_driver.reset) * is called by usb_add_hcd(). diff --git a/include/linux/usb/xhci_pdriver.h b/include/linux/usb/xhci_pdriver.h new file mode 100644 index 000000000000..376654b5b0f7 --- /dev/null +++ b/include/linux/usb/xhci_pdriver.h @@ -0,0 +1,27 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + */ + +#ifndef __USB_CORE_XHCI_PDRIVER_H +#define __USB_CORE_XHCI_PDRIVER_H + +/** + * struct usb_xhci_pdata - platform_data for generic xhci platform driver + * + * @usb3_lpm_capable: determines if this xhci platform supports USB3 + * LPM capability + * + */ +struct usb_xhci_pdata { + unsigned usb3_lpm_capable:1; +}; + +#endif /* __USB_CORE_XHCI_PDRIVER_H */ -- cgit v1.2.3-59-g8ed1b From 8fe39aac0578cbb0abf27e1be70ff581e0c1d836 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 22 Nov 2013 13:22:13 +0100 Subject: drbd: device->ldev is not guaranteed on an D_ATTACHING disk Some parts of the code assumed that get_ldev_if_state(device, D_ATTACHING) is sufficient to access the ldev member of the device object. That was wrong. ldev may not be there or might be freed at any time if the device has a disk state of D_ATTACHING. bm_rw() Documented that drbd_bm_read() is only called from drbd_adm_attach. drbd_bm_write() is only called when a reference is held, and it is documented that a caller has to hold a reference before calling drbd_bm_write() drbd_bm_write_page() Use get_ldev() instead of get_ldev_if_state(device, D_ATTACHING) drbd_bmio_set_n_write() No longer use get_ldev_if_state(device, D_ATTACHING). All callers hold a reference to ldev now. drbd_bmio_clear_n_write() All callers where holding a reference of ldev anyways. Remove the misleading get_ldev_if_state(device, D_ATTACHING) drbd_reconsider_max_bio_size() Removed the get_ldev_if_state(device, D_ATTACHING). All callers now pass a struct drbd_backing_dev* when they have a proper reference, or a NULL pointer. Before this fix, the receiver could trigger a NULL pointer deref when in drbd_reconsider_max_bio_size() drbd_bump_write_ordering() Used get_ldev_if_state(device, D_ATTACHING) with the wrong assumption. Remove it, and allow the caller to pass in a struct drbd_backing_dev* when the caller knows that accessing this bdev is safe. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_bitmap.c | 4 +++- drivers/block/drbd/drbd_int.h | 9 ++++---- drivers/block/drbd/drbd_main.c | 36 +++++++++++++------------------ drivers/block/drbd/drbd_nl.c | 41 +++++++++++++++++++++++------------- drivers/block/drbd/drbd_receiver.c | 43 ++++++++++++++++++++++++++------------ include/linux/drbd.h | 2 +- 6 files changed, 79 insertions(+), 56 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 1aa29f8fdfe1..ed310415020b 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -1085,6 +1085,8 @@ static int bm_rw(struct drbd_device *device, int rw, unsigned flags, unsigned la kfree(ctx); return -ENODEV; } + /* Here D_ATTACHING is sufficient since drbd_bm_read() is called only from + drbd_adm_attach(), after device->ldev was assigned. 
*/ if (!ctx->flags) WARN_ON(!(BM_LOCKED_MASK & b->bm_flags)); @@ -1260,7 +1262,7 @@ int drbd_bm_write_page(struct drbd_device *device, unsigned int idx) __must_hold .kref = { ATOMIC_INIT(2) }, }; - if (!get_ldev_if_state(device, D_ATTACHING)) { /* put is in bm_aio_ctx_destroy() */ + if (!get_ldev(device)) { /* put is in bm_aio_ctx_destroy() */ drbd_err(device, "ASSERT FAILED: get_ldev_if_state() == 1 in drbd_bm_write_page()\n"); kfree(ctx); return -ENODEV; diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 1ef2474e8f11..c87bc8e8fd82 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -984,8 +984,8 @@ extern int drbd_bitmap_io(struct drbd_device *device, extern int drbd_bitmap_io_from_worker(struct drbd_device *device, int (*io_fn)(struct drbd_device *), char *why, enum bm_flag flags); -extern int drbd_bmio_set_n_write(struct drbd_device *device); -extern int drbd_bmio_clear_n_write(struct drbd_device *device); +extern int drbd_bmio_set_n_write(struct drbd_device *device) __must_hold(local); +extern int drbd_bmio_clear_n_write(struct drbd_device *device) __must_hold(local); extern void drbd_ldev_destroy(struct drbd_device *device); /* Meta data layout @@ -1313,7 +1313,7 @@ enum determine_dev_size { extern enum determine_dev_size drbd_determine_dev_size(struct drbd_device *, enum dds_flags, struct resize_parms *) __must_hold(local); extern void resync_after_online_grow(struct drbd_device *); -extern void drbd_reconsider_max_bio_size(struct drbd_device *device); +extern void drbd_reconsider_max_bio_size(struct drbd_device *device, struct drbd_backing_dev *bdev); extern enum drbd_state_rv drbd_set_role(struct drbd_device *device, enum drbd_role new_role, int force); @@ -1479,7 +1479,8 @@ static inline void drbd_generic_make_request(struct drbd_device *device, generic_make_request(bio); } -void drbd_bump_write_ordering(struct drbd_resource *resource, enum write_ordering_e wo); +void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backing_dev *bdev, + enum write_ordering_e wo); /* drbd_proc.c */ extern struct proc_dir_entry *drbd_proc; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 17b9a237f2e6..a6af93528d57 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -3466,23 +3466,19 @@ void drbd_uuid_set_bm(struct drbd_device *device, u64 val) __must_hold(local) * * Sets all bits in the bitmap and writes the whole bitmap to stable storage. */ -int drbd_bmio_set_n_write(struct drbd_device *device) +int drbd_bmio_set_n_write(struct drbd_device *device) __must_hold(local) { int rv = -EIO; - if (get_ldev_if_state(device, D_ATTACHING)) { - drbd_md_set_flag(device, MDF_FULL_SYNC); - drbd_md_sync(device); - drbd_bm_set_all(device); - - rv = drbd_bm_write(device); + drbd_md_set_flag(device, MDF_FULL_SYNC); + drbd_md_sync(device); + drbd_bm_set_all(device); - if (!rv) { - drbd_md_clear_flag(device, MDF_FULL_SYNC); - drbd_md_sync(device); - } + rv = drbd_bm_write(device); - put_ldev(device); + if (!rv) { + drbd_md_clear_flag(device, MDF_FULL_SYNC); + drbd_md_sync(device); } return rv; @@ -3494,18 +3490,11 @@ int drbd_bmio_set_n_write(struct drbd_device *device) * * Clears all bits in the bitmap and writes the whole bitmap to stable storage. 
*/ -int drbd_bmio_clear_n_write(struct drbd_device *device) +int drbd_bmio_clear_n_write(struct drbd_device *device) __must_hold(local) { - int rv = -EIO; - drbd_resume_al(device); - if (get_ldev_if_state(device, D_ATTACHING)) { - drbd_bm_clear_all(device); - rv = drbd_bm_write(device); - put_ldev(device); - } - - return rv; + drbd_bm_clear_all(device); + return drbd_bm_write(device); } static int w_bitmap_io(struct drbd_work *w, int unused) @@ -3603,6 +3592,9 @@ static int w_go_diskless(struct drbd_work *w, int unused) * that drbd_set_out_of_sync() can not be called. This function MAY ONLY be * called from worker context. It MUST NOT be used while a previous such * work is still pending! + * + * Its worker function encloses the call of io_fn() by get_ldev() and + * put_ldev(). */ void drbd_queue_bitmap_io(struct drbd_device *device, int (*io_fn)(struct drbd_device *), diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 43fad2c1ba01..25f4b6f67c21 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1110,15 +1110,16 @@ static int drbd_check_al_size(struct drbd_device *device, struct disk_conf *dc) return 0; } -static void drbd_setup_queue_param(struct drbd_device *device, unsigned int max_bio_size) +static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backing_dev *bdev, + unsigned int max_bio_size) { struct request_queue * const q = device->rq_queue; unsigned int max_hw_sectors = max_bio_size >> 9; unsigned int max_segments = 0; struct request_queue *b = NULL; - if (get_ldev_if_state(device, D_ATTACHING)) { - b = device->ldev->backing_bdev->bd_disk->queue; + if (bdev) { + b = bdev->backing_bdev->bd_disk->queue; max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9); rcu_read_lock(); @@ -1163,11 +1164,10 @@ static void drbd_setup_queue_param(struct drbd_device *device, unsigned int max_ b->backing_dev_info.ra_pages); q->backing_dev_info.ra_pages = b->backing_dev_info.ra_pages; } - put_ldev(device); } } -void drbd_reconsider_max_bio_size(struct drbd_device *device) +void drbd_reconsider_max_bio_size(struct drbd_device *device, struct drbd_backing_dev *bdev) { unsigned int now, new, local, peer; @@ -1175,10 +1175,9 @@ void drbd_reconsider_max_bio_size(struct drbd_device *device) local = device->local_max_bio_size; /* Eventually last known value, from volatile memory */ peer = device->peer_max_bio_size; /* Eventually last known value, from meta data */ - if (get_ldev_if_state(device, D_ATTACHING)) { - local = queue_max_hw_sectors(device->ldev->backing_bdev->bd_disk->queue) << 9; + if (bdev) { + local = queue_max_hw_sectors(bdev->backing_bdev->bd_disk->queue) << 9; device->local_max_bio_size = local; - put_ldev(device); } local = min(local, DRBD_MAX_BIO_SIZE); @@ -1211,7 +1210,7 @@ void drbd_reconsider_max_bio_size(struct drbd_device *device) if (new != now) drbd_info(device, "max BIO size = %u\n", new); - drbd_setup_queue_param(device, new); + drbd_setup_queue_param(device, bdev, new); } /* Starts the worker thread */ @@ -1399,7 +1398,7 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) else set_bit(MD_NO_FUA, &device->flags); - drbd_bump_write_ordering(device->resource, WO_bdev_flush); + drbd_bump_write_ordering(device->resource, NULL, WO_bdev_flush); drbd_md_sync(device); @@ -1704,7 +1703,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) new_disk_conf = NULL; new_plan = NULL; - drbd_bump_write_ordering(device->resource, WO_bdev_flush); + 
drbd_bump_write_ordering(device->resource, device->ldev, WO_bdev_flush); if (drbd_md_test_flag(device->ldev, MDF_CRASHED_PRIMARY)) set_bit(CRASHED_PRIMARY, &device->flags); @@ -1720,7 +1719,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) device->read_cnt = 0; device->writ_cnt = 0; - drbd_reconsider_max_bio_size(device); + drbd_reconsider_max_bio_size(device, device->ldev); /* If I am currently not R_PRIMARY, * but meta data primary indicator is set, @@ -2648,8 +2647,13 @@ int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info) if (retcode != NO_ERROR) goto out; - mutex_lock(&adm_ctx.resource->adm_mutex); device = adm_ctx.device; + if (!get_ldev(device)) { + retcode = ERR_NO_DISK; + goto out; + } + + mutex_lock(&adm_ctx.resource->adm_mutex); /* If there is still bitmap IO pending, probably because of a previous * resync just being finished, wait for it before requesting a new resync. @@ -2673,6 +2677,7 @@ int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info) retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_T)); drbd_resume_io(device); mutex_unlock(&adm_ctx.resource->adm_mutex); + put_ldev(device); out: drbd_adm_finish(&adm_ctx, info, retcode); return 0; @@ -2698,7 +2703,7 @@ out: return 0; } -static int drbd_bmio_set_susp_al(struct drbd_device *device) +static int drbd_bmio_set_susp_al(struct drbd_device *device) __must_hold(local) { int rv; @@ -2719,8 +2724,13 @@ int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info) if (retcode != NO_ERROR) goto out; - mutex_lock(&adm_ctx.resource->adm_mutex); device = adm_ctx.device; + if (!get_ldev(device)) { + retcode = ERR_NO_DISK; + goto out; + } + + mutex_lock(&adm_ctx.resource->adm_mutex); /* If there is still bitmap IO pending, probably because of a previous * resync just being finished, wait for it before requesting a new resync. @@ -2747,6 +2757,7 @@ int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info) retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_S)); drbd_resume_io(device); mutex_unlock(&adm_ctx.resource->adm_mutex); + put_ldev(device); out: drbd_adm_finish(&adm_ctx, info, retcode); return 0; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index c7084188c2ae..be0c3761cdc6 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1168,7 +1168,7 @@ static void drbd_flush(struct drbd_connection *connection) /* would rather check on EOPNOTSUPP, but that is not reliable. * don't try again for ANY return value != 0 * if (rv == -EOPNOTSUPP) */ - drbd_bump_write_ordering(connection->resource, WO_drain_io); + drbd_bump_write_ordering(connection->resource, NULL, WO_drain_io); } put_ldev(device); kref_put(&device->kref, drbd_destroy_device); @@ -1257,14 +1257,29 @@ static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *connectio return rv; } +static enum write_ordering_e +max_allowed_wo(struct drbd_backing_dev *bdev, enum write_ordering_e wo) +{ + struct disk_conf *dc; + + dc = rcu_dereference(bdev->disk_conf); + + if (wo == WO_bdev_flush && !dc->disk_flushes) + wo = WO_drain_io; + if (wo == WO_drain_io && !dc->disk_drain) + wo = WO_none; + + return wo; +} + /** * drbd_bump_write_ordering() - Fall back to an other write ordering method * @connection: DRBD connection. * @wo: Write ordering method to try. 
*/ -void drbd_bump_write_ordering(struct drbd_resource *resource, enum write_ordering_e wo) +void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backing_dev *bdev, + enum write_ordering_e wo) { - struct disk_conf *dc; struct drbd_device *device; enum write_ordering_e pwo; int vnr; @@ -1278,17 +1293,18 @@ void drbd_bump_write_ordering(struct drbd_resource *resource, enum write_orderin wo = min(pwo, wo); rcu_read_lock(); idr_for_each_entry(&resource->devices, device, vnr) { - if (!get_ldev_if_state(device, D_ATTACHING)) - continue; - dc = rcu_dereference(device->ldev->disk_conf); - - if (wo == WO_bdev_flush && !dc->disk_flushes) - wo = WO_drain_io; - if (wo == WO_drain_io && !dc->disk_drain) - wo = WO_none; - put_ldev(device); + if (get_ldev(device)) { + wo = max_allowed_wo(device->ldev, wo); + if (device->ldev == bdev) + bdev = NULL; + put_ldev(device); + } } rcu_read_unlock(); + + if (bdev) + wo = max_allowed_wo(bdev, wo); + resource->write_ordering = wo; if (pwo != resource->write_ordering || wo == WO_bdev_flush) drbd_info(resource, "Method to ensure write ordering: %s\n", write_ordering_str[resource->write_ordering]); @@ -3709,7 +3725,6 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info } device->peer_max_bio_size = be32_to_cpu(p->max_bio_size); - drbd_reconsider_max_bio_size(device); /* Leave drbd_reconsider_max_bio_size() before drbd_determine_dev_size(). In case we cleared the QUEUE_FLAG_DISCARD from our queue in drbd_reconsider_max_bio_size(), we can be sure that after @@ -3717,6 +3732,7 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info ddsf = be16_to_cpu(p->dds_flags); if (get_ldev(device)) { + drbd_reconsider_max_bio_size(device, device->ldev); dd = drbd_determine_dev_size(device, ddsf, NULL); put_ldev(device); if (dd == DS_ERROR) @@ -3724,6 +3740,7 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info drbd_md_sync(device); } else { /* I am diskless, need to accept the peer's size. */ + drbd_reconsider_max_bio_size(device, NULL); drbd_set_my_capacity(device, p_size); } diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 3dbe9bd57a09..20ec8903b1e4 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -245,7 +245,7 @@ enum drbd_disk_state { D_DISKLESS, D_ATTACHING, /* In the process of reading the meta-data */ D_FAILED, /* Becomes D_DISKLESS as soon as we told it the peer */ - /* when >= D_FAILED it is legal to access mdev->bc */ + /* when >= D_FAILED it is legal to access mdev->ldev */ D_NEGOTIATING, /* Late attaching state, we need to talk to the peer */ D_INCONSISTENT, D_OUTDATED, -- cgit v1.2.3-59-g8ed1b From 7ec3ea181d8cffb669982d79664f119ef6a71fc3 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Wed, 9 Jul 2014 18:09:55 +0200 Subject: usb: gadget: Add helper macro for usb_composite_driver boilerplate Introduce the module_usb_composite_driver macro as a convenience macro for USB gadget composite driver modules, similar to module_usb_driver. It is intended to be used by drivers whose init/exit sections do nothing but call usb_composite_probe/usb_composite_unregister. By using this macro it is possible to eliminate a few lines of boilerplate code per USB gadget composite driver. Based on f3a6a4b6 ("USB: Add helper macro for usb_driver boilerplate") which introduced the corresponding macro for USB drivers.
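For illustration only (this sketch is not part of the patch; the driver name "g_example", the vendor/product IDs and the bind/unbind callbacks are hypothetical), a minimal composite driver module using the helper might look like the following, replacing the usual module_init()/module_exit() pair with a single macro invocation:

/* Hypothetical minimal gadget composite driver using the helper macro. */
#include <linux/module.h>
#include <linux/usb/ch9.h>
#include <linux/usb/composite.h>

static struct usb_device_descriptor example_device_desc = {
	.bLength		= sizeof(example_device_desc),
	.bDescriptorType	= USB_DT_DEVICE,
	.bcdUSB			= cpu_to_le16(0x0200),
	.bDeviceClass		= USB_CLASS_PER_INTERFACE,
	.idVendor		= cpu_to_le16(0x0525),	/* placeholder vendor/product IDs */
	.idProduct		= cpu_to_le16(0xffff),
};

static int example_bind(struct usb_composite_dev *cdev)
{
	/* a real driver would allocate string IDs and add configurations here */
	return 0;
}

static int example_unbind(struct usb_composite_dev *cdev)
{
	return 0;
}

static struct usb_composite_driver example_driver = {
	.name		= "g_example",
	.dev		= &example_device_desc,
	.max_speed	= USB_SPEED_HIGH,
	.bind		= example_bind,
	.unbind		= example_unbind,
};

/* expands to module_init()/module_exit() calling usb_composite_probe()/usb_composite_unregister() */
module_usb_composite_driver(example_driver);

MODULE_LICENSE("GPL");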
Signed-off-by: Tobias Klauser Signed-off-by: Felipe Balbi --- include/linux/usb/composite.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h index 7373203140e7..c330f5ef42cf 100644 --- a/include/linux/usb/composite.h +++ b/include/linux/usb/composite.h @@ -386,6 +386,21 @@ struct usb_composite_driver { extern int usb_composite_probe(struct usb_composite_driver *driver); extern void usb_composite_unregister(struct usb_composite_driver *driver); + +/** + * module_usb_composite_driver() - Helper macro for registering a USB gadget + * composite driver + * @__usb_composite_driver: usb_composite_driver struct + * + * Helper macro for USB gadget composite drivers which do not do anything + * special in module init/exit. This eliminates a lot of boilerplate. Each + * module may only use this macro once, and calling it replaces module_init() + * and module_exit() + */ +#define module_usb_composite_driver(__usb_composite_driver) \ + module_driver(__usb_composite_driver, usb_composite_probe, \ + usb_composite_unregister) + extern void usb_composite_setup_continue(struct usb_composite_dev *cdev); extern int composite_dev_prepare(struct usb_composite_driver *composite, struct usb_composite_dev *cdev); -- cgit v1.2.3-59-g8ed1b From 4da6daf4d3df5a977e4623963f141a627fd2efce Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Thu, 10 Jul 2014 10:17:48 -0400 Subject: selinux: fix the default socket labeling in sock_graft() The sock_graft() hook has special handling for AF_INET, AF_INET6, and AF_UNIX sockets as those address families have special hooks which label the sock before it is attached to its associated socket. Unfortunately, the sock_graft() hook was missing a default approach to labeling sockets which meant that any other address family which made use of connections or the accept() syscall would find the returned socket to be in an "unlabeled" state. This was recently demonstrated by the kcrypto/AF_ALG subsystem and the newly released cryptsetup package (cryptsetup v1.6.5 and later). This patch preserves the special handling in selinux_sock_graft(), but adds a default behavior - setting the sock's label equal to the associated socket - which resolves the problem with AF_ALG and presumably any other address family which makes use of accept(). Cc: stable@vger.kernel.org Signed-off-by: Paul Moore Tested-by: Milan Broz --- include/linux/security.h | 5 ++++- security/selinux/hooks.c | 13 +++++++++++-- 2 files changed, 15 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index 6478ce3252c7..794be735ff4b 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -987,7 +987,10 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * Retrieve the LSM-specific secid for the sock to enable caching of network * authorizations. * @sock_graft: - * Sets the socket's isec sid to the sock's sid. + * This hook is called in response to a newly created sock struct being + * grafted onto an existing socket and allows the security module to + * perform whatever security attribute management is necessary for both + * the sock and socket. * @inet_conn_request: * Sets the openreq's sid to socket's sid with MLS portion taken from peer sid.
* @inet_csk_clone: diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 336f0a04450e..b3a6754e932b 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -4499,9 +4499,18 @@ static void selinux_sock_graft(struct sock *sk, struct socket *parent) struct inode_security_struct *isec = SOCK_INODE(parent)->i_security; struct sk_security_struct *sksec = sk->sk_security; - if (sk->sk_family == PF_INET || sk->sk_family == PF_INET6 || - sk->sk_family == PF_UNIX) + switch (sk->sk_family) { + case PF_INET: + case PF_INET6: + case PF_UNIX: isec->sid = sksec->sid; + break; + default: + /* by default there is no special labeling mechanism for the + * sksec label so inherit the label from the parent socket */ + BUG_ON(sksec->sid != SECINITSID_UNLABELED); + sksec->sid = isec->sid; + } sksec->sclass = isec->sclass; } -- cgit v1.2.3-59-g8ed1b From aaaba34576407857f6146ff6c330f06e63fb2bf2 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Tue, 18 Mar 2014 12:30:09 +0100 Subject: drbd: implement csums-after-crash-only Checksum based resync trades CPU cycles for network bandwidth, in situations where we expect much of the to-be-resynced blocks to be actually identical on both sides already. In a "network hickup" scenario, it won't help: all to-be-resynced blocks will typically be different. The use case is for the resync of *potentially* different blocks after crash recovery -- the crash recovery had marked larger areas (those covered by the activity log) as need-to-be-resynced, just in case. Most of those blocks will be identical. This option makes it possible to configure checksum based resync, but only actually use it for the first resync after primary crash. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 2 ++ drivers/block/drbd/drbd_receiver.c | 2 ++ drivers/block/drbd/drbd_worker.c | 24 ++++++++++++++++++++---- include/linux/drbd_genl.h | 3 +++ include/linux/drbd_limits.h | 1 + 5 files changed, 28 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index abf5aefd9790..fe6595a96a9a 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -738,6 +738,8 @@ struct drbd_device { struct rb_root read_requests; struct rb_root write_requests; + /* use checksums for *this* resync */ + bool use_csums; /* blocks to resync in this run [unit BM_BLOCK_SIZE] */ unsigned long rs_total; /* number of resync blocks that failed in this run */ diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 5626c5babc3f..d326af67c27e 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -2555,6 +2555,8 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet peer_req->w.cb = w_e_end_csum_rs_req; /* used in the sector offset progress display */ device->bm_resync_fo = BM_SECT_TO_BIT(sector); + /* remember to report stats in drbd_resync_finished */ + device->use_csums = true; } else if (pi->cmd == P_OV_REPLY) { /* track progress, we may need to throttle */ atomic_add(size >> 9, &device->rs_sect_in); diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 2ff5fd49a3b1..6532a697cf49 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -698,8 +698,8 @@ next_sector: /* adjust very last sectors, in case we are oddly sized */ if (sector + (size>>9) > capacity) size = (capacity-sector)<<9; - if 
(connection->agreed_pro_version >= 89 && - connection->csums_tfm) { + + if (device->use_csums) { switch (read_for_csum(peer_device, sector, size)) { case -EIO: /* Disk failure */ put_ldev(device); @@ -913,7 +913,7 @@ int drbd_resync_finished(struct drbd_device *device) if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) khelper_cmd = "after-resync-target"; - if (first_peer_device(device)->connection->csums_tfm && device->rs_total) { + if (device->use_csums && device->rs_total) { const unsigned long s = device->rs_same_csum; const unsigned long t = device->rs_total; const int ratio = @@ -1622,6 +1622,18 @@ static void do_start_resync(struct drbd_device *device) clear_bit(AHEAD_TO_SYNC_SOURCE, &device->flags); } +static bool use_checksum_based_resync(struct drbd_connection *connection, struct drbd_device *device) +{ + bool csums_after_crash_only; + rcu_read_lock(); + csums_after_crash_only = rcu_dereference(connection->net_conf)->csums_after_crash_only; + rcu_read_unlock(); + return connection->agreed_pro_version >= 89 && /* supported? */ + connection->csums_tfm && /* configured? */ + (csums_after_crash_only == 0 /* use for each resync? */ + || test_bit(CRASHED_PRIMARY, &device->flags)); /* or only after Primary crash? */ +} + /** * drbd_start_resync() - Start the resync process * @device: DRBD device. @@ -1756,8 +1768,12 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side) drbd_conn_str(ns.conn), (unsigned long) device->rs_total << (BM_BLOCK_SHIFT-10), (unsigned long) device->rs_total); - if (side == C_SYNC_TARGET) + if (side == C_SYNC_TARGET) { device->bm_resync_fo = 0; + device->use_csums = use_checksum_based_resync(connection, device); + } else { + device->use_csums = 0; + } /* Since protocol 96, we must serialize drbd_gen_and_send_sync_uuid * with w_send_oos, or the sync target will get confused as to diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 4193f5f2636c..71fc924c53fa 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -171,6 +171,9 @@ GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf, __flg_field(28, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT, tentative) __flg_field_def(29, DRBD_GENLA_F_MANDATORY, use_rle, DRBD_USE_RLE_DEF) /* 9: __u32_field_def(30, DRBD_GENLA_F_MANDATORY, fencing_policy, DRBD_FENCING_DEF) */ + /* 9: __str_field_def(31, DRBD_GENLA_F_MANDATORY, name, SHARED_SECRET_MAX) */ + /* 9: __u32_field(32, DRBD_F_REQUIRED | DRBD_F_INVARIANT, peer_node_id) */ + __flg_field_def(33, 0 /* OPTIONAL */, csums_after_crash_only, DRBD_CSUMS_AFTER_CRASH_ONLY_DEF) ) GENL_struct(DRBD_NLA_SET_ROLE_PARMS, 6, set_role_parms, diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 17e50bb00521..9d2df1d51414 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -214,6 +214,7 @@ #define DRBD_ALLOW_TWO_PRIMARIES_DEF 0 #define DRBD_ALWAYS_ASBP_DEF 0 #define DRBD_USE_RLE_DEF 1 +#define DRBD_CSUMS_AFTER_CRASH_ONLY_DEF 0 #define DRBD_AL_STRIPES_MIN 1 #define DRBD_AL_STRIPES_MAX 1024 -- cgit v1.2.3-59-g8ed1b From 5d0b17f1a29e8189d04aef447a3a53cfd05529b2 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 18 Mar 2014 14:24:35 +0100 Subject: drbd: New net configuration option socket-check-timeout In setups involving a DRBD-proxy and connections that experience a lot of buffer-bloat it might be necessary to set ping-timeout to an unusual high value. By default DRBD uses the same value to wait if a newly established TCP-connection is stable. 
Since the DRBD-proxy is usually located in the same data center such a long wait time may hinder DRBD's connect process. In such setups socket-check-timeout should be set to at least to the round trip time between DRBD and DRBD-proxy. I.e. in most cases to 1. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 46 +++++++++++++++++++++++++------------- include/linux/drbd_genl.h | 1 + include/linux/drbd_limits.h | 5 +++++ 3 files changed, 36 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 7da83f3a61eb..b89e6fb468c6 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -819,7 +819,7 @@ static int receive_first_packet(struct drbd_connection *connection, struct socke * drbd_socket_okay() - Free the socket if its connection is not okay * @sock: pointer to the pointer to the socket. */ -static int drbd_socket_okay(struct socket **sock) +static bool drbd_socket_okay(struct socket **sock) { int rr; char tb[4]; @@ -837,6 +837,30 @@ static int drbd_socket_okay(struct socket **sock) return false; } } + +static bool connection_established(struct drbd_connection *connection, + struct socket **sock1, + struct socket **sock2) +{ + struct net_conf *nc; + int timeout; + bool ok; + + if (!*sock1 || !*sock2) + return false; + + rcu_read_lock(); + nc = rcu_dereference(connection->net_conf); + timeout = (nc->sock_check_timeo ?: nc->ping_timeo) * HZ / 10; + rcu_read_unlock(); + schedule_timeout_interruptible(timeout); + + ok = drbd_socket_okay(sock1); + ok = drbd_socket_okay(sock2) && ok; + + return ok; +} + /* Gets called if a connection is established, or if a new minor gets created in a connection */ int drbd_connected(struct drbd_peer_device *peer_device) @@ -878,8 +902,8 @@ static int conn_connect(struct drbd_connection *connection) struct drbd_socket sock, msock; struct drbd_peer_device *peer_device; struct net_conf *nc; - int vnr, timeout, h, ok; - bool discard_my_data; + int vnr, timeout, h; + bool discard_my_data, ok; enum drbd_state_rv rv; struct accept_wait_data ad = { .connection = connection, @@ -923,17 +947,8 @@ static int conn_connect(struct drbd_connection *connection) } } - if (sock.socket && msock.socket) { - rcu_read_lock(); - nc = rcu_dereference(connection->net_conf); - timeout = nc->ping_timeo * HZ / 10; - rcu_read_unlock(); - schedule_timeout_interruptible(timeout); - ok = drbd_socket_okay(&sock.socket); - ok = drbd_socket_okay(&msock.socket) && ok; - if (ok) - break; - } + if (connection_established(connection, &sock.socket, &msock.socket)) + break; retry: s = drbd_wait_for_connect(connection, &ad); @@ -979,8 +994,7 @@ randomize: goto out_release_sockets; } - ok = drbd_socket_okay(&sock.socket); - ok = drbd_socket_okay(&msock.socket) && ok; + ok = connection_established(connection, &sock.socket, &msock.socket); } while (!ok); if (ad.s_listen) diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 71fc924c53fa..7b131ed8f9c6 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -174,6 +174,7 @@ GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf, /* 9: __str_field_def(31, DRBD_GENLA_F_MANDATORY, name, SHARED_SECRET_MAX) */ /* 9: __u32_field(32, DRBD_F_REQUIRED | DRBD_F_INVARIANT, peer_node_id) */ __flg_field_def(33, 0 /* OPTIONAL */, csums_after_crash_only, DRBD_CSUMS_AFTER_CRASH_ONLY_DEF) + __u32_field_def(34, 0 /* OPTIONAL */, sock_check_timeo, DRBD_SOCKET_CHECK_TIMEO_DEF) 
) GENL_struct(DRBD_NLA_SET_ROLE_PARMS, 6, set_role_parms, diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 9d2df1d51414..8ac8c5d9a3ad 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -225,4 +225,9 @@ #define DRBD_AL_STRIPE_SIZE_MAX 16777216 #define DRBD_AL_STRIPE_SIZE_DEF 32 #define DRBD_AL_STRIPE_SIZE_SCALE 'k' /* kilobytes */ + +#define DRBD_SOCKET_CHECK_TIMEO_MIN 0 +#define DRBD_SOCKET_CHECK_TIMEO_MAX DRBD_PING_TIMEO_MAX +#define DRBD_SOCKET_CHECK_TIMEO_DEF 0 +#define DRBD_SOCKET_CHECK_TIMEO_SCALE '1' #endif -- cgit v1.2.3-59-g8ed1b From bf0d6e4a1138e71cafdbbb99cde430eee50c4ff1 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 6 May 2014 14:28:32 +0300 Subject: drbd: silence underflow warning in read_in_block() My static checker warns that "data_size" could be negative and underflow the limit check. The code looks suspicious but I don't know if it is a real bug. Signed-off-by: Dan Carpenter Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 2 +- include/linux/drbd.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index f972988291c5..9342b8da73ab 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1592,7 +1592,7 @@ read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector, struct drbd_peer_request *peer_req; struct page *page; int dgs, ds, err; - int data_size = pi->size; + unsigned int data_size = pi->size; void *dig_in = peer_device->connection->int_dig_in; void *dig_vv = peer_device->connection->int_dig_vv; unsigned long *data; diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 20ec8903b1e4..debb70d40547 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -52,7 +52,7 @@ #endif extern const char *drbd_buildtag(void); -#define REL_VERSION "8.4.3" +#define REL_VERSION "8.4.5" #define API_VERSION 1 #define PRO_VERSION_MIN 86 #define PRO_VERSION_MAX 101 -- cgit v1.2.3-59-g8ed1b From 5d5eacb34c9e1fdc0a47b885d832eaa4de860dc7 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Thu, 10 Jul 2014 07:01:58 -0400 Subject: bridge: fdb dumping takes a filter device Dumping a bridge fdb dumps every fdb entry held. With this change we are going to filter on selected bridge port. Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. 
Miller --- drivers/net/ethernet/intel/i40e/i40e_main.c | 3 ++- drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c | 7 ++++--- drivers/net/vxlan.c | 3 ++- include/linux/netdevice.h | 4 +++- include/linux/rtnetlink.h | 1 + net/bridge/br_fdb.c | 5 +++++ net/bridge/br_private.h | 2 +- net/core/rtnetlink.c | 9 ++++++--- 8 files changed, 24 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index e49352d68ede..2899f783ee1d 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -7095,13 +7095,14 @@ static int i40e_ndo_fdb_del(struct ndmsg *ndm, static int i40e_ndo_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, + struct net_device *filter_dev, int idx) { struct i40e_netdev_priv *np = netdev_priv(dev); struct i40e_pf *pf = np->vsi->back; if (pf->flags & I40E_FLAG_SRIOV_ENABLED) - idx = ndo_dflt_fdb_dump(skb, cb, dev, idx); + idx = ndo_dflt_fdb_dump(skb, cb, dev, filter_dev, idx); return idx; } diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index f8de2ae01a5a..0fdbcc8319f7 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -427,16 +427,17 @@ static int qlcnic_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], } static int qlcnic_fdb_dump(struct sk_buff *skb, struct netlink_callback *ncb, - struct net_device *netdev, int idx) + struct net_device *netdev, + struct net_device *filter_dev, int idx) { struct qlcnic_adapter *adapter = netdev_priv(netdev); if (!adapter->fdb_mac_learn) - return ndo_dflt_fdb_dump(skb, ncb, netdev, idx); + return ndo_dflt_fdb_dump(skb, ncb, netdev, filter_dev, idx); if ((adapter->flags & QLCNIC_ESWITCH_ENABLED) || qlcnic_sriov_check(adapter)) - idx = ndo_dflt_fdb_dump(skb, ncb, netdev, idx); + idx = ndo_dflt_fdb_dump(skb, ncb, netdev, filter_dev, idx); return idx; } diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index c2d360150804..e6808f7e4e32 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -933,7 +933,8 @@ out: /* Dump forwarding table */ static int vxlan_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, - struct net_device *dev, int idx) + struct net_device *dev, + struct net_device *filter_dev, int idx) { struct vxlan_dev *vxlan = netdev_priv(dev); unsigned int h; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 8b43a28ee0bc..3a320db96180 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -943,7 +943,8 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev, * const unsigned char *addr) * Deletes the FDB entry from dev coresponding to addr. * int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb, - * struct net_device *dev, int idx) + * struct net_device *dev, struct net_device *filter_dev, + * int idx) * Used to add FDB entries to dump requests. Implementers should add * entries to skb and update idx with the number of entries. 
* @@ -1114,6 +1115,7 @@ struct net_device_ops { int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, + struct net_device *filter_dev, int idx); int (*ndo_bridge_setlink)(struct net_device *dev, diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 953937ea5233..167bae7bdfa4 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -78,6 +78,7 @@ extern void __rtnl_unlock(void); extern int ndo_dflt_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, + struct net_device *filter_dev, int idx); extern int ndo_dflt_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 7be33667a839..6edecd11ecf0 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -676,6 +676,7 @@ errout: int br_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, + struct net_device *filter_dev, int idx) { struct net_bridge *br = netdev_priv(dev); @@ -691,6 +692,10 @@ int br_fdb_dump(struct sk_buff *skb, if (idx < cb->args[0]) goto skip; + if (filter_dev && (!f->dst || !f->dst->dev || + f->dst->dev != filter_dev)) + goto skip; + if (fdb_fill_info(skb, br, f, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 23caf5b0309e..62a7fa2e3569 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -399,7 +399,7 @@ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], int br_fdb_add(struct ndmsg *nlh, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 nlh_flags); int br_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, - struct net_device *dev, int idx); + struct net_device *dev, struct net_device *fdev, int idx); int br_fdb_sync_static(struct net_bridge *br, struct net_bridge_port *p); void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 27acaf7ff6d7..90a906e7ac26 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2517,6 +2517,7 @@ skip: int ndo_dflt_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, + struct net_device *filter_dev, int idx) { int err; @@ -2547,13 +2548,15 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) br_dev = netdev_master_upper_dev_get(dev); ops = br_dev->netdev_ops; if (ops->ndo_fdb_dump) - idx = ops->ndo_fdb_dump(skb, cb, dev, idx); + idx = ops->ndo_fdb_dump(skb, cb, dev, NULL, + idx); } if (dev->netdev_ops->ndo_fdb_dump) - idx = dev->netdev_ops->ndo_fdb_dump(skb, cb, dev, idx); + idx = dev->netdev_ops->ndo_fdb_dump(skb, cb, dev, NULL, + idx); else - idx = ndo_dflt_fdb_dump(skb, cb, dev, idx); + idx = ndo_dflt_fdb_dump(skb, cb, dev, NULL, idx); } rcu_read_unlock(); -- cgit v1.2.3-59-g8ed1b From 8e2614bbf619e210674c3eae7f087db7c55ff89b Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 10 Jul 2014 14:05:11 +0200 Subject: PCI: Add include guard to include/linux/pci_ids.h Adding an include guard frees the preprocessor from reparsing over 2600 #defines in the cases where pci_ids.h is somehow included more than once. This gives a tiny-but-measurable performance improvement when compiling such files. 
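For illustration, a minimal sketch of the guard pattern (the header name and the define below are hypothetical, not the real pci_ids.h contents): once the body is wrapped, a second inclusion expands to nothing, so the preprocessor never rescans the defines.

/* example_ids.h -- hypothetical header, shown only to illustrate the guard */
#ifndef _EXAMPLE_IDS_H
#define _EXAMPLE_IDS_H

#define EXAMPLE_VENDOR_ID_FOO 0x1234 /* the real file carries ~2600 such defines */

#endif /* _EXAMPLE_IDS_H */

/* example.c -- including the header twice is now cheap: the second include
 * is rejected at the #ifndef instead of reparsing every #define again. */
#include "example_ids.h"
#include "example_ids.h"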
Signed-off-by: Rasmus Villemoes Signed-off-by: Bjorn Helgaas --- include/linux/pci_ids.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 7fa31731c854..6ed0bb73a864 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -6,6 +6,8 @@ * Do not add new entries to this file unless the definitions * are shared between multiple drivers. */ +#ifndef _LINUX_PCI_IDS_H +#define _LINUX_PCI_IDS_H /* Device classes and subclasses */ @@ -2968,3 +2970,5 @@ #define PCI_DEVICE_ID_XEN_PLATFORM 0x0001 #define PCI_VENDOR_ID_OCZ 0x1b85 + +#endif /* _LINUX_PCI_IDS_H */ -- cgit v1.2.3-59-g8ed1b From ed617e44234ee03a60edbf0809f696f0b9cf1b90 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Mon, 16 Jun 2014 09:16:58 -0400 Subject: tty: Document locking for tty driver methods The tty core calls the tty driver's open, close and hangup methods holding the tty lock. Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- include/linux/tty_driver.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h index 756a60989294..e48c608a8fa8 100644 --- a/include/linux/tty_driver.h +++ b/include/linux/tty_driver.h @@ -35,14 +35,14 @@ * This routine is mandatory; if this routine is not filled in, * the attempted open will fail with ENODEV. * - * Required method. - * + * Required method. Called with tty lock held. + * * void (*close)(struct tty_struct * tty, struct file * filp); * * This routine is called when a particular tty device is closed. * Note: called even if the corresponding open() failed. * - * Required method. + * Required method. Called with tty lock held. * * void (*shutdown)(struct tty_struct * tty); * @@ -172,6 +172,8 @@ * * Optional: * + * Called with tty lock held. + * * int (*break_ctl)(struct tty_struct *tty, int state); * * This optional routine requests the tty driver to turn on or -- cgit v1.2.3-59-g8ed1b From 2243a87d90b42eb38bc281957df3e57c712b5e56 Mon Sep 17 00:00:00 2001 From: Fan Wu Date: Mon, 9 Jun 2014 09:37:56 +0800 Subject: pinctrl: avoid duplicated calling enable_pinmux_setting for a pin What the patch does: 1. Call pinmux_disable_setting ahead of pinmux_enable_setting each time pinctrl_select_state is called 2. Remove the HW disable operation in pinmux_disable_setting function. 3. Remove the disable ops in struct pinmux_ops 4. Remove all the disable ops users in current code base. Notes: 1. Great thanks for the suggestion from Linus, Tony Lindgren and Stephen Warren and Everyone that shared comments on this patch. 2. The patch also includes comment fixes from Stephen Warren. The reason why we do this: 1. To avoid duplicated calling of the enable_setting operation without disabling operation inbetween which will let the pin descriptor desc->mux_usecount increase monotonously. 2. The HW pin disable operation is not useful for any of the existing platforms. And this can be used to avoid the HW glitch after using the item #1 modification. In the following case, the issue can be reproduced: 1. There is a driver that need to switch pin state dynamically, e.g. between "sleep" and "default" state 2. 
The pin setting configuration in a DTS node may be like this: component a { pinctrl-names = "default", "sleep"; pinctrl-0 = <&a_grp_setting &c_grp_setting>; pinctrl-1 = <&b_grp_setting &c_grp_setting>; } The "c_grp_setting" config node is identical in both states, maybe like the following one: c_grp_setting: c_grp_setting { pinctrl-single,pins = ; } 3. When switching the pin state in the following official pinctrl sequence: pin = pinctrl_get(); state = pinctrl_lookup_state(wanted_state); pinctrl_select_state(state); pinctrl_put(); Test Result: 1. The switch completes as expected, that is: the device's pin configuration is changed according to the description in the "wanted_state" group setting. 2. The "desc->mux_usecount" of the corresponding pins in "c_group" is increased without ever being decreased, because the "desc" is per physical pin while the setting is per setting node in the DTS. Thus, if the "c_grp_setting" in pinctrl-0 is not disabled ahead of enabling "c_grp_setting" in pinctrl-1, desc->mux_usecount keeps increasing with no chance of being decreased. According to the comments in the original code, only a setting that is in the old state but not in the new state will be "disabled" (by calling pinmux_disable_setting), which is correct logic but not complete: we still need to consider the case where a setting is in both the old state and the new state. We can handle this in one of two ways: 1. Avoid "enabling" (calling pinmux_enable_setting) the same pin setting repeatedly. 2. "Disable" (calling pinmux_disable_setting) the same pin setting, actually two setting instances, ahead of enabling them. Analysis: 1. Solution #2 is better because it avoids excessive iteration. 2. If we disable all of the settings in the old state while one of those settings also exists in the new state, a pin mux function change may happen on SoCs whose DTS defines "pinctrl-single,function-off": old_setting => disabled_setting => new_setting. 3. In the pinmux framework, when a pin state is switched, the setting in the old state should be marked as "disabled". Conclusion: 1. Remove the HW disabling operation to avoid the glitch mentioned above. 2. Handle the issue mentioned above by disabling all of the settings in the old state and then enabling all of the settings in the new state.
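For reference, a minimal consumer-side sketch of the pinctrl_get()/pinctrl_lookup_state()/pinctrl_select_state() sequence described above (the driver, device and "foo_*" names are invented for illustration; only the pinctrl calls are the real API). A driver toggling between its "default" and "sleep" states across suspend/resume is exactly the case where a group shared by both states used to be enabled twice:

#include <linux/device.h>
#include <linux/err.h>
#include <linux/pinctrl/consumer.h>

struct foo_priv {
        struct pinctrl *pinctrl;
        struct pinctrl_state *pins_default;
        struct pinctrl_state *pins_sleep;
};

static int foo_pinctrl_init(struct device *dev, struct foo_priv *priv)
{
        priv->pinctrl = devm_pinctrl_get(dev);
        if (IS_ERR(priv->pinctrl))
                return PTR_ERR(priv->pinctrl);

        priv->pins_default = pinctrl_lookup_state(priv->pinctrl,
                                                  PINCTRL_STATE_DEFAULT);
        priv->pins_sleep = pinctrl_lookup_state(priv->pinctrl,
                                                PINCTRL_STATE_SLEEP);
        if (IS_ERR(priv->pins_default) || IS_ERR(priv->pins_sleep))
                return -ENODEV;

        return 0;
}

static int foo_suspend(struct device *dev)
{
        struct foo_priv *priv = dev_get_drvdata(dev);

        /* Settings shared by both states (the "c_grp_setting" case above)
         * are now disabled and re-enabled across this call, so
         * desc->mux_usecount stays balanced instead of growing on every
         * switch. */
        return pinctrl_select_state(priv->pinctrl, priv->pins_sleep);
}

static int foo_resume(struct device *dev)
{
        struct foo_priv *priv = dev_get_drvdata(dev);

        return pinctrl_select_state(priv->pinctrl, priv->pins_default);
}

Nothing changes for such a consumer with this patch; the rebalancing of the mux use count for groups present in both states happens entirely inside pinctrl_select_state().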
Signed-off-by: Fan Wu Acked-by: Stephen Warren Acked-by: Patrice Chotard Acked-by: Heiko Stuebner Acked-by: Maxime Coquelin Signed-off-by: Linus Walleij --- drivers/pinctrl/core.c | 24 +++------------ drivers/pinctrl/pinctrl-abx500.c | 15 --------- drivers/pinctrl/pinctrl-adi2.c | 30 ------------------ drivers/pinctrl/pinctrl-at91.c | 21 ------------- drivers/pinctrl/pinctrl-bcm2835.c | 11 ------- drivers/pinctrl/pinctrl-exynos5440.c | 8 ----- drivers/pinctrl/pinctrl-msm.c | 25 --------------- drivers/pinctrl/pinctrl-nomadik.c | 16 ---------- drivers/pinctrl/pinctrl-rockchip.c | 18 ----------- drivers/pinctrl/pinctrl-samsung.c | 8 ----- drivers/pinctrl/pinctrl-single.c | 56 --------------------------------- drivers/pinctrl/pinctrl-st.c | 6 ---- drivers/pinctrl/pinctrl-tb10x.c | 17 ---------- drivers/pinctrl/pinctrl-tegra.c | 13 -------- drivers/pinctrl/pinctrl-tz1090-pdc.c | 28 ----------------- drivers/pinctrl/pinctrl-tz1090.c | 58 ----------------------------------- drivers/pinctrl/pinctrl-u300.c | 14 --------- drivers/pinctrl/pinmux.c | 4 --- drivers/pinctrl/sh-pfc/pinctrl.c | 22 ------------- drivers/pinctrl/sirf/pinctrl-sirf.c | 10 ------ drivers/pinctrl/spear/pinctrl-spear.c | 7 ----- drivers/pinctrl/vt8500/pinctrl-wmt.c | 12 -------- include/linux/pinctrl/pinmux.h | 2 -- 23 files changed, 5 insertions(+), 420 deletions(-) (limited to 'include/linux') diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c index e09474ecde23..e4f65510c87e 100644 --- a/drivers/pinctrl/core.c +++ b/drivers/pinctrl/core.c @@ -992,29 +992,15 @@ int pinctrl_select_state(struct pinctrl *p, struct pinctrl_state *state) if (p->state) { /* - * The set of groups with a mux configuration in the old state - * may not be identical to the set of groups with a mux setting - * in the new state. While this might be unusual, it's entirely - * possible for the "user"-supplied mapping table to be written - * that way. For each group that was configured in the old state - * but not in the new state, this code puts that group into a - * safe/disabled state. + * For each pinmux setting in the old state, forget SW's record + * of mux owner for that pingroup. Any pingroups which are + * still owned by the new state will be re-acquired by the call + * to pinmux_enable_setting() in the loop below. */ list_for_each_entry(setting, &p->state->settings, node) { - bool found = false; if (setting->type != PIN_MAP_TYPE_MUX_GROUP) continue; - list_for_each_entry(setting2, &state->settings, node) { - if (setting2->type != PIN_MAP_TYPE_MUX_GROUP) - continue; - if (setting2->data.mux.group == - setting->data.mux.group) { - found = true; - break; - } - } - if (!found) - pinmux_disable_setting(setting); + pinmux_disable_setting(setting); } } diff --git a/drivers/pinctrl/pinctrl-abx500.c b/drivers/pinctrl/pinctrl-abx500.c index 163da9c3ea0e..f3f8b24efe54 100644 --- a/drivers/pinctrl/pinctrl-abx500.c +++ b/drivers/pinctrl/pinctrl-abx500.c @@ -737,20 +737,6 @@ static int abx500_pmx_enable(struct pinctrl_dev *pctldev, unsigned function, return ret; } -static void abx500_pmx_disable(struct pinctrl_dev *pctldev, - unsigned function, unsigned group) -{ - struct abx500_pinctrl *pct = pinctrl_dev_get_drvdata(pctldev); - const struct abx500_pingroup *g; - - g = &pct->soc->groups[group]; - if (g->altsetting < 0) - return; - - /* FIXME: poke out the mux, set the pin to some default state? 
*/ - dev_dbg(pct->dev, "disable group %s, %u pins\n", g->name, g->npins); -} - static int abx500_gpio_request_enable(struct pinctrl_dev *pctldev, struct pinctrl_gpio_range *range, unsigned offset) @@ -799,7 +785,6 @@ static const struct pinmux_ops abx500_pinmux_ops = { .get_function_name = abx500_pmx_get_func_name, .get_function_groups = abx500_pmx_get_func_groups, .enable = abx500_pmx_enable, - .disable = abx500_pmx_disable, .gpio_request_enable = abx500_gpio_request_enable, .gpio_disable_free = abx500_gpio_disable_free, }; diff --git a/drivers/pinctrl/pinctrl-adi2.c b/drivers/pinctrl/pinctrl-adi2.c index 5c44feb54ebb..b02ee4f882c0 100644 --- a/drivers/pinctrl/pinctrl-adi2.c +++ b/drivers/pinctrl/pinctrl-adi2.c @@ -652,35 +652,6 @@ static int adi_pinmux_enable(struct pinctrl_dev *pctldev, unsigned func_id, return 0; } -static void adi_pinmux_disable(struct pinctrl_dev *pctldev, unsigned func_id, - unsigned group_id) -{ - struct adi_pinctrl *pinctrl = pinctrl_dev_get_drvdata(pctldev); - struct gpio_port *port; - struct pinctrl_gpio_range *range; - unsigned long flags; - unsigned short *mux, pin; - - mux = (unsigned short *)pinctrl->soc->groups[group_id].mux; - - while (*mux) { - pin = P_IDENT(*mux); - - range = pinctrl_find_gpio_range_from_pin(pctldev, pin); - if (range == NULL) /* should not happen */ - return; - - port = container_of(range->gc, struct gpio_port, chip); - - spin_lock_irqsave(&port->lock, flags); - - port_setup(port, pin_to_offset(range, pin), true); - mux++; - - spin_unlock_irqrestore(&port->lock, flags); - } -} - static int adi_pinmux_get_funcs_count(struct pinctrl_dev *pctldev) { struct adi_pinctrl *pinctrl = pinctrl_dev_get_drvdata(pctldev); @@ -728,7 +699,6 @@ static int adi_pinmux_request_gpio(struct pinctrl_dev *pctldev, static struct pinmux_ops adi_pinmux_ops = { .enable = adi_pinmux_enable, - .disable = adi_pinmux_disable, .get_functions_count = adi_pinmux_get_funcs_count, .get_function_name = adi_pinmux_get_func_name, .get_function_groups = adi_pinmux_get_groups, diff --git a/drivers/pinctrl/pinctrl-at91.c b/drivers/pinctrl/pinctrl-at91.c index 421493cb490c..bd57ab514aa4 100644 --- a/drivers/pinctrl/pinctrl-at91.c +++ b/drivers/pinctrl/pinctrl-at91.c @@ -611,26 +611,6 @@ static int at91_pmx_enable(struct pinctrl_dev *pctldev, unsigned selector, return 0; } -static void at91_pmx_disable(struct pinctrl_dev *pctldev, unsigned selector, - unsigned group) -{ - struct at91_pinctrl *info = pinctrl_dev_get_drvdata(pctldev); - const struct at91_pmx_pin *pins_conf = info->groups[group].pins_conf; - const struct at91_pmx_pin *pin; - uint32_t npins = info->groups[group].npins; - int i; - unsigned mask; - void __iomem *pio; - - for (i = 0; i < npins; i++) { - pin = &pins_conf[i]; - at91_pin_dbg(info->dev, pin); - pio = pin_to_controller(info, pin->bank); - mask = pin_to_mask(pin->pin); - at91_mux_gpio_enable(pio, mask, 1); - } -} - static int at91_pmx_get_funcs_count(struct pinctrl_dev *pctldev) { struct at91_pinctrl *info = pinctrl_dev_get_drvdata(pctldev); @@ -705,7 +685,6 @@ static const struct pinmux_ops at91_pmx_ops = { .get_function_name = at91_pmx_get_func_name, .get_function_groups = at91_pmx_get_groups, .enable = at91_pmx_enable, - .disable = at91_pmx_disable, .gpio_request_enable = at91_gpio_request_enable, .gpio_disable_free = at91_gpio_disable_free, }; diff --git a/drivers/pinctrl/pinctrl-bcm2835.c b/drivers/pinctrl/pinctrl-bcm2835.c index 3d907de9bc91..5bcfd7ace0cd 100644 --- a/drivers/pinctrl/pinctrl-bcm2835.c +++ b/drivers/pinctrl/pinctrl-bcm2835.c @@ -841,16 
+841,6 @@ static int bcm2835_pmx_enable(struct pinctrl_dev *pctldev, return 0; } -static void bcm2835_pmx_disable(struct pinctrl_dev *pctldev, - unsigned func_selector, - unsigned group_selector) -{ - struct bcm2835_pinctrl *pc = pinctrl_dev_get_drvdata(pctldev); - - /* disable by setting to GPIO_IN */ - bcm2835_pinctrl_fsel_set(pc, group_selector, BCM2835_FSEL_GPIO_IN); -} - static void bcm2835_pmx_gpio_disable_free(struct pinctrl_dev *pctldev, struct pinctrl_gpio_range *range, unsigned offset) @@ -880,7 +870,6 @@ static const struct pinmux_ops bcm2835_pmx_ops = { .get_function_name = bcm2835_pmx_get_function_name, .get_function_groups = bcm2835_pmx_get_function_groups, .enable = bcm2835_pmx_enable, - .disable = bcm2835_pmx_disable, .gpio_disable_free = bcm2835_pmx_gpio_disable_free, .gpio_set_direction = bcm2835_pmx_gpio_set_direction, }; diff --git a/drivers/pinctrl/pinctrl-exynos5440.c b/drivers/pinctrl/pinctrl-exynos5440.c index 8fe2ab0a7698..4b145b5db7a6 100644 --- a/drivers/pinctrl/pinctrl-exynos5440.c +++ b/drivers/pinctrl/pinctrl-exynos5440.c @@ -371,13 +371,6 @@ static int exynos5440_pinmux_enable(struct pinctrl_dev *pctldev, unsigned select return 0; } -/* disable a specified pinmux by writing to registers */ -static void exynos5440_pinmux_disable(struct pinctrl_dev *pctldev, - unsigned selector, unsigned group) -{ - exynos5440_pinmux_setup(pctldev, selector, group, false); -} - /* * The calls to gpio_direction_output() and gpio_direction_input() * leads to this function call (via the pinctrl_gpio_direction_{input|output}() @@ -395,7 +388,6 @@ static const struct pinmux_ops exynos5440_pinmux_ops = { .get_function_name = exynos5440_pinmux_get_fname, .get_function_groups = exynos5440_pinmux_get_groups, .enable = exynos5440_pinmux_enable, - .disable = exynos5440_pinmux_disable, .gpio_set_direction = exynos5440_pinmux_gpio_set_direction, }; diff --git a/drivers/pinctrl/pinctrl-msm.c b/drivers/pinctrl/pinctrl-msm.c index df6dda4ce803..bdfaba4430f2 100644 --- a/drivers/pinctrl/pinctrl-msm.c +++ b/drivers/pinctrl/pinctrl-msm.c @@ -165,36 +165,11 @@ static int msm_pinmux_enable(struct pinctrl_dev *pctldev, return 0; } -static void msm_pinmux_disable(struct pinctrl_dev *pctldev, - unsigned function, - unsigned group) -{ - struct msm_pinctrl *pctrl = pinctrl_dev_get_drvdata(pctldev); - const struct msm_pingroup *g; - unsigned long flags; - u32 val; - - g = &pctrl->soc->groups[group]; - - if (WARN_ON(g->mux_bit < 0)) - return; - - spin_lock_irqsave(&pctrl->lock, flags); - - /* Clear the mux bits to select gpio mode */ - val = readl(pctrl->regs + g->ctl_reg); - val &= ~(0x7 << g->mux_bit); - writel(val, pctrl->regs + g->ctl_reg); - - spin_unlock_irqrestore(&pctrl->lock, flags); -} - static const struct pinmux_ops msm_pinmux_ops = { .get_functions_count = msm_get_functions_count, .get_function_name = msm_get_function_name, .get_function_groups = msm_get_function_groups, .enable = msm_pinmux_enable, - .disable = msm_pinmux_disable, }; static int msm_config_reg(struct msm_pinctrl *pctrl, diff --git a/drivers/pinctrl/pinctrl-nomadik.c b/drivers/pinctrl/pinctrl-nomadik.c index 8f6f16ef73f3..a564251fe093 100644 --- a/drivers/pinctrl/pinctrl-nomadik.c +++ b/drivers/pinctrl/pinctrl-nomadik.c @@ -1765,21 +1765,6 @@ out_glitch: return ret; } -static void nmk_pmx_disable(struct pinctrl_dev *pctldev, - unsigned function, unsigned group) -{ - struct nmk_pinctrl *npct = pinctrl_dev_get_drvdata(pctldev); - const struct nmk_pingroup *g; - - g = &npct->soc->groups[group]; - - if (g->altsetting < 0) - 
return; - - /* Poke out the mux, set the pin to some default state? */ - dev_dbg(npct->dev, "disable group %s, %u pins\n", g->name, g->npins); -} - static int nmk_gpio_request_enable(struct pinctrl_dev *pctldev, struct pinctrl_gpio_range *range, unsigned offset) @@ -1826,7 +1811,6 @@ static const struct pinmux_ops nmk_pinmux_ops = { .get_function_name = nmk_pmx_get_func_name, .get_function_groups = nmk_pmx_get_func_groups, .enable = nmk_pmx_enable, - .disable = nmk_pmx_disable, .gpio_request_enable = nmk_gpio_request_enable, .gpio_disable_free = nmk_gpio_disable_free, }; diff --git a/drivers/pinctrl/pinctrl-rockchip.c b/drivers/pinctrl/pinctrl-rockchip.c index bb805d5e9ff0..51f67a6eadcb 100644 --- a/drivers/pinctrl/pinctrl-rockchip.c +++ b/drivers/pinctrl/pinctrl-rockchip.c @@ -657,23 +657,6 @@ static int rockchip_pmx_enable(struct pinctrl_dev *pctldev, unsigned selector, return 0; } -static void rockchip_pmx_disable(struct pinctrl_dev *pctldev, - unsigned selector, unsigned group) -{ - struct rockchip_pinctrl *info = pinctrl_dev_get_drvdata(pctldev); - const unsigned int *pins = info->groups[group].pins; - struct rockchip_pin_bank *bank; - int cnt; - - dev_dbg(info->dev, "disable function %s group %s\n", - info->functions[selector].name, info->groups[group].name); - - for (cnt = 0; cnt < info->groups[group].npins; cnt++) { - bank = pin_to_bank(info, pins[cnt]); - rockchip_set_mux(bank, pins[cnt] - bank->pin_base, 0); - } -} - /* * The calls to gpio_direction_output() and gpio_direction_input() * leads to this function call (via the pinctrl_gpio_direction_{input|output}() @@ -716,7 +699,6 @@ static const struct pinmux_ops rockchip_pmx_ops = { .get_function_name = rockchip_pmx_get_func_name, .get_function_groups = rockchip_pmx_get_groups, .enable = rockchip_pmx_enable, - .disable = rockchip_pmx_disable, .gpio_set_direction = rockchip_pmx_gpio_set_direction, }; diff --git a/drivers/pinctrl/pinctrl-samsung.c b/drivers/pinctrl/pinctrl-samsung.c index 3e61d0f8f146..089abde35d44 100644 --- a/drivers/pinctrl/pinctrl-samsung.c +++ b/drivers/pinctrl/pinctrl-samsung.c @@ -333,13 +333,6 @@ static int samsung_pinmux_enable(struct pinctrl_dev *pctldev, unsigned selector, return 0; } -/* disable a specified pinmux by writing to registers */ -static void samsung_pinmux_disable(struct pinctrl_dev *pctldev, - unsigned selector, unsigned group) -{ - samsung_pinmux_setup(pctldev, selector, group, false); -} - /* * The calls to gpio_direction_output() and gpio_direction_input() * leads to this function call (via the pinctrl_gpio_direction_{input|output}() @@ -390,7 +383,6 @@ static const struct pinmux_ops samsung_pinmux_ops = { .get_function_name = samsung_pinmux_get_fname, .get_function_groups = samsung_pinmux_get_groups, .enable = samsung_pinmux_enable, - .disable = samsung_pinmux_disable, .gpio_set_direction = samsung_pinmux_gpio_set_direction, }; diff --git a/drivers/pinctrl/pinctrl-single.c b/drivers/pinctrl/pinctrl-single.c index 2960557bfed9..ff6a2bda52e5 100644 --- a/drivers/pinctrl/pinctrl-single.c +++ b/drivers/pinctrl/pinctrl-single.c @@ -488,61 +488,6 @@ static int pcs_enable(struct pinctrl_dev *pctldev, unsigned fselector, return 0; } -static void pcs_disable(struct pinctrl_dev *pctldev, unsigned fselector, - unsigned group) -{ - struct pcs_device *pcs; - struct pcs_function *func; - int i; - - pcs = pinctrl_dev_get_drvdata(pctldev); - /* If function mask is null, needn't disable it. 
*/ - if (!pcs->fmask) - return; - - func = radix_tree_lookup(&pcs->ftree, fselector); - if (!func) { - dev_err(pcs->dev, "%s could not find function%i\n", - __func__, fselector); - return; - } - - /* - * Ignore disable if function-off is not specified. Some hardware - * does not have clearly defined disable function. For pin specific - * off modes, you can use alternate named states as described in - * pinctrl-bindings.txt. - */ - if (pcs->foff == PCS_OFF_DISABLED) { - dev_dbg(pcs->dev, "ignoring disable for %s function%i\n", - func->name, fselector); - return; - } - - dev_dbg(pcs->dev, "disabling function%i %s\n", - fselector, func->name); - - for (i = 0; i < func->nvals; i++) { - struct pcs_func_vals *vals; - unsigned long flags; - unsigned val, mask; - - vals = &func->vals[i]; - raw_spin_lock_irqsave(&pcs->lock, flags); - val = pcs->read(vals->reg); - - if (pcs->bits_per_mux) - mask = vals->mask; - else - mask = pcs->fmask; - - val &= ~mask; - val |= pcs->foff << pcs->fshift; - pcs->write(val, vals->reg); - raw_spin_unlock_irqrestore(&pcs->lock, flags); - } -} - static int pcs_request_gpio(struct pinctrl_dev *pctldev, struct pinctrl_gpio_range *range, unsigned pin) { @@ -575,7 +520,6 @@ static const struct pinmux_ops pcs_pinmux_ops = { .get_function_name = pcs_get_function_name, .get_function_groups = pcs_get_function_groups, .enable = pcs_enable, - .disable = pcs_disable, .gpio_request_enable = pcs_request_gpio, }; diff --git a/drivers/pinctrl/pinctrl-st.c b/drivers/pinctrl/pinctrl-st.c index 1bd6363bc95e..e1919cd43117 100644 --- a/drivers/pinctrl/pinctrl-st.c +++ b/drivers/pinctrl/pinctrl-st.c @@ -930,11 +930,6 @@ static int st_pmx_enable(struct pinctrl_dev *pctldev, unsigned fselector, return 0; } -static void st_pmx_disable(struct pinctrl_dev *pctldev, unsigned selector, - unsigned group) -{ -} - static int st_pmx_set_gpio_direction(struct pinctrl_dev *pctldev, struct pinctrl_gpio_range *range, unsigned gpio, bool input) @@ -957,7 +952,6 @@ static struct pinmux_ops st_pmxops = { .get_function_name = st_pmx_get_fname, .get_function_groups = st_pmx_get_groups, .enable = st_pmx_enable, - .disable = st_pmx_disable, .gpio_set_direction = st_pmx_set_gpio_direction, }; diff --git a/drivers/pinctrl/pinctrl-tb10x.c b/drivers/pinctrl/pinctrl-tb10x.c index 26ca6855f478..71c5d4f0c538 100644 --- a/drivers/pinctrl/pinctrl-tb10x.c +++ b/drivers/pinctrl/pinctrl-tb10x.c @@ -738,22 +738,6 @@ static int tb10x_pctl_enable(struct pinctrl_dev *pctl, return 0; } -static void tb10x_pctl_disable(struct pinctrl_dev *pctl, - unsigned func_selector, unsigned group_selector) -{ - struct tb10x_pinctrl *state = pinctrl_dev_get_drvdata(pctl); - const struct tb10x_pinfuncgrp *grp = &state->pingroups[group_selector]; - - if (grp->port < 0) - return; - - mutex_lock(&state->mutex); - - state->ports[grp->port].count--; - - mutex_unlock(&state->mutex); -} - static struct pinmux_ops tb10x_pinmux_ops = { .get_functions_count = tb10x_get_functions_count, .get_function_name = tb10x_get_function_name, @@ -761,7 +745,6 @@ static struct pinmux_ops tb10x_pinmux_ops = { .gpio_request_enable = tb10x_gpio_request_enable, .gpio_disable_free = tb10x_gpio_disable_free, .enable = tb10x_pctl_enable, - .disable = tb10x_pctl_disable, }; static struct pinctrl_desc tb10x_pindesc = { diff --git a/drivers/pinctrl/pinctrl-tegra.c b/drivers/pinctrl/pinctrl-tegra.c index 2d43bff74f59..150af5503c09 100644 --- a/drivers/pinctrl/pinctrl-tegra.c +++ b/drivers/pinctrl/pinctrl-tegra.c @@ -290,24 +290,11 @@ static int tegra_pinctrl_enable(struct 
pinctrl_dev *pctldev, unsigned function, return 0; } -static void tegra_pinctrl_disable(struct pinctrl_dev *pctldev, - unsigned function, unsigned group) -{ - struct tegra_pmx *pmx = pinctrl_dev_get_drvdata(pctldev); - const struct tegra_pingroup *g; - - g = &pmx->soc->groups[group]; - - if (WARN_ON(g->mux_reg < 0)) - return; -} - static const struct pinmux_ops tegra_pinmux_ops = { .get_functions_count = tegra_pinctrl_get_funcs_count, .get_function_name = tegra_pinctrl_get_func_name, .get_function_groups = tegra_pinctrl_get_func_groups, .enable = tegra_pinctrl_enable, - .disable = tegra_pinctrl_disable, }; static int tegra_pinconf_reg(struct tegra_pmx *pmx, diff --git a/drivers/pinctrl/pinctrl-tz1090-pdc.c b/drivers/pinctrl/pinctrl-tz1090-pdc.c index 5bf01c28925e..41e81a35cabb 100644 --- a/drivers/pinctrl/pinctrl-tz1090-pdc.c +++ b/drivers/pinctrl/pinctrl-tz1090-pdc.c @@ -574,33 +574,6 @@ static int tz1090_pdc_pinctrl_enable(struct pinctrl_dev *pctldev, return 0; } -static void tz1090_pdc_pinctrl_disable(struct pinctrl_dev *pctldev, - unsigned int function, - unsigned int group) -{ - struct tz1090_pdc_pmx *pmx = pinctrl_dev_get_drvdata(pctldev); - const struct tz1090_pdc_pingroup *grp = &tz1090_pdc_groups[group]; - - dev_dbg(pctldev->dev, "%s(func=%u (%s), group=%u (%s))\n", - __func__, - function, tz1090_pdc_functions[function].name, - group, tz1090_pdc_groups[group].name); - - /* is it even a mux? */ - if (grp->drv) - return; - - /* does this group even control the function? */ - if (function != grp->func) - return; - - /* record the pin being unmuxed and update mux bit */ - spin_lock(&pmx->lock); - pmx->mux_en &= ~BIT(grp->pins[0]); - tz1090_pdc_pinctrl_mux(pmx, grp); - spin_unlock(&pmx->lock); -} - static const struct tz1090_pdc_pingroup *find_mux_group( struct tz1090_pdc_pmx *pmx, unsigned int pin) @@ -662,7 +635,6 @@ static struct pinmux_ops tz1090_pdc_pinmux_ops = { .get_function_name = tz1090_pdc_pinctrl_get_func_name, .get_function_groups = tz1090_pdc_pinctrl_get_func_groups, .enable = tz1090_pdc_pinctrl_enable, - .disable = tz1090_pdc_pinctrl_disable, .gpio_request_enable = tz1090_pdc_pinctrl_gpio_request_enable, .gpio_disable_free = tz1090_pdc_pinctrl_gpio_disable_free, }; diff --git a/drivers/pinctrl/pinctrl-tz1090.c b/drivers/pinctrl/pinctrl-tz1090.c index bc9cd7a7602e..24082216842e 100644 --- a/drivers/pinctrl/pinctrl-tz1090.c +++ b/drivers/pinctrl/pinctrl-tz1090.c @@ -1478,63 +1478,6 @@ mux_pins: return 0; } -/** - * tz1090_pinctrl_disable() - Disable a function on a pin group. - * @pctldev: Pin control data - * @function: Function index to disable - * @group: Group index to disable - * - * Disable a particular function on a group of pins. The per GPIO pin pseudo pin - * groups can be used (in which case the pin will be taken out of peripheral - * mode. Some convenience pin groups can also be used in which case the effect - * is the same as enabling the function on each individual pin in the group. - */ -static void tz1090_pinctrl_disable(struct pinctrl_dev *pctldev, - unsigned int function, unsigned int group) -{ - struct tz1090_pmx *pmx = pinctrl_dev_get_drvdata(pctldev); - struct tz1090_pingroup *grp; - unsigned int pin_num, mux_group, i, npins; - const unsigned int *pins; - - /* group of pins? */ - if (group < ARRAY_SIZE(tz1090_groups)) { - grp = &tz1090_groups[group]; - npins = grp->npins; - pins = grp->pins; - /* - * All pins in the group must belong to the same mux group, - * which allows us to just use the mux group of the first pin. 
- * By explicitly listing permitted pingroups for each function - * the pinmux core should ensure this is always the case. - */ - } else { - pin_num = group - ARRAY_SIZE(tz1090_groups); - npins = 1; - pins = &pin_num; - } - mux_group = tz1090_mux_pins[*pins]; - - /* no mux group, but can still be individually muxed to peripheral */ - if (mux_group >= TZ1090_MUX_GROUP_MAX) { - if (function == TZ1090_MUX_PERIP) - goto unmux_pins; - return; - } - - /* mux group already set to a different function? */ - grp = &tz1090_mux_groups[mux_group]; - dev_dbg(pctldev->dev, "%s: unmuxing %u pin(s) in '%s' from '%s'\n", - __func__, npins, grp->name, tz1090_functions[function].name); - - /* subtract pins from ref count and unmux individually */ - WARN_ON(grp->func_count < npins); - grp->func_count -= npins; -unmux_pins: - for (i = 0; i < npins; ++i) - tz1090_pinctrl_perip_select(pmx, pins[i], false); -} - /** * tz1090_pinctrl_gpio_request_enable() - Put pin in GPIO mode. * @pctldev: Pin control data @@ -1575,7 +1518,6 @@ static struct pinmux_ops tz1090_pinmux_ops = { .get_function_name = tz1090_pinctrl_get_func_name, .get_function_groups = tz1090_pinctrl_get_func_groups, .enable = tz1090_pinctrl_enable, - .disable = tz1090_pinctrl_disable, .gpio_request_enable = tz1090_pinctrl_gpio_request_enable, .gpio_disable_free = tz1090_pinctrl_gpio_disable_free, }; diff --git a/drivers/pinctrl/pinctrl-u300.c b/drivers/pinctrl/pinctrl-u300.c index 209a01b8bd3b..0959bb36450f 100644 --- a/drivers/pinctrl/pinctrl-u300.c +++ b/drivers/pinctrl/pinctrl-u300.c @@ -970,19 +970,6 @@ static int u300_pmx_enable(struct pinctrl_dev *pctldev, unsigned selector, return 0; } -static void u300_pmx_disable(struct pinctrl_dev *pctldev, unsigned selector, - unsigned group) -{ - struct u300_pmx *upmx; - - /* There is nothing to do with the power pins */ - if (selector == 0) - return; - - upmx = pinctrl_dev_get_drvdata(pctldev); - u300_pmx_endisable(upmx, selector, false); -} - static int u300_pmx_get_funcs_count(struct pinctrl_dev *pctldev) { return ARRAY_SIZE(u300_pmx_functions); @@ -1008,7 +995,6 @@ static const struct pinmux_ops u300_pmx_ops = { .get_function_name = u300_pmx_get_func_name, .get_function_groups = u300_pmx_get_groups, .enable = u300_pmx_enable, - .disable = u300_pmx_disable, }; static int u300_pin_config_get(struct pinctrl_dev *pctldev, unsigned pin, diff --git a/drivers/pinctrl/pinmux.c b/drivers/pinctrl/pinmux.c index 051e8592990e..c055daf9a80f 100644 --- a/drivers/pinctrl/pinmux.c +++ b/drivers/pinctrl/pinmux.c @@ -471,7 +471,6 @@ void pinmux_disable_setting(struct pinctrl_setting const *setting) { struct pinctrl_dev *pctldev = setting->pctldev; const struct pinctrl_ops *pctlops = pctldev->desc->pctlops; - const struct pinmux_ops *ops = pctldev->desc->pmxops; int ret = 0; const unsigned *pins = NULL; unsigned num_pins = 0; @@ -518,9 +517,6 @@ void pinmux_disable_setting(struct pinctrl_setting const *setting) pins[i], desc->name, gname); } } - - if (ops->disable) - ops->disable(pctldev, setting->data.mux.func, setting->data.mux.group); } #ifdef CONFIG_DEBUG_FS diff --git a/drivers/pinctrl/sh-pfc/pinctrl.c b/drivers/pinctrl/sh-pfc/pinctrl.c index e758af95c209..11db3ee39d40 100644 --- a/drivers/pinctrl/sh-pfc/pinctrl.c +++ b/drivers/pinctrl/sh-pfc/pinctrl.c @@ -345,27 +345,6 @@ done: return ret; } -static void sh_pfc_func_disable(struct pinctrl_dev *pctldev, unsigned selector, - unsigned group) -{ - struct sh_pfc_pinctrl *pmx = pinctrl_dev_get_drvdata(pctldev); - struct sh_pfc *pfc = pmx->pfc; - const struct 
sh_pfc_pin_group *grp = &pfc->info->groups[group]; - unsigned long flags; - unsigned int i; - - spin_lock_irqsave(&pfc->lock, flags); - - for (i = 0; i < grp->nr_pins; ++i) { - int idx = sh_pfc_get_pin_index(pfc, grp->pins[i]); - struct sh_pfc_pin_config *cfg = &pmx->configs[idx]; - - cfg->type = PINMUX_TYPE_NONE; - } - - spin_unlock_irqrestore(&pfc->lock, flags); -} - static int sh_pfc_gpio_request_enable(struct pinctrl_dev *pctldev, struct pinctrl_gpio_range *range, unsigned offset) @@ -464,7 +443,6 @@ static const struct pinmux_ops sh_pfc_pinmux_ops = { .get_function_name = sh_pfc_get_function_name, .get_function_groups = sh_pfc_get_function_groups, .enable = sh_pfc_func_enable, - .disable = sh_pfc_func_disable, .gpio_request_enable = sh_pfc_gpio_request_enable, .gpio_disable_free = sh_pfc_gpio_disable_free, .gpio_set_direction = sh_pfc_gpio_set_direction, diff --git a/drivers/pinctrl/sirf/pinctrl-sirf.c b/drivers/pinctrl/sirf/pinctrl-sirf.c index 014f5b1fee55..4c1d7c68666d 100644 --- a/drivers/pinctrl/sirf/pinctrl-sirf.c +++ b/drivers/pinctrl/sirf/pinctrl-sirf.c @@ -186,15 +186,6 @@ static int sirfsoc_pinmux_enable(struct pinctrl_dev *pmxdev, unsigned selector, return 0; } -static void sirfsoc_pinmux_disable(struct pinctrl_dev *pmxdev, unsigned selector, - unsigned group) -{ - struct sirfsoc_pmx *spmx; - - spmx = pinctrl_dev_get_drvdata(pmxdev); - sirfsoc_pinmux_endisable(spmx, selector, false); -} - static int sirfsoc_pinmux_get_funcs_count(struct pinctrl_dev *pmxdev) { return sirfsoc_pmxfunc_cnt; @@ -240,7 +231,6 @@ static int sirfsoc_pinmux_request_gpio(struct pinctrl_dev *pmxdev, static struct pinmux_ops sirfsoc_pinmux_ops = { .enable = sirfsoc_pinmux_enable, - .disable = sirfsoc_pinmux_disable, .get_functions_count = sirfsoc_pinmux_get_funcs_count, .get_function_name = sirfsoc_pinmux_get_func_name, .get_function_groups = sirfsoc_pinmux_get_groups, diff --git a/drivers/pinctrl/spear/pinctrl-spear.c b/drivers/pinctrl/spear/pinctrl-spear.c index 58bf6867aa17..f72cc4e192bd 100644 --- a/drivers/pinctrl/spear/pinctrl-spear.c +++ b/drivers/pinctrl/spear/pinctrl-spear.c @@ -274,12 +274,6 @@ static int spear_pinctrl_enable(struct pinctrl_dev *pctldev, unsigned function, return spear_pinctrl_endisable(pctldev, function, group, true); } -static void spear_pinctrl_disable(struct pinctrl_dev *pctldev, - unsigned function, unsigned group) -{ - spear_pinctrl_endisable(pctldev, function, group, false); -} - /* gpio with pinmux */ static struct spear_gpio_pingroup *get_gpio_pingroup(struct spear_pmx *pmx, unsigned pin) @@ -345,7 +339,6 @@ static const struct pinmux_ops spear_pinmux_ops = { .get_function_name = spear_pinctrl_get_func_name, .get_function_groups = spear_pinctrl_get_func_groups, .enable = spear_pinctrl_enable, - .disable = spear_pinctrl_disable, .gpio_request_enable = gpio_request_enable, .gpio_disable_free = gpio_disable_free, }; diff --git a/drivers/pinctrl/vt8500/pinctrl-wmt.c b/drivers/pinctrl/vt8500/pinctrl-wmt.c index 2c61281bebd7..8c976c21eeee 100644 --- a/drivers/pinctrl/vt8500/pinctrl-wmt.c +++ b/drivers/pinctrl/vt8500/pinctrl-wmt.c @@ -141,17 +141,6 @@ static int wmt_pmx_enable(struct pinctrl_dev *pctldev, return wmt_set_pinmux(data, func_selector, pinnum); } -static void wmt_pmx_disable(struct pinctrl_dev *pctldev, - unsigned func_selector, - unsigned group_selector) -{ - struct wmt_pinctrl_data *data = pinctrl_dev_get_drvdata(pctldev); - u32 pinnum = data->pins[group_selector].number; - - /* disable by setting GPIO_IN */ - wmt_set_pinmux(data, WMT_FSEL_GPIO_IN, pinnum); -} 
- static void wmt_pmx_gpio_disable_free(struct pinctrl_dev *pctldev, struct pinctrl_gpio_range *range, unsigned offset) @@ -180,7 +169,6 @@ static struct pinmux_ops wmt_pinmux_ops = { .get_function_name = wmt_pmx_get_function_name, .get_function_groups = wmt_pmx_get_function_groups, .enable = wmt_pmx_enable, - .disable = wmt_pmx_disable, .gpio_disable_free = wmt_pmx_gpio_disable_free, .gpio_set_direction = wmt_pmx_gpio_set_direction, }; diff --git a/include/linux/pinctrl/pinmux.h b/include/linux/pinctrl/pinmux.h index c15395031cb3..3097aafbeb24 100644 --- a/include/linux/pinctrl/pinmux.h +++ b/include/linux/pinctrl/pinmux.h @@ -70,8 +70,6 @@ struct pinmux_ops { unsigned * const num_groups); int (*enable) (struct pinctrl_dev *pctldev, unsigned func_selector, unsigned group_selector); - void (*disable) (struct pinctrl_dev *pctldev, unsigned func_selector, - unsigned group_selector); int (*gpio_request_enable) (struct pinctrl_dev *pctldev, struct pinctrl_gpio_range *range, unsigned offset); -- cgit v1.2.3-59-g8ed1b From 2d7768a872b73a7105d6355948dae0acff72f061 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 7 Aug 2014 08:16:00 +0100 Subject: iio: st_sensors: add devicetree probing support The I2C devices that make up the STMicroelectronics MEMS sensors may be sneakily enabled by cleverly giving the device node the same name as a string match from the platform device ID table. However, the right method is to use the compatible string. On detection, the ST sensors use the ID string to probe and instantiate the right sensor driver, so pass the kernel-internal ID string in the .data field of the OF match table, and set the I2C client name to this name when a compatible match is used. This avoids having misc Linux-specific strings floating around in the device tree.
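In miniature, the pattern looks like the sketch below (the "vendor,foo-accel" compatible, the "foo_*" names and the example node are invented for illustration; the real tables and the st_sensors_of_i2c_probe() helper follow in the diff): the OF match table carries the kernel-internal device name in .data, and a compatible match overrides whatever the DT node happened to be called.

#include <linux/i2c.h>
#include <linux/module.h>
#include <linux/of.h>
#include <linux/of_device.h>
#include <linux/string.h>

/* A board DT would then enable the device purely via the compatible string:
 *      accel@19 {
 *              compatible = "vendor,foo-accel";
 *              reg = <0x19>;
 *      };
 */
static const struct of_device_id foo_of_match[] = {
        /* DT compatible string on the left, internal kernel name in .data */
        { .compatible = "vendor,foo-accel", .data = "foo_accel" },
        { },
};
MODULE_DEVICE_TABLE(of, foo_of_match);

static int foo_i2c_probe(struct i2c_client *client,
                         const struct i2c_device_id *id)
{
        const struct of_device_id *of_id;

        of_id = of_match_device(foo_of_match, &client->dev);
        if (of_id) {
                /* The name from the OF match takes precedence if present */
                strlcpy(client->name, of_id->data, sizeof(client->name));
        }

        /* ... the rest of the probe keys off client->name as before ... */
        return 0;
}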
Cc: Lee Jones Cc: Denis CIOCCA Signed-off-by: Linus Walleij Signed-off-by: Jonathan Cameron --- drivers/iio/accel/st_accel_i2c.c | 51 ++++++++++++++++++++++++++ drivers/iio/common/st_sensors/st_sensors_i2c.c | 30 +++++++++++++++ drivers/iio/gyro/st_gyro_i2c.c | 39 ++++++++++++++++++++ drivers/iio/magnetometer/st_magn_i2c.c | 23 ++++++++++++ drivers/iio/pressure/st_pressure_i2c.c | 23 ++++++++++++ include/linux/iio/common/st_sensors_i2c.h | 11 ++++++ 6 files changed, 177 insertions(+) (limited to 'include/linux') diff --git a/drivers/iio/accel/st_accel_i2c.c b/drivers/iio/accel/st_accel_i2c.c index d7bedbdfc81d..7164aeff3ab1 100644 --- a/drivers/iio/accel/st_accel_i2c.c +++ b/drivers/iio/accel/st_accel_i2c.c @@ -18,6 +18,55 @@ #include #include "st_accel.h" +#ifdef CONFIG_OF +static const struct of_device_id st_accel_of_match[] = { + { + .compatible = "st,lsm303dlh-accel", + .data = LSM303DLH_ACCEL_DEV_NAME, + }, + { + .compatible = "st,lsm303dlhc-accel", + .data = LSM303DLHC_ACCEL_DEV_NAME, + }, + { + .compatible = "st,lis3dh-accel", + .data = LIS3DH_ACCEL_DEV_NAME, + }, + { + .compatible = "st,lsm330d-accel", + .data = LSM330D_ACCEL_DEV_NAME, + }, + { + .compatible = "st,lsm330dl-accel", + .data = LSM330DL_ACCEL_DEV_NAME, + }, + { + .compatible = "st,lsm330dlc-accel", + .data = LSM330DLC_ACCEL_DEV_NAME, + }, + { + .compatible = "st,lis331dlh-accel", + .data = LIS331DLH_ACCEL_DEV_NAME, + }, + { + .compatible = "st,lsm303dl-accel", + .data = LSM303DL_ACCEL_DEV_NAME, + }, + { + .compatible = "st,lsm303dlm-accel", + .data = LSM303DLM_ACCEL_DEV_NAME, + }, + { + .compatible = "st,lsm330-accel", + .data = LSM330_ACCEL_DEV_NAME, + }, + {}, +}; +MODULE_DEVICE_TABLE(of, st_accel_of_match); +#else +#define st_accel_of_match NULL +#endif + static int st_accel_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id) { @@ -31,6 +80,7 @@ static int st_accel_i2c_probe(struct i2c_client *client, adata = iio_priv(indio_dev); adata->dev = &client->dev; + st_sensors_of_i2c_probe(client, st_accel_of_match); st_sensors_i2c_configure(indio_dev, client, adata); @@ -67,6 +117,7 @@ static struct i2c_driver st_accel_driver = { .driver = { .owner = THIS_MODULE, .name = "st-accel-i2c", + .of_match_table = of_match_ptr(st_accel_of_match), }, .probe = st_accel_i2c_probe, .remove = st_accel_i2c_remove, diff --git a/drivers/iio/common/st_sensors/st_sensors_i2c.c b/drivers/iio/common/st_sensors/st_sensors_i2c.c index 38af9440c103..bb6f3085f57b 100644 --- a/drivers/iio/common/st_sensors/st_sensors_i2c.c +++ b/drivers/iio/common/st_sensors/st_sensors_i2c.c @@ -12,6 +12,7 @@ #include #include #include +#include #include @@ -76,6 +77,35 @@ void st_sensors_i2c_configure(struct iio_dev *indio_dev, } EXPORT_SYMBOL(st_sensors_i2c_configure); +#ifdef CONFIG_OF +/** + * st_sensors_of_i2c_probe() - device tree probe for ST I2C sensors + * @client: the I2C client device for the sensor + * @match: the OF match table for the device, containing compatible strings + * but also a .data field with the corresponding internal kernel name + * used by this sensor. + * + * In effect this function matches a compatible string to an internal kernel + * name for a certain sensor device, so that the rest of the autodetection can + * rely on that name from this point on. I2C client devices will be renamed + * to match the internal kernel convention. 
+ */ +void st_sensors_of_i2c_probe(struct i2c_client *client, + const struct of_device_id *match) +{ + const struct of_device_id *of_id; + + of_id = of_match_device(match, &client->dev); + if (!of_id) + return; + + /* The name from the OF match takes precedence if present */ + strncpy(client->name, of_id->data, sizeof(client->name)); + client->name[sizeof(client->name) - 1] = '\0'; +} +EXPORT_SYMBOL(st_sensors_of_i2c_probe); +#endif + MODULE_AUTHOR("Denis Ciocca "); MODULE_DESCRIPTION("STMicroelectronics ST-sensors i2c driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/iio/gyro/st_gyro_i2c.c b/drivers/iio/gyro/st_gyro_i2c.c index 23c12f361b05..8fa0ad2ef4ef 100644 --- a/drivers/iio/gyro/st_gyro_i2c.c +++ b/drivers/iio/gyro/st_gyro_i2c.c @@ -18,6 +18,43 @@ #include #include "st_gyro.h" +#ifdef CONFIG_OF +static const struct of_device_id st_gyro_of_match[] = { + { + .compatible = "st,l3g4200d-gyro", + .data = L3G4200D_GYRO_DEV_NAME, + }, + { + .compatible = "st,lsm330d-gyro", + .data = LSM330D_GYRO_DEV_NAME, + }, + { + .compatible = "st,lsm330dl-gyro", + .data = LSM330DL_GYRO_DEV_NAME, + }, + { + .compatible = "st,lsm330dlc-gyro", + .data = LSM330DLC_GYRO_DEV_NAME, + }, + { + .compatible = "st,l3gd20-gyro", + .data = L3GD20_GYRO_DEV_NAME, + }, + { + .compatible = "st,l3g4is-gyro", + .data = L3G4IS_GYRO_DEV_NAME, + }, + { + .compatible = "st,lsm330-gyro", + .data = LSM330_GYRO_DEV_NAME, + }, + {}, +}; +MODULE_DEVICE_TABLE(of, st_gyro_of_match); +#else +#define st_gyro_of_match NULL +#endif + static int st_gyro_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id) { @@ -31,6 +68,7 @@ static int st_gyro_i2c_probe(struct i2c_client *client, gdata = iio_priv(indio_dev); gdata->dev = &client->dev; + st_sensors_of_i2c_probe(client, st_gyro_of_match); st_sensors_i2c_configure(indio_dev, client, gdata); @@ -65,6 +103,7 @@ static struct i2c_driver st_gyro_driver = { .driver = { .owner = THIS_MODULE, .name = "st-gyro-i2c", + .of_match_table = of_match_ptr(st_gyro_of_match), }, .probe = st_gyro_i2c_probe, .remove = st_gyro_i2c_remove, diff --git a/drivers/iio/magnetometer/st_magn_i2c.c b/drivers/iio/magnetometer/st_magn_i2c.c index 892e0feeb5c1..689250058442 100644 --- a/drivers/iio/magnetometer/st_magn_i2c.c +++ b/drivers/iio/magnetometer/st_magn_i2c.c @@ -18,6 +18,27 @@ #include #include "st_magn.h" +#ifdef CONFIG_OF +static const struct of_device_id st_magn_of_match[] = { + { + .compatible = "st,lsm303dlhc-magn", + .data = LSM303DLHC_MAGN_DEV_NAME, + }, + { + .compatible = "st,lsm303dlm-magn", + .data = LSM303DLM_MAGN_DEV_NAME, + }, + { + .compatible = "st,lis3mdl-magn", + .data = LIS3MDL_MAGN_DEV_NAME, + }, + {}, +}; +MODULE_DEVICE_TABLE(of, st_magn_of_match); +#else +#define st_magn_of_match NULL +#endif + static int st_magn_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id) { @@ -31,6 +52,7 @@ static int st_magn_i2c_probe(struct i2c_client *client, mdata = iio_priv(indio_dev); mdata->dev = &client->dev; + st_sensors_of_i2c_probe(client, st_magn_of_match); st_sensors_i2c_configure(indio_dev, client, mdata); @@ -61,6 +83,7 @@ static struct i2c_driver st_magn_driver = { .driver = { .owner = THIS_MODULE, .name = "st-magn-i2c", + .of_match_table = of_match_ptr(st_magn_of_match), }, .probe = st_magn_i2c_probe, .remove = st_magn_i2c_remove, diff --git a/drivers/iio/pressure/st_pressure_i2c.c b/drivers/iio/pressure/st_pressure_i2c.c index 3cd73e39b840..acaf165260bb 100644 --- a/drivers/iio/pressure/st_pressure_i2c.c +++ b/drivers/iio/pressure/st_pressure_i2c.c @@ 
-18,6 +18,27 @@ #include #include "st_pressure.h" +#ifdef CONFIG_OF +static const struct of_device_id st_press_of_match[] = { + { + .compatible = "st,lps001wp-press", + .data = LPS001WP_PRESS_DEV_NAME, + }, + { + .compatible = "st,lps25h-press", + .data = LPS25H_PRESS_DEV_NAME, + }, + { + .compatible = "st,lps331ap-press", + .data = LPS331AP_PRESS_DEV_NAME, + }, + {}, +}; +MODULE_DEVICE_TABLE(of, st_press_of_match); +#else +#define st_press_of_match NULL +#endif + static int st_press_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id) { @@ -31,6 +52,7 @@ static int st_press_i2c_probe(struct i2c_client *client, pdata = iio_priv(indio_dev); pdata->dev = &client->dev; + st_sensors_of_i2c_probe(client, st_press_of_match); st_sensors_i2c_configure(indio_dev, client, pdata); @@ -60,6 +82,7 @@ static struct i2c_driver st_press_driver = { .driver = { .owner = THIS_MODULE, .name = "st-press-i2c", + .of_match_table = of_match_ptr(st_press_of_match), }, .probe = st_press_i2c_probe, .remove = st_press_i2c_remove, diff --git a/include/linux/iio/common/st_sensors_i2c.h b/include/linux/iio/common/st_sensors_i2c.h index 67d845385ae2..1796af093368 100644 --- a/include/linux/iio/common/st_sensors_i2c.h +++ b/include/linux/iio/common/st_sensors_i2c.h @@ -13,8 +13,19 @@ #include #include +#include void st_sensors_i2c_configure(struct iio_dev *indio_dev, struct i2c_client *client, struct st_sensor_data *sdata); +#ifdef CONFIG_OF +void st_sensors_of_i2c_probe(struct i2c_client *client, + const struct of_device_id *match); +#else +static inline void st_sensors_of_i2c_probe(struct i2c_client *client, + const struct of_device_id *match) +{ +} +#endif + #endif /* ST_SENSORS_I2C_H */ -- cgit v1.2.3-59-g8ed1b From 77a533c73f032050be8b447828358a228a0a5736 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 7 Aug 2014 23:29:00 +0100 Subject: iio: core : events ABI for specifying period The iio sysfs ABI defines a way to specify period for roc and thresholds. What: /sys/.../events/in_accel_x_thresh_rising_period What: /sys/.../events/in_accel_x_thresh_falling_period what: /sys/.../events/in_accel_x_roc_rising_period What: /sys/.../events/in_accel_x_roc_falling_period But there is no way to add period with the current event info enum. Added IIO_EV_INFO_PERIOD and corresponding string. 
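As a sketch of how a driver is expected to use the new flag (the "foo_*" names are invented; the callbacks that consume the value are omitted), listing IIO_EV_INFO_PERIOD in an event spec is what makes the *_period attribute appear next to the enable and value attributes:

#include <linux/bitops.h>
#include <linux/iio/iio.h>
#include <linux/iio/types.h>

/* Hypothetical accelerometer channel: exposes
 * in_accel_x_thresh_rising_{en,value,period} in sysfs. */
static const struct iio_event_spec foo_thresh_event = {
        .type = IIO_EV_TYPE_THRESH,
        .dir = IIO_EV_DIR_RISING,
        .mask_separate = BIT(IIO_EV_INFO_ENABLE) |
                         BIT(IIO_EV_INFO_VALUE) |
                         BIT(IIO_EV_INFO_PERIOD),
};

static const struct iio_chan_spec foo_channels[] = {
        {
                .type = IIO_ACCEL,
                .modified = 1,
                .channel2 = IIO_MOD_X,
                .event_spec = &foo_thresh_event,
                .num_event_specs = 1,
        },
};

A value written to the period attribute then reaches the driver through its write_event_value() callback with info == IIO_EV_INFO_PERIOD.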
Signed-off-by: Srinivas Pandruvada Signed-off-by: Jonathan Cameron --- drivers/iio/industrialio-event.c | 1 + include/linux/iio/types.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/drivers/iio/industrialio-event.c b/drivers/iio/industrialio-event.c index 258a973a1fb8..35a5b0311dae 100644 --- a/drivers/iio/industrialio-event.c +++ b/drivers/iio/industrialio-event.c @@ -209,6 +209,7 @@ static const char * const iio_ev_info_text[] = { [IIO_EV_INFO_ENABLE] = "en", [IIO_EV_INFO_VALUE] = "value", [IIO_EV_INFO_HYSTERESIS] = "hysteresis", + [IIO_EV_INFO_PERIOD] = "period", }; static enum iio_event_direction iio_ev_attr_dir(struct iio_dev_attr *attr) diff --git a/include/linux/iio/types.h b/include/linux/iio/types.h index d480631eabc2..4a848d6be3bf 100644 --- a/include/linux/iio/types.h +++ b/include/linux/iio/types.h @@ -70,6 +70,7 @@ enum iio_event_info { IIO_EV_INFO_ENABLE, IIO_EV_INFO_VALUE, IIO_EV_INFO_HYSTERESIS, + IIO_EV_INFO_PERIOD, }; enum iio_event_direction { -- cgit v1.2.3-59-g8ed1b From 2c964a2f4191f2229566895f1a0e85f8339f5dd1 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 10 Jul 2014 21:01:22 +0200 Subject: drivers: tty: Merge alloc_tty_struct and initialize_tty_struct The two functions alloc_tty_struct and initialize_tty_struct are always called together. Merge them into alloc_tty_struct, updating its prototype and the only two callers of these functions. Signed-off-by: Rasmus Villemoes Signed-off-by: Greg Kroah-Hartman --- drivers/tty/pty.c | 19 +++++++++---------- drivers/tty/tty_io.c | 37 +++++++++++++------------------------ include/linux/tty.h | 4 +--- 3 files changed, 23 insertions(+), 37 deletions(-) (limited to 'include/linux') diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c index 25c9bc783722..ac723e3c031a 100644 --- a/drivers/tty/pty.c +++ b/drivers/tty/pty.c @@ -316,7 +316,7 @@ done: * pty_common_install - set up the pty pair * @driver: the pty driver * @tty: the tty being instantiated - * @bool: legacy, true if this is BSD style + * @legacy: true if this is BSD style * * Perform the initial set up for the tty/pty pair. Called from the * tty layer when the port is first opened. 
@@ -331,18 +331,17 @@ static int pty_common_install(struct tty_driver *driver, struct tty_struct *tty, int idx = tty->index; int retval = -ENOMEM; - o_tty = alloc_tty_struct(); - if (!o_tty) - goto err; ports[0] = kmalloc(sizeof **ports, GFP_KERNEL); ports[1] = kmalloc(sizeof **ports, GFP_KERNEL); if (!ports[0] || !ports[1]) - goto err_free_tty; + goto err; if (!try_module_get(driver->other->owner)) { /* This cannot in fact currently happen */ - goto err_free_tty; + goto err; } - initialize_tty_struct(o_tty, driver->other, idx); + o_tty = alloc_tty_struct(driver->other, idx); + if (!o_tty) + goto err_put_module; if (legacy) { /* We always use new tty termios data so we can do this @@ -387,12 +386,12 @@ err_free_termios: tty_free_termios(tty); err_deinit_tty: deinitialize_tty_struct(o_tty); + free_tty_struct(o_tty); +err_put_module: module_put(o_tty->driver->owner); -err_free_tty: +err: kfree(ports[0]); kfree(ports[1]); - free_tty_struct(o_tty); -err: return retval; } diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 714320b5e525..8fbad3410c75 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -156,20 +156,6 @@ static void release_tty(struct tty_struct *tty, int idx); static void __proc_set_tty(struct task_struct *tsk, struct tty_struct *tty); static void proc_set_tty(struct task_struct *tsk, struct tty_struct *tty); -/** - * alloc_tty_struct - allocate a tty object - * - * Return a new empty tty structure. The data fields have not - * been initialized in any way but has been zeroed - * - * Locking: none - */ - -struct tty_struct *alloc_tty_struct(void) -{ - return kzalloc(sizeof(struct tty_struct), GFP_KERNEL); -} - /** * free_tty_struct - free a disused tty * @tty: tty struct to free @@ -1455,12 +1441,11 @@ struct tty_struct *tty_init_dev(struct tty_driver *driver, int idx) if (!try_module_get(driver->owner)) return ERR_PTR(-ENODEV); - tty = alloc_tty_struct(); + tty = alloc_tty_struct(driver, idx); if (!tty) { retval = -ENOMEM; goto err_module_put; } - initialize_tty_struct(tty, driver, idx); tty_lock(tty); retval = tty_driver_install_tty(driver, tty); @@ -3003,19 +2988,21 @@ static struct device *tty_get_device(struct tty_struct *tty) /** - * initialize_tty_struct - * @tty: tty to initialize + * alloc_tty_struct * - * This subroutine initializes a tty structure that has been newly - * allocated. + * This subroutine allocates and initializes a tty structure. 
* - * Locking: none - tty in question must not be exposed at this point + * Locking: none - tty in question is not exposed at this point */ -void initialize_tty_struct(struct tty_struct *tty, - struct tty_driver *driver, int idx) +struct tty_struct *alloc_tty_struct(struct tty_driver *driver, int idx) { - memset(tty, 0, sizeof(struct tty_struct)); + struct tty_struct *tty; + + tty = kzalloc(sizeof(*tty), GFP_KERNEL); + if (!tty) + return NULL; + kref_init(&tty->kref); tty->magic = TTY_MAGIC; tty_ldisc_init(tty); @@ -3039,6 +3026,8 @@ void initialize_tty_struct(struct tty_struct *tty, tty->index = idx; tty_line_name(driver, idx, tty->name); tty->dev = tty_get_device(tty); + + return tty; } /** diff --git a/include/linux/tty.h b/include/linux/tty.h index 1c3316a47d7e..84132942902a 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -477,13 +477,11 @@ extern int tty_mode_ioctl(struct tty_struct *tty, struct file *file, unsigned int cmd, unsigned long arg); extern int tty_perform_flush(struct tty_struct *tty, unsigned long arg); extern void tty_default_fops(struct file_operations *fops); -extern struct tty_struct *alloc_tty_struct(void); +extern struct tty_struct *alloc_tty_struct(struct tty_driver *driver, int idx); extern int tty_alloc_file(struct file *file); extern void tty_add_file(struct tty_struct *tty, struct file *file); extern void tty_free_file(struct file *file); extern void free_tty_struct(struct tty_struct *tty); -extern void initialize_tty_struct(struct tty_struct *tty, - struct tty_driver *driver, int idx); extern void deinitialize_tty_struct(struct tty_struct *tty); extern struct tty_struct *tty_init_dev(struct tty_driver *driver, int idx); extern int tty_release(struct inode *inode, struct file *filp); -- cgit v1.2.3-59-g8ed1b From 726526c3552c5718d5aba11ac2e914b0081a5c88 Mon Sep 17 00:00:00 2001 From: Sudeep Dutt Date: Fri, 11 Jul 2014 14:04:20 -0700 Subject: misc: mic: add a bus driver for virtual MIC devices This MIC virtual bus driver takes the responsibility of creating all the virtual devices connected to the PCIe device on the host and the platform device on the card. The MIC bus hardware operations provide a way to abstract certain hardware details from the base physical devices. Examples of devices added on the MIC virtual bus include host DMA and card DMA. This abstraction enables using a common DMA driver on host and card. Reviewed-by: Ashutosh Dixit Reviewed-by: Nikhil Rao Signed-off-by: Sudeep Dutt Signed-off-by: Siva Yerramreddy Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mic/Kconfig | 17 ++++ drivers/misc/mic/Makefile | 1 + drivers/misc/mic/bus/Makefile | 5 + drivers/misc/mic/bus/mic_bus.c | 218 +++++++++++++++++++++++++++++++++++++++++ include/linux/mic_bus.h | 110 +++++++++++++++++++++ 5 files changed, 351 insertions(+) create mode 100644 drivers/misc/mic/bus/Makefile create mode 100644 drivers/misc/mic/bus/mic_bus.c create mode 100644 include/linux/mic_bus.h (limited to 'include/linux') diff --git a/drivers/misc/mic/Kconfig b/drivers/misc/mic/Kconfig index 462a5b1d8651..ee1d2ac3cd09 100644 --- a/drivers/misc/mic/Kconfig +++ b/drivers/misc/mic/Kconfig @@ -1,3 +1,20 @@ +comment "Intel MIC Bus Driver" + +config INTEL_MIC_BUS + tristate "Intel MIC Bus Driver" + depends on 64BIT && PCI && X86 && X86_DEV_DMA_OPS + help + This option is selected by any driver which registers a + device or driver on the MIC Bus, such as CONFIG_INTEL_MIC_HOST, + CONFIG_INTEL_MIC_CARD, CONFIG_INTEL_MIC_X100_DMA etc. 
+ + If you are building a host/card kernel with an Intel MIC device + then say M (recommended) or Y, else say N. If unsure say N. + + More information about the Intel MIC family as well as the Linux + OS and tools for MIC to use with this driver are available from + . + comment "Intel MIC Host Driver" config INTEL_MIC_HOST diff --git a/drivers/misc/mic/Makefile b/drivers/misc/mic/Makefile index 05b34d683a58..e9bf148755e2 100644 --- a/drivers/misc/mic/Makefile +++ b/drivers/misc/mic/Makefile @@ -4,3 +4,4 @@ # obj-$(CONFIG_INTEL_MIC_HOST) += host/ obj-$(CONFIG_INTEL_MIC_CARD) += card/ +obj-$(CONFIG_INTEL_MIC_BUS) += bus/ diff --git a/drivers/misc/mic/bus/Makefile b/drivers/misc/mic/bus/Makefile new file mode 100644 index 000000000000..d85c7f2a0af4 --- /dev/null +++ b/drivers/misc/mic/bus/Makefile @@ -0,0 +1,5 @@ +# +# Makefile - Intel MIC Linux driver. +# Copyright(c) 2014, Intel Corporation. +# +obj-$(CONFIG_INTEL_MIC_BUS) += mic_bus.o diff --git a/drivers/misc/mic/bus/mic_bus.c b/drivers/misc/mic/bus/mic_bus.c new file mode 100644 index 000000000000..961ae90aae47 --- /dev/null +++ b/drivers/misc/mic/bus/mic_bus.c @@ -0,0 +1,218 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2014 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Intel MIC Bus driver. + * + * This implementation is very similar to the the virtio bus driver + * implementation @ drivers/virtio/virtio.c + */ +#include +#include +#include +#include + +/* Unique numbering for mbus devices. */ +static DEFINE_IDA(mbus_index_ida); + +static ssize_t device_show(struct device *d, + struct device_attribute *attr, char *buf) +{ + struct mbus_device *dev = dev_to_mbus(d); + return sprintf(buf, "0x%04x\n", dev->id.device); +} +static DEVICE_ATTR_RO(device); + +static ssize_t vendor_show(struct device *d, + struct device_attribute *attr, char *buf) +{ + struct mbus_device *dev = dev_to_mbus(d); + return sprintf(buf, "0x%04x\n", dev->id.vendor); +} +static DEVICE_ATTR_RO(vendor); + +static ssize_t modalias_show(struct device *d, + struct device_attribute *attr, char *buf) +{ + struct mbus_device *dev = dev_to_mbus(d); + return sprintf(buf, "mbus:d%08Xv%08X\n", + dev->id.device, dev->id.vendor); +} +static DEVICE_ATTR_RO(modalias); + +static struct attribute *mbus_dev_attrs[] = { + &dev_attr_device.attr, + &dev_attr_vendor.attr, + &dev_attr_modalias.attr, + NULL, +}; +ATTRIBUTE_GROUPS(mbus_dev); + +static inline int mbus_id_match(const struct mbus_device *dev, + const struct mbus_device_id *id) +{ + if (id->device != dev->id.device && id->device != MBUS_DEV_ANY_ID) + return 0; + + return id->vendor == MBUS_DEV_ANY_ID || id->vendor == dev->id.vendor; +} + +/* + * This looks through all the IDs a driver claims to support. If any of them + * match, we return 1 and the kernel will call mbus_dev_probe(). 
+ */ +static int mbus_dev_match(struct device *dv, struct device_driver *dr) +{ + unsigned int i; + struct mbus_device *dev = dev_to_mbus(dv); + const struct mbus_device_id *ids; + + ids = drv_to_mbus(dr)->id_table; + for (i = 0; ids[i].device; i++) + if (mbus_id_match(dev, &ids[i])) + return 1; + return 0; +} + +static int mbus_uevent(struct device *dv, struct kobj_uevent_env *env) +{ + struct mbus_device *dev = dev_to_mbus(dv); + + return add_uevent_var(env, "MODALIAS=mbus:d%08Xv%08X", + dev->id.device, dev->id.vendor); +} + +static int mbus_dev_probe(struct device *d) +{ + int err; + struct mbus_device *dev = dev_to_mbus(d); + struct mbus_driver *drv = drv_to_mbus(dev->dev.driver); + + err = drv->probe(dev); + if (!err) + if (drv->scan) + drv->scan(dev); + return err; +} + +static int mbus_dev_remove(struct device *d) +{ + struct mbus_device *dev = dev_to_mbus(d); + struct mbus_driver *drv = drv_to_mbus(dev->dev.driver); + + drv->remove(dev); + return 0; +} + +static struct bus_type mic_bus = { + .name = "mic_bus", + .match = mbus_dev_match, + .dev_groups = mbus_dev_groups, + .uevent = mbus_uevent, + .probe = mbus_dev_probe, + .remove = mbus_dev_remove, +}; + +int mbus_register_driver(struct mbus_driver *driver) +{ + driver->driver.bus = &mic_bus; + return driver_register(&driver->driver); +} +EXPORT_SYMBOL_GPL(mbus_register_driver); + +void mbus_unregister_driver(struct mbus_driver *driver) +{ + driver_unregister(&driver->driver); +} +EXPORT_SYMBOL_GPL(mbus_unregister_driver); + +static void mbus_release_dev(struct device *d) +{ + struct mbus_device *mbdev = dev_to_mbus(d); + kfree(mbdev); +} + +struct mbus_device * +mbus_register_device(struct device *pdev, int id, struct dma_map_ops *dma_ops, + struct mbus_hw_ops *hw_ops, void __iomem *mmio_va) +{ + int ret; + struct mbus_device *mbdev; + + mbdev = kzalloc(sizeof(*mbdev), GFP_KERNEL); + if (!mbdev) + return ERR_PTR(-ENOMEM); + + mbdev->mmio_va = mmio_va; + mbdev->dev.parent = pdev; + mbdev->id.device = id; + mbdev->id.vendor = MBUS_DEV_ANY_ID; + mbdev->dev.archdata.dma_ops = dma_ops; + mbdev->dev.dma_mask = &mbdev->dev.coherent_dma_mask; + dma_set_mask(&mbdev->dev, DMA_BIT_MASK(64)); + mbdev->dev.release = mbus_release_dev; + mbdev->hw_ops = hw_ops; + mbdev->dev.bus = &mic_bus; + + /* Assign a unique device index and hence name. */ + ret = ida_simple_get(&mbus_index_ida, 0, 0, GFP_KERNEL); + if (ret < 0) + goto free_mbdev; + + mbdev->index = ret; + dev_set_name(&mbdev->dev, "mbus-dev%u", mbdev->index); + /* + * device_register() causes the bus infrastructure to look for a + * matching driver. 
+ */ + ret = device_register(&mbdev->dev); + if (ret) + goto ida_remove; + return mbdev; +ida_remove: + ida_simple_remove(&mbus_index_ida, mbdev->index); +free_mbdev: + kfree(mbdev); + return ERR_PTR(ret); +} +EXPORT_SYMBOL_GPL(mbus_register_device); + +void mbus_unregister_device(struct mbus_device *mbdev) +{ + int index = mbdev->index; /* save for after device release */ + + device_unregister(&mbdev->dev); + ida_simple_remove(&mbus_index_ida, index); +} +EXPORT_SYMBOL_GPL(mbus_unregister_device); + +static int __init mbus_init(void) +{ + return bus_register(&mic_bus); +} + +static void __exit mbus_exit(void) +{ + bus_unregister(&mic_bus); + ida_destroy(&mbus_index_ida); +} + +core_initcall(mbus_init); +module_exit(mbus_exit); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_DESCRIPTION("Intel(R) MIC Bus driver"); +MODULE_LICENSE("GPL v2"); diff --git a/include/linux/mic_bus.h b/include/linux/mic_bus.h new file mode 100644 index 000000000000..d5b5f76d57ef --- /dev/null +++ b/include/linux/mic_bus.h @@ -0,0 +1,110 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2014 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Intel MIC Bus driver. + * + * This implementation is very similar to the the virtio bus driver + * implementation @ include/linux/virtio.h. + */ +#ifndef _MIC_BUS_H_ +#define _MIC_BUS_H_ +/* + * Everything a mbus driver needs to work with any particular mbus + * implementation. + */ +#include +#include + +struct mbus_device_id { + __u32 device; + __u32 vendor; +}; + +#define MBUS_DEV_DMA_HOST 2 +#define MBUS_DEV_DMA_MIC 3 +#define MBUS_DEV_ANY_ID 0xffffffff + +/** + * mbus_device - representation of a device using mbus + * @mmio_va: virtual address of mmio space + * @hw_ops: the hardware ops supported by this device. + * @id: the device type identification (used to match it with a driver). + * @dev: underlying device. + * be used to communicate with. + * @index: unique position on the mbus bus + */ +struct mbus_device { + void __iomem *mmio_va; + struct mbus_hw_ops *hw_ops; + struct mbus_device_id id; + struct device dev; + int index; +}; + +/** + * mbus_driver - operations for a mbus I/O driver + * @driver: underlying device driver (populate name and owner). + * @id_table: the ids serviced by this driver. + * @probe: the function to call when a device is found. Returns 0 or -errno. + * @remove: the function to call when a device is removed. + */ +struct mbus_driver { + struct device_driver driver; + const struct mbus_device_id *id_table; + int (*probe)(struct mbus_device *dev); + void (*scan)(struct mbus_device *dev); + void (*remove)(struct mbus_device *dev); +}; + +/** + * struct mic_irq - opaque pointer used as cookie + */ +struct mic_irq; + +/** + * mbus_hw_ops - Hardware operations for accessing a MIC device on the MIC bus. 
+ */ +struct mbus_hw_ops { + struct mic_irq* (*request_threaded_irq)(struct mbus_device *mbdev, + irq_handler_t handler, + irq_handler_t thread_fn, + const char *name, void *data, + int intr_src); + void (*free_irq)(struct mbus_device *mbdev, + struct mic_irq *cookie, void *data); + void (*ack_interrupt)(struct mbus_device *mbdev, int num); +}; + +struct mbus_device * +mbus_register_device(struct device *pdev, int id, struct dma_map_ops *dma_ops, + struct mbus_hw_ops *hw_ops, void __iomem *mmio_va); +void mbus_unregister_device(struct mbus_device *mbdev); + +int mbus_register_driver(struct mbus_driver *drv); +void mbus_unregister_driver(struct mbus_driver *drv); + +static inline struct mbus_device *dev_to_mbus(struct device *_dev) +{ + return container_of(_dev, struct mbus_device, dev); +} + +static inline struct mbus_driver *drv_to_mbus(struct device_driver *drv) +{ + return container_of(drv, struct mbus_driver, driver); +} + +#endif /* _MIC_BUS_H */ -- cgit v1.2.3-59-g8ed1b From c14deddec1fbd8c9757c53a49dbfd2dc83265f21 Mon Sep 17 00:00:00 2001 From: "grmoore@altera.com" Date: Tue, 29 Apr 2014 10:29:51 -0500 Subject: mtd: spi-nor: add support for flag status register on Micron chips Some new Micron flash chips require reading the flag status register to determine when operations have completed. Furthermore, chips with multi-die stacks of the 65nm 256Mb QSPI also require reading the status register before reading the flag status register. This patch adds support for the flag status register in the n25q512ax3 and n25q00 Micron QSPI flash chips. Signed-off-by: Graham Moore Signed-off-by: Brian Norris --- drivers/mtd/spi-nor/spi-nor.c | 52 +++++++++++++++++++++++++++++++++++++++++++ include/linux/mtd/spi-nor.h | 4 ++++ 2 files changed, 56 insertions(+) (limited to 'include/linux') diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c index c713c8656710..7da3a7067c35 100644 --- a/drivers/mtd/spi-nor/spi-nor.c +++ b/drivers/mtd/spi-nor/spi-nor.c @@ -47,6 +47,25 @@ static int read_sr(struct spi_nor *nor) return val; } +/* + * Read the flag status register, returning its value in the location + * Return the status register value. + * Returns negative if error occurred. + */ +static int read_fsr(struct spi_nor *nor) +{ + int ret; + u8 val; + + ret = nor->read_reg(nor, SPINOR_OP_RDFSR, &val, 1); + if (ret < 0) { + pr_err("error %d reading FSR\n", ret); + return ret; + } + + return val; +} + /* * Read configuration register, returning its value in the * location. Return the configuration register value. @@ -165,6 +184,32 @@ static int spi_nor_wait_till_ready(struct spi_nor *nor) return -ETIMEDOUT; } +static int spi_nor_wait_till_fsr_ready(struct spi_nor *nor) +{ + unsigned long deadline; + int sr; + int fsr; + + deadline = jiffies + MAX_READY_WAIT_JIFFIES; + + do { + cond_resched(); + + sr = read_sr(nor); + if (sr < 0) { + break; + } else if (!(sr & SR_WIP)) { + fsr = read_fsr(nor); + if (fsr < 0) + break; + if (fsr & FSR_READY) + return 0; + } + } while (!time_after_eq(jiffies, deadline)); + + return -ETIMEDOUT; +} + /* * Service routine to read status register until ready, or timeout occurs. * Returns non-zero if error. 
@@ -402,6 +447,7 @@ struct flash_info { #define SECT_4K_PMC 0x10 /* SPINOR_OP_BE_4K_PMC works uniformly */ #define SPI_NOR_DUAL_READ 0x20 /* Flash supports Dual Read */ #define SPI_NOR_QUAD_READ 0x40 /* Flash supports Quad Read */ +#define USE_FSR 0x80 /* use flag status register */ }; #define INFO(_jedec_id, _ext_id, _sector_size, _n_sectors, _flags) \ @@ -488,6 +534,8 @@ const struct spi_device_id spi_nor_ids[] = { { "n25q128a13", INFO(0x20ba18, 0, 64 * 1024, 256, 0) }, { "n25q256a", INFO(0x20ba19, 0, 64 * 1024, 512, SECT_4K) }, { "n25q512a", INFO(0x20bb20, 0, 64 * 1024, 1024, SECT_4K) }, + { "n25q512ax3", INFO(0x20ba20, 0, 64 * 1024, 1024, USE_FSR) }, + { "n25q00", INFO(0x20ba21, 0, 64 * 1024, 2048, USE_FSR) }, /* PMC */ { "pm25lv512", INFO(0, 0, 32 * 1024, 2, SECT_4K_PMC) }, @@ -965,6 +1013,10 @@ int spi_nor_scan(struct spi_nor *nor, const struct spi_device_id *id, else mtd->_write = spi_nor_write; + if ((info->flags & USE_FSR) && + nor->wait_till_ready == spi_nor_wait_till_ready) + nor->wait_till_ready = spi_nor_wait_till_fsr_ready; + /* prefer "small sector" erase if possible */ if (info->flags & SECT_4K) { nor->erase_opcode = SPINOR_OP_BE_4K; diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index 53241842a7ab..9e6294f32ba8 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -34,6 +34,7 @@ #define SPINOR_OP_SE 0xd8 /* Sector erase (usually 64KiB) */ #define SPINOR_OP_RDID 0x9f /* Read JEDEC ID */ #define SPINOR_OP_RDCR 0x35 /* Read configuration register */ +#define SPINOR_OP_RDFSR 0x70 /* Read flag status register */ /* 4-byte address opcodes - used on Spansion and some Macronix flashes. */ #define SPINOR_OP_READ4 0x13 /* Read data bytes (low frequency) */ @@ -66,6 +67,9 @@ #define SR_QUAD_EN_MX 0x40 /* Macronix Quad I/O */ +/* Flag Status Register bits */ +#define FSR_READY 0x80 + /* Configuration Register bits. */ #define CR_QUAD_EN_SPAN 0x2 /* Spansion Quad I/O */ -- cgit v1.2.3-59-g8ed1b From 2004c726b9d9a9670b7f837190be9c8dfa7a0e9d Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sat, 21 Jun 2014 20:52:15 -0400 Subject: auth_gss: fetch the acceptor name out of the downcall If rpc.gssd sends us an acceptor name string trailing the context token, stash it as part of the context. Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- include/linux/sunrpc/auth_gss.h | 1 + net/sunrpc/auth_gss/auth_gss.c | 20 +++++++++++++++++--- 2 files changed, 18 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/auth_gss.h b/include/linux/sunrpc/auth_gss.h index f1cfd4c85cd0..cbc6875fb9cf 100644 --- a/include/linux/sunrpc/auth_gss.h +++ b/include/linux/sunrpc/auth_gss.h @@ -71,6 +71,7 @@ struct gss_cl_ctx { spinlock_t gc_seq_lock; struct gss_ctx __rcu *gc_gss_ctx; struct xdr_netobj gc_wire_ctx; + struct xdr_netobj gc_acceptor; u32 gc_win; unsigned long gc_expiry; struct rcu_head gc_rcu; diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index b6e440baccc3..e34af68603bd 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -262,9 +262,22 @@ gss_fill_context(const void *p, const void *end, struct gss_cl_ctx *ctx, struct p = ERR_PTR(ret); goto err; } - dprintk("RPC: %s Success. gc_expiry %lu now %lu timeout %u\n", - __func__, ctx->gc_expiry, now, timeout); - return q; + + /* is there any trailing data? 
*/ + if (q == end) { + p = q; + goto done; + } + + /* pull in acceptor name (if there is one) */ + p = simple_get_netobj(q, end, &ctx->gc_acceptor); + if (IS_ERR(p)) + goto err; +done: + dprintk("RPC: %s Success. gc_expiry %lu now %lu timeout %u acceptor %.*s\n", + __func__, ctx->gc_expiry, now, timeout, ctx->gc_acceptor.len, + ctx->gc_acceptor.data); + return p; err: dprintk("RPC: %s returns error %ld\n", __func__, -PTR_ERR(p)); return p; @@ -1225,6 +1238,7 @@ gss_do_free_ctx(struct gss_cl_ctx *ctx) gss_delete_sec_context(&ctx->gc_gss_ctx); kfree(ctx->gc_wire_ctx.data); + kfree(ctx->gc_acceptor.data); kfree(ctx); } -- cgit v1.2.3-59-g8ed1b From a0337d1ddb5a4bd609e3ff0955551cb240340340 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sat, 21 Jun 2014 20:52:16 -0400 Subject: sunrpc: add a new "stringify_acceptor" rpc_credop ...and add an new rpc_auth function to call it when it exists. This is only applicable for AUTH_GSS mechanisms, so we only specify this for those sorts of credentials. Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- include/linux/sunrpc/auth.h | 2 ++ net/sunrpc/auth.c | 9 ++++++ net/sunrpc/auth_gss/auth_gss.c | 62 ++++++++++++++++++++++++++++-------------- 3 files changed, 53 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index 790be1472792..c683b9a06913 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -140,6 +140,7 @@ struct rpc_credops { void *, __be32 *, void *); int (*crkey_timeout)(struct rpc_cred *); bool (*crkey_to_expire)(struct rpc_cred *); + char * (*crstringify_acceptor)(struct rpc_cred *); }; extern const struct rpc_authops authunix_ops; @@ -182,6 +183,7 @@ void rpcauth_clear_credcache(struct rpc_cred_cache *); int rpcauth_key_timeout_notify(struct rpc_auth *, struct rpc_cred *); bool rpcauth_cred_key_to_expire(struct rpc_cred *); +char * rpcauth_stringify_acceptor(struct rpc_cred *); static inline struct rpc_cred * get_rpccred(struct rpc_cred *cred) diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index f77366717420..1481efff6aa2 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -363,6 +363,15 @@ rpcauth_cred_key_to_expire(struct rpc_cred *cred) } EXPORT_SYMBOL_GPL(rpcauth_cred_key_to_expire); +char * +rpcauth_stringify_acceptor(struct rpc_cred *cred) +{ + if (!cred->cr_ops->crstringify_acceptor) + return NULL; + return cred->cr_ops->crstringify_acceptor(cred); +} +EXPORT_SYMBOL_GPL(rpcauth_stringify_acceptor); + /* * Destroy a list of credentials */ diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index e34af68603bd..73854314fb85 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -1346,6 +1346,26 @@ gss_cred_init(struct rpc_auth *auth, struct rpc_cred *cred) return err; } +static char * +gss_stringify_acceptor(struct rpc_cred *cred) +{ + char *string; + struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); + struct xdr_netobj *acceptor = &gss_cred->gc_ctx->gc_acceptor; + + /* no point if there's no string */ + if (!acceptor->len) + return NULL; + + string = kmalloc(acceptor->len + 1, GFP_KERNEL); + if (!string) + return string; + + memcpy(string, acceptor->data, acceptor->len); + string[acceptor->len] = '\0'; + return string; +} + /* * Returns -EACCES if GSS context is NULL or will expire within the * timeout (miliseconds) @@ -1923,29 +1943,31 @@ static const struct rpc_authops authgss_ops = { }; static const struct rpc_credops gss_credops = { 
- .cr_name = "AUTH_GSS", - .crdestroy = gss_destroy_cred, - .cr_init = gss_cred_init, - .crbind = rpcauth_generic_bind_cred, - .crmatch = gss_match, - .crmarshal = gss_marshal, - .crrefresh = gss_refresh, - .crvalidate = gss_validate, - .crwrap_req = gss_wrap_req, - .crunwrap_resp = gss_unwrap_resp, - .crkey_timeout = gss_key_timeout, + .cr_name = "AUTH_GSS", + .crdestroy = gss_destroy_cred, + .cr_init = gss_cred_init, + .crbind = rpcauth_generic_bind_cred, + .crmatch = gss_match, + .crmarshal = gss_marshal, + .crrefresh = gss_refresh, + .crvalidate = gss_validate, + .crwrap_req = gss_wrap_req, + .crunwrap_resp = gss_unwrap_resp, + .crkey_timeout = gss_key_timeout, + .crstringify_acceptor = gss_stringify_acceptor, }; static const struct rpc_credops gss_nullops = { - .cr_name = "AUTH_GSS", - .crdestroy = gss_destroy_nullcred, - .crbind = rpcauth_generic_bind_cred, - .crmatch = gss_match, - .crmarshal = gss_marshal, - .crrefresh = gss_refresh_null, - .crvalidate = gss_validate, - .crwrap_req = gss_wrap_req, - .crunwrap_resp = gss_unwrap_resp, + .cr_name = "AUTH_GSS", + .crdestroy = gss_destroy_nullcred, + .crbind = rpcauth_generic_bind_cred, + .crmatch = gss_match, + .crmarshal = gss_marshal, + .crrefresh = gss_refresh_null, + .crvalidate = gss_validate, + .crwrap_req = gss_wrap_req, + .crunwrap_resp = gss_unwrap_resp, + .crstringify_acceptor = gss_stringify_acceptor, }; static const struct rpc_pipe_ops gss_upcall_ops_v0 = { -- cgit v1.2.3-59-g8ed1b From f11b2a1cfbf5dd783eb55cb470509d06e20d1c78 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sat, 21 Jun 2014 20:52:17 -0400 Subject: nfs4: copy acceptor name from context to nfs_client The current CB_COMPOUND handling code tries to compare the principal name of the request with the cl_hostname in the client. This is not guaranteed to ever work, particularly if the client happened to mount a CNAME of the server or a non-fqdn. Fix this by instead comparing the cr_principal string with the acceptor name that we get from gssd. In the event that gssd didn't send one down (i.e. it was too old), then we fall back to trying to use the cl_hostname as we do today. Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/nfs/callback.c | 12 ++++++++++++ fs/nfs/client.c | 1 + fs/nfs/nfs4proc.c | 33 ++++++++++++++++++++++++++++++++- include/linux/nfs_fs_sb.h | 1 + include/linux/nfs_xdr.h | 1 + 5 files changed, 47 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 073b4cf67ed9..54de482143cc 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -428,6 +428,18 @@ check_gss_callback_principal(struct nfs_client *clp, struct svc_rqst *rqstp) if (p == NULL) return 0; + /* + * Did we get the acceptor from userland during the SETCLIENID + * negotiation? + */ + if (clp->cl_acceptor) + return !strcmp(p, clp->cl_acceptor); + + /* + * Otherwise try to verify it using the cl_hostname. Note that this + * doesn't work if a non-canonical hostname was used in the devname. 
+ */ + /* Expect a GSS_C_NT_HOSTBASED_NAME like "nfs@serverhostname" */ if (memcmp(p, "nfs@", 4) != 0) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index b213ee8fb012..168aa0df2658 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -252,6 +252,7 @@ void nfs_free_client(struct nfs_client *clp) put_net(clp->cl_net); put_nfs_version(clp->cl_nfs_mod); kfree(clp->cl_hostname); + kfree(clp->cl_acceptor); kfree(clp); dprintk("<-- nfs_free_client()\n"); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 0b8490eab486..b7babb3b8a4d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4936,6 +4936,18 @@ nfs4_init_callback_netid(const struct nfs_client *clp, char *buf, size_t len) return scnprintf(buf, len, "tcp"); } +static void nfs4_setclientid_done(struct rpc_task *task, void *calldata) +{ + struct nfs4_setclientid *sc = calldata; + + if (task->tk_status == 0) + sc->sc_cred = get_rpccred(task->tk_rqstp->rq_cred); +} + +static const struct rpc_call_ops nfs4_setclientid_ops = { + .rpc_call_done = nfs4_setclientid_done, +}; + /** * nfs4_proc_setclientid - Negotiate client ID * @clp: state data structure @@ -4962,6 +4974,14 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, .rpc_resp = res, .rpc_cred = cred, }; + struct rpc_task *task; + struct rpc_task_setup task_setup_data = { + .rpc_client = clp->cl_rpcclient, + .rpc_message = &msg, + .callback_ops = &nfs4_setclientid_ops, + .callback_data = &setclientid, + .flags = RPC_TASK_TIMEOUT, + }; int status; /* nfs_client_id4 */ @@ -4988,7 +5008,18 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, dprintk("NFS call setclientid auth=%s, '%.*s'\n", clp->cl_rpcclient->cl_auth->au_ops->au_name, setclientid.sc_name_len, setclientid.sc_name); - status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); + task = rpc_run_task(&task_setup_data); + if (IS_ERR(task)) { + status = PTR_ERR(task); + goto out; + } + status = task->tk_status; + if (setclientid.sc_cred) { + clp->cl_acceptor = rpcauth_stringify_acceptor(setclientid.sc_cred); + put_rpccred(setclientid.sc_cred); + } + rpc_put_task(task); +out: trace_nfs4_setclientid(clp, status); dprintk("NFS reply setclientid: %d\n", status); return status; diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 1150ea41b626..922be2e050f5 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -45,6 +45,7 @@ struct nfs_client { struct sockaddr_storage cl_addr; /* server identifier */ size_t cl_addrlen; char * cl_hostname; /* hostname of server */ + char * cl_acceptor; /* GSSAPI acceptor name */ struct list_head cl_share_link; /* link in global client list */ struct list_head cl_superblocks; /* List of nfs_server structs */ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 81cbbf313272..0040629894df 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -993,6 +993,7 @@ struct nfs4_setclientid { unsigned int sc_uaddr_len; char sc_uaddr[RPCBIND_MAXUADDRLEN + 1]; u32 sc_cb_ident; + struct rpc_cred *sc_cred; }; struct nfs4_setclientid_res { -- cgit v1.2.3-59-g8ed1b From 17fa388ddceb89e9673d83d82cf28ef79e8225d5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 13 Jul 2014 17:00:37 +0200 Subject: locks: typedef fl_owner_t to void * fl_owner_t is a cookie that can store all kinds of different pointers, so don't pretend it points to a file structure. For now just change the typedef, but as a follow-on this will allow us to get rid of lots of casts and eventually the typedef itself.
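As an aside (sketch only, not part of this change; the helper names below are made up for the example): lockd, for instance, stashes a struct nlm_host pointer in fl_owner, and the void * typedef is what lets such casts disappear later on:

/* Hypothetical helpers, shown only to illustrate the effect of the typedef. */
static void example_stash_owner(struct file_lock *fl, struct nlm_host *host)
{
	fl->fl_owner = host;		/* previously needed (fl_owner_t)host */
}

static struct nlm_host *example_fetch_owner(struct file_lock *fl)
{
	return fl->fl_owner;		/* previously needed an explicit cast back */
}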
Signed-off-by: Christoph Hellwig Signed-off-by: Jeff Layton --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index e11d60cc867b..2daccaf4b547 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -833,7 +833,7 @@ static inline struct file *get_file(struct file *f) * * Lockd stuffs a "host" pointer into this. */ -typedef struct files_struct *fl_owner_t; +typedef void *fl_owner_t; struct file_lock_operations { void (*fl_copy_lock)(struct file_lock *, struct file_lock *); -- cgit v1.2.3-59-g8ed1b From ec31a05c4dfa95149b1754d9de92831a5a95c636 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 12 Jul 2014 15:49:16 +0200 Subject: net: filter: sk_chk_filter() no longer mangles filter Add const attribute to filter argument to make clear it is no longer modified. Signed-off-by: Eric Dumazet Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/filter.h | 2 +- net/core/filter.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index b885dcb7eaca..c43c8258e682 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -362,7 +362,7 @@ void sk_unattached_filter_destroy(struct sk_filter *fp); int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); int sk_detach_filter(struct sock *sk); -int sk_chk_filter(struct sock_filter *filter, unsigned int flen); +int sk_chk_filter(const struct sock_filter *filter, unsigned int flen); int sk_get_filter(struct sock *sk, struct sock_filter __user *filter, unsigned int len); diff --git a/net/core/filter.c b/net/core/filter.c index 87af1e3e56c0..b90ae7fb3b89 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1085,7 +1085,7 @@ err: * a cell if not previously written, and we check all branches to be sure * a malicious user doesn't try to abuse us. */ -static int check_load_and_stores(struct sock_filter *filter, int flen) +static int check_load_and_stores(const struct sock_filter *filter, int flen) { u16 *masks, memvalid = 0; /* One bit per cell, 16 cells */ int pc, ret = 0; @@ -1218,7 +1218,7 @@ static bool chk_code_allowed(u16 code_to_probe) * * Returns 0 if the rule set is legal or -EINVAL if not. */ -int sk_chk_filter(struct sock_filter *filter, unsigned int flen) +int sk_chk_filter(const struct sock_filter *filter, unsigned int flen) { bool anc_found; int pc; @@ -1228,7 +1228,7 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen) /* Check the filter code now */ for (pc = 0; pc < flen; pc++) { - struct sock_filter *ftest = &filter[pc]; + const struct sock_filter *ftest = &filter[pc]; /* May we actually operate on this code? */ if (!chk_code_allowed(ftest->code)) -- cgit v1.2.3-59-g8ed1b From d788cbd3f9065d829351746f94417d469f14eaaf Mon Sep 17 00:00:00 2001 From: Kukjin Kim Date: Tue, 1 Jul 2014 06:32:27 +0900 Subject: ASoC: samsung: remove s5pc100 related codes This patch removes s5pc100 related codes in . 
Signed-off-by: Kukjin Kim Signed-off-by: Mark Brown --- include/linux/platform_data/asoc-s3c.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/asoc-s3c.h b/include/linux/platform_data/asoc-s3c.h index 709c6f7e2f8c..a6591c693ebb 100644 --- a/include/linux/platform_data/asoc-s3c.h +++ b/include/linux/platform_data/asoc-s3c.h @@ -15,15 +15,6 @@ #define S3C64XX_AC97_GPE 1 extern void s3c64xx_ac97_setup_gpio(int); -/* - * The machine init code calls s5p*_spdif_setup_gpio with - * one of these defines in order to select appropriate bank - * of GPIO for S/PDIF pins - */ -#define S5PC100_SPDIF_GPD 0 -#define S5PC100_SPDIF_GPG3 1 -extern void s5pc100_spdif_setup_gpio(int); - struct samsung_i2s { /* If the Primary DAI has 5.1 Channels */ #define QUIRK_PRI_6CHAN (1 << 0) -- cgit v1.2.3-59-g8ed1b From 1028a37daa148cc6cf85aa2aecb4390ddf1e1e56 Mon Sep 17 00:00:00 2001 From: James Ban Date: Mon, 14 Jul 2014 13:48:45 +0900 Subject: regulator: da9211: new regulator driver This is the driver for the Dialog DA9211 Multi-phase 12A DC-DC Buck Converter regulator. It communicates via an I2C bus to the device. Signed-off-by: James Ban Signed-off-by: Mark Brown --- drivers/regulator/Kconfig | 10 + drivers/regulator/Makefile | 1 + drivers/regulator/da9211-regulator.c | 388 +++++++++++++++++++++++++++++++++++ drivers/regulator/da9211-regulator.h | 271 ++++++++++++++++++++++++ include/linux/regulator/da9211.h | 32 +++ 5 files changed, 702 insertions(+) create mode 100644 drivers/regulator/da9211-regulator.c create mode 100644 drivers/regulator/da9211-regulator.h create mode 100644 include/linux/regulator/da9211.h (limited to 'include/linux') diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig index 789eb46090e3..f5040fc49a3a 100644 --- a/drivers/regulator/Kconfig +++ b/drivers/regulator/Kconfig @@ -198,6 +198,16 @@ config REGULATOR_DA9210 converter 12A DC-DC Buck controlled through an I2C interface. +config REGULATOR_DA9211 + tristate "Dialog Semiconductor DA9211/DA9212 regulator" + depends on I2C + select REGMAP_I2C + help + Say y here to support for the Dialog Semiconductor DA9211/DA9212. + The DA9211/DA9212 is a multi-phase synchronous step down + converter 12A DC-DC Buck controlled through an I2C + interface. + config REGULATOR_DBX500_PRCMU bool diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile index d461110f4463..aa4a6aa7b558 100644 --- a/drivers/regulator/Makefile +++ b/drivers/regulator/Makefile @@ -27,6 +27,7 @@ obj-$(CONFIG_REGULATOR_DA9052) += da9052-regulator.o obj-$(CONFIG_REGULATOR_DA9055) += da9055-regulator.o obj-$(CONFIG_REGULATOR_DA9063) += da9063-regulator.o obj-$(CONFIG_REGULATOR_DA9210) += da9210-regulator.o +obj-$(CONFIG_REGULATOR_DA9211) += da9211-regulator.o obj-$(CONFIG_REGULATOR_DBX500_PRCMU) += dbx500-prcmu.o obj-$(CONFIG_REGULATOR_DB8500_PRCMU) += db8500-prcmu.o obj-$(CONFIG_REGULATOR_FAN53555) += fan53555.o diff --git a/drivers/regulator/da9211-regulator.c b/drivers/regulator/da9211-regulator.c new file mode 100644 index 000000000000..bd1850658a3f --- /dev/null +++ b/drivers/regulator/da9211-regulator.c @@ -0,0 +1,388 @@ +/* + * da9211-regulator.c - Regulator device driver for DA9211 + * Copyright (C) 2014 Dialog Semiconductor Ltd. 
+ * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "da9211-regulator.h" + +#define DA9211_BUCK_MODE_SLEEP 1 +#define DA9211_BUCK_MODE_SYNC 2 +#define DA9211_BUCK_MODE_AUTO 3 + +/* DA9211 REGULATOR IDs */ +#define DA9211_ID_BUCKA 0 +#define DA9211_ID_BUCKB 1 + +struct da9211 { + struct device *dev; + struct regmap *regmap; + struct da9211_pdata *pdata; + struct regulator_dev *rdev[DA9211_MAX_REGULATORS]; + int num_regulator; + int chip_irq; +}; + +static const struct regmap_range_cfg da9211_regmap_range[] = { + { + .selector_reg = DA9211_REG_PAGE_CON, + .selector_mask = DA9211_REG_PAGE_MASK, + .selector_shift = DA9211_REG_PAGE_SHIFT, + .window_start = 0, + .window_len = 256, + .range_min = 0, + .range_max = 2*256, + }, +}; + +static const struct regmap_config da9211_regmap_config = { + .reg_bits = 8, + .val_bits = 8, + .max_register = 2 * 256, + .ranges = da9211_regmap_range, + .num_ranges = ARRAY_SIZE(da9211_regmap_range), +}; + +/* Default limits measured in millivolts and milliamps */ +#define DA9211_MIN_MV 300 +#define DA9211_MAX_MV 1570 +#define DA9211_STEP_MV 10 + +/* Current limits for buck (uA) indices corresponds with register values */ +static const int da9211_current_limits[] = { + 2000000, 2200000, 2400000, 2600000, 2800000, 3000000, 3200000, 3400000, + 3600000, 3800000, 4000000, 4200000, 4400000, 4600000, 4800000, 5000000 +}; + +static unsigned int da9211_buck_get_mode(struct regulator_dev *rdev) +{ + int id = rdev_get_id(rdev); + struct da9211 *chip = rdev_get_drvdata(rdev); + unsigned int data; + int ret, mode = 0; + + ret = regmap_read(chip->regmap, DA9211_REG_BUCKA_CONF+id, &data); + if (ret < 0) + return ret; + + switch (data & 0x03) { + case DA9211_BUCK_MODE_SYNC: + mode = REGULATOR_MODE_FAST; + break; + case DA9211_BUCK_MODE_AUTO: + mode = REGULATOR_MODE_NORMAL; + break; + case DA9211_BUCK_MODE_SLEEP: + mode = REGULATOR_MODE_STANDBY; + break; + } + + return mode; +} + +static int da9211_buck_set_mode(struct regulator_dev *rdev, + unsigned int mode) +{ + int id = rdev_get_id(rdev); + struct da9211 *chip = rdev_get_drvdata(rdev); + int val = 0; + + switch (mode) { + case REGULATOR_MODE_FAST: + val = DA9211_BUCK_MODE_SYNC; + break; + case REGULATOR_MODE_NORMAL: + val = DA9211_BUCK_MODE_AUTO; + break; + case REGULATOR_MODE_STANDBY: + val = DA9211_BUCK_MODE_SLEEP; + break; + } + + return regmap_update_bits(chip->regmap, DA9211_REG_BUCKA_CONF+id, + 0x03, val); +} + +static int da9211_set_current_limit(struct regulator_dev *rdev, int min, + int max) +{ + int id = rdev_get_id(rdev); + struct da9211 *chip = rdev_get_drvdata(rdev); + int i; + + /* search for closest to maximum */ + for (i = ARRAY_SIZE(da9211_current_limits)-1; i >= 0; i--) { + if (min <= da9211_current_limits[i] && + max >= da9211_current_limits[i]) { + return regmap_update_bits(chip->regmap, + DA9211_REG_BUCK_ILIM, + (0x0F << id*4), (i << id*4)); + } + } + + return -EINVAL; +} + +static int 
da9211_get_current_limit(struct regulator_dev *rdev) +{ + int id = rdev_get_id(rdev); + struct da9211 *chip = rdev_get_drvdata(rdev); + unsigned int data; + int ret; + + ret = regmap_read(chip->regmap, DA9211_REG_BUCK_ILIM, &data); + if (ret < 0) + return ret; + + /* select one of 16 values: 0000 (2000mA) to 1111 (5000mA) */ + data = (data >> id*4) & 0x0F; + return da9211_current_limits[data]; +} + +static struct regulator_ops da9211_buck_ops = { + .get_mode = da9211_buck_get_mode, + .set_mode = da9211_buck_set_mode, + .enable = regulator_enable_regmap, + .disable = regulator_disable_regmap, + .is_enabled = regulator_is_enabled_regmap, + .set_voltage_sel = regulator_set_voltage_sel_regmap, + .get_voltage_sel = regulator_get_voltage_sel_regmap, + .list_voltage = regulator_list_voltage_linear, + .set_current_limit = da9211_set_current_limit, + .get_current_limit = da9211_get_current_limit, +}; + +#define DA9211_BUCK(_id) \ +{\ + .name = #_id,\ + .ops = &da9211_buck_ops,\ + .type = REGULATOR_VOLTAGE,\ + .id = DA9211_ID_##_id,\ + .n_voltages = (DA9211_MAX_MV - DA9211_MIN_MV) / DA9211_STEP_MV + 1,\ + .min_uV = (DA9211_MIN_MV * 1000),\ + .uV_step = (DA9211_STEP_MV * 1000),\ + .enable_reg = DA9211_REG_BUCKA_CONT + DA9211_ID_##_id,\ + .enable_mask = DA9211_BUCKA_EN,\ + .vsel_reg = DA9211_REG_VBUCKA_A + DA9211_ID_##_id * 2,\ + .vsel_mask = DA9211_VBUCK_MASK,\ + .owner = THIS_MODULE,\ +} + +static struct regulator_desc da9211_regulators[] = { + DA9211_BUCK(BUCKA), + DA9211_BUCK(BUCKB), +}; + +static irqreturn_t da9211_irq_handler(int irq, void *data) +{ + struct da9211 *chip = data; + int reg_val, err, ret = IRQ_NONE; + + err = regmap_read(chip->regmap, DA9211_REG_EVENT_B, ®_val); + if (err < 0) + goto error_i2c; + + if (reg_val & DA9211_E_OV_CURR_A) { + regulator_notifier_call_chain(chip->rdev[0], + REGULATOR_EVENT_OVER_CURRENT, + rdev_get_drvdata(chip->rdev[0])); + + err = regmap_write(chip->regmap, DA9211_REG_EVENT_B, + DA9211_E_OV_CURR_A); + if (err < 0) + goto error_i2c; + + ret = IRQ_HANDLED; + } + + if (reg_val & DA9211_E_OV_CURR_B) { + regulator_notifier_call_chain(chip->rdev[1], + REGULATOR_EVENT_OVER_CURRENT, + rdev_get_drvdata(chip->rdev[1])); + + err = regmap_write(chip->regmap, DA9211_REG_EVENT_B, + DA9211_E_OV_CURR_B); + if (err < 0) + goto error_i2c; + + ret = IRQ_HANDLED; + } + + return ret; + +error_i2c: + dev_err(chip->dev, "I2C error : %d\n", err); + return IRQ_NONE; +} + +static int da9211_regulator_init(struct da9211 *chip) +{ + struct regulator_config config = { }; + int i, ret; + unsigned int data; + + ret = regmap_read(chip->regmap, DA9211_REG_CONFIG_E, &data); + if (ret < 0) { + dev_err(chip->dev, "Failed to read CONTROL_E reg: %d\n", ret); + goto err; + } + + data &= DA9211_SLAVE_SEL; + /* If configuration for 1/2 bucks is different between platform data + * and the register, driver should exit. 
+ */ + if ((chip->pdata->num_buck == 2 && data == 0x40) + || (chip->pdata->num_buck == 1 && data == 0x00)) { + if (data == 0) + chip->num_regulator = 1; + else + chip->num_regulator = 2; + } else { + ret = -EINVAL; + dev_err(chip->dev, "Configuration is mismatched\n"); + goto err; + } + + for (i = 0; i < chip->num_regulator; i++) { + if (chip->pdata) + config.init_data = + &(chip->pdata->init_data[i]); + + config.dev = chip->dev; + config.driver_data = chip; + config.regmap = chip->regmap; + + chip->rdev[i] = devm_regulator_register(chip->dev, + &da9211_regulators[i], &config); + if (IS_ERR(chip->rdev[i])) { + dev_err(chip->dev, + "Failed to register DA9211 regulator\n"); + ret = PTR_ERR(chip->rdev[i]); + goto err_regulator; + } + + if (chip->chip_irq != 0) { + ret = regmap_update_bits(chip->regmap, + DA9211_REG_MASK_B, DA9211_M_OV_CURR_A << i, 1); + if (ret < 0) { + dev_err(chip->dev, + "Failed to update mask reg: %d\n", ret); + goto err_regulator; + } + } + } + + return 0; + +err_regulator: + while (--i >= 0) + devm_regulator_unregister(chip->dev, chip->rdev[i]); +err: + return ret; +} +/* + * I2C driver interface functions + */ +static int da9211_i2c_probe(struct i2c_client *i2c, + const struct i2c_device_id *id) +{ + struct da9211 *chip; + int error, ret; + + chip = devm_kzalloc(&i2c->dev, sizeof(struct da9211), GFP_KERNEL); + + chip->dev = &i2c->dev; + chip->regmap = devm_regmap_init_i2c(i2c, &da9211_regmap_config); + if (IS_ERR(chip->regmap)) { + error = PTR_ERR(chip->regmap); + dev_err(&i2c->dev, "Failed to allocate register map: %d\n", + error); + return error; + } + + i2c_set_clientdata(i2c, chip); + + chip->pdata = i2c->dev.platform_data; + if (!chip->pdata) { + dev_err(&i2c->dev, "No platform init data supplied\n"); + return -ENODEV; + } + + chip->chip_irq = i2c->irq; + + if (chip->chip_irq != 0) { + ret = devm_request_threaded_irq(chip->dev, chip->chip_irq, NULL, + da9211_irq_handler, + IRQF_TRIGGER_LOW|IRQF_ONESHOT, + "da9211", chip); + if (ret != 0) { + dev_err(chip->dev, "Failed to request IRQ: %d\n", + chip->chip_irq); + return ret; + } + } else { + dev_warn(chip->dev, "No IRQ configured\n"); + } + + ret = da9211_regulator_init(chip); + + if (ret < 0) + dev_err(&i2c->dev, "Failed to initialize regulator: %d\n", ret); + + return ret; +} + +static int da9211_i2c_remove(struct i2c_client *i2c) +{ + struct da9211 *chip = i2c_get_clientdata(i2c); + int i; + + for (i = 0; i < chip->num_regulator; i++) + devm_regulator_unregister(chip->dev, chip->rdev[i]); + + return 0; +} + +static const struct i2c_device_id da9211_i2c_id[] = { + {"da9211", 0}, + {}, +}; + +MODULE_DEVICE_TABLE(i2c, da9211_i2c_id); + +static struct i2c_driver da9211_regulator_driver = { + .driver = { + .name = "da9211", + .owner = THIS_MODULE, + }, + .probe = da9211_i2c_probe, + .remove = da9211_i2c_remove, + .id_table = da9211_i2c_id, +}; + +module_i2c_driver(da9211_regulator_driver); + +MODULE_AUTHOR("James Ban "); +MODULE_DESCRIPTION("Regulator device driver for Dialog DA9211"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/regulator/da9211-regulator.h b/drivers/regulator/da9211-regulator.h new file mode 100644 index 000000000000..88b1769e8058 --- /dev/null +++ b/drivers/regulator/da9211-regulator.h @@ -0,0 +1,271 @@ +/* + * da9211-regulator.h - Regulator definitions for DA9211 + * Copyright (C) 2014 Dialog Semiconductor Ltd. 
+ * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + */ + +#ifndef __DA9211_REGISTERS_H__ +#define __DA9211_REGISTERS_H__ + +/* Page selection */ +#define DA9211_REG_PAGE_CON 0x00 + +/* System Control and Event Registers */ +#define DA9211_REG_STATUS_A 0x50 +#define DA9211_REG_STATUS_B 0x51 +#define DA9211_REG_EVENT_A 0x52 +#define DA9211_REG_EVENT_B 0x53 +#define DA9211_REG_MASK_A 0x54 +#define DA9211_REG_MASK_B 0x55 +#define DA9211_REG_CONTROL_A 0x56 + +/* GPIO Control Registers */ +#define DA9211_REG_GPIO_0_1 0x58 +#define DA9211_REG_GPIO_2_3 0x59 +#define DA9211_REG_GPIO_4 0x5A + +/* Regulator Registers */ +#define DA9211_REG_BUCKA_CONT 0x5D +#define DA9211_REG_BUCKB_CONT 0x5E +#define DA9211_REG_BUCK_ILIM 0xD0 +#define DA9211_REG_BUCKA_CONF 0xD1 +#define DA9211_REG_BUCKB_CONF 0xD2 +#define DA9211_REG_BUCK_CONF 0xD3 +#define DA9211_REG_VBACKA_MAX 0xD5 +#define DA9211_REG_VBACKB_MAX 0xD6 +#define DA9211_REG_VBUCKA_A 0xD7 +#define DA9211_REG_VBUCKA_B 0xD8 +#define DA9211_REG_VBUCKB_A 0xD9 +#define DA9211_REG_VBUCKB_B 0xDA + +/* I2C Interface Settings */ +#define DA9211_REG_INTERFACE 0x105 + +/* BUCK Phase Selection*/ +#define DA9211_REG_CONFIG_E 0x147 + +/* + * Registers bits + */ +/* DA9211_REG_PAGE_CON (addr=0x00) */ +#define DA9211_REG_PAGE_SHIFT 1 +#define DA9211_REG_PAGE_MASK 0x02 +/* On I2C registers 0x00 - 0xFF */ +#define DA9211_REG_PAGE0 0 +/* On I2C registers 0x100 - 0x1FF */ +#define DA9211_REG_PAGE2 2 +#define DA9211_PAGE_WRITE_MODE 0x00 +#define DA9211_REPEAT_WRITE_MODE 0x40 +#define DA9211_PAGE_REVERT 0x80 + +/* DA9211_REG_STATUS_A (addr=0x50) */ +#define DA9211_GPI0 0x01 +#define DA9211_GPI1 0x02 +#define DA9211_GPI2 0x04 +#define DA9211_GPI3 0x08 +#define DA9211_GPI4 0x10 + +/* DA9211_REG_EVENT_A (addr=0x52) */ +#define DA9211_E_GPI0 0x01 +#define DA9211_E_GPI1 0x02 +#define DA9211_E_GPI2 0x04 +#define DA9211_E_GPI3 0x08 +#define DA9211_E_GPI4 0x10 +#define DA9211_E_UVLO_IO 0x40 + +/* DA9211_REG_EVENT_B (addr=0x53) */ +#define DA9211_E_PWRGOOD_A 0x01 +#define DA9211_E_PWRGOOD_B 0x02 +#define DA9211_E_TEMP_WARN 0x04 +#define DA9211_E_TEMP_CRIT 0x08 +#define DA9211_E_OV_CURR_A 0x10 +#define DA9211_E_OV_CURR_B 0x20 + +/* DA9211_REG_MASK_A (addr=0x54) */ +#define DA9211_M_GPI0 0x01 +#define DA9211_M_GPI1 0x02 +#define DA9211_M_GPI2 0x04 +#define DA9211_M_GPI3 0x08 +#define DA9211_M_GPI4 0x10 +#define DA9211_M_UVLO_IO 0x40 + +/* DA9211_REG_MASK_B (addr=0x55) */ +#define DA9211_M_PWRGOOD_A 0x01 +#define DA9211_M_PWRGOOD_B 0x02 +#define DA9211_M_TEMP_WARN 0x04 +#define DA9211_M_TEMP_CRIT 0x08 +#define DA9211_M_OV_CURR_A 0x10 +#define DA9211_M_OV_CURR_B 0x20 + +/* DA9211_REG_CONTROL_A (addr=0x56) */ +#define DA9211_DEBOUNCING_SHIFT 0 +#define DA9211_DEBOUNCING_MASK 0x07 +#define DA9211_SLEW_RATE_SHIFT 3 +#define DA9211_SLEW_RATE_A_MASK 0x18 +#define DA9211_SLEW_RATE_B_SHIFT 5 +#define DA9211_SLEW_RATE_B_MASK 0x60 +#define DA9211_V_LOCK 0x80 + +/* DA9211_REG_GPIO_0_1 (addr=0x58) */ +#define DA9211_GPIO0_PIN_SHIFT 0 +#define DA9211_GPIO0_PIN_MASK 0x03 +#define DA9211_GPIO0_PIN_GPI 0x00 +#define 
DA9211_GPIO0_PIN_GPO_OD 0x02 +#define DA9211_GPIO0_PIN_GPO 0x03 +#define DA9211_GPIO0_TYPE 0x04 +#define DA9211_GPIO0_TYPE_GPI 0x00 +#define DA9211_GPIO0_TYPE_GPO 0x04 +#define DA9211_GPIO0_MODE 0x08 +#define DA9211_GPIO1_PIN_SHIFT 4 +#define DA9211_GPIO1_PIN_MASK 0x30 +#define DA9211_GPIO1_PIN_GPI 0x00 +#define DA9211_GPIO1_PIN_VERROR 0x10 +#define DA9211_GPIO1_PIN_GPO_OD 0x20 +#define DA9211_GPIO1_PIN_GPO 0x30 +#define DA9211_GPIO1_TYPE_SHIFT 0x40 +#define DA9211_GPIO1_TYPE_GPI 0x00 +#define DA9211_GPIO1_TYPE_GPO 0x40 +#define DA9211_GPIO1_MODE 0x80 + +/* DA9211_REG_GPIO_2_3 (addr=0x59) */ +#define DA9211_GPIO2_PIN_SHIFT 0 +#define DA9211_GPIO2_PIN_MASK 0x03 +#define DA9211_GPIO2_PIN_GPI 0x00 +#define DA9211_GPIO5_PIN_BUCK_CLK 0x10 +#define DA9211_GPIO2_PIN_GPO_OD 0x02 +#define DA9211_GPIO2_PIN_GPO 0x03 +#define DA9211_GPIO2_TYPE 0x04 +#define DA9211_GPIO2_TYPE_GPI 0x00 +#define DA9211_GPIO2_TYPE_GPO 0x04 +#define DA9211_GPIO2_MODE 0x08 +#define DA9211_GPIO3_PIN_SHIFT 4 +#define DA9211_GPIO3_PIN_MASK 0x30 +#define DA9211_GPIO3_PIN_GPI 0x00 +#define DA9211_GPIO3_PIN_IERROR 0x10 +#define DA9211_GPIO3_PIN_GPO_OD 0x20 +#define DA9211_GPIO3_PIN_GPO 0x30 +#define DA9211_GPIO3_TYPE_SHIFT 0x40 +#define DA9211_GPIO3_TYPE_GPI 0x00 +#define DA9211_GPIO3_TYPE_GPO 0x40 +#define DA9211_GPIO3_MODE 0x80 + +/* DA9211_REG_GPIO_4 (addr=0x5A) */ +#define DA9211_GPIO4_PIN_SHIFT 0 +#define DA9211_GPIO4_PIN_MASK 0x03 +#define DA9211_GPIO4_PIN_GPI 0x00 +#define DA9211_GPIO4_PIN_GPO_OD 0x02 +#define DA9211_GPIO4_PIN_GPO 0x03 +#define DA9211_GPIO4_TYPE 0x04 +#define DA9211_GPIO4_TYPE_GPI 0x00 +#define DA9211_GPIO4_TYPE_GPO 0x04 +#define DA9211_GPIO4_MODE 0x08 + +/* DA9211_REG_BUCKA_CONT (addr=0x5D) */ +#define DA9211_BUCKA_EN 0x01 +#define DA9211_BUCKA_GPI_SHIFT 1 +#define DA9211_BUCKA_GPI_MASK 0x06 +#define DA9211_BUCKA_GPI_OFF 0x00 +#define DA9211_BUCKA_GPI_GPIO0 0x02 +#define DA9211_BUCKA_GPI_GPIO1 0x04 +#define DA9211_BUCKA_GPI_GPIO3 0x06 +#define DA9211_BUCKA_PD_DIS 0x08 +#define DA9211_VBUCKA_SEL 0x10 +#define DA9211_VBUCKA_SEL_A 0x00 +#define DA9211_VBUCKA_SEL_B 0x10 +#define DA9211_VBUCKA_GPI_SHIFT 5 +#define DA9211_VBUCKA_GPI_MASK 0x60 +#define DA9211_VBUCKA_GPI_OFF 0x00 +#define DA9211_VBUCKA_GPI_GPIO1 0x20 +#define DA9211_VBUCKA_GPI_GPIO2 0x40 +#define DA9211_VBUCKA_GPI_GPIO4 0x60 + +/* DA9211_REG_BUCKB_CONT (addr=0x5E) */ +#define DA9211_BUCKB_EN 0x01 +#define DA9211_BUCKB_GPI_SHIFT 1 +#define DA9211_BUCKB_GPI_MASK 0x06 +#define DA9211_BUCKB_GPI_OFF 0x00 +#define DA9211_BUCKB_GPI_GPIO0 0x02 +#define DA9211_BUCKB_GPI_GPIO1 0x04 +#define DA9211_BUCKB_GPI_GPIO3 0x06 +#define DA9211_BUCKB_PD_DIS 0x08 +#define DA9211_VBUCKB_SEL 0x10 +#define DA9211_VBUCKB_SEL_A 0x00 +#define DA9211_VBUCKB_SEL_B 0x10 +#define DA9211_VBUCKB_GPI_SHIFT 5 +#define DA9211_VBUCKB_GPI_MASK 0x60 +#define DA9211_VBUCKB_GPI_OFF 0x00 +#define DA9211_VBUCKB_GPI_GPIO1 0x20 +#define DA9211_VBUCKB_GPI_GPIO2 0x40 +#define DA9211_VBUCKB_GPI_GPIO4 0x60 + +/* DA9211_REG_BUCK_ILIM (addr=0xD0) */ +#define DA9211_BUCKA_ILIM_SHIFT 0 +#define DA9211_BUCKA_ILIM_MASK 0x0F +#define DA9211_BUCKB_ILIM_SHIFT 4 +#define DA9211_BUCKB_ILIM_MASK 0xF0 + +/* DA9211_REG_BUCKA_CONF (addr=0xD1) */ +#define DA9211_BUCKA_MODE_SHIFT 0 +#define DA9211_BUCKA_MODE_MASK 0x03 +#define DA9211_BUCKA_MODE_MANUAL 0x00 +#define DA9211_BUCKA_MODE_SLEEP 0x01 +#define DA9211_BUCKA_MODE_SYNC 0x02 +#define DA9211_BUCKA_MODE_AUTO 0x03 +#define DA9211_BUCKA_UP_CTRL_SHIFT 2 +#define DA9211_BUCKA_UP_CTRL_MASK 0x1C +#define DA9211_BUCKA_DOWN_CTRL_SHIFT 5 +#define 
DA9211_BUCKA_DOWN_CTRL_MASK 0xE0 + +/* DA9211_REG_BUCKB_CONF (addr=0xD2) */ +#define DA9211_BUCKB_MODE_SHIFT 0 +#define DA9211_BUCKB_MODE_MASK 0x03 +#define DA9211_BUCKB_MODE_MANUAL 0x00 +#define DA9211_BUCKB_MODE_SLEEP 0x01 +#define DA9211_BUCKB_MODE_SYNC 0x02 +#define DA9211_BUCKB_MODE_AUTO 0x03 +#define DA9211_BUCKB_UP_CTRL_SHIFT 2 +#define DA9211_BUCKB_UP_CTRL_MASK 0x1C +#define DA9211_BUCKB_DOWN_CTRL_SHIFT 5 +#define DA9211_BUCKB_DOWN_CTRL_MASK 0xE0 + +/* DA9211_REG_BUCK_CONF (addr=0xD3) */ +#define DA9211_PHASE_SEL_A_SHIFT 0 +#define DA9211_PHASE_SEL_A_MASK 0x03 +#define DA9211_PHASE_SEL_B_SHIFT 2 +#define DA9211_PHASE_SEL_B_MASK 0x04 +#define DA9211_PH_SH_EN_A_SHIFT 3 +#define DA9211_PH_SH_EN_A_MASK 0x08 +#define DA9211_PH_SH_EN_B_SHIFT 4 +#define DA9211_PH_SH_EN_B_MASK 0x10 + +/* DA9211_REG_VBUCKA_MAX (addr=0xD5) */ +#define DA9211_VBUCKA_BASE_SHIFT 0 +#define DA9211_VBUCKA_BASE_MASK 0x7F + +/* DA9211_REG_VBUCKB_MAX (addr=0xD6) */ +#define DA9211_VBUCKB_BASE_SHIFT 0 +#define DA9211_VBUCKB_BASE_MASK 0x7F + +/* DA9211_REG_VBUCKA/B_A/B (addr=0xD7/0xD8/0xD9/0xDA) */ +#define DA9211_VBUCK_SHIFT 0 +#define DA9211_VBUCK_MASK 0x7F +#define DA9211_VBUCK_BIAS 0 +#define DA9211_BUCK_SL 0x80 + +/* DA9211_REG_INTERFACE (addr=0x105) */ +#define DA9211_IF_BASE_ADDR_SHIFT 4 +#define DA9211_IF_BASE_ADDR_MASK 0xF0 + +/* DA9211_REG_CONFIG_E (addr=0x147) */ +#define DA9211_SLAVE_SEL 0x40 + +#endif /* __DA9211_REGISTERS_H__ */ diff --git a/include/linux/regulator/da9211.h b/include/linux/regulator/da9211.h new file mode 100644 index 000000000000..0981ce0e72cc --- /dev/null +++ b/include/linux/regulator/da9211.h @@ -0,0 +1,32 @@ +/* + * da9211.h - Regulator device driver for DA9211 + * Copyright (C) 2014 Dialog Semiconductor Ltd. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + */ + +#ifndef __LINUX_REGULATOR_DA9211_H +#define __LINUX_REGULATOR_DA9211_H + +#include + +#define DA9211_MAX_REGULATORS 2 + +struct da9211_pdata { + /* + * Number of buck + * 1 : 4 phase 1 buck + * 2 : 2 phase 2 buck + */ + int num_buck; + struct regulator_init_data *init_data; +}; +#endif -- cgit v1.2.3-59-g8ed1b From c84dbf61a7b322188d2a7fddc0cc6317ac6713e2 Mon Sep 17 00:00:00 2001 From: Torsten Duwe Date: Sat, 14 Jun 2014 23:38:36 -0400 Subject: random: add_hwgenerator_randomness() for feeding entropy from devices This patch adds an interface to the random pool for feeding entropy in-kernel. Signed-off-by: Torsten Duwe Signed-off-by: Theodore Ts'o Acked-by: H. 
Peter Anvin --- drivers/char/random.c | 21 +++++++++++++++++++++ include/linux/hw_random.h | 2 ++ 2 files changed, 23 insertions(+) (limited to 'include/linux') diff --git a/drivers/char/random.c b/drivers/char/random.c index d3bb7927fb49..914b1575df8f 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -250,6 +250,7 @@ #include #include #include +#include #include #include #include @@ -1750,3 +1751,23 @@ randomize_range(unsigned long start, unsigned long end, unsigned long len) return 0; return PAGE_ALIGN(get_random_int() % range + start); } + +/* Interface for in-kernel drivers of true hardware RNGs. + * Those devices may produce endless random bits and will be throttled + * when our pool is full. + */ +void add_hwgenerator_randomness(const char *buffer, size_t count, + size_t entropy) +{ + struct entropy_store *poolp = &input_pool; + + /* Suspend writing if we're above the trickle threshold. + * We'll be woken up again once below random_write_wakeup_thresh, + * or when the calling thread is about to terminate. + */ + wait_event_interruptible(random_write_wait, kthread_should_stop() || + ENTROPY_BITS(&input_pool) <= random_write_wakeup_bits); + mix_pool_bytes(poolp, buffer, count); + credit_entropy_bits(poolp, entropy); +} +EXPORT_SYMBOL_GPL(add_hwgenerator_randomness); diff --git a/include/linux/hw_random.h b/include/linux/hw_random.h index b4b0eef5fddf..3f075ff00411 100644 --- a/include/linux/hw_random.h +++ b/include/linux/hw_random.h @@ -47,5 +47,7 @@ struct hwrng { extern int hwrng_register(struct hwrng *rng); /** Unregister a Hardware Random Number Generator driver. */ extern void hwrng_unregister(struct hwrng *rng); +/** Feed random bits into the pool. */ +extern void add_hwgenerator_randomness(const char *buffer, size_t count, size_t entropy); #endif /* LINUX_HWRANDOM_H_ */ -- cgit v1.2.3-59-g8ed1b From 0f734e6e768b4b66737b3d3e13f1769a12ecff86 Mon Sep 17 00:00:00 2001 From: Torsten Duwe Date: Sat, 14 Jun 2014 23:48:41 -0400 Subject: hwrng: add per-device entropy derating This patch introduces a derating factor to struct hwrng for the random bits going into the kernel input pool, and a common default derating for drivers which do not specify one. Signed-off-by: Torsten Duwe Signed-off-by: Theodore Ts'o Acked-by: H. Peter Anvin --- drivers/char/hw_random/core.c | 11 ++++++++++- include/linux/hw_random.h | 3 +++ 2 files changed, 13 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index 48b3c812b9ec..3aaf97c774f1 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@ -56,11 +56,15 @@ static LIST_HEAD(rng_list); static DEFINE_MUTEX(rng_mutex); static int data_avail; static u8 *rng_buffer, *rng_fillbuf; -static unsigned short current_quality = 700; /* an arbitrary 70% */ +static unsigned short current_quality; +static unsigned short default_quality; /* = 0; default to "off" */ module_param(current_quality, ushort, 0644); MODULE_PARM_DESC(current_quality, "current hwrng entropy estimation per mill"); +module_param(default_quality, ushort, 0644); +MODULE_PARM_DESC(default_quality, + "default entropy content of hwrng per mill"); static void start_khwrngd(void); @@ -79,6 +83,11 @@ static inline int hwrng_init(struct hwrng *rng) return err; } + current_quality = rng->quality ? 
: default_quality; + current_quality &= 1023; + + if (current_quality == 0 && hwrng_fill) + kthread_stop(hwrng_fill); if (current_quality > 0 && !hwrng_fill) start_khwrngd(); diff --git a/include/linux/hw_random.h b/include/linux/hw_random.h index 3f075ff00411..914bb08cd738 100644 --- a/include/linux/hw_random.h +++ b/include/linux/hw_random.h @@ -29,6 +29,8 @@ * @read: New API. drivers can fill up to max bytes of data * into the buffer. The buffer is aligned for any type. * @priv: Private data, for use by the RNG driver. + * @quality: Estimation of true entropy in RNG's bitstream + * (per mill). */ struct hwrng { const char *name; @@ -38,6 +40,7 @@ struct hwrng { int (*data_read)(struct hwrng *rng, u32 *data); int (*read)(struct hwrng *rng, void *data, size_t max, bool wait); unsigned long priv; + unsigned short quality; /* internal. */ struct list_head list; -- cgit v1.2.3-59-g8ed1b From 5577964e64692e17cc498854b7e0833e6532cd64 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 15 Jul 2014 11:05:09 -0400 Subject: cgroup: rename cgroup_subsys->base_cftypes to ->legacy_cftypes Currently, cgroup_subsys->base_cftypes is used for both the unified default hierarchy and legacy ones and subsystems can mark each file with either CFTYPE_ONLY_ON_DFL or CFTYPE_INSANE if it has to appear only on one of them. This is quite hairy and error-prone. Also, we may end up exposing interface files to the default hierarchy without thinking it through. cgroup_subsys will grow two separate cftype arrays and apply each only on the hierarchies of the matching type. This will allow organizing cftypes in a lot clearer way and encourage subsystems to scrutinize the interface which is being exposed in the new default hierarchy. In preparation, this patch renames cgroup_subsys->base_cftypes to cgroup_subsys->legacy_cftypes. This patch is pure rename. Signed-off-by: Tejun Heo Acked-by: Neil Horman Acked-by: Li Zefan Cc: Johannes Weiner Cc: Michal Hocko Cc: Vivek Goyal Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Aristeu Rozanski Cc: Aneesh Kumar K.V --- block/blk-cgroup.c | 2 +- include/linux/cgroup.h | 2 +- kernel/cgroup.c | 4 ++-- kernel/cgroup_freezer.c | 2 +- kernel/cpuset.c | 2 +- kernel/sched/core.c | 2 +- kernel/sched/cpuacct.c | 2 +- mm/memcontrol.c | 2 +- net/core/netclassid_cgroup.c | 2 +- net/core/netprio_cgroup.c | 2 +- security/device_cgroup.c | 2 +- 11 files changed, 12 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 63c3cd454d1e..5cfbc723041c 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -924,7 +924,7 @@ struct cgroup_subsys blkio_cgrp_subsys = { .css_offline = blkcg_css_offline, .css_free = blkcg_css_free, .can_attach = blkcg_can_attach, - .base_cftypes = blkcg_files, + .legacy_cftypes = blkcg_files, #ifdef CONFIG_MEMCG /* * This ensures that, if available, memcg is automatically enabled diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 7bb274487c89..a6e9c2eeab89 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -672,7 +672,7 @@ struct cgroup_subsys { struct list_head cfts; /* base cftypes, automatically registered with subsys itself */ - struct cftype *base_cftypes; + struct cftype *legacy_cftypes; /* used on the legacy hierarchies */ /* * A subsystem may depend on other subsystems. 
When such subsystem diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 7e5fee5d6422..6496a83b0314 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -4890,7 +4890,7 @@ int __init cgroup_init(void) */ if (!ss->disabled) { cgrp_dfl_root.subsys_mask |= 1 << ss->id; - WARN_ON(cgroup_add_cftypes(ss, ss->base_cftypes)); + WARN_ON(cgroup_add_cftypes(ss, ss->legacy_cftypes)); } } @@ -5480,6 +5480,6 @@ static struct cftype debug_files[] = { struct cgroup_subsys debug_cgrp_subsys = { .css_alloc = debug_css_alloc, .css_free = debug_css_free, - .base_cftypes = debug_files, + .legacy_cftypes = debug_files, }; #endif /* CONFIG_CGROUP_DEBUG */ diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index a79e40f9d700..92b98cc0ee76 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c @@ -480,5 +480,5 @@ struct cgroup_subsys freezer_cgrp_subsys = { .css_free = freezer_css_free, .attach = freezer_attach, .fork = freezer_fork, - .base_cftypes = files, + .legacy_cftypes = files, }; diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 53a9bbf16391..f337f42a07ac 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -2036,7 +2036,7 @@ struct cgroup_subsys cpuset_cgrp_subsys = { .cancel_attach = cpuset_cancel_attach, .attach = cpuset_attach, .bind = cpuset_bind, - .base_cftypes = files, + .legacy_cftypes = files, .early_init = 1, }; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 3bdf01b494fe..6628e8014824 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -8088,7 +8088,7 @@ struct cgroup_subsys cpu_cgrp_subsys = { .can_attach = cpu_cgroup_can_attach, .attach = cpu_cgroup_attach, .exit = cpu_cgroup_exit, - .base_cftypes = cpu_files, + .legacy_cftypes = cpu_files, .early_init = 1, }; diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c index 9cf350c94ec4..dd7cbb55bbf2 100644 --- a/kernel/sched/cpuacct.c +++ b/kernel/sched/cpuacct.c @@ -278,6 +278,6 @@ void cpuacct_account_field(struct task_struct *p, int index, u64 val) struct cgroup_subsys cpuacct_cgrp_subsys = { .css_alloc = cpuacct_css_alloc, .css_free = cpuacct_css_free, - .base_cftypes = files, + .legacy_cftypes = files, .early_init = 1, }; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index a2a4bd69a7ae..8331f2739e2c 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -7048,7 +7048,7 @@ struct cgroup_subsys memory_cgrp_subsys = { .cancel_attach = mem_cgroup_cancel_attach, .attach = mem_cgroup_move_task, .bind = mem_cgroup_bind, - .base_cftypes = mem_cgroup_files, + .legacy_cftypes = mem_cgroup_files, .early_init = 0, }; diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c index 30d903b19c62..1f2a126f4ffa 100644 --- a/net/core/netclassid_cgroup.c +++ b/net/core/netclassid_cgroup.c @@ -107,5 +107,5 @@ struct cgroup_subsys net_cls_cgrp_subsys = { .css_online = cgrp_css_online, .css_free = cgrp_css_free, .attach = cgrp_attach, - .base_cftypes = ss_files, + .legacy_cftypes = ss_files, }; diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index 2f385b9bccc0..cbd0a199bf52 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -249,7 +249,7 @@ struct cgroup_subsys net_prio_cgrp_subsys = { .css_online = cgrp_css_online, .css_free = cgrp_css_free, .attach = net_prio_attach, - .base_cftypes = ss_files, + .legacy_cftypes = ss_files, }; static int netprio_device_event(struct notifier_block *unused, diff --git a/security/device_cgroup.c b/security/device_cgroup.c index d9d69e6930ed..188c1d26393b 100644 --- a/security/device_cgroup.c +++ 
b/security/device_cgroup.c @@ -796,7 +796,7 @@ struct cgroup_subsys devices_cgrp_subsys = { .css_free = devcgroup_css_free, .css_online = devcgroup_online, .css_offline = devcgroup_offline, - .base_cftypes = dev_cgroup_files, + .legacy_cftypes = dev_cgroup_files, }; /** -- cgit v1.2.3-59-g8ed1b From 2cf669a58dc08fa065a8bd0dca866c0e6cb358cc Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 15 Jul 2014 11:05:09 -0400 Subject: cgroup: replace cgroup_add_cftypes() with cgroup_add_legacy_cftypes() Currently, cftypes added by cgroup_add_cftypes() are used for both the unified default hierarchy and legacy ones and subsystems can mark each file with either CFTYPE_ONLY_ON_DFL or CFTYPE_INSANE if it has to appear only on one of them. This is quite hairy and error-prone. Also, we may end up exposing interface files to the default hierarchy without thinking it through. cgroup_subsys will grow two separate cftype addition functions and apply each only on the hierarchies of the matching type. This will allow organizing cftypes in a lot clearer way and encourage subsystems to scrutinize the interface which is being exposed in the new default hierarchy. In preparation, this patch adds cgroup_add_legacy_cftypes() which currently is a simple wrapper around cgroup_add_cftypes() and replaces all cgroup_add_cftypes() usages with it. While at it, this patch drops a completely spurious return from __hugetlb_cgroup_file_init(). This patch doesn't introduce any functional differences. Signed-off-by: Tejun Heo Acked-by: Neil Horman Acked-by: Li Zefan Cc: Johannes Weiner Cc: Michal Hocko Cc: Aneesh Kumar K.V --- block/blk-cgroup.c | 3 ++- include/linux/cgroup.h | 2 +- kernel/cgroup.c | 7 ++++++- mm/hugetlb_cgroup.c | 5 ++--- mm/memcontrol.c | 3 ++- net/ipv4/tcp_memcontrol.c | 2 +- 6 files changed, 14 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 5cfbc723041c..2541cf043ba8 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -1124,7 +1124,8 @@ int __init blkcg_policy_register(struct blkcg_policy *pol) /* everything is in place, add intf files for the new policy */ if (pol->cftypes) - WARN_ON(cgroup_add_cftypes(&blkio_cgrp_subsys, pol->cftypes)); + WARN_ON(cgroup_add_legacy_cftypes(&blkio_cgrp_subsys, + pol->cftypes)); ret = 0; out_unlock: mutex_unlock(&blkcg_pol_mutex); diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index a6e9c2eeab89..f5f0feef2701 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -590,7 +590,7 @@ static inline void pr_cont_cgroup_path(struct cgroup *cgrp) char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen); -int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); +int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); int cgroup_rm_cftypes(struct cftype *cfts); bool cgroup_is_descendant(struct cgroup *cgrp, struct cgroup *ancestor); diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 6496a83b0314..c275aa439a6f 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -3170,7 +3170,7 @@ int cgroup_rm_cftypes(struct cftype *cfts) * function currently returns 0 as long as @cfts registration is successful * even if some file creation attempts on existing cgroups fail. 
*/ -int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts) +static int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts) { int ret; @@ -3195,6 +3195,11 @@ int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts) return ret; } +int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts) +{ + return cgroup_add_cftypes(ss, cfts); +} + /** * cgroup_task_count - count the number of tasks in a cgroup. * @cgrp: the cgroup in question diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c index 493f758445e7..9aae6f47433f 100644 --- a/mm/hugetlb_cgroup.c +++ b/mm/hugetlb_cgroup.c @@ -358,9 +358,8 @@ static void __init __hugetlb_cgroup_file_init(int idx) cft = &h->cgroup_files[4]; memset(cft, 0, sizeof(*cft)); - WARN_ON(cgroup_add_cftypes(&hugetlb_cgrp_subsys, h->cgroup_files)); - - return; + WARN_ON(cgroup_add_legacy_cftypes(&hugetlb_cgrp_subsys, + h->cgroup_files)); } void __init hugetlb_cgroup_file_init(void) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 8331f2739e2c..b6b3c6fea509 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -7065,7 +7065,8 @@ __setup("swapaccount=", enable_swap_account); static void __init memsw_file_init(void) { - WARN_ON(cgroup_add_cftypes(&memory_cgrp_subsys, memsw_cgroup_files)); + WARN_ON(cgroup_add_legacy_cftypes(&memory_cgrp_subsys, + memsw_cgroup_files)); } static void __init enable_swap_cgroup(void) diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c index f7a2ec3ac584..3af522622fad 100644 --- a/net/ipv4/tcp_memcontrol.c +++ b/net/ipv4/tcp_memcontrol.c @@ -222,7 +222,7 @@ static struct cftype tcp_files[] = { static int __init tcp_memcontrol_init(void) { - WARN_ON(cgroup_add_cftypes(&memory_cgrp_subsys, tcp_files)); + WARN_ON(cgroup_add_legacy_cftypes(&memory_cgrp_subsys, tcp_files)); return 0; } __initcall(tcp_memcontrol_init); -- cgit v1.2.3-59-g8ed1b From a8ddc8215e1a4cd9dc5d6210811cfc381a489ec2 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 15 Jul 2014 11:05:10 -0400 Subject: cgroup: distinguish the default and legacy hierarchies when handling cftypes Until now, cftype arrays carried files for both the default and legacy hierarchies and the files which needed to be used on only one of them were flagged with either CFTYPE_ONLY_ON_DFL or CFTYPE_INSANE. This gets confusing very quickly and we may end up exposing interface files to the default hierarchy without thinking it through. This patch makes cgroup core provide separate sets of interfaces for cftype handling so that the cftypes for the default and legacy hierarchies are clearly distinguished. The previous two patches renamed the existing ones so that they clearly indicate that they're for the legacy hierarchies. This patch adds the interface for the default hierarchy and apply them selectively depending on the hierarchy type. * cftypes added through cgroup_subsys->dfl_cftypes and cgroup_add_dfl_cftypes() only show up on the default hierarchy. * cftypes added through cgroup_subsys->legacy_cftypes and cgroup_add_legacy_cftypes() only show up on the legacy hierarchies. * cgroup_subsys->dfl_cftypes and ->legacy_cftypes can point to the same array for the cases where the interface files are identical on both types of hierarchies. * This makes all the existing subsystem interface files legacy-only by default and all subsystems will have no interface file created when enabled on the default hierarchy. Each subsystem should explicitly review and compose the interface for the default hierarchy. 
* A boot param "cgroup__DEVEL__legacy_files_on_dfl" is added which makes subsystems which haven't decided the interface files for the default hierarchy to present the legacy files on the default hierarchy so that its behavior on the default hierarchy can be tested. As the awkward name suggests, this is for development only. * memcg's CFTYPE_INSANE on "use_hierarchy" is noop now as the whole array isn't used on the default hierarchy. The flag is removed. v2: Updated documentation for cgroup__DEVEL__legacy_files_on_dfl. v3: Clear CFTYPE_ONLY_ON_DFL and CFTYPE_INSANE when cfts are removed as suggested by Li. Signed-off-by: Tejun Heo Acked-by: Neil Horman Acked-by: Li Zefan Cc: Johannes Weiner Cc: Michal Hocko Cc: Vivek Goyal Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Aristeu Rozanski Cc: Aneesh Kumar K.V --- Documentation/cgroups/unified-hierarchy.txt | 18 ++++++--- include/linux/cgroup.h | 9 ++++- kernel/cgroup.c | 62 +++++++++++++++++++++++++++-- mm/memcontrol.c | 1 - 4 files changed, 78 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/Documentation/cgroups/unified-hierarchy.txt b/Documentation/cgroups/unified-hierarchy.txt index a7a2205539a7..4f4563277864 100644 --- a/Documentation/cgroups/unified-hierarchy.txt +++ b/Documentation/cgroups/unified-hierarchy.txt @@ -94,12 +94,18 @@ change soon. mount -t cgroup -o __DEVEL__sane_behavior cgroup $MOUNT_POINT -All controllers which are not bound to other hierarchies are -automatically bound to unified hierarchy and show up at the root of -it. Controllers which are enabled only in the root of unified -hierarchy can be bound to other hierarchies. This allows mixing -unified hierarchy with the traditional multiple hierarchies in a fully -backward compatible way. +All controllers which support the unified hierarchy and are not bound +to other hierarchies are automatically bound to unified hierarchy and +show up at the root of it. Controllers which are enabled only in the +root of unified hierarchy can be bound to other hierarchies. This +allows mixing unified hierarchy with the traditional multiple +hierarchies in a fully backward compatible way. + +For development purposes, the following boot parameter makes all +controllers to appear on the unified hierarchy whether supported or +not. + + cgroup__DEVEL__legacy_files_on_dfl A controller can be moved across hierarchies only after the controller is no longer referenced in its current hierarchy. Because per-cgroup diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index f5f0feef2701..9f76236ac158 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -590,6 +590,7 @@ static inline void pr_cont_cgroup_path(struct cgroup *cgrp) char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen); +int cgroup_add_dfl_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); int cgroup_rm_cftypes(struct cftype *cfts); @@ -671,8 +672,12 @@ struct cgroup_subsys { */ struct list_head cfts; - /* base cftypes, automatically registered with subsys itself */ - struct cftype *legacy_cftypes; /* used on the legacy hierarchies */ + /* + * Base cftypes which are automatically registered. The two can + * point to the same array. + */ + struct cftype *dfl_cftypes; /* for the default hierarchy */ + struct cftype *legacy_cftypes; /* for the legacy hierarchies */ /* * A subsystem may depend on other subsystems. 
When such subsystem diff --git a/kernel/cgroup.c b/kernel/cgroup.c index c275aa439a6f..374ebdf74f35 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -149,6 +149,12 @@ struct cgroup_root cgrp_dfl_root; */ static bool cgrp_dfl_root_visible; +/* + * Set by the boot param of the same name and makes subsystems with NULL + * ->dfl_files to use ->legacy_files on the default hierarchy. + */ +static bool cgroup_legacy_files_on_dfl; + /* some controllers are not supported in the default hierarchy */ static const unsigned int cgrp_dfl_root_inhibit_ss_mask = 0 #ifdef CONFIG_CGROUP_DEBUG @@ -3085,6 +3091,9 @@ static void cgroup_exit_cftypes(struct cftype *cfts) kfree(cft->kf_ops); cft->kf_ops = NULL; cft->ss = NULL; + + /* revert flags set by cgroup core while adding @cfts */ + cft->flags &= ~(CFTYPE_ONLY_ON_DFL | CFTYPE_INSANE); } } @@ -3195,8 +3204,37 @@ static int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts) return ret; } +/** + * cgroup_add_dfl_cftypes - add an array of cftypes for default hierarchy + * @ss: target cgroup subsystem + * @cfts: zero-length name terminated array of cftypes + * + * Similar to cgroup_add_cftypes() but the added files are only used for + * the default hierarchy. + */ +int cgroup_add_dfl_cftypes(struct cgroup_subsys *ss, struct cftype *cfts) +{ + struct cftype *cft; + + for (cft = cfts; cft && cft->name[0] != '\0'; cft++) + cft->flags |= CFTYPE_ONLY_ON_DFL; + return cgroup_add_cftypes(ss, cfts); +} + +/** + * cgroup_add_legacy_cftypes - add an array of cftypes for legacy hierarchies + * @ss: target cgroup subsystem + * @cfts: zero-length name terminated array of cftypes + * + * Similar to cgroup_add_cftypes() but the added files are only used for + * the legacy hierarchies. + */ int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts) { + struct cftype *cft; + + for (cft = cfts; cft && cft->name[0] != '\0'; cft++) + cft->flags |= CFTYPE_INSANE; return cgroup_add_cftypes(ss, cfts); } @@ -4893,9 +4931,19 @@ int __init cgroup_init(void) * disabled flag and cftype registration needs kmalloc, * both of which aren't available during early_init. 
*/ - if (!ss->disabled) { - cgrp_dfl_root.subsys_mask |= 1 << ss->id; - WARN_ON(cgroup_add_cftypes(ss, ss->legacy_cftypes)); + if (ss->disabled) + continue; + + cgrp_dfl_root.subsys_mask |= 1 << ss->id; + + if (cgroup_legacy_files_on_dfl && !ss->dfl_cftypes) + ss->dfl_cftypes = ss->legacy_cftypes; + + if (ss->dfl_cftypes == ss->legacy_cftypes) { + WARN_ON(cgroup_add_cftypes(ss, ss->dfl_cftypes)); + } else { + WARN_ON(cgroup_add_dfl_cftypes(ss, ss->dfl_cftypes)); + WARN_ON(cgroup_add_legacy_cftypes(ss, ss->legacy_cftypes)); } } @@ -5291,6 +5339,14 @@ static int __init cgroup_disable(char *str) } __setup("cgroup_disable=", cgroup_disable); +static int __init cgroup_set_legacy_files_on_dfl(char *str) +{ + printk("cgroup: using legacy files on the default hierarchy\n"); + cgroup_legacy_files_on_dfl = true; + return 0; +} +__setup("cgroup__DEVEL__legacy_files_on_dfl", cgroup_set_legacy_files_on_dfl); + /** * css_tryget_online_from_dir - get corresponding css from a cgroup dentry * @dentry: directory dentry of interest diff --git a/mm/memcontrol.c b/mm/memcontrol.c index b6b3c6fea509..45c10c6fc3ce 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -6003,7 +6003,6 @@ static struct cftype mem_cgroup_files[] = { }, { .name = "use_hierarchy", - .flags = CFTYPE_INSANE, .write_u64 = mem_cgroup_hierarchy_write, .read_u64 = mem_cgroup_hierarchy_read, }, -- cgit v1.2.3-59-g8ed1b From 05ebb6e60f044a9cef2549b6204559276500f363 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 15 Jul 2014 11:05:10 -0400 Subject: cgroup: make CFTYPE_ONLY_ON_DFL and CFTYPE_NO_ internal to cgroup core cgroup now distinguishes cftypes for the default and legacy hierarchies more explicitly by using separate arrays and CFTYPE_ONLY_ON_DFL and CFTYPE_INSANE should be and are used only inside cgroup core proper. Let's make it clear that the flags are internal by prefixing them with double underscores. CFTYPE_INSANE is renamed to __CFTYPE_NOT_ON_DFL for consistency. The two flags are also collected and assigned bits >= 16 so that they aren't mixed with the published flags. v2: Convert the extra ones in cgroup_exit_cftypes() which are added by revision to the previous patch. Signed-off-by: Tejun Heo Acked-by: Li Zefan --- include/linux/cgroup.h | 6 ++++-- kernel/cgroup.c | 10 +++++----- 2 files changed, 9 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 9f76236ac158..b5223c570eba 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -384,9 +384,11 @@ struct css_set { enum { CFTYPE_ONLY_ON_ROOT = (1 << 0), /* only create on root cgrp */ CFTYPE_NOT_ON_ROOT = (1 << 1), /* don't create on root cgrp */ - CFTYPE_INSANE = (1 << 2), /* don't create if sane_behavior */ CFTYPE_NO_PREFIX = (1 << 3), /* (DON'T USE FOR NEW FILES) no subsys prefix */ - CFTYPE_ONLY_ON_DFL = (1 << 4), /* only on default hierarchy */ + + /* internal flags, do not use outside cgroup core proper */ + __CFTYPE_ONLY_ON_DFL = (1 << 16), /* only on default hierarchy */ + __CFTYPE_NOT_ON_DFL = (1 << 17), /* not on default hierarchy */ }; #define MAX_CFTYPE_NAME 64 diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 374ebdf74f35..f41d164a3d54 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -3031,9 +3031,9 @@ static int cgroup_addrm_files(struct cgroup *cgrp, struct cftype cfts[], for (cft = cfts; cft->name[0] != '\0'; cft++) { /* does cft->flags tell us to skip this file on @cgrp? 
*/ - if ((cft->flags & CFTYPE_ONLY_ON_DFL) && !cgroup_on_dfl(cgrp)) + if ((cft->flags & __CFTYPE_ONLY_ON_DFL) && !cgroup_on_dfl(cgrp)) continue; - if ((cft->flags & CFTYPE_INSANE) && cgroup_on_dfl(cgrp)) + if ((cft->flags & __CFTYPE_NOT_ON_DFL) && cgroup_on_dfl(cgrp)) continue; if ((cft->flags & CFTYPE_NOT_ON_ROOT) && !cgroup_parent(cgrp)) continue; @@ -3093,7 +3093,7 @@ static void cgroup_exit_cftypes(struct cftype *cfts) cft->ss = NULL; /* revert flags set by cgroup core while adding @cfts */ - cft->flags &= ~(CFTYPE_ONLY_ON_DFL | CFTYPE_INSANE); + cft->flags &= ~(__CFTYPE_ONLY_ON_DFL | __CFTYPE_NOT_ON_DFL); } } @@ -3217,7 +3217,7 @@ int cgroup_add_dfl_cftypes(struct cgroup_subsys *ss, struct cftype *cfts) struct cftype *cft; for (cft = cfts; cft && cft->name[0] != '\0'; cft++) - cft->flags |= CFTYPE_ONLY_ON_DFL; + cft->flags |= __CFTYPE_ONLY_ON_DFL; return cgroup_add_cftypes(ss, cfts); } @@ -3234,7 +3234,7 @@ int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts) struct cftype *cft; for (cft = cfts; cft && cft->name[0] != '\0'; cft++) - cft->flags |= CFTYPE_INSANE; + cft->flags |= __CFTYPE_NOT_ON_DFL; return cgroup_add_cftypes(ss, cfts); } -- cgit v1.2.3-59-g8ed1b From b9ba6f94b2382ef832f97122976b73004f714714 Mon Sep 17 00:00:00 2001 From: Niu Yawei Date: Wed, 4 Jun 2014 12:23:19 +0800 Subject: quota: remove dqptr_sem Remove dqptr_sem to make quota code scalable: Remove the dqptr_sem, accessing inode->i_dquot now protected by dquot_srcu, and changing inode->i_dquot is now serialized by dq_data_lock. Signed-off-by: Lai Siyao Signed-off-by: Niu Yawei Signed-off-by: Jan Kara --- fs/quota/dquot.c | 114 ++++++++++++++++++++++---------------------------- fs/super.c | 1 - include/linux/quota.h | 1 - 3 files changed, 49 insertions(+), 67 deletions(-) (limited to 'include/linux') diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index fb2d2e2a89e7..f2d0eee9d1f1 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -96,13 +96,16 @@ * Note that some things (eg. sb pointer, type, id) doesn't change during * the life of the dquot structure and so needn't to be protected by a lock * - * Any operation working on dquots via inode pointers must hold dqptr_sem. If - * operation is just reading pointers from inode (or not using them at all) the - * read lock is enough. If pointers are altered function must hold write lock. + * Operation accessing dquots via inode pointers are protected by dquot_srcu. + * Operation of reading pointer needs srcu_read_lock(&dquot_srcu), and + * synchronize_srcu(&dquot_srcu) is called after clearing pointers from + * inode and before dropping dquot references to avoid use of dquots after + * they are freed. dq_data_lock is used to serialize the pointer setting and + * clearing operations. * Special care needs to be taken about S_NOQUOTA inode flag (marking that * inode is a quota file). Functions adding pointers from inode to dquots have - * to check this flag under dqptr_sem and then (if S_NOQUOTA is not set) they - * have to do all pointer modifications before dropping dqptr_sem. This makes + * to check this flag under dq_data_lock and then (if S_NOQUOTA is not set) they + * have to do all pointer modifications before dropping dq_data_lock. This makes * sure they cannot race with quotaon which first sets S_NOQUOTA flag and * then drops all pointers to dquots from an inode. * @@ -116,21 +119,15 @@ * spinlock to internal buffers before writing. 
* * Lock ordering (including related VFS locks) is the following: - * dqonoff_mutex > i_mutex > journal_lock > dqptr_sem > dquot->dq_lock > - * dqio_mutex + * dqonoff_mutex > i_mutex > journal_lock > dquot->dq_lock > dqio_mutex * dqonoff_mutex > i_mutex comes from dquot_quota_sync, dquot_enable, etc. - * The lock ordering of dqptr_sem imposed by quota code is only dqonoff_sem > - * dqptr_sem. But filesystem has to count with the fact that functions such as - * dquot_alloc_space() acquire dqptr_sem and they usually have to be called - * from inside a transaction to keep filesystem consistency after a crash. Also - * filesystems usually want to do some IO on dquot from ->mark_dirty which is - * called with dqptr_sem held. */ static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_list_lock); static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_state_lock); __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_data_lock); EXPORT_SYMBOL(dq_data_lock); +DEFINE_STATIC_SRCU(dquot_srcu); void __quota_error(struct super_block *sb, const char *func, const char *fmt, ...) @@ -964,7 +961,6 @@ static void add_dquot_ref(struct super_block *sb, int type) /* * Remove references to dquots from inode and add dquot to list for freeing * if we have the last reference to dquot - * We can't race with anybody because we hold dqptr_sem for writing... */ static void remove_inode_dquot_ref(struct inode *inode, int type, struct list_head *tofree_head) @@ -1024,13 +1020,15 @@ static void remove_dquot_ref(struct super_block *sb, int type, * We have to scan also I_NEW inodes because they can already * have quota pointer initialized. Luckily, we need to touch * only quota pointers and these have separate locking - * (dqptr_sem). + * (dq_data_lock). */ + spin_lock(&dq_data_lock); if (!IS_NOQUOTA(inode)) { if (unlikely(inode_get_rsv_space(inode) > 0)) reserved = 1; remove_inode_dquot_ref(inode, type, tofree_head); } + spin_unlock(&dq_data_lock); } spin_unlock(&inode_sb_list_lock); #ifdef CONFIG_QUOTA_DEBUG @@ -1048,9 +1046,8 @@ static void drop_dquot_ref(struct super_block *sb, int type) LIST_HEAD(tofree_head); if (sb->dq_op) { - down_write(&sb_dqopt(sb)->dqptr_sem); remove_dquot_ref(sb, type, &tofree_head); - up_write(&sb_dqopt(sb)->dqptr_sem); + synchronize_srcu(&dquot_srcu); put_dquot_list(&tofree_head); } } @@ -1381,9 +1378,6 @@ static int dquot_active(const struct inode *inode) /* * Initialize quota pointers in inode * - * We do things in a bit complicated way but by that we avoid calling - * dqget() and thus filesystem callbacks under dqptr_sem. - * * It is better to call this function outside of any transaction as it * might need a lot of space in journal for dquot structure allocation. 
*/ @@ -1394,8 +1388,6 @@ static void __dquot_initialize(struct inode *inode, int type) struct super_block *sb = inode->i_sb; qsize_t rsv; - /* First test before acquiring mutex - solves deadlocks when we - * re-enter the quota code and are already holding the mutex */ if (!dquot_active(inode)) return; @@ -1429,7 +1421,7 @@ static void __dquot_initialize(struct inode *inode, int type) if (!init_needed) return; - down_write(&sb_dqopt(sb)->dqptr_sem); + spin_lock(&dq_data_lock); if (IS_NOQUOTA(inode)) goto out_err; for (cnt = 0; cnt < MAXQUOTAS; cnt++) { @@ -1449,15 +1441,12 @@ static void __dquot_initialize(struct inode *inode, int type) * did a write before quota was turned on */ rsv = inode_get_rsv_space(inode); - if (unlikely(rsv)) { - spin_lock(&dq_data_lock); + if (unlikely(rsv)) dquot_resv_space(inode->i_dquot[cnt], rsv); - spin_unlock(&dq_data_lock); - } } } out_err: - up_write(&sb_dqopt(sb)->dqptr_sem); + spin_unlock(&dq_data_lock); /* Drop unused references */ dqput_all(got); } @@ -1469,19 +1458,24 @@ void dquot_initialize(struct inode *inode) EXPORT_SYMBOL(dquot_initialize); /* - * Release all quotas referenced by inode + * Release all quotas referenced by inode. + * + * This function only be called on inode free or converting + * a file to quota file, no other users for the i_dquot in + * both cases, so we needn't call synchronize_srcu() after + * clearing i_dquot. */ static void __dquot_drop(struct inode *inode) { int cnt; struct dquot *put[MAXQUOTAS]; - down_write(&sb_dqopt(inode->i_sb)->dqptr_sem); + spin_lock(&dq_data_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { put[cnt] = inode->i_dquot[cnt]; inode->i_dquot[cnt] = NULL; } - up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); + spin_unlock(&dq_data_lock); dqput_all(put); } @@ -1599,15 +1593,11 @@ static void inode_decr_space(struct inode *inode, qsize_t number, int reserve) */ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags) { - int cnt, ret = 0; + int cnt, ret = 0, index; struct dquot_warn warn[MAXQUOTAS]; struct dquot **dquots = inode->i_dquot; int reserve = flags & DQUOT_SPACE_RESERVE; - /* - * First test before acquiring mutex - solves deadlocks when we - * re-enter the quota code and are already holding the mutex - */ if (!dquot_active(inode)) { inode_incr_space(inode, number, reserve); goto out; @@ -1616,7 +1606,7 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags) for (cnt = 0; cnt < MAXQUOTAS; cnt++) warn[cnt].w_type = QUOTA_NL_NOWARN; - down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); + index = srcu_read_lock(&dquot_srcu); spin_lock(&dq_data_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (!dquots[cnt]) @@ -1643,7 +1633,7 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags) goto out_flush_warn; mark_all_dquot_dirty(dquots); out_flush_warn: - up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); + srcu_read_unlock(&dquot_srcu, index); flush_warnings(warn); out: return ret; @@ -1655,17 +1645,16 @@ EXPORT_SYMBOL(__dquot_alloc_space); */ int dquot_alloc_inode(const struct inode *inode) { - int cnt, ret = 0; + int cnt, ret = 0, index; struct dquot_warn warn[MAXQUOTAS]; struct dquot * const *dquots = inode->i_dquot; - /* First test before acquiring mutex - solves deadlocks when we - * re-enter the quota code and are already holding the mutex */ if (!dquot_active(inode)) return 0; for (cnt = 0; cnt < MAXQUOTAS; cnt++) warn[cnt].w_type = QUOTA_NL_NOWARN; - down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); + + index = srcu_read_lock(&dquot_srcu); spin_lock(&dq_data_lock); for 
(cnt = 0; cnt < MAXQUOTAS; cnt++) { if (!dquots[cnt]) @@ -1685,7 +1674,7 @@ warn_put_all: spin_unlock(&dq_data_lock); if (ret == 0) mark_all_dquot_dirty(dquots); - up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); + srcu_read_unlock(&dquot_srcu, index); flush_warnings(warn); return ret; } @@ -1696,14 +1685,14 @@ EXPORT_SYMBOL(dquot_alloc_inode); */ int dquot_claim_space_nodirty(struct inode *inode, qsize_t number) { - int cnt; + int cnt, index; if (!dquot_active(inode)) { inode_claim_rsv_space(inode, number); return 0; } - down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); + index = srcu_read_lock(&dquot_srcu); spin_lock(&dq_data_lock); /* Claim reserved quotas to allocated quotas */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { @@ -1715,7 +1704,7 @@ int dquot_claim_space_nodirty(struct inode *inode, qsize_t number) inode_claim_rsv_space(inode, number); spin_unlock(&dq_data_lock); mark_all_dquot_dirty(inode->i_dquot); - up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); + srcu_read_unlock(&dquot_srcu, index); return 0; } EXPORT_SYMBOL(dquot_claim_space_nodirty); @@ -1725,14 +1714,14 @@ EXPORT_SYMBOL(dquot_claim_space_nodirty); */ void dquot_reclaim_space_nodirty(struct inode *inode, qsize_t number) { - int cnt; + int cnt, index; if (!dquot_active(inode)) { inode_reclaim_rsv_space(inode, number); return; } - down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); + index = srcu_read_lock(&dquot_srcu); spin_lock(&dq_data_lock); /* Claim reserved quotas to allocated quotas */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { @@ -1744,7 +1733,7 @@ void dquot_reclaim_space_nodirty(struct inode *inode, qsize_t number) inode_reclaim_rsv_space(inode, number); spin_unlock(&dq_data_lock); mark_all_dquot_dirty(inode->i_dquot); - up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); + srcu_read_unlock(&dquot_srcu, index); return; } EXPORT_SYMBOL(dquot_reclaim_space_nodirty); @@ -1757,16 +1746,14 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags) unsigned int cnt; struct dquot_warn warn[MAXQUOTAS]; struct dquot **dquots = inode->i_dquot; - int reserve = flags & DQUOT_SPACE_RESERVE; + int reserve = flags & DQUOT_SPACE_RESERVE, index; - /* First test before acquiring mutex - solves deadlocks when we - * re-enter the quota code and are already holding the mutex */ if (!dquot_active(inode)) { inode_decr_space(inode, number, reserve); return; } - down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); + index = srcu_read_lock(&dquot_srcu); spin_lock(&dq_data_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { int wtype; @@ -1789,7 +1776,7 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags) goto out_unlock; mark_all_dquot_dirty(dquots); out_unlock: - up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); + srcu_read_unlock(&dquot_srcu, index); flush_warnings(warn); } EXPORT_SYMBOL(__dquot_free_space); @@ -1802,13 +1789,12 @@ void dquot_free_inode(const struct inode *inode) unsigned int cnt; struct dquot_warn warn[MAXQUOTAS]; struct dquot * const *dquots = inode->i_dquot; + int index; - /* First test before acquiring mutex - solves deadlocks when we - * re-enter the quota code and are already holding the mutex */ if (!dquot_active(inode)) return; - down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); + index = srcu_read_lock(&dquot_srcu); spin_lock(&dq_data_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { int wtype; @@ -1823,7 +1809,7 @@ void dquot_free_inode(const struct inode *inode) } spin_unlock(&dq_data_lock); mark_all_dquot_dirty(dquots); - up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); + srcu_read_unlock(&dquot_srcu, index); flush_warnings(warn); 
} EXPORT_SYMBOL(dquot_free_inode); @@ -1837,6 +1823,8 @@ EXPORT_SYMBOL(dquot_free_inode); * This operation can block, but only after everything is updated * A transaction must be started when entering this function. * + * We are holding reference on transfer_from & transfer_to, no need to + * protect them by srcu_read_lock(). */ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) { @@ -1849,8 +1837,6 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) struct dquot_warn warn_from_inodes[MAXQUOTAS]; struct dquot_warn warn_from_space[MAXQUOTAS]; - /* First test before acquiring mutex - solves deadlocks when we - * re-enter the quota code and are already holding the mutex */ if (IS_NOQUOTA(inode)) return 0; /* Initialize the arrays */ @@ -1859,12 +1845,12 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) warn_from_inodes[cnt].w_type = QUOTA_NL_NOWARN; warn_from_space[cnt].w_type = QUOTA_NL_NOWARN; } - down_write(&sb_dqopt(inode->i_sb)->dqptr_sem); + + spin_lock(&dq_data_lock); if (IS_NOQUOTA(inode)) { /* File without quota accounting? */ - up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); + spin_unlock(&dq_data_lock); return 0; } - spin_lock(&dq_data_lock); cur_space = inode_get_bytes(inode); rsv_space = inode_get_rsv_space(inode); space = cur_space + rsv_space; @@ -1918,7 +1904,6 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) inode->i_dquot[cnt] = transfer_to[cnt]; } spin_unlock(&dq_data_lock); - up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); mark_all_dquot_dirty(transfer_from); mark_all_dquot_dirty(transfer_to); @@ -1932,7 +1917,6 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) return 0; over_quota: spin_unlock(&dq_data_lock); - up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); flush_warnings(warn_to); return ret; } diff --git a/fs/super.c b/fs/super.c index d20d5b11dedf..872b26bf06dd 100644 --- a/fs/super.c +++ b/fs/super.c @@ -218,7 +218,6 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags) lockdep_set_class(&s->s_vfs_rename_mutex, &type->s_vfs_rename_key); mutex_init(&s->s_dquot.dqio_mutex); mutex_init(&s->s_dquot.dqonoff_mutex); - init_rwsem(&s->s_dquot.dqptr_sem); s->s_maxbytes = MAX_NON_LFS; s->s_op = &default_op; s->s_time_gran = 1000000000; diff --git a/include/linux/quota.h b/include/linux/quota.h index 0f3c5d38da1f..80d345a3524c 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -390,7 +390,6 @@ struct quota_info { unsigned int flags; /* Flags for diskquotas on this device */ struct mutex dqio_mutex; /* lock device while I/O in progress */ struct mutex dqonoff_mutex; /* Serialize quotaon & quotaoff */ - struct rw_semaphore dqptr_sem; /* serialize ops using quota_info struct, pointers from inode to dquots */ struct inode *files[MAXQUOTAS]; /* inodes of quotafiles */ struct mem_dqinfo info[MAXQUOTAS]; /* Information for each quota type */ const struct quota_format_ops *ops[MAXQUOTAS]; /* Operations for each type */ -- cgit v1.2.3-59-g8ed1b From 685343fc3ba61a1f6eef361b786601123db16c28 Mon Sep 17 00:00:00 2001 From: Tom Gundersen Date: Mon, 14 Jul 2014 16:37:22 +0200 Subject: net: add name_assign_type netdev attribute Based on a patch by David Herrmann. The name_assign_type attribute gives hints where the interface name of a given net-device comes from. These values are currently defined: NET_NAME_ENUM: The ifname is provided by the kernel with an enumerated suffix, typically based on order of discovery. 
Names may be reused and unpredictable. NET_NAME_PREDICTABLE: The ifname has been assigned by the kernel in a predictable way that is guaranteed to avoid reuse and always be the same for a given device. Examples include statically created devices like the loopback device and names deduced from hardware properties (including being given explicitly by the firmware). Names depending on the order of discovery, or in any other way on the existence of other devices, must not be marked as PREDICTABLE. NET_NAME_USER: The ifname was provided by user-space during net-device setup. NET_NAME_RENAMED: The net-device has been renamed from userspace. Once this type is set, it cannot change again. NET_NAME_UNKNOWN: This is an internal placeholder to indicate that we haven't yet categorized the name. It will not be exposed to userspace; rather, -EINVAL is returned. The aim of these patches is to improve user-space renaming of interfaces. As a general rule, userspace must rename interfaces to guarantee that names stay the same every time a given piece of hardware appears (at boot, or when attaching it). However, there are several situations where userspace should not perform the renaming, and that depends both on the policy of the local admin and, crucially, on the nature of the current interface name. If an interface was created in response to a userspace request, and userspace already provided a name, we most probably want to leave that name alone. The main instance of this is wifi-P2P devices created over nl80211, which currently have a long-standing bug where they are getting renamed by udev. We label such names NET_NAME_USER. If an interface, unbeknown to us, has already been renamed from userspace, we most probably want to leave that alone as well. This will typically happen when third-party plugins (for instance to udev, but the interface is generic so could be from anywhere) rename the interface without informing udev about it. A typical situation is when you switch root from an installer or an initrd to the real system and the new instance of udev does not know what happened before the switch. These types of problems have caused repeated issues in the past. To solve this, once an interface has been renamed, its name is labelled NET_NAME_RENAMED. In many cases, the kernel is actually able to name interfaces in such a way that there is no need for userspace to rename them. This is the case when the enumeration order of devices, or in fact any other (non-parent) device on the system, cannot influence the name of the interface. Examples include statically created devices, or any naming schemes based on hardware properties of the interface. In this case the admin may prefer to use the kernel-provided names, and to make that possible we label such names NET_NAME_PREDICTABLE. We want the kernel to have the possibility of performing predictable interface naming itself (and exposing to userspace that it has), as the information necessary for a proper naming scheme for a certain class of devices may not be exposed to userspace. The case where renaming is almost certainly desired is when the kernel has given the interface a name using global device enumeration based on order of discovery (ethX, wlanY, etc.). These naming schemes are labelled NET_NAME_ENUM. Lastly, a fallback is left as NET_NAME_UNKNOWN, to indicate that a driver has not yet been ported. This is mostly useful as a transitional measure, allowing us to label the various naming schemes bit by bit.
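As a rough, hypothetical sketch of how a userspace renaming policy could consume the new attribute (illustration only, not something this patch adds; it relies solely on the sysfs ABI values 1-4 documented below, and the policy choices shown are examples):

/* Hypothetical userspace helper: decide whether a udev-like tool should
 * rename an interface, based on /sys/class/net/<ifname>/name_assign_type.
 * Per the ABI added below: 1 = enumerated, 2 = predictable,
 * 3 = named by userspace, 4 = renamed.  Reading the attribute fails
 * (-EINVAL) while the type is still NET_NAME_UNKNOWN.
 */
#include <stdbool.h>
#include <stdio.h>

static bool should_rename(const char *ifname)
{
	char path[128];
	FILE *f;
	int type = 0;

	snprintf(path, sizeof(path),
		 "/sys/class/net/%s/name_assign_type", ifname);
	f = fopen(path, "r");
	if (!f)
		return true;	/* old kernel: attribute not present, rename as before */
	if (fscanf(f, "%d", &type) != 1)
		type = 0;	/* NET_NAME_UNKNOWN: read returned -EINVAL */
	fclose(f);

	switch (type) {
	case 3:			/* NET_NAME_USER: keep the user-provided name */
	case 4:			/* NET_NAME_RENAMED: already renamed once */
		return false;
	case 2:			/* NET_NAME_PREDICTABLE: example policy keeps kernel name */
		return false;
	case 1:			/* NET_NAME_ENUM: ethX/wlanY, rename for stability */
	default:		/* unknown or unported driver */
		return true;
	}
}

int main(void)
{
	printf("rename eth0: %s\n", should_rename("eth0") ? "yes" : "no");
	return 0;
}

Whether a PREDICTABLE name is kept or overridden remains a local policy decision; the point of the attribute is only to let such a policy tell kernel-enumerated names apart from user-chosen or already-renamed ones.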
v8: minor documentation fixes v9: move comment to the right commit Signed-off-by: Tom Gundersen Reviewed-by: David Herrmann Reviewed-by: Kay Sievers Signed-off-by: David S. Miller --- Documentation/ABI/testing/sysfs-class-net | 11 +++++++++++ include/linux/netdevice.h | 2 ++ include/uapi/linux/netdevice.h | 6 ++++++ net/core/net-sysfs.c | 20 ++++++++++++++++++++ 4 files changed, 39 insertions(+) (limited to 'include/linux') diff --git a/Documentation/ABI/testing/sysfs-class-net b/Documentation/ABI/testing/sysfs-class-net index 416c5d59f52e..d322b0581194 100644 --- a/Documentation/ABI/testing/sysfs-class-net +++ b/Documentation/ABI/testing/sysfs-class-net @@ -1,3 +1,14 @@ +What: /sys/class/net//name_assign_type +Date: July 2014 +KernelVersion: 3.17 +Contact: netdev@vger.kernel.org +Description: + Indicates the name assignment type. Possible values are: + 1: enumerated by the kernel, possibly in an unpredictable way + 2: predictably named by the kernel + 3: named by userspace + 4: renamed + What: /sys/class/net//addr_assign_type Date: July 2010 KernelVersion: 3.2 diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 3a320db96180..9be34732142f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1381,6 +1381,8 @@ struct net_device { struct kset *queues_kset; #endif + unsigned char name_assign_type; + bool uc_promisc; unsigned int promiscuity; unsigned int allmulti; diff --git a/include/uapi/linux/netdevice.h b/include/uapi/linux/netdevice.h index fdfbd1c17065..55818543342d 100644 --- a/include/uapi/linux/netdevice.h +++ b/include/uapi/linux/netdevice.h @@ -37,6 +37,12 @@ #define INIT_NETDEV_GROUP 0 +/* interface name assignment types (sysfs name_assign_type attribute) */ +#define NET_NAME_UNKNOWN 0 /* unknown origin (not exposed to userspace) */ +#define NET_NAME_ENUM 1 /* enumerated by kernel */ +#define NET_NAME_PREDICTABLE 2 /* predictably named by the kernel */ +#define NET_NAME_USER 3 /* provided by user-space */ +#define NET_NAME_RENAMED 4 /* renamed by user-space */ /* Media selection options. 
*/ enum { diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 1cac29ebb05b..7752f2ad49a5 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -112,6 +112,25 @@ NETDEVICE_SHOW_RO(ifindex, fmt_dec); NETDEVICE_SHOW_RO(type, fmt_dec); NETDEVICE_SHOW_RO(link_mode, fmt_dec); +static ssize_t format_name_assign_type(const struct net_device *net, char *buf) +{ + return sprintf(buf, fmt_dec, net->name_assign_type); +} + +static ssize_t name_assign_type_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct net_device *net = to_net_dev(dev); + ssize_t ret = -EINVAL; + + if (net->name_assign_type != NET_NAME_UNKNOWN) + ret = netdev_show(dev, attr, buf, format_name_assign_type); + + return ret; +} +static DEVICE_ATTR_RO(name_assign_type); + /* use same locking rules as GIFHWADDR ioctl's */ static ssize_t address_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -387,6 +406,7 @@ static struct attribute *net_class_attrs[] = { &dev_attr_dev_port.attr, &dev_attr_iflink.attr, &dev_attr_ifindex.attr, + &dev_attr_name_assign_type.attr, &dev_attr_addr_assign_type.attr, &dev_attr_addr_len.attr, &dev_attr_link_mode.attr, -- cgit v1.2.3-59-g8ed1b From c835a677331495cf137a7f8a023463afd9f032f8 Mon Sep 17 00:00:00 2001 From: Tom Gundersen Date: Mon, 14 Jul 2014 16:37:24 +0200 Subject: net: set name_assign_type in alloc_netdev() Extend alloc_netdev{,_mq{,s}}() to take name_assign_type as argument, and convert all users to pass NET_NAME_UNKNOWN. Coccinelle patch: @@ expression sizeof_priv, name, setup, txqs, rxqs, count; @@ ( -alloc_netdev_mqs(sizeof_priv, name, setup, txqs, rxqs) +alloc_netdev_mqs(sizeof_priv, name, NET_NAME_UNKNOWN, setup, txqs, rxqs) | -alloc_netdev_mq(sizeof_priv, name, setup, count) +alloc_netdev_mq(sizeof_priv, name, NET_NAME_UNKNOWN, setup, count) | -alloc_netdev(sizeof_priv, name, setup) +alloc_netdev(sizeof_priv, name, NET_NAME_UNKNOWN, setup) ) v9: move comments here from the wrong commit Signed-off-by: Tom Gundersen Reviewed-by: David Herrmann Signed-off-by: David S. 
Miller --- drivers/firewire/net.c | 3 ++- drivers/hsi/clients/ssi_protocol.c | 2 +- drivers/infiniband/hw/amso1100/c2_provider.c | 2 +- drivers/infiniband/ulp/ipoib/ipoib_main.c | 4 ++-- drivers/isdn/i4l/isdn_net.c | 3 ++- drivers/media/dvb-core/dvb_net.c | 3 ++- drivers/misc/sgi-xp/xpnet.c | 3 ++- drivers/net/arcnet/arcnet.c | 3 ++- drivers/net/bonding/bond_main.c | 2 +- drivers/net/caif/caif_serial.c | 3 ++- drivers/net/caif/caif_spi.c | 4 ++-- drivers/net/caif/caif_virtio.c | 2 +- drivers/net/can/dev.c | 2 +- drivers/net/can/slcan.c | 2 +- drivers/net/dummy.c | 2 +- drivers/net/eql.c | 3 ++- drivers/net/ethernet/8390/lib8390.c | 2 +- drivers/net/ethernet/tile/tilegx.c | 4 ++-- drivers/net/ethernet/tile/tilepro.c | 3 ++- drivers/net/hamradio/6pack.c | 3 ++- drivers/net/hamradio/baycom_epp.c | 2 +- drivers/net/hamradio/bpqether.c | 4 ++-- drivers/net/hamradio/dmascc.c | 4 ++-- drivers/net/hamradio/hdlcdrv.c | 2 +- drivers/net/hamradio/mkiss.c | 3 ++- drivers/net/hamradio/scc.c | 2 +- drivers/net/hamradio/yam.c | 2 +- drivers/net/ieee802154/fakehard.c | 3 ++- drivers/net/ifb.c | 4 ++-- drivers/net/loopback.c | 2 +- drivers/net/ppp/ppp_generic.c | 3 ++- drivers/net/slip/slip.c | 2 +- drivers/net/tun.c | 3 ++- drivers/net/usb/cdc-phonet.c | 2 +- drivers/net/usb/hso.c | 3 ++- drivers/net/wan/dlci.c | 4 ++-- drivers/net/wan/hdlc.c | 3 ++- drivers/net/wan/hdlc_fr.c | 5 +++-- drivers/net/wan/lapbether.c | 4 ++-- drivers/net/wan/sbni.c | 7 ++++--- drivers/net/wan/sdla.c | 3 ++- drivers/net/wan/x25_asy.c | 4 ++-- drivers/net/wimax/i2400m/usb.c | 2 +- drivers/net/wireless/airo.c | 5 +++-- drivers/net/wireless/ath/ath6kl/cfg80211.c | 2 +- drivers/net/wireless/ath/wil6210/netdev.c | 2 +- drivers/net/wireless/brcm80211/brcmfmac/dhd_linux.c | 3 ++- drivers/net/wireless/libertas/main.c | 2 +- drivers/net/wireless/libertas/mesh.c | 2 +- drivers/net/wireless/mac80211_hwsim.c | 3 ++- drivers/net/wireless/mwifiex/cfg80211.c | 3 ++- drivers/net/xen-netback/interface.c | 4 ++-- drivers/s390/net/claw.c | 2 +- drivers/s390/net/ctcm_main.c | 6 ++++-- drivers/s390/net/netiucv.c | 2 +- drivers/s390/net/qeth_l2_main.c | 6 ++++-- drivers/s390/net/qeth_l3_main.c | 3 ++- drivers/staging/cxt1e1/linux.c | 3 ++- drivers/staging/gdm724x/gdm_lte.c | 2 +- drivers/staging/gdm72xx/gdm_wimax.c | 3 ++- drivers/staging/vt6655/wpactl.c | 3 ++- drivers/staging/wlan-ng/p80211netdev.c | 2 +- drivers/tty/n_gsm.c | 5 ++--- drivers/usb/gadget/f_phonet.c | 3 ++- include/linux/netdevice.h | 10 ++++++---- net/802/fc.c | 2 +- net/802/fddi.c | 3 ++- net/802/hippi.c | 3 ++- net/8021q/vlan.c | 3 ++- net/appletalk/dev.c | 3 ++- net/atm/br2684.c | 4 ++-- net/atm/clip.c | 3 ++- net/batman-adv/soft-interface.c | 2 +- net/bluetooth/6lowpan.c | 2 +- net/bluetooth/bnep/core.c | 5 +++-- net/bridge/br_if.c | 2 +- net/core/dev.c | 13 ++++++++----- net/core/rtnetlink.c | 4 ++-- net/dsa/slave.c | 4 ++-- net/ethernet/eth.c | 3 ++- net/ipv4/ip_tunnel.c | 2 +- net/ipv4/ipmr.c | 2 +- net/ipv6/ip6_gre.c | 6 ++++-- net/ipv6/ip6_tunnel.c | 5 +++-- net/ipv6/ip6_vti.c | 4 ++-- net/ipv6/ip6mr.c | 2 +- net/ipv6/sit.c | 4 +++- net/irda/irda_device.c | 3 ++- net/irda/irlan/irlan_eth.c | 2 +- net/l2tp/l2tp_eth.c | 3 ++- net/mac80211/iface.c | 6 +++--- net/mac802154/ieee802154_dev.c | 6 ++++-- net/netrom/af_netrom.c | 2 +- net/openvswitch/vport-internal_dev.c | 3 ++- net/phonet/pep-gprs.c | 2 +- net/rose/af_rose.c | 2 +- net/sched/sch_teql.c | 4 ++-- 97 files changed, 185 insertions(+), 133 deletions(-) (limited to 'include/linux') diff --git a/drivers/firewire/net.c 
b/drivers/firewire/net.c index c3986452194d..2c68da1ceeee 100644 --- a/drivers/firewire/net.c +++ b/drivers/firewire/net.c @@ -1460,7 +1460,8 @@ static int fwnet_probe(struct fw_unit *unit, goto have_dev; } - net = alloc_netdev(sizeof(*dev), "firewire%d", fwnet_init_dev); + net = alloc_netdev(sizeof(*dev), "firewire%d", NET_NAME_UNKNOWN, + fwnet_init_dev); if (net == NULL) { mutex_unlock(&fwnet_device_mutex); return -ENOMEM; diff --git a/drivers/hsi/clients/ssi_protocol.c b/drivers/hsi/clients/ssi_protocol.c index ce4be3738d46..737fa2e0e782 100644 --- a/drivers/hsi/clients/ssi_protocol.c +++ b/drivers/hsi/clients/ssi_protocol.c @@ -1115,7 +1115,7 @@ static int ssi_protocol_probe(struct device *dev) goto out; } - ssi->netdev = alloc_netdev(0, ifname, ssip_pn_setup); + ssi->netdev = alloc_netdev(0, ifname, NET_NAME_UNKNOWN, ssip_pn_setup); if (!ssi->netdev) { dev_err(dev, "No memory for netdev\n"); err = -ENOMEM; diff --git a/drivers/infiniband/hw/amso1100/c2_provider.c b/drivers/infiniband/hw/amso1100/c2_provider.c index 8af33cf1fc4e..2d5cbf4363e4 100644 --- a/drivers/infiniband/hw/amso1100/c2_provider.c +++ b/drivers/infiniband/hw/amso1100/c2_provider.c @@ -734,7 +734,7 @@ static struct net_device *c2_pseudo_netdev_init(struct c2_dev *c2dev) /* change ethxxx to iwxxx */ strcpy(name, "iw"); strcat(name, &c2dev->netdev->name[3]); - netdev = alloc_netdev(0, name, setup); + netdev = alloc_netdev(0, name, NET_NAME_UNKNOWN, setup); if (!netdev) { printk(KERN_ERR PFX "%s - etherdev alloc failed", __func__); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 5786a78ff8bc..4e675f4fecc9 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1394,8 +1394,8 @@ struct ipoib_dev_priv *ipoib_intf_alloc(const char *name) { struct net_device *dev; - dev = alloc_netdev((int) sizeof (struct ipoib_dev_priv), name, - ipoib_setup); + dev = alloc_netdev((int)sizeof(struct ipoib_dev_priv), name, + NET_NAME_UNKNOWN, ipoib_setup); if (!dev) return NULL; diff --git a/drivers/isdn/i4l/isdn_net.c b/drivers/isdn/i4l/isdn_net.c index d9aebbc510cc..c2ed6246a389 100644 --- a/drivers/isdn/i4l/isdn_net.c +++ b/drivers/isdn/i4l/isdn_net.c @@ -2588,7 +2588,8 @@ isdn_net_new(char *name, struct net_device *master) printk(KERN_WARNING "isdn_net: Could not allocate net-device\n"); return NULL; } - netdev->dev = alloc_netdev(sizeof(isdn_net_local), name, _isdn_setup); + netdev->dev = alloc_netdev(sizeof(isdn_net_local), name, + NET_NAME_UNKNOWN, _isdn_setup); if (!netdev->dev) { printk(KERN_WARNING "isdn_net: Could not allocate network device\n"); kfree(netdev); diff --git a/drivers/media/dvb-core/dvb_net.c b/drivers/media/dvb-core/dvb_net.c index 8a86b3025637..059e6117f22b 100644 --- a/drivers/media/dvb-core/dvb_net.c +++ b/drivers/media/dvb-core/dvb_net.c @@ -1276,7 +1276,8 @@ static int dvb_net_add_if(struct dvb_net *dvbnet, u16 pid, u8 feedtype) if ((if_num = get_if(dvbnet)) < 0) return -EINVAL; - net = alloc_netdev(sizeof(struct dvb_net_priv), "dvb", dvb_net_setup); + net = alloc_netdev(sizeof(struct dvb_net_priv), "dvb", + NET_NAME_UNKNOWN, dvb_net_setup); if (!net) return -ENOMEM; diff --git a/drivers/misc/sgi-xp/xpnet.c b/drivers/misc/sgi-xp/xpnet.c index 3fac67a5204c..557f9782c53c 100644 --- a/drivers/misc/sgi-xp/xpnet.c +++ b/drivers/misc/sgi-xp/xpnet.c @@ -544,7 +544,8 @@ xpnet_init(void) * use ether_setup() to init the majority of our device * structure and then override the necessary pieces. 
*/ - xpnet_device = alloc_netdev(0, XPNET_DEVICE_NAME, ether_setup); + xpnet_device = alloc_netdev(0, XPNET_DEVICE_NAME, NET_NAME_UNKNOWN, + ether_setup); if (xpnet_device == NULL) { kfree(xpnet_broadcast_partitions); return -ENOMEM; diff --git a/drivers/net/arcnet/arcnet.c b/drivers/net/arcnet/arcnet.c index a956053608f9..3b790de6c976 100644 --- a/drivers/net/arcnet/arcnet.c +++ b/drivers/net/arcnet/arcnet.c @@ -346,7 +346,8 @@ struct net_device *alloc_arcdev(const char *name) struct net_device *dev; dev = alloc_netdev(sizeof(struct arcnet_local), - name && *name ? name : "arc%d", arcdev_setup); + name && *name ? name : "arc%d", NET_NAME_UNKNOWN, + arcdev_setup); if(dev) { struct arcnet_local *lp = netdev_priv(dev); spin_lock_init(&lp->lock); diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 09dc3ef771a7..46dcb7b6216f 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4420,7 +4420,7 @@ int bond_create(struct net *net, const char *name) rtnl_lock(); bond_dev = alloc_netdev_mq(sizeof(struct bonding), - name ? name : "bond%d", + name ? name : "bond%d", NET_NAME_UNKNOWN, bond_setup, tx_queues); if (!bond_dev) { pr_err("%s: eek! can't alloc netdev!\n", name); diff --git a/drivers/net/caif/caif_serial.c b/drivers/net/caif/caif_serial.c index fc73865bb83a..27bbc56de15f 100644 --- a/drivers/net/caif/caif_serial.c +++ b/drivers/net/caif/caif_serial.c @@ -349,7 +349,8 @@ static int ldisc_open(struct tty_struct *tty) result = snprintf(name, sizeof(name), "cf%s", tty->name); if (result >= IFNAMSIZ) return -EINVAL; - dev = alloc_netdev(sizeof(*ser), name, caifdev_setup); + dev = alloc_netdev(sizeof(*ser), name, NET_NAME_UNKNOWN, + caifdev_setup); if (!dev) return -ENOMEM; diff --git a/drivers/net/caif/caif_spi.c b/drivers/net/caif/caif_spi.c index ff54c0eb2052..72ea9ff9bb9c 100644 --- a/drivers/net/caif/caif_spi.c +++ b/drivers/net/caif/caif_spi.c @@ -730,8 +730,8 @@ int cfspi_spi_probe(struct platform_device *pdev) int res; dev = (struct cfspi_dev *)pdev->dev.platform_data; - ndev = alloc_netdev(sizeof(struct cfspi), - "cfspi%d", cfspi_setup); + ndev = alloc_netdev(sizeof(struct cfspi), "cfspi%d", + NET_NAME_UNKNOWN, cfspi_setup); if (!dev) return -ENODEV; diff --git a/drivers/net/caif/caif_virtio.c b/drivers/net/caif/caif_virtio.c index 985608634f8c..a5fefb9059c5 100644 --- a/drivers/net/caif/caif_virtio.c +++ b/drivers/net/caif/caif_virtio.c @@ -661,7 +661,7 @@ static int cfv_probe(struct virtio_device *vdev) int err = -EINVAL; netdev = alloc_netdev(sizeof(struct cfv_info), cfv_netdev_name, - cfv_netdev_setup); + NET_NAME_UNKNOWN, cfv_netdev_setup); if (!netdev) return -ENOMEM; diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index e318e87e2bfc..9f91fcba43f8 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -565,7 +565,7 @@ struct net_device *alloc_candev(int sizeof_priv, unsigned int echo_skb_max) else size = sizeof_priv; - dev = alloc_netdev(size, "can%d", can_setup); + dev = alloc_netdev(size, "can%d", NET_NAME_UNKNOWN, can_setup); if (!dev) return NULL; diff --git a/drivers/net/can/slcan.c b/drivers/net/can/slcan.c index ea4d4f1a6411..acb5b92ace92 100644 --- a/drivers/net/can/slcan.c +++ b/drivers/net/can/slcan.c @@ -529,7 +529,7 @@ static struct slcan *slc_alloc(dev_t line) return NULL; sprintf(name, "slcan%d", i); - dev = alloc_netdev(sizeof(*sl), name, slc_setup); + dev = alloc_netdev(sizeof(*sl), name, NET_NAME_UNKNOWN, slc_setup); if (!dev) return NULL; diff --git a/drivers/net/dummy.c 
b/drivers/net/dummy.c index 0932ffbf381b..ff435fbd1ad0 100644 --- a/drivers/net/dummy.c +++ b/drivers/net/dummy.c @@ -164,7 +164,7 @@ static int __init dummy_init_one(void) struct net_device *dev_dummy; int err; - dev_dummy = alloc_netdev(0, "dummy%d", dummy_setup); + dev_dummy = alloc_netdev(0, "dummy%d", NET_NAME_UNKNOWN, dummy_setup); if (!dev_dummy) return -ENOMEM; diff --git a/drivers/net/eql.c b/drivers/net/eql.c index 7a79b6046879..957e5c0cede3 100644 --- a/drivers/net/eql.c +++ b/drivers/net/eql.c @@ -585,7 +585,8 @@ static int __init eql_init_module(void) pr_info("%s\n", version); - dev_eql = alloc_netdev(sizeof(equalizer_t), "eql", eql_setup); + dev_eql = alloc_netdev(sizeof(equalizer_t), "eql", NET_NAME_UNKNOWN, + eql_setup); if (!dev_eql) return -ENOMEM; diff --git a/drivers/net/ethernet/8390/lib8390.c b/drivers/net/ethernet/8390/lib8390.c index 599311f0e05c..b96e8852b2d1 100644 --- a/drivers/net/ethernet/8390/lib8390.c +++ b/drivers/net/ethernet/8390/lib8390.c @@ -986,7 +986,7 @@ static void ethdev_setup(struct net_device *dev) static struct net_device *____alloc_ei_netdev(int size) { return alloc_netdev(sizeof(struct ei_device) + size, "eth%d", - ethdev_setup); + NET_NAME_UNKNOWN, ethdev_setup); } diff --git a/drivers/net/ethernet/tile/tilegx.c b/drivers/net/ethernet/tile/tilegx.c index 4c70360967c2..69557a26f749 100644 --- a/drivers/net/ethernet/tile/tilegx.c +++ b/drivers/net/ethernet/tile/tilegx.c @@ -2201,8 +2201,8 @@ static void tile_net_dev_init(const char *name, const uint8_t *mac) /* Allocate the device structure. Normally, "name" is a * template, instantiated by register_netdev(), but not for us. */ - dev = alloc_netdev_mqs(sizeof(*priv), name, tile_net_setup, - NR_CPUS, 1); + dev = alloc_netdev_mqs(sizeof(*priv), name, NET_NAME_UNKNOWN, + tile_net_setup, NR_CPUS, 1); if (!dev) { pr_err("alloc_netdev_mqs(%s) failed\n", name); return; diff --git a/drivers/net/ethernet/tile/tilepro.c b/drivers/net/ethernet/tile/tilepro.c index e5a5c5d4ce0c..88c712126692 100644 --- a/drivers/net/ethernet/tile/tilepro.c +++ b/drivers/net/ethernet/tile/tilepro.c @@ -2292,7 +2292,8 @@ static struct net_device *tile_net_dev_init(const char *name) * tile_net_setup(), and saves "name". Normally, "name" is a * template, instantiated by register_netdev(), but not for us. 
*/ - dev = alloc_netdev(sizeof(*priv), name, tile_net_setup); + dev = alloc_netdev(sizeof(*priv), name, NET_NAME_UNKNOWN, + tile_net_setup); if (!dev) { pr_err("alloc_netdev(%s) failed\n", name); return NULL; diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c index 66e2b19ef709..c3c4051a089d 100644 --- a/drivers/net/hamradio/6pack.c +++ b/drivers/net/hamradio/6pack.c @@ -596,7 +596,8 @@ static int sixpack_open(struct tty_struct *tty) if (tty->ops->write == NULL) return -EOPNOTSUPP; - dev = alloc_netdev(sizeof(struct sixpack), "sp%d", sp_setup); + dev = alloc_netdev(sizeof(struct sixpack), "sp%d", NET_NAME_UNKNOWN, + sp_setup); if (!dev) { err = -ENOMEM; goto out; diff --git a/drivers/net/hamradio/baycom_epp.c b/drivers/net/hamradio/baycom_epp.c index 484f77ec2ce1..a98c153f371e 100644 --- a/drivers/net/hamradio/baycom_epp.c +++ b/drivers/net/hamradio/baycom_epp.c @@ -1206,7 +1206,7 @@ static int __init init_baycomepp(void) struct net_device *dev; dev = alloc_netdev(sizeof(struct baycom_state), "bce%d", - baycom_epp_dev_setup); + NET_NAME_UNKNOWN, baycom_epp_dev_setup); if (!dev) { printk(KERN_WARNING "bce%d : out of memory\n", i); diff --git a/drivers/net/hamradio/bpqether.c b/drivers/net/hamradio/bpqether.c index d50b23cf9ea9..c2894e43840e 100644 --- a/drivers/net/hamradio/bpqether.c +++ b/drivers/net/hamradio/bpqether.c @@ -501,8 +501,8 @@ static int bpq_new_device(struct net_device *edev) struct net_device *ndev; struct bpqdev *bpq; - ndev = alloc_netdev(sizeof(struct bpqdev), "bpq%d", - bpq_setup); + ndev = alloc_netdev(sizeof(struct bpqdev), "bpq%d", NET_NAME_UNKNOWN, + bpq_setup); if (!ndev) return -ENOMEM; diff --git a/drivers/net/hamradio/dmascc.c b/drivers/net/hamradio/dmascc.c index 6636022a1027..0fad408f24aa 100644 --- a/drivers/net/hamradio/dmascc.c +++ b/drivers/net/hamradio/dmascc.c @@ -466,7 +466,7 @@ static int __init setup_adapter(int card_base, int type, int n) if (!info) goto out; - info->dev[0] = alloc_netdev(0, "", dev_setup); + info->dev[0] = alloc_netdev(0, "", NET_NAME_UNKNOWN, dev_setup); if (!info->dev[0]) { printk(KERN_ERR "dmascc: " "could not allocate memory for %s at %#3x\n", @@ -474,7 +474,7 @@ static int __init setup_adapter(int card_base, int type, int n) goto out1; } - info->dev[1] = alloc_netdev(0, "", dev_setup); + info->dev[1] = alloc_netdev(0, "", NET_NAME_UNKNOWN, dev_setup); if (!info->dev[1]) { printk(KERN_ERR "dmascc: " "could not allocate memory for %s at %#3x\n", diff --git a/drivers/net/hamradio/hdlcdrv.c b/drivers/net/hamradio/hdlcdrv.c index 5d78c1d08abd..c67a27245072 100644 --- a/drivers/net/hamradio/hdlcdrv.c +++ b/drivers/net/hamradio/hdlcdrv.c @@ -699,7 +699,7 @@ struct net_device *hdlcdrv_register(const struct hdlcdrv_ops *ops, if (privsize < sizeof(struct hdlcdrv_state)) privsize = sizeof(struct hdlcdrv_state); - dev = alloc_netdev(privsize, ifname, hdlcdrv_setup); + dev = alloc_netdev(privsize, ifname, NET_NAME_UNKNOWN, hdlcdrv_setup); if (!dev) return ERR_PTR(-ENOMEM); diff --git a/drivers/net/hamradio/mkiss.c b/drivers/net/hamradio/mkiss.c index 8a6c720a4cc9..f990bb1c3e02 100644 --- a/drivers/net/hamradio/mkiss.c +++ b/drivers/net/hamradio/mkiss.c @@ -734,7 +734,8 @@ static int mkiss_open(struct tty_struct *tty) if (tty->ops->write == NULL) return -EOPNOTSUPP; - dev = alloc_netdev(sizeof(struct mkiss), "ax%d", ax_setup); + dev = alloc_netdev(sizeof(struct mkiss), "ax%d", NET_NAME_UNKNOWN, + ax_setup); if (!dev) { err = -ENOMEM; goto out; diff --git a/drivers/net/hamradio/scc.c b/drivers/net/hamradio/scc.c index 
4bc6ee8e7987..57be9e0e98a6 100644 --- a/drivers/net/hamradio/scc.c +++ b/drivers/net/hamradio/scc.c @@ -1515,7 +1515,7 @@ static int scc_net_alloc(const char *name, struct scc_channel *scc) int err; struct net_device *dev; - dev = alloc_netdev(0, name, scc_net_setup); + dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, scc_net_setup); if (!dev) return -ENOMEM; diff --git a/drivers/net/hamradio/yam.c b/drivers/net/hamradio/yam.c index 81901659cc9e..717433cfb81d 100644 --- a/drivers/net/hamradio/yam.c +++ b/drivers/net/hamradio/yam.c @@ -1147,7 +1147,7 @@ static int __init yam_init_driver(void) sprintf(name, "yam%d", i); dev = alloc_netdev(sizeof(struct yam_port), name, - yam_setup); + NET_NAME_UNKNOWN, yam_setup); if (!dev) { pr_err("yam: cannot allocate net device\n"); err = -ENOMEM; diff --git a/drivers/net/ieee802154/fakehard.c b/drivers/net/ieee802154/fakehard.c index 78f18be3bbf2..9ce854f43917 100644 --- a/drivers/net/ieee802154/fakehard.c +++ b/drivers/net/ieee802154/fakehard.c @@ -343,7 +343,8 @@ static int ieee802154fake_probe(struct platform_device *pdev) if (!phy) return -ENOMEM; - dev = alloc_netdev(sizeof(struct fakehard_priv), "hardwpan%d", ieee802154_fake_setup); + dev = alloc_netdev(sizeof(struct fakehard_priv), "hardwpan%d", + NET_NAME_UNKNOWN, ieee802154_fake_setup); if (!dev) { wpan_phy_free(phy); return -ENOMEM; diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c index 46a7790be004..d2d4a3d2237f 100644 --- a/drivers/net/ifb.c +++ b/drivers/net/ifb.c @@ -269,8 +269,8 @@ static int __init ifb_init_one(int index) struct ifb_private *dp; int err; - dev_ifb = alloc_netdev(sizeof(struct ifb_private), - "ifb%d", ifb_setup); + dev_ifb = alloc_netdev(sizeof(struct ifb_private), "ifb%d", + NET_NAME_UNKNOWN, ifb_setup); if (!dev_ifb) return -ENOMEM; diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index bb96409f8c05..8f2262540561 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -195,7 +195,7 @@ static __net_init int loopback_net_init(struct net *net) int err; err = -ENOMEM; - dev = alloc_netdev(0, "lo", loopback_setup); + dev = alloc_netdev(0, "lo", NET_NAME_UNKNOWN, loopback_setup); if (!dev) goto out; diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index 91d6c1272fcf..5c002b1ef169 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -2665,7 +2665,8 @@ ppp_create_interface(struct net *net, int unit, int *retp) int ret = -ENOMEM; int i; - dev = alloc_netdev(sizeof(struct ppp), "", ppp_setup); + dev = alloc_netdev(sizeof(struct ppp), "", NET_NAME_UNKNOWN, + ppp_setup); if (!dev) goto out1; diff --git a/drivers/net/slip/slip.c b/drivers/net/slip/slip.c index 87526443841f..05387b1e2e95 100644 --- a/drivers/net/slip/slip.c +++ b/drivers/net/slip/slip.c @@ -749,7 +749,7 @@ static struct slip *sl_alloc(dev_t line) return NULL; sprintf(name, "sl%d", i); - dev = alloc_netdev(sizeof(*sl), name, sl_setup); + dev = alloc_netdev(sizeof(*sl), name, NET_NAME_UNKNOWN, sl_setup); if (!dev) return NULL; diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 98bad1fb1bfb..acaaf6784179 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1633,7 +1633,8 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) name = ifr->ifr_name; dev = alloc_netdev_mqs(sizeof(struct tun_struct), name, - tun_setup, queues, queues); + NET_NAME_UNKNOWN, tun_setup, queues, + queues); if (!dev) return -ENOMEM; diff --git a/drivers/net/usb/cdc-phonet.c b/drivers/net/usb/cdc-phonet.c index 
6358d420e185..2ec1500d0077 100644 --- a/drivers/net/usb/cdc-phonet.c +++ b/drivers/net/usb/cdc-phonet.c @@ -387,7 +387,7 @@ static int usbpn_probe(struct usb_interface *intf, const struct usb_device_id *i return -EINVAL; dev = alloc_netdev(sizeof(*pnd) + sizeof(pnd->urbs[0]) * rxq_size, - ifname, usbpn_setup); + ifname, NET_NAME_UNKNOWN, usbpn_setup); if (!dev) return -ENOMEM; diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c index a3a05869309d..50b36b299946 100644 --- a/drivers/net/usb/hso.c +++ b/drivers/net/usb/hso.c @@ -2520,7 +2520,8 @@ static struct hso_device *hso_create_net_device(struct usb_interface *interface, /* allocate our network device, then we can put in our private data */ /* call hso_net_init to do the basic initialization */ - net = alloc_netdev(sizeof(struct hso_net), "hso%d", hso_net_init); + net = alloc_netdev(sizeof(struct hso_net), "hso%d", NET_NAME_UNKNOWN, + hso_net_init); if (!net) { dev_err(&interface->dev, "Unable to create ethernet device\n"); goto exit; diff --git a/drivers/net/wan/dlci.c b/drivers/net/wan/dlci.c index 19f7cb2cdef3..a463613a0719 100644 --- a/drivers/net/wan/dlci.c +++ b/drivers/net/wan/dlci.c @@ -327,8 +327,8 @@ static int dlci_add(struct dlci_add *dlci) goto err1; /* create device name */ - master = alloc_netdev( sizeof(struct dlci_local), "dlci%d", - dlci_setup); + master = alloc_netdev(sizeof(struct dlci_local), "dlci%d", + NET_NAME_UNKNOWN, dlci_setup); if (!master) { err = -ENOMEM; goto err1; diff --git a/drivers/net/wan/hdlc.c b/drivers/net/wan/hdlc.c index 9c33ca918e19..51f6cee8aab2 100644 --- a/drivers/net/wan/hdlc.c +++ b/drivers/net/wan/hdlc.c @@ -256,7 +256,8 @@ static void hdlc_setup(struct net_device *dev) struct net_device *alloc_hdlcdev(void *priv) { struct net_device *dev; - dev = alloc_netdev(sizeof(struct hdlc_device), "hdlc%d", hdlc_setup); + dev = alloc_netdev(sizeof(struct hdlc_device), "hdlc%d", + NET_NAME_UNKNOWN, hdlc_setup); if (dev) dev_to_hdlc(dev)->priv = priv; return dev; diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c index 7c6cb4f31798..7cc64eac0fa3 100644 --- a/drivers/net/wan/hdlc_fr.c +++ b/drivers/net/wan/hdlc_fr.c @@ -1075,10 +1075,11 @@ static int fr_add_pvc(struct net_device *frad, unsigned int dlci, int type) used = pvc_is_used(pvc); if (type == ARPHRD_ETHER) { - dev = alloc_netdev(0, "pvceth%d", ether_setup); + dev = alloc_netdev(0, "pvceth%d", NET_NAME_UNKNOWN, + ether_setup); dev->priv_flags &= ~IFF_TX_SKB_SHARING; } else - dev = alloc_netdev(0, "pvc%d", pvc_setup); + dev = alloc_netdev(0, "pvc%d", NET_NAME_UNKNOWN, pvc_setup); if (!dev) { netdev_warn(frad, "Memory squeeze on fr_pvc()\n"); diff --git a/drivers/net/wan/lapbether.c b/drivers/net/wan/lapbether.c index a33a46fa88dd..2f5eda8a7227 100644 --- a/drivers/net/wan/lapbether.c +++ b/drivers/net/wan/lapbether.c @@ -325,8 +325,8 @@ static int lapbeth_new_device(struct net_device *dev) ASSERT_RTNL(); - ndev = alloc_netdev(sizeof(*lapbeth), "lapb%d", - lapbeth_setup); + ndev = alloc_netdev(sizeof(*lapbeth), "lapb%d", NET_NAME_UNKNOWN, + lapbeth_setup); if (!ndev) goto out; diff --git a/drivers/net/wan/sbni.c b/drivers/net/wan/sbni.c index 1b89ecf0959e..758c4ba1e97c 100644 --- a/drivers/net/wan/sbni.c +++ b/drivers/net/wan/sbni.c @@ -227,7 +227,8 @@ int __init sbni_probe(int unit) struct net_device *dev; int err; - dev = alloc_netdev(sizeof(struct net_local), "sbni", sbni_devsetup); + dev = alloc_netdev(sizeof(struct net_local), "sbni", + NET_NAME_UNKNOWN, sbni_devsetup); if (!dev) return -ENOMEM; @@ -1477,8 +1478,8 @@ int 
__init init_module( void ) int err; while( num < SBNI_MAX_NUM_CARDS ) { - dev = alloc_netdev(sizeof(struct net_local), - "sbni%d", sbni_devsetup); + dev = alloc_netdev(sizeof(struct net_local), "sbni%d", + NET_NAME_UNKNOWN, sbni_devsetup); if( !dev) break; diff --git a/drivers/net/wan/sdla.c b/drivers/net/wan/sdla.c index cdd45fb8a1f6..421ac5f85699 100644 --- a/drivers/net/wan/sdla.c +++ b/drivers/net/wan/sdla.c @@ -1631,7 +1631,8 @@ static int __init init_sdla(void) printk("%s.\n", version); - sdla = alloc_netdev(sizeof(struct frad_local), "sdla0", setup_sdla); + sdla = alloc_netdev(sizeof(struct frad_local), "sdla0", + NET_NAME_UNKNOWN, setup_sdla); if (!sdla) return -ENOMEM; diff --git a/drivers/net/wan/x25_asy.c b/drivers/net/wan/x25_asy.c index 5895f1978691..df6c07357556 100644 --- a/drivers/net/wan/x25_asy.c +++ b/drivers/net/wan/x25_asy.c @@ -81,8 +81,8 @@ static struct x25_asy *x25_asy_alloc(void) char name[IFNAMSIZ]; sprintf(name, "x25asy%d", i); - dev = alloc_netdev(sizeof(struct x25_asy), - name, x25_asy_setup); + dev = alloc_netdev(sizeof(struct x25_asy), name, + NET_NAME_UNKNOWN, x25_asy_setup); if (!dev) return NULL; diff --git a/drivers/net/wimax/i2400m/usb.c b/drivers/net/wimax/i2400m/usb.c index cd15a93d9084..e7f5910a6519 100644 --- a/drivers/net/wimax/i2400m/usb.c +++ b/drivers/net/wimax/i2400m/usb.c @@ -472,7 +472,7 @@ int i2400mu_probe(struct usb_interface *iface, /* Allocate instance [calls i2400m_netdev_setup() on it]. */ result = -ENOMEM; - net_dev = alloc_netdev(sizeof(*i2400mu), "wmx%d", + net_dev = alloc_netdev(sizeof(*i2400mu), "wmx%d", NET_NAME_UNKNOWN, i2400mu_netdev_setup); if (net_dev == NULL) { dev_err(dev, "no memory for network device instance\n"); diff --git a/drivers/net/wireless/airo.c b/drivers/net/wireless/airo.c index 64747d457bb3..29d88739454b 100644 --- a/drivers/net/wireless/airo.c +++ b/drivers/net/wireless/airo.c @@ -2685,7 +2685,8 @@ static struct net_device *init_wifidev(struct airo_info *ai, struct net_device *ethdev) { int err; - struct net_device *dev = alloc_netdev(0, "wifi%d", wifi_setup); + struct net_device *dev = alloc_netdev(0, "wifi%d", NET_NAME_UNKNOWN, + wifi_setup); if (!dev) return NULL; dev->ml_priv = ethdev->ml_priv; @@ -2785,7 +2786,7 @@ static struct net_device *_init_airo_card( unsigned short irq, int port, CapabilityRid cap_rid; /* Create the network device object. 
*/ - dev = alloc_netdev(sizeof(*ai), "", ether_setup); + dev = alloc_netdev(sizeof(*ai), "", NET_NAME_UNKNOWN, ether_setup); if (!dev) { airo_print_err("", "Couldn't alloc_etherdev"); return NULL; diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.c b/drivers/net/wireless/ath/ath6kl/cfg80211.c index 0e26f4a34fda..1c4ce8e3eebe 100644 --- a/drivers/net/wireless/ath/ath6kl/cfg80211.c +++ b/drivers/net/wireless/ath/ath6kl/cfg80211.c @@ -3636,7 +3636,7 @@ struct wireless_dev *ath6kl_interface_add(struct ath6kl *ar, const char *name, struct net_device *ndev; struct ath6kl_vif *vif; - ndev = alloc_netdev(sizeof(*vif), name, ether_setup); + ndev = alloc_netdev(sizeof(*vif), name, NET_NAME_UNKNOWN, ether_setup); if (!ndev) return NULL; diff --git a/drivers/net/wireless/ath/wil6210/netdev.c b/drivers/net/wireless/ath/wil6210/netdev.c index 106b6dcb773a..7afce6e8c507 100644 --- a/drivers/net/wireless/ath/wil6210/netdev.c +++ b/drivers/net/wireless/ath/wil6210/netdev.c @@ -132,7 +132,7 @@ void *wil_if_alloc(struct device *dev, void __iomem *csr) ch = wdev->wiphy->bands[IEEE80211_BAND_60GHZ]->channels; cfg80211_chandef_create(&wdev->preset_chandef, ch, NL80211_CHAN_NO_HT); - ndev = alloc_netdev(0, "wlan%d", ether_setup); + ndev = alloc_netdev(0, "wlan%d", NET_NAME_UNKNOWN, ether_setup); if (!ndev) { dev_err(dev, "alloc_netdev_mqs failed\n"); rc = -ENOMEM; diff --git a/drivers/net/wireless/brcm80211/brcmfmac/dhd_linux.c b/drivers/net/wireless/brcm80211/brcmfmac/dhd_linux.c index 09dd8c13d844..2699441d4f41 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/dhd_linux.c +++ b/drivers/net/wireless/brcm80211/brcmfmac/dhd_linux.c @@ -808,7 +808,8 @@ struct brcmf_if *brcmf_add_if(struct brcmf_pub *drvr, s32 bssidx, s32 ifidx, } else { brcmf_dbg(INFO, "allocate netdev interface\n"); /* Allocate netdev, including space for private structure */ - ndev = alloc_netdev(sizeof(*ifp), name, ether_setup); + ndev = alloc_netdev(sizeof(*ifp), name, NET_NAME_UNKNOWN, + ether_setup); if (!ndev) return ERR_PTR(-ENOMEM); diff --git a/drivers/net/wireless/libertas/main.c b/drivers/net/wireless/libertas/main.c index 0c02f0483d1f..569b64ecc607 100644 --- a/drivers/net/wireless/libertas/main.c +++ b/drivers/net/wireless/libertas/main.c @@ -981,7 +981,7 @@ struct lbs_private *lbs_add_card(void *card, struct device *dmdev) goto err_wdev; } - dev = alloc_netdev(0, "wlan%d", ether_setup); + dev = alloc_netdev(0, "wlan%d", NET_NAME_UNKNOWN, ether_setup); if (!dev) { dev_err(dmdev, "no memory for network device instance\n"); goto err_adapter; diff --git a/drivers/net/wireless/libertas/mesh.c b/drivers/net/wireless/libertas/mesh.c index 6fef746345bc..01a67f62696f 100644 --- a/drivers/net/wireless/libertas/mesh.c +++ b/drivers/net/wireless/libertas/mesh.c @@ -1000,7 +1000,7 @@ static int lbs_add_mesh(struct lbs_private *priv) goto done; } - mesh_dev = alloc_netdev(0, "msh%d", ether_setup); + mesh_dev = alloc_netdev(0, "msh%d", NET_NAME_UNKNOWN, ether_setup); if (!mesh_dev) { lbs_deb_mesh("init mshX device failed\n"); ret = -ENOMEM; diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index eba51460a5de..5ea65fce0b83 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -2676,7 +2676,8 @@ static int __init init_mac80211_hwsim(void) goto out_free_radios; } - hwsim_mon = alloc_netdev(0, "hwsim%d", hwsim_mon_setup); + hwsim_mon = alloc_netdev(0, "hwsim%d", NET_NAME_UNKNOWN, + hwsim_mon_setup); if (hwsim_mon == NULL) { err = -ENOMEM; goto out_free_radios; 
diff --git a/drivers/net/wireless/mwifiex/cfg80211.c b/drivers/net/wireless/mwifiex/cfg80211.c index 6af135fa99f7..ca87f923c61e 100644 --- a/drivers/net/wireless/mwifiex/cfg80211.c +++ b/drivers/net/wireless/mwifiex/cfg80211.c @@ -2232,7 +2232,8 @@ struct wireless_dev *mwifiex_add_virtual_intf(struct wiphy *wiphy, } dev = alloc_netdev_mqs(sizeof(struct mwifiex_private *), name, - ether_setup, IEEE80211_NUM_ACS, 1); + NET_NAME_UNKNOWN, ether_setup, + IEEE80211_NUM_ACS, 1); if (!dev) { wiphy_err(wiphy, "no memory available for netdevice\n"); priv->bss_mode = NL80211_IFTYPE_UNSPECIFIED; diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index ef75b45e5085..bd59d9dbf27b 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -418,8 +418,8 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid, * When the guest selects the desired number, it will be updated * via netif_set_real_num_*_queues(). */ - dev = alloc_netdev_mq(sizeof(struct xenvif), name, ether_setup, - xenvif_max_queues); + dev = alloc_netdev_mq(sizeof(struct xenvif), name, NET_NAME_UNKNOWN, + ether_setup, xenvif_max_queues); if (dev == NULL) { pr_warn("Could not allocate netdev for %s\n", name); return ERR_PTR(-ENOMEM); diff --git a/drivers/s390/net/claw.c b/drivers/s390/net/claw.c index d837c3c5330f..fbc6701bef30 100644 --- a/drivers/s390/net/claw.c +++ b/drivers/s390/net/claw.c @@ -2915,7 +2915,7 @@ claw_new_device(struct ccwgroup_device *cgdev) "failed with error code %d\n", ret); goto out; } - dev = alloc_netdev(0,"claw%d",claw_init_netdevice); + dev = alloc_netdev(0, "claw%d", NET_NAME_UNKNOWN, claw_init_netdevice); if (!dev) { dev_warn(&cgdev->dev, "Activating the CLAW device failed\n"); diff --git a/drivers/s390/net/ctcm_main.c b/drivers/s390/net/ctcm_main.c index 03b6ad035577..e056dd4fe44d 100644 --- a/drivers/s390/net/ctcm_main.c +++ b/drivers/s390/net/ctcm_main.c @@ -1137,9 +1137,11 @@ static struct net_device *ctcm_init_netdevice(struct ctcm_priv *priv) return NULL; if (IS_MPC(priv)) - dev = alloc_netdev(0, MPC_DEVICE_GENE, ctcm_dev_setup); + dev = alloc_netdev(0, MPC_DEVICE_GENE, NET_NAME_UNKNOWN, + ctcm_dev_setup); else - dev = alloc_netdev(0, CTC_DEVICE_GENE, ctcm_dev_setup); + dev = alloc_netdev(0, CTC_DEVICE_GENE, NET_NAME_UNKNOWN, + ctcm_dev_setup); if (!dev) { CTCM_DBF_TEXT_(ERROR, CTC_DBF_CRIT, diff --git a/drivers/s390/net/netiucv.c b/drivers/s390/net/netiucv.c index ce16d1bdb20a..0a87809c8af7 100644 --- a/drivers/s390/net/netiucv.c +++ b/drivers/s390/net/netiucv.c @@ -2015,7 +2015,7 @@ static struct net_device *netiucv_init_netdevice(char *username, char *userdata) struct net_device *dev; dev = alloc_netdev(sizeof(struct netiucv_priv), "iucv%d", - netiucv_setup_netdevice); + NET_NAME_UNKNOWN, netiucv_setup_netdevice); if (!dev) return NULL; rtnl_lock(); diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 5ef5b4f45758..c2679bfe7f66 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -952,10 +952,12 @@ static int qeth_l2_setup_netdev(struct qeth_card *card) { switch (card->info.type) { case QETH_CARD_TYPE_IQD: - card->dev = alloc_netdev(0, "hsi%d", ether_setup); + card->dev = alloc_netdev(0, "hsi%d", NET_NAME_UNKNOWN, + ether_setup); break; case QETH_CARD_TYPE_OSN: - card->dev = alloc_netdev(0, "osn%d", ether_setup); + card->dev = alloc_netdev(0, "osn%d", NET_NAME_UNKNOWN, + ether_setup); card->dev->flags |= IFF_NOARP; break; default: diff --git 
a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 14e0b5810e8c..f8427a2c4840 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -3287,7 +3287,8 @@ static int qeth_l3_setup_netdev(struct qeth_card *card) } } } else if (card->info.type == QETH_CARD_TYPE_IQD) { - card->dev = alloc_netdev(0, "hsi%d", ether_setup); + card->dev = alloc_netdev(0, "hsi%d", NET_NAME_UNKNOWN, + ether_setup); if (!card->dev) return -ENODEV; card->dev->flags |= IFF_NOARP; diff --git a/drivers/staging/cxt1e1/linux.c b/drivers/staging/cxt1e1/linux.c index 09f3d5ca75ac..85d776bbfb15 100644 --- a/drivers/staging/cxt1e1/linux.c +++ b/drivers/staging/cxt1e1/linux.c @@ -917,7 +917,8 @@ c4_add_dev(hdw_info_t *hi, int brdno, unsigned long f0, unsigned long f1, struct net_device *ndev; ci_t *ci; - ndev = alloc_netdev(sizeof(ci_t), SBE_IFACETMPL, c4_setup); + ndev = alloc_netdev(sizeof(ci_t), SBE_IFACETMPL, NET_NAME_UNKNOWN, + c4_setup); if (!ndev) { pr_warning("%s: no memory for struct net_device !\n", hi->devname); diff --git a/drivers/staging/gdm724x/gdm_lte.c b/drivers/staging/gdm724x/gdm_lte.c index 64c55b99fda4..c2268527422f 100644 --- a/drivers/staging/gdm724x/gdm_lte.c +++ b/drivers/staging/gdm724x/gdm_lte.c @@ -885,7 +885,7 @@ int register_lte_device(struct phy_dev *phy_dev, /* Allocate netdev */ net = alloc_netdev(sizeof(struct nic), pdn_dev_name, - ether_setup); + NET_NAME_UNKNOWN, ether_setup); if (net == NULL) { pr_err("alloc_netdev failed\n"); ret = -ENOMEM; diff --git a/drivers/staging/gdm72xx/gdm_wimax.c b/drivers/staging/gdm72xx/gdm_wimax.c index e5e511585122..a9a6fc51024b 100644 --- a/drivers/staging/gdm72xx/gdm_wimax.c +++ b/drivers/staging/gdm72xx/gdm_wimax.c @@ -886,7 +886,8 @@ int register_wimax_device(struct phy_dev *phy_dev, struct device *pdev) struct net_device *dev; int ret; - dev = alloc_netdev(sizeof(*nic), "wm%d", ether_setup); + dev = alloc_netdev(sizeof(*nic), "wm%d", NET_NAME_UNKNOWN, + ether_setup); if (dev == NULL) { pr_err("alloc_etherdev failed\n"); diff --git a/drivers/staging/vt6655/wpactl.c b/drivers/staging/vt6655/wpactl.c index 8392d4d1d5ed..0814bfd68b2e 100644 --- a/drivers/staging/vt6655/wpactl.c +++ b/drivers/staging/vt6655/wpactl.c @@ -89,7 +89,8 @@ static int wpa_init_wpadev(PSDevice pDevice) struct net_device *dev = pDevice->dev; int ret = 0; - pDevice->wpadev = alloc_netdev(sizeof(PSDevice), "vntwpa", wpadev_setup); + pDevice->wpadev = alloc_netdev(sizeof(PSDevice), "vntwpa", + NET_NAME_UNKNOWN, wpadev_setup); if (pDevice->wpadev == NULL) return -ENOMEM; diff --git a/drivers/staging/wlan-ng/p80211netdev.c b/drivers/staging/wlan-ng/p80211netdev.c index 00b186c59725..6c78f917e24a 100644 --- a/drivers/staging/wlan-ng/p80211netdev.c +++ b/drivers/staging/wlan-ng/p80211netdev.c @@ -769,7 +769,7 @@ int wlan_setup(wlandevice_t *wlandev, struct device *physdev) /* Allocate and initialize the struct device */ netdev = alloc_netdev(sizeof(struct wireless_dev), "wlan%d", - ether_setup); + NET_NAME_UNKNOWN, ether_setup); if (netdev == NULL) { dev_err(physdev, "Failed to alloc netdev.\n"); wlan_free_wiphy(wiphy); diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index 2ebe47b78a3e..cde3ab97900f 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -2789,9 +2789,8 @@ static int gsm_create_network(struct gsm_dlci *dlci, struct gsm_netconfig *nc) netname = "gsm%d"; if (nc->if_name[0] != '\0') netname = nc->if_name; - net = alloc_netdev(sizeof(struct gsm_mux_net), - netname, - gsm_mux_net_init); + net = 
alloc_netdev(sizeof(struct gsm_mux_net), netname, + NET_NAME_UNKNOWN, gsm_mux_net_init); if (!net) { pr_err("alloc_netdev failed"); return -ENOMEM; diff --git a/drivers/usb/gadget/f_phonet.c b/drivers/usb/gadget/f_phonet.c index f2b781773eed..b9cfc1571d71 100644 --- a/drivers/usb/gadget/f_phonet.c +++ b/drivers/usb/gadget/f_phonet.c @@ -721,7 +721,8 @@ struct net_device *gphonet_setup_default(void) struct phonet_port *port; /* Create net device */ - dev = alloc_netdev(sizeof(*port), "upnlink%d", pn_net_setup); + dev = alloc_netdev(sizeof(*port), "upnlink%d", NET_NAME_UNKNOWN, + pn_net_setup); if (!dev) return ERR_PTR(-ENOMEM); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9be34732142f..15ed750458ad 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2991,13 +2991,15 @@ void ether_setup(struct net_device *dev); /* Support for loadable net-drivers */ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, + unsigned char name_assign_type, void (*setup)(struct net_device *), unsigned int txqs, unsigned int rxqs); -#define alloc_netdev(sizeof_priv, name, setup) \ - alloc_netdev_mqs(sizeof_priv, name, setup, 1, 1) +#define alloc_netdev(sizeof_priv, name, name_assign_type, setup) \ + alloc_netdev_mqs(sizeof_priv, name, name_assign_type, setup, 1, 1) -#define alloc_netdev_mq(sizeof_priv, name, setup, count) \ - alloc_netdev_mqs(sizeof_priv, name, setup, count, count) +#define alloc_netdev_mq(sizeof_priv, name, name_assign_type, setup, count) \ + alloc_netdev_mqs(sizeof_priv, name, name_assign_type, setup, count, \ + count) int register_netdev(struct net_device *dev); void unregister_netdev(struct net_device *dev); diff --git a/net/802/fc.c b/net/802/fc.c index 05eea6b98bb8..7c174b6750cd 100644 --- a/net/802/fc.c +++ b/net/802/fc.c @@ -126,6 +126,6 @@ static void fc_setup(struct net_device *dev) */ struct net_device *alloc_fcdev(int sizeof_priv) { - return alloc_netdev(sizeof_priv, "fc%d", fc_setup); + return alloc_netdev(sizeof_priv, "fc%d", NET_NAME_UNKNOWN, fc_setup); } EXPORT_SYMBOL(alloc_fcdev); diff --git a/net/802/fddi.c b/net/802/fddi.c index 9cda40661e0d..59e7346f1193 100644 --- a/net/802/fddi.c +++ b/net/802/fddi.c @@ -207,7 +207,8 @@ static void fddi_setup(struct net_device *dev) */ struct net_device *alloc_fddidev(int sizeof_priv) { - return alloc_netdev(sizeof_priv, "fddi%d", fddi_setup); + return alloc_netdev(sizeof_priv, "fddi%d", NET_NAME_UNKNOWN, + fddi_setup); } EXPORT_SYMBOL(alloc_fddidev); diff --git a/net/802/hippi.c b/net/802/hippi.c index 5ff2a718ddca..2e03f8259dd5 100644 --- a/net/802/hippi.c +++ b/net/802/hippi.c @@ -228,7 +228,8 @@ static void hippi_setup(struct net_device *dev) struct net_device *alloc_hippi_dev(int sizeof_priv) { - return alloc_netdev(sizeof_priv, "hip%d", hippi_setup); + return alloc_netdev(sizeof_priv, "hip%d", NET_NAME_UNKNOWN, + hippi_setup); } EXPORT_SYMBOL(alloc_hippi_dev); diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 44ebd5c2cd4a..cba9c212a730 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -250,7 +250,8 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id) snprintf(name, IFNAMSIZ, "vlan%.4i", vlan_id); } - new_dev = alloc_netdev(sizeof(struct vlan_dev_priv), name, vlan_setup); + new_dev = alloc_netdev(sizeof(struct vlan_dev_priv), name, + NET_NAME_UNKNOWN, vlan_setup); if (new_dev == NULL) return -ENOBUFS; diff --git a/net/appletalk/dev.c b/net/appletalk/dev.c index 6c8016f61866..e4158b8b926d 100644 --- a/net/appletalk/dev.c +++ 
b/net/appletalk/dev.c @@ -39,6 +39,7 @@ static void ltalk_setup(struct net_device *dev) struct net_device *alloc_ltalkdev(int sizeof_priv) { - return alloc_netdev(sizeof_priv, "lt%d", ltalk_setup); + return alloc_netdev(sizeof_priv, "lt%d", NET_NAME_UNKNOWN, + ltalk_setup); } EXPORT_SYMBOL(alloc_ltalkdev); diff --git a/net/atm/br2684.c b/net/atm/br2684.c index 403e71fa88fe..cc78538d163b 100644 --- a/net/atm/br2684.c +++ b/net/atm/br2684.c @@ -682,8 +682,8 @@ static int br2684_create(void __user *arg) netdev = alloc_netdev(sizeof(struct br2684_dev), ni.ifname[0] ? ni.ifname : "nas%d", - (payload == p_routed) ? - br2684_setup_routed : br2684_setup); + NET_NAME_UNKNOWN, + (payload == p_routed) ? br2684_setup_routed : br2684_setup); if (!netdev) return -ENOMEM; diff --git a/net/atm/clip.c b/net/atm/clip.c index ba291ce4bdff..46339040fef0 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -520,7 +520,8 @@ static int clip_create(int number) if (PRIV(dev)->number >= number) number = PRIV(dev)->number + 1; } - dev = alloc_netdev(sizeof(struct clip_priv), "", clip_setup); + dev = alloc_netdev(sizeof(struct clip_priv), "", NET_NAME_UNKNOWN, + clip_setup); if (!dev) return -ENOMEM; clip_priv = PRIV(dev); diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index e7ee65dc20bf..d551e6302cf3 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -895,7 +895,7 @@ struct net_device *batadv_softif_create(const char *name) int ret; soft_iface = alloc_netdev(sizeof(struct batadv_priv), name, - batadv_softif_init_early); + NET_NAME_UNKNOWN, batadv_softif_init_early); if (!soft_iface) return NULL; diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c index 5a7f81df603c..206b65ccd5b8 100644 --- a/net/bluetooth/6lowpan.c +++ b/net/bluetooth/6lowpan.c @@ -712,7 +712,7 @@ static int setup_netdev(struct l2cap_chan *chan, struct lowpan_dev **dev) unsigned long flags; netdev = alloc_netdev(sizeof(struct lowpan_dev), IFACE_NAME_TEMPLATE, - netdev_setup); + NET_NAME_UNKNOWN, netdev_setup); if (!netdev) return -ENOMEM; diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c index a841d3e776c5..85bcc21e84d2 100644 --- a/net/bluetooth/bnep/core.c +++ b/net/bluetooth/bnep/core.c @@ -538,8 +538,9 @@ int bnep_add_connection(struct bnep_connadd_req *req, struct socket *sock) /* session struct allocated as private part of net_device */ dev = alloc_netdev(sizeof(struct bnep_session), - (*req->device) ? req->device : "bnep%d", - bnep_net_setup); + (*req->device) ? 
req->device : "bnep%d", + NET_NAME_UNKNOWN, + bnep_net_setup); if (!dev) return -ENOMEM; diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 3eca3fdf8fe1..078d336a1f37 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -344,7 +344,7 @@ int br_add_bridge(struct net *net, const char *name) struct net_device *dev; int res; - dev = alloc_netdev(sizeof(struct net_bridge), name, + dev = alloc_netdev(sizeof(struct net_bridge), name, NET_NAME_UNKNOWN, br_dev_setup); if (!dev) diff --git a/net/core/dev.c b/net/core/dev.c index 38793fb84a35..2c98f10ee62a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6441,17 +6441,19 @@ void netdev_freemem(struct net_device *dev) /** * alloc_netdev_mqs - allocate network device - * @sizeof_priv: size of private data to allocate space for - * @name: device name format string - * @setup: callback to initialize device - * @txqs: the number of TX subqueues to allocate - * @rxqs: the number of RX subqueues to allocate + * @sizeof_priv: size of private data to allocate space for + * @name: device name format string + * @name_assign_type: origin of device name + * @setup: callback to initialize device + * @txqs: the number of TX subqueues to allocate + * @rxqs: the number of RX subqueues to allocate * * Allocates a struct net_device with private data area for driver use * and performs basic initialization. Also allocates subqueue structs * for each queue on the device. */ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, + unsigned char name_assign_type, void (*setup)(struct net_device *), unsigned int txqs, unsigned int rxqs) { @@ -6530,6 +6532,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, #endif strcpy(dev->name, name); + dev->name_assign_type = name_assign_type; dev->group = INIT_NETDEV_GROUP; if (!dev->ethtool_ops) dev->ethtool_ops = &default_ethtool_ops; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 1f8a59e02c48..599864322de8 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1828,8 +1828,8 @@ struct net_device *rtnl_create_link(struct net *net, num_rx_queues = ops->get_num_rx_queues(); err = -ENOMEM; - dev = alloc_netdev_mqs(ops->priv_size, ifname, ops->setup, - num_tx_queues, num_rx_queues); + dev = alloc_netdev_mqs(ops->priv_size, ifname, NET_NAME_UNKNOWN, + ops->setup, num_tx_queues, num_rx_queues); if (!dev) goto err; diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 64c5af0a10dd..45a1e34c89e0 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -340,8 +340,8 @@ dsa_slave_create(struct dsa_switch *ds, struct device *parent, struct dsa_slave_priv *p; int ret; - slave_dev = alloc_netdev(sizeof(struct dsa_slave_priv), - name, ether_setup); + slave_dev = alloc_netdev(sizeof(struct dsa_slave_priv), name, + NET_NAME_UNKNOWN, ether_setup); if (slave_dev == NULL) return slave_dev; diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 5dc638cad2e1..f405e0592407 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -390,7 +390,8 @@ EXPORT_SYMBOL(ether_setup); struct net_device *alloc_etherdev_mqs(int sizeof_priv, unsigned int txqs, unsigned int rxqs) { - return alloc_netdev_mqs(sizeof_priv, "eth%d", ether_setup, txqs, rxqs); + return alloc_netdev_mqs(sizeof_priv, "eth%d", NET_NAME_UNKNOWN, + ether_setup, txqs, rxqs); } EXPORT_SYMBOL(alloc_etherdev_mqs); diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 54b6731dab55..0157a7af20a8 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -301,7 +301,7 @@ static struct net_device 
*__ip_tunnel_create(struct net *net, } ASSERT_RTNL(); - dev = alloc_netdev(ops->priv_size, name, ops->setup); + dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup); if (!dev) { err = -ENOMEM; goto failed; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 65bcaa789043..c8034587859d 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -500,7 +500,7 @@ static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt) else sprintf(name, "pimreg%u", mrt->id); - dev = alloc_netdev(0, name, reg_vif_setup); + dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup); if (dev == NULL) return NULL; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 365b2b6f3942..5f19dfbc4c6a 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -322,7 +322,8 @@ static struct ip6_tnl *ip6gre_tunnel_locate(struct net *net, else strcpy(name, "ip6gre%d"); - dev = alloc_netdev(sizeof(*t), name, ip6gre_tunnel_setup); + dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN, + ip6gre_tunnel_setup); if (!dev) return NULL; @@ -1326,7 +1327,8 @@ static int __net_init ip6gre_init_net(struct net *net) int err; ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6gre0", - ip6gre_tunnel_setup); + NET_NAME_UNKNOWN, + ip6gre_tunnel_setup); if (!ign->fb_tunnel_dev) { err = -ENOMEM; goto err_alloc_dev; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 51a1eb185ea7..f9de5a695072 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -315,7 +315,8 @@ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p) else sprintf(name, "ip6tnl%%d"); - dev = alloc_netdev(sizeof (*t), name, ip6_tnl_dev_setup); + dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN, + ip6_tnl_dev_setup); if (dev == NULL) goto failed; @@ -1773,7 +1774,7 @@ static int __net_init ip6_tnl_init_net(struct net *net) err = -ENOMEM; ip6n->fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6tnl0", - ip6_tnl_dev_setup); + NET_NAME_UNKNOWN, ip6_tnl_dev_setup); if (!ip6n->fb_tnl_dev) goto err_alloc_dev; diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 9aaa6bb229e4..17ee4fc32dfe 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -204,7 +204,7 @@ static struct ip6_tnl *vti6_tnl_create(struct net *net, struct __ip6_tnl_parm *p else sprintf(name, "ip6_vti%%d"); - dev = alloc_netdev(sizeof(*t), name, vti6_dev_setup); + dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN, vti6_dev_setup); if (dev == NULL) goto failed; @@ -1020,7 +1020,7 @@ static int __net_init vti6_init_net(struct net *net) err = -ENOMEM; ip6n->fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6_vti0", - vti6_dev_setup); + NET_NAME_UNKNOWN, vti6_dev_setup); if (!ip6n->fb_tnl_dev) goto err_alloc_dev; diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 8250474ab7dc..f9a3fd320d1d 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -744,7 +744,7 @@ static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt) else sprintf(name, "pim6reg%u", mrt->id); - dev = alloc_netdev(0, name, reg_vif_setup); + dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup); if (dev == NULL) return NULL; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 4f408176dc64..2e9ba035fb5f 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -250,7 +250,8 @@ static struct ip_tunnel *ipip6_tunnel_locate(struct net *net, else strcpy(name, "sit%d"); - dev = alloc_netdev(sizeof(*t), name, ipip6_tunnel_setup); + dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN, + ipip6_tunnel_setup); if 
(dev == NULL) return NULL; @@ -1729,6 +1730,7 @@ static int __net_init sit_init_net(struct net *net) sitn->tunnels[3] = sitn->tunnels_r_l; sitn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "sit0", + NET_NAME_UNKNOWN, ipip6_tunnel_setup); if (!sitn->fb_tunnel_dev) { err = -ENOMEM; diff --git a/net/irda/irda_device.c b/net/irda/irda_device.c index 365b895da84b..9e0d909390fd 100644 --- a/net/irda/irda_device.c +++ b/net/irda/irda_device.c @@ -293,7 +293,8 @@ static void irda_device_setup(struct net_device *dev) */ struct net_device *alloc_irdadev(int sizeof_priv) { - return alloc_netdev(sizeof_priv, "irda%d", irda_device_setup); + return alloc_netdev(sizeof_priv, "irda%d", NET_NAME_UNKNOWN, + irda_device_setup); } EXPORT_SYMBOL(alloc_irdadev); diff --git a/net/irda/irlan/irlan_eth.c b/net/irda/irlan/irlan_eth.c index ffcec225b5d9..dc13f1a45f2f 100644 --- a/net/irda/irlan/irlan_eth.c +++ b/net/irda/irlan/irlan_eth.c @@ -96,7 +96,7 @@ static void irlan_eth_setup(struct net_device *dev) */ struct net_device *alloc_irlandev(const char *name) { - return alloc_netdev(sizeof(struct irlan_cb), name, + return alloc_netdev(sizeof(struct irlan_cb), name, NET_NAME_UNKNOWN, irlan_eth_setup); } diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index 76125c57ee6d..edb78e69efe4 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -246,7 +246,8 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 p goto out; } - dev = alloc_netdev(sizeof(*priv), name, l2tp_eth_dev_setup); + dev = alloc_netdev(sizeof(*priv), name, NET_NAME_UNKNOWN, + l2tp_eth_dev_setup); if (!dev) { rc = -ENOMEM; goto out_del_session; diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index bbf51b2f0651..4edfc7c1524f 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1624,9 +1624,9 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, if (local->hw.queues >= IEEE80211_NUM_ACS) txqs = IEEE80211_NUM_ACS; - ndev = alloc_netdev_mqs(sizeof(*sdata) + - local->hw.vif_data_size, - name, ieee80211_if_setup, txqs, 1); + ndev = alloc_netdev_mqs(sizeof(*sdata) + local->hw.vif_data_size, + name, NET_NAME_UNKNOWN, + ieee80211_if_setup, txqs, 1); if (!ndev) return -ENOMEM; dev_net_set(ndev, wiphy_net(local->hw.wiphy)); diff --git a/net/mac802154/ieee802154_dev.c b/net/mac802154/ieee802154_dev.c index 9b54370f5e87..b36b2b996578 100644 --- a/net/mac802154/ieee802154_dev.c +++ b/net/mac802154/ieee802154_dev.c @@ -167,11 +167,13 @@ mac802154_add_iface(struct wpan_phy *phy, const char *name, int type) switch (type) { case IEEE802154_DEV_MONITOR: dev = alloc_netdev(sizeof(struct mac802154_sub_if_data), - name, mac802154_monitor_setup); + name, NET_NAME_UNKNOWN, + mac802154_monitor_setup); break; case IEEE802154_DEV_WPAN: dev = alloc_netdev(sizeof(struct mac802154_sub_if_data), - name, mac802154_wpan_setup); + name, NET_NAME_UNKNOWN, + mac802154_wpan_setup); break; default: dev = NULL; diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index ede50d197e10..71cf1bffea06 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -1418,7 +1418,7 @@ static int __init nr_proto_init(void) struct net_device *dev; sprintf(name, "nr%d", i); - dev = alloc_netdev(0, name, nr_setup); + dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, nr_setup); if (!dev) { printk(KERN_ERR "NET/ROM: nr_proto_init - unable to allocate device structure\n"); goto fail; diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c index 295471a66c78..bd658555afdf 
100644 --- a/net/openvswitch/vport-internal_dev.c +++ b/net/openvswitch/vport-internal_dev.c @@ -165,7 +165,8 @@ static struct vport *internal_dev_create(const struct vport_parms *parms) netdev_vport = netdev_vport_priv(vport); netdev_vport->dev = alloc_netdev(sizeof(struct internal_dev), - parms->name, do_setup); + parms->name, NET_NAME_UNKNOWN, + do_setup); if (!netdev_vport->dev) { err = -ENOMEM; goto error_free_vport; diff --git a/net/phonet/pep-gprs.c b/net/phonet/pep-gprs.c index 66dc65e7c6a1..e9a83a637185 100644 --- a/net/phonet/pep-gprs.c +++ b/net/phonet/pep-gprs.c @@ -267,7 +267,7 @@ int gprs_attach(struct sock *sk) return -EINVAL; /* need packet boundaries */ /* Create net device */ - dev = alloc_netdev(sizeof(*gp), ifname, gprs_setup); + dev = alloc_netdev(sizeof(*gp), ifname, NET_NAME_UNKNOWN, gprs_setup); if (!dev) return -ENOMEM; gp = netdev_priv(dev); diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 8451c8cdc9de..a85c1a086ae4 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -1538,7 +1538,7 @@ static int __init rose_proto_init(void) char name[IFNAMSIZ]; sprintf(name, "rose%d", i); - dev = alloc_netdev(0, name, rose_setup); + dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, rose_setup); if (!dev) { printk(KERN_ERR "ROSE: rose_proto_init - unable to allocate memory\n"); rc = -ENOMEM; diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 474167162947..bd33793b527e 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -485,8 +485,8 @@ static int __init teql_init(void) struct net_device *dev; struct teql_master *master; - dev = alloc_netdev(sizeof(struct teql_master), - "teql%d", teql_master_setup); + dev = alloc_netdev(sizeof(struct teql_master), "teql%d", + NET_NAME_UNKNOWN, teql_master_setup); if (!dev) { err = -ENOMEM; break; -- cgit v1.2.3-59-g8ed1b From 26c4fdb0528ae7c4be9fbc8a8210f3b410e6b5aa Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 14 Jul 2014 17:55:30 -0400 Subject: net-timestamp: document deprecated syststamp The SO_TIMESTAMPING API defines option SOF_TIMESTAMPING_SYS_HW. This feature is deprecated. It should not be implemented by new device drivers. Existing drivers do not implement it, either -- with one exception. Driver developers are encouraged to expose the NIC hw clock as a PTP HW clock source, instead, and synchronize system time to the HW source. The control flag cannot be removed due to being part of the ABI, nor can the structure scm_timestamping that is returned. Due to the one legacy driver, the internal datapath and structure are not removed. This patch only clearly marks the interface as deprecated. Device drivers should always return a syststamp value of zero. Signed-off-by: Willem de Bruijn ---- We can consider adding a WARN_ON_ONCE in__sock_recv_timestamp if non-zero syststamp is encountered Acked-by: Richard Cochran Signed-off-by: David S. 
Miller --- Documentation/networking/timestamping.txt | 10 ++++++++-- include/linux/skbuff.h | 6 ++++-- 2 files changed, 12 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/Documentation/networking/timestamping.txt b/Documentation/networking/timestamping.txt index bc3554124903..8b4ad809df27 100644 --- a/Documentation/networking/timestamping.txt +++ b/Documentation/networking/timestamping.txt @@ -40,7 +40,7 @@ the set bits correspond to data that is available, then the control message will not be generated: SOF_TIMESTAMPING_SOFTWARE: report systime if available -SOF_TIMESTAMPING_SYS_HARDWARE: report hwtimetrans if available +SOF_TIMESTAMPING_SYS_HARDWARE: report hwtimetrans if available (deprecated) SOF_TIMESTAMPING_RAW_HARDWARE: report hwtimeraw if available It is worth noting that timestamps may be collected for reasons other @@ -94,7 +94,13 @@ not perfect; as a consequence, sorting packets received via different NICs by their hwtimetrans may differ from the order in which they were received. hwtimetrans may be non-monotonic even for the same NIC. Filled in if SOF_TIMESTAMPING_SYS_HARDWARE is set. Requires support -by the network device and will be empty without that support. +by the network device and will be empty without that support. This +field is DEPRECATED. Only one driver computes this value. New device +drivers must leave this zero. Instead, they can expose the hardware +clock device on the NIC directly as a HW PTP clock source, to allow +time conversion in userspace and optionally synchronize system time +with a userspace PTP stack such as linuxptp. For the PTP clock API, +see Documentation/ptp/ptp.txt. SIOCSHWTSTAMP, SIOCGHWTSTAMP: diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 890fb3307dd6..369430340ed9 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -211,7 +211,7 @@ static inline void skb_frag_size_sub(skb_frag_t *frag, int delta) * struct skb_shared_hwtstamps - hardware time stamps * @hwtstamp: hardware time stamp transformed into duration * since arbitrary point in time - * @syststamp: hwtstamp transformed to system time base + * @syststamp: hwtstamp transformed to system time base (deprecated) * * Software time stamps generated by ktime_get_real() are stored in * skb->tstamp. The relation between the different kinds of time @@ -222,7 +222,9 @@ static inline void skb_frag_size_sub(skb_frag_t *frag, int delta) * syststamp/tstamp/"syststamp from other device" comparison is * limited by the accuracy of the transformation into system time * base. This depends on the device driver and its underlying - * hardware. + * hardware. The syststamp implementation is deprecated in favor + * of hwtstamps and hw PTP clock sources exposed directly to + * userspace. * * hwtstamps can only be compared against other hwtstamps from * the same device. -- cgit v1.2.3-59-g8ed1b From f65f6455fc082c7488af34f6955eb928903c3ae5 Mon Sep 17 00:00:00 2001 From: Kristina Martšenko Date: Wed, 16 Jul 2014 02:30:19 +0300 Subject: ARM: OMAP2+: remove DSP platform device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It was added to support DSP Bridge. Since DSP Bridge was removed, and nothing else is using the platform device, remove it too. 
Signed-off-by: Kristina Martšenko Cc: Omar Ramirez Luna Cc: Suman Anna Cc: Felipe Contreras Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-omap2/Makefile | 4 - arch/arm/mach-omap2/common.c | 2 - arch/arm/mach-omap2/dsp.c | 134 --------------------------------- include/linux/platform_data/dsp-omap.h | 34 --------- 4 files changed, 174 deletions(-) delete mode 100644 arch/arm/mach-omap2/dsp.c delete mode 100644 include/linux/platform_data/dsp-omap.h (limited to 'include/linux') diff --git a/arch/arm/mach-omap2/Makefile b/arch/arm/mach-omap2/Makefile index 8ca99e9321e3..fa7800015753 100644 --- a/arch/arm/mach-omap2/Makefile +++ b/arch/arm/mach-omap2/Makefile @@ -232,10 +232,6 @@ obj-$(CONFIG_HW_PERF_EVENTS) += pmu.o iommu-$(CONFIG_OMAP_IOMMU) := omap-iommu.o obj-y += $(iommu-m) $(iommu-y) -ifneq ($(CONFIG_TIDSPBRIDGE),) -obj-y += dsp.o -endif - # OMAP2420 MSDI controller integration support ("MMC") obj-$(CONFIG_SOC_OMAP2420) += msdi.o diff --git a/arch/arm/mach-omap2/common.c b/arch/arm/mach-omap2/common.c index 2dabb9ecb986..484cdadfb187 100644 --- a/arch/arm/mach-omap2/common.c +++ b/arch/arm/mach-omap2/common.c @@ -14,7 +14,6 @@ */ #include #include -#include #include "common.h" #include "omap-secure.h" @@ -30,7 +29,6 @@ int __weak omap_secure_ram_reserve_memblock(void) void __init omap_reserve(void) { - omap_dsp_reserve_sdram_memblock(); omap_secure_ram_reserve_memblock(); omap_barrier_reserve_memblock(); } diff --git a/arch/arm/mach-omap2/dsp.c b/arch/arm/mach-omap2/dsp.c deleted file mode 100644 index f7492df1cbba..000000000000 --- a/arch/arm/mach-omap2/dsp.c +++ /dev/null @@ -1,134 +0,0 @@ -/* - * TI's OMAP DSP platform device registration - * - * Copyright (C) 2005-2006 Texas Instruments, Inc. - * Copyright (C) 2009 Nokia Corporation - * - * Written by Hiroshi DOYU - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -/* - * XXX The function pointers to the PRM/CM functions are incorrect and - * should be removed. No device driver should be changing PRM/CM bits - * directly; that's a layering violation -- those bits are the responsibility - * of the OMAP PM core code. 
- */ - -#include -#include - -#include - -#include "control.h" -#include "cm2xxx_3xxx.h" -#include "prm2xxx_3xxx.h" -#ifdef CONFIG_TIDSPBRIDGE_DVFS -#include "omap-pm.h" -#endif -#include "soc.h" - -#include - -static struct platform_device *omap_dsp_pdev; - -static struct omap_dsp_platform_data omap_dsp_pdata __initdata = { -#ifdef CONFIG_TIDSPBRIDGE_DVFS - .dsp_set_min_opp = omap_pm_dsp_set_min_opp, - .dsp_get_opp = omap_pm_dsp_get_opp, - .cpu_set_freq = omap_pm_cpu_set_freq, - .cpu_get_freq = omap_pm_cpu_get_freq, -#endif - .dsp_prm_read = omap2_prm_read_mod_reg, - .dsp_prm_write = omap2_prm_write_mod_reg, - .dsp_prm_rmw_bits = omap2_prm_rmw_mod_reg_bits, - .dsp_cm_read = omap2_cm_read_mod_reg, - .dsp_cm_write = omap2_cm_write_mod_reg, - .dsp_cm_rmw_bits = omap2_cm_rmw_mod_reg_bits, - - .set_bootaddr = omap_ctrl_write_dsp_boot_addr, - .set_bootmode = omap_ctrl_write_dsp_boot_mode, -}; - -static phys_addr_t omap_dsp_phys_mempool_base; - -void __init omap_dsp_reserve_sdram_memblock(void) -{ - phys_addr_t size = CONFIG_TIDSPBRIDGE_MEMPOOL_SIZE; - phys_addr_t paddr; - - if (!cpu_is_omap34xx()) - return; - - if (!size) - return; - - paddr = arm_memblock_steal(size, SZ_1M); - if (!paddr) { - pr_err("%s: failed to reserve %llx bytes\n", - __func__, (unsigned long long)size); - return; - } - - omap_dsp_phys_mempool_base = paddr; -} - -static phys_addr_t omap_dsp_get_mempool_base(void) -{ - return omap_dsp_phys_mempool_base; -} - -static int __init omap_dsp_init(void) -{ - struct platform_device *pdev; - int err = -ENOMEM; - struct omap_dsp_platform_data *pdata = &omap_dsp_pdata; - - if (!cpu_is_omap34xx()) - return 0; - - pdata->phys_mempool_base = omap_dsp_get_mempool_base(); - - if (pdata->phys_mempool_base) { - pdata->phys_mempool_size = CONFIG_TIDSPBRIDGE_MEMPOOL_SIZE; - pr_info("%s: %llx bytes @ %llx\n", __func__, - (unsigned long long)pdata->phys_mempool_size, - (unsigned long long)pdata->phys_mempool_base); - } - - pdev = platform_device_alloc("omap-dsp", -1); - if (!pdev) - goto err_out; - - err = platform_device_add_data(pdev, pdata, sizeof(*pdata)); - if (err) - goto err_out; - - err = platform_device_add(pdev); - if (err) - goto err_out; - - omap_dsp_pdev = pdev; - return 0; - -err_out: - platform_device_put(pdev); - return err; -} -module_init(omap_dsp_init); - -static void __exit omap_dsp_exit(void) -{ - if (!cpu_is_omap34xx()) - return; - - platform_device_unregister(omap_dsp_pdev); -} -module_exit(omap_dsp_exit); - -MODULE_AUTHOR("Hiroshi DOYU"); -MODULE_DESCRIPTION("TI's OMAP DSP platform device registration"); -MODULE_LICENSE("GPL"); diff --git a/include/linux/platform_data/dsp-omap.h b/include/linux/platform_data/dsp-omap.h deleted file mode 100644 index 5927709b1908..000000000000 --- a/include/linux/platform_data/dsp-omap.h +++ /dev/null @@ -1,34 +0,0 @@ -#ifndef __OMAP_DSP_H__ -#define __OMAP_DSP_H__ - -#include - -struct omap_dsp_platform_data { - void (*dsp_set_min_opp) (u8 opp_id); - u8 (*dsp_get_opp) (void); - void (*cpu_set_freq) (unsigned long f); - unsigned long (*cpu_get_freq) (void); - unsigned long mpu_speed[6]; - - /* functions to write and read PRCM registers */ - void (*dsp_prm_write)(u32, s16 , u16); - u32 (*dsp_prm_read)(s16 , u16); - u32 (*dsp_prm_rmw_bits)(u32, u32, s16, s16); - void (*dsp_cm_write)(u32, s16 , u16); - u32 (*dsp_cm_read)(s16 , u16); - u32 (*dsp_cm_rmw_bits)(u32, u32, s16, s16); - - void (*set_bootaddr)(u32); - void (*set_bootmode)(u8); - - phys_addr_t phys_mempool_base; - phys_addr_t phys_mempool_size; -}; - -#if defined(CONFIG_TIDSPBRIDGE) 
|| defined(CONFIG_TIDSPBRIDGE_MODULE) -extern void omap_dsp_reserve_sdram_memblock(void); -#else -static inline void omap_dsp_reserve_sdram_memblock(void) { } -#endif - -#endif -- cgit v1.2.3-59-g8ed1b From 466af29bf4270e84261712428a1304c28e3743fa Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 6 Jun 2014 18:52:06 +0200 Subject: sched/deadline: Kill task_struct->pi_top_task Remove task_struct->pi_top_task. The only user, rt_mutex_setprio(), can use a local. Signed-off-by: Oleg Nesterov Signed-off-by: Peter Zijlstra Cc: Juri Lelli Cc: Alex Thorlton Cc: Andrew Morton Cc: Daeseok Youn Cc: Dario Faggioli Cc: Davidlohr Bueso Cc: David Rientjes Cc: Eric W. Biederman Cc: Linus Torvalds Cc: Matthew Dempsky Cc: Michal Simek Cc: Oleg Nesterov Link: http://lkml.kernel.org/r/20140606165206.GB29465@redhat.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 -- kernel/fork.c | 1 - kernel/sched/core.c | 6 +++--- 3 files changed, 3 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 306f4f0c987a..c9c9ff723525 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1440,8 +1440,6 @@ struct task_struct { struct rb_node *pi_waiters_leftmost; /* Deadlock detection and priority inheritance handling */ struct rt_mutex_waiter *pi_blocked_on; - /* Top pi_waiters task */ - struct task_struct *pi_top_task; #endif #ifdef CONFIG_DEBUG_MUTEXES diff --git a/kernel/fork.c b/kernel/fork.c index d2799d1fc952..6ff87f4429a4 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1095,7 +1095,6 @@ static void rt_mutex_init_task(struct task_struct *p) p->pi_waiters = RB_ROOT; p->pi_waiters_leftmost = NULL; p->pi_blocked_on = NULL; - p->pi_top_task = NULL; #endif } diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 2dbc63d1a847..cf7695a6c1d2 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2980,7 +2980,6 @@ void rt_mutex_setprio(struct task_struct *p, int prio) } trace_sched_pi_setprio(p, prio); - p->pi_top_task = rt_mutex_get_top_task(p); oldprio = p->prio; prev_class = p->sched_class; on_rq = p->on_rq; @@ -3000,8 +2999,9 @@ void rt_mutex_setprio(struct task_struct *p, int prio) * running task */ if (dl_prio(prio)) { - if (!dl_prio(p->normal_prio) || (p->pi_top_task && - dl_entity_preempt(&p->pi_top_task->dl, &p->dl))) { + struct task_struct *pi_task = rt_mutex_get_top_task(p); + if (!dl_prio(p->normal_prio) || + (pi_task && dl_entity_preempt(&pi_task->dl, &p->dl))) { p->dl.dl_boosted = 1; p->dl.dl_throttled = 0; enqueue_flag = ENQUEUE_REPLENISH; -- cgit v1.2.3-59-g8ed1b From 8875125efe8402c4d84b08291e68f1281baba8e2 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Sun, 29 Jun 2014 00:03:57 +0400 Subject: sched: Transform resched_task() into resched_curr() We always use resched_task() with rq->curr argument. It's not possible to reschedule any task but rq's current. The patch introduces resched_curr(struct rq *) to replace all of the repeating patterns. 
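At a call site the conversion is mechanical. A minimal illustrative fragment (not part of the patch itself; it assumes the scheduler's usual struct rq and struct task_struct and mirrors the check_preempt_curr_rt() hunk further down):

	/* before: every caller dereferenced rq->curr itself */
	if (p->prio < rq->curr->prio) {
		resched_task(rq->curr);
		return;
	}

	/* after: pass the runqueue; resched_curr() picks up rq->curr internally */
	if (p->prio < rq->curr->prio) {
		resched_curr(rq);
		return;
	}
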
The main aim is cleanup, but there is a little size profit too: (before) $ size kernel/sched/built-in.o text data bss dec hex filename 155274 16445 7042 178761 2ba49 kernel/sched/built-in.o $ size vmlinux text data bss dec hex filename 7411490 1178376 991232 9581098 92322a vmlinux (after) $ size kernel/sched/built-in.o text data bss dec hex filename 155130 16445 7042 178617 2b9b9 kernel/sched/built-in.o $ size vmlinux text data bss dec hex filename 7411362 1178376 991232 9580970 9231aa vmlinux I was choosing between resched_curr() and resched_rq(), and the first name looks better for me. A little lie in Documentation/trace/ftrace.txt. I have not actually collected the tracing again. With a hope the patch won't make execution times much worse :) Signed-off-by: Kirill Tkhai Signed-off-by: Peter Zijlstra Cc: Linus Torvalds Cc: Randy Dunlap Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20140628200219.1778.18735.stgit@localhost Signed-off-by: Ingo Molnar --- Documentation/trace/ftrace.txt | 2 +- include/linux/sched.h | 6 +++--- kernel/sched/core.c | 25 +++++++++++++------------ kernel/sched/deadline.c | 16 ++++++++-------- kernel/sched/fair.c | 20 ++++++++++---------- kernel/sched/idle_task.c | 2 +- kernel/sched/rt.c | 27 ++++++++++++++------------- kernel/sched/sched.h | 2 +- 8 files changed, 51 insertions(+), 49 deletions(-) (limited to 'include/linux') diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt index 2479b2a0c77c..4da42616939f 100644 --- a/Documentation/trace/ftrace.txt +++ b/Documentation/trace/ftrace.txt @@ -1515,7 +1515,7 @@ Doing the same with chrt -r 5 and function-trace set. -0 3d.h4 1us+: 0:120:R + [003] 2448: 94:R sleep -0 3d.h4 2us : ttwu_do_activate.constprop.87 <-try_to_wake_up -0 3d.h3 3us : check_preempt_curr <-ttwu_do_wakeup - -0 3d.h3 3us : resched_task <-check_preempt_curr + -0 3d.h3 3us : resched_curr <-check_preempt_curr -0 3dNh3 4us : task_woken_rt <-ttwu_do_wakeup -0 3dNh3 4us : _raw_spin_unlock <-try_to_wake_up -0 3dNh3 4us : sub_preempt_count <-_raw_spin_unlock diff --git a/include/linux/sched.h b/include/linux/sched.h index c9c9ff723525..41a195385081 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2786,7 +2786,7 @@ static inline bool __must_check current_set_polling_and_test(void) /* * Polling state must be visible before we test NEED_RESCHED, - * paired by resched_task() + * paired by resched_curr() */ smp_mb__after_atomic(); @@ -2804,7 +2804,7 @@ static inline bool __must_check current_clr_polling_and_test(void) /* * Polling state must be visible before we test NEED_RESCHED, - * paired by resched_task() + * paired by resched_curr() */ smp_mb__after_atomic(); @@ -2836,7 +2836,7 @@ static inline void current_clr_polling(void) * TIF_NEED_RESCHED and the IPI handler, scheduler_ipi(), will also * fold. */ - smp_mb(); /* paired with resched_task() */ + smp_mb(); /* paired with resched_curr() */ preempt_fold_need_resched(); } diff --git a/kernel/sched/core.c b/kernel/sched/core.c index cf7695a6c1d2..2f960813c582 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -589,30 +589,31 @@ static bool set_nr_if_polling(struct task_struct *p) #endif /* - * resched_task - mark a task 'to be rescheduled now'. + * resched_curr - mark rq's current task 'to be rescheduled now'. * * On UP this means the setting of the need_resched flag, on SMP it * might also involve a cross-CPU call to trigger the scheduler on * the target CPU. 
*/ -void resched_task(struct task_struct *p) +void resched_curr(struct rq *rq) { + struct task_struct *curr = rq->curr; int cpu; - lockdep_assert_held(&task_rq(p)->lock); + lockdep_assert_held(&rq->lock); - if (test_tsk_need_resched(p)) + if (test_tsk_need_resched(curr)) return; - cpu = task_cpu(p); + cpu = cpu_of(rq); if (cpu == smp_processor_id()) { - set_tsk_need_resched(p); + set_tsk_need_resched(curr); set_preempt_need_resched(); return; } - if (set_nr_and_not_polling(p)) + if (set_nr_and_not_polling(curr)) smp_send_reschedule(cpu); else trace_sched_wake_idle_without_ipi(cpu); @@ -625,7 +626,7 @@ void resched_cpu(int cpu) if (!raw_spin_trylock_irqsave(&rq->lock, flags)) return; - resched_task(cpu_curr(cpu)); + resched_curr(rq); raw_spin_unlock_irqrestore(&rq->lock, flags); } @@ -1027,7 +1028,7 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags) if (class == rq->curr->sched_class) break; if (class == p->sched_class) { - resched_task(rq->curr); + resched_curr(rq); break; } } @@ -3073,7 +3074,7 @@ void set_user_nice(struct task_struct *p, long nice) * lowered its priority, then reschedule its CPU: */ if (delta < 0 || (delta > 0 && task_running(rq, p))) - resched_task(rq->curr); + resched_curr(rq); } out_unlock: task_rq_unlock(rq, p, &flags); @@ -4299,7 +4300,7 @@ again: * fairness. */ if (preempt && rq != p_rq) - resched_task(p_rq->curr); + resched_curr(p_rq); } out_unlock: @@ -7106,7 +7107,7 @@ static void normalize_task(struct rq *rq, struct task_struct *p) __setscheduler(rq, p, &attr); if (on_rq) { enqueue_task(rq, p, 0); - resched_task(rq->curr); + resched_curr(rq); } check_class_changed(rq, p, prev_class, old_prio); diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index fc4f98b1258f..df0b77a8caca 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -535,7 +535,7 @@ again: if (task_has_dl_policy(rq->curr)) check_preempt_curr_dl(rq, p, 0); else - resched_task(rq->curr); + resched_curr(rq); #ifdef CONFIG_SMP /* * Queueing this task back might have overloaded rq, @@ -634,7 +634,7 @@ static void update_curr_dl(struct rq *rq) enqueue_task_dl(rq, curr, ENQUEUE_REPLENISH); if (!is_leftmost(curr, &rq->dl)) - resched_task(curr); + resched_curr(rq); } /* @@ -964,7 +964,7 @@ static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p) cpudl_find(&rq->rd->cpudl, p, NULL) != -1) return; - resched_task(rq->curr); + resched_curr(rq); } static int pull_dl_task(struct rq *this_rq); @@ -979,7 +979,7 @@ static void check_preempt_curr_dl(struct rq *rq, struct task_struct *p, int flags) { if (dl_entity_preempt(&p->dl, &rq->curr->dl)) { - resched_task(rq->curr); + resched_curr(rq); return; } @@ -1333,7 +1333,7 @@ retry: if (dl_task(rq->curr) && dl_time_before(next_task->dl.deadline, rq->curr->dl.deadline) && rq->curr->nr_cpus_allowed > 1) { - resched_task(rq->curr); + resched_curr(rq); return 0; } @@ -1373,7 +1373,7 @@ retry: set_task_cpu(next_task, later_rq->cpu); activate_task(later_rq, next_task, 0); - resched_task(later_rq->curr); + resched_curr(later_rq); double_unlock_balance(rq, later_rq); @@ -1632,14 +1632,14 @@ static void prio_changed_dl(struct rq *rq, struct task_struct *p, */ if (dl_time_before(rq->dl.earliest_dl.curr, p->dl.deadline) && rq->curr == p) - resched_task(p); + resched_curr(rq); #else /* * Again, we don't know if p has a earlier * or later deadline, so let's blindly set a * (maybe not needed) rescheduling point. 
*/ - resched_task(p); + resched_curr(rq); #endif /* CONFIG_SMP */ } else switched_to_dl(rq, p); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 923fe32db6b3..f5f0cc91518c 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -2923,7 +2923,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) ideal_runtime = sched_slice(cfs_rq, curr); delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime; if (delta_exec > ideal_runtime) { - resched_task(rq_of(cfs_rq)->curr); + resched_curr(rq_of(cfs_rq)); /* * The current task ran long enough, ensure it doesn't get * re-elected due to buddy favours. @@ -2947,7 +2947,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) return; if (delta > ideal_runtime) - resched_task(rq_of(cfs_rq)->curr); + resched_curr(rq_of(cfs_rq)); } static void @@ -3087,7 +3087,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued) * validating it and just reschedule. */ if (queued) { - resched_task(rq_of(cfs_rq)->curr); + resched_curr(rq_of(cfs_rq)); return; } /* @@ -3278,7 +3278,7 @@ static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec) * hierarchy can be throttled */ if (!assign_cfs_rq_runtime(cfs_rq) && likely(cfs_rq->curr)) - resched_task(rq_of(cfs_rq)->curr); + resched_curr(rq_of(cfs_rq)); } static __always_inline @@ -3438,7 +3438,7 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq) /* determine whether we need to wake up potentially idle cpu */ if (rq->curr == rq->idle && rq->cfs.nr_running) - resched_task(rq->curr); + resched_curr(rq); } static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b, @@ -3897,7 +3897,7 @@ static void hrtick_start_fair(struct rq *rq, struct task_struct *p) if (delta < 0) { if (rq->curr == p) - resched_task(p); + resched_curr(rq); return; } @@ -4766,7 +4766,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ return; preempt: - resched_task(curr); + resched_curr(rq); /* * Only set the backward buddy when the current task is still * on the rq. This can happen when a wakeup gets interleaved @@ -7457,7 +7457,7 @@ static void task_fork_fair(struct task_struct *p) * 'current' within the tree based on its new key value. */ swap(curr->vruntime, se->vruntime); - resched_task(rq->curr); + resched_curr(rq); } se->vruntime -= cfs_rq->min_vruntime; @@ -7482,7 +7482,7 @@ prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio) */ if (rq->curr == p) { if (p->prio > oldprio) - resched_task(rq->curr); + resched_curr(rq); } else check_preempt_curr(rq, p, 0); } @@ -7545,7 +7545,7 @@ static void switched_to_fair(struct rq *rq, struct task_struct *p) * if we can still preempt the current task. 
*/ if (rq->curr == p) - resched_task(rq->curr); + resched_curr(rq); else check_preempt_curr(rq, p, 0); } diff --git a/kernel/sched/idle_task.c b/kernel/sched/idle_task.c index 879f2b75266a..67ad4e7f506a 100644 --- a/kernel/sched/idle_task.c +++ b/kernel/sched/idle_task.c @@ -20,7 +20,7 @@ select_task_rq_idle(struct task_struct *p, int cpu, int sd_flag, int flags) */ static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int flags) { - resched_task(rq->idle); + resched_curr(rq); } static struct task_struct * diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 671a8b5fdb6f..5f6edca4fafd 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -463,9 +463,10 @@ static void dequeue_rt_entity(struct sched_rt_entity *rt_se); static void sched_rt_rq_enqueue(struct rt_rq *rt_rq) { struct task_struct *curr = rq_of_rt_rq(rt_rq)->curr; + struct rq *rq = rq_of_rt_rq(rt_rq); struct sched_rt_entity *rt_se; - int cpu = cpu_of(rq_of_rt_rq(rt_rq)); + int cpu = cpu_of(rq); rt_se = rt_rq->tg->rt_se[cpu]; @@ -476,7 +477,7 @@ static void sched_rt_rq_enqueue(struct rt_rq *rt_rq) enqueue_rt_entity(rt_se, false); if (rt_rq->highest_prio.curr < curr->prio) - resched_task(curr); + resched_curr(rq); } } @@ -566,7 +567,7 @@ static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq) return; enqueue_top_rt_rq(rt_rq); - resched_task(rq->curr); + resched_curr(rq); } static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq) @@ -951,7 +952,7 @@ static void update_curr_rt(struct rq *rq) raw_spin_lock(&rt_rq->rt_runtime_lock); rt_rq->rt_time += delta_exec; if (sched_rt_runtime_exceeded(rt_rq)) - resched_task(curr); + resched_curr(rq); raw_spin_unlock(&rt_rq->rt_runtime_lock); } } @@ -1366,7 +1367,7 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) * to try and push current away: */ requeue_task_rt(rq, p, 1); - resched_task(rq->curr); + resched_curr(rq); } #endif /* CONFIG_SMP */ @@ -1377,7 +1378,7 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flags) { if (p->prio < rq->curr->prio) { - resched_task(rq->curr); + resched_curr(rq); return; } @@ -1693,7 +1694,7 @@ retry: * just reschedule current. */ if (unlikely(next_task->prio < rq->curr->prio)) { - resched_task(rq->curr); + resched_curr(rq); return 0; } @@ -1740,7 +1741,7 @@ retry: activate_task(lowest_rq, next_task, 0); ret = 1; - resched_task(lowest_rq->curr); + resched_curr(lowest_rq); double_unlock_balance(rq, lowest_rq); @@ -1939,7 +1940,7 @@ static void switched_from_rt(struct rq *rq, struct task_struct *p) return; if (pull_rt_task(rq)) - resched_task(rq->curr); + resched_curr(rq); } void __init init_sched_rt_class(void) @@ -1977,7 +1978,7 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p) check_resched = 0; #endif /* CONFIG_SMP */ if (check_resched && p->prio < rq->curr->prio) - resched_task(rq->curr); + resched_curr(rq); } } @@ -2006,11 +2007,11 @@ prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio) * Only reschedule if p is still on the same runqueue. */ if (p->prio > rq->rt.highest_prio.curr && rq->curr == p) - resched_task(p); + resched_curr(rq); #else /* For UP simply resched on drop of prio */ if (oldprio < p->prio) - resched_task(p); + resched_curr(rq); #endif /* CONFIG_SMP */ } else { /* @@ -2019,7 +2020,7 @@ prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio) * then reschedule. 
*/ if (p->prio < rq->curr->prio) - resched_task(rq->curr); + resched_curr(rq); } } diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 0191ed563bdd..1283945d1ace 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1199,7 +1199,7 @@ extern void init_sched_rt_class(void); extern void init_sched_fair_class(void); extern void init_sched_dl_class(void); -extern void resched_task(struct task_struct *p); +extern void resched_curr(struct rq *rq); extern void resched_cpu(int cpu); extern struct rt_bandwidth def_rt_bandwidth; -- cgit v1.2.3-59-g8ed1b From a509ea840b8e29e512764803e30b805c7ea89038 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Wed, 9 Jul 2014 17:45:10 +0200 Subject: ARM: mvebu: extend PMSU code to support dynamic frequency scaling This commit adds the necessary code in the Marvell EBU PMSU driver to support dynamic frequency scaling. In essence, what this new code does is that it: * registers the frequency operating points supported by the CPU; * registers a clock notifier of the CPU clocks. The notifier function listens to the newly introduced APPLY_RATE_CHANGE event, and uses that to finalize the frequency transition by doing the part of the procedure that involves the PMSU; * registers a platform device for the cpufreq-generic driver, which will take care of the CPU frequency transitions. Signed-off-by: Thomas Petazzoni Link: https://lkml.kernel.org/r/1404920715-19834-3-git-send-email-thomas.petazzoni@free-electrons.com Signed-off-by: Jason Cooper --- arch/arm/mach-mvebu/pmsu.c | 162 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/mvebu-pmsu.h | 20 ++++++ 2 files changed, 182 insertions(+) create mode 100644 include/linux/mvebu-pmsu.h (limited to 'include/linux') diff --git a/arch/arm/mach-mvebu/pmsu.c b/arch/arm/mach-mvebu/pmsu.c index 53a55c8520bf..db7d9ab298b6 100644 --- a/arch/arm/mach-mvebu/pmsu.c +++ b/arch/arm/mach-mvebu/pmsu.c @@ -18,20 +18,26 @@ #define pr_fmt(fmt) "mvebu-pmsu: " fmt +#include #include +#include #include #include #include +#include #include #include +#include #include #include +#include #include #include #include #include #include #include "common.h" +#include "armada-370-xp.h" static void __iomem *pmsu_mp_base; @@ -57,6 +63,10 @@ static void __iomem *pmsu_mp_base; #define PMSU_STATUS_AND_MASK_IRQ_MASK BIT(24) #define PMSU_STATUS_AND_MASK_FIQ_MASK BIT(25) +#define PMSU_EVENT_STATUS_AND_MASK(cpu) ((cpu * 0x100) + 0x120) +#define PMSU_EVENT_STATUS_AND_MASK_DFS_DONE BIT(1) +#define PMSU_EVENT_STATUS_AND_MASK_DFS_DONE_MASK BIT(17) + #define PMSU_BOOT_ADDR_REDIRECT_OFFSET(cpu) ((cpu * 0x100) + 0x124) /* PMSU fabric registers */ @@ -296,3 +306,155 @@ int __init armada_370_xp_cpu_pm_init(void) arch_initcall(armada_370_xp_cpu_pm_init); early_initcall(armada_370_xp_pmsu_init); + +static void mvebu_pmsu_dfs_request_local(void *data) +{ + u32 reg; + u32 cpu = smp_processor_id(); + unsigned long flags; + + local_irq_save(flags); + + /* Prepare to enter idle */ + reg = readl(pmsu_mp_base + PMSU_STATUS_AND_MASK(cpu)); + reg |= PMSU_STATUS_AND_MASK_CPU_IDLE_WAIT | + PMSU_STATUS_AND_MASK_IRQ_MASK | + PMSU_STATUS_AND_MASK_FIQ_MASK; + writel(reg, pmsu_mp_base + PMSU_STATUS_AND_MASK(cpu)); + + /* Request the DFS transition */ + reg = readl(pmsu_mp_base + PMSU_CONTROL_AND_CONFIG(cpu)); + reg |= PMSU_CONTROL_AND_CONFIG_DFS_REQ; + writel(reg, pmsu_mp_base + PMSU_CONTROL_AND_CONFIG(cpu)); + + /* The fact of entering idle will trigger the DFS transition */ + wfi(); + + /* + * We're back from idle, the DFS transition has completed, + * 
clear the idle wait indication. + */ + reg = readl(pmsu_mp_base + PMSU_STATUS_AND_MASK(cpu)); + reg &= ~PMSU_STATUS_AND_MASK_CPU_IDLE_WAIT; + writel(reg, pmsu_mp_base + PMSU_STATUS_AND_MASK(cpu)); + + local_irq_restore(flags); +} + +int mvebu_pmsu_dfs_request(int cpu) +{ + unsigned long timeout; + int hwcpu = cpu_logical_map(cpu); + u32 reg; + + /* Clear any previous DFS DONE event */ + reg = readl(pmsu_mp_base + PMSU_EVENT_STATUS_AND_MASK(hwcpu)); + reg &= ~PMSU_EVENT_STATUS_AND_MASK_DFS_DONE; + writel(reg, pmsu_mp_base + PMSU_EVENT_STATUS_AND_MASK(hwcpu)); + + /* Mask the DFS done interrupt, since we are going to poll */ + reg = readl(pmsu_mp_base + PMSU_EVENT_STATUS_AND_MASK(hwcpu)); + reg |= PMSU_EVENT_STATUS_AND_MASK_DFS_DONE_MASK; + writel(reg, pmsu_mp_base + PMSU_EVENT_STATUS_AND_MASK(hwcpu)); + + /* Trigger the DFS on the appropriate CPU */ + smp_call_function_single(cpu, mvebu_pmsu_dfs_request_local, + NULL, false); + + /* Poll until the DFS done event is generated */ + timeout = jiffies + HZ; + while (time_before(jiffies, timeout)) { + reg = readl(pmsu_mp_base + PMSU_EVENT_STATUS_AND_MASK(hwcpu)); + if (reg & PMSU_EVENT_STATUS_AND_MASK_DFS_DONE) + break; + udelay(10); + } + + if (time_after(jiffies, timeout)) + return -ETIME; + + /* Restore the DFS mask to its original state */ + reg = readl(pmsu_mp_base + PMSU_EVENT_STATUS_AND_MASK(hwcpu)); + reg &= ~PMSU_EVENT_STATUS_AND_MASK_DFS_DONE_MASK; + writel(reg, pmsu_mp_base + PMSU_EVENT_STATUS_AND_MASK(hwcpu)); + + return 0; +} + +static int __init armada_xp_pmsu_cpufreq_init(void) +{ + struct device_node *np; + struct resource res; + int ret, cpu; + + if (!of_machine_is_compatible("marvell,armadaxp")) + return 0; + + /* + * In order to have proper cpufreq handling, we need to ensure + * that the Device Tree description of the CPU clock includes + * the definition of the PMU DFS registers. If not, we do not + * register the clock notifier and the cpufreq driver. This + * piece of code is only for compatibility with old Device + * Trees. + */ + np = of_find_compatible_node(NULL, NULL, "marvell,armada-xp-cpu-clock"); + if (!np) + return 0; + + ret = of_address_to_resource(np, 1, &res); + if (ret) { + pr_warn(FW_WARN "not enabling cpufreq, deprecated armada-xp-cpu-clock binding\n"); + of_node_put(np); + return 0; + } + + of_node_put(np); + + /* + * For each CPU, this loop registers the operating points + * supported (which are the nominal CPU frequency and half of + * it), and registers the clock notifier that will take care + * of doing the PMSU part of a frequency transition. + */ + for_each_possible_cpu(cpu) { + struct device *cpu_dev; + struct clk *clk; + int ret; + + cpu_dev = get_cpu_device(cpu); + if (!cpu_dev) { + pr_err("Cannot get CPU %d\n", cpu); + continue; + } + + clk = clk_get(cpu_dev, 0); + if (!clk) { + pr_err("Cannot get clock for CPU %d\n", cpu); + return -ENODEV; + } + + /* + * In case of a failure of dev_pm_opp_add(), we don't + * bother with cleaning up the registered OPP (there's + * no function to do so), and simply cancel the + * registration of the cpufreq device. 
+ */ + ret = dev_pm_opp_add(cpu_dev, clk_get_rate(clk), 0); + if (ret) { + clk_put(clk); + return ret; + } + + ret = dev_pm_opp_add(cpu_dev, clk_get_rate(clk) / 2, 0); + if (ret) { + clk_put(clk); + return ret; + } + } + + platform_device_register_simple("cpufreq-generic", -1, NULL, 0); + return 0; +} + +device_initcall(armada_xp_pmsu_cpufreq_init); diff --git a/include/linux/mvebu-pmsu.h b/include/linux/mvebu-pmsu.h new file mode 100644 index 000000000000..b918d07efe23 --- /dev/null +++ b/include/linux/mvebu-pmsu.h @@ -0,0 +1,20 @@ +/* + * Copyright (C) 2012 Marvell + * + * Thomas Petazzoni + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + */ + +#ifndef __MVEBU_PMSU_H__ +#define __MVEBU_PMSU_H__ + +#ifdef CONFIG_MACH_MVEBU_V7 +int mvebu_pmsu_dfs_request(int cpu); +#else +static inline int mvebu_pmsu_dfs_request(int cpu) { return -ENODEV; } +#endif + +#endif /* __MVEBU_PMSU_H__ */ -- cgit v1.2.3-59-g8ed1b From 743162013d40ca612b4cb53d3a200dff2d9ab26e Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 7 Jul 2014 15:16:04 +1000 Subject: sched: Remove proliferation of wait_on_bit() action functions The current "wait_on_bit" interface requires an 'action' function to be provided which does the actual waiting. There are over 20 such functions, many of them identical. Most cases can be satisfied by one of just two functions, one which uses io_schedule() and one which just uses schedule(). So: Rename wait_on_bit and wait_on_bit_lock to wait_on_bit_action and wait_on_bit_lock_action to make it explicit that they need an action function. Introduce new wait_on_bit{,_lock} and wait_on_bit{,_lock}_io which are *not* given an action function but implicitly use a standard one. The decision to error-out if a signal is pending is now made based on the 'mode' argument rather than being encoded in the action function. All instances of the old wait_on_bit and wait_on_bit_lock which can use the new version have been changed accordingly and their action functions have been discarded. wait_on_bit{_lock} does not return any specific error code in the event of a signal so the caller must check for non-zero and interpolate their own error code as appropriate. The wait_on_bit() call in __fscache_wait_on_invalidate() was ambiguous as it specified TASK_UNINTERRUPTIBLE but used fscache_wait_bit_interruptible as an action function. David Howells confirms this should be uniformly "uninterruptible" The main remaining user of wait_on_bit{,_lock}_action is NFS which needs to use a freezer-aware schedule() call. A comment in fs/gfs2/glock.c notes that having multiple 'action' functions is useful as they display differently in the 'wchan' field of 'ps'. (and /proc/$PID/wchan). As the new bit_wait{,_io} functions are tagged "__sched", they will not show up at all, but something higher in the stack. So the distinction will still be visible, only with different function names (gds2_glock_wait versus gfs2_glock_dq_wait in the gfs2/glock.c case). Since first version of this patch (against 3.15) two new action functions appeared, on in NFS and one in CIFS. CIFS also now uses an action function that makes the same freezer aware schedule call as NFS. 
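For callers that only needed one of the trivial action functions, the conversion is a plain signature change plus an explicit errno choice. An illustrative before/after fragment (not an additional change; it mirrors the cifs_sb_tlink() hunk in fs/cifs/connect.c below):

	/* before: the per-caller action function slept and reported pending signals */
	ret = wait_on_bit(&tlink->tl_flags, TCON_LINK_PENDING,
			  cifs_sb_tcon_pending_wait, TASK_INTERRUPTIBLE);
	if (ret) {
		cifs_put_tlink(tlink);
		return ERR_PTR(ret);
	}

	/* after: the mode alone governs signal wakeup; the caller picks the errno */
	ret = wait_on_bit(&tlink->tl_flags, TCON_LINK_PENDING, TASK_INTERRUPTIBLE);
	if (ret) {
		cifs_put_tlink(tlink);
		return ERR_PTR(-ERESTARTSYS);
	}
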
Signed-off-by: NeilBrown Acked-by: David Howells (fscache, keys) Acked-by: Steven Whitehouse (gfs2) Acked-by: Peter Zijlstra Cc: Oleg Nesterov Cc: Steve French Cc: Linus Torvalds Link: http://lkml.kernel.org/r/20140707051603.28027.72349.stgit@notabene.brown Signed-off-by: Ingo Molnar --- Documentation/filesystems/caching/operations.txt | 2 +- drivers/md/dm-bufio.c | 41 +++----- drivers/md/dm-snap.c | 10 +- drivers/media/usb/dvb-usb-v2/dvb_usb_core.c | 12 +-- fs/btrfs/extent_io.c | 10 +- fs/buffer.c | 11 +-- fs/cifs/connect.c | 10 +- fs/cifs/file.c | 9 +- fs/cifs/inode.c | 4 +- fs/cifs/misc.c | 2 +- fs/fs-writeback.c | 3 +- fs/fscache/cookie.c | 7 +- fs/fscache/internal.h | 2 - fs/fscache/main.c | 18 ---- fs/fscache/page.c | 4 +- fs/gfs2/glock.c | 25 +---- fs/gfs2/lock_dlm.c | 8 +- fs/gfs2/ops_fstype.c | 11 +-- fs/gfs2/recovery.c | 8 +- fs/gfs2/super.c | 8 +- fs/inode.c | 7 -- fs/jbd2/transaction.c | 10 +- fs/nfs/file.c | 4 +- fs/nfs/filelayout/filelayoutdev.c | 4 +- fs/nfs/inode.c | 4 +- fs/nfs/nfs4state.c | 4 +- fs/nfs/pagelist.c | 12 +-- fs/nfs/pnfs.c | 2 +- fs/nfs/write.c | 4 +- include/linux/wait.h | 115 ++++++++++++++++++++++- include/linux/writeback.h | 3 +- kernel/ptrace.c | 8 +- kernel/sched/wait.c | 18 ++++ mm/filemap.c | 20 +--- mm/ksm.c | 8 +- net/bluetooth/hci_core.c | 8 +- security/keys/gc.c | 11 +-- security/keys/request_key.c | 23 +---- 38 files changed, 195 insertions(+), 275 deletions(-) (limited to 'include/linux') diff --git a/Documentation/filesystems/caching/operations.txt b/Documentation/filesystems/caching/operations.txt index bee2a5f93d60..a1c052cbba35 100644 --- a/Documentation/filesystems/caching/operations.txt +++ b/Documentation/filesystems/caching/operations.txt @@ -90,7 +90,7 @@ operations: to be cleared before proceeding: wait_on_bit(&op->flags, FSCACHE_OP_WAITING, - fscache_wait_bit, TASK_UNINTERRUPTIBLE); + TASK_UNINTERRUPTIBLE); (2) The operation may be fast asynchronous (FSCACHE_OP_FAST), in which case it diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 4e84095833db..96c92b75452f 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -614,16 +614,6 @@ static void write_endio(struct bio *bio, int error) wake_up_bit(&b->state, B_WRITING); } -/* - * This function is called when wait_on_bit is actually waiting. - */ -static int do_io_schedule(void *word) -{ - io_schedule(); - - return 0; -} - /* * Initiate a write on a dirty buffer, but don't wait for it. 
* @@ -640,8 +630,7 @@ static void __write_dirty_buffer(struct dm_buffer *b, return; clear_bit(B_DIRTY, &b->state); - wait_on_bit_lock(&b->state, B_WRITING, - do_io_schedule, TASK_UNINTERRUPTIBLE); + wait_on_bit_lock_io(&b->state, B_WRITING, TASK_UNINTERRUPTIBLE); if (!write_list) submit_io(b, WRITE, b->block, write_endio); @@ -675,9 +664,9 @@ static void __make_buffer_clean(struct dm_buffer *b) if (!b->state) /* fast case */ return; - wait_on_bit(&b->state, B_READING, do_io_schedule, TASK_UNINTERRUPTIBLE); + wait_on_bit_io(&b->state, B_READING, TASK_UNINTERRUPTIBLE); __write_dirty_buffer(b, NULL); - wait_on_bit(&b->state, B_WRITING, do_io_schedule, TASK_UNINTERRUPTIBLE); + wait_on_bit_io(&b->state, B_WRITING, TASK_UNINTERRUPTIBLE); } /* @@ -1030,7 +1019,7 @@ static void *new_read(struct dm_bufio_client *c, sector_t block, if (need_submit) submit_io(b, READ, b->block, read_endio); - wait_on_bit(&b->state, B_READING, do_io_schedule, TASK_UNINTERRUPTIBLE); + wait_on_bit_io(&b->state, B_READING, TASK_UNINTERRUPTIBLE); if (b->read_error) { int error = b->read_error; @@ -1209,15 +1198,13 @@ again: dropped_lock = 1; b->hold_count++; dm_bufio_unlock(c); - wait_on_bit(&b->state, B_WRITING, - do_io_schedule, - TASK_UNINTERRUPTIBLE); + wait_on_bit_io(&b->state, B_WRITING, + TASK_UNINTERRUPTIBLE); dm_bufio_lock(c); b->hold_count--; } else - wait_on_bit(&b->state, B_WRITING, - do_io_schedule, - TASK_UNINTERRUPTIBLE); + wait_on_bit_io(&b->state, B_WRITING, + TASK_UNINTERRUPTIBLE); } if (!test_bit(B_DIRTY, &b->state) && @@ -1321,15 +1308,15 @@ retry: __write_dirty_buffer(b, NULL); if (b->hold_count == 1) { - wait_on_bit(&b->state, B_WRITING, - do_io_schedule, TASK_UNINTERRUPTIBLE); + wait_on_bit_io(&b->state, B_WRITING, + TASK_UNINTERRUPTIBLE); set_bit(B_DIRTY, &b->state); __unlink_buffer(b); __link_buffer(b, new_block, LIST_DIRTY); } else { sector_t old_block; - wait_on_bit_lock(&b->state, B_WRITING, - do_io_schedule, TASK_UNINTERRUPTIBLE); + wait_on_bit_lock_io(&b->state, B_WRITING, + TASK_UNINTERRUPTIBLE); /* * Relink buffer to "new_block" so that write_callback * sees "new_block" as a block number. @@ -1341,8 +1328,8 @@ retry: __unlink_buffer(b); __link_buffer(b, new_block, b->list_mode); submit_io(b, WRITE, new_block, write_endio); - wait_on_bit(&b->state, B_WRITING, - do_io_schedule, TASK_UNINTERRUPTIBLE); + wait_on_bit_io(&b->state, B_WRITING, + TASK_UNINTERRUPTIBLE); __unlink_buffer(b); __link_buffer(b, old_block, b->list_mode); } diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 5bd2290cfb1e..864b03f47727 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -1032,21 +1032,13 @@ static void start_merge(struct dm_snapshot *s) snapshot_merge_next_chunks(s); } -static int wait_schedule(void *ptr) -{ - schedule(); - - return 0; -} - /* * Stop the merging process and wait until it finishes. 
*/ static void stop_merge(struct dm_snapshot *s) { set_bit(SHUTDOWN_MERGE, &s->state_bits); - wait_on_bit(&s->state_bits, RUNNING_MERGE, wait_schedule, - TASK_UNINTERRUPTIBLE); + wait_on_bit(&s->state_bits, RUNNING_MERGE, TASK_UNINTERRUPTIBLE); clear_bit(SHUTDOWN_MERGE, &s->state_bits); } diff --git a/drivers/media/usb/dvb-usb-v2/dvb_usb_core.c b/drivers/media/usb/dvb-usb-v2/dvb_usb_core.c index e35580618936..f296394bb7c5 100644 --- a/drivers/media/usb/dvb-usb-v2/dvb_usb_core.c +++ b/drivers/media/usb/dvb-usb-v2/dvb_usb_core.c @@ -253,13 +253,6 @@ static int dvb_usbv2_adapter_stream_exit(struct dvb_usb_adapter *adap) return usb_urb_exitv2(&adap->stream); } -static int wait_schedule(void *ptr) -{ - schedule(); - - return 0; -} - static int dvb_usb_start_feed(struct dvb_demux_feed *dvbdmxfeed) { struct dvb_usb_adapter *adap = dvbdmxfeed->demux->priv; @@ -273,8 +266,7 @@ static int dvb_usb_start_feed(struct dvb_demux_feed *dvbdmxfeed) dvbdmxfeed->pid, dvbdmxfeed->index); /* wait init is done */ - wait_on_bit(&adap->state_bits, ADAP_INIT, wait_schedule, - TASK_UNINTERRUPTIBLE); + wait_on_bit(&adap->state_bits, ADAP_INIT, TASK_UNINTERRUPTIBLE); if (adap->active_fe == -1) return -EINVAL; @@ -568,7 +560,7 @@ static int dvb_usb_fe_sleep(struct dvb_frontend *fe) if (!adap->suspend_resume_active) { set_bit(ADAP_SLEEP, &adap->state_bits); - wait_on_bit(&adap->state_bits, ADAP_STREAMING, wait_schedule, + wait_on_bit(&adap->state_bits, ADAP_STREAMING, TASK_UNINTERRUPTIBLE); } diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index a389820d158b..3e11aab9f391 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3437,16 +3437,10 @@ done_unlocked: return 0; } -static int eb_wait(void *word) -{ - io_schedule(); - return 0; -} - void wait_on_extent_buffer_writeback(struct extent_buffer *eb) { - wait_on_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK, eb_wait, - TASK_UNINTERRUPTIBLE); + wait_on_bit_io(&eb->bflags, EXTENT_BUFFER_WRITEBACK, + TASK_UNINTERRUPTIBLE); } static noinline_for_stack int diff --git a/fs/buffer.c b/fs/buffer.c index eba6e4f621ce..8f05111bbb8b 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -61,16 +61,9 @@ inline void touch_buffer(struct buffer_head *bh) } EXPORT_SYMBOL(touch_buffer); -static int sleep_on_buffer(void *word) -{ - io_schedule(); - return 0; -} - void __lock_buffer(struct buffer_head *bh) { - wait_on_bit_lock(&bh->b_state, BH_Lock, sleep_on_buffer, - TASK_UNINTERRUPTIBLE); + wait_on_bit_lock_io(&bh->b_state, BH_Lock, TASK_UNINTERRUPTIBLE); } EXPORT_SYMBOL(__lock_buffer); @@ -123,7 +116,7 @@ EXPORT_SYMBOL(buffer_check_dirty_writeback); */ void __wait_on_buffer(struct buffer_head * bh) { - wait_on_bit(&bh->b_state, BH_Lock, sleep_on_buffer, TASK_UNINTERRUPTIBLE); + wait_on_bit_io(&bh->b_state, BH_Lock, TASK_UNINTERRUPTIBLE); } EXPORT_SYMBOL(__wait_on_buffer); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 20d75b8ddb26..b98366f21f9e 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -3934,13 +3934,6 @@ cifs_sb_master_tcon(struct cifs_sb_info *cifs_sb) return tlink_tcon(cifs_sb_master_tlink(cifs_sb)); } -static int -cifs_sb_tcon_pending_wait(void *unused) -{ - schedule(); - return signal_pending(current) ? 
-ERESTARTSYS : 0; -} - /* find and return a tlink with given uid */ static struct tcon_link * tlink_rb_search(struct rb_root *root, kuid_t uid) @@ -4039,11 +4032,10 @@ cifs_sb_tlink(struct cifs_sb_info *cifs_sb) } else { wait_for_construction: ret = wait_on_bit(&tlink->tl_flags, TCON_LINK_PENDING, - cifs_sb_tcon_pending_wait, TASK_INTERRUPTIBLE); if (ret) { cifs_put_tlink(tlink); - return ERR_PTR(ret); + return ERR_PTR(-ERESTARTSYS); } /* if it's good, return it */ diff --git a/fs/cifs/file.c b/fs/cifs/file.c index e90a1e9aa627..b88b1ade4d3d 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -3618,13 +3618,6 @@ static int cifs_launder_page(struct page *page) return rc; } -static int -cifs_pending_writers_wait(void *unused) -{ - schedule(); - return 0; -} - void cifs_oplock_break(struct work_struct *work) { struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo, @@ -3636,7 +3629,7 @@ void cifs_oplock_break(struct work_struct *work) int rc = 0; wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS, - cifs_pending_writers_wait, TASK_UNINTERRUPTIBLE); + TASK_UNINTERRUPTIBLE); server->ops->downgrade_oplock(server, cinode, test_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cinode->flags)); diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index a174605f6afa..213c4580b4e3 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1794,8 +1794,8 @@ cifs_revalidate_mapping(struct inode *inode) int rc; unsigned long *flags = &CIFS_I(inode)->flags; - rc = wait_on_bit_lock(flags, CIFS_INO_LOCK, cifs_wait_bit_killable, - TASK_KILLABLE); + rc = wait_on_bit_lock_action(flags, CIFS_INO_LOCK, cifs_wait_bit_killable, + TASK_KILLABLE); if (rc) return rc; diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 3b0c62e622da..6bf55d0ed494 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -582,7 +582,7 @@ int cifs_get_writer(struct cifsInodeInfo *cinode) start: rc = wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_OPLOCK_BREAK, - cifs_oplock_break_wait, TASK_KILLABLE); + TASK_KILLABLE); if (rc) return rc; diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index be568b7311d6..ef9bef118342 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -342,7 +342,8 @@ static void __inode_wait_for_writeback(struct inode *inode) wqh = bit_waitqueue(&inode->i_state, __I_SYNC); while (inode->i_state & I_SYNC) { spin_unlock(&inode->i_lock); - __wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE); + __wait_on_bit(wqh, &wq, bit_wait, + TASK_UNINTERRUPTIBLE); spin_lock(&inode->i_lock); } } diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c index aec01be91b0a..89acec742e0b 100644 --- a/fs/fscache/cookie.c +++ b/fs/fscache/cookie.c @@ -160,7 +160,7 @@ void __fscache_enable_cookie(struct fscache_cookie *cookie, _enter("%p", cookie); wait_on_bit_lock(&cookie->flags, FSCACHE_COOKIE_ENABLEMENT_LOCK, - fscache_wait_bit, TASK_UNINTERRUPTIBLE); + TASK_UNINTERRUPTIBLE); if (test_bit(FSCACHE_COOKIE_ENABLED, &cookie->flags)) goto out_unlock; @@ -255,7 +255,7 @@ static int fscache_acquire_non_index_cookie(struct fscache_cookie *cookie) if (!fscache_defer_lookup) { _debug("non-deferred lookup %p", &cookie->flags); wait_on_bit(&cookie->flags, FSCACHE_COOKIE_LOOKING_UP, - fscache_wait_bit, TASK_UNINTERRUPTIBLE); + TASK_UNINTERRUPTIBLE); _debug("complete"); if (test_bit(FSCACHE_COOKIE_UNAVAILABLE, &cookie->flags)) goto unavailable; @@ -463,7 +463,6 @@ void __fscache_wait_on_invalidate(struct fscache_cookie *cookie) _enter("%p", cookie); wait_on_bit(&cookie->flags, FSCACHE_COOKIE_INVALIDATING, - fscache_wait_bit_interruptible, 
TASK_UNINTERRUPTIBLE); _leave(""); @@ -525,7 +524,7 @@ void __fscache_disable_cookie(struct fscache_cookie *cookie, bool invalidate) } wait_on_bit_lock(&cookie->flags, FSCACHE_COOKIE_ENABLEMENT_LOCK, - fscache_wait_bit, TASK_UNINTERRUPTIBLE); + TASK_UNINTERRUPTIBLE); if (!test_and_clear_bit(FSCACHE_COOKIE_ENABLED, &cookie->flags)) goto out_unlock_enable; diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index bc6c08fcfddd..7872a62ef30c 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h @@ -97,8 +97,6 @@ static inline bool fscache_object_congested(void) return workqueue_congested(WORK_CPU_UNBOUND, fscache_object_wq); } -extern int fscache_wait_bit(void *); -extern int fscache_wait_bit_interruptible(void *); extern int fscache_wait_atomic_t(atomic_t *); /* diff --git a/fs/fscache/main.c b/fs/fscache/main.c index 63f868e869b9..a31b83c5cbd9 100644 --- a/fs/fscache/main.c +++ b/fs/fscache/main.c @@ -196,24 +196,6 @@ static void __exit fscache_exit(void) module_exit(fscache_exit); -/* - * wait_on_bit() sleep function for uninterruptible waiting - */ -int fscache_wait_bit(void *flags) -{ - schedule(); - return 0; -} - -/* - * wait_on_bit() sleep function for interruptible waiting - */ -int fscache_wait_bit_interruptible(void *flags) -{ - schedule(); - return signal_pending(current); -} - /* * wait_on_atomic_t() sleep function for uninterruptible waiting */ diff --git a/fs/fscache/page.c b/fs/fscache/page.c index ed70714503fa..85332b9d19d1 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -298,7 +298,6 @@ int fscache_wait_for_deferred_lookup(struct fscache_cookie *cookie) jif = jiffies; if (wait_on_bit(&cookie->flags, FSCACHE_COOKIE_LOOKING_UP, - fscache_wait_bit_interruptible, TASK_INTERRUPTIBLE) != 0) { fscache_stat(&fscache_n_retrievals_intr); _leave(" = -ERESTARTSYS"); @@ -342,7 +341,6 @@ int fscache_wait_for_operation_activation(struct fscache_object *object, if (stat_op_waits) fscache_stat(stat_op_waits); if (wait_on_bit(&op->flags, FSCACHE_OP_WAITING, - fscache_wait_bit_interruptible, TASK_INTERRUPTIBLE) != 0) { ret = fscache_cancel_op(op, do_cancel); if (ret == 0) @@ -351,7 +349,7 @@ int fscache_wait_for_operation_activation(struct fscache_object *object, /* it's been removed from the pending queue by another party, * so we should get to run shortly */ wait_on_bit(&op->flags, FSCACHE_OP_WAITING, - fscache_wait_bit, TASK_UNINTERRUPTIBLE); + TASK_UNINTERRUPTIBLE); } _debug("<<< GO"); diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index c355f7320e44..770e16716d81 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -855,27 +855,6 @@ void gfs2_holder_uninit(struct gfs2_holder *gh) gh->gh_ip = 0; } -/** - * gfs2_glock_holder_wait - * @word: unused - * - * This function and gfs2_glock_demote_wait both show up in the WCHAN - * field. Thus I've separated these otherwise identical functions in - * order to be more informative to the user. - */ - -static int gfs2_glock_holder_wait(void *word) -{ - schedule(); - return 0; -} - -static int gfs2_glock_demote_wait(void *word) -{ - schedule(); - return 0; -} - /** * gfs2_glock_wait - wait on a glock acquisition * @gh: the glock holder @@ -888,7 +867,7 @@ int gfs2_glock_wait(struct gfs2_holder *gh) unsigned long time1 = jiffies; might_sleep(); - wait_on_bit(&gh->gh_iflags, HIF_WAIT, gfs2_glock_holder_wait, TASK_UNINTERRUPTIBLE); + wait_on_bit(&gh->gh_iflags, HIF_WAIT, TASK_UNINTERRUPTIBLE); if (time_after(jiffies, time1 + HZ)) /* have we waited > a second? */ /* Lengthen the minimum hold time. 
*/ gh->gh_gl->gl_hold_time = min(gh->gh_gl->gl_hold_time + @@ -1128,7 +1107,7 @@ void gfs2_glock_dq_wait(struct gfs2_holder *gh) struct gfs2_glock *gl = gh->gh_gl; gfs2_glock_dq(gh); might_sleep(); - wait_on_bit(&gl->gl_flags, GLF_DEMOTE, gfs2_glock_demote_wait, TASK_UNINTERRUPTIBLE); + wait_on_bit(&gl->gl_flags, GLF_DEMOTE, TASK_UNINTERRUPTIBLE); } /** diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index 91f274de1246..992ca5b1e045 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -936,12 +936,6 @@ fail: return error; } -static int dlm_recovery_wait(void *word) -{ - schedule(); - return 0; -} - static int control_first_done(struct gfs2_sbd *sdp) { struct lm_lockstruct *ls = &sdp->sd_lockstruct; @@ -976,7 +970,7 @@ restart: fs_info(sdp, "control_first_done wait gen %u\n", start_gen); wait_on_bit(&ls->ls_recover_flags, DFL_DLM_RECOVERY, - dlm_recovery_wait, TASK_UNINTERRUPTIBLE); + TASK_UNINTERRUPTIBLE); goto restart; } diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index bc564c0d6d16..d3eae244076e 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -1024,20 +1024,13 @@ void gfs2_lm_unmount(struct gfs2_sbd *sdp) lm->lm_unmount(sdp); } -static int gfs2_journalid_wait(void *word) -{ - if (signal_pending(current)) - return -EINTR; - schedule(); - return 0; -} - static int wait_on_journal(struct gfs2_sbd *sdp) { if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL) return 0; - return wait_on_bit(&sdp->sd_flags, SDF_NOJOURNALID, gfs2_journalid_wait, TASK_INTERRUPTIBLE); + return wait_on_bit(&sdp->sd_flags, SDF_NOJOURNALID, TASK_INTERRUPTIBLE) + ? -EINTR : 0; } void gfs2_online_uevent(struct gfs2_sbd *sdp) diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c index 94555d4c5698..573bd3b758fa 100644 --- a/fs/gfs2/recovery.c +++ b/fs/gfs2/recovery.c @@ -591,12 +591,6 @@ done: wake_up_bit(&jd->jd_flags, JDF_RECOVERY); } -static int gfs2_recovery_wait(void *word) -{ - schedule(); - return 0; -} - int gfs2_recover_journal(struct gfs2_jdesc *jd, bool wait) { int rv; @@ -609,7 +603,7 @@ int gfs2_recover_journal(struct gfs2_jdesc *jd, bool wait) BUG_ON(!rv); if (wait) - wait_on_bit(&jd->jd_flags, JDF_RECOVERY, gfs2_recovery_wait, + wait_on_bit(&jd->jd_flags, JDF_RECOVERY, TASK_UNINTERRUPTIBLE); return wait ? 
jd->jd_recover_error : 0; diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 1319b5c4ec68..2607ff13d486 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -864,12 +864,6 @@ static int gfs2_make_fs_ro(struct gfs2_sbd *sdp) return error; } -static int gfs2_umount_recovery_wait(void *word) -{ - schedule(); - return 0; -} - /** * gfs2_put_super - Unmount the filesystem * @sb: The VFS superblock @@ -894,7 +888,7 @@ restart: continue; spin_unlock(&sdp->sd_jindex_spin); wait_on_bit(&jd->jd_flags, JDF_RECOVERY, - gfs2_umount_recovery_wait, TASK_UNINTERRUPTIBLE); + TASK_UNINTERRUPTIBLE); goto restart; } spin_unlock(&sdp->sd_jindex_spin); diff --git a/fs/inode.c b/fs/inode.c index 6eecb7ff0b9a..5938f3928944 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1695,13 +1695,6 @@ int inode_needs_sync(struct inode *inode) } EXPORT_SYMBOL(inode_needs_sync); -int inode_wait(void *word) -{ - schedule(); - return 0; -} -EXPORT_SYMBOL(inode_wait); - /* * If we try to find an inode in the inode hash while it is being * deleted, we have to wait until the filesystem completes its diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 6f0f590cc5a3..5f09370c90a8 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -763,12 +763,6 @@ static void warn_dirty_buffer(struct buffer_head *bh) bdevname(bh->b_bdev, b), (unsigned long long)bh->b_blocknr); } -static int sleep_on_shadow_bh(void *word) -{ - io_schedule(); - return 0; -} - /* * If the buffer is already part of the current transaction, then there * is nothing we need to do. If it is already part of a prior @@ -906,8 +900,8 @@ repeat: if (buffer_shadow(bh)) { JBUFFER_TRACE(jh, "on shadow: sleep"); jbd_unlock_bh_state(bh); - wait_on_bit(&bh->b_state, BH_Shadow, - sleep_on_shadow_bh, TASK_UNINTERRUPTIBLE); + wait_on_bit_io(&bh->b_state, BH_Shadow, + TASK_UNINTERRUPTIBLE); goto repeat; } diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 4042ff58fe3f..524dd80d1898 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -361,8 +361,8 @@ start: * Prevent starvation issues if someone is doing a consistency * sync-to-disk */ - ret = wait_on_bit(&NFS_I(mapping->host)->flags, NFS_INO_FLUSHING, - nfs_wait_bit_killable, TASK_KILLABLE); + ret = wait_on_bit_action(&NFS_I(mapping->host)->flags, NFS_INO_FLUSHING, + nfs_wait_bit_killable, TASK_KILLABLE); if (ret) return ret; diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c index 44bf0140a4c7..e2a0361e24c6 100644 --- a/fs/nfs/filelayout/filelayoutdev.c +++ b/fs/nfs/filelayout/filelayoutdev.c @@ -783,8 +783,8 @@ nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j) static void nfs4_wait_ds_connect(struct nfs4_pnfs_ds *ds) { might_sleep(); - wait_on_bit(&ds->ds_state, NFS4DS_CONNECTING, - nfs_wait_bit_killable, TASK_KILLABLE); + wait_on_bit_action(&ds->ds_state, NFS4DS_CONNECTING, + nfs_wait_bit_killable, TASK_KILLABLE); } static void nfs4_clear_ds_conn_bit(struct nfs4_pnfs_ds *ds) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 9927913c97c2..b7b710e7d08e 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1074,8 +1074,8 @@ int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping) * the bit lock here if it looks like we're going to be doing that. 
*/ for (;;) { - ret = wait_on_bit(bitlock, NFS_INO_INVALIDATING, - nfs_wait_bit_killable, TASK_KILLABLE); + ret = wait_on_bit_action(bitlock, NFS_INO_INVALIDATING, + nfs_wait_bit_killable, TASK_KILLABLE); if (ret) goto out; spin_lock(&inode->i_lock); diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 848f6853c59e..42f121182167 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1251,8 +1251,8 @@ int nfs4_wait_clnt_recover(struct nfs_client *clp) might_sleep(); atomic_inc(&clp->cl_count); - res = wait_on_bit(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING, - nfs_wait_bit_killable, TASK_KILLABLE); + res = wait_on_bit_action(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING, + nfs_wait_bit_killable, TASK_KILLABLE); if (res) goto out; if (clp->cl_cons_state < 0) diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index b6ee3a6ee96d..6104d3500b49 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -138,12 +138,6 @@ nfs_iocounter_wait(struct nfs_io_counter *c) return __nfs_iocounter_wait(c); } -static int nfs_wait_bit_uninterruptible(void *word) -{ - io_schedule(); - return 0; -} - /* * nfs_page_group_lock - lock the head of the page group * @req - request in group that is to be locked @@ -158,7 +152,6 @@ nfs_page_group_lock(struct nfs_page *req) WARN_ON_ONCE(head != head->wb_head); wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK, - nfs_wait_bit_uninterruptible, TASK_UNINTERRUPTIBLE); } @@ -425,9 +418,8 @@ void nfs_release_request(struct nfs_page *req) int nfs_wait_on_request(struct nfs_page *req) { - return wait_on_bit(&req->wb_flags, PG_BUSY, - nfs_wait_bit_uninterruptible, - TASK_UNINTERRUPTIBLE); + return wait_on_bit_io(&req->wb_flags, PG_BUSY, + TASK_UNINTERRUPTIBLE); } /* diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 6fdcd233d6f7..a8914b335617 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1885,7 +1885,7 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync) if (test_and_set_bit(NFS_INO_LAYOUTCOMMITTING, &nfsi->flags)) { if (!sync) goto out; - status = wait_on_bit_lock(&nfsi->flags, + status = wait_on_bit_lock_action(&nfsi->flags, NFS_INO_LAYOUTCOMMITTING, nfs_wait_bit_killable, TASK_KILLABLE); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 98ff061ccaf3..f05f321f9d3d 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -397,7 +397,7 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) int err; /* Stop dirtying of new pages while we sync */ - err = wait_on_bit_lock(bitlock, NFS_INO_FLUSHING, + err = wait_on_bit_lock_action(bitlock, NFS_INO_FLUSHING, nfs_wait_bit_killable, TASK_KILLABLE); if (err) goto out_err; @@ -1475,7 +1475,7 @@ int nfs_commit_inode(struct inode *inode, int how) return error; if (!may_wait) goto out_mark_dirty; - error = wait_on_bit(&NFS_I(inode)->flags, + error = wait_on_bit_action(&NFS_I(inode)->flags, NFS_INO_COMMIT, nfs_wait_bit_killable, TASK_KILLABLE); diff --git a/include/linux/wait.h b/include/linux/wait.h index bd68819f0815..73960ff09e56 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -854,11 +854,14 @@ int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *key); (wait)->flags = 0; \ } while (0) + +extern int bit_wait(void *); +extern int bit_wait_io(void *); + /** * wait_on_bit - wait for a bit to be cleared * @word: the word being waited on, a kernel virtual address * @bit: the bit of the word being waited on - * @action: the function used to sleep, which may take special actions * @mode: the task state to sleep in * * There is a standard hashed waitqueue table for generic use. 
This @@ -867,9 +870,62 @@ int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *key); * call wait_on_bit() in threads waiting for the bit to clear. * One uses wait_on_bit() where one is waiting for the bit to clear, * but has no intention of setting it. + * Returned value will be zero if the bit was cleared, or non-zero + * if the process received a signal and the mode permitted wakeup + * on that signal. + */ +static inline int +wait_on_bit(void *word, int bit, unsigned mode) +{ + if (!test_bit(bit, word)) + return 0; + return out_of_line_wait_on_bit(word, bit, + bit_wait, + mode); +} + +/** + * wait_on_bit_io - wait for a bit to be cleared + * @word: the word being waited on, a kernel virtual address + * @bit: the bit of the word being waited on + * @mode: the task state to sleep in + * + * Use the standard hashed waitqueue table to wait for a bit + * to be cleared. This is similar to wait_on_bit(), but calls + * io_schedule() instead of schedule() for the actual waiting. + * + * Returned value will be zero if the bit was cleared, or non-zero + * if the process received a signal and the mode permitted wakeup + * on that signal. + */ +static inline int +wait_on_bit_io(void *word, int bit, unsigned mode) +{ + if (!test_bit(bit, word)) + return 0; + return out_of_line_wait_on_bit(word, bit, + bit_wait_io, + mode); +} + +/** + * wait_on_bit_action - wait for a bit to be cleared + * @word: the word being waited on, a kernel virtual address + * @bit: the bit of the word being waited on + * @action: the function used to sleep, which may take special actions + * @mode: the task state to sleep in + * + * Use the standard hashed waitqueue table to wait for a bit + * to be cleared, and allow the waiting action to be specified. + * This is like wait_on_bit() but allows fine control of how the waiting + * is done. + * + * Returned value will be zero if the bit was cleared, or non-zero + * if the process received a signal and the mode permitted wakeup + * on that signal. */ static inline int -wait_on_bit(void *word, int bit, int (*action)(void *), unsigned mode) +wait_on_bit_action(void *word, int bit, int (*action)(void *), unsigned mode) { if (!test_bit(bit, word)) return 0; @@ -880,7 +936,6 @@ wait_on_bit(void *word, int bit, int (*action)(void *), unsigned mode) * wait_on_bit_lock - wait for a bit to be cleared, when wanting to set it * @word: the word being waited on, a kernel virtual address * @bit: the bit of the word being waited on - * @action: the function used to sleep, which may take special actions * @mode: the task state to sleep in * * There is a standard hashed waitqueue table for generic use. This @@ -891,9 +946,61 @@ wait_on_bit(void *word, int bit, int (*action)(void *), unsigned mode) * wait_on_bit() in threads waiting to be able to set the bit. * One uses wait_on_bit_lock() where one is waiting for the bit to * clear with the intention of setting it, and when done, clearing it. + * + * Returns zero if the bit was (eventually) found to be clear and was + * set. Returns non-zero if a signal was delivered to the process and + * the @mode allows that signal to wake the process. 
+ */ +static inline int +wait_on_bit_lock(void *word, int bit, unsigned mode) +{ + if (!test_and_set_bit(bit, word)) + return 0; + return out_of_line_wait_on_bit_lock(word, bit, bit_wait, mode); +} + +/** + * wait_on_bit_lock_io - wait for a bit to be cleared, when wanting to set it + * @word: the word being waited on, a kernel virtual address + * @bit: the bit of the word being waited on + * @mode: the task state to sleep in + * + * Use the standard hashed waitqueue table to wait for a bit + * to be cleared and then to atomically set it. This is similar + * to wait_on_bit(), but calls io_schedule() instead of schedule() + * for the actual waiting. + * + * Returns zero if the bit was (eventually) found to be clear and was + * set. Returns non-zero if a signal was delivered to the process and + * the @mode allows that signal to wake the process. + */ +static inline int +wait_on_bit_lock_io(void *word, int bit, unsigned mode) +{ + if (!test_and_set_bit(bit, word)) + return 0; + return out_of_line_wait_on_bit_lock(word, bit, bit_wait_io, mode); +} + +/** + * wait_on_bit_lock_action - wait for a bit to be cleared, when wanting to set it + * @word: the word being waited on, a kernel virtual address + * @bit: the bit of the word being waited on + * @action: the function used to sleep, which may take special actions + * @mode: the task state to sleep in + * + * Use the standard hashed waitqueue table to wait for a bit + * to be cleared and then to set it, and allow the waiting action + * to be specified. + * This is like wait_on_bit() but allows fine control of how the waiting + * is done. + * + * Returns zero if the bit was (eventually) found to be clear and was + * set. Returns non-zero if a signal was delivered to the process and + * the @mode allows that signal to wake the process. */ static inline int -wait_on_bit_lock(void *word, int bit, int (*action)(void *), unsigned mode) +wait_on_bit_lock_action(void *word, int bit, int (*action)(void *), unsigned mode) { if (!test_and_set_bit(bit, word)) return 0; diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 5777c13849ba..a219be961c0a 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -90,7 +90,6 @@ struct writeback_control { * fs/fs-writeback.c */ struct bdi_writeback; -int inode_wait(void *); void writeback_inodes_sb(struct super_block *, enum wb_reason reason); void writeback_inodes_sb_nr(struct super_block *, unsigned long nr, enum wb_reason reason); @@ -105,7 +104,7 @@ void inode_wait_for_writeback(struct inode *inode); static inline void wait_on_inode(struct inode *inode) { might_sleep(); - wait_on_bit(&inode->i_state, __I_NEW, inode_wait, TASK_UNINTERRUPTIBLE); + wait_on_bit(&inode->i_state, __I_NEW, TASK_UNINTERRUPTIBLE); } /* diff --git a/kernel/ptrace.c b/kernel/ptrace.c index adf98622cb32..54e75226c2c4 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -28,12 +28,6 @@ #include -static int ptrace_trapping_sleep_fn(void *flags) -{ - schedule(); - return 0; -} - /* * ptrace a task: make the debugger its new parent and * move it to the ptrace list. 
@@ -371,7 +365,7 @@ unlock_creds: out: if (!retval) { wait_on_bit(&task->jobctl, JOBCTL_TRAPPING_BIT, - ptrace_trapping_sleep_fn, TASK_UNINTERRUPTIBLE); + TASK_UNINTERRUPTIBLE); proc_ptrace_connector(task, PTRACE_ATTACH); } diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c index 0ffa20ae657b..a104879e88f2 100644 --- a/kernel/sched/wait.c +++ b/kernel/sched/wait.c @@ -502,3 +502,21 @@ void wake_up_atomic_t(atomic_t *p) __wake_up_bit(atomic_t_waitqueue(p), p, WAIT_ATOMIC_T_BIT_NR); } EXPORT_SYMBOL(wake_up_atomic_t); + +__sched int bit_wait(void *word) +{ + if (signal_pending_state(current->state, current)) + return 1; + schedule(); + return 0; +} +EXPORT_SYMBOL(bit_wait); + +__sched int bit_wait_io(void *word) +{ + if (signal_pending_state(current->state, current)) + return 1; + io_schedule(); + return 0; +} +EXPORT_SYMBOL(bit_wait_io); diff --git a/mm/filemap.c b/mm/filemap.c index dafb06f70a09..d175917e2411 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -241,18 +241,6 @@ void delete_from_page_cache(struct page *page) } EXPORT_SYMBOL(delete_from_page_cache); -static int sleep_on_page(void *word) -{ - io_schedule(); - return 0; -} - -static int sleep_on_page_killable(void *word) -{ - sleep_on_page(word); - return fatal_signal_pending(current) ? -EINTR : 0; -} - static int filemap_check_errors(struct address_space *mapping) { int ret = 0; @@ -692,7 +680,7 @@ void wait_on_page_bit(struct page *page, int bit_nr) DEFINE_WAIT_BIT(wait, &page->flags, bit_nr); if (test_bit(bit_nr, &page->flags)) - __wait_on_bit(page_waitqueue(page), &wait, sleep_on_page, + __wait_on_bit(page_waitqueue(page), &wait, bit_wait_io, TASK_UNINTERRUPTIBLE); } EXPORT_SYMBOL(wait_on_page_bit); @@ -705,7 +693,7 @@ int wait_on_page_bit_killable(struct page *page, int bit_nr) return 0; return __wait_on_bit(page_waitqueue(page), &wait, - sleep_on_page_killable, TASK_KILLABLE); + bit_wait_io, TASK_KILLABLE); } /** @@ -806,7 +794,7 @@ void __lock_page(struct page *page) { DEFINE_WAIT_BIT(wait, &page->flags, PG_locked); - __wait_on_bit_lock(page_waitqueue(page), &wait, sleep_on_page, + __wait_on_bit_lock(page_waitqueue(page), &wait, bit_wait_io, TASK_UNINTERRUPTIBLE); } EXPORT_SYMBOL(__lock_page); @@ -816,7 +804,7 @@ int __lock_page_killable(struct page *page) DEFINE_WAIT_BIT(wait, &page->flags, PG_locked); return __wait_on_bit_lock(page_waitqueue(page), &wait, - sleep_on_page_killable, TASK_KILLABLE); + bit_wait_io, TASK_KILLABLE); } EXPORT_SYMBOL_GPL(__lock_page_killable); diff --git a/mm/ksm.c b/mm/ksm.c index 346ddc9e4c0d..fb7590222706 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -1978,18 +1978,12 @@ void ksm_migrate_page(struct page *newpage, struct page *oldpage) #endif /* CONFIG_MIGRATION */ #ifdef CONFIG_MEMORY_HOTREMOVE -static int just_wait(void *word) -{ - schedule(); - return 0; -} - static void wait_while_offlining(void) { while (ksm_run & KSM_RUN_OFFLINE) { mutex_unlock(&ksm_thread_mutex); wait_on_bit(&ksm_run, ilog2(KSM_RUN_OFFLINE), - just_wait, TASK_UNINTERRUPTIBLE); + TASK_UNINTERRUPTIBLE); mutex_lock(&ksm_thread_mutex); } } diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 0a43cce9a914..e090bffe1bf8 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2186,12 +2186,6 @@ static void hci_inq_req(struct hci_request *req, unsigned long opt) hci_req_add(req, HCI_OP_INQUIRY, sizeof(cp), &cp); } -static int wait_inquiry(void *word) -{ - schedule(); - return signal_pending(current); -} - int hci_inquiry(void __user *arg) { __u8 __user *ptr = arg; @@ -2242,7 +2236,7 @@ int 
hci_inquiry(void __user *arg) /* Wait until Inquiry procedure finishes (HCI_INQUIRY flag is * cleared). If it is interrupted by a signal, return -EINTR. */ - if (wait_on_bit(&hdev->flags, HCI_INQUIRY, wait_inquiry, + if (wait_on_bit(&hdev->flags, HCI_INQUIRY, TASK_INTERRUPTIBLE)) return -EINTR; } diff --git a/security/keys/gc.c b/security/keys/gc.c index d3222b6d7d59..9609a7f0faea 100644 --- a/security/keys/gc.c +++ b/security/keys/gc.c @@ -91,15 +91,6 @@ static void key_gc_timer_func(unsigned long data) key_schedule_gc_links(); } -/* - * wait_on_bit() sleep function for uninterruptible waiting - */ -static int key_gc_wait_bit(void *flags) -{ - schedule(); - return 0; -} - /* * Reap keys of dead type. * @@ -123,7 +114,7 @@ void key_gc_keytype(struct key_type *ktype) schedule_work(&key_gc_work); kdebug("sleep"); - wait_on_bit(&key_gc_flags, KEY_GC_REAPING_KEYTYPE, key_gc_wait_bit, + wait_on_bit(&key_gc_flags, KEY_GC_REAPING_KEYTYPE, TASK_UNINTERRUPTIBLE); key_gc_dead_keytype = NULL; diff --git a/security/keys/request_key.c b/security/keys/request_key.c index 381411941cc1..26a94f18af94 100644 --- a/security/keys/request_key.c +++ b/security/keys/request_key.c @@ -21,24 +21,6 @@ #define key_negative_timeout 60 /* default timeout on a negative key's existence */ -/* - * wait_on_bit() sleep function for uninterruptible waiting - */ -static int key_wait_bit(void *flags) -{ - schedule(); - return 0; -} - -/* - * wait_on_bit() sleep function for interruptible waiting - */ -static int key_wait_bit_intr(void *flags) -{ - schedule(); - return signal_pending(current) ? -ERESTARTSYS : 0; -} - /** * complete_request_key - Complete the construction of a key. * @cons: The key construction record. @@ -592,10 +574,9 @@ int wait_for_key_construction(struct key *key, bool intr) int ret; ret = wait_on_bit(&key->flags, KEY_FLAG_USER_CONSTRUCT, - intr ? key_wait_bit_intr : key_wait_bit, intr ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE); - if (ret < 0) - return ret; + if (ret) + return -ERESTARTSYS; if (test_bit(KEY_FLAG_NEGATIVE, &key->flags)) { smp_rmb(); return key->type_data.reject_error; -- cgit v1.2.3-59-g8ed1b From c1221321b7c25b53204447cff9949a6d5a7ddddc Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 7 Jul 2014 15:16:04 +1000 Subject: sched: Allow wait_on_bit_action() functions to support a timeout It is currently not possible for various wait_on_bit functions to implement a timeout. While the "action" function that is called to do the waiting could certainly use schedule_timeout(), there is no way to carry forward the remaining timeout after a false wake-up. As false-wakeups are clearly possible at least due to possible hash collisions in bit_waitqueue(), this is a real problem. The 'action' function is currently passed a pointer to the word containing the bit being waited on. No current action functions use this pointer. So changing it to something else will be a little noisy but will have no immediate effect. This patch changes the 'action' function to take a pointer to the "struct wait_bit_key", which contains a pointer to the word containing the bit so nothing is really lost. It also adds a 'private' field to "struct wait_bit_key", which is initialized to zero.
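[Editor's note: a minimal before/after sketch of the action-callback change described above. The name my_action is illustrative; the structure layout is abridged from the include/linux/wait.h hunk later in this patch.]

	/* Before this patch: the action callback only received a pointer to
	 * the word containing the bit being waited on. */
	int my_action(void *word);

	/* After this patch: it receives the whole wait_bit_key, which still
	 * carries the word pointer and adds a 'private' field the action is
	 * free to use, e.g. to remember when it started waiting. */
	struct wait_bit_key {
		void		*flags;		/* word containing the bit */
		int		bit_nr;		/* bit being waited on */
		unsigned long	private;	/* initialized to zero */
	};
	typedef int wait_bit_action_f(struct wait_bit_key *);

	int my_action(struct wait_bit_key *key);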
An action function can now implement a timeout with something like static int timed_out_waiter(struct wait_bit_key *key) { unsigned long waited; if (key->private == 0) { key->private = jiffies; if (key->private == 0) key->private -= 1; } waited = jiffies - key->private; if (waited > 10 * HZ) return -EAGAIN; schedule_timeout(waited - 10 * HZ); return 0; } If any other need for context in a waiter were found it would be easy to use ->private for some other purpose, or even extend "struct wait_bit_key". My particular need is to support timeouts in nfs_release_page() to avoid deadlocks with loopback mounted NFS. While wait_on_bit_timeout() would be a cleaner interface, it will not meet my need. I need the timeout to be sensitive to the state of the connection with the server, which could change. So I need to use an 'action' interface. Signed-off-by: NeilBrown Acked-by: Peter Zijlstra Cc: Oleg Nesterov Cc: Steve French Cc: David Howells Cc: Steven Whitehouse Cc: Linus Torvalds Link: http://lkml.kernel.org/r/20140707051604.28027.41257.stgit@notabene.brown Signed-off-by: Ingo Molnar --- fs/cifs/inode.c | 2 +- fs/nfs/inode.c | 2 +- fs/nfs/internal.h | 2 +- fs/nfs/pagelist.c | 2 +- include/linux/sunrpc/sched.h | 2 +- include/linux/wait.h | 18 ++++++++++-------- kernel/sched/wait.c | 16 ++++++++-------- net/sunrpc/sched.c | 4 ++-- 8 files changed, 25 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 213c4580b4e3..41de3935caa0 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1780,7 +1780,7 @@ cifs_invalidate_mapping(struct inode *inode) * @word: long word containing the bit lock */ static int -cifs_wait_bit_killable(void *word) +cifs_wait_bit_killable(struct wait_bit_key *key) { if (fatal_signal_pending(current)) return -ERESTARTSYS; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index b7b710e7d08e..abd37a380535 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -75,7 +75,7 @@ nfs_fattr_to_ino_t(struct nfs_fattr *fattr) * nfs_wait_bit_killable - helper for functions that are sleeping on bit locks * @word: long word containing the bit lock */ -int nfs_wait_bit_killable(void *word) +int nfs_wait_bit_killable(struct wait_bit_key *key) { if (fatal_signal_pending(current)) return -ERESTARTSYS; diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 82ddbf46660e..e0193d63630c 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -347,7 +347,7 @@ extern int nfs_drop_inode(struct inode *); extern void nfs_clear_inode(struct inode *); extern void nfs_evict_inode(struct inode *); void nfs_zap_acl_cache(struct inode *inode); -extern int nfs_wait_bit_killable(void *word); +extern int nfs_wait_bit_killable(struct wait_bit_key *key); /* super.c */ extern const struct super_operations nfs_sops; diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 6104d3500b49..745a612dbe22 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -117,7 +117,7 @@ __nfs_iocounter_wait(struct nfs_io_counter *c) set_bit(NFS_IO_INPROGRESS, &c->flags); if (atomic_read(&c->io_count) == 0) break; - ret = nfs_wait_bit_killable(&c->flags); + ret = nfs_wait_bit_killable(&q.key); } while (atomic_read(&c->io_count) != 0); finish_wait(wq, &q.wait); return ret; diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index ad7dbe2cfecd..1a8959944c5f 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -236,7 +236,7 @@ void * rpc_malloc(struct rpc_task *, size_t); void rpc_free(void *); int rpciod_up(void); void 
rpciod_down(void); -int __rpc_wait_for_completion_task(struct rpc_task *task, int (*)(void *)); +int __rpc_wait_for_completion_task(struct rpc_task *task, wait_bit_action_f *); #ifdef RPC_DEBUG struct net; void rpc_show_tasks(struct net *); diff --git a/include/linux/wait.h b/include/linux/wait.h index 73960ff09e56..6fb1ba5f9b2f 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -25,6 +25,7 @@ struct wait_bit_key { void *flags; int bit_nr; #define WAIT_ATOMIC_T_BIT_NR -1 + unsigned long private; }; struct wait_bit_queue { @@ -141,18 +142,19 @@ __remove_wait_queue(wait_queue_head_t *head, wait_queue_t *old) list_del(&old->task_list); } +typedef int wait_bit_action_f(struct wait_bit_key *); void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr, void *key); void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key); void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, int nr, void *key); void __wake_up_locked(wait_queue_head_t *q, unsigned int mode, int nr); void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr); void __wake_up_bit(wait_queue_head_t *, void *, int); -int __wait_on_bit(wait_queue_head_t *, struct wait_bit_queue *, int (*)(void *), unsigned); -int __wait_on_bit_lock(wait_queue_head_t *, struct wait_bit_queue *, int (*)(void *), unsigned); +int __wait_on_bit(wait_queue_head_t *, struct wait_bit_queue *, wait_bit_action_f *, unsigned); +int __wait_on_bit_lock(wait_queue_head_t *, struct wait_bit_queue *, wait_bit_action_f *, unsigned); void wake_up_bit(void *, int); void wake_up_atomic_t(atomic_t *); -int out_of_line_wait_on_bit(void *, int, int (*)(void *), unsigned); -int out_of_line_wait_on_bit_lock(void *, int, int (*)(void *), unsigned); +int out_of_line_wait_on_bit(void *, int, wait_bit_action_f *, unsigned); +int out_of_line_wait_on_bit_lock(void *, int, wait_bit_action_f *, unsigned); int out_of_line_wait_on_atomic_t(atomic_t *, int (*)(atomic_t *), unsigned); wait_queue_head_t *bit_waitqueue(void *, int); @@ -855,8 +857,8 @@ int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *key); } while (0) -extern int bit_wait(void *); -extern int bit_wait_io(void *); +extern int bit_wait(struct wait_bit_key *); +extern int bit_wait_io(struct wait_bit_key *); /** * wait_on_bit - wait for a bit to be cleared @@ -925,7 +927,7 @@ wait_on_bit_io(void *word, int bit, unsigned mode) * on that signal. */ static inline int -wait_on_bit_action(void *word, int bit, int (*action)(void *), unsigned mode) +wait_on_bit_action(void *word, int bit, wait_bit_action_f *action, unsigned mode) { if (!test_bit(bit, word)) return 0; @@ -1000,7 +1002,7 @@ wait_on_bit_lock_io(void *word, int bit, unsigned mode) * the @mode allows that signal to wake the process. 
*/ static inline int -wait_on_bit_lock_action(void *word, int bit, int (*action)(void *), unsigned mode) +wait_on_bit_lock_action(void *word, int bit, wait_bit_action_f *action, unsigned mode) { if (!test_and_set_bit(bit, word)) return 0; diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c index a104879e88f2..15cab1a4f84e 100644 --- a/kernel/sched/wait.c +++ b/kernel/sched/wait.c @@ -319,14 +319,14 @@ EXPORT_SYMBOL(wake_bit_function); */ int __sched __wait_on_bit(wait_queue_head_t *wq, struct wait_bit_queue *q, - int (*action)(void *), unsigned mode) + wait_bit_action_f *action, unsigned mode) { int ret = 0; do { prepare_to_wait(wq, &q->wait, mode); if (test_bit(q->key.bit_nr, q->key.flags)) - ret = (*action)(q->key.flags); + ret = (*action)(&q->key); } while (test_bit(q->key.bit_nr, q->key.flags) && !ret); finish_wait(wq, &q->wait); return ret; @@ -334,7 +334,7 @@ __wait_on_bit(wait_queue_head_t *wq, struct wait_bit_queue *q, EXPORT_SYMBOL(__wait_on_bit); int __sched out_of_line_wait_on_bit(void *word, int bit, - int (*action)(void *), unsigned mode) + wait_bit_action_f *action, unsigned mode) { wait_queue_head_t *wq = bit_waitqueue(word, bit); DEFINE_WAIT_BIT(wait, word, bit); @@ -345,7 +345,7 @@ EXPORT_SYMBOL(out_of_line_wait_on_bit); int __sched __wait_on_bit_lock(wait_queue_head_t *wq, struct wait_bit_queue *q, - int (*action)(void *), unsigned mode) + wait_bit_action_f *action, unsigned mode) { do { int ret; @@ -353,7 +353,7 @@ __wait_on_bit_lock(wait_queue_head_t *wq, struct wait_bit_queue *q, prepare_to_wait_exclusive(wq, &q->wait, mode); if (!test_bit(q->key.bit_nr, q->key.flags)) continue; - ret = action(q->key.flags); + ret = action(&q->key); if (!ret) continue; abort_exclusive_wait(wq, &q->wait, mode, &q->key); @@ -365,7 +365,7 @@ __wait_on_bit_lock(wait_queue_head_t *wq, struct wait_bit_queue *q, EXPORT_SYMBOL(__wait_on_bit_lock); int __sched out_of_line_wait_on_bit_lock(void *word, int bit, - int (*action)(void *), unsigned mode) + wait_bit_action_f *action, unsigned mode) { wait_queue_head_t *wq = bit_waitqueue(word, bit); DEFINE_WAIT_BIT(wait, word, bit); @@ -503,7 +503,7 @@ void wake_up_atomic_t(atomic_t *p) } EXPORT_SYMBOL(wake_up_atomic_t); -__sched int bit_wait(void *word) +__sched int bit_wait(struct wait_bit_key *word) { if (signal_pending_state(current->state, current)) return 1; @@ -512,7 +512,7 @@ __sched int bit_wait(void *word) } EXPORT_SYMBOL(bit_wait); -__sched int bit_wait_io(void *word) +__sched int bit_wait_io(struct wait_bit_key *word) { if (signal_pending_state(current->state, current)) return 1; diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index c0365c14b858..9358c79fd589 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -250,7 +250,7 @@ void rpc_destroy_wait_queue(struct rpc_wait_queue *queue) } EXPORT_SYMBOL_GPL(rpc_destroy_wait_queue); -static int rpc_wait_bit_killable(void *word) +static int rpc_wait_bit_killable(struct wait_bit_key *key) { if (fatal_signal_pending(current)) return -ERESTARTSYS; @@ -309,7 +309,7 @@ static int rpc_complete_task(struct rpc_task *task) * to enforce taking of the wq->lock and hence avoid races with * rpc_complete_task(). 
*/ -int __rpc_wait_for_completion_task(struct rpc_task *task, int (*action)(void *)) +int __rpc_wait_for_completion_task(struct rpc_task *task, wait_bit_action_f *action) { if (action == NULL) action = rpc_wait_bit_killable; -- cgit v1.2.3-59-g8ed1b From 646d7043adf3d92de5d3db1244a82a12628303de Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Fri, 11 Jul 2014 14:39:10 -0400 Subject: ftrace: Allow archs to specify if they need a separate function graph trampoline Currently if an arch supports function graph tracing, the core code will just assign the function graph trampoline to the function graph addr that gets called. But as the old method for function graph tracing always calls the function trampoline first and that calls the function graph trampoline, some archs may have the function graph trampoline dependent on operations that were done in the function trampoline. This causes function graph tracer to break on those archs. Instead of having the default be to set the function graph ftrace_ops to the function graph trampoline, have it instead just set it to zero which will keep it from jumping to a trampoline that is not set up to be jumped directly too. Link: http://lkml.kernel.org/r/53BED155.9040607@nvidia.com Reported-by: Tuomas Tynkkynen Tested-by: Tuomas Tynkkynen Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 10 ++++++++++ kernel/trace/ftrace.c | 6 ++++-- 2 files changed, 14 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 11e18fd58b1a..4807a39e7ae1 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -453,6 +453,16 @@ void ftrace_modify_all_code(int command); #endif #endif +/* + * If an arch would like functions that are only traced + * by the function graph tracer to jump directly to its own + * trampoline, then they can define FTRACE_GRAPH_TRAMP_ADDR + * to be that address to jump to. + */ +#ifndef FTRACE_GRAPH_TRAMP_ADDR +#define FTRACE_GRAPH_TRAMP_ADDR ((unsigned long) 0) +#endif + #ifdef CONFIG_FUNCTION_GRAPH_TRACER extern void ftrace_graph_caller(void); extern int ftrace_enable_ftrace_graph_caller(void); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 45aac1a742c5..1776153ea6e0 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -5366,7 +5366,8 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc, #ifdef CONFIG_DYNAMIC_FTRACE /* Optimize function graph calling (if implemented by arch) */ - global_ops.trampoline = FTRACE_GRAPH_ADDR; + if (FTRACE_GRAPH_TRAMP_ADDR != 0) + global_ops.trampoline = FTRACE_GRAPH_TRAMP_ADDR; #endif ret = ftrace_startup(&global_ops, FTRACE_START_FUNC_RET); @@ -5390,7 +5391,8 @@ void unregister_ftrace_graph(void) ftrace_shutdown(&global_ops, FTRACE_STOP_FUNC_RET); global_ops.flags &= ~FTRACE_OPS_FL_STUB; #ifdef CONFIG_DYNAMIC_FTRACE - global_ops.trampoline = 0; + if (FTRACE_GRAPH_TRAMP_ADDR != 0) + global_ops.trampoline = 0; #endif unregister_pm_notifier(&ftrace_suspend_notifier); unregister_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL); -- cgit v1.2.3-59-g8ed1b From 8ecef00fe1f33658ee36e902dba6850b51312073 Mon Sep 17 00:00:00 2001 From: Ulrich Hecht Date: Thu, 10 Jul 2014 09:53:59 +0200 Subject: usb: renesas_usbhs: add R-Car Gen. 2 init and power control In preparation for DT conversion to reduce reliance on platform device callbacks. 
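[Editor's note: a hedged sketch of what this change enables on the platform-data side. Only the .type and .enable_gpio fields and the USBHS_TYPE_* constants come from this patch; the variable name and the omission of other fields are illustrative. An R-Car Gen2 board can now select the built-in usbhs_rcar2_ops by type instead of supplying its own platform_callback:]

	static struct renesas_usbhs_platform_info usbhs_info = {
		/* no .platform_callback: usbhs_probe() picks usbhs_rcar2_ops
		 * from the type below */
		.driver_param = {
			.type		= USBHS_TYPE_R8A7791,
			.enable_gpio	= 0,	/* 0 skips the "function enabled?" GPIO check */
		},
	};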
Signed-off-by: Ulrich Hecht Signed-off-by: Felipe Balbi --- drivers/usb/renesas_usbhs/Makefile | 2 +- drivers/usb/renesas_usbhs/common.c | 66 +++++++++++++++++++++++++++++--- drivers/usb/renesas_usbhs/common.h | 2 + drivers/usb/renesas_usbhs/rcar2.c | 77 ++++++++++++++++++++++++++++++++++++++ drivers/usb/renesas_usbhs/rcar2.h | 4 ++ include/linux/usb/renesas_usbhs.h | 6 +++ 6 files changed, 151 insertions(+), 6 deletions(-) create mode 100644 drivers/usb/renesas_usbhs/rcar2.c create mode 100644 drivers/usb/renesas_usbhs/rcar2.h (limited to 'include/linux') diff --git a/drivers/usb/renesas_usbhs/Makefile b/drivers/usb/renesas_usbhs/Makefile index bc8aef4311a1..9e47f477b6d2 100644 --- a/drivers/usb/renesas_usbhs/Makefile +++ b/drivers/usb/renesas_usbhs/Makefile @@ -4,7 +4,7 @@ obj-$(CONFIG_USB_RENESAS_USBHS) += renesas_usbhs.o -renesas_usbhs-y := common.o mod.o pipe.o fifo.o +renesas_usbhs-y := common.o mod.o pipe.o fifo.o rcar2.o ifneq ($(CONFIG_USB_RENESAS_USBHS_HCD),) renesas_usbhs-y += mod_host.o diff --git a/drivers/usb/renesas_usbhs/common.c b/drivers/usb/renesas_usbhs/common.c index 17267b0a2e95..1b9bf8d83235 100644 --- a/drivers/usb/renesas_usbhs/common.c +++ b/drivers/usb/renesas_usbhs/common.c @@ -15,12 +15,14 @@ * */ #include +#include #include #include #include #include #include #include "common.h" +#include "rcar2.h" /* * image of renesas_usbhs @@ -284,6 +286,8 @@ static void usbhsc_set_buswait(struct usbhs_priv *priv) /* * platform default param */ + +/* commonly used on old SH-Mobile SoCs */ static u32 usbhsc_default_pipe_type[] = { USB_ENDPOINT_XFER_CONTROL, USB_ENDPOINT_XFER_ISOC, @@ -297,6 +301,26 @@ static u32 usbhsc_default_pipe_type[] = { USB_ENDPOINT_XFER_INT, }; +/* commonly used on newer SH-Mobile and R-Car SoCs */ +static u32 usbhsc_new_pipe_type[] = { + USB_ENDPOINT_XFER_CONTROL, + USB_ENDPOINT_XFER_ISOC, + USB_ENDPOINT_XFER_ISOC, + USB_ENDPOINT_XFER_BULK, + USB_ENDPOINT_XFER_BULK, + USB_ENDPOINT_XFER_BULK, + USB_ENDPOINT_XFER_INT, + USB_ENDPOINT_XFER_INT, + USB_ENDPOINT_XFER_INT, + USB_ENDPOINT_XFER_BULK, + USB_ENDPOINT_XFER_BULK, + USB_ENDPOINT_XFER_BULK, + USB_ENDPOINT_XFER_BULK, + USB_ENDPOINT_XFER_BULK, + USB_ENDPOINT_XFER_BULK, + USB_ENDPOINT_XFER_BULK, +}; + /* * power control */ @@ -423,8 +447,7 @@ static int usbhs_probe(struct platform_device *pdev) int ret; /* check platform information */ - if (!info || - !info->platform_callback.get_id) { + if (!info) { dev_err(&pdev->dev, "no platform information\n"); return -EINVAL; } @@ -451,13 +474,32 @@ static int usbhs_probe(struct platform_device *pdev) /* * care platform info */ - memcpy(&priv->pfunc, - &info->platform_callback, - sizeof(struct renesas_usbhs_platform_callback)); + memcpy(&priv->dparam, &info->driver_param, sizeof(struct renesas_usbhs_driver_param)); + switch (priv->dparam.type) { + case USBHS_TYPE_R8A7790: + case USBHS_TYPE_R8A7791: + priv->pfunc = usbhs_rcar2_ops; + if (!priv->dparam.pipe_type) { + priv->dparam.pipe_type = usbhsc_new_pipe_type; + priv->dparam.pipe_size = + ARRAY_SIZE(usbhsc_new_pipe_type); + } + break; + default: + if (!info->platform_callback.get_id) { + dev_err(&pdev->dev, "no platform callbacks"); + return -EINVAL; + } + memcpy(&priv->pfunc, + &info->platform_callback, + sizeof(struct renesas_usbhs_platform_callback)); + break; + } + /* set driver callback functions for platform */ dfunc = &info->driver_callback; dfunc->notify_hotplug = usbhsc_drvcllbck_notify_hotplug; @@ -507,6 +549,20 @@ static int usbhs_probe(struct platform_device *pdev) */ usbhs_sys_clock_ctrl(priv, 0); 
+ /* check GPIO determining if USB function should be enabled */ + if (priv->dparam.enable_gpio) { + gpio_request_one(priv->dparam.enable_gpio, GPIOF_IN, NULL); + ret = !gpio_get_value(priv->dparam.enable_gpio); + gpio_free(priv->dparam.enable_gpio); + if (ret) { + dev_warn(&pdev->dev, + "USB function not selected (GPIO %d)\n", + priv->dparam.enable_gpio); + ret = -ENOTSUPP; + goto probe_end_mod_exit; + } + } + /* * platform call * diff --git a/drivers/usb/renesas_usbhs/common.h b/drivers/usb/renesas_usbhs/common.h index c69dd2fba360..a7996da6a1bd 100644 --- a/drivers/usb/renesas_usbhs/common.h +++ b/drivers/usb/renesas_usbhs/common.h @@ -268,6 +268,8 @@ struct usbhs_priv { * fifo control */ struct usbhs_fifo_info fifo_info; + + struct usb_phy *phy; }; /* diff --git a/drivers/usb/renesas_usbhs/rcar2.c b/drivers/usb/renesas_usbhs/rcar2.c new file mode 100644 index 000000000000..e6b9dcc1c289 --- /dev/null +++ b/drivers/usb/renesas_usbhs/rcar2.c @@ -0,0 +1,77 @@ +/* + * Renesas USB driver R-Car Gen. 2 initialization and power control + * + * Copyright (C) 2014 Ulrich Hecht + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include +#include +#include +#include +#include "common.h" +#include "rcar2.h" + +static int usbhs_rcar2_hardware_init(struct platform_device *pdev) +{ + struct usbhs_priv *priv = usbhs_pdev_to_priv(pdev); + struct usb_phy *phy; + + phy = usb_get_phy_dev(&pdev->dev, 0); + if (IS_ERR(phy)) + return PTR_ERR(phy); + + priv->phy = phy; + return 0; +} + +static int usbhs_rcar2_hardware_exit(struct platform_device *pdev) +{ + struct usbhs_priv *priv = usbhs_pdev_to_priv(pdev); + + if (!priv->phy) + return 0; + + usb_put_phy(priv->phy); + priv->phy = NULL; + + return 0; +} + +static int usbhs_rcar2_power_ctrl(struct platform_device *pdev, + void __iomem *base, int enable) +{ + struct usbhs_priv *priv = usbhs_pdev_to_priv(pdev); + + if (!priv->phy) + return -ENODEV; + + if (enable) { + int retval = usb_phy_init(priv->phy); + + if (!retval) + retval = usb_phy_set_suspend(priv->phy, 0); + return retval; + } + + usb_phy_set_suspend(priv->phy, 1); + usb_phy_shutdown(priv->phy); + return 0; +} + +static int usbhs_rcar2_get_id(struct platform_device *pdev) +{ + return USBHS_GADGET; +} + +const struct renesas_usbhs_platform_callback usbhs_rcar2_ops = { + .hardware_init = usbhs_rcar2_hardware_init, + .hardware_exit = usbhs_rcar2_hardware_exit, + .power_ctrl = usbhs_rcar2_power_ctrl, + .get_id = usbhs_rcar2_get_id, +}; diff --git a/drivers/usb/renesas_usbhs/rcar2.h b/drivers/usb/renesas_usbhs/rcar2.h new file mode 100644 index 000000000000..f07f10d9b3b2 --- /dev/null +++ b/drivers/usb/renesas_usbhs/rcar2.h @@ -0,0 +1,4 @@ +#include "common.h" + +extern const struct renesas_usbhs_platform_callback + usbhs_rcar2_ops; diff --git a/include/linux/usb/renesas_usbhs.h b/include/linux/usb/renesas_usbhs.h index e452ba6ec6bd..d5952bb66752 100644 --- a/include/linux/usb/renesas_usbhs.h +++ b/include/linux/usb/renesas_usbhs.h @@ -153,6 +153,9 @@ struct renesas_usbhs_driver_param { */ int pio_dma_border; /* default is 64byte */ + u32 type; + u32 enable_gpio; + /* * option: */ @@ -160,6 +163,9 @@ struct renesas_usbhs_driver_param { u32 has_sudmac:1; /* for SUDMAC */ }; +#define USBHS_TYPE_R8A7790 1 +#define USBHS_TYPE_R8A7791 2 + /* * option: * -- cgit v1.2.3-59-g8ed1b From 
8fe8bc7773303e3c49be348c3180bc9785104dfc Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Wed, 9 Jul 2014 16:25:23 +0200 Subject: i2c: s6000: remove duplicate driver It turned out that the s6000 simply has a designware IP core and should use the designated driver for it which is way more maintained and feature complete. There are currently no users in tree, and not even a toolchain for s6000 seems to be available. So, simply remove this duplicate. If someone needs assistance in converting to the designware driver, the i2c list will be there to help. Signed-off-by: Wolfram Sang --- drivers/i2c/busses/Kconfig | 10 - drivers/i2c/busses/Makefile | 1 - drivers/i2c/busses/i2c-s6000.c | 404 ----------------------------------------- drivers/i2c/busses/i2c-s6000.h | 79 -------- include/linux/i2c/s6000.h | 10 - 5 files changed, 504 deletions(-) delete mode 100644 drivers/i2c/busses/i2c-s6000.c delete mode 100644 drivers/i2c/busses/i2c-s6000.h delete mode 100644 include/linux/i2c/s6000.h (limited to 'include/linux') diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index 9f7d5859cf65..d25dd120c011 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -700,16 +700,6 @@ config I2C_S3C2410 Say Y here to include support for I2C controller in the Samsung SoCs. -config I2C_S6000 - tristate "S6000 I2C support" - depends on XTENSA_VARIANT_S6000 - help - This driver supports the on chip I2C device on the - S6000 xtensa processor family. - - To compile this driver as a module, choose M here. The module - will be called i2c-s6000. - config I2C_SH7760 tristate "Renesas SH7760 I2C Controller" depends on CPU_SUBTYPE_SH7760 diff --git a/drivers/i2c/busses/Makefile b/drivers/i2c/busses/Makefile index dd9a7f8e873f..1958b490105e 100644 --- a/drivers/i2c/busses/Makefile +++ b/drivers/i2c/busses/Makefile @@ -68,7 +68,6 @@ obj-$(CONFIG_I2C_QUP) += i2c-qup.o obj-$(CONFIG_I2C_RIIC) += i2c-riic.o obj-$(CONFIG_I2C_RK3X) += i2c-rk3x.o obj-$(CONFIG_I2C_S3C2410) += i2c-s3c2410.o -obj-$(CONFIG_I2C_S6000) += i2c-s6000.o obj-$(CONFIG_I2C_SH7760) += i2c-sh7760.o obj-$(CONFIG_I2C_SH_MOBILE) += i2c-sh_mobile.o obj-$(CONFIG_I2C_SIMTEC) += i2c-simtec.o diff --git a/drivers/i2c/busses/i2c-s6000.c b/drivers/i2c/busses/i2c-s6000.c deleted file mode 100644 index dd186a037684..000000000000 --- a/drivers/i2c/busses/i2c-s6000.c +++ /dev/null @@ -1,404 +0,0 @@ -/* - * drivers/i2c/busses/i2c-s6000.c - * - * Description: Driver for S6000 Family I2C Interface - * Copyright (c) 2008 emlix GmbH - * Author: Oskar Schirmer - * - * Partially based on i2c-bfin-twi.c driver by - * Copyright (c) 2005-2007 Analog Devices, Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "i2c-s6000.h" - -#define DRV_NAME "i2c-s6000" - -#define POLL_TIMEOUT (2 * HZ) - -struct s6i2c_if { - u8 __iomem *reg; /* memory mapped registers */ - int irq; - spinlock_t lock; - struct i2c_msg *msgs; /* messages currently handled */ - int msgs_num; /* nb of msgs to do */ - int msgs_push; /* nb of msgs read/written */ - int msgs_done; /* nb of msgs finally handled */ - unsigned push; /* nb of bytes read/written in msg */ - unsigned done; /* nb of bytes finally handled */ - int timeout_count; /* timeout retries left */ - struct timer_list timeout_timer; - struct i2c_adapter adap; - struct completion complete; - struct clk *clk; - struct resource *res; -}; - -static inline u16 i2c_rd16(struct s6i2c_if *iface, unsigned n) -{ - return readw(iface->reg + (n)); -} - -static inline void i2c_wr16(struct s6i2c_if *iface, unsigned n, u16 v) -{ - writew(v, iface->reg + (n)); -} - -static inline u32 i2c_rd32(struct s6i2c_if *iface, unsigned n) -{ - return readl(iface->reg + (n)); -} - -static inline void i2c_wr32(struct s6i2c_if *iface, unsigned n, u32 v) -{ - writel(v, iface->reg + (n)); -} - -static struct s6i2c_if s6i2c_if; - -static void s6i2c_handle_interrupt(struct s6i2c_if *iface) -{ - if (i2c_rd16(iface, S6_I2C_INTRSTAT) & (1 << S6_I2C_INTR_TXABRT)) { - i2c_rd16(iface, S6_I2C_CLRTXABRT); - i2c_wr16(iface, S6_I2C_INTRMASK, 0); - complete(&iface->complete); - return; - } - if (iface->msgs_done >= iface->msgs_num) { - dev_err(&iface->adap.dev, "s6i2c: spurious I2C irq: %04x\n", - i2c_rd16(iface, S6_I2C_INTRSTAT)); - i2c_wr16(iface, S6_I2C_INTRMASK, 0); - return; - } - while ((iface->msgs_push < iface->msgs_num) - && (i2c_rd16(iface, S6_I2C_STATUS) & (1 << S6_I2C_STATUS_TFNF))) { - struct i2c_msg *m = &iface->msgs[iface->msgs_push]; - if (!(m->flags & I2C_M_RD)) - i2c_wr16(iface, S6_I2C_DATACMD, m->buf[iface->push]); - else - i2c_wr16(iface, S6_I2C_DATACMD, - 1 << S6_I2C_DATACMD_READ); - if (++iface->push >= m->len) { - iface->push = 0; - iface->msgs_push += 1; - } - } - do { - struct i2c_msg *m = &iface->msgs[iface->msgs_done]; - if (!(m->flags & I2C_M_RD)) { - if (iface->msgs_done < iface->msgs_push) - iface->msgs_done += 1; - else - break; - } else if (i2c_rd16(iface, S6_I2C_STATUS) - & (1 << S6_I2C_STATUS_RFNE)) { - m->buf[iface->done] = i2c_rd16(iface, S6_I2C_DATACMD); - if (++iface->done >= m->len) { - iface->done = 0; - iface->msgs_done += 1; - } - } else{ - break; - } - } while (iface->msgs_done < iface->msgs_num); - if (iface->msgs_done >= iface->msgs_num) { - i2c_wr16(iface, S6_I2C_INTRMASK, 1 << S6_I2C_INTR_TXABRT); - complete(&iface->complete); - } else if (iface->msgs_push >= iface->msgs_num) { - i2c_wr16(iface, S6_I2C_INTRMASK, (1 << S6_I2C_INTR_TXABRT) | - (1 << S6_I2C_INTR_RXFULL)); - } else { - i2c_wr16(iface, S6_I2C_INTRMASK, (1 << S6_I2C_INTR_TXABRT) | - (1 << S6_I2C_INTR_TXEMPTY) | - (1 << S6_I2C_INTR_RXFULL)); - } -} - -static irqreturn_t s6i2c_interrupt_entry(int irq, void *dev_id) -{ - struct s6i2c_if *iface = dev_id; - if (!(i2c_rd16(iface, S6_I2C_STATUS) & ((1 << S6_I2C_INTR_RXUNDER) - | (1 << S6_I2C_INTR_RXOVER) - | (1 << S6_I2C_INTR_RXFULL) - | (1 << S6_I2C_INTR_TXOVER) - | (1 << S6_I2C_INTR_TXEMPTY) 
- | (1 << S6_I2C_INTR_RDREQ) - | (1 << S6_I2C_INTR_TXABRT) - | (1 << S6_I2C_INTR_RXDONE) - | (1 << S6_I2C_INTR_ACTIVITY) - | (1 << S6_I2C_INTR_STOPDET) - | (1 << S6_I2C_INTR_STARTDET) - | (1 << S6_I2C_INTR_GENCALL)))) - return IRQ_NONE; - - spin_lock(&iface->lock); - del_timer(&iface->timeout_timer); - s6i2c_handle_interrupt(iface); - spin_unlock(&iface->lock); - return IRQ_HANDLED; -} - -static void s6i2c_timeout(unsigned long data) -{ - struct s6i2c_if *iface = (struct s6i2c_if *)data; - unsigned long flags; - - spin_lock_irqsave(&iface->lock, flags); - s6i2c_handle_interrupt(iface); - if (--iface->timeout_count > 0) { - iface->timeout_timer.expires = jiffies + POLL_TIMEOUT; - add_timer(&iface->timeout_timer); - } else { - complete(&iface->complete); - i2c_wr16(iface, S6_I2C_INTRMASK, 0); - } - spin_unlock_irqrestore(&iface->lock, flags); -} - -static int s6i2c_master_xfer(struct i2c_adapter *adap, - struct i2c_msg *msgs, int num) -{ - struct s6i2c_if *iface = adap->algo_data; - int i; - if (num == 0) - return 0; - if (i2c_rd16(iface, S6_I2C_STATUS) & (1 << S6_I2C_STATUS_ACTIVITY)) - yield(); - i2c_wr16(iface, S6_I2C_INTRMASK, 0); - i2c_rd16(iface, S6_I2C_CLRINTR); - for (i = 0; i < num; i++) { - if (msgs[i].flags & I2C_M_TEN) { - dev_err(&adap->dev, - "s6i2c: 10 bits addr not supported\n"); - return -EINVAL; - } - if (msgs[i].len == 0) { - dev_err(&adap->dev, - "s6i2c: zero length message not supported\n"); - return -EINVAL; - } - if (msgs[i].addr != msgs[0].addr) { - dev_err(&adap->dev, - "s6i2c: multiple xfer cannot change target\n"); - return -EINVAL; - } - } - - iface->msgs = msgs; - iface->msgs_num = num; - iface->msgs_push = 0; - iface->msgs_done = 0; - iface->push = 0; - iface->done = 0; - iface->timeout_count = 10; - i2c_wr16(iface, S6_I2C_TAR, msgs[0].addr); - i2c_wr16(iface, S6_I2C_ENABLE, 1); - i2c_wr16(iface, S6_I2C_INTRMASK, (1 << S6_I2C_INTR_TXEMPTY) | - (1 << S6_I2C_INTR_TXABRT)); - - iface->timeout_timer.expires = jiffies + POLL_TIMEOUT; - add_timer(&iface->timeout_timer); - wait_for_completion(&iface->complete); - del_timer_sync(&iface->timeout_timer); - while (i2c_rd32(iface, S6_I2C_TXFLR) > 0) - schedule(); - while (i2c_rd16(iface, S6_I2C_STATUS) & (1 << S6_I2C_STATUS_ACTIVITY)) - schedule(); - - i2c_wr16(iface, S6_I2C_INTRMASK, 0); - i2c_wr16(iface, S6_I2C_ENABLE, 0); - return iface->msgs_done; -} - -static u32 s6i2c_functionality(struct i2c_adapter *adap) -{ - return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL; -} - -static struct i2c_algorithm s6i2c_algorithm = { - .master_xfer = s6i2c_master_xfer, - .functionality = s6i2c_functionality, -}; - -static u16 nanoseconds_on_clk(struct s6i2c_if *iface, u32 ns) -{ - u32 dividend = ((clk_get_rate(iface->clk) / 1000) * ns) / 1000000; - if (dividend > 0xffff) - return 0xffff; - return dividend; -} - -static int s6i2c_probe(struct platform_device *dev) -{ - struct s6i2c_if *iface = &s6i2c_if; - struct i2c_adapter *p_adap; - const char *clock; - int bus_num, rc; - spin_lock_init(&iface->lock); - init_completion(&iface->complete); - iface->irq = platform_get_irq(dev, 0); - if (iface->irq < 0) { - rc = iface->irq; - goto err_out; - } - iface->res = platform_get_resource(dev, IORESOURCE_MEM, 0); - if (!iface->res) { - rc = -ENXIO; - goto err_out; - } - iface->res = request_mem_region(iface->res->start, - resource_size(iface->res), - dev->dev.bus_id); - if (!iface->res) { - rc = -EBUSY; - goto err_out; - } - iface->reg = ioremap_nocache(iface->res->start, - resource_size(iface->res)); - if (!iface->reg) { - rc = -ENOMEM; - goto err_reg; - 
} - - clock = 0; - bus_num = -1; - if (dev_get_platdata(&dev->dev)) { - struct s6_i2c_platform_data *pdata = - dev_get_platdata(&dev->dev); - bus_num = pdata->bus_num; - clock = pdata->clock; - } - iface->clk = clk_get(&dev->dev, clock); - if (IS_ERR(iface->clk)) { - rc = PTR_ERR(iface->clk); - goto err_map; - } - rc = clk_enable(iface->clk); - if (rc < 0) - goto err_clk_put; - init_timer(&iface->timeout_timer); - iface->timeout_timer.function = s6i2c_timeout; - iface->timeout_timer.data = (unsigned long)iface; - - p_adap = &iface->adap; - strlcpy(p_adap->name, dev->name, sizeof(p_adap->name)); - p_adap->algo = &s6i2c_algorithm; - p_adap->algo_data = iface; - p_adap->nr = bus_num; - p_adap->class = 0; - p_adap->dev.parent = &dev->dev; - i2c_wr16(iface, S6_I2C_INTRMASK, 0); - rc = request_irq(iface->irq, s6i2c_interrupt_entry, - IRQF_SHARED, dev->name, iface); - if (rc) { - dev_err(&p_adap->dev, "s6i2c: can't get IRQ %d\n", iface->irq); - goto err_clk_dis; - } - - i2c_wr16(iface, S6_I2C_ENABLE, 0); - udelay(1); - i2c_wr32(iface, S6_I2C_SRESET, 1 << S6_I2C_SRESET_IC_SRST); - i2c_wr16(iface, S6_I2C_CLRTXABRT, 1); - i2c_wr16(iface, S6_I2C_CON, - (1 << S6_I2C_CON_MASTER) | - (S6_I2C_CON_SPEED_NORMAL << S6_I2C_CON_SPEED) | - (0 << S6_I2C_CON_10BITSLAVE) | - (0 << S6_I2C_CON_10BITMASTER) | - (1 << S6_I2C_CON_RESTARTENA) | - (1 << S6_I2C_CON_SLAVEDISABLE)); - i2c_wr16(iface, S6_I2C_SSHCNT, nanoseconds_on_clk(iface, 4000)); - i2c_wr16(iface, S6_I2C_SSLCNT, nanoseconds_on_clk(iface, 4700)); - i2c_wr16(iface, S6_I2C_FSHCNT, nanoseconds_on_clk(iface, 600)); - i2c_wr16(iface, S6_I2C_FSLCNT, nanoseconds_on_clk(iface, 1300)); - i2c_wr16(iface, S6_I2C_RXTL, 0); - i2c_wr16(iface, S6_I2C_TXTL, 0); - - platform_set_drvdata(dev, iface); - rc = i2c_add_numbered_adapter(p_adap); - if (rc) - goto err_irq_free; - return 0; - -err_irq_free: - free_irq(iface->irq, iface); -err_clk_dis: - clk_disable(iface->clk); -err_clk_put: - clk_put(iface->clk); -err_map: - iounmap(iface->reg); -err_reg: - release_mem_region(iface->res->start, - resource_size(iface->res)); -err_out: - return rc; -} - -static int s6i2c_remove(struct platform_device *pdev) -{ - struct s6i2c_if *iface = platform_get_drvdata(pdev); - i2c_wr16(iface, S6_I2C_ENABLE, 0); - i2c_del_adapter(&iface->adap); - free_irq(iface->irq, iface); - clk_disable(iface->clk); - clk_put(iface->clk); - iounmap(iface->reg); - release_mem_region(iface->res->start, - resource_size(iface->res)); - return 0; -} - -static struct platform_driver s6i2c_driver = { - .probe = s6i2c_probe, - .remove = s6i2c_remove, - .driver = { - .name = DRV_NAME, - .owner = THIS_MODULE, - }, -}; - -static int __init s6i2c_init(void) -{ - pr_info("I2C: S6000 I2C driver\n"); - return platform_driver_register(&s6i2c_driver); -} - -static void __exit s6i2c_exit(void) -{ - platform_driver_unregister(&s6i2c_driver); -} - -MODULE_DESCRIPTION("I2C-Bus adapter routines for S6000 I2C"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS("platform:" DRV_NAME); - -subsys_initcall(s6i2c_init); -module_exit(s6i2c_exit); diff --git a/drivers/i2c/busses/i2c-s6000.h b/drivers/i2c/busses/i2c-s6000.h deleted file mode 100644 index 4936f9f2256f..000000000000 --- a/drivers/i2c/busses/i2c-s6000.h +++ /dev/null @@ -1,79 +0,0 @@ -/* - * drivers/i2c/busses/i2c-s6000.h - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. 
- * - * Copyright (C) 2008 Emlix GmbH - * Author: Oskar Schirmer - */ - -#ifndef __DRIVERS_I2C_BUSSES_I2C_S6000_H -#define __DRIVERS_I2C_BUSSES_I2C_S6000_H - -#define S6_I2C_CON 0x000 -#define S6_I2C_CON_MASTER 0 -#define S6_I2C_CON_SPEED 1 -#define S6_I2C_CON_SPEED_NORMAL 1 -#define S6_I2C_CON_SPEED_FAST 2 -#define S6_I2C_CON_SPEED_MASK 3 -#define S6_I2C_CON_10BITSLAVE 3 -#define S6_I2C_CON_10BITMASTER 4 -#define S6_I2C_CON_RESTARTENA 5 -#define S6_I2C_CON_SLAVEDISABLE 6 -#define S6_I2C_TAR 0x004 -#define S6_I2C_TAR_GCORSTART 10 -#define S6_I2C_TAR_SPECIAL 11 -#define S6_I2C_SAR 0x008 -#define S6_I2C_HSMADDR 0x00C -#define S6_I2C_DATACMD 0x010 -#define S6_I2C_DATACMD_READ 8 -#define S6_I2C_SSHCNT 0x014 -#define S6_I2C_SSLCNT 0x018 -#define S6_I2C_FSHCNT 0x01C -#define S6_I2C_FSLCNT 0x020 -#define S6_I2C_INTRSTAT 0x02C -#define S6_I2C_INTRMASK 0x030 -#define S6_I2C_RAWINTR 0x034 -#define S6_I2C_INTR_RXUNDER 0 -#define S6_I2C_INTR_RXOVER 1 -#define S6_I2C_INTR_RXFULL 2 -#define S6_I2C_INTR_TXOVER 3 -#define S6_I2C_INTR_TXEMPTY 4 -#define S6_I2C_INTR_RDREQ 5 -#define S6_I2C_INTR_TXABRT 6 -#define S6_I2C_INTR_RXDONE 7 -#define S6_I2C_INTR_ACTIVITY 8 -#define S6_I2C_INTR_STOPDET 9 -#define S6_I2C_INTR_STARTDET 10 -#define S6_I2C_INTR_GENCALL 11 -#define S6_I2C_RXTL 0x038 -#define S6_I2C_TXTL 0x03C -#define S6_I2C_CLRINTR 0x040 -#define S6_I2C_CLRRXUNDER 0x044 -#define S6_I2C_CLRRXOVER 0x048 -#define S6_I2C_CLRTXOVER 0x04C -#define S6_I2C_CLRRDREQ 0x050 -#define S6_I2C_CLRTXABRT 0x054 -#define S6_I2C_CLRRXDONE 0x058 -#define S6_I2C_CLRACTIVITY 0x05C -#define S6_I2C_CLRSTOPDET 0x060 -#define S6_I2C_CLRSTARTDET 0x064 -#define S6_I2C_CLRGENCALL 0x068 -#define S6_I2C_ENABLE 0x06C -#define S6_I2C_STATUS 0x070 -#define S6_I2C_STATUS_ACTIVITY 0 -#define S6_I2C_STATUS_TFNF 1 -#define S6_I2C_STATUS_TFE 2 -#define S6_I2C_STATUS_RFNE 3 -#define S6_I2C_STATUS_RFF 4 -#define S6_I2C_TXFLR 0x074 -#define S6_I2C_RXFLR 0x078 -#define S6_I2C_SRESET 0x07C -#define S6_I2C_SRESET_IC_SRST 0 -#define S6_I2C_SRESET_IC_MASTER_SRST 1 -#define S6_I2C_SRESET_IC_SLAVE_SRST 2 -#define S6_I2C_TXABRTSOURCE 0x080 - -#endif diff --git a/include/linux/i2c/s6000.h b/include/linux/i2c/s6000.h deleted file mode 100644 index d9b34bfdae76..000000000000 --- a/include/linux/i2c/s6000.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef __LINUX_I2C_S6000_H -#define __LINUX_I2C_S6000_H - -struct s6_i2c_platform_data { - const char *clock; /* the clock to use */ - int bus_num; /* the bus number to register */ -}; - -#endif - -- cgit v1.2.3-59-g8ed1b From 3e5454d6568c203bca712e1976b052c345f47b44 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 15 Jul 2014 20:48:13 +0200 Subject: tracing: Kill destroy_preds() and destroy_file_preds() destroy_preds() makes no sense. The only caller, event_remove(), actually wants destroy_file_preds(). __trace_remove_event_call() does destroy_call_preds() which takes care of call->filter. And after the previous change we can simply remove destroy_preds() from event_remove(), we are going to call remove_event_from_tracers() which in turn calls remove_event_file_dir()->free_event_filter(). 
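[Editor's note: the teardown path relied on above, written out as a call chain for readability; the names are those used in the message and in kernel/trace/trace_events.c.]

	event_remove(call)
	  -> remove_event_from_tracers(call)
	       -> remove_event_file_dir(file)
	            -> free_event_filter(file->filter)	/* per-file filter freed here,
							 * so event_remove() no longer
							 * needs destroy_preds() */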
Link: http://lkml.kernel.org/p/20140715184813.GA20488@redhat.com Signed-off-by: Oleg Nesterov Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 1 - kernel/trace/trace_events.c | 1 - kernel/trace/trace_events_filter.c | 20 -------------------- 3 files changed, 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index cff3106ffe2c..738d46539269 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -404,7 +404,6 @@ enum event_trigger_type { ETT_EVENT_ENABLE = (1 << 3), }; -extern void destroy_preds(struct ftrace_event_file *file); extern void destroy_call_preds(struct ftrace_event_call *call); extern int filter_match_preds(struct event_filter *filter, void *rec); diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 2de53628689f..85914edf5059 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -1621,7 +1621,6 @@ static void event_remove(struct ftrace_event_call *call) if (file->event_call != call) continue; ftrace_event_enable_disable(file, 0); - destroy_preds(file); /* * The do_for_each_event_file() is * a double loop. After finding the call for this diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 8a8631926a07..30fc66f5cdca 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -810,26 +810,6 @@ void destroy_call_preds(struct ftrace_event_call *call) call->filter = NULL; } -static void destroy_file_preds(struct ftrace_event_file *file) -{ - __free_filter(file->filter); - file->filter = NULL; -} - -/* - * Called when destroying the ftrace_event_file. - * The file is being freed, so we do not need to worry about - * the file being currently used. This is for module code removing - * the tracepoints from within it. - */ -void destroy_preds(struct ftrace_event_file *file) -{ - if (file->event_call->flags & TRACE_EVENT_FL_USE_CALL_FILTER) - destroy_call_preds(file->event_call); - else - destroy_file_preds(file); -} - static struct event_filter *__alloc_filter(void) { struct event_filter *filter; -- cgit v1.2.3-59-g8ed1b From 57375747b6fac0f6cf7b302c4a8adb9043ea8e3b Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 15 Jul 2014 20:48:16 +0200 Subject: tracing: Kill destroy_call_preds() Remove destroy_call_preds(). Its only caller, __trace_remove_event_call(), can use free_event_filter() and nullify ->filter by hand. Perhaps we could keep this trivial helper although imo it is pointless, but then it should be static in trace_events.c. 
Link: http://lkml.kernel.org/p/20140715184816.GA20495@redhat.com Signed-off-by: Oleg Nesterov Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 1 - kernel/trace/trace_events.c | 3 ++- kernel/trace/trace_events_filter.c | 6 ------ 3 files changed, 2 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 738d46539269..f434d75e083b 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -404,7 +404,6 @@ enum event_trigger_type { ETT_EVENT_ENABLE = (1 << 3), }; -extern void destroy_call_preds(struct ftrace_event_call *call); extern int filter_match_preds(struct event_filter *filter, void *rec); extern int filter_check_discard(struct ftrace_event_file *file, void *rec, diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 85914edf5059..0d8ee29f6b9a 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -1748,7 +1748,8 @@ static void __trace_remove_event_call(struct ftrace_event_call *call) { event_remove(call); trace_destroy_fields(call); - destroy_call_preds(call); + free_event_filter(call->filter); + call->filter = NULL; } static int probe_remove_event_call(struct ftrace_event_call *call) diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 30fc66f5cdca..1edec329be29 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -804,12 +804,6 @@ void free_event_filter(struct event_filter *filter) __free_filter(filter); } -void destroy_call_preds(struct ftrace_event_call *call) -{ - __free_filter(call->filter); - call->filter = NULL; -} - static struct event_filter *__alloc_filter(void) { struct event_filter *filter; -- cgit v1.2.3-59-g8ed1b From e738d3ebbf70ebadb83f3a4060dc34f422c2fbe7 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 15 Jul 2014 20:48:27 +0200 Subject: tracing: Kill ftrace_event_call->files Remove ftrace_event_call->files. It has no users, and in fact even the commit ae63b31e4d0e "tracing: Separate out trace events from global variables" which added this member did not use it. Link: http://lkml.kernel.org/p/20140715184827.GA20508@redhat.com Signed-off-by: Oleg Nesterov Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index f434d75e083b..06c6faa9e5cc 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -272,7 +272,6 @@ struct ftrace_event_call { struct trace_event event; const char *print_fmt; struct event_filter *filter; - struct list_head *files; void *mod; void *data; /* -- cgit v1.2.3-59-g8ed1b From 4cc901613bd79dfa22d8aea996c2e9f74c04f8f2 Mon Sep 17 00:00:00 2001 From: Yijing Wang Date: Tue, 8 Jul 2014 10:08:36 +0800 Subject: PCI/MSI: Remove unused function msi_remove_pci_irq_vectors() msi_remove_pci_irq_vectors() is unused, so remove it. 
Signed-off-by: Yijing Wang Signed-off-by: Bjorn Helgaas --- drivers/pci/msi.c | 18 ------------------ include/linux/pci.h | 2 -- 2 files changed, 20 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index a59d673d074e..dd0a259e3aaa 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -1014,24 +1014,6 @@ void pci_disable_msix(struct pci_dev *dev) } EXPORT_SYMBOL(pci_disable_msix); -/** - * msi_remove_pci_irq_vectors - reclaim MSI(X) irqs to unused state - * @dev: pointer to the pci_dev data structure of MSI(X) device function - * - * Being called during hotplug remove, from which the device function - * is hot-removed. All previous assigned MSI/MSI-X irqs, if - * allocated for this device function, are reclaimed to unused state, - * which may be used later on. - **/ -void msi_remove_pci_irq_vectors(struct pci_dev *dev) -{ - if (!pci_msi_enable || !dev) - return; - - if (dev->msi_enabled || dev->msix_enabled) - free_msi_irqs(dev); -} - void pci_no_msi(void) { pci_msi_enable = 0; diff --git a/include/linux/pci.h b/include/linux/pci.h index 466bcd111d85..66bd22fec38f 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1186,7 +1186,6 @@ int pci_msix_vec_count(struct pci_dev *dev); int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec); void pci_msix_shutdown(struct pci_dev *dev); void pci_disable_msix(struct pci_dev *dev); -void msi_remove_pci_irq_vectors(struct pci_dev *dev); void pci_restore_msi_state(struct pci_dev *dev); int pci_msi_enabled(void); int pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec); @@ -1217,7 +1216,6 @@ static inline int pci_enable_msix(struct pci_dev *dev, { return -ENOSYS; } static inline void pci_msix_shutdown(struct pci_dev *dev) { } static inline void pci_disable_msix(struct pci_dev *dev) { } -static inline void msi_remove_pci_irq_vectors(struct pci_dev *dev) { } static inline void pci_restore_msi_state(struct pci_dev *dev) { } static inline int pci_msi_enabled(void) { return 0; } static inline int pci_enable_msi_range(struct pci_dev *dev, int minvec, -- cgit v1.2.3-59-g8ed1b From 3a6bfbc91df04b081a44d419e0260bad54abddf7 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Sun, 29 Jun 2014 15:09:33 -0700 Subject: arch, locking: Ciao arch_mutex_cpu_relax() The arch_mutex_cpu_relax() function, introduced by 34b133f, is hacky and ugly. It was added a few years ago to address the fact that common cpu_relax() calls include yielding on s390, and thus impact the optimistic spinning functionality of mutexes. Nowadays we use this function well beyond mutexes: rwsem, qrwlock, mcs and lockref. Since the macro that defines the call is in the mutex header, any users must include mutex.h and the naming is misleading as well. This patch (i) renames the call to cpu_relax_lowlatency ("relax, but only if you can do it with very low latency") and (ii) defines it in each arch's asm/processor.h local header, just like for regular cpu_relax functions. On all archs, except s390, cpu_relax_lowlatency is simply cpu_relax, and thus we can take it out of mutex.h. While this can seem redundant, I believe it is a good choice as it allows us to move out arch specific logic from generic locking primitives and enables future(?) archs to transparently define it, similarly to System Z. 
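[Editor's note: a minimal sketch of the kind of optimistic-spin loop this helper serves (rwsem, qrwlock, mcs and lockref per the message above); the lock and owner fields are illustrative and not taken from any of those files.]

	/* Poll the owner field, relaxing the hardware thread on every
	 * iteration.  On every architecture except s390 this is plain
	 * cpu_relax(); on s390 it avoids the costly yield that cpu_relax()
	 * implies, which is exactly what a short spin loop wants. */
	while (ACCESS_ONCE(lock->owner) == owner)
		cpu_relax_lowlatency();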
Signed-off-by: Davidlohr Bueso Signed-off-by: Peter Zijlstra Cc: Andrew Morton Cc: Anton Blanchard Cc: Aurelien Jacquiot Cc: Benjamin Herrenschmidt Cc: Bharat Bhushan Cc: Catalin Marinas Cc: Chen Liqin Cc: Chris Metcalf Cc: Christian Borntraeger Cc: Chris Zankel Cc: David Howells Cc: David S. Miller Cc: Deepthi Dharwar Cc: Dominik Dingel Cc: Fenghua Yu Cc: Geert Uytterhoeven Cc: Guan Xuetao Cc: Haavard Skinnemoen Cc: Hans-Christian Egtvedt Cc: Heiko Carstens Cc: Helge Deller Cc: Hirokazu Takata Cc: Ivan Kokshaysky Cc: James E.J. Bottomley Cc: James Hogan Cc: Jason Wang Cc: Jesper Nilsson Cc: Joe Perches Cc: Jonas Bonn Cc: Joseph Myers Cc: Kees Cook Cc: Koichi Yasutake Cc: Lennox Wu Cc: Linus Torvalds Cc: Mark Salter Cc: Martin Schwidefsky Cc: Matt Turner Cc: Max Filippov Cc: Michael Neuling Cc: Michal Simek Cc: Mikael Starvik Cc: Nicolas Pitre Cc: Paolo Bonzini Cc: Paul Burton Cc: Paul E. McKenney Cc: Paul Gortmaker Cc: Paul Mackerras Cc: Qais Yousef Cc: Qiaowei Ren Cc: Rafael Wysocki Cc: Ralf Baechle Cc: Richard Henderson Cc: Richard Kuo Cc: Russell King Cc: Steven Miao Cc: Steven Rostedt Cc: Stratos Karafotis Cc: Tim Chen Cc: Tony Luck Cc: Vasily Kulikov Cc: Vineet Gupta Cc: Vineet Gupta Cc: Waiman Long Cc: Will Deacon Cc: Wolfram Sang Cc: adi-buildroot-devel@lists.sourceforge.net Cc: linux390@de.ibm.com Cc: linux-alpha@vger.kernel.org Cc: linux-am33-list@redhat.com Cc: linux-arm-kernel@lists.infradead.org Cc: linux-c6x-dev@linux-c6x.org Cc: linux-cris-kernel@axis.com Cc: linux-hexagon@vger.kernel.org Cc: linux-ia64@vger.kernel.org Cc: linux@lists.openrisc.net Cc: linux-m32r-ja@ml.linux-m32r.org Cc: linux-m32r@ml.linux-m32r.org Cc: linux-m68k@lists.linux-m68k.org Cc: linux-metag@vger.kernel.org Cc: linux-mips@linux-mips.org Cc: linux-parisc@vger.kernel.org Cc: linuxppc-dev@lists.ozlabs.org Cc: linux-s390@vger.kernel.org Cc: linux-sh@vger.kernel.org Cc: linux-xtensa@linux-xtensa.org Cc: sparclinux@vger.kernel.org Link: http://lkml.kernel.org/r/1404079773.2619.4.camel@buesod1.americas.hpqcorp.net Signed-off-by: Ingo Molnar --- arch/alpha/include/asm/processor.h | 1 + arch/arc/include/asm/processor.h | 2 ++ arch/arm/include/asm/processor.h | 2 ++ arch/arm64/include/asm/processor.h | 1 + arch/avr32/include/asm/processor.h | 1 + arch/blackfin/include/asm/processor.h | 2 +- arch/c6x/include/asm/processor.h | 1 + arch/cris/include/asm/processor.h | 1 + arch/hexagon/include/asm/processor.h | 1 + arch/ia64/include/asm/processor.h | 1 + arch/m32r/include/asm/processor.h | 1 + arch/m68k/include/asm/processor.h | 1 + arch/metag/include/asm/processor.h | 1 + arch/microblaze/include/asm/processor.h | 1 + arch/mips/include/asm/processor.h | 1 + arch/mn10300/include/asm/processor.h | 2 ++ arch/openrisc/include/asm/processor.h | 1 + arch/parisc/include/asm/processor.h | 1 + arch/powerpc/include/asm/processor.h | 2 ++ arch/s390/include/asm/processor.h | 2 +- arch/score/include/asm/processor.h | 1 + arch/sh/include/asm/processor.h | 1 + arch/sparc/include/asm/processor_32.h | 2 ++ arch/sparc/include/asm/processor_64.h | 1 + arch/tile/include/asm/processor.h | 2 ++ arch/unicore32/include/asm/processor.h | 1 + arch/x86/include/asm/processor.h | 2 ++ arch/x86/um/asm/processor.h | 3 ++- arch/xtensa/include/asm/processor.h | 1 + include/linux/mutex.h | 4 ---- kernel/locking/mcs_spinlock.c | 8 +++----- kernel/locking/mcs_spinlock.h | 4 ++-- kernel/locking/mutex.c | 4 ++-- kernel/locking/qrwlock.c | 9 ++++----- kernel/locking/rwsem-xadd.c | 4 ++-- lib/lockref.c | 3 +-- 36 files changed, 51 insertions(+), 25 
deletions(-) (limited to 'include/linux') diff --git a/arch/alpha/include/asm/processor.h b/arch/alpha/include/asm/processor.h index 6cb7fe85c4b5..b4cf03690394 100644 --- a/arch/alpha/include/asm/processor.h +++ b/arch/alpha/include/asm/processor.h @@ -57,6 +57,7 @@ unsigned long get_wchan(struct task_struct *p); ((tsk) == current ? rdusp() : task_thread_info(tsk)->pcb.usp) #define cpu_relax() barrier() +#define cpu_relax_lowlatency() cpu_relax() #define ARCH_HAS_PREFETCH #define ARCH_HAS_PREFETCHW diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h index d99f9b37cd15..82588f3ba77f 100644 --- a/arch/arc/include/asm/processor.h +++ b/arch/arc/include/asm/processor.h @@ -62,6 +62,8 @@ unsigned long thread_saved_pc(struct task_struct *t); #define cpu_relax() do { } while (0) #endif +#define cpu_relax_lowlatency() cpu_relax() + #define copy_segments(tsk, mm) do { } while (0) #define release_segments(mm) do { } while (0) diff --git a/arch/arm/include/asm/processor.h b/arch/arm/include/asm/processor.h index c3d5fc124a05..8a1e8e995dae 100644 --- a/arch/arm/include/asm/processor.h +++ b/arch/arm/include/asm/processor.h @@ -82,6 +82,8 @@ unsigned long get_wchan(struct task_struct *p); #define cpu_relax() barrier() #endif +#define cpu_relax_lowlatency() cpu_relax() + #define task_pt_regs(p) \ ((struct pt_regs *)(THREAD_START_SP + task_stack_page(p)) - 1) diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 34de2a8f7d93..4610b0daf1bf 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -129,6 +129,7 @@ extern void release_thread(struct task_struct *); unsigned long get_wchan(struct task_struct *p); #define cpu_relax() barrier() +#define cpu_relax_lowlatency() cpu_relax() /* Thread switching */ extern struct task_struct *cpu_switch_to(struct task_struct *prev, diff --git a/arch/avr32/include/asm/processor.h b/arch/avr32/include/asm/processor.h index 972adcc1e8f4..941593c7d9f3 100644 --- a/arch/avr32/include/asm/processor.h +++ b/arch/avr32/include/asm/processor.h @@ -92,6 +92,7 @@ extern struct avr32_cpuinfo boot_cpu_data; #define TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 3)) #define cpu_relax() barrier() +#define cpu_relax_lowlatency() cpu_relax() #define cpu_sync_pipeline() asm volatile("sub pc, -2" : : : "memory") struct cpu_context { diff --git a/arch/blackfin/include/asm/processor.h b/arch/blackfin/include/asm/processor.h index d0e72e9475a6..7acd46653df3 100644 --- a/arch/blackfin/include/asm/processor.h +++ b/arch/blackfin/include/asm/processor.h @@ -99,7 +99,7 @@ unsigned long get_wchan(struct task_struct *p); #define KSTK_ESP(tsk) ((tsk) == current ? 
rdusp() : (tsk)->thread.usp) #define cpu_relax() smp_mb() - +#define cpu_relax_lowlatency() cpu_relax() /* Get the Silicon Revision of the chip */ static inline uint32_t __pure bfin_revid(void) diff --git a/arch/c6x/include/asm/processor.h b/arch/c6x/include/asm/processor.h index b9eb3da7f278..f2ef31be2f8b 100644 --- a/arch/c6x/include/asm/processor.h +++ b/arch/c6x/include/asm/processor.h @@ -121,6 +121,7 @@ extern unsigned long get_wchan(struct task_struct *p); #define KSTK_ESP(task) (task_pt_regs(task)->sp) #define cpu_relax() do { } while (0) +#define cpu_relax_lowlatency() cpu_relax() extern const struct seq_operations cpuinfo_op; diff --git a/arch/cris/include/asm/processor.h b/arch/cris/include/asm/processor.h index 15b815df29c1..862126b58116 100644 --- a/arch/cris/include/asm/processor.h +++ b/arch/cris/include/asm/processor.h @@ -63,6 +63,7 @@ static inline void release_thread(struct task_struct *dead_task) #define init_stack (init_thread_union.stack) #define cpu_relax() barrier() +#define cpu_relax_lowlatency() cpu_relax() void default_idle(void); diff --git a/arch/hexagon/include/asm/processor.h b/arch/hexagon/include/asm/processor.h index 45a825402f63..d8501137c8d0 100644 --- a/arch/hexagon/include/asm/processor.h +++ b/arch/hexagon/include/asm/processor.h @@ -56,6 +56,7 @@ struct thread_struct { } #define cpu_relax() __vmyield() +#define cpu_relax_lowlatency() cpu_relax() /* * Decides where the kernel will search for a free chunk of vm space during diff --git a/arch/ia64/include/asm/processor.h b/arch/ia64/include/asm/processor.h index efd1b927ccb7..c7367130ab14 100644 --- a/arch/ia64/include/asm/processor.h +++ b/arch/ia64/include/asm/processor.h @@ -548,6 +548,7 @@ ia64_eoi (void) } #define cpu_relax() ia64_hint(ia64_hint_pause) +#define cpu_relax_lowlatency() cpu_relax() static inline int ia64_get_irr(unsigned int vector) diff --git a/arch/m32r/include/asm/processor.h b/arch/m32r/include/asm/processor.h index 5767367550c6..9f8fd9bef70f 100644 --- a/arch/m32r/include/asm/processor.h +++ b/arch/m32r/include/asm/processor.h @@ -133,5 +133,6 @@ unsigned long get_wchan(struct task_struct *p); #define KSTK_ESP(tsk) ((tsk)->thread.sp) #define cpu_relax() barrier() +#define cpu_relax_lowlatency() cpu_relax() #endif /* _ASM_M32R_PROCESSOR_H */ diff --git a/arch/m68k/include/asm/processor.h b/arch/m68k/include/asm/processor.h index b0768a657920..20dda1d4b860 100644 --- a/arch/m68k/include/asm/processor.h +++ b/arch/m68k/include/asm/processor.h @@ -176,5 +176,6 @@ unsigned long get_wchan(struct task_struct *p); #define task_pt_regs(tsk) ((struct pt_regs *) ((tsk)->thread.esp0)) #define cpu_relax() barrier() +#define cpu_relax_lowlatency() cpu_relax() #endif diff --git a/arch/metag/include/asm/processor.h b/arch/metag/include/asm/processor.h index a8a37477c66e..881071c07942 100644 --- a/arch/metag/include/asm/processor.h +++ b/arch/metag/include/asm/processor.h @@ -155,6 +155,7 @@ unsigned long get_wchan(struct task_struct *p); #define user_stack_pointer(regs) ((regs)->ctx.AX[0].U0) #define cpu_relax() barrier() +#define cpu_relax_lowlatency() cpu_relax() extern void setup_priv(void); diff --git a/arch/microblaze/include/asm/processor.h b/arch/microblaze/include/asm/processor.h index 9d31b057c355..497a988d79c2 100644 --- a/arch/microblaze/include/asm/processor.h +++ b/arch/microblaze/include/asm/processor.h @@ -22,6 +22,7 @@ extern const struct seq_operations cpuinfo_op; # define cpu_relax() barrier() +# define cpu_relax_lowlatency() cpu_relax() #define task_pt_regs(tsk) \ (((struct 
pt_regs *)(THREAD_SIZE + task_stack_page(tsk))) - 1) diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h index ad70cba8daff..d5098bc554f4 100644 --- a/arch/mips/include/asm/processor.h +++ b/arch/mips/include/asm/processor.h @@ -367,6 +367,7 @@ unsigned long get_wchan(struct task_struct *p); #define KSTK_STATUS(tsk) (task_pt_regs(tsk)->cp0_status) #define cpu_relax() barrier() +#define cpu_relax_lowlatency() cpu_relax() /* * Return_address is a replacement for __builtin_return_address(count) diff --git a/arch/mn10300/include/asm/processor.h b/arch/mn10300/include/asm/processor.h index 8b80b19d0c8a..769d5ed8e992 100644 --- a/arch/mn10300/include/asm/processor.h +++ b/arch/mn10300/include/asm/processor.h @@ -68,7 +68,9 @@ extern struct mn10300_cpuinfo cpu_data[]; extern void identify_cpu(struct mn10300_cpuinfo *); extern void print_cpu_info(struct mn10300_cpuinfo *); extern void dodgy_tsc(void); + #define cpu_relax() barrier() +#define cpu_relax_lowlatency() cpu_relax() /* * User space process size: 1.75GB (default). diff --git a/arch/openrisc/include/asm/processor.h b/arch/openrisc/include/asm/processor.h index cab746fa9e87..4d235e3d2534 100644 --- a/arch/openrisc/include/asm/processor.h +++ b/arch/openrisc/include/asm/processor.h @@ -101,6 +101,7 @@ extern unsigned long thread_saved_pc(struct task_struct *t); #define init_stack (init_thread_union.stack) #define cpu_relax() barrier() +#define cpu_relax_lowlatency() cpu_relax() #endif /* __ASSEMBLY__ */ #endif /* __ASM_OPENRISC_PROCESSOR_H */ diff --git a/arch/parisc/include/asm/processor.h b/arch/parisc/include/asm/processor.h index d951c9681ab3..689a8ade3606 100644 --- a/arch/parisc/include/asm/processor.h +++ b/arch/parisc/include/asm/processor.h @@ -338,6 +338,7 @@ extern unsigned long get_wchan(struct task_struct *p); #define KSTK_ESP(tsk) ((tsk)->thread.regs.gr[30]) #define cpu_relax() barrier() +#define cpu_relax_lowlatency() cpu_relax() /* Used as a macro to identify the combined VIPT/PIPT cached * CPUs which require a guarantee of coherency (no inequivalent diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 6d59072e13a7..dda7ac4c80bd 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -400,6 +400,8 @@ static inline unsigned long __pack_fe01(unsigned int fpmode) #define cpu_relax() barrier() #endif +#define cpu_relax_lowlatency() cpu_relax() + /* Check that a certain kernel stack pointer is valid in task_struct p */ int validate_sp(unsigned long sp, struct task_struct *p, unsigned long nbytes); diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 6f02d452bbee..e568fc8a7250 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -217,7 +217,7 @@ static inline void cpu_relax(void) barrier(); } -#define arch_mutex_cpu_relax() barrier() +#define cpu_relax_lowlatency() barrier() static inline void psw_set_key(unsigned int key) { diff --git a/arch/score/include/asm/processor.h b/arch/score/include/asm/processor.h index d9a922d8711b..851f441991d2 100644 --- a/arch/score/include/asm/processor.h +++ b/arch/score/include/asm/processor.h @@ -24,6 +24,7 @@ extern unsigned long get_wchan(struct task_struct *p); #define current_text_addr() ({ __label__ _l; _l: &&_l; }) #define cpu_relax() barrier() +#define cpu_relax_lowlatency() cpu_relax() #define release_thread(thread) do {} while (0) /* diff --git a/arch/sh/include/asm/processor.h 
b/arch/sh/include/asm/processor.h index 5448f9bbf4ab..1506897648aa 100644 --- a/arch/sh/include/asm/processor.h +++ b/arch/sh/include/asm/processor.h @@ -97,6 +97,7 @@ extern struct sh_cpuinfo cpu_data[]; #define cpu_sleep() __asm__ __volatile__ ("sleep" : : : "memory") #define cpu_relax() barrier() +#define cpu_relax_lowlatency() cpu_relax() void default_idle(void); void stop_this_cpu(void *); diff --git a/arch/sparc/include/asm/processor_32.h b/arch/sparc/include/asm/processor_32.h index a564817bbc2e..812fd08f3e62 100644 --- a/arch/sparc/include/asm/processor_32.h +++ b/arch/sparc/include/asm/processor_32.h @@ -119,6 +119,8 @@ extern struct task_struct *last_task_used_math; int do_mathemu(struct pt_regs *regs, struct task_struct *fpt); #define cpu_relax() barrier() +#define cpu_relax_lowlatency() cpu_relax() + extern void (*sparc_idle)(void); #endif diff --git a/arch/sparc/include/asm/processor_64.h b/arch/sparc/include/asm/processor_64.h index 7028fe1a7c04..6924bdefe148 100644 --- a/arch/sparc/include/asm/processor_64.h +++ b/arch/sparc/include/asm/processor_64.h @@ -216,6 +216,7 @@ unsigned long get_wchan(struct task_struct *task); "nop\n\t" \ ".previous" \ ::: "memory") +#define cpu_relax_lowlatency() cpu_relax() /* Prefetch support. This is tuned for UltraSPARC-III and later. * UltraSPARC-I will treat these as nops, and UltraSPARC-II has diff --git a/arch/tile/include/asm/processor.h b/arch/tile/include/asm/processor.h index 42323636c459..dd4f9f17e30a 100644 --- a/arch/tile/include/asm/processor.h +++ b/arch/tile/include/asm/processor.h @@ -266,6 +266,8 @@ static inline void cpu_relax(void) barrier(); } +#define cpu_relax_lowlatency() cpu_relax() + /* Info on this processor (see fs/proc/cpuinfo.c) */ struct seq_operations; extern const struct seq_operations cpuinfo_op; diff --git a/arch/unicore32/include/asm/processor.h b/arch/unicore32/include/asm/processor.h index 4eaa42167667..8d21b7adf26b 100644 --- a/arch/unicore32/include/asm/processor.h +++ b/arch/unicore32/include/asm/processor.h @@ -71,6 +71,7 @@ extern void release_thread(struct task_struct *); unsigned long get_wchan(struct task_struct *p); #define cpu_relax() barrier() +#define cpu_relax_lowlatency() cpu_relax() #define task_pt_regs(p) \ ((struct pt_regs *)(THREAD_START_SP + task_stack_page(p)) - 1) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index a4ea02351f4d..32cc237f8e20 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -696,6 +696,8 @@ static inline void cpu_relax(void) rep_nop(); } +#define cpu_relax_lowlatency() cpu_relax() + /* Stop speculative execution and prefetching of modified code. 
*/ static inline void sync_core(void) { diff --git a/arch/x86/um/asm/processor.h b/arch/x86/um/asm/processor.h index 04f82e020f2b..2a206d2b14ab 100644 --- a/arch/x86/um/asm/processor.h +++ b/arch/x86/um/asm/processor.h @@ -25,7 +25,8 @@ static inline void rep_nop(void) __asm__ __volatile__("rep;nop": : :"memory"); } -#define cpu_relax() rep_nop() +#define cpu_relax() rep_nop() +#define cpu_relax_lowlatency() cpu_relax() #include diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h index abb59708a3b7..b61bdf0eea25 100644 --- a/arch/xtensa/include/asm/processor.h +++ b/arch/xtensa/include/asm/processor.h @@ -182,6 +182,7 @@ extern unsigned long get_wchan(struct task_struct *p); #define KSTK_ESP(tsk) (task_pt_regs(tsk)->areg[1]) #define cpu_relax() barrier() +#define cpu_relax_lowlatency() cpu_relax() /* Special register access. */ diff --git a/include/linux/mutex.h b/include/linux/mutex.h index 42aa9b9ecd5f..8d5535c58cc2 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -176,8 +176,4 @@ extern void mutex_unlock(struct mutex *lock); extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); -#ifndef arch_mutex_cpu_relax -# define arch_mutex_cpu_relax() cpu_relax() -#endif - #endif /* __LINUX_MUTEX_H */ diff --git a/kernel/locking/mcs_spinlock.c b/kernel/locking/mcs_spinlock.c index be9ee1559fca..9887a905a762 100644 --- a/kernel/locking/mcs_spinlock.c +++ b/kernel/locking/mcs_spinlock.c @@ -1,6 +1,4 @@ - #include -#include #include #include "mcs_spinlock.h" @@ -79,7 +77,7 @@ osq_wait_next(struct optimistic_spin_queue *lock, break; } - arch_mutex_cpu_relax(); + cpu_relax_lowlatency(); } return next; @@ -120,7 +118,7 @@ bool osq_lock(struct optimistic_spin_queue *lock) if (need_resched()) goto unqueue; - arch_mutex_cpu_relax(); + cpu_relax_lowlatency(); } return true; @@ -146,7 +144,7 @@ unqueue: if (smp_load_acquire(&node->locked)) return true; - arch_mutex_cpu_relax(); + cpu_relax_lowlatency(); /* * Or we race against a concurrent unqueue()'s step-B, in which diff --git a/kernel/locking/mcs_spinlock.h b/kernel/locking/mcs_spinlock.h index 74356dc0ce29..23e89c5930e9 100644 --- a/kernel/locking/mcs_spinlock.h +++ b/kernel/locking/mcs_spinlock.h @@ -27,7 +27,7 @@ struct mcs_spinlock { #define arch_mcs_spin_lock_contended(l) \ do { \ while (!(smp_load_acquire(l))) \ - arch_mutex_cpu_relax(); \ + cpu_relax_lowlatency(); \ } while (0) #endif @@ -104,7 +104,7 @@ void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) return; /* Wait until the next pointer is set */ while (!(next = ACCESS_ONCE(node->next))) - arch_mutex_cpu_relax(); + cpu_relax_lowlatency(); } /* Pass lock to next waiter. */ diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index d3100521388c..ae712b25e492 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -146,7 +146,7 @@ int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner) if (need_resched()) break; - arch_mutex_cpu_relax(); + cpu_relax_lowlatency(); } rcu_read_unlock(); @@ -464,7 +464,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, * memory barriers as we'll eventually observe the right * values at the cost of a few extra spins. 
*/ - arch_mutex_cpu_relax(); + cpu_relax_lowlatency(); } osq_unlock(&lock->osq); slowpath: diff --git a/kernel/locking/qrwlock.c b/kernel/locking/qrwlock.c index fb5b8ac411a5..f956ede7f90d 100644 --- a/kernel/locking/qrwlock.c +++ b/kernel/locking/qrwlock.c @@ -20,7 +20,6 @@ #include #include #include -#include #include /** @@ -35,7 +34,7 @@ static __always_inline void rspin_until_writer_unlock(struct qrwlock *lock, u32 cnts) { while ((cnts & _QW_WMASK) == _QW_LOCKED) { - arch_mutex_cpu_relax(); + cpu_relax_lowlatency(); cnts = smp_load_acquire((u32 *)&lock->cnts); } } @@ -75,7 +74,7 @@ void queue_read_lock_slowpath(struct qrwlock *lock) * to make sure that the write lock isn't taken. */ while (atomic_read(&lock->cnts) & _QW_WMASK) - arch_mutex_cpu_relax(); + cpu_relax_lowlatency(); cnts = atomic_add_return(_QR_BIAS, &lock->cnts) - _QR_BIAS; rspin_until_writer_unlock(lock, cnts); @@ -114,7 +113,7 @@ void queue_write_lock_slowpath(struct qrwlock *lock) cnts | _QW_WAITING) == cnts)) break; - arch_mutex_cpu_relax(); + cpu_relax_lowlatency(); } /* When no more readers, set the locked flag */ @@ -125,7 +124,7 @@ void queue_write_lock_slowpath(struct qrwlock *lock) _QW_LOCKED) == _QW_WAITING)) break; - arch_mutex_cpu_relax(); + cpu_relax_lowlatency(); } unlock: arch_spin_unlock(&lock->lock); diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index a2391ac135c8..d6203faf2eb1 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c @@ -329,7 +329,7 @@ bool rwsem_spin_on_owner(struct rw_semaphore *sem, struct task_struct *owner) if (need_resched()) break; - arch_mutex_cpu_relax(); + cpu_relax_lowlatency(); } rcu_read_unlock(); @@ -381,7 +381,7 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem) * memory barriers as we'll eventually observe the right * values at the cost of a few extra spins. */ - arch_mutex_cpu_relax(); + cpu_relax_lowlatency(); } osq_unlock(&sem->osq); done: diff --git a/lib/lockref.c b/lib/lockref.c index f07a40d33871..d2233de9a86e 100644 --- a/lib/lockref.c +++ b/lib/lockref.c @@ -1,6 +1,5 @@ #include #include -#include #if USE_CMPXCHG_LOCKREF @@ -29,7 +28,7 @@ if (likely(old.lock_count == prev.lock_count)) { \ SUCCESS; \ } \ - arch_mutex_cpu_relax(); \ + cpu_relax_lowlatency(); \ } \ } while (0) -- cgit v1.2.3-59-g8ed1b From 306a7f9139318a28063282a15b9f9ebacf09c9b9 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Thu, 17 Jul 2014 13:17:24 +0200 Subject: ARM: tegra: Move includes to include/soc/tegra In order to not clutter the include/linux directory with SoC specific headers, move the Tegra-specific headers out into a separate directory. 
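For code that consumes these headers the conversion is mechanical; as a hedged illustration (the paths come from the file moves in this patch, the including driver is hypothetical):

	/* before this patch */
	#include <linux/tegra-ahb.h>
	#include <linux/tegra-cpuidle.h>
	#include <linux/tegra-powergate.h>
	#include <linux/tegra-soc.h>

	/* after this patch */
	#include <soc/tegra/ahb.h>
	#include <soc/tegra/cpuidle.h>
	#include <soc/tegra/powergate.h>
	#include <soc/tegra/fuse.h>	/* tegra-soc.h content now lives here */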
Signed-off-by: Thierry Reding --- arch/arm/mach-tegra/fuse.c | 3 +- arch/arm/mach-tegra/pmc.c | 3 +- arch/arm/mach-tegra/powergate.c | 3 +- drivers/amba/tegra-ahb.c | 3 +- drivers/clk/tegra/clk-periph-gate.c | 3 +- drivers/clk/tegra/clk-tegra30.c | 5 +- drivers/clk/tegra/clk.c | 3 +- drivers/gpu/drm/tegra/gr3d.c | 3 +- drivers/gpu/drm/tegra/sor.c | 3 +- drivers/iommu/tegra-smmu.c | 3 +- drivers/pci/host/pci-tegra.c | 5 +- include/linux/tegra-ahb.h | 19 ----- include/linux/tegra-cpuidle.h | 25 ------- include/linux/tegra-powergate.h | 134 ------------------------------------ include/linux/tegra-soc.h | 22 ------ include/soc/tegra/ahb.h | 19 +++++ include/soc/tegra/cpuidle.h | 25 +++++++ include/soc/tegra/fuse.h | 22 ++++++ include/soc/tegra/powergate.h | 134 ++++++++++++++++++++++++++++++++++++ 19 files changed, 225 insertions(+), 212 deletions(-) delete mode 100644 include/linux/tegra-ahb.h delete mode 100644 include/linux/tegra-cpuidle.h delete mode 100644 include/linux/tegra-powergate.h delete mode 100644 include/linux/tegra-soc.h create mode 100644 include/soc/tegra/ahb.h create mode 100644 include/soc/tegra/cpuidle.h create mode 100644 include/soc/tegra/fuse.h create mode 100644 include/soc/tegra/powergate.h (limited to 'include/linux') diff --git a/arch/arm/mach-tegra/fuse.c b/arch/arm/mach-tegra/fuse.c index c9ac23b385be..930fef861227 100644 --- a/arch/arm/mach-tegra/fuse.c +++ b/arch/arm/mach-tegra/fuse.c @@ -23,7 +23,8 @@ #include #include #include -#include + +#include #include "fuse.h" #include "iomap.h" diff --git a/arch/arm/mach-tegra/pmc.c b/arch/arm/mach-tegra/pmc.c index 7c7123e7557b..0f457801eaca 100644 --- a/arch/arm/mach-tegra/pmc.c +++ b/arch/arm/mach-tegra/pmc.c @@ -20,7 +20,8 @@ #include #include #include -#include + +#include #include "flowctrl.h" #include "fuse.h" diff --git a/arch/arm/mach-tegra/powergate.c b/arch/arm/mach-tegra/powergate.c index 4cefc5cd6bed..7b148bc6c995 100644 --- a/arch/arm/mach-tegra/powergate.c +++ b/arch/arm/mach-tegra/powergate.c @@ -29,7 +29,8 @@ #include #include #include -#include + +#include #include "fuse.h" #include "iomap.h" diff --git a/drivers/amba/tegra-ahb.c b/drivers/amba/tegra-ahb.c index 558a239954e8..d8961ef4d2e7 100644 --- a/drivers/amba/tegra-ahb.c +++ b/drivers/amba/tegra-ahb.c @@ -25,7 +25,8 @@ #include #include #include -#include + +#include #define DRV_NAME "tegra-ahb" diff --git a/drivers/clk/tegra/clk-periph-gate.c b/drivers/clk/tegra/clk-periph-gate.c index 507015314827..0aa8830ae7cc 100644 --- a/drivers/clk/tegra/clk-periph-gate.c +++ b/drivers/clk/tegra/clk-periph-gate.c @@ -20,7 +20,8 @@ #include #include #include -#include + +#include #include "clk.h" diff --git a/drivers/clk/tegra/clk-tegra30.c b/drivers/clk/tegra/clk-tegra30.c index 8b10c38b6e3c..5679ffdb3f8c 100644 --- a/drivers/clk/tegra/clk-tegra30.c +++ b/drivers/clk/tegra/clk-tegra30.c @@ -22,8 +22,11 @@ #include #include #include -#include + +#include + #include + #include "clk.h" #include "clk-id.h" diff --git a/drivers/clk/tegra/clk.c b/drivers/clk/tegra/clk.c index c0a7d7723510..f4503ba97400 100644 --- a/drivers/clk/tegra/clk.c +++ b/drivers/clk/tegra/clk.c @@ -19,7 +19,8 @@ #include #include #include -#include + +#include #include "clk.h" diff --git a/drivers/gpu/drm/tegra/gr3d.c b/drivers/gpu/drm/tegra/gr3d.c index 30f5ba9bd6d0..69974851e564 100644 --- a/drivers/gpu/drm/tegra/gr3d.c +++ b/drivers/gpu/drm/tegra/gr3d.c @@ -12,7 +12,8 @@ #include #include #include -#include + +#include #include "drm.h" #include "gem.h" diff --git 
a/drivers/gpu/drm/tegra/sor.c b/drivers/gpu/drm/tegra/sor.c index 27c979b50111..eafd0b8a71d2 100644 --- a/drivers/gpu/drm/tegra/sor.c +++ b/drivers/gpu/drm/tegra/sor.c @@ -11,7 +11,8 @@ #include #include #include -#include + +#include #include diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index 605b5b46a903..230d06c9328b 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -35,7 +35,8 @@ #include #include #include -#include + +#include #include #include diff --git a/drivers/pci/host/pci-tegra.c b/drivers/pci/host/pci-tegra.c index 083cf37ca047..a2f0f88be382 100644 --- a/drivers/pci/host/pci-tegra.c +++ b/drivers/pci/host/pci-tegra.c @@ -41,11 +41,12 @@ #include #include #include -#include -#include #include #include +#include +#include + #include #include #include diff --git a/include/linux/tegra-ahb.h b/include/linux/tegra-ahb.h deleted file mode 100644 index f1cd075ceee1..000000000000 --- a/include/linux/tegra-ahb.h +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Copyright (c) 2012, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - */ - -#ifndef __LINUX_AHB_H__ -#define __LINUX_AHB_H__ - -extern int tegra_ahb_enable_smmu(struct device_node *ahb); - -#endif /* __LINUX_AHB_H__ */ diff --git a/include/linux/tegra-cpuidle.h b/include/linux/tegra-cpuidle.h deleted file mode 100644 index 9c6286bbf662..000000000000 --- a/include/linux/tegra-cpuidle.h +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - */ - -#ifndef __LINUX_TEGRA_CPUIDLE_H__ -#define __LINUX_TEGRA_CPUIDLE_H__ - -#ifdef CONFIG_CPU_IDLE -void tegra_cpuidle_pcie_irqs_in_use(void); -#else -static inline void tegra_cpuidle_pcie_irqs_in_use(void) -{ -} -#endif - -#endif diff --git a/include/linux/tegra-powergate.h b/include/linux/tegra-powergate.h deleted file mode 100644 index 46f0a07812b4..000000000000 --- a/include/linux/tegra-powergate.h +++ /dev/null @@ -1,134 +0,0 @@ -/* - * Copyright (c) 2010 Google, Inc - * - * Author: - * Colin Cross - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - */ - -#ifndef _MACH_TEGRA_POWERGATE_H_ -#define _MACH_TEGRA_POWERGATE_H_ - -struct clk; -struct reset_control; - -#define TEGRA_POWERGATE_CPU 0 -#define TEGRA_POWERGATE_3D 1 -#define TEGRA_POWERGATE_VENC 2 -#define TEGRA_POWERGATE_PCIE 3 -#define TEGRA_POWERGATE_VDEC 4 -#define TEGRA_POWERGATE_L2 5 -#define TEGRA_POWERGATE_MPE 6 -#define TEGRA_POWERGATE_HEG 7 -#define TEGRA_POWERGATE_SATA 8 -#define TEGRA_POWERGATE_CPU1 9 -#define TEGRA_POWERGATE_CPU2 10 -#define TEGRA_POWERGATE_CPU3 11 -#define TEGRA_POWERGATE_CELP 12 -#define TEGRA_POWERGATE_3D1 13 -#define TEGRA_POWERGATE_CPU0 14 -#define TEGRA_POWERGATE_C0NC 15 -#define TEGRA_POWERGATE_C1NC 16 -#define TEGRA_POWERGATE_SOR 17 -#define TEGRA_POWERGATE_DIS 18 -#define TEGRA_POWERGATE_DISB 19 -#define TEGRA_POWERGATE_XUSBA 20 -#define TEGRA_POWERGATE_XUSBB 21 -#define TEGRA_POWERGATE_XUSBC 22 -#define TEGRA_POWERGATE_VIC 23 -#define TEGRA_POWERGATE_IRAM 24 - -#define TEGRA_POWERGATE_3D0 TEGRA_POWERGATE_3D - -#define TEGRA_IO_RAIL_CSIA 0 -#define TEGRA_IO_RAIL_CSIB 1 -#define TEGRA_IO_RAIL_DSI 2 -#define TEGRA_IO_RAIL_MIPI_BIAS 3 -#define TEGRA_IO_RAIL_PEX_BIAS 4 -#define TEGRA_IO_RAIL_PEX_CLK1 5 -#define TEGRA_IO_RAIL_PEX_CLK2 6 -#define TEGRA_IO_RAIL_USB0 9 -#define TEGRA_IO_RAIL_USB1 10 -#define TEGRA_IO_RAIL_USB2 11 -#define TEGRA_IO_RAIL_USB_BIAS 12 -#define TEGRA_IO_RAIL_NAND 13 -#define TEGRA_IO_RAIL_UART 14 -#define TEGRA_IO_RAIL_BB 15 -#define TEGRA_IO_RAIL_AUDIO 17 -#define TEGRA_IO_RAIL_HSIC 19 -#define TEGRA_IO_RAIL_COMP 22 -#define TEGRA_IO_RAIL_HDMI 28 -#define TEGRA_IO_RAIL_PEX_CNTRL 32 -#define TEGRA_IO_RAIL_SDMMC1 33 -#define TEGRA_IO_RAIL_SDMMC3 34 -#define TEGRA_IO_RAIL_SDMMC4 35 -#define TEGRA_IO_RAIL_CAM 36 -#define TEGRA_IO_RAIL_RES 37 -#define TEGRA_IO_RAIL_HV 38 -#define TEGRA_IO_RAIL_DSIB 39 -#define TEGRA_IO_RAIL_DSIC 40 -#define TEGRA_IO_RAIL_DSID 41 -#define TEGRA_IO_RAIL_CSIE 44 -#define TEGRA_IO_RAIL_LVDS 57 -#define TEGRA_IO_RAIL_SYS_DDC 58 - -#ifdef CONFIG_ARCH_TEGRA -int tegra_powergate_is_powered(int id); -int tegra_powergate_power_on(int id); -int tegra_powergate_power_off(int id); -int tegra_powergate_remove_clamping(int id); - -/* Must be called with clk disabled, and returns with clk enabled */ -int tegra_powergate_sequence_power_up(int id, struct clk *clk, - struct reset_control *rst); - -int tegra_io_rail_power_on(int id); -int tegra_io_rail_power_off(int id); -#else -static inline int tegra_powergate_is_powered(int id) -{ - return -ENOSYS; -} - -static inline int tegra_powergate_power_on(int id) -{ - return -ENOSYS; -} - -static inline int tegra_powergate_power_off(int id) -{ - return -ENOSYS; -} - -static inline int tegra_powergate_remove_clamping(int id) -{ - return -ENOSYS; -} - -static inline int tegra_powergate_sequence_power_up(int id, struct clk *clk, - struct reset_control *rst) -{ - return -ENOSYS; -} - -static inline int tegra_io_rail_power_on(int id) -{ - return -ENOSYS; -} - -static inline int tegra_io_rail_power_off(int id) -{ - return -ENOSYS; -} -#endif - -#endif /* _MACH_TEGRA_POWERGATE_H_ */ diff --git a/include/linux/tegra-soc.h b/include/linux/tegra-soc.h deleted file mode 100644 index 95f611d78f3a..000000000000 --- a/include/linux/tegra-soc.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright (c) 2012, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. 
- * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#ifndef __LINUX_TEGRA_SOC_H_ -#define __LINUX_TEGRA_SOC_H_ - -u32 tegra_read_chipid(void); - -#endif /* __LINUX_TEGRA_SOC_H_ */ diff --git a/include/soc/tegra/ahb.h b/include/soc/tegra/ahb.h new file mode 100644 index 000000000000..504eb6f957e5 --- /dev/null +++ b/include/soc/tegra/ahb.h @@ -0,0 +1,19 @@ +/* + * Copyright (c) 2012, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef __SOC_TEGRA_AHB_H__ +#define __SOC_TEGRA_AHB_H__ + +extern int tegra_ahb_enable_smmu(struct device_node *ahb); + +#endif /* __SOC_TEGRA_AHB_H__ */ diff --git a/include/soc/tegra/cpuidle.h b/include/soc/tegra/cpuidle.h new file mode 100644 index 000000000000..ea04f4225638 --- /dev/null +++ b/include/soc/tegra/cpuidle.h @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef __SOC_TEGRA_CPUIDLE_H__ +#define __SOC_TEGRA_CPUIDLE_H__ + +#ifdef CONFIG_CPU_IDLE +void tegra_cpuidle_pcie_irqs_in_use(void); +#else +static inline void tegra_cpuidle_pcie_irqs_in_use(void) +{ +} +#endif + +#endif /* __SOC_TEGRA_CPUIDLE_H__ */ diff --git a/include/soc/tegra/fuse.h b/include/soc/tegra/fuse.h new file mode 100644 index 000000000000..85f555c89ad5 --- /dev/null +++ b/include/soc/tegra/fuse.h @@ -0,0 +1,22 @@ +/* + * Copyright (c) 2012, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#ifndef __SOC_TEGRA_FUSE_H__ +#define __SOC_TEGRA_FUSE_H__ + +u32 tegra_read_chipid(void); + +#endif /* __SOC_TEGRA_FUSE_H__ */ diff --git a/include/soc/tegra/powergate.h b/include/soc/tegra/powergate.h new file mode 100644 index 000000000000..c16912ed1a8d --- /dev/null +++ b/include/soc/tegra/powergate.h @@ -0,0 +1,134 @@ +/* + * Copyright (c) 2010 Google, Inc + * + * Author: + * Colin Cross + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef __SOC_TEGRA_POWERGATE_H__ +#define __SOC_TEGRA_POWERGATE_H__ + +struct clk; +struct reset_control; + +#define TEGRA_POWERGATE_CPU 0 +#define TEGRA_POWERGATE_3D 1 +#define TEGRA_POWERGATE_VENC 2 +#define TEGRA_POWERGATE_PCIE 3 +#define TEGRA_POWERGATE_VDEC 4 +#define TEGRA_POWERGATE_L2 5 +#define TEGRA_POWERGATE_MPE 6 +#define TEGRA_POWERGATE_HEG 7 +#define TEGRA_POWERGATE_SATA 8 +#define TEGRA_POWERGATE_CPU1 9 +#define TEGRA_POWERGATE_CPU2 10 +#define TEGRA_POWERGATE_CPU3 11 +#define TEGRA_POWERGATE_CELP 12 +#define TEGRA_POWERGATE_3D1 13 +#define TEGRA_POWERGATE_CPU0 14 +#define TEGRA_POWERGATE_C0NC 15 +#define TEGRA_POWERGATE_C1NC 16 +#define TEGRA_POWERGATE_SOR 17 +#define TEGRA_POWERGATE_DIS 18 +#define TEGRA_POWERGATE_DISB 19 +#define TEGRA_POWERGATE_XUSBA 20 +#define TEGRA_POWERGATE_XUSBB 21 +#define TEGRA_POWERGATE_XUSBC 22 +#define TEGRA_POWERGATE_VIC 23 +#define TEGRA_POWERGATE_IRAM 24 + +#define TEGRA_POWERGATE_3D0 TEGRA_POWERGATE_3D + +#define TEGRA_IO_RAIL_CSIA 0 +#define TEGRA_IO_RAIL_CSIB 1 +#define TEGRA_IO_RAIL_DSI 2 +#define TEGRA_IO_RAIL_MIPI_BIAS 3 +#define TEGRA_IO_RAIL_PEX_BIAS 4 +#define TEGRA_IO_RAIL_PEX_CLK1 5 +#define TEGRA_IO_RAIL_PEX_CLK2 6 +#define TEGRA_IO_RAIL_USB0 9 +#define TEGRA_IO_RAIL_USB1 10 +#define TEGRA_IO_RAIL_USB2 11 +#define TEGRA_IO_RAIL_USB_BIAS 12 +#define TEGRA_IO_RAIL_NAND 13 +#define TEGRA_IO_RAIL_UART 14 +#define TEGRA_IO_RAIL_BB 15 +#define TEGRA_IO_RAIL_AUDIO 17 +#define TEGRA_IO_RAIL_HSIC 19 +#define TEGRA_IO_RAIL_COMP 22 +#define TEGRA_IO_RAIL_HDMI 28 +#define TEGRA_IO_RAIL_PEX_CNTRL 32 +#define TEGRA_IO_RAIL_SDMMC1 33 +#define TEGRA_IO_RAIL_SDMMC3 34 +#define TEGRA_IO_RAIL_SDMMC4 35 +#define TEGRA_IO_RAIL_CAM 36 +#define TEGRA_IO_RAIL_RES 37 +#define TEGRA_IO_RAIL_HV 38 +#define TEGRA_IO_RAIL_DSIB 39 +#define TEGRA_IO_RAIL_DSIC 40 +#define TEGRA_IO_RAIL_DSID 41 +#define TEGRA_IO_RAIL_CSIE 44 +#define TEGRA_IO_RAIL_LVDS 57 +#define TEGRA_IO_RAIL_SYS_DDC 58 + +#ifdef CONFIG_ARCH_TEGRA +int tegra_powergate_is_powered(int id); +int tegra_powergate_power_on(int id); +int tegra_powergate_power_off(int id); +int tegra_powergate_remove_clamping(int id); + +/* Must be called with clk disabled, and returns with clk enabled */ +int tegra_powergate_sequence_power_up(int id, struct clk *clk, + struct reset_control *rst); + +int tegra_io_rail_power_on(int id); +int tegra_io_rail_power_off(int id); +#else +static inline int tegra_powergate_is_powered(int id) +{ + return -ENOSYS; +} + +static inline int tegra_powergate_power_on(int id) +{ + return -ENOSYS; +} + +static inline int tegra_powergate_power_off(int id) +{ + return -ENOSYS; +} + +static inline int 
tegra_powergate_remove_clamping(int id) +{ + return -ENOSYS; +} + +static inline int tegra_powergate_sequence_power_up(int id, struct clk *clk, + struct reset_control *rst) +{ + return -ENOSYS; +} + +static inline int tegra_io_rail_power_on(int id) +{ + return -ENOSYS; +} + +static inline int tegra_io_rail_power_off(int id) +{ + return -ENOSYS; +} +#endif + +#endif /* __SOC_TEGRA_POWERGATE_H__ */ -- cgit v1.2.3-59-g8ed1b From a5152c8a125da3c5e16dc2208dd52e80f0803c5c Mon Sep 17 00:00:00 2001 From: Boris BREZILLON Date: Thu, 10 Jul 2014 19:14:16 +0200 Subject: genirq: generic chip: Export irq_map_generic_chip function Export the generic irq map function in order to provide irq_domain ops with generic mapping and specific of xlate function (needed by the new atmel AIC driver). Signed-off-by: Boris BREZILLON Acked-by: Thomas Gleixner Link: https://lkml.kernel.org/r/1405012462-766-2-git-send-email-boris.brezillon@free-electrons.com Signed-off-by: Jason Cooper --- include/linux/irq.h | 2 ++ kernel/irq/generic-chip.c | 5 +++-- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 0d998d8b01d8..62af59242ddc 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -771,6 +771,8 @@ void irq_gc_eoi(struct irq_data *d); int irq_gc_set_wake(struct irq_data *d, unsigned int on); /* Setup functions for irq_chip_generic */ +int irq_map_generic_chip(struct irq_domain *d, unsigned int virq, + irq_hw_number_t hw_irq); struct irq_chip_generic * irq_alloc_generic_chip(const char *name, int nr_ct, unsigned int irq_base, void __iomem *reg_base, irq_flow_handler_t handler); diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c index 452d6f2ba21d..cf80e7b0ddab 100644 --- a/kernel/irq/generic-chip.c +++ b/kernel/irq/generic-chip.c @@ -341,8 +341,8 @@ static struct lock_class_key irq_nested_lock_class; /* * irq_map_generic_chip - Map a generic chip for an irq domain */ -static int irq_map_generic_chip(struct irq_domain *d, unsigned int virq, - irq_hw_number_t hw_irq) +int irq_map_generic_chip(struct irq_domain *d, unsigned int virq, + irq_hw_number_t hw_irq) { struct irq_data *data = irq_get_irq_data(virq); struct irq_domain_chip_generic *dgc = d->gc; @@ -394,6 +394,7 @@ static int irq_map_generic_chip(struct irq_domain *d, unsigned int virq, irq_modify_status(virq, dgc->irq_flags_to_clear, dgc->irq_flags_to_set); return 0; } +EXPORT_SYMBOL_GPL(irq_map_generic_chip); struct irq_domain_ops irq_generic_chip_ops = { .map = irq_map_generic_chip, -- cgit v1.2.3-59-g8ed1b From 32c4741cb66703a3c282f41d77deff4afd93342a Mon Sep 17 00:00:00 2001 From: Dmitry Kasatkin Date: Tue, 17 Jun 2014 11:56:59 +0300 Subject: KEYS: validate certificate trust only with builtin keys Instead of allowing public keys, with certificates signed by any key on the system trusted keyring, to be added to a trusted keyring, this patch further restricts the certificates to those signed only by builtin keys on the system keyring. This patch defines a new option 'builtin' for the kernel parameter 'keys_ownerid' to allow trust validation using builtin keys. 
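As a usage note (hedged; this reflects the Documentation/kernel-parameters.txt hunk in this patch rather than a tested configuration), the restriction is selected on the kernel command line through the existing ca_keys= parameter:

	ca_keys=builtin      # trust validation only against builtin (compiled-in) keys
	ca_keys=id:<keyid>   # previous behaviour: trust certificates signed by the named key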
Simplified Mimi's "KEYS: define an owner trusted keyring" patch Changelog v7: - rename builtin_keys to use_builtin_keys Signed-off-by: Dmitry Kasatkin Signed-off-by: Mimi Zohar --- Documentation/kernel-parameters.txt | 2 +- crypto/asymmetric_keys/x509_public_key.c | 9 ++++++--- include/linux/key.h | 1 + kernel/system_keyring.c | 1 + 4 files changed, 9 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index bdb193afe176..90c12c591168 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -569,7 +569,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted. ca_keys= [KEYS] This parameter identifies a specific key(s) on the system trusted keyring to be used for certificate trust validation. - format: id: + format: { id: | builtin } ccw_timeout_log [S390] See Documentation/s390/CommonIO for details. diff --git a/crypto/asymmetric_keys/x509_public_key.c b/crypto/asymmetric_keys/x509_public_key.c index d376195e1d08..927ce755ff67 100644 --- a/crypto/asymmetric_keys/x509_public_key.c +++ b/crypto/asymmetric_keys/x509_public_key.c @@ -24,6 +24,7 @@ #include "public_key.h" #include "x509_parser.h" +static bool use_builtin_keys; static char *ca_keyid; #ifndef MODULE @@ -34,6 +35,8 @@ static int __init ca_keys_setup(char *str) if (strncmp(str, "id:", 3) == 0) ca_keyid = str; /* owner key 'id:xxxxxx' */ + else if (strcmp(str, "builtin") == 0) + use_builtin_keys = true; return 1; } @@ -180,7 +183,6 @@ EXPORT_SYMBOL_GPL(x509_check_signature); static int x509_validate_trust(struct x509_certificate *cert, struct key *trust_keyring) { - const struct public_key *pk; struct key *key; int ret = 1; @@ -195,8 +197,9 @@ static int x509_validate_trust(struct x509_certificate *cert, cert->authority, strlen(cert->authority)); if (!IS_ERR(key)) { - pk = key->payload.data; - ret = x509_check_signature(pk, cert); + if (!use_builtin_keys + || test_bit(KEY_FLAG_BUILTIN, &key->flags)) + ret = x509_check_signature(key->payload.data, cert); key_put(key); } return ret; diff --git a/include/linux/key.h b/include/linux/key.h index 017b0826642f..65316f7ae794 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -170,6 +170,7 @@ struct key { #define KEY_FLAG_INVALIDATED 7 /* set if key has been invalidated */ #define KEY_FLAG_TRUSTED 8 /* set if key is trusted */ #define KEY_FLAG_TRUSTED_ONLY 9 /* set if keyring only accepts links to trusted keys */ +#define KEY_FLAG_BUILTIN 10 /* set if key is builtin */ /* the key type and key description string * - the desc is used to match a key against search criteria diff --git a/kernel/system_keyring.c b/kernel/system_keyring.c index 52ebc70263f4..875f64e8935b 100644 --- a/kernel/system_keyring.c +++ b/kernel/system_keyring.c @@ -89,6 +89,7 @@ static __init int load_system_certificate_list(void) pr_err("Problem loading in-kernel X.509 certificate (%ld)\n", PTR_ERR(key)); } else { + set_bit(KEY_FLAG_BUILTIN, &key_ref_to_ptr(key)->flags); pr_notice("Loaded X.509 cert '%s'\n", key_ref_to_ptr(key)->description); key_ref_put(key); -- cgit v1.2.3-59-g8ed1b From 1b2f121c1418249e56048d816754b479b3cb6fb3 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 25 Jun 2014 10:39:46 -0400 Subject: ftrace-graph: Remove dependency of ftrace_stop() from ftrace_graph_stop() ftrace_stop() is going away as it disables parts of function tracing that affects users that should not be affected. 
But ftrace_graph_stop() is built on ftrace_stop(). Here's another example of killing all of function tracing because something went wrong with function graph tracing. Instead of disabling all users of function tracing on function graph error, disable only function graph tracing. A new function is created called ftrace_graph_is_dead(). This is called in strategic paths to prevent function graph from doing more harm and allowing at least a warning to be printed before the system crashes. NOTE: ftrace_stop() is still used until all the archs are converted over to use ftrace_graph_is_dead(). After that, ftrace_stop() will be removed. Reviewed-by: Masami Hiramatsu Cc: Frederic Weisbecker Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 1 + kernel/trace/ftrace.c | 5 ----- kernel/trace/trace_functions_graph.c | 35 +++++++++++++++++++++++++++++++++++ 3 files changed, 36 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 4807a39e7ae1..18fb2c4a3f7f 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -760,6 +760,7 @@ extern char __irqentry_text_end[]; extern int register_ftrace_graph(trace_func_graph_ret_t retfunc, trace_func_graph_ent_t entryfunc); +extern bool ftrace_graph_is_dead(void); extern void ftrace_graph_stop(void); /* The current handlers in use */ diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 1776153ea6e0..8063280fd53d 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -5473,9 +5473,4 @@ void ftrace_graph_exit_task(struct task_struct *t) kfree(ret_stack); } - -void ftrace_graph_stop(void) -{ - ftrace_stop(); -} #endif diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 4de3e57f723c..3604690be70b 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -15,6 +15,38 @@ #include "trace.h" #include "trace_output.h" +static bool kill_ftrace_graph; + +/** + * ftrace_graph_is_dead - returns true if ftrace_graph_stop() was called + * + * ftrace_graph_stop() is called when a severe error is detected in + * the function graph tracing. This function is called by the critical + * paths of function graph to keep those paths from doing any more harm. + */ +bool ftrace_graph_is_dead(void) +{ + return kill_ftrace_graph; +} + +/** + * ftrace_graph_stop - set to permanently disable function graph tracincg + * + * In case of an error int function graph tracing, this is called + * to try to keep function graph tracing from causing any more harm. + * Usually this is pretty severe and this is called to try to at least + * get a warning out to the user. + */ +void ftrace_graph_stop(void) +{ + kill_ftrace_graph = true; + /* + * ftrace_stop() will be removed when all archs are updated to + * use ftrace_graph_is_dead() + */ + ftrace_stop(); +} + /* When set, irq functions will be ignored */ static int ftrace_graph_skip_irqs; @@ -92,6 +124,9 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth, unsigned long long calltime; int index; + if (unlikely(ftrace_graph_is_dead())) + return -EBUSY; + if (!current->ret_stack) return -EBUSY; -- cgit v1.2.3-59-g8ed1b From 0c7774abb41bd00d5836d9ba098825a40fa94133 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 17 Jul 2014 20:45:08 +0100 Subject: KEYS: Allow special keys (eg. 
DNS results) to be invalidated by CAP_SYS_ADMIN Special kernel keys, such as those used to hold DNS results for AFS, CIFS and NFS and those used to hold idmapper results for NFS, used to be 'invalidateable' with key_revoke(). However, since the default permissions for keys were reduced: Commit: 96b5c8fea6c0861621051290d705ec2e971963f1 KEYS: Reduce initial permissions on keys it has become impossible to do this. Add a key flag (KEY_FLAG_ROOT_CAN_INVAL) that will permit a key to be invalidated by root. This should not be used for system keyrings as the garbage collector will try and remove any invalidate key. For system keyrings, KEY_FLAG_ROOT_CAN_CLEAR can be used instead. After this, from userspace, keyctl_invalidate() and "keyctl invalidate" can be used by any possessor of CAP_SYS_ADMIN (typically root) to invalidate DNS and idmapper keys. Invalidated keys are immediately garbage collected and will be immediately rerequested if needed again. Signed-off-by: David Howells Tested-by: Steve Dickson --- fs/nfs/idmap.c | 2 ++ include/linux/key.h | 1 + net/dns_resolver/dns_query.c | 1 + security/keys/keyctl.c | 15 ++++++++++++++- 4 files changed, 18 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 567983d2c0eb..b7458d77f511 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -282,6 +282,8 @@ static struct key *nfs_idmap_request_key(const char *name, size_t namelen, desc, "", 0, idmap); mutex_unlock(&idmap->idmap_mutex); } + if (!IS_ERR(rkey)) + set_bit(KEY_FLAG_ROOT_CAN_INVAL, &rkey->flags); kfree(desc); return rkey; diff --git a/include/linux/key.h b/include/linux/key.h index 017b0826642f..e37a4d807185 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -170,6 +170,7 @@ struct key { #define KEY_FLAG_INVALIDATED 7 /* set if key has been invalidated */ #define KEY_FLAG_TRUSTED 8 /* set if key is trusted */ #define KEY_FLAG_TRUSTED_ONLY 9 /* set if keyring only accepts links to trusted keys */ +#define KEY_FLAG_ROOT_CAN_INVAL 11 /* set if key can be invalidated by root without permission */ /* the key type and key description string * - the desc is used to match a key against search criteria diff --git a/net/dns_resolver/dns_query.c b/net/dns_resolver/dns_query.c index 9acec61f5433..9a32f55cf9b9 100644 --- a/net/dns_resolver/dns_query.c +++ b/net/dns_resolver/dns_query.c @@ -129,6 +129,7 @@ int dns_query(const char *type, const char *name, size_t namelen, } down_read(&rkey->sem); + set_bit(KEY_FLAG_ROOT_CAN_INVAL, &rkey->flags); rkey->perm |= KEY_USR_VIEW; ret = key_validate(rkey); diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index cd5bd0cef25d..609f8d326ddc 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -404,12 +404,25 @@ long keyctl_invalidate_key(key_serial_t id) key_ref = lookup_user_key(id, 0, KEY_NEED_SEARCH); if (IS_ERR(key_ref)) { ret = PTR_ERR(key_ref); + + /* Root is permitted to invalidate certain special keys */ + if (capable(CAP_SYS_ADMIN)) { + key_ref = lookup_user_key(id, 0, 0); + if (IS_ERR(key_ref)) + goto error; + if (test_bit(KEY_FLAG_ROOT_CAN_INVAL, + &key_ref_to_ptr(key_ref)->flags)) + goto invalidate; + goto error_put; + } + goto error; } +invalidate: key_invalidate(key_ref_to_ptr(key_ref)); ret = 0; - +error_put: key_ref_put(key_ref); error: kleave(" = %ld", ret); -- cgit v1.2.3-59-g8ed1b From b4210b810e5040f10a30ba56de6c3faab5c49345 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Wed, 25 Jun 2014 15:27:37 +0200 Subject: Add module param type 'ullong' Some driver 
might want to pass in an 64-bit value, so introduce a module param type 'ullong'. Signed-off-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Reviewed-by: Ewan Milne Acked-by: Rusty Russell Signed-off-by: Christoph Hellwig --- include/linux/moduleparam.h | 5 +++++ kernel/params.c | 1 + 2 files changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index b1990c5524e1..494f99e852da 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -381,6 +381,11 @@ extern int param_set_ulong(const char *val, const struct kernel_param *kp); extern int param_get_ulong(char *buffer, const struct kernel_param *kp); #define param_check_ulong(name, p) __param_check(name, p, unsigned long) +extern struct kernel_param_ops param_ops_ullong; +extern int param_set_ullong(const char *val, const struct kernel_param *kp); +extern int param_get_ullong(char *buffer, const struct kernel_param *kp); +#define param_check_ullong(name, p) __param_check(name, p, unsigned long long) + extern struct kernel_param_ops param_ops_charp; extern int param_set_charp(const char *val, const struct kernel_param *kp); extern int param_get_charp(char *buffer, const struct kernel_param *kp); diff --git a/kernel/params.c b/kernel/params.c index 1e52ca233fd9..34f527023794 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -256,6 +256,7 @@ STANDARD_PARAM_DEF(int, int, "%i", kstrtoint); STANDARD_PARAM_DEF(uint, unsigned int, "%u", kstrtouint); STANDARD_PARAM_DEF(long, long, "%li", kstrtol); STANDARD_PARAM_DEF(ulong, unsigned long, "%lu", kstrtoul); +STANDARD_PARAM_DEF(ullong, unsigned long long, "%llu", kstrtoull); int param_set_charp(const char *val, const struct kernel_param *kp) { -- cgit v1.2.3-59-g8ed1b From ae4b884fc6316b3190be19448cea24b020c1cad6 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 15 Jul 2014 12:59:36 -0400 Subject: nfsd: silence sparse warning about accessing credentials sparse says: fs/nfsd/auth.c:31:38: warning: incorrect type in argument 1 (different address spaces) fs/nfsd/auth.c:31:38: expected struct cred const *cred fs/nfsd/auth.c:31:38: got struct cred const [noderef] *real_cred Add a new accessor for the ->real_cred and use that to fetch the pointer. Accessing current->real_cred directly is actually quite safe since we know that they can't go away so this is mostly a cosmetic fixup to silence sparse. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/nfsd/auth.c | 2 +- include/linux/cred.h | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c index 72f44823adbb..9d46a0bdd9f9 100644 --- a/fs/nfsd/auth.c +++ b/fs/nfsd/auth.c @@ -28,7 +28,7 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) validate_process_creds(); /* discard any old override before preparing the new set */ - revert_creds(get_cred(current->real_cred)); + revert_creds(get_cred(current_real_cred())); new = prepare_creds(); if (!new) return -ENOMEM; diff --git a/include/linux/cred.h b/include/linux/cred.h index f61d6c8f5ef3..b2d0820837c4 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -258,6 +258,15 @@ static inline void put_cred(const struct cred *_cred) #define current_cred() \ rcu_dereference_protected(current->cred, 1) +/** + * current_real_cred - Access the current task's objective credentials + * + * Access the objective credentials of the current task. RCU-safe, + * since nobody else can modify it. 
+ */ +#define current_real_cred() \ + rcu_dereference_protected(current->real_cred, 1) + /** * __task_cred - Access a task's objective credentials * @task: The task to query -- cgit v1.2.3-59-g8ed1b From b1261c86fe238cc0da3f5dc837a38a0c39f3e7c4 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 14 Jul 2014 14:26:14 +0300 Subject: serial: 8250: introduce up_to_u8250p() helper It helps to cast struct uart_port to struct uart_8250_port at runtime. Signed-off-by: Andy Shevchenko Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_core.c | 57 +++++++++++++------------------------ drivers/tty/serial/8250/8250_dw.c | 8 ++++-- drivers/tty/serial/8250/8250_fsl.c | 3 +- drivers/tty/serial/8250/8250_pci.c | 6 ++-- include/linux/serial_8250.h | 5 ++++ 5 files changed, 32 insertions(+), 47 deletions(-) (limited to 'include/linux') diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c index 7a91c6d1eb7d..0da01458816e 100644 --- a/drivers/tty/serial/8250/8250_core.c +++ b/drivers/tty/serial/8250/8250_core.c @@ -439,8 +439,7 @@ static int exar_handle_irq(struct uart_port *port); static void set_io_from_upio(struct uart_port *p) { - struct uart_8250_port *up = - container_of(p, struct uart_8250_port, port); + struct uart_8250_port *up = up_to_u8250p(p); up->dl_read = default_serial_dl_read; up->dl_write = default_serial_dl_write; @@ -1277,8 +1276,7 @@ static inline void __stop_tx(struct uart_8250_port *p) static void serial8250_stop_tx(struct uart_port *port) { - struct uart_8250_port *up = - container_of(port, struct uart_8250_port, port); + struct uart_8250_port *up = up_to_u8250p(port); __stop_tx(up); @@ -1293,8 +1291,7 @@ static void serial8250_stop_tx(struct uart_port *port) static void serial8250_start_tx(struct uart_port *port) { - struct uart_8250_port *up = - container_of(port, struct uart_8250_port, port); + struct uart_8250_port *up = up_to_u8250p(port); if (up->dma && !serial8250_tx_dma(up)) { return; @@ -1322,8 +1319,7 @@ static void serial8250_start_tx(struct uart_port *port) static void serial8250_stop_rx(struct uart_port *port) { - struct uart_8250_port *up = - container_of(port, struct uart_8250_port, port); + struct uart_8250_port *up = up_to_u8250p(port); up->ier &= ~UART_IER_RLSI; up->port.read_status_mask &= ~UART_LSR_DR; @@ -1332,8 +1328,7 @@ static void serial8250_stop_rx(struct uart_port *port) static void serial8250_enable_ms(struct uart_port *port) { - struct uart_8250_port *up = - container_of(port, struct uart_8250_port, port); + struct uart_8250_port *up = up_to_u8250p(port); /* no MSR capabilities */ if (up->bugs & UART_BUG_NOMSR) @@ -1499,8 +1494,7 @@ int serial8250_handle_irq(struct uart_port *port, unsigned int iir) { unsigned char status; unsigned long flags; - struct uart_8250_port *up = - container_of(port, struct uart_8250_port, port); + struct uart_8250_port *up = up_to_u8250p(port); int dma_err = 0; if (iir & UART_IIR_NO_INT) @@ -1785,8 +1779,7 @@ static void serial8250_backup_timeout(unsigned long data) static unsigned int serial8250_tx_empty(struct uart_port *port) { - struct uart_8250_port *up = - container_of(port, struct uart_8250_port, port); + struct uart_8250_port *up = up_to_u8250p(port); unsigned long flags; unsigned int lsr; @@ -1800,8 +1793,7 @@ static unsigned int serial8250_tx_empty(struct uart_port *port) static unsigned int serial8250_get_mctrl(struct uart_port *port) { - struct uart_8250_port *up = - container_of(port, struct uart_8250_port, port); + struct uart_8250_port *up = up_to_u8250p(port); 
unsigned int status; unsigned int ret; @@ -1821,8 +1813,7 @@ static unsigned int serial8250_get_mctrl(struct uart_port *port) static void serial8250_set_mctrl(struct uart_port *port, unsigned int mctrl) { - struct uart_8250_port *up = - container_of(port, struct uart_8250_port, port); + struct uart_8250_port *up = up_to_u8250p(port); unsigned char mcr = 0; if (mctrl & TIOCM_RTS) @@ -1843,8 +1834,7 @@ static void serial8250_set_mctrl(struct uart_port *port, unsigned int mctrl) static void serial8250_break_ctl(struct uart_port *port, int break_state) { - struct uart_8250_port *up = - container_of(port, struct uart_8250_port, port); + struct uart_8250_port *up = up_to_u8250p(port); unsigned long flags; spin_lock_irqsave(&port->lock, flags); @@ -1911,8 +1901,7 @@ static void serial8250_put_poll_char(struct uart_port *port, unsigned char c) { unsigned int ier; - struct uart_8250_port *up = - container_of(port, struct uart_8250_port, port); + struct uart_8250_port *up = up_to_u8250p(port); /* * First save the IER then disable the interrupts @@ -1941,8 +1930,7 @@ static void serial8250_put_poll_char(struct uart_port *port, static int serial8250_startup(struct uart_port *port) { - struct uart_8250_port *up = - container_of(port, struct uart_8250_port, port); + struct uart_8250_port *up = up_to_u8250p(port); unsigned long flags; unsigned char lsr, iir; int retval; @@ -2194,8 +2182,7 @@ dont_test_tx_en: static void serial8250_shutdown(struct uart_port *port) { - struct uart_8250_port *up = - container_of(port, struct uart_8250_port, port); + struct uart_8250_port *up = up_to_u8250p(port); unsigned long flags; /* @@ -2268,8 +2255,7 @@ void serial8250_do_set_termios(struct uart_port *port, struct ktermios *termios, struct ktermios *old) { - struct uart_8250_port *up = - container_of(port, struct uart_8250_port, port); + struct uart_8250_port *up = up_to_u8250p(port); unsigned char cval, fcr = 0; unsigned long flags; unsigned int baud, quot; @@ -2498,8 +2484,7 @@ serial8250_set_ldisc(struct uart_port *port, int new) void serial8250_do_pm(struct uart_port *port, unsigned int state, unsigned int oldstate) { - struct uart_8250_port *p = - container_of(port, struct uart_8250_port, port); + struct uart_8250_port *p = up_to_u8250p(port); serial8250_set_sleep(p, state != 0); } @@ -2630,8 +2615,7 @@ static void serial8250_release_rsa_resource(struct uart_8250_port *up) static void serial8250_release_port(struct uart_port *port) { - struct uart_8250_port *up = - container_of(port, struct uart_8250_port, port); + struct uart_8250_port *up = up_to_u8250p(port); serial8250_release_std_resource(up); if (port->type == PORT_RSA) @@ -2640,8 +2624,7 @@ static void serial8250_release_port(struct uart_port *port) static int serial8250_request_port(struct uart_port *port) { - struct uart_8250_port *up = - container_of(port, struct uart_8250_port, port); + struct uart_8250_port *up = up_to_u8250p(port); int ret; if (port->type == PORT_8250_CIR) @@ -2659,8 +2642,7 @@ static int serial8250_request_port(struct uart_port *port) static void serial8250_config_port(struct uart_port *port, int flags) { - struct uart_8250_port *up = - container_of(port, struct uart_8250_port, port); + struct uart_8250_port *up = up_to_u8250p(port); int probeflags = PROBE_ANY; int ret; @@ -2859,8 +2841,7 @@ serial8250_register_ports(struct uart_driver *drv, struct device *dev) static void serial8250_console_putchar(struct uart_port *port, int ch) { - struct uart_8250_port *up = - container_of(port, struct uart_8250_port, port); + struct 
uart_8250_port *up = up_to_u8250p(port); wait_for_xmitr(up, UART_LSR_THRE); serial_port_out(port, UART_TX, ch); diff --git a/drivers/tty/serial/8250/8250_dw.c b/drivers/tty/serial/8250/8250_dw.c index c531fa42f838..affdcb192aed 100644 --- a/drivers/tty/serial/8250/8250_dw.c +++ b/drivers/tty/serial/8250/8250_dw.c @@ -84,8 +84,9 @@ static inline int dw8250_modify_msr(struct uart_port *p, int offset, int value) static void dw8250_force_idle(struct uart_port *p) { - serial8250_clear_and_reinit_fifos(container_of - (p, struct uart_8250_port, port)); + struct uart_8250_port *up = up_to_u8250p(p); + + serial8250_clear_and_reinit_fifos(up); (void)p->serial_in(p, UART_RX); } @@ -255,6 +256,7 @@ static int dw8250_probe_of(struct uart_port *p, struct dw8250_data *data) { struct device_node *np = p->dev->of_node; + struct uart_8250_port *up = up_to_u8250p(p); u32 val; bool has_ucv = true; @@ -287,7 +289,7 @@ static int dw8250_probe_of(struct uart_port *p, } } if (has_ucv) - dw8250_setup_port(container_of(p, struct uart_8250_port, port)); + dw8250_setup_port(up); if (!of_property_read_u32(np, "reg-shift", &val)) p->regshift = val; diff --git a/drivers/tty/serial/8250/8250_fsl.c b/drivers/tty/serial/8250/8250_fsl.c index f4d3c47b88e8..c0533a57ec53 100644 --- a/drivers/tty/serial/8250/8250_fsl.c +++ b/drivers/tty/serial/8250/8250_fsl.c @@ -28,8 +28,7 @@ int fsl8250_handle_irq(struct uart_port *port) unsigned char lsr, orig_lsr; unsigned long flags; unsigned int iir; - struct uart_8250_port *up = - container_of(port, struct uart_8250_port, port); + struct uart_8250_port *up = up_to_u8250p(port); spin_lock_irqsave(&up->port.lock, flags); diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c index 33137b3ba94d..61830b1792eb 100644 --- a/drivers/tty/serial/8250/8250_pci.c +++ b/drivers/tty/serial/8250/8250_pci.c @@ -1581,8 +1581,7 @@ static int skip_tx_en_setup(struct serial_private *priv, static void kt_handle_break(struct uart_port *p) { - struct uart_8250_port *up = - container_of(p, struct uart_8250_port, port); + struct uart_8250_port *up = up_to_u8250p(p); /* * On receipt of a BI, serial device in Intel ME (Intel * management engine) needs to have its fifos cleared for sane @@ -1593,8 +1592,7 @@ static void kt_handle_break(struct uart_port *p) static unsigned int kt_serial_in(struct uart_port *p, int offset) { - struct uart_8250_port *up = - container_of(p, struct uart_8250_port, port); + struct uart_8250_port *up = up_to_u8250p(p); unsigned int val; /* diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index af47a8af6024..730ab4b3d686 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -100,6 +100,11 @@ struct uart_8250_port { void (*dl_write)(struct uart_8250_port *, int); }; +static inline struct uart_8250_port *up_to_u8250p(struct uart_port *up) +{ + return container_of(up, struct uart_8250_port, port); +} + int serial8250_register_8250_port(struct uart_8250_port *); void serial8250_unregister_port(int line); void serial8250_suspend_port(int line); -- cgit v1.2.3-59-g8ed1b From 266dcff03eed0050b6af11aaf2a61ab837d7ba3f Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 16 Jul 2014 01:19:34 +0000 Subject: Serial: allow port drivers to have a default attribute group Some serial drivers (like 8250), want to add sysfs files. We need to do so in a race-free way, so allow any port to be able to specify an attribute group that should be added at device creation time. 
Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yoshihiro YUNOMAE Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/serial_core.c | 24 +++++++++++++++++------- include/linux/serial_core.h | 2 ++ 2 files changed, 19 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index b70095e55df6..61529a84c3fc 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -2564,12 +2564,6 @@ static const struct attribute_group tty_dev_attr_group = { .attrs = tty_dev_attrs, }; -static const struct attribute_group *tty_dev_attr_groups[] = { - &tty_dev_attr_group, - NULL - }; - - /** * uart_add_one_port - attach a driver-defined port structure * @drv: pointer to the uart low level driver structure for this port @@ -2586,6 +2580,7 @@ int uart_add_one_port(struct uart_driver *drv, struct uart_port *uport) struct tty_port *port; int ret = 0; struct device *tty_dev; + int num_groups; BUG_ON(in_interrupt()); @@ -2619,12 +2614,26 @@ int uart_add_one_port(struct uart_driver *drv, struct uart_port *uport) uart_configure_port(drv, state, uport); + num_groups = 2; + if (uport->attr_group) + num_groups++; + + uport->tty_groups = kcalloc(num_groups, sizeof(**uport->tty_groups), + GFP_KERNEL); + if (!uport->tty_groups) { + ret = -ENOMEM; + goto out; + } + uport->tty_groups[0] = &tty_dev_attr_group; + if (uport->attr_group) + uport->tty_groups[1] = uport->attr_group; + /* * Register the port whether it's detected or not. This allows * setserial to be used to alter this port's parameters. */ tty_dev = tty_port_register_device_attr(port, drv->tty_driver, - uport->line, uport->dev, port, tty_dev_attr_groups); + uport->line, uport->dev, port, uport->tty_groups); if (likely(!IS_ERR(tty_dev))) { device_set_wakeup_capable(tty_dev, 1); } else { @@ -2703,6 +2712,7 @@ int uart_remove_one_port(struct uart_driver *drv, struct uart_port *uport) */ if (uport->type != PORT_UNKNOWN) uport->ops->release_port(uport); + kfree(uport->tty_groups); /* * Indicate that there isn't a port here anymore. diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 5bbb809ee197..cf3a1e789bf5 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -199,6 +199,8 @@ struct uart_port { unsigned char suspended; unsigned char irq_wake; unsigned char unused[2]; + struct attribute_group *attr_group; /* port specific attributes */ + const struct attribute_group **tty_groups; /* all attributes (serial core use only) */ void *private_data; /* generic platform data pointer */ }; -- cgit v1.2.3-59-g8ed1b From aef9a7bd9b676f797dd5cefd43deb30d36b976a9 Mon Sep 17 00:00:00 2001 From: Yoshihiro YUNOMAE Date: Wed, 16 Jul 2014 01:19:36 +0000 Subject: serial/uart/8250: Add tunable RX interrupt trigger I/F of FIFO buffers Add tunable RX interrupt trigger I/F of FIFO buffers. Serial devices are used as not only message communication devices but control or sending communication devices. For the latter uses, normally small data will be exchanged, so user applications want to receive data unit as soon as possible for real-time tendency. If we have a sensor which sends a 1 byte data each time and must control a device based on the sensor feedback, the RX interrupt should be triggered for each data. According to HW specification of serial UART devices, RX interrupt trigger can be changed, but the trigger is hard-coded. 
For example, RX interrupt trigger in 16550A can be set to 1, 4, 8, or 14 bytes for HW, but current driver sets the trigger to only 8bytes. This patch makes some devices change RX interrupt trigger from userland. - Read current setting # cat /sys/class/tty/ttyS0/rx_trig_bytes 8 - Write user setting # echo 1 > /sys/class/tty/ttyS0/rx_trig_bytes # cat /sys/class/tty/ttyS0/rx_trig_bytes 1 - 16550A and Tegra (1, 4, 8, or 14 bytes) - 16650V2 (8, 16, 24, or 28 bytes) - 16654 (8, 16, 56, or 60 bytes) - 16750 (1, 16, 32, or 56 bytes) Changes in V9: - Use attr_group instead of dev_spec_attr_group of uart_port structure Changes in V8: - Divide this patch from V7's patch based on Greg's comment Changes in V7: - Add Documentation - Change I/F name from rx_int_trig to rx_trig_bytes because the name rx_int_trig is hard to understand how users specify the value Changes in V6: - Move FCR_RX_TRIG_* definition in 8250.h to include/uapi/linux/serial_reg.h, rename those to UART_FCR_R_TRIG_*, and use UART_FCR_TRIGGER_MASK to UART_FCR_R_TRIG_BITS() - Change following function names: convert_fcr2val() => fcr_get_rxtrig_bytes() convert_val2rxtrig() => bytes_to_fcr_rxtrig() - Fix typo in serial8250_do_set_termios() - Delete the verbose error message pr_info() in bytes_to_fcr_rxtrig() - Rename *rx_int_trig/rx_trig* to *rxtrig* for several functions or variables (but UI remains rx_int_trig) - Change the meaningless variable name 'val' to 'bytes' following functions: fcr_get_rxtrig_bytes(), bytes_to_fcr_rxtrig(), do_set_rxtrig(), do_serial8250_set_rxtrig(), and serial8250_set_attr_rxtrig() - Use up->fcr in order to get rxtrig_bytes instead of rx_trig_raw in fcr_get_rxtrig_bytes() - Use conf_type->rxtrig_bytes[0] instead of switch statement for support check in register_dev_spec_attr_grp() - Delete the checking whether a user changed FCR or not when minimum buffer is needed in serial8250_do_set_termios() Changes in V5.1: - Fix FCR_RX_TRIG_MAX_STATE definition Changes in V5: - Support Tegra, 16650V2, 16654, and 16750 - Store default FCR value to up->fcr when the port is first created - Add rx_trig_byte[] in uart_config[] for each device and use rx_trig_byte[] in convert_fcr2val() and convert_val2rxtrig() Changes in V4: - Introduce fifo_bug flag in uart_8250_port structure This is enabled only when parity is enabled and UART_BUG_PARITY is enabled for up->bugs. If this flag is enabled, user cannot set RX trigger. 
- Return -EOPNOTSUPP when the device is not supported at convert_fcr2val() and at convert_val2rxtrig() - Set the nearest lower RX trigger when users input a meaningless value at convert_val2rxtrig() - Check whether p->fcr exists at serial8250_clear_and_reinit_fifos() - Set fcr = up->fcr at the beginning of serial8250_do_set_termios() Changes in V3: - Change I/F from ioctl(2) to sysfs(rx_int_trig) Changed in V2: - Use _IOW for TIOCSFIFORTRIG definition - Pass the interrupt trigger value itself Signed-off-by: Yoshihiro YUNOMAE Signed-off-by: Greg Kroah-Hartman --- Documentation/ABI/testing/sysfs-tty | 16 ++++ drivers/tty/serial/8250/8250.h | 2 + drivers/tty/serial/8250/8250_core.c | 173 ++++++++++++++++++++++++++++++++---- include/linux/serial_8250.h | 2 + include/uapi/linux/serial_reg.h | 5 ++ 5 files changed, 183 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/Documentation/ABI/testing/sysfs-tty b/Documentation/ABI/testing/sysfs-tty index ad22fb0ee765..9eb3c2b6b040 100644 --- a/Documentation/ABI/testing/sysfs-tty +++ b/Documentation/ABI/testing/sysfs-tty @@ -138,3 +138,19 @@ Description: These sysfs values expose the TIOCGSERIAL interface via sysfs rather than via ioctls. + +What: /sys/class/tty/ttyS0/rx_trig_bytes +Date: May 2014 +Contact: Yoshihiro YUNOMAE +Description: + Shows the current RX interrupt trigger in bytes, or sets a + user-specified value to change the FIFO trigger. + Users can show or set this value regardless of whether the + serial device file is open. + + The RX trigger can be set to one of four values for UART + devices. When users write an unsupported value to this I/F, + the RX trigger is changed to the nearest lower value the + device supports. For example, when a user sets 7 bytes on a + 16550A, which has 1/4/8/14 byte triggers, the RX trigger is + automatically changed to 4 bytes.
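As a userspace illustration (not part of the patch), the following does what the echo/cat example in the commit message does, plus a read-back to show the round-down behaviour documented above; ttyS0 and the requested value 7 are arbitrary:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[16] = "";
	int fd = open("/sys/class/tty/ttyS0/rx_trig_bytes", O_RDWR);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* request 7 bytes; on a 16550A (1/4/8/14) this is rounded down to 4 */
	if (write(fd, "7", 1) != 1)
		perror("write");
	lseek(fd, 0, SEEK_SET);
	if (read(fd, buf, sizeof(buf) - 1) > 0)
		printf("effective RX trigger: %s", buf);
	close(fd);
	return 0;
}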
diff --git a/drivers/tty/serial/8250/8250.h b/drivers/tty/serial/8250/8250.h index 1ebf8538b4fa..1b08c918cd51 100644 --- a/drivers/tty/serial/8250/8250.h +++ b/drivers/tty/serial/8250/8250.h @@ -12,6 +12,7 @@ */ #include +#include #include struct uart_8250_dma { @@ -60,6 +61,7 @@ struct serial8250_config { unsigned short fifo_size; unsigned short tx_loadsz; unsigned char fcr; + unsigned char rxtrig_bytes[UART_FCR_R_TRIG_MAX_STATE]; unsigned int flags; }; diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c index 0da01458816e..1d42dba6121d 100644 --- a/drivers/tty/serial/8250/8250_core.c +++ b/drivers/tty/serial/8250/8250_core.c @@ -31,7 +31,6 @@ #include #include #include -#include #include #include #include @@ -161,6 +160,7 @@ static const struct serial8250_config uart_config[] = { .fifo_size = 16, .tx_loadsz = 16, .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, + .rxtrig_bytes = {1, 4, 8, 14}, .flags = UART_CAP_FIFO, }, [PORT_CIRRUS] = { @@ -180,6 +180,7 @@ static const struct serial8250_config uart_config[] = { .tx_loadsz = 16, .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_01 | UART_FCR_T_TRIG_00, + .rxtrig_bytes = {8, 16, 24, 28}, .flags = UART_CAP_FIFO | UART_CAP_EFR | UART_CAP_SLEEP, }, [PORT_16750] = { @@ -188,6 +189,7 @@ static const struct serial8250_config uart_config[] = { .tx_loadsz = 64, .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10 | UART_FCR7_64BYTE, + .rxtrig_bytes = {1, 16, 32, 56}, .flags = UART_CAP_FIFO | UART_CAP_SLEEP | UART_CAP_AFE, }, [PORT_STARTECH] = { @@ -209,6 +211,7 @@ static const struct serial8250_config uart_config[] = { .tx_loadsz = 32, .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_01 | UART_FCR_T_TRIG_10, + .rxtrig_bytes = {8, 16, 56, 60}, .flags = UART_CAP_FIFO | UART_CAP_EFR | UART_CAP_SLEEP, }, [PORT_16850] = { @@ -266,6 +269,7 @@ static const struct serial8250_config uart_config[] = { .tx_loadsz = 8, .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_01 | UART_FCR_T_TRIG_01, + .rxtrig_bytes = {1, 4, 8, 14}, .flags = UART_CAP_FIFO | UART_CAP_RTOIE, }, [PORT_XR17D15X] = { @@ -530,11 +534,8 @@ static void serial8250_clear_fifos(struct uart_8250_port *p) void serial8250_clear_and_reinit_fifos(struct uart_8250_port *p) { - unsigned char fcr; - serial8250_clear_fifos(p); - fcr = uart_config[p->port.type].fcr; - serial_out(p, UART_FCR, fcr); + serial_out(p, UART_FCR, p->fcr); } EXPORT_SYMBOL_GPL(serial8250_clear_and_reinit_fifos); @@ -2256,10 +2257,9 @@ serial8250_do_set_termios(struct uart_port *port, struct ktermios *termios, struct ktermios *old) { struct uart_8250_port *up = up_to_u8250p(port); - unsigned char cval, fcr = 0; + unsigned char cval; unsigned long flags; unsigned int baud, quot; - int fifo_bug = 0; switch (termios->c_cflag & CSIZE) { case CS5: @@ -2282,7 +2282,7 @@ serial8250_do_set_termios(struct uart_port *port, struct ktermios *termios, if (termios->c_cflag & PARENB) { cval |= UART_LCR_PARITY; if (up->bugs & UART_BUG_PARITY) - fifo_bug = 1; + up->fifo_bug = true; } if (!(termios->c_cflag & PARODD)) cval |= UART_LCR_EPAR; @@ -2306,10 +2306,10 @@ serial8250_do_set_termios(struct uart_port *port, struct ktermios *termios, quot++; if (up->capabilities & UART_CAP_FIFO && port->fifosize > 1) { - fcr = uart_config[port->type].fcr; - if ((baud < 2400 && !up->dma) || fifo_bug) { - fcr &= ~UART_FCR_TRIGGER_MASK; - fcr |= UART_FCR_TRIGGER_1; + /* NOTE: If fifo_bug is not set, a user can set RX_trigger. 
*/ + if ((baud < 2400 && !up->dma) || up->fifo_bug) { + up->fcr &= ~UART_FCR_TRIGGER_MASK; + up->fcr |= UART_FCR_TRIGGER_1; } } @@ -2442,15 +2442,15 @@ serial8250_do_set_termios(struct uart_port *port, struct ktermios *termios, * is written without DLAB set, this mode will be disabled. */ if (port->type == PORT_16750) - serial_port_out(port, UART_FCR, fcr); + serial_port_out(port, UART_FCR, up->fcr); serial_port_out(port, UART_LCR, cval); /* reset DLAB */ up->lcr = cval; /* Save LCR */ if (port->type != PORT_16750) { /* emulated UARTs (Lucent Venus 167x) need two steps */ - if (fcr & UART_FCR_ENABLE_FIFO) + if (up->fcr & UART_FCR_ENABLE_FIFO) serial_port_out(port, UART_FCR, UART_FCR_ENABLE_FIFO); - serial_port_out(port, UART_FCR, fcr); /* set fcr */ + serial_port_out(port, UART_FCR, up->fcr); /* set fcr */ } serial8250_set_mctrl(port, port->mctrl); spin_unlock_irqrestore(&port->lock, flags); @@ -2640,6 +2640,146 @@ static int serial8250_request_port(struct uart_port *port) return ret; } +static int fcr_get_rxtrig_bytes(struct uart_8250_port *up) +{ + const struct serial8250_config *conf_type = &uart_config[up->port.type]; + unsigned char bytes; + + bytes = conf_type->rxtrig_bytes[UART_FCR_R_TRIG_BITS(up->fcr)]; + + return bytes ? bytes : -EOPNOTSUPP; +} + +static int bytes_to_fcr_rxtrig(struct uart_8250_port *up, unsigned char bytes) +{ + const struct serial8250_config *conf_type = &uart_config[up->port.type]; + int i; + + if (!conf_type->rxtrig_bytes[UART_FCR_R_TRIG_BITS(UART_FCR_R_TRIG_00)]) + return -EOPNOTSUPP; + + for (i = 1; i < UART_FCR_R_TRIG_MAX_STATE; i++) { + if (bytes < conf_type->rxtrig_bytes[i]) + /* Use the nearest lower value */ + return (--i) << UART_FCR_R_TRIG_SHIFT; + } + + return UART_FCR_R_TRIG_11; +} + +static int do_get_rxtrig(struct tty_port *port) +{ + struct uart_state *state = container_of(port, struct uart_state, port); + struct uart_port *uport = state->uart_port; + struct uart_8250_port *up = + container_of(uport, struct uart_8250_port, port); + + if (!(up->capabilities & UART_CAP_FIFO) || uport->fifosize <= 1) + return -EINVAL; + + return fcr_get_rxtrig_bytes(up); +} + +static int do_serial8250_get_rxtrig(struct tty_port *port) +{ + int rxtrig_bytes; + + mutex_lock(&port->mutex); + rxtrig_bytes = do_get_rxtrig(port); + mutex_unlock(&port->mutex); + + return rxtrig_bytes; +} + +static ssize_t serial8250_get_attr_rx_trig_bytes(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct tty_port *port = dev_get_drvdata(dev); + int rxtrig_bytes; + + rxtrig_bytes = do_serial8250_get_rxtrig(port); + if (rxtrig_bytes < 0) + return rxtrig_bytes; + + return snprintf(buf, PAGE_SIZE, "%d\n", rxtrig_bytes); +} + +static int do_set_rxtrig(struct tty_port *port, unsigned char bytes) +{ + struct uart_state *state = container_of(port, struct uart_state, port); + struct uart_port *uport = state->uart_port; + struct uart_8250_port *up = + container_of(uport, struct uart_8250_port, port); + int rxtrig; + + if (!(up->capabilities & UART_CAP_FIFO) || uport->fifosize <= 1 || + up->fifo_bug) + return -EINVAL; + + rxtrig = bytes_to_fcr_rxtrig(up, bytes); + if (rxtrig < 0) + return rxtrig; + + serial8250_clear_fifos(up); + up->fcr &= ~UART_FCR_TRIGGER_MASK; + up->fcr |= (unsigned char)rxtrig; + serial_out(up, UART_FCR, up->fcr); + return 0; +} + +static int do_serial8250_set_rxtrig(struct tty_port *port, unsigned char bytes) +{ + int ret; + + mutex_lock(&port->mutex); + ret = do_set_rxtrig(port, bytes); + mutex_unlock(&port->mutex); + + return ret; +} + +static 
ssize_t serial8250_set_attr_rx_trig_bytes(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct tty_port *port = dev_get_drvdata(dev); + unsigned char bytes; + int ret; + + if (!count) + return -EINVAL; + + ret = kstrtou8(buf, 10, &bytes); + if (ret < 0) + return ret; + + ret = do_serial8250_set_rxtrig(port, bytes); + if (ret < 0) + return ret; + + return count; +} + +static DEVICE_ATTR(rx_trig_bytes, S_IRUSR | S_IWUSR | S_IRGRP, + serial8250_get_attr_rx_trig_bytes, + serial8250_set_attr_rx_trig_bytes); + +static struct attribute *serial8250_dev_attrs[] = { + &dev_attr_rx_trig_bytes.attr, + NULL, + }; + +static struct attribute_group serial8250_dev_attr_group = { + .attrs = serial8250_dev_attrs, + }; + +static void register_dev_spec_attr_grp(struct uart_8250_port *up) +{ + const struct serial8250_config *conf_type = &uart_config[up->port.type]; + + if (conf_type->rxtrig_bytes[0]) + up->port.attr_group = &serial8250_dev_attr_group; +} + static void serial8250_config_port(struct uart_port *port, int flags) { struct uart_8250_port *up = up_to_u8250p(port); @@ -2687,6 +2827,9 @@ static void serial8250_config_port(struct uart_port *port, int flags) if ((port->type == PORT_XR17V35X) || (port->type == PORT_XR17D15X)) port->handle_irq = exar_handle_irq; + + register_dev_spec_attr_grp(up); + up->fcr = uart_config[up->port.type].fcr; } static int diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index 730ab4b3d686..f93649e22c43 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -74,8 +74,10 @@ struct uart_8250_port { struct list_head list; /* ports on this IRQ */ unsigned short capabilities; /* port capabilities */ unsigned short bugs; /* port bugs */ + bool fifo_bug; /* min RX trigger if enabled */ unsigned int tx_loadsz; /* transmit fifo load size */ unsigned char acr; + unsigned char fcr; unsigned char ier; unsigned char lcr; unsigned char mcr; diff --git a/include/uapi/linux/serial_reg.h b/include/uapi/linux/serial_reg.h index 99b47058816a..df6c9ab6b0cd 100644 --- a/include/uapi/linux/serial_reg.h +++ b/include/uapi/linux/serial_reg.h @@ -88,6 +88,11 @@ #define UART_FCR6_T_TRIGGER_30 0x30 /* Mask for transmit trigger set at 30 */ #define UART_FCR7_64BYTE 0x20 /* Go into 64 byte mode (TI16C750) */ +#define UART_FCR_R_TRIG_SHIFT 6 +#define UART_FCR_R_TRIG_BITS(x) \ + (((x) & UART_FCR_TRIGGER_MASK) >> UART_FCR_R_TRIG_SHIFT) +#define UART_FCR_R_TRIG_MAX_STATE 4 + #define UART_LCR 3 /* Out: Line Control Register */ /* * Note: if the word length is 5 bits (UART_LCR_WLEN5), then setting -- cgit v1.2.3-59-g8ed1b From 75f2a4ead5d5890ada9c2663a70fb58613c0d9f2 Mon Sep 17 00:00:00 2001 From: Himangi Saraogi Date: Thu, 17 Jul 2014 02:27:52 +0530 Subject: devres: Add devm_kasprintf and devm_kvasprintf API devm_kasprintf() and devm_kvasprintf() are the managed counterparts for kasprintf() and kvasprintf(). 
Signed-off-by: Himangi Saraogi Acked-by: Julia Lawall Acked-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- drivers/base/devres.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/device.h | 4 ++++ 2 files changed, 59 insertions(+) (limited to 'include/linux') diff --git a/drivers/base/devres.c b/drivers/base/devres.c index 52302946770f..69d9b0c89a01 100644 --- a/drivers/base/devres.c +++ b/drivers/base/devres.c @@ -816,6 +816,61 @@ char *devm_kstrdup(struct device *dev, const char *s, gfp_t gfp) } EXPORT_SYMBOL_GPL(devm_kstrdup); +/** + * devm_kvasprintf - Allocate resource managed space + * for the formatted string. + * @dev: Device to allocate memory for + * @gfp: the GFP mask used in the devm_kmalloc() call when + * allocating memory + * @fmt: the formatted string to duplicate + * @ap: the list of tokens to be placed in the formatted string + * RETURNS: + * Pointer to allocated string on success, NULL on failure. + */ +char *devm_kvasprintf(struct device *dev, gfp_t gfp, const char *fmt, + va_list ap) +{ + unsigned int len; + char *p; + va_list aq; + + va_copy(aq, ap); + len = vsnprintf(NULL, 0, fmt, aq); + va_end(aq); + + p = devm_kmalloc(dev, len+1, gfp); + if (!p) + return NULL; + + vsnprintf(p, len+1, fmt, ap); + + return p; +} +EXPORT_SYMBOL(devm_kvasprintf); + +/** + * devm_kasprintf - Allocate resource managed space + * and copy an existing formatted string into that + * @dev: Device to allocate memory for + * @gfp: the GFP mask used in the devm_kmalloc() call when + * allocating memory + * @fmt: the string to duplicate + * RETURNS: + * Pointer to allocated string on success, NULL on failure. + */ +char *devm_kasprintf(struct device *dev, gfp_t gfp, const char *fmt, ...) +{ + va_list ap; + char *p; + + va_start(ap, fmt); + p = devm_kvasprintf(dev, gfp, fmt, ap); + va_end(ap); + + return p; +} +EXPORT_SYMBOL_GPL(devm_kasprintf); + /** * devm_kfree - Resource-managed kfree * @dev: Device this memory belongs to diff --git a/include/linux/device.h b/include/linux/device.h index af424acd393d..34a24ce30620 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -605,6 +605,10 @@ extern int devres_release_group(struct device *dev, void *id); /* managed devm_k.alloc/kfree for device drivers */ extern void *devm_kmalloc(struct device *dev, size_t size, gfp_t gfp); +extern char *devm_kvasprintf(struct device *dev, gfp_t gfp, const char *fmt, + va_list ap); +extern char *devm_kasprintf(struct device *dev, gfp_t gfp, + const char *fmt, ...); static inline void *devm_kzalloc(struct device *dev, size_t size, gfp_t gfp) { return devm_kmalloc(dev, size, gfp | __GFP_ZERO); -- cgit v1.2.3-59-g8ed1b From e1b243772d455f3b25b410a15a5677a9e74ffa37 Mon Sep 17 00:00:00 2001 From: Alexander Shiyan Date: Tue, 20 May 2014 20:43:51 +0400 Subject: ARM: i.MX: Remove registration helper for i.MX1 USB UDC imx_udc driver was removed from the kernel of about 10 months ago. This patch removes a registration helper for this driver and orphaned driver header. 
Signed-off-by: Alexander Shiyan Signed-off-by: Shawn Guo --- arch/arm/mach-imx/devices/Kconfig | 3 -- arch/arm/mach-imx/devices/Makefile | 1 - arch/arm/mach-imx/devices/devices-common.h | 16 ------ arch/arm/mach-imx/devices/platform-imx_udc.c | 75 ---------------------------- include/linux/platform_data/usb-imx_udc.h | 23 --------- 5 files changed, 118 deletions(-) delete mode 100644 arch/arm/mach-imx/devices/platform-imx_udc.c delete mode 100644 include/linux/platform_data/usb-imx_udc.h (limited to 'include/linux') diff --git a/arch/arm/mach-imx/devices/Kconfig b/arch/arm/mach-imx/devices/Kconfig index 2d260a5a307c..dd7e0b45fb18 100644 --- a/arch/arm/mach-imx/devices/Kconfig +++ b/arch/arm/mach-imx/devices/Kconfig @@ -43,9 +43,6 @@ config IMX_HAVE_PLATFORM_IMX_SSI config IMX_HAVE_PLATFORM_IMX_UART bool -config IMX_HAVE_PLATFORM_IMX_UDC - bool - config IMX_HAVE_PLATFORM_IPU_CORE bool diff --git a/arch/arm/mach-imx/devices/Makefile b/arch/arm/mach-imx/devices/Makefile index 1cbc14cd80d1..6bb144dd680e 100644 --- a/arch/arm/mach-imx/devices/Makefile +++ b/arch/arm/mach-imx/devices/Makefile @@ -16,7 +16,6 @@ obj-$(CONFIG_IMX_HAVE_PLATFORM_IMX_KEYPAD) += platform-imx-keypad.o obj-$(CONFIG_IMX_HAVE_PLATFORM_PATA_IMX) += platform-pata_imx.o obj-$(CONFIG_IMX_HAVE_PLATFORM_IMX_SSI) += platform-imx-ssi.o obj-$(CONFIG_IMX_HAVE_PLATFORM_IMX_UART) += platform-imx-uart.o -obj-$(CONFIG_IMX_HAVE_PLATFORM_IMX_UDC) += platform-imx_udc.o obj-$(CONFIG_IMX_HAVE_PLATFORM_IPU_CORE) += platform-ipu-core.o obj-$(CONFIG_IMX_HAVE_PLATFORM_MX1_CAMERA) += platform-mx1-camera.o obj-$(CONFIG_IMX_HAVE_PLATFORM_MX2_CAMERA) += platform-mx2-camera.o diff --git a/arch/arm/mach-imx/devices/devices-common.h b/arch/arm/mach-imx/devices/devices-common.h index 61352a80bb59..69bafc884dfa 100644 --- a/arch/arm/mach-imx/devices/devices-common.h +++ b/arch/arm/mach-imx/devices/devices-common.h @@ -176,22 +176,6 @@ struct platform_device *__init imx_add_imx_uart_1irq( const struct imx_imx_uart_1irq_data *data, const struct imxuart_platform_data *pdata); -#include -struct imx_imx_udc_data { - resource_size_t iobase; - resource_size_t iosize; - resource_size_t irq0; - resource_size_t irq1; - resource_size_t irq2; - resource_size_t irq3; - resource_size_t irq4; - resource_size_t irq5; - resource_size_t irq6; -}; -struct platform_device *__init imx_add_imx_udc( - const struct imx_imx_udc_data *data, - const struct imxusb_platform_data *pdata); - #include #include struct imx_ipu_core_data { diff --git a/arch/arm/mach-imx/devices/platform-imx_udc.c b/arch/arm/mach-imx/devices/platform-imx_udc.c deleted file mode 100644 index 5ced7e4e2c71..000000000000 --- a/arch/arm/mach-imx/devices/platform-imx_udc.c +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (C) 2010 Pengutronix - * Uwe Kleine-Koenig - * - * This program is free software; you can redistribute it and/or modify it under - * the terms of the GNU General Public License version 2 as published by the - * Free Software Foundation. 
- */ -#include "../hardware.h" -#include "devices-common.h" - -#define imx_imx_udc_data_entry_single(soc, _size) \ - { \ - .iobase = soc ## _USBD_BASE_ADDR, \ - .iosize = _size, \ - .irq0 = soc ## _INT_USBD0, \ - .irq1 = soc ## _INT_USBD1, \ - .irq2 = soc ## _INT_USBD2, \ - .irq3 = soc ## _INT_USBD3, \ - .irq4 = soc ## _INT_USBD4, \ - .irq5 = soc ## _INT_USBD5, \ - .irq6 = soc ## _INT_USBD6, \ - } - -#define imx_imx_udc_data_entry(soc, _size) \ - [_id] = imx_imx_udc_data_entry_single(soc, _size) - -#ifdef CONFIG_SOC_IMX1 -const struct imx_imx_udc_data imx1_imx_udc_data __initconst = - imx_imx_udc_data_entry_single(MX1, SZ_4K); -#endif /* ifdef CONFIG_SOC_IMX1 */ - -struct platform_device *__init imx_add_imx_udc( - const struct imx_imx_udc_data *data, - const struct imxusb_platform_data *pdata) -{ - struct resource res[] = { - { - .start = data->iobase, - .end = data->iobase + data->iosize - 1, - .flags = IORESOURCE_MEM, - }, { - .start = data->irq0, - .end = data->irq0, - .flags = IORESOURCE_IRQ, - }, { - .start = data->irq1, - .end = data->irq1, - .flags = IORESOURCE_IRQ, - }, { - .start = data->irq2, - .end = data->irq2, - .flags = IORESOURCE_IRQ, - }, { - .start = data->irq3, - .end = data->irq3, - .flags = IORESOURCE_IRQ, - }, { - .start = data->irq4, - .end = data->irq4, - .flags = IORESOURCE_IRQ, - }, { - .start = data->irq5, - .end = data->irq5, - .flags = IORESOURCE_IRQ, - }, { - .start = data->irq6, - .end = data->irq6, - .flags = IORESOURCE_IRQ, - }, - }; - - return imx_add_platform_device("imx_udc", 0, - res, ARRAY_SIZE(res), pdata, sizeof(*pdata)); -} diff --git a/include/linux/platform_data/usb-imx_udc.h b/include/linux/platform_data/usb-imx_udc.h deleted file mode 100644 index be273371f34a..000000000000 --- a/include/linux/platform_data/usb-imx_udc.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright (C) 2008 Darius Augulis - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ - -#ifndef __ASM_ARCH_MXC_USB -#define __ASM_ARCH_MXC_USB - -struct imxusb_platform_data { - int (*init)(struct device *); - void (*exit)(struct device *); -}; - -#endif /* __ASM_ARCH_MXC_USB */ -- cgit v1.2.3-59-g8ed1b From 641dfe8b73e81aa38cbbeab72a480462a4987963 Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Mon, 19 May 2014 20:41:52 +0800 Subject: ARM: imx: move EHCI platform defines out of platform_data header The platform_data header usb-ehci-mxc.h has a lot of stuff used by only IMX platform code. They shouldn't be really in this header but a IMX platform local header. Create ehci.h and move these stuff into it. 
Signed-off-by: Shawn Guo --- arch/arm/mach-imx/ehci-imx25.c | 1 + arch/arm/mach-imx/ehci-imx27.c | 1 + arch/arm/mach-imx/ehci-imx31.c | 1 + arch/arm/mach-imx/ehci-imx35.c | 1 + arch/arm/mach-imx/ehci-imx5.c | 1 + arch/arm/mach-imx/ehci.h | 50 +++++++++++++++++++++++++++++ arch/arm/mach-imx/mach-armadillo5x0.c | 1 + arch/arm/mach-imx/mach-cpuimx27.c | 1 + arch/arm/mach-imx/mach-cpuimx35.c | 1 + arch/arm/mach-imx/mach-eukrea_cpuimx25.c | 1 + arch/arm/mach-imx/mach-imx27_visstrim_m10.c | 1 + arch/arm/mach-imx/mach-mx25_3ds.c | 1 + arch/arm/mach-imx/mach-mx27_3ds.c | 1 + arch/arm/mach-imx/mach-mx31_3ds.c | 1 + arch/arm/mach-imx/mach-mx31lilly.c | 1 + arch/arm/mach-imx/mach-mx31lite.c | 1 + arch/arm/mach-imx/mach-mx31moboard.c | 1 + arch/arm/mach-imx/mach-mx35_3ds.c | 1 + arch/arm/mach-imx/mach-pca100.c | 1 + arch/arm/mach-imx/mach-pcm037.c | 1 + arch/arm/mach-imx/mach-pcm038.c | 1 + arch/arm/mach-imx/mach-pcm043.c | 1 + arch/arm/mach-imx/mach-vpr200.c | 1 + arch/arm/mach-imx/mx31moboard-devboard.c | 1 + arch/arm/mach-imx/mx31moboard-marxbot.c | 1 + arch/arm/mach-imx/mx31moboard-smartbot.c | 1 + include/linux/platform_data/usb-ehci-mxc.h | 46 -------------------------- 27 files changed, 75 insertions(+), 46 deletions(-) create mode 100644 arch/arm/mach-imx/ehci.h (limited to 'include/linux') diff --git a/arch/arm/mach-imx/ehci-imx25.c b/arch/arm/mach-imx/ehci-imx25.c index 134c190e3003..42a5a3d14c5f 100644 --- a/arch/arm/mach-imx/ehci-imx25.c +++ b/arch/arm/mach-imx/ehci-imx25.c @@ -17,6 +17,7 @@ #include #include +#include "ehci.h" #include "hardware.h" #define USBCTRL_OTGBASE_OFFSET 0x600 diff --git a/arch/arm/mach-imx/ehci-imx27.c b/arch/arm/mach-imx/ehci-imx27.c index 448d9115539d..c56974346c16 100644 --- a/arch/arm/mach-imx/ehci-imx27.c +++ b/arch/arm/mach-imx/ehci-imx27.c @@ -17,6 +17,7 @@ #include #include +#include "ehci.h" #include "hardware.h" #define USBCTRL_OTGBASE_OFFSET 0x600 diff --git a/arch/arm/mach-imx/ehci-imx31.c b/arch/arm/mach-imx/ehci-imx31.c index 05de4e1e39d7..bede21d9b981 100644 --- a/arch/arm/mach-imx/ehci-imx31.c +++ b/arch/arm/mach-imx/ehci-imx31.c @@ -17,6 +17,7 @@ #include #include +#include "ehci.h" #include "hardware.h" #define USBCTRL_OTGBASE_OFFSET 0x600 diff --git a/arch/arm/mach-imx/ehci-imx35.c b/arch/arm/mach-imx/ehci-imx35.c index 554e7cccff53..f424a543755c 100644 --- a/arch/arm/mach-imx/ehci-imx35.c +++ b/arch/arm/mach-imx/ehci-imx35.c @@ -17,6 +17,7 @@ #include #include +#include "ehci.h" #include "hardware.h" #define USBCTRL_OTGBASE_OFFSET 0x600 diff --git a/arch/arm/mach-imx/ehci-imx5.c b/arch/arm/mach-imx/ehci-imx5.c index e49710b10c68..74bfdd970bfe 100644 --- a/arch/arm/mach-imx/ehci-imx5.c +++ b/arch/arm/mach-imx/ehci-imx5.c @@ -17,6 +17,7 @@ #include #include +#include "ehci.h" #include "hardware.h" #define MXC_OTG_OFFSET 0 diff --git a/arch/arm/mach-imx/ehci.h b/arch/arm/mach-imx/ehci.h new file mode 100644 index 000000000000..4f42ca380b26 --- /dev/null +++ b/arch/arm/mach-imx/ehci.h @@ -0,0 +1,50 @@ +#ifndef __MACH_IMX_EHCI_H +#define __MACH_IMX_EHCI_H + +/* values for portsc field */ +#define MXC_EHCI_PHY_LOW_POWER_SUSPEND (1 << 23) +#define MXC_EHCI_FORCE_FS (1 << 24) +#define MXC_EHCI_UTMI_8BIT (0 << 28) +#define MXC_EHCI_UTMI_16BIT (1 << 28) +#define MXC_EHCI_SERIAL (1 << 29) +#define MXC_EHCI_MODE_UTMI (0 << 30) +#define MXC_EHCI_MODE_PHILIPS (1 << 30) +#define MXC_EHCI_MODE_ULPI (2 << 30) +#define MXC_EHCI_MODE_SERIAL (3 << 30) + +/* values for flags field */ +#define MXC_EHCI_INTERFACE_DIFF_UNI (0 << 0) +#define MXC_EHCI_INTERFACE_DIFF_BI 
(1 << 0) +#define MXC_EHCI_INTERFACE_SINGLE_UNI (2 << 0) +#define MXC_EHCI_INTERFACE_SINGLE_BI (3 << 0) +#define MXC_EHCI_INTERFACE_MASK (0xf) + +#define MXC_EHCI_POWER_PINS_ENABLED (1 << 5) +#define MXC_EHCI_PWR_PIN_ACTIVE_HIGH (1 << 6) +#define MXC_EHCI_OC_PIN_ACTIVE_LOW (1 << 7) +#define MXC_EHCI_TTL_ENABLED (1 << 8) + +#define MXC_EHCI_INTERNAL_PHY (1 << 9) +#define MXC_EHCI_IPPUE_DOWN (1 << 10) +#define MXC_EHCI_IPPUE_UP (1 << 11) +#define MXC_EHCI_WAKEUP_ENABLED (1 << 12) +#define MXC_EHCI_ITC_NO_THRESHOLD (1 << 13) + +#define MXC_USBCTRL_OFFSET 0 +#define MXC_USB_PHY_CTR_FUNC_OFFSET 0x8 +#define MXC_USB_PHY_CTR_FUNC2_OFFSET 0xc +#define MXC_USBH2CTRL_OFFSET 0x14 + +#define MX5_USBOTHER_REGS_OFFSET 0x800 + +/* USB_PHY_CTRL_FUNC2*/ +#define MX5_USB_UTMI_PHYCTRL1_PLLDIV_MASK 0x3 +#define MX5_USB_UTMI_PHYCTRL1_PLLDIV_SHIFT 0 + +int mx51_initialize_usb_hw(int port, unsigned int flags); +int mx25_initialize_usb_hw(int port, unsigned int flags); +int mx31_initialize_usb_hw(int port, unsigned int flags); +int mx35_initialize_usb_hw(int port, unsigned int flags); +int mx27_initialize_usb_hw(int port, unsigned int flags); + +#endif /* __MACH_IMX_EHCI_H */ diff --git a/arch/arm/mach-imx/mach-armadillo5x0.c b/arch/arm/mach-imx/mach-armadillo5x0.c index 39406b7e3228..a7e9bd26a552 100644 --- a/arch/arm/mach-imx/mach-armadillo5x0.c +++ b/arch/arm/mach-imx/mach-armadillo5x0.c @@ -50,6 +50,7 @@ #include "common.h" #include "devices-imx31.h" #include "crmregs-imx3.h" +#include "ehci.h" #include "hardware.h" #include "iomux-mx3.h" #include "ulpi.h" diff --git a/arch/arm/mach-imx/mach-cpuimx27.c b/arch/arm/mach-imx/mach-cpuimx27.c index 75b7b6aa2720..e6d4b9929571 100644 --- a/arch/arm/mach-imx/mach-cpuimx27.c +++ b/arch/arm/mach-imx/mach-cpuimx27.c @@ -36,6 +36,7 @@ #include "common.h" #include "devices-imx27.h" +#include "ehci.h" #include "eukrea-baseboards.h" #include "hardware.h" #include "iomux-mx27.h" diff --git a/arch/arm/mach-imx/mach-cpuimx35.c b/arch/arm/mach-imx/mach-cpuimx35.c index 1ffa27169045..62a6e02f4763 100644 --- a/arch/arm/mach-imx/mach-cpuimx35.c +++ b/arch/arm/mach-imx/mach-cpuimx35.c @@ -39,6 +39,7 @@ #include "common.h" #include "devices-imx35.h" +#include "ehci.h" #include "eukrea-baseboards.h" #include "hardware.h" #include "iomux-mx35.h" diff --git a/arch/arm/mach-imx/mach-eukrea_cpuimx25.c b/arch/arm/mach-imx/mach-eukrea_cpuimx25.c index e978dda1434c..b2ee6e009fe4 100644 --- a/arch/arm/mach-imx/mach-eukrea_cpuimx25.c +++ b/arch/arm/mach-imx/mach-eukrea_cpuimx25.c @@ -35,6 +35,7 @@ #include "common.h" #include "devices-imx25.h" +#include "ehci.h" #include "eukrea-baseboards.h" #include "hardware.h" #include "iomux-mx25.h" diff --git a/arch/arm/mach-imx/mach-imx27_visstrim_m10.c b/arch/arm/mach-imx/mach-imx27_visstrim_m10.c index b61bd8ed5568..ede2bdbb5dd5 100644 --- a/arch/arm/mach-imx/mach-imx27_visstrim_m10.c +++ b/arch/arm/mach-imx/mach-imx27_visstrim_m10.c @@ -43,6 +43,7 @@ #include "common.h" #include "devices-imx27.h" +#include "ehci.h" #include "hardware.h" #include "iomux-mx27.h" diff --git a/arch/arm/mach-imx/mach-mx25_3ds.c b/arch/arm/mach-imx/mach-mx25_3ds.c index ea1fa199c148..0d01e367b062 100644 --- a/arch/arm/mach-imx/mach-mx25_3ds.c +++ b/arch/arm/mach-imx/mach-mx25_3ds.c @@ -39,6 +39,7 @@ #include "common.h" #include "devices-imx25.h" +#include "ehci.h" #include "hardware.h" #include "iomux-mx25.h" #include "mx25.h" diff --git a/arch/arm/mach-imx/mach-mx27_3ds.c b/arch/arm/mach-imx/mach-mx27_3ds.c index 435a5428a678..9ef4640f3660 100644 --- 
a/arch/arm/mach-imx/mach-mx27_3ds.c +++ b/arch/arm/mach-imx/mach-mx27_3ds.c @@ -40,6 +40,7 @@ #include "3ds_debugboard.h" #include "common.h" #include "devices-imx27.h" +#include "ehci.h" #include "hardware.h" #include "iomux-mx27.h" #include "ulpi.h" diff --git a/arch/arm/mach-imx/mach-mx31_3ds.c b/arch/arm/mach-imx/mach-mx31_3ds.c index 4217871a9653..453f41a2c5a9 100644 --- a/arch/arm/mach-imx/mach-mx31_3ds.c +++ b/arch/arm/mach-imx/mach-mx31_3ds.c @@ -40,6 +40,7 @@ #include "3ds_debugboard.h" #include "common.h" #include "devices-imx31.h" +#include "ehci.h" #include "hardware.h" #include "iomux-mx3.h" #include "ulpi.h" diff --git a/arch/arm/mach-imx/mach-mx31lilly.c b/arch/arm/mach-imx/mach-mx31lilly.c index eee042fa2768..e9549a3c0223 100644 --- a/arch/arm/mach-imx/mach-mx31lilly.c +++ b/arch/arm/mach-imx/mach-mx31lilly.c @@ -45,6 +45,7 @@ #include "board-mx31lilly.h" #include "common.h" #include "devices-imx31.h" +#include "ehci.h" #include "hardware.h" #include "iomux-mx3.h" #include "ulpi.h" diff --git a/arch/arm/mach-imx/mach-mx31lite.c b/arch/arm/mach-imx/mach-mx31lite.c index fa15d0b6118d..57eac6f45fab 100644 --- a/arch/arm/mach-imx/mach-mx31lite.c +++ b/arch/arm/mach-imx/mach-mx31lite.c @@ -42,6 +42,7 @@ #include "board-mx31lite.h" #include "common.h" #include "devices-imx31.h" +#include "ehci.h" #include "hardware.h" #include "iomux-mx3.h" #include "ulpi.h" diff --git a/arch/arm/mach-imx/mach-mx31moboard.c b/arch/arm/mach-imx/mach-mx31moboard.c index 08730f238449..6bed57040973 100644 --- a/arch/arm/mach-imx/mach-mx31moboard.c +++ b/arch/arm/mach-imx/mach-mx31moboard.c @@ -47,6 +47,7 @@ #include "board-mx31moboard.h" #include "common.h" #include "devices-imx31.h" +#include "ehci.h" #include "hardware.h" #include "iomux-mx3.h" #include "ulpi.h" diff --git a/arch/arm/mach-imx/mach-mx35_3ds.c b/arch/arm/mach-imx/mach-mx35_3ds.c index 4e8b184d773b..72cd77d21f63 100644 --- a/arch/arm/mach-imx/mach-mx35_3ds.c +++ b/arch/arm/mach-imx/mach-mx35_3ds.c @@ -50,6 +50,7 @@ #include "3ds_debugboard.h" #include "common.h" #include "devices-imx35.h" +#include "ehci.h" #include "hardware.h" #include "iomux-mx35.h" diff --git a/arch/arm/mach-imx/mach-pca100.c b/arch/arm/mach-imx/mach-pca100.c index 12212378c672..2d1c50bd8bdf 100644 --- a/arch/arm/mach-imx/mach-pca100.c +++ b/arch/arm/mach-imx/mach-pca100.c @@ -36,6 +36,7 @@ #include "common.h" #include "devices-imx27.h" +#include "ehci.h" #include "hardware.h" #include "iomux-mx27.h" #include "ulpi.h" diff --git a/arch/arm/mach-imx/mach-pcm037.c b/arch/arm/mach-imx/mach-pcm037.c index 81b8affb9448..8eb1570f7851 100644 --- a/arch/arm/mach-imx/mach-pcm037.c +++ b/arch/arm/mach-imx/mach-pcm037.c @@ -45,6 +45,7 @@ #include "common.h" #include "devices-imx31.h" +#include "ehci.h" #include "hardware.h" #include "iomux-mx3.h" #include "pcm037.h" diff --git a/arch/arm/mach-imx/mach-pcm038.c b/arch/arm/mach-imx/mach-pcm038.c index 6c56fb5553c7..ee862ad6b6fc 100644 --- a/arch/arm/mach-imx/mach-pcm038.c +++ b/arch/arm/mach-imx/mach-pcm038.c @@ -36,6 +36,7 @@ #include "board-pcm038.h" #include "common.h" #include "devices-imx27.h" +#include "ehci.h" #include "hardware.h" #include "iomux-mx27.h" #include "ulpi.h" diff --git a/arch/arm/mach-imx/mach-pcm043.c b/arch/arm/mach-imx/mach-pcm043.c index c62b5d261345..b623bcaca76c 100644 --- a/arch/arm/mach-imx/mach-pcm043.c +++ b/arch/arm/mach-imx/mach-pcm043.c @@ -35,6 +35,7 @@ #include "common.h" #include "devices-imx35.h" +#include "ehci.h" #include "hardware.h" #include "iomux-mx35.h" #include "ulpi.h" diff --git 
a/arch/arm/mach-imx/mach-vpr200.c b/arch/arm/mach-imx/mach-vpr200.c index 872b3c6ba408..97836e94451c 100644 --- a/arch/arm/mach-imx/mach-vpr200.c +++ b/arch/arm/mach-imx/mach-vpr200.c @@ -34,6 +34,7 @@ #include "common.h" #include "devices-imx35.h" +#include "ehci.h" #include "hardware.h" #include "iomux-mx35.h" diff --git a/arch/arm/mach-imx/mx31moboard-devboard.c b/arch/arm/mach-imx/mx31moboard-devboard.c index 52d5b1574721..1e4ea1640a2a 100644 --- a/arch/arm/mach-imx/mx31moboard-devboard.c +++ b/arch/arm/mach-imx/mx31moboard-devboard.c @@ -24,6 +24,7 @@ #include "common.h" #include "devices-imx31.h" +#include "ehci.h" #include "hardware.h" #include "iomux-mx3.h" #include "ulpi.h" diff --git a/arch/arm/mach-imx/mx31moboard-marxbot.c b/arch/arm/mach-imx/mx31moboard-marxbot.c index a4f43e90f3c1..699d01a4fef8 100644 --- a/arch/arm/mach-imx/mx31moboard-marxbot.c +++ b/arch/arm/mach-imx/mx31moboard-marxbot.c @@ -28,6 +28,7 @@ #include "common.h" #include "devices-imx31.h" +#include "ehci.h" #include "hardware.h" #include "iomux-mx3.h" #include "ulpi.h" diff --git a/arch/arm/mach-imx/mx31moboard-smartbot.c b/arch/arm/mach-imx/mx31moboard-smartbot.c index 04ae45dbfaa7..4b3d66eb8d34 100644 --- a/arch/arm/mach-imx/mx31moboard-smartbot.c +++ b/arch/arm/mach-imx/mx31moboard-smartbot.c @@ -28,6 +28,7 @@ #include "board-mx31moboard.h" #include "common.h" #include "devices-imx31.h" +#include "ehci.h" #include "hardware.h" #include "iomux-mx3.h" #include "ulpi.h" diff --git a/include/linux/platform_data/usb-ehci-mxc.h b/include/linux/platform_data/usb-ehci-mxc.h index 7eb9d1329671..157e71f79f99 100644 --- a/include/linux/platform_data/usb-ehci-mxc.h +++ b/include/linux/platform_data/usb-ehci-mxc.h @@ -1,46 +1,6 @@ #ifndef __INCLUDE_ASM_ARCH_MXC_EHCI_H #define __INCLUDE_ASM_ARCH_MXC_EHCI_H -/* values for portsc field */ -#define MXC_EHCI_PHY_LOW_POWER_SUSPEND (1 << 23) -#define MXC_EHCI_FORCE_FS (1 << 24) -#define MXC_EHCI_UTMI_8BIT (0 << 28) -#define MXC_EHCI_UTMI_16BIT (1 << 28) -#define MXC_EHCI_SERIAL (1 << 29) -#define MXC_EHCI_MODE_UTMI (0 << 30) -#define MXC_EHCI_MODE_PHILIPS (1 << 30) -#define MXC_EHCI_MODE_ULPI (2 << 30) -#define MXC_EHCI_MODE_SERIAL (3 << 30) - -/* values for flags field */ -#define MXC_EHCI_INTERFACE_DIFF_UNI (0 << 0) -#define MXC_EHCI_INTERFACE_DIFF_BI (1 << 0) -#define MXC_EHCI_INTERFACE_SINGLE_UNI (2 << 0) -#define MXC_EHCI_INTERFACE_SINGLE_BI (3 << 0) -#define MXC_EHCI_INTERFACE_MASK (0xf) - -#define MXC_EHCI_POWER_PINS_ENABLED (1 << 5) -#define MXC_EHCI_PWR_PIN_ACTIVE_HIGH (1 << 6) -#define MXC_EHCI_OC_PIN_ACTIVE_LOW (1 << 7) -#define MXC_EHCI_TTL_ENABLED (1 << 8) - -#define MXC_EHCI_INTERNAL_PHY (1 << 9) -#define MXC_EHCI_IPPUE_DOWN (1 << 10) -#define MXC_EHCI_IPPUE_UP (1 << 11) -#define MXC_EHCI_WAKEUP_ENABLED (1 << 12) -#define MXC_EHCI_ITC_NO_THRESHOLD (1 << 13) - -#define MXC_USBCTRL_OFFSET 0 -#define MXC_USB_PHY_CTR_FUNC_OFFSET 0x8 -#define MXC_USB_PHY_CTR_FUNC2_OFFSET 0xc -#define MXC_USBH2CTRL_OFFSET 0x14 - -#define MX5_USBOTHER_REGS_OFFSET 0x800 - -/* USB_PHY_CTRL_FUNC2*/ -#define MX5_USB_UTMI_PHYCTRL1_PLLDIV_MASK 0x3 -#define MX5_USB_UTMI_PHYCTRL1_PLLDIV_SHIFT 0 - struct mxc_usbh_platform_data { int (*init)(struct platform_device *pdev); int (*exit)(struct platform_device *pdev); @@ -49,11 +9,5 @@ struct mxc_usbh_platform_data { struct usb_phy *otg; }; -int mx51_initialize_usb_hw(int port, unsigned int flags); -int mx25_initialize_usb_hw(int port, unsigned int flags); -int mx31_initialize_usb_hw(int port, unsigned int flags); -int mx35_initialize_usb_hw(int 
port, unsigned int flags); -int mx27_initialize_usb_hw(int port, unsigned int flags); - #endif /* __INCLUDE_ASM_ARCH_MXC_EHCI_H */ -- cgit v1.2.3-59-g8ed1b From 35e3bc535d0437ca5f32985a294703ce48c75d88 Mon Sep 17 00:00:00 2001 From: Alexander Shiyan Date: Sat, 5 Jul 2014 08:51:38 +0400 Subject: ARM: i.MX: Remove i.MX1 camera support i.MX1 camera driver has been removed by the commit 90b055898e. This patch removes remaining support files for this camera. Signed-off-by: Alexander Shiyan Signed-off-by: Shawn Guo --- arch/arm/mach-imx/Makefile | 3 -- arch/arm/mach-imx/devices/Kconfig | 3 -- arch/arm/mach-imx/devices/Makefile | 1 - arch/arm/mach-imx/devices/devices-common.h | 10 ------ arch/arm/mach-imx/devices/platform-mx1-camera.c | 42 ------------------------- arch/arm/mach-imx/mx1-camera-fiq-ksym.c | 18 ----------- arch/arm/mach-imx/mx1-camera-fiq.S | 35 --------------------- include/linux/platform_data/camera-mx1.h | 35 --------------------- 8 files changed, 147 deletions(-) delete mode 100644 arch/arm/mach-imx/devices/platform-mx1-camera.c delete mode 100644 arch/arm/mach-imx/mx1-camera-fiq-ksym.c delete mode 100644 arch/arm/mach-imx/mx1-camera-fiq.S delete mode 100644 include/linux/platform_data/camera-mx1.h (limited to 'include/linux') diff --git a/arch/arm/mach-imx/Makefile b/arch/arm/mach-imx/Makefile index a3d9712567c9..d1c1d38f3741 100644 --- a/arch/arm/mach-imx/Makefile +++ b/arch/arm/mach-imx/Makefile @@ -40,9 +40,6 @@ obj-y += ssi-fiq.o obj-y += ssi-fiq-ksym.o endif -# Support for CMOS sensor interface -obj-$(CONFIG_MX1_VIDEO) += mx1-camera-fiq.o mx1-camera-fiq-ksym.o - # i.MX1 based machines obj-$(CONFIG_ARCH_MX1ADS) += mach-mx1ads.o obj-$(CONFIG_MACH_SCB9328) += mach-scb9328.o diff --git a/arch/arm/mach-imx/devices/Kconfig b/arch/arm/mach-imx/devices/Kconfig index 119f699649a9..73a1dce6cac1 100644 --- a/arch/arm/mach-imx/devices/Kconfig +++ b/arch/arm/mach-imx/devices/Kconfig @@ -45,9 +45,6 @@ config IMX_HAVE_PLATFORM_IMX_UART config IMX_HAVE_PLATFORM_IPU_CORE bool -config IMX_HAVE_PLATFORM_MX1_CAMERA - bool - config IMX_HAVE_PLATFORM_MX2_CAMERA bool diff --git a/arch/arm/mach-imx/devices/Makefile b/arch/arm/mach-imx/devices/Makefile index 6bb144dd680e..8fdb12b4ca7e 100644 --- a/arch/arm/mach-imx/devices/Makefile +++ b/arch/arm/mach-imx/devices/Makefile @@ -17,7 +17,6 @@ obj-$(CONFIG_IMX_HAVE_PLATFORM_PATA_IMX) += platform-pata_imx.o obj-$(CONFIG_IMX_HAVE_PLATFORM_IMX_SSI) += platform-imx-ssi.o obj-$(CONFIG_IMX_HAVE_PLATFORM_IMX_UART) += platform-imx-uart.o obj-$(CONFIG_IMX_HAVE_PLATFORM_IPU_CORE) += platform-ipu-core.o -obj-$(CONFIG_IMX_HAVE_PLATFORM_MX1_CAMERA) += platform-mx1-camera.o obj-$(CONFIG_IMX_HAVE_PLATFORM_MX2_CAMERA) += platform-mx2-camera.o obj-$(CONFIG_IMX_HAVE_PLATFORM_MXC_EHCI) += platform-mxc-ehci.o obj-$(CONFIG_IMX_HAVE_PLATFORM_MXC_MMC) += platform-mxc-mmc.o diff --git a/arch/arm/mach-imx/devices/devices-common.h b/arch/arm/mach-imx/devices/devices-common.h index 69bafc884dfa..67f7fb13050d 100644 --- a/arch/arm/mach-imx/devices/devices-common.h +++ b/arch/arm/mach-imx/devices/devices-common.h @@ -192,16 +192,6 @@ struct platform_device *__init imx_add_mx3_sdc_fb( const struct imx_ipu_core_data *data, struct mx3fb_platform_data *pdata); -#include -struct imx_mx1_camera_data { - resource_size_t iobase; - resource_size_t iosize; - resource_size_t irq; -}; -struct platform_device *__init imx_add_mx1_camera( - const struct imx_mx1_camera_data *data, - const struct mx1_camera_pdata *pdata); - #include struct imx_mx2_camera_data { const char *devid; diff --git 
a/arch/arm/mach-imx/devices/platform-mx1-camera.c b/arch/arm/mach-imx/devices/platform-mx1-camera.c deleted file mode 100644 index 2c6788131080..000000000000 --- a/arch/arm/mach-imx/devices/platform-mx1-camera.c +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Copyright (C) 2010 Pengutronix - * Uwe Kleine-Koenig - * - * This program is free software; you can redistribute it and/or modify it under - * the terms of the GNU General Public License version 2 as published by the - * Free Software Foundation. - */ -#include "../hardware.h" -#include "devices-common.h" - -#define imx_mx1_camera_data_entry_single(soc, _size) \ - { \ - .iobase = soc ## _CSI ## _BASE_ADDR, \ - .iosize = _size, \ - .irq = soc ## _INT_CSI, \ - } - -#ifdef CONFIG_SOC_IMX1 -const struct imx_mx1_camera_data imx1_mx1_camera_data __initconst = - imx_mx1_camera_data_entry_single(MX1, 10); -#endif /* ifdef CONFIG_SOC_IMX1 */ - -struct platform_device *__init imx_add_mx1_camera( - const struct imx_mx1_camera_data *data, - const struct mx1_camera_pdata *pdata) -{ - struct resource res[] = { - { - .start = data->iobase, - .end = data->iobase + data->iosize - 1, - .flags = IORESOURCE_MEM, - }, { - .start = data->irq, - .end = data->irq, - .flags = IORESOURCE_IRQ, - }, - }; - return imx_add_platform_device_dmamask("mx1-camera", 0, - res, ARRAY_SIZE(res), - pdata, sizeof(*pdata), DMA_BIT_MASK(32)); -} diff --git a/arch/arm/mach-imx/mx1-camera-fiq-ksym.c b/arch/arm/mach-imx/mx1-camera-fiq-ksym.c deleted file mode 100644 index fb38436ca67f..000000000000 --- a/arch/arm/mach-imx/mx1-camera-fiq-ksym.c +++ /dev/null @@ -1,18 +0,0 @@ -/* - * Exported ksyms of ARCH_MX1 - * - * Copyright (C) 2008, Darius Augulis - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include -#include - -#include - -/* IMX camera FIQ handler */ -EXPORT_SYMBOL(mx1_camera_sof_fiq_start); -EXPORT_SYMBOL(mx1_camera_sof_fiq_end); diff --git a/arch/arm/mach-imx/mx1-camera-fiq.S b/arch/arm/mach-imx/mx1-camera-fiq.S deleted file mode 100644 index 9c69aa65bf17..000000000000 --- a/arch/arm/mach-imx/mx1-camera-fiq.S +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright (C) 2008 Paulius Zaleckas - * - * Based on linux/arch/arm/lib/floppydma.S - * Copyright (C) 1995, 1996 Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
- */ -#include -#include - - .text - .global mx1_camera_sof_fiq_end - .global mx1_camera_sof_fiq_start -mx1_camera_sof_fiq_start: - @ enable dma - ldr r12, [r9] - orr r12, r12, #0x00000001 - str r12, [r9] - @ unmask DMA interrupt - ldr r12, [r8] - bic r12, r12, r13 - str r12, [r8] - @ disable SOF interrupt - ldr r12, [r10] - bic r12, r12, #0x00010000 - str r12, [r10] - @ clear SOF flag - mov r12, #0x00010000 - str r12, [r11] - @ return from FIQ - subs pc, lr, #4 -mx1_camera_sof_fiq_end: diff --git a/include/linux/platform_data/camera-mx1.h b/include/linux/platform_data/camera-mx1.h deleted file mode 100644 index 4fd6c70314b4..000000000000 --- a/include/linux/platform_data/camera-mx1.h +++ /dev/null @@ -1,35 +0,0 @@ -/* - * mx1_camera.h - i.MX1/i.MXL camera driver header file - * - * Copyright (c) 2008, Paulius Zaleckas - * Copyright (C) 2009, Darius Augulis - * - * Based on PXA camera.h file: - * Copyright (C) 2003, Intel Corporation - * Copyright (C) 2008, Guennadi Liakhovetski - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#ifndef __ASM_ARCH_CAMERA_H_ -#define __ASM_ARCH_CAMERA_H_ - -#define MX1_CAMERA_DATA_HIGH 1 -#define MX1_CAMERA_PCLK_RISING 2 -#define MX1_CAMERA_VSYNC_HIGH 4 - -extern unsigned char mx1_camera_sof_fiq_start, mx1_camera_sof_fiq_end; - -/** - * struct mx1_camera_pdata - i.MX1/i.MXL camera platform data - * @mclk_10khz: master clock frequency in 10kHz units - * @flags: MX1 camera platform flags - */ -struct mx1_camera_pdata { - unsigned long mclk_10khz; - unsigned long flags; -}; - -#endif /* __ASM_ARCH_CAMERA_H_ */ -- cgit v1.2.3-59-g8ed1b From 5442e9fbd7c23172a1c9bc736629cd123a9923f0 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Wed, 16 Jul 2014 01:54:54 +0400 Subject: timerfd: Implement timerfd_ioctl method to restore timerfd_ctx::ticks, v3 The read() of timerfd files allows to fetch the number of timer ticks while there is no way to set it back from userspace. To restore the timer's state as it was at checkpoint moment we need a path to bring @ticks back. Initially I thought about writing ticks back via write() interface but it seems such API is somehow obscure. Instead implement timerfd_ioctl() method with TFD_IOC_SET_TICKS command which allows to adjust @ticks into non-zero value waking up the waiters. I wrapped code with CONFIG_CHECKPOINT_RESTORE which can be dropped off if there users except c/r camp appear. 
v2 (by akpm@): - Use define timerfd_ioctl NULL for non c/r config v3: - Use copy_from_user for @ticks fetching since not all arch support get_user for 8 byte argument Signed-off-by: Cyrill Gorcunov Cc: Andrew Morton Cc: Michael Kerrisk Cc: Andrey Vagin Cc: Arnd Bergmann Cc: Christopher Covington Cc: Pavel Emelyanov Cc: Vladimir Davydov Link: http://lkml.kernel.org/r/20140715215703.285617923@openvz.org Signed-off-by: Thomas Gleixner --- fs/timerfd.c | 37 +++++++++++++++++++++++++++++++++++++ include/linux/timerfd.h | 5 +++++ 2 files changed, 42 insertions(+) (limited to 'include/linux') diff --git a/fs/timerfd.c b/fs/timerfd.c index 77183f047f65..709603cac9e6 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c @@ -315,12 +315,49 @@ static int timerfd_show(struct seq_file *m, struct file *file) #define timerfd_show NULL #endif +#ifdef CONFIG_CHECKPOINT_RESTORE +static long timerfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + struct timerfd_ctx *ctx = file->private_data; + int ret = 0; + + switch (cmd) { + case TFD_IOC_SET_TICKS: { + u64 ticks; + + if (copy_from_user(&ticks, (u64 __user *)arg, sizeof(ticks))) + return -EFAULT; + if (!ticks) + return -EINVAL; + + spin_lock_irq(&ctx->wqh.lock); + if (!timerfd_canceled(ctx)) { + ctx->ticks = ticks; + if (ticks) + wake_up_locked(&ctx->wqh); + } else + ret = -ECANCELED; + spin_unlock_irq(&ctx->wqh.lock); + break; + } + default: + ret = -ENOTTY; + break; + } + + return ret; +} +#else +#define timerfd_ioctl NULL +#endif + static const struct file_operations timerfd_fops = { .release = timerfd_release, .poll = timerfd_poll, .read = timerfd_read, .llseek = noop_llseek, .show_fdinfo = timerfd_show, + .unlocked_ioctl = timerfd_ioctl, }; static int timerfd_fget(int fd, struct fd *p) diff --git a/include/linux/timerfd.h b/include/linux/timerfd.h index d3b57fa12225..bd36ce431e32 100644 --- a/include/linux/timerfd.h +++ b/include/linux/timerfd.h @@ -11,6 +11,9 @@ /* For O_CLOEXEC and O_NONBLOCK */ #include +/* For _IO helpers */ +#include + /* * CAREFUL: Check include/asm-generic/fcntl.h when defining * new flags, since they might collide with O_* ones. We want @@ -29,4 +32,6 @@ /* Flags for timerfd_settime. */ #define TFD_SETTIME_FLAGS (TFD_TIMER_ABSTIME | TFD_TIMER_CANCEL_ON_SET) +#define TFD_IOC_SET_TICKS _IOW('T', 0, u64) + #endif /* _LINUX_TIMERFD_H */ -- cgit v1.2.3-59-g8ed1b From 3c45ddf823d679a820adddd53b52c6699c9a05ac Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 16 Jul 2014 15:38:32 -0400 Subject: svcrdma: Select NFSv4.1 backchannel transport based on forward channel The current code always selects XPRT_TRANSPORT_BC_TCP for the back channel, even when the forward channel was not TCP (eg, RDMA). When a 4.1 mount is attempted with RDMA, the server panics in the TCP BC code when trying to send CB_NULL. Instead, construct the transport protocol number from the forward channel transport or'd with XPRT_TRANSPORT_BC. Transports that do not support bi-directional RPC will not have registered a "BC" transport, causing create_backchannel_client() to fail immediately. Fixes: https://bugzilla.linux-nfs.org/show_bug.cgi?id=265 Signed-off-by: Chuck Lever Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4callback.c | 3 ++- include/linux/sunrpc/svc_xprt.h | 1 + net/sunrpc/svcsock.c | 2 ++ net/sunrpc/xprt.c | 2 +- net/sunrpc/xprtrdma/svc_rdma_transport.c | 1 + 5 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index a88a93e09d69..564d72304613 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -689,7 +689,8 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c clp->cl_cb_session = ses; args.bc_xprt = conn->cb_xprt; args.prognumber = clp->cl_cb_session->se_cb_prog; - args.protocol = XPRT_TRANSPORT_BC_TCP; + args.protocol = conn->cb_xprt->xpt_class->xcl_ident | + XPRT_TRANSPORT_BC; args.authflavor = ses->se_cb_sec.flavor; } /* Create RPC client */ diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 7235040a19b2..5d9d6f84b382 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -33,6 +33,7 @@ struct svc_xprt_class { struct svc_xprt_ops *xcl_ops; struct list_head xcl_list; u32 xcl_max_payload; + int xcl_ident; }; /* diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index b507cd327d9b..b2437ee93657 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -692,6 +692,7 @@ static struct svc_xprt_class svc_udp_class = { .xcl_owner = THIS_MODULE, .xcl_ops = &svc_udp_ops, .xcl_max_payload = RPCSVC_MAXPAYLOAD_UDP, + .xcl_ident = XPRT_TRANSPORT_UDP, }; static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv) @@ -1292,6 +1293,7 @@ static struct svc_xprt_class svc_tcp_class = { .xcl_owner = THIS_MODULE, .xcl_ops = &svc_tcp_ops, .xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP, + .xcl_ident = XPRT_TRANSPORT_TCP, }; void svc_init_xprt_sock(void) diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index c3b2b3369e52..51c63165073c 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1306,7 +1306,7 @@ struct rpc_xprt *xprt_create_transport(struct xprt_create *args) } } spin_unlock(&xprt_list_lock); - printk(KERN_ERR "RPC: transport (%d) not supported\n", args->ident); + dprintk("RPC: transport (%d) not supported\n", args->ident); return ERR_PTR(-EIO); found: diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index e7323fbbd348..06a5d9235107 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -92,6 +92,7 @@ struct svc_xprt_class svc_rdma_class = { .xcl_owner = THIS_MODULE, .xcl_ops = &svc_rdma_ops, .xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP, + .xcl_ident = XPRT_TRANSPORT_RDMA, }; struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt) -- cgit v1.2.3-59-g8ed1b From 6ad59343ecd72dd3f83c4db3bcddbb0beabb4c4c Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Tue, 15 Jul 2014 16:18:57 +0200 Subject: ssb: extract power info from SPROM revs 4 and 5 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is needed to properly handle early 802.11n devices like BCM4321. Signed-off-by: Rafał Miłecki Signed-off-by: John W. 
Linville --- drivers/ssb/pci.c | 45 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/ssb/ssb_regs.h | 37 ++++++++++++++++++++++++++++++++++++ 2 files changed, 82 insertions(+) (limited to 'include/linux') diff --git a/drivers/ssb/pci.c b/drivers/ssb/pci.c index 6318364be590..0f28c08fcb3c 100644 --- a/drivers/ssb/pci.c +++ b/drivers/ssb/pci.c @@ -470,7 +470,15 @@ static void sprom_extract_r458(struct ssb_sprom *out, const u16 *in) static void sprom_extract_r45(struct ssb_sprom *out, const u16 *in) { + static const u16 pwr_info_offset[] = { + SSB_SPROM4_PWR_INFO_CORE0, SSB_SPROM4_PWR_INFO_CORE1, + SSB_SPROM4_PWR_INFO_CORE2, SSB_SPROM4_PWR_INFO_CORE3 + }; u16 il0mac_offset; + int i; + + BUILD_BUG_ON(ARRAY_SIZE(pwr_info_offset) != + ARRAY_SIZE(out->core_pwr_info)); if (out->revision == 4) il0mac_offset = SSB_SPROM4_IL0MAC; @@ -543,6 +551,43 @@ static void sprom_extract_r45(struct ssb_sprom *out, const u16 *in) SSB_SPROM4_AGAIN3, SSB_SPROM4_AGAIN3_SHIFT); + /* Extract cores power info info */ + for (i = 0; i < ARRAY_SIZE(pwr_info_offset); i++) { + u16 o = pwr_info_offset[i]; + + SPEX(core_pwr_info[i].itssi_2g, o + SSB_SPROM4_2G_MAXP_ITSSI, + SSB_SPROM4_2G_ITSSI, SSB_SPROM4_2G_ITSSI_SHIFT); + SPEX(core_pwr_info[i].maxpwr_2g, o + SSB_SPROM4_2G_MAXP_ITSSI, + SSB_SPROM4_2G_MAXP, 0); + + SPEX(core_pwr_info[i].pa_2g[0], o + SSB_SPROM4_2G_PA_0, ~0, 0); + SPEX(core_pwr_info[i].pa_2g[1], o + SSB_SPROM4_2G_PA_1, ~0, 0); + SPEX(core_pwr_info[i].pa_2g[2], o + SSB_SPROM4_2G_PA_2, ~0, 0); + SPEX(core_pwr_info[i].pa_2g[3], o + SSB_SPROM4_2G_PA_3, ~0, 0); + + SPEX(core_pwr_info[i].itssi_5g, o + SSB_SPROM4_5G_MAXP_ITSSI, + SSB_SPROM4_5G_ITSSI, SSB_SPROM4_5G_ITSSI_SHIFT); + SPEX(core_pwr_info[i].maxpwr_5g, o + SSB_SPROM4_5G_MAXP_ITSSI, + SSB_SPROM4_5G_MAXP, 0); + SPEX(core_pwr_info[i].maxpwr_5gh, o + SSB_SPROM4_5GHL_MAXP, + SSB_SPROM4_5GH_MAXP, 0); + SPEX(core_pwr_info[i].maxpwr_5gl, o + SSB_SPROM4_5GHL_MAXP, + SSB_SPROM4_5GL_MAXP, SSB_SPROM4_5GL_MAXP_SHIFT); + + SPEX(core_pwr_info[i].pa_5gl[0], o + SSB_SPROM4_5GL_PA_0, ~0, 0); + SPEX(core_pwr_info[i].pa_5gl[1], o + SSB_SPROM4_5GL_PA_1, ~0, 0); + SPEX(core_pwr_info[i].pa_5gl[2], o + SSB_SPROM4_5GL_PA_2, ~0, 0); + SPEX(core_pwr_info[i].pa_5gl[3], o + SSB_SPROM4_5GL_PA_3, ~0, 0); + SPEX(core_pwr_info[i].pa_5g[0], o + SSB_SPROM4_5G_PA_0, ~0, 0); + SPEX(core_pwr_info[i].pa_5g[1], o + SSB_SPROM4_5G_PA_1, ~0, 0); + SPEX(core_pwr_info[i].pa_5g[2], o + SSB_SPROM4_5G_PA_2, ~0, 0); + SPEX(core_pwr_info[i].pa_5g[3], o + SSB_SPROM4_5G_PA_3, ~0, 0); + SPEX(core_pwr_info[i].pa_5gh[0], o + SSB_SPROM4_5GH_PA_0, ~0, 0); + SPEX(core_pwr_info[i].pa_5gh[1], o + SSB_SPROM4_5GH_PA_1, ~0, 0); + SPEX(core_pwr_info[i].pa_5gh[2], o + SSB_SPROM4_5GH_PA_2, ~0, 0); + SPEX(core_pwr_info[i].pa_5gh[3], o + SSB_SPROM4_5GH_PA_3, ~0, 0); + } + sprom_extract_r458(out, in); /* TODO - get remaining rev 4 stuff needed */ diff --git a/include/linux/ssb/ssb_regs.h b/include/linux/ssb/ssb_regs.h index f9f931c89e3e..f7b9100686c3 100644 --- a/include/linux/ssb/ssb_regs.h +++ b/include/linux/ssb/ssb_regs.h @@ -345,6 +345,43 @@ #define SSB_SPROM4_TXPID5GH2_SHIFT 0 #define SSB_SPROM4_TXPID5GH3 0xFF00 #define SSB_SPROM4_TXPID5GH3_SHIFT 8 + +/* There are 4 blocks with power info sharing the same layout */ +#define SSB_SPROM4_PWR_INFO_CORE0 0x0080 +#define SSB_SPROM4_PWR_INFO_CORE1 0x00AE +#define SSB_SPROM4_PWR_INFO_CORE2 0x00DC +#define SSB_SPROM4_PWR_INFO_CORE3 0x010A + +#define SSB_SPROM4_2G_MAXP_ITSSI 0x00 /* 2 GHz ITSSI and 2 GHz Max Power */ +#define SSB_SPROM4_2G_MAXP 0x00FF +#define 
SSB_SPROM4_2G_ITSSI 0xFF00 +#define SSB_SPROM4_2G_ITSSI_SHIFT 8 +#define SSB_SPROM4_2G_PA_0 0x02 /* 2 GHz power amp */ +#define SSB_SPROM4_2G_PA_1 0x04 +#define SSB_SPROM4_2G_PA_2 0x06 +#define SSB_SPROM4_2G_PA_3 0x08 +#define SSB_SPROM4_5G_MAXP_ITSSI 0x0A /* 5 GHz ITSSI and 5.3 GHz Max Power */ +#define SSB_SPROM4_5G_MAXP 0x00FF +#define SSB_SPROM4_5G_ITSSI 0xFF00 +#define SSB_SPROM4_5G_ITSSI_SHIFT 8 +#define SSB_SPROM4_5GHL_MAXP 0x0C /* 5.2 GHz and 5.8 GHz Max Power */ +#define SSB_SPROM4_5GH_MAXP 0x00FF +#define SSB_SPROM4_5GL_MAXP 0xFF00 +#define SSB_SPROM4_5GL_MAXP_SHIFT 8 +#define SSB_SPROM4_5G_PA_0 0x0E /* 5.3 GHz power amp */ +#define SSB_SPROM4_5G_PA_1 0x10 +#define SSB_SPROM4_5G_PA_2 0x12 +#define SSB_SPROM4_5G_PA_3 0x14 +#define SSB_SPROM4_5GL_PA_0 0x16 /* 5.2 GHz power amp */ +#define SSB_SPROM4_5GL_PA_1 0x18 +#define SSB_SPROM4_5GL_PA_2 0x1A +#define SSB_SPROM4_5GL_PA_3 0x1C +#define SSB_SPROM4_5GH_PA_0 0x1E /* 5.8 GHz power amp */ +#define SSB_SPROM4_5GH_PA_1 0x20 +#define SSB_SPROM4_5GH_PA_2 0x22 +#define SSB_SPROM4_5GH_PA_3 0x24 + +/* TODO: Make it deprecated */ #define SSB_SPROM4_MAXP_BG 0x0080 /* Max Power BG in path 1 */ #define SSB_SPROM4_MAXP_BG_MASK 0x00FF /* Mask for Max Power BG */ #define SSB_SPROM4_ITSSI_BG 0xFF00 /* Mask for path 1 itssi_bg */ -- cgit v1.2.3-59-g8ed1b From d1d3799fcb1037357b54be44e796a6253484268e Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Tue, 15 Jul 2014 19:44:28 +0200 Subject: bcma: add support for BCM43217 found in Tenda W322E (14e4:43a9) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rafał Miłecki Signed-off-by: John W. Linville --- drivers/bcma/driver_chipcommon_pmu.c | 1 + drivers/bcma/host_pci.c | 1 + drivers/bcma/sprom.c | 1 + include/linux/bcma/bcma.h | 1 + 4 files changed, 4 insertions(+) (limited to 'include/linux') diff --git a/drivers/bcma/driver_chipcommon_pmu.c b/drivers/bcma/driver_chipcommon_pmu.c index 5081a8c439cc..bb694e2e9f32 100644 --- a/drivers/bcma/driver_chipcommon_pmu.c +++ b/drivers/bcma/driver_chipcommon_pmu.c @@ -603,6 +603,7 @@ void bcma_pmu_spuravoid_pllupdate(struct bcma_drv_cc *cc, int spuravoid) tmp = BCMA_CC_PMU_CTL_PLL_UPD | BCMA_CC_PMU_CTL_NOILPONW; break; + case BCMA_CHIP_ID_BCM43217: case BCMA_CHIP_ID_BCM43227: case BCMA_CHIP_ID_BCM43228: case BCMA_CHIP_ID_BCM43428: diff --git a/drivers/bcma/host_pci.c b/drivers/bcma/host_pci.c index e333305363aa..3cf725a49dc1 100644 --- a/drivers/bcma/host_pci.c +++ b/drivers/bcma/host_pci.c @@ -279,6 +279,7 @@ static const struct pci_device_id bcma_pci_bridge_tbl[] = { { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x4358) }, { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x4359) }, { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x4365) }, + { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x43a9) }, { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x4727) }, { 0, }, }; diff --git a/drivers/bcma/sprom.c b/drivers/bcma/sprom.c index a9dfb1ac138d..97bb38e9ed65 100644 --- a/drivers/bcma/sprom.c +++ b/drivers/bcma/sprom.c @@ -534,6 +534,7 @@ static bool bcma_sprom_onchip_available(struct bcma_bus *bus) /* for these chips OTP is always available */ present = true; break; + case BCMA_CHIP_ID_BCM43217: case BCMA_CHIP_ID_BCM43227: case BCMA_CHIP_ID_BCM43228: case BCMA_CHIP_ID_BCM43428: diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h index 452286a38b2b..7cb2344741cf 100644 --- a/include/linux/bcma/bcma.h +++ b/include/linux/bcma/bcma.h @@ -158,6 +158,7 @@ struct bcma_host_ops { /* Chip IDs of PCIe devices */ #define BCMA_CHIP_ID_BCM4313 0x4313 #define 
BCMA_CHIP_ID_BCM43142 43142 +#define BCMA_CHIP_ID_BCM43217 43217 #define BCMA_CHIP_ID_BCM43224 43224 #define BCMA_PKG_ID_BCM43224_FAB_CSM 0x8 #define BCMA_PKG_ID_BCM43224_FAB_SMIC 0xa -- cgit v1.2.3-59-g8ed1b From c2cb2c4cf1a089501242a1701b589d2ad5eb0448 Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Thu, 17 Jul 2014 19:31:05 +0200 Subject: b43: use one shared function for setting MAC frequency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit By the way add few chipsets that were tracked with "wl" dumps. Signed-off-by: Rafał Miłecki Signed-off-by: John W. Linville --- drivers/net/wireless/b43/main.c | 39 ++++++++++++++++++++++++++++++++++++++ drivers/net/wireless/b43/main.h | 1 + drivers/net/wireless/b43/phy_lcn.c | 35 +--------------------------------- drivers/net/wireless/b43/phy_n.c | 7 +------ include/linux/bcma/bcma.h | 1 + 5 files changed, 43 insertions(+), 40 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c index 3dcd3aa38608..3e127be06bfb 100644 --- a/drivers/net/wireless/b43/main.c +++ b/drivers/net/wireless/b43/main.c @@ -2964,6 +2964,45 @@ void b43_mac_phy_clock_set(struct b43_wldev *dev, bool on) } } +/* brcms_b_switch_macfreq */ +void b43_mac_switch_freq(struct b43_wldev *dev, u8 spurmode) +{ + u16 chip_id = dev->dev->chip_id; + + if (chip_id == BCMA_CHIP_ID_BCM43217 || + chip_id == BCMA_CHIP_ID_BCM43222 || + chip_id == BCMA_CHIP_ID_BCM43224 || + chip_id == BCMA_CHIP_ID_BCM43225 || + chip_id == BCMA_CHIP_ID_BCM43227 || + chip_id == BCMA_CHIP_ID_BCM43228) { + switch (spurmode) { + case 2: /* 126 Mhz */ + b43_write16(dev, B43_MMIO_TSF_CLK_FRAC_LOW, 0x2082); + b43_write16(dev, B43_MMIO_TSF_CLK_FRAC_HIGH, 0x8); + break; + case 1: /* 123 Mhz */ + b43_write16(dev, B43_MMIO_TSF_CLK_FRAC_LOW, 0x5341); + b43_write16(dev, B43_MMIO_TSF_CLK_FRAC_HIGH, 0x8); + break; + default: /* 120 Mhz */ + b43_write16(dev, B43_MMIO_TSF_CLK_FRAC_LOW, 0x8889); + b43_write16(dev, B43_MMIO_TSF_CLK_FRAC_HIGH, 0x8); + break; + } + } else if (dev->phy.type == B43_PHYTYPE_LCN) { + switch (spurmode) { + case 1: /* 82 Mhz */ + b43_write16(dev, B43_MMIO_TSF_CLK_FRAC_LOW, 0x7CE0); + b43_write16(dev, B43_MMIO_TSF_CLK_FRAC_HIGH, 0xC); + break; + default: /* 80 Mhz */ + b43_write16(dev, B43_MMIO_TSF_CLK_FRAC_LOW, 0xCCCD); + b43_write16(dev, B43_MMIO_TSF_CLK_FRAC_HIGH, 0xC); + break; + } + } +} + static void b43_adjust_opmode(struct b43_wldev *dev) { struct b43_wl *wl = dev->wl; diff --git a/drivers/net/wireless/b43/main.h b/drivers/net/wireless/b43/main.h index f476fc337d64..9f22e4b4c132 100644 --- a/drivers/net/wireless/b43/main.h +++ b/drivers/net/wireless/b43/main.h @@ -99,6 +99,7 @@ void b43_power_saving_ctl_bits(struct b43_wldev *dev, unsigned int ps_flags); void b43_mac_suspend(struct b43_wldev *dev); void b43_mac_enable(struct b43_wldev *dev); void b43_mac_phy_clock_set(struct b43_wldev *dev, bool on); +void b43_mac_switch_freq(struct b43_wldev *dev, u8 spurmode); struct b43_request_fw_context; diff --git a/drivers/net/wireless/b43/phy_lcn.c b/drivers/net/wireless/b43/phy_lcn.c index 0bafa3b17035..e76bbdf3247e 100644 --- a/drivers/net/wireless/b43/phy_lcn.c +++ b/drivers/net/wireless/b43/phy_lcn.c @@ -54,39 +54,6 @@ enum lcn_sense_type { B43_SENSE_VBAT, }; -/* In theory it's PHY common function, move if needed */ -/* brcms_b_switch_macfreq */ -static void b43_phy_switch_macfreq(struct b43_wldev *dev, u8 spurmode) -{ - if (dev->dev->chip_id == 43224 || dev->dev->chip_id == 43225) { - switch 
(spurmode) { - case 2: /* 126 Mhz */ - b43_write16(dev, B43_MMIO_TSF_CLK_FRAC_LOW, 0x2082); - b43_write16(dev, B43_MMIO_TSF_CLK_FRAC_HIGH, 0x8); - break; - case 1: /* 123 Mhz */ - b43_write16(dev, B43_MMIO_TSF_CLK_FRAC_LOW, 0x5341); - b43_write16(dev, B43_MMIO_TSF_CLK_FRAC_HIGH, 0x8); - break; - default: /* 120 Mhz */ - b43_write16(dev, B43_MMIO_TSF_CLK_FRAC_LOW, 0x8889); - b43_write16(dev, B43_MMIO_TSF_CLK_FRAC_HIGH, 0x8); - break; - } - } else if (dev->phy.type == B43_PHYTYPE_LCN) { - switch (spurmode) { - case 1: /* 82 Mhz */ - b43_write16(dev, B43_MMIO_TSF_CLK_FRAC_LOW, 0x7CE0); - b43_write16(dev, B43_MMIO_TSF_CLK_FRAC_HIGH, 0xC); - break; - default: /* 80 Mhz */ - b43_write16(dev, B43_MMIO_TSF_CLK_FRAC_LOW, 0xCCCD); - b43_write16(dev, B43_MMIO_TSF_CLK_FRAC_HIGH, 0xC); - break; - } - } -} - /************************************************** * Radio 2064. **************************************************/ @@ -609,7 +576,7 @@ static void b43_phy_lcn_txrx_spur_avoidance_mode(struct b43_wldev *dev, b43_phy_write(dev, 0x93b, ((0 << 13) + 23)); b43_phy_write(dev, 0x93c, ((0 << 13) + 1989)); } - b43_phy_switch_macfreq(dev, enable); + b43_mac_switch_freq(dev, enable); } /************************************************** diff --git a/drivers/net/wireless/b43/phy_n.c b/drivers/net/wireless/b43/phy_n.c index ef1acaec7027..0f0c1306b0ad 100644 --- a/drivers/net/wireless/b43/phy_n.c +++ b/drivers/net/wireless/b43/phy_n.c @@ -6113,12 +6113,7 @@ static void b43_nphy_channel_setup(struct b43_wldev *dev, b43_nphy_pmu_spur_avoid(dev, avoid); - if (dev->dev->chip_id == 43222 || dev->dev->chip_id == 43224 || - dev->dev->chip_id == 43225) { - b43_write16(dev, B43_MMIO_TSF_CLK_FRAC_LOW, - avoid ? 0x5341 : 0x8889); - b43_write16(dev, B43_MMIO_TSF_CLK_FRAC_HIGH, 0x8); - } + b43_mac_switch_freq(dev, avoid); if (dev->phy.rev == 3 || dev->phy.rev == 4) ; /* TODO: reset PLL */ diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h index 7cb2344741cf..969af0f2bdf9 100644 --- a/include/linux/bcma/bcma.h +++ b/include/linux/bcma/bcma.h @@ -159,6 +159,7 @@ struct bcma_host_ops { #define BCMA_CHIP_ID_BCM4313 0x4313 #define BCMA_CHIP_ID_BCM43142 43142 #define BCMA_CHIP_ID_BCM43217 43217 +#define BCMA_CHIP_ID_BCM43222 43222 #define BCMA_CHIP_ID_BCM43224 43224 #define BCMA_PKG_ID_BCM43224_FAB_CSM 0x8 #define BCMA_PKG_ID_BCM43224_FAB_SMIC 0xa -- cgit v1.2.3-59-g8ed1b From 6a09d17bb66a533c165be81e8a4c3557f68e1a3b Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 18 Jul 2014 18:56:34 +0100 Subject: KEYS: Provide a generic instantiation function Provide a generic instantiation function for key types that use the preparse hook. This makes it easier to prereserve key quota before keyrings get locked to retain the new key. Signed-off-by: David Howells Acked-by: Steve Dickson Acked-by: Jeff Layton Reviewed-by: Sage Weil --- crypto/asymmetric_keys/asymmetric_type.c | 25 +------------------------ include/linux/key-type.h | 2 ++ security/keys/key.c | 30 ++++++++++++++++++++++++++++++ 3 files changed, 33 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/crypto/asymmetric_keys/asymmetric_type.c b/crypto/asymmetric_keys/asymmetric_type.c index b77eb5304788..c1fe0fcee8e3 100644 --- a/crypto/asymmetric_keys/asymmetric_type.c +++ b/crypto/asymmetric_keys/asymmetric_type.c @@ -163,29 +163,6 @@ static void asymmetric_key_free_preparse(struct key_preparsed_payload *prep) kfree(prep->description); } -/* - * Instantiate a asymmetric_key defined key. 
The key was preparsed, so we just - * have to transfer the data here. - */ -static int asymmetric_key_instantiate(struct key *key, struct key_preparsed_payload *prep) -{ - int ret; - - pr_devel("==>%s()\n", __func__); - - ret = key_payload_reserve(key, prep->quotalen); - if (ret == 0) { - key->type_data.p[0] = prep->type_data[0]; - key->type_data.p[1] = prep->type_data[1]; - key->payload.data = prep->payload; - prep->type_data[0] = NULL; - prep->type_data[1] = NULL; - prep->payload = NULL; - } - pr_devel("<==%s() = %d\n", __func__, ret); - return ret; -} - /* * dispose of the data dangling from the corpse of a asymmetric key */ @@ -205,7 +182,7 @@ struct key_type key_type_asymmetric = { .name = "asymmetric", .preparse = asymmetric_key_preparse, .free_preparse = asymmetric_key_free_preparse, - .instantiate = asymmetric_key_instantiate, + .instantiate = generic_key_instantiate, .match = asymmetric_key_match, .destroy = asymmetric_key_destroy, .describe = asymmetric_key_describe, diff --git a/include/linux/key-type.h b/include/linux/key-type.h index a74c3a84dfdd..88503dca2a57 100644 --- a/include/linux/key-type.h +++ b/include/linux/key-type.h @@ -159,5 +159,7 @@ static inline int key_negate_and_link(struct key *key, return key_reject_and_link(key, timeout, ENOKEY, keyring, instkey); } +extern int generic_key_instantiate(struct key *key, struct key_preparsed_payload *prep); + #endif /* CONFIG_KEYS */ #endif /* _LINUX_KEY_TYPE_H */ diff --git a/security/keys/key.c b/security/keys/key.c index 2048a110e7f1..7c9acbf106b6 100644 --- a/security/keys/key.c +++ b/security/keys/key.c @@ -1023,6 +1023,36 @@ void key_invalidate(struct key *key) } EXPORT_SYMBOL(key_invalidate); +/** + * generic_key_instantiate - Simple instantiation of a key from preparsed data + * @key: The key to be instantiated + * @prep: The preparsed data to load. + * + * Instantiate a key from preparsed data. We assume we can just copy the data + * in directly and clear the old pointers. + * + * This can be pointed to directly by the key type instantiate op pointer. + */ +int generic_key_instantiate(struct key *key, struct key_preparsed_payload *prep) +{ + int ret; + + pr_devel("==>%s()\n", __func__); + + ret = key_payload_reserve(key, prep->quotalen); + if (ret == 0) { + key->type_data.p[0] = prep->type_data[0]; + key->type_data.p[1] = prep->type_data[1]; + rcu_assign_keypointer(key, prep->payload); + prep->type_data[0] = NULL; + prep->type_data[1] = NULL; + prep->payload = NULL; + } + pr_devel("<==%s() = %d\n", __func__, ret); + return ret; +} +EXPORT_SYMBOL(generic_key_instantiate); + /** * register_key_type - Register a type of key. * @ktype: The new key type. -- cgit v1.2.3-59-g8ed1b From 0ef1b9e0cfd98f91b2341d581ea9424eb4ba3aa7 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 25 Jun 2014 11:25:19 -0400 Subject: ftrace: Remove ftrace_start/stop() There are no more kernel users of ftrace_stop() and ftrace_start(). Remove them. Reviewed-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 28 ---------------------------- 1 file changed, 28 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 18fb2c4a3f7f..b7333794554f 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -143,32 +143,6 @@ enum ftrace_tracing_type_t { /* Current tracing type, default is FTRACE_TYPE_ENTER */ extern enum ftrace_tracing_type_t ftrace_tracing_type; -/** - * ftrace_stop - stop function tracer. 
- * - * A quick way to stop the function tracer. Note this an on off switch, - * it is not something that is recursive like preempt_disable. - * This does not disable the calling of mcount, it only stops the - * calling of functions from mcount. - */ -static inline void ftrace_stop(void) -{ - function_trace_stop = 1; -} - -/** - * ftrace_start - start the function tracer. - * - * This function is the inverse of ftrace_stop. This does not enable - * the function tracing if the function tracer is disabled. This only - * sets the function tracer flag to continue calling the functions - * from mcount. - */ -static inline void ftrace_start(void) -{ - function_trace_stop = 0; -} - /* * The ftrace_ops must be a static and should also * be read_mostly. These functions do modify read_mostly variables @@ -245,8 +219,6 @@ static inline int ftrace_nr_registered_ops(void) } static inline void clear_ftrace_function(void) { } static inline void ftrace_kill(void) { } -static inline void ftrace_stop(void) { } -static inline void ftrace_start(void) { } #endif /* CONFIG_FUNCTION_TRACER */ #ifdef CONFIG_STACK_TRACER -- cgit v1.2.3-59-g8ed1b From 7544256aa20356e506b0d179f9b6abc661847e2f Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 25 Jun 2014 13:26:59 -0400 Subject: ftrace: Remove check for HAVE_FUNCTION_TRACE_MCOUNT_TEST function_trace_stop is no longer used to disable function tracing. This means that archs are no longer limited if it does not support checking this variable in the mcount trampoline. No need to use the list_func for archs that do not support this obsolete method. Acked-by: James Hogan Reviewed-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- Documentation/trace/ftrace-design.txt | 26 -------------------------- include/linux/ftrace.h | 3 +-- 2 files changed, 1 insertion(+), 28 deletions(-) (limited to 'include/linux') diff --git a/Documentation/trace/ftrace-design.txt b/Documentation/trace/ftrace-design.txt index 3f669b9e8852..dd5f916b351d 100644 --- a/Documentation/trace/ftrace-design.txt +++ b/Documentation/trace/ftrace-design.txt @@ -102,30 +102,6 @@ extern void mcount(void); EXPORT_SYMBOL(mcount); -HAVE_FUNCTION_TRACE_MCOUNT_TEST -------------------------------- - -This is an optional optimization for the normal case when tracing is turned off -in the system. If you do not enable this Kconfig option, the common ftrace -code will take care of doing the checking for you. - -To support this feature, you only need to check the function_trace_stop -variable in the mcount function. If it is non-zero, there is no tracing to be -done at all, so you can return. - -This additional pseudo code would simply be: -void mcount(void) -{ - /* save any bare state needed in order to do initial checking */ - -+ if (function_trace_stop) -+ return; - - extern void (*ftrace_trace_function)(unsigned long, unsigned long); - if (ftrace_trace_function != ftrace_stub) -... - - HAVE_FUNCTION_GRAPH_TRACER -------------------------- @@ -328,8 +304,6 @@ void mcount(void) void ftrace_caller(void) { - /* implement HAVE_FUNCTION_TRACE_MCOUNT_TEST if you desire */ - /* save all state needed by the ABI (see paragraph above) */ unsigned long frompc = ...; diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index b7333794554f..c800906235e1 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -33,8 +33,7 @@ * features, then it must call an indirect function that * does. Or at least does enough to prevent any unwelcomed side effects. 
*/ -#if !defined(CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST) || \ - !ARCH_SUPPORTS_FTRACE_OPS +#if !ARCH_SUPPORTS_FTRACE_OPS # define FTRACE_FORCE_LIST_FUNC 1 #else # define FTRACE_FORCE_LIST_FUNC 0 -- cgit v1.2.3-59-g8ed1b From 3a636388bae8390d23f31e061c0c6fdc14525786 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Thu, 26 Jun 2014 11:24:52 -0400 Subject: tracing: Remove function_trace_stop and HAVE_FUNCTION_TRACE_MCOUNT_TEST All users of function_trace_stop and HAVE_FUNCTION_TRACE_MCOUNT_TEST have been removed. We can safely remove them from the kernel. Reviewed-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 2 -- kernel/trace/Kconfig | 5 ----- kernel/trace/ftrace.c | 3 --- 3 files changed, 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index c800906235e1..7a5b7b97e539 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -129,8 +129,6 @@ struct ftrace_ops { #endif }; -extern int function_trace_stop; - /* * Type of the current tracing. */ diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index d4409356f40d..a5da09c899dd 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -29,11 +29,6 @@ config HAVE_FUNCTION_GRAPH_FP_TEST help See Documentation/trace/ftrace-design.txt -config HAVE_FUNCTION_TRACE_MCOUNT_TEST - bool - help - See Documentation/trace/ftrace-design.txt - config HAVE_DYNAMIC_FTRACE bool help diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 70abf97d6e84..4c61f28a08e0 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -80,9 +80,6 @@ static struct ftrace_ops ftrace_list_end __read_mostly = { int ftrace_enabled __read_mostly; static int last_ftrace_enabled; -/* Quick disabling of function tracer. */ -int function_trace_stop __read_mostly; - /* Current function tracing op */ struct ftrace_ops *function_trace_op __read_mostly = &ftrace_list_end; /* What to set function_trace_op to */ -- cgit v1.2.3-59-g8ed1b From 48dc92b9fc3926844257316e75ba11eb5c742b2c Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 25 Jun 2014 16:08:24 -0700 Subject: seccomp: add "seccomp" syscall This adds the new "seccomp" syscall with both an "operation" and "flags" parameter for future expansion. The third argument is a pointer value, used with the SECCOMP_SET_MODE_FILTER operation. Currently, flags must be 0. This is functionally equivalent to prctl(PR_SET_SECCOMP, ...). In addition to the TSYNC flag later in this patch series, there is a non-zero chance that this syscall could be used for configuring a fixed argument area for seccomp-tracer-aware processes to pass syscall arguments in the future. Hence, the use of "seccomp" not simply "seccomp_add_filter" for this syscall. Additionally, this syscall uses operation, flags, and user pointer for arguments because strictly passing arguments via a user pointer would mean seccomp itself would be unable to trivially filter the seccomp syscall itself. 
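A minimal userspace sketch of the intended calling convention, assuming headers that already carry __NR_seccomp and the constants introduced here (it installs a trivial allow-everything filter):

#include <stdio.h>
#include <unistd.h>
#include <sys/prctl.h>
#include <sys/syscall.h>
#include <linux/filter.h>
#include <linux/seccomp.h>

int main(void)
{
	/* One-instruction BPF program: allow every system call. */
	struct sock_filter insns[] = {
		BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.len = sizeof(insns) / sizeof(insns[0]),
		.filter = insns,
	};

	/* Needed for unprivileged callers, exactly as with prctl(). */
	if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0))
		return 1;

	/* op, flags (must be 0 for now), user pointer argument. */
	if (syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER, 0, &prog)) {
		perror("seccomp");
		return 1;
	}
	return 0;
}

The same program installed via prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog) behaves identically; the new entry point only adds the explicit operation and flags arguments.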
Signed-off-by: Kees Cook Reviewed-by: Oleg Nesterov Reviewed-by: Andy Lutomirski --- arch/Kconfig | 1 + arch/x86/syscalls/syscall_32.tbl | 1 + arch/x86/syscalls/syscall_64.tbl | 1 + include/linux/syscalls.h | 2 ++ include/uapi/asm-generic/unistd.h | 4 ++- include/uapi/linux/seccomp.h | 4 +++ kernel/seccomp.c | 55 +++++++++++++++++++++++++++++++++++---- kernel/sys_ni.c | 3 +++ 8 files changed, 65 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/arch/Kconfig b/arch/Kconfig index 97ff872c7acc..0eae9df35b88 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -321,6 +321,7 @@ config HAVE_ARCH_SECCOMP_FILTER - secure_computing is called from a ptrace_event()-safe context - secure_computing return value is checked and a return value of -1 results in the system call being skipped immediately. + - seccomp syscall wired up config SECCOMP_FILTER def_bool y diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl index d6b867921612..7527eac24122 100644 --- a/arch/x86/syscalls/syscall_32.tbl +++ b/arch/x86/syscalls/syscall_32.tbl @@ -360,3 +360,4 @@ 351 i386 sched_setattr sys_sched_setattr 352 i386 sched_getattr sys_sched_getattr 353 i386 renameat2 sys_renameat2 +354 i386 seccomp sys_seccomp diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl index ec255a1646d2..16272a6c12b7 100644 --- a/arch/x86/syscalls/syscall_64.tbl +++ b/arch/x86/syscalls/syscall_64.tbl @@ -323,6 +323,7 @@ 314 common sched_setattr sys_sched_setattr 315 common sched_getattr sys_sched_getattr 316 common renameat2 sys_renameat2 +317 common seccomp sys_seccomp # # x32-specific system call numbers start at 512 to avoid cache impact diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index b0881a0ed322..1713977ee26f 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -866,4 +866,6 @@ asmlinkage long sys_process_vm_writev(pid_t pid, asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type, unsigned long idx1, unsigned long idx2); asmlinkage long sys_finit_module(int fd, const char __user *uargs, int flags); +asmlinkage long sys_seccomp(unsigned int op, unsigned int flags, + const char __user *uargs); #endif diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 333640608087..65acbf0e2867 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -699,9 +699,11 @@ __SYSCALL(__NR_sched_setattr, sys_sched_setattr) __SYSCALL(__NR_sched_getattr, sys_sched_getattr) #define __NR_renameat2 276 __SYSCALL(__NR_renameat2, sys_renameat2) +#define __NR_seccomp 277 +__SYSCALL(__NR_seccomp, sys_seccomp) #undef __NR_syscalls -#define __NR_syscalls 277 +#define __NR_syscalls 278 /* * All syscalls below here should go away really, diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h index ac2dc9f72973..b258878ba754 100644 --- a/include/uapi/linux/seccomp.h +++ b/include/uapi/linux/seccomp.h @@ -10,6 +10,10 @@ #define SECCOMP_MODE_STRICT 1 /* uses hard-coded filter. */ #define SECCOMP_MODE_FILTER 2 /* uses user-supplied filter. */ +/* Valid operations for seccomp syscall. */ +#define SECCOMP_SET_MODE_STRICT 0 +#define SECCOMP_SET_MODE_FILTER 1 + /* * All BPF programs must return a 32-bit value. * The bottom 16-bits are for optional return data. 
diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 05cac2c2eca1..f0652578af75 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -18,6 +18,7 @@ #include #include #include +#include /* #define SECCOMP_DEBUG 1 */ @@ -314,7 +315,7 @@ free_prog: * * Returns 0 on success and non-zero otherwise. */ -static long seccomp_attach_user_filter(char __user *user_filter) +static long seccomp_attach_user_filter(const char __user *user_filter) { struct sock_fprog fprog; long ret = -EFAULT; @@ -517,6 +518,7 @@ out: #ifdef CONFIG_SECCOMP_FILTER /** * seccomp_set_mode_filter: internal function for setting seccomp filter + * @flags: flags to change filter behavior * @filter: struct sock_fprog containing filter * * This function may be called repeatedly to install additional filters. @@ -527,11 +529,16 @@ out: * * Returns 0 on success or -EINVAL on failure. */ -static long seccomp_set_mode_filter(char __user *filter) +static long seccomp_set_mode_filter(unsigned int flags, + const char __user *filter) { const unsigned long seccomp_mode = SECCOMP_MODE_FILTER; long ret = -EINVAL; + /* Validate flags. */ + if (flags != 0) + goto out; + if (!seccomp_may_assign_mode(seccomp_mode)) goto out; @@ -544,12 +551,35 @@ out: return ret; } #else -static inline long seccomp_set_mode_filter(char __user *filter) +static inline long seccomp_set_mode_filter(unsigned int flags, + const char __user *filter) { return -EINVAL; } #endif +/* Common entry point for both prctl and syscall. */ +static long do_seccomp(unsigned int op, unsigned int flags, + const char __user *uargs) +{ + switch (op) { + case SECCOMP_SET_MODE_STRICT: + if (flags != 0 || uargs != NULL) + return -EINVAL; + return seccomp_set_mode_strict(); + case SECCOMP_SET_MODE_FILTER: + return seccomp_set_mode_filter(flags, uargs); + default: + return -EINVAL; + } +} + +SYSCALL_DEFINE3(seccomp, unsigned int, op, unsigned int, flags, + const char __user *, uargs) +{ + return do_seccomp(op, flags, uargs); +} + /** * prctl_set_seccomp: configures current->seccomp.mode * @seccomp_mode: requested mode to use @@ -559,12 +589,27 @@ static inline long seccomp_set_mode_filter(char __user *filter) */ long prctl_set_seccomp(unsigned long seccomp_mode, char __user *filter) { + unsigned int op; + char __user *uargs; + switch (seccomp_mode) { case SECCOMP_MODE_STRICT: - return seccomp_set_mode_strict(); + op = SECCOMP_SET_MODE_STRICT; + /* + * Setting strict mode through prctl always ignored filter, + * so make sure it is always NULL here to pass the internal + * check in do_seccomp(). + */ + uargs = NULL; + break; case SECCOMP_MODE_FILTER: - return seccomp_set_mode_filter(filter); + op = SECCOMP_SET_MODE_FILTER; + uargs = filter; + break; default: return -EINVAL; } + + /* prctl interface doesn't have flags, so they are always zero. */ + return do_seccomp(op, 0, uargs); } diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 36441b51b5df..2904a2105914 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -213,3 +213,6 @@ cond_syscall(compat_sys_open_by_handle_at); /* compare kernel pointers */ cond_syscall(sys_kcmp); + +/* operate on Secure Computing state */ +cond_syscall(sys_seccomp); -- cgit v1.2.3-59-g8ed1b From 1d4457f99928a968767f6405b4a1f50845aa15fd Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 21 May 2014 15:23:46 -0700 Subject: sched: move no_new_privs into new atomic flags Since seccomp transitions between threads requires updates to the no_new_privs flag to be atomic, the flag must be part of an atomic flag set. 
This moves the nnp flag into a separate task field, and introduces accessors. Signed-off-by: Kees Cook Reviewed-by: Oleg Nesterov Reviewed-by: Andy Lutomirski --- fs/exec.c | 4 ++-- include/linux/sched.h | 18 +++++++++++++++--- kernel/seccomp.c | 2 +- kernel/sys.c | 4 ++-- security/apparmor/domain.c | 4 ++-- 5 files changed, 22 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/fs/exec.c b/fs/exec.c index a3d33fe592d6..0f5c272410f6 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1234,7 +1234,7 @@ static void check_unsafe_exec(struct linux_binprm *bprm) * This isn't strictly necessary, but it makes it harder for LSMs to * mess up. */ - if (current->no_new_privs) + if (task_no_new_privs(current)) bprm->unsafe |= LSM_UNSAFE_NO_NEW_PRIVS; t = p; @@ -1272,7 +1272,7 @@ int prepare_binprm(struct linux_binprm *bprm) bprm->cred->egid = current_egid(); if (!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) && - !current->no_new_privs && + !task_no_new_privs(current) && kuid_has_mapping(bprm->cred->user_ns, inode->i_uid) && kgid_has_mapping(bprm->cred->user_ns, inode->i_gid)) { /* Set-uid? */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 306f4f0c987a..0fd19055bb64 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1307,13 +1307,12 @@ struct task_struct { * execve */ unsigned in_iowait:1; - /* task may not gain privileges */ - unsigned no_new_privs:1; - /* Revert to default priority/policy when forking */ unsigned sched_reset_on_fork:1; unsigned sched_contributes_to_load:1; + unsigned long atomic_flags; /* Flags needing atomic access. */ + pid_t pid; pid_t tgid; @@ -1967,6 +1966,19 @@ static inline void memalloc_noio_restore(unsigned int flags) current->flags = (current->flags & ~PF_MEMALLOC_NOIO) | flags; } +/* Per-process atomic flags. */ +#define PFA_NO_NEW_PRIVS 0x00000001 /* May not gain new privileges. */ + +static inline bool task_no_new_privs(struct task_struct *p) +{ + return test_bit(PFA_NO_NEW_PRIVS, &p->atomic_flags); +} + +static inline void task_set_no_new_privs(struct task_struct *p) +{ + set_bit(PFA_NO_NEW_PRIVS, &p->atomic_flags); +} + /* * task->jobctl flags */ diff --git a/kernel/seccomp.c b/kernel/seccomp.c index f0652578af75..d2596136b0d1 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -241,7 +241,7 @@ static long seccomp_attach_filter(struct sock_fprog *fprog) * This avoids scenarios where unprivileged tasks can affect the * behavior of privileged children. */ - if (!current->no_new_privs && + if (!task_no_new_privs(current) && security_capable_noaudit(current_cred(), current_user_ns(), CAP_SYS_ADMIN) != 0) return -EACCES; diff --git a/kernel/sys.c b/kernel/sys.c index 66a751ebf9d9..ce8129192a26 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1990,12 +1990,12 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, if (arg2 != 1 || arg3 || arg4 || arg5) return -EINVAL; - current->no_new_privs = 1; + task_set_no_new_privs(current); break; case PR_GET_NO_NEW_PRIVS: if (arg2 || arg3 || arg4 || arg5) return -EINVAL; - return current->no_new_privs ? 1 : 0; + return task_no_new_privs(current) ? 
1 : 0; case PR_GET_THP_DISABLE: if (arg2 || arg3 || arg4 || arg5) return -EINVAL; diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c index 452567d3a08e..d97cba3e3849 100644 --- a/security/apparmor/domain.c +++ b/security/apparmor/domain.c @@ -621,7 +621,7 @@ int aa_change_hat(const char *hats[], int count, u64 token, bool permtest) * There is no exception for unconfined as change_hat is not * available. */ - if (current->no_new_privs) + if (task_no_new_privs(current)) return -EPERM; /* released below */ @@ -776,7 +776,7 @@ int aa_change_profile(const char *ns_name, const char *hname, bool onexec, * no_new_privs is set because this aways results in a reduction * of permissions. */ - if (current->no_new_privs && !unconfined(profile)) { + if (task_no_new_privs(current) && !unconfined(profile)) { put_cred(cred); return -EPERM; } -- cgit v1.2.3-59-g8ed1b From dbd952127d11bb44a4ea30b08cc60531b6a23d71 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 27 Jun 2014 15:18:48 -0700 Subject: seccomp: introduce writer locking Normally, task_struct.seccomp.filter is only ever read or modified by the task that owns it (current). This property aids in fast access during system call filtering as read access is lockless. Updating the pointer from another task, however, opens up race conditions. To allow cross-thread filter pointer updates, writes to the seccomp fields are now protected by the sighand spinlock (which is shared by all threads in the thread group). Read access remains lockless because pointer updates themselves are atomic. However, writes (or cloning) often entail additional checking (like maximum instruction counts) which require locking to perform safely. In the case of cloning threads, the child is invisible to the system until it enters the task list. To make sure a child can't be cloned from a thread and left in a prior state, seccomp duplication is additionally moved under the sighand lock. Then parent and child are certain have the same seccomp state when they exit the lock. Based on patches by Will Drewry and David Drysdale. Signed-off-by: Kees Cook Reviewed-by: Oleg Nesterov Reviewed-by: Andy Lutomirski --- include/linux/seccomp.h | 6 +++--- kernel/fork.c | 49 ++++++++++++++++++++++++++++++++++++++++++++++++- kernel/seccomp.c | 16 +++++++++++++++- 3 files changed, 66 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h index 4054b0994071..9ff98b4bfe2e 100644 --- a/include/linux/seccomp.h +++ b/include/linux/seccomp.h @@ -14,11 +14,11 @@ struct seccomp_filter; * * @mode: indicates one of the valid values above for controlled * system calls available to a process. - * @filter: The metadata and ruleset for determining what system calls - * are allowed for a task. + * @filter: must always point to a valid seccomp-filter or NULL as it is + * accessed without locking during system call entry. * * @filter must only be accessed from the context of current as there - * is no locking. + * is no read locking. */ struct seccomp { int mode; diff --git a/kernel/fork.c b/kernel/fork.c index 6a13c46cd87d..ed4bc339c9dc 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -315,6 +315,15 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) goto free_ti; tsk->stack = ti; +#ifdef CONFIG_SECCOMP + /* + * We must handle setting up seccomp filters once we're under + * the sighand lock in case orig has changed between now and + * then. 
Until then, filter must be NULL to avoid messing up + * the usage counts on the error path calling free_task. + */ + tsk->seccomp.filter = NULL; +#endif setup_thread_stack(tsk, orig); clear_user_return_notifier(tsk); @@ -1081,6 +1090,39 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) return 0; } +static void copy_seccomp(struct task_struct *p) +{ +#ifdef CONFIG_SECCOMP + /* + * Must be called with sighand->lock held, which is common to + * all threads in the group. Holding cred_guard_mutex is not + * needed because this new task is not yet running and cannot + * be racing exec. + */ + BUG_ON(!spin_is_locked(¤t->sighand->siglock)); + + /* Ref-count the new filter user, and assign it. */ + get_seccomp_filter(current); + p->seccomp = current->seccomp; + + /* + * Explicitly enable no_new_privs here in case it got set + * between the task_struct being duplicated and holding the + * sighand lock. The seccomp state and nnp must be in sync. + */ + if (task_no_new_privs(current)) + task_set_no_new_privs(p); + + /* + * If the parent gained a seccomp mode after copying thread + * flags and between before we held the sighand lock, we have + * to manually enable the seccomp thread flag here. + */ + if (p->seccomp.mode != SECCOMP_MODE_DISABLED) + set_tsk_thread_flag(p, TIF_SECCOMP); +#endif +} + SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr) { current->clear_child_tid = tidptr; @@ -1196,7 +1238,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, goto fork_out; ftrace_graph_init_task(p); - get_seccomp_filter(p); rt_mutex_init_task(p); @@ -1436,6 +1477,12 @@ static struct task_struct *copy_process(unsigned long clone_flags, spin_lock(¤t->sighand->siglock); + /* + * Copy seccomp details explicitly here, in case they were changed + * before holding sighand lock. + */ + copy_seccomp(p); + /* * Process group and session signals need to be delivered to just the * parent before the fork or both the parent and the child after the diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 58125160417c..d5543e787e4e 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -199,6 +199,8 @@ static u32 seccomp_run_filters(int syscall) static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode) { + BUG_ON(!spin_is_locked(¤t->sighand->siglock)); + if (current->seccomp.mode && current->seccomp.mode != seccomp_mode) return false; @@ -207,6 +209,8 @@ static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode) static inline void seccomp_assign_mode(unsigned long seccomp_mode) { + BUG_ON(!spin_is_locked(¤t->sighand->siglock)); + current->seccomp.mode = seccomp_mode; set_tsk_thread_flag(current, TIF_SECCOMP); } @@ -332,6 +336,8 @@ out: * @flags: flags to change filter behavior * @filter: seccomp filter to add to the current process * + * Caller must be holding current->sighand->siglock lock. + * * Returns 0 on success, -ve on error. */ static long seccomp_attach_filter(unsigned int flags, @@ -340,6 +346,8 @@ static long seccomp_attach_filter(unsigned int flags, unsigned long total_insns; struct seccomp_filter *walker; + BUG_ON(!spin_is_locked(¤t->sighand->siglock)); + /* Validate resulting filter length. 
*/ total_insns = filter->prog->len; for (walker = current->seccomp.filter; walker; walker = walker->prev) @@ -529,6 +537,8 @@ static long seccomp_set_mode_strict(void) const unsigned long seccomp_mode = SECCOMP_MODE_STRICT; long ret = -EINVAL; + spin_lock_irq(¤t->sighand->siglock); + if (!seccomp_may_assign_mode(seccomp_mode)) goto out; @@ -539,6 +549,7 @@ static long seccomp_set_mode_strict(void) ret = 0; out: + spin_unlock_irq(¤t->sighand->siglock); return ret; } @@ -566,13 +577,15 @@ static long seccomp_set_mode_filter(unsigned int flags, /* Validate flags. */ if (flags != 0) - goto out; + return -EINVAL; /* Prepare the new filter before holding any locks. */ prepared = seccomp_prepare_user_filter(filter); if (IS_ERR(prepared)) return PTR_ERR(prepared); + spin_lock_irq(¤t->sighand->siglock); + if (!seccomp_may_assign_mode(seccomp_mode)) goto out; @@ -584,6 +597,7 @@ static long seccomp_set_mode_filter(unsigned int flags, seccomp_assign_mode(seccomp_mode); out: + spin_unlock_irq(¤t->sighand->siglock); seccomp_filter_free(prepared); return ret; } -- cgit v1.2.3-59-g8ed1b From c2e1f2e30daa551db3c670c0ccfeab20a540b9e1 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 5 Jun 2014 00:23:17 -0700 Subject: seccomp: implement SECCOMP_FILTER_FLAG_TSYNC Applying restrictive seccomp filter programs to large or diverse codebases often requires handling threads which may be started early in the process lifetime (e.g., by code that is linked in). While it is possible to apply permissive programs prior to process start up, it is difficult to further restrict the kernel ABI to those threads after that point. This change adds a new seccomp syscall flag to SECCOMP_SET_MODE_FILTER for synchronizing thread group seccomp filters at filter installation time. When calling seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, filter) an attempt will be made to synchronize all threads in current's threadgroup to its new seccomp filter program. This is possible iff all threads are using a filter that is an ancestor to the filter current is attempting to synchronize to. NULL filters (where the task is running as SECCOMP_MODE_NONE) are also treated as ancestors allowing threads to be transitioned into SECCOMP_MODE_FILTER. If prctrl(PR_SET_NO_NEW_PRIVS, ...) has been set on the calling thread, no_new_privs will be set for all synchronized threads too. On success, 0 is returned. On failure, the pid of one of the failing threads will be returned and no filters will have been applied. The race conditions against another thread are: - requesting TSYNC (already handled by sighand lock) - performing a clone (already handled by sighand lock) - changing its filter (already handled by sighand lock) - calling exec (handled by cred_guard_mutex) The clone case is assisted by the fact that new threads will have their seccomp state duplicated from their parent before appearing on the tasklist. Holding cred_guard_mutex means that seccomp filters cannot be assigned while in the middle of another thread's exec (potentially bypassing no_new_privs or similar). The call to de_thread() may kill threads waiting for the mutex. Changes across threads to the filter pointer includes a barrier. Based on patches by Will Drewry. 
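A rough sketch of the new flag from the caller's side; the helper name is illustrative and 'prog' is any prepared struct sock_fprog:

#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/filter.h>
#include <linux/seccomp.h>

/* Try to apply 'prog' to every thread in the calling thread group. */
static int install_filter_all_threads(const struct sock_fprog *prog)
{
	long ret = syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER,
			   SECCOMP_FILTER_FLAG_TSYNC, prog);

	if (ret == 0)
		return 0;	/* all threads now run the filter */
	if (ret > 0) {
		/* Nothing was applied; 'ret' is the TID that blocked TSYNC. */
		fprintf(stderr, "thread %ld could not be synchronized\n", ret);
		return -1;
	}
	perror("seccomp");
	return -1;
}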
Suggested-by: Julien Tinnes Signed-off-by: Kees Cook Reviewed-by: Oleg Nesterov Reviewed-by: Andy Lutomirski --- fs/exec.c | 2 +- include/linux/seccomp.h | 2 + include/uapi/linux/seccomp.h | 3 + kernel/seccomp.c | 135 ++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 140 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/fs/exec.c b/fs/exec.c index 0f5c272410f6..ab1f1200ce5d 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1216,7 +1216,7 @@ EXPORT_SYMBOL(install_exec_creds); /* * determine how safe it is to execute the proposed program * - the caller must hold ->cred_guard_mutex to protect against - * PTRACE_ATTACH + * PTRACE_ATTACH or seccomp thread-sync */ static void check_unsafe_exec(struct linux_binprm *bprm) { diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h index 9ff98b4bfe2e..5d586a45a319 100644 --- a/include/linux/seccomp.h +++ b/include/linux/seccomp.h @@ -3,6 +3,8 @@ #include +#define SECCOMP_FILTER_FLAG_MASK (SECCOMP_FILTER_FLAG_TSYNC) + #ifdef CONFIG_SECCOMP #include diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h index b258878ba754..0f238a43ff1e 100644 --- a/include/uapi/linux/seccomp.h +++ b/include/uapi/linux/seccomp.h @@ -14,6 +14,9 @@ #define SECCOMP_SET_MODE_STRICT 0 #define SECCOMP_SET_MODE_FILTER 1 +/* Valid flags for SECCOMP_SET_MODE_FILTER */ +#define SECCOMP_FILTER_FLAG_TSYNC 1 + /* * All BPF programs must return a 32-bit value. * The bottom 16-bits are for optional return data. diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 9065d2c79c56..74f460179171 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -26,6 +26,7 @@ #ifdef CONFIG_SECCOMP_FILTER #include #include +#include #include #include #include @@ -225,6 +226,114 @@ static inline void seccomp_assign_mode(struct task_struct *task, } #ifdef CONFIG_SECCOMP_FILTER +/* Returns 1 if the parent is an ancestor of the child. */ +static int is_ancestor(struct seccomp_filter *parent, + struct seccomp_filter *child) +{ + /* NULL is the root ancestor. */ + if (parent == NULL) + return 1; + for (; child; child = child->prev) + if (child == parent) + return 1; + return 0; +} + +/** + * seccomp_can_sync_threads: checks if all threads can be synchronized + * + * Expects sighand and cred_guard_mutex locks to be held. + * + * Returns 0 on success, -ve on error, or the pid of a thread which was + * either not in the correct seccomp mode or it did not have an ancestral + * seccomp filter. + */ +static inline pid_t seccomp_can_sync_threads(void) +{ + struct task_struct *thread, *caller; + + BUG_ON(!mutex_is_locked(¤t->signal->cred_guard_mutex)); + BUG_ON(!spin_is_locked(¤t->sighand->siglock)); + + /* Validate all threads being eligible for synchronization. */ + caller = current; + for_each_thread(caller, thread) { + pid_t failed; + + /* Skip current, since it is initiating the sync. */ + if (thread == caller) + continue; + + if (thread->seccomp.mode == SECCOMP_MODE_DISABLED || + (thread->seccomp.mode == SECCOMP_MODE_FILTER && + is_ancestor(thread->seccomp.filter, + caller->seccomp.filter))) + continue; + + /* Return the first thread that cannot be synchronized. 
*/ + failed = task_pid_vnr(thread); + /* If the pid cannot be resolved, then return -ESRCH */ + if (unlikely(WARN_ON(failed == 0))) + failed = -ESRCH; + return failed; + } + + return 0; +} + +/** + * seccomp_sync_threads: sets all threads to use current's filter + * + * Expects sighand and cred_guard_mutex locks to be held, and for + * seccomp_can_sync_threads() to have returned success already + * without dropping the locks. + * + */ +static inline void seccomp_sync_threads(void) +{ + struct task_struct *thread, *caller; + + BUG_ON(!mutex_is_locked(¤t->signal->cred_guard_mutex)); + BUG_ON(!spin_is_locked(¤t->sighand->siglock)); + + /* Synchronize all threads. */ + caller = current; + for_each_thread(caller, thread) { + /* Skip current, since it needs no changes. */ + if (thread == caller) + continue; + + /* Get a task reference for the new leaf node. */ + get_seccomp_filter(caller); + /* + * Drop the task reference to the shared ancestor since + * current's path will hold a reference. (This also + * allows a put before the assignment.) + */ + put_seccomp_filter(thread); + smp_store_release(&thread->seccomp.filter, + caller->seccomp.filter); + /* + * Opt the other thread into seccomp if needed. + * As threads are considered to be trust-realm + * equivalent (see ptrace_may_access), it is safe to + * allow one thread to transition the other. + */ + if (thread->seccomp.mode == SECCOMP_MODE_DISABLED) { + /* + * Don't let an unprivileged task work around + * the no_new_privs restriction by creating + * a thread that sets it up, enters seccomp, + * then dies. + */ + if (task_no_new_privs(caller)) + task_set_no_new_privs(thread); + + seccomp_assign_mode(thread, SECCOMP_MODE_FILTER); + } + } +} + /** * seccomp_prepare_filter: Prepares a seccomp filter for use. * @fprog: BPF program to install @@ -364,6 +473,15 @@ static long seccomp_attach_filter(unsigned int flags, if (total_insns > MAX_INSNS_PER_PATH) return -ENOMEM; + /* If thread sync has been requested, check that it is possible. */ + if (flags & SECCOMP_FILTER_FLAG_TSYNC) { + int ret; + + ret = seccomp_can_sync_threads(); + if (ret) + return ret; + } + /* * If there is an existing filter, make it the prev and don't drop its * task reference. @@ -371,6 +489,10 @@ static long seccomp_attach_filter(unsigned int flags, filter->prev = current->seccomp.filter; current->seccomp.filter = filter; + /* Now that the new filter is in place, synchronize to all threads. */ + if (flags & SECCOMP_FILTER_FLAG_TSYNC) + seccomp_sync_threads(); + return 0; } @@ -590,7 +712,7 @@ static long seccomp_set_mode_filter(unsigned int flags, long ret = -EINVAL; /* Validate flags. */ - if (flags != 0) + if (flags & ~SECCOMP_FILTER_FLAG_MASK) return -EINVAL; /* Prepare the new filter before holding any locks. */ @@ -598,6 +720,14 @@ static long seccomp_set_mode_filter(unsigned int flags, if (IS_ERR(prepared)) return PTR_ERR(prepared); + /* + * Make sure we cannot change seccomp or nnp state via TSYNC + * while another thread is in the middle of calling exec. 
+ */ + if (flags & SECCOMP_FILTER_FLAG_TSYNC && + mutex_lock_killable(¤t->signal->cred_guard_mutex)) + goto out_free; + spin_lock_irq(¤t->sighand->siglock); if (!seccomp_may_assign_mode(seccomp_mode)) @@ -612,6 +742,9 @@ static long seccomp_set_mode_filter(unsigned int flags, seccomp_assign_mode(current, seccomp_mode); out: spin_unlock_irq(¤t->sighand->siglock); + if (flags & SECCOMP_FILTER_FLAG_TSYNC) + mutex_unlock(¤t->signal->cred_guard_mutex); +out_free: seccomp_filter_free(prepared); return ret; } -- cgit v1.2.3-59-g8ed1b From 8562c99cdd30217dea3609e268572f8764f401a5 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Fri, 13 Jun 2014 12:22:22 +0100 Subject: efi/reboot: Add generic wrapper around EfiResetSystem() Implement efi_reboot(), which is really just a wrapper around the EfiResetSystem() EFI runtime service, but it does at least allow us to funnel all callers through a single location. It also simplifies the callsites since users no longer need to check to see whether EFI_RUNTIME_SERVICES are enabled. Cc: Tony Luck Tested-by: Mark Salter Signed-off-by: Matt Fleming --- arch/ia64/kernel/process.c | 2 +- arch/x86/kernel/reboot.c | 6 +----- drivers/firmware/efi/Makefile | 2 +- drivers/firmware/efi/reboot.c | 26 ++++++++++++++++++++++++++ include/linux/efi.h | 4 ++++ 5 files changed, 33 insertions(+), 7 deletions(-) create mode 100644 drivers/firmware/efi/reboot.c (limited to 'include/linux') diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index 55d4ba47a907..deed6fa96bb0 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -662,7 +662,7 @@ void machine_restart (char *restart_cmd) { (void) notify_die(DIE_MACHINE_RESTART, restart_cmd, NULL, 0, 0, 0); - (*efi.reset_system)(EFI_RESET_WARM, 0, 0, NULL); + efi_reboot(REBOOT_WARM, NULL); } void diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 52b1157c53eb..09e709fd1830 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -528,11 +528,7 @@ static void native_machine_emergency_restart(void) break; case BOOT_EFI: - if (efi_enabled(EFI_RUNTIME_SERVICES)) - efi.reset_system(reboot_mode == REBOOT_WARM ? 
- EFI_RESET_WARM : - EFI_RESET_COLD, - EFI_SUCCESS, 0, NULL); + efi_reboot(reboot_mode, NULL); reboot_type = BOOT_BIOS; break; diff --git a/drivers/firmware/efi/Makefile b/drivers/firmware/efi/Makefile index a204d1474cec..d8be608a9f3b 100644 --- a/drivers/firmware/efi/Makefile +++ b/drivers/firmware/efi/Makefile @@ -1,7 +1,7 @@ # # Makefile for linux kernel # -obj-$(CONFIG_EFI) += efi.o vars.o +obj-$(CONFIG_EFI) += efi.o vars.o reboot.o obj-$(CONFIG_EFI_VARS) += efivars.o obj-$(CONFIG_EFI_VARS_PSTORE) += efi-pstore.o obj-$(CONFIG_UEFI_CPER) += cper.o diff --git a/drivers/firmware/efi/reboot.c b/drivers/firmware/efi/reboot.c new file mode 100644 index 000000000000..81bf925f70f5 --- /dev/null +++ b/drivers/firmware/efi/reboot.c @@ -0,0 +1,26 @@ +/* + * Copyright (C) 2014 Intel Corporation; author Matt Fleming + * Copyright (c) 2014 Red Hat, Inc., Mark Salter + */ +#include +#include + +void efi_reboot(enum reboot_mode reboot_mode, const char *__unused) +{ + int efi_mode; + + if (!efi_enabled(EFI_RUNTIME_SERVICES)) + return; + + switch (reboot_mode) { + case REBOOT_WARM: + case REBOOT_SOFT: + efi_mode = EFI_RESET_WARM; + break; + default: + efi_mode = EFI_RESET_COLD; + break; + } + + efi.reset_system(efi_mode, EFI_SUCCESS, 0, NULL); +} diff --git a/include/linux/efi.h b/include/linux/efi.h index 3a64f2f85821..e6980ba528ec 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -20,6 +20,7 @@ #include #include #include +#include #include @@ -928,11 +929,14 @@ static inline bool efi_enabled(int feature) { return test_bit(feature, &efi.flags) != 0; } +extern void efi_reboot(enum reboot_mode reboot_mode, const char *__unused); #else static inline bool efi_enabled(int feature) { return false; } +static inline void +efi_reboot(enum reboot_mode reboot_mode, const char *__unused) {} #endif /* -- cgit v1.2.3-59-g8ed1b From 0c5ed61adbdbf2ca5de934642d5be1e971c498c1 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Fri, 13 Jun 2014 12:35:21 +0100 Subject: efi/reboot: Allow powering off machines using EFI Not only can EfiResetSystem() be used to reboot, it can also be used to power down machines. By and large, this functionality doesn't work very well across the range of EFI machines in the wild, so it should definitely only be used as a last resort. In an ideal world, this wouldn't be needed at all. Unfortunately, we're starting to see machines where EFI is the *only* reliable way to power down, and nothing else, not PCI, not ACPI, works. efi_poweroff_required() should be implemented on a per-architecture basis, since exactly when we should be using EFI runtime services is a platform-specific decision. There's no analogue for reboot because each architecture handles reboot very differently - the x86 code in particular is pretty complex. Patches to enable this for specific classes of hardware will be submitted separately. 
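As a sketch, an architecture that must power off through EFI would provide a strong definition overriding the __weak default added by this patch; the platform_needs_efi_poweroff flag below is purely hypothetical:

#include <linux/efi.h>

/* Hypothetical flag an architecture would set during early boot. */
static bool platform_needs_efi_poweroff;

bool efi_poweroff_required(void)
{
	return platform_needs_efi_poweroff;
}

The x86 quirk added in the following patch follows exactly this pattern, keying off the ACPI Hardware Reduced flag.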
Tested-by: Mark Salter Signed-off-by: Matt Fleming --- drivers/firmware/efi/reboot.c | 22 ++++++++++++++++++++++ include/linux/efi.h | 2 ++ 2 files changed, 24 insertions(+) (limited to 'include/linux') diff --git a/drivers/firmware/efi/reboot.c b/drivers/firmware/efi/reboot.c index 81bf925f70f5..e9eeeb3c6345 100644 --- a/drivers/firmware/efi/reboot.c +++ b/drivers/firmware/efi/reboot.c @@ -24,3 +24,25 @@ void efi_reboot(enum reboot_mode reboot_mode, const char *__unused) efi.reset_system(efi_mode, EFI_SUCCESS, 0, NULL); } + +bool __weak efi_poweroff_required(void) +{ + return false; +} + +static void efi_power_off(void) +{ + efi.reset_system(EFI_RESET_SHUTDOWN, EFI_SUCCESS, 0, NULL); +} + +static int __init efi_shutdown_init(void) +{ + if (!efi_enabled(EFI_RUNTIME_SERVICES)) + return -ENODEV; + + if (efi_poweroff_required()) + pm_power_off = efi_power_off; + + return 0; +} +late_initcall(efi_shutdown_init); diff --git a/include/linux/efi.h b/include/linux/efi.h index e6980ba528ec..9917f58ee83e 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -878,6 +878,8 @@ extern void efi_reserve_boot_services(void); extern int efi_get_fdt_params(struct efi_fdt_params *params, int verbose); extern struct efi_memory_map memmap; +extern bool efi_poweroff_required(void); + /* Iterate through an efi_memory_map */ #define for_each_efi_memory_desc(m, md) \ for ((md) = (m)->map; \ -- cgit v1.2.3-59-g8ed1b From 44be28e9dd9880dca3e2cbf7a844f2114e67f2cb Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Fri, 13 Jun 2014 12:39:55 +0100 Subject: x86/reboot: Add EFI reboot quirk for ACPI Hardware Reduced flag It appears that the BayTrail-T class of hardware requires EFI in order to powerdown and reboot and no other reliable method exists. This quirk is generally applicable to all hardware that has the ACPI Hardware Reduced bit set, since usually ACPI would be the preferred method. Cc: Len Brown Cc: Mark Salter Cc: "Rafael J. Wysocki" Signed-off-by: Matt Fleming --- arch/x86/include/asm/efi.h | 6 ++++++ arch/x86/kernel/reboot.c | 18 ++++++++++++++++-- arch/x86/platform/efi/quirks.c | 23 +++++++++++++++++++++++ drivers/firmware/efi/reboot.c | 8 ++++++++ include/linux/efi.h | 1 + 5 files changed, 54 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 9043f365ebf5..044a2fd3c5fe 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -183,6 +183,8 @@ extern struct efi_config *efi_early; #define efi_call_early(f, ...) \ efi_early->call(efi_early->f, __VA_ARGS__); +extern bool efi_reboot_required(void); + #else /* * IF EFI is not configured, have the EFI calls return -ENOSYS. 
@@ -195,6 +197,10 @@ extern struct efi_config *efi_early; #define efi_call5(_f, _a1, _a2, _a3, _a4, _a5) (-ENOSYS) #define efi_call6(_f, _a1, _a2, _a3, _a4, _a5, _a6) (-ENOSYS) static inline void parse_efi_setup(u64 phys_addr, u32 data_len) {} +static inline bool efi_reboot_required(void) +{ + return false; +} #endif /* CONFIG_EFI */ #endif /* _ASM_X86_EFI_H */ diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 09e709fd1830..17962e667a91 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -28,6 +28,7 @@ #include #include #include +#include /* * Power off function, if any @@ -401,12 +402,25 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { static int __init reboot_init(void) { + int rv; + /* * Only do the DMI check if reboot_type hasn't been overridden * on the command line */ - if (reboot_default) - dmi_check_system(reboot_dmi_table); + if (!reboot_default) + return 0; + + /* + * The DMI quirks table takes precedence. If no quirks entry + * matches and the ACPI Hardware Reduced bit is set, force EFI + * reboot. + */ + rv = dmi_check_system(reboot_dmi_table); + + if (!rv && efi_reboot_required()) + reboot_type = BOOT_EFI; + return 0; } core_initcall(reboot_init); diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index b4cb9182f155..1c7380da65ff 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -265,3 +266,25 @@ void __init efi_apply_memmap_quirks(void) if (is_uv_system()) set_bit(EFI_OLD_MEMMAP, &efi.flags); } + +/* + * For most modern platforms the preferred method of powering off is via + * ACPI. However, there are some that are known to require the use of + * EFI runtime services and for which ACPI does not work at all. + * + * Using EFI is a last resort, to be used only if no other option + * exists. + */ +bool efi_reboot_required(void) +{ + if (!acpi_gbl_reduced_hardware) + return false; + + efi_reboot_quirk_mode = EFI_RESET_WARM; + return true; +} + +bool efi_poweroff_required(void) +{ + return !!acpi_gbl_reduced_hardware; +} diff --git a/drivers/firmware/efi/reboot.c b/drivers/firmware/efi/reboot.c index e9eeeb3c6345..9c59d1c795d1 100644 --- a/drivers/firmware/efi/reboot.c +++ b/drivers/firmware/efi/reboot.c @@ -5,6 +5,8 @@ #include #include +int efi_reboot_quirk_mode = -1; + void efi_reboot(enum reboot_mode reboot_mode, const char *__unused) { int efi_mode; @@ -22,6 +24,12 @@ void efi_reboot(enum reboot_mode reboot_mode, const char *__unused) break; } + /* + * If a quirk forced an EFI reset mode, always use that. + */ + if (efi_reboot_quirk_mode != -1) + efi_mode = efi_reboot_quirk_mode; + efi.reset_system(efi_mode, EFI_SUCCESS, 0, NULL); } diff --git a/include/linux/efi.h b/include/linux/efi.h index 9917f58ee83e..bac0f93dc473 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -878,6 +878,7 @@ extern void efi_reserve_boot_services(void); extern int efi_get_fdt_params(struct efi_fdt_params *params, int verbose); extern struct efi_memory_map memmap; +extern int efi_reboot_quirk_mode; extern bool efi_poweroff_required(void); /* Iterate through an efi_memory_map */ -- cgit v1.2.3-59-g8ed1b From 9f27bc543bdf92e179927037e2ab8ed0261579a9 Mon Sep 17 00:00:00 2001 From: Daniel Kiper Date: Mon, 30 Jun 2014 19:52:58 +0200 Subject: efi: Introduce EFI_PARAVIRT flag Introduce EFI_PARAVIRT flag. 
If it is set then kernel runs on EFI platform but it has not direct control on EFI stuff like EFI runtime, tables, structures, etc. If not this means that Linux Kernel has direct access to EFI infrastructure and everything runs as usual. This functionality is used in Xen dom0 because hypervisor has full control on EFI stuff and all calls from dom0 to EFI must be requested via special hypercall which in turn executes relevant EFI code in behalf of dom0. Signed-off-by: Daniel Kiper Signed-off-by: Matt Fleming --- arch/x86/platform/efi/efi.c | 31 +++++++++++++++++++++++++------ drivers/firmware/efi/efi.c | 21 ++++++++++++--------- include/linux/efi.h | 3 ++- 3 files changed, 39 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 7d627a02ed82..d9026538cfdb 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -210,6 +210,9 @@ int __init efi_memblock_x86_reserve_range(void) struct efi_info *e = &boot_params.efi_info; unsigned long pmap; + if (efi_enabled(EFI_PARAVIRT)) + return 0; + #ifdef CONFIG_X86_32 /* Can't handle data above 4GB at this time */ if (e->efi_memmap_hi) { @@ -422,14 +425,24 @@ static int __init efi_runtime_init(void) * the runtime services table so that we can grab the physical * address of several of the EFI runtime functions, needed to * set the firmware into virtual mode. + * + * When EFI_PARAVIRT is in force then we could not map runtime + * service memory region because we do not have direct access to it. + * However, runtime services are available through proxy functions + * (e.g. in case of Xen dom0 EFI implementation they call special + * hypercall which executes relevant EFI functions) and that is why + * they are always enabled. */ - if (efi_enabled(EFI_64BIT)) - rv = efi_runtime_init64(); - else - rv = efi_runtime_init32(); - if (rv) - return rv; + if (!efi_enabled(EFI_PARAVIRT)) { + if (efi_enabled(EFI_64BIT)) + rv = efi_runtime_init64(); + else + rv = efi_runtime_init32(); + + if (rv) + return rv; + } set_bit(EFI_RUNTIME_SERVICES, &efi.flags); @@ -438,6 +451,9 @@ static int __init efi_runtime_init(void) static int __init efi_memmap_init(void) { + if (efi_enabled(EFI_PARAVIRT)) + return 0; + /* Map the EFI memory map */ memmap.map = early_memremap((unsigned long)memmap.phys_map, memmap.nr_map * memmap.desc_size); @@ -914,6 +930,9 @@ static void __init __efi_enter_virtual_mode(void) void __init efi_enter_virtual_mode(void) { + if (efi_enabled(EFI_PARAVIRT)) + return; + if (efi_setup) kexec_enter_virtual_mode(); else diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 023937a63a48..ac88ec05eb70 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -104,16 +104,19 @@ static struct attribute *efi_subsys_attrs[] = { static umode_t efi_attr_is_visible(struct kobject *kobj, struct attribute *attr, int n) { - umode_t mode = attr->mode; - - if (attr == &efi_attr_fw_vendor.attr) - return (efi.fw_vendor == EFI_INVALID_TABLE_ADDR) ? 0 : mode; - else if (attr == &efi_attr_runtime.attr) - return (efi.runtime == EFI_INVALID_TABLE_ADDR) ? 0 : mode; - else if (attr == &efi_attr_config_table.attr) - return (efi.config_table == EFI_INVALID_TABLE_ADDR) ? 
0 : mode; + if (attr == &efi_attr_fw_vendor.attr) { + if (efi_enabled(EFI_PARAVIRT) || + efi.fw_vendor == EFI_INVALID_TABLE_ADDR) + return 0; + } else if (attr == &efi_attr_runtime.attr) { + if (efi.runtime == EFI_INVALID_TABLE_ADDR) + return 0; + } else if (attr == &efi_attr_config_table.attr) { + if (efi.config_table == EFI_INVALID_TABLE_ADDR) + return 0; + } - return mode; + return attr->mode; } static struct attribute_group efi_subsys_attr_group = { diff --git a/include/linux/efi.h b/include/linux/efi.h index bac0f93dc473..c7a29a26e900 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -922,7 +922,8 @@ extern int __init efi_setup_pcdp_console(char *); #define EFI_RUNTIME_SERVICES 3 /* Can we use runtime services? */ #define EFI_MEMMAP 4 /* Can we use EFI memory map? */ #define EFI_64BIT 5 /* Is the firmware 64-bit? */ -#define EFI_ARCH_1 6 /* First arch-specific bit */ +#define EFI_PARAVIRT 6 /* Access is via a paravirt interface */ +#define EFI_ARCH_1 7 /* First arch-specific bit */ #ifdef CONFIG_EFI /* -- cgit v1.2.3-59-g8ed1b From f383d00a0d1f94a7d60c753ec8e3e402889f9622 Mon Sep 17 00:00:00 2001 From: Daniel Kiper Date: Mon, 30 Jun 2014 19:53:04 +0200 Subject: arch/x86: Remove efi_set_rtc_mmss() efi_set_rtc_mmss() is never used to set RTC due to bugs found on many EFI platforms. It is set directly by mach_set_rtc_mmss(). Hence, remove unused efi_set_rtc_mmss() function. Signed-off-by: Daniel Kiper Signed-off-by: Matt Fleming --- arch/x86/platform/efi/efi.c | 36 ------------------------------------ include/linux/efi.h | 1 - 2 files changed, 37 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 85fb16cd6479..850da94fef30 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -104,42 +104,6 @@ static efi_status_t __init phys_efi_set_virtual_address_map( return status; } -int efi_set_rtc_mmss(const struct timespec *now) -{ - unsigned long nowtime = now->tv_sec; - efi_status_t status; - efi_time_t eft; - efi_time_cap_t cap; - struct rtc_time tm; - - status = efi.get_time(&eft, &cap); - if (status != EFI_SUCCESS) { - pr_err("Oops: efitime: can't read time!\n"); - return -1; - } - - rtc_time_to_tm(nowtime, &tm); - if (!rtc_valid_tm(&tm)) { - eft.year = tm.tm_year + 1900; - eft.month = tm.tm_mon + 1; - eft.day = tm.tm_mday; - eft.minute = tm.tm_min; - eft.second = tm.tm_sec; - eft.nanosecond = 0; - } else { - pr_err("%s: Invalid EFI RTC value: write of %lx to EFI RTC failed\n", - __func__, nowtime); - return -1; - } - - status = efi.set_time(&eft); - if (status != EFI_SUCCESS) { - pr_err("Oops: efitime: can't write time!\n"); - return -1; - } - return 0; -} - void efi_get_time(struct timespec *now) { efi_status_t status; diff --git a/include/linux/efi.h b/include/linux/efi.h index c7a29a26e900..59c8acfebca7 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -873,7 +873,6 @@ extern int __init efi_uart_console_only (void); extern void efi_initialize_iomem_resources(struct resource *code_resource, struct resource *data_resource, struct resource *bss_resource); extern void efi_get_time(struct timespec *now); -extern int efi_set_rtc_mmss(const struct timespec *now); extern void efi_reserve_boot_services(void); extern int efi_get_fdt_params(struct efi_fdt_params *params, int verbose); extern struct efi_memory_map memmap; -- cgit v1.2.3-59-g8ed1b From 82f990a82244f8dfeb7e776186dc4811eb651ed7 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Tue, 8 Jul 2014 12:28:33 +0100 Subject: efi: 
Update stale locking comment for struct efivars The comment describing how struct efivars->lock is used hasn't been updated in sync with the code. Fix it. Reported-by: Ard Biesheuvel Cc: Mike Waychison Signed-off-by: Matt Fleming --- include/linux/efi.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 59c8acfebca7..efc681fd5895 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1040,12 +1040,8 @@ struct efivar_operations { struct efivars { /* * ->lock protects two things: - * 1) ->list - adds, removals, reads, writes - * 2) ops.[gs]et_variable() calls. - * It must not be held when creating sysfs entries or calling kmalloc. - * ops.get_next_variable() is only called from register_efivars() - * or efivar_update_sysfs_entries(), - * which is protected by the BKL, so that path is safe. + * 1) efivarfs_list and efivars_sysfs_list + * 2) ->ops calls */ spinlock_t lock; struct kset *kset; -- cgit v1.2.3-59-g8ed1b From 6bb1d272d7c9f5dcfbb790d6aef47d8f82dccbf5 Mon Sep 17 00:00:00 2001 From: Jenny TC Date: Tue, 8 Jul 2014 11:34:18 +0530 Subject: power_supply: Add inlmt,iterm, min/max temp props Add new power supply properties for input current, charge termination current, min and max temperature POWER_SUPPLY_PROP_TEMP_MIN - minimum operatable temperature POWER_SUPPLY_PROP_TEMP_MAX - maximum operatable temperature POWER_SUPPLY_PROP_INPUT_CURRENT_LIMIT - input current limit programmed by charger. Indicates the input current for a charging source. POWER_SUPPLY_PROP_CHARGE_TERM_CURRENT - Charge termination current used to detect the end of charge condition Signed-off-by: Jenny TC Acked-by: Pavel Machek Signed-off-by: Sebastian Reichel --- Documentation/power/power_supply_class.txt | 6 ++++++ drivers/power/power_supply_sysfs.c | 4 ++++ include/linux/power_supply.h | 4 ++++ 3 files changed, 14 insertions(+) (limited to 'include/linux') diff --git a/Documentation/power/power_supply_class.txt b/Documentation/power/power_supply_class.txt index 89a8816990ff..48cff881cb8a 100644 --- a/Documentation/power/power_supply_class.txt +++ b/Documentation/power/power_supply_class.txt @@ -118,6 +118,10 @@ relative, time-based measurements. CONSTANT_CHARGE_CURRENT - constant charge current programmed by charger. CONSTANT_CHARGE_CURRENT_MAX - maximum charge current supported by the power supply object. +INPUT_CURRENT_LIMIT - input current limit programmed by charger. Indicates +the current drawn from a charging source. +CHARGE_TERM_CURRENT - Charge termination current used to detect the end of charge +condition. CONSTANT_CHARGE_VOLTAGE - constant charge voltage programmed by charger. CONSTANT_CHARGE_VOLTAGE_MAX - maximum charge voltage supported by the @@ -140,6 +144,8 @@ TEMP_ALERT_MAX - maximum battery temperature alert. TEMP_AMBIENT - ambient temperature. TEMP_AMBIENT_ALERT_MIN - minimum ambient temperature alert. TEMP_AMBIENT_ALERT_MAX - maximum ambient temperature alert. +TEMP_MIN - minimum operatable temperature +TEMP_MAX - maximum operatable temperature TIME_TO_EMPTY - seconds left for battery to be considered empty (i.e. 
while battery powers a load) diff --git a/drivers/power/power_supply_sysfs.c b/drivers/power/power_supply_sysfs.c index 44420d1e9094..750a20275664 100644 --- a/drivers/power/power_supply_sysfs.c +++ b/drivers/power/power_supply_sysfs.c @@ -167,6 +167,7 @@ static struct device_attribute power_supply_attrs[] = { POWER_SUPPLY_ATTR(constant_charge_voltage_max), POWER_SUPPLY_ATTR(charge_control_limit), POWER_SUPPLY_ATTR(charge_control_limit_max), + POWER_SUPPLY_ATTR(input_current_limit), POWER_SUPPLY_ATTR(energy_full_design), POWER_SUPPLY_ATTR(energy_empty_design), POWER_SUPPLY_ATTR(energy_full), @@ -178,6 +179,8 @@ static struct device_attribute power_supply_attrs[] = { POWER_SUPPLY_ATTR(capacity_alert_max), POWER_SUPPLY_ATTR(capacity_level), POWER_SUPPLY_ATTR(temp), + POWER_SUPPLY_ATTR(temp_max), + POWER_SUPPLY_ATTR(temp_min), POWER_SUPPLY_ATTR(temp_alert_min), POWER_SUPPLY_ATTR(temp_alert_max), POWER_SUPPLY_ATTR(temp_ambient), @@ -189,6 +192,7 @@ static struct device_attribute power_supply_attrs[] = { POWER_SUPPLY_ATTR(time_to_full_avg), POWER_SUPPLY_ATTR(type), POWER_SUPPLY_ATTR(scope), + POWER_SUPPLY_ATTR(charge_term_current), /* Properties of type `const char *' */ POWER_SUPPLY_ATTR(model_name), POWER_SUPPLY_ATTR(manufacturer), diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index f2b76aeaf4e4..f3dea41dbcd2 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -120,6 +120,7 @@ enum power_supply_property { POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE_MAX, POWER_SUPPLY_PROP_CHARGE_CONTROL_LIMIT, POWER_SUPPLY_PROP_CHARGE_CONTROL_LIMIT_MAX, + POWER_SUPPLY_PROP_INPUT_CURRENT_LIMIT, POWER_SUPPLY_PROP_ENERGY_FULL_DESIGN, POWER_SUPPLY_PROP_ENERGY_EMPTY_DESIGN, POWER_SUPPLY_PROP_ENERGY_FULL, @@ -131,6 +132,8 @@ enum power_supply_property { POWER_SUPPLY_PROP_CAPACITY_ALERT_MAX, /* in percents! */ POWER_SUPPLY_PROP_CAPACITY_LEVEL, POWER_SUPPLY_PROP_TEMP, + POWER_SUPPLY_PROP_TEMP_MAX, + POWER_SUPPLY_PROP_TEMP_MIN, POWER_SUPPLY_PROP_TEMP_ALERT_MIN, POWER_SUPPLY_PROP_TEMP_ALERT_MAX, POWER_SUPPLY_PROP_TEMP_AMBIENT, @@ -142,6 +145,7 @@ enum power_supply_property { POWER_SUPPLY_PROP_TIME_TO_FULL_AVG, POWER_SUPPLY_PROP_TYPE, /* use power_supply.type instead */ POWER_SUPPLY_PROP_SCOPE, + POWER_SUPPLY_PROP_CHARGE_TERM_CURRENT, /* Properties of type `const char *' */ POWER_SUPPLY_PROP_MODEL_NAME, POWER_SUPPLY_PROP_MANUFACTURER, -- cgit v1.2.3-59-g8ed1b From 11b8ddab817eb8070a542d33caeb93cccfa4e383 Mon Sep 17 00:00:00 2001 From: Reyad Attiyat Date: Thu, 17 Jul 2014 19:18:00 +0100 Subject: iio: types: Added support for rotation from north usage attributes Added the rotation from north usage attributes to the iio modifier enum and to the iio modifier names array. 
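To make the new modifiers concrete, a hypothetical driver channel using one of them could be declared as below. The table name and info mask are invented for illustration; only the IIO_MOD_NORTH_* constants come from this patch, and IIO_ROT is assumed to be the pre-existing rotation channel type these modifiers pair with.

/* Hypothetical channel table -- for illustration only. */
#include <linux/bitops.h>
#include <linux/iio/iio.h>
#include <linux/iio/types.h>

static const struct iio_chan_spec compass_channels[] = {
	{
		.type = IIO_ROT,		/* rotation from north */
		.modified = 1,
		.channel2 = IIO_MOD_NORTH_MAGN_TILT_COMP,
		.info_mask_separate = BIT(IIO_CHAN_INFO_RAW) |
				      BIT(IIO_CHAN_INFO_SCALE),
	},
};

The core would then expose the reading through sysfs using the name string added above, as something like in_rot_from_north_magnetic_tilt_comp_raw.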
Signed-off-by: Reyad Attiyat Acked-by: Srinivas Pandruvada Signed-off-by: Jonathan Cameron --- drivers/iio/industrialio-core.c | 4 ++++ include/linux/iio/types.h | 4 ++++ 2 files changed, 8 insertions(+) (limited to 'include/linux') diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c index 4b1f375c5659..af3e76d652ba 100644 --- a/drivers/iio/industrialio-core.c +++ b/drivers/iio/industrialio-core.c @@ -87,6 +87,10 @@ static const char * const iio_modifier_names[] = { [IIO_MOD_QUATERNION] = "quaternion", [IIO_MOD_TEMP_AMBIENT] = "ambient", [IIO_MOD_TEMP_OBJECT] = "object", + [IIO_MOD_NORTH_MAGN] = "from_north_magnetic", + [IIO_MOD_NORTH_TRUE] = "from_north_true", + [IIO_MOD_NORTH_MAGN_TILT_COMP] = "from_north_magnetic_tilt_comp", + [IIO_MOD_NORTH_TRUE_TILT_COMP] = "from_north_true_tilt_comp", }; /* relies on pairs of these shared then separate */ diff --git a/include/linux/iio/types.h b/include/linux/iio/types.h index 4a848d6be3bf..4a2af8adf874 100644 --- a/include/linux/iio/types.h +++ b/include/linux/iio/types.h @@ -56,6 +56,10 @@ enum iio_modifier { IIO_MOD_QUATERNION, IIO_MOD_TEMP_AMBIENT, IIO_MOD_TEMP_OBJECT, + IIO_MOD_NORTH_MAGN, + IIO_MOD_NORTH_TRUE, + IIO_MOD_NORTH_MAGN_TILT_COMP, + IIO_MOD_NORTH_TRUE_TILT_COMP }; enum iio_event_type { -- cgit v1.2.3-59-g8ed1b From c6f854d57d704a97adbf952ef0948acc68f3312c Mon Sep 17 00:00:00 2001 From: Veaceslav Falico Date: Thu, 17 Jul 2014 19:46:09 +0200 Subject: net: use dev->name in netdev_pr* when it's available netdev_name() returns dev->name only when the net_device is in NETREG_REGISTERED state. However, dev->name is always populated on creation, so we can easily use it. There are two cases when there's no real name - when it's an empty string or when the name is in form of "eth%d", then netdev_name() returns "unnamed net_device". CC: "David S. Miller" CC: Tom Gundersen Signed-off-by: Veaceslav Falico Acked-by: Tom Gundersen Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 15ed750458ad..70256aa2ae81 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3383,8 +3383,8 @@ extern struct pernet_operations __net_initdata loopback_net_ops; static inline const char *netdev_name(const struct net_device *dev) { - if (dev->reg_state != NETREG_REGISTERED) - return "(unregistered net_device)"; + if (!dev->name[0] || strchr(dev->name, '%')) + return "(unnamed net_device)"; return dev->name; } -- cgit v1.2.3-59-g8ed1b From ccc7f4968a18b980994e622006b84e0195754390 Mon Sep 17 00:00:00 2001 From: Veaceslav Falico Date: Thu, 17 Jul 2014 19:46:10 +0200 Subject: net: print net_device reg_state in netdev_* unless it's registered This way we'll always know in what status the device is, unless it's running normally (i.e. NETDEV_REGISTERED). Also, emit a warning once in case of a bad reg_state. CC: "David S. Miller" CC: Jason Baron CC: Eric Dumazet CC: Vlad Yasevich CC: stephen hemminger CC: Jerry Chu CC: Ben Hutchings CC: Joe Perches Signed-off-by: Veaceslav Falico Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 18 +++++++++++++++++- lib/dynamic_debug.c | 8 +++++--- net/core/dev.c | 8 +++++--- 3 files changed, 27 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 70256aa2ae81..8e8fb3ed574b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3388,6 +3388,21 @@ static inline const char *netdev_name(const struct net_device *dev) return dev->name; } +static inline const char *netdev_reg_state(const struct net_device *dev) +{ + switch (dev->reg_state) { + case NETREG_UNINITIALIZED: return " (uninitialized)"; + case NETREG_REGISTERED: return ""; + case NETREG_UNREGISTERING: return " (unregistering)"; + case NETREG_UNREGISTERED: return " (unregistered)"; + case NETREG_RELEASED: return " (released)"; + case NETREG_DUMMY: return " (dummy)"; + } + + WARN_ONCE(1, "%s: unknown reg_state %d\n", dev->name, dev->reg_state); + return " (unknown)"; +} + __printf(3, 4) int netdev_printk(const char *level, const struct net_device *dev, const char *format, ...); @@ -3444,7 +3459,8 @@ do { \ * file/line information and a backtrace. */ #define netdev_WARN(dev, format, args...) \ - WARN(1, "netdevice: %s\n" format, netdev_name(dev), ##args) + WARN(1, "netdevice: %s%s\n" format, netdev_name(dev), \ + netdev_reg_state(dev), ##args) /* netif printk helpers, similar to netdev_printk */ diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index 7288e38e1757..c9afbe2c445a 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -614,13 +614,15 @@ int __dynamic_netdev_dbg(struct _ddebug *descriptor, char buf[PREFIX_SIZE]; res = dev_printk_emit(7, dev->dev.parent, - "%s%s %s %s: %pV", + "%s%s %s %s%s: %pV", dynamic_emit_prefix(descriptor, buf), dev_driver_string(dev->dev.parent), dev_name(dev->dev.parent), - netdev_name(dev), &vaf); + netdev_name(dev), netdev_reg_state(dev), + &vaf); } else if (dev) { - res = printk(KERN_DEBUG "%s: %pV", netdev_name(dev), &vaf); + res = printk(KERN_DEBUG "%s%s: %pV", netdev_name(dev), + netdev_reg_state(dev), &vaf); } else { res = printk(KERN_DEBUG "(NULL net_device): %pV", &vaf); } diff --git a/net/core/dev.c b/net/core/dev.c index 239722af098d..81d61014fd9b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6950,12 +6950,14 @@ static int __netdev_printk(const char *level, const struct net_device *dev, if (dev && dev->dev.parent) { r = dev_printk_emit(level[1] - '0', dev->dev.parent, - "%s %s %s: %pV", + "%s %s %s%s: %pV", dev_driver_string(dev->dev.parent), dev_name(dev->dev.parent), - netdev_name(dev), vaf); + netdev_name(dev), netdev_reg_state(dev), + vaf); } else if (dev) { - r = printk("%s%s: %pV", level, netdev_name(dev), vaf); + r = printk("%s%s%s: %pV", level, netdev_name(dev), + netdev_reg_state(dev), vaf); } else { r = printk("%s(NULL net_device): %pV", level, vaf); } -- cgit v1.2.3-59-g8ed1b From 6f7eaa47e1de30159277f91f1145a6687f13ffd9 Mon Sep 17 00:00:00 2001 From: Arik Nemtsov Date: Thu, 17 Jul 2014 17:14:24 +0300 Subject: mac80211: add TDLS QoS param IE on setup-confirm When TDLS QoS is supported by the the peer and the local card, add the WMM parameter IE to the setup-confirm frame. Take the QoS settings from the current AP, or if unsupported, use the default values from the specification. This behavior is mandated by IEEE802.11-2012 section 10.22.4. 
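The only non-obvious field in the added IE is the per-AC CW byte: it carries exponents rather than the contention windows themselves (CW = 2^ECW - 1), with ECWmin in the low nibble and ECWmax in the high nibble. The helper below restates the ieee80211_wmm_ecw() conversion from the diff that follows, with a worked value.

#include <linux/log2.h>
#include <linux/types.h>

/* Pack cw_min/cw_max into the single ECW byte of a WMM AC record. */
static u8 wmm_ecw(u16 cw_min, u16 cw_max)
{
	return ((ilog2(cw_min + 1) << 0) & 0x0f) |
	       ((ilog2(cw_max + 1) << 4) & 0xf0);
}

/*
 * Example: the 802.11 default AC_BE parameters cw_min = 15, cw_max = 1023
 * encode as 0xa4 (ECWmin = 4, ECWmax = 10).
 */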
Signed-off-by: Arik Nemtsov Reviewed-by: Liad Kaufman Reviewed-by: Johannes Berg Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 20 ++++++++ net/mac80211/tdls.c | 124 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 144 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 75d17e15da33..63ab3873c5ed 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1001,6 +1001,26 @@ struct ieee80211_vendor_ie { u8 oui_type; } __packed; +struct ieee80211_wmm_ac_param { + u8 aci_aifsn; /* AIFSN, ACM, ACI */ + u8 cw; /* ECWmin, ECWmax (CW = 2^ECW - 1) */ + __le16 txop_limit; +} __packed; + +struct ieee80211_wmm_param_ie { + u8 element_id; /* Element ID: 221 (0xdd); */ + u8 len; /* Length: 24 */ + /* required fields for WMM version 1 */ + u8 oui[3]; /* 00:50:f2 */ + u8 oui_type; /* 2 */ + u8 oui_subtype; /* 1 */ + u8 version; /* 1 for WMM version 1.0 */ + u8 qos_info; /* AP/STA specific QoS info */ + u8 reserved; /* 0 */ + /* AC_BE, AC_BK, AC_VI, AC_VO */ + struct ieee80211_wmm_ac_param ac[4]; +} __packed; + /* Control frames */ struct ieee80211_rts { __le16 frame_control; diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c index bfd8fc4a6b2f..72eebea7e60a 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c @@ -8,6 +8,7 @@ */ #include +#include #include #include "ieee80211_i.h" #include "driver-ops.h" @@ -93,6 +94,74 @@ static void ieee80211_tdls_add_link_ie(struct ieee80211_sub_if_data *sdata, memcpy(lnkid->resp_sta, rsp_addr, ETH_ALEN); } +/* translate numbering in the WMM parameter IE to the mac80211 notation */ +static enum ieee80211_ac_numbers ieee80211_ac_from_wmm(int ac) +{ + switch (ac) { + default: + WARN_ON_ONCE(1); + case 0: + return IEEE80211_AC_BE; + case 1: + return IEEE80211_AC_BK; + case 2: + return IEEE80211_AC_VI; + case 3: + return IEEE80211_AC_VO; + } +} + +static u8 ieee80211_wmm_aci_aifsn(int aifsn, bool acm, int aci) +{ + u8 ret; + + ret = aifsn & 0x0f; + if (acm) + ret |= 0x10; + ret |= (aci << 5) & 0x60; + return ret; +} + +static u8 ieee80211_wmm_ecw(u16 cw_min, u16 cw_max) +{ + return ((ilog2(cw_min + 1) << 0x0) & 0x0f) | + ((ilog2(cw_max + 1) << 0x4) & 0xf0); +} + +static void ieee80211_tdls_add_wmm_param_ie(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb) +{ + struct ieee80211_wmm_param_ie *wmm; + struct ieee80211_tx_queue_params *txq; + int i; + + wmm = (void *)skb_put(skb, sizeof(*wmm)); + memset(wmm, 0, sizeof(*wmm)); + + wmm->element_id = WLAN_EID_VENDOR_SPECIFIC; + wmm->len = sizeof(*wmm) - 2; + + wmm->oui[0] = 0x00; /* Microsoft OUI 00:50:F2 */ + wmm->oui[1] = 0x50; + wmm->oui[2] = 0xf2; + wmm->oui_type = 2; /* WME */ + wmm->oui_subtype = 1; /* WME param */ + wmm->version = 1; /* WME ver */ + wmm->qos_info = 0; /* U-APSD not in use */ + + /* + * Use the EDCA parameters defined for the BSS, or default if the AP + * doesn't support it, as mandated by 802.11-2012 section 10.22.4 + */ + for (i = 0; i < IEEE80211_NUM_ACS; i++) { + txq = &sdata->tx_conf[ieee80211_ac_from_wmm(i)]; + wmm->ac[i].aci_aifsn = ieee80211_wmm_aci_aifsn(txq->aifs, + txq->acm, i); + wmm->ac[i].cw = ieee80211_wmm_ecw(txq->cw_min, txq->cw_max); + wmm->ac[i].txop_limit = cpu_to_le16(txq->txop); + } +} + static void ieee80211_tdls_add_setup_start_ies(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, const u8 *peer, @@ -165,6 +234,56 @@ ieee80211_tdls_add_setup_start_ies(struct ieee80211_sub_if_data *sdata, ieee80211_tdls_add_link_ie(sdata, skb, peer, initiator); } 
+static void +ieee80211_tdls_add_setup_cfm_ies(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb, const u8 *peer, + bool initiator, const u8 *extra_ies, + size_t extra_ies_len) +{ + struct ieee80211_local *local = sdata->local; + size_t offset = 0, noffset; + struct sta_info *sta; + u8 *pos; + + rcu_read_lock(); + + sta = sta_info_get(sdata, peer); + if (WARN_ON_ONCE(!sta)) { + rcu_read_unlock(); + return; + } + + /* add any custom IEs that go before the QoS IE */ + if (extra_ies_len) { + static const u8 before_qos[] = { + WLAN_EID_RSN, + }; + noffset = ieee80211_ie_split(extra_ies, extra_ies_len, + before_qos, + ARRAY_SIZE(before_qos), + offset); + pos = skb_put(skb, noffset - offset); + memcpy(pos, extra_ies + offset, noffset - offset); + offset = noffset; + } + + /* add the QoS param IE if both the peer and we support it */ + if (local->hw.queues >= IEEE80211_NUM_ACS && + test_sta_flag(sta, WLAN_STA_WME)) + ieee80211_tdls_add_wmm_param_ie(sdata, skb); + + /* add any remaining IEs */ + if (extra_ies_len) { + noffset = extra_ies_len; + pos = skb_put(skb, noffset - offset); + memcpy(pos, extra_ies + offset, noffset - offset); + } + + ieee80211_tdls_add_link_ie(sdata, skb, peer, initiator); + + rcu_read_unlock(); +} + static void ieee80211_tdls_add_ies(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, const u8 *peer, u8 action_code, u16 status_code, @@ -183,6 +302,11 @@ static void ieee80211_tdls_add_ies(struct ieee80211_sub_if_data *sdata, extra_ies_len); break; case WLAN_TDLS_SETUP_CONFIRM: + if (status_code == 0) + ieee80211_tdls_add_setup_cfm_ies(sdata, skb, peer, + initiator, extra_ies, + extra_ies_len); + break; case WLAN_TDLS_TEARDOWN: case WLAN_TDLS_DISCOVERY_REQUEST: if (extra_ies_len) -- cgit v1.2.3-59-g8ed1b From 5b0c0b16d48d20e26859907df4dd449e3b3c7f4c Mon Sep 17 00:00:00 2001 From: Stratos Karafotis Date: Mon, 30 Jun 2014 19:59:33 +0300 Subject: cpufreq: Introduce new relation for freq selection Introduce CPUFREQ_RELATION_C for frequency selection. It selects the frequency with the minimum euclidean distance to target. In case of equal distance between 2 frequencies, it will select the greater frequency. Signed-off-by: Stratos Karafotis Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/freq_table.c | 12 +++++++++++- include/linux/cpufreq.h | 1 + 2 files changed, 12 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/cpufreq/freq_table.c b/drivers/cpufreq/freq_table.c index 1632981c4b25..df14766a8e06 100644 --- a/drivers/cpufreq/freq_table.c +++ b/drivers/cpufreq/freq_table.c @@ -117,7 +117,7 @@ int cpufreq_frequency_table_target(struct cpufreq_policy *policy, .frequency = 0, }; struct cpufreq_frequency_table *pos; - unsigned int freq, i = 0; + unsigned int freq, diff, i = 0; pr_debug("request for target %u kHz (relation: %u) for cpu %u\n", target_freq, relation, policy->cpu); @@ -127,6 +127,7 @@ int cpufreq_frequency_table_target(struct cpufreq_policy *policy, suboptimal.frequency = ~0; break; case CPUFREQ_RELATION_L: + case CPUFREQ_RELATION_C: optimal.frequency = ~0; break; } @@ -168,6 +169,15 @@ int cpufreq_frequency_table_target(struct cpufreq_policy *policy, } } break; + case CPUFREQ_RELATION_C: + diff = abs(freq - target_freq); + if (diff < optimal.frequency || + (diff == optimal.frequency && + freq > table[optimal.driver_data].frequency)) { + optimal.frequency = diff; + optimal.driver_data = i; + } + break; } } if (optimal.driver_data > i) { diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 8f8ae95c6e27..7d1955afa62c 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -176,6 +176,7 @@ static inline void disable_cpufreq(void) { } #define CPUFREQ_RELATION_L 0 /* lowest frequency at or above target */ #define CPUFREQ_RELATION_H 1 /* highest frequency below or at target */ +#define CPUFREQ_RELATION_C 2 /* closest frequency to target */ struct freq_attr { struct attribute attr; -- cgit v1.2.3-59-g8ed1b From 4362175dd65d1816a18ac3f14107d788d5fced27 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Mon, 14 Jul 2014 18:29:16 +0100 Subject: mfd: ab8500-debugfs: BIG clean-up When checkpatch is run on ab8500-debugfs.c it screamed blue murder! This patch fixes up all of the errors/warnings reported: WARNING: line over 80 characters + err = seq_printf(s, " [0x%02X/0x%02X]: 0x%02X\n", WARNING: Prefer [subsystem eg: netdev]_info([subsystem]dev, ... then dev_info(dev, ... then pr_info(... to printk(KERN_INFO ... + printk(KERN_INFO" [0x%02X/0x%02X]: 0x%02X\n", WARNING: Prefer seq_puts to seq_printf + seq_printf(s, AB8500_NAME_STRING " register values:\n"); WARNING: Prefer seq_puts to seq_printf + seq_printf(s, AB8500_NAME_STRING " register values:\n"); WARNING: Prefer [subsystem eg: netdev]_info([subsystem]dev, ... then dev_info(dev, ... then pr_info(... to printk(KERN_INFO ... + printk(KERN_INFO"ab8500 register values:\n"); WARNING: Prefer [subsystem eg: netdev]_info([subsystem]dev, ... then dev_info(dev, ... then pr_info(... to printk(KERN_INFO ... + printk(KERN_INFO" bank 0x%02X:\n", i); WARNING: externs should be avoided in .c files +extern int prcmu_abb_read(u8 slave, u8 reg, u8 *value, u8 size); WARNING: quoted string split across lines + pr_info("Saving all ABB registers at \"ab8500_complete_register_dump\" " + "for crash analyze.\n"); WARNING: Prefer [subsystem eg: netdev]_err([subsystem]dev, ... then dev_err(dev, ... then pr_err(... to printk(KERN_ERR ... 
+ printk(KERN_ERR "abx500_set_reg failed %d, %d", err, __LINE__); WARNING: Prefer seq_puts to seq_printf + seq_printf(s, "name: number: number of: wake:\n"); WARNING: line over 80 characters + return single_open(file, ab8500_print_modem_registers, inode->i_private); WARNING: line over 80 characters + return single_open(file, ab8500_gpadc_btemp_ball_print, inode->i_private); WARNING: line over 80 characters + return single_open(file, ab8500_gpadc_main_bat_v_print, inode->i_private); WARNING: line over 80 characters + vbat_true_meas_convert = ab8500_gpadc_ad_to_voltage(gpadc, VBAT_TRUE_MEAS, WARNING: line over 80 characters +static int ab8540_gpadc_vbat_true_meas_and_ibat_print(struct seq_file *s, void *p) WARNING: line over 80 characters +static const struct file_operations ab8540_gpadc_vbat_true_meas_and_ibat_fops = { WARNING: line over 80 characters + vmain_l, vmain_h, btemp_l, btemp_h, vbat_l, vbat_h, ibat_l, ibat_h); WARNING: quoted string split across lines + dev_err(dev, "debugfs error input: " + "should be egal to 1, 4, 8 or 16\n"); WARNING: Missing a blank line after declarations + char *s = b; + if ((*s == '0') && ((*(s+1) == 'x') || (*(s+1) == 'X'))) { WARNING: simple_strtoul is obsolete, use kstrtoul instead + loc.mask = simple_strtoul(b, &b, 0); WARNING: simple_strtol is obsolete, use kstrtol instead + loc.shift = simple_strtol(b, &b, 0); WARNING: simple_strtoul is obsolete, use kstrtoul instead + loc.bank = simple_strtoul(b, &b, 0); WARNING: simple_strtoul is obsolete, use kstrtoul instead + loc.addr = simple_strtoul(b, &b, 0); WARNING: simple_strtoul is obsolete, use kstrtoul instead + val = simple_strtoul(b, &b, 0); WARNING: quoted string split across lines + pr_warn("HWREG request: %s, %s, addr=0x%08X, mask=0x%X, shift=%d" + "value=0x%X\n", (write) ? "write" : "read", WARNING: Prefer [subsystem eg: netdev]_err([subsystem]dev, ... then dev_err(dev, ... then pr_err(... to printk(KERN_ERR ... + printk(KERN_ERR "sysfs_create_file failed %d\n", err); WARNING: Prefer [subsystem eg: netdev]_err([subsystem]dev, ... then dev_err(dev, ... then pr_err(... to printk(KERN_ERR ... 
+ printk(KERN_ERR "request_threaded_irq failed %d, %lu\n", ERROR: code indent should use tabs where possible + err, user_val);$ WARNING: please, no spaces at the start of a line + err, user_val);$ WARNING: Missing a blank line after declarations + struct resource *res; + debug_bank = AB8500_MISC; ERROR: space required after that ',' (ctx:VxV) + sizeof(*dev_attr)*num_irqs,GFP_KERNEL); ^ WARNING: return of an errno should typically be -ve (return -ENXIO) + return ENXIO; WARNING: line over 80 characters + file = debugfs_create_file("register-bank", (S_IRUGO | S_IWUSR | S_IWGRP), WARNING: line over 80 characters + file = debugfs_create_file("register-address", (S_IRUGO | S_IWUSR | S_IWGRP), WARNING: line over 80 characters + file = debugfs_create_file("register-value", (S_IRUGO | S_IWUSR | S_IWGRP), WARNING: line over 80 characters + file = debugfs_create_file("irq-subscribe", (S_IRUGO | S_IWUSR | S_IWGRP), WARNING: line over 80 characters + file = debugfs_create_file("irq-unsubscribe", (S_IRUGO | S_IWUSR | S_IWGRP), WARNING: line over 80 characters + file = debugfs_create_file("all-modem-registers", (S_IRUGO | S_IWUSR | S_IWGRP), WARNING: line over 80 characters + file = debugfs_create_file("main_charger_v", (S_IRUGO | S_IWUSR | S_IWGRP), WARNING: line over 80 characters + file = debugfs_create_file("main_charger_c", (S_IRUGO | S_IWUSR | S_IWGRP), WARNING: line over 80 characters + file = debugfs_create_file("usb_charger_c", (S_IRUGO | S_IWUSR | S_IWGRP), WARNING: line over 80 characters + file = debugfs_create_file("xtal_temp", (S_IRUGO | S_IWUSR | S_IWGRP), WARNING: line over 80 characters + ab8500_gpadc_dir, &plf->dev, &ab8540_gpadc_xtal_temp_fops); WARNING: line over 80 characters + file = debugfs_create_file("vbattruemeas", (S_IRUGO | S_IWUSR | S_IWGRP), WARNING: line over 80 characters + file = debugfs_create_file("otp_calib", (S_IRUGO | S_IWUSR | S_IWGRP), WARNING: line over 80 characters + ab8500_gpadc_dir, &plf->dev, &ab8540_gpadc_otp_calib_fops); total: 2 errors, 44 warnings, 3230 lines checked Signed-off-by: Lee Jones --- drivers/mfd/ab8500-debugfs.c | 288 +++++++++++++++++++++++--------------- include/linux/mfd/abx500/ab8500.h | 1 + 2 files changed, 173 insertions(+), 116 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/ab8500-debugfs.c b/drivers/mfd/ab8500-debugfs.c index f7f271c16f2c..b2c7e3b1edfa 100644 --- a/drivers/mfd/ab8500-debugfs.c +++ b/drivers/mfd/ab8500-debugfs.c @@ -135,10 +135,10 @@ struct ab8500_prcmu_ranges { /* hwreg- "mask" and "shift" entries ressources */ struct hwreg_cfg { u32 bank; /* target bank */ - u32 addr; /* target address */ + unsigned long addr; /* target address */ uint fmt; /* format */ - uint mask; /* read/write mask, applied before any bit shift */ - int shift; /* bit shift (read:right shift, write:left shift */ + unsigned long mask; /* read/write mask, applied before any bit shift */ + long shift; /* bit shift (read:right shift, write:left shift */ }; /* fmt bit #0: 0=hexa, 1=dec */ #define REG_FMT_DEC(c) ((c)->fmt & 0x1) @@ -1304,16 +1304,17 @@ static int ab8500_registers_print(struct device *dev, u32 bank, } if (s) { - err = seq_printf(s, " [0x%02X/0x%02X]: 0x%02X\n", - bank, reg, value); + err = seq_printf(s, + " [0x%02X/0x%02X]: 0x%02X\n", + bank, reg, value); if (err < 0) { /* Error is not returned here since * the output is wanted in any case */ return 0; } } else { - printk(KERN_INFO" [0x%02X/0x%02X]: 0x%02X\n", - bank, reg, value); + dev_info(dev, " [0x%02X/0x%02X]: 0x%02X\n", + bank, reg, value); } } } @@ -1325,7 
+1326,7 @@ static int ab8500_print_bank_registers(struct seq_file *s, void *p) struct device *dev = s->private; u32 bank = debug_bank; - seq_printf(s, AB8500_NAME_STRING " register values:\n"); + seq_puts(s, AB8500_NAME_STRING " register values:\n"); seq_printf(s, " bank 0x%02X:\n", bank); @@ -1351,7 +1352,7 @@ static int ab8500_print_all_banks(struct seq_file *s, void *p) struct device *dev = s->private; unsigned int i; - seq_printf(s, AB8500_NAME_STRING " register values:\n"); + seq_puts(s, AB8500_NAME_STRING " register values:\n"); for (i = 0; i < AB8500_NUM_BANKS; i++) { seq_printf(s, " bank 0x%02X:\n", i); @@ -1366,10 +1367,10 @@ void ab8500_dump_all_banks(struct device *dev) { unsigned int i; - printk(KERN_INFO"ab8500 register values:\n"); + dev_info(dev, "ab8500 register values:\n"); for (i = 1; i < AB8500_NUM_BANKS; i++) { - printk(KERN_INFO" bank 0x%02X:\n", i); + dev_info(dev, " bank 0x%02X:\n", i); ab8500_registers_print(dev, i, NULL); } } @@ -1383,8 +1384,6 @@ static struct ab8500_register_dump u8 value; } ab8500_complete_register_dump[DUMP_MAX_REGS]; -extern int prcmu_abb_read(u8 slave, u8 reg, u8 *value, u8 size); - /* This shall only be called upon kernel panic! */ void ab8500_dump_all_banks_to_mem(void) { @@ -1392,8 +1391,7 @@ void ab8500_dump_all_banks_to_mem(void) u8 bank; int err = 0; - pr_info("Saving all ABB registers at \"ab8500_complete_register_dump\" " - "for crash analyze.\n"); + pr_info("Saving all ABB registers for crash analysis.\n"); for (bank = 0; bank < AB8500_NUM_BANKS; bank++) { for (i = 0; i < debug_ranges[bank].num_ranges; i++) { @@ -1563,7 +1561,7 @@ static ssize_t ab8500_val_write(struct file *file, err = abx500_set_register_interruptible(dev, (u8)debug_bank, debug_address, (u8)user_val); if (err < 0) { - printk(KERN_ERR "abx500_set_reg failed %d, %d", err, __LINE__); + pr_err("abx500_set_reg failed %d, %d", err, __LINE__); return -EINVAL; } @@ -1595,7 +1593,7 @@ static int ab8500_interrupts_print(struct seq_file *s, void *p) { int line; - seq_printf(s, "name: number: number of: wake:\n"); + seq_puts(s, "name: number: number of: wake:\n"); for (line = 0; line < num_interrupt_lines; line++) { struct irq_desc *desc = irq_to_desc(line + irq_first); @@ -1721,7 +1719,8 @@ static int ab8500_print_modem_registers(struct seq_file *s, void *p) static int ab8500_modem_open(struct inode *inode, struct file *file) { - return single_open(file, ab8500_print_modem_registers, inode->i_private); + return single_open(file, ab8500_print_modem_registers, + inode->i_private); } static const struct file_operations ab8500_modem_fops = { @@ -1750,7 +1749,8 @@ static int ab8500_gpadc_bat_ctrl_print(struct seq_file *s, void *p) static int ab8500_gpadc_bat_ctrl_open(struct inode *inode, struct file *file) { - return single_open(file, ab8500_gpadc_bat_ctrl_print, inode->i_private); + return single_open(file, ab8500_gpadc_bat_ctrl_print, + inode->i_private); } static const struct file_operations ab8500_gpadc_bat_ctrl_fops = { @@ -1780,7 +1780,8 @@ static int ab8500_gpadc_btemp_ball_print(struct seq_file *s, void *p) static int ab8500_gpadc_btemp_ball_open(struct inode *inode, struct file *file) { - return single_open(file, ab8500_gpadc_btemp_ball_print, inode->i_private); + return single_open(file, ab8500_gpadc_btemp_ball_print, + inode->i_private); } static const struct file_operations ab8500_gpadc_btemp_ball_fops = { @@ -1961,7 +1962,8 @@ static int ab8500_gpadc_main_bat_v_print(struct seq_file *s, void *p) static int ab8500_gpadc_main_bat_v_open(struct inode *inode, struct file 
*file) { - return single_open(file, ab8500_gpadc_main_bat_v_print, inode->i_private); + return single_open(file, ab8500_gpadc_main_bat_v_print, + inode->i_private); } static const struct file_operations ab8500_gpadc_main_bat_v_fops = { @@ -2081,7 +2083,8 @@ static int ab8500_gpadc_bk_bat_v_print(struct seq_file *s, void *p) static int ab8500_gpadc_bk_bat_v_open(struct inode *inode, struct file *file) { - return single_open(file, ab8500_gpadc_bk_bat_v_print, inode->i_private); + return single_open(file, ab8500_gpadc_bk_bat_v_print, + inode->i_private); } static const struct file_operations ab8500_gpadc_bk_bat_v_fops = { @@ -2110,7 +2113,8 @@ static int ab8500_gpadc_die_temp_print(struct seq_file *s, void *p) static int ab8500_gpadc_die_temp_open(struct inode *inode, struct file *file) { - return single_open(file, ab8500_gpadc_die_temp_print, inode->i_private); + return single_open(file, ab8500_gpadc_die_temp_print, + inode->i_private); } static const struct file_operations ab8500_gpadc_die_temp_fops = { @@ -2189,8 +2193,9 @@ static int ab8540_gpadc_vbat_true_meas_print(struct seq_file *s, void *p) gpadc = ab8500_gpadc_get("ab8500-gpadc.0"); vbat_true_meas_raw = ab8500_gpadc_read_raw(gpadc, VBAT_TRUE_MEAS, avg_sample, trig_edge, trig_timer, conv_type); - vbat_true_meas_convert = ab8500_gpadc_ad_to_voltage(gpadc, VBAT_TRUE_MEAS, - vbat_true_meas_raw); + vbat_true_meas_convert = + ab8500_gpadc_ad_to_voltage(gpadc, VBAT_TRUE_MEAS, + vbat_true_meas_raw); return seq_printf(s, "%d,0x%X\n", vbat_true_meas_convert, vbat_true_meas_raw); @@ -2284,7 +2289,8 @@ static const struct file_operations ab8540_gpadc_vbat_meas_and_ibat_fops = { .owner = THIS_MODULE, }; -static int ab8540_gpadc_vbat_true_meas_and_ibat_print(struct seq_file *s, void *p) +static int ab8540_gpadc_vbat_true_meas_and_ibat_print(struct seq_file *s, + void *p) { int vbat_true_meas_raw; int vbat_true_meas_convert; @@ -2313,7 +2319,8 @@ static int ab8540_gpadc_vbat_true_meas_and_ibat_open(struct inode *inode, inode->i_private); } -static const struct file_operations ab8540_gpadc_vbat_true_meas_and_ibat_fops = { +static const struct file_operations +ab8540_gpadc_vbat_true_meas_and_ibat_fops = { .open = ab8540_gpadc_vbat_true_meas_and_ibat_open, .read = seq_read, .llseek = seq_lseek, @@ -2367,14 +2374,15 @@ static int ab8540_gpadc_otp_cal_print(struct seq_file *s, void *p) ab8540_gpadc_get_otp(gpadc, &vmain_l, &vmain_h, &btemp_l, &btemp_h, &vbat_l, &vbat_h, &ibat_l, &ibat_h); return seq_printf(s, "VMAIN_L:0x%X\n" - "VMAIN_H:0x%X\n" - "BTEMP_L:0x%X\n" - "BTEMP_H:0x%X\n" - "VBAT_L:0x%X\n" - "VBAT_H:0x%X\n" - "IBAT_L:0x%X\n" - "IBAT_H:0x%X\n", - vmain_l, vmain_h, btemp_l, btemp_h, vbat_l, vbat_h, ibat_l, ibat_h); + "VMAIN_H:0x%X\n" + "BTEMP_L:0x%X\n" + "BTEMP_H:0x%X\n" + "VBAT_L:0x%X\n" + "VBAT_H:0x%X\n" + "IBAT_L:0x%X\n" + "IBAT_H:0x%X\n", + vmain_l, vmain_h, btemp_l, btemp_h, + vbat_l, vbat_h, ibat_l, ibat_h); } static int ab8540_gpadc_otp_cal_open(struct inode *inode, struct file *file) @@ -2418,8 +2426,8 @@ static ssize_t ab8500_gpadc_avg_sample_write(struct file *file, || (user_avg_sample == SAMPLE_16)) { avg_sample = (u8) user_avg_sample; } else { - dev_err(dev, "debugfs error input: " - "should be egal to 1, 4, 8 or 16\n"); + dev_err(dev, + "debugfs err input: should be egal to 1, 4, 8 or 16\n"); return -EINVAL; } @@ -2578,6 +2586,7 @@ static const struct file_operations ab8500_gpadc_conv_type_fops = { static int strval_len(char *b) { char *s = b; + if ((*s == '0') && ((*(s+1) == 'x') || (*(s+1) == 'X'))) { s += 2; for (; *s && (*s != 
' ') && (*s != '\n'); s++) { @@ -2642,13 +2651,17 @@ static ssize_t hwreg_common_write(char *b, struct hwreg_cfg *cfg, b += (*(b+2) == ' ') ? 3 : 6; if (strval_len(b) == 0) return -EINVAL; - loc.mask = simple_strtoul(b, &b, 0); + ret = kstrtoul(b, 0, &loc.mask); + if (ret) + return ret; } else if ((!strncmp(b, "-s ", 3)) || (!strncmp(b, "-shift ", 7))) { b += (*(b+2) == ' ') ? 3 : 7; if (strval_len(b) == 0) return -EINVAL; - loc.shift = simple_strtol(b, &b, 0); + ret = kstrtol(b, 0, &loc.shift); + if (ret) + return ret; } else { return -EINVAL; } @@ -2656,29 +2669,36 @@ static ssize_t hwreg_common_write(char *b, struct hwreg_cfg *cfg, /* get arg BANK and ADDRESS */ if (strval_len(b) == 0) return -EINVAL; - loc.bank = simple_strtoul(b, &b, 0); + ret = kstrtouint(b, 0, &loc.bank); + if (ret) + return ret; while (*b == ' ') b++; if (strval_len(b) == 0) return -EINVAL; - loc.addr = simple_strtoul(b, &b, 0); + ret = kstrtoul(b, 0, &loc.addr); + if (ret) + return ret; if (write) { while (*b == ' ') b++; if (strval_len(b) == 0) return -EINVAL; - val = simple_strtoul(b, &b, 0); + ret = kstrtouint(b, 0, &val); + if (ret) + return ret; } /* args are ok, update target cfg (mainly for read) */ *cfg = loc; #ifdef ABB_HWREG_DEBUG - pr_warn("HWREG request: %s, %s, addr=0x%08X, mask=0x%X, shift=%d" - "value=0x%X\n", (write) ? "write" : "read", - REG_FMT_DEC(cfg) ? "decimal" : "hexa", - cfg->addr, cfg->mask, cfg->shift, val); + pr_warn("HWREG request: %s, %s,\n" + " addr=0x%08X, mask=0x%X, shift=%d" "value=0x%X\n", + (write) ? "write" : "read", + REG_FMT_DEC(cfg) ? "decimal" : "hexa", + cfg->addr, cfg->mask, cfg->shift, val); #endif if (!write) @@ -2814,7 +2834,7 @@ static ssize_t ab8500_subscribe_write(struct file *file, dev_attr[irq_index]->attr.mode = S_IRUGO; err = sysfs_create_file(&dev->kobj, &dev_attr[irq_index]->attr); if (err < 0) { - printk(KERN_ERR "sysfs_create_file failed %d\n", err); + pr_info("sysfs_create_file failed %d\n", err); return err; } @@ -2822,8 +2842,8 @@ static ssize_t ab8500_subscribe_write(struct file *file, IRQF_SHARED | IRQF_NO_SUSPEND, "ab8500-debug", &dev->kobj); if (err < 0) { - printk(KERN_ERR "request_threaded_irq failed %d, %lu\n", - err, user_val); + pr_info("request_threaded_irq failed %d, %lu\n", + err, user_val); sysfs_remove_file(&dev->kobj, &dev_attr[irq_index]->attr); return err; } @@ -2945,6 +2965,7 @@ static int ab8500_debug_probe(struct platform_device *plf) struct dentry *file; struct ab8500 *ab8500; struct resource *res; + debug_bank = AB8500_MISC; debug_address = AB8500_REV_REG & 0x00FF; @@ -2957,7 +2978,7 @@ static int ab8500_debug_probe(struct platform_device *plf) return -ENOMEM; dev_attr = devm_kzalloc(&plf->dev, - sizeof(*dev_attr)*num_irqs,GFP_KERNEL); + sizeof(*dev_attr)*num_irqs, GFP_KERNEL); if (!dev_attr) return -ENOMEM; @@ -2968,23 +2989,20 @@ static int ab8500_debug_probe(struct platform_device *plf) res = platform_get_resource_byname(plf, 0, "IRQ_AB8500"); if (!res) { - dev_err(&plf->dev, "AB8500 irq not found, err %d\n", - irq_first); - return ENXIO; + dev_err(&plf->dev, "AB8500 irq not found, err %d\n", irq_first); + return -ENXIO; } irq_ab8500 = res->start; irq_first = platform_get_irq_byname(plf, "IRQ_FIRST"); if (irq_first < 0) { - dev_err(&plf->dev, "First irq not found, err %d\n", - irq_first); + dev_err(&plf->dev, "First irq not found, err %d\n", irq_first); return irq_first; } irq_last = platform_get_irq_byname(plf, "IRQ_LAST"); if (irq_last < 0) { - dev_err(&plf->dev, "Last irq not found, err %d\n", - irq_last); + dev_err(&plf->dev, 
"Last irq not found, err %d\n", irq_last); return irq_last; } @@ -2993,37 +3011,41 @@ static int ab8500_debug_probe(struct platform_device *plf) goto err; ab8500_gpadc_dir = debugfs_create_dir(AB8500_ADC_NAME_STRING, - ab8500_dir); + ab8500_dir); if (!ab8500_gpadc_dir) goto err; - file = debugfs_create_file("all-bank-registers", S_IRUGO, - ab8500_dir, &plf->dev, &ab8500_registers_fops); + file = debugfs_create_file("all-bank-registers", S_IRUGO, ab8500_dir, + &plf->dev, &ab8500_registers_fops); if (!file) goto err; - file = debugfs_create_file("all-banks", S_IRUGO, - ab8500_dir, &plf->dev, &ab8500_all_banks_fops); + file = debugfs_create_file("all-banks", S_IRUGO, ab8500_dir, + &plf->dev, &ab8500_all_banks_fops); if (!file) goto err; - file = debugfs_create_file("register-bank", (S_IRUGO | S_IWUSR | S_IWGRP), - ab8500_dir, &plf->dev, &ab8500_bank_fops); + file = debugfs_create_file("register-bank", + (S_IRUGO | S_IWUSR | S_IWGRP), + ab8500_dir, &plf->dev, &ab8500_bank_fops); if (!file) goto err; - file = debugfs_create_file("register-address", (S_IRUGO | S_IWUSR | S_IWGRP), - ab8500_dir, &plf->dev, &ab8500_address_fops); + file = debugfs_create_file("register-address", + (S_IRUGO | S_IWUSR | S_IWGRP), + ab8500_dir, &plf->dev, &ab8500_address_fops); if (!file) goto err; - file = debugfs_create_file("register-value", (S_IRUGO | S_IWUSR | S_IWGRP), - ab8500_dir, &plf->dev, &ab8500_val_fops); + file = debugfs_create_file("register-value", + (S_IRUGO | S_IWUSR | S_IWGRP), + ab8500_dir, &plf->dev, &ab8500_val_fops); if (!file) goto err; - file = debugfs_create_file("irq-subscribe", (S_IRUGO | S_IWUSR | S_IWGRP), - ab8500_dir, &plf->dev, &ab8500_subscribe_fops); + file = debugfs_create_file("irq-subscribe", + (S_IRUGO | S_IWUSR | S_IWGRP), ab8500_dir, + &plf->dev, &ab8500_subscribe_fops); if (!file) goto err; @@ -3041,150 +3063,184 @@ static int ab8500_debug_probe(struct platform_device *plf) num_interrupt_lines = AB8540_NR_IRQS; } - file = debugfs_create_file("interrupts", (S_IRUGO), - ab8500_dir, &plf->dev, &ab8500_interrupts_fops); + file = debugfs_create_file("interrupts", (S_IRUGO), ab8500_dir, + &plf->dev, &ab8500_interrupts_fops); if (!file) goto err; - file = debugfs_create_file("irq-unsubscribe", (S_IRUGO | S_IWUSR | S_IWGRP), - ab8500_dir, &plf->dev, &ab8500_unsubscribe_fops); + file = debugfs_create_file("irq-unsubscribe", + (S_IRUGO | S_IWUSR | S_IWGRP), ab8500_dir, + &plf->dev, &ab8500_unsubscribe_fops); if (!file) goto err; file = debugfs_create_file("hwreg", (S_IRUGO | S_IWUSR | S_IWGRP), - ab8500_dir, &plf->dev, &ab8500_hwreg_fops); + ab8500_dir, &plf->dev, &ab8500_hwreg_fops); if (!file) goto err; - file = debugfs_create_file("all-modem-registers", (S_IRUGO | S_IWUSR | S_IWGRP), - ab8500_dir, &plf->dev, &ab8500_modem_fops); + file = debugfs_create_file("all-modem-registers", + (S_IRUGO | S_IWUSR | S_IWGRP), + ab8500_dir, &plf->dev, &ab8500_modem_fops); if (!file) goto err; file = debugfs_create_file("bat_ctrl", (S_IRUGO | S_IWUSR | S_IWGRP), - ab8500_gpadc_dir, &plf->dev, &ab8500_gpadc_bat_ctrl_fops); + ab8500_gpadc_dir, &plf->dev, + &ab8500_gpadc_bat_ctrl_fops); if (!file) goto err; file = debugfs_create_file("btemp_ball", (S_IRUGO | S_IWUSR | S_IWGRP), - ab8500_gpadc_dir, &plf->dev, &ab8500_gpadc_btemp_ball_fops); + ab8500_gpadc_dir, + &plf->dev, &ab8500_gpadc_btemp_ball_fops); if (!file) goto err; - file = debugfs_create_file("main_charger_v", (S_IRUGO | S_IWUSR | S_IWGRP), - ab8500_gpadc_dir, &plf->dev, &ab8500_gpadc_main_charger_v_fops); + file = 
debugfs_create_file("main_charger_v", + (S_IRUGO | S_IWUSR | S_IWGRP), + ab8500_gpadc_dir, &plf->dev, + &ab8500_gpadc_main_charger_v_fops); if (!file) goto err; - file = debugfs_create_file("acc_detect1", (S_IRUGO | S_IWUSR | S_IWGRP), - ab8500_gpadc_dir, &plf->dev, &ab8500_gpadc_acc_detect1_fops); + file = debugfs_create_file("acc_detect1", + (S_IRUGO | S_IWUSR | S_IWGRP), + ab8500_gpadc_dir, &plf->dev, + &ab8500_gpadc_acc_detect1_fops); if (!file) goto err; - file = debugfs_create_file("acc_detect2", (S_IRUGO | S_IWUSR | S_IWGRP), - ab8500_gpadc_dir, &plf->dev, &ab8500_gpadc_acc_detect2_fops); + file = debugfs_create_file("acc_detect2", + (S_IRUGO | S_IWUSR | S_IWGRP), + ab8500_gpadc_dir, &plf->dev, + &ab8500_gpadc_acc_detect2_fops); if (!file) goto err; file = debugfs_create_file("adc_aux1", (S_IRUGO | S_IWUSR | S_IWGRP), - ab8500_gpadc_dir, &plf->dev, &ab8500_gpadc_aux1_fops); + ab8500_gpadc_dir, &plf->dev, + &ab8500_gpadc_aux1_fops); if (!file) goto err; file = debugfs_create_file("adc_aux2", (S_IRUGO | S_IWUSR | S_IWGRP), - ab8500_gpadc_dir, &plf->dev, &ab8500_gpadc_aux2_fops); + ab8500_gpadc_dir, &plf->dev, + &ab8500_gpadc_aux2_fops); if (!file) goto err; file = debugfs_create_file("main_bat_v", (S_IRUGO | S_IWUSR | S_IWGRP), - ab8500_gpadc_dir, &plf->dev, &ab8500_gpadc_main_bat_v_fops); + ab8500_gpadc_dir, &plf->dev, + &ab8500_gpadc_main_bat_v_fops); if (!file) goto err; file = debugfs_create_file("vbus_v", (S_IRUGO | S_IWUSR | S_IWGRP), - ab8500_gpadc_dir, &plf->dev, &ab8500_gpadc_vbus_v_fops); + ab8500_gpadc_dir, &plf->dev, + &ab8500_gpadc_vbus_v_fops); if (!file) goto err; - file = debugfs_create_file("main_charger_c", (S_IRUGO | S_IWUSR | S_IWGRP), - ab8500_gpadc_dir, &plf->dev, &ab8500_gpadc_main_charger_c_fops); + file = debugfs_create_file("main_charger_c", + (S_IRUGO | S_IWUSR | S_IWGRP), + ab8500_gpadc_dir, &plf->dev, + &ab8500_gpadc_main_charger_c_fops); if (!file) goto err; - file = debugfs_create_file("usb_charger_c", (S_IRUGO | S_IWUSR | S_IWGRP), - ab8500_gpadc_dir, &plf->dev, &ab8500_gpadc_usb_charger_c_fops); + file = debugfs_create_file("usb_charger_c", + (S_IRUGO | S_IWUSR | S_IWGRP), + ab8500_gpadc_dir, + &plf->dev, &ab8500_gpadc_usb_charger_c_fops); if (!file) goto err; file = debugfs_create_file("bk_bat_v", (S_IRUGO | S_IWUSR | S_IWGRP), - ab8500_gpadc_dir, &plf->dev, &ab8500_gpadc_bk_bat_v_fops); + ab8500_gpadc_dir, &plf->dev, + &ab8500_gpadc_bk_bat_v_fops); if (!file) goto err; file = debugfs_create_file("die_temp", (S_IRUGO | S_IWUSR | S_IWGRP), - ab8500_gpadc_dir, &plf->dev, &ab8500_gpadc_die_temp_fops); + ab8500_gpadc_dir, &plf->dev, + &ab8500_gpadc_die_temp_fops); if (!file) goto err; file = debugfs_create_file("usb_id", (S_IRUGO | S_IWUSR | S_IWGRP), - ab8500_gpadc_dir, &plf->dev, &ab8500_gpadc_usb_id_fops); + ab8500_gpadc_dir, &plf->dev, + &ab8500_gpadc_usb_id_fops); if (!file) goto err; if (is_ab8540(ab8500)) { - file = debugfs_create_file("xtal_temp", (S_IRUGO | S_IWUSR | S_IWGRP), - ab8500_gpadc_dir, &plf->dev, &ab8540_gpadc_xtal_temp_fops); + file = debugfs_create_file("xtal_temp", + (S_IRUGO | S_IWUSR | S_IWGRP), + ab8500_gpadc_dir, &plf->dev, + &ab8540_gpadc_xtal_temp_fops); if (!file) goto err; - file = debugfs_create_file("vbattruemeas", (S_IRUGO | S_IWUSR | S_IWGRP), - ab8500_gpadc_dir, &plf->dev, - &ab8540_gpadc_vbat_true_meas_fops); + file = debugfs_create_file("vbattruemeas", + (S_IRUGO | S_IWUSR | S_IWGRP), + ab8500_gpadc_dir, &plf->dev, + &ab8540_gpadc_vbat_true_meas_fops); if (!file) goto err; file = 
debugfs_create_file("batctrl_and_ibat", - (S_IRUGO | S_IWUGO), ab8500_gpadc_dir, - &plf->dev, &ab8540_gpadc_bat_ctrl_and_ibat_fops); + (S_IRUGO | S_IWUGO), + ab8500_gpadc_dir, + &plf->dev, + &ab8540_gpadc_bat_ctrl_and_ibat_fops); if (!file) goto err; file = debugfs_create_file("vbatmeas_and_ibat", - (S_IRUGO | S_IWUGO), ab8500_gpadc_dir, - &plf->dev, - &ab8540_gpadc_vbat_meas_and_ibat_fops); + (S_IRUGO | S_IWUGO), + ab8500_gpadc_dir, &plf->dev, + &ab8540_gpadc_vbat_meas_and_ibat_fops); if (!file) goto err; file = debugfs_create_file("vbattruemeas_and_ibat", - (S_IRUGO | S_IWUGO), ab8500_gpadc_dir, - &plf->dev, - &ab8540_gpadc_vbat_true_meas_and_ibat_fops); + (S_IRUGO | S_IWUGO), + ab8500_gpadc_dir, + &plf->dev, + &ab8540_gpadc_vbat_true_meas_and_ibat_fops); if (!file) goto err; file = debugfs_create_file("battemp_and_ibat", - (S_IRUGO | S_IWUGO), ab8500_gpadc_dir, + (S_IRUGO | S_IWUGO), + ab8500_gpadc_dir, &plf->dev, &ab8540_gpadc_bat_temp_and_ibat_fops); if (!file) goto err; - file = debugfs_create_file("otp_calib", (S_IRUGO | S_IWUSR | S_IWGRP), - ab8500_gpadc_dir, &plf->dev, &ab8540_gpadc_otp_calib_fops); + file = debugfs_create_file("otp_calib", + (S_IRUGO | S_IWUSR | S_IWGRP), + ab8500_gpadc_dir, + &plf->dev, &ab8540_gpadc_otp_calib_fops); if (!file) goto err; } file = debugfs_create_file("avg_sample", (S_IRUGO | S_IWUSR | S_IWGRP), - ab8500_gpadc_dir, &plf->dev, &ab8500_gpadc_avg_sample_fops); + ab8500_gpadc_dir, &plf->dev, + &ab8500_gpadc_avg_sample_fops); if (!file) goto err; file = debugfs_create_file("trig_edge", (S_IRUGO | S_IWUSR | S_IWGRP), - ab8500_gpadc_dir, &plf->dev, &ab8500_gpadc_trig_edge_fops); + ab8500_gpadc_dir, &plf->dev, + &ab8500_gpadc_trig_edge_fops); if (!file) goto err; file = debugfs_create_file("trig_timer", (S_IRUGO | S_IWUSR | S_IWGRP), - ab8500_gpadc_dir, &plf->dev, &ab8500_gpadc_trig_timer_fops); + ab8500_gpadc_dir, &plf->dev, + &ab8500_gpadc_trig_timer_fops); if (!file) goto err; file = debugfs_create_file("conv_type", (S_IRUGO | S_IWUSR | S_IWGRP), - ab8500_gpadc_dir, &plf->dev, &ab8500_gpadc_conv_type_fops); + ab8500_gpadc_dir, &plf->dev, + &ab8500_gpadc_conv_type_fops); if (!file) goto err; diff --git a/include/linux/mfd/abx500/ab8500.h b/include/linux/mfd/abx500/ab8500.h index 4e7fe7417fc9..9475fee2bfc5 100644 --- a/include/linux/mfd/abx500/ab8500.h +++ b/include/linux/mfd/abx500/ab8500.h @@ -505,6 +505,7 @@ static inline int is_ab9540_2p0_or_earlier(struct ab8500 *ab) void ab8500_override_turn_on_stat(u8 mask, u8 set); #ifdef CONFIG_AB8500_DEBUG +extern int prcmu_abb_read(u8 slave, u8 reg, u8 *value, u8 size); void ab8500_dump_all_banks(struct device *dev); void ab8500_debug_register_interrupt(int line); #else -- cgit v1.2.3-59-g8ed1b From 6f1c1e71d933f58a6248f1681aededdd407f32a8 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Fri, 4 Jul 2014 22:24:04 +0200 Subject: mfd: max77686: Convert to use regmap_irq By using the generic IRQ support in the Register map API, it is possible to get rid max77686-irq.c and simplify the code. 
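For readers unfamiliar with regmap-irq, the sketch below shows the general pattern this conversion relies on: describe each interrupt's status/mask bit once and let the core supply the threaded handler, masking and ack. The register offsets, interrupt list and names here are invented placeholders, not the actual max77686 values (most of the max77686.c changes are trimmed from this include/linux view).

/* Generic regmap-irq pattern -- illustrative values only. */
#include <linux/bitops.h>
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/regmap.h>

static const struct regmap_irq example_irqs[] = {
	{ .reg_offset = 0, .mask = BIT(0) },	/* bit 0 of first status reg */
	{ .reg_offset = 0, .mask = BIT(1) },	/* bit 1 of first status reg */
};

static const struct regmap_irq_chip example_irq_chip = {
	.name		= "example-pmic",
	.status_base	= 0x00,		/* first INTx status register */
	.mask_base	= 0x02,		/* first INTxMSK register */
	.num_regs	= 1,		/* consecutive status/mask registers */
	.irqs		= example_irqs,
	.num_irqs	= ARRAY_SIZE(example_irqs),
};

static struct regmap_irq_chip_data *example_irq_data;

static int example_init_irq(struct regmap *regmap, int irq)
{
	/* The core registers a threaded handler and an irq_domain for us. */
	return regmap_add_irq_chip(regmap, irq,
				   IRQF_TRIGGER_LOW | IRQF_ONESHOT, 0,
				   &example_irq_chip, &example_irq_data);
}

Consumers such as the RTC sub-driver then map their interrupt with regmap_irq_get_virq() instead of the removed max77686_irq_* helpers.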
Suggested-by: Krzysztof Kozlowski Signed-off-by: Javier Martinez Canillas Reviewed-by: Doug Anderson Tested-by: Doug Anderson Signed-off-by: Lee Jones --- drivers/mfd/Kconfig | 1 + drivers/mfd/Makefile | 2 +- drivers/mfd/max77686-irq.c | 319 ----------------------------------- drivers/mfd/max77686.c | 97 ++++++++++- drivers/rtc/rtc-max77686.c | 27 +-- include/linux/mfd/max77686-private.h | 31 +++- include/linux/mfd/max77686.h | 2 - 7 files changed, 123 insertions(+), 356 deletions(-) delete mode 100644 drivers/mfd/max77686-irq.c (limited to 'include/linux') diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index b8d9ca0b68e2..30102042dcaf 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -384,6 +384,7 @@ config MFD_MAX77686 depends on I2C=y select MFD_CORE select REGMAP_I2C + select REGMAP_IRQ select IRQ_DOMAIN help Say yes here to add support for Maxim Semiconductor MAX77686. diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index 4e2bc255b8b0..f00148782d9b 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -115,7 +115,7 @@ da9063-objs := da9063-core.o da9063-irq.o da9063-i2c.o obj-$(CONFIG_MFD_DA9063) += da9063.o obj-$(CONFIG_MFD_MAX14577) += max14577.o -obj-$(CONFIG_MFD_MAX77686) += max77686.o max77686-irq.o +obj-$(CONFIG_MFD_MAX77686) += max77686.o obj-$(CONFIG_MFD_MAX77693) += max77693.o obj-$(CONFIG_MFD_MAX8907) += max8907.o max8925-objs := max8925-core.o max8925-i2c.o diff --git a/drivers/mfd/max77686-irq.c b/drivers/mfd/max77686-irq.c deleted file mode 100644 index cdc3280e2ec7..000000000000 --- a/drivers/mfd/max77686-irq.c +++ /dev/null @@ -1,319 +0,0 @@ -/* - * max77686-irq.c - Interrupt controller support for MAX77686 - * - * Copyright (C) 2012 Samsung Electronics Co.Ltd - * Chiwoong Byun - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * This driver is based on max8997-irq.c - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -enum { - MAX77686_DEBUG_IRQ_INFO = 1 << 0, - MAX77686_DEBUG_IRQ_MASK = 1 << 1, - MAX77686_DEBUG_IRQ_INT = 1 << 2, -}; - -static int debug_mask = 0; -module_param(debug_mask, int, 0); -MODULE_PARM_DESC(debug_mask, "Set debug_mask : 0x0=off 0x1=IRQ_INFO 0x2=IRQ_MASK 0x4=IRQ_INI)"); - -static const u8 max77686_mask_reg[] = { - [PMIC_INT1] = MAX77686_REG_INT1MSK, - [PMIC_INT2] = MAX77686_REG_INT2MSK, - [RTC_INT] = MAX77686_RTC_INTM, -}; - -static struct regmap *max77686_get_regmap(struct max77686_dev *max77686, - enum max77686_irq_source src) -{ - switch (src) { - case PMIC_INT1 ... 
PMIC_INT2: - return max77686->regmap; - case RTC_INT: - return max77686->rtc_regmap; - default: - return ERR_PTR(-EINVAL); - } -} - -struct max77686_irq_data { - int mask; - enum max77686_irq_source group; -}; - -#define DECLARE_IRQ(idx, _group, _mask) \ - [(idx)] = { .group = (_group), .mask = (_mask) } -static const struct max77686_irq_data max77686_irqs[] = { - DECLARE_IRQ(MAX77686_PMICIRQ_PWRONF, PMIC_INT1, 1 << 0), - DECLARE_IRQ(MAX77686_PMICIRQ_PWRONR, PMIC_INT1, 1 << 1), - DECLARE_IRQ(MAX77686_PMICIRQ_JIGONBF, PMIC_INT1, 1 << 2), - DECLARE_IRQ(MAX77686_PMICIRQ_JIGONBR, PMIC_INT1, 1 << 3), - DECLARE_IRQ(MAX77686_PMICIRQ_ACOKBF, PMIC_INT1, 1 << 4), - DECLARE_IRQ(MAX77686_PMICIRQ_ACOKBR, PMIC_INT1, 1 << 5), - DECLARE_IRQ(MAX77686_PMICIRQ_ONKEY1S, PMIC_INT1, 1 << 6), - DECLARE_IRQ(MAX77686_PMICIRQ_MRSTB, PMIC_INT1, 1 << 7), - DECLARE_IRQ(MAX77686_PMICIRQ_140C, PMIC_INT2, 1 << 0), - DECLARE_IRQ(MAX77686_PMICIRQ_120C, PMIC_INT2, 1 << 1), - DECLARE_IRQ(MAX77686_RTCIRQ_RTC60S, RTC_INT, 1 << 0), - DECLARE_IRQ(MAX77686_RTCIRQ_RTCA1, RTC_INT, 1 << 1), - DECLARE_IRQ(MAX77686_RTCIRQ_RTCA2, RTC_INT, 1 << 2), - DECLARE_IRQ(MAX77686_RTCIRQ_SMPL, RTC_INT, 1 << 3), - DECLARE_IRQ(MAX77686_RTCIRQ_RTC1S, RTC_INT, 1 << 4), - DECLARE_IRQ(MAX77686_RTCIRQ_WTSR, RTC_INT, 1 << 5), -}; - -static void max77686_irq_lock(struct irq_data *data) -{ - struct max77686_dev *max77686 = irq_get_chip_data(data->irq); - - if (debug_mask & MAX77686_DEBUG_IRQ_MASK) - pr_info("%s\n", __func__); - - mutex_lock(&max77686->irqlock); -} - -static void max77686_irq_sync_unlock(struct irq_data *data) -{ - struct max77686_dev *max77686 = irq_get_chip_data(data->irq); - int i; - - for (i = 0; i < MAX77686_IRQ_GROUP_NR; i++) { - u8 mask_reg = max77686_mask_reg[i]; - struct regmap *map = max77686_get_regmap(max77686, i); - - if (debug_mask & MAX77686_DEBUG_IRQ_MASK) - pr_debug("%s: mask_reg[%d]=0x%x, cur=0x%x\n", - __func__, i, mask_reg, max77686->irq_masks_cur[i]); - - if (mask_reg == MAX77686_REG_INVALID || - IS_ERR_OR_NULL(map)) - continue; - - max77686->irq_masks_cache[i] = max77686->irq_masks_cur[i]; - - regmap_write(map, max77686_mask_reg[i], - max77686->irq_masks_cur[i]); - } - - mutex_unlock(&max77686->irqlock); -} - -static const inline struct max77686_irq_data *to_max77686_irq(int irq) -{ - struct irq_data *data = irq_get_irq_data(irq); - return &max77686_irqs[data->hwirq]; -} - -static void max77686_irq_mask(struct irq_data *data) -{ - struct max77686_dev *max77686 = irq_get_chip_data(data->irq); - const struct max77686_irq_data *irq_data = to_max77686_irq(data->irq); - - max77686->irq_masks_cur[irq_data->group] |= irq_data->mask; - - if (debug_mask & MAX77686_DEBUG_IRQ_MASK) - pr_info("%s: group=%d, cur=0x%x\n", - __func__, irq_data->group, - max77686->irq_masks_cur[irq_data->group]); -} - -static void max77686_irq_unmask(struct irq_data *data) -{ - struct max77686_dev *max77686 = irq_get_chip_data(data->irq); - const struct max77686_irq_data *irq_data = to_max77686_irq(data->irq); - - max77686->irq_masks_cur[irq_data->group] &= ~irq_data->mask; - - if (debug_mask & MAX77686_DEBUG_IRQ_MASK) - pr_info("%s: group=%d, cur=0x%x\n", - __func__, irq_data->group, - max77686->irq_masks_cur[irq_data->group]); -} - -static struct irq_chip max77686_irq_chip = { - .name = "max77686", - .irq_bus_lock = max77686_irq_lock, - .irq_bus_sync_unlock = max77686_irq_sync_unlock, - .irq_mask = max77686_irq_mask, - .irq_unmask = max77686_irq_unmask, -}; - -static irqreturn_t max77686_irq_thread(int irq, void *data) -{ - struct max77686_dev 
*max77686 = data; - unsigned int irq_reg[MAX77686_IRQ_GROUP_NR] = {}; - unsigned int irq_src; - int ret; - int i, cur_irq; - - ret = regmap_read(max77686->regmap, MAX77686_REG_INTSRC, &irq_src); - if (ret < 0) { - dev_err(max77686->dev, "Failed to read interrupt source: %d\n", - ret); - return IRQ_NONE; - } - - if (debug_mask & MAX77686_DEBUG_IRQ_INT) - pr_info("%s: irq_src=0x%x\n", __func__, irq_src); - - if (irq_src == MAX77686_IRQSRC_PMIC) { - ret = regmap_bulk_read(max77686->regmap, - MAX77686_REG_INT1, irq_reg, 2); - if (ret < 0) { - dev_err(max77686->dev, "Failed to read interrupt source: %d\n", - ret); - return IRQ_NONE; - } - - if (debug_mask & MAX77686_DEBUG_IRQ_INT) - pr_info("%s: int1=0x%x, int2=0x%x\n", __func__, - irq_reg[PMIC_INT1], irq_reg[PMIC_INT2]); - } - - if (irq_src & MAX77686_IRQSRC_RTC) { - ret = regmap_read(max77686->rtc_regmap, - MAX77686_RTC_INT, &irq_reg[RTC_INT]); - if (ret < 0) { - dev_err(max77686->dev, "Failed to read interrupt source: %d\n", - ret); - return IRQ_NONE; - } - - if (debug_mask & MAX77686_DEBUG_IRQ_INT) - pr_info("%s: rtc int=0x%x\n", __func__, - irq_reg[RTC_INT]); - - } - - for (i = 0; i < MAX77686_IRQ_GROUP_NR; i++) - irq_reg[i] &= ~max77686->irq_masks_cur[i]; - - for (i = 0; i < MAX77686_IRQ_NR; i++) { - if (irq_reg[max77686_irqs[i].group] & max77686_irqs[i].mask) { - cur_irq = irq_find_mapping(max77686->irq_domain, i); - if (cur_irq) - handle_nested_irq(cur_irq); - } - } - - return IRQ_HANDLED; -} - -static int max77686_irq_domain_map(struct irq_domain *d, unsigned int irq, - irq_hw_number_t hw) -{ - struct max77686_dev *max77686 = d->host_data; - - irq_set_chip_data(irq, max77686); - irq_set_chip_and_handler(irq, &max77686_irq_chip, handle_edge_irq); - irq_set_nested_thread(irq, 1); -#ifdef CONFIG_ARM - set_irq_flags(irq, IRQF_VALID); -#else - irq_set_noprobe(irq); -#endif - return 0; -} - -static struct irq_domain_ops max77686_irq_domain_ops = { - .map = max77686_irq_domain_map, -}; - -int max77686_irq_init(struct max77686_dev *max77686) -{ - struct irq_domain *domain; - int i; - int ret; - int val; - struct regmap *map; - - mutex_init(&max77686->irqlock); - - if (max77686->irq_gpio && !max77686->irq) { - max77686->irq = gpio_to_irq(max77686->irq_gpio); - - if (debug_mask & MAX77686_DEBUG_IRQ_INT) { - ret = gpio_request(max77686->irq_gpio, "pmic_irq"); - if (ret < 0) { - dev_err(max77686->dev, - "Failed to request gpio %d with ret:" - "%d\n", max77686->irq_gpio, ret); - return IRQ_NONE; - } - - gpio_direction_input(max77686->irq_gpio); - val = gpio_get_value(max77686->irq_gpio); - gpio_free(max77686->irq_gpio); - pr_info("%s: gpio_irq=%x\n", __func__, val); - } - } - - if (!max77686->irq) { - dev_err(max77686->dev, "irq is not specified\n"); - return -ENODEV; - } - - /* Mask individual interrupt sources */ - for (i = 0; i < MAX77686_IRQ_GROUP_NR; i++) { - max77686->irq_masks_cur[i] = 0xff; - max77686->irq_masks_cache[i] = 0xff; - map = max77686_get_regmap(max77686, i); - - if (IS_ERR_OR_NULL(map)) - continue; - if (max77686_mask_reg[i] == MAX77686_REG_INVALID) - continue; - - regmap_write(map, max77686_mask_reg[i], 0xff); - } - domain = irq_domain_add_linear(NULL, MAX77686_IRQ_NR, - &max77686_irq_domain_ops, max77686); - if (!domain) { - dev_err(max77686->dev, "could not create irq domain\n"); - return -ENODEV; - } - max77686->irq_domain = domain; - - ret = request_threaded_irq(max77686->irq, NULL, max77686_irq_thread, - IRQF_TRIGGER_FALLING | IRQF_ONESHOT, - "max77686-irq", max77686); - - if (ret) - dev_err(max77686->dev, "Failed to 
request IRQ %d: %d\n", - max77686->irq, ret); - - - if (debug_mask & MAX77686_DEBUG_IRQ_INFO) - pr_info("%s-\n", __func__); - - return 0; -} - -void max77686_irq_exit(struct max77686_dev *max77686) -{ - if (max77686->irq) - free_irq(max77686->irq, max77686); -} diff --git a/drivers/mfd/max77686.c b/drivers/mfd/max77686.c index ce869acf27ae..3cb41d02cd3d 100644 --- a/drivers/mfd/max77686.c +++ b/drivers/mfd/max77686.c @@ -25,6 +25,8 @@ #include #include #include +#include +#include #include #include #include @@ -46,6 +48,54 @@ static struct regmap_config max77686_regmap_config = { .val_bits = 8, }; +static struct regmap_config max77686_rtc_regmap_config = { + .reg_bits = 8, + .val_bits = 8, +}; + +static const struct regmap_irq max77686_irqs[] = { + /* INT1 interrupts */ + { .reg_offset = 0, .mask = MAX77686_INT1_PWRONF_MSK, }, + { .reg_offset = 0, .mask = MAX77686_INT1_PWRONR_MSK, }, + { .reg_offset = 0, .mask = MAX77686_INT1_JIGONBF_MSK, }, + { .reg_offset = 0, .mask = MAX77686_INT1_JIGONBR_MSK, }, + { .reg_offset = 0, .mask = MAX77686_INT1_ACOKBF_MSK, }, + { .reg_offset = 0, .mask = MAX77686_INT1_ACOKBR_MSK, }, + { .reg_offset = 0, .mask = MAX77686_INT1_ONKEY1S_MSK, }, + { .reg_offset = 0, .mask = MAX77686_INT1_MRSTB_MSK, }, + /* INT2 interrupts */ + { .reg_offset = 1, .mask = MAX77686_INT2_140C_MSK, }, + { .reg_offset = 1, .mask = MAX77686_INT2_120C_MSK, }, +}; + +static const struct regmap_irq_chip max77686_irq_chip = { + .name = "max77686-pmic", + .status_base = MAX77686_REG_INT1, + .mask_base = MAX77686_REG_INT1MSK, + .num_regs = 2, + .irqs = max77686_irqs, + .num_irqs = ARRAY_SIZE(max77686_irqs), +}; + +static const struct regmap_irq max77686_rtc_irqs[] = { + /* RTC interrupts */ + { .reg_offset = 0, .mask = MAX77686_RTCINT_RTC60S_MSK, }, + { .reg_offset = 0, .mask = MAX77686_RTCINT_RTCA1_MSK, }, + { .reg_offset = 0, .mask = MAX77686_RTCINT_RTCA2_MSK, }, + { .reg_offset = 0, .mask = MAX77686_RTCINT_SMPL_MSK, }, + { .reg_offset = 0, .mask = MAX77686_RTCINT_RTC1S_MSK, }, + { .reg_offset = 0, .mask = MAX77686_RTCINT_WTSR_MSK, }, +}; + +static const struct regmap_irq_chip max77686_rtc_irq_chip = { + .name = "max77686-rtc", + .status_base = MAX77686_RTC_INT, + .mask_base = MAX77686_RTC_INTM, + .num_regs = 1, + .irqs = max77686_rtc_irqs, + .num_irqs = ARRAY_SIZE(max77686_rtc_irqs), +}; + #ifdef CONFIG_OF static const struct of_device_id max77686_pmic_dt_match[] = { {.compatible = "maxim,max77686", .data = NULL}, @@ -101,7 +151,6 @@ static int max77686_i2c_probe(struct i2c_client *i2c, max77686->type = id->driver_data; max77686->wakeup = pdata->wakeup; - max77686->irq_gpio = pdata->irq_gpio; max77686->irq = i2c->irq; max77686->regmap = devm_regmap_init_i2c(i2c, &max77686_regmap_config); @@ -117,8 +166,7 @@ static int max77686_i2c_probe(struct i2c_client *i2c, dev_err(max77686->dev, "device not found on this channel (this is not an error)\n"); return -ENODEV; - } else - dev_info(max77686->dev, "device found\n"); + } max77686->rtc = i2c_new_dummy(i2c->adapter, I2C_ADDR_RTC); if (!max77686->rtc) { @@ -127,15 +175,48 @@ static int max77686_i2c_probe(struct i2c_client *i2c, } i2c_set_clientdata(max77686->rtc, max77686); - max77686_irq_init(max77686); + max77686->rtc_regmap = devm_regmap_init_i2c(max77686->rtc, + &max77686_rtc_regmap_config); + if (IS_ERR(max77686->rtc_regmap)) { + ret = PTR_ERR(max77686->rtc_regmap); + dev_err(max77686->dev, "failed to allocate RTC regmap: %d\n", + ret); + goto err_unregister_i2c; + } + + ret = regmap_add_irq_chip(max77686->regmap, max77686->irq, + 
IRQF_TRIGGER_FALLING | IRQF_ONESHOT | + IRQF_SHARED, 0, &max77686_irq_chip, + &max77686->irq_data); + if (ret != 0) { + dev_err(&i2c->dev, "failed to add PMIC irq chip: %d\n", ret); + goto err_unregister_i2c; + } + ret = regmap_add_irq_chip(max77686->rtc_regmap, max77686->irq, + IRQF_TRIGGER_FALLING | IRQF_ONESHOT | + IRQF_SHARED, 0, &max77686_rtc_irq_chip, + &max77686->rtc_irq_data); + if (ret != 0) { + dev_err(&i2c->dev, "failed to add RTC irq chip: %d\n", ret); + goto err_del_irqc; + } ret = mfd_add_devices(max77686->dev, -1, max77686_devs, ARRAY_SIZE(max77686_devs), NULL, 0, NULL); if (ret < 0) { - mfd_remove_devices(max77686->dev); - i2c_unregister_device(max77686->rtc); + dev_err(&i2c->dev, "failed to add MFD devices: %d\n", ret); + goto err_del_rtc_irqc; } + return 0; + +err_del_rtc_irqc: + regmap_del_irq_chip(max77686->irq, max77686->rtc_irq_data); +err_del_irqc: + regmap_del_irq_chip(max77686->irq, max77686->irq_data); +err_unregister_i2c: + i2c_unregister_device(max77686->rtc); + return ret; } @@ -144,6 +225,10 @@ static int max77686_i2c_remove(struct i2c_client *i2c) struct max77686_dev *max77686 = i2c_get_clientdata(i2c); mfd_remove_devices(max77686->dev); + + regmap_del_irq_chip(max77686->irq, max77686->rtc_irq_data); + regmap_del_irq_chip(max77686->irq, max77686->irq_data); + i2c_unregister_device(max77686->rtc); return 0; diff --git a/drivers/rtc/rtc-max77686.c b/drivers/rtc/rtc-max77686.c index 9efe118a28ba..d20a7f0786eb 100644 --- a/drivers/rtc/rtc-max77686.c +++ b/drivers/rtc/rtc-max77686.c @@ -492,16 +492,11 @@ static int max77686_rtc_init_reg(struct max77686_rtc_info *info) return ret; } -static struct regmap_config max77686_rtc_regmap_config = { - .reg_bits = 8, - .val_bits = 8, -}; - static int max77686_rtc_probe(struct platform_device *pdev) { struct max77686_dev *max77686 = dev_get_drvdata(pdev->dev.parent); struct max77686_rtc_info *info; - int ret, virq; + int ret; dev_info(&pdev->dev, "%s\n", __func__); @@ -514,14 +509,7 @@ static int max77686_rtc_probe(struct platform_device *pdev) info->dev = &pdev->dev; info->max77686 = max77686; info->rtc = max77686->rtc; - info->max77686->rtc_regmap = devm_regmap_init_i2c(info->max77686->rtc, - &max77686_rtc_regmap_config); - if (IS_ERR(info->max77686->rtc_regmap)) { - ret = PTR_ERR(info->max77686->rtc_regmap); - dev_err(info->max77686->dev, "Failed to allocate register map: %d\n", - ret); - return ret; - } + platform_set_drvdata(pdev, info); ret = max77686_rtc_init_reg(info); @@ -550,15 +538,16 @@ static int max77686_rtc_probe(struct platform_device *pdev) ret = -EINVAL; goto err_rtc; } - virq = irq_create_mapping(max77686->irq_domain, MAX77686_RTCIRQ_RTCA1); - if (!virq) { + + info->virq = regmap_irq_get_virq(max77686->rtc_irq_data, + MAX77686_RTCIRQ_RTCA1); + if (!info->virq) { ret = -ENXIO; goto err_rtc; } - info->virq = virq; - ret = devm_request_threaded_irq(&pdev->dev, virq, NULL, - max77686_rtc_alarm_irq, 0, "rtc-alarm0", info); + ret = devm_request_threaded_irq(&pdev->dev, info->virq, NULL, + max77686_rtc_alarm_irq, 0, "rtc-alarm1", info); if (ret < 0) dev_err(&pdev->dev, "Failed to request alarm IRQ: %d: %d\n", info->virq, ret); diff --git a/include/linux/mfd/max77686-private.h b/include/linux/mfd/max77686-private.h index 8c75a9c8dfab..8e177806cba1 100644 --- a/include/linux/mfd/max77686-private.h +++ b/include/linux/mfd/max77686-private.h @@ -181,9 +181,6 @@ enum max77686_rtc_reg { MAX77686_ALARM2_DATE = 0x1B, }; -#define MAX77686_IRQSRC_PMIC (0) -#define MAX77686_IRQSRC_RTC (1 << 0) - enum max77686_irq_source { 
PMIC_INT1 = 0, PMIC_INT2, @@ -205,16 +202,33 @@ enum max77686_irq { MAX77686_PMICIRQ_140C, MAX77686_PMICIRQ_120C, - MAX77686_RTCIRQ_RTC60S, + MAX77686_RTCIRQ_RTC60S = 0, MAX77686_RTCIRQ_RTCA1, MAX77686_RTCIRQ_RTCA2, MAX77686_RTCIRQ_SMPL, MAX77686_RTCIRQ_RTC1S, MAX77686_RTCIRQ_WTSR, - - MAX77686_IRQ_NR, }; +#define MAX77686_INT1_PWRONF_MSK BIT(0) +#define MAX77686_INT1_PWRONR_MSK BIT(1) +#define MAX77686_INT1_JIGONBF_MSK BIT(2) +#define MAX77686_INT1_JIGONBR_MSK BIT(3) +#define MAX77686_INT1_ACOKBF_MSK BIT(4) +#define MAX77686_INT1_ACOKBR_MSK BIT(5) +#define MAX77686_INT1_ONKEY1S_MSK BIT(6) +#define MAX77686_INT1_MRSTB_MSK BIT(7) + +#define MAX77686_INT2_140C_MSK BIT(0) +#define MAX77686_INT2_120C_MSK BIT(1) + +#define MAX77686_RTCINT_RTC60S_MSK BIT(0) +#define MAX77686_RTCINT_RTCA1_MSK BIT(1) +#define MAX77686_RTCINT_RTCA2_MSK BIT(2) +#define MAX77686_RTCINT_SMPL_MSK BIT(3) +#define MAX77686_RTCINT_RTC1S_MSK BIT(4) +#define MAX77686_RTCINT_WTSR_MSK BIT(5) + struct max77686_dev { struct device *dev; struct i2c_client *i2c; /* 0xcc / PMIC, Battery Control, and FLASH */ @@ -224,11 +238,10 @@ struct max77686_dev { struct regmap *regmap; /* regmap for mfd */ struct regmap *rtc_regmap; /* regmap for rtc */ - - struct irq_domain *irq_domain; + struct regmap_irq_chip_data *irq_data; + struct regmap_irq_chip_data *rtc_irq_data; int irq; - int irq_gpio; bool wakeup; struct mutex irqlock; int irq_masks_cur[MAX77686_IRQ_GROUP_NR]; diff --git a/include/linux/mfd/max77686.h b/include/linux/mfd/max77686.h index 46c0f320ed76..4cbcc13e8a2a 100644 --- a/include/linux/mfd/max77686.h +++ b/include/linux/mfd/max77686.h @@ -89,8 +89,6 @@ struct max77686_opmode_data { }; struct max77686_platform_data { - /* IRQ */ - int irq_gpio; int ono; int wakeup; -- cgit v1.2.3-59-g8ed1b From c708a98f01068fe07f77448031f9f5317423e777 Mon Sep 17 00:00:00 2001 From: Javi Merino Date: Wed, 25 Jun 2014 11:00:12 +0100 Subject: thermal: document struct thermal_zone_device and thermal_governor Document struct thermal_zone_device and struct thermal_governor fields and their use by the thermal framework code. Cc: Zhang Rui Cc: Eduardo Valentin Signed-off-by: Javi Merino Signed-off-by: Zhang Rui --- include/linux/thermal.h | 46 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 44 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index f7e11c7ea7d9..0305cde21a74 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -158,6 +158,42 @@ struct thermal_attr { char name[THERMAL_NAME_LENGTH]; }; +/** + * struct thermal_zone_device - structure for a thermal zone + * @id: unique id number for each thermal zone + * @type: the thermal zone device type + * @device: &struct device for this thermal zone + * @trip_temp_attrs: attributes for trip points for sysfs: trip temperature + * @trip_type_attrs: attributes for trip points for sysfs: trip type + * @trip_hyst_attrs: attributes for trip points for sysfs: trip hysteresis + * @devdata: private pointer for device private data + * @trips: number of trip points the thermal zone supports + * @passive_delay: number of milliseconds to wait between polls when + * performing passive cooling. Currenty only used by the + * step-wise governor + * @polling_delay: number of milliseconds to wait between polls when + * checking whether trip points have been crossed (0 for + * interrupt driven systems) + * @temperature: current temperature. 
This is only for core code, + * drivers should use thermal_zone_get_temp() to get the + * current temperature + * @last_temperature: previous temperature read + * @emul_temperature: emulated temperature when using CONFIG_THERMAL_EMULATION + * @passive: 1 if you've crossed a passive trip point, 0 otherwise. + * Currenty only used by the step-wise governor. + * @forced_passive: If > 0, temperature at which to switch on all ACPI + * processor cooling devices. Currently only used by the + * step-wise governor. + * @ops: operations this &thermal_zone_device supports + * @tzp: thermal zone parameters + * @governor: pointer to the governor for this thermal zone + * @thermal_instances: list of &struct thermal_instance of this thermal zone + * @idr: &struct idr to generate unique id for this zone's cooling + * devices + * @lock: lock to protect thermal_instances list + * @node: node in thermal_tz_list (in thermal_core.c) + * @poll_queue: delayed work for polling + */ struct thermal_zone_device { int id; char type[THERMAL_NAME_LENGTH]; @@ -179,12 +215,18 @@ struct thermal_zone_device { struct thermal_governor *governor; struct list_head thermal_instances; struct idr idr; - struct mutex lock; /* protect thermal_instances list */ + struct mutex lock; struct list_head node; struct delayed_work poll_queue; }; -/* Structure that holds thermal governor information */ +/** + * struct thermal_governor - structure that holds thermal governor information + * @name: name of the governor + * @throttle: callback called for every trip point even if temperature is + * below the trip point temperature + * @governor_list: node in thermal_governor_list (in thermal_core.c) + */ struct thermal_governor { char name[THERMAL_NAME_LENGTH]; int (*throttle)(struct thermal_zone_device *tz, int trip); -- cgit v1.2.3-59-g8ed1b From bb5fd0b6daaf0da0b1e78c699b8582984373d3f4 Mon Sep 17 00:00:00 2001 From: Boris BREZILLON Date: Fri, 11 Jul 2014 09:49:41 +0200 Subject: mtd: nand: define struct nand_timings Define a struct containing the standard NAND timings as described in NAND datasheets. Signed-off-by: Boris BREZILLON Signed-off-by: Brian Norris --- include/linux/mtd/nand.h | 49 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 1cff329ae13d..cdda207c16e1 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -948,4 +948,53 @@ static inline int jedec_feature(struct nand_chip *chip) return chip->jedec_version ? le16_to_cpu(chip->jedec_params.features) : 0; } + +/** + * struct nand_sdr_timings - SDR NAND chip timings + * + * This struct defines the timing requirements of a SDR NAND chip. + * These informations can be found in every NAND datasheets and the timings + * meaning are described in the ONFI specifications: + * www.onfi.org/~/media/ONFI/specs/onfi_3_1_spec.pdf (chapter 4.15 Timing + * Parameters) + * + * All these timings are expressed in picoseconds. 
+ */ + +struct nand_sdr_timings { + u32 tALH_min; + u32 tADL_min; + u32 tALS_min; + u32 tAR_min; + u32 tCEA_max; + u32 tCEH_min; + u32 tCH_min; + u32 tCHZ_max; + u32 tCLH_min; + u32 tCLR_min; + u32 tCLS_min; + u32 tCOH_min; + u32 tCS_min; + u32 tDH_min; + u32 tDS_min; + u32 tFEAT_max; + u32 tIR_min; + u32 tITC_max; + u32 tRC_min; + u32 tREA_max; + u32 tREH_min; + u32 tRHOH_min; + u32 tRHW_min; + u32 tRHZ_max; + u32 tRLOH_min; + u32 tRP_min; + u32 tRR_min; + u64 tRST_max; + u32 tWB_max; + u32 tWC_min; + u32 tWH_min; + u32 tWHR_min; + u32 tWP_min; + u32 tWW_min; +}; #endif /* __LINUX_MTD_NAND_H */ -- cgit v1.2.3-59-g8ed1b From 974647ea8a13021a91d558df61d598bcabf73439 Mon Sep 17 00:00:00 2001 From: Boris BREZILLON Date: Fri, 11 Jul 2014 09:49:42 +0200 Subject: mtd: nand: add ONFI timing mode to nand_timings converter Add a converter to retrieve NAND timings from an ONFI NAND timing mode. At the moment, only SDR NAND timings are supported. Signed-off-by: Boris BREZILLON Signed-off-by: Brian Norris --- drivers/mtd/nand/Makefile | 2 +- drivers/mtd/nand/nand_timings.c | 253 ++++++++++++++++++++++++++++++++++++++++ include/linux/mtd/nand.h | 3 + 3 files changed, 257 insertions(+), 1 deletion(-) create mode 100644 drivers/mtd/nand/nand_timings.c (limited to 'include/linux') diff --git a/drivers/mtd/nand/Makefile b/drivers/mtd/nand/Makefile index 542b5689eb63..a035e7cc6d46 100644 --- a/drivers/mtd/nand/Makefile +++ b/drivers/mtd/nand/Makefile @@ -50,4 +50,4 @@ obj-$(CONFIG_MTD_NAND_GPMI_NAND) += gpmi-nand/ obj-$(CONFIG_MTD_NAND_XWAY) += xway_nand.o obj-$(CONFIG_MTD_NAND_BCM47XXNFLASH) += bcm47xxnflash/ -nand-objs := nand_base.o nand_bbt.o +nand-objs := nand_base.o nand_bbt.o nand_timings.o diff --git a/drivers/mtd/nand/nand_timings.c b/drivers/mtd/nand/nand_timings.c new file mode 100644 index 000000000000..8b36253420fa --- /dev/null +++ b/drivers/mtd/nand/nand_timings.c @@ -0,0 +1,253 @@ +/* + * Copyright (C) 2014 Free Electrons + * + * Author: Boris BREZILLON + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + */ +#include +#include +#include +#include + +static const struct nand_sdr_timings onfi_sdr_timings[] = { + /* Mode 0 */ + { + .tADL_min = 200000, + .tALH_min = 20000, + .tALS_min = 50000, + .tAR_min = 25000, + .tCEA_max = 100000, + .tCEH_min = 20000, + .tCH_min = 20000, + .tCHZ_max = 100000, + .tCLH_min = 20000, + .tCLR_min = 20000, + .tCLS_min = 50000, + .tCOH_min = 0, + .tCS_min = 70000, + .tDH_min = 20000, + .tDS_min = 40000, + .tFEAT_max = 1000000, + .tIR_min = 10000, + .tITC_max = 1000000, + .tRC_min = 100000, + .tREA_max = 40000, + .tREH_min = 30000, + .tRHOH_min = 0, + .tRHW_min = 200000, + .tRHZ_max = 200000, + .tRLOH_min = 0, + .tRP_min = 50000, + .tRST_max = 250000000000, + .tWB_max = 200000, + .tRR_min = 40000, + .tWC_min = 100000, + .tWH_min = 30000, + .tWHR_min = 120000, + .tWP_min = 50000, + .tWW_min = 100000, + }, + /* Mode 1 */ + { + .tADL_min = 100000, + .tALH_min = 10000, + .tALS_min = 25000, + .tAR_min = 10000, + .tCEA_max = 45000, + .tCEH_min = 20000, + .tCH_min = 10000, + .tCHZ_max = 50000, + .tCLH_min = 10000, + .tCLR_min = 10000, + .tCLS_min = 25000, + .tCOH_min = 15000, + .tCS_min = 35000, + .tDH_min = 10000, + .tDS_min = 20000, + .tFEAT_max = 1000000, + .tIR_min = 0, + .tITC_max = 1000000, + .tRC_min = 50000, + .tREA_max = 30000, + .tREH_min = 15000, + .tRHOH_min = 15000, + .tRHW_min = 100000, + .tRHZ_max = 100000, + .tRLOH_min = 0, + .tRP_min = 25000, + .tRR_min = 20000, + .tRST_max = 500000000, + .tWB_max = 100000, + .tWC_min = 45000, + .tWH_min = 15000, + .tWHR_min = 80000, + .tWP_min = 25000, + .tWW_min = 100000, + }, + /* Mode 2 */ + { + .tADL_min = 100000, + .tALH_min = 10000, + .tALS_min = 15000, + .tAR_min = 10000, + .tCEA_max = 30000, + .tCEH_min = 20000, + .tCH_min = 10000, + .tCHZ_max = 50000, + .tCLH_min = 10000, + .tCLR_min = 10000, + .tCLS_min = 15000, + .tCOH_min = 15000, + .tCS_min = 25000, + .tDH_min = 5000, + .tDS_min = 15000, + .tFEAT_max = 1000000, + .tIR_min = 0, + .tITC_max = 1000000, + .tRC_min = 35000, + .tREA_max = 25000, + .tREH_min = 15000, + .tRHOH_min = 15000, + .tRHW_min = 100000, + .tRHZ_max = 100000, + .tRLOH_min = 0, + .tRR_min = 20000, + .tRST_max = 500000000, + .tWB_max = 100000, + .tRP_min = 17000, + .tWC_min = 35000, + .tWH_min = 15000, + .tWHR_min = 80000, + .tWP_min = 17000, + .tWW_min = 100000, + }, + /* Mode 3 */ + { + .tADL_min = 100000, + .tALH_min = 5000, + .tALS_min = 10000, + .tAR_min = 10000, + .tCEA_max = 25000, + .tCEH_min = 20000, + .tCH_min = 5000, + .tCHZ_max = 50000, + .tCLH_min = 5000, + .tCLR_min = 10000, + .tCLS_min = 10000, + .tCOH_min = 15000, + .tCS_min = 25000, + .tDH_min = 5000, + .tDS_min = 10000, + .tFEAT_max = 1000000, + .tIR_min = 0, + .tITC_max = 1000000, + .tRC_min = 30000, + .tREA_max = 20000, + .tREH_min = 10000, + .tRHOH_min = 15000, + .tRHW_min = 100000, + .tRHZ_max = 100000, + .tRLOH_min = 0, + .tRP_min = 15000, + .tRR_min = 20000, + .tRST_max = 500000000, + .tWB_max = 100000, + .tWC_min = 30000, + .tWH_min = 10000, + .tWHR_min = 80000, + .tWP_min = 15000, + .tWW_min = 100000, + }, + /* Mode 4 */ + { + .tADL_min = 70000, + .tALH_min = 5000, + .tALS_min = 10000, + .tAR_min = 10000, + .tCEA_max = 25000, + .tCEH_min = 20000, + .tCH_min = 5000, + .tCHZ_max = 30000, + .tCLH_min = 5000, + .tCLR_min = 10000, + .tCLS_min = 10000, + .tCOH_min = 15000, + .tCS_min = 20000, + .tDH_min = 5000, + .tDS_min = 10000, + .tFEAT_max = 1000000, + .tIR_min = 0, + .tITC_max = 1000000, + .tRC_min = 25000, + .tREA_max = 20000, + .tREH_min = 10000, + .tRHOH_min = 15000, + .tRHW_min = 100000, + .tRHZ_max = 100000, 
+ .tRLOH_min = 5000, + .tRP_min = 12000, + .tRR_min = 20000, + .tRST_max = 500000000, + .tWB_max = 100000, + .tWC_min = 25000, + .tWH_min = 10000, + .tWHR_min = 80000, + .tWP_min = 12000, + .tWW_min = 100000, + }, + /* Mode 5 */ + { + .tADL_min = 70000, + .tALH_min = 5000, + .tALS_min = 10000, + .tAR_min = 10000, + .tCEA_max = 25000, + .tCEH_min = 20000, + .tCH_min = 5000, + .tCHZ_max = 30000, + .tCLH_min = 5000, + .tCLR_min = 10000, + .tCLS_min = 10000, + .tCOH_min = 15000, + .tCS_min = 15000, + .tDH_min = 5000, + .tDS_min = 7000, + .tFEAT_max = 1000000, + .tIR_min = 0, + .tITC_max = 1000000, + .tRC_min = 20000, + .tREA_max = 16000, + .tREH_min = 7000, + .tRHOH_min = 15000, + .tRHW_min = 100000, + .tRHZ_max = 100000, + .tRLOH_min = 5000, + .tRP_min = 10000, + .tRR_min = 20000, + .tRST_max = 500000000, + .tWB_max = 100000, + .tWC_min = 20000, + .tWH_min = 7000, + .tWHR_min = 80000, + .tWP_min = 10000, + .tWW_min = 100000, + }, +}; + +/** + * onfi_async_timing_mode_to_sdr_timings - [NAND Interface] Retrieve NAND + * timings according to the given ONFI timing mode + * @mode: ONFI timing mode + */ +const struct nand_sdr_timings *onfi_async_timing_mode_to_sdr_timings(int mode) +{ + if (mode < 0 || mode >= ARRAY_SIZE(onfi_sdr_timings)) + return ERR_PTR(-EINVAL); + + return &onfi_sdr_timings[mode]; +} +EXPORT_SYMBOL(onfi_async_timing_mode_to_sdr_timings); diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index cdda207c16e1..3083c53e0270 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -997,4 +997,7 @@ struct nand_sdr_timings { u32 tWP_min; u32 tWW_min; }; + +/* get timing characteristics from ONFI timing mode. */ +const struct nand_sdr_timings *onfi_async_timing_mode_to_sdr_timings(int mode); #endif /* __LINUX_MTD_NAND_H */ -- cgit v1.2.3-59-g8ed1b From 76be4a54157ab0059fb29d8d516db46d239812e2 Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Thu, 12 Jun 2014 17:15:22 +0530 Subject: ARM: OMAP2+: DMA: remove requirement of irq for platform-dma driver we have currently 2 DMA drivers that try to co-exist. drivers/dma/omap-dma.c which registers it's own IRQ and is device tree aware and uses arch/arm/plat-omap/dma.c instance created by arch/arm/mach-omap2/dma.c to maintain channel usage (omap_request_dma). Currently both try to register interrupts and mach-omap2/plat-omap dma.c attempts to use the IRQ number registered by hwmod to register it's own interrupt handler. Now, there is no reasonable way of static allocating DMA irq in GIC SPI when we use crossbar. However, since the dma_chan structure is freed as a result of IRQ not being present due to devm allocation, maintaining information of channel by platform code fails at a later point in time when that region of memory is reused. So, if hwmod does not indicate an IRQ number, then, assume that dma-engine will take care of the interrupt handling. 
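Condensed, the decision described above amounts to the following (a sketch of the change, with the surrounding probe code omitted):

/* mach-omap2/dma.c: no "0" IRQ provided by hwmod means dmaengine owns the interrupt */
if (platform_get_irq_byname(pdev, "0") < 0)
        d->dev_caps |= DMA_ENGINE_HANDLE_IRQ;

/* plat-omap/dma.c: only register the legacy handler when that flag is clear */
if (dma_omap2plus() && !(d->dev_caps & DMA_ENGINE_HANDLE_IRQ)) {
        /* request the system-DMA interrupt as before */
}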
Signed-off-by: Nishanth Menon Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/dma.c | 3 +++ arch/arm/plat-omap/dma.c | 5 +++-- include/linux/omap-dma.h | 1 + 3 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-omap2/dma.c b/arch/arm/mach-omap2/dma.c index a6d2cf1f8d02..e1a56d87599e 100644 --- a/arch/arm/mach-omap2/dma.c +++ b/arch/arm/mach-omap2/dma.c @@ -259,6 +259,9 @@ static int __init omap2_system_dma_init_dev(struct omap_hwmod *oh, void *unused) if (cpu_is_omap34xx() && (omap_type() != OMAP2_DEVICE_TYPE_GP)) d->dev_caps |= HS_CHANNELS_RESERVED; + if (platform_get_irq_byname(pdev, "0") < 0) + d->dev_caps |= DMA_ENGINE_HANDLE_IRQ; + /* Check the capabilities register for descriptor loading feature */ if (dma_read(CAPS_0, 0) & DMA_HAS_DESCRIPTOR_CAPS) dma_common_ch_end = CCDN; diff --git a/arch/arm/plat-omap/dma.c b/arch/arm/plat-omap/dma.c index b5608b1f9fbd..7aae0e5b188c 100644 --- a/arch/arm/plat-omap/dma.c +++ b/arch/arm/plat-omap/dma.c @@ -2100,7 +2100,7 @@ static int omap_system_dma_probe(struct platform_device *pdev) omap_dma_set_global_params(DMA_DEFAULT_ARB_RATE, DMA_DEFAULT_FIFO_DEPTH, 0); - if (dma_omap2plus()) { + if (dma_omap2plus() && !(d->dev_caps & DMA_ENGINE_HANDLE_IRQ)) { strcpy(irq_name, "0"); dma_irq = platform_get_irq_byname(pdev, irq_name); if (dma_irq < 0) { @@ -2145,7 +2145,8 @@ static int omap_system_dma_remove(struct platform_device *pdev) char irq_name[4]; strcpy(irq_name, "0"); dma_irq = platform_get_irq_byname(pdev, irq_name); - remove_irq(dma_irq, &omap24xx_dma_irq); + if (dma_irq >= 0) + remove_irq(dma_irq, &omap24xx_dma_irq); } else { int irq_rel = 0; for ( ; irq_rel < dma_chan_count; irq_rel++) { diff --git a/include/linux/omap-dma.h b/include/linux/omap-dma.h index 88e6ea4a5d36..6f06f8bc612c 100644 --- a/include/linux/omap-dma.h +++ b/include/linux/omap-dma.h @@ -130,6 +130,7 @@ #define IS_WORD_16 BIT(0xd) #define ENABLE_16XX_MODE BIT(0xe) #define HS_CHANNELS_RESERVED BIT(0xf) +#define DMA_ENGINE_HANDLE_IRQ BIT(0x10) /* Defines for DMA Capabilities */ #define DMA_HAS_TRANSPARENT_CAPS (0x1 << 18) -- cgit v1.2.3-59-g8ed1b From f0e2cf7b912522c9c7146d9d6e99d1b0ea5c97c6 Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Wed, 25 Jun 2014 23:22:57 +0530 Subject: phy: pipe3: insert delay to enumerate in GEN2 mode 8-bit delay value (0xF1) is required for GEN2 devices to be enumerated consistently. Added an API to be called from PHY drivers to set this delay value and called it from PIPE3 driver to set the delay value. Signed-off-by: Kishon Vijay Abraham I Reviewed-by: Roger Quadros --- Documentation/devicetree/bindings/phy/ti-phy.txt | 12 +++--- drivers/phy/phy-omap-control.c | 52 +++++++++++++++++++++++- drivers/phy/phy-ti-pipe3.c | 4 +- include/linux/phy/omap_control_phy.h | 10 +++++ 4 files changed, 71 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/phy/ti-phy.txt b/Documentation/devicetree/bindings/phy/ti-phy.txt index b50e1c10a05d..305e3df3d9b1 100644 --- a/Documentation/devicetree/bindings/phy/ti-phy.txt +++ b/Documentation/devicetree/bindings/phy/ti-phy.txt @@ -9,15 +9,17 @@ Required properties: e.g. USB2_PHY on OMAP5. "ti,control-phy-pipe3" - if it has DPLL and individual Rx & Tx power control e.g. USB3 PHY and SATA PHY on OMAP5. + "ti,control-phy-pcie" - for pcie to support external clock for pcie and to + set PCS delay value. + e.g. 
PCIE PHY in DRA7x "ti,control-phy-usb2-dra7" - if it has power down register like USB2 PHY on DRA7 platform. "ti,control-phy-usb2-am437" - if it has power down register like USB2 PHY on AM437 platform. - - reg : Address and length of the register set for the device. It contains - the address of "otghs_control" for control-phy-otghs or "power" register - for other types. - - reg-names: should be "otghs_control" control-phy-otghs and "power" for - other types. + - reg : register ranges as listed in the reg-names property + - reg-names: "otghs_control" for control-phy-otghs + "power", "pcie_pcs" and "control_sma" for control-phy-pcie + "power" for all other types omap_control_usb: omap-control-usb@4a002300 { compatible = "ti,control-phy-otghs"; diff --git a/drivers/phy/phy-omap-control.c b/drivers/phy/phy-omap-control.c index 311b4f9a5132..9487bf112267 100644 --- a/drivers/phy/phy-omap-control.c +++ b/drivers/phy/phy-omap-control.c @@ -26,6 +26,41 @@ #include #include +/** + * omap_control_pcie_pcs - set the PCS delay count + * @dev: the control module device + * @id: index of the pcie PHY (should be 1 or 2) + * @delay: 8 bit delay value + */ +void omap_control_pcie_pcs(struct device *dev, u8 id, u8 delay) +{ + u32 val; + struct omap_control_phy *control_phy; + + if (IS_ERR(dev) || !dev) { + pr_err("%s: invalid device\n", __func__); + return; + } + + control_phy = dev_get_drvdata(dev); + if (!control_phy) { + dev_err(dev, "%s: invalid control phy device\n", __func__); + return; + } + + if (control_phy->type != OMAP_CTRL_TYPE_PCIE) { + dev_err(dev, "%s: unsupported operation\n", __func__); + return; + } + + val = readl(control_phy->pcie_pcs); + val &= ~(OMAP_CTRL_PCIE_PCS_MASK << + (id * OMAP_CTRL_PCIE_PCS_DELAY_COUNT_SHIFT)); + val |= delay << (id * OMAP_CTRL_PCIE_PCS_DELAY_COUNT_SHIFT); + writel(val, control_phy->pcie_pcs); +} +EXPORT_SYMBOL_GPL(omap_control_pcie_pcs); + /** * omap_control_phy_power - power on/off the phy using control module reg * @dev: the control module device @@ -61,6 +96,7 @@ void omap_control_phy_power(struct device *dev, int on) val |= OMAP_CTRL_DEV_PHY_PD; break; + case OMAP_CTRL_TYPE_PCIE: case OMAP_CTRL_TYPE_PIPE3: rate = clk_get_rate(control_phy->sys_clk); rate = rate/1000000; @@ -211,6 +247,7 @@ EXPORT_SYMBOL_GPL(omap_control_usb_set_mode); static const enum omap_control_phy_type otghs_data = OMAP_CTRL_TYPE_OTGHS; static const enum omap_control_phy_type usb2_data = OMAP_CTRL_TYPE_USB2; static const enum omap_control_phy_type pipe3_data = OMAP_CTRL_TYPE_PIPE3; +static const enum omap_control_phy_type pcie_data = OMAP_CTRL_TYPE_PCIE; static const enum omap_control_phy_type dra7usb2_data = OMAP_CTRL_TYPE_DRA7USB2; static const enum omap_control_phy_type am437usb2_data = OMAP_CTRL_TYPE_AM437USB2; @@ -227,6 +264,10 @@ static const struct of_device_id omap_control_phy_id_table[] = { .compatible = "ti,control-phy-pipe3", .data = &pipe3_data, }, + { + .compatible = "ti,control-phy-pcie", + .data = &pcie_data, + }, { .compatible = "ti,control-phy-usb2-dra7", .data = &dra7usb2_data, @@ -279,7 +320,8 @@ static int omap_control_phy_probe(struct platform_device *pdev) } } - if (control_phy->type == OMAP_CTRL_TYPE_PIPE3) { + if (control_phy->type == OMAP_CTRL_TYPE_PIPE3 || + control_phy->type == OMAP_CTRL_TYPE_PCIE) { control_phy->sys_clk = devm_clk_get(control_phy->dev, "sys_clkin"); if (IS_ERR(control_phy->sys_clk)) { @@ -288,6 +330,14 @@ static int omap_control_phy_probe(struct platform_device *pdev) } } + if (control_phy->type == OMAP_CTRL_TYPE_PCIE) { + res = 
platform_get_resource_byname(pdev, IORESOURCE_MEM, + "pcie_pcs"); + control_phy->pcie_pcs = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(control_phy->pcie_pcs)) + return PTR_ERR(control_phy->pcie_pcs); + } + dev_set_drvdata(control_phy->dev, control_phy); return 0; diff --git a/drivers/phy/phy-ti-pipe3.c b/drivers/phy/phy-ti-pipe3.c index 6174f4b1a5de..93bcd67f1b22 100644 --- a/drivers/phy/phy-ti-pipe3.c +++ b/drivers/phy/phy-ti-pipe3.c @@ -217,8 +217,10 @@ static int ti_pipe3_init(struct phy *x) u32 val; int ret = 0; - if (of_device_is_compatible(phy->dev->of_node, "ti,phy-pipe3-pcie")) + if (of_device_is_compatible(phy->dev->of_node, "ti,phy-pipe3-pcie")) { + omap_control_pcie_pcs(phy->control_dev, phy->id, 0xF1); return 0; + } /* Bring it out of IDLE if it is IDLE */ val = ti_pipe3_readl(phy->pll_ctrl_base, PLL_CONFIGURATION2); diff --git a/include/linux/phy/omap_control_phy.h b/include/linux/phy/omap_control_phy.h index 5450403c7546..e9e6cfbfbb58 100644 --- a/include/linux/phy/omap_control_phy.h +++ b/include/linux/phy/omap_control_phy.h @@ -23,6 +23,7 @@ enum omap_control_phy_type { OMAP_CTRL_TYPE_OTGHS = 1, /* Mailbox OTGHS_CONTROL */ OMAP_CTRL_TYPE_USB2, /* USB2_PHY, power down in CONTROL_DEV_CONF */ OMAP_CTRL_TYPE_PIPE3, /* PIPE3 PHY, DPLL & seperate Rx/Tx power */ + OMAP_CTRL_TYPE_PCIE, /* RX TX control of ACSPCIE */ OMAP_CTRL_TYPE_DRA7USB2, /* USB2 PHY, power and power_aux e.g. DRA7 */ OMAP_CTRL_TYPE_AM437USB2, /* USB2 PHY, power e.g. AM437x */ }; @@ -33,6 +34,7 @@ struct omap_control_phy { u32 __iomem *otghs_control; u32 __iomem *power; u32 __iomem *power_aux; + u32 __iomem *pcie_pcs; struct clk *sys_clk; @@ -63,6 +65,9 @@ enum omap_control_usb_mode { #define OMAP_CTRL_PIPE3_PHY_TX_RX_POWERON 0x3 #define OMAP_CTRL_PIPE3_PHY_TX_RX_POWEROFF 0x0 +#define OMAP_CTRL_PCIE_PCS_MASK 0xff +#define OMAP_CTRL_PCIE_PCS_DELAY_COUNT_SHIFT 0x8 + #define OMAP_CTRL_USB2_PHY_PD BIT(28) #define AM437X_CTRL_USB2_PHY_PD BIT(0) @@ -74,6 +79,7 @@ enum omap_control_usb_mode { void omap_control_phy_power(struct device *dev, int on); void omap_control_usb_set_mode(struct device *dev, enum omap_control_usb_mode mode); +void omap_control_pcie_pcs(struct device *dev, u8 id, u8 delay); #else static inline void omap_control_phy_power(struct device *dev, int on) @@ -84,6 +90,10 @@ static inline void omap_control_usb_set_mode(struct device *dev, enum omap_control_usb_mode mode) { } + +static inline void omap_control_pcie_pcs(struct device *dev, u8 id, u8 delay) +{ +} #endif #endif /* __OMAP_CONTROL_PHY_H__ */ -- cgit v1.2.3-59-g8ed1b From 3be88125d85df587085b0be0a5c0e9953eb5ed6b Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Fri, 4 Jul 2014 12:55:45 +0300 Subject: phy: core: Support regulator supply for PHY power Some PHYs can be powered by an external power regulator. e.g. USB_HS PHY on DRA7 SoC. Make the PHY core support a power regulator. 
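The lookup added here follows the usual optional-supply pattern; roughly (error paths trimmed, and assuming the supply is named "phy", i.e. a "phy-supply" property in the device tree):

phy->pwr = regulator_get_optional(dev, "phy");
if (IS_ERR(phy->pwr)) {
        if (PTR_ERR(phy->pwr) == -EPROBE_DEFER)
                return ERR_PTR(-EPROBE_DEFER);  /* supply declared but not ready yet */
        phy->pwr = NULL;                        /* no supply wired up: carry on without one */
}

phy_power_on() and phy_power_off() then bracket the PHY power sequence with regulator_enable()/regulator_disable(), as the diff below shows.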
Signed-off-by: Roger Quadros Signed-off-by: Kishon Vijay Abraham I --- drivers/phy/phy-core.c | 26 ++++++++++++++++++++++++++ include/linux/phy/phy.h | 2 ++ 2 files changed, 28 insertions(+) (limited to 'include/linux') diff --git a/drivers/phy/phy-core.c b/drivers/phy/phy-core.c index 49c446530101..75c97396dbfa 100644 --- a/drivers/phy/phy-core.c +++ b/drivers/phy/phy-core.c @@ -21,6 +21,7 @@ #include #include #include +#include static struct class *phy_class; static DEFINE_MUTEX(phy_provider_mutex); @@ -226,6 +227,12 @@ int phy_power_on(struct phy *phy) if (!phy) return 0; + if (phy->pwr) { + ret = regulator_enable(phy->pwr); + if (ret) + return ret; + } + ret = phy_pm_runtime_get_sync(phy); if (ret < 0 && ret != -ENOTSUPP) return ret; @@ -247,6 +254,8 @@ int phy_power_on(struct phy *phy) out: mutex_unlock(&phy->mutex); phy_pm_runtime_put_sync(phy); + if (phy->pwr) + regulator_disable(phy->pwr); return ret; } @@ -272,6 +281,9 @@ int phy_power_off(struct phy *phy) mutex_unlock(&phy->mutex); phy_pm_runtime_put(phy); + if (phy->pwr) + regulator_disable(phy->pwr); + return 0; } EXPORT_SYMBOL_GPL(phy_power_off); @@ -588,6 +600,16 @@ struct phy *phy_create(struct device *dev, const struct phy_ops *ops, goto free_phy; } + /* phy-supply */ + phy->pwr = regulator_get_optional(dev, "phy"); + if (IS_ERR(phy->pwr)) { + if (PTR_ERR(phy->pwr) == -EPROBE_DEFER) { + ret = -EPROBE_DEFER; + goto free_ida; + } + phy->pwr = NULL; + } + device_initialize(&phy->dev); mutex_init(&phy->mutex); @@ -617,6 +639,9 @@ put_dev: put_device(&phy->dev); /* calls phy_release() which frees resources */ return ERR_PTR(ret); +free_ida: + ida_simple_remove(&phy_ida, phy->id); + free_phy: kfree(phy); return ERR_PTR(ret); @@ -800,6 +825,7 @@ static void phy_release(struct device *dev) phy = to_phy(dev); dev_vdbg(dev, "releasing '%s'\n", dev_name(dev)); + regulator_put(phy->pwr); ida_simple_remove(&phy_ida, phy->id); kfree(phy); } diff --git a/include/linux/phy/phy.h b/include/linux/phy/phy.h index 2760744cb2a7..9a8694524742 100644 --- a/include/linux/phy/phy.h +++ b/include/linux/phy/phy.h @@ -18,6 +18,7 @@ #include #include #include +#include struct phy; @@ -65,6 +66,7 @@ struct phy { int init_count; int power_count; struct phy_attrs attrs; + struct regulator *pwr; }; /** -- cgit v1.2.3-59-g8ed1b From f0ed817638b59aa927f1f7e9564dd8796b18dc4f Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Mon, 14 Jul 2014 15:55:02 +0530 Subject: phy: core: Let node ptr of PHY point to PHY and not of PHY provider In case of multi-phy PHY providers, each PHY should be modeled as a sub node of the PHY provider. Then each PHY will have a different node pointer (node pointer of sub node) than that of PHY provider. Added this provision in the PHY core. Also fixed all drivers to use the updated API. 
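With the extra device_node argument, a provider that exposes several PHYs as sub-nodes can register each one against its own node; a minimal sketch (the loop and the example_phy_ops name are illustrative, not taken from the patch):

struct device_node *child;

for_each_available_child_of_node(dev->of_node, child) {
        struct phy *phy = devm_phy_create(dev, child, &example_phy_ops, NULL);

        if (IS_ERR(phy))
                return PTR_ERR(phy);
        /* of_phy_simple_xlate() can now match a consumer's phandle against 'child' */
}

Single-PHY drivers simply pass NULL for the node and keep the previous behaviour of using dev->of_node, which is what the bulk of the driver updates below do.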
Signed-off-by: Kishon Vijay Abraham I Acked-by: Lee Jones --- Documentation/phy.txt | 10 ++++++---- drivers/phy/phy-bcm-kona-usb2.c | 2 +- drivers/phy/phy-berlin-sata.c | 2 +- drivers/phy/phy-core.c | 25 ++++++++++++++++++------- drivers/phy/phy-exynos-dp-video.c | 2 +- drivers/phy/phy-exynos-mipi-video.c | 2 +- drivers/phy/phy-exynos5-usbdrd.c | 3 ++- drivers/phy/phy-exynos5250-sata.c | 2 +- drivers/phy/phy-hix5hd2-sata.c | 2 +- drivers/phy/phy-mvebu-sata.c | 2 +- drivers/phy/phy-omap-usb2.c | 2 +- drivers/phy/phy-qcom-apq8064-sata.c | 3 ++- drivers/phy/phy-samsung-usb2.c | 3 ++- drivers/phy/phy-sun4i-usb.c | 2 +- drivers/phy/phy-ti-pipe3.c | 2 +- drivers/phy/phy-twl4030-usb.c | 2 +- drivers/phy/phy-xgene.c | 2 +- include/linux/phy/phy.h | 15 ++++++++++----- 18 files changed, 52 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/Documentation/phy.txt b/Documentation/phy.txt index ebff6ee52441..c6594af94d25 100644 --- a/Documentation/phy.txt +++ b/Documentation/phy.txt @@ -53,10 +53,12 @@ unregister the PHY. The PHY driver should create the PHY in order for other peripheral controllers to make use of it. The PHY framework provides 2 APIs to create the PHY. -struct phy *phy_create(struct device *dev, const struct phy_ops *ops, - struct phy_init_data *init_data); -struct phy *devm_phy_create(struct device *dev, const struct phy_ops *ops, - struct phy_init_data *init_data); +struct phy *phy_create(struct device *dev, struct device_node *node, + const struct phy_ops *ops, + struct phy_init_data *init_data); +struct phy *devm_phy_create(struct device *dev, struct device_node *node, + const struct phy_ops *ops, + struct phy_init_data *init_data); The PHY drivers can use one of the above 2 APIs to create the PHY by passing the device pointer, phy ops and init_data. 
diff --git a/drivers/phy/phy-bcm-kona-usb2.c b/drivers/phy/phy-bcm-kona-usb2.c index e94f5a6a5645..894fe74c1e44 100644 --- a/drivers/phy/phy-bcm-kona-usb2.c +++ b/drivers/phy/phy-bcm-kona-usb2.c @@ -117,7 +117,7 @@ static int bcm_kona_usb2_probe(struct platform_device *pdev) platform_set_drvdata(pdev, phy); - gphy = devm_phy_create(dev, &ops, NULL); + gphy = devm_phy_create(dev, NULL, &ops, NULL); if (IS_ERR(gphy)) return PTR_ERR(gphy); diff --git a/drivers/phy/phy-berlin-sata.c b/drivers/phy/phy-berlin-sata.c index c5e688b0899f..5c3a0424aeb4 100644 --- a/drivers/phy/phy-berlin-sata.c +++ b/drivers/phy/phy-berlin-sata.c @@ -239,7 +239,7 @@ static int phy_berlin_sata_probe(struct platform_device *pdev) if (!phy_desc) return -ENOMEM; - phy = devm_phy_create(dev, &phy_berlin_sata_ops, NULL); + phy = devm_phy_create(dev, NULL, &phy_berlin_sata_ops, NULL); if (IS_ERR(phy)) { dev_err(dev, "failed to create PHY %d\n", phy_id); return PTR_ERR(phy); diff --git a/drivers/phy/phy-core.c b/drivers/phy/phy-core.c index 527e744a3809..ff5eec5af817 100644 --- a/drivers/phy/phy-core.c +++ b/drivers/phy/phy-core.c @@ -415,13 +415,20 @@ struct phy *of_phy_simple_xlate(struct device *dev, struct of_phandle_args struct phy *phy; struct class_dev_iter iter; struct device_node *node = dev->of_node; + struct device_node *child; class_dev_iter_init(&iter, phy_class, NULL, NULL); while ((dev = class_dev_iter_next(&iter))) { phy = to_phy(dev); - if (node != phy->dev.of_node) + if (node != phy->dev.of_node) { + for_each_child_of_node(node, child) { + if (child == phy->dev.of_node) + goto phy_found; + } continue; + } +phy_found: class_dev_iter_exit(&iter); return phy; } @@ -579,13 +586,15 @@ EXPORT_SYMBOL_GPL(devm_of_phy_get); /** * phy_create() - create a new phy * @dev: device that is creating the new phy + * @node: device node of the phy * @ops: function pointers for performing phy operations * @init_data: contains the list of PHY consumers or NULL * * Called to create a phy using phy framework. */ -struct phy *phy_create(struct device *dev, const struct phy_ops *ops, - struct phy_init_data *init_data) +struct phy *phy_create(struct device *dev, struct device_node *node, + const struct phy_ops *ops, + struct phy_init_data *init_data) { int ret; int id; @@ -620,7 +629,7 @@ struct phy *phy_create(struct device *dev, const struct phy_ops *ops, phy->dev.class = phy_class; phy->dev.parent = dev; - phy->dev.of_node = dev->of_node; + phy->dev.of_node = node ?: dev->of_node; phy->id = id; phy->ops = ops; phy->init_data = init_data; @@ -656,6 +665,7 @@ EXPORT_SYMBOL_GPL(phy_create); /** * devm_phy_create() - create a new phy * @dev: device that is creating the new phy + * @node: device node of the phy * @ops: function pointers for performing phy operations * @init_data: contains the list of PHY consumers or NULL * @@ -664,8 +674,9 @@ EXPORT_SYMBOL_GPL(phy_create); * On driver detach, release function is invoked on the devres data, * then, devres data is freed. 
*/ -struct phy *devm_phy_create(struct device *dev, const struct phy_ops *ops, - struct phy_init_data *init_data) +struct phy *devm_phy_create(struct device *dev, struct device_node *node, + const struct phy_ops *ops, + struct phy_init_data *init_data) { struct phy **ptr, *phy; @@ -673,7 +684,7 @@ struct phy *devm_phy_create(struct device *dev, const struct phy_ops *ops, if (!ptr) return ERR_PTR(-ENOMEM); - phy = phy_create(dev, ops, init_data); + phy = phy_create(dev, node, ops, init_data); if (!IS_ERR(phy)) { *ptr = phy; devres_add(dev, ptr); diff --git a/drivers/phy/phy-exynos-dp-video.c b/drivers/phy/phy-exynos-dp-video.c index 098f822a2fa4..8b3026e2af7f 100644 --- a/drivers/phy/phy-exynos-dp-video.c +++ b/drivers/phy/phy-exynos-dp-video.c @@ -77,7 +77,7 @@ static int exynos_dp_video_phy_probe(struct platform_device *pdev) if (IS_ERR(state->regs)) return PTR_ERR(state->regs); - phy = devm_phy_create(dev, &exynos_dp_video_phy_ops, NULL); + phy = devm_phy_create(dev, NULL, &exynos_dp_video_phy_ops, NULL); if (IS_ERR(phy)) { dev_err(dev, "failed to create Display Port PHY\n"); return PTR_ERR(phy); diff --git a/drivers/phy/phy-exynos-mipi-video.c b/drivers/phy/phy-exynos-mipi-video.c index 6d6bcf52a10e..b55a92e12496 100644 --- a/drivers/phy/phy-exynos-mipi-video.c +++ b/drivers/phy/phy-exynos-mipi-video.c @@ -136,7 +136,7 @@ static int exynos_mipi_video_phy_probe(struct platform_device *pdev) spin_lock_init(&state->slock); for (i = 0; i < EXYNOS_MIPI_PHYS_NUM; i++) { - struct phy *phy = devm_phy_create(dev, + struct phy *phy = devm_phy_create(dev, NULL, &exynos_mipi_video_phy_ops, NULL); if (IS_ERR(phy)) { dev_err(dev, "failed to create PHY %d\n", i); diff --git a/drivers/phy/phy-exynos5-usbdrd.c b/drivers/phy/phy-exynos5-usbdrd.c index 205159db37a3..b05302b09c9f 100644 --- a/drivers/phy/phy-exynos5-usbdrd.c +++ b/drivers/phy/phy-exynos5-usbdrd.c @@ -635,7 +635,8 @@ static int exynos5_usbdrd_phy_probe(struct platform_device *pdev) dev_vdbg(dev, "Creating usbdrd_phy phy\n"); for (i = 0; i < EXYNOS5_DRDPHYS_NUM; i++) { - struct phy *phy = devm_phy_create(dev, &exynos5_usbdrd_phy_ops, + struct phy *phy = devm_phy_create(dev, NULL, + &exynos5_usbdrd_phy_ops, NULL); if (IS_ERR(phy)) { dev_err(dev, "Failed to create usbdrd_phy phy\n"); diff --git a/drivers/phy/phy-exynos5250-sata.c b/drivers/phy/phy-exynos5250-sata.c index 05689450f93b..19a679aca4ac 100644 --- a/drivers/phy/phy-exynos5250-sata.c +++ b/drivers/phy/phy-exynos5250-sata.c @@ -210,7 +210,7 @@ static int exynos_sata_phy_probe(struct platform_device *pdev) return ret; } - sata_phy->phy = devm_phy_create(dev, &exynos_sata_phy_ops, NULL); + sata_phy->phy = devm_phy_create(dev, NULL, &exynos_sata_phy_ops, NULL); if (IS_ERR(sata_phy->phy)) { clk_disable_unprepare(sata_phy->phyclk); dev_err(dev, "failed to create PHY\n"); diff --git a/drivers/phy/phy-hix5hd2-sata.c b/drivers/phy/phy-hix5hd2-sata.c index d44283453d71..6a08fa5f81eb 100644 --- a/drivers/phy/phy-hix5hd2-sata.c +++ b/drivers/phy/phy-hix5hd2-sata.c @@ -156,7 +156,7 @@ static int hix5hd2_sata_phy_probe(struct platform_device *pdev) if (IS_ERR(priv->peri_ctrl)) priv->peri_ctrl = NULL; - phy = devm_phy_create(dev, &hix5hd2_sata_phy_ops, NULL); + phy = devm_phy_create(dev, NULL, &hix5hd2_sata_phy_ops, NULL); if (IS_ERR(phy)) { dev_err(dev, "failed to create PHY\n"); return PTR_ERR(phy); diff --git a/drivers/phy/phy-mvebu-sata.c b/drivers/phy/phy-mvebu-sata.c index d70ecd6a1b3f..cc3c0e166daf 100644 --- a/drivers/phy/phy-mvebu-sata.c +++ b/drivers/phy/phy-mvebu-sata.c @@ -99,7 +99,7 @@ 
static int phy_mvebu_sata_probe(struct platform_device *pdev) if (IS_ERR(priv->clk)) return PTR_ERR(priv->clk); - phy = devm_phy_create(&pdev->dev, &phy_mvebu_sata_ops, NULL); + phy = devm_phy_create(&pdev->dev, NULL, &phy_mvebu_sata_ops, NULL); if (IS_ERR(phy)) return PTR_ERR(phy); diff --git a/drivers/phy/phy-omap-usb2.c b/drivers/phy/phy-omap-usb2.c index 34b396146c8a..93d78359246c 100644 --- a/drivers/phy/phy-omap-usb2.c +++ b/drivers/phy/phy-omap-usb2.c @@ -263,7 +263,7 @@ static int omap_usb2_probe(struct platform_device *pdev) platform_set_drvdata(pdev, phy); - generic_phy = devm_phy_create(phy->dev, &ops, NULL); + generic_phy = devm_phy_create(phy->dev, NULL, &ops, NULL); if (IS_ERR(generic_phy)) return PTR_ERR(generic_phy); diff --git a/drivers/phy/phy-qcom-apq8064-sata.c b/drivers/phy/phy-qcom-apq8064-sata.c index c9b4dd6becf5..d7c01aa1f8d7 100644 --- a/drivers/phy/phy-qcom-apq8064-sata.c +++ b/drivers/phy/phy-qcom-apq8064-sata.c @@ -228,7 +228,8 @@ static int qcom_apq8064_sata_phy_probe(struct platform_device *pdev) if (IS_ERR(phy->mmio)) return PTR_ERR(phy->mmio); - generic_phy = devm_phy_create(dev, &qcom_apq8064_sata_phy_ops, NULL); + generic_phy = devm_phy_create(dev, NULL, &qcom_apq8064_sata_phy_ops, + NULL); if (IS_ERR(generic_phy)) { dev_err(dev, "%s: failed to create phy\n", __func__); return PTR_ERR(generic_phy); diff --git a/drivers/phy/phy-samsung-usb2.c b/drivers/phy/phy-samsung-usb2.c index 16aae7a285f0..ae30640a411d 100644 --- a/drivers/phy/phy-samsung-usb2.c +++ b/drivers/phy/phy-samsung-usb2.c @@ -196,7 +196,8 @@ static int samsung_usb2_phy_probe(struct platform_device *pdev) struct samsung_usb2_phy_instance *p = &drv->instances[i]; dev_dbg(dev, "Creating phy \"%s\"\n", label); - p->phy = devm_phy_create(dev, &samsung_usb2_phy_ops, NULL); + p->phy = devm_phy_create(dev, NULL, &samsung_usb2_phy_ops, + NULL); if (IS_ERR(p->phy)) { dev_err(drv->dev, "Failed to create usb2_phy \"%s\"\n", label); diff --git a/drivers/phy/phy-sun4i-usb.c b/drivers/phy/phy-sun4i-usb.c index 7a4ea552f621..61ebea49709b 100644 --- a/drivers/phy/phy-sun4i-usb.c +++ b/drivers/phy/phy-sun4i-usb.c @@ -295,7 +295,7 @@ static int sun4i_usb_phy_probe(struct platform_device *pdev) return PTR_ERR(phy->pmu); } - phy->phy = devm_phy_create(dev, &sun4i_usb_phy_ops, NULL); + phy->phy = devm_phy_create(dev, NULL, &sun4i_usb_phy_ops, NULL); if (IS_ERR(phy->phy)) { dev_err(dev, "failed to create PHY %d\n", i); return PTR_ERR(phy->phy); diff --git a/drivers/phy/phy-ti-pipe3.c b/drivers/phy/phy-ti-pipe3.c index 93bcd67f1b22..b964aa967b46 100644 --- a/drivers/phy/phy-ti-pipe3.c +++ b/drivers/phy/phy-ti-pipe3.c @@ -400,7 +400,7 @@ static int ti_pipe3_probe(struct platform_device *pdev) platform_set_drvdata(pdev, phy); pm_runtime_enable(phy->dev); - generic_phy = devm_phy_create(phy->dev, &ops, NULL); + generic_phy = devm_phy_create(phy->dev, NULL, &ops, NULL); if (IS_ERR(generic_phy)) return PTR_ERR(generic_phy); diff --git a/drivers/phy/phy-twl4030-usb.c b/drivers/phy/phy-twl4030-usb.c index 2e0e9b3774c8..e1a6623d4696 100644 --- a/drivers/phy/phy-twl4030-usb.c +++ b/drivers/phy/phy-twl4030-usb.c @@ -695,7 +695,7 @@ static int twl4030_usb_probe(struct platform_device *pdev) otg->set_host = twl4030_set_host; otg->set_peripheral = twl4030_set_peripheral; - phy = devm_phy_create(twl->dev, &ops, init_data); + phy = devm_phy_create(twl->dev, NULL, &ops, init_data); if (IS_ERR(phy)) { dev_dbg(&pdev->dev, "Failed to create PHY\n"); return PTR_ERR(phy); diff --git a/drivers/phy/phy-xgene.c b/drivers/phy/phy-xgene.c 
index 4aa1ccd1511f..db809b97219e 100644 --- a/drivers/phy/phy-xgene.c +++ b/drivers/phy/phy-xgene.c @@ -1707,7 +1707,7 @@ static int xgene_phy_probe(struct platform_device *pdev) ctx->dev = &pdev->dev; platform_set_drvdata(pdev, ctx); - ctx->phy = devm_phy_create(ctx->dev, &xgene_phy_ops, NULL); + ctx->phy = devm_phy_create(ctx->dev, NULL, &xgene_phy_ops, NULL); if (IS_ERR(ctx->phy)) { dev_dbg(&pdev->dev, "Failed to create PHY\n"); rc = PTR_ERR(ctx->phy); diff --git a/include/linux/phy/phy.h b/include/linux/phy/phy.h index 9a8694524742..8cb6f815475b 100644 --- a/include/linux/phy/phy.h +++ b/include/linux/phy/phy.h @@ -158,9 +158,10 @@ void devm_phy_put(struct device *dev, struct phy *phy); struct phy *of_phy_get(struct device_node *np, const char *con_id); struct phy *of_phy_simple_xlate(struct device *dev, struct of_phandle_args *args); -struct phy *phy_create(struct device *dev, const struct phy_ops *ops, - struct phy_init_data *init_data); -struct phy *devm_phy_create(struct device *dev, +struct phy *phy_create(struct device *dev, struct device_node *node, + const struct phy_ops *ops, + struct phy_init_data *init_data); +struct phy *devm_phy_create(struct device *dev, struct device_node *node, const struct phy_ops *ops, struct phy_init_data *init_data); void phy_destroy(struct phy *phy); void devm_phy_destroy(struct device *dev, struct phy *phy); @@ -299,13 +300,17 @@ static inline struct phy *of_phy_simple_xlate(struct device *dev, } static inline struct phy *phy_create(struct device *dev, - const struct phy_ops *ops, struct phy_init_data *init_data) + struct device_node *node, + const struct phy_ops *ops, + struct phy_init_data *init_data) { return ERR_PTR(-ENOSYS); } static inline struct phy *devm_phy_create(struct device *dev, - const struct phy_ops *ops, struct phy_init_data *init_data) + struct device_node *node, + const struct phy_ops *ops, + struct phy_init_data *init_data) { return ERR_PTR(-ENOSYS); } -- cgit v1.2.3-59-g8ed1b From 0bec8c88dc2b076a0a4a0437e1e878026cbaccb4 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Tue, 22 Jul 2014 12:06:23 +0200 Subject: net: skbuff: Use ALIGN macro instead of open coding it Use ALIGN from linux/kernel.h to define SKB_DATA_ALIGN instead of open coding it. Signed-off-by: Tobias Klauser Signed-off-by: David S. Miller --- include/linux/skbuff.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 369430340ed9..b613557132b9 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -112,8 +112,7 @@ #define CHECKSUM_COMPLETE 2 #define CHECKSUM_PARTIAL 3 -#define SKB_DATA_ALIGN(X) (((X) + (SMP_CACHE_BYTES - 1)) & \ - ~(SMP_CACHE_BYTES - 1)) +#define SKB_DATA_ALIGN(X) ALIGN(X, SMP_CACHE_BYTES) #define SKB_WITH_OVERHEAD(X) \ ((X) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) #define SKB_MAX_ORDER(X, ORDER) \ -- cgit v1.2.3-59-g8ed1b From fc7c70e0b6b637bbf6cf8b9cee547d5ae83899c9 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 18 Jul 2014 18:56:34 +0100 Subject: KEYS: struct key_preparsed_payload should have two payload pointers struct key_preparsed_payload should have two payload pointers to correspond with those in struct key. 
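Purely as an illustration (not part of this commit), a key type's ->preparse() routine can now propose both payload words; the helper example_parse_blob() and the names primary/aux below are invented for this sketch, while the prep->payload[] slots are the ones the core later copies into the key (payload[0] via rcu_assign_keypointer(), payload[1] into key->payload.data2[1]):

	#include <linux/key-type.h>

	/*
	 * Hypothetical sketch only: fill both proposed payload slots from the
	 * raw data.  example_parse_blob() is not a real helper.
	 */
	static int example_key_preparse(struct key_preparsed_payload *prep)
	{
		void *primary, *aux;
		int ret;

		ret = example_parse_blob(prep->data, prep->datalen, &primary, &aux);
		if (ret < 0)
			return ret;

		/* payload[0] is what rcu_assign_keypointer() will install */
		prep->payload[0] = primary;
		/* payload[1] ends up in key->payload.data2[1] */
		prep->payload[1] = aux;
		return 0;
	}
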
Signed-off-by: David Howells Acked-by: Steve Dickson Acked-by: Jeff Layton Reviewed-by: Sage Weil --- crypto/asymmetric_keys/asymmetric_type.c | 2 +- crypto/asymmetric_keys/x509_public_key.c | 2 +- include/linux/key-type.h | 2 +- security/keys/encrypted-keys/encrypted.c | 2 +- security/keys/key.c | 6 ++++-- 5 files changed, 8 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/crypto/asymmetric_keys/asymmetric_type.c b/crypto/asymmetric_keys/asymmetric_type.c index c1fe0fcee8e3..21960a4e74e8 100644 --- a/crypto/asymmetric_keys/asymmetric_type.c +++ b/crypto/asymmetric_keys/asymmetric_type.c @@ -156,7 +156,7 @@ static void asymmetric_key_free_preparse(struct key_preparsed_payload *prep) pr_devel("==>%s()\n", __func__); if (subtype) { - subtype->destroy(prep->payload); + subtype->destroy(prep->payload[0]); module_put(subtype->owner); } kfree(prep->type_data[1]); diff --git a/crypto/asymmetric_keys/x509_public_key.c b/crypto/asymmetric_keys/x509_public_key.c index 382ef0d2ff2e..3fc8a0634ed7 100644 --- a/crypto/asymmetric_keys/x509_public_key.c +++ b/crypto/asymmetric_keys/x509_public_key.c @@ -177,7 +177,7 @@ static int x509_key_preparse(struct key_preparsed_payload *prep) __module_get(public_key_subtype.owner); prep->type_data[0] = &public_key_subtype; prep->type_data[1] = cert->fingerprint; - prep->payload = cert->pub; + prep->payload[0] = cert->pub; prep->description = desc; prep->quotalen = 100; diff --git a/include/linux/key-type.h b/include/linux/key-type.h index 88503dca2a57..d2b4845d74bf 100644 --- a/include/linux/key-type.h +++ b/include/linux/key-type.h @@ -41,7 +41,7 @@ struct key_construction { struct key_preparsed_payload { char *description; /* Proposed key description (or NULL) */ void *type_data[2]; /* Private key-type data */ - void *payload; /* Proposed payload */ + void *payload[2]; /* Proposed payload */ const void *data; /* Raw data */ size_t datalen; /* Raw datalen */ size_t quotalen; /* Quota length for proposed payload */ diff --git a/security/keys/encrypted-keys/encrypted.c b/security/keys/encrypted-keys/encrypted.c index 5fe443d120af..d252c5704f8a 100644 --- a/security/keys/encrypted-keys/encrypted.c +++ b/security/keys/encrypted-keys/encrypted.c @@ -811,7 +811,7 @@ static int encrypted_instantiate(struct key *key, goto out; } - rcu_assign_keypointer(key, epayload); + prep->payload[0] = epayload; out: kfree(datablob); return ret; diff --git a/security/keys/key.c b/security/keys/key.c index 7c9acbf106b6..03620a35a4dc 100644 --- a/security/keys/key.c +++ b/security/keys/key.c @@ -1043,10 +1043,12 @@ int generic_key_instantiate(struct key *key, struct key_preparsed_payload *prep) if (ret == 0) { key->type_data.p[0] = prep->type_data[0]; key->type_data.p[1] = prep->type_data[1]; - rcu_assign_keypointer(key, prep->payload); + rcu_assign_keypointer(key, prep->payload[0]); + key->payload.data2[1] = prep->payload[1]; prep->type_data[0] = NULL; prep->type_data[1] = NULL; - prep->payload = NULL; + prep->payload[0] = NULL; + prep->payload[1] = NULL; } pr_devel("<==%s() = %d\n", __func__, ret); return ret; -- cgit v1.2.3-59-g8ed1b From 7dfa0ca6a95de65b7a7760630cdbd7d30f204bfa Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 18 Jul 2014 18:56:34 +0100 Subject: KEYS: Allow expiry time to be set when preparsing a key Allow a key type's preparsing routine to set the expiry time for a key. 
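For illustration only (not taken from this patch): a preparsing routine that proposes an expiry parsed out of its payload. struct example_timed_blob and its ttl field are invented for the sketch, and get_seconds() is assumed as the current-time source; leaving prep->expiry at TIME_T_MAX means "never expires", otherwise the core applies the proposed time when the key is instantiated:

	#include <linux/key-type.h>
	#include <linux/time.h>

	/* Hypothetical payload layout, invented for this sketch. */
	struct example_timed_blob {
		u32	ttl;		/* lifetime in seconds, 0 = no expiry */
		u8	data[];
	};

	static int example_timed_preparse(struct key_preparsed_payload *prep)
	{
		const struct example_timed_blob *blob = prep->data;

		if (prep->datalen < sizeof(*blob))
			return -EINVAL;

		/* Propose an expiry; prep->expiry stays TIME_T_MAX otherwise. */
		if (blob->ttl)
			prep->expiry = get_seconds() + blob->ttl;

		return 0;
	}
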
Signed-off-by: David Howells Acked-by: Steve Dickson Acked-by: Jeff Layton Reviewed-by: Sage Weil --- Documentation/security/keys.txt | 10 +++++++--- include/linux/key-type.h | 1 + security/keys/key.c | 8 ++++++++ 3 files changed, 16 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/Documentation/security/keys.txt b/Documentation/security/keys.txt index a4c33f1a7c6d..315cf96a41a2 100644 --- a/Documentation/security/keys.txt +++ b/Documentation/security/keys.txt @@ -1150,20 +1150,24 @@ The structure has a number of fields, some of which are mandatory: const void *data; size_t datalen; size_t quotalen; + time_t expiry; }; Before calling the method, the caller will fill in data and datalen with the payload blob parameters; quotalen will be filled in with the default - quota size from the key type and the rest will be cleared. + quota size from the key type; expiry will be set to TIME_T_MAX and the + rest will be cleared. If a description can be proposed from the payload contents, that should be attached as a string to the description field. This will be used for the key description if the caller of add_key() passes NULL or "". The method can attach anything it likes to type_data[] and payload. These - are merely passed along to the instantiate() or update() operations. + are merely passed along to the instantiate() or update() operations. If + set, the expiry time will be applied to the key if it is instantiated from + this data. - The method should return 0 if success ful or a negative error code + The method should return 0 if successful or a negative error code otherwise. diff --git a/include/linux/key-type.h b/include/linux/key-type.h index d2b4845d74bf..44792ee649de 100644 --- a/include/linux/key-type.h +++ b/include/linux/key-type.h @@ -45,6 +45,7 @@ struct key_preparsed_payload { const void *data; /* Raw data */ size_t datalen; /* Raw datalen */ size_t quotalen; /* Quota length for proposed payload */ + time_t expiry; /* Expiry time of key */ bool trusted; /* True if key is trusted */ }; diff --git a/security/keys/key.c b/security/keys/key.c index 03620a35a4dc..755fb02df5af 100644 --- a/security/keys/key.c +++ b/security/keys/key.c @@ -437,6 +437,11 @@ static int __key_instantiate_and_link(struct key *key, /* disable the authorisation key */ if (authkey) key_revoke(authkey); + + if (prep->expiry != TIME_T_MAX) { + key->expiry = prep->expiry; + key_schedule_gc(prep->expiry + key_gc_delay); + } } } @@ -479,6 +484,7 @@ int key_instantiate_and_link(struct key *key, prep.data = data; prep.datalen = datalen; prep.quotalen = key->type->def_datalen; + prep.expiry = TIME_T_MAX; if (key->type->preparse) { ret = key->type->preparse(&prep); if (ret < 0) @@ -811,6 +817,7 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref, prep.datalen = plen; prep.quotalen = index_key.type->def_datalen; prep.trusted = flags & KEY_ALLOC_TRUSTED; + prep.expiry = TIME_T_MAX; if (index_key.type->preparse) { ret = index_key.type->preparse(&prep); if (ret < 0) { @@ -941,6 +948,7 @@ int key_update(key_ref_t key_ref, const void *payload, size_t plen) prep.data = payload; prep.datalen = plen; prep.quotalen = key->type->def_datalen; + prep.expiry = TIME_T_MAX; if (key->type->preparse) { ret = key->type->preparse(&prep); if (ret < 0) -- cgit v1.2.3-59-g8ed1b From 44a69f6195628f6f940566d133a72987559e102d Mon Sep 17 00:00:00 2001 From: Tomasz Nowicki Date: Tue, 22 Jul 2014 11:20:12 +0200 Subject: acpi, apei, ghes: Make NMI error notification to be GHES architecture extension. 
Currently APEI depends on x86 architecture. It is because of NMI hardware error notification of GHES which is currently supported by x86 only. However, many other APEI features can be still used perfectly by other architectures. This commit adds two symbols: 1. HAVE_ACPI_APEI for those archs which support APEI. 2. HAVE_ACPI_APEI_NMI which is used for NMI code isolation in ghes.c file. NMI related data and functions are grouped so they can be wrapped inside one #ifdef section. Appropriate function stubs are provided for !NMI case. Note there is no functional changes for x86 due to hard selected HAVE_ACPI_APEI and HAVE_ACPI_APEI_NMI symbols. Signed-off-by: Tomasz Nowicki Acked-by: Borislav Petkov Signed-off-by: Tony Luck --- arch/x86/Kconfig | 2 + drivers/acpi/apei/Kconfig | 8 ++- drivers/acpi/apei/ghes.c | 149 ++++++++++++++++++++++++++++++---------------- include/linux/nmi.h | 4 ++ 4 files changed, 110 insertions(+), 53 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index d24887b645dc..43873442dee1 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -132,6 +132,8 @@ config X86 select GENERIC_CPU_AUTOPROBE select HAVE_ARCH_AUDITSYSCALL select ARCH_SUPPORTS_ATOMIC_RMW + select HAVE_ACPI_APEI if ACPI + select HAVE_ACPI_APEI_NMI if ACPI config INSTRUCTION_DECODER def_bool y diff --git a/drivers/acpi/apei/Kconfig b/drivers/acpi/apei/Kconfig index c4dac7150960..b0140c8fc733 100644 --- a/drivers/acpi/apei/Kconfig +++ b/drivers/acpi/apei/Kconfig @@ -1,9 +1,15 @@ +config HAVE_ACPI_APEI + bool + +config HAVE_ACPI_APEI_NMI + bool + config ACPI_APEI bool "ACPI Platform Error Interface (APEI)" select MISC_FILESYSTEMS select PSTORE select UEFI_CPER - depends on X86 + depends on HAVE_ACPI_APEI help APEI allows to report errors (for example from the chipset) to the operating system. This improves NMI handling diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 352170a74a2e..7fcf4d7b41f6 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -47,11 +47,11 @@ #include #include #include +#include #include #include #include -#include #include "apei-internal.h" @@ -86,8 +86,6 @@ bool ghes_disable; module_param_named(disable, ghes_disable, bool, 0); -static int ghes_panic_timeout __read_mostly = 30; - /* * All error sources notified with SCI shares one notifier function, * so they need to be linked and checked one by one. This is applied @@ -97,15 +95,8 @@ static int ghes_panic_timeout __read_mostly = 30; * list changing, not for traversing. */ static LIST_HEAD(ghes_sci); -static LIST_HEAD(ghes_nmi); static DEFINE_MUTEX(ghes_list_mutex); -/* - * NMI may be triggered on any CPU, so ghes_nmi_lock is used for - * mutual exclusion. - */ -static DEFINE_RAW_SPINLOCK(ghes_nmi_lock); - /* * Because the memory area used to transfer hardware error information * from BIOS to Linux can be determined only in NMI, IRQ or timer @@ -130,18 +121,8 @@ static struct vm_struct *ghes_ioremap_area; static DEFINE_RAW_SPINLOCK(ghes_ioremap_lock_nmi); static DEFINE_SPINLOCK(ghes_ioremap_lock_irq); -/* - * printk is not safe in NMI context. So in NMI handler, we allocate - * required memory from lock-less memory allocator - * (ghes_estatus_pool), save estatus into it, put them into lock-less - * list (ghes_estatus_llist), then delay printk into IRQ context via - * irq_work (ghes_proc_irq_work). ghes_estatus_size_request record - * required pool size by all NMI error source. 
- */ static struct gen_pool *ghes_estatus_pool; static unsigned long ghes_estatus_pool_size_request; -static struct llist_head ghes_estatus_llist; -static struct irq_work ghes_proc_irq_work; struct ghes_estatus_cache *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE]; static atomic_t ghes_estatus_cache_alloced; @@ -249,11 +230,6 @@ static int ghes_estatus_pool_expand(unsigned long len) return 0; } -static void ghes_estatus_pool_shrink(unsigned long len) -{ - ghes_estatus_pool_size_request -= PAGE_ALIGN(len); -} - static struct ghes *ghes_new(struct acpi_hest_generic *generic) { struct ghes *ghes; @@ -732,6 +708,32 @@ static int ghes_notify_sci(struct notifier_block *this, return ret; } +static struct notifier_block ghes_notifier_sci = { + .notifier_call = ghes_notify_sci, +}; + +#ifdef CONFIG_HAVE_ACPI_APEI_NMI +/* + * printk is not safe in NMI context. So in NMI handler, we allocate + * required memory from lock-less memory allocator + * (ghes_estatus_pool), save estatus into it, put them into lock-less + * list (ghes_estatus_llist), then delay printk into IRQ context via + * irq_work (ghes_proc_irq_work). ghes_estatus_size_request record + * required pool size by all NMI error source. + */ +static struct llist_head ghes_estatus_llist; +static struct irq_work ghes_proc_irq_work; + +/* + * NMI may be triggered on any CPU, so ghes_nmi_lock is used for + * mutual exclusion. + */ +static DEFINE_RAW_SPINLOCK(ghes_nmi_lock); + +static LIST_HEAD(ghes_nmi); + +static int ghes_panic_timeout __read_mostly = 30; + static struct llist_node *llist_nodes_reverse(struct llist_node *llnode) { struct llist_node *next, *tail = NULL; @@ -875,10 +877,6 @@ out: return ret; } -static struct notifier_block ghes_notifier_sci = { - .notifier_call = ghes_notify_sci, -}; - static unsigned long ghes_esource_prealloc_size( const struct acpi_hest_generic *generic) { @@ -894,11 +892,71 @@ static unsigned long ghes_esource_prealloc_size( return prealloc_size; } +static void ghes_estatus_pool_shrink(unsigned long len) +{ + ghes_estatus_pool_size_request -= PAGE_ALIGN(len); +} + +static void ghes_nmi_add(struct ghes *ghes) +{ + unsigned long len; + + len = ghes_esource_prealloc_size(ghes->generic); + ghes_estatus_pool_expand(len); + mutex_lock(&ghes_list_mutex); + if (list_empty(&ghes_nmi)) + register_nmi_handler(NMI_LOCAL, ghes_notify_nmi, 0, "ghes"); + list_add_rcu(&ghes->list, &ghes_nmi); + mutex_unlock(&ghes_list_mutex); +} + +static void ghes_nmi_remove(struct ghes *ghes) +{ + unsigned long len; + + mutex_lock(&ghes_list_mutex); + list_del_rcu(&ghes->list); + if (list_empty(&ghes_nmi)) + unregister_nmi_handler(NMI_LOCAL, "ghes"); + mutex_unlock(&ghes_list_mutex); + /* + * To synchronize with NMI handler, ghes can only be + * freed after NMI handler finishes. 
+ */ + synchronize_rcu(); + len = ghes_esource_prealloc_size(ghes->generic); + ghes_estatus_pool_shrink(len); +} + +static void ghes_nmi_init_cxt(void) +{ + init_irq_work(&ghes_proc_irq_work, ghes_proc_in_irq); +} +#else /* CONFIG_HAVE_ACPI_APEI_NMI */ +static inline void ghes_nmi_add(struct ghes *ghes) +{ + pr_err(GHES_PFX "ID: %d, trying to add NMI notification which is not supported!\n", + ghes->generic->header.source_id); + BUG(); +} + +static inline void ghes_nmi_remove(struct ghes *ghes) +{ + pr_err(GHES_PFX "ID: %d, trying to remove NMI notification which is not supported!\n", + ghes->generic->header.source_id); + BUG(); +} + +static inline void ghes_nmi_init_cxt(void) +{ +} +#endif /* CONFIG_HAVE_ACPI_APEI_NMI */ + static int ghes_probe(struct platform_device *ghes_dev) { struct acpi_hest_generic *generic; struct ghes *ghes = NULL; - unsigned long len; + int rc = -EINVAL; generic = *(struct acpi_hest_generic **)ghes_dev->dev.platform_data; @@ -909,7 +967,13 @@ static int ghes_probe(struct platform_device *ghes_dev) case ACPI_HEST_NOTIFY_POLLED: case ACPI_HEST_NOTIFY_EXTERNAL: case ACPI_HEST_NOTIFY_SCI: + break; case ACPI_HEST_NOTIFY_NMI: + if (!IS_ENABLED(CONFIG_HAVE_ACPI_APEI_NMI)) { + pr_warn(GHES_PFX "Generic hardware error source: %d notified via NMI interrupt is not supported!\n", + generic->header.source_id); + goto err; + } break; case ACPI_HEST_NOTIFY_LOCAL: pr_warning(GHES_PFX "Generic hardware error source: %d notified via local interrupt is not supported!\n", @@ -970,14 +1034,7 @@ static int ghes_probe(struct platform_device *ghes_dev) mutex_unlock(&ghes_list_mutex); break; case ACPI_HEST_NOTIFY_NMI: - len = ghes_esource_prealloc_size(generic); - ghes_estatus_pool_expand(len); - mutex_lock(&ghes_list_mutex); - if (list_empty(&ghes_nmi)) - register_nmi_handler(NMI_LOCAL, ghes_notify_nmi, 0, - "ghes"); - list_add_rcu(&ghes->list, &ghes_nmi); - mutex_unlock(&ghes_list_mutex); + ghes_nmi_add(ghes); break; default: BUG(); @@ -999,7 +1056,6 @@ static int ghes_remove(struct platform_device *ghes_dev) { struct ghes *ghes; struct acpi_hest_generic *generic; - unsigned long len; ghes = platform_get_drvdata(ghes_dev); generic = ghes->generic; @@ -1020,18 +1076,7 @@ static int ghes_remove(struct platform_device *ghes_dev) mutex_unlock(&ghes_list_mutex); break; case ACPI_HEST_NOTIFY_NMI: - mutex_lock(&ghes_list_mutex); - list_del_rcu(&ghes->list); - if (list_empty(&ghes_nmi)) - unregister_nmi_handler(NMI_LOCAL, "ghes"); - mutex_unlock(&ghes_list_mutex); - /* - * To synchronize with NMI handler, ghes can only be - * freed after NMI handler finishes. - */ - synchronize_rcu(); - len = ghes_esource_prealloc_size(generic); - ghes_estatus_pool_shrink(len); + ghes_nmi_remove(ghes); break; default: BUG(); @@ -1075,7 +1120,7 @@ static int __init ghes_init(void) return -EINVAL; } - init_irq_work(&ghes_proc_irq_work, ghes_proc_in_irq); + ghes_nmi_init_cxt(); rc = ghes_ioremap_init(); if (rc) diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 447775ee2c4b..1d2a6ab6b8bb 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -63,4 +63,8 @@ extern int proc_dowatchdog(struct ctl_table *, int , void __user *, size_t *, loff_t *); #endif +#ifdef CONFIG_HAVE_ACPI_APEI_NMI +#include +#endif + #endif -- cgit v1.2.3-59-g8ed1b From 35630df68d6030daf12dde12ed07bbe26324e6ac Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Sun, 25 May 2014 22:35:38 +0200 Subject: NFC: st21nfcb: Add driver for STMicroelectronics ST21NFCB NFC chip Add driver for STMicroelectronics ST21NFCB NFC controller. 
ST21NFCB is using NCI protocol and a proprietary low level transport protocol called NDLC used on top. NDLC: The protocol defines 2 types of frame: - One type carrying NCI data (referred as DATAFRAME frames). - One type carrying protocol information used for flow control and error control mechanisms (referred as SUPERVISOR frames). After each frame transmission to the NFC controller, the device host SHALL waitfor an ACK (SUPERVISOR frame) reception before sending a new frame. The NFC controller MAY send a frame at anytime to the device host. The NFC controller MAY send a specific WAIT supervisor frame to indicate to device host that a NCI data packet has been received but that it could take significant time before the NFC controller sends an ACK and thus allows next data reception. Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz --- drivers/nfc/Kconfig | 2 +- drivers/nfc/Makefile | 3 +- drivers/nfc/st21nfcb/Kconfig | 22 ++ drivers/nfc/st21nfcb/Makefile | 8 + drivers/nfc/st21nfcb/i2c.c | 462 +++++++++++++++++++++++++++++++++ drivers/nfc/st21nfcb/ndlc.c | 298 +++++++++++++++++++++ drivers/nfc/st21nfcb/ndlc.h | 55 ++++ drivers/nfc/st21nfcb/st21nfcb.c | 129 +++++++++ drivers/nfc/st21nfcb/st21nfcb.h | 38 +++ include/linux/platform_data/st21nfcb.h | 32 +++ 10 files changed, 1047 insertions(+), 2 deletions(-) create mode 100644 drivers/nfc/st21nfcb/Kconfig create mode 100644 drivers/nfc/st21nfcb/Makefile create mode 100644 drivers/nfc/st21nfcb/i2c.c create mode 100644 drivers/nfc/st21nfcb/ndlc.c create mode 100644 drivers/nfc/st21nfcb/ndlc.h create mode 100644 drivers/nfc/st21nfcb/st21nfcb.c create mode 100644 drivers/nfc/st21nfcb/st21nfcb.h create mode 100644 include/linux/platform_data/st21nfcb.h (limited to 'include/linux') diff --git a/drivers/nfc/Kconfig b/drivers/nfc/Kconfig index 26c66a126551..7929fac13e1c 100644 --- a/drivers/nfc/Kconfig +++ b/drivers/nfc/Kconfig @@ -72,5 +72,5 @@ source "drivers/nfc/pn544/Kconfig" source "drivers/nfc/microread/Kconfig" source "drivers/nfc/nfcmrvl/Kconfig" source "drivers/nfc/st21nfca/Kconfig" - +source "drivers/nfc/st21nfcb/Kconfig" endmenu diff --git a/drivers/nfc/Makefile b/drivers/nfc/Makefile index 23225b0287fd..6b23a2c6e34a 100644 --- a/drivers/nfc/Makefile +++ b/drivers/nfc/Makefile @@ -11,6 +11,7 @@ obj-$(CONFIG_NFC_SIM) += nfcsim.o obj-$(CONFIG_NFC_PORT100) += port100.o obj-$(CONFIG_NFC_MRVL) += nfcmrvl/ obj-$(CONFIG_NFC_TRF7970A) += trf7970a.o -obj-$(CONFIG_NFC_ST21NFCA) += st21nfca/ +obj-$(CONFIG_NFC_ST21NFCA) += st21nfca/ +obj-$(CONFIG_NFC_ST21NFCB) += st21nfcb/ ccflags-$(CONFIG_NFC_DEBUG) := -DDEBUG diff --git a/drivers/nfc/st21nfcb/Kconfig b/drivers/nfc/st21nfcb/Kconfig new file mode 100644 index 000000000000..e0322dd03a70 --- /dev/null +++ b/drivers/nfc/st21nfcb/Kconfig @@ -0,0 +1,22 @@ +config NFC_ST21NFCB + tristate "STMicroelectronics ST21NFCB NFC driver" + depends on NFC_NCI + default n + ---help--- + STMicroelectronics ST21NFCB core driver. It implements the chipset + NCI logic and hooks into the NFC kernel APIs. Physical layers will + register against it. + + To compile this driver as a module, choose m here. The module will + be called st21nfcb. + Say N if unsure. + +config NFC_ST21NFCB_I2C + tristate "NFC ST21NFCB i2c support" + depends on NFC_ST21NFCB && I2C + ---help--- + This module adds support for the STMicroelectronics st21nfcb i2c interface. + Select this if your platform is using the i2c bus. + + If you choose to build a module, it'll be called st21nfcb_i2c. + Say N if unsure. 
diff --git a/drivers/nfc/st21nfcb/Makefile b/drivers/nfc/st21nfcb/Makefile new file mode 100644 index 000000000000..13d9f03b2fea --- /dev/null +++ b/drivers/nfc/st21nfcb/Makefile @@ -0,0 +1,8 @@ +# +# Makefile for ST21NFCB NCI based NFC driver +# + +st21nfcb_i2c-objs = i2c.o + +obj-$(CONFIG_NFC_ST21NFCB) += st21nfcb.o ndlc.o +obj-$(CONFIG_NFC_ST21NFCB_I2C) += st21nfcb_i2c.o diff --git a/drivers/nfc/st21nfcb/i2c.c b/drivers/nfc/st21nfcb/i2c.c new file mode 100644 index 000000000000..0f690baaef7a --- /dev/null +++ b/drivers/nfc/st21nfcb/i2c.c @@ -0,0 +1,462 @@ +/* + * I2C Link Layer for ST21NFCB NCI based Driver + * Copyright (C) 2014 STMicroelectronics SAS. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "ndlc.h" + +#define DRIVER_DESC "NCI NFC driver for ST21NFCB" + +/* ndlc header */ +#define ST21NFCB_FRAME_HEADROOM 1 +#define ST21NFCB_FRAME_TAILROOM 0 + +#define ST21NFCB_NCI_I2C_MIN_SIZE 4 /* PCB(1) + NCI Packet header(3) */ +#define ST21NFCB_NCI_I2C_MAX_SIZE 250 /* req 4.2.1 */ + +#define ST21NFCB_NCI_I2C_DRIVER_NAME "st21nfcb_nci_i2c" + +static struct i2c_device_id st21nfcb_nci_i2c_id_table[] = { + {ST21NFCB_NCI_DRIVER_NAME, 0}, + {} +}; +MODULE_DEVICE_TABLE(i2c, st21nfcb_nci_i2c_id_table); + +struct st21nfcb_i2c_phy { + struct i2c_client *i2c_dev; + struct llt_ndlc *ndlc; + + unsigned int gpio_irq; + unsigned int gpio_reset; + unsigned int irq_polarity; + + int powered; + + /* + * < 0 if hardware error occured (e.g. i2c err) + * and prevents normal operation. + */ + int hard_fault; +}; + +#define I2C_DUMP_SKB(info, skb) \ +do { \ + pr_debug("%s:\n", info); \ + print_hex_dump(KERN_DEBUG, "i2c: ", DUMP_PREFIX_OFFSET, \ + 16, 1, (skb)->data, (skb)->len, 0); \ +} while (0) + +static int st21nfcb_nci_i2c_enable(void *phy_id) +{ + struct st21nfcb_i2c_phy *phy = phy_id; + + gpio_set_value(phy->gpio_reset, 0); + usleep_range(10000, 15000); + gpio_set_value(phy->gpio_reset, 1); + phy->powered = 1; + usleep_range(80000, 85000); + + return 0; +} + +static void st21nfcb_nci_i2c_disable(void *phy_id) +{ + struct st21nfcb_i2c_phy *phy = phy_id; + + pr_info("\n"); + + phy->powered = 0; + /* reset chip in order to flush clf */ + gpio_set_value(phy->gpio_reset, 0); + usleep_range(10000, 15000); + gpio_set_value(phy->gpio_reset, 1); +} + +static void st21nfcb_nci_remove_header(struct sk_buff *skb) +{ + skb_pull(skb, ST21NFCB_FRAME_HEADROOM); +} + +/* + * Writing a frame must not return the number of written bytes. + * It must return either zero for success, or <0 for error. 
+ * In addition, it must not alter the skb + */ +static int st21nfcb_nci_i2c_write(void *phy_id, struct sk_buff *skb) +{ + int r = -1; + struct st21nfcb_i2c_phy *phy = phy_id; + struct i2c_client *client = phy->i2c_dev; + + I2C_DUMP_SKB("st21nfcb_nci_i2c_write", skb); + + if (phy->hard_fault != 0) + return phy->hard_fault; + + r = i2c_master_send(client, skb->data, skb->len); + if (r == -EREMOTEIO) { /* Retry, chip was in standby */ + usleep_range(1000, 4000); + r = i2c_master_send(client, skb->data, skb->len); + } + + if (r >= 0) { + if (r != skb->len) + r = -EREMOTEIO; + else + r = 0; + } + + st21nfcb_nci_remove_header(skb); + + return r; +} + +/* + * Reads an ndlc frame and returns it in a newly allocated sk_buff. + * returns: + * frame size : if received frame is complete (find ST21NFCB_SOF_EOF at + * end of read) + * -EAGAIN : if received frame is incomplete (not find ST21NFCB_SOF_EOF + * at end of read) + * -EREMOTEIO : i2c read error (fatal) + * -EBADMSG : frame was incorrect and discarded + * (value returned from st21nfcb_nci_i2c_repack) + * -EIO : if no ST21NFCB_SOF_EOF is found after reaching + * the read length end sequence + */ +static int st21nfcb_nci_i2c_read(struct st21nfcb_i2c_phy *phy, + struct sk_buff **skb) +{ + int r; + u8 len; + u8 buf[ST21NFCB_NCI_I2C_MAX_SIZE]; + struct i2c_client *client = phy->i2c_dev; + + r = i2c_master_recv(client, buf, 4); + if (r == -EREMOTEIO) { /* Retry, chip was in standby */ + usleep_range(1000, 4000); + r = i2c_master_recv(client, buf, 4); + } else if (r != 4) { + nfc_err(&client->dev, "cannot read ndlc & nci header\n"); + return -EREMOTEIO; + } + + len = be16_to_cpu(*(__be16 *) (buf + 2)); + if (len > ST21NFCB_NCI_I2C_MAX_SIZE) { + nfc_err(&client->dev, "invalid frame len\n"); + return -EBADMSG; + } + + *skb = alloc_skb(4 + len, GFP_KERNEL); + if (*skb == NULL) + return -ENOMEM; + + skb_reserve(*skb, 4); + skb_put(*skb, 4); + memcpy((*skb)->data, buf, 4); + + if (!len) + return 0; + + r = i2c_master_recv(client, buf, len); + if (r != len) { + kfree_skb(*skb); + return -EREMOTEIO; + } + + skb_put(*skb, len); + memcpy((*skb)->data + 4, buf, len); + + I2C_DUMP_SKB("i2c frame read", *skb); + + return 0; +} + +/* + * Reads an ndlc frame from the chip. + * + * On ST21NFCB, IRQ goes in idle state when read starts. 
+ */ +static irqreturn_t st21nfcb_nci_irq_thread_fn(int irq, void *phy_id) +{ + struct st21nfcb_i2c_phy *phy = phy_id; + struct i2c_client *client; + struct sk_buff *skb = NULL; + int r; + + if (!phy || irq != phy->i2c_dev->irq) { + WARN_ON_ONCE(1); + return IRQ_NONE; + } + + client = phy->i2c_dev; + dev_dbg(&client->dev, "IRQ\n"); + + if (phy->hard_fault) + return IRQ_HANDLED; + + if (!phy->powered) { + st21nfcb_nci_i2c_disable(phy); + return IRQ_HANDLED; + } + + r = st21nfcb_nci_i2c_read(phy, &skb); + if (r == -EREMOTEIO) { + phy->hard_fault = r; + ndlc_recv(phy->ndlc, NULL); + return IRQ_HANDLED; + } else if (r == -ENOMEM || r == -EBADMSG) { + return IRQ_HANDLED; + } + + ndlc_recv(phy->ndlc, skb); + + return IRQ_HANDLED; +} + +static struct nfc_phy_ops i2c_phy_ops = { + .write = st21nfcb_nci_i2c_write, + .enable = st21nfcb_nci_i2c_enable, + .disable = st21nfcb_nci_i2c_disable, +}; + +#ifdef CONFIG_OF +static int st21nfcb_nci_i2c_of_request_resources(struct i2c_client *client) +{ + struct st21nfcb_i2c_phy *phy = i2c_get_clientdata(client); + struct device_node *pp; + int gpio; + int r; + + pp = client->dev.of_node; + if (!pp) + return -ENODEV; + + /* Get GPIO from device tree */ + gpio = of_get_named_gpio(pp, "reset-gpios", 0); + if (gpio < 0) { + nfc_err(&client->dev, + "Failed to retrieve reset-gpios from device tree\n"); + return gpio; + } + + /* GPIO request and configuration */ + r = devm_gpio_request(&client->dev, gpio, "clf_reset"); + if (r) { + nfc_err(&client->dev, "Failed to request reset pin\n"); + return -ENODEV; + } + + r = gpio_direction_output(gpio, 1); + if (r) { + nfc_err(&client->dev, + "Failed to set reset pin direction as output\n"); + return -ENODEV; + } + phy->gpio_reset = gpio; + + /* IRQ */ + r = irq_of_parse_and_map(pp, 0); + if (r < 0) { + nfc_err(&client->dev, + "Unable to get irq, error: %d\n", r); + return r; + } + + phy->irq_polarity = irq_get_trigger_type(r); + client->irq = r; + + return 0; +} +#else +static int st21nfcb_nci_i2c_of_request_resources(struct i2c_client *client) +{ + return -ENODEV; +} +#endif + +static int st21nfcb_nci_i2c_request_resources(struct i2c_client *client) +{ + struct st21nfcb_nfc_platform_data *pdata; + struct st21nfcb_i2c_phy *phy = i2c_get_clientdata(client); + int r; + int irq; + + pdata = client->dev.platform_data; + if (pdata == NULL) { + nfc_err(&client->dev, "No platform data\n"); + return -EINVAL; + } + + /* store for later use */ + phy->gpio_irq = pdata->gpio_irq; + phy->gpio_reset = pdata->gpio_reset; + phy->irq_polarity = pdata->irq_polarity; + + r = devm_gpio_request(&client->dev, phy->gpio_irq, "wake_up"); + if (r) { + pr_err("%s : gpio_request failed\n", __FILE__); + return -ENODEV; + } + + r = gpio_direction_input(phy->gpio_irq); + if (r) { + pr_err("%s : gpio_direction_input failed\n", __FILE__); + return -ENODEV; + } + + r = devm_gpio_request(&client->dev, + phy->gpio_reset, "clf_reset"); + if (r) { + pr_err("%s : reset gpio_request failed\n", __FILE__); + return -ENODEV; + } + + r = gpio_direction_output(phy->gpio_reset, 1); + if (r) { + pr_err("%s : reset gpio_direction_output failed\n", + __FILE__); + return -ENODEV; + } + + /* IRQ */ + irq = gpio_to_irq(phy->gpio_irq); + if (irq < 0) { + nfc_err(&client->dev, + "Unable to get irq number for GPIO %d error %d\n", + phy->gpio_irq, r); + return -ENODEV; + } + client->irq = irq; + + return 0; +} + +static int st21nfcb_nci_i2c_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct st21nfcb_i2c_phy *phy; + struct st21nfcb_nfc_platform_data 
*pdata; + int r; + + dev_dbg(&client->dev, "%s\n", __func__); + dev_dbg(&client->dev, "IRQ: %d\n", client->irq); + + if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) { + nfc_err(&client->dev, "Need I2C_FUNC_I2C\n"); + return -ENODEV; + } + + phy = devm_kzalloc(&client->dev, sizeof(struct st21nfcb_i2c_phy), + GFP_KERNEL); + if (!phy) { + nfc_err(&client->dev, + "Cannot allocate memory for st21nfcb i2c phy.\n"); + return -ENOMEM; + } + + phy->i2c_dev = client; + + i2c_set_clientdata(client, phy); + + pdata = client->dev.platform_data; + if (!pdata && client->dev.of_node) { + r = st21nfcb_nci_i2c_of_request_resources(client); + if (r) { + nfc_err(&client->dev, "No platform data\n"); + return r; + } + } else if (pdata) { + r = st21nfcb_nci_i2c_request_resources(client); + if (r) { + nfc_err(&client->dev, + "Cannot get platform resources\n"); + return r; + } + } else { + nfc_err(&client->dev, + "st21nfcb platform resources not available\n"); + return -ENODEV; + } + + r = devm_request_threaded_irq(&client->dev, client->irq, NULL, + st21nfcb_nci_irq_thread_fn, + phy->irq_polarity | IRQF_ONESHOT, + ST21NFCB_NCI_DRIVER_NAME, phy); + if (r < 0) { + nfc_err(&client->dev, "Unable to register IRQ handler\n"); + return r; + } + + return ndlc_probe(phy, &i2c_phy_ops, &client->dev, + ST21NFCB_FRAME_HEADROOM, ST21NFCB_FRAME_TAILROOM, + &phy->ndlc); +} + +static int st21nfcb_nci_i2c_remove(struct i2c_client *client) +{ + struct st21nfcb_i2c_phy *phy = i2c_get_clientdata(client); + + dev_dbg(&client->dev, "%s\n", __func__); + + ndlc_remove(phy->ndlc); + + if (phy->powered) + st21nfcb_nci_i2c_disable(phy); + + return 0; +} + +static const struct of_device_id of_st21nfcb_i2c_match[] = { + { .compatible = "st,st21nfcb_i2c", }, + {} +}; + +static struct i2c_driver st21nfcb_nci_i2c_driver = { + .driver = { + .owner = THIS_MODULE, + .name = ST21NFCB_NCI_I2C_DRIVER_NAME, + .owner = THIS_MODULE, + .of_match_table = of_match_ptr(of_st21nfcb_i2c_match), + }, + .probe = st21nfcb_nci_i2c_probe, + .id_table = st21nfcb_nci_i2c_id_table, + .remove = st21nfcb_nci_i2c_remove, +}; + +module_i2c_driver(st21nfcb_nci_i2c_driver); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION(DRIVER_DESC); diff --git a/drivers/nfc/st21nfcb/ndlc.c b/drivers/nfc/st21nfcb/ndlc.c new file mode 100644 index 000000000000..83c97c36112b --- /dev/null +++ b/drivers/nfc/st21nfcb/ndlc.c @@ -0,0 +1,298 @@ +/* + * Low Level Transport (NDLC) Driver for STMicroelectronics NFC Chip + * + * Copyright (C) 2014 STMicroelectronics SAS. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . 
+ */ + +#include +#include + +#include "ndlc.h" +#include "st21nfcb.h" + +#define NDLC_TIMER_T1 100 +#define NDLC_TIMER_T1_WAIT 400 +#define NDLC_TIMER_T2 1200 + +#define PCB_TYPE_DATAFRAME 0x80 +#define PCB_TYPE_SUPERVISOR 0xc0 +#define PCB_TYPE_MASK PCB_TYPE_SUPERVISOR + +#define PCB_SYNC_ACK 0x20 +#define PCB_SYNC_NACK 0x10 +#define PCB_SYNC_WAIT 0x30 +#define PCB_SYNC_NOINFO 0x00 +#define PCB_SYNC_MASK PCB_SYNC_WAIT + +#define PCB_DATAFRAME_RETRANSMIT_YES 0x00 +#define PCB_DATAFRAME_RETRANSMIT_NO 0x04 +#define PCB_DATAFRAME_RETRANSMIT_MASK PCB_DATAFRAME_RETRANSMIT_NO + +#define PCB_SUPERVISOR_RETRANSMIT_YES 0x00 +#define PCB_SUPERVISOR_RETRANSMIT_NO 0x02 +#define PCB_SUPERVISOR_RETRANSMIT_MASK PCB_SUPERVISOR_RETRANSMIT_NO + +#define PCB_FRAME_CRC_INFO_PRESENT 0x08 +#define PCB_FRAME_CRC_INFO_NOTPRESENT 0x00 +#define PCB_FRAME_CRC_INFO_MASK PCB_FRAME_CRC_INFO_PRESENT + +#define NDLC_DUMP_SKB(info, skb) \ +do { \ + pr_debug("%s:\n", info); \ + print_hex_dump(KERN_DEBUG, "ndlc: ", DUMP_PREFIX_OFFSET, \ + 16, 1, skb->data, skb->len, 0); \ +} while (0) + +int ndlc_open(struct llt_ndlc *ndlc) +{ + /* toggle reset pin */ + ndlc->ops->enable(ndlc->phy_id); + return 0; +} +EXPORT_SYMBOL(ndlc_open); + +void ndlc_close(struct llt_ndlc *ndlc) +{ + /* toggle reset pin */ + ndlc->ops->disable(ndlc->phy_id); +} +EXPORT_SYMBOL(ndlc_close); + +int ndlc_send(struct llt_ndlc *ndlc, struct sk_buff *skb) +{ + /* add ndlc header */ + u8 pcb = PCB_TYPE_DATAFRAME | PCB_DATAFRAME_RETRANSMIT_NO | + PCB_FRAME_CRC_INFO_NOTPRESENT; + + *skb_push(skb, 1) = pcb; + skb_queue_tail(&ndlc->send_q, skb); + + schedule_work(&ndlc->sm_work); + + return 0; +} +EXPORT_SYMBOL(ndlc_send); + +static void llt_ndlc_send_queue(struct llt_ndlc *ndlc) +{ + struct sk_buff *skb; + int r; + unsigned long time_sent; + + if (ndlc->send_q.qlen) + pr_debug("sendQlen=%d unackQlen=%d\n", + ndlc->send_q.qlen, ndlc->ack_pending_q.qlen); + + while (ndlc->send_q.qlen) { + skb = skb_dequeue(&ndlc->send_q); + NDLC_DUMP_SKB("ndlc frame written", skb); + r = ndlc->ops->write(ndlc->phy_id, skb); + if (r < 0) { + ndlc->hard_fault = r; + break; + } + time_sent = jiffies; + *(unsigned long *)skb->cb = time_sent; + + skb_queue_tail(&ndlc->ack_pending_q, skb); + + /* start timer t1 for ndlc aknowledge */ + ndlc->t1_active = true; + mod_timer(&ndlc->t1_timer, time_sent + + msecs_to_jiffies(NDLC_TIMER_T1)); + } +} + +static void llt_ndlc_requeue_data_pending(struct llt_ndlc *ndlc) +{ + struct sk_buff *skb; + u8 pcb; + + while ((skb = skb_dequeue_tail(&ndlc->ack_pending_q))) { + pcb = skb->data[0]; + switch (pcb & PCB_TYPE_MASK) { + case PCB_TYPE_SUPERVISOR: + skb->data[0] = (pcb & ~PCB_SUPERVISOR_RETRANSMIT_MASK) | + PCB_SUPERVISOR_RETRANSMIT_YES; + break; + case PCB_TYPE_DATAFRAME: + skb->data[0] = (pcb & ~PCB_DATAFRAME_RETRANSMIT_MASK) | + PCB_DATAFRAME_RETRANSMIT_YES; + break; + default: + pr_err("UNKNOWN Packet Control Byte=%d\n", pcb); + kfree_skb(skb); + break; + } + skb_queue_head(&ndlc->send_q, skb); + } +} + +static void llt_ndlc_rcv_queue(struct llt_ndlc *ndlc) +{ + struct sk_buff *skb; + u8 pcb; + unsigned long time_sent; + + if (ndlc->rcv_q.qlen) + pr_debug("rcvQlen=%d\n", ndlc->rcv_q.qlen); + + while ((skb = skb_dequeue(&ndlc->rcv_q)) != NULL) { + pcb = skb->data[0]; + skb_pull(skb, 1); + if ((pcb & PCB_TYPE_MASK) == PCB_TYPE_SUPERVISOR) { + switch (pcb & PCB_SYNC_MASK) { + case PCB_SYNC_ACK: + del_timer_sync(&ndlc->t1_timer); + del_timer_sync(&ndlc->t2_timer); + ndlc->t2_active = false; + ndlc->t1_active = false; + break; + case PCB_SYNC_NACK: 
+ llt_ndlc_requeue_data_pending(ndlc); + llt_ndlc_send_queue(ndlc); + /* start timer t1 for ndlc aknowledge */ + time_sent = jiffies; + ndlc->t1_active = true; + mod_timer(&ndlc->t1_timer, time_sent + + msecs_to_jiffies(NDLC_TIMER_T1)); + break; + case PCB_SYNC_WAIT: + time_sent = jiffies; + ndlc->t1_active = true; + mod_timer(&ndlc->t1_timer, time_sent + + msecs_to_jiffies(NDLC_TIMER_T1_WAIT)); + break; + default: + pr_err("UNKNOWN Packet Control Byte=%d\n", pcb); + kfree_skb(skb); + break; + } + } else { + nci_recv_frame(ndlc->ndev, skb); + } + } +} + +static void llt_ndlc_sm_work(struct work_struct *work) +{ + struct llt_ndlc *ndlc = container_of(work, struct llt_ndlc, sm_work); + + llt_ndlc_send_queue(ndlc); + llt_ndlc_rcv_queue(ndlc); + + if (ndlc->t1_active && timer_pending(&ndlc->t1_timer) == 0) { + pr_debug + ("Handle T1(recv SUPERVISOR) elapsed (T1 now inactive)\n"); + ndlc->t1_active = false; + + llt_ndlc_requeue_data_pending(ndlc); + llt_ndlc_send_queue(ndlc); + } + + if (ndlc->t2_active && timer_pending(&ndlc->t2_timer) == 0) { + pr_debug("Handle T2(recv DATA) elapsed (T2 now inactive)\n"); + ndlc->t2_active = false; + ndlc->t1_active = false; + del_timer_sync(&ndlc->t1_timer); + + ndlc_close(ndlc); + ndlc->hard_fault = -EREMOTEIO; + } +} + +void ndlc_recv(struct llt_ndlc *ndlc, struct sk_buff *skb) +{ + if (skb == NULL) { + pr_err("NULL Frame -> link is dead\n"); + ndlc->hard_fault = -EREMOTEIO; + ndlc_close(ndlc); + } else { + NDLC_DUMP_SKB("incoming frame", skb); + skb_queue_tail(&ndlc->rcv_q, skb); + } + + schedule_work(&ndlc->sm_work); +} +EXPORT_SYMBOL(ndlc_recv); + +static void ndlc_t1_timeout(unsigned long data) +{ + struct llt_ndlc *ndlc = (struct llt_ndlc *)data; + + pr_debug("\n"); + + schedule_work(&ndlc->sm_work); +} + +static void ndlc_t2_timeout(unsigned long data) +{ + struct llt_ndlc *ndlc = (struct llt_ndlc *)data; + + pr_debug("\n"); + + schedule_work(&ndlc->sm_work); +} + +int ndlc_probe(void *phy_id, struct nfc_phy_ops *phy_ops, struct device *dev, + int phy_headroom, int phy_tailroom, struct llt_ndlc **ndlc_id) +{ + struct llt_ndlc *ndlc; + + ndlc = devm_kzalloc(dev, sizeof(struct llt_ndlc), GFP_KERNEL); + if (!ndlc) { + nfc_err(dev, "Cannot allocate memory for ndlc.\n"); + return -ENOMEM; + } + ndlc->ops = phy_ops; + ndlc->phy_id = phy_id; + ndlc->dev = dev; + + *ndlc_id = ndlc; + + /* start timers */ + init_timer(&ndlc->t1_timer); + ndlc->t1_timer.data = (unsigned long)ndlc; + ndlc->t1_timer.function = ndlc_t1_timeout; + + init_timer(&ndlc->t2_timer); + ndlc->t2_timer.data = (unsigned long)ndlc; + ndlc->t2_timer.function = ndlc_t2_timeout; + + skb_queue_head_init(&ndlc->rcv_q); + skb_queue_head_init(&ndlc->send_q); + skb_queue_head_init(&ndlc->ack_pending_q); + + INIT_WORK(&ndlc->sm_work, llt_ndlc_sm_work); + + return st21nfcb_nci_probe(ndlc, phy_headroom, phy_tailroom); +} +EXPORT_SYMBOL(ndlc_probe); + +void ndlc_remove(struct llt_ndlc *ndlc) +{ + /* cancel timers */ + del_timer_sync(&ndlc->t1_timer); + del_timer_sync(&ndlc->t2_timer); + ndlc->t2_active = false; + ndlc->t1_active = false; + + skb_queue_purge(&ndlc->rcv_q); + skb_queue_purge(&ndlc->send_q); + + st21nfcb_nci_remove(ndlc->ndev); + kfree(ndlc); +} +EXPORT_SYMBOL(ndlc_remove); diff --git a/drivers/nfc/st21nfcb/ndlc.h b/drivers/nfc/st21nfcb/ndlc.h new file mode 100644 index 000000000000..c30a2f0faa5f --- /dev/null +++ b/drivers/nfc/st21nfcb/ndlc.h @@ -0,0 +1,55 @@ +/* + * NCI based Driver for STMicroelectronics NFC Chip + * + * Copyright (C) 2014 STMicroelectronics SAS. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#ifndef __LOCAL_NDLC_H_ +#define __LOCAL_NDLC_H_ + +#include +#include + +/* Low Level Transport description */ +struct llt_ndlc { + struct nci_dev *ndev; + struct nfc_phy_ops *ops; + void *phy_id; + + struct timer_list t1_timer; + bool t1_active; + + struct timer_list t2_timer; + bool t2_active; + + struct sk_buff_head rcv_q; + struct sk_buff_head send_q; + struct sk_buff_head ack_pending_q; + + struct work_struct sm_work; + + struct device *dev; + + int hard_fault; +}; + +int ndlc_open(struct llt_ndlc *ndlc); +void ndlc_close(struct llt_ndlc *ndlc); +int ndlc_send(struct llt_ndlc *ndlc, struct sk_buff *skb); +void ndlc_recv(struct llt_ndlc *ndlc, struct sk_buff *skb); +int ndlc_probe(void *phy_id, struct nfc_phy_ops *phy_ops, struct device *dev, + int phy_headroom, int phy_tailroom, struct llt_ndlc **ndlc_id); +void ndlc_remove(struct llt_ndlc *ndlc); +#endif /* __LOCAL_NDLC_H__ */ diff --git a/drivers/nfc/st21nfcb/st21nfcb.c b/drivers/nfc/st21nfcb/st21nfcb.c new file mode 100644 index 000000000000..4d95863e3063 --- /dev/null +++ b/drivers/nfc/st21nfcb/st21nfcb.c @@ -0,0 +1,129 @@ +/* + * NCI based Driver for STMicroelectronics NFC Chip + * + * Copyright (C) 2014 STMicroelectronics SAS. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . 
+ */ + +#include +#include +#include +#include + +#include "st21nfcb.h" +#include "ndlc.h" + +#define DRIVER_DESC "NCI NFC driver for ST21NFCB" + +static int st21nfcb_nci_open(struct nci_dev *ndev) +{ + struct st21nfcb_nci_info *info = nci_get_drvdata(ndev); + int r; + + if (test_and_set_bit(ST21NFCB_NCI_RUNNING, &info->flags)) + return 0; + + r = ndlc_open(info->ndlc); + if (r) + clear_bit(ST21NFCB_NCI_RUNNING, &info->flags); + + return r; +} + +static int st21nfcb_nci_close(struct nci_dev *ndev) +{ + struct st21nfcb_nci_info *info = nci_get_drvdata(ndev); + + if (!test_and_clear_bit(ST21NFCB_NCI_RUNNING, &info->flags)) + return 0; + + ndlc_close(info->ndlc); + + return 0; +} + +static int st21nfcb_nci_send(struct nci_dev *ndev, struct sk_buff *skb) +{ + struct st21nfcb_nci_info *info = nci_get_drvdata(ndev); + + skb->dev = (void *)ndev; + + if (!test_bit(ST21NFCB_NCI_RUNNING, &info->flags)) + return -EBUSY; + + return ndlc_send(info->ndlc, skb); +} + +static struct nci_ops st21nfcb_nci_ops = { + .open = st21nfcb_nci_open, + .close = st21nfcb_nci_close, + .send = st21nfcb_nci_send, +}; + +int st21nfcb_nci_probe(struct llt_ndlc *ndlc, int phy_headroom, + int phy_tailroom) +{ + struct st21nfcb_nci_info *info; + int r; + u32 protocols; + + info = devm_kzalloc(ndlc->dev, + sizeof(struct st21nfcb_nci_info), GFP_KERNEL); + if (!info) + return -ENOMEM; + + protocols = NFC_PROTO_JEWEL_MASK + | NFC_PROTO_MIFARE_MASK + | NFC_PROTO_FELICA_MASK + | NFC_PROTO_ISO14443_MASK + | NFC_PROTO_ISO14443_B_MASK + | NFC_PROTO_NFC_DEP_MASK; + + ndlc->ndev = nci_allocate_device(&st21nfcb_nci_ops, protocols, + phy_headroom, phy_tailroom); + if (!ndlc->ndev) { + pr_err("Cannot allocate nfc ndev\n"); + r = -ENOMEM; + goto err_alloc_ndev; + } + info->ndlc = ndlc; + + nci_set_drvdata(ndlc->ndev, info); + + r = nci_register_device(ndlc->ndev); + if (r) + goto err_regdev; + + return r; +err_regdev: + nci_free_device(ndlc->ndev); + +err_alloc_ndev: + kfree(info); + return r; +} +EXPORT_SYMBOL_GPL(st21nfcb_nci_probe); + +void st21nfcb_nci_remove(struct nci_dev *ndev) +{ + struct st21nfcb_nci_info *info = nci_get_drvdata(ndev); + + nci_unregister_device(ndev); + nci_free_device(ndev); + kfree(info); +} +EXPORT_SYMBOL_GPL(st21nfcb_nci_remove); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION(DRIVER_DESC); diff --git a/drivers/nfc/st21nfcb/st21nfcb.h b/drivers/nfc/st21nfcb/st21nfcb.h new file mode 100644 index 000000000000..4bbbebb9f34d --- /dev/null +++ b/drivers/nfc/st21nfcb/st21nfcb.h @@ -0,0 +1,38 @@ +/* + * NCI based Driver for STMicroelectronics NFC Chip + * + * Copyright (C) 2014 STMicroelectronics SAS. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . 
+ */ + +#ifndef __LOCAL_ST21NFCB_H_ +#define __LOCAL_ST21NFCB_H_ + +#include + +#include "ndlc.h" + +/* Define private flags: */ +#define ST21NFCB_NCI_RUNNING 1 + +struct st21nfcb_nci_info { + struct llt_ndlc *ndlc; + unsigned long flags; +}; + +void st21nfcb_nci_remove(struct nci_dev *ndev); +int st21nfcb_nci_probe(struct llt_ndlc *ndlc, int phy_headroom, + int phy_tailroom); + +#endif /* __LOCAL_ST21NFCB_H_ */ diff --git a/include/linux/platform_data/st21nfcb.h b/include/linux/platform_data/st21nfcb.h new file mode 100644 index 000000000000..2a7b769c714d --- /dev/null +++ b/include/linux/platform_data/st21nfcb.h @@ -0,0 +1,32 @@ +/* + * Driver include for the ST21NFCB NFC chip. + * + * Copyright (C) 2014 STMicroelectronics SAS. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#ifndef _ST21NFCA_HCI_H_ +#define _ST21NFCA_HCI_H_ + +#include + +#define ST21NFCB_NCI_DRIVER_NAME "st21nfcb_nci" + +struct st21nfcb_nfc_platform_data { + unsigned int gpio_irq; + unsigned int gpio_reset; + unsigned int irq_polarity; +}; + +#endif /* _ST21NFCA_HCI_H_ */ -- cgit v1.2.3-59-g8ed1b From fb92ff78f85b6c1a6f1277f7dd04a3762ba725ef Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Sun, 25 May 2014 22:46:58 +0200 Subject: NFC: st21nfcb: few code clean up Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz --- drivers/nfc/st21nfcb/i2c.c | 16 ++++++++-------- include/linux/platform_data/st21nfcb.h | 4 ++-- 2 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/nfc/st21nfcb/i2c.c b/drivers/nfc/st21nfcb/i2c.c index 0f690baaef7a..8af880ead5db 100644 --- a/drivers/nfc/st21nfcb/i2c.c +++ b/drivers/nfc/st21nfcb/i2c.c @@ -164,11 +164,11 @@ static int st21nfcb_nci_i2c_read(struct st21nfcb_i2c_phy *phy, u8 buf[ST21NFCB_NCI_I2C_MAX_SIZE]; struct i2c_client *client = phy->i2c_dev; - r = i2c_master_recv(client, buf, 4); + r = i2c_master_recv(client, buf, ST21NFCB_NCI_I2C_MIN_SIZE); if (r == -EREMOTEIO) { /* Retry, chip was in standby */ usleep_range(1000, 4000); - r = i2c_master_recv(client, buf, 4); - } else if (r != 4) { + r = i2c_master_recv(client, buf, ST21NFCB_NCI_I2C_MIN_SIZE); + } else if (r != ST21NFCB_NCI_I2C_MIN_SIZE) { nfc_err(&client->dev, "cannot read ndlc & nci header\n"); return -EREMOTEIO; } @@ -179,13 +179,13 @@ static int st21nfcb_nci_i2c_read(struct st21nfcb_i2c_phy *phy, return -EBADMSG; } - *skb = alloc_skb(4 + len, GFP_KERNEL); + *skb = alloc_skb(ST21NFCB_NCI_I2C_MIN_SIZE + len, GFP_KERNEL); if (*skb == NULL) return -ENOMEM; - skb_reserve(*skb, 4); - skb_put(*skb, 4); - memcpy((*skb)->data, buf, 4); + skb_reserve(*skb, ST21NFCB_NCI_I2C_MIN_SIZE); + skb_put(*skb, ST21NFCB_NCI_I2C_MIN_SIZE); + memcpy((*skb)->data, buf, ST21NFCB_NCI_I2C_MIN_SIZE); if (!len) return 0; @@ -197,7 +197,7 @@ static int st21nfcb_nci_i2c_read(struct st21nfcb_i2c_phy *phy, } skb_put(*skb, len); - memcpy((*skb)->data + 4, buf, len); + memcpy((*skb)->data + ST21NFCB_NCI_I2C_MIN_SIZE, buf, len); I2C_DUMP_SKB("i2c frame 
read", *skb); diff --git a/include/linux/platform_data/st21nfcb.h b/include/linux/platform_data/st21nfcb.h index 2a7b769c714d..2d11f1f5efab 100644 --- a/include/linux/platform_data/st21nfcb.h +++ b/include/linux/platform_data/st21nfcb.h @@ -16,8 +16,8 @@ * along with this program; if not, see . */ -#ifndef _ST21NFCA_HCI_H_ -#define _ST21NFCA_HCI_H_ +#ifndef _ST21NFCB_NCI_H_ +#define _ST21NFCB_NCI_H_ #include -- cgit v1.2.3-59-g8ed1b From 28cb5ef16e578bbca0a562b09f12c8c98ca92720 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 23 Jul 2014 01:00:36 +0200 Subject: PM: Create PM workqueue if runtime PM is not configured too The PM workqueue is going to be used by ACPI PM notify handlers regardless of whether or not runtime PM is configured, so move it out of #ifdef CONFIG_PM_RUNTIME. Do that in three places in the ACPI device PM code. Signed-off-by: Rafael J. Wysocki --- include/linux/pm_runtime.h | 11 +++++++++-- kernel/power/main.c | 4 ---- 2 files changed, 9 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 43fd6716f662..367f49b9a1c9 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -24,11 +24,20 @@ #define RPM_AUTO 0x08 /* Use autosuspend_delay */ #ifdef CONFIG_PM +extern struct workqueue_struct *pm_wq; + +static inline bool queue_pm_work(struct work_struct *work) +{ + return queue_work(pm_wq, work); +} + extern int pm_generic_runtime_suspend(struct device *dev); extern int pm_generic_runtime_resume(struct device *dev); extern int pm_runtime_force_suspend(struct device *dev); extern int pm_runtime_force_resume(struct device *dev); #else +static inline bool queue_pm_work(struct work_struct *work) { return false; } + static inline int pm_generic_runtime_suspend(struct device *dev) { return 0; } static inline int pm_generic_runtime_resume(struct device *dev) { return 0; } static inline int pm_runtime_force_suspend(struct device *dev) { return 0; } @@ -37,8 +46,6 @@ static inline int pm_runtime_force_resume(struct device *dev) { return 0; } #ifdef CONFIG_PM_RUNTIME -extern struct workqueue_struct *pm_wq; - extern int __pm_runtime_idle(struct device *dev, int rpmflags); extern int __pm_runtime_suspend(struct device *dev, int rpmflags); extern int __pm_runtime_resume(struct device *dev, int rpmflags); diff --git a/kernel/power/main.c b/kernel/power/main.c index 8e90f330f139..a18efed75fa7 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -615,7 +615,6 @@ static struct attribute_group attr_group = { .attrs = g, }; -#ifdef CONFIG_PM_RUNTIME struct workqueue_struct *pm_wq; EXPORT_SYMBOL_GPL(pm_wq); @@ -625,9 +624,6 @@ static int __init pm_start_workqueue(void) return pm_wq ? 0 : -ENOMEM; } -#else -static inline int pm_start_workqueue(void) { return 0; } -#endif static int __init pm_init(void) { -- cgit v1.2.3-59-g8ed1b From c072530f391e33bd22ed0638c08f07528f154493 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 23 Jul 2014 01:00:45 +0200 Subject: ACPI / PM: Revork the handling of ACPI device wakeup notifications Since ACPI wakeup GPEs are going to be enabled during system suspend as well as for runtime wakeup by a subsequent patch and the same notify handlers will be used in both cases, rework the ACPI device wakeup notification framework so that the part specific to physical devices is always run asynchronously from the PM workqueue. 
This prevents runtime resume callbacks for those devices from being run during system suspend and resume which may not be appropriate, among other things. Also make ACPI device wakeup notification handling a bit more robust agaist subsequent removal of ACPI device objects, whould that ever happen, and create a wakeup source object for each ACPI device configured for wakeup so that wakeup notifications for those devices can wake up the system from the "freeze" sleep state. Signed-off-by: Rafael J. Wysocki --- drivers/acpi/device_pm.c | 80 +++++++++++++++++++++++++++++++++++------------- drivers/acpi/pci_root.c | 2 +- drivers/pci/pci-acpi.c | 60 ++++++++++++------------------------ include/acpi/acpi_bus.h | 21 ++++++++----- include/linux/pci-acpi.h | 13 +++++--- 5 files changed, 101 insertions(+), 75 deletions(-) (limited to 'include/linux') diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c index 49a51277f81d..366de0b0c39b 100644 --- a/drivers/acpi/device_pm.c +++ b/drivers/acpi/device_pm.c @@ -367,29 +367,61 @@ EXPORT_SYMBOL(acpi_bus_power_manageable); #ifdef CONFIG_PM static DEFINE_MUTEX(acpi_pm_notifier_lock); +static void acpi_pm_notify_handler(acpi_handle handle, u32 val, void *not_used) +{ + struct acpi_device *adev; + + if (val != ACPI_NOTIFY_DEVICE_WAKE) + return; + + adev = acpi_bus_get_acpi_device(handle); + if (!adev) + return; + + mutex_lock(&acpi_pm_notifier_lock); + + if (adev->wakeup.flags.notifier_present) { + __pm_wakeup_event(adev->wakeup.ws, 0); + if (adev->wakeup.context.work.func) + queue_pm_work(&adev->wakeup.context.work); + } + + mutex_unlock(&acpi_pm_notifier_lock); + + acpi_bus_put_acpi_device(adev); +} + /** - * acpi_add_pm_notifier - Register PM notifier for given ACPI device. - * @adev: ACPI device to add the notifier for. - * @context: Context information to pass to the notifier routine. + * acpi_add_pm_notifier - Register PM notify handler for given ACPI device. + * @adev: ACPI device to add the notify handler for. + * @dev: Device to generate a wakeup event for while handling the notification. + * @work_func: Work function to execute when handling the notification. * * NOTE: @adev need not be a run-wake or wakeup device to be a valid source of * PM wakeup events. For example, wakeup events may be generated for bridges * if one of the devices below the bridge is signaling wakeup, even if the * bridge itself doesn't have a wakeup GPE associated with it. */ -acpi_status acpi_add_pm_notifier(struct acpi_device *adev, - acpi_notify_handler handler, void *context) +acpi_status acpi_add_pm_notifier(struct acpi_device *adev, struct device *dev, + void (*work_func)(struct work_struct *work)) { acpi_status status = AE_ALREADY_EXISTS; + if (!dev && !work_func) + return AE_BAD_PARAMETER; + mutex_lock(&acpi_pm_notifier_lock); if (adev->wakeup.flags.notifier_present) goto out; - status = acpi_install_notify_handler(adev->handle, - ACPI_SYSTEM_NOTIFY, - handler, context); + adev->wakeup.ws = wakeup_source_register(dev_name(&adev->dev)); + adev->wakeup.context.dev = dev; + if (work_func) + INIT_WORK(&adev->wakeup.context.work, work_func); + + status = acpi_install_notify_handler(adev->handle, ACPI_SYSTEM_NOTIFY, + acpi_pm_notify_handler, NULL); if (ACPI_FAILURE(status)) goto out; @@ -404,8 +436,7 @@ acpi_status acpi_add_pm_notifier(struct acpi_device *adev, * acpi_remove_pm_notifier - Unregister PM notifier from given ACPI device. * @adev: ACPI device to remove the notifier from. 
*/ -acpi_status acpi_remove_pm_notifier(struct acpi_device *adev, - acpi_notify_handler handler) +acpi_status acpi_remove_pm_notifier(struct acpi_device *adev) { acpi_status status = AE_BAD_PARAMETER; @@ -416,10 +447,17 @@ acpi_status acpi_remove_pm_notifier(struct acpi_device *adev, status = acpi_remove_notify_handler(adev->handle, ACPI_SYSTEM_NOTIFY, - handler); + acpi_pm_notify_handler); if (ACPI_FAILURE(status)) goto out; + if (adev->wakeup.context.work.func) { + cancel_work_sync(&adev->wakeup.context.work); + adev->wakeup.context.work.func = NULL; + } + adev->wakeup.context.dev = NULL; + wakeup_source_unregister(adev->wakeup.ws); + adev->wakeup.flags.notifier_present = false; out: @@ -602,16 +640,15 @@ EXPORT_SYMBOL(acpi_pm_device_sleep_state); #ifdef CONFIG_PM_RUNTIME /** - * acpi_wakeup_device - Wakeup notification handler for ACPI devices. - * @handle: ACPI handle of the device the notification is for. - * @event: Type of the signaled event. - * @context: Device corresponding to @handle. + * acpi_pm_notify_work_func - ACPI devices wakeup notification work function. + * @work: Work item to handle. */ -static void acpi_wakeup_device(acpi_handle handle, u32 event, void *context) +static void acpi_pm_notify_work_func(struct work_struct *work) { - struct device *dev = context; + struct device *dev; - if (event == ACPI_NOTIFY_DEVICE_WAKE && dev) { + dev = container_of(work, struct acpi_device_wakeup_context, work)->dev; + if (dev) { pm_wakeup_event(dev, 0); pm_runtime_resume(dev); } @@ -677,8 +714,7 @@ int acpi_pm_device_run_wake(struct device *phys_dev, bool enable) } EXPORT_SYMBOL(acpi_pm_device_run_wake); #else -static inline void acpi_wakeup_device(acpi_handle handle, u32 event, - void *context) {} +static inline void acpi_pm_notify_work_func(struct work_struct *work) {} #endif /* CONFIG_PM_RUNTIME */ #ifdef CONFIG_PM_SLEEP @@ -1048,7 +1084,7 @@ int acpi_dev_pm_attach(struct device *dev, bool power_on) if (dev->pm_domain) return -EEXIST; - acpi_add_pm_notifier(adev, acpi_wakeup_device, dev); + acpi_add_pm_notifier(adev, dev, acpi_pm_notify_work_func); dev->pm_domain = &acpi_general_pm_domain; if (power_on) { acpi_dev_pm_full_power(adev); @@ -1076,7 +1112,7 @@ void acpi_dev_pm_detach(struct device *dev, bool power_off) if (adev && dev->pm_domain == &acpi_general_pm_domain) { dev->pm_domain = NULL; - acpi_remove_pm_notifier(adev, acpi_wakeup_device); + acpi_remove_pm_notifier(adev); if (power_off) { /* * If the device's PM QoS resume latency limit or flags diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c index d388f13d48b4..e6ae603ed1a1 100644 --- a/drivers/acpi/pci_root.c +++ b/drivers/acpi/pci_root.c @@ -593,7 +593,7 @@ static int acpi_pci_root_add(struct acpi_device *device, if (no_aspm) pcie_no_aspm(); - pci_acpi_add_bus_pm_notifier(device, root->bus); + pci_acpi_add_bus_pm_notifier(device); if (device->wakeup.flags.run_wake) device_set_run_wake(root->bus->bridge, true); diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index ca4927ba8433..7b8b2298840a 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -18,31 +18,31 @@ #include "pci.h" /** - * pci_acpi_wake_bus - Wake-up notification handler for root buses. - * @handle: ACPI handle of a device the notification is for. - * @event: Type of the signaled event. - * @context: PCI root bus to wake up devices on. + * pci_acpi_wake_bus - Root bus wakeup notification fork function. + * @work: Work item to handle. 
*/ -static void pci_acpi_wake_bus(acpi_handle handle, u32 event, void *context) +static void pci_acpi_wake_bus(struct work_struct *work) { - struct pci_bus *pci_bus = context; + struct acpi_device *adev; + struct acpi_pci_root *root; - if (event == ACPI_NOTIFY_DEVICE_WAKE && pci_bus) - pci_pme_wakeup_bus(pci_bus); + adev = container_of(work, struct acpi_device, wakeup.context.work); + root = acpi_driver_data(adev); + pci_pme_wakeup_bus(root->bus); } /** - * pci_acpi_wake_dev - Wake-up notification handler for PCI devices. + * pci_acpi_wake_dev - PCI device wakeup notification work function. * @handle: ACPI handle of a device the notification is for. - * @event: Type of the signaled event. - * @context: PCI device object to wake up. + * @work: Work item to handle. */ -static void pci_acpi_wake_dev(acpi_handle handle, u32 event, void *context) +static void pci_acpi_wake_dev(struct work_struct *work) { - struct pci_dev *pci_dev = context; + struct acpi_device_wakeup_context *context; + struct pci_dev *pci_dev; - if (event != ACPI_NOTIFY_DEVICE_WAKE || !pci_dev) - return; + context = container_of(work, struct acpi_device_wakeup_context, work); + pci_dev = to_pci_dev(context->dev); if (pci_dev->pme_poll) pci_dev->pme_poll = false; @@ -65,23 +65,12 @@ static void pci_acpi_wake_dev(acpi_handle handle, u32 event, void *context) } /** - * pci_acpi_add_bus_pm_notifier - Register PM notifier for given PCI bus. - * @dev: ACPI device to add the notifier for. - * @pci_bus: PCI bus to walk checking for PME status if an event is signaled. + * pci_acpi_add_bus_pm_notifier - Register PM notifier for root PCI bus. + * @dev: PCI root bridge ACPI device. */ -acpi_status pci_acpi_add_bus_pm_notifier(struct acpi_device *dev, - struct pci_bus *pci_bus) +acpi_status pci_acpi_add_bus_pm_notifier(struct acpi_device *dev) { - return acpi_add_pm_notifier(dev, pci_acpi_wake_bus, pci_bus); -} - -/** - * pci_acpi_remove_bus_pm_notifier - Unregister PCI bus PM notifier. - * @dev: ACPI device to remove the notifier from. - */ -acpi_status pci_acpi_remove_bus_pm_notifier(struct acpi_device *dev) -{ - return acpi_remove_pm_notifier(dev, pci_acpi_wake_bus); + return acpi_add_pm_notifier(dev, NULL, pci_acpi_wake_bus); } /** @@ -92,16 +81,7 @@ acpi_status pci_acpi_remove_bus_pm_notifier(struct acpi_device *dev) acpi_status pci_acpi_add_pm_notifier(struct acpi_device *dev, struct pci_dev *pci_dev) { - return acpi_add_pm_notifier(dev, pci_acpi_wake_dev, pci_dev); -} - -/** - * pci_acpi_remove_pm_notifier - Unregister PCI device PM notifier. - * @dev: ACPI device to remove the notifier from. 
- */ -acpi_status pci_acpi_remove_pm_notifier(struct acpi_device *dev) -{ - return acpi_remove_pm_notifier(dev, pci_acpi_wake_dev); + return acpi_add_pm_notifier(dev, &pci_dev->dev, pci_acpi_wake_dev); } phys_addr_t acpi_pci_root_get_mcfg_addr(acpi_handle handle) diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index b5714580801a..99780d46abb6 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -315,12 +315,19 @@ struct acpi_device_wakeup_flags { u8 notifier_present:1; /* Wake-up notify handler has been installed */ }; +struct acpi_device_wakeup_context { + struct work_struct work; + struct device *dev; +}; + struct acpi_device_wakeup { acpi_handle gpe_device; u64 gpe_number; u64 sleep_state; struct list_head resources; struct acpi_device_wakeup_flags flags; + struct acpi_device_wakeup_context context; + struct wakeup_source *ws; int prepare_count; }; @@ -510,20 +517,18 @@ int acpi_enable_wakeup_device_power(struct acpi_device *dev, int state); int acpi_disable_wakeup_device_power(struct acpi_device *dev); #ifdef CONFIG_PM -acpi_status acpi_add_pm_notifier(struct acpi_device *adev, - acpi_notify_handler handler, void *context); -acpi_status acpi_remove_pm_notifier(struct acpi_device *adev, - acpi_notify_handler handler); +acpi_status acpi_add_pm_notifier(struct acpi_device *adev, struct device *dev, + void (*work_func)(struct work_struct *work)); +acpi_status acpi_remove_pm_notifier(struct acpi_device *adev); int acpi_pm_device_sleep_state(struct device *, int *, int); #else static inline acpi_status acpi_add_pm_notifier(struct acpi_device *adev, - acpi_notify_handler handler, - void *context) + struct device *dev, + void (*work_func)(struct work_struct *work)) { return AE_SUPPORT; } -static inline acpi_status acpi_remove_pm_notifier(struct acpi_device *adev, - acpi_notify_handler handler) +static inline acpi_status acpi_remove_pm_notifier(struct acpi_device *adev) { return AE_SUPPORT; } diff --git a/include/linux/pci-acpi.h b/include/linux/pci-acpi.h index 637a608ded0b..64dacb7288a6 100644 --- a/include/linux/pci-acpi.h +++ b/include/linux/pci-acpi.h @@ -11,12 +11,17 @@ #include #ifdef CONFIG_ACPI -extern acpi_status pci_acpi_add_bus_pm_notifier(struct acpi_device *dev, - struct pci_bus *pci_bus); -extern acpi_status pci_acpi_remove_bus_pm_notifier(struct acpi_device *dev); +extern acpi_status pci_acpi_add_bus_pm_notifier(struct acpi_device *dev); +static inline acpi_status pci_acpi_remove_bus_pm_notifier(struct acpi_device *dev) +{ + return acpi_remove_pm_notifier(dev); +} extern acpi_status pci_acpi_add_pm_notifier(struct acpi_device *dev, struct pci_dev *pci_dev); -extern acpi_status pci_acpi_remove_pm_notifier(struct acpi_device *dev); +static inline acpi_status pci_acpi_remove_pm_notifier(struct acpi_device *dev) +{ + return acpi_remove_pm_notifier(dev); +} extern phys_addr_t acpi_pci_root_get_mcfg_addr(acpi_handle handle); static inline acpi_handle acpi_find_root_bridge_handle(struct pci_dev *pdev) -- cgit v1.2.3-59-g8ed1b From 633adaba49d46dcaa4289de5b25c562b54ff575b Mon Sep 17 00:00:00 2001 From: Lv Zheng Date: Wed, 16 Jul 2014 16:58:30 +0800 Subject: ACPICA: Linux: Allow ACPICA inclusion for CONFIG_ACPI=n builds. This patch moves out of CONFIG_ACPI condition so that all ACPICA prototypes can be seen by the CONFIG_ACPI=n Linux kernel builds. Note that we can do this because ACPICA has implemented stubs for all ACPICA prototypes that are currently referenced by the Linux kernel. Signed-off-by: Lv Zheng Signed-off-by: Rafael J. 
Wysocki --- include/linux/acpi.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 358c01b971db..5320153c311b 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -29,17 +29,17 @@ #include /* for struct resource */ #include -#ifdef CONFIG_ACPI - #ifndef _LINUX #define _LINUX #endif +#include + +#ifdef CONFIG_ACPI #include #include #include -#include #include #include #include -- cgit v1.2.3-59-g8ed1b From f997ea54479e85076873a70fe53e66c9153e6f00 Mon Sep 17 00:00:00 2001 From: Lv Zheng Date: Wed, 16 Jul 2014 16:58:40 +0800 Subject: ACPI / SFI: Fix wrong inclusion in SFI/ACPI wrapper - table definitions. This patch removes inclusions from as has already included it for CONFIG_ACPI=n builds. Cc: Len Brown Cc: sfi-devel@simplefirmware.org Signed-off-by: Lv Zheng Signed-off-by: Rafael J. Wysocki --- include/linux/sfi_acpi.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sfi_acpi.h b/include/linux/sfi_acpi.h index 4723bbfa1c26..a6e555cbe05c 100644 --- a/include/linux/sfi_acpi.h +++ b/include/linux/sfi_acpi.h @@ -63,8 +63,6 @@ #include #ifdef CONFIG_SFI -#include /* FIXME: inclusion should be removed */ - extern int sfi_acpi_table_parse(char *signature, char *oem_id, char *oem_table_id, int (*handler)(struct acpi_table_header *)); @@ -78,7 +76,6 @@ static inline int __init acpi_sfi_table_parse(char *signature, return sfi_acpi_table_parse(signature, NULL, NULL, handler); } #else /* !CONFIG_SFI */ - static inline int sfi_acpi_table_parse(char *signature, char *oem_id, char *oem_table_id, int (*handler)(struct acpi_table_header *)) -- cgit v1.2.3-59-g8ed1b From 914b881f9452fd615cc597b434fd8c0e12a7dae2 Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Thu, 22 May 2014 14:06:33 +0900 Subject: extcon: sm5502: Add support for new SM5502 extcon device driver This patch adds a new SM5502 MUIC (Micro-USB Interface Controller) device driver using the EXTCON subsystem. The extcon-sm5502 driver is capable of identifying the type of the external power source and attached accessory. External power sources, such as a dedicated charger or a standard USB port, are able to charge the battery in the smartphone via the connector. Signed-off-by: Chanwoo Choi --- drivers/extcon/Kconfig | 10 + drivers/extcon/Makefile | 1 + drivers/extcon/extcon-sm5502.c | 620 +++++++++++++++++++++++++++++++++++++++++ include/linux/extcon/sm5502.h | 264 ++++++++++++++++++ 4 files changed, 895 insertions(+) create mode 100644 drivers/extcon/extcon-sm5502.c create mode 100644 include/linux/extcon/sm5502.h (limited to 'include/linux') diff --git a/drivers/extcon/Kconfig b/drivers/extcon/Kconfig index 9125eba6b3d6..6f2f4727de2c 100644 --- a/drivers/extcon/Kconfig +++ b/drivers/extcon/Kconfig @@ -70,4 +70,14 @@ config EXTCON_PALMAS Say Y here to enable support for USB peripheral and USB host detection by palmas usb. +config EXTCON_SM5502 + tristate "SM5502 EXTCON support" + select IRQ_DOMAIN + select REGMAP_I2C + select REGMAP_IRQ + help + If you say yes here you get support for the MUIC device of + Silicon Mitus SM5502. The SM5502 is a USB port accessory + detector and switch.
+ endif # MULTISTATE_SWITCH diff --git a/drivers/extcon/Makefile b/drivers/extcon/Makefile index e48abc6d230f..b38546eb522a 100644 --- a/drivers/extcon/Makefile +++ b/drivers/extcon/Makefile @@ -10,3 +10,4 @@ obj-$(CONFIG_EXTCON_MAX14577) += extcon-max14577.o obj-$(CONFIG_EXTCON_MAX77693) += extcon-max77693.o obj-$(CONFIG_EXTCON_MAX8997) += extcon-max8997.o obj-$(CONFIG_EXTCON_PALMAS) += extcon-palmas.o +obj-$(CONFIG_EXTCON_SM5502) += extcon-sm5502.o diff --git a/drivers/extcon/extcon-sm5502.c b/drivers/extcon/extcon-sm5502.c new file mode 100644 index 000000000000..9f318c222b89 --- /dev/null +++ b/drivers/extcon/extcon-sm5502.c @@ -0,0 +1,620 @@ +/* + * extcon-sm5502.c - Silicon Mitus SM5502 extcon drvier to support USB switches + * + * Copyright (c) 2014 Samsung Electronics Co., Ltd + * Author: Chanwoo Choi + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct muic_irq { + unsigned int irq; + const char *name; + unsigned int virq; +}; + +struct reg_data { + u8 reg; + unsigned int val; + bool invert; +}; + +struct sm5502_muic_info { + struct device *dev; + struct extcon_dev *edev; + + struct i2c_client *i2c; + struct regmap *regmap; + + struct regmap_irq_chip_data *irq_data; + struct muic_irq *muic_irqs; + unsigned int num_muic_irqs; + int irq; + bool irq_attach; + bool irq_detach; + struct work_struct irq_work; + + struct reg_data *reg_data; + unsigned int num_reg_data; + + struct mutex mutex; +}; + +/* Default value of SM5502 register to bring up MUIC device. 
*/ +static struct reg_data sm5502_reg_data[] = { + { + .reg = SM5502_REG_CONTROL, + .val = SM5502_REG_CONTROL_MASK_INT_MASK, + .invert = false, + }, { + .reg = SM5502_REG_INTMASK1, + .val = SM5502_REG_INTM1_KP_MASK + | SM5502_REG_INTM1_LKP_MASK + | SM5502_REG_INTM1_LKR_MASK, + .invert = true, + }, { + .reg = SM5502_REG_INTMASK2, + .val = SM5502_REG_INTM2_VBUS_DET_MASK + | SM5502_REG_INTM2_REV_ACCE_MASK + | SM5502_REG_INTM2_ADC_CHG_MASK + | SM5502_REG_INTM2_STUCK_KEY_MASK + | SM5502_REG_INTM2_STUCK_KEY_RCV_MASK + | SM5502_REG_INTM2_MHL_MASK, + .invert = true, + }, + { } +}; + +/* List of detectable cables */ +enum { + EXTCON_CABLE_USB = 0, + EXTCON_CABLE_USB_HOST, + EXTCON_CABLE_TA, + + EXTCON_CABLE_END, +}; + +static const char *sm5502_extcon_cable[] = { + [EXTCON_CABLE_USB] = "USB", + [EXTCON_CABLE_USB_HOST] = "USB-Host", + [EXTCON_CABLE_TA] = "TA", + NULL, +}; + +/* Define supported accessory type */ +enum sm5502_muic_acc_type { + SM5502_MUIC_ADC_GROUND = 0x0, + SM5502_MUIC_ADC_SEND_END_BUTTON, + SM5502_MUIC_ADC_REMOTE_S1_BUTTON, + SM5502_MUIC_ADC_REMOTE_S2_BUTTON, + SM5502_MUIC_ADC_REMOTE_S3_BUTTON, + SM5502_MUIC_ADC_REMOTE_S4_BUTTON, + SM5502_MUIC_ADC_REMOTE_S5_BUTTON, + SM5502_MUIC_ADC_REMOTE_S6_BUTTON, + SM5502_MUIC_ADC_REMOTE_S7_BUTTON, + SM5502_MUIC_ADC_REMOTE_S8_BUTTON, + SM5502_MUIC_ADC_REMOTE_S9_BUTTON, + SM5502_MUIC_ADC_REMOTE_S10_BUTTON, + SM5502_MUIC_ADC_REMOTE_S11_BUTTON, + SM5502_MUIC_ADC_REMOTE_S12_BUTTON, + SM5502_MUIC_ADC_RESERVED_ACC_1, + SM5502_MUIC_ADC_RESERVED_ACC_2, + SM5502_MUIC_ADC_RESERVED_ACC_3, + SM5502_MUIC_ADC_RESERVED_ACC_4, + SM5502_MUIC_ADC_RESERVED_ACC_5, + SM5502_MUIC_ADC_AUDIO_TYPE2, + SM5502_MUIC_ADC_PHONE_POWERED_DEV, + SM5502_MUIC_ADC_TTY_CONVERTER, + SM5502_MUIC_ADC_UART_CABLE, + SM5502_MUIC_ADC_TYPE1_CHARGER, + SM5502_MUIC_ADC_FACTORY_MODE_BOOT_OFF_USB, + SM5502_MUIC_ADC_FACTORY_MODE_BOOT_ON_USB, + SM5502_MUIC_ADC_AUDIO_VIDEO_CABLE, + SM5502_MUIC_ADC_TYPE2_CHARGER, + SM5502_MUIC_ADC_FACTORY_MODE_BOOT_OFF_UART, + SM5502_MUIC_ADC_FACTORY_MODE_BOOT_ON_UART, + SM5502_MUIC_ADC_AUDIO_TYPE1, + SM5502_MUIC_ADC_OPEN = 0x1f, + + /* The below accessories have same ADC value (0x1f or 0x1e). + So, Device type1 is used to separate specific accessory. 
*/ + /* |---------|--ADC| */ + /* | [7:5]|[4:0]| */ + SM5502_MUIC_ADC_AUDIO_TYPE1_FULL_REMOTE = 0x3e, /* | 001|11110| */ + SM5502_MUIC_ADC_AUDIO_TYPE1_SEND_END = 0x5e, /* | 010|11110| */ + /* |Dev Type1|--ADC| */ + SM5502_MUIC_ADC_OPEN_USB = 0x5f, /* | 010|11111| */ + SM5502_MUIC_ADC_OPEN_TA = 0xdf, /* | 110|11111| */ + SM5502_MUIC_ADC_OPEN_USB_OTG = 0xff, /* | 111|11111| */ +}; + +/* List of supported interrupt for SM5502 */ +static struct muic_irq sm5502_muic_irqs[] = { + { SM5502_IRQ_INT1_ATTACH, "muic-attach" }, + { SM5502_IRQ_INT1_DETACH, "muic-detach" }, + { SM5502_IRQ_INT1_KP, "muic-kp" }, + { SM5502_IRQ_INT1_LKP, "muic-lkp" }, + { SM5502_IRQ_INT1_LKR, "muic-lkr" }, + { SM5502_IRQ_INT1_OVP_EVENT, "muic-ovp-event" }, + { SM5502_IRQ_INT1_OCP_EVENT, "muic-ocp-event" }, + { SM5502_IRQ_INT1_OVP_OCP_DIS, "muic-ovp-ocp-dis" }, + { SM5502_IRQ_INT2_VBUS_DET, "muic-vbus-det" }, + { SM5502_IRQ_INT2_REV_ACCE, "muic-rev-acce" }, + { SM5502_IRQ_INT2_ADC_CHG, "muic-adc-chg" }, + { SM5502_IRQ_INT2_STUCK_KEY, "muic-stuck-key" }, + { SM5502_IRQ_INT2_STUCK_KEY_RCV, "muic-stuck-key-rcv" }, + { SM5502_IRQ_INT2_MHL, "muic-mhl" }, +}; + +/* Define interrupt list of SM5502 to register regmap_irq */ +static const struct regmap_irq sm5502_irqs[] = { + /* INT1 interrupts */ + { .reg_offset = 0, .mask = SM5502_IRQ_INT1_ATTACH_MASK, }, + { .reg_offset = 0, .mask = SM5502_IRQ_INT1_DETACH_MASK, }, + { .reg_offset = 0, .mask = SM5502_IRQ_INT1_KP_MASK, }, + { .reg_offset = 0, .mask = SM5502_IRQ_INT1_LKP_MASK, }, + { .reg_offset = 0, .mask = SM5502_IRQ_INT1_LKR_MASK, }, + { .reg_offset = 0, .mask = SM5502_IRQ_INT1_OVP_EVENT_MASK, }, + { .reg_offset = 0, .mask = SM5502_IRQ_INT1_OCP_EVENT_MASK, }, + { .reg_offset = 0, .mask = SM5502_IRQ_INT1_OVP_OCP_DIS_MASK, }, + + /* INT2 interrupts */ + { .reg_offset = 1, .mask = SM5502_IRQ_INT2_VBUS_DET_MASK,}, + { .reg_offset = 1, .mask = SM5502_IRQ_INT2_REV_ACCE_MASK, }, + { .reg_offset = 1, .mask = SM5502_IRQ_INT2_ADC_CHG_MASK, }, + { .reg_offset = 1, .mask = SM5502_IRQ_INT2_STUCK_KEY_MASK, }, + { .reg_offset = 1, .mask = SM5502_IRQ_INT2_STUCK_KEY_RCV_MASK, }, + { .reg_offset = 1, .mask = SM5502_IRQ_INT2_MHL_MASK, }, +}; + +static const struct regmap_irq_chip sm5502_muic_irq_chip = { + .name = "sm5502", + .status_base = SM5502_REG_INT1, + .mask_base = SM5502_REG_INTMASK1, + .mask_invert = false, + .num_regs = 2, + .irqs = sm5502_irqs, + .num_irqs = ARRAY_SIZE(sm5502_irqs), +}; + +/* Define regmap configuration of SM5502 for I2C communication */ +static bool sm5502_muic_volatile_reg(struct device *dev, unsigned int reg) +{ + switch (reg) { + case SM5502_REG_INTMASK1: + case SM5502_REG_INTMASK2: + return true; + default: + break; + } + return false; +} + +static const struct regmap_config sm5502_muic_regmap_config = { + .reg_bits = 8, + .val_bits = 8, + .volatile_reg = sm5502_muic_volatile_reg, + .max_register = SM5502_REG_END, +}; + +/* Return cable type of attached or detached accessories */ +static unsigned int sm5502_muic_get_cable_type(struct sm5502_muic_info *info) +{ + unsigned int cable_type = -1, adc, dev_type1; + int ret; + + /* Read ADC value according to external cable or button */ + ret = regmap_read(info->regmap, SM5502_REG_ADC, &adc); + if (ret) { + dev_err(info->dev, "failed to read ADC register\n"); + return ret; + } + + /* + * If ADC is SM5502_MUIC_ADC_GROUND(0x0), external cable hasn't + * connected with to MUIC device. 
+ */ + cable_type &= SM5502_REG_ADC_MASK; + if (cable_type == SM5502_MUIC_ADC_GROUND) + return SM5502_MUIC_ADC_GROUND; + + switch (cable_type) { + case SM5502_MUIC_ADC_GROUND: + case SM5502_MUIC_ADC_SEND_END_BUTTON: + case SM5502_MUIC_ADC_REMOTE_S1_BUTTON: + case SM5502_MUIC_ADC_REMOTE_S2_BUTTON: + case SM5502_MUIC_ADC_REMOTE_S3_BUTTON: + case SM5502_MUIC_ADC_REMOTE_S4_BUTTON: + case SM5502_MUIC_ADC_REMOTE_S5_BUTTON: + case SM5502_MUIC_ADC_REMOTE_S6_BUTTON: + case SM5502_MUIC_ADC_REMOTE_S7_BUTTON: + case SM5502_MUIC_ADC_REMOTE_S8_BUTTON: + case SM5502_MUIC_ADC_REMOTE_S9_BUTTON: + case SM5502_MUIC_ADC_REMOTE_S10_BUTTON: + case SM5502_MUIC_ADC_REMOTE_S11_BUTTON: + case SM5502_MUIC_ADC_REMOTE_S12_BUTTON: + case SM5502_MUIC_ADC_RESERVED_ACC_1: + case SM5502_MUIC_ADC_RESERVED_ACC_2: + case SM5502_MUIC_ADC_RESERVED_ACC_3: + case SM5502_MUIC_ADC_RESERVED_ACC_4: + case SM5502_MUIC_ADC_RESERVED_ACC_5: + case SM5502_MUIC_ADC_AUDIO_TYPE2: + case SM5502_MUIC_ADC_PHONE_POWERED_DEV: + case SM5502_MUIC_ADC_TTY_CONVERTER: + case SM5502_MUIC_ADC_UART_CABLE: + case SM5502_MUIC_ADC_TYPE1_CHARGER: + case SM5502_MUIC_ADC_FACTORY_MODE_BOOT_OFF_USB: + case SM5502_MUIC_ADC_FACTORY_MODE_BOOT_ON_USB: + case SM5502_MUIC_ADC_AUDIO_VIDEO_CABLE: + case SM5502_MUIC_ADC_TYPE2_CHARGER: + case SM5502_MUIC_ADC_FACTORY_MODE_BOOT_OFF_UART: + case SM5502_MUIC_ADC_FACTORY_MODE_BOOT_ON_UART: + break; + case SM5502_MUIC_ADC_AUDIO_TYPE1: + /* + * Check whether cable type is + * SM5502_MUIC_ADC_AUDIO_TYPE1_FULL_REMOTE + * or SM5502_MUIC_ADC_AUDIO_TYPE1_SEND_END + * by using Button event. + */ + break; + case SM5502_MUIC_ADC_OPEN: + ret = regmap_read(info->regmap, SM5502_REG_DEV_TYPE1, + &dev_type1); + if (ret) { + dev_err(info->dev, "failed to read DEV_TYPE1 reg\n"); + return ret; + } + + switch (dev_type1) { + case SM5502_REG_DEV_TYPE1_USB_SDP_MASK: + cable_type = SM5502_MUIC_ADC_OPEN_USB; + break; + case SM5502_REG_DEV_TYPE1_DEDICATED_CHG_MASK: + cable_type = SM5502_MUIC_ADC_OPEN_TA; + break; + case SM5502_REG_DEV_TYPE1_USB_OTG_MASK: + cable_type = SM5502_MUIC_ADC_OPEN_USB_OTG; + break; + default: + dev_dbg(info->dev, + "cannot identify the cable type: adc(0x%x) " + "dev_type1(0x%x)\n", adc, dev_type1); + return -EINVAL; + }; + break; + default: + dev_err(info->dev, + "failed to identify the cable type: adc(0x%x)\n", adc); + return -EINVAL; + }; + + return cable_type; +} + +static int sm5502_muic_cable_handler(struct sm5502_muic_info *info, + bool attached) +{ + static unsigned int prev_cable_type = SM5502_MUIC_ADC_GROUND; + const char **cable_names = info->edev->supported_cable; + unsigned int cable_type = SM5502_MUIC_ADC_GROUND; + unsigned int idx = 0; + + if (!cable_names) + return 0; + + /* Get the type of attached or detached cable */ + if (attached) + cable_type = sm5502_muic_get_cable_type(info); + else if (!attached) + cable_type = prev_cable_type; + prev_cable_type = cable_type; + + switch (cable_type) { + case SM5502_MUIC_ADC_OPEN_USB: + idx = EXTCON_CABLE_USB; + break; + case SM5502_MUIC_ADC_OPEN_TA: + idx = EXTCON_CABLE_TA; + break; + case SM5502_MUIC_ADC_OPEN_USB_OTG: + idx = EXTCON_CABLE_USB_HOST; + break; + default: + dev_dbg(info->dev, + "cannot handle this cable_type (0x%x)\n", cable_type); + return 0; + }; + + extcon_set_cable_state(info->edev, cable_names[idx], attached); + + return 0; +} + +static void sm5502_muic_irq_work(struct work_struct *work) +{ + struct sm5502_muic_info *info = container_of(work, + struct sm5502_muic_info, irq_work); + int ret = 0; + + if (!info->edev) + return; + + 
mutex_lock(&info->mutex); + + /* Detect attached or detached cables */ + if (info->irq_attach) { + ret = sm5502_muic_cable_handler(info, true); + info->irq_attach = false; + } + if (info->irq_detach) { + ret = sm5502_muic_cable_handler(info, false); + info->irq_detach = false; + } + + if (ret < 0) + dev_err(info->dev, "failed to handle MUIC interrupt\n"); + + mutex_unlock(&info->mutex); + + return; +} + +/* + * Sets irq_attach or irq_detach in sm5502_muic_info and returns 0. + * Returns -ESRCH if irq_type does not match registered IRQ for this dev type. + */ +static int sm5502_parse_irq(struct sm5502_muic_info *info, int irq_type) +{ + switch (irq_type) { + case SM5502_IRQ_INT1_ATTACH: + info->irq_attach = true; + break; + case SM5502_IRQ_INT1_DETACH: + info->irq_detach = true; + break; + case SM5502_IRQ_INT1_KP: + case SM5502_IRQ_INT1_LKP: + case SM5502_IRQ_INT1_LKR: + case SM5502_IRQ_INT1_OVP_EVENT: + case SM5502_IRQ_INT1_OCP_EVENT: + case SM5502_IRQ_INT1_OVP_OCP_DIS: + case SM5502_IRQ_INT2_VBUS_DET: + case SM5502_IRQ_INT2_REV_ACCE: + case SM5502_IRQ_INT2_ADC_CHG: + case SM5502_IRQ_INT2_STUCK_KEY: + case SM5502_IRQ_INT2_STUCK_KEY_RCV: + case SM5502_IRQ_INT2_MHL: + default: + break; + } + + return 0; +} + +static irqreturn_t sm5502_muic_irq_handler(int irq, void *data) +{ + struct sm5502_muic_info *info = data; + int i, irq_type = -1, ret; + + for (i = 0; i < info->num_muic_irqs; i++) + if (irq == info->muic_irqs[i].virq) + irq_type = info->muic_irqs[i].irq; + + ret = sm5502_parse_irq(info, irq_type); + if (ret < 0) { + dev_warn(info->dev, "cannot handle is interrupt:%d\n", + irq_type); + return IRQ_HANDLED; + } + schedule_work(&info->irq_work); + + return IRQ_HANDLED; +} + +static void sm5502_init_dev_type(struct sm5502_muic_info *info) +{ + unsigned int reg_data, vendor_id, version_id; + int i, ret; + + /* To test I2C, Print version_id and vendor_id of SM5502 */ + ret = regmap_read(info->regmap, SM5502_REG_DEVICE_ID, ®_data); + if (ret) { + dev_err(info->dev, + "failed to read DEVICE_ID register: %d\n", ret); + return; + } + + vendor_id = ((reg_data & SM5502_REG_DEVICE_ID_VENDOR_MASK) >> + SM5502_REG_DEVICE_ID_VENDOR_SHIFT); + version_id = ((reg_data & SM5502_REG_DEVICE_ID_VERSION_MASK) >> + SM5502_REG_DEVICE_ID_VERSION_SHIFT); + + dev_info(info->dev, "Device type: version: 0x%x, vendor: 0x%x\n", + version_id, vendor_id); + + /* Initiazle the register of SM5502 device to bring-up */ + for (i = 0; i < info->num_reg_data; i++) { + unsigned int val = 0; + + if (!info->reg_data[i].invert) + val |= ~info->reg_data[i].val; + else + val = info->reg_data[i].val; + regmap_write(info->regmap, info->reg_data[i].reg, val); + } +} + +static int sm5022_muic_i2c_probe(struct i2c_client *i2c, + const struct i2c_device_id *id) +{ + struct device_node *np = i2c->dev.of_node; + struct sm5502_muic_info *info; + int i, ret, irq_flags; + + if (!np) + return -EINVAL; + + info = devm_kzalloc(&i2c->dev, sizeof(*info), GFP_KERNEL); + if (!info) + return -ENOMEM; + i2c_set_clientdata(i2c, info); + + info->dev = &i2c->dev; + info->i2c = i2c; + info->irq = i2c->irq; + info->muic_irqs = sm5502_muic_irqs; + info->num_muic_irqs = ARRAY_SIZE(sm5502_muic_irqs); + info->reg_data = sm5502_reg_data; + info->num_reg_data = ARRAY_SIZE(sm5502_reg_data); + + mutex_init(&info->mutex); + + INIT_WORK(&info->irq_work, sm5502_muic_irq_work); + + info->regmap = devm_regmap_init_i2c(i2c, &sm5502_muic_regmap_config); + if (IS_ERR(info->regmap)) { + ret = PTR_ERR(info->regmap); + dev_err(info->dev, "failed to allocate register map: 
%d\n", + ret); + return ret; + } + + /* Support irq domain for SM5502 MUIC device */ + irq_flags = IRQF_TRIGGER_FALLING | IRQF_ONESHOT | IRQF_SHARED; + ret = regmap_add_irq_chip(info->regmap, info->irq, irq_flags, 0, + &sm5502_muic_irq_chip, &info->irq_data); + if (ret != 0) { + dev_err(info->dev, "failed to request IRQ %d: %d\n", + info->irq, ret); + return ret; + } + + for (i = 0; i < info->num_muic_irqs; i++) { + struct muic_irq *muic_irq = &info->muic_irqs[i]; + unsigned int virq = 0; + + virq = regmap_irq_get_virq(info->irq_data, muic_irq->irq); + if (virq <= 0) + return -EINVAL; + muic_irq->virq = virq; + + ret = devm_request_threaded_irq(info->dev, virq, NULL, + sm5502_muic_irq_handler, + IRQF_NO_SUSPEND, + muic_irq->name, info); + if (ret) { + dev_err(info->dev, "failed: irq request (IRQ: %d," + " error :%d)\n", muic_irq->irq, ret); + return ret; + } + } + + /* Allocate extcon device */ + info->edev = devm_extcon_dev_allocate(info->dev, sm5502_extcon_cable); + if (IS_ERR(info->edev)) { + dev_err(info->dev, "failed to allocate memory for extcon\n"); + return -ENOMEM; + } + info->edev->name = np->name; + + /* Register extcon device */ + ret = devm_extcon_dev_register(info->dev, info->edev); + if (ret) { + dev_err(info->dev, "failed to register extcon device\n"); + return ret; + } + + /* Initialize SM5502 device and print vendor id and version id */ + sm5502_init_dev_type(info); + + return 0; +} + +static int sm5502_muic_i2c_remove(struct i2c_client *i2c) +{ + struct sm5502_muic_info *info = i2c_get_clientdata(i2c); + + regmap_del_irq_chip(info->irq, info->irq_data); + + return 0; +} + +static struct of_device_id sm5502_dt_match[] = { + { .compatible = "siliconmitus,sm5502-muic" }, + { }, +}; + +#ifdef CONFIG_PM_SLEEP +static int sm5502_muic_suspend(struct device *dev) +{ + struct i2c_client *i2c = container_of(dev, struct i2c_client, dev); + struct sm5502_muic_info *info = i2c_get_clientdata(i2c); + + enable_irq_wake(info->irq); + + return 0; +} + +static int sm5502_muic_resume(struct device *dev) +{ + struct i2c_client *i2c = container_of(dev, struct i2c_client, dev); + struct sm5502_muic_info *info = i2c_get_clientdata(i2c); + + disable_irq_wake(info->irq); + + return 0; +} +#endif + +static SIMPLE_DEV_PM_OPS(sm5502_muic_pm_ops, + sm5502_muic_suspend, sm5502_muic_resume); + +static const struct i2c_device_id sm5502_i2c_id[] = { + { "sm5502", TYPE_SM5502 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, sm5502_i2c_id); + +static struct i2c_driver sm5502_muic_i2c_driver = { + .driver = { + .name = "sm5502", + .owner = THIS_MODULE, + .pm = &sm5502_muic_pm_ops, + .of_match_table = sm5502_dt_match, + }, + .probe = sm5022_muic_i2c_probe, + .remove = sm5502_muic_i2c_remove, + .id_table = sm5502_i2c_id, +}; + +static int __init sm5502_muic_i2c_init(void) +{ + return i2c_add_driver(&sm5502_muic_i2c_driver); +} +subsys_initcall(sm5502_muic_i2c_init); + +MODULE_DESCRIPTION("Silicon Mitus SM5502 Extcon driver"); +MODULE_AUTHOR("Chanwoo Choi "); +MODULE_LICENSE("GPL"); diff --git a/include/linux/extcon/sm5502.h b/include/linux/extcon/sm5502.h new file mode 100644 index 000000000000..17bd6550c485 --- /dev/null +++ b/include/linux/extcon/sm5502.h @@ -0,0 +1,264 @@ +/* + * sm5502.h + * + * Copyright (c) 2014 Samsung Electronics Co., Ltd + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef __LINUX_EXTCON_SM5502_H +#define __LINUX_EXTCON_SM5502_H + +enum sm5502_types { + TYPE_SM5502, +}; + +/* SM5502 registers */ +enum sm5502_reg { + SM5502_REG_DEVICE_ID = 0x01, + SM5502_REG_CONTROL, + SM5502_REG_INT1, + SM5502_REG_INT2, + SM5502_REG_INTMASK1, + SM5502_REG_INTMASK2, + SM5502_REG_ADC, + SM5502_REG_TIMING_SET1, + SM5502_REG_TIMING_SET2, + SM5502_REG_DEV_TYPE1, + SM5502_REG_DEV_TYPE2, + SM5502_REG_BUTTON1, + SM5502_REG_BUTTON2, + SM5502_REG_CAR_KIT_STATUS, + SM5502_REG_RSVD1, + SM5502_REG_RSVD2, + SM5502_REG_RSVD3, + SM5502_REG_RSVD4, + SM5502_REG_MANUAL_SW1, + SM5502_REG_MANUAL_SW2, + SM5502_REG_DEV_TYPE3, + SM5502_REG_RSVD5, + SM5502_REG_RSVD6, + SM5502_REG_RSVD7, + SM5502_REG_RSVD8, + SM5502_REG_RSVD9, + SM5502_REG_RESET, + SM5502_REG_RSVD10, + SM5502_REG_RESERVED_ID1, + SM5502_REG_RSVD11, + SM5502_REG_RSVD12, + SM5502_REG_RESERVED_ID2, + SM5502_REG_RSVD13, + SM5502_REG_OCP, + SM5502_REG_RSVD14, + SM5502_REG_RSVD15, + SM5502_REG_RSVD16, + SM5502_REG_RSVD17, + SM5502_REG_RSVD18, + SM5502_REG_RSVD19, + SM5502_REG_RSVD20, + SM5502_REG_RSVD21, + SM5502_REG_RSVD22, + SM5502_REG_RSVD23, + SM5502_REG_RSVD24, + SM5502_REG_RSVD25, + SM5502_REG_RSVD26, + SM5502_REG_RSVD27, + SM5502_REG_RSVD28, + SM5502_REG_RSVD29, + SM5502_REG_RSVD30, + SM5502_REG_RSVD31, + SM5502_REG_RSVD32, + SM5502_REG_RSVD33, + SM5502_REG_RSVD34, + SM5502_REG_RSVD35, + SM5502_REG_RSVD36, + SM5502_REG_RESERVED_ID3, + + SM5502_REG_END, +}; + +/* Define SM5502 MASK/SHIFT constant */ +#define SM5502_REG_DEVICE_ID_VENDOR_SHIFT 0 +#define SM5502_REG_DEVICE_ID_VERSION_SHIFT 3 +#define SM5502_REG_DEVICE_ID_VENDOR_MASK (0x3 << SM5502_REG_DEVICE_ID_VENDOR_SHIFT) +#define SM5502_REG_DEVICE_ID_VERSION_MASK (0x1f << SM5502_REG_DEVICE_ID_VERSION_SHIFT) + +#define SM5502_REG_CONTROL_MASK_INT_SHIFT 0 +#define SM5502_REG_CONTROL_WAIT_SHIFT 1 +#define SM5502_REG_CONTROL_MANUAL_SW_SHIFT 2 +#define SM5502_REG_CONTROL_RAW_DATA_SHIFT 3 +#define SM5502_REG_CONTROL_SW_OPEN_SHIFT 4 +#define SM5502_REG_CONTROL_MASK_INT_MASK (0x1 << SM5502_REG_CONTROL_MASK_INT_SHIFT) +#define SM5502_REG_CONTROL_WAIT_MASK (0x1 << SM5502_REG_CONTROL_WAIT_SHIFT) +#define SM5502_REG_CONTROL_MANUAL_SW_MASK (0x1 << SM5502_REG_CONTROL_MANUAL_SW_SHIFT) +#define SM5502_REG_CONTROL_RAW_DATA_MASK (0x1 << SM5502_REG_CONTROL_RAW_DATA_SHIFT) +#define SM5502_REG_CONTROL_SW_OPEN_MASK (0x1 << SM5502_REG_CONTROL_SW_OPEN_SHIFT) + +#define SM5502_REG_INTM1_ATTACH_SHIFT 0 +#define SM5502_REG_INTM1_DETACH_SHIFT 1 +#define SM5502_REG_INTM1_KP_SHIFT 2 +#define SM5502_REG_INTM1_LKP_SHIFT 3 +#define SM5502_REG_INTM1_LKR_SHIFT 4 +#define SM5502_REG_INTM1_OVP_EVENT_SHIFT 5 +#define SM5502_REG_INTM1_OCP_EVENT_SHIFT 6 +#define SM5502_REG_INTM1_OVP_OCP_DIS_SHIFT 7 +#define SM5502_REG_INTM1_ATTACH_MASK (0x1 << SM5502_REG_INTM1_ATTACH_SHIFT) +#define SM5502_REG_INTM1_DETACH_MASK (0x1 << SM5502_REG_INTM1_DETACH_SHIFT) +#define SM5502_REG_INTM1_KP_MASK (0x1 << SM5502_REG_INTM1_KP_SHIFT) +#define SM5502_REG_INTM1_LKP_MASK (0x1 << SM5502_REG_INTM1_LKP_SHIFT) +#define SM5502_REG_INTM1_LKR_MASK (0x1 << SM5502_REG_INTM1_LKR_SHIFT) +#define SM5502_REG_INTM1_OVP_EVENT_MASK (0x1 << SM5502_REG_INTM1_OVP_EVENT_SHIFT) +#define SM5502_REG_INTM1_OCP_EVENT_MASK (0x1 << SM5502_REG_INTM1_OCP_EVENT_SHIFT) +#define 
SM5502_REG_INTM1_OVP_OCP_DIS_MASK (0x1 << SM5502_REG_INTM1_OVP_OCP_DIS_SHIFT) + +#define SM5502_REG_INTM2_VBUS_DET_SHIFT 0 +#define SM5502_REG_INTM2_REV_ACCE_SHIFT 1 +#define SM5502_REG_INTM2_ADC_CHG_SHIFT 2 +#define SM5502_REG_INTM2_STUCK_KEY_SHIFT 3 +#define SM5502_REG_INTM2_STUCK_KEY_RCV_SHIFT 4 +#define SM5502_REG_INTM2_MHL_SHIFT 5 +#define SM5502_REG_INTM2_VBUS_DET_MASK (0x1 << SM5502_REG_INTM2_VBUS_DET_SHIFT) +#define SM5502_REG_INTM2_REV_ACCE_MASK (0x1 << SM5502_REG_INTM2_REV_ACCE_SHIFT) +#define SM5502_REG_INTM2_ADC_CHG_MASK (0x1 << SM5502_REG_INTM2_ADC_CHG_SHIFT) +#define SM5502_REG_INTM2_STUCK_KEY_MASK (0x1 << SM5502_REG_INTM2_STUCK_KEY_SHIFT) +#define SM5502_REG_INTM2_STUCK_KEY_RCV_MASK (0x1 << SM5502_REG_INTM2_STUCK_KEY_RCV_SHIFT) +#define SM5502_REG_INTM2_MHL_MASK (0x1 << SM5502_REG_INTM2_MHL_SHIFT) + +#define SM5502_REG_ADC_SHIFT 0 +#define SM5502_REG_ADC_MASK (0x1f << SM5502_REG_ADC_SHIFT) + +#define SM5502_REG_TIMING_SET1_KEY_PRESS_SHIFT 4 +#define SM5502_REG_TIMING_SET1_KEY_PRESS_MASK (0xf << SM5502_REG_TIMING_SET1_KEY_PRESS_SHIFT) +#define TIMING_KEY_PRESS_100MS 0x0 +#define TIMING_KEY_PRESS_200MS 0x1 +#define TIMING_KEY_PRESS_300MS 0x2 +#define TIMING_KEY_PRESS_400MS 0x3 +#define TIMING_KEY_PRESS_500MS 0x4 +#define TIMING_KEY_PRESS_600MS 0x5 +#define TIMING_KEY_PRESS_700MS 0x6 +#define TIMING_KEY_PRESS_800MS 0x7 +#define TIMING_KEY_PRESS_900MS 0x8 +#define TIMING_KEY_PRESS_1000MS 0x9 +#define SM5502_REG_TIMING_SET1_ADC_DET_SHIFT 0 +#define SM5502_REG_TIMING_SET1_ADC_DET_MASK (0xf << SM5502_REG_TIMING_SET1_ADC_DET_SHIFT) +#define TIMING_ADC_DET_50MS 0x0 +#define TIMING_ADC_DET_100MS 0x1 +#define TIMING_ADC_DET_150MS 0x2 +#define TIMING_ADC_DET_200MS 0x3 +#define TIMING_ADC_DET_300MS 0x4 +#define TIMING_ADC_DET_400MS 0x5 +#define TIMING_ADC_DET_500MS 0x6 +#define TIMING_ADC_DET_600MS 0x7 +#define TIMING_ADC_DET_700MS 0x8 +#define TIMING_ADC_DET_800MS 0x9 +#define TIMING_ADC_DET_900MS 0xA +#define TIMING_ADC_DET_1000MS 0xB + +#define SM5502_REG_TIMING_SET2_SW_WAIT_SHIFT 4 +#define SM5502_REG_TIMING_SET2_SW_WAIT_MASK (0xf << SM5502_REG_TIMING_SET2_SW_WAIT_SHIFT) +#define TIMING_SW_WAIT_10MS 0x0 +#define TIMING_SW_WAIT_30MS 0x1 +#define TIMING_SW_WAIT_50MS 0x2 +#define TIMING_SW_WAIT_70MS 0x3 +#define TIMING_SW_WAIT_90MS 0x4 +#define TIMING_SW_WAIT_110MS 0x5 +#define TIMING_SW_WAIT_130MS 0x6 +#define TIMING_SW_WAIT_150MS 0x7 +#define TIMING_SW_WAIT_170MS 0x8 +#define TIMING_SW_WAIT_190MS 0x9 +#define TIMING_SW_WAIT_210MS 0xA +#define SM5502_REG_TIMING_SET2_LONG_KEY_SHIFT 0 +#define SM5502_REG_TIMING_SET2_LONG_KEY_MASK (0xf << SM5502_REG_TIMING_SET2_LONG_KEY_SHIFT) +#define TIMING_LONG_KEY_300MS 0x0 +#define TIMING_LONG_KEY_400MS 0x1 +#define TIMING_LONG_KEY_500MS 0x2 +#define TIMING_LONG_KEY_600MS 0x3 +#define TIMING_LONG_KEY_700MS 0x4 +#define TIMING_LONG_KEY_800MS 0x5 +#define TIMING_LONG_KEY_900MS 0x6 +#define TIMING_LONG_KEY_1000MS 0x7 +#define TIMING_LONG_KEY_1100MS 0x8 +#define TIMING_LONG_KEY_1200MS 0x9 +#define TIMING_LONG_KEY_1300MS 0xA +#define TIMING_LONG_KEY_1400MS 0xB +#define TIMING_LONG_KEY_1500MS 0xC + +#define SM5502_REG_DEV_TYPE1_AUDIO_TYPE1_SHIFT 0 +#define SM5502_REG_DEV_TYPE1_AUDIO_TYPE2_SHIFT 1 +#define SM5502_REG_DEV_TYPE1_USB_SDP_SHIFT 2 +#define SM5502_REG_DEV_TYPE1_UART_SHIFT 3 +#define SM5502_REG_DEV_TYPE1_CAR_KIT_CHARGER_SHIFT 4 +#define SM5502_REG_DEV_TYPE1_USB_CHG_SHIFT 5 +#define SM5502_REG_DEV_TYPE1_DEDICATED_CHG_SHIFT 6 +#define SM5502_REG_DEV_TYPE1_USB_OTG_SHIFT 7 +#define SM5502_REG_DEV_TYPE1_AUDIO_TYPE1_MASK (0x1 << 
SM5502_REG_DEV_TYPE1_AUDIO_TYPE1_SHIFT) +#define SM5502_REG_DEV_TYPE1_AUDIO_TYPE1__MASK (0x1 << SM5502_REG_DEV_TYPE1_AUDIO_TYPE2_SHIFT) +#define SM5502_REG_DEV_TYPE1_USB_SDP_MASK (0x1 << SM5502_REG_DEV_TYPE1_USB_SDP_SHIFT) +#define SM5502_REG_DEV_TYPE1_UART_MASK (0x1 << SM5502_REG_DEV_TYPE1_UART_SHIFT) +#define SM5502_REG_DEV_TYPE1_CAR_KIT_CHARGER_MASK (0x1 << SM5502_REG_DEV_TYPE1_CAR_KIT_CHARGER_SHIFT) +#define SM5502_REG_DEV_TYPE1_USB_CHG_MASK (0x1 << SM5502_REG_DEV_TYPE1_USB_CHG_SHIFT) +#define SM5502_REG_DEV_TYPE1_DEDICATED_CHG_MASK (0x1 << SM5502_REG_DEV_TYPE1_DEDICATED_CHG_SHIFT) +#define SM5502_REG_DEV_TYPE1_USB_OTG_MASK (0x1 << SM5502_REG_DEV_TYPE1_USB_OTG_SHIFT) + +#define SM5502_REG_DEV_TYPE2_JIG_USB_ON_SHIFT 0 +#define SM5502_REG_DEV_TYPE2_JIG_USB_OFF_SHIFT 1 +#define SM5502_REG_DEV_TYPE2_JIG_UART_ON_SHIFT 2 +#define SM5502_REG_DEV_TYPE2_JIG_UART_OFF_SHIFT 3 +#define SM5502_REG_DEV_TYPE2_PPD_SHIFT 4 +#define SM5502_REG_DEV_TYPE2_TTY_SHIFT 5 +#define SM5502_REG_DEV_TYPE2_AV_CABLE_SHIFT 6 +#define SM5502_REG_DEV_TYPE2_JIG_USB_ON_MASK (0x1 << SM5502_REG_DEV_TYPE2_JIG_USB_ON_SHIFT) +#define SM5502_REG_DEV_TYPE2_JIG_USB_OFF_MASK (0x1 << SM5502_REG_DEV_TYPE2_JIG_USB_OFF_SHIFT) +#define SM5502_REG_DEV_TYPE2_JIG_UART_ON_MASK (0x1 << SM5502_REG_DEV_TYPE2_JIG_UART_ON_SHIFT) +#define SM5502_REG_DEV_TYPE2_JIG_UART_OFF_MASK (0x1 << SM5502_REG_DEV_TYPE2_JIG_UART_OFF_SHIFT) +#define SM5502_REG_DEV_TYPE2_PPD_MASK (0x1 << SM5502_REG_DEV_TYPE2_PPD_SHIFT) +#define SM5502_REG_DEV_TYPE2_TTY_MASK (0x1 << SM5502_REG_DEV_TYPE2_TTY_SHIFT) +#define SM5502_REG_DEV_TYPE2_AV_CABLE_MASK (0x1 << SM5502_REG_DEV_TYPE2_AV_CABLE_SHIFT) + +/* SM5502 Interrupts */ +enum sm5502_irq { + /* INT1 */ + SM5502_IRQ_INT1_ATTACH, + SM5502_IRQ_INT1_DETACH, + SM5502_IRQ_INT1_KP, + SM5502_IRQ_INT1_LKP, + SM5502_IRQ_INT1_LKR, + SM5502_IRQ_INT1_OVP_EVENT, + SM5502_IRQ_INT1_OCP_EVENT, + SM5502_IRQ_INT1_OVP_OCP_DIS, + + /* INT2 */ + SM5502_IRQ_INT2_VBUS_DET, + SM5502_IRQ_INT2_REV_ACCE, + SM5502_IRQ_INT2_ADC_CHG, + SM5502_IRQ_INT2_STUCK_KEY, + SM5502_IRQ_INT2_STUCK_KEY_RCV, + SM5502_IRQ_INT2_MHL, + + SM5502_IRQ_NUM, +}; + +#define SM5502_IRQ_INT1_ATTACH_MASK BIT(0) +#define SM5502_IRQ_INT1_DETACH_MASK BIT(1) +#define SM5502_IRQ_INT1_KP_MASK BIT(2) +#define SM5502_IRQ_INT1_LKP_MASK BIT(3) +#define SM5502_IRQ_INT1_LKR_MASK BIT(4) +#define SM5502_IRQ_INT1_OVP_EVENT_MASK BIT(5) +#define SM5502_IRQ_INT1_OCP_EVENT_MASK BIT(6) +#define SM5502_IRQ_INT1_OVP_OCP_DIS_MASK BIT(7) +#define SM5502_IRQ_INT2_VBUS_DET_MASK BIT(0) +#define SM5502_IRQ_INT2_REV_ACCE_MASK BIT(1) +#define SM5502_IRQ_INT2_ADC_CHG_MASK BIT(2) +#define SM5502_IRQ_INT2_STUCK_KEY_MASK BIT(3) +#define SM5502_IRQ_INT2_STUCK_KEY_RCV_MASK BIT(4) +#define SM5502_IRQ_INT2_MHL_MASK BIT(5) + +#endif /* __LINUX_EXTCON_SM5502_H */ -- cgit v1.2.3-59-g8ed1b From a75fed2ee6c187ab32b1cb01882c1032c4c9e4a8 Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Wed, 28 May 2014 15:35:29 +0900 Subject: extcon: sm5502: Change internal hardware switch according to cable type This patch changes internal hardware DP_CON/DM_CON switch according to cable type. The SM5502 MUIC device can set hardware switch as following: - OPEN (not connected state) / USB / UART / AUDIO Also, this patch set VBUSIN switch according to cable type. 
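As an illustration (a sketch only, not an additional hunk; it reuses the MANUAL_SW1 masks and switch values introduced by this patch, see sm5502_muic_set_path() in the diff below), attaching a USB cable programs the manual switch roughly as:

	regmap_update_bits(info->regmap, SM5502_REG_MANUAL_SW1,
			   SM5502_REG_MANUAL_SW1_DP_MASK |
			   SM5502_REG_MANUAL_SW1_DM_MASK,
			   DM_DP_SWITCH_USB);
	regmap_update_bits(info->regmap, SM5502_REG_MANUAL_SW1,
			   SM5502_REG_MANUAL_SW1_VBUSIN_MASK,
			   VBUSIN_SWITCH_VBUSOUT_WITH_USB);

On detach both paths are set back to DM_DP_SWITCH_OPEN and VBUSIN_SWITCH_OPEN.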
Signed-off-by: Chanwoo Choi --- drivers/extcon/extcon-sm5502.c | 78 +++++++++++++++++++++++++++++++++++++++--- include/linux/extcon/sm5502.h | 23 +++++++++++++ 2 files changed, 96 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/extcon/extcon-sm5502.c b/drivers/extcon/extcon-sm5502.c index a32d40f97ff0..560d7dccec7b 100644 --- a/drivers/extcon/extcon-sm5502.c +++ b/drivers/extcon/extcon-sm5502.c @@ -228,6 +228,61 @@ static const struct regmap_config sm5502_muic_regmap_config = { .max_register = SM5502_REG_END, }; +/* Change DM_CON/DP_CON/VBUSIN switch according to cable type */ +static int sm5502_muic_set_path(struct sm5502_muic_info *info, + unsigned int con_sw, unsigned int vbus_sw, + bool attached) +{ + int ret; + + if (!attached) { + con_sw = DM_DP_SWITCH_OPEN; + vbus_sw = VBUSIN_SWITCH_OPEN; + } + + switch (con_sw) { + case DM_DP_SWITCH_OPEN: + case DM_DP_SWITCH_USB: + case DM_DP_SWITCH_AUDIO: + case DM_DP_SWITCH_UART: + ret = regmap_update_bits(info->regmap, SM5502_REG_MANUAL_SW1, + SM5502_REG_MANUAL_SW1_DP_MASK | + SM5502_REG_MANUAL_SW1_DM_MASK, + con_sw); + if (ret < 0) { + dev_err(info->dev, + "cannot update DM_CON/DP_CON switch\n"); + return ret; + } + break; + default: + dev_err(info->dev, "Unknown DM_CON/DP_CON switch type (%d)\n", + con_sw); + return -EINVAL; + }; + + switch (vbus_sw) { + case VBUSIN_SWITCH_OPEN: + case VBUSIN_SWITCH_VBUSOUT: + case VBUSIN_SWITCH_MIC: + case VBUSIN_SWITCH_VBUSOUT_WITH_USB: + ret = regmap_update_bits(info->regmap, SM5502_REG_MANUAL_SW1, + SM5502_REG_MANUAL_SW1_VBUSIN_MASK, + vbus_sw); + if (ret < 0) { + dev_err(info->dev, + "cannot update VBUSIN switch\n"); + return ret; + } + break; + default: + dev_err(info->dev, "Unknown VBUS switch type (%d)\n", vbus_sw); + return -EINVAL; + }; + + return 0; +} + /* Return cable type of attached or detached accessories */ static unsigned int sm5502_muic_get_cable_type(struct sm5502_muic_info *info) { @@ -329,7 +384,10 @@ static int sm5502_muic_cable_handler(struct sm5502_muic_info *info, static unsigned int prev_cable_type = SM5502_MUIC_ADC_GROUND; const char **cable_names = info->edev->supported_cable; unsigned int cable_type = SM5502_MUIC_ADC_GROUND; + unsigned int con_sw = DM_DP_SWITCH_OPEN; + unsigned int vbus_sw = VBUSIN_SWITCH_OPEN; unsigned int idx = 0; + int ret; if (!cable_names) return 0; @@ -343,15 +401,19 @@ static int sm5502_muic_cable_handler(struct sm5502_muic_info *info, switch (cable_type) { case SM5502_MUIC_ADC_OPEN_USB: - idx = EXTCON_CABLE_USB; + idx = EXTCON_CABLE_USB; + con_sw = DM_DP_SWITCH_USB; + vbus_sw = VBUSIN_SWITCH_VBUSOUT_WITH_USB; break; case SM5502_MUIC_ADC_OPEN_TA: - idx = EXTCON_CABLE_TA; + idx = EXTCON_CABLE_TA; + con_sw = DM_DP_SWITCH_OPEN; + vbus_sw = VBUSIN_SWITCH_VBUSOUT; break; case SM5502_MUIC_ADC_OPEN_USB_OTG: - idx = EXTCON_CABLE_USB_HOST; - break; - case SM5502_MUIC_ADC_GROUND: + idx = EXTCON_CABLE_USB_HOST; + con_sw = DM_DP_SWITCH_USB; + vbus_sw = VBUSIN_SWITCH_OPEN; break; default: dev_dbg(info->dev, @@ -359,6 +421,12 @@ static int sm5502_muic_cable_handler(struct sm5502_muic_info *info, return 0; }; + /* Change internal hardware path(DM_CON/DP_CON, VBUSIN) */ + ret = sm5502_muic_set_path(info, con_sw, vbus_sw, attached); + if (ret < 0) + return ret; + + /* Change the state of external accessory */ extcon_set_cable_state(info->edev, cable_names[idx], attached); return 0; diff --git a/include/linux/extcon/sm5502.h b/include/linux/extcon/sm5502.h index 17bd6550c485..030526bf8d79 100644 --- a/include/linux/extcon/sm5502.h +++ 
b/include/linux/extcon/sm5502.h @@ -223,6 +223,29 @@ enum sm5502_reg { #define SM5502_REG_DEV_TYPE2_TTY_MASK (0x1 << SM5502_REG_DEV_TYPE2_TTY_SHIFT) #define SM5502_REG_DEV_TYPE2_AV_CABLE_MASK (0x1 << SM5502_REG_DEV_TYPE2_AV_CABLE_SHIFT) +#define SM5502_REG_MANUAL_SW1_VBUSIN_SHIFT 0 +#define SM5502_REG_MANUAL_SW1_DP_SHIFT 2 +#define SM5502_REG_MANUAL_SW1_DM_SHIFT 5 +#define SM5502_REG_MANUAL_SW1_VBUSIN_MASK (0x3 << SM5502_REG_MANUAL_SW1_VBUSIN_SHIFT) +#define SM5502_REG_MANUAL_SW1_DP_MASK (0x7 << SM5502_REG_MANUAL_SW1_DP_SHIFT) +#define SM5502_REG_MANUAL_SW1_DM_MASK (0x7 << SM5502_REG_MANUAL_SW1_DM_SHIFT) +#define VBUSIN_SWITCH_OPEN 0x0 +#define VBUSIN_SWITCH_VBUSOUT 0x1 +#define VBUSIN_SWITCH_MIC 0x2 +#define VBUSIN_SWITCH_VBUSOUT_WITH_USB 0x3 +#define DM_DP_CON_SWITCH_OPEN 0x0 +#define DM_DP_CON_SWITCH_USB 0x1 +#define DM_DP_CON_SWITCH_AUDIO 0x2 +#define DM_DP_CON_SWITCH_UART 0x3 +#define DM_DP_SWITCH_OPEN ((DM_DP_CON_SWITCH_OPEN < Date: Tue, 22 Jul 2014 15:44:11 +0300 Subject: net/mlx4_core: Use low memory profile on kdump kernel When running in kdump kernel, reduce number of resources allocated for the hardware. This will enable the NIC to operate in this low memory environment at the expense of performance and some features not related to the basic NIC functionality. Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/main.c | 28 +++++++++++++++++++++++++--- include/linux/mlx4/device.h | 7 +++++++ 2 files changed, 32 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 82ab427290c3..80b8c5f30e4e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -120,6 +120,16 @@ static struct mlx4_profile default_profile = { .num_mtt = 1 << 20, /* It is really num mtt segements */ }; +static struct mlx4_profile low_mem_profile = { + .num_qp = 1 << 17, + .num_srq = 1 << 6, + .rdmarc_per_qp = 1 << 4, + .num_cq = 1 << 8, + .num_mcg = 1 << 8, + .num_mpt = 1 << 9, + .num_mtt = 1 << 7, +}; + static int log_num_mac = 7; module_param_named(log_num_mac, log_num_mac, int, 0444); MODULE_PARM_DESC(log_num_mac, "Log2 max number of MACs per ETH port (1-7)"); @@ -129,6 +139,8 @@ module_param_named(log_num_vlan, log_num_vlan, int, 0444); MODULE_PARM_DESC(log_num_vlan, "Log2 max number of VLANs per ETH port (0-7)"); /* Log2 max number of VLANs per ETH port (0-7) */ #define MLX4_LOG_NUM_VLANS 7 +#define MLX4_MIN_LOG_NUM_VLANS 0 +#define MLX4_MIN_LOG_NUM_MAC 1 static bool use_prio; module_param_named(use_prio, use_prio, bool, 0444); @@ -287,8 +299,13 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) if (mlx4_is_mfunc(dev)) dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_SENSE_SUPPORT; - dev->caps.log_num_macs = log_num_mac; - dev->caps.log_num_vlans = MLX4_LOG_NUM_VLANS; + if (mlx4_low_memory_profile()) { + dev->caps.log_num_macs = MLX4_MIN_LOG_NUM_MAC; + dev->caps.log_num_vlans = MLX4_MIN_LOG_NUM_VLANS; + } else { + dev->caps.log_num_macs = log_num_mac; + dev->caps.log_num_vlans = MLX4_LOG_NUM_VLANS; + } for (i = 1; i <= dev->caps.num_ports; ++i) { dev->caps.port_type[i] = MLX4_PORT_TYPE_NONE; @@ -1587,7 +1604,12 @@ static int mlx4_init_hca(struct mlx4_dev *dev) if (mlx4_is_master(dev)) mlx4_parav_master_pf_caps(dev); - profile = default_profile; + if (mlx4_low_memory_profile()) { + mlx4_info(dev, "Running from within kdump kernel. 
Using low memory profile\n"); + profile = low_mem_profile; + } else { + profile = default_profile; + } if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) profile.num_mcg = MLX4_FS_NUM_MCG; diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index fa660aedb822..e15b1544ea83 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -1254,4 +1254,11 @@ int mlx4_vf_smi_enabled(struct mlx4_dev *dev, int slave, int port); int mlx4_vf_get_enable_smi_admin(struct mlx4_dev *dev, int slave, int port); int mlx4_vf_set_enable_smi_admin(struct mlx4_dev *dev, int slave, int port, int enable); + +/* Returns true if running in low memory profile (kdump kernel) */ +static inline bool mlx4_low_memory_profile(void) +{ + return reset_devices; +} + #endif /* MLX4_DEVICE_H */ -- cgit v1.2.3-59-g8ed1b From 14c8a620ba436511b1347c592633befa49535176 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 23 Jul 2014 10:47:49 +0200 Subject: gpio: drop retval check enforcing from gpiochip_remove() As we start to decomission the return value from gpiochip_remove() the compilers emit warnings due to the function being tagged __must_check. So drop this until we remove the return value altogether. Cc: Abdoulaye Berthe Suggested-by: Stephen Rothwell Signed-off-by: Linus Walleij --- include/linux/gpio/driver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 573e4f3243d0..ca3024554a2d 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -141,7 +141,7 @@ extern const char *gpiochip_is_requested(struct gpio_chip *chip, /* add/remove chips */ extern int gpiochip_add(struct gpio_chip *chip); -extern int __must_check gpiochip_remove(struct gpio_chip *chip); +extern int gpiochip_remove(struct gpio_chip *chip); extern struct gpio_chip *gpiochip_find(void *data, int (*match)(struct gpio_chip *chip, void *data)); -- cgit v1.2.3-59-g8ed1b From efd342fb0031a17758571dce42e3f373d94e2fec Mon Sep 17 00:00:00 2001 From: Matthias Brugger Date: Fri, 18 Jul 2014 11:36:39 +0200 Subject: of: Provide a function to request and map memory A call to of_iomap does not request the memory region. This patch adds the function of_io_request_and_map which requests the memory region before mapping it. Signed-off-by: Matthias Brugger Suggested-by: Thomas Gleixner Suggested-by: Rob Herring Acked-by: Rob Herring Signed-off-by: Daniel Lezcano --- drivers/of/address.c | 36 ++++++++++++++++++++++++++++++++++++ include/linux/io.h | 2 ++ include/linux/of_address.h | 11 +++++++++++ lib/devres.c | 2 -- 4 files changed, 49 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/of/address.c b/drivers/of/address.c index 5edfcb0da37d..e3718250d66e 100644 --- a/drivers/of/address.c +++ b/drivers/of/address.c @@ -702,6 +702,42 @@ void __iomem *of_iomap(struct device_node *np, int index) } EXPORT_SYMBOL(of_iomap); +/* + * of_io_request_and_map - Requests a resource and maps the memory mapped IO + * for a given device_node + * @device: the device whose io range will be mapped + * @index: index of the io range + * @name: name of the resource + * + * Returns a pointer to the requested and mapped memory or an ERR_PTR() encoded + * error code on failure. 
Usage example: + * + * base = of_io_request_and_map(node, 0, "foo"); + * if (IS_ERR(base)) + * return PTR_ERR(base); + */ +void __iomem *of_io_request_and_map(struct device_node *np, int index, + char *name) +{ + struct resource res; + void __iomem *mem; + + if (of_address_to_resource(np, index, &res)) + return IOMEM_ERR_PTR(-EINVAL); + + if (!request_mem_region(res.start, resource_size(&res), name)) + return IOMEM_ERR_PTR(-EBUSY); + + mem = ioremap(res.start, resource_size(&res)); + if (!mem) { + release_mem_region(res.start, resource_size(&res)); + return IOMEM_ERR_PTR(-ENOMEM); + } + + return mem; +} +EXPORT_SYMBOL(of_io_request_and_map); + /** * of_dma_get_range - Get DMA range info * @np: device node to get DMA range info diff --git a/include/linux/io.h b/include/linux/io.h index b76e6e545806..d5fc9b8d8b03 100644 --- a/include/linux/io.h +++ b/include/linux/io.h @@ -58,6 +58,8 @@ static inline void devm_ioport_unmap(struct device *dev, void __iomem *addr) } #endif +#define IOMEM_ERR_PTR(err) (__force void __iomem *)ERR_PTR(err) + void __iomem *devm_ioremap(struct device *dev, resource_size_t offset, unsigned long size); void __iomem *devm_ioremap_nocache(struct device *dev, resource_size_t offset, diff --git a/include/linux/of_address.h b/include/linux/of_address.h index c13b8782a4eb..fb7b7221e063 100644 --- a/include/linux/of_address.h +++ b/include/linux/of_address.h @@ -109,7 +109,12 @@ static inline bool of_dma_is_coherent(struct device_node *np) extern int of_address_to_resource(struct device_node *dev, int index, struct resource *r); void __iomem *of_iomap(struct device_node *node, int index); +void __iomem *of_io_request_and_map(struct device_node *device, + int index, char *name); #else + +#include + static inline int of_address_to_resource(struct device_node *dev, int index, struct resource *r) { @@ -120,6 +125,12 @@ static inline void __iomem *of_iomap(struct device_node *device, int index) { return NULL; } + +static inline void __iomem *of_io_request_and_map(struct device_node *device, + int index, char *name) +{ + return IOMEM_ERR_PTR(-EINVAL); +} #endif #if defined(CONFIG_OF_ADDRESS) && defined(CONFIG_PCI) diff --git a/lib/devres.c b/lib/devres.c index f562bf6ff71d..bb632484a860 100644 --- a/lib/devres.c +++ b/lib/devres.c @@ -86,8 +86,6 @@ void devm_iounmap(struct device *dev, void __iomem *addr) } EXPORT_SYMBOL(devm_iounmap); -#define IOMEM_ERR_PTR(err) (__force void __iomem *)ERR_PTR(err) - /** * devm_ioremap_resource() - check, request region, and ioremap resource * @dev: generic device to handle the resource for -- cgit v1.2.3-59-g8ed1b From 8594d83261d14754288ef00993f0971131ff08eb Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Fri, 11 Jul 2014 14:19:32 +0800 Subject: iommu/vt-d: Simplify include/linux/dmar.h Simplify include/linux/dmar.h a bit based on the fact that both CONFIG_INTEL_IOMMU and CONFIG_IRQ_REMAP select CONFIG_DMAR_TABLE. 
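Concretely, the CONFIG_INTEL_IOMMU section moves inside the CONFIG_DMAR_TABLE one, so the header ends up nested roughly as follows (a sketch of the resulting layout only; names as in the hunks below):

	#ifdef CONFIG_DMAR_TABLE
	extern int detect_intel_iommu(void);
	extern int enable_drhd_fault_handling(void);

	#ifdef CONFIG_INTEL_IOMMU
	extern int intel_iommu_init(void);
	#else /* !CONFIG_INTEL_IOMMU: */
	static inline int intel_iommu_init(void) { return -ENODEV; }
	#endif /* CONFIG_INTEL_IOMMU */
	#endif /* CONFIG_DMAR_TABLE */

Since both CONFIG_INTEL_IOMMU and CONFIG_IRQ_REMAP select CONFIG_DMAR_TABLE, the inner block can rely on the outer one being enabled.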
Signed-off-by: Jiang Liu Signed-off-by: Joerg Roedel --- include/linux/dmar.h | 50 ++++++++++++++++++-------------------------------- 1 file changed, 18 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 23c8db129560..1deece46a0ca 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -114,22 +114,30 @@ extern int dmar_remove_dev_scope(struct dmar_pci_notify_info *info, /* Intel IOMMU detection */ extern int detect_intel_iommu(void); extern int enable_drhd_fault_handling(void); -#else -struct dmar_pci_notify_info; -static inline int detect_intel_iommu(void) + +#ifdef CONFIG_INTEL_IOMMU +extern int iommu_detected, no_iommu; +extern int intel_iommu_init(void); +extern int dmar_parse_one_rmrr(struct acpi_dmar_header *header); +extern int dmar_parse_one_atsr(struct acpi_dmar_header *header); +extern int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info); +#else /* !CONFIG_INTEL_IOMMU: */ +static inline int intel_iommu_init(void) { return -ENODEV; } +static inline int dmar_parse_one_rmrr(struct acpi_dmar_header *header) { - return -ENODEV; + return 0; } - -static inline int dmar_table_init(void) +static inline int dmar_parse_one_atsr(struct acpi_dmar_header *header) { - return -ENODEV; + return 0; } -static inline int enable_drhd_fault_handling(void) +static inline int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info) { - return -1; + return 0; } -#endif /* !CONFIG_DMAR_TABLE */ +#endif /* CONFIG_INTEL_IOMMU */ + +#endif /* CONFIG_DMAR_TABLE */ struct irte { union { @@ -177,26 +185,4 @@ extern int dmar_set_interrupt(struct intel_iommu *iommu); extern irqreturn_t dmar_fault(int irq, void *dev_id); extern int arch_setup_dmar_msi(unsigned int irq); -#ifdef CONFIG_INTEL_IOMMU -extern int iommu_detected, no_iommu; -extern int dmar_parse_one_rmrr(struct acpi_dmar_header *header); -extern int dmar_parse_one_atsr(struct acpi_dmar_header *header); -extern int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info); -extern int intel_iommu_init(void); -#else /* !CONFIG_INTEL_IOMMU: */ -static inline int intel_iommu_init(void) { return -ENODEV; } -static inline int dmar_parse_one_rmrr(struct acpi_dmar_header *header) -{ - return 0; -} -static inline int dmar_parse_one_atsr(struct acpi_dmar_header *header) -{ - return 0; -} -static inline int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info) -{ - return 0; -} -#endif /* CONFIG_INTEL_IOMMU */ - #endif /* __DMAR_H__ */ -- cgit v1.2.3-59-g8ed1b From a156ef99e874f3701367cc192aa604bcf8c0a236 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Fri, 11 Jul 2014 14:19:36 +0800 Subject: iommu/vt-d: Introduce helper function iova_size() to improve code readability Signed-off-by: Jiang Liu Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 7 +++---- include/linux/iova.h | 5 +++++ 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index c9e65edaa2ad..cd1ba24c766a 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -3101,10 +3101,10 @@ static void flush_unmaps(void) /* On real hardware multiple invalidations are expensive */ if (cap_caching_mode(iommu->cap)) iommu_flush_iotlb_psi(iommu, domain->id, - iova->pfn_lo, iova->pfn_hi - iova->pfn_lo + 1, + iova->pfn_lo, iova_size(iova), !deferred_flush[i].freelist[j], 0); else { - mask = ilog2(mm_to_dma_pfn(iova->pfn_hi - iova->pfn_lo + 1)); + mask = 
ilog2(mm_to_dma_pfn(iova_size(iova))); iommu_flush_dev_iotlb(deferred_flush[i].domain[j], (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask); } @@ -3905,8 +3905,7 @@ static int intel_iommu_memory_notifier(struct notifier_block *nb, rcu_read_lock(); for_each_active_iommu(iommu, drhd) iommu_flush_iotlb_psi(iommu, si_domain->id, - iova->pfn_lo, - iova->pfn_hi - iova->pfn_lo + 1, + iova->pfn_lo, iova_size(iova), !freelist, 0); rcu_read_unlock(); dma_free_pagelist(freelist); diff --git a/include/linux/iova.h b/include/linux/iova.h index 3277f4711349..19e81d5ccb6d 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -34,6 +34,11 @@ struct iova_domain { unsigned long dma_32bit_pfn; }; +static inline unsigned long iova_size(struct iova *iova) +{ + return iova->pfn_hi - iova->pfn_lo + 1; +} + struct iova *alloc_iova_mem(void); void free_iova_mem(struct iova *iova); void free_iova(struct iova_domain *iovad, unsigned long pfn); -- cgit v1.2.3-59-g8ed1b From 1a112d10f03e83fb3a2fdc4c9165865dec8a3ca6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 23 Jul 2014 09:05:27 -0400 Subject: libata: introduce ata_host->n_tags to avoid oops on SAS controllers 1871ee134b73 ("libata: support the ata host which implements a queue depth less than 32") directly used ata_port->scsi_host->can_queue from ata_qc_new() to determine the number of tags supported by the host; unfortunately, SAS controllers doing SATA don't initialize ->scsi_host leading to the following oops. BUG: unable to handle kernel NULL pointer dereference at 0000000000000058 IP: [] ata_qc_new_init+0x188/0x1b0 PGD 0 Oops: 0002 [#1] SMP Modules linked in: isci libsas scsi_transport_sas mgag200 drm_kms_helper ttm CPU: 1 PID: 518 Comm: udevd Not tainted 3.16.0-rc6+ #62 Hardware name: Intel Corporation S2600CO/S2600CO, BIOS SE5C600.86B.02.02.0002.122320131210 12/23/2013 task: ffff880c1a00b280 ti: ffff88061a000000 task.ti: ffff88061a000000 RIP: 0010:[] [] ata_qc_new_init+0x188/0x1b0 RSP: 0018:ffff88061a003ae8 EFLAGS: 00010012 RAX: 0000000000000001 RBX: ffff88000241ca80 RCX: 00000000000000fa RDX: 0000000000000020 RSI: 0000000000000020 RDI: ffff8806194aa298 RBP: ffff88061a003ae8 R08: ffff8806194a8000 R09: 0000000000000000 R10: 0000000000000000 R11: ffff88000241ca80 R12: ffff88061ad58200 R13: ffff8806194aa298 R14: ffffffff814e67a0 R15: ffff8806194a8000 FS: 00007f3ad7fe3840(0000) GS:ffff880627620000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000058 CR3: 000000061a118000 CR4: 00000000001407e0 Stack: ffff88061a003b20 ffffffff814e96e1 ffff88000241ca80 ffff88061ad58200 ffff8800b6bf6000 ffff880c1c988000 ffff880619903850 ffff88061a003b68 ffffffffa0056ce1 ffff88061a003b48 0000000013d6e6f8 ffff88000241ca80 Call Trace: [] ata_sas_queuecmd+0xa1/0x430 [] sas_queuecommand+0x191/0x220 [libsas] [] scsi_dispatch_cmd+0x10e/0x300 [] scsi_request_fn+0x2f5/0x550 [] __blk_run_queue+0x33/0x40 [] queue_unplugged+0x2a/0x90 [] blk_flush_plug_list+0x1b4/0x210 [] blk_finish_plug+0x14/0x50 [] __do_page_cache_readahead+0x198/0x1f0 [] force_page_cache_readahead+0x31/0x50 [] page_cache_sync_readahead+0x3e/0x50 [] generic_file_read_iter+0x496/0x5a0 [] blkdev_read_iter+0x37/0x40 [] new_sync_read+0x7e/0xb0 [] vfs_read+0x94/0x170 [] SyS_read+0x46/0xb0 [] ? 
SyS_lseek+0x91/0xb0 [] system_call_fastpath+0x16/0x1b Code: 00 00 00 88 50 29 83 7f 08 01 19 d2 83 e2 f0 83 ea 50 88 50 34 c6 81 1d 02 00 00 40 c6 81 17 02 00 00 00 5d c3 66 0f 1f 44 00 00 <89> 14 25 58 00 00 00 Fix it by introducing ata_host->n_tags which is initialized to ATA_MAX_QUEUE - 1 in ata_host_init() for SAS controllers and set to scsi_host_template->can_queue in ata_host_register() for !SAS ones. As SAS hosts are never registered, this will give them the same ATA_MAX_QUEUE - 1 as before. Note that we can't use scsi_host->can_queue directly for SAS hosts anyway as they can go higher than the libata maximum. Signed-off-by: Tejun Heo Reported-by: Mike Qiu Reported-by: Jesse Brandeburg Reported-by: Peter Hurley Reported-by: Peter Zijlstra Tested-by: Alexey Kardashevskiy Fixes: 1871ee134b73 ("libata: support the ata host which implements a queue depth less than 32") Cc: Kevin Hao Cc: Dan Williams Cc: stable@vger.kernel.org --- drivers/ata/libata-core.c | 16 ++++------------ include/linux/libata.h | 1 + 2 files changed, 5 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index d19c37a7abc9..677c0c1b03bd 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4798,9 +4798,8 @@ void swap_buf_le16(u16 *buf, unsigned int buf_words) static struct ata_queued_cmd *ata_qc_new(struct ata_port *ap) { struct ata_queued_cmd *qc = NULL; - unsigned int i, tag, max_queue; - - max_queue = ap->scsi_host->can_queue; + unsigned int max_queue = ap->host->n_tags; + unsigned int i, tag; /* no command while frozen */ if (unlikely(ap->pflags & ATA_PFLAG_FROZEN)) @@ -6094,6 +6093,7 @@ void ata_host_init(struct ata_host *host, struct device *dev, { spin_lock_init(&host->lock); mutex_init(&host->eh_mutex); + host->n_tags = ATA_MAX_QUEUE - 1; host->dev = dev; host->ops = ops; } @@ -6175,15 +6175,7 @@ int ata_host_register(struct ata_host *host, struct scsi_host_template *sht) { int i, rc; - /* - * The max queue supported by hardware must not be greater than - * ATA_MAX_QUEUE. - */ - if (sht->can_queue > ATA_MAX_QUEUE) { - dev_err(host->dev, "BUG: the hardware max queue is too large\n"); - WARN_ON(1); - return -EINVAL; - } + host->n_tags = clamp(sht->can_queue, 1, ATA_MAX_QUEUE - 1); /* host must have been started */ if (!(host->flags & ATA_HOST_STARTED)) { diff --git a/include/linux/libata.h b/include/linux/libata.h index 5ab4e3a76721..92abb497ab14 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -593,6 +593,7 @@ struct ata_host { struct device *dev; void __iomem * const *iomap; unsigned int n_ports; + unsigned int n_tags; /* nr of NCQ tags */ void *private_data; struct ata_port_operations *ops; unsigned long flags; -- cgit v1.2.3-59-g8ed1b From ec4c4d877becf1c224f45347f4fc0016765e00d0 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 15 Jul 2014 09:58:06 +0200 Subject: video: fix up versatile CLCD helper move commit 11c32d7b6274cb0f ("video: move Versatile CLCD helpers") moved files out of the plat-versatile directory but in the process got a few of the dependencies wrong: - If CONFIG_FB is not set, the file no longer gets built, resulting in a link error - If CONFIG_FB or CONFIG_FB_ARMCLCD are disabled, we also get a Kconfig warning for incorrect dependencies due to the symbol being 'select'ed from the platform Kconfig. - When the file is not built, we also get a link error for missing symbols. 
This patch should fix all three, by removing the 'select' statements, changing the Kconfig description of the symbol to be enabled in exactly the right configurations, and adding inline stub functions for the case when the framebuffer driver is disabled. Signed-off-by: Arnd Bergmann Reviewed-by: Linus Walleij --- arch/arm/Kconfig | 2 -- arch/arm/mach-vexpress/Kconfig | 1 - drivers/video/fbdev/Kconfig | 6 ++---- include/linux/platform_data/video-clcd-versatile.h | 18 ++++++++++++++++++ 4 files changed, 20 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index c32064de77d8..11f18a04c066 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -332,7 +332,6 @@ config ARCH_REALVIEW select ICST select NEED_MACH_MEMORY_H select PLAT_VERSATILE - select PLAT_VERSATILE_CLCD help This enables support for ARM Ltd RealView boards. @@ -347,7 +346,6 @@ config ARCH_VERSATILE select HAVE_MACH_CLKDEV select ICST select PLAT_VERSATILE - select PLAT_VERSATILE_CLCD select PLAT_VERSATILE_CLOCK select VERSATILE_FPGA_IRQ help diff --git a/arch/arm/mach-vexpress/Kconfig b/arch/arm/mach-vexpress/Kconfig index d8b9330f896a..e9166dfc4756 100644 --- a/arch/arm/mach-vexpress/Kconfig +++ b/arch/arm/mach-vexpress/Kconfig @@ -13,7 +13,6 @@ menuconfig ARCH_VEXPRESS select ICST select NO_IOPORT_MAP select PLAT_VERSATILE - select PLAT_VERSATILE_CLCD select POWER_RESET select POWER_RESET_VEXPRESS select POWER_SUPPLY diff --git a/drivers/video/fbdev/Kconfig b/drivers/video/fbdev/Kconfig index 92026d31bb48..bdf463072247 100644 --- a/drivers/video/fbdev/Kconfig +++ b/drivers/video/fbdev/Kconfig @@ -292,10 +292,8 @@ config FB_ARMCLCD # Helper logic selected only by the ARM Versatile platform family. config PLAT_VERSATILE_CLCD - depends on FB_ARMCLCD - depends on (PLAT_VERSATILE || ARCH_INTEGRATOR) - default y - bool + def_bool ARCH_VERSATILE || ARCH_REALVIEW || ARCH_VEXPRESS + depends on FB_ARMCLCD && FB=y config FB_ACORN bool "Acorn VIDC support" diff --git a/include/linux/platform_data/video-clcd-versatile.h b/include/linux/platform_data/video-clcd-versatile.h index 6bb6a1d2019b..09ccf182af4d 100644 --- a/include/linux/platform_data/video-clcd-versatile.h +++ b/include/linux/platform_data/video-clcd-versatile.h @@ -1,9 +1,27 @@ #ifndef PLAT_CLCD_H #define PLAT_CLCD_H +#ifdef CONFIG_PLAT_VERSATILE_CLCD struct clcd_panel *versatile_clcd_get_panel(const char *); int versatile_clcd_setup_dma(struct clcd_fb *, unsigned long); int versatile_clcd_mmap_dma(struct clcd_fb *, struct vm_area_struct *); void versatile_clcd_remove_dma(struct clcd_fb *); +#else +static inline struct clcd_panel *versatile_clcd_get_panel(const char *s) +{ + return NULL; +} +static inline int versatile_clcd_setup_dma(struct clcd_fb *fb, unsigned long framesize) +{ + return -ENODEV; +} +static inline int versatile_clcd_mmap_dma(struct clcd_fb *fb, struct vm_area_struct *vm) +{ + return -ENODEV; +} +static inline void versatile_clcd_remove_dma(struct clcd_fb *fb) +{ +} +#endif #endif -- cgit v1.2.3-59-g8ed1b From 1bd6b601fe196b6fbce2c93536ce0f3f53577cec Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Tue, 22 Jul 2014 16:17:41 +0900 Subject: gpio: make gpiochip_get_desc() gpiolib-private As GPIO descriptors are not going to remain unique anymore, having this function public is not safe. Restrain its use to gpiolib since we have no user outside of it. 
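For illustration only, a minimal sketch of how code inside drivers/gpio/ keeps using the helper once it is declared in gpiolib.h; the caller below is hypothetical and not part of this patch:

#include <linux/err.h>
#include "gpiolib.h"

static int example_handle_pin(struct gpio_chip *chip, u16 hwnum)
{
	struct gpio_desc *desc = gpiochip_get_desc(chip, hwnum);

	if (IS_ERR(desc))
		return PTR_ERR(desc);

	/* ... operate on the descriptor ... */
	return 0;
}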
Signed-off-by: Alexandre Courbot Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib-of.c | 2 +- drivers/gpio/gpiolib.c | 1 - drivers/gpio/gpiolib.h | 2 ++ include/linux/gpio/driver.h | 3 --- 4 files changed, 3 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index 3e2fae205bee..7cfdc2278905 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -23,7 +23,7 @@ #include #include -struct gpio_desc; +#include "gpiolib.h" /* Private data structure for of_gpiochip_find_and_xlate */ struct gg_data { diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index c5509359ba88..38d176e31379 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -82,7 +82,6 @@ struct gpio_desc *gpiochip_get_desc(struct gpio_chip *chip, return &chip->desc[hwnum]; } -EXPORT_SYMBOL_GPL(gpiochip_get_desc); /** * Convert a GPIO descriptor to the integer namespace. diff --git a/drivers/gpio/gpiolib.h b/drivers/gpio/gpiolib.h index 98020c393eb3..acbb9335f08c 100644 --- a/drivers/gpio/gpiolib.h +++ b/drivers/gpio/gpiolib.h @@ -51,6 +51,8 @@ void gpiochip_free_own_desc(struct gpio_desc *desc); struct gpio_desc *of_get_named_gpiod_flags(struct device_node *np, const char *list_name, int index, enum of_gpio_flags *flags); +struct gpio_desc *gpiochip_get_desc(struct gpio_chip *chip, u16 hwnum); + extern struct spinlock gpio_lock; extern struct list_head gpio_chips; diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index ca3024554a2d..88f92dfae545 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -151,9 +151,6 @@ void gpiod_unlock_as_irq(struct gpio_desc *desc); struct gpio_chip *gpiod_to_chip(const struct gpio_desc *desc); -struct gpio_desc *gpiochip_get_desc(struct gpio_chip *chip, - u16 hwnum); - enum gpio_lookup_flags { GPIO_ACTIVE_HIGH = (0 << 0), GPIO_ACTIVE_LOW = (1 << 0), -- cgit v1.2.3-59-g8ed1b From d74be6dfea1b96cfb4bd79d9254fa9d21ed5f131 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Tue, 22 Jul 2014 16:17:42 +0900 Subject: gpio: remove gpiod_lock/unlock_as_irq() gpio_lock/unlock_as_irq() are working with (chip, offset) arguments and are thus not using the old integer namespace. Therefore, there is no reason to have gpiod variants of these functions working with descriptors, especially since the (chip, offset) tuple is more suitable to the users of these functions (GPIO drivers, whereas GPIO descriptors are targeted at GPIO consumers). Signed-off-by: Alexandre Courbot Signed-off-by: Linus Walleij --- Documentation/gpio/driver.txt | 4 ++-- drivers/gpio/gpiolib-acpi.c | 6 +++--- drivers/gpio/gpiolib-legacy.c | 12 ------------ drivers/gpio/gpiolib-sysfs.c | 4 ++-- drivers/gpio/gpiolib.c | 30 ++++++++++++++++-------------- include/asm-generic/gpio.h | 3 --- include/linux/gpio/driver.h | 4 ++-- 7 files changed, 25 insertions(+), 38 deletions(-) (limited to 'include/linux') diff --git a/Documentation/gpio/driver.txt b/Documentation/gpio/driver.txt index fa9a0a8b3734..224dbbcd1804 100644 --- a/Documentation/gpio/driver.txt +++ b/Documentation/gpio/driver.txt @@ -157,12 +157,12 @@ Locking IRQ usage Input GPIOs can be used as IRQ signals. 
When this happens, a driver is requested to mark the GPIO as being used as an IRQ: - int gpiod_lock_as_irq(struct gpio_desc *desc) + int gpio_lock_as_irq(struct gpio_chip *chip, unsigned int offset) This will prevent the use of non-irq related GPIO APIs until the GPIO IRQ lock is released: - void gpiod_unlock_as_irq(struct gpio_desc *desc) + void gpio_unlock_as_irq(struct gpio_chip *chip, unsigned int offset) When implementing an irqchip inside a GPIO driver, these two functions should typically be called in the .startup() and .shutdown() callbacks from the diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index 4a987917c186..d2e8600df02c 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -157,7 +157,7 @@ static acpi_status acpi_gpiochip_request_interrupt(struct acpi_resource *ares, gpiod_direction_input(desc); - ret = gpiod_lock_as_irq(desc); + ret = gpio_lock_as_irq(chip, pin); if (ret) { dev_err(chip->dev, "Failed to lock GPIO as interrupt\n"); goto fail_free_desc; @@ -212,7 +212,7 @@ static acpi_status acpi_gpiochip_request_interrupt(struct acpi_resource *ares, fail_free_event: kfree(event); fail_unlock_irq: - gpiod_unlock_as_irq(desc); + gpio_unlock_as_irq(chip, pin); fail_free_desc: gpiochip_free_own_desc(desc); @@ -263,7 +263,7 @@ static void acpi_gpiochip_free_interrupts(struct acpi_gpio_chip *acpi_gpio) desc = gpiochip_get_desc(chip, event->pin); if (WARN_ON(IS_ERR(desc))) continue; - gpiod_unlock_as_irq(desc); + gpio_unlock_as_irq(chip, event->pin); gpiochip_free_own_desc(desc); list_del(&event->node); kfree(event); diff --git a/drivers/gpio/gpiolib-legacy.c b/drivers/gpio/gpiolib-legacy.c index c684d94cdbb4..078ae6c2df79 100644 --- a/drivers/gpio/gpiolib-legacy.c +++ b/drivers/gpio/gpiolib-legacy.c @@ -100,15 +100,3 @@ void gpio_free_array(const struct gpio *array, size_t num) gpio_free((array++)->gpio); } EXPORT_SYMBOL_GPL(gpio_free_array); - -int gpio_lock_as_irq(struct gpio_chip *chip, unsigned int offset) -{ - return gpiod_lock_as_irq(gpiochip_get_desc(chip, offset)); -} -EXPORT_SYMBOL_GPL(gpio_lock_as_irq); - -void gpio_unlock_as_irq(struct gpio_chip *chip, unsigned int offset) -{ - return gpiod_unlock_as_irq(gpiochip_get_desc(chip, offset)); -} -EXPORT_SYMBOL_GPL(gpio_unlock_as_irq); diff --git a/drivers/gpio/gpiolib-sysfs.c b/drivers/gpio/gpiolib-sysfs.c index f150aa288fa1..be45a9283c28 100644 --- a/drivers/gpio/gpiolib-sysfs.c +++ b/drivers/gpio/gpiolib-sysfs.c @@ -161,7 +161,7 @@ static int gpio_setup_irq(struct gpio_desc *desc, struct device *dev, desc->flags &= ~GPIO_TRIGGER_MASK; if (!gpio_flags) { - gpiod_unlock_as_irq(desc); + gpio_unlock_as_irq(desc->chip, gpio_chip_hwgpio(desc)); ret = 0; goto free_id; } @@ -200,7 +200,7 @@ static int gpio_setup_irq(struct gpio_desc *desc, struct device *dev, if (ret < 0) goto free_id; - ret = gpiod_lock_as_irq(desc); + ret = gpio_lock_as_irq(desc->chip, gpio_chip_hwgpio(desc)); if (ret < 0) { gpiod_warn(desc, "failed to flag the GPIO for IRQ\n"); goto free_id; diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 38d176e31379..7582207c92e7 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1428,44 +1428,46 @@ int gpiod_to_irq(const struct gpio_desc *desc) EXPORT_SYMBOL_GPL(gpiod_to_irq); /** - * gpiod_lock_as_irq() - lock a GPIO to be used as IRQ - * @gpio: the GPIO line to lock as used for IRQ + * gpio_lock_as_irq() - lock a GPIO to be used as IRQ + * @chip: the chip the GPIO to lock belongs to + * @offset: the offset of the GPIO to lock as IRQ * * 
This is used directly by GPIO drivers that want to lock down * a certain GPIO line to be used for IRQs. */ -int gpiod_lock_as_irq(struct gpio_desc *desc) +int gpio_lock_as_irq(struct gpio_chip *chip, unsigned int offset) { - if (!desc) + if (offset >= chip->ngpio) return -EINVAL; - if (test_bit(FLAG_IS_OUT, &desc->flags)) { - gpiod_err(desc, + if (test_bit(FLAG_IS_OUT, &chip->desc[offset].flags)) { + chip_err(chip, "%s: tried to flag a GPIO set as output for IRQ\n", __func__); return -EIO; } - set_bit(FLAG_USED_AS_IRQ, &desc->flags); + set_bit(FLAG_USED_AS_IRQ, &chip->desc[offset].flags); return 0; } -EXPORT_SYMBOL_GPL(gpiod_lock_as_irq); +EXPORT_SYMBOL_GPL(gpio_lock_as_irq); /** - * gpiod_unlock_as_irq() - unlock a GPIO used as IRQ - * @gpio: the GPIO line to unlock from IRQ usage + * gpio_unlock_as_irq() - unlock a GPIO used as IRQ + * @chip: the chip the GPIO to lock belongs to + * @offset: the offset of the GPIO to lock as IRQ * * This is used directly by GPIO drivers that want to indicate * that a certain GPIO is no longer used exclusively for IRQ. */ -void gpiod_unlock_as_irq(struct gpio_desc *desc) +void gpio_unlock_as_irq(struct gpio_chip *chip, unsigned int offset) { - if (!desc) + if (offset >= chip->ngpio) return; - clear_bit(FLAG_USED_AS_IRQ, &desc->flags); + clear_bit(FLAG_USED_AS_IRQ, &chip->desc[offset].flags); } -EXPORT_SYMBOL_GPL(gpiod_unlock_as_irq); +EXPORT_SYMBOL_GPL(gpio_unlock_as_irq); /** * gpiod_get_raw_value_cansleep() - return a gpio's raw value diff --git a/include/asm-generic/gpio.h b/include/asm-generic/gpio.h index 23e364538ab5..c1d4105e1c1d 100644 --- a/include/asm-generic/gpio.h +++ b/include/asm-generic/gpio.h @@ -110,9 +110,6 @@ static inline int __gpio_to_irq(unsigned gpio) return gpiod_to_irq(gpio_to_desc(gpio)); } -extern int gpio_lock_as_irq(struct gpio_chip *chip, unsigned int offset); -extern void gpio_unlock_as_irq(struct gpio_chip *chip, unsigned int offset); - extern int gpio_request_one(unsigned gpio, unsigned long flags, const char *label); extern int gpio_request_array(const struct gpio *array, size_t num); extern void gpio_free_array(const struct gpio *array, size_t num); diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 88f92dfae545..c66c91682d9e 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -146,8 +146,8 @@ extern struct gpio_chip *gpiochip_find(void *data, int (*match)(struct gpio_chip *chip, void *data)); /* lock/unlock as IRQ */ -int gpiod_lock_as_irq(struct gpio_desc *desc); -void gpiod_unlock_as_irq(struct gpio_desc *desc); +int gpio_lock_as_irq(struct gpio_chip *chip, unsigned int offset); +void gpio_unlock_as_irq(struct gpio_chip *chip, unsigned int offset); struct gpio_chip *gpiod_to_chip(const struct gpio_desc *desc); -- cgit v1.2.3-59-g8ed1b From 76f4108892d9a9e3408bba839914f97a54086a6f Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 16 Jul 2014 21:03:52 +0000 Subject: hrtimer: Cleanup hrtimer accessors to the timekepeing state Rather then having two similar but totally different implementations that provide timekeeping state to the hrtimer code, try to unify the two implementations to be more simliar. Thus this clarifies ktime_get_update_offsets to ktime_get_update_offsets_now and changes get_xtime... to ktime_get_update_offsets_tick. 
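Both helpers share one calling convention; a rough usage sketch (illustrative only, the real callers are in the diff below):

	ktime_t off_real, off_boot, off_tai;
	ktime_t mono, real, boot;

	/* tick/softirq path: values as of the last tick */
	mono = ktime_get_update_offsets_tick(&off_real, &off_boot, &off_tai);

	/* hrtimer interrupt path: current values */
	mono = ktime_get_update_offsets_now(&off_real, &off_boot, &off_tai);

	/* the other clock bases are derived by adding the offsets */
	real = ktime_add(mono, off_real);
	boot = ktime_add(mono, off_boot);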
Signed-off-by: John Stultz Signed-off-by: Thomas Gleixner Signed-off-by: John Stultz --- include/linux/hrtimer.h | 9 ++++++--- include/linux/time.h | 2 -- kernel/time/hrtimer.c | 19 ++++++++----------- kernel/time/timekeeping.c | 36 +++++++++++++++++++++++------------- 4 files changed, 37 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index bb4ffff31c69..e84eb4f228cd 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -331,9 +331,12 @@ extern ktime_t ktime_get_real(void); extern ktime_t ktime_get_boottime(void); extern ktime_t ktime_get_monotonic_offset(void); extern ktime_t ktime_get_clocktai(void); -extern ktime_t ktime_get_update_offsets(ktime_t *offs_real, ktime_t *offs_boot, - ktime_t *offs_tai); - +extern ktime_t ktime_get_update_offsets_tick(ktime_t *offs_real, + ktime_t *offs_boot, + ktime_t *offs_tai); +extern ktime_t ktime_get_update_offsets_now(ktime_t *offs_real, + ktime_t *offs_boot, + ktime_t *offs_tai); DECLARE_PER_CPU(struct tick_device, tick_cpu_device); diff --git a/include/linux/time.h b/include/linux/time.h index d5d229b2e5af..f6d990d1c79a 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -133,8 +133,6 @@ unsigned long get_seconds(void); struct timespec current_kernel_time(void); struct timespec __current_kernel_time(void); /* does not take xtime_lock */ struct timespec get_monotonic_coarse(void); -void get_xtime_and_monotonic_and_sleep_offset(struct timespec *xtim, - struct timespec *wtom, struct timespec *sleep); void timekeeping_inject_sleeptime(struct timespec *delta); #define CURRENT_TIME (current_kernel_time()) diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 66a6dc1075ad..2f4ef8a1e5ff 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -114,21 +114,18 @@ static inline int hrtimer_clockid_to_base(clockid_t clock_id) */ static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base) { - ktime_t xtim, mono, boot; - struct timespec xts, tom, slp; - s32 tai_offset; + ktime_t xtim, mono, boot, tai; + ktime_t off_real, off_boot, off_tai; - get_xtime_and_monotonic_and_sleep_offset(&xts, &tom, &slp); - tai_offset = timekeeping_get_tai_offset(); + mono = ktime_get_update_offsets_tick(&off_real, &off_boot, &off_tai); + boot = ktime_add(mono, off_boot); + xtim = ktime_add(mono, off_real); + tai = ktime_add(xtim, off_tai); - xtim = timespec_to_ktime(xts); - mono = ktime_add(xtim, timespec_to_ktime(tom)); - boot = ktime_add(mono, timespec_to_ktime(slp)); base->clock_base[HRTIMER_BASE_REALTIME].softirq_time = xtim; base->clock_base[HRTIMER_BASE_MONOTONIC].softirq_time = mono; base->clock_base[HRTIMER_BASE_BOOTTIME].softirq_time = boot; - base->clock_base[HRTIMER_BASE_TAI].softirq_time = - ktime_add(xtim, ktime_set(tai_offset, 0)); + base->clock_base[HRTIMER_BASE_TAI].softirq_time = tai; } /* @@ -673,7 +670,7 @@ static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base) ktime_t *offs_boot = &base->clock_base[HRTIMER_BASE_BOOTTIME].offset; ktime_t *offs_tai = &base->clock_base[HRTIMER_BASE_TAI].offset; - return ktime_get_update_offsets(offs_real, offs_boot, offs_tai); + return ktime_get_update_offsets_now(offs_real, offs_boot, offs_tai); } /* diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 908861c58e62..b94fa3652aaa 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1581,29 +1581,39 @@ void do_timer(unsigned long ticks) } /** - * get_xtime_and_monotonic_and_sleep_offset() - get 
xtime, wall_to_monotonic, - * and sleep offsets. - * @xtim: pointer to timespec to be set with xtime - * @wtom: pointer to timespec to be set with wall_to_monotonic - * @sleep: pointer to timespec to be set with time in suspend + * ktime_get_update_offsets_tick - hrtimer helper + * @offs_real: pointer to storage for monotonic -> realtime offset + * @offs_boot: pointer to storage for monotonic -> boottime offset + * @offs_tai: pointer to storage for monotonic -> clock tai offset + * + * Returns monotonic time at last tick and various offsets */ -void get_xtime_and_monotonic_and_sleep_offset(struct timespec *xtim, - struct timespec *wtom, struct timespec *sleep) +ktime_t ktime_get_update_offsets_tick(ktime_t *offs_real, ktime_t *offs_boot, + ktime_t *offs_tai) { struct timekeeper *tk = &timekeeper; - unsigned long seq; + struct timespec ts; + ktime_t now; + unsigned int seq; do { seq = read_seqcount_begin(&timekeeper_seq); - *xtim = tk_xtime(tk); - *wtom = tk->wall_to_monotonic; - *sleep = tk->total_sleep_time; + + ts = tk_xtime(tk); + + *offs_real = tk->offs_real; + *offs_boot = tk->offs_boot; + *offs_tai = tk->offs_tai; } while (read_seqcount_retry(&timekeeper_seq, seq)); + + now = ktime_set(ts.tv_sec, ts.tv_nsec); + now = ktime_sub(now, *offs_real); + return now; } #ifdef CONFIG_HIGH_RES_TIMERS /** - * ktime_get_update_offsets - hrtimer helper + * ktime_get_update_offsets_now - hrtimer helper * @offs_real: pointer to storage for monotonic -> realtime offset * @offs_boot: pointer to storage for monotonic -> boottime offset * @offs_tai: pointer to storage for monotonic -> clock tai offset @@ -1611,7 +1621,7 @@ void get_xtime_and_monotonic_and_sleep_offset(struct timespec *xtim, * Returns current monotonic time and updates the offsets * Called from hrtimer_interrupt() or retrigger_next_event() */ -ktime_t ktime_get_update_offsets(ktime_t *offs_real, ktime_t *offs_boot, +ktime_t ktime_get_update_offsets_now(ktime_t *offs_real, ktime_t *offs_boot, ktime_t *offs_tai) { struct timekeeper *tk = &timekeeper; -- cgit v1.2.3-59-g8ed1b From 24e4a8c3e8868874835b0f1ad6dd417341e99822 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 16 Jul 2014 21:03:53 +0000 Subject: ktime: Kill non-scalar ktime_t implementation for 2038 The non-scalar ktime_t implementation is basically a timespec which has to be changed to support dates past 2038 on 32bit systems. This patch removes the non-scalar ktime_t implementation, forcing the scalar s64 nanosecond version on all architectures. This may have additional performance overhead on some 32bit systems when converting between ktime_t and timespec structures, however the majority of 32bit systems (arm and i386) were already using scalar ktime_t, so no performance regressions will be seen on those platforms. On affected platforms, I'm open to finding optimizations, including avoiding converting to timespecs where possible. 
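For reference, after this change the type is a plain 64-bit nanosecond scalar on every architecture; a simplified sketch (the real definition is in the diff below):

union ktime {
	s64 tv64;	/* scalar nanoseconds, all architectures */
};
typedef union ktime ktime_t;

/* conversions become simple arithmetic, e.g.: */
static inline s64 example_ktime_to_ns(const ktime_t kt)
{
	return kt.tv64;
}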
[ tglx: We can now cleanup the ktime_t.tv64 mess, but thats a different issue and we can throw a coccinelle script at it ] Signed-off-by: John Stultz Signed-off-by: Thomas Gleixner Signed-off-by: John Stultz --- arch/arm/Kconfig | 1 - arch/hexagon/Kconfig | 1 - arch/s390/Kconfig | 1 - arch/x86/Kconfig | 1 - include/linux/ktime.h | 173 +--------------------------------------------- include/linux/time.h | 11 ++- kernel/time/Kconfig | 4 -- kernel/time/hrtimer.c | 54 --------------- kernel/time/timekeeping.c | 7 +- 9 files changed, 7 insertions(+), 246 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 05a71511ab3c..b9f6728331c8 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -64,7 +64,6 @@ config ARM select HAVE_UID16 select HAVE_VIRT_CPU_ACCOUNTING_GEN select IRQ_FORCED_THREADING - select KTIME_SCALAR select MODULES_USE_ELF_REL select NO_BOOTMEM select OLD_SIGACTION diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig index 0fd6138f6203..4dc89d1f9c48 100644 --- a/arch/hexagon/Kconfig +++ b/arch/hexagon/Kconfig @@ -23,7 +23,6 @@ config HEXAGON select GENERIC_IOMAP select GENERIC_SMP_IDLE_THREAD select STACKTRACE_SUPPORT - select KTIME_SCALAR select GENERIC_CLOCKEVENTS select GENERIC_CLOCKEVENTS_BROADCAST select MODULES_USE_ELF_RELA diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index bb63499fc5d3..1afc7a686702 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -137,7 +137,6 @@ config S390 select HAVE_SYSCALL_TRACEPOINTS select HAVE_UID16 if 32BIT select HAVE_VIRT_CPU_ACCOUNTING - select KTIME_SCALAR if 32BIT select MODULES_USE_ELF_RELA select NO_BOOTMEM select OLD_SIGACTION diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index a8f749ef0fdc..7fa17b5ce668 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -111,7 +111,6 @@ config X86 select ARCH_CLOCKSOURCE_DATA select GENERIC_CLOCKEVENTS_BROADCAST if X86_64 || (X86_32 && X86_LOCAL_APIC) select GENERIC_TIME_VSYSCALL - select KTIME_SCALAR if X86_32 select GENERIC_STRNCPY_FROM_USER select GENERIC_STRNLEN_USER select HAVE_CONTEXT_TRACKING if X86_64 diff --git a/include/linux/ktime.h b/include/linux/ktime.h index de9e46e6bcc9..fbc64f8481b7 100644 --- a/include/linux/ktime.h +++ b/include/linux/ktime.h @@ -27,43 +27,19 @@ /* * ktime_t: * - * On 64-bit CPUs a single 64-bit variable is used to store the hrtimers + * A single 64-bit variable is used to store the hrtimers * internal representation of time values in scalar nanoseconds. The * design plays out best on 64-bit CPUs, where most conversions are * NOPs and most arithmetic ktime_t operations are plain arithmetic * operations. * - * On 32-bit CPUs an optimized representation of the timespec structure - * is used to avoid expensive conversions from and to timespecs. The - * endian-aware order of the tv struct members is chosen to allow - * mathematical operations on the tv64 member of the union too, which - * for certain operations produces better code. - * - * For architectures with efficient support for 64/32-bit conversions the - * plain scalar nanosecond based representation can be selected by the - * config switch CONFIG_KTIME_SCALAR. 
*/ union ktime { s64 tv64; -#if BITS_PER_LONG != 64 && !defined(CONFIG_KTIME_SCALAR) - struct { -# ifdef __BIG_ENDIAN - s32 sec, nsec; -# else - s32 nsec, sec; -# endif - } tv; -#endif }; typedef union ktime ktime_t; /* Kill this */ -/* - * ktime_t definitions when using the 64-bit scalar representation: - */ - -#if (BITS_PER_LONG == 64) || defined(CONFIG_KTIME_SCALAR) - /** * ktime_set - Set a ktime_t variable from a seconds/nanoseconds value * @secs: seconds to set @@ -123,153 +99,6 @@ static inline ktime_t timeval_to_ktime(struct timeval tv) /* Convert ktime_t to nanoseconds - NOP in the scalar storage format: */ #define ktime_to_ns(kt) ((kt).tv64) -#else /* !((BITS_PER_LONG == 64) || defined(CONFIG_KTIME_SCALAR)) */ - -/* - * Helper macros/inlines to get the ktime_t math right in the timespec - * representation. The macros are sometimes ugly - their actual use is - * pretty okay-ish, given the circumstances. We do all this for - * performance reasons. The pure scalar nsec_t based code was nice and - * simple, but created too many 64-bit / 32-bit conversions and divisions. - * - * Be especially aware that negative values are represented in a way - * that the tv.sec field is negative and the tv.nsec field is greater - * or equal to zero but less than nanoseconds per second. This is the - * same representation which is used by timespecs. - * - * tv.sec < 0 and 0 >= tv.nsec < NSEC_PER_SEC - */ - -/* Set a ktime_t variable to a value in sec/nsec representation: */ -static inline ktime_t ktime_set(const long secs, const unsigned long nsecs) -{ - return (ktime_t) { .tv = { .sec = secs, .nsec = nsecs } }; -} - -/** - * ktime_sub - subtract two ktime_t variables - * @lhs: minuend - * @rhs: subtrahend - * - * Return: The remainder of the subtraction. - */ -static inline ktime_t ktime_sub(const ktime_t lhs, const ktime_t rhs) -{ - ktime_t res; - - res.tv64 = lhs.tv64 - rhs.tv64; - if (res.tv.nsec < 0) - res.tv.nsec += NSEC_PER_SEC; - - return res; -} - -/** - * ktime_add - add two ktime_t variables - * @add1: addend1 - * @add2: addend2 - * - * Return: The sum of @add1 and @add2. - */ -static inline ktime_t ktime_add(const ktime_t add1, const ktime_t add2) -{ - ktime_t res; - - res.tv64 = add1.tv64 + add2.tv64; - /* - * performance trick: the (u32) -NSEC gives 0x00000000Fxxxxxxx - * so we subtract NSEC_PER_SEC and add 1 to the upper 32 bit. - * - * it's equivalent to: - * tv.nsec -= NSEC_PER_SEC - * tv.sec ++; - */ - if (res.tv.nsec >= NSEC_PER_SEC) - res.tv64 += (u32)-NSEC_PER_SEC; - - return res; -} - -/** - * ktime_add_ns - Add a scalar nanoseconds value to a ktime_t variable - * @kt: addend - * @nsec: the scalar nsec value to add - * - * Return: The sum of @kt and @nsec in ktime_t format. - */ -extern ktime_t ktime_add_ns(const ktime_t kt, u64 nsec); - -/** - * ktime_sub_ns - Subtract a scalar nanoseconds value from a ktime_t variable - * @kt: minuend - * @nsec: the scalar nsec value to subtract - * - * Return: The subtraction of @nsec from @kt in ktime_t format. - */ -extern ktime_t ktime_sub_ns(const ktime_t kt, u64 nsec); - -/** - * timespec_to_ktime - convert a timespec to ktime_t format - * @ts: the timespec variable to convert - * - * Return: A ktime_t variable with the converted timespec value. 
- */ -static inline ktime_t timespec_to_ktime(const struct timespec ts) -{ - return (ktime_t) { .tv = { .sec = (s32)ts.tv_sec, - .nsec = (s32)ts.tv_nsec } }; -} - -/** - * timeval_to_ktime - convert a timeval to ktime_t format - * @tv: the timeval variable to convert - * - * Return: A ktime_t variable with the converted timeval value. - */ -static inline ktime_t timeval_to_ktime(const struct timeval tv) -{ - return (ktime_t) { .tv = { .sec = (s32)tv.tv_sec, - .nsec = (s32)(tv.tv_usec * - NSEC_PER_USEC) } }; -} - -/** - * ktime_to_timespec - convert a ktime_t variable to timespec format - * @kt: the ktime_t variable to convert - * - * Return: The timespec representation of the ktime value. - */ -static inline struct timespec ktime_to_timespec(const ktime_t kt) -{ - return (struct timespec) { .tv_sec = (time_t) kt.tv.sec, - .tv_nsec = (long) kt.tv.nsec }; -} - -/** - * ktime_to_timeval - convert a ktime_t variable to timeval format - * @kt: the ktime_t variable to convert - * - * Return: The timeval representation of the ktime value. - */ -static inline struct timeval ktime_to_timeval(const ktime_t kt) -{ - return (struct timeval) { - .tv_sec = (time_t) kt.tv.sec, - .tv_usec = (suseconds_t) (kt.tv.nsec / NSEC_PER_USEC) }; -} - -/** - * ktime_to_ns - convert a ktime_t variable to scalar nanoseconds - * @kt: the ktime_t variable to convert - * - * Return: The scalar nanoseconds representation of @kt. - */ -static inline s64 ktime_to_ns(const ktime_t kt) -{ - return (s64) kt.tv.sec * NSEC_PER_SEC + kt.tv.nsec; -} - -#endif /* !((BITS_PER_LONG == 64) || defined(CONFIG_KTIME_SCALAR)) */ /** * ktime_equal - Compares two ktime_t variables to see if they are equal diff --git a/include/linux/time.h b/include/linux/time.h index f6d990d1c79a..129f0bd36a8d 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -19,6 +19,10 @@ extern struct timezone sys_tz; #define TIME_T_MAX (time_t)((1UL << ((sizeof(time_t) << 3) - 1)) - 1) +/* Located here for timespec_valid_strict */ +#define KTIME_MAX ((s64)~((u64)1 << 63)) +#define KTIME_SEC_MAX (KTIME_MAX / NSEC_PER_SEC) + static inline int timespec_equal(const struct timespec *a, const struct timespec *b) { @@ -84,13 +88,6 @@ static inline struct timespec timespec_sub(struct timespec lhs, return ts_delta; } -#define KTIME_MAX ((s64)~((u64)1 << 63)) -#if (BITS_PER_LONG == 64) -# define KTIME_SEC_MAX (KTIME_MAX / NSEC_PER_SEC) -#else -# define KTIME_SEC_MAX LONG_MAX -#endif - /* * Returns true if the timespec is norm, false if denorm: */ diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index f448513a45ed..feccfd888732 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -20,10 +20,6 @@ config GENERIC_TIME_VSYSCALL config GENERIC_TIME_VSYSCALL_OLD bool -# ktime_t scalar 64bit nsec representation -config KTIME_SCALAR - bool - # Old style timekeeping config ARCH_USES_GETTIMEOFFSET bool diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 2f4ef8a1e5ff..19f211051c35 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -261,60 +261,6 @@ lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags) * too large for inlining: */ #if BITS_PER_LONG < 64 -# ifndef CONFIG_KTIME_SCALAR -/** - * ktime_add_ns - Add a scalar nanoseconds value to a ktime_t variable - * @kt: addend - * @nsec: the scalar nsec value to add - * - * Returns the sum of kt and nsec in ktime_t format - */ -ktime_t ktime_add_ns(const ktime_t kt, u64 nsec) -{ - ktime_t tmp; - - if (likely(nsec < NSEC_PER_SEC)) { - tmp.tv64 = nsec; - } else { - unsigned 
long rem = do_div(nsec, NSEC_PER_SEC); - - /* Make sure nsec fits into long */ - if (unlikely(nsec > KTIME_SEC_MAX)) - return (ktime_t){ .tv64 = KTIME_MAX }; - - tmp = ktime_set((long)nsec, rem); - } - - return ktime_add(kt, tmp); -} - -EXPORT_SYMBOL_GPL(ktime_add_ns); - -/** - * ktime_sub_ns - Subtract a scalar nanoseconds value from a ktime_t variable - * @kt: minuend - * @nsec: the scalar nsec value to subtract - * - * Returns the subtraction of @nsec from @kt in ktime_t format - */ -ktime_t ktime_sub_ns(const ktime_t kt, u64 nsec) -{ - ktime_t tmp; - - if (likely(nsec < NSEC_PER_SEC)) { - tmp.tv64 = nsec; - } else { - unsigned long rem = do_div(nsec, NSEC_PER_SEC); - - tmp = ktime_set((long)nsec, rem); - } - - return ktime_sub(kt, tmp); -} - -EXPORT_SYMBOL_GPL(ktime_sub_ns); -# endif /* !CONFIG_KTIME_SCALAR */ - /* * Divide a ktime value by a nanosecond value */ diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index b94fa3652aaa..cafef242d8f9 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -344,11 +344,8 @@ ktime_t ktime_get(void) nsecs = timekeeping_get_ns(tk) + tk->wall_to_monotonic.tv_nsec; } while (read_seqcount_retry(&timekeeper_seq, seq)); - /* - * Use ktime_set/ktime_add_ns to create a proper ktime on - * 32-bit architectures without CONFIG_KTIME_SCALAR. - */ - return ktime_add_ns(ktime_set(secs, 0), nsecs); + + return ktime_set(secs, nsecs); } EXPORT_SYMBOL_GPL(ktime_get); -- cgit v1.2.3-59-g8ed1b From 166afb64511eef08e13331b970c44fe91cea45ef Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 16 Jul 2014 21:03:55 +0000 Subject: ktime: Sanitize ktime_to_us/ms conversion With the plain nanoseconds based ktime_t we can simply use ktime_divns() instead of going through loops and hoops of timespec/timeval conversion. 
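A minimal usage sketch of the simplified conversions (hypothetical caller, not part of this patch):

	ktime_t start, delta;

	start = ktime_get();
	/* ... do some work ... */
	delta = ktime_sub(ktime_get(), start);

	pr_info("took %lld us (%lld ms)\n",
		ktime_to_us(delta), ktime_to_ms(delta));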
Reported-by: John Stultz Signed-off-by: Thomas Gleixner Signed-off-by: John Stultz --- include/linux/hrtimer.h | 6 ------ include/linux/ktime.h | 12 ++++++++---- kernel/time/hrtimer.c | 1 + 3 files changed, 9 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index e84eb4f228cd..adf5056bd7b3 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -457,12 +457,6 @@ extern void hrtimer_run_pending(void); /* Bootup initialization: */ extern void __init hrtimers_init(void); -#if BITS_PER_LONG < 64 -extern u64 ktime_divns(const ktime_t kt, s64 div); -#else /* BITS_PER_LONG < 64 */ -# define ktime_divns(kt, div) (u64)((kt).tv64 / (div)) -#endif - /* Show pending timers: */ extern void sysrq_timer_list_show(void); diff --git a/include/linux/ktime.h b/include/linux/ktime.h index fbc64f8481b7..74eaba9b3569 100644 --- a/include/linux/ktime.h +++ b/include/linux/ktime.h @@ -157,16 +157,20 @@ static inline bool ktime_before(const ktime_t cmp1, const ktime_t cmp2) return ktime_compare(cmp1, cmp2) < 0; } +#if BITS_PER_LONG < 64 +extern u64 ktime_divns(const ktime_t kt, s64 div); +#else /* BITS_PER_LONG < 64 */ +# define ktime_divns(kt, div) (u64)((kt).tv64 / (div)) +#endif + static inline s64 ktime_to_us(const ktime_t kt) { - struct timeval tv = ktime_to_timeval(kt); - return (s64) tv.tv_sec * USEC_PER_SEC + tv.tv_usec; + return ktime_divns(kt, NSEC_PER_USEC); } static inline s64 ktime_to_ms(const ktime_t kt) { - struct timeval tv = ktime_to_timeval(kt); - return (s64) tv.tv_sec * MSEC_PER_SEC + tv.tv_usec / USEC_PER_MSEC; + return ktime_divns(kt, NSEC_PER_MSEC); } static inline s64 ktime_us_delta(const ktime_t later, const ktime_t earlier) diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 19f211051c35..64843a836637 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -280,6 +280,7 @@ u64 ktime_divns(const ktime_t kt, s64 div) return dclc; } +EXPORT_SYMBOL_GPL(ktime_divns); #endif /* BITS_PER_LONG >= 64 */ /* -- cgit v1.2.3-59-g8ed1b From b17b20d70dcbe48dd1aa6aba073a60ddfce5d7db Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 16 Jul 2014 21:03:56 +0000 Subject: ktime: Change ktime_set() to take 64bit seconds value In order to support dates past 2038 on 32bit systems, ktime_set() needs to handle 64bit second values. [ tglx: Removed the BITS_PER_LONG check ] Signed-off-by: John Stultz Signed-off-by: Thomas Gleixner Signed-off-by: John Stultz --- include/linux/ktime.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ktime.h b/include/linux/ktime.h index 74eaba9b3569..538c283714e1 100644 --- a/include/linux/ktime.h +++ b/include/linux/ktime.h @@ -47,13 +47,12 @@ typedef union ktime ktime_t; /* Kill this */ * * Return: The ktime_t representation of the value. */ -static inline ktime_t ktime_set(const long secs, const unsigned long nsecs) +static inline ktime_t ktime_set(const s64 secs, const unsigned long nsecs) { -#if (BITS_PER_LONG == 64) if (unlikely(secs >= KTIME_SEC_MAX)) return (ktime_t){ .tv64 = KTIME_MAX }; -#endif - return (ktime_t) { .tv64 = (s64)secs * NSEC_PER_SEC + (s64)nsecs }; + + return (ktime_t) { .tv64 = secs * NSEC_PER_SEC + (s64)nsecs }; } /* Subtract two ktime_t variables. 
rem = lhs -rhs: */ -- cgit v1.2.3-59-g8ed1b From 361a3bf00582469877f8d18ff20f1efa6b781274 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 16 Jul 2014 21:03:58 +0000 Subject: time64: Add time64.h header and define struct timespec64 Define the timespec64 structure and standard helper functions. [ tglx: Make it 32bit only. 64bit really can map timespec to timespec64 ] Signed-off-by: John Stultz Signed-off-by: Thomas Gleixner Signed-off-by: John Stultz --- include/linux/time.h | 15 +---- include/linux/time64.h | 162 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 163 insertions(+), 14 deletions(-) create mode 100644 include/linux/time64.h (limited to 'include/linux') diff --git a/include/linux/time.h b/include/linux/time.h index 129f0bd36a8d..234feac7f1c3 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -4,25 +4,12 @@ # include # include # include -#include +# include extern struct timezone sys_tz; -/* Parameters used to convert the timespec values: */ -#define MSEC_PER_SEC 1000L -#define USEC_PER_MSEC 1000L -#define NSEC_PER_USEC 1000L -#define NSEC_PER_MSEC 1000000L -#define USEC_PER_SEC 1000000L -#define NSEC_PER_SEC 1000000000L -#define FSEC_PER_SEC 1000000000000000LL - #define TIME_T_MAX (time_t)((1UL << ((sizeof(time_t) << 3) - 1)) - 1) -/* Located here for timespec_valid_strict */ -#define KTIME_MAX ((s64)~((u64)1 << 63)) -#define KTIME_SEC_MAX (KTIME_MAX / NSEC_PER_SEC) - static inline int timespec_equal(const struct timespec *a, const struct timespec *b) { diff --git a/include/linux/time64.h b/include/linux/time64.h new file mode 100644 index 000000000000..e7b499e1cd79 --- /dev/null +++ b/include/linux/time64.h @@ -0,0 +1,162 @@ +#ifndef _LINUX_TIME64_H +#define _LINUX_TIME64_H + +#include + +typedef __s64 time64_t; + +/* + * This wants to go into uapi/linux/time.h once we agreed about the + * userspace interfaces. 
+ */ +#if __BITS_PER_LONG == 64 +# define timespec64 timespec +#else +struct timespec64 { + time64_t tv_sec; /* seconds */ + long tv_nsec; /* nanoseconds */ +}; +#endif + +/* Parameters used to convert the timespec values: */ +#define MSEC_PER_SEC 1000L +#define USEC_PER_MSEC 1000L +#define NSEC_PER_USEC 1000L +#define NSEC_PER_MSEC 1000000L +#define USEC_PER_SEC 1000000L +#define NSEC_PER_SEC 1000000000L +#define FSEC_PER_SEC 1000000000000000LL + +/* Located here for timespec[64]_valid_strict */ +#define KTIME_MAX ((s64)~((u64)1 << 63)) +#define KTIME_SEC_MAX (KTIME_MAX / NSEC_PER_SEC) + +#if __BITS_PER_LONG == 64 + +# define timespec64_equal timespec_equal +# define timespec64_compare timespec_compare +# define set_normalized_timespec64 set_normalized_timespec +# define timespec64_add_safe timespec_add_safe +# define timespec64_add timespec_add +# define timespec64_sub timespec_sub +# define timespec64_valid timespec_valid +# define timespec64_valid_strict timespec_valid_strict +# define timespec64_to_ns timespec_to_ns +# define ns_to_timespec64 ns_to_timespec +# define timespec64_add_ns timespec_add_ns + +#else + +static inline int timespec64_equal(const struct timespec64 *a, + const struct timespec64 *b) +{ + return (a->tv_sec == b->tv_sec) && (a->tv_nsec == b->tv_nsec); +} + +/* + * lhs < rhs: return <0 + * lhs == rhs: return 0 + * lhs > rhs: return >0 + */ +static inline int timespec64_compare(const struct timespec64 *lhs, const struct timespec64 *rhs) +{ + if (lhs->tv_sec < rhs->tv_sec) + return -1; + if (lhs->tv_sec > rhs->tv_sec) + return 1; + return lhs->tv_nsec - rhs->tv_nsec; +} + +extern void set_normalized_timespec64(struct timespec64 *ts, time64_t sec, s64 nsec); + +/* + * timespec64_add_safe assumes both values are positive and checks for + * overflow. It will return TIME_T_MAX if the returned value would be + * smaller then either of the arguments. + */ +extern struct timespec64 timespec64_add_safe(const struct timespec64 lhs, + const struct timespec64 rhs); + + +static inline struct timespec64 timespec64_add(struct timespec64 lhs, + struct timespec64 rhs) +{ + struct timespec64 ts_delta; + set_normalized_timespec64(&ts_delta, lhs.tv_sec + rhs.tv_sec, + lhs.tv_nsec + rhs.tv_nsec); + return ts_delta; +} + +/* + * sub = lhs - rhs, in normalized form + */ +static inline struct timespec64 timespec64_sub(struct timespec64 lhs, + struct timespec64 rhs) +{ + struct timespec64 ts_delta; + set_normalized_timespec64(&ts_delta, lhs.tv_sec - rhs.tv_sec, + lhs.tv_nsec - rhs.tv_nsec); + return ts_delta; +} + +/* + * Returns true if the timespec64 is norm, false if denorm: + */ +static inline bool timespec64_valid(const struct timespec64 *ts) +{ + /* Dates before 1970 are bogus */ + if (ts->tv_sec < 0) + return false; + /* Can't have more nanoseconds then a second */ + if ((unsigned long)ts->tv_nsec >= NSEC_PER_SEC) + return false; + return true; +} + +static inline bool timespec64_valid_strict(const struct timespec64 *ts) +{ + if (!timespec64_valid(ts)) + return false; + /* Disallow values that could overflow ktime_t */ + if ((unsigned long long)ts->tv_sec >= KTIME_SEC_MAX) + return false; + return true; +} + +/** + * timespec64_to_ns - Convert timespec64 to nanoseconds + * @ts: pointer to the timespec64 variable to be converted + * + * Returns the scalar nanosecond representation of the timespec64 + * parameter. 
+ */ +static inline s64 timespec64_to_ns(const struct timespec64 *ts) +{ + return ((s64) ts->tv_sec * NSEC_PER_SEC) + ts->tv_nsec; +} + +/** + * ns_to_timespec64 - Convert nanoseconds to timespec64 + * @nsec: the nanoseconds value to be converted + * + * Returns the timespec64 representation of the nsec parameter. + */ +extern struct timespec64 ns_to_timespec64(const s64 nsec); + +/** + * timespec64_add_ns - Adds nanoseconds to a timespec64 + * @a: pointer to timespec64 to be incremented + * @ns: unsigned nanoseconds value to be added + * + * This must always be inlined because its used from the x86-64 vdso, + * which cannot call other kernel functions. + */ +static __always_inline void timespec64_add_ns(struct timespec64 *a, u64 ns) +{ + a->tv_sec += __iter_div_u64_rem(a->tv_nsec + ns, NSEC_PER_SEC, &ns); + a->tv_nsec = ns; +} + +#endif + +#endif /* _LINUX_TIME64_H */ -- cgit v1.2.3-59-g8ed1b From 49cd6f869984692547c57621bf42697aaa7f5622 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 16 Jul 2014 21:03:59 +0000 Subject: time: More core infrastructure for timespec64 Helper and conversion functions for timespec64. Signed-off-by: John Stultz Signed-off-by: Thomas Gleixner Signed-off-by: John Stultz --- include/linux/ktime.h | 28 +++++++++++++++++++++++ include/linux/time64.h | 28 +++++++++++++++++++++++ kernel/time/time.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 118 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ktime.h b/include/linux/ktime.h index 538c283714e1..da6b680c252b 100644 --- a/include/linux/ktime.h +++ b/include/linux/ktime.h @@ -83,6 +83,12 @@ static inline ktime_t timespec_to_ktime(struct timespec ts) return ktime_set(ts.tv_sec, ts.tv_nsec); } +/* convert a timespec64 to ktime_t format: */ +static inline ktime_t timespec64_to_ktime(struct timespec64 ts) +{ + return ktime_set(ts.tv_sec, ts.tv_nsec); +} + /* convert a timeval to ktime_t format: */ static inline ktime_t timeval_to_ktime(struct timeval tv) { @@ -92,6 +98,9 @@ static inline ktime_t timeval_to_ktime(struct timeval tv) /* Map the ktime_t to timespec conversion to ns_to_timespec function */ #define ktime_to_timespec(kt) ns_to_timespec((kt).tv64) +/* Map the ktime_t to timespec conversion to ns_to_timespec function */ +#define ktime_to_timespec64(kt) ns_to_timespec64((kt).tv64) + /* Map the ktime_t to timeval conversion to ns_to_timeval function */ #define ktime_to_timeval(kt) ns_to_timeval((kt).tv64) @@ -213,6 +222,25 @@ static inline __must_check bool ktime_to_timespec_cond(const ktime_t kt, } } +/** + * ktime_to_timespec64_cond - convert a ktime_t variable to timespec64 + * format only if the variable contains data + * @kt: the ktime_t variable to convert + * @ts: the timespec variable to store the result in + * + * Return: %true if there was a successful conversion, %false if kt was 0. + */ +static inline __must_check bool ktime_to_timespec64_cond(const ktime_t kt, + struct timespec64 *ts) +{ + if (kt.tv64) { + *ts = ktime_to_timespec64(kt); + return true; + } else { + return false; + } +} + /* * The resolution of the clocks. 
The resolution value is returned in * the clock_getres() system call to give application programmers an diff --git a/include/linux/time64.h b/include/linux/time64.h index e7b499e1cd79..a3831478d9cf 100644 --- a/include/linux/time64.h +++ b/include/linux/time64.h @@ -33,6 +33,16 @@ struct timespec64 { #if __BITS_PER_LONG == 64 +static inline struct timespec timespec64_to_timespec(const struct timespec64 ts64) +{ + return ts64; +} + +static inline struct timespec64 timespec_to_timespec64(const struct timespec ts) +{ + return ts; +} + # define timespec64_equal timespec_equal # define timespec64_compare timespec_compare # define set_normalized_timespec64 set_normalized_timespec @@ -47,6 +57,24 @@ struct timespec64 { #else +static inline struct timespec timespec64_to_timespec(const struct timespec64 ts64) +{ + struct timespec ret; + + ret.tv_sec = (time_t)ts64.tv_sec; + ret.tv_nsec = ts64.tv_nsec; + return ret; +} + +static inline struct timespec64 timespec_to_timespec64(const struct timespec ts) +{ + struct timespec64 ret; + + ret.tv_sec = ts.tv_sec; + ret.tv_nsec = ts.tv_nsec; + return ret; +} + static inline int timespec64_equal(const struct timespec64 *a, const struct timespec64 *b) { diff --git a/kernel/time/time.c b/kernel/time/time.c index 7c7964c33ae7..e8121a67fd74 100644 --- a/kernel/time/time.c +++ b/kernel/time/time.c @@ -420,6 +420,68 @@ struct timeval ns_to_timeval(const s64 nsec) } EXPORT_SYMBOL(ns_to_timeval); +#if BITS_PER_LONG == 32 +/** + * set_normalized_timespec - set timespec sec and nsec parts and normalize + * + * @ts: pointer to timespec variable to be set + * @sec: seconds to set + * @nsec: nanoseconds to set + * + * Set seconds and nanoseconds field of a timespec variable and + * normalize to the timespec storage format + * + * Note: The tv_nsec part is always in the range of + * 0 <= tv_nsec < NSEC_PER_SEC + * For negative values only the tv_sec field is negative ! + */ +void set_normalized_timespec64(struct timespec64 *ts, time64_t sec, s64 nsec) +{ + while (nsec >= NSEC_PER_SEC) { + /* + * The following asm() prevents the compiler from + * optimising this loop into a modulo operation. See + * also __iter_div_u64_rem() in include/linux/time.h + */ + asm("" : "+rm"(nsec)); + nsec -= NSEC_PER_SEC; + ++sec; + } + while (nsec < 0) { + asm("" : "+rm"(nsec)); + nsec += NSEC_PER_SEC; + --sec; + } + ts->tv_sec = sec; + ts->tv_nsec = nsec; +} +EXPORT_SYMBOL(set_normalized_timespec64); + +/** + * ns_to_timespec64 - Convert nanoseconds to timespec64 + * @nsec: the nanoseconds value to be converted + * + * Returns the timespec64 representation of the nsec parameter. + */ +struct timespec64 ns_to_timespec64(const s64 nsec) +{ + struct timespec64 ts; + s32 rem; + + if (!nsec) + return (struct timespec64) {0, 0}; + + ts.tv_sec = div_s64_rem(nsec, NSEC_PER_SEC, &rem); + if (unlikely(rem < 0)) { + ts.tv_sec--; + rem += NSEC_PER_SEC; + } + ts.tv_nsec = rem; + + return ts; +} +EXPORT_SYMBOL(ns_to_timespec64); +#endif /* * When we convert to jiffies then we interpret incoming values * the following way: -- cgit v1.2.3-59-g8ed1b From 7d489d15ce4be5310ca60e5896df833f9b3b4088 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 16 Jul 2014 21:04:01 +0000 Subject: timekeeping: Convert timekeeping core to use timespec64s Convert the core timekeeping logic to use timespec64s. This moves the 2038 issues out of the core logic and into all of the accessor functions. Future changes will need to push the timespec64s out to all timekeeping users, but that can be done interface by interface. 
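The pattern used at the accessor boundary looks roughly like this; a simplified sketch written as if inside timekeeping.c, with the seqcount loop omitted (the real conversions are in the diff below):

static void example_get_time(struct timespec *ts)
{
	struct timespec64 ts64;

	/* the core now keeps time in timespec64 internally */
	ts64 = tk_xtime(&timekeeper);

	/* legacy interfaces convert at the boundary, so the
	 * year-2038 limit stays out of the core logic */
	*ts = timespec64_to_timespec(ts64);
}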
Signed-off-by: John Stultz Signed-off-by: Thomas Gleixner Signed-off-by: John Stultz --- include/linux/timekeeper_internal.h | 10 +-- kernel/time/ntp.c | 8 +- kernel/time/ntp_internal.h | 2 +- kernel/time/timekeeping.c | 172 ++++++++++++++++++++---------------- kernel/time/timekeeping_debug.c | 2 +- kernel/time/timekeeping_internal.h | 2 +- 6 files changed, 109 insertions(+), 87 deletions(-) (limited to 'include/linux') diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index c1825eb436ed..1b05491e10f9 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -55,15 +55,15 @@ struct timekeeper { * - wall_to_monotonic is no longer the boot time, getboottime must be * used instead. */ - struct timespec wall_to_monotonic; + struct timespec64 wall_to_monotonic; /* Offset clock monotonic -> clock realtime */ ktime_t offs_real; /* time spent in suspend */ - struct timespec total_sleep_time; + struct timespec64 total_sleep_time; /* Offset clock monotonic -> clock boottime */ ktime_t offs_boot; /* The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock. */ - struct timespec raw_time; + struct timespec64 raw_time; /* The current UTC to TAI offset in seconds */ s32 tai_offset; /* Offset clock monotonic -> clock tai */ @@ -71,9 +71,9 @@ struct timekeeper { }; -static inline struct timespec tk_xtime(struct timekeeper *tk) +static inline struct timespec64 tk_xtime(struct timekeeper *tk) { - struct timespec ts; + struct timespec64 ts; ts.tv_sec = tk->xtime_sec; ts.tv_nsec = (long)(tk->xtime_nsec >> tk->shift); diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 33db43a39515..6e87df94122f 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -531,7 +531,7 @@ void ntp_notify_cmos_timer(void) { } /* * Propagate a new txc->status value into the NTP state: */ -static inline void process_adj_status(struct timex *txc, struct timespec *ts) +static inline void process_adj_status(struct timex *txc, struct timespec64 *ts) { if ((time_status & STA_PLL) && !(txc->status & STA_PLL)) { time_state = TIME_OK; @@ -554,7 +554,7 @@ static inline void process_adj_status(struct timex *txc, struct timespec *ts) static inline void process_adjtimex_modes(struct timex *txc, - struct timespec *ts, + struct timespec64 *ts, s32 *time_tai) { if (txc->modes & ADJ_STATUS) @@ -640,7 +640,7 @@ int ntp_validate_timex(struct timex *txc) * adjtimex mainly allows reading (and writing, if superuser) of * kernel time-keeping variables. used by xntpd. 
*/ -int __do_adjtimex(struct timex *txc, struct timespec *ts, s32 *time_tai) +int __do_adjtimex(struct timex *txc, struct timespec64 *ts, s32 *time_tai) { int result; @@ -684,7 +684,7 @@ int __do_adjtimex(struct timex *txc, struct timespec *ts, s32 *time_tai) /* fill PPS status fields */ pps_fill_timex(txc); - txc->time.tv_sec = ts->tv_sec; + txc->time.tv_sec = (time_t)ts->tv_sec; txc->time.tv_usec = ts->tv_nsec; if (!(time_status & STA_NANO)) txc->time.tv_usec /= NSEC_PER_USEC; diff --git a/kernel/time/ntp_internal.h b/kernel/time/ntp_internal.h index 1950cb4ca2a4..bbd102ad9df7 100644 --- a/kernel/time/ntp_internal.h +++ b/kernel/time/ntp_internal.h @@ -7,6 +7,6 @@ extern void ntp_clear(void); extern u64 ntp_tick_length(void); extern int second_overflow(unsigned long secs); extern int ntp_validate_timex(struct timex *); -extern int __do_adjtimex(struct timex *, struct timespec *, s32 *); +extern int __do_adjtimex(struct timex *, struct timespec64 *, s32 *); extern void __hardpps(const struct timespec *, const struct timespec *); #endif /* _LINUX_NTP_INTERNAL_H */ diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index cafef242d8f9..84a2075c3eb4 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -51,43 +51,43 @@ static inline void tk_normalize_xtime(struct timekeeper *tk) } } -static void tk_set_xtime(struct timekeeper *tk, const struct timespec *ts) +static void tk_set_xtime(struct timekeeper *tk, const struct timespec64 *ts) { tk->xtime_sec = ts->tv_sec; tk->xtime_nsec = (u64)ts->tv_nsec << tk->shift; } -static void tk_xtime_add(struct timekeeper *tk, const struct timespec *ts) +static void tk_xtime_add(struct timekeeper *tk, const struct timespec64 *ts) { tk->xtime_sec += ts->tv_sec; tk->xtime_nsec += (u64)ts->tv_nsec << tk->shift; tk_normalize_xtime(tk); } -static void tk_set_wall_to_mono(struct timekeeper *tk, struct timespec wtm) +static void tk_set_wall_to_mono(struct timekeeper *tk, struct timespec64 wtm) { - struct timespec tmp; + struct timespec64 tmp; /* * Verify consistency of: offset_real = -wall_to_monotonic * before modifying anything */ - set_normalized_timespec(&tmp, -tk->wall_to_monotonic.tv_sec, + set_normalized_timespec64(&tmp, -tk->wall_to_monotonic.tv_sec, -tk->wall_to_monotonic.tv_nsec); - WARN_ON_ONCE(tk->offs_real.tv64 != timespec_to_ktime(tmp).tv64); + WARN_ON_ONCE(tk->offs_real.tv64 != timespec64_to_ktime(tmp).tv64); tk->wall_to_monotonic = wtm; - set_normalized_timespec(&tmp, -wtm.tv_sec, -wtm.tv_nsec); - tk->offs_real = timespec_to_ktime(tmp); + set_normalized_timespec64(&tmp, -wtm.tv_sec, -wtm.tv_nsec); + tk->offs_real = timespec64_to_ktime(tmp); tk->offs_tai = ktime_add(tk->offs_real, ktime_set(tk->tai_offset, 0)); } -static void tk_set_sleep_time(struct timekeeper *tk, struct timespec t) +static void tk_set_sleep_time(struct timekeeper *tk, struct timespec64 t) { /* Verify consistency before modifying */ - WARN_ON_ONCE(tk->offs_boot.tv64 != timespec_to_ktime(tk->total_sleep_time).tv64); + WARN_ON_ONCE(tk->offs_boot.tv64 != timespec64_to_ktime(tk->total_sleep_time).tv64); tk->total_sleep_time = t; - tk->offs_boot = timespec_to_ktime(t); + tk->offs_boot = timespec64_to_ktime(t); } /** @@ -281,7 +281,7 @@ static void timekeeping_forward_now(struct timekeeper *tk) tk_normalize_xtime(tk); nsec = clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift); - timespec_add_ns(&tk->raw_time, nsec); + timespec64_add_ns(&tk->raw_time, nsec); } /** @@ -360,7 +360,7 @@ EXPORT_SYMBOL_GPL(ktime_get); void ktime_get_ts(struct timespec 
*ts) { struct timekeeper *tk = &timekeeper; - struct timespec tomono; + struct timespec64 ts64, tomono; s64 nsec; unsigned int seq; @@ -368,15 +368,16 @@ void ktime_get_ts(struct timespec *ts) do { seq = read_seqcount_begin(&timekeeper_seq); - ts->tv_sec = tk->xtime_sec; + ts64.tv_sec = tk->xtime_sec; nsec = timekeeping_get_ns(tk); tomono = tk->wall_to_monotonic; } while (read_seqcount_retry(&timekeeper_seq, seq)); - ts->tv_sec += tomono.tv_sec; - ts->tv_nsec = 0; - timespec_add_ns(ts, nsec + tomono.tv_nsec); + ts64.tv_sec += tomono.tv_sec; + ts64.tv_nsec = 0; + timespec64_add_ns(&ts64, nsec + tomono.tv_nsec); + *ts = timespec64_to_timespec(ts64); } EXPORT_SYMBOL_GPL(ktime_get_ts); @@ -390,6 +391,7 @@ EXPORT_SYMBOL_GPL(ktime_get_ts); void timekeeping_clocktai(struct timespec *ts) { struct timekeeper *tk = &timekeeper; + struct timespec64 ts64; unsigned long seq; u64 nsecs; @@ -398,13 +400,14 @@ void timekeeping_clocktai(struct timespec *ts) do { seq = read_seqcount_begin(&timekeeper_seq); - ts->tv_sec = tk->xtime_sec + tk->tai_offset; + ts64.tv_sec = tk->xtime_sec + tk->tai_offset; nsecs = timekeeping_get_ns(tk); } while (read_seqcount_retry(&timekeeper_seq, seq)); - ts->tv_nsec = 0; - timespec_add_ns(ts, nsecs); + ts64.tv_nsec = 0; + timespec64_add_ns(&ts64, nsecs); + *ts = timespec64_to_timespec(ts64); } EXPORT_SYMBOL(timekeeping_clocktai); @@ -446,7 +449,7 @@ void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real) do { seq = read_seqcount_begin(&timekeeper_seq); - *ts_raw = tk->raw_time; + *ts_raw = timespec64_to_timespec(tk->raw_time); ts_real->tv_sec = tk->xtime_sec; ts_real->tv_nsec = 0; @@ -487,7 +490,7 @@ EXPORT_SYMBOL(do_gettimeofday); int do_settimeofday(const struct timespec *tv) { struct timekeeper *tk = &timekeeper; - struct timespec ts_delta, xt; + struct timespec64 ts_delta, xt, tmp; unsigned long flags; if (!timespec_valid_strict(tv)) @@ -502,9 +505,10 @@ int do_settimeofday(const struct timespec *tv) ts_delta.tv_sec = tv->tv_sec - xt.tv_sec; ts_delta.tv_nsec = tv->tv_nsec - xt.tv_nsec; - tk_set_wall_to_mono(tk, timespec_sub(tk->wall_to_monotonic, ts_delta)); + tk_set_wall_to_mono(tk, timespec64_sub(tk->wall_to_monotonic, ts_delta)); - tk_set_xtime(tk, tv); + tmp = timespec_to_timespec64(*tv); + tk_set_xtime(tk, &tmp); timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET); @@ -528,26 +532,28 @@ int timekeeping_inject_offset(struct timespec *ts) { struct timekeeper *tk = &timekeeper; unsigned long flags; - struct timespec tmp; + struct timespec64 ts64, tmp; int ret = 0; if ((unsigned long)ts->tv_nsec >= NSEC_PER_SEC) return -EINVAL; + ts64 = timespec_to_timespec64(*ts); + raw_spin_lock_irqsave(&timekeeper_lock, flags); write_seqcount_begin(&timekeeper_seq); timekeeping_forward_now(tk); /* Make sure the proposed value is valid */ - tmp = timespec_add(tk_xtime(tk), *ts); - if (!timespec_valid_strict(&tmp)) { + tmp = timespec64_add(tk_xtime(tk), ts64); + if (!timespec64_valid_strict(&tmp)) { ret = -EINVAL; goto error; } - tk_xtime_add(tk, ts); - tk_set_wall_to_mono(tk, timespec_sub(tk->wall_to_monotonic, *ts)); + tk_xtime_add(tk, &ts64); + tk_set_wall_to_mono(tk, timespec64_sub(tk->wall_to_monotonic, ts64)); error: /* even if we error out, we forwarded the time, so call update */ timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET); @@ -691,17 +697,19 @@ EXPORT_SYMBOL_GPL(ktime_get_real); void getrawmonotonic(struct timespec *ts) { struct timekeeper *tk = &timekeeper; + struct timespec64 ts64; unsigned long seq; s64 nsecs; do 
{ seq = read_seqcount_begin(&timekeeper_seq); nsecs = timekeeping_get_ns_raw(tk); - *ts = tk->raw_time; + ts64 = tk->raw_time; } while (read_seqcount_retry(&timekeeper_seq, seq)); - timespec_add_ns(ts, nsecs); + timespec64_add_ns(&ts64, nsecs); + *ts = timespec64_to_timespec(ts64); } EXPORT_SYMBOL(getrawmonotonic); @@ -781,11 +789,12 @@ void __init timekeeping_init(void) struct timekeeper *tk = &timekeeper; struct clocksource *clock; unsigned long flags; - struct timespec now, boot, tmp; - - read_persistent_clock(&now); + struct timespec64 now, boot, tmp; + struct timespec ts; - if (!timespec_valid_strict(&now)) { + read_persistent_clock(&ts); + now = timespec_to_timespec64(ts); + if (!timespec64_valid_strict(&now)) { pr_warn("WARNING: Persistent clock returned invalid value!\n" " Check your CMOS/BIOS settings.\n"); now.tv_sec = 0; @@ -793,8 +802,9 @@ void __init timekeeping_init(void) } else if (now.tv_sec || now.tv_nsec) persistent_clock_exist = true; - read_boot_clock(&boot); - if (!timespec_valid_strict(&boot)) { + read_boot_clock(&ts); + boot = timespec_to_timespec64(ts); + if (!timespec64_valid_strict(&boot)) { pr_warn("WARNING: Boot clock returned invalid value!\n" " Check your CMOS/BIOS settings.\n"); boot.tv_sec = 0; @@ -816,7 +826,7 @@ void __init timekeeping_init(void) if (boot.tv_sec == 0 && boot.tv_nsec == 0) boot = tk_xtime(tk); - set_normalized_timespec(&tmp, -boot.tv_sec, -boot.tv_nsec); + set_normalized_timespec64(&tmp, -boot.tv_sec, -boot.tv_nsec); tk_set_wall_to_mono(tk, tmp); tmp.tv_sec = 0; @@ -830,7 +840,7 @@ void __init timekeeping_init(void) } /* time in seconds when suspend began */ -static struct timespec timekeeping_suspend_time; +static struct timespec64 timekeeping_suspend_time; /** * __timekeeping_inject_sleeptime - Internal function to add sleep interval @@ -840,17 +850,17 @@ static struct timespec timekeeping_suspend_time; * adds the sleep offset to the timekeeping variables. 
*/ static void __timekeeping_inject_sleeptime(struct timekeeper *tk, - struct timespec *delta) + struct timespec64 *delta) { - if (!timespec_valid_strict(delta)) { + if (!timespec64_valid_strict(delta)) { printk_deferred(KERN_WARNING "__timekeeping_inject_sleeptime: Invalid " "sleep delta value!\n"); return; } tk_xtime_add(tk, delta); - tk_set_wall_to_mono(tk, timespec_sub(tk->wall_to_monotonic, *delta)); - tk_set_sleep_time(tk, timespec_add(tk->total_sleep_time, *delta)); + tk_set_wall_to_mono(tk, timespec64_sub(tk->wall_to_monotonic, *delta)); + tk_set_sleep_time(tk, timespec64_add(tk->total_sleep_time, *delta)); tk_debug_account_sleep_time(delta); } @@ -867,6 +877,7 @@ static void __timekeeping_inject_sleeptime(struct timekeeper *tk, void timekeeping_inject_sleeptime(struct timespec *delta) { struct timekeeper *tk = &timekeeper; + struct timespec64 tmp; unsigned long flags; /* @@ -881,7 +892,8 @@ void timekeeping_inject_sleeptime(struct timespec *delta) timekeeping_forward_now(tk); - __timekeeping_inject_sleeptime(tk, delta); + tmp = timespec_to_timespec64(*delta); + __timekeeping_inject_sleeptime(tk, &tmp); timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET); @@ -904,11 +916,13 @@ static void timekeeping_resume(void) struct timekeeper *tk = &timekeeper; struct clocksource *clock = tk->clock; unsigned long flags; - struct timespec ts_new, ts_delta; + struct timespec64 ts_new, ts_delta; + struct timespec tmp; cycle_t cycle_now, cycle_delta; bool suspendtime_found = false; - read_persistent_clock(&ts_new); + read_persistent_clock(&tmp); + ts_new = timespec_to_timespec64(tmp); clockevents_resume(); clocksource_resume(); @@ -951,10 +965,10 @@ static void timekeeping_resume(void) } nsec += ((u64) cycle_delta * mult) >> shift; - ts_delta = ns_to_timespec(nsec); + ts_delta = ns_to_timespec64(nsec); suspendtime_found = true; - } else if (timespec_compare(&ts_new, &timekeeping_suspend_time) > 0) { - ts_delta = timespec_sub(ts_new, timekeeping_suspend_time); + } else if (timespec64_compare(&ts_new, &timekeeping_suspend_time) > 0) { + ts_delta = timespec64_sub(ts_new, timekeeping_suspend_time); suspendtime_found = true; } @@ -981,10 +995,12 @@ static int timekeeping_suspend(void) { struct timekeeper *tk = &timekeeper; unsigned long flags; - struct timespec delta, delta_delta; - static struct timespec old_delta; + struct timespec64 delta, delta_delta; + static struct timespec64 old_delta; + struct timespec tmp; - read_persistent_clock(&timekeeping_suspend_time); + read_persistent_clock(&tmp); + timekeeping_suspend_time = timespec_to_timespec64(tmp); /* * On some systems the persistent_clock can not be detected at @@ -1005,8 +1021,8 @@ static int timekeeping_suspend(void) * try to compensate so the difference in system time * and persistent_clock time stays close to constant. 
*/ - delta = timespec_sub(tk_xtime(tk), timekeeping_suspend_time); - delta_delta = timespec_sub(delta, old_delta); + delta = timespec64_sub(tk_xtime(tk), timekeeping_suspend_time); + delta_delta = timespec64_sub(delta, old_delta); if (abs(delta_delta.tv_sec) >= 2) { /* * if delta_delta is too large, assume time correction @@ -1016,7 +1032,7 @@ static int timekeeping_suspend(void) } else { /* Otherwise try to adjust old_system to compensate */ timekeeping_suspend_time = - timespec_add(timekeeping_suspend_time, delta_delta); + timespec64_add(timekeeping_suspend_time, delta_delta); } timekeeping_update(tk, TK_MIRROR); @@ -1253,14 +1269,14 @@ static inline unsigned int accumulate_nsecs_to_secs(struct timekeeper *tk) /* Figure out if its a leap sec and apply if needed */ leap = second_overflow(tk->xtime_sec); if (unlikely(leap)) { - struct timespec ts; + struct timespec64 ts; tk->xtime_sec += leap; ts.tv_sec = leap; ts.tv_nsec = 0; tk_set_wall_to_mono(tk, - timespec_sub(tk->wall_to_monotonic, ts)); + timespec64_sub(tk->wall_to_monotonic, ts)); __timekeeping_set_tai_offset(tk, tk->tai_offset - leap); @@ -1469,7 +1485,7 @@ EXPORT_SYMBOL_GPL(getboottime); void get_monotonic_boottime(struct timespec *ts) { struct timekeeper *tk = &timekeeper; - struct timespec tomono, sleep; + struct timespec64 tomono, sleep, ret; s64 nsec; unsigned int seq; @@ -1477,16 +1493,17 @@ void get_monotonic_boottime(struct timespec *ts) do { seq = read_seqcount_begin(&timekeeper_seq); - ts->tv_sec = tk->xtime_sec; + ret.tv_sec = tk->xtime_sec; nsec = timekeeping_get_ns(tk); tomono = tk->wall_to_monotonic; sleep = tk->total_sleep_time; } while (read_seqcount_retry(&timekeeper_seq, seq)); - ts->tv_sec += tomono.tv_sec + sleep.tv_sec; - ts->tv_nsec = 0; - timespec_add_ns(ts, nsec + tomono.tv_nsec + sleep.tv_nsec); + ret.tv_sec += tomono.tv_sec + sleep.tv_sec; + ret.tv_nsec = 0; + timespec64_add_ns(&ret, nsec + tomono.tv_nsec + sleep.tv_nsec); + *ts = timespec64_to_timespec(ret); } EXPORT_SYMBOL_GPL(get_monotonic_boottime); @@ -1514,8 +1531,11 @@ EXPORT_SYMBOL_GPL(ktime_get_boottime); void monotonic_to_bootbased(struct timespec *ts) { struct timekeeper *tk = &timekeeper; + struct timespec64 ts64; - *ts = timespec_add(*ts, tk->total_sleep_time); + ts64 = timespec_to_timespec64(*ts); + ts64 = timespec64_add(ts64, tk->total_sleep_time); + *ts = timespec64_to_timespec(ts64); } EXPORT_SYMBOL_GPL(monotonic_to_bootbased); @@ -1531,13 +1551,13 @@ struct timespec __current_kernel_time(void) { struct timekeeper *tk = &timekeeper; - return tk_xtime(tk); + return timespec64_to_timespec(tk_xtime(tk)); } struct timespec current_kernel_time(void) { struct timekeeper *tk = &timekeeper; - struct timespec now; + struct timespec64 now; unsigned long seq; do { @@ -1546,14 +1566,14 @@ struct timespec current_kernel_time(void) now = tk_xtime(tk); } while (read_seqcount_retry(&timekeeper_seq, seq)); - return now; + return timespec64_to_timespec(now); } EXPORT_SYMBOL(current_kernel_time); struct timespec get_monotonic_coarse(void) { struct timekeeper *tk = &timekeeper; - struct timespec now, mono; + struct timespec64 now, mono; unsigned long seq; do { @@ -1563,9 +1583,10 @@ struct timespec get_monotonic_coarse(void) mono = tk->wall_to_monotonic; } while (read_seqcount_retry(&timekeeper_seq, seq)); - set_normalized_timespec(&now, now.tv_sec + mono.tv_sec, + set_normalized_timespec64(&now, now.tv_sec + mono.tv_sec, now.tv_nsec + mono.tv_nsec); - return now; + + return timespec64_to_timespec(now); } /* @@ -1589,7 +1610,7 @@ ktime_t 
ktime_get_update_offsets_tick(ktime_t *offs_real, ktime_t *offs_boot, ktime_t *offs_tai) { struct timekeeper *tk = &timekeeper; - struct timespec ts; + struct timespec64 ts; ktime_t now; unsigned int seq; @@ -1597,7 +1618,6 @@ ktime_t ktime_get_update_offsets_tick(ktime_t *offs_real, ktime_t *offs_boot, seq = read_seqcount_begin(&timekeeper_seq); ts = tk_xtime(tk); - *offs_real = tk->offs_real; *offs_boot = tk->offs_boot; *offs_tai = tk->offs_tai; @@ -1650,14 +1670,14 @@ ktime_t ktime_get_monotonic_offset(void) { struct timekeeper *tk = &timekeeper; unsigned long seq; - struct timespec wtom; + struct timespec64 wtom; do { seq = read_seqcount_begin(&timekeeper_seq); wtom = tk->wall_to_monotonic; } while (read_seqcount_retry(&timekeeper_seq, seq)); - return timespec_to_ktime(wtom); + return timespec64_to_ktime(wtom); } EXPORT_SYMBOL_GPL(ktime_get_monotonic_offset); @@ -1668,7 +1688,8 @@ int do_adjtimex(struct timex *txc) { struct timekeeper *tk = &timekeeper; unsigned long flags; - struct timespec ts; + struct timespec64 ts; + struct timespec tmp; s32 orig_tai, tai; int ret; @@ -1688,7 +1709,8 @@ int do_adjtimex(struct timex *txc) return ret; } - getnstimeofday(&ts); + getnstimeofday(&tmp); + ts = timespec_to_timespec64(tmp); raw_spin_lock_irqsave(&timekeeper_lock, flags); write_seqcount_begin(&timekeeper_seq); diff --git a/kernel/time/timekeeping_debug.c b/kernel/time/timekeeping_debug.c index 4d54f97558df..f6bd65236712 100644 --- a/kernel/time/timekeeping_debug.c +++ b/kernel/time/timekeeping_debug.c @@ -67,7 +67,7 @@ static int __init tk_debug_sleep_time_init(void) } late_initcall(tk_debug_sleep_time_init); -void tk_debug_account_sleep_time(struct timespec *t) +void tk_debug_account_sleep_time(struct timespec64 *t) { sleep_time_bin[fls(t->tv_sec)]++; } diff --git a/kernel/time/timekeeping_internal.h b/kernel/time/timekeeping_internal.h index 13323ea08ffa..e3d28ad236f9 100644 --- a/kernel/time/timekeeping_internal.h +++ b/kernel/time/timekeeping_internal.h @@ -6,7 +6,7 @@ #include #ifdef CONFIG_DEBUG_FS -extern void tk_debug_account_sleep_time(struct timespec *t); +extern void tk_debug_account_sleep_time(struct timespec64 *t); #else #define tk_debug_account_sleep_time(x) #endif -- cgit v1.2.3-59-g8ed1b From 8b094cd03b4a3793220d8d8d86a173bfea8c285b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 16 Jul 2014 21:04:02 +0000 Subject: time: Consolidate the time accessor prototypes Right now we have time related prototypes in 3 different header files. Move it to a single timekeeping header file and move the core internal stuff into a core private header. 
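As a rough illustration (not part of the patch, helper name purely illustrative): callers are unaffected by the move, since the public accessors remain reachable through linux/ktime.h, which now pulls in the new linux/timekeeping.h, while kernel/time internals include the private kernel/time/timekeeping.h directly:

	#include <linux/ktime.h>	/* now also provides the timekeeping accessors */

	/* illustrative only: typical driver-side readout is unchanged */
	static void show_accessors(void)
	{
		struct timespec ts;

		getnstimeofday(&ts);	/* wall clock, via linux/timekeeping.h */
		ktime_get_ts(&ts);	/* monotonic clock, same header */
	}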
Signed-off-by: Thomas Gleixner Signed-off-by: John Stultz --- include/linux/hrtimer.h | 11 ------- include/linux/ktime.h | 8 ++--- include/linux/time.h | 45 +++----------------------- include/linux/timekeeping.h | 78 +++++++++++++++++++++++++++++++++++++++++++++ kernel/time/hrtimer.c | 2 ++ kernel/time/posix-timers.c | 2 ++ kernel/time/tick-internal.h | 2 ++ kernel/time/time.c | 1 + kernel/time/timekeeping.h | 20 ++++++++++++ 9 files changed, 111 insertions(+), 58 deletions(-) create mode 100644 include/linux/timekeeping.h create mode 100644 kernel/time/timekeeping.h (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index adf5056bd7b3..a036d058a249 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -326,17 +326,6 @@ static inline void timerfd_clock_was_set(void) { } #endif extern void hrtimers_resume(void); -extern ktime_t ktime_get(void); -extern ktime_t ktime_get_real(void); -extern ktime_t ktime_get_boottime(void); -extern ktime_t ktime_get_monotonic_offset(void); -extern ktime_t ktime_get_clocktai(void); -extern ktime_t ktime_get_update_offsets_tick(ktime_t *offs_real, - ktime_t *offs_boot, - ktime_t *offs_tai); -extern ktime_t ktime_get_update_offsets_now(ktime_t *offs_real, - ktime_t *offs_boot, - ktime_t *offs_tai); DECLARE_PER_CPU(struct tick_device, tick_cpu_device); diff --git a/include/linux/ktime.h b/include/linux/ktime.h index da6b680c252b..c9d645ad98ff 100644 --- a/include/linux/ktime.h +++ b/include/linux/ktime.h @@ -250,12 +250,6 @@ static inline __must_check bool ktime_to_timespec64_cond(const ktime_t kt, #define LOW_RES_NSEC TICK_NSEC #define KTIME_LOW_RES (ktime_t){ .tv64 = LOW_RES_NSEC } -/* Get the monotonic time in timespec format: */ -extern void ktime_get_ts(struct timespec *ts); - -/* Get the real (wall-) time in timespec format: */ -#define ktime_get_real_ts(ts) getnstimeofday(ts) - static inline ktime_t ns_to_ktime(u64 ns) { static const ktime_t ktime_zero = { .tv64 = 0 }; @@ -270,4 +264,6 @@ static inline ktime_t ms_to_ktime(u64 ms) return ktime_add_ms(ktime_zero, ms); } +# include + #endif diff --git a/include/linux/time.h b/include/linux/time.h index 234feac7f1c3..8c42cf8d2444 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -99,25 +99,7 @@ static inline bool timespec_valid_strict(const struct timespec *ts) return true; } -extern bool persistent_clock_exist; - -static inline bool has_persistent_clock(void) -{ - return persistent_clock_exist; -} - -extern void read_persistent_clock(struct timespec *ts); -extern void read_boot_clock(struct timespec *ts); -extern int persistent_clock_is_local; -extern int update_persistent_clock(struct timespec now); -void timekeeping_init(void); -extern int timekeeping_suspended; - -unsigned long get_seconds(void); -struct timespec current_kernel_time(void); -struct timespec __current_kernel_time(void); /* does not take xtime_lock */ -struct timespec get_monotonic_coarse(void); -void timekeeping_inject_sleeptime(struct timespec *delta); +extern struct timespec timespec_trunc(struct timespec t, unsigned gran); #define CURRENT_TIME (current_kernel_time()) #define CURRENT_TIME_SEC ((struct timespec) { get_seconds(), 0 }) @@ -135,33 +117,14 @@ void timekeeping_inject_sleeptime(struct timespec *delta); extern u32 (*arch_gettimeoffset)(void); #endif -extern void do_gettimeofday(struct timeval *tv); -extern int do_settimeofday(const struct timespec *tv); -extern int do_sys_settimeofday(const struct timespec *tv, - const struct timezone *tz); -#define 
do_posix_clock_monotonic_gettime(ts) ktime_get_ts(ts) -extern long do_utimes(int dfd, const char __user *filename, struct timespec *times, int flags); struct itimerval; extern int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue); -extern unsigned int alarm_setitimer(unsigned int seconds); extern int do_getitimer(int which, struct itimerval *value); -extern int __getnstimeofday(struct timespec *tv); -extern void getnstimeofday(struct timespec *tv); -extern void getrawmonotonic(struct timespec *ts); -extern void getnstime_raw_and_real(struct timespec *ts_raw, - struct timespec *ts_real); -extern void getboottime(struct timespec *ts); -extern void monotonic_to_bootbased(struct timespec *ts); -extern void get_monotonic_boottime(struct timespec *ts); -extern struct timespec timespec_trunc(struct timespec t, unsigned gran); -extern int timekeeping_valid_for_hres(void); -extern u64 timekeeping_max_deferment(void); -extern int timekeeping_inject_offset(struct timespec *ts); -extern s32 timekeeping_get_tai_offset(void); -extern void timekeeping_set_tai_offset(s32 tai_offset); -extern void timekeeping_clocktai(struct timespec *ts); +extern unsigned int alarm_setitimer(unsigned int seconds); + +extern long do_utimes(int dfd, const char __user *filename, struct timespec *times, int flags); struct tms; extern void do_sys_times(struct tms *); diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h new file mode 100644 index 000000000000..e81c35b71da7 --- /dev/null +++ b/include/linux/timekeeping.h @@ -0,0 +1,78 @@ +#ifndef _LINUX_TIMEKEEPING_H +#define _LINUX_TIMEKEEPING_H + +/* Included from linux/ktime.h */ + +void timekeeping_init(void); +extern int timekeeping_suspended; + +/* + * Get and set timeofday + */ +extern void do_gettimeofday(struct timeval *tv); +extern int do_settimeofday(const struct timespec *tv); +extern int do_sys_settimeofday(const struct timespec *tv, + const struct timezone *tz); + +/* + * Kernel time accessors + */ +unsigned long get_seconds(void); +struct timespec current_kernel_time(void); +/* does not take xtime_lock */ +struct timespec __current_kernel_time(void); + +/* + * timespec based interfaces + */ +struct timespec get_monotonic_coarse(void); +extern void getrawmonotonic(struct timespec *ts); +extern void monotonic_to_bootbased(struct timespec *ts); +extern void get_monotonic_boottime(struct timespec *ts); +extern void ktime_get_ts(struct timespec *ts); + +extern int __getnstimeofday(struct timespec *tv); +extern void getnstimeofday(struct timespec *tv); +extern void getboottime(struct timespec *ts); + +#define do_posix_clock_monotonic_gettime(ts) ktime_get_ts(ts) +#define ktime_get_real_ts(ts) getnstimeofday(ts) + + +/* + * ktime_t based interfaces + */ +extern ktime_t ktime_get(void); +extern ktime_t ktime_get_real(void); +extern ktime_t ktime_get_boottime(void); +extern ktime_t ktime_get_monotonic_offset(void); +extern ktime_t ktime_get_clocktai(void); + +/* + * RTC specific + */ +extern void timekeeping_inject_sleeptime(struct timespec *delta); + +/* + * PPS accessor + */ +extern void getnstime_raw_and_real(struct timespec *ts_raw, + struct timespec *ts_real); + +/* + * Persistent clock related interfaces + */ +extern bool persistent_clock_exist; +extern int persistent_clock_is_local; + +static inline bool has_persistent_clock(void) +{ + return persistent_clock_exist; +} + +extern void read_persistent_clock(struct timespec *ts); +extern void read_boot_clock(struct timespec *ts); +extern int update_persistent_clock(struct 
timespec now); + + +#endif diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 64843a836637..1c2fe7de2842 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -54,6 +54,8 @@ #include +#include "timekeeping.h" + /* * The timer bases: * diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 424c2d4265c9..42b463ad90f2 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -49,6 +49,8 @@ #include #include +#include "timekeeping.h" + /* * Management arrays for POSIX timers. Timers are now kept in static hash table * with 512 entries. diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index 7ab92b19965a..c19c1d84b6f3 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -4,6 +4,8 @@ #include #include +#include "timekeeping.h" + extern seqlock_t jiffies_lock; #define CS_NAME_LEN 32 diff --git a/kernel/time/time.c b/kernel/time/time.c index e8121a67fd74..278c63cc8054 100644 --- a/kernel/time/time.c +++ b/kernel/time/time.c @@ -42,6 +42,7 @@ #include #include "timeconst.h" +#include "timekeeping.h" /* * The timezone where the local system is located. Used as a default by some diff --git a/kernel/time/timekeeping.h b/kernel/time/timekeeping.h new file mode 100644 index 000000000000..adc1fc98bde3 --- /dev/null +++ b/kernel/time/timekeeping.h @@ -0,0 +1,20 @@ +#ifndef _KERNEL_TIME_TIMEKEEPING_H +#define _KERNEL_TIME_TIMEKEEPING_H +/* + * Internal interfaces for kernel/time/ + */ +extern ktime_t ktime_get_update_offsets_tick(ktime_t *offs_real, + ktime_t *offs_boot, + ktime_t *offs_tai); +extern ktime_t ktime_get_update_offsets_now(ktime_t *offs_real, + ktime_t *offs_boot, + ktime_t *offs_tai); + +extern int timekeeping_valid_for_hres(void); +extern u64 timekeeping_max_deferment(void); +extern int timekeeping_inject_offset(struct timespec *ts); +extern s32 timekeeping_get_tai_offset(void); +extern void timekeeping_set_tai_offset(s32 tai_offset); +extern void timekeeping_clocktai(struct timespec *ts); + +#endif -- cgit v1.2.3-59-g8ed1b From d6d29896c665dfd50e6e0be7a9039901640433a3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 16 Jul 2014 21:04:04 +0000 Subject: timekeeping: Provide timespec64 based interfaces To convert callers of the core code to timespec64 we need to provide the proper interfaces. 
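As a sketch of the intended usage (illustrative only, function name not from the patch): new code calls the timespec64 variants directly, while existing timespec users keep working through the inline wrappers added in the diff below; on 64-bit builds the wrappers collapse to direct calls, on 32-bit they convert:

	/* illustrative helper showing both interfaces side by side */
	static void read_monotonic_both_ways(void)
	{
		struct timespec64 ts64;
		struct timespec ts;

		ktime_get_ts64(&ts64);	/* y2038-safe, preferred for new code */
		ktime_get_ts(&ts);	/* legacy wrapper; converts from timespec64 on 32-bit */
	}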
Signed-off-by: Thomas Gleixner Signed-off-by: John Stultz --- include/linux/timekeeping.h | 66 +++++++++++++++++++++++++++++++++++++++++---- kernel/time/ntp.c | 7 ++--- kernel/time/timekeeping.c | 47 +++++++++++++++----------------- 3 files changed, 87 insertions(+), 33 deletions(-) (limited to 'include/linux') diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index e81c35b71da7..3eb19e34cc20 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -29,15 +29,71 @@ struct timespec get_monotonic_coarse(void); extern void getrawmonotonic(struct timespec *ts); extern void monotonic_to_bootbased(struct timespec *ts); extern void get_monotonic_boottime(struct timespec *ts); -extern void ktime_get_ts(struct timespec *ts); +extern void ktime_get_ts64(struct timespec64 *ts); + +extern int __getnstimeofday64(struct timespec64 *tv); +extern void getnstimeofday64(struct timespec64 *tv); + +#if BITS_PER_LONG == 64 +static inline int __getnstimeofday(struct timespec *ts) +{ + return __getnstimeofday64(ts); +} + +static inline void getnstimeofday(struct timespec *ts) +{ + getnstimeofday64(ts); +} + +static inline void ktime_get_ts(struct timespec *ts) +{ + ktime_get_ts64(ts); +} + +static inline void ktime_get_real_ts(struct timespec *ts) +{ + getnstimeofday64(ts); +} + +#else +static inline int __getnstimeofday(struct timespec *ts) +{ + struct timespec64 ts64; + int ret = __getnstimeofday64(&ts64); + + *ts = timespec64_to_timespec(ts64); + return ret; +} + +static inline void getnstimeofday(struct timespec *ts) +{ + struct timespec64 ts64; + + getnstimeofday64(&ts64); + *ts = timespec64_to_timespec(ts64); +} + +static inline void ktime_get_ts(struct timespec *ts) +{ + struct timespec64 ts64; + + ktime_get_ts64(&ts64); + *ts = timespec64_to_timespec(ts64); +} + +static inline void ktime_get_real_ts(struct timespec *ts) +{ + struct timespec64 ts64; + + getnstimeofday64(&ts64); + *ts = timespec64_to_timespec(ts64); +} +#endif -extern int __getnstimeofday(struct timespec *tv); -extern void getnstimeofday(struct timespec *tv); extern void getboottime(struct timespec *ts); #define do_posix_clock_monotonic_gettime(ts) ktime_get_ts(ts) -#define ktime_get_real_ts(ts) getnstimeofday(ts) - +#define ktime_get_real_ts64(ts) getnstimeofday64(ts) /* * ktime_t based interfaces diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 6e87df94122f..87a346fd6d61 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -466,7 +466,8 @@ static DECLARE_DELAYED_WORK(sync_cmos_work, sync_cmos_clock); static void sync_cmos_clock(struct work_struct *work) { - struct timespec now, next; + struct timespec64 now; + struct timespec next; int fail = 1; /* @@ -485,9 +486,9 @@ static void sync_cmos_clock(struct work_struct *work) return; } - getnstimeofday(&now); + getnstimeofday64(&now); if (abs(now.tv_nsec - (NSEC_PER_SEC / 2)) <= tick_nsec * 5) { - struct timespec adjust = now; + struct timespec adjust = timespec64_to_timespec(now); fail = -ENODEV; if (persistent_clock_is_local) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 84a2075c3eb4..3210c9e690c5 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -285,13 +285,13 @@ static void timekeeping_forward_now(struct timekeeper *tk) } /** - * __getnstimeofday - Returns the time of day in a timespec. + * __getnstimeofday64 - Returns the time of day in a timespec64. * @ts: pointer to the timespec to be set * * Updates the time of day in the timespec. 
* Returns 0 on success, or -ve when suspended (timespec will be undefined). */ -int __getnstimeofday(struct timespec *ts) +int __getnstimeofday64(struct timespec64 *ts) { struct timekeeper *tk = &timekeeper; unsigned long seq; @@ -306,7 +306,7 @@ int __getnstimeofday(struct timespec *ts) } while (read_seqcount_retry(&timekeeper_seq, seq)); ts->tv_nsec = 0; - timespec_add_ns(ts, nsecs); + timespec64_add_ns(ts, nsecs); /* * Do not bail out early, in case there were callers still using @@ -316,19 +316,19 @@ int __getnstimeofday(struct timespec *ts) return -EAGAIN; return 0; } -EXPORT_SYMBOL(__getnstimeofday); +EXPORT_SYMBOL(__getnstimeofday64); /** - * getnstimeofday - Returns the time of day in a timespec. + * getnstimeofday64 - Returns the time of day in a timespec64. * @ts: pointer to the timespec to be set * * Returns the time of day in a timespec (WARN if suspended). */ -void getnstimeofday(struct timespec *ts) +void getnstimeofday64(struct timespec64 *ts) { - WARN_ON(__getnstimeofday(ts)); + WARN_ON(__getnstimeofday64(ts)); } -EXPORT_SYMBOL(getnstimeofday); +EXPORT_SYMBOL(getnstimeofday64); ktime_t ktime_get(void) { @@ -350,17 +350,17 @@ ktime_t ktime_get(void) EXPORT_SYMBOL_GPL(ktime_get); /** - * ktime_get_ts - get the monotonic clock in timespec format + * ktime_get_ts64 - get the monotonic clock in timespec64 format * @ts: pointer to timespec variable * * The function calculates the monotonic clock from the realtime * clock and the wall_to_monotonic offset and stores the result * in normalized timespec format in the variable pointed to by @ts. */ -void ktime_get_ts(struct timespec *ts) +void ktime_get_ts64(struct timespec64 *ts) { struct timekeeper *tk = &timekeeper; - struct timespec64 ts64, tomono; + struct timespec64 tomono; s64 nsec; unsigned int seq; @@ -368,18 +368,17 @@ void ktime_get_ts(struct timespec *ts) do { seq = read_seqcount_begin(&timekeeper_seq); - ts64.tv_sec = tk->xtime_sec; + ts->tv_sec = tk->xtime_sec; nsec = timekeeping_get_ns(tk); tomono = tk->wall_to_monotonic; } while (read_seqcount_retry(&timekeeper_seq, seq)); - ts64.tv_sec += tomono.tv_sec; - ts64.tv_nsec = 0; - timespec64_add_ns(&ts64, nsec + tomono.tv_nsec); - *ts = timespec64_to_timespec(ts64); + ts->tv_sec += tomono.tv_sec; + ts->tv_nsec = 0; + timespec64_add_ns(ts, nsec + tomono.tv_nsec); } -EXPORT_SYMBOL_GPL(ktime_get_ts); +EXPORT_SYMBOL_GPL(ktime_get_ts64); /** @@ -473,9 +472,9 @@ EXPORT_SYMBOL(getnstime_raw_and_real); */ void do_gettimeofday(struct timeval *tv) { - struct timespec now; + struct timespec64 now; - getnstimeofday(&now); + getnstimeofday64(&now); tv->tv_sec = now.tv_sec; tv->tv_usec = now.tv_nsec/1000; } @@ -680,11 +679,11 @@ int timekeeping_notify(struct clocksource *clock) */ ktime_t ktime_get_real(void) { - struct timespec now; + struct timespec64 now; - getnstimeofday(&now); + getnstimeofday64(&now); - return timespec_to_ktime(now); + return timespec64_to_ktime(now); } EXPORT_SYMBOL_GPL(ktime_get_real); @@ -1689,7 +1688,6 @@ int do_adjtimex(struct timex *txc) struct timekeeper *tk = &timekeeper; unsigned long flags; struct timespec64 ts; - struct timespec tmp; s32 orig_tai, tai; int ret; @@ -1709,8 +1707,7 @@ int do_adjtimex(struct timex *txc) return ret; } - getnstimeofday(&tmp); - ts = timespec_to_timespec64(tmp); + getnstimeofday64(&ts); raw_spin_lock_irqsave(&timekeeper_lock, flags); write_seqcount_begin(&timekeeper_seq); -- cgit v1.2.3-59-g8ed1b From c905fae43f61c2b4508fc01722e8db61b6b8ac0b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 16 Jul 2014 21:04:05 +0000 
Subject: timekeeper: Move tk_xtime to core code No users outside of the core. Signed-off-by: Thomas Gleixner Signed-off-by: John Stultz --- include/linux/timekeeper_internal.h | 18 ---------- kernel/time/timekeeping.c | 70 +++++++++++++++++++++++-------------- 2 files changed, 43 insertions(+), 45 deletions(-) (limited to 'include/linux') diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index 1b05491e10f9..16de6d7c240a 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -71,16 +71,6 @@ struct timekeeper { }; -static inline struct timespec64 tk_xtime(struct timekeeper *tk) -{ - struct timespec64 ts; - - ts.tv_sec = tk->xtime_sec; - ts.tv_nsec = (long)(tk->xtime_nsec >> tk->shift); - return ts; -} - - #ifdef CONFIG_GENERIC_TIME_VSYSCALL extern void update_vsyscall(struct timekeeper *tk); @@ -92,14 +82,6 @@ extern void update_vsyscall_old(struct timespec *ts, struct timespec *wtm, struct clocksource *c, u32 mult); extern void update_vsyscall_tz(void); -static inline void update_vsyscall(struct timekeeper *tk) -{ - struct timespec xt; - - xt = tk_xtime(tk); - update_vsyscall_old(&xt, &tk->wall_to_monotonic, tk->clock, tk->mult); -} - #else static inline void update_vsyscall(struct timekeeper *tk) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 3210c9e690c5..983d67b388d7 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -51,6 +51,15 @@ static inline void tk_normalize_xtime(struct timekeeper *tk) } } +static inline struct timespec64 tk_xtime(struct timekeeper *tk) +{ + struct timespec64 ts; + + ts.tv_sec = tk->xtime_sec; + ts.tv_nsec = (long)(tk->xtime_nsec >> tk->shift); + return ts; +} + static void tk_set_xtime(struct timekeeper *tk, const struct timespec64 *ts) { tk->xtime_sec = ts->tv_sec; @@ -199,6 +208,40 @@ static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk) return nsec + arch_gettimeoffset(); } +#ifdef CONFIG_GENERIC_TIME_VSYSCALL_OLD + +static inline void update_vsyscall(struct timekeeper *tk) +{ + struct timespec xt; + + xt = tk_xtime(tk); + update_vsyscall_old(&xt, &tk->wall_to_monotonic, tk->clock, tk->mult); +} + +static inline void old_vsyscall_fixup(struct timekeeper *tk) +{ + s64 remainder; + + /* + * Store only full nanoseconds into xtime_nsec after rounding + * it up and add the remainder to the error difference. + * XXX - This is necessary to avoid small 1ns inconsistnecies caused + * by truncating the remainder in vsyscalls. However, it causes + * additional work to be done in timekeeping_adjust(). Once + * the vsyscall implementations are converted to use xtime_nsec + * (shifted nanoseconds), and CONFIG_GENERIC_TIME_VSYSCALL_OLD + * users are removed, this can be killed. 
+ */ + remainder = tk->xtime_nsec & ((1ULL << tk->shift) - 1); + tk->xtime_nsec -= remainder; + tk->xtime_nsec += 1ULL << tk->shift; + tk->ntp_error += remainder << tk->ntp_error_shift; + tk->ntp_error -= (1ULL << tk->shift) << tk->ntp_error_shift; +} +#else +#define old_vsyscall_fixup(tk) +#endif + static RAW_NOTIFIER_HEAD(pvclock_gtod_chain); static void update_pvclock_gtod(struct timekeeper *tk, bool was_set) @@ -1330,33 +1373,6 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset, return offset; } -#ifdef CONFIG_GENERIC_TIME_VSYSCALL_OLD -static inline void old_vsyscall_fixup(struct timekeeper *tk) -{ - s64 remainder; - - /* - * Store only full nanoseconds into xtime_nsec after rounding - * it up and add the remainder to the error difference. - * XXX - This is necessary to avoid small 1ns inconsistnecies caused - * by truncating the remainder in vsyscalls. However, it causes - * additional work to be done in timekeeping_adjust(). Once - * the vsyscall implementations are converted to use xtime_nsec - * (shifted nanoseconds), and CONFIG_GENERIC_TIME_VSYSCALL_OLD - * users are removed, this can be killed. - */ - remainder = tk->xtime_nsec & ((1ULL << tk->shift) - 1); - tk->xtime_nsec -= remainder; - tk->xtime_nsec += 1ULL << tk->shift; - tk->ntp_error += remainder << tk->ntp_error_shift; - tk->ntp_error -= (1ULL << tk->shift) << tk->ntp_error_shift; -} -#else -#define old_vsyscall_fixup(tk) -#endif - - - /** * update_wall_time - Uses the current clocksource to increment the wall time * -- cgit v1.2.3-59-g8ed1b From 3fdb14fd1df70325e1e91e1203a699a4803ed741 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 16 Jul 2014 21:04:07 +0000 Subject: timekeeping: Cache optimize struct timekeeper struct timekeeper is quite badly sorted for the hot readout path. Most time access functions need to load two cache lines. Rearrange it so ktime_get() and getnstimeofday() are happy with a single cache line. Signed-off-by: Thomas Gleixner Signed-off-by: John Stultz --- include/linux/timekeeper_internal.h | 84 ++++++++-------- kernel/time/timekeeping.c | 185 +++++++++++++++++++----------------- 2 files changed, 143 insertions(+), 126 deletions(-) (limited to 'include/linux') diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index 16de6d7c240a..2cb96235c249 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -10,7 +10,22 @@ #include #include -/* Structure holding internal timekeeping values. */ +/* + * Structure holding internal timekeeping values. + * + * Note: wall_to_monotonic is what we need to add to xtime (or xtime + * corrected for sub jiffie times) to get to monotonic time. + * Monotonic is pegged at zero at system boot time, so + * wall_to_monotonic will be negative, however, we will ALWAYS keep + * the tv_nsec part positive so we can use the usual normalization. + * + * wall_to_monotonic is moved after resume from suspend for the + * monotonic time not to jump. We need to add total_sleep_time to + * wall_to_monotonic to get the real boot based time offset. + * + * - wall_to_monotonic is no longer the boot time, getboottime must be + * used instead. + */ struct timekeeper { /* Current clocksource used for timekeeping. */ struct clocksource *clock; @@ -18,6 +33,29 @@ struct timekeeper { u32 mult; /* The shift value of the current clocksource. 
*/ u32 shift; + /* Clock shifted nano seconds */ + u64 xtime_nsec; + + /* Current CLOCK_REALTIME time in seconds */ + u64 xtime_sec; + /* CLOCK_REALTIME to CLOCK_MONOTONIC offset */ + struct timespec64 wall_to_monotonic; + + /* Offset clock monotonic -> clock realtime */ + ktime_t offs_real; + /* Offset clock monotonic -> clock boottime */ + ktime_t offs_boot; + /* Offset clock monotonic -> clock tai */ + ktime_t offs_tai; + + /* time spent in suspend */ + struct timespec64 total_sleep_time; + /* The current UTC to TAI offset in seconds */ + s32 tai_offset; + + /* The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock. */ + struct timespec64 raw_time; + /* Number of clock cycles in one NTP interval. */ cycle_t cycle_interval; /* Last cycle value (also stored in clock->cycle_last) */ @@ -29,46 +67,16 @@ struct timekeeper { /* Raw nano seconds accumulated per NTP interval. */ u32 raw_interval; - /* Current CLOCK_REALTIME time in seconds */ - u64 xtime_sec; - /* Clock shifted nano seconds */ - u64 xtime_nsec; - - /* Difference between accumulated time and NTP time in ntp - * shifted nano seconds. */ + /* + * Difference between accumulated time and NTP time in ntp + * shifted nano seconds. + */ s64 ntp_error; - /* Shift conversion between clock shifted nano seconds and - * ntp shifted nano seconds. */ - u32 ntp_error_shift; - /* - * wall_to_monotonic is what we need to add to xtime (or xtime corrected - * for sub jiffie times) to get to monotonic time. Monotonic is pegged - * at zero at system boot time, so wall_to_monotonic will be negative, - * however, we will ALWAYS keep the tv_nsec part positive so we can use - * the usual normalization. - * - * wall_to_monotonic is moved after resume from suspend for the - * monotonic time not to jump. We need to add total_sleep_time to - * wall_to_monotonic to get the real boot based time offset. - * - * - wall_to_monotonic is no longer the boot time, getboottime must be - * used instead. + * Shift conversion between clock shifted nano seconds and + * ntp shifted nano seconds. */ - struct timespec64 wall_to_monotonic; - /* Offset clock monotonic -> clock realtime */ - ktime_t offs_real; - /* time spent in suspend */ - struct timespec64 total_sleep_time; - /* Offset clock monotonic -> clock boottime */ - ktime_t offs_boot; - /* The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock. */ - struct timespec64 raw_time; - /* The current UTC to TAI offset in seconds */ - s32 tai_offset; - /* Offset clock monotonic -> clock tai */ - ktime_t offs_tai; - + u32 ntp_error_shift; }; #ifdef CONFIG_GENERIC_TIME_VSYSCALL diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 983d67b388d7..7ca150ad387d 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -32,9 +32,16 @@ #define TK_MIRROR (1 << 1) #define TK_CLOCK_WAS_SET (1 << 2) -static struct timekeeper timekeeper; +/* + * The most important data for readout fits into a single 64 byte + * cache line. 
+ */ +static struct { + seqcount_t seq; + struct timekeeper timekeeper; +} tk_core ____cacheline_aligned; + static DEFINE_RAW_SPINLOCK(timekeeper_lock); -static seqcount_t timekeeper_seq; static struct timekeeper shadow_timekeeper; /* flag for if timekeeping is suspended */ @@ -254,7 +261,7 @@ static void update_pvclock_gtod(struct timekeeper *tk, bool was_set) */ int pvclock_gtod_register_notifier(struct notifier_block *nb) { - struct timekeeper *tk = &timekeeper; + struct timekeeper *tk = &tk_core.timekeeper; unsigned long flags; int ret; @@ -295,7 +302,8 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action) update_pvclock_gtod(tk, action & TK_CLOCK_WAS_SET); if (action & TK_MIRROR) - memcpy(&shadow_timekeeper, &timekeeper, sizeof(timekeeper)); + memcpy(&shadow_timekeeper, &tk_core.timekeeper, + sizeof(tk_core.timekeeper)); } /** @@ -336,17 +344,17 @@ static void timekeeping_forward_now(struct timekeeper *tk) */ int __getnstimeofday64(struct timespec64 *ts) { - struct timekeeper *tk = &timekeeper; + struct timekeeper *tk = &tk_core.timekeeper; unsigned long seq; s64 nsecs = 0; do { - seq = read_seqcount_begin(&timekeeper_seq); + seq = read_seqcount_begin(&tk_core.seq); ts->tv_sec = tk->xtime_sec; nsecs = timekeeping_get_ns(tk); - } while (read_seqcount_retry(&timekeeper_seq, seq)); + } while (read_seqcount_retry(&tk_core.seq, seq)); ts->tv_nsec = 0; timespec64_add_ns(ts, nsecs); @@ -375,18 +383,18 @@ EXPORT_SYMBOL(getnstimeofday64); ktime_t ktime_get(void) { - struct timekeeper *tk = &timekeeper; + struct timekeeper *tk = &tk_core.timekeeper; unsigned int seq; s64 secs, nsecs; WARN_ON(timekeeping_suspended); do { - seq = read_seqcount_begin(&timekeeper_seq); + seq = read_seqcount_begin(&tk_core.seq); secs = tk->xtime_sec + tk->wall_to_monotonic.tv_sec; nsecs = timekeeping_get_ns(tk) + tk->wall_to_monotonic.tv_nsec; - } while (read_seqcount_retry(&timekeeper_seq, seq)); + } while (read_seqcount_retry(&tk_core.seq, seq)); return ktime_set(secs, nsecs); } @@ -402,7 +410,7 @@ EXPORT_SYMBOL_GPL(ktime_get); */ void ktime_get_ts64(struct timespec64 *ts) { - struct timekeeper *tk = &timekeeper; + struct timekeeper *tk = &tk_core.timekeeper; struct timespec64 tomono; s64 nsec; unsigned int seq; @@ -410,12 +418,12 @@ void ktime_get_ts64(struct timespec64 *ts) WARN_ON(timekeeping_suspended); do { - seq = read_seqcount_begin(&timekeeper_seq); + seq = read_seqcount_begin(&tk_core.seq); ts->tv_sec = tk->xtime_sec; nsec = timekeeping_get_ns(tk); tomono = tk->wall_to_monotonic; - } while (read_seqcount_retry(&timekeeper_seq, seq)); + } while (read_seqcount_retry(&tk_core.seq, seq)); ts->tv_sec += tomono.tv_sec; ts->tv_nsec = 0; @@ -432,7 +440,7 @@ EXPORT_SYMBOL_GPL(ktime_get_ts64); */ void timekeeping_clocktai(struct timespec *ts) { - struct timekeeper *tk = &timekeeper; + struct timekeeper *tk = &tk_core.timekeeper; struct timespec64 ts64; unsigned long seq; u64 nsecs; @@ -440,12 +448,12 @@ void timekeeping_clocktai(struct timespec *ts) WARN_ON(timekeeping_suspended); do { - seq = read_seqcount_begin(&timekeeper_seq); + seq = read_seqcount_begin(&tk_core.seq); ts64.tv_sec = tk->xtime_sec + tk->tai_offset; nsecs = timekeeping_get_ns(tk); - } while (read_seqcount_retry(&timekeeper_seq, seq)); + } while (read_seqcount_retry(&tk_core.seq, seq)); ts64.tv_nsec = 0; timespec64_add_ns(&ts64, nsecs); @@ -482,14 +490,14 @@ EXPORT_SYMBOL(ktime_get_clocktai); */ void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real) { - struct timekeeper *tk = &timekeeper; + struct 
timekeeper *tk = &tk_core.timekeeper; unsigned long seq; s64 nsecs_raw, nsecs_real; WARN_ON_ONCE(timekeeping_suspended); do { - seq = read_seqcount_begin(&timekeeper_seq); + seq = read_seqcount_begin(&tk_core.seq); *ts_raw = timespec64_to_timespec(tk->raw_time); ts_real->tv_sec = tk->xtime_sec; @@ -498,7 +506,7 @@ void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real) nsecs_raw = timekeeping_get_ns_raw(tk); nsecs_real = timekeeping_get_ns(tk); - } while (read_seqcount_retry(&timekeeper_seq, seq)); + } while (read_seqcount_retry(&tk_core.seq, seq)); timespec_add_ns(ts_raw, nsecs_raw); timespec_add_ns(ts_real, nsecs_real); @@ -531,7 +539,7 @@ EXPORT_SYMBOL(do_gettimeofday); */ int do_settimeofday(const struct timespec *tv) { - struct timekeeper *tk = &timekeeper; + struct timekeeper *tk = &tk_core.timekeeper; struct timespec64 ts_delta, xt, tmp; unsigned long flags; @@ -539,7 +547,7 @@ int do_settimeofday(const struct timespec *tv) return -EINVAL; raw_spin_lock_irqsave(&timekeeper_lock, flags); - write_seqcount_begin(&timekeeper_seq); + write_seqcount_begin(&tk_core.seq); timekeeping_forward_now(tk); @@ -554,7 +562,7 @@ int do_settimeofday(const struct timespec *tv) timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET); - write_seqcount_end(&timekeeper_seq); + write_seqcount_end(&tk_core.seq); raw_spin_unlock_irqrestore(&timekeeper_lock, flags); /* signal hrtimers about time change */ @@ -572,7 +580,7 @@ EXPORT_SYMBOL(do_settimeofday); */ int timekeeping_inject_offset(struct timespec *ts) { - struct timekeeper *tk = &timekeeper; + struct timekeeper *tk = &tk_core.timekeeper; unsigned long flags; struct timespec64 ts64, tmp; int ret = 0; @@ -583,7 +591,7 @@ int timekeeping_inject_offset(struct timespec *ts) ts64 = timespec_to_timespec64(*ts); raw_spin_lock_irqsave(&timekeeper_lock, flags); - write_seqcount_begin(&timekeeper_seq); + write_seqcount_begin(&tk_core.seq); timekeeping_forward_now(tk); @@ -600,7 +608,7 @@ int timekeeping_inject_offset(struct timespec *ts) error: /* even if we error out, we forwarded the time, so call update */ timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET); - write_seqcount_end(&timekeeper_seq); + write_seqcount_end(&tk_core.seq); raw_spin_unlock_irqrestore(&timekeeper_lock, flags); /* signal hrtimers about time change */ @@ -617,14 +625,14 @@ EXPORT_SYMBOL(timekeeping_inject_offset); */ s32 timekeeping_get_tai_offset(void) { - struct timekeeper *tk = &timekeeper; + struct timekeeper *tk = &tk_core.timekeeper; unsigned int seq; s32 ret; do { - seq = read_seqcount_begin(&timekeeper_seq); + seq = read_seqcount_begin(&tk_core.seq); ret = tk->tai_offset; - } while (read_seqcount_retry(&timekeeper_seq, seq)); + } while (read_seqcount_retry(&tk_core.seq, seq)); return ret; } @@ -645,14 +653,14 @@ static void __timekeeping_set_tai_offset(struct timekeeper *tk, s32 tai_offset) */ void timekeeping_set_tai_offset(s32 tai_offset) { - struct timekeeper *tk = &timekeeper; + struct timekeeper *tk = &tk_core.timekeeper; unsigned long flags; raw_spin_lock_irqsave(&timekeeper_lock, flags); - write_seqcount_begin(&timekeeper_seq); + write_seqcount_begin(&tk_core.seq); __timekeeping_set_tai_offset(tk, tai_offset); timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET); - write_seqcount_end(&timekeeper_seq); + write_seqcount_end(&tk_core.seq); raw_spin_unlock_irqrestore(&timekeeper_lock, flags); clock_was_set(); } @@ -664,14 +672,14 @@ void timekeeping_set_tai_offset(s32 tai_offset) */ static int change_clocksource(void *data) { 
- struct timekeeper *tk = &timekeeper; + struct timekeeper *tk = &tk_core.timekeeper; struct clocksource *new, *old; unsigned long flags; new = (struct clocksource *) data; raw_spin_lock_irqsave(&timekeeper_lock, flags); - write_seqcount_begin(&timekeeper_seq); + write_seqcount_begin(&tk_core.seq); timekeeping_forward_now(tk); /* @@ -691,7 +699,7 @@ static int change_clocksource(void *data) } timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET); - write_seqcount_end(&timekeeper_seq); + write_seqcount_end(&tk_core.seq); raw_spin_unlock_irqrestore(&timekeeper_lock, flags); return 0; @@ -706,7 +714,7 @@ static int change_clocksource(void *data) */ int timekeeping_notify(struct clocksource *clock) { - struct timekeeper *tk = &timekeeper; + struct timekeeper *tk = &tk_core.timekeeper; if (tk->clock == clock) return 0; @@ -738,17 +746,17 @@ EXPORT_SYMBOL_GPL(ktime_get_real); */ void getrawmonotonic(struct timespec *ts) { - struct timekeeper *tk = &timekeeper; + struct timekeeper *tk = &tk_core.timekeeper; struct timespec64 ts64; unsigned long seq; s64 nsecs; do { - seq = read_seqcount_begin(&timekeeper_seq); + seq = read_seqcount_begin(&tk_core.seq); nsecs = timekeeping_get_ns_raw(tk); ts64 = tk->raw_time; - } while (read_seqcount_retry(&timekeeper_seq, seq)); + } while (read_seqcount_retry(&tk_core.seq, seq)); timespec64_add_ns(&ts64, nsecs); *ts = timespec64_to_timespec(ts64); @@ -760,16 +768,16 @@ EXPORT_SYMBOL(getrawmonotonic); */ int timekeeping_valid_for_hres(void) { - struct timekeeper *tk = &timekeeper; + struct timekeeper *tk = &tk_core.timekeeper; unsigned long seq; int ret; do { - seq = read_seqcount_begin(&timekeeper_seq); + seq = read_seqcount_begin(&tk_core.seq); ret = tk->clock->flags & CLOCK_SOURCE_VALID_FOR_HRES; - } while (read_seqcount_retry(&timekeeper_seq, seq)); + } while (read_seqcount_retry(&tk_core.seq, seq)); return ret; } @@ -779,16 +787,16 @@ int timekeeping_valid_for_hres(void) */ u64 timekeeping_max_deferment(void) { - struct timekeeper *tk = &timekeeper; + struct timekeeper *tk = &tk_core.timekeeper; unsigned long seq; u64 ret; do { - seq = read_seqcount_begin(&timekeeper_seq); + seq = read_seqcount_begin(&tk_core.seq); ret = tk->clock->max_idle_ns; - } while (read_seqcount_retry(&timekeeper_seq, seq)); + } while (read_seqcount_retry(&tk_core.seq, seq)); return ret; } @@ -828,7 +836,7 @@ void __weak read_boot_clock(struct timespec *ts) */ void __init timekeeping_init(void) { - struct timekeeper *tk = &timekeeper; + struct timekeeper *tk = &tk_core.timekeeper; struct clocksource *clock; unsigned long flags; struct timespec64 now, boot, tmp; @@ -854,7 +862,7 @@ void __init timekeeping_init(void) } raw_spin_lock_irqsave(&timekeeper_lock, flags); - write_seqcount_begin(&timekeeper_seq); + write_seqcount_begin(&tk_core.seq); ntp_init(); clock = clocksource_default_clock(); @@ -875,9 +883,10 @@ void __init timekeeping_init(void) tmp.tv_nsec = 0; tk_set_sleep_time(tk, tmp); - memcpy(&shadow_timekeeper, &timekeeper, sizeof(timekeeper)); + memcpy(&shadow_timekeeper, &tk_core.timekeeper, + sizeof(tk_core.timekeeper)); - write_seqcount_end(&timekeeper_seq); + write_seqcount_end(&tk_core.seq); raw_spin_unlock_irqrestore(&timekeeper_lock, flags); } @@ -918,7 +927,7 @@ static void __timekeeping_inject_sleeptime(struct timekeeper *tk, */ void timekeeping_inject_sleeptime(struct timespec *delta) { - struct timekeeper *tk = &timekeeper; + struct timekeeper *tk = &tk_core.timekeeper; struct timespec64 tmp; unsigned long flags; @@ -930,7 +939,7 @@ void 
timekeeping_inject_sleeptime(struct timespec *delta) return; raw_spin_lock_irqsave(&timekeeper_lock, flags); - write_seqcount_begin(&timekeeper_seq); + write_seqcount_begin(&tk_core.seq); timekeeping_forward_now(tk); @@ -939,7 +948,7 @@ void timekeeping_inject_sleeptime(struct timespec *delta) timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET); - write_seqcount_end(&timekeeper_seq); + write_seqcount_end(&tk_core.seq); raw_spin_unlock_irqrestore(&timekeeper_lock, flags); /* signal hrtimers about time change */ @@ -955,7 +964,7 @@ void timekeeping_inject_sleeptime(struct timespec *delta) */ static void timekeeping_resume(void) { - struct timekeeper *tk = &timekeeper; + struct timekeeper *tk = &tk_core.timekeeper; struct clocksource *clock = tk->clock; unsigned long flags; struct timespec64 ts_new, ts_delta; @@ -970,7 +979,7 @@ static void timekeeping_resume(void) clocksource_resume(); raw_spin_lock_irqsave(&timekeeper_lock, flags); - write_seqcount_begin(&timekeeper_seq); + write_seqcount_begin(&tk_core.seq); /* * After system resumes, we need to calculate the suspended time and @@ -1022,7 +1031,7 @@ static void timekeeping_resume(void) tk->ntp_error = 0; timekeeping_suspended = 0; timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET); - write_seqcount_end(&timekeeper_seq); + write_seqcount_end(&tk_core.seq); raw_spin_unlock_irqrestore(&timekeeper_lock, flags); touch_softlockup_watchdog(); @@ -1035,7 +1044,7 @@ static void timekeeping_resume(void) static int timekeeping_suspend(void) { - struct timekeeper *tk = &timekeeper; + struct timekeeper *tk = &tk_core.timekeeper; unsigned long flags; struct timespec64 delta, delta_delta; static struct timespec64 old_delta; @@ -1053,7 +1062,7 @@ static int timekeeping_suspend(void) persistent_clock_exist = true; raw_spin_lock_irqsave(&timekeeper_lock, flags); - write_seqcount_begin(&timekeeper_seq); + write_seqcount_begin(&tk_core.seq); timekeeping_forward_now(tk); timekeeping_suspended = 1; @@ -1078,7 +1087,7 @@ static int timekeeping_suspend(void) } timekeeping_update(tk, TK_MIRROR); - write_seqcount_end(&timekeeper_seq); + write_seqcount_end(&tk_core.seq); raw_spin_unlock_irqrestore(&timekeeper_lock, flags); clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL); @@ -1380,7 +1389,7 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset, void update_wall_time(void) { struct clocksource *clock; - struct timekeeper *real_tk = &timekeeper; + struct timekeeper *real_tk = &tk_core.timekeeper; struct timekeeper *tk = &shadow_timekeeper; cycle_t offset; int shift = 0, maxshift; @@ -1440,7 +1449,7 @@ void update_wall_time(void) */ clock_set |= accumulate_nsecs_to_secs(tk); - write_seqcount_begin(&timekeeper_seq); + write_seqcount_begin(&tk_core.seq); /* Update clock->cycle_last with the new value */ clock->cycle_last = tk->cycle_last; /* @@ -1450,12 +1459,12 @@ void update_wall_time(void) * requires changes to all other timekeeper usage sites as * well, i.e. move the timekeeper pointer getter into the * spinlocked/seqcount protected sections. And we trade this - * memcpy under the timekeeper_seq against one before we start + * memcpy under the tk_core.seq against one before we start * updating. 
*/ memcpy(real_tk, tk, sizeof(*tk)); timekeeping_update(real_tk, clock_set); - write_seqcount_end(&timekeeper_seq); + write_seqcount_end(&tk_core.seq); out: raw_spin_unlock_irqrestore(&timekeeper_lock, flags); if (clock_set) @@ -1476,7 +1485,7 @@ out: */ void getboottime(struct timespec *ts) { - struct timekeeper *tk = &timekeeper; + struct timekeeper *tk = &tk_core.timekeeper; struct timespec boottime = { .tv_sec = tk->wall_to_monotonic.tv_sec + tk->total_sleep_time.tv_sec, @@ -1499,7 +1508,7 @@ EXPORT_SYMBOL_GPL(getboottime); */ void get_monotonic_boottime(struct timespec *ts) { - struct timekeeper *tk = &timekeeper; + struct timekeeper *tk = &tk_core.timekeeper; struct timespec64 tomono, sleep, ret; s64 nsec; unsigned int seq; @@ -1507,13 +1516,13 @@ void get_monotonic_boottime(struct timespec *ts) WARN_ON(timekeeping_suspended); do { - seq = read_seqcount_begin(&timekeeper_seq); + seq = read_seqcount_begin(&tk_core.seq); ret.tv_sec = tk->xtime_sec; nsec = timekeeping_get_ns(tk); tomono = tk->wall_to_monotonic; sleep = tk->total_sleep_time; - } while (read_seqcount_retry(&timekeeper_seq, seq)); + } while (read_seqcount_retry(&tk_core.seq, seq)); ret.tv_sec += tomono.tv_sec + sleep.tv_sec; ret.tv_nsec = 0; @@ -1545,7 +1554,7 @@ EXPORT_SYMBOL_GPL(ktime_get_boottime); */ void monotonic_to_bootbased(struct timespec *ts) { - struct timekeeper *tk = &timekeeper; + struct timekeeper *tk = &tk_core.timekeeper; struct timespec64 ts64; ts64 = timespec_to_timespec64(*ts); @@ -1556,7 +1565,7 @@ EXPORT_SYMBOL_GPL(monotonic_to_bootbased); unsigned long get_seconds(void) { - struct timekeeper *tk = &timekeeper; + struct timekeeper *tk = &tk_core.timekeeper; return tk->xtime_sec; } @@ -1564,22 +1573,22 @@ EXPORT_SYMBOL(get_seconds); struct timespec __current_kernel_time(void) { - struct timekeeper *tk = &timekeeper; + struct timekeeper *tk = &tk_core.timekeeper; return timespec64_to_timespec(tk_xtime(tk)); } struct timespec current_kernel_time(void) { - struct timekeeper *tk = &timekeeper; + struct timekeeper *tk = &tk_core.timekeeper; struct timespec64 now; unsigned long seq; do { - seq = read_seqcount_begin(&timekeeper_seq); + seq = read_seqcount_begin(&tk_core.seq); now = tk_xtime(tk); - } while (read_seqcount_retry(&timekeeper_seq, seq)); + } while (read_seqcount_retry(&tk_core.seq, seq)); return timespec64_to_timespec(now); } @@ -1587,16 +1596,16 @@ EXPORT_SYMBOL(current_kernel_time); struct timespec get_monotonic_coarse(void) { - struct timekeeper *tk = &timekeeper; + struct timekeeper *tk = &tk_core.timekeeper; struct timespec64 now, mono; unsigned long seq; do { - seq = read_seqcount_begin(&timekeeper_seq); + seq = read_seqcount_begin(&tk_core.seq); now = tk_xtime(tk); mono = tk->wall_to_monotonic; - } while (read_seqcount_retry(&timekeeper_seq, seq)); + } while (read_seqcount_retry(&tk_core.seq, seq)); set_normalized_timespec64(&now, now.tv_sec + mono.tv_sec, now.tv_nsec + mono.tv_nsec); @@ -1624,19 +1633,19 @@ void do_timer(unsigned long ticks) ktime_t ktime_get_update_offsets_tick(ktime_t *offs_real, ktime_t *offs_boot, ktime_t *offs_tai) { - struct timekeeper *tk = &timekeeper; + struct timekeeper *tk = &tk_core.timekeeper; struct timespec64 ts; ktime_t now; unsigned int seq; do { - seq = read_seqcount_begin(&timekeeper_seq); + seq = read_seqcount_begin(&tk_core.seq); ts = tk_xtime(tk); *offs_real = tk->offs_real; *offs_boot = tk->offs_boot; *offs_tai = tk->offs_tai; - } while (read_seqcount_retry(&timekeeper_seq, seq)); + } while (read_seqcount_retry(&tk_core.seq, seq)); now = 
ktime_set(ts.tv_sec, ts.tv_nsec); now = ktime_sub(now, *offs_real); @@ -1656,13 +1665,13 @@ ktime_t ktime_get_update_offsets_tick(ktime_t *offs_real, ktime_t *offs_boot, ktime_t ktime_get_update_offsets_now(ktime_t *offs_real, ktime_t *offs_boot, ktime_t *offs_tai) { - struct timekeeper *tk = &timekeeper; + struct timekeeper *tk = &tk_core.timekeeper; ktime_t now; unsigned int seq; u64 secs, nsecs; do { - seq = read_seqcount_begin(&timekeeper_seq); + seq = read_seqcount_begin(&tk_core.seq); secs = tk->xtime_sec; nsecs = timekeeping_get_ns(tk); @@ -1670,7 +1679,7 @@ ktime_t ktime_get_update_offsets_now(ktime_t *offs_real, ktime_t *offs_boot, *offs_real = tk->offs_real; *offs_boot = tk->offs_boot; *offs_tai = tk->offs_tai; - } while (read_seqcount_retry(&timekeeper_seq, seq)); + } while (read_seqcount_retry(&tk_core.seq, seq)); now = ktime_add_ns(ktime_set(secs, 0), nsecs); now = ktime_sub(now, *offs_real); @@ -1683,14 +1692,14 @@ ktime_t ktime_get_update_offsets_now(ktime_t *offs_real, ktime_t *offs_boot, */ ktime_t ktime_get_monotonic_offset(void) { - struct timekeeper *tk = &timekeeper; + struct timekeeper *tk = &tk_core.timekeeper; unsigned long seq; struct timespec64 wtom; do { - seq = read_seqcount_begin(&timekeeper_seq); + seq = read_seqcount_begin(&tk_core.seq); wtom = tk->wall_to_monotonic; - } while (read_seqcount_retry(&timekeeper_seq, seq)); + } while (read_seqcount_retry(&tk_core.seq, seq)); return timespec64_to_ktime(wtom); } @@ -1701,7 +1710,7 @@ EXPORT_SYMBOL_GPL(ktime_get_monotonic_offset); */ int do_adjtimex(struct timex *txc) { - struct timekeeper *tk = &timekeeper; + struct timekeeper *tk = &tk_core.timekeeper; unsigned long flags; struct timespec64 ts; s32 orig_tai, tai; @@ -1726,7 +1735,7 @@ int do_adjtimex(struct timex *txc) getnstimeofday64(&ts); raw_spin_lock_irqsave(&timekeeper_lock, flags); - write_seqcount_begin(&timekeeper_seq); + write_seqcount_begin(&tk_core.seq); orig_tai = tai = tk->tai_offset; ret = __do_adjtimex(txc, &ts, &tai); @@ -1735,7 +1744,7 @@ int do_adjtimex(struct timex *txc) __timekeeping_set_tai_offset(tk, tai); timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET); } - write_seqcount_end(&timekeeper_seq); + write_seqcount_end(&tk_core.seq); raw_spin_unlock_irqrestore(&timekeeper_lock, flags); if (tai != orig_tai) @@ -1755,11 +1764,11 @@ void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts) unsigned long flags; raw_spin_lock_irqsave(&timekeeper_lock, flags); - write_seqcount_begin(&timekeeper_seq); + write_seqcount_begin(&tk_core.seq); __hardpps(phase_ts, raw_ts); - write_seqcount_end(&timekeeper_seq); + write_seqcount_end(&tk_core.seq); raw_spin_unlock_irqrestore(&timekeeper_lock, flags); } EXPORT_SYMBOL(hardpps); -- cgit v1.2.3-59-g8ed1b From 7c032df5570388044b4efda3d9f4d2ffb96a3116 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 16 Jul 2014 21:04:10 +0000 Subject: timekeeping: Provide internal ktime_t based data The ktime_t based interfaces are used a lot in performance critical code pathes. Add ktime_t based data so the interfaces don't have to convert from the xtime/timespec based data. 
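For illustration only (this sketch is not part of the patch): once base_mono is cached in the timekeeper, a ktime_t readout reduces to the cached base plus the current clocksource delta, under the usual seqcount protection. The function name below is hypothetical; it mirrors the shape of ktime_get() rather than copying it:

	/* sketch only: the shape of a ktime_t getter once base_mono is cached */
	static ktime_t sketch_ktime_get(void)	/* hypothetical name */
	{
		unsigned int seq;
		ktime_t base;
		s64 nsecs;

		do {
			seq   = read_seqcount_begin(&tk_core.seq);
			base  = tk_core.timekeeper.base_mono;	/* precomputed (xtime_sec + wtm_sec) * NSEC_PER_SEC + wtm_nsec */
			nsecs = timekeeping_get_ns(&tk_core.timekeeper);	/* ns accumulated since the last update */
		} while (read_seqcount_retry(&tk_core.seq, seq));

		return ktime_add_ns(base, nsecs);
	}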
Signed-off-by: Thomas Gleixner Signed-off-by: John Stultz --- include/linux/timekeeper_internal.h | 3 +++ kernel/time/timekeeping.c | 22 ++++++++++++++++++++++ 2 files changed, 25 insertions(+) (limited to 'include/linux') diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index 2cb96235c249..87e0992564f2 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -36,6 +36,9 @@ struct timekeeper { /* Clock shifted nano seconds */ u64 xtime_nsec; + /* Monotonic base time */ + ktime_t base_mono; + /* Current CLOCK_REALTIME time in seconds */ u64 xtime_sec; /* CLOCK_REALTIME to CLOCK_MONOTONIC offset */ diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index bfe3ea09afc9..86a92476c027 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -291,6 +291,26 @@ int pvclock_gtod_unregister_notifier(struct notifier_block *nb) } EXPORT_SYMBOL_GPL(pvclock_gtod_unregister_notifier); +/* + * Update the ktime_t based scalar nsec members of the timekeeper + */ +static inline void tk_update_ktime_data(struct timekeeper *tk) +{ + s64 nsec; + + /* + * The xtime based monotonic readout is: + * nsec = (xtime_sec + wtm_sec) * 1e9 + wtm_nsec + now(); + * The ktime based monotonic readout is: + * nsec = base_mono + now(); + * ==> base_mono = (xtime_sec + wtm_sec) * 1e9 + wtm_nsec + */ + nsec = (s64)(tk->xtime_sec + tk->wall_to_monotonic.tv_sec); + nsec *= NSEC_PER_SEC; + nsec += tk->wall_to_monotonic.tv_nsec; + tk->base_mono = ns_to_ktime(nsec); +} + /* must hold timekeeper_lock */ static void timekeeping_update(struct timekeeper *tk, unsigned int action) { @@ -301,6 +321,8 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action) update_vsyscall(tk); update_pvclock_gtod(tk, action & TK_CLOCK_WAS_SET); + tk_update_ktime_data(tk); + if (action & TK_MIRROR) memcpy(&shadow_timekeeper, &tk_core.timekeeper, sizeof(tk_core.timekeeper)); -- cgit v1.2.3-59-g8ed1b From 0077dc60f274b9a7e9aa705a34784fefb87e0eee Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 16 Jul 2014 21:04:13 +0000 Subject: timekeeping: Provide ktime_get_with_offset() Provide a helper function which lets us implement ktime_t based interfaces for real, boot and tai clocks. 
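As an illustrative sketch of the intended use (hypothetical caller, not taken from the series; do_work() is a stand-in): with ktime_get_with_offset() in place, the per-clock getters become trivial wrappers and callers get wall, boot or TAI based stamps without a timespec round trip:

	static void sketch_measure(void)
	{
		ktime_t start, stop, wall;
		s64 delta_ns;

		start = ktime_get();				/* CLOCK_MONOTONIC */
		do_work();					/* hypothetical workload */
		stop = ktime_get();
		delta_ns = ktime_to_ns(ktime_sub(stop, start));

		wall = ktime_get_with_offset(TK_OFFS_REAL);	/* what ktime_get_real() becomes */
		pr_info("took %lld ns at wall %lld ns\n", delta_ns, ktime_to_ns(wall));
	}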
Signed-off-by: Thomas Gleixner Signed-off-by: John Stultz --- include/linux/timekeeping.h | 9 +++++++++ kernel/time/timekeeping.c | 27 +++++++++++++++++++++++++++ 2 files changed, 36 insertions(+) (limited to 'include/linux') diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 3eb19e34cc20..a58e4b1879db 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -98,7 +98,16 @@ extern void getboottime(struct timespec *ts); /* * ktime_t based interfaces */ + +enum tk_offsets { + TK_OFFS_REAL, + TK_OFFS_BOOT, + TK_OFFS_TAI, + TK_OFFS_MAX, +}; + extern ktime_t ktime_get(void); +extern ktime_t ktime_get_with_offset(enum tk_offsets offs); extern ktime_t ktime_get_real(void); extern ktime_t ktime_get_boottime(void); extern ktime_t ktime_get_monotonic_offset(void); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index d5be1425cc03..7c5f5e4a006c 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -423,6 +423,33 @@ ktime_t ktime_get(void) } EXPORT_SYMBOL_GPL(ktime_get); +static ktime_t *offsets[TK_OFFS_MAX] = { + [TK_OFFS_REAL] = &tk_core.timekeeper.offs_real, + [TK_OFFS_BOOT] = &tk_core.timekeeper.offs_boot, + [TK_OFFS_TAI] = &tk_core.timekeeper.offs_tai, +}; + +ktime_t ktime_get_with_offset(enum tk_offsets offs) +{ + struct timekeeper *tk = &tk_core.timekeeper; + unsigned int seq; + ktime_t base, *offset = offsets[offs]; + s64 nsecs; + + WARN_ON(timekeeping_suspended); + + do { + seq = read_seqcount_begin(&tk_core.seq); + base = ktime_add(tk->base_mono, *offset); + nsecs = timekeeping_get_ns(tk); + + } while (read_seqcount_retry(&tk_core.seq, seq)); + + return ktime_add_ns(base, nsecs); + +} +EXPORT_SYMBOL_GPL(ktime_get_with_offset); + /** * ktime_get_ts64 - get the monotonic clock in timespec64 format * @ts: pointer to timespec variable -- cgit v1.2.3-59-g8ed1b From f5264d5d5a0729306cc792d84432b97785d2662a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 16 Jul 2014 21:04:14 +0000 Subject: timekeeping: Use ktime_t based data for ktime_get_real() Speed up the readout. Signed-off-by: Thomas Gleixner Signed-off-by: John Stultz --- include/linux/timekeeping.h | 9 ++++++++- kernel/time/timekeeping.c | 15 --------------- 2 files changed, 8 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index a58e4b1879db..68e6678a743b 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -108,11 +108,18 @@ enum tk_offsets { extern ktime_t ktime_get(void); extern ktime_t ktime_get_with_offset(enum tk_offsets offs); -extern ktime_t ktime_get_real(void); extern ktime_t ktime_get_boottime(void); extern ktime_t ktime_get_monotonic_offset(void); extern ktime_t ktime_get_clocktai(void); +/** + * ktime_get_real - get the real (wall-) time in ktime_t format + */ +static inline ktime_t ktime_get_real(void) +{ + return ktime_get_with_offset(TK_OFFS_REAL); +} + /* * RTC specific */ diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 7c5f5e4a006c..56db2e16970a 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -773,21 +773,6 @@ int timekeeping_notify(struct clocksource *clock) return tk->clock == clock ? 
0 : -1; } -/** - * ktime_get_real - get the real (wall-) time in ktime_t format - * - * returns the time in ktime_t format - */ -ktime_t ktime_get_real(void) -{ - struct timespec64 now; - - getnstimeofday64(&now); - - return timespec64_to_ktime(now); -} -EXPORT_SYMBOL_GPL(ktime_get_real); - /** * getrawmonotonic - Returns the raw monotonic time in a timespec * @ts: pointer to the timespec to be set -- cgit v1.2.3-59-g8ed1b From b82c817e2d16e818c472eb71019de521816000a3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 16 Jul 2014 21:04:16 +0000 Subject: timekeeping; Use ktime_t based data for ktime_get_boottime() Signed-off-by: Thomas Gleixner Signed-off-by: John Stultz --- include/linux/timekeeping.h | 12 +++++++++++- kernel/time/timekeeping.c | 17 ----------------- 2 files changed, 11 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 68e6678a743b..2fc606203c8c 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -108,7 +108,6 @@ enum tk_offsets { extern ktime_t ktime_get(void); extern ktime_t ktime_get_with_offset(enum tk_offsets offs); -extern ktime_t ktime_get_boottime(void); extern ktime_t ktime_get_monotonic_offset(void); extern ktime_t ktime_get_clocktai(void); @@ -120,6 +119,17 @@ static inline ktime_t ktime_get_real(void) return ktime_get_with_offset(TK_OFFS_REAL); } +/** + * ktime_get_boottime - Returns monotonic time since boot in ktime_t format + * + * This is similar to CLOCK_MONTONIC/ktime_get, but also includes the + * time spent in suspend. + */ +static inline ktime_t ktime_get_boottime(void) +{ + return ktime_get_with_offset(TK_OFFS_BOOT); +} + /* * RTC specific */ diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 56db2e16970a..5e60aa09af79 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1565,23 +1565,6 @@ void get_monotonic_boottime(struct timespec *ts) } EXPORT_SYMBOL_GPL(get_monotonic_boottime); -/** - * ktime_get_boottime - Returns monotonic time since boot in a ktime - * - * Returns the monotonic time since boot in a ktime - * - * This is similar to CLOCK_MONTONIC/ktime_get, but also - * includes the time spent in suspend. - */ -ktime_t ktime_get_boottime(void) -{ - struct timespec ts; - - get_monotonic_boottime(&ts); - return timespec_to_ktime(ts); -} -EXPORT_SYMBOL_GPL(ktime_get_boottime); - /** * monotonic_to_bootbased - Convert the monotonic time to boot based. 
* @ts: pointer to the timespec to be converted -- cgit v1.2.3-59-g8ed1b From afab07c0e91ecf098abf34573ccfcd86d6be26f9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 16 Jul 2014 21:04:17 +0000 Subject: timekeeping: Use ktime_t based data for ktime_get_clocktai() Signed-off-by: Thomas Gleixner Signed-off-by: John Stultz --- include/linux/timekeeping.h | 9 ++++++++- kernel/time/timekeeping.c | 15 --------------- 2 files changed, 8 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 2fc606203c8c..3050a7d0a5a9 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -109,7 +109,6 @@ enum tk_offsets { extern ktime_t ktime_get(void); extern ktime_t ktime_get_with_offset(enum tk_offsets offs); extern ktime_t ktime_get_monotonic_offset(void); -extern ktime_t ktime_get_clocktai(void); /** * ktime_get_real - get the real (wall-) time in ktime_t format @@ -130,6 +129,14 @@ static inline ktime_t ktime_get_boottime(void) return ktime_get_with_offset(TK_OFFS_BOOT); } +/** + * ktime_get_clocktai - Returns the TAI time of day in ktime_t format + */ +static inline ktime_t ktime_get_clocktai(void) +{ + return ktime_get_with_offset(TK_OFFS_TAI); +} + /* * RTC specific */ diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 5e60aa09af79..c083ae2c34b5 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -512,21 +512,6 @@ void timekeeping_clocktai(struct timespec *ts) } EXPORT_SYMBOL(timekeeping_clocktai); - -/** - * ktime_get_clocktai - Returns the TAI time of day in a ktime - * - * Returns the time of day in a ktime. - */ -ktime_t ktime_get_clocktai(void) -{ - struct timespec ts; - - timekeeping_clocktai(&ts); - return timespec_to_ktime(ts); -} -EXPORT_SYMBOL(ktime_get_clocktai); - #ifdef CONFIG_NTP_PPS /** -- cgit v1.2.3-59-g8ed1b From 9a6b51976ea3a326b6de534beec3fd87275f4ef6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 16 Jul 2014 21:04:22 +0000 Subject: timekeeping: Provide ktime_mono_to_any() ktime based conversion function to map a monotonic time stamp to a different CLOCK. 
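A minimal usage sketch (hypothetical caller, not from the series): an event stamped with the monotonic clock can be reported in wall time without re-reading the clocksource:

	ktime_t mono_stamp, real_stamp;
	struct timespec ts;

	mono_stamp = ktime_get();				/* taken when the event happened */
	real_stamp = ktime_mono_to_any(mono_stamp, TK_OFFS_REAL);
	ts = ktime_to_timespec(real_stamp);			/* e.g. for reporting to user space */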
Signed-off-by: Thomas Gleixner Signed-off-by: John Stultz --- include/linux/timekeeping.h | 9 +++++++++ kernel/time/timekeeping.c | 20 ++++++++++++++++++++ 2 files changed, 29 insertions(+) (limited to 'include/linux') diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 3050a7d0a5a9..910a98ef2154 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -109,6 +109,7 @@ enum tk_offsets { extern ktime_t ktime_get(void); extern ktime_t ktime_get_with_offset(enum tk_offsets offs); extern ktime_t ktime_get_monotonic_offset(void); +extern ktime_t ktime_mono_to_any(ktime_t tmono, enum tk_offsets offs); /** * ktime_get_real - get the real (wall-) time in ktime_t format @@ -137,6 +138,14 @@ static inline ktime_t ktime_get_clocktai(void) return ktime_get_with_offset(TK_OFFS_TAI); } +/** + * ktime_mono_to_real - Convert monotonic time to clock realtime + */ +static inline ktime_t ktime_mono_to_real(ktime_t mono) +{ + return ktime_mono_to_any(mono, TK_OFFS_REAL); +} + /* * RTC specific */ diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index e99350319eec..032e77a54a79 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -450,6 +450,26 @@ ktime_t ktime_get_with_offset(enum tk_offsets offs) } EXPORT_SYMBOL_GPL(ktime_get_with_offset); +/** + * ktime_mono_to_any() - convert mononotic time to any other time + * @tmono: time to convert. + * @offs: which offset to use + */ +ktime_t ktime_mono_to_any(ktime_t tmono, enum tk_offsets offs) +{ + ktime_t *offset = offsets[offs]; + unsigned long seq; + ktime_t tconv; + + do { + seq = read_seqcount_begin(&tk_core.seq); + tconv = ktime_add(tmono, *offset); + } while (read_seqcount_retry(&tk_core.seq, seq)); + + return tconv; +} +EXPORT_SYMBOL_GPL(ktime_mono_to_any); + /** * ktime_get_ts64 - get the monotonic clock in timespec64 format * @ts: pointer to timespec variable -- cgit v1.2.3-59-g8ed1b From dcaab54e348c5b66cca4802815ceebd37059e70c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 16 Jul 2014 21:04:28 +0000 Subject: timekeeping: Remove ktime_get_monotonic_offset() No more users. 
Signed-off-by: Thomas Gleixner Signed-off-by: John Stultz --- include/linux/timekeeping.h | 1 - kernel/time/timekeeping.c | 18 ------------------ 2 files changed, 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 910a98ef2154..64c81f367866 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -108,7 +108,6 @@ enum tk_offsets { extern ktime_t ktime_get(void); extern ktime_t ktime_get_with_offset(enum tk_offsets offs); -extern ktime_t ktime_get_monotonic_offset(void); extern ktime_t ktime_mono_to_any(ktime_t tmono, enum tk_offsets offs); /** diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 032e77a54a79..f7378eaebe67 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1707,24 +1707,6 @@ ktime_t ktime_get_update_offsets_now(ktime_t *offs_real, ktime_t *offs_boot, } #endif -/** - * ktime_get_monotonic_offset() - get wall_to_monotonic in ktime_t format - */ -ktime_t ktime_get_monotonic_offset(void) -{ - struct timekeeper *tk = &tk_core.timekeeper; - unsigned long seq; - struct timespec64 wtom; - - do { - seq = read_seqcount_begin(&tk_core.seq); - wtom = tk->wall_to_monotonic; - } while (read_seqcount_retry(&tk_core.seq, seq)); - - return timespec64_to_ktime(wtom); -} -EXPORT_SYMBOL_GPL(ktime_get_monotonic_offset); - /** * do_adjtimex() - Accessor function to NTP __do_adjtimex function */ -- cgit v1.2.3-59-g8ed1b From 897994e32b2b0a41ce4222c3b38a05bd2d1ee9fa Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 16 Jul 2014 21:04:29 +0000 Subject: timekeeping: Provide ktime_get[*]_ns() helpers A lot of code converts either timespecs or ktime_t to nanoseconds. Provide helper functions. Signed-off-by: Thomas Gleixner Signed-off-by: John Stultz --- include/linux/timekeeping.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 64c81f367866..903ecc10fcff 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -145,6 +145,21 @@ static inline ktime_t ktime_mono_to_real(ktime_t mono) return ktime_mono_to_any(mono, TK_OFFS_REAL); } +static inline u64 ktime_get_ns(void) +{ + return ktime_to_ns(ktime_get()); +} + +static inline u64 ktime_get_real_ns(void) +{ + return ktime_to_ns(ktime_get_real()); +} + +static inline u64 ktime_get_boot_ns(void) +{ + return ktime_to_ns(ktime_get_boottime()); +} + /* * RTC specific */ -- cgit v1.2.3-59-g8ed1b From 57e0be041d9e21a7397eed3b67a7936ac4ac83c0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 16 Jul 2014 21:04:32 +0000 Subject: sched: Make task->real_start_time nanoseconds based Simplify the only user of this data by removing the timespec conversion. 
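For illustration of the pattern the new nanosecond helpers replace (generic sketch, not the exact call sites changed below):

	struct timespec ts;
	u64 now_ns, boot_ns;

	/* old pattern: read a timespec, then convert to nanoseconds by hand */
	ktime_get_ts(&ts);
	now_ns = (u64)ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec;

	/* new pattern: a single call, no intermediate timespec */
	now_ns  = ktime_get_ns();		/* CLOCK_MONOTONIC in nanoseconds */
	boot_ns = ktime_get_boot_ns();		/* same, but including time spent in suspend */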
Signed-off-by: Thomas Gleixner Signed-off-by: John Stultz --- fs/proc/array.c | 7 +------ include/linux/sched.h | 2 +- kernel/fork.c | 3 +-- 3 files changed, 3 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/fs/proc/array.c b/fs/proc/array.c index 64db2bceac59..d7f9199217bb 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -473,13 +473,8 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, priority = task_prio(task); nice = task_nice(task); - /* Temporary variable needed for gcc-2.96 */ - /* convert timespec -> nsec*/ - start_time = - (unsigned long long)task->real_start_time.tv_sec * NSEC_PER_SEC - + task->real_start_time.tv_nsec; /* convert nsec -> ticks */ - start_time = nsec_to_clock_t(start_time); + start_time = nsec_to_clock_t(task->real_start_time); seq_printf(m, "%d (%s) %c", pid_nr_ns(pid, ns), tcomm, state); seq_put_decimal_ll(m, ' ', ppid); diff --git a/include/linux/sched.h b/include/linux/sched.h index 306f4f0c987a..67678fa76f99 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1368,7 +1368,7 @@ struct task_struct { #endif unsigned long nvcsw, nivcsw; /* context switch counts */ struct timespec start_time; /* monotonic time */ - struct timespec real_start_time; /* boot based time */ + u64 real_start_time; /* boot based time in nsec */ /* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */ unsigned long min_flt, maj_flt; diff --git a/kernel/fork.c b/kernel/fork.c index 8f541930ce26..a7ab82db2f60 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1263,8 +1263,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, posix_cpu_timers_init(p); ktime_get_ts(&p->start_time); - p->real_start_time = p->start_time; - monotonic_to_bootbased(&p->real_start_time); + p->real_start_time = ktime_get_boot_ns(); p->io_context = NULL; p->audit_context = NULL; if (clone_flags & CLONE_THREAD) -- cgit v1.2.3-59-g8ed1b From ccbf62d8a284cf181ac28c8e8407dd077d90dd4b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 16 Jul 2014 21:04:34 +0000 Subject: sched: Make task->start_time nanoseconds based Simplify the timespec to nsec/usec conversions. 
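As a worked example of the resulting arithmetic (illustrative numbers only): for a task whose start_time lies 83,000,000,000 ns in the past, ktime_get_ns() minus start_time gives delta = 83,000,000,000; do_div(delta, NSEC_PER_USEC) leaves 83,000,000 us for ac_etime, a further do_div(delta, USEC_PER_SEC) leaves 83 s, and ac_btime becomes get_seconds() - 83, the wall-clock second in which the task started.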
Signed-off-by: Thomas Gleixner Signed-off-by: John Stultz --- include/linux/sched.h | 2 +- kernel/acct.c | 10 +++------- kernel/fork.c | 2 +- kernel/tsacct.c | 19 +++++++++---------- 4 files changed, 14 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 67678fa76f99..10c6e829927f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1367,7 +1367,7 @@ struct task_struct { } vtime_snap_whence; #endif unsigned long nvcsw, nivcsw; /* context switch counts */ - struct timespec start_time; /* monotonic time */ + u64 start_time; /* monotonic time in nsec */ u64 real_start_time; /* boot based time in nsec */ /* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */ unsigned long min_flt, maj_flt; diff --git a/kernel/acct.c b/kernel/acct.c index 1be013c6053e..a1844f14c6d6 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -458,9 +458,7 @@ static void do_acct_process(struct bsd_acct_struct *acct, acct_t ac; mm_segment_t fs; unsigned long flim; - u64 elapsed; - u64 run_time; - struct timespec uptime; + u64 elapsed, run_time; struct tty_struct *tty; const struct cred *orig_cred; @@ -484,10 +482,8 @@ static void do_acct_process(struct bsd_acct_struct *acct, strlcpy(ac.ac_comm, current->comm, sizeof(ac.ac_comm)); /* calculate run_time in nsec*/ - ktime_get_ts(&uptime); - run_time = (u64)uptime.tv_sec*NSEC_PER_SEC + uptime.tv_nsec; - run_time -= (u64)current->group_leader->start_time.tv_sec * NSEC_PER_SEC - + current->group_leader->start_time.tv_nsec; + run_time = ktime_get_ns(); + run_time -= current->group_leader->start_time; /* convert nsec -> AHZ */ elapsed = nsec_to_AHZ(run_time); #if ACCT_VERSION==3 diff --git a/kernel/fork.c b/kernel/fork.c index a7ab82db2f60..627b7f80afb0 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1262,7 +1262,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, posix_cpu_timers_init(p); - ktime_get_ts(&p->start_time); + p->start_time = ktime_get_ns(); p->real_start_time = ktime_get_boot_ns(); p->io_context = NULL; p->audit_context = NULL; diff --git a/kernel/tsacct.c b/kernel/tsacct.c index ea6d170452c4..975cb49e32bf 100644 --- a/kernel/tsacct.c +++ b/kernel/tsacct.c @@ -31,20 +31,19 @@ void bacct_add_tsk(struct user_namespace *user_ns, struct taskstats *stats, struct task_struct *tsk) { const struct cred *tcred; - struct timespec uptime, ts; cputime_t utime, stime, utimescaled, stimescaled; - u64 ac_etime; + u64 delta; BUILD_BUG_ON(TS_COMM_LEN < TASK_COMM_LEN); - /* calculate task elapsed time in timespec */ - ktime_get_ts(&uptime); - ts = timespec_sub(uptime, tsk->start_time); - /* rebase elapsed time to usec (should never be negative) */ - ac_etime = timespec_to_ns(&ts); - do_div(ac_etime, NSEC_PER_USEC); - stats->ac_etime = ac_etime; - stats->ac_btime = get_seconds() - ts.tv_sec; + /* calculate task elapsed time in nsec */ + delta = ktime_get_ns() - tsk->start_time; + /* Convert to micro seconds */ + do_div(delta, NSEC_PER_USEC); + stats->ac_etime = delta; + /* Convert to seconds for btime */ + do_div(delta, USEC_PER_SEC); + stats->ac_btime = get_seconds() - delta; if (thread_group_leader(tsk)) { stats->ac_exitcode = tsk->exit_code; if (tsk->flags & PF_FORKNOEXEC) -- cgit v1.2.3-59-g8ed1b From 9667a23db0dc0bd4892f0ada7e4e71528eaeed62 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 16 Jul 2014 21:04:35 +0000 Subject: delayacct: Make accounting nanosecond based Kill the timespec juggling and calculate with plain 
nanoseconds. Signed-off-by: Thomas Gleixner Signed-off-by: John Stultz --- include/linux/sched.h | 4 ++-- kernel/delayacct.c | 34 ++++++++++++---------------------- 2 files changed, 14 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 10c6e829927f..653744ae8d27 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -813,7 +813,7 @@ struct task_delay_info { * associated with the operation is added to XXX_delay. * XXX_delay contains the accumulated delay time in nanoseconds. */ - struct timespec blkio_start, blkio_end; /* Shared by blkio, swapin */ + u64 blkio_start; /* Shared by blkio, swapin */ u64 blkio_delay; /* wait for sync block io completion */ u64 swapin_delay; /* wait for swapin block io completion */ u32 blkio_count; /* total count of the number of sync block */ @@ -821,7 +821,7 @@ struct task_delay_info { u32 swapin_count; /* total count of the number of swapin block */ /* io operations performed */ - struct timespec freepages_start, freepages_end; + u64 freepages_start; u64 freepages_delay; /* wait for memory reclaim */ u32 freepages_count; /* total count of memory reclaim */ }; diff --git a/kernel/delayacct.c b/kernel/delayacct.c index de699f42f9bc..cf2e65dddb19 100644 --- a/kernel/delayacct.c +++ b/kernel/delayacct.c @@ -46,32 +46,25 @@ void __delayacct_tsk_init(struct task_struct *tsk) } /* - * Finish delay accounting for a statistic using - * its timestamps (@start, @end), accumalator (@total) and @count + * Finish delay accounting for a statistic using its timestamps (@start), + * accumalator (@total) and @count */ - -static void delayacct_end(struct timespec *start, struct timespec *end, - u64 *total, u32 *count) +static void delayacct_end(u64 *start, u64 *total, u32 *count) { - struct timespec ts; - s64 ns; + s64 ns = ktime_get_ns() - *start; unsigned long flags; - ktime_get_ts(end); - ts = timespec_sub(*end, *start); - ns = timespec_to_ns(&ts); - if (ns < 0) - return; - - spin_lock_irqsave(¤t->delays->lock, flags); - *total += ns; - (*count)++; - spin_unlock_irqrestore(¤t->delays->lock, flags); + if (ns > 0) { + spin_lock_irqsave(¤t->delays->lock, flags); + *total += ns; + (*count)++; + spin_unlock_irqrestore(¤t->delays->lock, flags); + } } void __delayacct_blkio_start(void) { - ktime_get_ts(¤t->delays->blkio_start); + current->delays->blkio_start = ktime_get_ns(); } void __delayacct_blkio_end(void) @@ -79,12 +72,10 @@ void __delayacct_blkio_end(void) if (current->delays->flags & DELAYACCT_PF_SWAPIN) /* Swapin block I/O */ delayacct_end(¤t->delays->blkio_start, - ¤t->delays->blkio_end, ¤t->delays->swapin_delay, ¤t->delays->swapin_count); else /* Other block I/O */ delayacct_end(¤t->delays->blkio_start, - ¤t->delays->blkio_end, ¤t->delays->blkio_delay, ¤t->delays->blkio_count); } @@ -159,13 +150,12 @@ __u64 __delayacct_blkio_ticks(struct task_struct *tsk) void __delayacct_freepages_start(void) { - ktime_get_ts(¤t->delays->freepages_start); + current->delays->freepages_start = ktime_get_ns(); } void __delayacct_freepages_end(void) { delayacct_end(¤t->delays->freepages_start, - ¤t->delays->freepages_end, ¤t->delays->freepages_delay, ¤t->delays->freepages_count); } -- cgit v1.2.3-59-g8ed1b From d9bb5a43277d2dcc514fa693f741bbc38e2e2271 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 22 Jul 2014 17:48:04 -0400 Subject: svcrdma: Double the default credit limit The RDMA credit limit controls how many concurrent RPCs are allowed per connection. 
An NFS/RDMA client and server exchange their credit limits in the RPC/RDMA headers. The Linux client and the Solaris client and server allow 32 credits. The Linux server allows only 16, which limits its performance. Set the server's default credit limit to 32, like the other well- known implementations, so the out-of-the-shrinkwrap performance of the Linux server is better. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_rdma.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 5cf99a016368..975da754c778 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -174,8 +174,7 @@ struct svcxprt_rdma { * page size of 4k, or 32k * 2 ops / 4k = 16 outstanding RDMA_READ. */ #define RPCRDMA_ORD (64/4) #define RPCRDMA_SQ_DEPTH_MULT 8 -#define RPCRDMA_MAX_THREADS 16 -#define RPCRDMA_MAX_REQUESTS 16 +#define RPCRDMA_MAX_REQUESTS 32 #define RPCRDMA_MAX_REQ_SIZE 4096 /* svc_rdma_marshal.c */ -- cgit v1.2.3-59-g8ed1b From 0162d621ddf3bd02bf7de324dcf002d9c84c5059 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 23 Jul 2014 15:03:00 -0400 Subject: ftrace: Rename ftrace_ops field from trampolines to nr_trampolines Having two fields within the same struct that is off by one character can be confusing and error prone. Rename the counter "trampolines" to "nr_trampolines" to explicitly show it is a counter and not to be confused by the "trampoline" field. Suggested-by: Oleg Nesterov Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 2 +- kernel/trace/ftrace.c | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 7a5b7b97e539..6bb5e3f2a3b4 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -120,7 +120,7 @@ struct ftrace_ops { void *private; int __percpu *disabled; #ifdef CONFIG_DYNAMIC_FTRACE - int trampolines; + int nr_trampolines; struct ftrace_hash *notrace_hash; struct ftrace_hash *filter_hash; struct ftrace_hash *tramp_hash; diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 762806026561..eda69c9f78d0 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1513,7 +1513,7 @@ static void ftrace_remove_tramp(struct ftrace_ops *ops, * The tramp_hash entry will be removed at time * of update. 
*/ - ops->trampolines--; + ops->nr_trampolines--; rec->flags &= ~FTRACE_FL_TRAMP; } @@ -1522,7 +1522,7 @@ static void ftrace_clear_tramps(struct dyn_ftrace *rec) struct ftrace_ops *op; do_for_each_ftrace_op(op, ftrace_ops_list) { - if (op->trampolines) + if (op->nr_trampolines) ftrace_remove_tramp(op, rec); } while_for_each_ftrace_op(op); } @@ -1617,7 +1617,7 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops, */ if (ftrace_rec_count(rec) == 1 && ops->trampoline) { rec->flags |= FTRACE_FL_TRAMP; - ops->trampolines++; + ops->nr_trampolines++; } else { /* * If we are adding another function callback @@ -2185,7 +2185,7 @@ static int ftrace_save_ops_tramp_hash(struct ftrace_ops *ops) int size, bits; int ret; - size = ops->trampolines; + size = ops->nr_trampolines; bits = 0; /* * Make the hash size about 1/2 the # found @@ -2239,7 +2239,7 @@ static int ftrace_save_tramp_hashes(void) free_ftrace_hash(op->tramp_hash); op->tramp_hash = NULL; - if (op->trampolines) { + if (op->nr_trampolines) { ret = ftrace_save_ops_tramp_hash(op); if (ret) return ret; -- cgit v1.2.3-59-g8ed1b From 14a7004671246d1b799f545335995a9897de1268 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 16 Jul 2014 21:04:44 +0000 Subject: net: mlx5: Use ktime_get_ns() This code is beyond silly: struct timespec ts = ktime_get_ts(); ktime_t ktime = timespec_to_ktime(ts); Further down the code builds the delta of two ktime_t values and converts the result to nanoseconds. Use ktime_get_ns() and replace all the nonsense. Signed-off-by: Thomas Gleixner Cc: Eli Cohen Signed-off-by: John Stultz --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 16 ++++------------ include/linux/mlx5/driver.h | 4 ++-- 2 files changed, 6 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 87d1b018a9c3..67f8f5a1dc86 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -548,7 +548,7 @@ static void cmd_work_handler(struct work_struct *work) lay->status_own = CMD_OWNER_HW; set_signature(ent, !cmd->checksum_disabled); dump_command(dev, ent, 1); - ktime_get_ts(&ent->ts1); + ent->ts1 = ktime_get_ns(); /* ring doorbell after the descriptor is valid */ wmb(); @@ -637,7 +637,6 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in, { struct mlx5_cmd *cmd = &dev->cmd; struct mlx5_cmd_work_ent *ent; - ktime_t t1, t2, delta; struct mlx5_cmd_stats *stats; int err = 0; s64 ds; @@ -668,10 +667,7 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in, if (err == -ETIMEDOUT) goto out; - t1 = timespec_to_ktime(ent->ts1); - t2 = timespec_to_ktime(ent->ts2); - delta = ktime_sub(t2, t1); - ds = ktime_to_ns(delta); + ds = ent->ts2 - ent->ts1; op = be16_to_cpu(((struct mlx5_inbox_hdr *)in->first.data)->opcode); if (op < ARRAY_SIZE(cmd->stats)) { stats = &cmd->stats[op]; @@ -1135,7 +1131,6 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, unsigned long vector) void *context; int err; int i; - ktime_t t1, t2, delta; s64 ds; struct mlx5_cmd_stats *stats; unsigned long flags; @@ -1149,7 +1144,7 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, unsigned long vector) sem = &cmd->pages_sem; else sem = &cmd->sem; - ktime_get_ts(&ent->ts2); + ent->ts2 = ktime_get_ns(); memcpy(ent->out->first.data, ent->lay->out, sizeof(ent->lay->out)); dump_command(dev, ent, 0); if (!ent->ret) { @@ -1163,10 +1158,7 @@ void 
mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, unsigned long vector) } free_ent(cmd, ent->idx); if (ent->callback) { - t1 = timespec_to_ktime(ent->ts1); - t2 = timespec_to_ktime(ent->ts2); - delta = ktime_sub(t2, t1); - ds = ktime_to_ns(delta); + ds = ent->ts2 - ent->ts1; if (ent->op < ARRAY_SIZE(cmd->stats)) { stats = &cmd->stats[ent->op]; spin_lock_irqsave(&stats->lock, flags); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 2bce4aad2570..52d631ca32cf 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -604,8 +604,8 @@ struct mlx5_cmd_work_ent { int page_queue; u8 status; u8 token; - struct timespec ts1; - struct timespec ts2; + u64 ts1; + u64 ts2; u16 op; }; -- cgit v1.2.3-59-g8ed1b From fb31cc153dec0d4bdd9a5d7ce60d61acd04b4304 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 16 Jul 2014 21:04:49 +0000 Subject: iio: Use ktime_get_real_ns() No idea why iio needs wall clock based time stamps, but we can avoid the timespec conversion dance by using the new interfaces. Signed-off-by: Thomas Gleixner Acked-by: Jonathan Cameron Signed-off-by: John Stultz --- include/linux/iio/iio.h | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index ccde91725f98..15dc6bc2bdd2 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -277,14 +277,7 @@ static inline bool iio_channel_has_info(const struct iio_chan_spec *chan, **/ static inline s64 iio_get_time_ns(void) { - struct timespec ts; - /* - * calls getnstimeofday. - * If hrtimers then up to ns accurate, if not microsecond. - */ - ktime_get_real_ts(&ts); - - return timespec_to_ns(&ts); + return ktime_get_real_ns(); } /* Device operating modes */ -- cgit v1.2.3-59-g8ed1b From 250fade8af2ac5dda8d5106ea06738b6f9e768a7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 16 Jul 2014 21:04:55 +0000 Subject: timekeeping: Remove monotonic_to_bootbased No more users. Signed-off-by: Thomas Gleixner Signed-off-by: John Stultz --- include/linux/timekeeping.h | 1 - kernel/time/timekeeping.c | 15 --------------- 2 files changed, 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 903ecc10fcff..8ea3ca1b0ee5 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -27,7 +27,6 @@ struct timespec __current_kernel_time(void); */ struct timespec get_monotonic_coarse(void); extern void getrawmonotonic(struct timespec *ts); -extern void monotonic_to_bootbased(struct timespec *ts); extern void get_monotonic_boottime(struct timespec *ts); extern void ktime_get_ts64(struct timespec64 *ts); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index f7378eaebe67..b35613508725 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1570,21 +1570,6 @@ void get_monotonic_boottime(struct timespec *ts) } EXPORT_SYMBOL_GPL(get_monotonic_boottime); -/** - * monotonic_to_bootbased - Convert the monotonic time to boot based. 
- * @ts: pointer to the timespec to be converted - */ -void monotonic_to_bootbased(struct timespec *ts) -{ - struct timekeeper *tk = &tk_core.timekeeper; - struct timespec64 ts64; - - ts64 = timespec_to_timespec64(*ts); - ts64 = timespec64_add(ts64, tk->total_sleep_time); - *ts = timespec64_to_timespec(ts64); -} -EXPORT_SYMBOL_GPL(monotonic_to_bootbased); - unsigned long get_seconds(void) { struct timekeeper *tk = &tk_core.timekeeper; -- cgit v1.2.3-59-g8ed1b From 48f18fd6addc199f330d838d54fe7b0a0892adaa Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 16 Jul 2014 21:04:57 +0000 Subject: timekeeping: Use ktime_get_boottime() for get_monotonic_boottime() get_monotonic_boottime() is not used in fast pathes, so the extra timespec conversion is not problematic. Signed-off-by: Thomas Gleixner Signed-off-by: John Stultz --- include/linux/timekeeping.h | 9 ++++++++- kernel/time/timekeeping.c | 34 ---------------------------------- 2 files changed, 8 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 8ea3ca1b0ee5..7b8f20007871 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -27,7 +27,6 @@ struct timespec __current_kernel_time(void); */ struct timespec get_monotonic_coarse(void); extern void getrawmonotonic(struct timespec *ts); -extern void get_monotonic_boottime(struct timespec *ts); extern void ktime_get_ts64(struct timespec64 *ts); extern int __getnstimeofday64(struct timespec64 *tv); @@ -159,6 +158,14 @@ static inline u64 ktime_get_boot_ns(void) return ktime_to_ns(ktime_get_boottime()); } +/* + * Timespec interfaces utilizing the ktime based ones + */ +static inline void get_monotonic_boottime(struct timespec *ts) +{ + *ts = ktime_to_timespec(ktime_get_boottime()); +} + /* * RTC specific */ diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index b35613508725..f63476fb0daf 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1536,40 +1536,6 @@ void getboottime(struct timespec *ts) } EXPORT_SYMBOL_GPL(getboottime); -/** - * get_monotonic_boottime - Returns monotonic time since boot - * @ts: pointer to the timespec to be set - * - * Returns the monotonic time since boot in a timespec. - * - * This is similar to CLOCK_MONTONIC/ktime_get_ts, but also - * includes the time spent in suspend. - */ -void get_monotonic_boottime(struct timespec *ts) -{ - struct timekeeper *tk = &tk_core.timekeeper; - struct timespec64 tomono, sleep, ret; - s64 nsec; - unsigned int seq; - - WARN_ON(timekeeping_suspended); - - do { - seq = read_seqcount_begin(&tk_core.seq); - ret.tv_sec = tk->xtime_sec; - nsec = timekeeping_get_ns(tk); - tomono = tk->wall_to_monotonic; - sleep = tk->total_sleep_time; - - } while (read_seqcount_retry(&tk_core.seq, seq)); - - ret.tv_sec += tomono.tv_sec + sleep.tv_sec; - ret.tv_nsec = 0; - timespec64_add_ns(&ret, nsec + tomono.tv_nsec + sleep.tv_nsec); - *ts = timespec64_to_timespec(ret); -} -EXPORT_SYMBOL_GPL(get_monotonic_boottime); - unsigned long get_seconds(void) { struct timekeeper *tk = &tk_core.timekeeper; -- cgit v1.2.3-59-g8ed1b From 47da70d32535000ec29cc206cfc1d318fbd8761f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 16 Jul 2014 21:05:00 +0000 Subject: timekeeping: Remove timekeeper.total_sleep_time No more users. 
Remove it Signed-off-by: Thomas Gleixner Signed-off-by: John Stultz --- include/linux/timekeeper_internal.h | 6 ++---- kernel/time/timekeeping.c | 14 +++----------- 2 files changed, 5 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index 87e0992564f2..8e5d77a01787 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -20,8 +20,8 @@ * the tv_nsec part positive so we can use the usual normalization. * * wall_to_monotonic is moved after resume from suspend for the - * monotonic time not to jump. We need to add total_sleep_time to - * wall_to_monotonic to get the real boot based time offset. + * monotonic time not to jump. To calculate the real boot time offset + * we need to do offs_real - offs_boot. * * - wall_to_monotonic is no longer the boot time, getboottime must be * used instead. @@ -51,8 +51,6 @@ struct timekeeper { /* Offset clock monotonic -> clock tai */ ktime_t offs_tai; - /* time spent in suspend */ - struct timespec64 total_sleep_time; /* The current UTC to TAI offset in seconds */ s32 tai_offset; diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 3edc0c1d6fe8..50d5de05b837 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -97,13 +97,9 @@ static void tk_set_wall_to_mono(struct timekeeper *tk, struct timespec64 wtm) tk->offs_tai = ktime_add(tk->offs_real, ktime_set(tk->tai_offset, 0)); } -static void tk_set_sleep_time(struct timekeeper *tk, struct timespec64 t) +static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta) { - /* Verify consistency before modifying */ - WARN_ON_ONCE(tk->offs_boot.tv64 != timespec64_to_ktime(tk->total_sleep_time).tv64); - - tk->total_sleep_time = t; - tk->offs_boot = timespec64_to_ktime(t); + tk->offs_boot = ktime_add(tk->offs_boot, delta); } /** @@ -919,10 +915,6 @@ void __init timekeeping_init(void) set_normalized_timespec64(&tmp, -boot.tv_sec, -boot.tv_nsec); tk_set_wall_to_mono(tk, tmp); - tmp.tv_sec = 0; - tmp.tv_nsec = 0; - tk_set_sleep_time(tk, tmp); - timekeeping_update(tk, TK_MIRROR); write_seqcount_end(&tk_core.seq); @@ -950,7 +942,7 @@ static void __timekeeping_inject_sleeptime(struct timekeeper *tk, } tk_xtime_add(tk, delta); tk_set_wall_to_mono(tk, timespec64_sub(tk->wall_to_monotonic, *delta)); - tk_set_sleep_time(tk, timespec64_add(tk->total_sleep_time, *delta)); + tk_update_sleep_time(tk, timespec64_to_ktime(*delta)); tk_debug_account_sleep_time(delta); } -- cgit v1.2.3-59-g8ed1b From 61edec81d260bc96a73c878bbdb4c614460346da Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 16 Jul 2014 21:05:01 +0000 Subject: timekeeping: Simplify timekeeping_clocktai() timekeeping_clocktai() is not used in fast pathes, so the extra timespec conversion is not problematic. 
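A note on the total_sleep_time removal just above (illustration derived from its updated header comment, not code from the series): the same information remains available from the two ktime_t offsets in the timekeeper:

	ktime_t sleep_time, boot_wall;

	sleep_time = tk->offs_boot;				/* what total_sleep_time used to hold */
	boot_wall  = ktime_sub(tk->offs_real, tk->offs_boot);	/* wall-clock time of boot */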
Signed-off-by: Thomas Gleixner Signed-off-by: John Stultz --- include/linux/timekeeping.h | 5 +++++ kernel/time/timekeeping.c | 31 ------------------------------- 2 files changed, 5 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 7b8f20007871..f0f12a84a31b 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -166,6 +166,11 @@ static inline void get_monotonic_boottime(struct timespec *ts) *ts = ktime_to_timespec(ktime_get_boottime()); } +static inline void timekeeping_clocktai(struct timespec *ts) +{ + *ts = ktime_to_timespec(ktime_get_clocktai()); +} + /* * RTC specific */ diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 50d5de05b837..118e91e3071c 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -497,37 +497,6 @@ void ktime_get_ts64(struct timespec64 *ts) } EXPORT_SYMBOL_GPL(ktime_get_ts64); - -/** - * timekeeping_clocktai - Returns the TAI time of day in a timespec - * @ts: pointer to the timespec to be set - * - * Returns the time of day in a timespec. - */ -void timekeeping_clocktai(struct timespec *ts) -{ - struct timekeeper *tk = &tk_core.timekeeper; - struct timespec64 ts64; - unsigned long seq; - u64 nsecs; - - WARN_ON(timekeeping_suspended); - - do { - seq = read_seqcount_begin(&tk_core.seq); - - ts64.tv_sec = tk->xtime_sec + tk->tai_offset; - nsecs = timekeeping_get_ns(tk); - - } while (read_seqcount_retry(&tk_core.seq, seq)); - - ts64.tv_nsec = 0; - timespec64_add_ns(&ts64, nsecs); - *ts = timespec64_to_timespec(ts64); - -} -EXPORT_SYMBOL(timekeeping_clocktai); - #ifdef CONFIG_NTP_PPS /** -- cgit v1.2.3-59-g8ed1b From f519b1a2e08c913375324a927992bb328387f169 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 16 Jul 2014 21:05:04 +0000 Subject: timekeeping: Provide ktime_get_raw() Provide a ktime_t based interface for raw monotonic time. Signed-off-by: Thomas Gleixner Signed-off-by: John Stultz --- include/linux/timekeeper_internal.h | 3 +++ include/linux/timekeeping.h | 6 ++++++ kernel/time/timekeeping.c | 25 +++++++++++++++++++++++++ 3 files changed, 34 insertions(+) (limited to 'include/linux') diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index 8e5d77a01787..2e20275a7083 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -54,6 +54,9 @@ struct timekeeper { /* The current UTC to TAI offset in seconds */ s32 tai_offset; + /* Monotonic raw base time */ + ktime_t base_raw; + /* The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock. 
*/ struct timespec64 raw_time; diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index f0f12a84a31b..58ad7eff83ff 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -107,6 +107,7 @@ enum tk_offsets { extern ktime_t ktime_get(void); extern ktime_t ktime_get_with_offset(enum tk_offsets offs); extern ktime_t ktime_mono_to_any(ktime_t tmono, enum tk_offsets offs); +extern ktime_t ktime_get_raw(void); /** * ktime_get_real - get the real (wall-) time in ktime_t format @@ -158,6 +159,11 @@ static inline u64 ktime_get_boot_ns(void) return ktime_to_ns(ktime_get_boottime()); } +static inline u64 ktime_get_raw_ns(void) +{ + return ktime_to_ns(ktime_get_raw()); +} + /* * Timespec interfaces utilizing the ktime based ones */ diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 118e91e3071c..af8051f4420d 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -305,6 +305,9 @@ static inline void tk_update_ktime_data(struct timekeeper *tk) nsec *= NSEC_PER_SEC; nsec += tk->wall_to_monotonic.tv_nsec; tk->base_mono = ns_to_ktime(nsec); + + /* Update the monotonic raw base */ + tk->base_raw = timespec64_to_ktime(tk->raw_time); } /* must hold timekeeper_lock */ @@ -466,6 +469,27 @@ ktime_t ktime_mono_to_any(ktime_t tmono, enum tk_offsets offs) } EXPORT_SYMBOL_GPL(ktime_mono_to_any); +/** + * ktime_get_raw - Returns the raw monotonic time in ktime_t format + */ +ktime_t ktime_get_raw(void) +{ + struct timekeeper *tk = &tk_core.timekeeper; + unsigned int seq; + ktime_t base; + s64 nsecs; + + do { + seq = read_seqcount_begin(&tk_core.seq); + base = tk->base_raw; + nsecs = timekeeping_get_ns_raw(tk); + + } while (read_seqcount_retry(&tk_core.seq, seq)); + + return ktime_add_ns(base, nsecs); +} +EXPORT_SYMBOL_GPL(ktime_get_raw); + /** * ktime_get_ts64 - get the monotonic clock in timespec64 format * @ts: pointer to timespec variable @@ -878,6 +902,7 @@ void __init timekeeping_init(void) tk_set_xtime(tk, &now); tk->raw_time.tv_sec = 0; tk->raw_time.tv_nsec = 0; + tk->base_raw.tv64 = 0; if (boot.tv_sec == 0 && boot.tv_nsec == 0) boot = tk_xtime(tk); -- cgit v1.2.3-59-g8ed1b From 4a0e637738f06673725792d74eed67f8779b62c7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 16 Jul 2014 21:05:13 +0000 Subject: clocksource: Get rid of cycle_last cycle_last was added to the clocksource to support the TSC validation. We moved that to the core code, so we can get rid of the extra copy. 
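For context, a sketch mirroring timekeeping_get_ns() after this change (not the patch itself): the last cycle value is only needed where the elapsed nanoseconds are computed, and that now reads the copy kept in the timekeeper, so the duplicate in struct clocksource can go:

	cycle_t now, delta;
	s64 nsec;

	now   = clock->read(clock);
	delta = clocksource_delta(now, tk->cycle_last, clock->mask);	/* only consumer of the last cycle value */
	nsec  = (delta * tk->mult + tk->xtime_nsec) >> tk->shift;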
Signed-off-by: Thomas Gleixner Signed-off-by: John Stultz --- arch/arm64/kernel/vdso.c | 2 +- arch/ia64/kernel/time.c | 4 ++-- arch/powerpc/kernel/time.c | 4 ++-- arch/s390/kernel/time.c | 2 +- arch/tile/kernel/time.c | 2 +- arch/x86/kernel/vsyscall_gtod.c | 2 +- arch/x86/kvm/x86.c | 2 +- include/linux/clocksource.h | 2 -- include/linux/timekeeper_internal.h | 7 ++++--- kernel/time/timekeeping.c | 23 +++++++++++------------ 10 files changed, 24 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 50384fec56c4..574672f001f7 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -224,7 +224,7 @@ void update_vsyscall(struct timekeeper *tk) vdso_data->wtm_clock_nsec = tk->wall_to_monotonic.tv_nsec; if (!use_syscall) { - vdso_data->cs_cycle_last = tk->clock->cycle_last; + vdso_data->cs_cycle_last = tk->cycle_last; vdso_data->xtime_clock_sec = tk->xtime_sec; vdso_data->xtime_clock_nsec = tk->xtime_nsec; vdso_data->cs_mult = tk->mult; diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index 71c52bc7c28d..11dc42da7daf 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -441,7 +441,7 @@ void update_vsyscall_tz(void) } void update_vsyscall_old(struct timespec *wall, struct timespec *wtm, - struct clocksource *c, u32 mult) + struct clocksource *c, u32 mult, cycles_t cycle_last) { write_seqcount_begin(&fsyscall_gtod_data.seq); @@ -450,7 +450,7 @@ void update_vsyscall_old(struct timespec *wall, struct timespec *wtm, fsyscall_gtod_data.clk_mult = mult; fsyscall_gtod_data.clk_shift = c->shift; fsyscall_gtod_data.clk_fsys_mmio = c->archdata.fsys_mmio; - fsyscall_gtod_data.clk_cycle_last = c->cycle_last; + fsyscall_gtod_data.clk_cycle_last = cycle_last; /* copy kernel time structures */ fsyscall_gtod_data.wall_time.tv_sec = wall->tv_sec; diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 9fff9cdcc519..368ab374d33c 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -741,7 +741,7 @@ static cycle_t timebase_read(struct clocksource *cs) } void update_vsyscall_old(struct timespec *wall_time, struct timespec *wtm, - struct clocksource *clock, u32 mult) + struct clocksource *clock, u32 mult, cycle_t cycle_last) { u64 new_tb_to_xs, new_stamp_xsec; u32 frac_sec; @@ -774,7 +774,7 @@ void update_vsyscall_old(struct timespec *wall_time, struct timespec *wtm, * We expect the caller to have done the first increment of * vdso_data->tb_update_count already. */ - vdso_data->tb_orig_stamp = clock->cycle_last; + vdso_data->tb_orig_stamp = cycle_last; vdso_data->stamp_xsec = new_stamp_xsec; vdso_data->tb_to_xs = new_tb_to_xs; vdso_data->wtom_clock_sec = wtm->tv_sec; diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 0931b110c826..97950f392613 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -220,7 +220,7 @@ void update_vsyscall(struct timekeeper *tk) /* Make userspace gettimeofday spin until we're done. 
*/ ++vdso_data->tb_update_count; smp_wmb(); - vdso_data->xtime_tod_stamp = tk->clock->cycle_last; + vdso_data->xtime_tod_stamp = tk->cycle_last; vdso_data->xtime_clock_sec = tk->xtime_sec; vdso_data->xtime_clock_nsec = tk->xtime_nsec; vdso_data->wtom_clock_sec = diff --git a/arch/tile/kernel/time.c b/arch/tile/kernel/time.c index ae70155c2f16..d22d5bfc1e4e 100644 --- a/arch/tile/kernel/time.c +++ b/arch/tile/kernel/time.c @@ -269,7 +269,7 @@ void update_vsyscall(struct timekeeper *tk) /* Userspace gettimeofday will spin while this value is odd. */ ++vdso_data->tb_update_count; smp_wmb(); - vdso_data->xtime_tod_stamp = clock->cycle_last; + vdso_data->xtime_tod_stamp = tk->cycle_last; vdso_data->xtime_clock_sec = tk->xtime_sec; vdso_data->xtime_clock_nsec = tk->xtime_nsec; vdso_data->wtom_clock_sec = wtm->tv_sec; diff --git a/arch/x86/kernel/vsyscall_gtod.c b/arch/x86/kernel/vsyscall_gtod.c index 9531fbb123ba..c3cb3c144591 100644 --- a/arch/x86/kernel/vsyscall_gtod.c +++ b/arch/x86/kernel/vsyscall_gtod.c @@ -32,7 +32,7 @@ void update_vsyscall(struct timekeeper *tk) /* copy vsyscall data */ vdata->vclock_mode = tk->clock->archdata.vclock_mode; - vdata->cycle_last = tk->clock->cycle_last; + vdata->cycle_last = tk->cycle_last; vdata->mask = tk->clock->mask; vdata->mult = tk->mult; vdata->shift = tk->shift; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 63832f5110b6..7b25125f3f42 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1001,7 +1001,7 @@ static void update_pvclock_gtod(struct timekeeper *tk) /* copy pvclock gtod data */ vdata->clock.vclock_mode = tk->clock->archdata.vclock_mode; - vdata->clock.cycle_last = tk->clock->cycle_last; + vdata->clock.cycle_last = tk->cycle_last; vdata->clock.mask = tk->clock->mask; vdata->clock.mult = tk->mult; vdata->clock.shift = tk->shift; diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index a16b497d5159..653f0e2b6ca9 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -162,7 +162,6 @@ extern u64 timecounter_cyc2time(struct timecounter *tc, * @archdata: arch-specific data * @suspend: suspend function for the clocksource, if necessary * @resume: resume function for the clocksource, if necessary - * @cycle_last: most recent cycle counter value seen by ::read() * @owner: module reference, must be set by clocksource in modules */ struct clocksource { @@ -171,7 +170,6 @@ struct clocksource { * clocksource itself is cacheline aligned. */ cycle_t (*read)(struct clocksource *cs); - cycle_t cycle_last; cycle_t mask; u32 mult; u32 shift; diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index 2e20275a7083..cb88096222c0 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -29,6 +29,8 @@ struct timekeeper { /* Current clocksource used for timekeeping. */ struct clocksource *clock; + /* Last cycle value */ + cycle_t cycle_last; /* NTP adjusted clock multiplier */ u32 mult; /* The shift value of the current clocksource. */ @@ -62,8 +64,6 @@ struct timekeeper { /* Number of clock cycles in one NTP interval. */ cycle_t cycle_interval; - /* Last cycle value (also stored in clock->cycle_last) */ - cycle_t cycle_last; /* Number of clock shifted nano seconds in one NTP interval. 
*/ u64 xtime_interval; /* shifted nano seconds left over when rounding cycle_interval */ @@ -91,7 +91,8 @@ extern void update_vsyscall_tz(void); #elif defined(CONFIG_GENERIC_TIME_VSYSCALL_OLD) extern void update_vsyscall_old(struct timespec *ts, struct timespec *wtm, - struct clocksource *c, u32 mult); + struct clocksource *c, u32 mult, + cycles_t cycle_last); extern void update_vsyscall_tz(void); #else diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 531805013786..4e748c404749 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -121,7 +121,7 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock) old_clock = tk->clock; tk->clock = clock; - tk->cycle_last = clock->cycle_last = clock->read(clock); + tk->cycle_last = clock->read(clock); /* Do the ns -> cycle conversion first, using original mult */ tmp = NTP_INTERVAL_LENGTH; @@ -182,7 +182,7 @@ static inline s64 timekeeping_get_ns(struct timekeeper *tk) cycle_now = clock->read(clock); /* calculate the delta since the last update_wall_time: */ - delta = clocksource_delta(cycle_now, clock->cycle_last, clock->mask); + delta = clocksource_delta(cycle_now, tk->cycle_last, clock->mask); nsec = delta * tk->mult + tk->xtime_nsec; nsec >>= tk->shift; @@ -202,7 +202,7 @@ static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk) cycle_now = clock->read(clock); /* calculate the delta since the last update_wall_time: */ - delta = clocksource_delta(cycle_now, clock->cycle_last, clock->mask); + delta = clocksource_delta(cycle_now, tk->cycle_last, clock->mask); /* convert delta to nanoseconds. */ nsec = clocksource_cyc2ns(delta, clock->mult, clock->shift); @@ -218,7 +218,8 @@ static inline void update_vsyscall(struct timekeeper *tk) struct timespec xt; xt = tk_xtime(tk); - update_vsyscall_old(&xt, &tk->wall_to_monotonic, tk->clock, tk->mult); + update_vsyscall_old(&xt, &tk->wall_to_monotonic, tk->clock, tk->mult, + tk->cycle_last); } static inline void old_vsyscall_fixup(struct timekeeper *tk) @@ -342,8 +343,8 @@ static void timekeeping_forward_now(struct timekeeper *tk) clock = tk->clock; cycle_now = clock->read(clock); - delta = clocksource_delta(cycle_now, clock->cycle_last, clock->mask); - tk->cycle_last = clock->cycle_last = cycle_now; + delta = clocksource_delta(cycle_now, tk->cycle_last, clock->mask); + tk->cycle_last = cycle_now; tk->xtime_nsec += delta * tk->mult; @@ -1020,13 +1021,13 @@ static void timekeeping_resume(void) */ cycle_now = clock->read(clock); if ((clock->flags & CLOCK_SOURCE_SUSPEND_NONSTOP) && - cycle_now > clock->cycle_last) { + cycle_now > tk->cycle_last) { u64 num, max = ULLONG_MAX; u32 mult = clock->mult; u32 shift = clock->shift; s64 nsec = 0; - cycle_delta = clocksource_delta(cycle_now, clock->cycle_last, + cycle_delta = clocksource_delta(cycle_now, tk->cycle_last, clock->mask); /* @@ -1053,7 +1054,7 @@ static void timekeeping_resume(void) __timekeeping_inject_sleeptime(tk, &ts_delta); /* Re-base the last cycle value */ - tk->cycle_last = clock->cycle_last = cycle_now; + tk->cycle_last = cycle_now; tk->ntp_error = 0; timekeeping_suspended = 0; timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET); @@ -1433,7 +1434,7 @@ void update_wall_time(void) #ifdef CONFIG_ARCH_USES_GETTIMEOFFSET offset = real_tk->cycle_interval; #else - offset = clocksource_delta(clock->read(clock), clock->cycle_last, + offset = clocksource_delta(clock->read(clock), tk->cycle_last, clock->mask); #endif @@ -1477,8 +1478,6 @@ void update_wall_time(void) clock_set |= 
accumulate_nsecs_to_secs(tk); write_seqcount_begin(&tk_core.seq); - /* Update clock->cycle_last with the new value */ - clock->cycle_last = tk->cycle_last; /* * Update the real timekeeper. * -- cgit v1.2.3-59-g8ed1b From 6d3aadf3e180e09dbefab16478c6876b584ce16e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 16 Jul 2014 21:05:15 +0000 Subject: timekeeping: Restructure the timekeeper some more Access to time requires to touch two cachelines at minimum 1) The timekeeper data structure 2) The clocksource data structure The access to the clocksource data structure can be avoided as almost all clocksource implementations ignore the argument to the read callback, which is a pointer to the clocksource. But the core needs to touch it to access the members @read and @mask. So we are better off by copying the @read function pointer and the @mask from the clocksource to the core data structure itself. For the most used ktime_get() access all required data including the @read and @mask copies fits together with the sequence counter into a single 64 byte cacheline. For the other time access functions we touch in the current code three cache lines in the worst case. But with the clocksource data copies we can reduce that to two adjacent cachelines, which is more efficient than disjunct cache lines. Signed-off-by: Thomas Gleixner Signed-off-by: John Stultz --- include/linux/timekeeper_internal.h | 4 ++++ kernel/time/timekeeping.c | 35 +++++++++++++++-------------------- 2 files changed, 19 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index cb88096222c0..75bb8add78f5 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -29,6 +29,10 @@ struct timekeeper { /* Current clocksource used for timekeeping. 
*/ struct clocksource *clock; + /* Read function of @clock */ + cycle_t (*read)(struct clocksource *cs); + /* Bitmask for two's complement subtraction of non 64bit counters */ + cycle_t mask; /* Last cycle value */ cycle_t cycle_last; /* NTP adjusted clock multiplier */ diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 4e748c404749..14b7367e6b94 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -121,7 +121,9 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock) old_clock = tk->clock; tk->clock = clock; - tk->cycle_last = clock->read(clock); + tk->read = clock->read; + tk->mask = clock->mask; + tk->cycle_last = tk->read(clock); /* Do the ns -> cycle conversion first, using original mult */ tmp = NTP_INTERVAL_LENGTH; @@ -174,15 +176,13 @@ static inline u32 arch_gettimeoffset(void) { return 0; } static inline s64 timekeeping_get_ns(struct timekeeper *tk) { cycle_t cycle_now, delta; - struct clocksource *clock; s64 nsec; /* read clocksource: */ - clock = tk->clock; - cycle_now = clock->read(clock); + cycle_now = tk->read(tk->clock); /* calculate the delta since the last update_wall_time: */ - delta = clocksource_delta(cycle_now, tk->cycle_last, clock->mask); + delta = clocksource_delta(cycle_now, tk->cycle_last, tk->mask); nsec = delta * tk->mult + tk->xtime_nsec; nsec >>= tk->shift; @@ -193,16 +193,15 @@ static inline s64 timekeeping_get_ns(struct timekeeper *tk) static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk) { + struct clocksource *clock = tk->clock; cycle_t cycle_now, delta; - struct clocksource *clock; s64 nsec; /* read clocksource: */ - clock = tk->clock; - cycle_now = clock->read(clock); + cycle_now = tk->read(clock); /* calculate the delta since the last update_wall_time: */ - delta = clocksource_delta(cycle_now, tk->cycle_last, clock->mask); + delta = clocksource_delta(cycle_now, tk->cycle_last, tk->mask); /* convert delta to nanoseconds. */ nsec = clocksource_cyc2ns(delta, clock->mult, clock->shift); @@ -337,13 +336,12 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action) */ static void timekeeping_forward_now(struct timekeeper *tk) { + struct clocksource *clock = tk->clock; cycle_t cycle_now, delta; - struct clocksource *clock; s64 nsec; - clock = tk->clock; - cycle_now = clock->read(clock); - delta = clocksource_delta(cycle_now, tk->cycle_last, clock->mask); + cycle_now = tk->read(clock); + delta = clocksource_delta(cycle_now, tk->cycle_last, tk->mask); tk->cycle_last = cycle_now; tk->xtime_nsec += delta * tk->mult; @@ -1019,7 +1017,7 @@ static void timekeeping_resume(void) * The less preferred source will only be tried if there is no better * usable source. The rtc part is handled separately in rtc core code. 
*/ - cycle_now = clock->read(clock); + cycle_now = tk->read(clock); if ((clock->flags & CLOCK_SOURCE_SUSPEND_NONSTOP) && cycle_now > tk->cycle_last) { u64 num, max = ULLONG_MAX; @@ -1028,7 +1026,7 @@ static void timekeeping_resume(void) s64 nsec = 0; cycle_delta = clocksource_delta(cycle_now, tk->cycle_last, - clock->mask); + tk->mask); /* * "cycle_delta * mutl" may cause 64 bits overflow, if the @@ -1415,7 +1413,6 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset, */ void update_wall_time(void) { - struct clocksource *clock; struct timekeeper *real_tk = &tk_core.timekeeper; struct timekeeper *tk = &shadow_timekeeper; cycle_t offset; @@ -1429,13 +1426,11 @@ void update_wall_time(void) if (unlikely(timekeeping_suspended)) goto out; - clock = real_tk->clock; - #ifdef CONFIG_ARCH_USES_GETTIMEOFFSET offset = real_tk->cycle_interval; #else - offset = clocksource_delta(clock->read(clock), tk->cycle_last, - clock->mask); + offset = clocksource_delta(tk->read(tk->clock), tk->cycle_last, + tk->mask); #endif /* Check if there's really nothing to do */ -- cgit v1.2.3-59-g8ed1b From d28ede83791defee9a81e558540699dc46dbbe13 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 16 Jul 2014 21:05:16 +0000 Subject: timekeeping: Create struct tk_read_base and use it in struct timekeeper The members of the new struct are the required ones for the new NMI safe accessor to clcok monotonic. In order to reuse the existing timekeeping code and to make the update of the fast NMI safe timekeepers a simple memcpy use the struct for the timekeeper as well and convert all users. Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Mathieu Desnoyers Signed-off-by: John Stultz --- arch/arm64/kernel/vdso.c | 10 +-- arch/s390/kernel/time.c | 16 ++--- arch/tile/kernel/time.c | 10 +-- arch/x86/kernel/vsyscall_gtod.c | 23 ++++--- arch/x86/kvm/x86.c | 14 ++-- include/linux/timekeeper_internal.h | 103 +++++++++++++++------------- kernel/time/timekeeping.c | 132 ++++++++++++++++++------------------ 7 files changed, 158 insertions(+), 150 deletions(-) (limited to 'include/linux') diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 574672f001f7..8296f7f5f0ba 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -211,7 +211,7 @@ struct vm_area_struct *get_gate_vma(struct mm_struct *mm) void update_vsyscall(struct timekeeper *tk) { struct timespec xtime_coarse; - u32 use_syscall = strcmp(tk->clock->name, "arch_sys_counter"); + u32 use_syscall = strcmp(tk->tkr.clock->name, "arch_sys_counter"); ++vdso_data->tb_seq_count; smp_wmb(); @@ -224,11 +224,11 @@ void update_vsyscall(struct timekeeper *tk) vdso_data->wtm_clock_nsec = tk->wall_to_monotonic.tv_nsec; if (!use_syscall) { - vdso_data->cs_cycle_last = tk->cycle_last; + vdso_data->cs_cycle_last = tk->tkr.cycle_last; vdso_data->xtime_clock_sec = tk->xtime_sec; - vdso_data->xtime_clock_nsec = tk->xtime_nsec; - vdso_data->cs_mult = tk->mult; - vdso_data->cs_shift = tk->shift; + vdso_data->xtime_clock_nsec = tk->tkr.xtime_nsec; + vdso_data->cs_mult = tk->tkr.mult; + vdso_data->cs_shift = tk->tkr.shift; } smp_wmb(); diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 97950f392613..4cef607f3711 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -214,26 +214,26 @@ void update_vsyscall(struct timekeeper *tk) { u64 nsecps; - if (tk->clock != &clocksource_tod) + if (tk->tkr.clock != &clocksource_tod) return; /* Make userspace gettimeofday spin until we're done. 
*/ ++vdso_data->tb_update_count; smp_wmb(); - vdso_data->xtime_tod_stamp = tk->cycle_last; + vdso_data->xtime_tod_stamp = tk->tkr.cycle_last; vdso_data->xtime_clock_sec = tk->xtime_sec; - vdso_data->xtime_clock_nsec = tk->xtime_nsec; + vdso_data->xtime_clock_nsec = tk->tkr.xtime_nsec; vdso_data->wtom_clock_sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec; - vdso_data->wtom_clock_nsec = tk->xtime_nsec + - + ((u64) tk->wall_to_monotonic.tv_nsec << tk->shift); - nsecps = (u64) NSEC_PER_SEC << tk->shift; + vdso_data->wtom_clock_nsec = tk->tkr.xtime_nsec + + + ((u64) tk->wall_to_monotonic.tv_nsec << tk->tkr.shift); + nsecps = (u64) NSEC_PER_SEC << tk->tkr.shift; while (vdso_data->wtom_clock_nsec >= nsecps) { vdso_data->wtom_clock_nsec -= nsecps; vdso_data->wtom_clock_sec++; } - vdso_data->tk_mult = tk->mult; - vdso_data->tk_shift = tk->shift; + vdso_data->tk_mult = tk->tkr.mult; + vdso_data->tk_shift = tk->tkr.shift; smp_wmb(); ++vdso_data->tb_update_count; } diff --git a/arch/tile/kernel/time.c b/arch/tile/kernel/time.c index d22d5bfc1e4e..d8fbc289e680 100644 --- a/arch/tile/kernel/time.c +++ b/arch/tile/kernel/time.c @@ -261,7 +261,7 @@ void update_vsyscall_tz(void) void update_vsyscall(struct timekeeper *tk) { struct timespec *wtm = &tk->wall_to_monotonic; - struct clocksource *clock = tk->clock; + struct clocksource *clock = tk->tkr.clock; if (clock != &cycle_counter_cs) return; @@ -269,13 +269,13 @@ void update_vsyscall(struct timekeeper *tk) /* Userspace gettimeofday will spin while this value is odd. */ ++vdso_data->tb_update_count; smp_wmb(); - vdso_data->xtime_tod_stamp = tk->cycle_last; + vdso_data->xtime_tod_stamp = tk->tkr.cycle_last; vdso_data->xtime_clock_sec = tk->xtime_sec; - vdso_data->xtime_clock_nsec = tk->xtime_nsec; + vdso_data->xtime_clock_nsec = tk->tkr.xtime_nsec; vdso_data->wtom_clock_sec = wtm->tv_sec; vdso_data->wtom_clock_nsec = wtm->tv_nsec; - vdso_data->mult = tk->mult; - vdso_data->shift = tk->shift; + vdso_data->mult = tk->tkr.mult; + vdso_data->shift = tk->tkr.shift; smp_wmb(); ++vdso_data->tb_update_count; } diff --git a/arch/x86/kernel/vsyscall_gtod.c b/arch/x86/kernel/vsyscall_gtod.c index c3cb3c144591..c7d791f32b98 100644 --- a/arch/x86/kernel/vsyscall_gtod.c +++ b/arch/x86/kernel/vsyscall_gtod.c @@ -31,29 +31,30 @@ void update_vsyscall(struct timekeeper *tk) gtod_write_begin(vdata); /* copy vsyscall data */ - vdata->vclock_mode = tk->clock->archdata.vclock_mode; - vdata->cycle_last = tk->cycle_last; - vdata->mask = tk->clock->mask; - vdata->mult = tk->mult; - vdata->shift = tk->shift; + vdata->vclock_mode = tk->tkr.clock->archdata.vclock_mode; + vdata->cycle_last = tk->tkr.cycle_last; + vdata->mask = tk->tkr.mask; + vdata->mult = tk->tkr.mult; + vdata->shift = tk->tkr.shift; vdata->wall_time_sec = tk->xtime_sec; - vdata->wall_time_snsec = tk->xtime_nsec; + vdata->wall_time_snsec = tk->tkr.xtime_nsec; vdata->monotonic_time_sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec; - vdata->monotonic_time_snsec = tk->xtime_nsec + vdata->monotonic_time_snsec = tk->tkr.xtime_nsec + ((u64)tk->wall_to_monotonic.tv_nsec - << tk->shift); + << tk->tkr.shift); while (vdata->monotonic_time_snsec >= - (((u64)NSEC_PER_SEC) << tk->shift)) { + (((u64)NSEC_PER_SEC) << tk->tkr.shift)) { vdata->monotonic_time_snsec -= - ((u64)NSEC_PER_SEC) << tk->shift; + ((u64)NSEC_PER_SEC) << tk->tkr.shift; vdata->monotonic_time_sec++; } vdata->wall_time_coarse_sec = tk->xtime_sec; - vdata->wall_time_coarse_nsec = (long)(tk->xtime_nsec >> tk->shift); + vdata->wall_time_coarse_nsec = 
(long)(tk->tkr.xtime_nsec >> + tk->tkr.shift); vdata->monotonic_time_coarse_sec = vdata->wall_time_coarse_sec + tk->wall_to_monotonic.tv_sec; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 7b25125f3f42..b7e57946d1c1 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -995,19 +995,19 @@ static void update_pvclock_gtod(struct timekeeper *tk) struct pvclock_gtod_data *vdata = &pvclock_gtod_data; u64 boot_ns; - boot_ns = ktime_to_ns(ktime_add(tk->base_mono, tk->offs_boot)); + boot_ns = ktime_to_ns(ktime_add(tk->tkr.base_mono, tk->offs_boot)); write_seqcount_begin(&vdata->seq); /* copy pvclock gtod data */ - vdata->clock.vclock_mode = tk->clock->archdata.vclock_mode; - vdata->clock.cycle_last = tk->cycle_last; - vdata->clock.mask = tk->clock->mask; - vdata->clock.mult = tk->mult; - vdata->clock.shift = tk->shift; + vdata->clock.vclock_mode = tk->tkr.clock->archdata.vclock_mode; + vdata->clock.cycle_last = tk->tkr.cycle_last; + vdata->clock.mask = tk->tkr.mask; + vdata->clock.mult = tk->tkr.mult; + vdata->clock.shift = tk->tkr.shift; vdata->boot_ns = boot_ns; - vdata->nsec_base = tk->xtime_nsec; + vdata->nsec_base = tk->tkr.xtime_nsec; write_seqcount_end(&vdata->seq); } diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index 75bb8add78f5..97381997625b 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -10,80 +10,87 @@ #include #include -/* - * Structure holding internal timekeeping values. - * - * Note: wall_to_monotonic is what we need to add to xtime (or xtime - * corrected for sub jiffie times) to get to monotonic time. - * Monotonic is pegged at zero at system boot time, so - * wall_to_monotonic will be negative, however, we will ALWAYS keep - * the tv_nsec part positive so we can use the usual normalization. +/** + * struct tk_read_base - base structure for timekeeping readout + * @clock: Current clocksource used for timekeeping. + * @read: Read function of @clock + * @mask: Bitmask for two's complement subtraction of non 64bit clocks + * @cycle_last: @clock cycle value at last update + * @mult: NTP adjusted multiplier for scaled math conversion + * @shift: Shift value for scaled math conversion + * @xtime_nsec: Shifted (fractional) nano seconds offset for readout + * @base_mono: ktime_t (nanoseconds) base time for readout * - * wall_to_monotonic is moved after resume from suspend for the - * monotonic time not to jump. To calculate the real boot time offset - * we need to do offs_real - offs_boot. + * This struct has size 56 byte on 64 bit. Together with a seqcount it + * occupies a single 64byte cache line. * - * - wall_to_monotonic is no longer the boot time, getboottime must be - * used instead. + * The struct is separate from struct timekeeper as it is also used + * for a fast NMI safe accessor to clock monotonic. */ -struct timekeeper { - /* Current clocksource used for timekeeping. */ +struct tk_read_base { struct clocksource *clock; - /* Read function of @clock */ cycle_t (*read)(struct clocksource *cs); - /* Bitmask for two's complement subtraction of non 64bit counters */ cycle_t mask; - /* Last cycle value */ cycle_t cycle_last; - /* NTP adjusted clock multiplier */ u32 mult; - /* The shift value of the current clocksource. */ u32 shift; - /* Clock shifted nano seconds */ u64 xtime_nsec; - - /* Monotonic base time */ ktime_t base_mono; +}; - /* Current CLOCK_REALTIME time in seconds */ +/** + * struct timekeeper - Structure holding internal timekeeping values. 
+ * @tkr: The readout base structure + * @xtime_sec: Current CLOCK_REALTIME time in seconds + * @wall_to_monotonic: CLOCK_REALTIME to CLOCK_MONOTONIC offset + * @offs_real: Offset clock monotonic -> clock realtime + * @offs_boot: Offset clock monotonic -> clock boottime + * @offs_tai: Offset clock monotonic -> clock tai + * @tai_offset: The current UTC to TAI offset in seconds + * @base_raw: Monotonic raw base time in ktime_t format + * @raw_time: Monotonic raw base time in timespec64 format + * @cycle_interval: Number of clock cycles in one NTP interval + * @xtime_interval: Number of clock shifted nano seconds in one NTP + * interval. + * @xtime_remainder: Shifted nano seconds left over when rounding + * @cycle_interval + * @raw_interval: Raw nano seconds accumulated per NTP interval. + * @ntp_error: Difference between accumulated time and NTP time in ntp + * shifted nano seconds. + * @ntp_error_shift: Shift conversion between clock shifted nano seconds and + * ntp shifted nano seconds. + * + * Note: For timespec(64) based interfaces wall_to_monotonic is what + * we need to add to xtime (or xtime corrected for sub jiffie times) + * to get to monotonic time. Monotonic is pegged at zero at system + * boot time, so wall_to_monotonic will be negative, however, we will + * ALWAYS keep the tv_nsec part positive so we can use the usual + * normalization. + * + * wall_to_monotonic is moved after resume from suspend for the + * monotonic time not to jump. We need to add total_sleep_time to + * wall_to_monotonic to get the real boot based time offset. + * + * wall_to_monotonic is no longer the boot time, getboottime must be + * used instead. + */ +struct timekeeper { + struct tk_read_base tkr; u64 xtime_sec; - /* CLOCK_REALTIME to CLOCK_MONOTONIC offset */ struct timespec64 wall_to_monotonic; - - /* Offset clock monotonic -> clock realtime */ ktime_t offs_real; - /* Offset clock monotonic -> clock boottime */ ktime_t offs_boot; - /* Offset clock monotonic -> clock tai */ ktime_t offs_tai; - - /* The current UTC to TAI offset in seconds */ s32 tai_offset; - - /* Monotonic raw base time */ ktime_t base_raw; - - /* The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock. */ struct timespec64 raw_time; - /* Number of clock cycles in one NTP interval. */ + /* The following members are for timekeeping internal use */ cycle_t cycle_interval; - /* Number of clock shifted nano seconds in one NTP interval. */ u64 xtime_interval; - /* shifted nano seconds left over when rounding cycle_interval */ s64 xtime_remainder; - /* Raw nano seconds accumulated per NTP interval. */ u32 raw_interval; - - /* - * Difference between accumulated time and NTP time in ntp - * shifted nano seconds. - */ s64 ntp_error; - /* - * Shift conversion between clock shifted nano seconds and - * ntp shifted nano seconds. 
- */ u32 ntp_error_shift; }; diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 14b7367e6b94..ccb69980ef7e 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -52,8 +52,8 @@ bool __read_mostly persistent_clock_exist = false; static inline void tk_normalize_xtime(struct timekeeper *tk) { - while (tk->xtime_nsec >= ((u64)NSEC_PER_SEC << tk->shift)) { - tk->xtime_nsec -= (u64)NSEC_PER_SEC << tk->shift; + while (tk->tkr.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr.shift)) { + tk->tkr.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr.shift; tk->xtime_sec++; } } @@ -63,20 +63,20 @@ static inline struct timespec64 tk_xtime(struct timekeeper *tk) struct timespec64 ts; ts.tv_sec = tk->xtime_sec; - ts.tv_nsec = (long)(tk->xtime_nsec >> tk->shift); + ts.tv_nsec = (long)(tk->tkr.xtime_nsec >> tk->tkr.shift); return ts; } static void tk_set_xtime(struct timekeeper *tk, const struct timespec64 *ts) { tk->xtime_sec = ts->tv_sec; - tk->xtime_nsec = (u64)ts->tv_nsec << tk->shift; + tk->tkr.xtime_nsec = (u64)ts->tv_nsec << tk->tkr.shift; } static void tk_xtime_add(struct timekeeper *tk, const struct timespec64 *ts) { tk->xtime_sec += ts->tv_sec; - tk->xtime_nsec += (u64)ts->tv_nsec << tk->shift; + tk->tkr.xtime_nsec += (u64)ts->tv_nsec << tk->tkr.shift; tk_normalize_xtime(tk); } @@ -119,11 +119,11 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock) u64 tmp, ntpinterval; struct clocksource *old_clock; - old_clock = tk->clock; - tk->clock = clock; - tk->read = clock->read; - tk->mask = clock->mask; - tk->cycle_last = tk->read(clock); + old_clock = tk->tkr.clock; + tk->tkr.clock = clock; + tk->tkr.read = clock->read; + tk->tkr.mask = clock->mask; + tk->tkr.cycle_last = tk->tkr.read(clock); /* Do the ns -> cycle conversion first, using original mult */ tmp = NTP_INTERVAL_LENGTH; @@ -147,11 +147,11 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock) if (old_clock) { int shift_change = clock->shift - old_clock->shift; if (shift_change < 0) - tk->xtime_nsec >>= -shift_change; + tk->tkr.xtime_nsec >>= -shift_change; else - tk->xtime_nsec <<= shift_change; + tk->tkr.xtime_nsec <<= shift_change; } - tk->shift = clock->shift; + tk->tkr.shift = clock->shift; tk->ntp_error = 0; tk->ntp_error_shift = NTP_SCALE_SHIFT - clock->shift; @@ -161,7 +161,7 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock) * active clocksource. These value will be adjusted via NTP * to counteract clock drifting. */ - tk->mult = clock->mult; + tk->tkr.mult = clock->mult; } /* Timekeeper helper functions. 
*/ @@ -179,13 +179,13 @@ static inline s64 timekeeping_get_ns(struct timekeeper *tk) s64 nsec; /* read clocksource: */ - cycle_now = tk->read(tk->clock); + cycle_now = tk->tkr.read(tk->tkr.clock); /* calculate the delta since the last update_wall_time: */ - delta = clocksource_delta(cycle_now, tk->cycle_last, tk->mask); + delta = clocksource_delta(cycle_now, tk->tkr.cycle_last, tk->tkr.mask); - nsec = delta * tk->mult + tk->xtime_nsec; - nsec >>= tk->shift; + nsec = delta * tk->tkr.mult + tk->tkr.xtime_nsec; + nsec >>= tk->tkr.shift; /* If arch requires, add in get_arch_timeoffset() */ return nsec + arch_gettimeoffset(); @@ -193,15 +193,15 @@ static inline s64 timekeeping_get_ns(struct timekeeper *tk) static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk) { - struct clocksource *clock = tk->clock; + struct clocksource *clock = tk->tkr.clock; cycle_t cycle_now, delta; s64 nsec; /* read clocksource: */ - cycle_now = tk->read(clock); + cycle_now = tk->tkr.read(clock); /* calculate the delta since the last update_wall_time: */ - delta = clocksource_delta(cycle_now, tk->cycle_last, tk->mask); + delta = clocksource_delta(cycle_now, tk->tkr.cycle_last, tk->tkr.mask); /* convert delta to nanoseconds. */ nsec = clocksource_cyc2ns(delta, clock->mult, clock->shift); @@ -217,8 +217,8 @@ static inline void update_vsyscall(struct timekeeper *tk) struct timespec xt; xt = tk_xtime(tk); - update_vsyscall_old(&xt, &tk->wall_to_monotonic, tk->clock, tk->mult, - tk->cycle_last); + update_vsyscall_old(&xt, &tk->wall_to_monotonic, tk->tkr.clock, tk->tkr.mult, + tk->tkr.cycle_last); } static inline void old_vsyscall_fixup(struct timekeeper *tk) @@ -235,11 +235,11 @@ static inline void old_vsyscall_fixup(struct timekeeper *tk) * (shifted nanoseconds), and CONFIG_GENERIC_TIME_VSYSCALL_OLD * users are removed, this can be killed. 
*/ - remainder = tk->xtime_nsec & ((1ULL << tk->shift) - 1); - tk->xtime_nsec -= remainder; - tk->xtime_nsec += 1ULL << tk->shift; + remainder = tk->tkr.xtime_nsec & ((1ULL << tk->tkr.shift) - 1); + tk->tkr.xtime_nsec -= remainder; + tk->tkr.xtime_nsec += 1ULL << tk->tkr.shift; tk->ntp_error += remainder << tk->ntp_error_shift; - tk->ntp_error -= (1ULL << tk->shift) << tk->ntp_error_shift; + tk->ntp_error -= (1ULL << tk->tkr.shift) << tk->ntp_error_shift; } #else #define old_vsyscall_fixup(tk) @@ -304,7 +304,7 @@ static inline void tk_update_ktime_data(struct timekeeper *tk) nsec = (s64)(tk->xtime_sec + tk->wall_to_monotonic.tv_sec); nsec *= NSEC_PER_SEC; nsec += tk->wall_to_monotonic.tv_nsec; - tk->base_mono = ns_to_ktime(nsec); + tk->tkr.base_mono = ns_to_ktime(nsec); /* Update the monotonic raw base */ tk->base_raw = timespec64_to_ktime(tk->raw_time); @@ -336,18 +336,18 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action) */ static void timekeeping_forward_now(struct timekeeper *tk) { - struct clocksource *clock = tk->clock; + struct clocksource *clock = tk->tkr.clock; cycle_t cycle_now, delta; s64 nsec; - cycle_now = tk->read(clock); - delta = clocksource_delta(cycle_now, tk->cycle_last, tk->mask); - tk->cycle_last = cycle_now; + cycle_now = tk->tkr.read(clock); + delta = clocksource_delta(cycle_now, tk->tkr.cycle_last, tk->tkr.mask); + tk->tkr.cycle_last = cycle_now; - tk->xtime_nsec += delta * tk->mult; + tk->tkr.xtime_nsec += delta * tk->tkr.mult; /* If arch requires, add in get_arch_timeoffset() */ - tk->xtime_nsec += (u64)arch_gettimeoffset() << tk->shift; + tk->tkr.xtime_nsec += (u64)arch_gettimeoffset() << tk->tkr.shift; tk_normalize_xtime(tk); @@ -412,7 +412,7 @@ ktime_t ktime_get(void) do { seq = read_seqcount_begin(&tk_core.seq); - base = tk->base_mono; + base = tk->tkr.base_mono; nsecs = timekeeping_get_ns(tk); } while (read_seqcount_retry(&tk_core.seq, seq)); @@ -438,7 +438,7 @@ ktime_t ktime_get_with_offset(enum tk_offsets offs) do { seq = read_seqcount_begin(&tk_core.seq); - base = ktime_add(tk->base_mono, *offset); + base = ktime_add(tk->tkr.base_mono, *offset); nsecs = timekeeping_get_ns(tk); } while (read_seqcount_retry(&tk_core.seq, seq)); @@ -731,7 +731,7 @@ static int change_clocksource(void *data) */ if (try_module_get(new->owner)) { if (!new->enable || new->enable(new) == 0) { - old = tk->clock; + old = tk->tkr.clock; tk_setup_internals(tk, new); if (old->disable) old->disable(old); @@ -759,11 +759,11 @@ int timekeeping_notify(struct clocksource *clock) { struct timekeeper *tk = &tk_core.timekeeper; - if (tk->clock == clock) + if (tk->tkr.clock == clock) return 0; stop_machine(change_clocksource, clock, NULL); tick_clock_notify(); - return tk->clock == clock ? 0 : -1; + return tk->tkr.clock == clock ? 
0 : -1; } /** @@ -803,7 +803,7 @@ int timekeeping_valid_for_hres(void) do { seq = read_seqcount_begin(&tk_core.seq); - ret = tk->clock->flags & CLOCK_SOURCE_VALID_FOR_HRES; + ret = tk->tkr.clock->flags & CLOCK_SOURCE_VALID_FOR_HRES; } while (read_seqcount_retry(&tk_core.seq, seq)); @@ -822,7 +822,7 @@ u64 timekeeping_max_deferment(void) do { seq = read_seqcount_begin(&tk_core.seq); - ret = tk->clock->max_idle_ns; + ret = tk->tkr.clock->max_idle_ns; } while (read_seqcount_retry(&tk_core.seq, seq)); @@ -989,7 +989,7 @@ void timekeeping_inject_sleeptime(struct timespec *delta) static void timekeeping_resume(void) { struct timekeeper *tk = &tk_core.timekeeper; - struct clocksource *clock = tk->clock; + struct clocksource *clock = tk->tkr.clock; unsigned long flags; struct timespec64 ts_new, ts_delta; struct timespec tmp; @@ -1017,16 +1017,16 @@ static void timekeeping_resume(void) * The less preferred source will only be tried if there is no better * usable source. The rtc part is handled separately in rtc core code. */ - cycle_now = tk->read(clock); + cycle_now = tk->tkr.read(clock); if ((clock->flags & CLOCK_SOURCE_SUSPEND_NONSTOP) && - cycle_now > tk->cycle_last) { + cycle_now > tk->tkr.cycle_last) { u64 num, max = ULLONG_MAX; u32 mult = clock->mult; u32 shift = clock->shift; s64 nsec = 0; - cycle_delta = clocksource_delta(cycle_now, tk->cycle_last, - tk->mask); + cycle_delta = clocksource_delta(cycle_now, tk->tkr.cycle_last, + tk->tkr.mask); /* * "cycle_delta * mutl" may cause 64 bits overflow, if the @@ -1052,7 +1052,7 @@ static void timekeeping_resume(void) __timekeeping_inject_sleeptime(tk, &ts_delta); /* Re-base the last cycle value */ - tk->cycle_last = cycle_now; + tk->tkr.cycle_last = cycle_now; tk->ntp_error = 0; timekeeping_suspended = 0; timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET); @@ -1239,12 +1239,12 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset) } } - if (unlikely(tk->clock->maxadj && - (tk->mult + adj > tk->clock->mult + tk->clock->maxadj))) { + if (unlikely(tk->tkr.clock->maxadj && + (tk->tkr.mult + adj > tk->tkr.clock->mult + tk->tkr.clock->maxadj))) { printk_deferred_once(KERN_WARNING "Adjusting %s more than 11%% (%ld vs %ld)\n", - tk->clock->name, (long)tk->mult + adj, - (long)tk->clock->mult + tk->clock->maxadj); + tk->tkr.clock->name, (long)tk->tkr.mult + adj, + (long)tk->tkr.clock->mult + tk->tkr.clock->maxadj); } /* * So the following can be confusing. @@ -1295,9 +1295,9 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset) * * XXX - TODO: Doc ntp_error calculation. */ - tk->mult += adj; + tk->tkr.mult += adj; tk->xtime_interval += interval; - tk->xtime_nsec -= offset; + tk->tkr.xtime_nsec -= offset; tk->ntp_error -= (interval - offset) << tk->ntp_error_shift; out_adjust: @@ -1315,9 +1315,9 @@ out_adjust: * We'll correct this error next time through this function, when * xtime_nsec is not as small. 
*/ - if (unlikely((s64)tk->xtime_nsec < 0)) { - s64 neg = -(s64)tk->xtime_nsec; - tk->xtime_nsec = 0; + if (unlikely((s64)tk->tkr.xtime_nsec < 0)) { + s64 neg = -(s64)tk->tkr.xtime_nsec; + tk->tkr.xtime_nsec = 0; tk->ntp_error += neg << tk->ntp_error_shift; } @@ -1333,13 +1333,13 @@ out_adjust: */ static inline unsigned int accumulate_nsecs_to_secs(struct timekeeper *tk) { - u64 nsecps = (u64)NSEC_PER_SEC << tk->shift; + u64 nsecps = (u64)NSEC_PER_SEC << tk->tkr.shift; unsigned int clock_set = 0; - while (tk->xtime_nsec >= nsecps) { + while (tk->tkr.xtime_nsec >= nsecps) { int leap; - tk->xtime_nsec -= nsecps; + tk->tkr.xtime_nsec -= nsecps; tk->xtime_sec++; /* Figure out if its a leap sec and apply if needed */ @@ -1384,9 +1384,9 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset, /* Accumulate one shifted interval */ offset -= interval; - tk->cycle_last += interval; + tk->tkr.cycle_last += interval; - tk->xtime_nsec += tk->xtime_interval << shift; + tk->tkr.xtime_nsec += tk->xtime_interval << shift; *clock_set |= accumulate_nsecs_to_secs(tk); /* Accumulate raw time */ @@ -1429,8 +1429,8 @@ void update_wall_time(void) #ifdef CONFIG_ARCH_USES_GETTIMEOFFSET offset = real_tk->cycle_interval; #else - offset = clocksource_delta(tk->read(tk->clock), tk->cycle_last, - tk->mask); + offset = clocksource_delta(tk->tkr.read(tk->tkr.clock), + tk->tkr.cycle_last, tk->tkr.mask); #endif /* Check if there's really nothing to do */ @@ -1591,8 +1591,8 @@ ktime_t ktime_get_update_offsets_tick(ktime_t *offs_real, ktime_t *offs_boot, do { seq = read_seqcount_begin(&tk_core.seq); - base = tk->base_mono; - nsecs = tk->xtime_nsec >> tk->shift; + base = tk->tkr.base_mono; + nsecs = tk->tkr.xtime_nsec >> tk->tkr.shift; *offs_real = tk->offs_real; *offs_boot = tk->offs_boot; @@ -1623,7 +1623,7 @@ ktime_t ktime_get_update_offsets_now(ktime_t *offs_real, ktime_t *offs_boot, do { seq = read_seqcount_begin(&tk_core.seq); - base = tk->base_mono; + base = tk->tkr.base_mono; nsecs = timekeeping_get_ns(tk); *offs_real = tk->offs_real; -- cgit v1.2.3-59-g8ed1b From 0ea5a520f73ca31abc4c10b6d5bc14a884a0641b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 16 Jul 2014 21:05:20 +0000 Subject: seqcount: Provide raw_read_seqcount() raw_read_seqcount opens a read critical section of the given seqcount without any lockdep checking and without checking or masking the LSB. Calling code is responsible for handling that. Preparatory patch to provide a NMI safe clock monotonic accessor function. Signed-off-by: Thomas Gleixner Cc: John Stultz Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Mathieu Desnoyers Signed-off-by: John Stultz --- include/linux/seqlock.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 535f158977b9..dcc64b9bfc41 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -116,6 +116,22 @@ repeat: return ret; } +/** + * raw_read_seqcount - Read the raw seqcount + * @s: pointer to seqcount_t + * Returns: count to be passed to read_seqcount_retry + * + * raw_read_seqcount opens a read critical section of the given + * seqcount without any lockdep checking and without checking or + * masking the LSB. Calling code is responsible for handling that. 
+ */ +static inline unsigned raw_read_seqcount(const seqcount_t *s) +{ + unsigned ret = ACCESS_ONCE(s->sequence); + smp_rmb(); + return ret; +} + /** * raw_read_seqcount_begin - start seq-read critical section w/o lockdep * @s: pointer to seqcount_t -- cgit v1.2.3-59-g8ed1b From 9b0fd802e8c0545148324916055e7b40d97963fa Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Wed, 16 Jul 2014 21:05:21 +0000 Subject: seqcount: Add raw_write_seqcount_latch() For NMI safe access to clock monotonic we use the seqcount LSB as index of a timekeeper array. The update sequence looks like this: smp_wmb(); <- prior stores to a[1] seq++; smp_wmb(); <- seq increment before update of a[0] update(a[0]); smp_wmb(); <- update of a[0] seq++; smp_wmb(); <- seq increment before update of a[1] update(a[1]); To avoid open coded barriers, provide a helper function. [ tglx: Split out of a combo patch against the first implementation of the NMI safe accessor ] Signed-off-by: Mathieu Desnoyers Cc: John Stultz Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Steven Rostedt Signed-off-by: Thomas Gleixner Signed-off-by: John Stultz --- include/linux/seqlock.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index dcc64b9bfc41..cce6e7453592 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -235,6 +235,17 @@ static inline void raw_write_seqcount_end(seqcount_t *s) s->sequence++; } +/* + * raw_write_seqcount_latch - redirect readers to even/odd copy + * @s: pointer to seqcount_t + */ +static inline void raw_write_seqcount_latch(seqcount_t *s) +{ + smp_wmb(); /* prior stores before incrementing "sequence" */ + s->sequence++; + smp_wmb(); /* increment "sequence" before following stores */ +} + /* * Sequence counter only version assumes that callers are using their * own mutexing. -- cgit v1.2.3-59-g8ed1b From 4396e058c52e167729729cf64ea3dfa229637086 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 16 Jul 2014 21:05:23 +0000 Subject: timekeeping: Provide fast and NMI safe access to CLOCK_MONOTONIC Tracers want a correlated time between the kernel instrumentation and user space. We really do not want to export sched_clock() to user space, so we need to provide something sensible for this. Using separate data structures with an non blocking sequence count based update mechanism allows us to do that. The data structure required for the readout has a sequence counter and two copies of the timekeeping data. On the update side: smp_wmb(); tkf->seq++; smp_wmb(); update(tkf->base[0], tk); smp_wmb(); tkf->seq++; smp_wmb(); update(tkf->base[1], tk); On the reader side: do { seq = tkf->seq; smp_rmb(); idx = seq & 0x01; now = now(tkf->base[idx]); smp_rmb(); } while (seq != tkf->seq) So if a NMI hits the update of base[0] it will use base[1] which is still consistent, but this timestamp is not guaranteed to be monotonic across an update. The timestamp is calculated by: now = base_mono + clock_delta * slope So if the update lowers the slope, readers who are forced to the not yet updated second array are still using the old steeper slope. tmono ^ | o n | o n | u | o |o |12345678---> reader order o = old slope u = update n = new slope So reader 6 will observe time going backwards versus reader 5. While other CPUs are likely to be able observe that, the only way for a CPU local observation is when an NMI hits in the middle of the update. Timestamps taken from that NMI context might be ahead of the following timestamps. 
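To make the latch usage concrete, here is a hedged, illustrative sketch of a generic latched value built on the two helpers added above; struct latched_sample, latched_update() and latched_read() are invented names for the example, while raw_read_seqcount(), raw_write_seqcount_latch() and read_seqcount_retry() are the real seqlock interfaces:

#include <linux/seqlock.h>
#include <linux/types.h>

/* Illustrative only: two copies of the data, indexed by the seqcount LSB. */
struct latched_sample {
	seqcount_t seq;		/* initialise with seqcount_init() */
	u64 data[2];
};

/* Writer side: mirrors the update sequence described in the changelog. */
static void latched_update(struct latched_sample *ls, u64 val)
{
	raw_write_seqcount_latch(&ls->seq);	/* readers move to data[1] */
	ls->data[0] = val;
	raw_write_seqcount_latch(&ls->seq);	/* readers move back to data[0] */
	ls->data[1] = val;
}

/* Reader side: usable from any context, including an NMI hitting mid-update. */
static u64 latched_read(struct latched_sample *ls)
{
	unsigned int seq;
	u64 val;

	do {
		seq = raw_read_seqcount(&ls->seq);
		val = ls->data[seq & 0x01];
	} while (read_seqcount_retry(&ls->seq, seq));

	return val;
}

A reader that races with latched_update() simply picks the copy that is not currently being written, which is exactly how the tk_fast_mono bases are consumed below.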
Callers need to be aware of that and deal with it. V2: Got rid of clock monotonic raw and reorganized the data structures. Folded in the barrier fix from Mathieu. Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Mathieu Desnoyers Signed-off-by: John Stultz --- include/linux/timekeeping.h | 2 + kernel/time/timekeeping.c | 124 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 126 insertions(+) (limited to 'include/linux') diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 58ad7eff83ff..1caa6b04fdc5 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -164,6 +164,8 @@ static inline u64 ktime_get_raw_ns(void) return ktime_to_ns(ktime_get_raw()); } +extern u64 ktime_get_mono_fast_ns(void); + /* * Timespec interfaces utilizing the ktime based ones */ diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index dee23c9d6c21..8980fb722fc5 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -44,6 +44,22 @@ static struct { static DEFINE_RAW_SPINLOCK(timekeeper_lock); static struct timekeeper shadow_timekeeper; +/** + * struct tk_fast - NMI safe timekeeper + * @seq: Sequence counter for protecting updates. The lowest bit + * is the index for the tk_read_base array + * @base: tk_read_base array. Access is indexed by the lowest bit of + * @seq. + * + * See @update_fast_timekeeper() below. + */ +struct tk_fast { + seqcount_t seq; + struct tk_read_base base[2]; +}; + +static struct tk_fast tk_fast_mono ____cacheline_aligned; + /* flag for if timekeeping is suspended */ int __read_mostly timekeeping_suspended; @@ -210,6 +226,112 @@ static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk) return nsec + arch_gettimeoffset(); } +/** + * update_fast_timekeeper - Update the fast and NMI safe monotonic timekeeper. + * @tk: The timekeeper from which we take the update + * @tkf: The fast timekeeper to update + * @tbase: The time base for the fast timekeeper (mono/raw) + * + * We want to use this from any context including NMI and tracing / + * instrumenting the timekeeping code itself. + * + * So we handle this differently than the other timekeeping accessor + * functions which retry when the sequence count has changed. The + * update side does: + * + * smp_wmb(); <- Ensure that the last base[1] update is visible + * tkf->seq++; + * smp_wmb(); <- Ensure that the seqcount update is visible + * update(tkf->base[0], tk); + * smp_wmb(); <- Ensure that the base[0] update is visible + * tkf->seq++; + * smp_wmb(); <- Ensure that the seqcount update is visible + * update(tkf->base[1], tk); + * + * The reader side does: + * + * do { + * seq = tkf->seq; + * smp_rmb(); + * idx = seq & 0x01; + * now = now(tkf->base[idx]); + * smp_rmb(); + * } while (seq != tkf->seq) + * + * As long as we update base[0] readers are forced off to + * base[1]. Once base[0] is updated readers are redirected to base[0] + * and the base[1] update takes place. + * + * So if a NMI hits the update of base[0] then it will use base[1] + * which is still consistent. In the worst case this can result is a + * slightly wrong timestamp (a few nanoseconds). See + * @ktime_get_mono_fast_ns. 
+ */ +static void update_fast_timekeeper(struct timekeeper *tk) +{ + struct tk_read_base *base = tk_fast_mono.base; + + /* Force readers off to base[1] */ + raw_write_seqcount_latch(&tk_fast_mono.seq); + + /* Update base[0] */ + memcpy(base, &tk->tkr, sizeof(*base)); + + /* Force readers back to base[0] */ + raw_write_seqcount_latch(&tk_fast_mono.seq); + + /* Update base[1] */ + memcpy(base + 1, base, sizeof(*base)); +} + +/** + * ktime_get_mono_fast_ns - Fast NMI safe access to clock monotonic + * + * This timestamp is not guaranteed to be monotonic across an update. + * The timestamp is calculated by: + * + * now = base_mono + clock_delta * slope + * + * So if the update lowers the slope, readers who are forced to the + * not yet updated second array are still using the old steeper slope. + * + * tmono + * ^ + * | o n + * | o n + * | u + * | o + * |o + * |12345678---> reader order + * + * o = old slope + * u = update + * n = new slope + * + * So reader 6 will observe time going backwards versus reader 5. + * + * While other CPUs are likely to be able observe that, the only way + * for a CPU local observation is when an NMI hits in the middle of + * the update. Timestamps taken from that NMI context might be ahead + * of the following timestamps. Callers need to be aware of that and + * deal with it. + */ +u64 notrace ktime_get_mono_fast_ns(void) +{ + struct tk_read_base *tkr; + unsigned int seq; + u64 now; + + do { + seq = raw_read_seqcount(&tk_fast_mono.seq); + tkr = tk_fast_mono.base + (seq & 0x01); + now = ktime_to_ns(tkr->base_mono) + timekeeping_get_ns(tkr); + + } while (read_seqcount_retry(&tk_fast_mono.seq, seq)); + return now; +} +EXPORT_SYMBOL_GPL(ktime_get_mono_fast_ns); + #ifdef CONFIG_GENERIC_TIME_VSYSCALL_OLD static inline void update_vsyscall(struct timekeeper *tk) @@ -325,6 +447,8 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action) if (action & TK_MIRROR) memcpy(&shadow_timekeeper, &tk_core.timekeeper, sizeof(tk_core.timekeeper)); + + update_fast_timekeeper(tk); } /** -- cgit v1.2.3-59-g8ed1b From dc491596f6394382fbc74ad331156207d619fa0a Mon Sep 17 00:00:00 2001 From: John Stultz Date: Fri, 6 Dec 2013 17:25:21 -0800 Subject: timekeeping: Rework frequency adjustments to work better w/ nohz The existing timekeeping_adjust logic has always been complicated to understand. Further, since it was developed prior to NOHZ becoming common, its not surprising it performs poorly when NOHZ is enabled. Since Miroslav pointed out the problematic nature of the existing code in the NOHZ case, I've tried to refactor the code to perform better. The problem with the previous approach was that it tried to adjust for the total cumulative error using a scaled dampening factor. This resulted in large errors to be corrected slowly, while small errors were corrected quickly. With NOHZ the timekeeping code doesn't know how far out the next tick will be, so this results in bad over-correction to small errors, and insufficient correction to large errors. Inspired by Miroslav's patch, I've refactored the code to try to address the correction in two steps. 1) Check the future freq error for the next tick, and if the frequency error is large, try to make sure we correct it so it doesn't cause much accumulated error. 2) Then make a small single unit adjustment to correct any cumulative error that has collected over time. This method performs fairly well in the simulator Miroslav created. 
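As a rough, self-contained model of the two steps just described (tk_model and its fields are invented stand-ins for illustration and deliberately omit the xtime/ntp_error bookkeeping the real code performs):

#include <stdbool.h>
#include <stdint.h>

struct tk_model {
	int64_t mult;		/* current clocksource multiplier */
	int64_t cycle_interval;	/* cycles per NTP interval */
	int64_t ntp_error;	/* accumulated error; > 0 means we run too slow */
	bool err_mult;		/* single-unit cumulative correction active */
};

/* Step 1: correct the per-tick frequency error, scaled by powers of two. */
static void model_freqadjust(struct tk_model *tk, int64_t tick_error)
{
	bool negative;
	int64_t err, step;
	int adj_scale = 0;

	/* small errors are left to the cumulative step below */
	if (tick_error >= 0 && tick_error <= tk->cycle_interval)
		return;

	negative = tick_error < 0;
	err = negative ? -tick_error : tick_error;

	/* find the scale that brings the error back inside one interval */
	while (err > tk->cycle_interval) {
		err >>= 1;
		adj_scale++;
	}

	step = (int64_t)1 << adj_scale;
	tk->mult += negative ? -step : step;
}

/* Step 2: nudge the multiplier by one unit against the cumulative error. */
static void model_erradjust(struct tk_model *tk)
{
	if (!tk->err_mult && tk->ntp_error > 0) {
		tk->mult += 1;
		tk->err_mult = true;
	} else if (tk->err_mult && tk->ntp_error <= 0) {
		tk->mult -= 1;
		tk->err_mult = false;
	}
}

Large frequency errors are therefore corrected aggressively, while the residual cumulative error only ever moves the multiplier by a single unit per update, which behaves far better when tick lengths are irregular under NOHZ.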
Major credit to Miroslav for pointing out the issue, providing the original patch to resolve this, a simulator for testing, as well as helping debug and resolve issues in my implementation so that it performed closer to his original implementation. Cc: Miroslav Lichvar Cc: Richard Cochran Cc: Prarit Bhargava Reported-by: Miroslav Lichvar Signed-off-by: John Stultz --- include/linux/timekeeper_internal.h | 1 + kernel/time/timekeeping.c | 193 ++++++++++++++++-------------------- 2 files changed, 84 insertions(+), 110 deletions(-) (limited to 'include/linux') diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index 97381997625b..f7ac48d2edf5 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -92,6 +92,7 @@ struct timekeeper { u32 raw_interval; s64 ntp_error; u32 ntp_error_shift; + u32 ntp_err_mult; }; #ifdef CONFIG_GENERIC_TIME_VSYSCALL diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 2b56b959615b..43c706a7a728 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -178,6 +178,7 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock) * to counteract clock drifting. */ tk->tkr.mult = clock->mult; + tk->ntp_err_mult = 0; } /* Timekeeper helper functions. */ @@ -1257,125 +1258,34 @@ static int __init timekeeping_init_ops(void) register_syscore_ops(&timekeeping_syscore_ops); return 0; } - device_initcall(timekeeping_init_ops); /* - * If the error is already larger, we look ahead even further - * to compensate for late or lost adjustments. - */ -static __always_inline int timekeeping_bigadjust(struct timekeeper *tk, - s64 error, s64 *interval, - s64 *offset) -{ - s64 tick_error, i; - u32 look_ahead, adj; - s32 error2, mult; - - /* - * Use the current error value to determine how much to look ahead. - * The larger the error the slower we adjust for it to avoid problems - * with losing too many ticks, otherwise we would overadjust and - * produce an even larger error. The smaller the adjustment the - * faster we try to adjust for it, as lost ticks can do less harm - * here. This is tuned so that an error of about 1 msec is adjusted - * within about 1 sec (or 2^20 nsec in 2^SHIFT_HZ ticks). - */ - error2 = tk->ntp_error >> (NTP_SCALE_SHIFT + 22 - 2 * SHIFT_HZ); - error2 = abs(error2); - for (look_ahead = 0; error2 > 0; look_ahead++) - error2 >>= 2; - - /* - * Now calculate the error in (1 << look_ahead) ticks, but first - * remove the single look ahead already included in the error. - */ - tick_error = ntp_tick_length() >> (tk->ntp_error_shift + 1); - tick_error -= tk->xtime_interval >> 1; - error = ((error - tick_error) >> look_ahead) + tick_error; - - /* Finally calculate the adjustment shift value. */ - i = *interval; - mult = 1; - if (error < 0) { - error = -error; - *interval = -*interval; - *offset = -*offset; - mult = -1; - } - for (adj = 0; error > i; adj++) - error >>= 1; - - *interval <<= adj; - *offset <<= adj; - return mult << adj; -} - -/* - * Adjust the multiplier to reduce the error value, - * this is optimized for the most common adjustments of -1,0,1, - * for other values we can do a bit more work. 
+ * Apply a multiplier adjustment to the timekeeper */ -static void timekeeping_adjust(struct timekeeper *tk, s64 offset) +static __always_inline void timekeeping_apply_adjustment(struct timekeeper *tk, + s64 offset, + bool negative, + int adj_scale) { - s64 error, interval = tk->cycle_interval; - int adj; + s64 interval = tk->cycle_interval; + s32 mult_adj = 1; - /* - * The point of this is to check if the error is greater than half - * an interval. - * - * First we shift it down from NTP_SHIFT to clocksource->shifted nsecs. - * - * Note we subtract one in the shift, so that error is really error*2. - * This "saves" dividing(shifting) interval twice, but keeps the - * (error > interval) comparison as still measuring if error is - * larger than half an interval. - * - * Note: It does not "save" on aggravation when reading the code. - */ - error = tk->ntp_error >> (tk->ntp_error_shift - 1); - if (error > interval) { - /* - * We now divide error by 4(via shift), which checks if - * the error is greater than twice the interval. - * If it is greater, we need a bigadjust, if its smaller, - * we can adjust by 1. - */ - error >>= 2; - if (likely(error <= interval)) - adj = 1; - else - adj = timekeeping_bigadjust(tk, error, &interval, &offset); - } else { - if (error < -interval) { - /* See comment above, this is just switched for the negative */ - error >>= 2; - if (likely(error >= -interval)) { - adj = -1; - interval = -interval; - offset = -offset; - } else { - adj = timekeeping_bigadjust(tk, error, &interval, &offset); - } - } else { - goto out_adjust; - } + if (negative) { + mult_adj = -mult_adj; + interval = -interval; + offset = -offset; } + mult_adj <<= adj_scale; + interval <<= adj_scale; + offset <<= adj_scale; - if (unlikely(tk->tkr.clock->maxadj && - (tk->tkr.mult + adj > tk->tkr.clock->mult + tk->tkr.clock->maxadj))) { - printk_deferred_once(KERN_WARNING - "Adjusting %s more than 11%% (%ld vs %ld)\n", - tk->tkr.clock->name, (long)tk->tkr.mult + adj, - (long)tk->tkr.clock->mult + tk->tkr.clock->maxadj); - } /* * So the following can be confusing. * - * To keep things simple, lets assume adj == 1 for now. + * To keep things simple, lets assume mult_adj == 1 for now. * - * When adj != 1, remember that the interval and offset values + * When mult_adj != 1, remember that the interval and offset values * have been appropriately scaled so the math is the same. * * The basic idea here is that we're increasing the multiplier @@ -1419,12 +1329,76 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset) * * XXX - TODO: Doc ntp_error calculation. 
*/ - tk->tkr.mult += adj; + tk->tkr.mult += mult_adj; tk->xtime_interval += interval; tk->tkr.xtime_nsec -= offset; tk->ntp_error -= (interval - offset) << tk->ntp_error_shift; +} + +/* + * Calculate the multiplier adjustment needed to match the frequency + * specified by NTP + */ +static __always_inline void timekeeping_freqadjust(struct timekeeper *tk, + s64 offset) +{ + s64 interval = tk->cycle_interval; + s64 xinterval = tk->xtime_interval; + s64 tick_error; + bool negative; + u32 adj; + + /* Remove any current error adj from freq calculation */ + if (tk->ntp_err_mult) + xinterval -= tk->cycle_interval; + + /* Calculate current error per tick */ + tick_error = ntp_tick_length() >> tk->ntp_error_shift; + tick_error -= (xinterval + tk->xtime_remainder); + + /* Don't worry about correcting it if its small */ + if (likely((tick_error >= 0) && (tick_error <= interval))) + return; + + /* preserve the direction of correction */ + negative = (tick_error < 0); + + /* Sort out the magnitude of the correction */ + tick_error = abs(tick_error); + for (adj = 0; tick_error > interval; adj++) + tick_error >>= 1; + + /* scale the corrections */ + timekeeping_apply_adjustment(tk, offset, negative, adj); +} + +/* + * Adjust the timekeeper's multiplier to the correct frequency + * and also to reduce the accumulated error value. + */ +static void timekeeping_adjust(struct timekeeper *tk, s64 offset) +{ + /* Correct for the current frequency error */ + timekeeping_freqadjust(tk, offset); + + /* Next make a small adjustment to fix any cumulative error */ + if (!tk->ntp_err_mult && (tk->ntp_error > 0)) { + tk->ntp_err_mult = 1; + timekeeping_apply_adjustment(tk, offset, 0, 0); + } else if (tk->ntp_err_mult && (tk->ntp_error <= 0)) { + /* Undo any existing error adjustment */ + timekeeping_apply_adjustment(tk, offset, 1, 0); + tk->ntp_err_mult = 0; + } + + if (unlikely(tk->tkr.clock->maxadj && + (tk->tkr.mult > tk->tkr.clock->mult + tk->tkr.clock->maxadj))) { + printk_once(KERN_WARNING + "Adjusting %s more than 11%% (%ld vs %ld)\n", + tk->tkr.clock->name, (long)tk->tkr.mult, + (long)tk->tkr.clock->mult + tk->tkr.clock->maxadj); + } -out_adjust: /* * It may be possible that when we entered this function, xtime_nsec * was very small. Further, if we're slightly speeding the clocksource @@ -1444,7 +1418,6 @@ out_adjust: tk->tkr.xtime_nsec = 0; tk->ntp_error += neg << tk->ntp_error_shift; } - } /** -- cgit v1.2.3-59-g8ed1b From 375f45b5b53a91dfa8f0c11328e0e044f82acbed Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 23 Apr 2014 20:53:29 -0700 Subject: timekeeping: Use cached ntp_tick_length when accumulating error By caching the ntp_tick_length() when we correct the frequency error, and then using that cached value to accumulate error, we avoid large initial errors when the tick length is changed. This makes convergence happen much faster in the simulator, since the initial error doesn't have to be slowly whittled away. This initially seems like an accounting error, but Miroslav pointed out that ntp_tick_length() can change mid-tick, so when we apply it in the error accumulation, we are applying any recent change to the entire tick. This approach chooses to apply changes in the ntp_tick_length() only to the next tick, which allows us to calculate the freq correction before using the new tick length, which avoids accummulating error. Credit to Miroslav for pointing this out and providing the original patch this functionality has been pulled out from, along with the rational. 
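A minimal sketch of the caching idea, with invented names (tk_sketch, sample_ntp_tick_length()) standing in for the real structures; only the pattern of sampling once and reusing the cached value is the point:

#include <stdint.h>

struct tk_sketch {
	uint64_t ntp_tick;	/* tick length cached for the current tick */
	int64_t ntp_error;
	uint64_t xtime_interval;
	uint32_t ntp_error_shift;
};

/* stand-in for ntp_tick_length(); its return value may change mid-tick */
extern uint64_t sample_ntp_tick_length(void);

/* Refresh the cache once, where the frequency correction is computed ... */
static void sketch_freqadjust(struct tk_sketch *tk)
{
	tk->ntp_tick = sample_ntp_tick_length();
	/* ... frequency correction derived from this same sample ... */
}

/* ... and accumulate error from the cached copy, not a fresh sample. */
static void sketch_accumulate(struct tk_sketch *tk, unsigned int shift)
{
	tk->ntp_error += (int64_t)(tk->ntp_tick << shift);
	tk->ntp_error -= (int64_t)(tk->xtime_interval << (tk->ntp_error_shift + shift));
}

Any mid-tick change to the NTP tick length is then only seen by the next tick, so the frequency correction and the error accumulation always agree on the value they used.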
Cc: Miroslav Lichvar Cc: Richard Cochran Cc: Prarit Bhargava Reported-by: Miroslav Lichvar Signed-off-by: John Stultz --- include/linux/timekeeper_internal.h | 9 +++++++++ kernel/time/timekeeping.c | 5 ++++- 2 files changed, 13 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index f7ac48d2edf5..e9660e52dc09 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -90,6 +90,15 @@ struct timekeeper { u64 xtime_interval; s64 xtime_remainder; u32 raw_interval; + /* The ntp_tick_length() value currently being used. + * This cached copy ensures we consistently apply the tick + * length for an entire tick, as ntp_tick_length may change + * mid-tick, and we don't want to apply that new value to + * the tick in progress. + */ + u64 ntp_tick; + /* Difference between accumulated time and NTP time in ntp + * shifted nano seconds. */ s64 ntp_error; u32 ntp_error_shift; u32 ntp_err_mult; diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 43c706a7a728..f36b02838a47 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -171,6 +171,7 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock) tk->ntp_error = 0; tk->ntp_error_shift = NTP_SCALE_SHIFT - clock->shift; + tk->ntp_tick = ntpinterval << tk->ntp_error_shift; /* * The timekeeper keeps its own mult values for the currently @@ -1352,6 +1353,8 @@ static __always_inline void timekeeping_freqadjust(struct timekeeper *tk, if (tk->ntp_err_mult) xinterval -= tk->cycle_interval; + tk->ntp_tick = ntp_tick_length(); + /* Calculate current error per tick */ tick_error = ntp_tick_length() >> tk->ntp_error_shift; tick_error -= (xinterval + tk->xtime_remainder); @@ -1497,7 +1500,7 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset, tk->raw_time.tv_nsec = raw_nsecs; /* Accumulate error between NTP and clock interval */ - tk->ntp_error += ntp_tick_length() << shift; + tk->ntp_error += tk->ntp_tick << shift; tk->ntp_error -= (tk->xtime_interval + tk->xtime_remainder) << (tk->ntp_error_shift + shift); -- cgit v1.2.3-59-g8ed1b From a0f7a756c2f7543585657cdeeefdfcc11b567293 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Wed, 23 Jul 2014 14:00:01 -0700 Subject: mm/rmap.c: fix pgoff calculation to handle hugepage correctly I triggered VM_BUG_ON() in vma_address() when I tried to migrate an anonymous hugepage with mbind() in the kernel v3.16-rc3. This is because pgoff's calculation in rmap_walk_anon() fails to consider compound_order() only to have an incorrect value. This patch introduces page_to_pgoff(), which gets the page's offset in PAGE_CACHE_SIZE. Kirill pointed out that page cache tree should natively handle hugepages, and in order to make hugetlbfs fit it, page->index of hugetlbfs page should be in PAGE_CACHE_SIZE. This is beyond this patch, but page_to_pgoff() contains the point to be fixed in a single function. Signed-off-by: Naoya Horiguchi Acked-by: Kirill A. 
Shutemov Cc: Joonsoo Kim Cc: Hugh Dickins Cc: Rik van Riel Cc: Hillf Danton Cc: Naoya Horiguchi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pagemap.h | 12 ++++++++++++ mm/memory-failure.c | 4 ++-- mm/rmap.c | 10 +++------- 3 files changed, 17 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 0a97b583ee8d..e1474ae18c88 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -398,6 +398,18 @@ static inline struct page *read_mapping_page(struct address_space *mapping, return read_cache_page(mapping, index, filler, data); } +/* + * Get the offset in PAGE_SIZE. + * (TODO: hugepage should have ->index in PAGE_SIZE) + */ +static inline pgoff_t page_to_pgoff(struct page *page) +{ + if (unlikely(PageHeadHuge(page))) + return page->index << compound_order(page); + else + return page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); +} + /* * Return byte-offset into filesystem object for page. */ diff --git a/mm/memory-failure.c b/mm/memory-failure.c index c6399e328931..7211a73ba14d 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -435,7 +435,7 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill, if (av == NULL) /* Not actually mapped anymore */ return; - pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); + pgoff = page_to_pgoff(page); read_lock(&tasklist_lock); for_each_process (tsk) { struct anon_vma_chain *vmac; @@ -469,7 +469,7 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill, mutex_lock(&mapping->i_mmap_mutex); read_lock(&tasklist_lock); for_each_process(tsk) { - pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); + pgoff_t pgoff = page_to_pgoff(page); struct task_struct *t = task_early_kill(tsk, force_early); if (!t) diff --git a/mm/rmap.c b/mm/rmap.c index b7e94ebbd09e..22a4a7699cdb 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -517,11 +517,7 @@ void page_unlock_anon_vma_read(struct anon_vma *anon_vma) static inline unsigned long __vma_address(struct page *page, struct vm_area_struct *vma) { - pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); - - if (unlikely(is_vm_hugetlb_page(vma))) - pgoff = page->index << huge_page_order(page_hstate(page)); - + pgoff_t pgoff = page_to_pgoff(page); return vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT); } @@ -1639,7 +1635,7 @@ static struct anon_vma *rmap_walk_anon_lock(struct page *page, static int rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc) { struct anon_vma *anon_vma; - pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); + pgoff_t pgoff = page_to_pgoff(page); struct anon_vma_chain *avc; int ret = SWAP_AGAIN; @@ -1680,7 +1676,7 @@ static int rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc) static int rmap_walk_file(struct page *page, struct rmap_walk_control *rwc) { struct address_space *mapping = page->mapping; - pgoff_t pgoff = page->index << compound_order(page); + pgoff_t pgoff = page_to_pgoff(page); struct vm_area_struct *vma; int ret = SWAP_AGAIN; -- cgit v1.2.3-59-g8ed1b From 8a2b22a2595bf89d4396530edf8388159fad9d83 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Wed, 23 Jul 2014 17:05:06 -0600 Subject: of: Make devicetree sysfs update functions consistent. All of the DT modification functions are split into two parts, the first part manipulates the DT data structure, and the second part updates sysfs, but the code isn't very consistent about how the second half is called. 
They don't all enforce the same rules about when it is valid to update sysfs, and there isn't any clarity on locking. The transactional DT modification feature that is coming also needs access to these functions so that it can perform all the structure changes together, and then all the sysfs updates as a second stage instead of doing each one at a time. Fix up the second have by creating a separate __of_*_sysfs() function for each of the helpers. The new functions have consistent naming (ie. of_node_add() becomes __of_attach_node_sysfs()) and all of them now defer if of_init hasn't been called yet. Callers of the new functions must hold the of_mutex to ensure there are no race conditions with of_init(). The mutex ensures that there will only ever be one writer to the tree at any given time. There can still be any number of readers and the raw_spin_lock is still used to make sure access to the data structure is still consistent. Finally, put the function prototypes into of_private.h so they are accessible to the transaction code. Signed-off-by: Pantelis Antoniou [grant.likely: Changed suffix from _post to _sysfs to match existing code] [grant.likely: Reorganized to eliminate trivial wrappers] Signed-off-by: Grant Likely --- drivers/of/base.c | 96 +++++++++++++++++++++++++------------------------ drivers/of/dynamic.c | 12 +++++-- drivers/of/of_private.h | 10 ++++++ include/linux/of.h | 2 -- 4 files changed, 69 insertions(+), 51 deletions(-) (limited to 'include/linux') diff --git a/drivers/of/base.c b/drivers/of/base.c index b403f9d98461..ad4929cbd876 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -37,10 +37,13 @@ struct device_node *of_chosen; struct device_node *of_aliases; static struct device_node *of_stdout; -static struct kset *of_kset; +struct kset *of_kset; /* - * Used to protect the of_aliases, to hold off addition of nodes to sysfs + * Used to protect the of_aliases, to hold off addition of nodes to sysfs. + * This mutex must be held whenever modifications are being made to the + * device tree. The of_{attach,detach}_node() and + * of_{add,remove,update}_property() helpers make sure this happens. */ DEFINE_MUTEX(of_mutex); @@ -127,13 +130,16 @@ static const char *safe_name(struct kobject *kobj, const char *orig_name) return name; } -static int __of_add_property_sysfs(struct device_node *np, struct property *pp) +int __of_add_property_sysfs(struct device_node *np, struct property *pp) { int rc; /* Important: Don't leak passwords */ bool secure = strncmp(pp->name, "security-", 9) == 0; + if (!of_kset || !of_node_is_attached(np)) + return 0; + sysfs_bin_attr_init(&pp->attr); pp->attr.attr.name = safe_name(&np->kobj, pp->name); pp->attr.attr.mode = secure ? S_IRUSR : S_IRUGO; @@ -145,12 +151,15 @@ static int __of_add_property_sysfs(struct device_node *np, struct property *pp) return rc; } -static int __of_node_add(struct device_node *np) +int __of_attach_node_sysfs(struct device_node *np) { const char *name; struct property *pp; int rc; + if (!of_kset) + return 0; + np->kobj.kset = of_kset; if (!np->parent) { /* Nodes without parents are new top level trees */ @@ -172,26 +181,6 @@ static int __of_node_add(struct device_node *np) return 0; } -int of_node_add(struct device_node *np) -{ - int rc = 0; - - BUG_ON(!of_node_is_initialized(np)); - - /* - * Grab the mutex here so that in a race condition between of_init() and - * of_node_add(), node addition will still be consistent. 
- */ - mutex_lock(&of_mutex); - if (of_kset) - rc = __of_node_add(np); - else - /* This scenario may be perfectly valid, but report it anyway */ - pr_info("of_node_add(%s) before of_init()\n", np->full_name); - mutex_unlock(&of_mutex); - return rc; -} - static int __init of_init(void) { struct device_node *np; @@ -204,7 +193,7 @@ static int __init of_init(void) return -ENOMEM; } for_each_of_allnodes(np) - __of_node_add(np); + __of_attach_node_sysfs(np); mutex_unlock(&of_mutex); /* Symlink in /proc as required by userspace ABI */ @@ -1689,15 +1678,17 @@ int of_add_property(struct device_node *np, struct property *prop) if (rc) return rc; + mutex_lock(&of_mutex); + raw_spin_lock_irqsave(&devtree_lock, flags); rc = __of_add_property(np, prop); raw_spin_unlock_irqrestore(&devtree_lock, flags); - if (rc) - return rc; - if (of_node_is_attached(np)) + if (!rc) __of_add_property_sysfs(np, prop); + mutex_unlock(&of_mutex); + return rc; } @@ -1720,6 +1711,13 @@ int __of_remove_property(struct device_node *np, struct property *prop) return 0; } +void __of_remove_property_sysfs(struct device_node *np, struct property *prop) +{ + /* at early boot, bail here and defer setup to of_init() */ + if (of_kset && of_node_is_attached(np)) + sysfs_remove_bin_file(&np->kobj, &prop->attr); +} + /** * of_remove_property - Remove a property from a node. * @@ -1737,20 +1735,18 @@ int of_remove_property(struct device_node *np, struct property *prop) if (rc) return rc; + mutex_lock(&of_mutex); + raw_spin_lock_irqsave(&devtree_lock, flags); rc = __of_remove_property(np, prop); raw_spin_unlock_irqrestore(&devtree_lock, flags); - if (rc) - return rc; - - /* at early boot, bail hear and defer setup to of_init() */ - if (!of_kset) - return 0; + if (!rc) + __of_remove_property_sysfs(np, prop); - sysfs_remove_bin_file(&np->kobj, &prop->attr); + mutex_unlock(&of_mutex); - return 0; + return rc; } int __of_update_property(struct device_node *np, struct property *newprop, @@ -1779,6 +1775,18 @@ int __of_update_property(struct device_node *np, struct property *newprop, return 0; } +void __of_update_property_sysfs(struct device_node *np, struct property *newprop, + struct property *oldprop) +{ + /* At early boot, bail out and defer setup to of_init() */ + if (!of_kset) + return; + + if (oldprop) + sysfs_remove_bin_file(&np->kobj, &oldprop->attr); + __of_add_property_sysfs(np, newprop); +} + /* * of_update_property - Update a property in a node, if the property does * not exist, add it. 
@@ -1801,22 +1809,18 @@ int of_update_property(struct device_node *np, struct property *newprop) if (rc) return rc; + mutex_lock(&of_mutex); + raw_spin_lock_irqsave(&devtree_lock, flags); rc = __of_update_property(np, newprop, &oldprop); raw_spin_unlock_irqrestore(&devtree_lock, flags); - if (rc) - return rc; - /* At early boot, bail out and defer setup to of_init() */ - if (!of_kset) - return 0; + if (!rc) + __of_update_property_sysfs(np, newprop, oldprop); - /* Update the sysfs attribute */ - if (oldprop) - sysfs_remove_bin_file(&np->kobj, &oldprop->attr); - __of_add_property_sysfs(np, newprop); + mutex_unlock(&of_mutex); - return 0; + return rc; } static void of_alias_add(struct alias_prop *ap, struct device_node *np, diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c index 75fcc66fcefd..c875787fa394 100644 --- a/drivers/of/dynamic.c +++ b/drivers/of/dynamic.c @@ -41,11 +41,13 @@ void of_node_put(struct device_node *node) } EXPORT_SYMBOL(of_node_put); -static void of_node_remove(struct device_node *np) +void __of_detach_node_sysfs(struct device_node *np) { struct property *pp; BUG_ON(!of_node_is_initialized(np)); + if (!of_kset) + return; /* only remove properties if on sysfs */ if (of_node_is_attached(np)) { @@ -115,11 +117,13 @@ int of_attach_node(struct device_node *np) if (rc) return rc; + mutex_lock(&of_mutex); raw_spin_lock_irqsave(&devtree_lock, flags); __of_attach_node(np); raw_spin_unlock_irqrestore(&devtree_lock, flags); - of_node_add(np); + __of_attach_node_sysfs(np); + mutex_unlock(&of_mutex); return 0; } @@ -174,11 +178,13 @@ int of_detach_node(struct device_node *np) if (rc) return rc; + mutex_lock(&of_mutex); raw_spin_lock_irqsave(&devtree_lock, flags); __of_detach_node(np); raw_spin_unlock_irqrestore(&devtree_lock, flags); - of_node_remove(np); + __of_detach_node_sysfs(np); + mutex_unlock(&of_mutex); return rc; } diff --git a/drivers/of/of_private.h b/drivers/of/of_private.h index 0f6089722af9..0d99ba8caeed 100644 --- a/drivers/of/of_private.h +++ b/drivers/of/of_private.h @@ -33,6 +33,8 @@ struct alias_prop { extern struct mutex of_mutex; extern struct list_head aliases_lookup; +extern struct kset *of_kset; + static inline struct device_node *kobj_to_device_node(struct kobject *kobj) { @@ -62,11 +64,19 @@ struct property *__of_prop_dup(const struct property *prop, gfp_t allocflags); struct device_node *__of_node_alloc(const char *full_name, gfp_t allocflags); extern int __of_add_property(struct device_node *np, struct property *prop); +extern int __of_add_property_sysfs(struct device_node *np, + struct property *prop); extern int __of_remove_property(struct device_node *np, struct property *prop); +extern void __of_remove_property_sysfs(struct device_node *np, + struct property *prop); extern int __of_update_property(struct device_node *np, struct property *newprop, struct property **oldprop); +extern void __of_update_property_sysfs(struct device_node *np, + struct property *newprop, struct property *oldprop); extern void __of_attach_node(struct device_node *np); +extern int __of_attach_node_sysfs(struct device_node *np); extern void __of_detach_node(struct device_node *np); +extern void __of_detach_node_sysfs(struct device_node *np); #endif /* _LINUX_OF_PRIVATE_H */ diff --git a/include/linux/of.h b/include/linux/of.h index abf829a1f150..705fa12fca7f 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -74,8 +74,6 @@ struct of_phandle_args { uint32_t args[MAX_PHANDLE_ARGS]; }; -extern int of_node_add(struct device_node *node); - /* initialize a node */ 
extern struct kobj_type of_node_ktype; static inline void of_node_init(struct device_node *node) -- cgit v1.2.3-59-g8ed1b From 259092a35c7e11f1d4616b0f5b3ba7b851fe4fa6 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Wed, 16 Jul 2014 12:48:23 -0600 Subject: of: Reorder device tree changes and notifiers Currently, devicetree reconfig notifiers get emitted before the change is applied to the tree, but that behaviour is problematic if the receiver wants the determine the new state of the tree. The current users don't care, but the changeset code to follow will be making multiple changes at once. Reorder notifiers to get emitted after the change has been applied to the tree so that callbacks see the new tree state. At the same time, fixup the existing callbacks to expect the new order. There are a few callbacks that compare the old and new values of a changed property. Put both property pointers into the of_prop_reconfig structure. The current notifiers also allow the notifier callback to fail and cancel the change to the tree, but that feature isn't actually used. It really isn't valid to ignore a tree modification provided by firmware anyway, so remove the ability to cancel a change to the tree. Signed-off-by: Grant Likely Cc: Nathan Fontenot --- arch/powerpc/platforms/pseries/hotplug-memory.c | 2 +- drivers/crypto/nx/nx-842.c | 30 +++++++------------------ drivers/of/base.c | 21 ++++++++--------- drivers/of/dynamic.c | 18 +++++++-------- drivers/of/of_private.h | 4 ++-- include/linux/of.h | 1 + 6 files changed, 29 insertions(+), 47 deletions(-) (limited to 'include/linux') diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 7995135170a3..ac01e188faef 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -194,7 +194,7 @@ static int pseries_update_drconf_memory(struct of_prop_reconfig *pr) if (!memblock_size) return -EINVAL; - p = (u32 *)of_get_property(pr->dn, "ibm,dynamic-memory", NULL); + p = (u32 *) pr->old_prop->value; if (!p) return -EINVAL; diff --git a/drivers/crypto/nx/nx-842.c b/drivers/crypto/nx/nx-842.c index 502edf0a2933..c897c3a5ee17 100644 --- a/drivers/crypto/nx/nx-842.c +++ b/drivers/crypto/nx/nx-842.c @@ -936,28 +936,14 @@ static int nx842_OF_upd(struct property *new_prop) goto error_out; } - /* Set ptr to new property if provided */ - if (new_prop) { - /* Single property */ - if (!strncmp(new_prop->name, "status", new_prop->length)) { - status = new_prop; - - } else if (!strncmp(new_prop->name, "ibm,max-sg-len", - new_prop->length)) { - maxsglen = new_prop; - - } else if (!strncmp(new_prop->name, "ibm,max-sync-cop", - new_prop->length)) { - maxsyncop = new_prop; - - } else { - /* - * Skip the update, the property being updated - * has no impact. - */ - goto out; - } - } + /* + * If this is a property update, there are only certain properties that + * we care about. 
Bail if it isn't in the below list + */ + if (new_prop && (strncmp(new_prop->name, "status", new_prop->length) || + strncmp(new_prop->name, "ibm,max-sg-len", new_prop->length) || + strncmp(new_prop->name, "ibm,max-sync-cop", new_prop->length))) + goto out; /* Perform property updates */ ret = nx842_OF_upd_status(new_devdata, status); diff --git a/drivers/of/base.c b/drivers/of/base.c index ededf8e33145..a7ad1013edfa 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -1674,10 +1674,6 @@ int of_add_property(struct device_node *np, struct property *prop) unsigned long flags; int rc; - rc = of_property_notify(OF_RECONFIG_ADD_PROPERTY, np, prop); - if (rc) - return rc; - mutex_lock(&of_mutex); raw_spin_lock_irqsave(&devtree_lock, flags); @@ -1689,6 +1685,9 @@ int of_add_property(struct device_node *np, struct property *prop) mutex_unlock(&of_mutex); + if (!rc) + of_property_notify(OF_RECONFIG_ADD_PROPERTY, np, prop, NULL); + return rc; } @@ -1731,10 +1730,6 @@ int of_remove_property(struct device_node *np, struct property *prop) unsigned long flags; int rc; - rc = of_property_notify(OF_RECONFIG_REMOVE_PROPERTY, np, prop); - if (rc) - return rc; - mutex_lock(&of_mutex); raw_spin_lock_irqsave(&devtree_lock, flags); @@ -1746,6 +1741,9 @@ int of_remove_property(struct device_node *np, struct property *prop) mutex_unlock(&of_mutex); + if (!rc) + of_property_notify(OF_RECONFIG_REMOVE_PROPERTY, np, prop, NULL); + return rc; } @@ -1805,10 +1803,6 @@ int of_update_property(struct device_node *np, struct property *newprop) if (!newprop->name) return -EINVAL; - rc = of_property_notify(OF_RECONFIG_UPDATE_PROPERTY, np, newprop); - if (rc) - return rc; - mutex_lock(&of_mutex); raw_spin_lock_irqsave(&devtree_lock, flags); @@ -1820,6 +1814,9 @@ int of_update_property(struct device_node *np, struct property *newprop) mutex_unlock(&of_mutex); + if (!rc) + of_property_notify(OF_RECONFIG_UPDATE_PROPERTY, np, newprop, oldprop); + return rc; } diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c index 7c020b9a3317..7bd5501736a6 100644 --- a/drivers/of/dynamic.c +++ b/drivers/of/dynamic.c @@ -83,7 +83,7 @@ int of_reconfig_notify(unsigned long action, void *p) } int of_property_notify(int action, struct device_node *np, - struct property *prop) + struct property *prop, struct property *oldprop) { struct of_prop_reconfig pr; @@ -93,6 +93,7 @@ int of_property_notify(int action, struct device_node *np, pr.dn = np; pr.prop = prop; + pr.old_prop = oldprop; return of_reconfig_notify(action, &pr); } @@ -125,11 +126,6 @@ void __of_attach_node(struct device_node *np) int of_attach_node(struct device_node *np) { unsigned long flags; - int rc; - - rc = of_reconfig_notify(OF_RECONFIG_ATTACH_NODE, np); - if (rc) - return rc; mutex_lock(&of_mutex); raw_spin_lock_irqsave(&devtree_lock, flags); @@ -138,6 +134,9 @@ int of_attach_node(struct device_node *np) __of_attach_node_sysfs(np); mutex_unlock(&of_mutex); + + of_reconfig_notify(OF_RECONFIG_ATTACH_NODE, np); + return 0; } @@ -188,10 +187,6 @@ int of_detach_node(struct device_node *np) unsigned long flags; int rc = 0; - rc = of_reconfig_notify(OF_RECONFIG_DETACH_NODE, np); - if (rc) - return rc; - mutex_lock(&of_mutex); raw_spin_lock_irqsave(&devtree_lock, flags); __of_detach_node(np); @@ -199,6 +194,9 @@ int of_detach_node(struct device_node *np) __of_detach_node_sysfs(np); mutex_unlock(&of_mutex); + + of_reconfig_notify(OF_RECONFIG_DETACH_NODE, np); + return rc; } diff --git a/drivers/of/of_private.h b/drivers/of/of_private.h index 8129c0e58d70..f69ccb1fa308 100644 --- 
a/drivers/of/of_private.h +++ b/drivers/of/of_private.h @@ -43,11 +43,11 @@ static inline struct device_node *kobj_to_device_node(struct kobject *kobj) #if defined(CONFIG_OF_DYNAMIC) extern int of_property_notify(int action, struct device_node *np, - struct property *prop); + struct property *prop, struct property *old_prop); extern void of_node_release(struct kobject *kobj); #else /* CONFIG_OF_DYNAMIC */ static inline int of_property_notify(int action, struct device_node *np, - struct property *prop) + struct property *prop, struct property *old_prop) { return 0; } diff --git a/include/linux/of.h b/include/linux/of.h index 705fa12fca7f..400f18cb4fff 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -321,6 +321,7 @@ extern int of_update_property(struct device_node *np, struct property *newprop); struct of_prop_reconfig { struct device_node *dn; struct property *prop; + struct property *old_prop; }; extern int of_reconfig_notifier_register(struct notifier_block *); -- cgit v1.2.3-59-g8ed1b From 201c910bd6898d81d4ac6685d0f421b7e10f3c5d Mon Sep 17 00:00:00 2001 From: Pantelis Antoniou Date: Fri, 4 Jul 2014 19:58:49 +0300 Subject: of: Transactional DT support. Introducing DT transactional support. A DT transaction is a method which allows one to apply changes in the live tree, in such a way that either the full set of changes take effect, or the state of the tree can be rolled-back to the state it was before it was attempted. An applied transaction can be rolled-back at any time. Documentation is in Documentation/devicetree/changesets.txt Signed-off-by: Pantelis Antoniou [glikely: Removed device notifiers and reworked to be more consistent] Signed-off-by: Grant Likely --- Documentation/devicetree/changesets.txt | 40 ++++ drivers/of/dynamic.c | 344 ++++++++++++++++++++++++++++++++ drivers/of/of_private.h | 9 + drivers/of/selftest.c | 51 +++++ drivers/of/testcase-data/testcases.dtsi | 10 + include/linux/of.h | 76 +++++++ 6 files changed, 530 insertions(+) create mode 100644 Documentation/devicetree/changesets.txt (limited to 'include/linux') diff --git a/Documentation/devicetree/changesets.txt b/Documentation/devicetree/changesets.txt new file mode 100644 index 000000000000..935ba5acc34e --- /dev/null +++ b/Documentation/devicetree/changesets.txt @@ -0,0 +1,40 @@ +A DT changeset is a method which allows one to apply changes +in the live tree in such a way that either the full set of changes +will be applied, or none of them will be. If an error occurs partway +through applying the changeset, then the tree will be rolled back to the +previous state. A changeset can also be removed after it has been +applied. + +When a changeset is applied, all of the changes get applied to the tree +at once before emitting OF_RECONFIG notifiers. This is so that the +receiver sees a complete and consistent state of the tree when it +receives the notifier. + +The sequence of a changeset is as follows. + +1. of_changeset_init() - initializes a changeset + +2. A number of DT tree change calls, of_changeset_attach_node(), +of_changeset_detach_node(), of_changeset_add_property(), +of_changeset_remove_property, of_changeset_update_property() to prepare +a set of changes. No changes to the active tree are made at this point. +All the change operations are recorded in the of_changeset 'entries' +list. + +3. mutex_lock(of_mutex) - starts a changeset; The global of_mutex +ensures there can only be one editor at a time. + +4. of_changeset_apply() - Apply the changes to the tree. 
Either the +entire changeset will get applied, or if there is an error the tree will +be restored to the previous state + +5. mutex_unlock(of_mutex) - All operations complete, release the mutex + +If a successfully applied changeset needs to be removed, it can be done +with the following sequence. + +1. mutex_lock(of_mutex) + +2. of_changeset_revert() + +3. mutex_unlock(of_mutex) diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c index 7bd5501736a6..c1002b7be786 100644 --- a/drivers/of/dynamic.c +++ b/drivers/of/dynamic.c @@ -314,3 +314,347 @@ struct device_node *__of_node_alloc(const char *full_name, gfp_t allocflags) kfree(node); return NULL; } + +static void __of_changeset_entry_destroy(struct of_changeset_entry *ce) +{ + of_node_put(ce->np); + list_del(&ce->node); + kfree(ce); +} + +#ifdef DEBUG +static void __of_changeset_entry_dump(struct of_changeset_entry *ce) +{ + switch (ce->action) { + case OF_RECONFIG_ADD_PROPERTY: + pr_debug("%p: %s %s/%s\n", + ce, "ADD_PROPERTY ", ce->np->full_name, + ce->prop->name); + break; + case OF_RECONFIG_REMOVE_PROPERTY: + pr_debug("%p: %s %s/%s\n", + ce, "REMOVE_PROPERTY", ce->np->full_name, + ce->prop->name); + break; + case OF_RECONFIG_UPDATE_PROPERTY: + pr_debug("%p: %s %s/%s\n", + ce, "UPDATE_PROPERTY", ce->np->full_name, + ce->prop->name); + break; + case OF_RECONFIG_ATTACH_NODE: + pr_debug("%p: %s %s\n", + ce, "ATTACH_NODE ", ce->np->full_name); + break; + case OF_RECONFIG_DETACH_NODE: + pr_debug("%p: %s %s\n", + ce, "DETACH_NODE ", ce->np->full_name); + break; + } +} +#else +static inline void __of_changeset_entry_dump(struct of_changeset_entry *ce) +{ + /* empty */ +} +#endif + +static void __of_changeset_entry_invert(struct of_changeset_entry *ce, + struct of_changeset_entry *rce) +{ + memcpy(rce, ce, sizeof(*rce)); + + switch (ce->action) { + case OF_RECONFIG_ATTACH_NODE: + rce->action = OF_RECONFIG_DETACH_NODE; + break; + case OF_RECONFIG_DETACH_NODE: + rce->action = OF_RECONFIG_ATTACH_NODE; + break; + case OF_RECONFIG_ADD_PROPERTY: + rce->action = OF_RECONFIG_REMOVE_PROPERTY; + break; + case OF_RECONFIG_REMOVE_PROPERTY: + rce->action = OF_RECONFIG_ADD_PROPERTY; + break; + case OF_RECONFIG_UPDATE_PROPERTY: + rce->old_prop = ce->prop; + rce->prop = ce->old_prop; + break; + } +} + +static void __of_changeset_entry_notify(struct of_changeset_entry *ce, bool revert) +{ + struct of_changeset_entry ce_inverted; + int ret; + + if (revert) { + __of_changeset_entry_invert(ce, &ce_inverted); + ce = &ce_inverted; + } + + switch (ce->action) { + case OF_RECONFIG_ATTACH_NODE: + case OF_RECONFIG_DETACH_NODE: + ret = of_reconfig_notify(ce->action, ce->np); + break; + case OF_RECONFIG_ADD_PROPERTY: + case OF_RECONFIG_REMOVE_PROPERTY: + case OF_RECONFIG_UPDATE_PROPERTY: + ret = of_property_notify(ce->action, ce->np, ce->prop, ce->old_prop); + break; + default: + pr_err("%s: invalid devicetree changeset action: %i\n", __func__, + (int)ce->action); + return; + } + + if (ret) + pr_err("%s: notifier error @%s\n", __func__, ce->np->full_name); +} + +static int __of_changeset_entry_apply(struct of_changeset_entry *ce) +{ + struct property *old_prop, **propp; + unsigned long flags; + int ret = 0; + + __of_changeset_entry_dump(ce); + + raw_spin_lock_irqsave(&devtree_lock, flags); + switch (ce->action) { + case OF_RECONFIG_ATTACH_NODE: + __of_attach_node(ce->np); + break; + case OF_RECONFIG_DETACH_NODE: + __of_detach_node(ce->np); + break; + case OF_RECONFIG_ADD_PROPERTY: + /* If the property is in deadprops then it must be removed */ + for (propp = 
&ce->np->deadprops; *propp; propp = &(*propp)->next) { + if (*propp == ce->prop) { + *propp = ce->prop->next; + ce->prop->next = NULL; + break; + } + } + + ret = __of_add_property(ce->np, ce->prop); + if (ret) { + pr_err("%s: add_property failed @%s/%s\n", + __func__, ce->np->full_name, + ce->prop->name); + break; + } + break; + case OF_RECONFIG_REMOVE_PROPERTY: + ret = __of_remove_property(ce->np, ce->prop); + if (ret) { + pr_err("%s: remove_property failed @%s/%s\n", + __func__, ce->np->full_name, + ce->prop->name); + break; + } + break; + + case OF_RECONFIG_UPDATE_PROPERTY: + /* If the property is in deadprops then it must be removed */ + for (propp = &ce->np->deadprops; *propp; propp = &(*propp)->next) { + if (*propp == ce->prop) { + *propp = ce->prop->next; + ce->prop->next = NULL; + break; + } + } + + ret = __of_update_property(ce->np, ce->prop, &old_prop); + if (ret) { + pr_err("%s: update_property failed @%s/%s\n", + __func__, ce->np->full_name, + ce->prop->name); + break; + } + break; + default: + ret = -EINVAL; + } + raw_spin_unlock_irqrestore(&devtree_lock, flags); + + if (ret) + return ret; + + switch (ce->action) { + case OF_RECONFIG_ATTACH_NODE: + __of_attach_node_sysfs(ce->np); + break; + case OF_RECONFIG_DETACH_NODE: + __of_detach_node_sysfs(ce->np); + break; + case OF_RECONFIG_ADD_PROPERTY: + /* ignore duplicate names */ + __of_add_property_sysfs(ce->np, ce->prop); + break; + case OF_RECONFIG_REMOVE_PROPERTY: + __of_remove_property_sysfs(ce->np, ce->prop); + break; + case OF_RECONFIG_UPDATE_PROPERTY: + __of_update_property_sysfs(ce->np, ce->prop, ce->old_prop); + break; + } + + return 0; +} + +static inline int __of_changeset_entry_revert(struct of_changeset_entry *ce) +{ + struct of_changeset_entry ce_inverted; + + __of_changeset_entry_invert(ce, &ce_inverted); + return __of_changeset_entry_apply(&ce_inverted); +} + +/** + * of_changeset_init - Initialize a changeset for use + * + * @ocs: changeset pointer + * + * Initialize a changeset structure + */ +void of_changeset_init(struct of_changeset *ocs) +{ + memset(ocs, 0, sizeof(*ocs)); + INIT_LIST_HEAD(&ocs->entries); +} + +/** + * of_changeset_destroy - Destroy a changeset + * + * @ocs: changeset pointer + * + * Destroys a changeset. Note that if a changeset is applied, + * its changes to the tree cannot be reverted. + */ +void of_changeset_destroy(struct of_changeset *ocs) +{ + struct of_changeset_entry *ce, *cen; + + list_for_each_entry_safe_reverse(ce, cen, &ocs->entries, node) + __of_changeset_entry_destroy(ce); +} + +/** + * of_changeset_apply - Applies a changeset + * + * @ocs: changeset pointer + * + * Applies a changeset to the live tree. + * Any side-effects of live tree state changes are applied here on + * sucess, like creation/destruction of devices and side-effects + * like creation of sysfs properties and directories. + * Returns 0 on success, a negative error value in case of an error. + * On error the partially applied effects are reverted. 
+ */ +int of_changeset_apply(struct of_changeset *ocs) +{ + struct of_changeset_entry *ce; + int ret; + + /* perform the rest of the work */ + pr_debug("of_changeset: applying...\n"); + list_for_each_entry(ce, &ocs->entries, node) { + ret = __of_changeset_entry_apply(ce); + if (ret) { + pr_err("%s: Error applying changeset (%d)\n", __func__, ret); + list_for_each_entry_continue_reverse(ce, &ocs->entries, node) + __of_changeset_entry_revert(ce); + return ret; + } + } + pr_debug("of_changeset: applied, emitting notifiers.\n"); + + /* drop the global lock while emitting notifiers */ + mutex_unlock(&of_mutex); + list_for_each_entry(ce, &ocs->entries, node) + __of_changeset_entry_notify(ce, 0); + mutex_lock(&of_mutex); + pr_debug("of_changeset: notifiers sent.\n"); + + return 0; +} + +/** + * of_changeset_revert - Reverts an applied changeset + * + * @ocs: changeset pointer + * + * Reverts a changeset returning the state of the tree to what it + * was before the application. + * Any side-effects like creation/destruction of devices and + * removal of sysfs properties and directories are applied. + * Returns 0 on success, a negative error value in case of an error. + */ +int of_changeset_revert(struct of_changeset *ocs) +{ + struct of_changeset_entry *ce; + int ret; + + pr_debug("of_changeset: reverting...\n"); + list_for_each_entry_reverse(ce, &ocs->entries, node) { + ret = __of_changeset_entry_revert(ce); + if (ret) { + pr_err("%s: Error reverting changeset (%d)\n", __func__, ret); + list_for_each_entry_continue(ce, &ocs->entries, node) + __of_changeset_entry_apply(ce); + return ret; + } + } + pr_debug("of_changeset: reverted, emitting notifiers.\n"); + + /* drop the global lock while emitting notifiers */ + mutex_unlock(&of_mutex); + list_for_each_entry_reverse(ce, &ocs->entries, node) + __of_changeset_entry_notify(ce, 1); + mutex_lock(&of_mutex); + pr_debug("of_changeset: notifiers sent.\n"); + + return 0; +} + +/** + * of_changeset_action - Perform a changeset action + * + * @ocs: changeset pointer + * @action: action to perform + * @np: Pointer to device node + * @prop: Pointer to property + * + * On action being one of: + * + OF_RECONFIG_ATTACH_NODE + * + OF_RECONFIG_DETACH_NODE, + * + OF_RECONFIG_ADD_PROPERTY + * + OF_RECONFIG_REMOVE_PROPERTY, + * + OF_RECONFIG_UPDATE_PROPERTY + * Returns 0 on success, a negative error value in case of an error. 
+ */ +int of_changeset_action(struct of_changeset *ocs, unsigned long action, + struct device_node *np, struct property *prop) +{ + struct of_changeset_entry *ce; + + ce = kzalloc(sizeof(*ce), GFP_KERNEL); + if (!ce) { + pr_err("%s: Failed to allocate\n", __func__); + return -ENOMEM; + } + /* get a reference to the node */ + ce->action = action; + ce->np = of_node_get(np); + ce->prop = prop; + + if (action == OF_RECONFIG_UPDATE_PROPERTY && prop) + ce->old_prop = of_find_property(np, prop->name, NULL); + + /* add it to the list */ + list_add_tail(&ce->node, &ocs->entries); + return 0; +} diff --git a/drivers/of/of_private.h b/drivers/of/of_private.h index f69ccb1fa308..858e0a5d9a11 100644 --- a/drivers/of/of_private.h +++ b/drivers/of/of_private.h @@ -81,4 +81,13 @@ extern int __of_attach_node_sysfs(struct device_node *np); extern void __of_detach_node(struct device_node *np); extern void __of_detach_node_sysfs(struct device_node *np); +/* iterators for transactions, used for overlays */ +/* forward iterator */ +#define for_each_transaction_entry(_oft, _te) \ + list_for_each_entry(_te, &(_oft)->te_list, node) + +/* reverse iterator */ +#define for_each_transaction_entry_reverse(_oft, _te) \ + list_for_each_entry_reverse(_te, &(_oft)->te_list, node) + #endif /* _LINUX_OF_PRIVATE_H */ diff --git a/drivers/of/selftest.c b/drivers/of/selftest.c index ee2166f0f36a..04e39a183e53 100644 --- a/drivers/of/selftest.c +++ b/drivers/of/selftest.c @@ -293,6 +293,56 @@ static void __init of_selftest_property_copy(void) #endif } +static void __init of_selftest_changeset(void) +{ +#ifdef CONFIG_OF_DYNAMIC + struct property *ppadd, padd = { .name = "prop-add", .length = 0, .value = "" }; + struct property *ppupdate, pupdate = { .name = "prop-update", .length = 5, .value = "abcd" }; + struct property *ppremove; + struct device_node *n1, *n2, *n21, *nremove, *parent; + struct of_changeset chgset; + + of_changeset_init(&chgset); + n1 = __of_node_alloc("/testcase-data/changeset/n1", GFP_KERNEL); + selftest(n1, "testcase setup failure\n"); + n2 = __of_node_alloc("/testcase-data/changeset/n2", GFP_KERNEL); + selftest(n2, "testcase setup failure\n"); + n21 = __of_node_alloc("/testcase-data/changeset/n2/n21", GFP_KERNEL); + selftest(n21, "testcase setup failure %p\n", n21); + nremove = of_find_node_by_path("/testcase-data/changeset/node-remove"); + selftest(nremove, "testcase setup failure\n"); + ppadd = __of_prop_dup(&padd, GFP_KERNEL); + selftest(ppadd, "testcase setup failure\n"); + ppupdate = __of_prop_dup(&pupdate, GFP_KERNEL); + selftest(ppupdate, "testcase setup failure\n"); + parent = nremove->parent; + n1->parent = parent; + n2->parent = parent; + n21->parent = n2; + n2->child = n21; + ppremove = of_find_property(parent, "prop-remove", NULL); + selftest(ppremove, "failed to find removal prop"); + + of_changeset_init(&chgset); + selftest(!of_changeset_attach_node(&chgset, n1), "fail attach n1\n"); + selftest(!of_changeset_attach_node(&chgset, n2), "fail attach n2\n"); + selftest(!of_changeset_detach_node(&chgset, nremove), "fail remove node\n"); + selftest(!of_changeset_attach_node(&chgset, n21), "fail attach n21\n"); + selftest(!of_changeset_add_property(&chgset, parent, ppadd), "fail add prop\n"); + selftest(!of_changeset_update_property(&chgset, parent, ppupdate), "fail update prop\n"); + selftest(!of_changeset_remove_property(&chgset, parent, ppremove), "fail remove prop\n"); + mutex_lock(&of_mutex); + selftest(!of_changeset_apply(&chgset), "apply failed\n"); + mutex_unlock(&of_mutex); + + 
mutex_lock(&of_mutex); + selftest(!of_changeset_revert(&chgset), "revert failed\n"); + mutex_unlock(&of_mutex); + + of_changeset_destroy(&chgset); +#endif +} + static void __init of_selftest_parse_interrupts(void) { struct device_node *np; @@ -561,6 +611,7 @@ static int __init of_selftest(void) of_selftest_parse_phandle_with_args(); of_selftest_property_match_string(); of_selftest_property_copy(); + of_selftest_changeset(); of_selftest_parse_interrupts(); of_selftest_parse_interrupts_extended(); of_selftest_match_node(); diff --git a/drivers/of/testcase-data/testcases.dtsi b/drivers/of/testcase-data/testcases.dtsi index 6d8d980ac858..669bb07df142 100644 --- a/drivers/of/testcase-data/testcases.dtsi +++ b/drivers/of/testcase-data/testcases.dtsi @@ -1,3 +1,13 @@ +/ { + testcase-data { + changeset { + prop-update = "hello"; + prop-remove = "world"; + node-remove { + }; + }; + }; +}; #include "tests-phandle.dtsi" #include "tests-interrupts.dtsi" #include "tests-match.dtsi" diff --git a/include/linux/of.h b/include/linux/of.h index 400f18cb4fff..bc91fbb13ce8 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -786,4 +786,80 @@ typedef void (*of_init_fn_1)(struct device_node *); #define OF_DECLARE_2(table, name, compat, fn) \ _OF_DECLARE(table, name, compat, fn, of_init_fn_2) +/** + * struct of_changeset_entry - Holds a changeset entry + * + * @node: list_head for the log list + * @action: notifier action + * @np: pointer to the device node affected + * @prop: pointer to the property affected + * @old_prop: hold a pointer to the original property + * + * Every modification of the device tree during a changeset + * is held in a list of of_changeset_entry structures. + * That way we can recover from a partial application, or we can + * revert the changeset + */ +struct of_changeset_entry { + struct list_head node; + unsigned long action; + struct device_node *np; + struct property *prop; + struct property *old_prop; +}; + +/** + * struct of_changeset - changeset tracker structure + * + * @entries: list_head for the changeset entries + * + * changesets are a convenient way to apply bulk changes to the + * live tree. In case of an error, changes are rolled-back. + * changesets live on after initial application, and if not + * destroyed after use, they can be reverted in one single call. 
+ */ +struct of_changeset { + struct list_head entries; +}; + +#ifdef CONFIG_OF_DYNAMIC +extern void of_changeset_init(struct of_changeset *ocs); +extern void of_changeset_destroy(struct of_changeset *ocs); +extern int of_changeset_apply(struct of_changeset *ocs); +extern int of_changeset_revert(struct of_changeset *ocs); +extern int of_changeset_action(struct of_changeset *ocs, + unsigned long action, struct device_node *np, + struct property *prop); + +static inline int of_changeset_attach_node(struct of_changeset *ocs, + struct device_node *np) +{ + return of_changeset_action(ocs, OF_RECONFIG_ATTACH_NODE, np, NULL); +} + +static inline int of_changeset_detach_node(struct of_changeset *ocs, + struct device_node *np) +{ + return of_changeset_action(ocs, OF_RECONFIG_DETACH_NODE, np, NULL); +} + +static inline int of_changeset_add_property(struct of_changeset *ocs, + struct device_node *np, struct property *prop) +{ + return of_changeset_action(ocs, OF_RECONFIG_ADD_PROPERTY, np, prop); +} + +static inline int of_changeset_remove_property(struct of_changeset *ocs, + struct device_node *np, struct property *prop) +{ + return of_changeset_action(ocs, OF_RECONFIG_REMOVE_PROPERTY, np, prop); +} + +static inline int of_changeset_update_property(struct of_changeset *ocs, + struct device_node *np, struct property *prop) +{ + return of_changeset_action(ocs, OF_RECONFIG_UPDATE_PROPERTY, np, prop); +} +#endif + #endif /* _LINUX_OF_H */ -- cgit v1.2.3-59-g8ed1b From f6eec614d2252a99b861e288b6301599d2d58da4 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Thu, 17 Jul 2014 15:14:15 -0700 Subject: openvswitch: Enable tunnel GSO for OVS bridge. Following patch enables all available tunnel GSO features for OVS bridge device so that ovs can use hardware offloads available to underling device. 
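As an illustrative sketch (not part of this patch), the pattern applied to the OVS internal device in the diff below generalizes to any software netdev that wants the stack's tunnel GSO offloads; the function name here is hypothetical, and NETIF_F_GSO_ENCAP_ALL is the mask this patch introduces:

	#include <linux/netdevice.h>
	#include <linux/netdev_features.h>

	static void example_tunnel_offload_setup(struct net_device *dev)
	{
		/* advertise software GSO plus every tunnel GSO type */
		dev->features |= NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ENCAP_ALL;

		/* allow the same offloads for inner (encapsulated) packets */
		dev->hw_enc_features = dev->features;
	}

Copying the feature set into hw_enc_features is what lets the offloads apply to the encapsulated traffic carried over tunnels.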
Signed-off-by: Pravin B Shelar Acked-by: Andy Zhou --- include/linux/netdev_features.h | 8 ++++++++ net/openvswitch/vport-internal_dev.c | 5 ++++- 2 files changed, 12 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index d99800cbdcf3..dcfdecbfa0b7 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -176,4 +176,12 @@ enum { NETIF_F_HW_VLAN_STAG_RX | \ NETIF_F_HW_VLAN_STAG_TX) +#define NETIF_F_GSO_ENCAP_ALL (NETIF_F_GSO_GRE | \ + NETIF_F_GSO_GRE_CSUM | \ + NETIF_F_GSO_IPIP | \ + NETIF_F_GSO_SIT | \ + NETIF_F_GSO_UDP_TUNNEL | \ + NETIF_F_GSO_UDP_TUNNEL_CSUM | \ + NETIF_F_GSO_MPLS) + #endif /* _LINUX_NETDEV_FEATURES_H */ diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c index bd658555afdf..84516126e5f3 100644 --- a/net/openvswitch/vport-internal_dev.c +++ b/net/openvswitch/vport-internal_dev.c @@ -140,11 +140,14 @@ static void do_setup(struct net_device *netdev) netdev->tx_queue_len = 0; netdev->features = NETIF_F_LLTX | NETIF_F_SG | NETIF_F_FRAGLIST | - NETIF_F_HIGHDMA | NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE; + NETIF_F_HIGHDMA | NETIF_F_HW_CSUM | + NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ENCAP_ALL; netdev->vlan_features = netdev->features; + netdev->hw_enc_features = netdev->features; netdev->features |= NETIF_F_HW_VLAN_CTAG_TX; netdev->hw_features = netdev->features & ~NETIF_F_LLTX; + eth_hw_addr_random(netdev); } -- cgit v1.2.3-59-g8ed1b From f7d4ad98fdd08932ffda2354c62e2e2ee059adcc Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 22 Jul 2014 08:01:01 -0700 Subject: gpiolib: Export gpiochip_request_own_desc and gpiochip_free_own_desc Both functions were introduced to let gpio drivers request their own gpio pins. Without exporting the functions, this can however only be used by gpio drivers built into the kernel. Secondary impact is that the functions can not currently be used by platform initialization code associated with the gpio-pca953x driver. This code permits auto-export of gpio pins through platform data, but if this functionality is used, the module can no longer be unloaded due to the problem solved with the introduction of gpiochip_request_own_desc and gpiochip_free_own_desc. Export both function so they can be used from modules and from platform initialization code. Reviewed-by: Alexandre Courbot Reviewed-by: Mika Westerberg Signed-off-by: Guenter Roeck Signed-off-by: Linus Walleij --- Documentation/gpio/driver.txt | 21 +++++++++++++++++++++ drivers/gpio/gpiolib.c | 2 ++ drivers/gpio/gpiolib.h | 3 --- include/linux/gpio/driver.h | 3 +++ 4 files changed, 26 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/Documentation/gpio/driver.txt b/Documentation/gpio/driver.txt index 224dbbcd1804..18790c237977 100644 --- a/Documentation/gpio/driver.txt +++ b/Documentation/gpio/driver.txt @@ -167,3 +167,24 @@ is released: When implementing an irqchip inside a GPIO driver, these two functions should typically be called in the .startup() and .shutdown() callbacks from the irqchip. + + +Requesting self-owned GPIO pins +------------------------------- + +Sometimes it is useful to allow a GPIO chip driver to request its own GPIO +descriptors through the gpiolib API. Using gpio_request() for this purpose +does not help since it pins the module to the kernel forever (it calls +try_module_get()). 
A GPIO driver can use the following functions instead +to request and free descriptors without being pinned to the kernel forever. + + int gpiochip_request_own_desc(struct gpio_desc *desc, const char *label) + + void gpiochip_free_own_desc(struct gpio_desc *desc) + +Descriptors requested with gpiochip_request_own_desc() must be released with +gpiochip_free_own_desc(). + +These functions must be used with care since they do not affect module use +count. Do not use the functions to request gpio descriptors not owned by the +calling driver. diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 412d64e93cfb..768f0831db18 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -897,6 +897,7 @@ int gpiochip_request_own_desc(struct gpio_desc *desc, const char *label) return __gpiod_request(desc, label); } +EXPORT_SYMBOL_GPL(gpiochip_request_own_desc); /** * gpiochip_free_own_desc - Free GPIO requested by the chip driver @@ -910,6 +911,7 @@ void gpiochip_free_own_desc(struct gpio_desc *desc) if (desc) __gpiod_free(desc); } +EXPORT_SYMBOL_GPL(gpiochip_free_own_desc); /* Drivers MUST set GPIO direction before making get/set calls. In * some cases this is done in early boot, before IRQs are enabled. diff --git a/drivers/gpio/gpiolib.h b/drivers/gpio/gpiolib.h index acbb9335f08c..7fcb645ded4c 100644 --- a/drivers/gpio/gpiolib.h +++ b/drivers/gpio/gpiolib.h @@ -45,9 +45,6 @@ acpi_get_gpiod_by_index(struct device *dev, int index, } #endif -int gpiochip_request_own_desc(struct gpio_desc *desc, const char *label); -void gpiochip_free_own_desc(struct gpio_desc *desc); - struct gpio_desc *of_get_named_gpiod_flags(struct device_node *np, const char *list_name, int index, enum of_gpio_flags *flags); diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index c66c91682d9e..4c463fb0155e 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -220,6 +220,9 @@ int gpiochip_irqchip_add(struct gpio_chip *gpiochip, #endif /* CONFIG_GPIO_IRQCHIP */ +int gpiochip_request_own_desc(struct gpio_desc *desc, const char *label); +void gpiochip_free_own_desc(struct gpio_desc *desc); + #else /* CONFIG_GPIOLIB */ static inline struct gpio_chip *gpiod_to_chip(const struct gpio_desc *desc) -- cgit v1.2.3-59-g8ed1b From 7d8b6c63751cfbbe5eef81a48c22978b3407a3ad Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Wed, 23 Jul 2014 15:36:26 -0400 Subject: CAPABILITIES: remove undefined caps from all processes This is effectively a revert of 7b9a7ec565505699f503b4fcf61500dceb36e744 plus fixing it a different way... We found, when trying to run an application from an application which had dropped privs that the kernel does security checks on undefined capability bits. This was ESPECIALLY difficult to debug as those undefined bits are hidden from /proc/$PID/status. Consider a root application which drops all capabilities from ALL 4 capability sets. We assume, since the application is going to set eff/perm/inh from an array that it will clear not only the defined caps less than CAP_LAST_CAP, but also the higher 28ish bits which are undefined future capabilities. The BSET gets cleared differently. Instead it is cleared one bit at a time. The problem here is that in security/commoncap.c::cap_task_prctl() we actually check the validity of a capability being read. So any task which attempts to 'read all things set in bset' followed by 'unset all things set in bset' will not even attempt to unset the undefined bits higher than CAP_LAST_CAP. 
So the 'parent' will look something like: CapInh: 0000000000000000 CapPrm: 0000000000000000 CapEff: 0000000000000000 CapBnd: ffffffc000000000 All of this 'should' be fine. Given that these are undefined bits that aren't supposed to have anything to do with permissions. But they do... So let's now consider a task which cleared the eff/perm/inh completely and cleared all of the valid caps in the bset (but not the invalid caps it couldn't read out of the kernel). We know that this is exactly what the libcap-ng library does and what the go capabilities library does. They both leave you in that above situation if you try to clear all of your capabilities from all 4 sets. If that root task calls execve(), the child task will pick up all caps not blocked by the bset. The bset, however, does not block bits higher than CAP_LAST_CAP. So now the child task has bits in eff which are not in the parent. These are 'meaningless' undefined bits, but still bits which the parent doesn't have. The problem is now in cred_cap_issubset() (or any operation which does a subset test) as the child, while a subset for valid cap bits, is not a subset for invalid cap bits! So now, during commit creds, we mark the child as not dumpable. Given it is 'more priv' than its parent. It also means the parent cannot ptrace the child and other stupidity. The solution here: 1) stop hiding capability bits in status. This makes debugging easier! 2) stop giving any task undefined capability bits. It's simple: if you don't put those invalid bits in CAP_FULL_SET, you won't get them in init and you won't get them in any other task either. This fixes the cap_issubset() tests and resulting fallout (which made the init task in a docker container untraceable among other things). 3) mask out undefined bits when sys_capset() is called, as it might use ~0, ~0 to denote 'all capabilities' for backward/forward compatibility. This lets 'capsh --caps="all=eip" -- -c /bin/bash' run. 4) mask out undefined bits when we read a file capability off of disk, as again likely all bits are set in the xattr for forward/backward compatibility. This lets 'setcap all+pe /bin/bash; /bin/bash' run. Signed-off-by: Eric Paris Reviewed-by: Kees Cook Cc: Andrew Vagin Cc: Andrew G. Morgan Cc: Serge E.
Hallyn Cc: Kees Cook Cc: Steve Grubb Cc: Dan Walsh Cc: stable@vger.kernel.org Signed-off-by: James Morris --- fs/proc/array.c | 11 +---------- include/linux/capability.h | 5 ++++- kernel/audit.c | 2 +- kernel/capability.c | 4 ++++ security/commoncap.c | 3 +++ 5 files changed, 13 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/fs/proc/array.c b/fs/proc/array.c index 64db2bceac59..3e1290b0492e 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -297,15 +297,11 @@ static void render_cap_t(struct seq_file *m, const char *header, seq_puts(m, header); CAP_FOR_EACH_U32(__capi) { seq_printf(m, "%08x", - a->cap[(_KERNEL_CAPABILITY_U32S-1) - __capi]); + a->cap[CAP_LAST_U32 - __capi]); } seq_putc(m, '\n'); } -/* Remove non-existent capabilities */ -#define NORM_CAPS(v) (v.cap[CAP_TO_INDEX(CAP_LAST_CAP)] &= \ - CAP_TO_MASK(CAP_LAST_CAP + 1) - 1) - static inline void task_cap(struct seq_file *m, struct task_struct *p) { const struct cred *cred; @@ -319,11 +315,6 @@ static inline void task_cap(struct seq_file *m, struct task_struct *p) cap_bset = cred->cap_bset; rcu_read_unlock(); - NORM_CAPS(cap_inheritable); - NORM_CAPS(cap_permitted); - NORM_CAPS(cap_effective); - NORM_CAPS(cap_bset); - render_cap_t(m, "CapInh:\t", &cap_inheritable); render_cap_t(m, "CapPrm:\t", &cap_permitted); render_cap_t(m, "CapEff:\t", &cap_effective); diff --git a/include/linux/capability.h b/include/linux/capability.h index 84b13ad67c1c..aa93e5ef594c 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -78,8 +78,11 @@ extern const kernel_cap_t __cap_init_eff_set; # error Fix up hand-coded capability macro initializers #else /* HAND-CODED capability initializers */ +#define CAP_LAST_U32 ((_KERNEL_CAPABILITY_U32S) - 1) +#define CAP_LAST_U32_VALID_MASK (CAP_TO_MASK(CAP_LAST_CAP + 1) -1) + # define CAP_EMPTY_SET ((kernel_cap_t){{ 0, 0 }}) -# define CAP_FULL_SET ((kernel_cap_t){{ ~0, ~0 }}) +# define CAP_FULL_SET ((kernel_cap_t){{ ~0, CAP_LAST_U32_VALID_MASK }}) # define CAP_FS_SET ((kernel_cap_t){{ CAP_FS_MASK_B0 \ | CAP_TO_MASK(CAP_LINUX_IMMUTABLE), \ CAP_FS_MASK_B1 } }) diff --git a/kernel/audit.c b/kernel/audit.c index 3ef2e0e797e8..ba2ff5a5c600 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -1677,7 +1677,7 @@ void audit_log_cap(struct audit_buffer *ab, char *prefix, kernel_cap_t *cap) audit_log_format(ab, " %s=", prefix); CAP_FOR_EACH_U32(i) { audit_log_format(ab, "%08x", - cap->cap[(_KERNEL_CAPABILITY_U32S-1) - i]); + cap->cap[CAP_LAST_U32 - i]); } } diff --git a/kernel/capability.c b/kernel/capability.c index a5cf13c018ce..989f5bfc57dc 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -258,6 +258,10 @@ SYSCALL_DEFINE2(capset, cap_user_header_t, header, const cap_user_data_t, data) i++; } + effective.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK; + permitted.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK; + inheritable.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK; + new = prepare_creds(); if (!new) return -ENOMEM; diff --git a/security/commoncap.c b/security/commoncap.c index 9fe46e22c7f2..bab0611afc1e 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -421,6 +421,9 @@ int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data cpu_caps->inheritable.cap[i] = le32_to_cpu(caps.data[i].inheritable); } + cpu_caps->permitted.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK; + cpu_caps->inheritable.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK; + return 0; } -- cgit v1.2.3-59-g8ed1b From 2695fb552cbef1029aa025a98acb80cc51d66de5 Mon Sep 17 
00:00:00 2001 From: Alexei Starovoitov Date: Thu, 24 Jul 2014 16:38:21 -0700 Subject: net: filter: rename 'struct sock_filter_int' into 'struct bpf_insn' eBPF is used by socket filtering, seccomp and soon by tracing and exposed to userspace, therefore 'sock_filter_int' name is not accurate. Rename it to 'bpf_insn' Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- arch/x86/net/bpf_jit_comp.c | 2 +- include/linux/filter.h | 50 ++++++++++++++++++++++----------------------- kernel/bpf/core.c | 2 +- kernel/seccomp.c | 2 +- lib/test_bpf.c | 4 ++-- net/core/filter.c | 18 ++++++++-------- 6 files changed, 39 insertions(+), 39 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 99bef86ed6df..71737a83f022 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -214,7 +214,7 @@ struct jit_context { static int do_jit(struct sk_filter *bpf_prog, int *addrs, u8 *image, int oldproglen, struct jit_context *ctx) { - struct sock_filter_int *insn = bpf_prog->insnsi; + struct bpf_insn *insn = bpf_prog->insnsi; int insn_cnt = bpf_prog->len; u8 temp[64]; int i; diff --git a/include/linux/filter.h b/include/linux/filter.h index c43c8258e682..20dd50ef7271 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -82,7 +82,7 @@ enum { /* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */ #define BPF_ALU64_REG(OP, DST, SRC) \ - ((struct sock_filter_int) { \ + ((struct bpf_insn) { \ .code = BPF_ALU64 | BPF_OP(OP) | BPF_X, \ .dst_reg = DST, \ .src_reg = SRC, \ @@ -90,7 +90,7 @@ enum { .imm = 0 }) #define BPF_ALU32_REG(OP, DST, SRC) \ - ((struct sock_filter_int) { \ + ((struct bpf_insn) { \ .code = BPF_ALU | BPF_OP(OP) | BPF_X, \ .dst_reg = DST, \ .src_reg = SRC, \ @@ -100,7 +100,7 @@ enum { /* ALU ops on immediates, bpf_add|sub|...: dst_reg += imm32 */ #define BPF_ALU64_IMM(OP, DST, IMM) \ - ((struct sock_filter_int) { \ + ((struct bpf_insn) { \ .code = BPF_ALU64 | BPF_OP(OP) | BPF_K, \ .dst_reg = DST, \ .src_reg = 0, \ @@ -108,7 +108,7 @@ enum { .imm = IMM }) #define BPF_ALU32_IMM(OP, DST, IMM) \ - ((struct sock_filter_int) { \ + ((struct bpf_insn) { \ .code = BPF_ALU | BPF_OP(OP) | BPF_K, \ .dst_reg = DST, \ .src_reg = 0, \ @@ -118,7 +118,7 @@ enum { /* Endianess conversion, cpu_to_{l,b}e(), {l,b}e_to_cpu() */ #define BPF_ENDIAN(TYPE, DST, LEN) \ - ((struct sock_filter_int) { \ + ((struct bpf_insn) { \ .code = BPF_ALU | BPF_END | BPF_SRC(TYPE), \ .dst_reg = DST, \ .src_reg = 0, \ @@ -128,7 +128,7 @@ enum { /* Short form of mov, dst_reg = src_reg */ #define BPF_MOV64_REG(DST, SRC) \ - ((struct sock_filter_int) { \ + ((struct bpf_insn) { \ .code = BPF_ALU64 | BPF_MOV | BPF_X, \ .dst_reg = DST, \ .src_reg = SRC, \ @@ -136,7 +136,7 @@ enum { .imm = 0 }) #define BPF_MOV32_REG(DST, SRC) \ - ((struct sock_filter_int) { \ + ((struct bpf_insn) { \ .code = BPF_ALU | BPF_MOV | BPF_X, \ .dst_reg = DST, \ .src_reg = SRC, \ @@ -146,7 +146,7 @@ enum { /* Short form of mov, dst_reg = imm32 */ #define BPF_MOV64_IMM(DST, IMM) \ - ((struct sock_filter_int) { \ + ((struct bpf_insn) { \ .code = BPF_ALU64 | BPF_MOV | BPF_K, \ .dst_reg = DST, \ .src_reg = 0, \ @@ -154,7 +154,7 @@ enum { .imm = IMM }) #define BPF_MOV32_IMM(DST, IMM) \ - ((struct sock_filter_int) { \ + ((struct bpf_insn) { \ .code = BPF_ALU | BPF_MOV | BPF_K, \ .dst_reg = DST, \ .src_reg = 0, \ @@ -164,7 +164,7 @@ enum { /* Short form of mov based on type, BPF_X: dst_reg = src_reg, BPF_K: dst_reg = imm32 */ #define BPF_MOV64_RAW(TYPE, DST, SRC, 
IMM) \ - ((struct sock_filter_int) { \ + ((struct bpf_insn) { \ .code = BPF_ALU64 | BPF_MOV | BPF_SRC(TYPE), \ .dst_reg = DST, \ .src_reg = SRC, \ @@ -172,7 +172,7 @@ enum { .imm = IMM }) #define BPF_MOV32_RAW(TYPE, DST, SRC, IMM) \ - ((struct sock_filter_int) { \ + ((struct bpf_insn) { \ .code = BPF_ALU | BPF_MOV | BPF_SRC(TYPE), \ .dst_reg = DST, \ .src_reg = SRC, \ @@ -182,7 +182,7 @@ enum { /* Direct packet access, R0 = *(uint *) (skb->data + imm32) */ #define BPF_LD_ABS(SIZE, IMM) \ - ((struct sock_filter_int) { \ + ((struct bpf_insn) { \ .code = BPF_LD | BPF_SIZE(SIZE) | BPF_ABS, \ .dst_reg = 0, \ .src_reg = 0, \ @@ -192,7 +192,7 @@ enum { /* Indirect packet access, R0 = *(uint *) (skb->data + src_reg + imm32) */ #define BPF_LD_IND(SIZE, SRC, IMM) \ - ((struct sock_filter_int) { \ + ((struct bpf_insn) { \ .code = BPF_LD | BPF_SIZE(SIZE) | BPF_IND, \ .dst_reg = 0, \ .src_reg = SRC, \ @@ -202,7 +202,7 @@ enum { /* Memory load, dst_reg = *(uint *) (src_reg + off16) */ #define BPF_LDX_MEM(SIZE, DST, SRC, OFF) \ - ((struct sock_filter_int) { \ + ((struct bpf_insn) { \ .code = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEM, \ .dst_reg = DST, \ .src_reg = SRC, \ @@ -212,7 +212,7 @@ enum { /* Memory store, *(uint *) (dst_reg + off16) = src_reg */ #define BPF_STX_MEM(SIZE, DST, SRC, OFF) \ - ((struct sock_filter_int) { \ + ((struct bpf_insn) { \ .code = BPF_STX | BPF_SIZE(SIZE) | BPF_MEM, \ .dst_reg = DST, \ .src_reg = SRC, \ @@ -222,7 +222,7 @@ enum { /* Memory store, *(uint *) (dst_reg + off16) = imm32 */ #define BPF_ST_MEM(SIZE, DST, OFF, IMM) \ - ((struct sock_filter_int) { \ + ((struct bpf_insn) { \ .code = BPF_ST | BPF_SIZE(SIZE) | BPF_MEM, \ .dst_reg = DST, \ .src_reg = 0, \ @@ -232,7 +232,7 @@ enum { /* Conditional jumps against registers, if (dst_reg 'op' src_reg) goto pc + off16 */ #define BPF_JMP_REG(OP, DST, SRC, OFF) \ - ((struct sock_filter_int) { \ + ((struct bpf_insn) { \ .code = BPF_JMP | BPF_OP(OP) | BPF_X, \ .dst_reg = DST, \ .src_reg = SRC, \ @@ -242,7 +242,7 @@ enum { /* Conditional jumps against immediates, if (dst_reg 'op' imm32) goto pc + off16 */ #define BPF_JMP_IMM(OP, DST, IMM, OFF) \ - ((struct sock_filter_int) { \ + ((struct bpf_insn) { \ .code = BPF_JMP | BPF_OP(OP) | BPF_K, \ .dst_reg = DST, \ .src_reg = 0, \ @@ -252,7 +252,7 @@ enum { /* Function call */ #define BPF_EMIT_CALL(FUNC) \ - ((struct sock_filter_int) { \ + ((struct bpf_insn) { \ .code = BPF_JMP | BPF_CALL, \ .dst_reg = 0, \ .src_reg = 0, \ @@ -262,7 +262,7 @@ enum { /* Raw code statement block */ #define BPF_RAW_INSN(CODE, DST, SRC, OFF, IMM) \ - ((struct sock_filter_int) { \ + ((struct bpf_insn) { \ .code = CODE, \ .dst_reg = DST, \ .src_reg = SRC, \ @@ -272,7 +272,7 @@ enum { /* Program exit */ #define BPF_EXIT_INSN() \ - ((struct sock_filter_int) { \ + ((struct bpf_insn) { \ .code = BPF_JMP | BPF_EXIT, \ .dst_reg = 0, \ .src_reg = 0, \ @@ -298,7 +298,7 @@ enum { /* Macro to invoke filter function. 
*/ #define SK_RUN_FILTER(filter, ctx) (*filter->bpf_func)(ctx, filter->insnsi) -struct sock_filter_int { +struct bpf_insn { __u8 code; /* opcode */ __u8 dst_reg:4; /* dest register */ __u8 src_reg:4; /* source register */ @@ -330,10 +330,10 @@ struct sk_filter { struct sock_fprog_kern *orig_prog; /* Original BPF program */ struct rcu_head rcu; unsigned int (*bpf_func)(const struct sk_buff *skb, - const struct sock_filter_int *filter); + const struct bpf_insn *filter); union { struct sock_filter insns[0]; - struct sock_filter_int insnsi[0]; + struct bpf_insn insnsi[0]; struct work_struct work; }; }; @@ -353,7 +353,7 @@ void sk_filter_select_runtime(struct sk_filter *fp); void sk_filter_free(struct sk_filter *fp); int sk_convert_filter(struct sock_filter *prog, int len, - struct sock_filter_int *new_prog, int *new_len); + struct bpf_insn *new_prog, int *new_len); int sk_unattached_filter_create(struct sk_filter **pfp, struct sock_fprog_kern *fprog); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 77a240a1ce11..265a02cc822d 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -81,7 +81,7 @@ noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) * keep, 0 for none. @ctx is the data we are operating on, @insn is the * array of filter instructions. */ -static unsigned int __sk_run_filter(void *ctx, const struct sock_filter_int *insn) +static unsigned int __sk_run_filter(void *ctx, const struct bpf_insn *insn) { u64 stack[MAX_BPF_STACK / sizeof(u64)]; u64 regs[MAX_BPF_REG], tmp; diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 301bbc24739c..565743db5384 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -248,7 +248,7 @@ static long seccomp_attach_filter(struct sock_fprog *fprog) if (ret) goto free_prog; - /* Convert 'sock_filter' insns to 'sock_filter_int' insns */ + /* Convert 'sock_filter' insns to 'bpf_insn' insns */ ret = sk_convert_filter(fp, fprog->len, NULL, &new_len); if (ret) goto free_prog; diff --git a/lib/test_bpf.c b/lib/test_bpf.c index c579e0f58818..5f48623ee1a7 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -66,7 +66,7 @@ struct bpf_test { const char *descr; union { struct sock_filter insns[MAX_INSNS]; - struct sock_filter_int insns_int[MAX_INSNS]; + struct bpf_insn insns_int[MAX_INSNS]; } u; __u8 aux; __u8 data[MAX_DATA]; @@ -1807,7 +1807,7 @@ static struct sk_filter *generate_filter(int which, int *err) fp->len = flen; memcpy(fp->insnsi, tests[which].u.insns_int, - fp->len * sizeof(struct sock_filter_int)); + fp->len * sizeof(struct bpf_insn)); sk_filter_select_runtime(fp); break; diff --git a/net/core/filter.c b/net/core/filter.c index 1d0e9492e4fa..f3b2d5e9fe5f 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -174,9 +174,9 @@ static u64 __get_random_u32(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) } static bool convert_bpf_extensions(struct sock_filter *fp, - struct sock_filter_int **insnp) + struct bpf_insn **insnp) { - struct sock_filter_int *insn = *insnp; + struct bpf_insn *insn = *insnp; switch (fp->k) { case SKF_AD_OFF + SKF_AD_PROTOCOL: @@ -326,7 +326,7 @@ static bool convert_bpf_extensions(struct sock_filter *fp, * * 2) 2nd pass to remap in two passes: 1st pass finds new * jump offsets, 2nd pass remapping: - * new_prog = kmalloc(sizeof(struct sock_filter_int) * new_len); + * new_prog = kmalloc(sizeof(struct bpf_insn) * new_len); * sk_convert_filter(old_prog, old_len, new_prog, &new_len); * * User BPF's register A is mapped to our BPF register 6, user BPF @@ -336,10 +336,10 @@ static bool convert_bpf_extensions(struct 
sock_filter *fp, * ctx == 'struct seccomp_data *'. */ int sk_convert_filter(struct sock_filter *prog, int len, - struct sock_filter_int *new_prog, int *new_len) + struct bpf_insn *new_prog, int *new_len) { int new_flen = 0, pass = 0, target, i; - struct sock_filter_int *new_insn; + struct bpf_insn *new_insn; struct sock_filter *fp; int *addrs = NULL; u8 bpf_src; @@ -365,8 +365,8 @@ do_pass: new_insn++; for (i = 0; i < len; fp++, i++) { - struct sock_filter_int tmp_insns[6] = { }; - struct sock_filter_int *insn = tmp_insns; + struct bpf_insn tmp_insns[6] = { }; + struct bpf_insn *insn = tmp_insns; if (addrs) addrs[i] = new_insn - new_prog; @@ -913,7 +913,7 @@ static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp, * representation. */ BUILD_BUG_ON(sizeof(struct sock_filter) != - sizeof(struct sock_filter_int)); + sizeof(struct bpf_insn)); /* Conversion cannot happen on overlapping memory areas, * so we need to keep the user BPF around until the 2nd @@ -945,7 +945,7 @@ static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp, fp->len = new_len; - /* 2nd pass: remap sock_filter insns into sock_filter_int insns. */ + /* 2nd pass: remap sock_filter insns into bpf_insn insns. */ err = sk_convert_filter(old_prog, old_len, fp->insnsi, &new_len); if (err) /* 2nd sk_convert_filter() can fail only if it fails -- cgit v1.2.3-59-g8ed1b From f892afb07eeecf575179c4747952644a82a92a36 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Mon, 16 Jun 2014 11:31:05 +0800 Subject: dmaengine: imx-sdma: Add a new DMATYPE for Shared Peripheral ASRC Shared Peripheral ASRC, running on SPBA, needs to use shp scripts for DMA transfer. So this patch just adds a new DMATYPE for it. Signed-off-by: Nicolin Chen Acked-by: Shawn Guo Signed-off-by: Vinod Koul --- Documentation/devicetree/bindings/dma/fsl-imx-sdma.txt | 1 + drivers/dma/imx-sdma.c | 5 +++++ include/linux/platform_data/dma-imx.h | 1 + 3 files changed, 7 insertions(+) (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/dma/fsl-imx-sdma.txt b/Documentation/devicetree/bindings/dma/fsl-imx-sdma.txt index e577196a12c0..4659fd952301 100644 --- a/Documentation/devicetree/bindings/dma/fsl-imx-sdma.txt +++ b/Documentation/devicetree/bindings/dma/fsl-imx-sdma.txt @@ -47,6 +47,7 @@ The full ID of peripheral types can be found below. 20 ASRC 21 ESAI 22 SSI Dual FIFO (needs firmware ver >= 2) + 23 Shared ASRC The third cell specifies the transfer priority as below.
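Before the driver changes below, a hedged sketch of how a client might select the new peripheral type through platform data; struct imx_dma_data is not shown in this patch, so its layout, the event number and the priority used here are assumptions made purely for illustration:

    #include <linux/platform_data/dma-imx.h>

    /* Hypothetical client platform data choosing the new Shared ASRC type.
     * The SDMA event number is a placeholder value.
     */
    static struct imx_dma_data asrc_sp_dma_data = {
            .dma_request     = 17,                  /* example SDMA event */
            .peripheral_type = IMX_DMATYPE_ASRC_SP, /* added by this patch */
            .priority        = DMA_PRIO_HIGH,
    };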
diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c index 8269c200b53b..de584e605db5 100644 --- a/drivers/dma/imx-sdma.c +++ b/drivers/dma/imx-sdma.c @@ -750,6 +750,11 @@ static void sdma_get_pc(struct sdma_channel *sdmac, emi_2_per = sdma->script_addrs->asrc_2_mcu_addr; per_2_per = sdma->script_addrs->per_2_per_addr; break; + case IMX_DMATYPE_ASRC_SP: + per_2_emi = sdma->script_addrs->shp_2_mcu_addr; + emi_2_per = sdma->script_addrs->mcu_2_shp_addr; + per_2_per = sdma->script_addrs->per_2_per_addr; + break; case IMX_DMATYPE_MSHC: per_2_emi = sdma->script_addrs->mshc_2_mcu_addr; emi_2_per = sdma->script_addrs->mcu_2_mshc_addr; diff --git a/include/linux/platform_data/dma-imx.h b/include/linux/platform_data/dma-imx.h index bcbc6c3c14c0..7aa0e89d1bcc 100644 --- a/include/linux/platform_data/dma-imx.h +++ b/include/linux/platform_data/dma-imx.h @@ -40,6 +40,7 @@ enum sdma_peripheral_type { IMX_DMATYPE_ASRC, /* ASRC */ IMX_DMATYPE_ESAI, /* ESAI */ IMX_DMATYPE_SSI_DUAL, /* SSI Dual FIFO */ + IMX_DMATYPE_ASRC_SP, /* Shared ASRC */ }; enum imx_dma_prio { -- cgit v1.2.3-59-g8ed1b From 0c9dbebdb6611d2cd75d025ec09035c3e8ce2160 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 11 Jul 2014 18:18:26 +0200 Subject: dmaengine: Remove unused definition of DMA_MAX_COOKIE As of commit f04cd40701deace2efb9edd7120e59366bda2118 ("fsldma: fix controller lockups"), its last (and only ever) user is gone. Signed-off-by: Geert Uytterhoeven Signed-off-by: Vinod Koul --- include/linux/dmaengine.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index d2c5cc7c583c..4eb2f82aed1d 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -37,7 +37,6 @@ */ typedef s32 dma_cookie_t; #define DMA_MIN_COOKIE 1 -#define DMA_MAX_COOKIE INT_MAX static inline int dma_submit_error(dma_cookie_t cookie) { -- cgit v1.2.3-59-g8ed1b From 72c5839515260dce966cd24f54436e6583288e6c Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 24 Jul 2014 14:14:42 +0100 Subject: arm64: gicv3: Allow GICv3 compilation with older binutils GICv3 introduces new system registers accessible with the full msr/mrs syntax (e.g. mrs x0, Sop0_op1_CRm_CRn_op2). However, only recent binutils understand the new syntax. This patch introduces msr_s/mrs_s assembly macros which generate the equivalent instructions above and converts the existing GICv3 code (both drivers/irqchip/ and arch/arm64/kernel/). Signed-off-by: Catalin Marinas Reported-by: Olof Johansson Tested-by: Olof Johansson Suggested-by: Mark Rutland Acked-by: Mark Rutland Acked-by: Jason Cooper Cc: Will Deacon Cc: Marc Zyngier --- arch/arm64/include/asm/sysreg.h | 60 ++++++++++++++++++++++++++++++++++++++ arch/arm64/kernel/head.S | 6 ++-- drivers/irqchip/irq-gic-v3.c | 16 +++++----- include/linux/irqchip/arm-gic-v3.h | 42 +++++++++++++------------- 4 files changed, 93 insertions(+), 31 deletions(-) create mode 100644 arch/arm64/include/asm/sysreg.h (limited to 'include/linux') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h new file mode 100644 index 000000000000..5c89df0acbcb --- /dev/null +++ b/arch/arm64/include/asm/sysreg.h @@ -0,0 +1,60 @@ +/* + * Macros for accessing system registers with older binutils. + * + * Copyright (C) 2014 ARM Ltd.
+ * Author: Catalin Marinas + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __ASM_SYSREG_H +#define __ASM_SYSREG_H + +#define sys_reg(op0, op1, crn, crm, op2) \ + ((((op0)-2)<<19)|((op1)<<16)|((crn)<<12)|((crm)<<8)|((op2)<<5)) + +#ifdef __ASSEMBLY__ + + .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30 + .equ __reg_num_x\num, \num + .endr + .equ __reg_num_xzr, 31 + + .macro mrs_s, rt, sreg + .inst 0xd5300000|(\sreg)|(__reg_num_\rt) + .endm + + .macro msr_s, sreg, rt + .inst 0xd5100000|(\sreg)|(__reg_num_\rt) + .endm + +#else + +asm( +" .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30\n" +" .equ __reg_num_x\\num, \\num\n" +" .endr\n" +" .equ __reg_num_xzr, 31\n" +"\n" +" .macro mrs_s, rt, sreg\n" +" .inst 0xd5300000|(\\sreg)|(__reg_num_\\rt)\n" +" .endm\n" +"\n" +" .macro msr_s, sreg, rt\n" +" .inst 0xd5100000|(\\sreg)|(__reg_num_\\rt)\n" +" .endm\n" +); + +#endif + +#endif /* __ASM_SYSREG_H */ diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index c99e3a879ebc..144f10567f82 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -297,12 +297,12 @@ CPU_LE( bic x0, x0, #(3 << 24) ) // Clear the EE and E0E bits for EL1 cmp x0, #1 b.ne 3f - mrs x0, ICC_SRE_EL2 + mrs_s x0, ICC_SRE_EL2 orr x0, x0, #ICC_SRE_EL2_SRE // Set ICC_SRE_EL2.SRE==1 orr x0, x0, #ICC_SRE_EL2_ENABLE // Set ICC_SRE_EL2.Enable==1 - msr ICC_SRE_EL2, x0 + msr_s ICC_SRE_EL2, x0 isb // Make sure SRE is now set - msr ICH_HCR_EL2, xzr // Reset ICC_HCR_EL2 to defaults + msr_s ICH_HCR_EL2, xzr // Reset ICC_HCR_EL2 to defaults 3: #endif diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 81519bae0453..57eaa5a0b1e3 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -108,39 +108,39 @@ static u64 gic_read_iar(void) { u64 irqstat; - asm volatile("mrs %0, " __stringify(ICC_IAR1_EL1) : "=r" (irqstat)); + asm volatile("mrs_s %0, " __stringify(ICC_IAR1_EL1) : "=r" (irqstat)); return irqstat; } static void gic_write_pmr(u64 val) { - asm volatile("msr " __stringify(ICC_PMR_EL1) ", %0" : : "r" (val)); + asm volatile("msr_s " __stringify(ICC_PMR_EL1) ", %0" : : "r" (val)); } static void gic_write_ctlr(u64 val) { - asm volatile("msr " __stringify(ICC_CTLR_EL1) ", %0" : : "r" (val)); + asm volatile("msr_s " __stringify(ICC_CTLR_EL1) ", %0" : : "r" (val)); isb(); } static void gic_write_grpen1(u64 val) { - asm volatile("msr " __stringify(ICC_GRPEN1_EL1) ", %0" : : "r" (val)); + asm volatile("msr_s " __stringify(ICC_GRPEN1_EL1) ", %0" : : "r" (val)); isb(); } static void gic_write_sgi1r(u64 val) { - asm volatile("msr " __stringify(ICC_SGI1R_EL1) ", %0" : : "r" (val)); + asm volatile("msr_s " __stringify(ICC_SGI1R_EL1) ", %0" : : "r" (val)); } static void gic_enable_sre(void) { u64 val; - asm volatile("mrs %0, " __stringify(ICC_SRE_EL1) : "=r" (val)); + asm volatile("mrs_s %0, " __stringify(ICC_SRE_EL1) : "=r" (val)); val |= ICC_SRE_EL1_SRE; - asm 
volatile("msr " __stringify(ICC_SRE_EL1) ", %0" : : "r" (val)); + asm volatile("msr_s " __stringify(ICC_SRE_EL1) ", %0" : : "r" (val)); isb(); /* @@ -150,7 +150,7 @@ static void gic_enable_sre(void) * * Kindly inform the luser. */ - asm volatile("mrs %0, " __stringify(ICC_SRE_EL1) : "=r" (val)); + asm volatile("mrs_s %0, " __stringify(ICC_SRE_EL1) : "=r" (val)); if (!(val & ICC_SRE_EL1_SRE)) pr_err("GIC: unable to set SRE (disabled at EL2), panic ahead\n"); } diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 30cb7556d43f..03a4ea37ba86 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -18,6 +18,8 @@ #ifndef __LINUX_IRQCHIP_ARM_GIC_V3_H #define __LINUX_IRQCHIP_ARM_GIC_V3_H +#include + /* * Distributor registers. We assume we're running non-secure, with ARE * being set. Secure-only and non-ARE registers are not described. @@ -125,17 +127,17 @@ #define ICH_VMCR_PMR_SHIFT 24 #define ICH_VMCR_PMR_MASK (0xffUL << ICH_VMCR_PMR_SHIFT) -#define ICC_EOIR1_EL1 S3_0_C12_C12_1 -#define ICC_IAR1_EL1 S3_0_C12_C12_0 -#define ICC_SGI1R_EL1 S3_0_C12_C11_5 -#define ICC_PMR_EL1 S3_0_C4_C6_0 -#define ICC_CTLR_EL1 S3_0_C12_C12_4 -#define ICC_SRE_EL1 S3_0_C12_C12_5 -#define ICC_GRPEN1_EL1 S3_0_C12_C12_7 +#define ICC_EOIR1_EL1 sys_reg(3, 0, 12, 12, 1) +#define ICC_IAR1_EL1 sys_reg(3, 0, 12, 12, 0) +#define ICC_SGI1R_EL1 sys_reg(3, 0, 12, 11, 5) +#define ICC_PMR_EL1 sys_reg(3, 0, 4, 6, 0) +#define ICC_CTLR_EL1 sys_reg(3, 0, 12, 12, 4) +#define ICC_SRE_EL1 sys_reg(3, 0, 12, 12, 5) +#define ICC_GRPEN1_EL1 sys_reg(3, 0, 12, 12, 7) #define ICC_IAR1_EL1_SPURIOUS 0x3ff -#define ICC_SRE_EL2 S3_4_C12_C9_5 +#define ICC_SRE_EL2 sys_reg(3, 4, 12, 9, 5) #define ICC_SRE_EL2_SRE (1 << 0) #define ICC_SRE_EL2_ENABLE (1 << 3) @@ -143,16 +145,16 @@ /* * System register definitions */ -#define ICH_VSEIR_EL2 S3_4_C12_C9_4 -#define ICH_HCR_EL2 S3_4_C12_C11_0 -#define ICH_VTR_EL2 S3_4_C12_C11_1 -#define ICH_MISR_EL2 S3_4_C12_C11_2 -#define ICH_EISR_EL2 S3_4_C12_C11_3 -#define ICH_ELSR_EL2 S3_4_C12_C11_5 -#define ICH_VMCR_EL2 S3_4_C12_C11_7 +#define ICH_VSEIR_EL2 sys_reg(3, 4, 12, 9, 4) +#define ICH_HCR_EL2 sys_reg(3, 4, 12, 11, 0) +#define ICH_VTR_EL2 sys_reg(3, 4, 12, 11, 1) +#define ICH_MISR_EL2 sys_reg(3, 4, 12, 11, 2) +#define ICH_EISR_EL2 sys_reg(3, 4, 12, 11, 3) +#define ICH_ELSR_EL2 sys_reg(3, 4, 12, 11, 5) +#define ICH_VMCR_EL2 sys_reg(3, 4, 12, 11, 7) -#define __LR0_EL2(x) S3_4_C12_C12_ ## x -#define __LR8_EL2(x) S3_4_C12_C13_ ## x +#define __LR0_EL2(x) sys_reg(3, 4, 12, 12, x) +#define __LR8_EL2(x) sys_reg(3, 4, 12, 13, x) #define ICH_LR0_EL2 __LR0_EL2(0) #define ICH_LR1_EL2 __LR0_EL2(1) @@ -171,13 +173,13 @@ #define ICH_LR14_EL2 __LR8_EL2(6) #define ICH_LR15_EL2 __LR8_EL2(7) -#define __AP0Rx_EL2(x) S3_4_C12_C8_ ## x +#define __AP0Rx_EL2(x) sys_reg(3, 4, 12, 8, x) #define ICH_AP0R0_EL2 __AP0Rx_EL2(0) #define ICH_AP0R1_EL2 __AP0Rx_EL2(1) #define ICH_AP0R2_EL2 __AP0Rx_EL2(2) #define ICH_AP0R3_EL2 __AP0Rx_EL2(3) -#define __AP1Rx_EL2(x) S3_4_C12_C9_ ## x +#define __AP1Rx_EL2(x) sys_reg(3, 4, 12, 9, x) #define ICH_AP1R0_EL2 __AP1Rx_EL2(0) #define ICH_AP1R1_EL2 __AP1Rx_EL2(1) #define ICH_AP1R2_EL2 __AP1Rx_EL2(2) @@ -189,7 +191,7 @@ static inline void gic_write_eoir(u64 irq) { - asm volatile("msr " __stringify(ICC_EOIR1_EL1) ", %0" : : "r" (irq)); + asm volatile("msr_s " __stringify(ICC_EOIR1_EL1) ", %0" : : "r" (irq)); isb(); } -- cgit v1.2.3-59-g8ed1b From a259f3896a39ec7cbcd5f630a6ec95bdcbc080d2 Mon Sep 17 00:00:00 2001 From: Javier Martinez 
Canillas Date: Thu, 24 Jul 2014 14:39:24 +0200 Subject: mfd: max77686: Add Maxim 77802 PMIC support Maxim MAX77802 is a power management chip that contains 10 high efficiency Buck regulators, 32 Low-dropout (LDO) regulators used to power up application processors and peripherals, a 2-channel 32kHz clock outputs, a Real-Time-Clock (RTC) and a I2C interface to program the individual regulators, clocks outputs and the RTC. This patch adds support for MAX77802 to the MAX77686 driver and is based on a driver added to the Chrome OS kernel 3.8 by Simon Glass. Signed-off-by: Javier Martinez Canillas Reviewed-by: Krzysztof Kozlowski Signed-off-by: Lee Jones --- drivers/mfd/Kconfig | 6 +- drivers/mfd/max77686.c | 197 ++++++++++++++++++++++++++++----- include/linux/mfd/max77686-private.h | 208 ++++++++++++++++++++++++++++++++++- include/linux/mfd/max77686.h | 57 +++++++++- 4 files changed, 436 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index 30102042dcaf..de5abf244746 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -380,15 +380,15 @@ config MFD_MAX14577 of the device. config MFD_MAX77686 - bool "Maxim Semiconductor MAX77686 PMIC Support" + bool "Maxim Semiconductor MAX77686/802 PMIC Support" depends on I2C=y select MFD_CORE select REGMAP_I2C select REGMAP_IRQ select IRQ_DOMAIN help - Say yes here to add support for Maxim Semiconductor MAX77686. - This is a Power Management IC with RTC on chip. + Say yes here to add support for Maxim Semiconductor MAX77686 and + MAX77802 which are Power Management IC with an RTC on chip. This driver provides common support for accessing the device; additional drivers must be enabled in order to use the functionality of the device. diff --git a/drivers/mfd/max77686.c b/drivers/mfd/max77686.c index f2bd69915987..c65332291bb4 100644 --- a/drivers/mfd/max77686.c +++ b/drivers/mfd/max77686.c @@ -1,5 +1,5 @@ /* - * max77686.c - mfd core driver for the Maxim 77686 + * max77686.c - mfd core driver for the Maxim 77686/802 * * Copyright (C) 2012 Samsung Electronics * Chiwoong Byun @@ -43,6 +43,74 @@ static const struct mfd_cell max77686_devs[] = { { .name = "max77686-clk", }, }; +static const struct mfd_cell max77802_devs[] = { + { .name = "max77802-pmic", }, + { .name = "max77802-clk", }, + { .name = "max77802-rtc", }, +}; + +static bool max77802_pmic_is_accessible_reg(struct device *dev, + unsigned int reg) +{ + return (reg >= MAX77802_REG_DEVICE_ID && reg < MAX77802_REG_PMIC_END); +} + +static bool max77802_rtc_is_accessible_reg(struct device *dev, + unsigned int reg) +{ + return (reg >= MAX77802_RTC_INT && reg < MAX77802_RTC_END); +} + +static bool max77802_is_accessible_reg(struct device *dev, unsigned int reg) +{ + return (max77802_pmic_is_accessible_reg(dev, reg) || + max77802_rtc_is_accessible_reg(dev, reg)); +} + +static bool max77802_pmic_is_precious_reg(struct device *dev, unsigned int reg) +{ + return (reg == MAX77802_REG_INTSRC || reg == MAX77802_REG_INT1 || + reg == MAX77802_REG_INT2); +} + +static bool max77802_rtc_is_precious_reg(struct device *dev, unsigned int reg) +{ + return (reg == MAX77802_RTC_INT || + reg == MAX77802_RTC_UPDATE0 || + reg == MAX77802_RTC_UPDATE1); +} + +static bool max77802_is_precious_reg(struct device *dev, unsigned int reg) +{ + return (max77802_pmic_is_precious_reg(dev, reg) || + max77802_rtc_is_precious_reg(dev, reg)); +} + +static bool max77802_pmic_is_volatile_reg(struct device *dev, unsigned int reg) +{ + return (max77802_is_precious_reg(dev, reg) 
|| + reg == MAX77802_REG_STATUS1 || reg == MAX77802_REG_STATUS2 || + reg == MAX77802_REG_PWRON); +} + +static bool max77802_rtc_is_volatile_reg(struct device *dev, unsigned int reg) +{ + return (max77802_rtc_is_precious_reg(dev, reg) || + reg == MAX77802_RTC_SEC || + reg == MAX77802_RTC_MIN || + reg == MAX77802_RTC_HOUR || + reg == MAX77802_RTC_WEEKDAY || + reg == MAX77802_RTC_MONTH || + reg == MAX77802_RTC_YEAR || + reg == MAX77802_RTC_DATE); +} + +static bool max77802_is_volatile_reg(struct device *dev, unsigned int reg) +{ + return (max77802_pmic_is_volatile_reg(dev, reg) || + max77802_rtc_is_volatile_reg(dev, reg)); +} + static struct regmap_config max77686_regmap_config = { .reg_bits = 8, .val_bits = 8, @@ -53,6 +121,17 @@ static struct regmap_config max77686_rtc_regmap_config = { .val_bits = 8, }; +static struct regmap_config max77802_regmap_config = { + .reg_bits = 8, + .val_bits = 8, + .writeable_reg = max77802_is_accessible_reg, + .readable_reg = max77802_is_accessible_reg, + .precious_reg = max77802_is_precious_reg, + .volatile_reg = max77802_is_volatile_reg, + .name = "max77802-pmic", + .cache_type = REGCACHE_RBTREE, +}; + static const struct regmap_irq max77686_irqs[] = { /* INT1 interrupts */ { .reg_offset = 0, .mask = MAX77686_INT1_PWRONF_MSK, }, @@ -96,9 +175,34 @@ static const struct regmap_irq_chip max77686_rtc_irq_chip = { .num_irqs = ARRAY_SIZE(max77686_rtc_irqs), }; +static const struct regmap_irq_chip max77802_irq_chip = { + .name = "max77802-pmic", + .status_base = MAX77802_REG_INT1, + .mask_base = MAX77802_REG_INT1MSK, + .num_regs = 2, + .irqs = max77686_irqs, /* same masks as 77686 */ + .num_irqs = ARRAY_SIZE(max77686_irqs), +}; + +static const struct regmap_irq_chip max77802_rtc_irq_chip = { + .name = "max77802-rtc", + .status_base = MAX77802_RTC_INT, + .mask_base = MAX77802_RTC_INTM, + .num_regs = 1, + .irqs = max77686_rtc_irqs, /* same masks as 77686 */ + .num_irqs = ARRAY_SIZE(max77686_rtc_irqs), +}; + static const struct of_device_id max77686_pmic_dt_match[] = { - {.compatible = "maxim,max77686", .data = NULL}, - {}, + { + .compatible = "maxim,max77686", + .data = (void *)TYPE_MAX77686, + }, + { + .compatible = "maxim,max77802", + .data = (void *)TYPE_MAX77802, + }, + { }, }; static struct max77686_platform_data *max77686_i2c_parse_dt_pdata(struct device @@ -119,8 +223,15 @@ static int max77686_i2c_probe(struct i2c_client *i2c, { struct max77686_dev *max77686 = NULL; struct max77686_platform_data *pdata = dev_get_platdata(&i2c->dev); + const struct of_device_id *match; unsigned int data; int ret = 0; + const struct regmap_config *config; + const struct regmap_irq_chip *irq_chip; + const struct regmap_irq_chip *rtc_irq_chip; + struct regmap **rtc_regmap; + const struct mfd_cell *cells; + int n_devs; if (IS_ENABLED(CONFIG_OF) && i2c->dev.of_node && !pdata) pdata = max77686_i2c_parse_dt_pdata(&i2c->dev); @@ -135,15 +246,40 @@ static int max77686_i2c_probe(struct i2c_client *i2c, if (!max77686) return -ENOMEM; + if (i2c->dev.of_node) { + match = of_match_node(max77686_pmic_dt_match, i2c->dev.of_node); + if (!match) + return -EINVAL; + + max77686->type = (int)match->data; + } else { + max77686->type = id->driver_data; + } + i2c_set_clientdata(i2c, max77686); max77686->dev = &i2c->dev; max77686->i2c = i2c; - max77686->type = id->driver_data; max77686->wakeup = pdata->wakeup; max77686->irq = i2c->irq; - max77686->regmap = devm_regmap_init_i2c(i2c, &max77686_regmap_config); + if (max77686->type == TYPE_MAX77686) { + config = &max77686_regmap_config; + irq_chip = 
&max77686_irq_chip; + rtc_irq_chip = &max77686_rtc_irq_chip; + rtc_regmap = &max77686->rtc_regmap; + cells = max77686_devs; + n_devs = ARRAY_SIZE(max77686_devs); + } else { + config = &max77802_regmap_config; + irq_chip = &max77802_irq_chip; + rtc_irq_chip = &max77802_rtc_irq_chip; + rtc_regmap = &max77686->regmap; + cells = max77802_devs; + n_devs = ARRAY_SIZE(max77802_devs); + } + + max77686->regmap = devm_regmap_init_i2c(i2c, config); if (IS_ERR(max77686->regmap)) { ret = PTR_ERR(max77686->regmap); dev_err(max77686->dev, "Failed to allocate register map: %d\n", @@ -158,41 +294,46 @@ static int max77686_i2c_probe(struct i2c_client *i2c, return -ENODEV; } - max77686->rtc = i2c_new_dummy(i2c->adapter, I2C_ADDR_RTC); - if (!max77686->rtc) { - dev_err(max77686->dev, "Failed to allocate I2C device for RTC\n"); - return -ENODEV; - } - i2c_set_clientdata(max77686->rtc, max77686); - - max77686->rtc_regmap = devm_regmap_init_i2c(max77686->rtc, - &max77686_rtc_regmap_config); - if (IS_ERR(max77686->rtc_regmap)) { - ret = PTR_ERR(max77686->rtc_regmap); - dev_err(max77686->dev, "failed to allocate RTC regmap: %d\n", - ret); - goto err_unregister_i2c; + if (max77686->type == TYPE_MAX77686) { + max77686->rtc = i2c_new_dummy(i2c->adapter, I2C_ADDR_RTC); + if (!max77686->rtc) { + dev_err(max77686->dev, + "Failed to allocate I2C device for RTC\n"); + return -ENODEV; + } + i2c_set_clientdata(max77686->rtc, max77686); + + max77686->rtc_regmap = + devm_regmap_init_i2c(max77686->rtc, + &max77686_rtc_regmap_config); + if (IS_ERR(max77686->rtc_regmap)) { + ret = PTR_ERR(max77686->rtc_regmap); + dev_err(max77686->dev, + "failed to allocate RTC regmap: %d\n", + ret); + goto err_unregister_i2c; + } } ret = regmap_add_irq_chip(max77686->regmap, max77686->irq, IRQF_TRIGGER_FALLING | IRQF_ONESHOT | - IRQF_SHARED, 0, &max77686_irq_chip, + IRQF_SHARED, 0, irq_chip, &max77686->irq_data); if (ret) { dev_err(&i2c->dev, "failed to add PMIC irq chip: %d\n", ret); goto err_unregister_i2c; } - ret = regmap_add_irq_chip(max77686->rtc_regmap, max77686->irq, + + ret = regmap_add_irq_chip(*rtc_regmap, max77686->irq, IRQF_TRIGGER_FALLING | IRQF_ONESHOT | - IRQF_SHARED, 0, &max77686_rtc_irq_chip, + IRQF_SHARED, 0, rtc_irq_chip, &max77686->rtc_irq_data); if (ret) { dev_err(&i2c->dev, "failed to add RTC irq chip: %d\n", ret); goto err_del_irqc; } - ret = mfd_add_devices(max77686->dev, -1, max77686_devs, - ARRAY_SIZE(max77686_devs), NULL, 0, NULL); + ret = mfd_add_devices(max77686->dev, -1, cells, n_devs, NULL, 0, NULL); if (ret < 0) { dev_err(&i2c->dev, "failed to add MFD devices: %d\n", ret); goto err_del_rtc_irqc; @@ -205,7 +346,8 @@ err_del_rtc_irqc: err_del_irqc: regmap_del_irq_chip(max77686->irq, max77686->irq_data); err_unregister_i2c: - i2c_unregister_device(max77686->rtc); + if (max77686->type == TYPE_MAX77686) + i2c_unregister_device(max77686->rtc); return ret; } @@ -219,7 +361,8 @@ static int max77686_i2c_remove(struct i2c_client *i2c) regmap_del_irq_chip(max77686->irq, max77686->rtc_irq_data); regmap_del_irq_chip(max77686->irq, max77686->irq_data); - i2c_unregister_device(max77686->rtc); + if (max77686->type == TYPE_MAX77686) + i2c_unregister_device(max77686->rtc); return 0; } @@ -294,6 +437,6 @@ static void __exit max77686_i2c_exit(void) } module_exit(max77686_i2c_exit); -MODULE_DESCRIPTION("MAXIM 77686 multi-function core driver"); +MODULE_DESCRIPTION("MAXIM 77686/802 multi-function core driver"); MODULE_AUTHOR("Chiwoong Byun "); MODULE_LICENSE("GPL"); diff --git a/include/linux/mfd/max77686-private.h 
b/include/linux/mfd/max77686-private.h index 8e177806cba1..0d60b38e5b5c 100644 --- a/include/linux/mfd/max77686-private.h +++ b/include/linux/mfd/max77686-private.h @@ -1,5 +1,5 @@ /* - * max77686-private.h - Voltage regulator driver for the Maxim 77686 + * max77686-private.h - Voltage regulator driver for the Maxim 77686/802 * * Copyright (C) 2012 Samsung Electrnoics * Chiwoong Byun @@ -28,6 +28,7 @@ #define MAX77686_REG_INVALID (0xff) +/* MAX77686 PMIC registers */ enum max77686_pmic_reg { MAX77686_REG_DEVICE_ID = 0x00, MAX77686_REG_INTSRC = 0x01, @@ -181,6 +182,210 @@ enum max77686_rtc_reg { MAX77686_ALARM2_DATE = 0x1B, }; +/* MAX77802 PMIC registers */ +enum max77802_pmic_reg { + MAX77802_REG_DEVICE_ID = 0x00, + MAX77802_REG_INTSRC = 0x01, + MAX77802_REG_INT1 = 0x02, + MAX77802_REG_INT2 = 0x03, + + MAX77802_REG_INT1MSK = 0x04, + MAX77802_REG_INT2MSK = 0x05, + + MAX77802_REG_STATUS1 = 0x06, + MAX77802_REG_STATUS2 = 0x07, + + MAX77802_REG_PWRON = 0x08, + /* Reserved: 0x09 */ + MAX77802_REG_MRSTB = 0x0A, + MAX77802_REG_EPWRHOLD = 0x0B, + /* Reserved: 0x0C-0x0D */ + MAX77802_REG_BOOSTCTRL = 0x0E, + MAX77802_REG_BOOSTOUT = 0x0F, + + MAX77802_REG_BUCK1CTRL = 0x10, + MAX77802_REG_BUCK1DVS1 = 0x11, + MAX77802_REG_BUCK1DVS2 = 0x12, + MAX77802_REG_BUCK1DVS3 = 0x13, + MAX77802_REG_BUCK1DVS4 = 0x14, + MAX77802_REG_BUCK1DVS5 = 0x15, + MAX77802_REG_BUCK1DVS6 = 0x16, + MAX77802_REG_BUCK1DVS7 = 0x17, + MAX77802_REG_BUCK1DVS8 = 0x18, + /* Reserved: 0x19 */ + MAX77802_REG_BUCK2CTRL1 = 0x1A, + MAX77802_REG_BUCK2CTRL2 = 0x1B, + MAX77802_REG_BUCK2PHTRAN = 0x1C, + MAX77802_REG_BUCK2DVS1 = 0x1D, + MAX77802_REG_BUCK2DVS2 = 0x1E, + MAX77802_REG_BUCK2DVS3 = 0x1F, + MAX77802_REG_BUCK2DVS4 = 0x20, + MAX77802_REG_BUCK2DVS5 = 0x21, + MAX77802_REG_BUCK2DVS6 = 0x22, + MAX77802_REG_BUCK2DVS7 = 0x23, + MAX77802_REG_BUCK2DVS8 = 0x24, + /* Reserved: 0x25-0x26 */ + MAX77802_REG_BUCK3CTRL1 = 0x27, + MAX77802_REG_BUCK3DVS1 = 0x28, + MAX77802_REG_BUCK3DVS2 = 0x29, + MAX77802_REG_BUCK3DVS3 = 0x2A, + MAX77802_REG_BUCK3DVS4 = 0x2B, + MAX77802_REG_BUCK3DVS5 = 0x2C, + MAX77802_REG_BUCK3DVS6 = 0x2D, + MAX77802_REG_BUCK3DVS7 = 0x2E, + MAX77802_REG_BUCK3DVS8 = 0x2F, + /* Reserved: 0x30-0x36 */ + MAX77802_REG_BUCK4CTRL1 = 0x37, + MAX77802_REG_BUCK4DVS1 = 0x38, + MAX77802_REG_BUCK4DVS2 = 0x39, + MAX77802_REG_BUCK4DVS3 = 0x3A, + MAX77802_REG_BUCK4DVS4 = 0x3B, + MAX77802_REG_BUCK4DVS5 = 0x3C, + MAX77802_REG_BUCK4DVS6 = 0x3D, + MAX77802_REG_BUCK4DVS7 = 0x3E, + MAX77802_REG_BUCK4DVS8 = 0x3F, + /* Reserved: 0x40 */ + MAX77802_REG_BUCK5CTRL = 0x41, + MAX77802_REG_BUCK5OUT = 0x42, + /* Reserved: 0x43 */ + MAX77802_REG_BUCK6CTRL = 0x44, + MAX77802_REG_BUCK6DVS1 = 0x45, + MAX77802_REG_BUCK6DVS2 = 0x46, + MAX77802_REG_BUCK6DVS3 = 0x47, + MAX77802_REG_BUCK6DVS4 = 0x48, + MAX77802_REG_BUCK6DVS5 = 0x49, + MAX77802_REG_BUCK6DVS6 = 0x4A, + MAX77802_REG_BUCK6DVS7 = 0x4B, + MAX77802_REG_BUCK6DVS8 = 0x4C, + /* Reserved: 0x4D */ + MAX77802_REG_BUCK7CTRL = 0x4E, + MAX77802_REG_BUCK7OUT = 0x4F, + /* Reserved: 0x50 */ + MAX77802_REG_BUCK8CTRL = 0x51, + MAX77802_REG_BUCK8OUT = 0x52, + /* Reserved: 0x53 */ + MAX77802_REG_BUCK9CTRL = 0x54, + MAX77802_REG_BUCK9OUT = 0x55, + /* Reserved: 0x56 */ + MAX77802_REG_BUCK10CTRL = 0x57, + MAX77802_REG_BUCK10OUT = 0x58, + + /* Reserved: 0x59-0x5F */ + + MAX77802_REG_LDO1CTRL1 = 0x60, + MAX77802_REG_LDO2CTRL1 = 0x61, + MAX77802_REG_LDO3CTRL1 = 0x62, + MAX77802_REG_LDO4CTRL1 = 0x63, + MAX77802_REG_LDO5CTRL1 = 0x64, + MAX77802_REG_LDO6CTRL1 = 0x65, + MAX77802_REG_LDO7CTRL1 = 0x66, + MAX77802_REG_LDO8CTRL1 = 0x67, + 
MAX77802_REG_LDO9CTRL1 = 0x68, + MAX77802_REG_LDO10CTRL1 = 0x69, + MAX77802_REG_LDO11CTRL1 = 0x6A, + MAX77802_REG_LDO12CTRL1 = 0x6B, + MAX77802_REG_LDO13CTRL1 = 0x6C, + MAX77802_REG_LDO14CTRL1 = 0x6D, + MAX77802_REG_LDO15CTRL1 = 0x6E, + /* Reserved: 0x6F */ + MAX77802_REG_LDO17CTRL1 = 0x70, + MAX77802_REG_LDO18CTRL1 = 0x71, + MAX77802_REG_LDO19CTRL1 = 0x72, + MAX77802_REG_LDO20CTRL1 = 0x73, + MAX77802_REG_LDO21CTRL1 = 0x74, + MAX77802_REG_LDO22CTRL1 = 0x75, + MAX77802_REG_LDO23CTRL1 = 0x76, + MAX77802_REG_LDO24CTRL1 = 0x77, + MAX77802_REG_LDO25CTRL1 = 0x78, + MAX77802_REG_LDO26CTRL1 = 0x79, + MAX77802_REG_LDO27CTRL1 = 0x7A, + MAX77802_REG_LDO28CTRL1 = 0x7B, + MAX77802_REG_LDO29CTRL1 = 0x7C, + MAX77802_REG_LDO30CTRL1 = 0x7D, + /* Reserved: 0x7E */ + MAX77802_REG_LDO32CTRL1 = 0x7F, + MAX77802_REG_LDO33CTRL1 = 0x80, + MAX77802_REG_LDO34CTRL1 = 0x81, + MAX77802_REG_LDO35CTRL1 = 0x82, + /* Reserved: 0x83-0x8F */ + MAX77802_REG_LDO1CTRL2 = 0x90, + MAX77802_REG_LDO2CTRL2 = 0x91, + MAX77802_REG_LDO3CTRL2 = 0x92, + MAX77802_REG_LDO4CTRL2 = 0x93, + MAX77802_REG_LDO5CTRL2 = 0x94, + MAX77802_REG_LDO6CTRL2 = 0x95, + MAX77802_REG_LDO7CTRL2 = 0x96, + MAX77802_REG_LDO8CTRL2 = 0x97, + MAX77802_REG_LDO9CTRL2 = 0x98, + MAX77802_REG_LDO10CTRL2 = 0x99, + MAX77802_REG_LDO11CTRL2 = 0x9A, + MAX77802_REG_LDO12CTRL2 = 0x9B, + MAX77802_REG_LDO13CTRL2 = 0x9C, + MAX77802_REG_LDO14CTRL2 = 0x9D, + MAX77802_REG_LDO15CTRL2 = 0x9E, + /* Reserved: 0x9F */ + MAX77802_REG_LDO17CTRL2 = 0xA0, + MAX77802_REG_LDO18CTRL2 = 0xA1, + MAX77802_REG_LDO19CTRL2 = 0xA2, + MAX77802_REG_LDO20CTRL2 = 0xA3, + MAX77802_REG_LDO21CTRL2 = 0xA4, + MAX77802_REG_LDO22CTRL2 = 0xA5, + MAX77802_REG_LDO23CTRL2 = 0xA6, + MAX77802_REG_LDO24CTRL2 = 0xA7, + MAX77802_REG_LDO25CTRL2 = 0xA8, + MAX77802_REG_LDO26CTRL2 = 0xA9, + MAX77802_REG_LDO27CTRL2 = 0xAA, + MAX77802_REG_LDO28CTRL2 = 0xAB, + MAX77802_REG_LDO29CTRL2 = 0xAC, + MAX77802_REG_LDO30CTRL2 = 0xAD, + /* Reserved: 0xAE */ + MAX77802_REG_LDO32CTRL2 = 0xAF, + MAX77802_REG_LDO33CTRL2 = 0xB0, + MAX77802_REG_LDO34CTRL2 = 0xB1, + MAX77802_REG_LDO35CTRL2 = 0xB2, + /* Reserved: 0xB3 */ + + MAX77802_REG_BBAT_CHG = 0xB4, + MAX77802_REG_32KHZ = 0xB5, + + MAX77802_REG_PMIC_END = 0xB6, +}; + +enum max77802_rtc_reg { + MAX77802_RTC_INT = 0xC0, + MAX77802_RTC_INTM = 0xC1, + MAX77802_RTC_CONTROLM = 0xC2, + MAX77802_RTC_CONTROL = 0xC3, + MAX77802_RTC_UPDATE0 = 0xC4, + MAX77802_RTC_UPDATE1 = 0xC5, + MAX77802_WTSR_SMPL_CNTL = 0xC6, + MAX77802_RTC_SEC = 0xC7, + MAX77802_RTC_MIN = 0xC8, + MAX77802_RTC_HOUR = 0xC9, + MAX77802_RTC_WEEKDAY = 0xCA, + MAX77802_RTC_MONTH = 0xCB, + MAX77802_RTC_YEAR = 0xCC, + MAX77802_RTC_DATE = 0xCD, + MAX77802_RTC_AE1 = 0xCE, + MAX77802_ALARM1_SEC = 0xCF, + MAX77802_ALARM1_MIN = 0xD0, + MAX77802_ALARM1_HOUR = 0xD1, + MAX77802_ALARM1_WEEKDAY = 0xD2, + MAX77802_ALARM1_MONTH = 0xD3, + MAX77802_ALARM1_YEAR = 0xD4, + MAX77802_ALARM1_DATE = 0xD5, + MAX77802_RTC_AE2 = 0xD6, + MAX77802_ALARM2_SEC = 0xD7, + MAX77802_ALARM2_MIN = 0xD8, + MAX77802_ALARM2_HOUR = 0xD9, + MAX77802_ALARM2_WEEKDAY = 0xDA, + MAX77802_ALARM2_MONTH = 0xDB, + MAX77802_ALARM2_YEAR = 0xDC, + MAX77802_ALARM2_DATE = 0xDD, + + MAX77802_RTC_END = 0xDF, +}; + enum max77686_irq_source { PMIC_INT1 = 0, PMIC_INT2, @@ -250,6 +455,7 @@ struct max77686_dev { enum max77686_types { TYPE_MAX77686, + TYPE_MAX77802, }; extern int max77686_irq_init(struct max77686_dev *max77686); diff --git a/include/linux/mfd/max77686.h b/include/linux/mfd/max77686.h index 4cbcc13e8a2a..7e6dc4b2b795 100644 --- a/include/linux/mfd/max77686.h +++ 
b/include/linux/mfd/max77686.h @@ -1,5 +1,5 @@ /* - * max77686.h - Driver for the Maxim 77686 + * max77686.h - Driver for the Maxim 77686/802 * * Copyright (C) 2012 Samsung Electrnoics * Chiwoong Byun @@ -71,6 +71,54 @@ enum max77686_regulators { MAX77686_REG_MAX, }; +/* MAX77802 regulator IDs */ +enum max77802_regulators { + MAX77802_BUCK1 = 0, + MAX77802_BUCK2, + MAX77802_BUCK3, + MAX77802_BUCK4, + MAX77802_BUCK5, + MAX77802_BUCK6, + MAX77802_BUCK7, + MAX77802_BUCK8, + MAX77802_BUCK9, + MAX77802_BUCK10, + MAX77802_LDO1, + MAX77802_LDO2, + MAX77802_LDO3, + MAX77802_LDO4, + MAX77802_LDO5, + MAX77802_LDO6, + MAX77802_LDO7, + MAX77802_LDO8, + MAX77802_LDO9, + MAX77802_LDO10, + MAX77802_LDO11, + MAX77802_LDO12, + MAX77802_LDO13, + MAX77802_LDO14, + MAX77802_LDO15, + MAX77802_LDO17, + MAX77802_LDO18, + MAX77802_LDO19, + MAX77802_LDO20, + MAX77802_LDO21, + MAX77802_LDO23, + MAX77802_LDO24, + MAX77802_LDO25, + MAX77802_LDO26, + MAX77802_LDO27, + MAX77802_LDO28, + MAX77802_LDO29, + MAX77802_LDO30, + MAX77802_LDO32, + MAX77802_LDO33, + MAX77802_LDO34, + MAX77802_LDO35, + + MAX77802_REG_MAX, +}; + struct max77686_regulator_data { int id; struct regulator_init_data *initdata; @@ -83,6 +131,13 @@ enum max77686_opmode { MAX77686_OPMODE_STANDBY, }; +enum max77802_opmode { + MAX77802_OPMODE_OFF, + MAX77802_OPMODE_STANDBY, + MAX77802_OPMODE_LP, + MAX77802_OPMODE_NORMAL, +}; + struct max77686_opmode_data { int id; int mode; -- cgit v1.2.3-59-g8ed1b From ec8bd56699cb4371994437583a285b855b6f5e3a Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 24 Jul 2014 17:07:16 +0100 Subject: mfd: max77686: Ensure device type IDs are architecture agnostic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extinguishes: ../drivers/mfd/max77686.c: In function ‘max77686_i2c_probe’: ../drivers/mfd/max77686.c:254:20: warning: cast from pointer to integer of different size Signed-off-by: Lee Jones --- drivers/mfd/max77686.c | 5 ++--- include/linux/mfd/max77686-private.h | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/max77686.c b/drivers/mfd/max77686.c index c65332291bb4..86e552348db4 100644 --- a/drivers/mfd/max77686.c +++ b/drivers/mfd/max77686.c @@ -251,10 +251,9 @@ static int max77686_i2c_probe(struct i2c_client *i2c, if (!match) return -EINVAL; - max77686->type = (int)match->data; - } else { + max77686->type = (unsigned long)match->data; + } else max77686->type = id->driver_data; - } i2c_set_clientdata(i2c, max77686); max77686->dev = &i2c->dev; diff --git a/include/linux/mfd/max77686-private.h b/include/linux/mfd/max77686-private.h index 0d60b38e5b5c..960b92ad450d 100644 --- a/include/linux/mfd/max77686-private.h +++ b/include/linux/mfd/max77686-private.h @@ -439,7 +439,7 @@ struct max77686_dev { struct i2c_client *i2c; /* 0xcc / PMIC, Battery Control, and FLASH */ struct i2c_client *rtc; /* slave addr 0x0c */ - int type; + unsigned long type; struct regmap *regmap; /* regmap for mfd */ struct regmap *rtc_regmap; /* regmap for rtc */ -- cgit v1.2.3-59-g8ed1b From 8d7d3972a9ae962bbf8ce49c83f4a40082708f69 Mon Sep 17 00:00:00 2001 From: Tuomas Tynkkynen Date: Mon, 21 Jul 2014 18:38:47 +0300 Subject: regmap: Add regmap_get_device Add a new function regmap_get_device to obtain the underlying struct device from a regmap. 
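As a rough usage illustration (not part of this patch), a consumer that already holds a regmap could recover its backing device like this; the helper name report_regmap_owner and the way the map was obtained are assumptions for the example:

    #include <linux/device.h>
    #include <linux/regmap.h>

    /* Hypothetical consumer: report which device backs a given regmap.
     * "map" is assumed to come from elsewhere, e.g. dev_get_regmap() or
     * devm_regmap_init_i2c().
     */
    static void report_regmap_owner(struct regmap *map)
    {
            struct device *dev = regmap_get_device(map);

            dev_info(dev, "regmap backed by device %s\n", dev_name(dev));
    }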
Signed-off-by: Tuomas Tynkkynen Signed-off-by: Mark Brown --- drivers/base/regmap/regmap.c | 12 ++++++++++++ include/linux/regmap.h | 7 +++++++ 2 files changed, 19 insertions(+) (limited to 'include/linux') diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index 74d8c0672cf6..5ba0263fce21 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -1073,6 +1073,18 @@ struct regmap *dev_get_regmap(struct device *dev, const char *name) } EXPORT_SYMBOL_GPL(dev_get_regmap); +/** + * regmap_get_device(): Obtain the device from a regmap + * + * @map: Register map to operate on. + * + * Returns the underlying device that the regmap has been created for. + */ +struct device *regmap_get_device(struct regmap *map) +{ + return map->dev; +} + static int _regmap_select_page(struct regmap *map, unsigned int *reg, struct regmap_range_node *range, unsigned int val_num) diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 7b0e4b425cdf..cd480fd59795 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -396,6 +396,7 @@ void regmap_exit(struct regmap *map); int regmap_reinit_cache(struct regmap *map, const struct regmap_config *config); struct regmap *dev_get_regmap(struct device *dev, const char *name); +struct device *regmap_get_device(struct regmap *map); int regmap_write(struct regmap *map, unsigned int reg, unsigned int val); int regmap_write_async(struct regmap *map, unsigned int reg, unsigned int val); int regmap_raw_write(struct regmap *map, unsigned int reg, @@ -729,6 +730,12 @@ static inline struct regmap *dev_get_regmap(struct device *dev, return NULL; } +static inline struct device *regmap_get_device(struct regmap *map) +{ + WARN_ONCE(1, "regmap API is disabled"); + return -EINVAL; +} + #endif #endif -- cgit v1.2.3-59-g8ed1b From 04eca28cde52cdf9eb91e127cc358ad79a8ec53b Mon Sep 17 00:00:00 2001 From: Tuomas Tynkkynen Date: Mon, 21 Jul 2014 18:38:48 +0300 Subject: regulator: Add helpers for low-level register access Add helper functions that allow regulator consumers to obtain low-level details about the regulator hardware, like the voltage selector register address and such. These details can be useful when configuring hardware or firmware that want to do low-level access to regulators, with no involvement from the kernel. The use-case for Tegra is a voltage-controlled oscillator clocksource which has control logic to change the supply voltage via I2C to achieve a desired output clock rate. Signed-off-by: Tuomas Tynkkynen Signed-off-by: Mark Brown --- Documentation/power/regulator/consumer.txt | 35 +++++++++++++++ drivers/regulator/core.c | 71 ++++++++++++++++++++++++++++++ include/linux/regulator/consumer.h | 26 +++++++++++ 3 files changed, 132 insertions(+) (limited to 'include/linux') diff --git a/Documentation/power/regulator/consumer.txt b/Documentation/power/regulator/consumer.txt index 55c4175d8099..81c0e2b49cd8 100644 --- a/Documentation/power/regulator/consumer.txt +++ b/Documentation/power/regulator/consumer.txt @@ -180,3 +180,38 @@ int regulator_unregister_notifier(struct regulator *regulator, Regulators use the kernel notifier framework to send event to their interested consumers. + +7. Regulator Direct Register Access +=================================== +Some kinds of power management hardware or firmware are designed such that +they need to do low-level hardware access to regulators, with no involvement +from the kernel. 
Examples of such devices are: + +- clocksource with a voltage-controlled oscillator and control logic to change + the supply voltage over I2C to achieve a desired output clock rate +- thermal management firmware that can issue an arbitrary I2C transaction to + perform system poweroff during overtemperature conditions + +To set up such a device/firmware, various parameters like I2C address of the +regulator, addresses of various regulator registers etc. need to be configured +to it. The regulator framework provides the following helpers for querying +these details. + +Bus-specific details, like I2C addresses or transfer rates are handled by the +regmap framework. To get the regulator's regmap (if supported), use :- + +struct regmap *regulator_get_regmap(struct regulator *regulator); + +To obtain the hardware register offset and bitmask for the regulator's voltage +selector register, use :- + +int regulator_get_hardware_vsel_register(struct regulator *regulator, + unsigned *vsel_reg, + unsigned *vsel_mask); + +To convert a regulator framework voltage selector code (used by +regulator_list_voltage) to a hardware-specific voltage selector that can be +directly written to the voltage selector register, use :- + +int regulator_list_hardware_vsel(struct regulator *regulator, + unsigned selector); diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 4c1f999041dd..486b5908469e 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -2221,6 +2221,77 @@ int regulator_list_voltage(struct regulator *regulator, unsigned selector) } EXPORT_SYMBOL_GPL(regulator_list_voltage); +/** + * regulator_get_regmap - get the regulator's register map + * @regulator: regulator source + * + * Returns the register map for the given regulator, or an ERR_PTR value + * if the regulator doesn't use regmap. + */ +struct regmap *regulator_get_regmap(struct regulator *regulator) +{ + struct regmap *map = regulator->rdev->regmap; + + return map ? map : ERR_PTR(-EOPNOTSUPP); +} + +/** + * regulator_get_hardware_vsel_register - get the HW voltage selector register + * @regulator: regulator source + * @vsel_reg: voltage selector register, output parameter + * @vsel_mask: mask for voltage selector bitfield, output parameter + * + * Returns the hardware register offset and bitmask used for setting the + * regulator voltage. This might be useful when configuring voltage-scaling + * hardware or firmware that can make I2C requests behind the kernel's back, + * for example. + * + * On success, the output parameters @vsel_reg and @vsel_mask are filled in + * and 0 is returned, otherwise a negative errno is returned. + */ +int regulator_get_hardware_vsel_register(struct regulator *regulator, + unsigned *vsel_reg, + unsigned *vsel_mask) +{ + struct regulator_dev *rdev = regulator->rdev; + struct regulator_ops *ops = rdev->desc->ops; + + if (ops->set_voltage_sel != regulator_set_voltage_sel_regmap) + return -EOPNOTSUPP; + + *vsel_reg = rdev->desc->vsel_reg; + *vsel_mask = rdev->desc->vsel_mask; + + return 0; +} +EXPORT_SYMBOL_GPL(regulator_get_hardware_vsel_register); + +/** + * regulator_list_hardware_vsel - get the HW-specific register value for a selector + * @regulator: regulator source + * @selector: identify voltage to list + * + * Converts the selector to a hardware-specific voltage selector that can be + * directly written to the regulator registers. The address of the voltage + * register can be determined by calling @regulator_get_hardware_vsel_register. 
+ * + * On error a negative errno is returned. + */ +int regulator_list_hardware_vsel(struct regulator *regulator, + unsigned selector) +{ + struct regulator_dev *rdev = regulator->rdev; + struct regulator_ops *ops = rdev->desc->ops; + + if (selector >= rdev->desc->n_voltages) + return -EINVAL; + if (ops->set_voltage_sel != regulator_set_voltage_sel_regmap) + return -EOPNOTSUPP; + + return selector; +} +EXPORT_SYMBOL_GPL(regulator_list_hardware_vsel); + /** * regulator_get_linear_step - return the voltage step size between VSEL values * @regulator: regulator source diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h index a2d9d81038d1..0b1c8d09a6b1 100644 --- a/include/linux/regulator/consumer.h +++ b/include/linux/regulator/consumer.h @@ -37,6 +37,7 @@ struct device; struct notifier_block; +struct regmap; /* * Regulator operating modes. @@ -215,6 +216,13 @@ int regulator_set_optimum_mode(struct regulator *regulator, int load_uA); int regulator_allow_bypass(struct regulator *regulator, bool allow); +struct regmap *regulator_get_regmap(struct regulator *regulator); +int regulator_get_hardware_vsel_register(struct regulator *regulator, + unsigned *vsel_reg, + unsigned *vsel_mask); +int regulator_list_hardware_vsel(struct regulator *regulator, + unsigned selector); + /* regulator notifier block */ int regulator_register_notifier(struct regulator *regulator, struct notifier_block *nb); @@ -452,6 +460,24 @@ static inline int regulator_allow_bypass(struct regulator *regulator, return 0; } +struct regmap *regulator_get_regmap(struct regulator *regulator) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +int regulator_get_hardware_vsel_register(struct regulator *regulator, + unsigned *vsel_reg, + unsigned *vsel_mask) +{ + return -EOPNOTSUPP; +} + +int regulator_list_hardware_vsel(struct regulator *regulator, + unsigned selector) +{ + return -EOPNOTSUPP; +} + static inline int regulator_register_notifier(struct regulator *regulator, struct notifier_block *nb) { -- cgit v1.2.3-59-g8ed1b From 1d33dc6b0f0fd1a1f65011f54165c558daf46638 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 25 Jul 2014 19:01:53 +0100 Subject: regmap: Fix return code for stub regmap_get_device() We return a pointer, not an int. Signed-off-by: Mark Brown --- include/linux/regmap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index cd480fd59795..c5ed83f49c4e 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -733,7 +733,7 @@ static inline struct regmap *dev_get_regmap(struct device *dev, static inline struct device *regmap_get_device(struct regmap *map) { WARN_ONCE(1, "regmap API is disabled"); - return -EINVAL; + return NULL; } #endif -- cgit v1.2.3-59-g8ed1b From 13752fe2d7f2d41c2fd92a5d1b1c6e38c4de0c05 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 25 Feb 2014 10:28:04 -0800 Subject: security: introduce kernel_fw_from_file hook In order to validate the contents of firmware being loaded, there must be a hook to evaluate any loaded firmware that wasn't built into the kernel itself. Without this, there is a risk that a root user could load malicious firmware designed to mount an attack against kernel memory (e.g. via DMA). 
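As a hedged sketch of how a security module might implement the new hook (nothing below is part of this patch; the function name and the size policy are invented for illustration):

    /* Hypothetical LSM callback with the kernel_fw_from_file signature.
     * The policy is made up: refuse user-helper loads that have no backing
     * file and cap the accepted firmware size.
     */
    static int example_fw_from_file(struct file *file, char *buf, size_t size)
    {
            if (!file)
                    return -EACCES;              /* no file to evaluate */
            if (size > 16 * 1024 * 1024)
                    return -EPERM;               /* arbitrary example cap */
            return 0;
    }

A module would then point the new .kernel_fw_from_file member of its struct security_operations at such a function, next to the existing kernel_module_from_file hook.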
Signed-off-by: Kees Cook Reviewed-by: Takashi Iwai --- include/linux/security.h | 17 +++++++++++++++++ security/capability.c | 6 ++++++ security/security.c | 6 ++++++ 3 files changed, 29 insertions(+) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index 59820f8782a1..0ae4b147718a 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -702,6 +702,15 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @inode points to the inode to use as a reference. * The current task must be the one that nominated @inode. * Return 0 if successful. + * @kernel_fw_from_file: + * Load firmware from userspace (not called for built-in firmware). + * @file contains the file structure pointing to the file containing + * the firmware to load. This argument will be NULL if the firmware + * was loaded via the uevent-triggered blob-based interface exposed + * by CONFIG_FW_LOADER_USER_HELPER. + * @buf pointer to buffer containing firmware contents. + * @size length of the firmware contents. + * Return 0 if permission is granted. * @kernel_module_request: * Ability to trigger the kernel to automatically upcall to userspace for * userspace to load a kernel module with the given name. @@ -1568,6 +1577,7 @@ struct security_operations { void (*cred_transfer)(struct cred *new, const struct cred *old); int (*kernel_act_as)(struct cred *new, u32 secid); int (*kernel_create_files_as)(struct cred *new, struct inode *inode); + int (*kernel_fw_from_file)(struct file *file, char *buf, size_t size); int (*kernel_module_request)(char *kmod_name); int (*kernel_module_from_file)(struct file *file); int (*task_fix_setuid) (struct cred *new, const struct cred *old, @@ -1840,6 +1850,7 @@ int security_prepare_creds(struct cred *new, const struct cred *old, gfp_t gfp); void security_transfer_creds(struct cred *new, const struct cred *old); int security_kernel_act_as(struct cred *new, u32 secid); int security_kernel_create_files_as(struct cred *new, struct inode *inode); +int security_kernel_fw_from_file(struct file *file, char *buf, size_t size); int security_kernel_module_request(char *kmod_name); int security_kernel_module_from_file(struct file *file); int security_task_fix_setuid(struct cred *new, const struct cred *old, @@ -2366,6 +2377,12 @@ static inline int security_kernel_create_files_as(struct cred *cred, return 0; } +static inline int security_kernel_fw_from_file(struct file *file, + char *buf, size_t size) +{ + return 0; +} + static inline int security_kernel_module_request(char *kmod_name) { return 0; diff --git a/security/capability.c b/security/capability.c index e76373de3129..a74fde6a7468 100644 --- a/security/capability.c +++ b/security/capability.c @@ -401,6 +401,11 @@ static int cap_kernel_create_files_as(struct cred *new, struct inode *inode) return 0; } +static int cap_kernel_fw_from_file(struct file *file, char *buf, size_t size) +{ + return 0; +} + static int cap_kernel_module_request(char *kmod_name) { return 0; @@ -1015,6 +1020,7 @@ void __init security_fixup_ops(struct security_operations *ops) set_to_cap_if_null(ops, cred_transfer); set_to_cap_if_null(ops, kernel_act_as); set_to_cap_if_null(ops, kernel_create_files_as); + set_to_cap_if_null(ops, kernel_fw_from_file); set_to_cap_if_null(ops, kernel_module_request); set_to_cap_if_null(ops, kernel_module_from_file); set_to_cap_if_null(ops, task_fix_setuid); diff --git a/security/security.c b/security/security.c index 31614e9e96e5..35d37d0f0d49 100644 --- a/security/security.c 
+++ b/security/security.c @@ -845,6 +845,12 @@ int security_kernel_create_files_as(struct cred *new, struct inode *inode) return security_ops->kernel_create_files_as(new, inode); } +int security_kernel_fw_from_file(struct file *file, char *buf, size_t size) +{ + return security_ops->kernel_fw_from_file(file, buf, size); +} +EXPORT_SYMBOL_GPL(security_kernel_fw_from_file); + int security_kernel_module_request(char *kmod_name) { return security_ops->kernel_module_request(kmod_name); -- cgit v1.2.3-59-g8ed1b From 5a9196d715607f76d6b7d96a0970d6065335e62b Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Tue, 22 Jul 2014 10:39:48 -0400 Subject: ima: add support for measuring and appraising firmware The "security: introduce kernel_fw_from_file hook" patch defined a new security hook to evaluate any loaded firmware that wasn't built into the kernel. This patch defines ima_fw_from_file(), which is called from the new security hook, to measure and/or appraise the loaded firmware's integrity. Signed-off-by: Mimi Zohar Signed-off-by: Kees Cook --- Documentation/ABI/testing/ima_policy | 4 +++- include/linux/ima.h | 6 ++++++ security/integrity/ima/ima.h | 3 ++- security/integrity/ima/ima_appraise.c | 8 ++++++++ security/integrity/ima/ima_main.c | 11 +++++++++++ security/integrity/ima/ima_policy.c | 7 +++++++ security/integrity/integrity.h | 9 +++++++-- security/security.c | 7 ++++++- 8 files changed, 50 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/Documentation/ABI/testing/ima_policy b/Documentation/ABI/testing/ima_policy index 4c3efe434806..d0d0c578324c 100644 --- a/Documentation/ABI/testing/ima_policy +++ b/Documentation/ABI/testing/ima_policy @@ -26,6 +26,7 @@ Description: option: [[appraise_type=]] [permit_directio] base: func:= [BPRM_CHECK][MMAP_CHECK][FILE_CHECK][MODULE_CHECK] + [FIRMWARE_CHECK] mask:= [MAY_READ] [MAY_WRITE] [MAY_APPEND] [MAY_EXEC] fsmagic:= hex value fsuuid:= file system UUID (e.g 8bcbe394-4f13-4144-be8e-5aa9ea2ce2f6) @@ -57,7 +58,8 @@ Description: measure func=BPRM_CHECK measure func=FILE_MMAP mask=MAY_EXEC measure func=FILE_CHECK mask=MAY_READ uid=0 - measure func=MODULE_CHECK uid=0 + measure func=MODULE_CHECK + measure func=FIRMWARE_CHECK appraise fowner=0 The default policy measures all executables in bprm_check, diff --git a/include/linux/ima.h b/include/linux/ima.h index 1b7f268cddce..7cf5e9b32550 100644 --- a/include/linux/ima.h +++ b/include/linux/ima.h @@ -19,6 +19,7 @@ extern int ima_file_check(struct file *file, int mask); extern void ima_file_free(struct file *file); extern int ima_file_mmap(struct file *file, unsigned long prot); extern int ima_module_check(struct file *file); +extern int ima_fw_from_file(struct file *file, char *buf, size_t size); #else static inline int ima_bprm_check(struct linux_binprm *bprm) @@ -46,6 +47,11 @@ static inline int ima_module_check(struct file *file) return 0; } +static inline int ima_fw_from_file(struct file *file, char *buf, size_t size) +{ + return 0; +} + #endif /* CONFIG_IMA */ #ifdef CONFIG_IMA_APPRAISE diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h index c42056edfc97..57da4bd7ba0c 100644 --- a/security/integrity/ima/ima.h +++ b/security/integrity/ima/ima.h @@ -158,7 +158,7 @@ struct integrity_iint_cache *integrity_iint_insert(struct inode *inode); struct integrity_iint_cache *integrity_iint_find(struct inode *inode); /* IMA policy related functions */ -enum ima_hooks { FILE_CHECK = 1, MMAP_CHECK, BPRM_CHECK, MODULE_CHECK, POST_SETATTR }; +enum ima_hooks { FILE_CHECK = 1, 
MMAP_CHECK, BPRM_CHECK, MODULE_CHECK, FIRMWARE_CHECK, POST_SETATTR }; int ima_match_policy(struct inode *inode, enum ima_hooks func, int mask, int flags); @@ -171,6 +171,7 @@ void ima_delete_rules(void); #define IMA_APPRAISE_ENFORCE 0x01 #define IMA_APPRAISE_FIX 0x02 #define IMA_APPRAISE_MODULES 0x04 +#define IMA_APPRAISE_FIRMWARE 0x08 #ifdef CONFIG_IMA_APPRAISE int ima_appraise_measurement(int func, struct integrity_iint_cache *iint, diff --git a/security/integrity/ima/ima_appraise.c b/security/integrity/ima/ima_appraise.c index 59ac90275070..86bfd5c5df85 100644 --- a/security/integrity/ima/ima_appraise.c +++ b/security/integrity/ima/ima_appraise.c @@ -75,6 +75,8 @@ enum integrity_status ima_get_cache_status(struct integrity_iint_cache *iint, return iint->ima_bprm_status; case MODULE_CHECK: return iint->ima_module_status; + case FIRMWARE_CHECK: + return iint->ima_firmware_status; case FILE_CHECK: default: return iint->ima_file_status; @@ -94,6 +96,9 @@ static void ima_set_cache_status(struct integrity_iint_cache *iint, case MODULE_CHECK: iint->ima_module_status = status; break; + case FIRMWARE_CHECK: + iint->ima_firmware_status = status; + break; case FILE_CHECK: default: iint->ima_file_status = status; @@ -113,6 +118,9 @@ static void ima_cache_flags(struct integrity_iint_cache *iint, int func) case MODULE_CHECK: iint->flags |= (IMA_MODULE_APPRAISED | IMA_APPRAISED); break; + case FIRMWARE_CHECK: + iint->flags |= (IMA_FIRMWARE_APPRAISED | IMA_APPRAISED); + break; case FILE_CHECK: default: iint->flags |= (IMA_FILE_APPRAISED | IMA_APPRAISED); diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c index 0d696431209c..2917f980bf30 100644 --- a/security/integrity/ima/ima_main.c +++ b/security/integrity/ima/ima_main.c @@ -319,6 +319,17 @@ int ima_module_check(struct file *file) return process_measurement(file, NULL, MAY_EXEC, MODULE_CHECK); } +int ima_fw_from_file(struct file *file, char *buf, size_t size) +{ + if (!file) { + if ((ima_appraise & IMA_APPRAISE_FIRMWARE) && + (ima_appraise & IMA_APPRAISE_ENFORCE)) + return -EACCES; /* INTEGRITY_UNKNOWN */ + return 0; + } + return process_measurement(file, NULL, MAY_EXEC, FIRMWARE_CHECK); +} + static int __init init_ima(void) { int error; diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c index cea84d8bd7be..07099a8bc283 100644 --- a/security/integrity/ima/ima_policy.c +++ b/security/integrity/ima/ima_policy.c @@ -84,6 +84,7 @@ static struct ima_rule_entry default_rules[] = { {.action = MEASURE, .func = FILE_CHECK, .mask = MAY_READ, .uid = GLOBAL_ROOT_UID, .flags = IMA_FUNC | IMA_MASK | IMA_UID}, {.action = MEASURE, .func = MODULE_CHECK, .flags = IMA_FUNC}, + {.action = MEASURE, .func = FIRMWARE_CHECK, .flags = IMA_FUNC}, }; static struct ima_rule_entry default_appraise_rules[] = { @@ -241,6 +242,8 @@ static int get_subaction(struct ima_rule_entry *rule, int func) return IMA_BPRM_APPRAISE; case MODULE_CHECK: return IMA_MODULE_APPRAISE; + case FIRMWARE_CHECK: + return IMA_FIRMWARE_APPRAISE; case FILE_CHECK: default: return IMA_FILE_APPRAISE; @@ -486,6 +489,8 @@ static int ima_parse_rule(char *rule, struct ima_rule_entry *entry) entry->func = FILE_CHECK; else if (strcmp(args[0].from, "MODULE_CHECK") == 0) entry->func = MODULE_CHECK; + else if (strcmp(args[0].from, "FIRMWARE_CHECK") == 0) + entry->func = FIRMWARE_CHECK; else if ((strcmp(args[0].from, "FILE_MMAP") == 0) || (strcmp(args[0].from, "MMAP_CHECK") == 0)) entry->func = MMAP_CHECK; @@ -636,6 +641,8 @@ static int 
ima_parse_rule(char *rule, struct ima_rule_entry *entry) result = -EINVAL; else if (entry->func == MODULE_CHECK) ima_appraise |= IMA_APPRAISE_MODULES; + else if (entry->func == FIRMWARE_CHECK) + ima_appraise |= IMA_APPRAISE_FIRMWARE; audit_log_format(ab, "res=%d", !result); audit_log_end(ab); return result; diff --git a/security/integrity/integrity.h b/security/integrity/integrity.h index 09c440d9aaee..19b8e314ca96 100644 --- a/security/integrity/integrity.h +++ b/security/integrity/integrity.h @@ -46,10 +46,14 @@ #define IMA_BPRM_APPRAISED 0x00002000 #define IMA_MODULE_APPRAISE 0x00004000 #define IMA_MODULE_APPRAISED 0x00008000 +#define IMA_FIRMWARE_APPRAISE 0x00010000 +#define IMA_FIRMWARE_APPRAISED 0x00020000 #define IMA_APPRAISE_SUBMASK (IMA_FILE_APPRAISE | IMA_MMAP_APPRAISE | \ - IMA_BPRM_APPRAISE | IMA_MODULE_APPRAISE) + IMA_BPRM_APPRAISE | IMA_MODULE_APPRAISE | \ + IMA_FIRMWARE_APPRAISE) #define IMA_APPRAISED_SUBMASK (IMA_FILE_APPRAISED | IMA_MMAP_APPRAISED | \ - IMA_BPRM_APPRAISED | IMA_MODULE_APPRAISED) + IMA_BPRM_APPRAISED | IMA_MODULE_APPRAISED | \ + IMA_FIRMWARE_APPRAISED) enum evm_ima_xattr_type { IMA_XATTR_DIGEST = 0x01, @@ -104,6 +108,7 @@ struct integrity_iint_cache { enum integrity_status ima_mmap_status:4; enum integrity_status ima_bprm_status:4; enum integrity_status ima_module_status:4; + enum integrity_status ima_firmware_status:4; enum integrity_status evm_status:4; struct ima_digest_data *ima_hash; }; diff --git a/security/security.c b/security/security.c index 35d37d0f0d49..e41b1a8d7644 100644 --- a/security/security.c +++ b/security/security.c @@ -847,7 +847,12 @@ int security_kernel_create_files_as(struct cred *new, struct inode *inode) int security_kernel_fw_from_file(struct file *file, char *buf, size_t size) { - return security_ops->kernel_fw_from_file(file, buf, size); + int ret; + + ret = security_ops->kernel_fw_from_file(file, buf, size); + if (ret) + return ret; + return ima_fw_from_file(file, buf, size); } EXPORT_SYMBOL_GPL(security_kernel_fw_from_file); -- cgit v1.2.3-59-g8ed1b From 16369efb1f6006ec79babe53f388eed431533596 Mon Sep 17 00:00:00 2001 From: Alexander Popov Date: Wed, 25 Jun 2014 14:52:59 +0400 Subject: dmaengine: of: add common xlate function for matching by channel id This patch adds a new common OF dma xlate callback function which will match a channel by its id. The binding expects one integer argument which it will use to look up the channel by that id. Unlike of_dma_simple_xlate this function is able to handle a system with multiple DMA controllers. When registering the of dma provider with of_dma_controller_register a pointer to the dma_device struct which is associated with the dt node needs to be passed as the data parameter. The new function will use this pointer to match only channels which belong to the specified DMA controller.
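A hedged sketch of the registration pattern this enables (the probe fragment below is hypothetical; only of_dma_controller_register() and of_dma_xlate_by_chan_id() are real interfaces, and "ddev" and "np" are placeholder names for the driver's dma_device and OF node):

    /* Hypothetical DMA controller probe fragment. The controller's DT node
     * is expected to use #dma-cells = <1>, the single cell being the
     * channel id.
     */
    ret = of_dma_controller_register(np, of_dma_xlate_by_chan_id, ddev);
    if (ret)
            dev_err(ddev->dev, "failed to register OF DMA provider: %d\n", ret);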
Signed-off-by: Alexander Popov Signed-off-by: Vinod Koul --- drivers/dma/of-dma.c | 35 +++++++++++++++++++++++++++++++++++ include/linux/of_dma.h | 4 ++++ 2 files changed, 39 insertions(+) (limited to 'include/linux') diff --git a/drivers/dma/of-dma.c b/drivers/dma/of-dma.c index e8fe9dc455f4..d5fbeaa1e7ba 100644 --- a/drivers/dma/of-dma.c +++ b/drivers/dma/of-dma.c @@ -218,3 +218,38 @@ struct dma_chan *of_dma_simple_xlate(struct of_phandle_args *dma_spec, &dma_spec->args[0]); } EXPORT_SYMBOL_GPL(of_dma_simple_xlate); + +/** + * of_dma_xlate_by_chan_id - Translate dt property to DMA channel by channel id + * @dma_spec: pointer to DMA specifier as found in the device tree + * @of_dma: pointer to DMA controller data + * + * This function can be used as the of xlate callback for DMA driver which wants + * to match the channel based on the channel id. When using this xlate function + * the #dma-cells propety of the DMA controller dt node needs to be set to 1. + * The data parameter of of_dma_controller_register must be a pointer to the + * dma_device struct the function should match upon. + * + * Returns pointer to appropriate dma channel on success or NULL on error. + */ +struct dma_chan *of_dma_xlate_by_chan_id(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct dma_device *dev = ofdma->of_dma_data; + struct dma_chan *chan, *candidate = NULL; + + if (!dev || dma_spec->args_count != 1) + return NULL; + + list_for_each_entry(chan, &dev->channels, device_node) + if (chan->chan_id == dma_spec->args[0]) { + candidate = chan; + break; + } + + if (!candidate) + return NULL; + + return dma_get_slave_channel(candidate); +} +EXPORT_SYMBOL_GPL(of_dma_xlate_by_chan_id); diff --git a/include/linux/of_dma.h b/include/linux/of_dma.h index ae36298ba076..56bc026c143f 100644 --- a/include/linux/of_dma.h +++ b/include/linux/of_dma.h @@ -41,6 +41,8 @@ extern struct dma_chan *of_dma_request_slave_channel(struct device_node *np, const char *name); extern struct dma_chan *of_dma_simple_xlate(struct of_phandle_args *dma_spec, struct of_dma *ofdma); +extern struct dma_chan *of_dma_xlate_by_chan_id(struct of_phandle_args *dma_spec, + struct of_dma *ofdma); #else static inline int of_dma_controller_register(struct device_node *np, struct dma_chan *(*of_dma_xlate) @@ -66,6 +68,8 @@ static inline struct dma_chan *of_dma_simple_xlate(struct of_phandle_args *dma_s return NULL; } +#define of_dma_xlate_by_chan_id NULL + #endif #endif /* __LINUX_OF_DMA_H */ -- cgit v1.2.3-59-g8ed1b From c53c6d6a68b13b1dff2892551b56cfdc07887d9e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 15 Apr 2014 14:38:31 +0200 Subject: scatterlist: allow chaining to preallocated chunks Blk-mq drivers usually preallocate their S/G list as part of the request, but if we want to support the very large S/G lists currently supported by the SCSI code that would tie up a lot of memory in the preallocated request pool. Add support to the scatterlist code so that it can initialize a S/G list that uses a preallocated first chunks and dynamically allocated additional chunks. That way the scsi-mq code can preallocate a first page worth of S/G entries as part of the request, and dynamically extend the S/G list when needed. Signed-off-by: Christoph Hellwig Reviewed-by: Martin K. 
Petersen Reviewed-by: Hannes Reinecke Reviewed-by: Webb Scales Acked-by: Jens Axboe Tested-by: Bart Van Assche Tested-by: Robert Elliott --- drivers/scsi/scsi_lib.c | 16 +++++++--------- include/linux/scatterlist.h | 6 +++--- lib/scatterlist.c | 25 +++++++++++++++++-------- 3 files changed, 27 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 8723abeb018e..bbd7a0a08692 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -564,6 +564,11 @@ static struct scatterlist *scsi_sg_alloc(unsigned int nents, gfp_t gfp_mask) return mempool_alloc(sgp->pool, gfp_mask); } +static void scsi_free_sgtable(struct scsi_data_buffer *sdb) +{ + __sg_free_table(&sdb->table, SCSI_MAX_SG_SEGMENTS, false, scsi_sg_free); +} + static int scsi_alloc_sgtable(struct scsi_data_buffer *sdb, int nents, gfp_t gfp_mask) { @@ -572,19 +577,12 @@ static int scsi_alloc_sgtable(struct scsi_data_buffer *sdb, int nents, BUG_ON(!nents); ret = __sg_alloc_table(&sdb->table, nents, SCSI_MAX_SG_SEGMENTS, - gfp_mask, scsi_sg_alloc); + NULL, gfp_mask, scsi_sg_alloc); if (unlikely(ret)) - __sg_free_table(&sdb->table, SCSI_MAX_SG_SEGMENTS, - scsi_sg_free); - + scsi_free_sgtable(sdb); return ret; } -static void scsi_free_sgtable(struct scsi_data_buffer *sdb) -{ - __sg_free_table(&sdb->table, SCSI_MAX_SG_SEGMENTS, scsi_sg_free); -} - /* * Function: scsi_release_buffers() * diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index a964f7285600..f4ec8bbcb372 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -229,10 +229,10 @@ void sg_init_one(struct scatterlist *, const void *, unsigned int); typedef struct scatterlist *(sg_alloc_fn)(unsigned int, gfp_t); typedef void (sg_free_fn)(struct scatterlist *, unsigned int); -void __sg_free_table(struct sg_table *, unsigned int, sg_free_fn *); +void __sg_free_table(struct sg_table *, unsigned int, bool, sg_free_fn *); void sg_free_table(struct sg_table *); -int __sg_alloc_table(struct sg_table *, unsigned int, unsigned int, gfp_t, - sg_alloc_fn *); +int __sg_alloc_table(struct sg_table *, unsigned int, unsigned int, + struct scatterlist *, gfp_t, sg_alloc_fn *); int sg_alloc_table(struct sg_table *, unsigned int, gfp_t); int sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages, unsigned int n_pages, diff --git a/lib/scatterlist.c b/lib/scatterlist.c index 3a8e8e8fb2a5..b4415fceb7e7 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -165,6 +165,7 @@ static void sg_kfree(struct scatterlist *sg, unsigned int nents) * __sg_free_table - Free a previously mapped sg table * @table: The sg table header to use * @max_ents: The maximum number of entries per single scatterlist + * @skip_first_chunk: don't free the (preallocated) first scatterlist chunk * @free_fn: Free function * * Description: @@ -174,7 +175,7 @@ static void sg_kfree(struct scatterlist *sg, unsigned int nents) * **/ void __sg_free_table(struct sg_table *table, unsigned int max_ents, - sg_free_fn *free_fn) + bool skip_first_chunk, sg_free_fn *free_fn) { struct scatterlist *sgl, *next; @@ -202,7 +203,10 @@ void __sg_free_table(struct sg_table *table, unsigned int max_ents, } table->orig_nents -= sg_size; - free_fn(sgl, alloc_size); + if (!skip_first_chunk) { + free_fn(sgl, alloc_size); + skip_first_chunk = false; + } sgl = next; } @@ -217,7 +221,7 @@ EXPORT_SYMBOL(__sg_free_table); **/ void sg_free_table(struct sg_table *table) { - __sg_free_table(table, SG_MAX_SINGLE_ALLOC, sg_kfree); 
+ __sg_free_table(table, SG_MAX_SINGLE_ALLOC, false, sg_kfree); } EXPORT_SYMBOL(sg_free_table); @@ -241,8 +245,8 @@ EXPORT_SYMBOL(sg_free_table); * **/ int __sg_alloc_table(struct sg_table *table, unsigned int nents, - unsigned int max_ents, gfp_t gfp_mask, - sg_alloc_fn *alloc_fn) + unsigned int max_ents, struct scatterlist *first_chunk, + gfp_t gfp_mask, sg_alloc_fn *alloc_fn) { struct scatterlist *sg, *prv; unsigned int left; @@ -269,7 +273,12 @@ int __sg_alloc_table(struct sg_table *table, unsigned int nents, left -= sg_size; - sg = alloc_fn(alloc_size, gfp_mask); + if (first_chunk) { + sg = first_chunk; + first_chunk = NULL; + } else { + sg = alloc_fn(alloc_size, gfp_mask); + } if (unlikely(!sg)) { /* * Adjust entry count to reflect that the last @@ -324,9 +333,9 @@ int sg_alloc_table(struct sg_table *table, unsigned int nents, gfp_t gfp_mask) int ret; ret = __sg_alloc_table(table, nents, SG_MAX_SINGLE_ALLOC, - gfp_mask, sg_kmalloc); + NULL, gfp_mask, sg_kmalloc); if (unlikely(ret)) - __sg_free_table(table, SG_MAX_SINGLE_ALLOC, sg_kfree); + __sg_free_table(table, SG_MAX_SINGLE_ALLOC, false, sg_kfree); return ret; } -- cgit v1.2.3-59-g8ed1b From 86be408bfbd846fab3c4ac21d6f9298bd2e4b790 Mon Sep 17 00:00:00 2001 From: Sylwester Nawrocki Date: Wed, 18 Jun 2014 17:29:32 +0200 Subject: clk: Support for clock parents and rates assigned from device tree This patch adds helper functions to configure clock parents and rates as specified through 'assigned-clock-parents', 'assigned-clock-rates' DT properties for a clock provider or clock consumer device. The helpers are now being called by the bus code for the platform, I2C and SPI busses, before the driver probing and also in the clock core after registration of a clock provider. Signed-off-by: Sylwester Nawrocki Acked-by: Kyungmin Park Signed-off-by: Mike Turquette --- .../devicetree/bindings/clock/clock-bindings.txt | 36 ++++++ drivers/base/platform.c | 5 + drivers/clk/Makefile | 3 + drivers/clk/clk-conf.c | 143 +++++++++++++++++++++ drivers/clk/clk.c | 12 +- drivers/i2c/i2c-core.c | 5 + drivers/spi/spi.c | 5 + include/linux/clk/clk-conf.h | 20 +++ 8 files changed, 227 insertions(+), 2 deletions(-) create mode 100644 drivers/clk/clk-conf.c create mode 100644 include/linux/clk/clk-conf.h (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/clock/clock-bindings.txt b/Documentation/devicetree/bindings/clock/clock-bindings.txt index f15787817d6b..06fc6d541c89 100644 --- a/Documentation/devicetree/bindings/clock/clock-bindings.txt +++ b/Documentation/devicetree/bindings/clock/clock-bindings.txt @@ -131,3 +131,39 @@ clock signal, and a UART. ("pll" and "pll-switched"). * The UART has its baud clock connected the external oscillator and its register clock connected to the PLL clock (the "pll-switched" signal) + +==Assigned clock parents and rates== + +Some platforms may require initial configuration of default parent clocks +and clock frequencies. Such a configuration can be specified in a device tree +node through assigned-clocks, assigned-clock-parents and assigned-clock-rates +properties. The assigned-clock-parents property should contain a list of parent +clocks in form of phandle and clock specifier pairs, the assigned-clock-parents +property the list of assigned clock frequency values - corresponding to clocks +listed in the assigned-clocks property. + +To skip setting parent or rate of a clock its corresponding entry should be +set to 0, or can be omitted if it is not followed by any non-zero entry. 
+ + uart@a000 { + compatible = "fsl,imx-uart"; + reg = <0xa000 0x1000>; + ... + clocks = <&osc 0>, <&pll 1>; + clock-names = "baud", "register"; + + assigned-clocks = <&clkcon 0>, <&pll 2>; + assigned-clock-parents = <&pll 2>; + assigned-clock-rates = <0>, <460800>; + }; + +In this example the <&pll 2> clock is set as parent of clock <&clkcon 0> and +the <&pll 2> clock is assigned a frequency value of 460800 Hz. + +Configuring a clock's parent and rate through the device node that consumes +the clock can be done only for clocks that have a single user. Specifying +conflicting parent or rate configuration in multiple consumer nodes for +a shared clock is forbidden. + +Configuration of common clocks, which affect multiple consumer devices can +be similarly specified in the clock provider node. diff --git a/drivers/base/platform.c b/drivers/base/platform.c index 9e9227e1762d..ac47643b1b69 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -23,6 +23,7 @@ #include #include #include +#include #include "base.h" #include "power/power.h" @@ -489,6 +490,10 @@ static int platform_drv_probe(struct device *_dev) struct platform_device *dev = to_platform_device(_dev); int ret; + ret = of_clk_set_defaults(_dev->of_node, false); + if (ret < 0) + return ret; + acpi_dev_pm_attach(_dev, true); ret = drv->probe(dev); diff --git a/drivers/clk/Makefile b/drivers/clk/Makefile index 312742c10661..d5d325f28016 100644 --- a/drivers/clk/Makefile +++ b/drivers/clk/Makefile @@ -9,6 +9,9 @@ obj-$(CONFIG_COMMON_CLK) += clk-gate.o obj-$(CONFIG_COMMON_CLK) += clk-mux.o obj-$(CONFIG_COMMON_CLK) += clk-composite.o obj-$(CONFIG_COMMON_CLK) += clk-fractional-divider.o +ifeq ($(CONFIG_OF), y) +obj-$(CONFIG_COMMON_CLK) += clk-conf.o +endif # hardware specific clock types # please keep this section sorted lexicographically by file/directory path name diff --git a/drivers/clk/clk-conf.c b/drivers/clk/clk-conf.c new file mode 100644 index 000000000000..1f73019a27c8 --- /dev/null +++ b/drivers/clk/clk-conf.c @@ -0,0 +1,143 @@ +/* + * Copyright (C) 2014 Samsung Electronics Co., Ltd. + * Sylwester Nawrocki + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include +#include +#include +#include +#include +#include +#include "clk.h" + +static int __set_clk_parents(struct device_node *node, bool clk_supplier) +{ + struct of_phandle_args clkspec; + int index, rc, num_parents; + struct clk *clk, *pclk; + + num_parents = of_count_phandle_with_args(node, "assigned-clock-parents", + "#clock-cells"); + if (num_parents == -EINVAL) + pr_err("clk: invalid value of clock-parents property at %s\n", + node->full_name); + + for (index = 0; index < num_parents; index++) { + rc = of_parse_phandle_with_args(node, "assigned-clock-parents", + "#clock-cells", index, &clkspec); + if (rc < 0) { + /* skip empty (null) phandles */ + if (rc == -ENOENT) + continue; + else + return rc; + } + if (clkspec.np == node && !clk_supplier) + return 0; + pclk = of_clk_get_by_clkspec(&clkspec); + if (IS_ERR(pclk)) { + pr_warn("clk: couldn't get parent clock %d for %s\n", + index, node->full_name); + return PTR_ERR(pclk); + } + + rc = of_parse_phandle_with_args(node, "assigned-clocks", + "#clock-cells", index, &clkspec); + if (rc < 0) + goto err; + if (clkspec.np == node && !clk_supplier) { + rc = 0; + goto err; + } + clk = of_clk_get_by_clkspec(&clkspec); + if (IS_ERR(pclk)) { + pr_warn("clk: couldn't get parent clock %d for %s\n", + index, node->full_name); + rc = PTR_ERR(pclk); + goto err; + } + + rc = clk_set_parent(clk, pclk); + if (rc < 0) + pr_err("clk: failed to reparent %s to %s: %d\n", + __clk_get_name(clk), __clk_get_name(pclk), rc); + clk_put(clk); + clk_put(pclk); + } + return 0; +err: + clk_put(pclk); + return rc; +} + +static int __set_clk_rates(struct device_node *node, bool clk_supplier) +{ + struct of_phandle_args clkspec; + struct property *prop; + const __be32 *cur; + int rc, index = 0; + struct clk *clk; + u32 rate; + + of_property_for_each_u32(node, "assigned-clock-rates", prop, cur, rate) { + if (rate) { + rc = of_parse_phandle_with_args(node, "assigned-clocks", + "#clock-cells", index, &clkspec); + if (rc < 0) { + /* skip empty (null) phandles */ + if (rc == -ENOENT) + continue; + else + return rc; + } + if (clkspec.np == node && !clk_supplier) + return 0; + + clk = of_clk_get_by_clkspec(&clkspec); + if (IS_ERR(clk)) { + pr_warn("clk: couldn't get clock %d for %s\n", + index, node->full_name); + return PTR_ERR(clk); + } + + rc = clk_set_rate(clk, rate); + if (rc < 0) + pr_err("clk: couldn't set %s clock rate: %d\n", + __clk_get_name(clk), rc); + clk_put(clk); + } + index++; + } + return 0; +} + +/** + * of_clk_set_defaults() - parse and set assigned clocks configuration + * @node: device node to apply clock settings for + * @clk_supplier: true if clocks supplied by @node should also be considered + * + * This function parses 'assigned-{clocks/clock-parents/clock-rates}' properties + * and sets any specified clock parents and rates. The @clk_supplier argument + * should be set to true if @node may be also a clock supplier of any clock + * listed in its 'assigned-clocks' or 'assigned-clock-parents' properties. + * If @clk_supplier is false the function exits returnning 0 as soon as it + * determines the @node is also a supplier of any of the clocks. 
+ */ +int of_clk_set_defaults(struct device_node *node, bool clk_supplier) +{ + int rc; + + if (!node) + return 0; + + rc = __set_clk_parents(node, clk_supplier); + if (rc < 0) + return rc; + + return __set_clk_rates(node, clk_supplier); +} diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 9ad397050471..f95590a1e28e 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -10,6 +10,7 @@ */ #include +#include #include #include #include @@ -2382,6 +2383,7 @@ int of_clk_add_provider(struct device_node *np, void *data) { struct of_clk_provider *cp; + int ret; cp = kzalloc(sizeof(struct of_clk_provider), GFP_KERNEL); if (!cp) @@ -2396,7 +2398,11 @@ int of_clk_add_provider(struct device_node *np, mutex_unlock(&of_clk_mutex); pr_debug("Added clock from %s\n", np->full_name); - return 0; + ret = of_clk_set_defaults(np, true); + if (ret < 0) + of_clk_del_provider(np); + + return ret; } EXPORT_SYMBOL_GPL(of_clk_add_provider); @@ -2573,7 +2579,10 @@ void __init of_clk_init(const struct of_device_id *matches) list_for_each_entry_safe(clk_provider, next, &clk_provider_list, node) { if (force || parent_ready(clk_provider->np)) { + clk_provider->clk_init_cb(clk_provider->np); + of_clk_set_defaults(clk_provider->np, true); + list_del(&clk_provider->node); kfree(clk_provider); is_init_done = true; @@ -2588,7 +2597,6 @@ void __init of_clk_init(const struct of_device_id *matches) */ if (!is_init_done) force = true; - } } #endif diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c index 7c7f4b856bad..66aa83b99383 100644 --- a/drivers/i2c/i2c-core.c +++ b/drivers/i2c/i2c-core.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include #include @@ -274,6 +275,10 @@ static int i2c_device_probe(struct device *dev) client->flags & I2C_CLIENT_WAKE); dev_dbg(dev, "probe\n"); + status = of_clk_set_defaults(dev->of_node, false); + if (status < 0) + return status; + acpi_dev_pm_attach(&client->dev, true); status = driver->probe(client, i2c_match_id(driver->id_table, client)); if (status) diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index d4f9670b51bc..22aa41cace82 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -259,6 +260,10 @@ static int spi_drv_probe(struct device *dev) const struct spi_driver *sdrv = to_spi_driver(dev->driver); int ret; + ret = of_clk_set_defaults(dev->of_node, false); + if (ret) + return ret; + acpi_dev_pm_attach(dev, true); ret = sdrv->probe(to_spi_device(dev)); if (ret) diff --git a/include/linux/clk/clk-conf.h b/include/linux/clk/clk-conf.h new file mode 100644 index 000000000000..f3050e15f833 --- /dev/null +++ b/include/linux/clk/clk-conf.h @@ -0,0 +1,20 @@ +/* + * Copyright (C) 2014 Samsung Electronics Co., Ltd. + * Sylwester Nawrocki + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +struct device_node; + +#if defined(CONFIG_OF) && defined(CONFIG_COMMON_CLK) +int of_clk_set_defaults(struct device_node *node, bool clk_supplier); +#else +static inline int of_clk_set_defaults(struct device_node *node, + bool clk_supplier) +{ + return 0; +} +#endif -- cgit v1.2.3-59-g8ed1b From 3a611c3cfba2106aed3187b90903855e776e2761 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Sun, 27 Jul 2014 07:23:01 +0930 Subject: modules: Fix build error in moduleloader.h Fengguang Wu's build bot detected that if moduleloader.h is included in a C file (used by ftrace and kprobes to access module_alloc() when available), that it can fail to build if CONFIG_MODULES and CONFIG_MODULES_USE_ELF_REL is not defined. This is because there's a printk() that dereferences struct module to print the name of the module. But as struct module does not exist when CONFIG_MODULES is not defined we get this error: include/linux/moduleloader.h: In function 'apply_relocate': >> include/linux/moduleloader.h:48:63: error: dereferencing pointer to >> incomplete type printk(KERN_ERR "module %s: REL relocation unsupported\n", me->name); ^ Reported-by: kbuild test robot Based-on-the-true-story-by: Steven Rostedt Confirms-rustys-story-ends-the-same-by: Steven Rostedt Signed-off-by: Rusty Russell --- include/linux/moduleloader.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/moduleloader.h b/include/linux/moduleloader.h index 560ca53a75fa..7eeb9bbfb816 100644 --- a/include/linux/moduleloader.h +++ b/include/linux/moduleloader.h @@ -45,7 +45,8 @@ static inline int apply_relocate(Elf_Shdr *sechdrs, unsigned int relsec, struct module *me) { - printk(KERN_ERR "module %s: REL relocation unsupported\n", me->name); + printk(KERN_ERR "module %s: REL relocation unsupported\n", + module_name(me)); return -ENOEXEC; } #endif @@ -67,7 +68,8 @@ static inline int apply_relocate_add(Elf_Shdr *sechdrs, unsigned int relsec, struct module *me) { - printk(KERN_ERR "module %s: REL relocation unsupported\n", me->name); + printk(KERN_ERR "module %s: REL relocation unsupported\n", + module_name(me)); return -ENOEXEC; } #endif -- cgit v1.2.3-59-g8ed1b From 9b20a352d78a7651aa68a9220f77ccb03009d892 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Sun, 27 Jul 2014 07:24:01 +0930 Subject: module: add within_module() function It is just a small optimization that allows to replace few occurrences of within_module_init() || within_module_core() with a single call. Signed-off-by: Petr Mladek Signed-off-by: Rusty Russell --- include/linux/module.h | 5 +++++ kernel/module.c | 12 ++++-------- 2 files changed, 9 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index f520a767c86c..61d8fb2d0873 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -408,6 +408,11 @@ static inline int within_module_init(unsigned long addr, const struct module *mo addr < (unsigned long)mod->module_init + mod->init_size; } +static inline int within_module(unsigned long addr, const struct module *mod) +{ + return within_module_init(addr, mod) || within_module_core(addr, mod); +} + /* Search for module by name: must hold module_mutex. 
*/ struct module *find_module(const char *name); diff --git a/kernel/module.c b/kernel/module.c index 81e727cf6df9..e87fdd2fc3c2 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -3448,8 +3448,7 @@ const char *module_address_lookup(unsigned long addr, list_for_each_entry_rcu(mod, &modules, list) { if (mod->state == MODULE_STATE_UNFORMED) continue; - if (within_module_init(addr, mod) || - within_module_core(addr, mod)) { + if (within_module(addr, mod)) { if (modname) *modname = mod->name; ret = get_ksymbol(mod, addr, size, offset); @@ -3473,8 +3472,7 @@ int lookup_module_symbol_name(unsigned long addr, char *symname) list_for_each_entry_rcu(mod, &modules, list) { if (mod->state == MODULE_STATE_UNFORMED) continue; - if (within_module_init(addr, mod) || - within_module_core(addr, mod)) { + if (within_module(addr, mod)) { const char *sym; sym = get_ksymbol(mod, addr, NULL, NULL); @@ -3499,8 +3497,7 @@ int lookup_module_symbol_attrs(unsigned long addr, unsigned long *size, list_for_each_entry_rcu(mod, &modules, list) { if (mod->state == MODULE_STATE_UNFORMED) continue; - if (within_module_init(addr, mod) || - within_module_core(addr, mod)) { + if (within_module(addr, mod)) { const char *sym; sym = get_ksymbol(mod, addr, size, offset); @@ -3764,8 +3761,7 @@ struct module *__module_address(unsigned long addr) list_for_each_entry_rcu(mod, &modules, list) { if (mod->state == MODULE_STATE_UNFORMED) continue; - if (within_module_core(addr, mod) - || within_module_init(addr, mod)) + if (within_module(addr, mod)) return mod; } return NULL; -- cgit v1.2.3-59-g8ed1b From 76681c8faa07f9e07caa3cc69f235c8719b2a6ea Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Sun, 27 Jul 2014 07:25:01 +0930 Subject: module: return bool from within_module*() The within_module*() functions return only true or false. Let's use bool as the return type. Note that it should not change kABI because these are inline functions. Signed-off-by: Petr Mladek Signed-off-by: Rusty Russell --- include/linux/module.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index 61d8fb2d0873..71f282a4e307 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -396,19 +396,21 @@ bool is_module_address(unsigned long addr); bool is_module_percpu_address(unsigned long addr); bool is_module_text_address(unsigned long addr); -static inline int within_module_core(unsigned long addr, const struct module *mod) +static inline bool within_module_core(unsigned long addr, + const struct module *mod) { return (unsigned long)mod->module_core <= addr && addr < (unsigned long)mod->module_core + mod->core_size; } -static inline int within_module_init(unsigned long addr, const struct module *mod) +static inline bool within_module_init(unsigned long addr, + const struct module *mod) { return (unsigned long)mod->module_init <= addr && addr < (unsigned long)mod->module_init + mod->init_size; } -static inline int within_module(unsigned long addr, const struct module *mod) +static inline bool within_module(unsigned long addr, const struct module *mod) { return within_module_init(addr, mod) || within_module_core(addr, mod); } -- cgit v1.2.3-59-g8ed1b From 37549e94c77a94a9c32b5ae3313a3801cb66adf9 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Sun, 27 Jul 2014 07:26:01 +0930 Subject: sysfs: disallow world-writable files. 
This check was introduced in 2006 by Alexey Dobriyan (9774a1f54f173) for module parameters; we removed it when we unified the check into VERIFY_OCTAL_PERMISSIONS() as sysfs didn't have the same requirement. Now all those users are fixed, reintroduce it. Cc: Alexey Dobriyan Cc: Dave Jones Cc: Joe Perches Signed-off-by: Rusty Russell --- include/linux/kernel.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 4c52907a6d8b..43e1c6a9683e 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -849,5 +849,7 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } /* User perms >= group perms >= other perms */ \ BUILD_BUG_ON_ZERO(((perms) >> 6) < (((perms) >> 3) & 7)) + \ BUILD_BUG_ON_ZERO((((perms) >> 3) & 7) < ((perms) & 7)) + \ + /* Other writable? Generally considered a bad idea. */ \ + BUILD_BUG_ON_ZERO((perms) & 2) + \ (perms)) #endif -- cgit v1.2.3-59-g8ed1b From 9cb42e2a8ed06e91ce9d2c59fbae8d1185ebe2f7 Mon Sep 17 00:00:00 2001 From: "Opensource [Steve Twiss]" Date: Mon, 21 Jul 2014 11:39:33 +0100 Subject: mfd: da9063: Add support for AD silicon variant Add register definitions for DA9063 AD (0x3) silicon variant ID the ability to choose the silicon variant at run-time using regmap configuration. This patch also adds RTC support for the AD silicon changes. It adds both BB and AD support as regmap ranges and then makes the distinction between the two tables at run-time. This allows both AD and BB silicon variants to be supported at the same time. Suggested-by: Philipp Zabel Signed-off-by: Opensource [Steve Twiss] Signed-off-by: Lee Jones --- drivers/mfd/da9063-core.c | 6 +- drivers/mfd/da9063-i2c.c | 134 ++++++++++++++++++++++++++++------- drivers/rtc/rtc-da9063.c | 54 +++++++++----- include/linux/mfd/da9063/core.h | 3 +- include/linux/mfd/da9063/registers.h | 129 +++++++++++++++++++++------------ 5 files changed, 236 insertions(+), 90 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/da9063-core.c b/drivers/mfd/da9063-core.c index e70ae315abc7..93db8bb8c8f0 100644 --- a/drivers/mfd/da9063-core.c +++ b/drivers/mfd/da9063-core.c @@ -153,9 +153,9 @@ int da9063_device_init(struct da9063 *da9063, unsigned int irq) "Device detected (chip-ID: 0x%02X, var-ID: 0x%02X)\n", model, variant_id); - if (variant_code != PMIC_DA9063_BB) { - dev_err(da9063->dev, "Unknown chip variant code: 0x%02X\n", - variant_code); + if (variant_code < PMIC_DA9063_BB && variant_code != PMIC_DA9063_AD) { + dev_err(da9063->dev, + "Cannot support variant code: 0x%02X\n", variant_code); return -ENODEV; } diff --git a/drivers/mfd/da9063-i2c.c b/drivers/mfd/da9063-i2c.c index 8db5c805c64f..21fd8d9a217b 100644 --- a/drivers/mfd/da9063-i2c.c +++ b/drivers/mfd/da9063-i2c.c @@ -25,10 +25,10 @@ #include #include -static const struct regmap_range da9063_readable_ranges[] = { +static const struct regmap_range da9063_ad_readable_ranges[] = { { .range_min = DA9063_REG_PAGE_CON, - .range_max = DA9063_REG_SECOND_D, + .range_max = DA9063_AD_REG_SECOND_D, }, { .range_min = DA9063_REG_SEQ, .range_max = DA9063_REG_ID_32_31, @@ -37,14 +37,14 @@ static const struct regmap_range da9063_readable_ranges[] = { .range_max = DA9063_REG_AUTO3_LOW, }, { .range_min = DA9063_REG_T_OFFSET, - .range_max = DA9063_REG_GP_ID_19, + .range_max = DA9063_AD_REG_GP_ID_19, }, { .range_min = DA9063_REG_CHIP_ID, .range_max = DA9063_REG_CHIP_VARIANT, }, }; -static const struct regmap_range da9063_writeable_ranges[] = { +static const 
struct regmap_range da9063_ad_writeable_ranges[] = { { .range_min = DA9063_REG_PAGE_CON, .range_max = DA9063_REG_PAGE_CON, @@ -53,7 +53,7 @@ static const struct regmap_range da9063_writeable_ranges[] = { .range_max = DA9063_REG_VSYS_MON, }, { .range_min = DA9063_REG_COUNT_S, - .range_max = DA9063_REG_ALARM_Y, + .range_max = DA9063_AD_REG_ALARM_Y, }, { .range_min = DA9063_REG_SEQ, .range_max = DA9063_REG_ID_32_31, @@ -62,14 +62,14 @@ static const struct regmap_range da9063_writeable_ranges[] = { .range_max = DA9063_REG_AUTO3_LOW, }, { .range_min = DA9063_REG_CONFIG_I, - .range_max = DA9063_REG_MON_REG_4, + .range_max = DA9063_AD_REG_MON_REG_4, }, { - .range_min = DA9063_REG_GP_ID_0, - .range_max = DA9063_REG_GP_ID_19, + .range_min = DA9063_AD_REG_GP_ID_0, + .range_max = DA9063_AD_REG_GP_ID_19, }, }; -static const struct regmap_range da9063_volatile_ranges[] = { +static const struct regmap_range da9063_ad_volatile_ranges[] = { { .range_min = DA9063_REG_STATUS_A, .range_max = DA9063_REG_EVENT_D, @@ -81,26 +81,104 @@ static const struct regmap_range da9063_volatile_ranges[] = { .range_max = DA9063_REG_ADC_MAN, }, { .range_min = DA9063_REG_ADC_RES_L, - .range_max = DA9063_REG_SECOND_D, + .range_max = DA9063_AD_REG_SECOND_D, }, { - .range_min = DA9063_REG_MON_REG_5, - .range_max = DA9063_REG_MON_REG_6, + .range_min = DA9063_AD_REG_MON_REG_5, + .range_max = DA9063_AD_REG_MON_REG_6, }, }; -static const struct regmap_access_table da9063_readable_table = { - .yes_ranges = da9063_readable_ranges, - .n_yes_ranges = ARRAY_SIZE(da9063_readable_ranges), +static const struct regmap_access_table da9063_ad_readable_table = { + .yes_ranges = da9063_ad_readable_ranges, + .n_yes_ranges = ARRAY_SIZE(da9063_ad_readable_ranges), }; -static const struct regmap_access_table da9063_writeable_table = { - .yes_ranges = da9063_writeable_ranges, - .n_yes_ranges = ARRAY_SIZE(da9063_writeable_ranges), +static const struct regmap_access_table da9063_ad_writeable_table = { + .yes_ranges = da9063_ad_writeable_ranges, + .n_yes_ranges = ARRAY_SIZE(da9063_ad_writeable_ranges), }; -static const struct regmap_access_table da9063_volatile_table = { - .yes_ranges = da9063_volatile_ranges, - .n_yes_ranges = ARRAY_SIZE(da9063_volatile_ranges), +static const struct regmap_access_table da9063_ad_volatile_table = { + .yes_ranges = da9063_ad_volatile_ranges, + .n_yes_ranges = ARRAY_SIZE(da9063_ad_volatile_ranges), +}; + +static const struct regmap_range da9063_bb_readable_ranges[] = { + { + .range_min = DA9063_REG_PAGE_CON, + .range_max = DA9063_BB_REG_SECOND_D, + }, { + .range_min = DA9063_REG_SEQ, + .range_max = DA9063_REG_ID_32_31, + }, { + .range_min = DA9063_REG_SEQ_A, + .range_max = DA9063_REG_AUTO3_LOW, + }, { + .range_min = DA9063_REG_T_OFFSET, + .range_max = DA9063_BB_REG_GP_ID_19, + }, { + .range_min = DA9063_REG_CHIP_ID, + .range_max = DA9063_REG_CHIP_VARIANT, + }, +}; + +static const struct regmap_range da9063_bb_writeable_ranges[] = { + { + .range_min = DA9063_REG_PAGE_CON, + .range_max = DA9063_REG_PAGE_CON, + }, { + .range_min = DA9063_REG_FAULT_LOG, + .range_max = DA9063_REG_VSYS_MON, + }, { + .range_min = DA9063_REG_COUNT_S, + .range_max = DA9063_BB_REG_ALARM_Y, + }, { + .range_min = DA9063_REG_SEQ, + .range_max = DA9063_REG_ID_32_31, + }, { + .range_min = DA9063_REG_SEQ_A, + .range_max = DA9063_REG_AUTO3_LOW, + }, { + .range_min = DA9063_REG_CONFIG_I, + .range_max = DA9063_BB_REG_MON_REG_4, + }, { + .range_min = DA9063_BB_REG_GP_ID_0, + .range_max = DA9063_BB_REG_GP_ID_19, + }, +}; + +static const struct regmap_range 
da9063_bb_volatile_ranges[] = { + { + .range_min = DA9063_REG_STATUS_A, + .range_max = DA9063_REG_EVENT_D, + }, { + .range_min = DA9063_REG_CONTROL_F, + .range_max = DA9063_REG_CONTROL_F, + }, { + .range_min = DA9063_REG_ADC_MAN, + .range_max = DA9063_REG_ADC_MAN, + }, { + .range_min = DA9063_REG_ADC_RES_L, + .range_max = DA9063_BB_REG_SECOND_D, + }, { + .range_min = DA9063_BB_REG_MON_REG_5, + .range_max = DA9063_BB_REG_MON_REG_6, + }, +}; + +static const struct regmap_access_table da9063_bb_readable_table = { + .yes_ranges = da9063_bb_readable_ranges, + .n_yes_ranges = ARRAY_SIZE(da9063_bb_readable_ranges), +}; + +static const struct regmap_access_table da9063_bb_writeable_table = { + .yes_ranges = da9063_bb_writeable_ranges, + .n_yes_ranges = ARRAY_SIZE(da9063_bb_writeable_ranges), +}; + +static const struct regmap_access_table da9063_bb_volatile_table = { + .yes_ranges = da9063_bb_volatile_ranges, + .n_yes_ranges = ARRAY_SIZE(da9063_bb_volatile_ranges), }; static const struct regmap_range_cfg da9063_range_cfg[] = { @@ -123,10 +201,6 @@ static struct regmap_config da9063_regmap_config = { .max_register = DA9063_REG_CHIP_VARIANT, .cache_type = REGCACHE_RBTREE, - - .rd_table = &da9063_readable_table, - .wr_table = &da9063_writeable_table, - .volatile_table = &da9063_volatile_table, }; static int da9063_i2c_probe(struct i2c_client *i2c, @@ -143,6 +217,16 @@ static int da9063_i2c_probe(struct i2c_client *i2c, da9063->dev = &i2c->dev; da9063->chip_irq = i2c->irq; + if (da9063->variant_code == PMIC_DA9063_AD) { + da9063_regmap_config.rd_table = &da9063_ad_readable_table; + da9063_regmap_config.wr_table = &da9063_ad_writeable_table; + da9063_regmap_config.volatile_table = &da9063_ad_volatile_table; + } else { + da9063_regmap_config.rd_table = &da9063_bb_readable_table; + da9063_regmap_config.wr_table = &da9063_bb_writeable_table; + da9063_regmap_config.volatile_table = &da9063_bb_volatile_table; + } + da9063->regmap = devm_regmap_init_i2c(i2c, &da9063_regmap_config); if (IS_ERR(da9063->regmap)) { ret = PTR_ERR(da9063->regmap); diff --git a/drivers/rtc/rtc-da9063.c b/drivers/rtc/rtc-da9063.c index 595393098b09..731ed1a97f59 100644 --- a/drivers/rtc/rtc-da9063.c +++ b/drivers/rtc/rtc-da9063.c @@ -29,6 +29,8 @@ #define YEARS_FROM_DA9063(year) ((year) + 100) #define MONTHS_FROM_DA9063(month) ((month) - 1) +#define RTC_ALARM_DATA_LEN (DA9063_AD_REG_ALARM_Y - DA9063_AD_REG_ALARM_MI + 1) + #define RTC_DATA_LEN (DA9063_REG_COUNT_Y - DA9063_REG_COUNT_S + 1) #define RTC_SEC 0 #define RTC_MIN 1 @@ -42,6 +44,10 @@ struct da9063_rtc { struct da9063 *hw; struct rtc_time alarm_time; bool rtc_sync; + int alarm_year; + int alarm_start; + int alarm_len; + int data_start; }; static void da9063_data_to_tm(u8 *data, struct rtc_time *tm) @@ -83,7 +89,7 @@ static int da9063_rtc_stop_alarm(struct device *dev) { struct da9063_rtc *rtc = dev_get_drvdata(dev); - return regmap_update_bits(rtc->hw->regmap, DA9063_REG_ALARM_Y, + return regmap_update_bits(rtc->hw->regmap, rtc->alarm_year, DA9063_ALARM_ON, 0); } @@ -91,7 +97,7 @@ static int da9063_rtc_start_alarm(struct device *dev) { struct da9063_rtc *rtc = dev_get_drvdata(dev); - return regmap_update_bits(rtc->hw->regmap, DA9063_REG_ALARM_Y, + return regmap_update_bits(rtc->hw->regmap, rtc->alarm_year, DA9063_ALARM_ON, DA9063_ALARM_ON); } @@ -151,8 +157,9 @@ static int da9063_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) int ret; unsigned int val; - ret = regmap_bulk_read(rtc->hw->regmap, DA9063_REG_ALARM_S, - &data[RTC_SEC], RTC_DATA_LEN); + data[RTC_SEC] = 
0; + ret = regmap_bulk_read(rtc->hw->regmap, rtc->alarm_start, + &data[rtc->data_start], rtc->alarm_len); if (ret < 0) return ret; @@ -186,14 +193,14 @@ static int da9063_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) return ret; } - ret = regmap_bulk_write(rtc->hw->regmap, DA9063_REG_ALARM_S, - data, RTC_DATA_LEN); + ret = regmap_bulk_write(rtc->hw->regmap, rtc->alarm_start, + &data[rtc->data_start], rtc->alarm_len); if (ret < 0) { dev_err(dev, "Failed to write alarm: %d\n", ret); return ret; } - rtc->alarm_time = alrm->time; + da9063_data_to_tm(data, &rtc->alarm_time); if (alrm->enabled) { ret = da9063_rtc_start_alarm(dev); @@ -218,7 +225,7 @@ static irqreturn_t da9063_alarm_event(int irq, void *data) { struct da9063_rtc *rtc = data; - regmap_update_bits(rtc->hw->regmap, DA9063_REG_ALARM_Y, + regmap_update_bits(rtc->hw->regmap, rtc->alarm_year, DA9063_ALARM_ON, 0); rtc->rtc_sync = true; @@ -257,7 +264,23 @@ static int da9063_rtc_probe(struct platform_device *pdev) goto err; } - ret = regmap_update_bits(da9063->regmap, DA9063_REG_ALARM_S, + rtc = devm_kzalloc(&pdev->dev, sizeof(*rtc), GFP_KERNEL); + if (!rtc) + return -ENOMEM; + + if (da9063->variant_code == PMIC_DA9063_AD) { + rtc->alarm_year = DA9063_AD_REG_ALARM_Y; + rtc->alarm_start = DA9063_AD_REG_ALARM_MI; + rtc->alarm_len = RTC_ALARM_DATA_LEN; + rtc->data_start = RTC_MIN; + } else { + rtc->alarm_year = DA9063_BB_REG_ALARM_Y; + rtc->alarm_start = DA9063_BB_REG_ALARM_S; + rtc->alarm_len = RTC_DATA_LEN; + rtc->data_start = RTC_SEC; + } + + ret = regmap_update_bits(da9063->regmap, rtc->alarm_start, DA9063_ALARM_STATUS_TICK | DA9063_ALARM_STATUS_ALARM, 0); if (ret < 0) { @@ -265,7 +288,7 @@ static int da9063_rtc_probe(struct platform_device *pdev) goto err; } - ret = regmap_update_bits(da9063->regmap, DA9063_REG_ALARM_S, + ret = regmap_update_bits(da9063->regmap, rtc->alarm_start, DA9063_ALARM_STATUS_ALARM, DA9063_ALARM_STATUS_ALARM); if (ret < 0) { @@ -273,25 +296,22 @@ static int da9063_rtc_probe(struct platform_device *pdev) goto err; } - ret = regmap_update_bits(da9063->regmap, DA9063_REG_ALARM_Y, + ret = regmap_update_bits(da9063->regmap, rtc->alarm_year, DA9063_TICK_ON, 0); if (ret < 0) { dev_err(&pdev->dev, "Failed to disable TICKs\n"); goto err; } - ret = regmap_bulk_read(da9063->regmap, DA9063_REG_ALARM_S, - data, RTC_DATA_LEN); + data[RTC_SEC] = 0; + ret = regmap_bulk_read(da9063->regmap, rtc->alarm_start, + &data[rtc->data_start], rtc->alarm_len); if (ret < 0) { dev_err(&pdev->dev, "Failed to read initial alarm data: %d\n", ret); goto err; } - rtc = devm_kzalloc(&pdev->dev, sizeof(*rtc), GFP_KERNEL); - if (!rtc) - return -ENOMEM; - platform_set_drvdata(pdev, rtc); irq_alarm = platform_get_irq_byname(pdev, "ALARM"); diff --git a/include/linux/mfd/da9063/core.h b/include/linux/mfd/da9063/core.h index 00a9aac5d1e8..b92a3262f8f6 100644 --- a/include/linux/mfd/da9063/core.h +++ b/include/linux/mfd/da9063/core.h @@ -34,7 +34,8 @@ enum da9063_models { }; enum da9063_variant_codes { - PMIC_DA9063_BB = 0x5 + PMIC_DA9063_AD = 0x3, + PMIC_DA9063_BB = 0x5, }; /* Interrupts */ diff --git a/include/linux/mfd/da9063/registers.h b/include/linux/mfd/da9063/registers.h index 09a85c699da1..2e0ba6d5fbc3 100644 --- a/include/linux/mfd/da9063/registers.h +++ b/include/linux/mfd/da9063/registers.h @@ -104,16 +104,27 @@ #define DA9063_REG_COUNT_D 0x43 #define DA9063_REG_COUNT_MO 0x44 #define DA9063_REG_COUNT_Y 0x45 -#define DA9063_REG_ALARM_S 0x46 -#define DA9063_REG_ALARM_MI 0x47 -#define DA9063_REG_ALARM_H 0x48 -#define 
DA9063_REG_ALARM_D 0x49 -#define DA9063_REG_ALARM_MO 0x4A -#define DA9063_REG_ALARM_Y 0x4B -#define DA9063_REG_SECOND_A 0x4C -#define DA9063_REG_SECOND_B 0x4D -#define DA9063_REG_SECOND_C 0x4E -#define DA9063_REG_SECOND_D 0x4F + +#define DA9063_AD_REG_ALARM_MI 0x46 +#define DA9063_AD_REG_ALARM_H 0x47 +#define DA9063_AD_REG_ALARM_D 0x48 +#define DA9063_AD_REG_ALARM_MO 0x49 +#define DA9063_AD_REG_ALARM_Y 0x4A +#define DA9063_AD_REG_SECOND_A 0x4B +#define DA9063_AD_REG_SECOND_B 0x4C +#define DA9063_AD_REG_SECOND_C 0x4D +#define DA9063_AD_REG_SECOND_D 0x4E + +#define DA9063_BB_REG_ALARM_S 0x46 +#define DA9063_BB_REG_ALARM_MI 0x47 +#define DA9063_BB_REG_ALARM_H 0x48 +#define DA9063_BB_REG_ALARM_D 0x49 +#define DA9063_BB_REG_ALARM_MO 0x4A +#define DA9063_BB_REG_ALARM_Y 0x4B +#define DA9063_BB_REG_SECOND_A 0x4C +#define DA9063_BB_REG_SECOND_B 0x4D +#define DA9063_BB_REG_SECOND_C 0x4E +#define DA9063_BB_REG_SECOND_D 0x4F /* Sequencer Control Registers */ #define DA9063_REG_SEQ 0x81 @@ -223,37 +234,67 @@ #define DA9063_REG_CONFIG_J 0x10F #define DA9063_REG_CONFIG_K 0x110 #define DA9063_REG_CONFIG_L 0x111 -#define DA9063_REG_CONFIG_M 0x112 -#define DA9063_REG_CONFIG_N 0x113 - -#define DA9063_REG_MON_REG_1 0x114 -#define DA9063_REG_MON_REG_2 0x115 -#define DA9063_REG_MON_REG_3 0x116 -#define DA9063_REG_MON_REG_4 0x117 -#define DA9063_REG_MON_REG_5 0x11E -#define DA9063_REG_MON_REG_6 0x11F -#define DA9063_REG_TRIM_CLDR 0x120 + +#define DA9063_AD_REG_MON_REG_1 0x112 +#define DA9063_AD_REG_MON_REG_2 0x113 +#define DA9063_AD_REG_MON_REG_3 0x114 +#define DA9063_AD_REG_MON_REG_4 0x115 +#define DA9063_AD_REG_MON_REG_5 0x116 +#define DA9063_AD_REG_MON_REG_6 0x117 +#define DA9063_AD_REG_TRIM_CLDR 0x118 + +#define DA9063_AD_REG_GP_ID_0 0x119 +#define DA9063_AD_REG_GP_ID_1 0x11A +#define DA9063_AD_REG_GP_ID_2 0x11B +#define DA9063_AD_REG_GP_ID_3 0x11C +#define DA9063_AD_REG_GP_ID_4 0x11D +#define DA9063_AD_REG_GP_ID_5 0x11E +#define DA9063_AD_REG_GP_ID_6 0x11F +#define DA9063_AD_REG_GP_ID_7 0x120 +#define DA9063_AD_REG_GP_ID_8 0x121 +#define DA9063_AD_REG_GP_ID_9 0x122 +#define DA9063_AD_REG_GP_ID_10 0x123 +#define DA9063_AD_REG_GP_ID_11 0x124 +#define DA9063_AD_REG_GP_ID_12 0x125 +#define DA9063_AD_REG_GP_ID_13 0x126 +#define DA9063_AD_REG_GP_ID_14 0x127 +#define DA9063_AD_REG_GP_ID_15 0x128 +#define DA9063_AD_REG_GP_ID_16 0x129 +#define DA9063_AD_REG_GP_ID_17 0x12A +#define DA9063_AD_REG_GP_ID_18 0x12B +#define DA9063_AD_REG_GP_ID_19 0x12C + +#define DA9063_BB_REG_CONFIG_M 0x112 +#define DA9063_BB_REG_CONFIG_N 0x113 + +#define DA9063_BB_REG_MON_REG_1 0x114 +#define DA9063_BB_REG_MON_REG_2 0x115 +#define DA9063_BB_REG_MON_REG_3 0x116 +#define DA9063_BB_REG_MON_REG_4 0x117 +#define DA9063_BB_REG_MON_REG_5 0x11E +#define DA9063_BB_REG_MON_REG_6 0x11F +#define DA9063_BB_REG_TRIM_CLDR 0x120 /* General Purpose Registers */ -#define DA9063_REG_GP_ID_0 0x121 -#define DA9063_REG_GP_ID_1 0x122 -#define DA9063_REG_GP_ID_2 0x123 -#define DA9063_REG_GP_ID_3 0x124 -#define DA9063_REG_GP_ID_4 0x125 -#define DA9063_REG_GP_ID_5 0x126 -#define DA9063_REG_GP_ID_6 0x127 -#define DA9063_REG_GP_ID_7 0x128 -#define DA9063_REG_GP_ID_8 0x129 -#define DA9063_REG_GP_ID_9 0x12A -#define DA9063_REG_GP_ID_10 0x12B -#define DA9063_REG_GP_ID_11 0x12C -#define DA9063_REG_GP_ID_12 0x12D -#define DA9063_REG_GP_ID_13 0x12E -#define DA9063_REG_GP_ID_14 0x12F -#define DA9063_REG_GP_ID_15 0x130 -#define DA9063_REG_GP_ID_16 0x131 -#define DA9063_REG_GP_ID_17 0x132 -#define DA9063_REG_GP_ID_18 0x133 -#define DA9063_REG_GP_ID_19 0x134 +#define 
DA9063_BB_REG_GP_ID_0 0x121 +#define DA9063_BB_REG_GP_ID_1 0x122 +#define DA9063_BB_REG_GP_ID_2 0x123 +#define DA9063_BB_REG_GP_ID_3 0x124 +#define DA9063_BB_REG_GP_ID_4 0x125 +#define DA9063_BB_REG_GP_ID_5 0x126 +#define DA9063_BB_REG_GP_ID_6 0x127 +#define DA9063_BB_REG_GP_ID_7 0x128 +#define DA9063_BB_REG_GP_ID_8 0x129 +#define DA9063_BB_REG_GP_ID_9 0x12A +#define DA9063_BB_REG_GP_ID_10 0x12B +#define DA9063_BB_REG_GP_ID_11 0x12C +#define DA9063_BB_REG_GP_ID_12 0x12D +#define DA9063_BB_REG_GP_ID_13 0x12E +#define DA9063_BB_REG_GP_ID_14 0x12F +#define DA9063_BB_REG_GP_ID_15 0x130 +#define DA9063_BB_REG_GP_ID_16 0x131 +#define DA9063_BB_REG_GP_ID_17 0x132 +#define DA9063_BB_REG_GP_ID_18 0x133 +#define DA9063_BB_REG_GP_ID_19 0x134 /* Chip ID and variant */ #define DA9063_REG_CHIP_ID 0x181 @@ -404,10 +445,10 @@ /* DA9063_REG_CONTROL_B (addr=0x0F) */ #define DA9063_CHG_SEL 0x01 #define DA9063_WATCHDOG_PD 0x02 -#define DA9063_RESET_BLINKING 0x04 +#define DA9063_BB_RESET_BLINKING 0x04 #define DA9063_NRES_MODE 0x08 #define DA9063_NONKEY_LOCK 0x10 -#define DA9063_BUCK_SLOWSTART 0x80 +#define DA9063_BB_BUCK_SLOWSTART 0x80 /* DA9063_REG_CONTROL_C (addr=0x10) */ #define DA9063_DEBOUNCING_MASK 0x07 @@ -467,7 +508,7 @@ #define DA9063_GPADC_PAUSE 0x02 #define DA9063_PMIF_DIS 0x04 #define DA9063_HS2WIRE_DIS 0x08 -#define DA9063_CLDR_PAUSE 0x10 +#define DA9063_BB_CLDR_PAUSE 0x10 #define DA9063_BBAT_DIS 0x20 #define DA9063_OUT_32K_PAUSE 0x40 #define DA9063_PMCONT_DIS 0x80 @@ -844,7 +885,7 @@ #define DA9063_MONITOR 0x40 /* DA9063_REG_ALARM_S (addr=0x46) */ -#define DA9063_ALARM_S_MASK 0x3F +#define DA9063_BB_ALARM_S_MASK 0x3F #define DA9063_ALARM_STATUS_ALARM 0x80 #define DA9063_ALARM_STATUS_TICK 0x40 /* DA9063_REG_ALARM_MI (addr=0x47) */ -- cgit v1.2.3-59-g8ed1b From 3d2108dae4e1768c06718cdce19f8f0089ce310e Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Tue, 15 Jul 2014 11:21:46 +0100 Subject: mfd: wm5110: Add in the output done interrupts wm5110 has interrupts to signal that an output has fully enabled. This patch adds in these interrupts although use is not made of them yet. 
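
Although this patch only wires the new sources into the IRQ tables, a minimal sketch of how a client driver might later consume one of them follows; the foo_* names are hypothetical, and arizona_request_irq() is assumed to be the existing Arizona MFD helper:

#include <linux/device.h>
#include <linux/interrupt.h>
#include <linux/mfd/arizona/core.h>

/* Hypothetical handler: log completion of the HPOUT1L enable sequence. */
static irqreturn_t foo_hp1l_done_irq(int irq, void *data)
{
	struct arizona *arizona = data;

	dev_dbg(arizona->dev, "HPOUT1L enable sequence complete\n");
	return IRQ_HANDLED;
}

static int foo_request_hp1l_done(struct arizona *arizona)
{
	/* ARIZONA_IRQ_HP1L_DONE is one of the IRQ numbers added by this patch */
	return arizona_request_irq(arizona, ARIZONA_IRQ_HP1L_DONE,
				   "HPOUT1L done", foo_hp1l_done_irq, arizona);
}
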
Signed-off-by: Charles Keepax Signed-off-by: Lee Jones --- drivers/mfd/wm5110-tables.c | 18 +++++ include/linux/mfd/arizona/core.h | 10 ++- include/linux/mfd/arizona/registers.h | 120 ++++++++++++++++++++++++++++++++++ 3 files changed, 146 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/wm5110-tables.c b/drivers/mfd/wm5110-tables.c index b26be6e5e3f4..4729007055d8 100644 --- a/drivers/mfd/wm5110-tables.c +++ b/drivers/mfd/wm5110-tables.c @@ -416,6 +416,24 @@ static const struct regmap_irq wm5110_irqs[ARIZONA_NUM_IRQ] = { [ARIZONA_IRQ_ISRC2_CFG_ERR] = { .reg_offset = 3, .mask = ARIZONA_ISRC2_CFG_ERR_EINT1 }, + [ARIZONA_IRQ_HP3R_DONE] = { + .reg_offset = 3, .mask = ARIZONA_HP3R_DONE_EINT1 + }, + [ARIZONA_IRQ_HP3L_DONE] = { + .reg_offset = 3, .mask = ARIZONA_HP3L_DONE_EINT1 + }, + [ARIZONA_IRQ_HP2R_DONE] = { + .reg_offset = 3, .mask = ARIZONA_HP2R_DONE_EINT1 + }, + [ARIZONA_IRQ_HP2L_DONE] = { + .reg_offset = 3, .mask = ARIZONA_HP2L_DONE_EINT1 + }, + [ARIZONA_IRQ_HP1R_DONE] = { + .reg_offset = 3, .mask = ARIZONA_HP1R_DONE_EINT1 + }, + [ARIZONA_IRQ_HP1L_DONE] = { + .reg_offset = 3, .mask = ARIZONA_HP1L_DONE_EINT1 + }, [ARIZONA_IRQ_BOOT_DONE] = { .reg_offset = 4, .mask = ARIZONA_BOOT_DONE_EINT1 diff --git a/include/linux/mfd/arizona/core.h b/include/linux/mfd/arizona/core.h index 70854d892760..6a62fc99f399 100644 --- a/include/linux/mfd/arizona/core.h +++ b/include/linux/mfd/arizona/core.h @@ -78,8 +78,14 @@ enum arizona_type { #define ARIZONA_IRQ_FLL1_CLOCK_OK 49 #define ARIZONA_IRQ_MICD_CLAMP_RISE 50 #define ARIZONA_IRQ_MICD_CLAMP_FALL 51 - -#define ARIZONA_NUM_IRQ 52 +#define ARIZONA_IRQ_HP3R_DONE 52 +#define ARIZONA_IRQ_HP3L_DONE 53 +#define ARIZONA_IRQ_HP2R_DONE 54 +#define ARIZONA_IRQ_HP2L_DONE 55 +#define ARIZONA_IRQ_HP1R_DONE 56 +#define ARIZONA_IRQ_HP1L_DONE 57 + +#define ARIZONA_NUM_IRQ 58 struct snd_soc_dapm_context; diff --git a/include/linux/mfd/arizona/registers.h b/include/linux/mfd/arizona/registers.h index 7204d8138b24..df93563ea8c5 100644 --- a/include/linux/mfd/arizona/registers.h +++ b/include/linux/mfd/arizona/registers.h @@ -4795,6 +4795,30 @@ #define ARIZONA_ISRC2_CFG_ERR_EINT1_MASK 0x0040 /* ISRC2_CFG_ERR_EINT1 */ #define ARIZONA_ISRC2_CFG_ERR_EINT1_SHIFT 6 /* ISRC2_CFG_ERR_EINT1 */ #define ARIZONA_ISRC2_CFG_ERR_EINT1_WIDTH 1 /* ISRC2_CFG_ERR_EINT1 */ +#define ARIZONA_HP3R_DONE_EINT1 0x0020 /* HP3R_DONE_EINT1 */ +#define ARIZONA_HP3R_DONE_EINT1_MASK 0x0020 /* HP3R_DONE_EINT1 */ +#define ARIZONA_HP3R_DONE_EINT1_SHIFT 5 /* HP3R_DONE_EINT1 */ +#define ARIZONA_HP3R_DONE_EINT1_WIDTH 1 /* HP3R_DONE_EINT1 */ +#define ARIZONA_HP3L_DONE_EINT1 0x0010 /* HP3L_DONE_EINT1 */ +#define ARIZONA_HP3L_DONE_EINT1_MASK 0x0010 /* HP3L_DONE_EINT1 */ +#define ARIZONA_HP3L_DONE_EINT1_SHIFT 4 /* HP3L_DONE_EINT1 */ +#define ARIZONA_HP3L_DONE_EINT1_WIDTH 1 /* HP3L_DONE_EINT1 */ +#define ARIZONA_HP2R_DONE_EINT1 0x0008 /* HP2R_DONE_EINT1 */ +#define ARIZONA_HP2R_DONE_EINT1_MASK 0x0008 /* HP2R_DONE_EINT1 */ +#define ARIZONA_HP2R_DONE_EINT1_SHIFT 3 /* HP2R_DONE_EINT1 */ +#define ARIZONA_HP2R_DONE_EINT1_WIDTH 1 /* HP2R_DONE_EINT1 */ +#define ARIZONA_HP2L_DONE_EINT1 0x0004 /* HP2L_DONE_EINT1 */ +#define ARIZONA_HP2L_DONE_EINT1_MASK 0x0004 /* HP2L_DONE_EINT1 */ +#define ARIZONA_HP2L_DONE_EINT1_SHIFT 2 /* HP2L_DONE_EINT1 */ +#define ARIZONA_HP2L_DONE_EINT1_WIDTH 1 /* HP2L_DONE_EINT1 */ +#define ARIZONA_HP1R_DONE_EINT1 0x0002 /* HP1R_DONE_EINT1 */ +#define ARIZONA_HP1R_DONE_EINT1_MASK 0x0002 /* HP1R_DONE_EINT1 */ +#define ARIZONA_HP1R_DONE_EINT1_SHIFT 1 /* HP1R_DONE_EINT1 
*/ +#define ARIZONA_HP1R_DONE_EINT1_WIDTH 1 /* HP1R_DONE_EINT1 */ +#define ARIZONA_HP1L_DONE_EINT1 0x0001 /* HP1L_DONE_EINT1 */ +#define ARIZONA_HP1L_DONE_EINT1_MASK 0x0001 /* HP1L_DONE_EINT1 */ +#define ARIZONA_HP1L_DONE_EINT1_SHIFT 0 /* HP1L_DONE_EINT1 */ +#define ARIZONA_HP1L_DONE_EINT1_WIDTH 1 /* HP1L_DONE_EINT1 */ /* * R3332 (0xD04) - Interrupt Status 5 @@ -4963,6 +4987,30 @@ #define ARIZONA_IM_ISRC2_CFG_ERR_EINT1_MASK 0x0040 /* IM_ISRC2_CFG_ERR_EINT1 */ #define ARIZONA_IM_ISRC2_CFG_ERR_EINT1_SHIFT 6 /* IM_ISRC2_CFG_ERR_EINT1 */ #define ARIZONA_IM_ISRC2_CFG_ERR_EINT1_WIDTH 1 /* IM_ISRC2_CFG_ERR_EINT1 */ +#define ARIZONA_IM_HP3R_DONE_EINT1 0x0020 /* IM_HP3R_DONE_EINT1 */ +#define ARIZONA_IM_HP3R_DONE_EINT1_MASK 0x0020 /* IM_HP3R_DONE_EINT1 */ +#define ARIZONA_IM_HP3R_DONE_EINT1_SHIFT 5 /* IM_HP3R_DONE_EINT1 */ +#define ARIZONA_IM_HP3R_DONE_EINT1_WIDTH 1 /* IM_HP3R_DONE_EINT1 */ +#define ARIZONA_IM_HP3L_DONE_EINT1 0x0010 /* IM_HP3L_DONE_EINT1 */ +#define ARIZONA_IM_HP3L_DONE_EINT1_MASK 0x0010 /* IM_HP3L_DONE_EINT1 */ +#define ARIZONA_IM_HP3L_DONE_EINT1_SHIFT 4 /* IM_HP3L_DONE_EINT1 */ +#define ARIZONA_IM_HP3L_DONE_EINT1_WIDTH 1 /* IM_HP3L_DONE_EINT1 */ +#define ARIZONA_IM_HP2R_DONE_EINT1 0x0008 /* IM_HP2R_DONE_EINT1 */ +#define ARIZONA_IM_HP2R_DONE_EINT1_MASK 0x0008 /* IM_HP2R_DONE_EINT1 */ +#define ARIZONA_IM_HP2R_DONE_EINT1_SHIFT 3 /* IM_HP2R_DONE_EINT1 */ +#define ARIZONA_IM_HP2R_DONE_EINT1_WIDTH 1 /* IM_HP2R_DONE_EINT1 */ +#define ARIZONA_IM_HP2L_DONE_EINT1 0x0004 /* IM_HP2L_DONE_EINT1 */ +#define ARIZONA_IM_HP2L_DONE_EINT1_MASK 0x0004 /* IM_HP2L_DONE_EINT1 */ +#define ARIZONA_IM_HP2L_DONE_EINT1_SHIFT 2 /* IM_HP2L_DONE_EINT1 */ +#define ARIZONA_IM_HP2L_DONE_EINT1_WIDTH 1 /* IM_HP2L_DONE_EINT1 */ +#define ARIZONA_IM_HP1R_DONE_EINT1 0x0002 /* IM_HP1R_DONE_EINT1 */ +#define ARIZONA_IM_HP1R_DONE_EINT1_MASK 0x0002 /* IM_HP1R_DONE_EINT1 */ +#define ARIZONA_IM_HP1R_DONE_EINT1_SHIFT 1 /* IM_HP1R_DONE_EINT1 */ +#define ARIZONA_IM_HP1R_DONE_EINT1_WIDTH 1 /* IM_HP1R_DONE_EINT1 */ +#define ARIZONA_IM_HP1L_DONE_EINT1 0x0001 /* IM_HP1L_DONE_EINT1 */ +#define ARIZONA_IM_HP1L_DONE_EINT1_MASK 0x0001 /* IM_HP1L_DONE_EINT1 */ +#define ARIZONA_IM_HP1L_DONE_EINT1_SHIFT 0 /* IM_HP1L_DONE_EINT1 */ +#define ARIZONA_IM_HP1L_DONE_EINT1_WIDTH 1 /* IM_HP1L_DONE_EINT1 */ /* * R3340 (0xD0C) - Interrupt Status 5 Mask @@ -5139,6 +5187,30 @@ #define ARIZONA_ISRC2_CFG_ERR_EINT2_MASK 0x0040 /* ISRC2_CFG_ERR_EINT2 */ #define ARIZONA_ISRC2_CFG_ERR_EINT2_SHIFT 6 /* ISRC2_CFG_ERR_EINT2 */ #define ARIZONA_ISRC2_CFG_ERR_EINT2_WIDTH 1 /* ISRC2_CFG_ERR_EINT2 */ +#define ARIZONA_HP3R_DONE_EINT2 0x0020 /* HP3R_DONE_EINT2 */ +#define ARIZONA_HP3R_DONE_EINT2_MASK 0x0020 /* HP3R_DONE_EINT2 */ +#define ARIZONA_HP3R_DONE_EINT2_SHIFT 5 /* HP3R_DONE_EINT2 */ +#define ARIZONA_HP3R_DONE_EINT2_WIDTH 1 /* HP3R_DONE_EINT2 */ +#define ARIZONA_HP3L_DONE_EINT2 0x0010 /* HP3L_DONE_EINT2 */ +#define ARIZONA_HP3L_DONE_EINT2_MASK 0x0010 /* HP3L_DONE_EINT2 */ +#define ARIZONA_HP3L_DONE_EINT2_SHIFT 4 /* HP3L_DONE_EINT2 */ +#define ARIZONA_HP3L_DONE_EINT2_WIDTH 1 /* HP3L_DONE_EINT2 */ +#define ARIZONA_HP2R_DONE_EINT2 0x0008 /* HP2R_DONE_EINT2 */ +#define ARIZONA_HP2R_DONE_EINT2_MASK 0x0008 /* HP2R_DONE_EINT2 */ +#define ARIZONA_HP2R_DONE_EINT2_SHIFT 3 /* HP2R_DONE_EINT2 */ +#define ARIZONA_HP2R_DONE_EINT2_WIDTH 1 /* HP2R_DONE_EINT2 */ +#define ARIZONA_HP2L_DONE_EINT2 0x0004 /* HP2L_DONE_EINT2 */ +#define ARIZONA_HP2L_DONE_EINT2_MASK 0x0004 /* HP2L_DONE_EINT2 */ +#define ARIZONA_HP2L_DONE_EINT2_SHIFT 2 /* HP2L_DONE_EINT2 */ +#define 
ARIZONA_HP2L_DONE_EINT2_WIDTH 1 /* HP2L_DONE_EINT2 */ +#define ARIZONA_HP1R_DONE_EINT2 0x0002 /* HP1R_DONE_EINT2 */ +#define ARIZONA_HP1R_DONE_EINT2_MASK 0x0002 /* HP1R_DONE_EINT2 */ +#define ARIZONA_HP1R_DONE_EINT2_SHIFT 1 /* HP1R_DONE_EINT2 */ +#define ARIZONA_HP1R_DONE_EINT2_WIDTH 1 /* HP1R_DONE_EINT2 */ +#define ARIZONA_HP1L_DONE_EINT2 0x0001 /* HP1L_DONE_EINT2 */ +#define ARIZONA_HP1L_DONE_EINT2_MASK 0x0001 /* HP1L_DONE_EINT2 */ +#define ARIZONA_HP1L_DONE_EINT2_SHIFT 0 /* HP1L_DONE_EINT2 */ +#define ARIZONA_HP1L_DONE_EINT2_WIDTH 1 /* HP1L_DONE_EINT2 */ /* * R3348 (0xD14) - IRQ2 Status 5 @@ -5307,6 +5379,30 @@ #define ARIZONA_IM_ISRC2_CFG_ERR_EINT2_MASK 0x0040 /* IM_ISRC2_CFG_ERR_EINT2 */ #define ARIZONA_IM_ISRC2_CFG_ERR_EINT2_SHIFT 6 /* IM_ISRC2_CFG_ERR_EINT2 */ #define ARIZONA_IM_ISRC2_CFG_ERR_EINT2_WIDTH 1 /* IM_ISRC2_CFG_ERR_EINT2 */ +#define ARIZONA_IM_HP3R_DONE_EINT2 0x0020 /* IM_HP3R_DONE_EINT2 */ +#define ARIZONA_IM_HP3R_DONE_EINT2_MASK 0x0020 /* IM_HP3R_DONE_EINT2 */ +#define ARIZONA_IM_HP3R_DONE_EINT2_SHIFT 5 /* IM_HP3R_DONE_EINT2 */ +#define ARIZONA_IM_HP3R_DONE_EINT2_WIDTH 1 /* IM_HP3R_DONE_EINT2 */ +#define ARIZONA_IM_HP3L_DONE_EINT2 0x0010 /* IM_HP3L_DONE_EINT2 */ +#define ARIZONA_IM_HP3L_DONE_EINT2_MASK 0x0010 /* IM_HP3L_DONE_EINT2 */ +#define ARIZONA_IM_HP3L_DONE_EINT2_SHIFT 4 /* IM_HP3L_DONE_EINT2 */ +#define ARIZONA_IM_HP3L_DONE_EINT2_WIDTH 1 /* IM_HP3L_DONE_EINT2 */ +#define ARIZONA_IM_HP2R_DONE_EINT2 0x0008 /* IM_HP2R_DONE_EINT2 */ +#define ARIZONA_IM_HP2R_DONE_EINT2_MASK 0x0008 /* IM_HP2R_DONE_EINT2 */ +#define ARIZONA_IM_HP2R_DONE_EINT2_SHIFT 3 /* IM_HP2R_DONE_EINT2 */ +#define ARIZONA_IM_HP2R_DONE_EINT2_WIDTH 1 /* IM_HP2R_DONE_EINT2 */ +#define ARIZONA_IM_HP2L_DONE_EINT2 0x0004 /* IM_HP2L_DONE_EINT2 */ +#define ARIZONA_IM_HP2L_DONE_EINT2_MASK 0x0004 /* IM_HP2L_DONE_EINT2 */ +#define ARIZONA_IM_HP2L_DONE_EINT2_SHIFT 2 /* IM_HP2L_DONE_EINT2 */ +#define ARIZONA_IM_HP2L_DONE_EINT2_WIDTH 1 /* IM_HP2L_DONE_EINT2 */ +#define ARIZONA_IM_HP1R_DONE_EINT2 0x0002 /* IM_HP1R_DONE_EINT2 */ +#define ARIZONA_IM_HP1R_DONE_EINT2_MASK 0x0002 /* IM_HP1R_DONE_EINT2 */ +#define ARIZONA_IM_HP1R_DONE_EINT2_SHIFT 1 /* IM_HP1R_DONE_EINT2 */ +#define ARIZONA_IM_HP1R_DONE_EINT2_WIDTH 1 /* IM_HP1R_DONE_EINT2 */ +#define ARIZONA_IM_HP1L_DONE_EINT2 0x0001 /* IM_HP1L_DONE_EINT2 */ +#define ARIZONA_IM_HP1L_DONE_EINT2_MASK 0x0001 /* IM_HP1L_DONE_EINT2 */ +#define ARIZONA_IM_HP1L_DONE_EINT2_SHIFT 0 /* IM_HP1L_DONE_EINT2 */ +#define ARIZONA_IM_HP1L_DONE_EINT2_WIDTH 1 /* IM_HP1L_DONE_EINT2 */ /* * R3356 (0xD1C) - IRQ2 Status 5 Mask @@ -5464,6 +5560,30 @@ #define ARIZONA_ISRC2_CFG_ERR_STS_MASK 0x0040 /* ISRC2_CFG_ERR_STS */ #define ARIZONA_ISRC2_CFG_ERR_STS_SHIFT 6 /* ISRC2_CFG_ERR_STS */ #define ARIZONA_ISRC2_CFG_ERR_STS_WIDTH 1 /* ISRC2_CFG_ERR_STS */ +#define ARIZONA_HP3R_DONE_STS 0x0020 /* HP3R_DONE_STS */ +#define ARIZONA_HP3R_DONE_STS_MASK 0x0020 /* HP3R_DONE_STS */ +#define ARIZONA_HP3R_DONE_STS_SHIFT 5 /* HP3R_DONE_STS */ +#define ARIZONA_HP3R_DONE_STS_WIDTH 1 /* HP3R_DONE_STS */ +#define ARIZONA_HP3L_DONE_STS 0x0010 /* HP3L_DONE_STS */ +#define ARIZONA_HP3L_DONE_STS_MASK 0x0010 /* HP3L_DONE_STS */ +#define ARIZONA_HP3L_DONE_STS_SHIFT 4 /* HP3L_DONE_STS */ +#define ARIZONA_HP3L_DONE_STS_WIDTH 1 /* HP3L_DONE_STS */ +#define ARIZONA_HP2R_DONE_STS 0x0008 /* HP2R_DONE_STS */ +#define ARIZONA_HP2R_DONE_STS_MASK 0x0008 /* HP2R_DONE_STS */ +#define ARIZONA_HP2R_DONE_STS_SHIFT 3 /* HP2R_DONE_STS */ +#define ARIZONA_HP2R_DONE_STS_WIDTH 1 /* HP2R_DONE_STS */ +#define ARIZONA_HP2L_DONE_STS 0x0004 /* 
HP2L_DONE_STS */ +#define ARIZONA_HP2L_DONE_STS_MASK 0x0004 /* HP2L_DONE_STS */ +#define ARIZONA_HP2L_DONE_STS_SHIFT 2 /* HP2L_DONE_STS */ +#define ARIZONA_HP2L_DONE_STS_WIDTH 1 /* HP2L_DONE_STS */ +#define ARIZONA_HP1R_DONE_STS 0x0002 /* HP1R_DONE_STS */ +#define ARIZONA_HP1R_DONE_STS_MASK 0x0002 /* HP1R_DONE_STS */ +#define ARIZONA_HP1R_DONE_STS_SHIFT 1 /* HP1R_DONE_STS */ +#define ARIZONA_HP1R_DONE_STS_WIDTH 1 /* HP1R_DONE_STS */ +#define ARIZONA_HP1L_DONE_STS 0x0001 /* HP1L_DONE_STS */ +#define ARIZONA_HP1L_DONE_STS_MASK 0x0001 /* HP1L_DONE_STS */ +#define ARIZONA_HP1L_DONE_STS_SHIFT 0 /* HP1L_DONE_STS */ +#define ARIZONA_HP1L_DONE_STS_WIDTH 1 /* HP1L_DONE_STS */ /* * R3363 (0xD23) - Interrupt Raw Status 5 -- cgit v1.2.3-59-g8ed1b From c0fe2c5b3f730e3d56d37f7b731a5b1191a4e8bf Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Tue, 15 Jul 2014 11:21:47 +0100 Subject: mfd: arizona: Rename thermal shutdown interrupt Newer versions of the IP introduce short circuit protection which will also shutdown the speaker. Rename the interrupt and associated register bits associated with thermal events to better fit the function and avoid conflict with future interrupt additions. Signed-off-by: Charles Keepax Signed-off-by: Lee Jones --- drivers/mfd/wm5102-tables.c | 8 ++-- drivers/mfd/wm5110-tables.c | 8 ++-- drivers/mfd/wm8997-tables.c | 8 ++-- include/linux/mfd/arizona/core.h | 4 +- include/linux/mfd/arizona/registers.h | 80 +++++++++++++++++------------------ sound/soc/codecs/arizona.c | 10 ++--- 6 files changed, 59 insertions(+), 59 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/wm5102-tables.c b/drivers/mfd/wm5102-tables.c index c8a993bd17ae..fb4d4bb0f47d 100644 --- a/drivers/mfd/wm5102-tables.c +++ b/drivers/mfd/wm5102-tables.c @@ -138,11 +138,11 @@ static const struct regmap_irq wm5102_irqs[ARIZONA_NUM_IRQ] = { .reg_offset = 1, .mask = ARIZONA_DSP_IRQ1_EINT1 }, - [ARIZONA_IRQ_SPK_SHUTDOWN_WARN] = { - .reg_offset = 2, .mask = ARIZONA_SPK_SHUTDOWN_WARN_EINT1 + [ARIZONA_IRQ_SPK_OVERHEAT_WARN] = { + .reg_offset = 2, .mask = ARIZONA_SPK_OVERHEAT_WARN_EINT1 }, - [ARIZONA_IRQ_SPK_SHUTDOWN] = { - .reg_offset = 2, .mask = ARIZONA_SPK_SHUTDOWN_EINT1 + [ARIZONA_IRQ_SPK_OVERHEAT] = { + .reg_offset = 2, .mask = ARIZONA_SPK_OVERHEAT_EINT1 }, [ARIZONA_IRQ_HPDET] = { .reg_offset = 2, .mask = ARIZONA_HPDET_EINT1 diff --git a/drivers/mfd/wm5110-tables.c b/drivers/mfd/wm5110-tables.c index 4729007055d8..2822768f2df1 100644 --- a/drivers/mfd/wm5110-tables.c +++ b/drivers/mfd/wm5110-tables.c @@ -340,11 +340,11 @@ static const struct regmap_irq wm5110_irqs[ARIZONA_NUM_IRQ] = { .reg_offset = 1, .mask = ARIZONA_DSP_IRQ1_EINT1 }, - [ARIZONA_IRQ_SPK_SHUTDOWN_WARN] = { - .reg_offset = 2, .mask = ARIZONA_SPK_SHUTDOWN_WARN_EINT1 + [ARIZONA_IRQ_SPK_OVERHEAT_WARN] = { + .reg_offset = 2, .mask = ARIZONA_SPK_OVERHEAT_WARN_EINT1 }, - [ARIZONA_IRQ_SPK_SHUTDOWN] = { - .reg_offset = 2, .mask = ARIZONA_SPK_SHUTDOWN_EINT1 + [ARIZONA_IRQ_SPK_OVERHEAT] = { + .reg_offset = 2, .mask = ARIZONA_SPK_OVERHEAT_EINT1 }, [ARIZONA_IRQ_HPDET] = { .reg_offset = 2, .mask = ARIZONA_HPDET_EINT1 diff --git a/drivers/mfd/wm8997-tables.c b/drivers/mfd/wm8997-tables.c index 04bc6d5ff285..510da3b52324 100644 --- a/drivers/mfd/wm8997-tables.c +++ b/drivers/mfd/wm8997-tables.c @@ -65,11 +65,11 @@ static const struct regmap_irq wm8997_irqs[ARIZONA_NUM_IRQ] = { [ARIZONA_IRQ_GP2] = { .reg_offset = 0, .mask = ARIZONA_GP2_EINT1 }, [ARIZONA_IRQ_GP1] = { .reg_offset = 0, .mask = ARIZONA_GP1_EINT1 }, - [ARIZONA_IRQ_SPK_SHUTDOWN_WARN] = { - 
.reg_offset = 2, .mask = ARIZONA_SPK_SHUTDOWN_WARN_EINT1 + [ARIZONA_IRQ_SPK_OVERHEAT_WARN] = { + .reg_offset = 2, .mask = ARIZONA_SPK_OVERHEAT_WARN_EINT1 }, - [ARIZONA_IRQ_SPK_SHUTDOWN] = { - .reg_offset = 2, .mask = ARIZONA_SPK_SHUTDOWN_EINT1 + [ARIZONA_IRQ_SPK_OVERHEAT] = { + .reg_offset = 2, .mask = ARIZONA_SPK_OVERHEAT_EINT1 }, [ARIZONA_IRQ_HPDET] = { .reg_offset = 2, .mask = ARIZONA_HPDET_EINT1 diff --git a/include/linux/mfd/arizona/core.h b/include/linux/mfd/arizona/core.h index 6a62fc99f399..819edf5d1edf 100644 --- a/include/linux/mfd/arizona/core.h +++ b/include/linux/mfd/arizona/core.h @@ -46,8 +46,8 @@ enum arizona_type { #define ARIZONA_IRQ_DSP_IRQ6 17 #define ARIZONA_IRQ_DSP_IRQ7 18 #define ARIZONA_IRQ_DSP_IRQ8 19 -#define ARIZONA_IRQ_SPK_SHUTDOWN_WARN 20 -#define ARIZONA_IRQ_SPK_SHUTDOWN 21 +#define ARIZONA_IRQ_SPK_OVERHEAT_WARN 20 +#define ARIZONA_IRQ_SPK_OVERHEAT 21 #define ARIZONA_IRQ_MICDET 22 #define ARIZONA_IRQ_HPDET 23 #define ARIZONA_IRQ_WSEQ_DONE 24 diff --git a/include/linux/mfd/arizona/registers.h b/include/linux/mfd/arizona/registers.h index df93563ea8c5..f7d6f9e91da1 100644 --- a/include/linux/mfd/arizona/registers.h +++ b/include/linux/mfd/arizona/registers.h @@ -4691,14 +4691,14 @@ /* * R3330 (0xD02) - Interrupt Status 3 */ -#define ARIZONA_SPK_SHUTDOWN_WARN_EINT1 0x8000 /* SPK_SHUTDOWN_WARN_EINT1 */ -#define ARIZONA_SPK_SHUTDOWN_WARN_EINT1_MASK 0x8000 /* SPK_SHUTDOWN_WARN_EINT1 */ -#define ARIZONA_SPK_SHUTDOWN_WARN_EINT1_SHIFT 15 /* SPK_SHUTDOWN_WARN_EINT1 */ -#define ARIZONA_SPK_SHUTDOWN_WARN_EINT1_WIDTH 1 /* SPK_SHUTDOWN_WARN_EINT1 */ -#define ARIZONA_SPK_SHUTDOWN_EINT1 0x4000 /* SPK_SHUTDOWN_EINT1 */ -#define ARIZONA_SPK_SHUTDOWN_EINT1_MASK 0x4000 /* SPK_SHUTDOWN_EINT1 */ -#define ARIZONA_SPK_SHUTDOWN_EINT1_SHIFT 14 /* SPK_SHUTDOWN_EINT1 */ -#define ARIZONA_SPK_SHUTDOWN_EINT1_WIDTH 1 /* SPK_SHUTDOWN_EINT1 */ +#define ARIZONA_SPK_OVERHEAT_WARN_EINT1 0x8000 /* SPK_OVERHEAT_WARN_EINT1 */ +#define ARIZONA_SPK_OVERHEAT_WARN_EINT1_MASK 0x8000 /* SPK_OVERHEAD_WARN_EINT1 */ +#define ARIZONA_SPK_OVERHEAT_WARN_EINT1_SHIFT 15 /* SPK_OVERHEAT_WARN_EINT1 */ +#define ARIZONA_SPK_OVERHEAT_WARN_EINT1_WIDTH 1 /* SPK_OVERHEAT_WARN_EINT1 */ +#define ARIZONA_SPK_OVERHEAT_EINT1 0x4000 /* SPK_OVERHEAT_EINT1 */ +#define ARIZONA_SPK_OVERHEAT_EINT1_MASK 0x4000 /* SPK_OVERHEAT_EINT1 */ +#define ARIZONA_SPK_OVERHEAT_EINT1_SHIFT 14 /* SPK_OVERHEAT_EINT1 */ +#define ARIZONA_SPK_OVERHEAT_EINT1_WIDTH 1 /* SPK_OVERHEAT_EINT1 */ #define ARIZONA_HPDET_EINT1 0x2000 /* HPDET_EINT1 */ #define ARIZONA_HPDET_EINT1_MASK 0x2000 /* HPDET_EINT1 */ #define ARIZONA_HPDET_EINT1_SHIFT 13 /* HPDET_EINT1 */ @@ -4883,14 +4883,14 @@ /* * R3338 (0xD0A) - Interrupt Status 3 Mask */ -#define ARIZONA_IM_SPK_SHUTDOWN_WARN_EINT1 0x8000 /* IM_SPK_SHUTDOWN_WARN_EINT1 */ -#define ARIZONA_IM_SPK_SHUTDOWN_WARN_EINT1_MASK 0x8000 /* IM_SPK_SHUTDOWN_WARN_EINT1 */ -#define ARIZONA_IM_SPK_SHUTDOWN_WARN_EINT1_SHIFT 15 /* IM_SPK_SHUTDOWN_WARN_EINT1 */ -#define ARIZONA_IM_SPK_SHUTDOWN_WARN_EINT1_WIDTH 1 /* IM_SPK_SHUTDOWN_WARN_EINT1 */ -#define ARIZONA_IM_SPK_SHUTDOWN_EINT1 0x4000 /* IM_SPK_SHUTDOWN_EINT1 */ -#define ARIZONA_IM_SPK_SHUTDOWN_EINT1_MASK 0x4000 /* IM_SPK_SHUTDOWN_EINT1 */ -#define ARIZONA_IM_SPK_SHUTDOWN_EINT1_SHIFT 14 /* IM_SPK_SHUTDOWN_EINT1 */ -#define ARIZONA_IM_SPK_SHUTDOWN_EINT1_WIDTH 1 /* IM_SPK_SHUTDOWN_EINT1 */ +#define ARIZONA_IM_SPK_OVERHEAT_WARN_EINT1 0x8000 /* IM_SPK_OVERHEAT_WARN_EINT1 */ +#define ARIZONA_IM_SPK_OVERHEAT_WARN_EINT1_MASK 0x8000 /* IM_SPK_OVERHEAT_WARN_EINT1 */ +#define 
ARIZONA_IM_SPK_OVERHEAT_WARN_EINT1_SHIFT 15 /* IM_SPK_OVERHEAT_WARN_EINT1 */ +#define ARIZONA_IM_SPK_OVERHEAT_WARN_EINT1_WIDTH 1 /* IM_SPK_OVERHEAT_WARN_EINT1 */ +#define ARIZONA_IM_SPK_OVERHEAT_EINT1 0x4000 /* IM_SPK_OVERHEAT_EINT1 */ +#define ARIZONA_IM_SPK_OVERHEAT_EINT1_MASK 0x4000 /* IM_SPK_OVERHEAT_EINT1 */ +#define ARIZONA_IM_SPK_OVERHEAT_EINT1_SHIFT 14 /* IM_SPK_OVERHEAT_EINT1 */ +#define ARIZONA_IM_SPK_OVERHEAT_EINT1_WIDTH 1 /* IM_SPK_OVERHEAT_EINT1 */ #define ARIZONA_IM_HPDET_EINT1 0x2000 /* IM_HPDET_EINT1 */ #define ARIZONA_IM_HPDET_EINT1_MASK 0x2000 /* IM_HPDET_EINT1 */ #define ARIZONA_IM_HPDET_EINT1_SHIFT 13 /* IM_HPDET_EINT1 */ @@ -5083,14 +5083,14 @@ /* * R3346 (0xD12) - IRQ2 Status 3 */ -#define ARIZONA_SPK_SHUTDOWN_WARN_EINT2 0x8000 /* SPK_SHUTDOWN_WARN_EINT2 */ -#define ARIZONA_SPK_SHUTDOWN_WARN_EINT2_MASK 0x8000 /* SPK_SHUTDOWN_WARN_EINT2 */ -#define ARIZONA_SPK_SHUTDOWN_WARN_EINT2_SHIFT 15 /* SPK_SHUTDOWN_WARN_EINT2 */ -#define ARIZONA_SPK_SHUTDOWN_WARN_EINT2_WIDTH 1 /* SPK_SHUTDOWN_WARN_EINT2 */ -#define ARIZONA_SPK_SHUTDOWN_EINT2 0x4000 /* SPK_SHUTDOWN_EINT2 */ -#define ARIZONA_SPK_SHUTDOWN_EINT2_MASK 0x4000 /* SPK_SHUTDOWN_EINT2 */ -#define ARIZONA_SPK_SHUTDOWN_EINT2_SHIFT 14 /* SPK_SHUTDOWN_EINT2 */ -#define ARIZONA_SPK_SHUTDOWN_EINT2_WIDTH 1 /* SPK_SHUTDOWN_EINT2 */ +#define ARIZONA_SPK_OVERHEAT_WARN_EINT2 0x8000 /* SPK_OVERHEAT_WARN_EINT2 */ +#define ARIZONA_SPK_OVERHEAT_WARN_EINT2_MASK 0x8000 /* SPK_OVERHEAT_WARN_EINT2 */ +#define ARIZONA_SPK_OVERHEAT_WARN_EINT2_SHIFT 15 /* SPK_OVERHEAT_WARN_EINT2 */ +#define ARIZONA_SPK_OVERHEAT_WARN_EINT2_WIDTH 1 /* SPK_OVERHEAT_WARN_EINT2 */ +#define ARIZONA_SPK_OVERHEAT_EINT2 0x4000 /* SPK_OVERHEAT_EINT2 */ +#define ARIZONA_SPK_OVERHEAT_EINT2_MASK 0x4000 /* SPK_OVERHEAT_EINT2 */ +#define ARIZONA_SPK_OVERHEAT_EINT2_SHIFT 14 /* SPK_OVERHEAT_EINT2 */ +#define ARIZONA_SPK_OVERHEAT_EINT2_WIDTH 1 /* SPK_OVERHEAT_EINT2 */ #define ARIZONA_HPDET_EINT2 0x2000 /* HPDET_EINT2 */ #define ARIZONA_HPDET_EINT2_MASK 0x2000 /* HPDET_EINT2 */ #define ARIZONA_HPDET_EINT2_SHIFT 13 /* HPDET_EINT2 */ @@ -5275,14 +5275,14 @@ /* * R3354 (0xD1A) - IRQ2 Status 3 Mask */ -#define ARIZONA_IM_SPK_SHUTDOWN_WARN_EINT2 0x8000 /* IM_SPK_SHUTDOWN_WARN_EINT2 */ -#define ARIZONA_IM_SPK_SHUTDOWN_WARN_EINT2_MASK 0x8000 /* IM_SPK_SHUTDOWN_WARN_EINT2 */ -#define ARIZONA_IM_SPK_SHUTDOWN_WARN_EINT2_SHIFT 15 /* IM_SPK_SHUTDOWN_WARN_EINT2 */ -#define ARIZONA_IM_SPK_SHUTDOWN_WARN_EINT2_WIDTH 1 /* IM_SPK_SHUTDOWN_WARN_EINT2 */ -#define ARIZONA_IM_SPK_SHUTDOWN_EINT2 0x4000 /* IM_SPK_SHUTDOWN_EINT2 */ -#define ARIZONA_IM_SPK_SHUTDOWN_EINT2_MASK 0x4000 /* IM_SPK_SHUTDOWN_EINT2 */ -#define ARIZONA_IM_SPK_SHUTDOWN_EINT2_SHIFT 14 /* IM_SPK_SHUTDOWN_EINT2 */ -#define ARIZONA_IM_SPK_SHUTDOWN_EINT2_WIDTH 1 /* IM_SPK_SHUTDOWN_EINT2 */ +#define ARIZONA_IM_SPK_OVERHEAT_WARN_EINT2 0x8000 /* IM_SPK_OVERHEAT_WARN_EINT2 */ +#define ARIZONA_IM_SPK_OVERHEAT_WARN_EINT2_MASK 0x8000 /* IM_SPK_OVERHEAT_WARN_EINT2 */ +#define ARIZONA_IM_SPK_OVERHEAT_WARN_EINT2_SHIFT 15 /* IM_SPK_OVERHEAT_WARN_EINT2 */ +#define ARIZONA_IM_SPK_OVERHEAT_WARN_EINT2_WIDTH 1 /* IM_SPK_OVERHEAT_WARN_EINT2 */ +#define ARIZONA_IM_SPK_OVERHEAT_EINT2 0x4000 /* IM_SPK_OVERHEAT_EINT2 */ +#define ARIZONA_IM_SPK_OVERHEAT_EINT2_MASK 0x4000 /* IM_SPK_OVERHEAT_EINT2 */ +#define ARIZONA_IM_SPK_OVERHEAT_EINT2_SHIFT 14 /* IM_SPK_OVERHEAT_EINT2 */ +#define ARIZONA_IM_SPK_OVERHEAT_EINT2_WIDTH 1 /* IM_SPK_OVERHEAT_EINT2 */ #define ARIZONA_IM_HPDET_EINT2 0x2000 /* IM_HPDET_EINT2 */ #define ARIZONA_IM_HPDET_EINT2_MASK 0x2000 /* 
IM_HPDET_EINT2 */ #define ARIZONA_IM_HPDET_EINT2_SHIFT 13 /* IM_HPDET_EINT2 */ @@ -5456,14 +5456,14 @@ /* * R3361 (0xD21) - Interrupt Raw Status 3 */ -#define ARIZONA_SPK_SHUTDOWN_WARN_STS 0x8000 /* SPK_SHUTDOWN_WARN_STS */ -#define ARIZONA_SPK_SHUTDOWN_WARN_STS_MASK 0x8000 /* SPK_SHUTDOWN_WARN_STS */ -#define ARIZONA_SPK_SHUTDOWN_WARN_STS_SHIFT 15 /* SPK_SHUTDOWN_WARN_STS */ -#define ARIZONA_SPK_SHUTDOWN_WARN_STS_WIDTH 1 /* SPK_SHUTDOWN_WARN_STS */ -#define ARIZONA_SPK_SHUTDOWN_STS 0x4000 /* SPK_SHUTDOWN_STS */ -#define ARIZONA_SPK_SHUTDOWN_STS_MASK 0x4000 /* SPK_SHUTDOWN_STS */ -#define ARIZONA_SPK_SHUTDOWN_STS_SHIFT 14 /* SPK_SHUTDOWN_STS */ -#define ARIZONA_SPK_SHUTDOWN_STS_WIDTH 1 /* SPK_SHUTDOWN_STS */ +#define ARIZONA_SPK_OVERHEAT_WARN_STS 0x8000 /* SPK_OVERHEAT_WARN_STS */ +#define ARIZONA_SPK_OVERHEAT_WARN_STS_MASK 0x8000 /* SPK_OVERHEAT_WARN_STS */ +#define ARIZONA_SPK_OVERHEAT_WARN_STS_SHIFT 15 /* SPK_OVERHEAT_WARN_STS */ +#define ARIZONA_SPK_OVERHEAT_WARN_STS_WIDTH 1 /* SPK_OVERHEAT_WARN_STS */ +#define ARIZONA_SPK_OVERHEAT_STS 0x4000 /* SPK_OVERHEAT_STS */ +#define ARIZONA_SPK_OVERHEAT_STS_MASK 0x4000 /* SPK_OVERHEAT_STS */ +#define ARIZONA_SPK_OVERHEAT_STS_SHIFT 14 /* SPK_OVERHEAT_STS */ +#define ARIZONA_SPK_OVERHEAT_STS_WIDTH 1 /* SPK_OVERHEAT_STS */ #define ARIZONA_HPDET_STS 0x2000 /* HPDET_STS */ #define ARIZONA_HPDET_STS_MASK 0x2000 /* HPDET_STS */ #define ARIZONA_HPDET_STS_SHIFT 13 /* HPDET_STS */ diff --git a/sound/soc/codecs/arizona.c b/sound/soc/codecs/arizona.c index 29e198f57d4c..1b14105d8da3 100644 --- a/sound/soc/codecs/arizona.c +++ b/sound/soc/codecs/arizona.c @@ -107,7 +107,7 @@ static int arizona_spk_ev(struct snd_soc_dapm_widget *w, break; case SND_SOC_DAPM_POST_PMU: val = snd_soc_read(codec, ARIZONA_INTERRUPT_RAW_STATUS_3); - if (val & ARIZONA_SPK_SHUTDOWN_STS) { + if (val & ARIZONA_SPK_OVERHEAT_STS) { dev_crit(arizona->dev, "Speaker not enabled due to temperature\n"); return -EBUSY; @@ -159,7 +159,7 @@ static irqreturn_t arizona_thermal_warn(int irq, void *data) if (ret != 0) { dev_err(arizona->dev, "Failed to read thermal status: %d\n", ret); - } else if (val & ARIZONA_SPK_SHUTDOWN_WARN_STS) { + } else if (val & ARIZONA_SPK_OVERHEAT_WARN_STS) { dev_crit(arizona->dev, "Thermal warning\n"); } @@ -177,7 +177,7 @@ static irqreturn_t arizona_thermal_shutdown(int irq, void *data) if (ret != 0) { dev_err(arizona->dev, "Failed to read thermal status: %d\n", ret); - } else if (val & ARIZONA_SPK_SHUTDOWN_STS) { + } else if (val & ARIZONA_SPK_OVERHEAT_STS) { dev_crit(arizona->dev, "Thermal shutdown\n"); ret = regmap_update_bits(arizona->regmap, ARIZONA_OUTPUT_ENABLES_1, @@ -223,7 +223,7 @@ int arizona_init_spk(struct snd_soc_codec *codec) break; } - ret = arizona_request_irq(arizona, ARIZONA_IRQ_SPK_SHUTDOWN_WARN, + ret = arizona_request_irq(arizona, ARIZONA_IRQ_SPK_OVERHEAT_WARN, "Thermal warning", arizona_thermal_warn, arizona); if (ret != 0) @@ -231,7 +231,7 @@ int arizona_init_spk(struct snd_soc_codec *codec) "Failed to get thermal warning IRQ: %d\n", ret); - ret = arizona_request_irq(arizona, ARIZONA_IRQ_SPK_SHUTDOWN, + ret = arizona_request_irq(arizona, ARIZONA_IRQ_SPK_OVERHEAT, "Thermal shutdown", arizona_thermal_shutdown, arizona); if (ret != 0) -- cgit v1.2.3-59-g8ed1b From 3215501fc90e109c7b854423e02eb05bc638b555 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Tue, 15 Jul 2014 11:21:48 +0100 Subject: mfd: wm5110: Add new interrupt register definitions Newer versions of the IP have a lot of new interrupts and move several existing interrupts. 
This patch adds the register definitions and regmap hookup for these interrupts. Signed-off-by: Charles Keepax Signed-off-by: Lee Jones --- drivers/mfd/arizona-irq.c | 10 +- drivers/mfd/arizona.h | 1 + drivers/mfd/wm5110-tables.c | 213 +++++++++++++ include/linux/mfd/arizona/core.h | 21 +- include/linux/mfd/arizona/registers.h | 585 ++++++++++++++++++++++++++++++++++ 5 files changed, 827 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/arizona-irq.c b/drivers/mfd/arizona-irq.c index 17102f589100..e780bc40165d 100644 --- a/drivers/mfd/arizona-irq.c +++ b/drivers/mfd/arizona-irq.c @@ -203,7 +203,15 @@ int arizona_irq_init(struct arizona *arizona) #ifdef CONFIG_MFD_WM5110 case WM5110: aod = &wm5110_aod; - irq = &wm5110_irq; + + switch (arizona->rev) { + case 0 ... 2: + irq = &wm5110_irq; + break; + default: + irq = &wm5110_revd_irq; + break; + } ctrlif_error = false; break; diff --git a/drivers/mfd/arizona.h b/drivers/mfd/arizona.h index 2951498ab9a1..fbe2843271c5 100644 --- a/drivers/mfd/arizona.h +++ b/drivers/mfd/arizona.h @@ -36,6 +36,7 @@ extern const struct regmap_irq_chip wm5102_irq; extern const struct regmap_irq_chip wm5110_aod; extern const struct regmap_irq_chip wm5110_irq; +extern const struct regmap_irq_chip wm5110_revd_irq; extern const struct regmap_irq_chip wm8997_aod; extern const struct regmap_irq_chip wm8997_irq; diff --git a/drivers/mfd/wm5110-tables.c b/drivers/mfd/wm5110-tables.c index 2822768f2df1..9b98ee559188 100644 --- a/drivers/mfd/wm5110-tables.c +++ b/drivers/mfd/wm5110-tables.c @@ -457,6 +457,209 @@ const struct regmap_irq_chip wm5110_irq = { }; EXPORT_SYMBOL_GPL(wm5110_irq); +static const struct regmap_irq wm5110_revd_irqs[ARIZONA_NUM_IRQ] = { + [ARIZONA_IRQ_GP4] = { .reg_offset = 0, .mask = ARIZONA_GP4_EINT1 }, + [ARIZONA_IRQ_GP3] = { .reg_offset = 0, .mask = ARIZONA_GP3_EINT1 }, + [ARIZONA_IRQ_GP2] = { .reg_offset = 0, .mask = ARIZONA_GP2_EINT1 }, + [ARIZONA_IRQ_GP1] = { .reg_offset = 0, .mask = ARIZONA_GP1_EINT1 }, + + [ARIZONA_IRQ_DSP4_RAM_RDY] = { + .reg_offset = 1, .mask = ARIZONA_DSP4_RAM_RDY_EINT1 + }, + [ARIZONA_IRQ_DSP3_RAM_RDY] = { + .reg_offset = 1, .mask = ARIZONA_DSP3_RAM_RDY_EINT1 + }, + [ARIZONA_IRQ_DSP2_RAM_RDY] = { + .reg_offset = 1, .mask = ARIZONA_DSP2_RAM_RDY_EINT1 + }, + [ARIZONA_IRQ_DSP1_RAM_RDY] = { + .reg_offset = 1, .mask = ARIZONA_DSP1_RAM_RDY_EINT1 + }, + [ARIZONA_IRQ_DSP_IRQ8] = { + .reg_offset = 1, .mask = ARIZONA_DSP_IRQ8_EINT1 + }, + [ARIZONA_IRQ_DSP_IRQ7] = { + .reg_offset = 1, .mask = ARIZONA_DSP_IRQ7_EINT1 + }, + [ARIZONA_IRQ_DSP_IRQ6] = { + .reg_offset = 1, .mask = ARIZONA_DSP_IRQ6_EINT1 + }, + [ARIZONA_IRQ_DSP_IRQ5] = { + .reg_offset = 1, .mask = ARIZONA_DSP_IRQ5_EINT1 + }, + [ARIZONA_IRQ_DSP_IRQ4] = { + .reg_offset = 1, .mask = ARIZONA_DSP_IRQ4_EINT1 + }, + [ARIZONA_IRQ_DSP_IRQ3] = { + .reg_offset = 1, .mask = ARIZONA_DSP_IRQ3_EINT1 + }, + [ARIZONA_IRQ_DSP_IRQ2] = { + .reg_offset = 1, .mask = ARIZONA_DSP_IRQ2_EINT1 + }, + [ARIZONA_IRQ_DSP_IRQ1] = { + .reg_offset = 1, .mask = ARIZONA_DSP_IRQ1_EINT1 + }, + + [ARIZONA_IRQ_SPK_OVERHEAT_WARN] = { + .reg_offset = 2, .mask = ARIZONA_SPK_OVERHEAT_WARN_EINT1 + }, + [ARIZONA_IRQ_SPK_OVERHEAT] = { + .reg_offset = 2, .mask = ARIZONA_SPK_OVERHEAT_EINT1 + }, + [ARIZONA_IRQ_HPDET] = { + .reg_offset = 2, .mask = ARIZONA_HPDET_EINT1 + }, + [ARIZONA_IRQ_MICDET] = { + .reg_offset = 2, .mask = ARIZONA_MICDET_EINT1 + }, + [ARIZONA_IRQ_WSEQ_DONE] = { + .reg_offset = 2, .mask = ARIZONA_WSEQ_DONE_EINT1 + }, + [ARIZONA_IRQ_DRC2_SIG_DET] = { + .reg_offset = 2, .mask 
= ARIZONA_DRC2_SIG_DET_EINT1 + }, + [ARIZONA_IRQ_DRC1_SIG_DET] = { + .reg_offset = 2, .mask = ARIZONA_DRC1_SIG_DET_EINT1 + }, + [ARIZONA_IRQ_ASRC2_LOCK] = { + .reg_offset = 2, .mask = ARIZONA_ASRC2_LOCK_EINT1 + }, + [ARIZONA_IRQ_ASRC1_LOCK] = { + .reg_offset = 2, .mask = ARIZONA_ASRC1_LOCK_EINT1 + }, + [ARIZONA_IRQ_UNDERCLOCKED] = { + .reg_offset = 2, .mask = ARIZONA_UNDERCLOCKED_EINT1 + }, + [ARIZONA_IRQ_OVERCLOCKED] = { + .reg_offset = 2, .mask = ARIZONA_OVERCLOCKED_EINT1 + }, + [ARIZONA_IRQ_FLL2_LOCK] = { + .reg_offset = 2, .mask = ARIZONA_FLL2_LOCK_EINT1 + }, + [ARIZONA_IRQ_FLL1_LOCK] = { + .reg_offset = 2, .mask = ARIZONA_FLL1_LOCK_EINT1 + }, + [ARIZONA_IRQ_CLKGEN_ERR] = { + .reg_offset = 2, .mask = ARIZONA_CLKGEN_ERR_EINT1 + }, + [ARIZONA_IRQ_CLKGEN_ERR_ASYNC] = { + .reg_offset = 2, .mask = ARIZONA_CLKGEN_ERR_ASYNC_EINT1 + }, + + [ARIZONA_IRQ_CTRLIF_ERR] = { + .reg_offset = 3, .mask = ARIZONA_V2_CTRLIF_ERR_EINT1 + }, + [ARIZONA_IRQ_MIXER_DROPPED_SAMPLES] = { + .reg_offset = 3, .mask = ARIZONA_V2_MIXER_DROPPED_SAMPLE_EINT1 + }, + [ARIZONA_IRQ_ASYNC_CLK_ENA_LOW] = { + .reg_offset = 3, .mask = ARIZONA_V2_ASYNC_CLK_ENA_LOW_EINT1 + }, + [ARIZONA_IRQ_SYSCLK_ENA_LOW] = { + .reg_offset = 3, .mask = ARIZONA_V2_SYSCLK_ENA_LOW_EINT1 + }, + [ARIZONA_IRQ_ISRC1_CFG_ERR] = { + .reg_offset = 3, .mask = ARIZONA_V2_ISRC1_CFG_ERR_EINT1 + }, + [ARIZONA_IRQ_ISRC2_CFG_ERR] = { + .reg_offset = 3, .mask = ARIZONA_V2_ISRC2_CFG_ERR_EINT1 + }, + [ARIZONA_IRQ_ISRC3_CFG_ERR] = { + .reg_offset = 3, .mask = ARIZONA_V2_ISRC3_CFG_ERR_EINT1 + }, + [ARIZONA_IRQ_HP3R_DONE] = { + .reg_offset = 3, .mask = ARIZONA_HP3R_DONE_EINT1 + }, + [ARIZONA_IRQ_HP3L_DONE] = { + .reg_offset = 3, .mask = ARIZONA_HP3L_DONE_EINT1 + }, + [ARIZONA_IRQ_HP2R_DONE] = { + .reg_offset = 3, .mask = ARIZONA_HP2R_DONE_EINT1 + }, + [ARIZONA_IRQ_HP2L_DONE] = { + .reg_offset = 3, .mask = ARIZONA_HP2L_DONE_EINT1 + }, + [ARIZONA_IRQ_HP1R_DONE] = { + .reg_offset = 3, .mask = ARIZONA_HP1R_DONE_EINT1 + }, + [ARIZONA_IRQ_HP1L_DONE] = { + .reg_offset = 3, .mask = ARIZONA_HP1L_DONE_EINT1 + }, + + [ARIZONA_IRQ_BOOT_DONE] = { + .reg_offset = 4, .mask = ARIZONA_BOOT_DONE_EINT1 + }, + [ARIZONA_IRQ_ASRC_CFG_ERR] = { + .reg_offset = 4, .mask = ARIZONA_V2_ASRC_CFG_ERR_EINT1 + }, + [ARIZONA_IRQ_FLL2_CLOCK_OK] = { + .reg_offset = 4, .mask = ARIZONA_FLL2_CLOCK_OK_EINT1 + }, + [ARIZONA_IRQ_FLL1_CLOCK_OK] = { + .reg_offset = 4, .mask = ARIZONA_FLL1_CLOCK_OK_EINT1 + }, + + [ARIZONA_IRQ_DSP_SHARED_WR_COLL] = { + .reg_offset = 5, .mask = ARIZONA_DSP_SHARED_WR_COLL_EINT1 + }, + [ARIZONA_IRQ_SPK_SHUTDOWN] = { + .reg_offset = 5, .mask = ARIZONA_SPK_SHUTDOWN_EINT1 + }, + [ARIZONA_IRQ_SPK1R_SHORT] = { + .reg_offset = 5, .mask = ARIZONA_SPK1R_SHORT_EINT1 + }, + [ARIZONA_IRQ_SPK1L_SHORT] = { + .reg_offset = 5, .mask = ARIZONA_SPK1L_SHORT_EINT1 + }, + [ARIZONA_IRQ_HP3R_SC_NEG] = { + .reg_offset = 5, .mask = ARIZONA_HP3R_SC_NEG_EINT1 + }, + [ARIZONA_IRQ_HP3R_SC_POS] = { + .reg_offset = 5, .mask = ARIZONA_HP3R_SC_POS_EINT1 + }, + [ARIZONA_IRQ_HP3L_SC_NEG] = { + .reg_offset = 5, .mask = ARIZONA_HP3L_SC_NEG_EINT1 + }, + [ARIZONA_IRQ_HP3L_SC_POS] = { + .reg_offset = 5, .mask = ARIZONA_HP3L_SC_POS_EINT1 + }, + [ARIZONA_IRQ_HP2R_SC_NEG] = { + .reg_offset = 5, .mask = ARIZONA_HP2R_SC_NEG_EINT1 + }, + [ARIZONA_IRQ_HP2R_SC_POS] = { + .reg_offset = 5, .mask = ARIZONA_HP2R_SC_POS_EINT1 + }, + [ARIZONA_IRQ_HP2L_SC_NEG] = { + .reg_offset = 5, .mask = ARIZONA_HP2L_SC_NEG_EINT1 + }, + [ARIZONA_IRQ_HP2L_SC_POS] = { + .reg_offset = 5, .mask = ARIZONA_HP2L_SC_POS_EINT1 + }, + 
[ARIZONA_IRQ_HP1R_SC_NEG] = { + .reg_offset = 5, .mask = ARIZONA_HP1R_SC_NEG_EINT1 + }, + [ARIZONA_IRQ_HP1R_SC_POS] = { + .reg_offset = 5, .mask = ARIZONA_HP1R_SC_POS_EINT1 + }, + [ARIZONA_IRQ_HP1L_SC_NEG] = { + .reg_offset = 5, .mask = ARIZONA_HP1L_SC_NEG_EINT1 + }, + [ARIZONA_IRQ_HP1L_SC_POS] = { + .reg_offset = 5, .mask = ARIZONA_HP1L_SC_POS_EINT1 + }, +}; + +const struct regmap_irq_chip wm5110_revd_irq = { + .name = "wm5110 IRQ", + .status_base = ARIZONA_INTERRUPT_STATUS_1, + .mask_base = ARIZONA_INTERRUPT_STATUS_1_MASK, + .ack_base = ARIZONA_INTERRUPT_STATUS_1, + .num_regs = 6, + .irqs = wm5110_revd_irqs, + .num_irqs = ARRAY_SIZE(wm5110_revd_irqs), +}; +EXPORT_SYMBOL_GPL(wm5110_revd_irq); + static const struct reg_default wm5110_reg_default[] = { { 0x00000008, 0x0019 }, /* R8 - Ctrl IF SPI CFG 1 */ { 0x00000009, 0x0001 }, /* R9 - Ctrl IF I2C1 CFG 1 */ @@ -1286,12 +1489,14 @@ static const struct reg_default wm5110_reg_default[] = { { 0x00000D0A, 0xFFFF }, /* R3338 - Interrupt Status 3 Mask */ { 0x00000D0B, 0xFFFF }, /* R3339 - Interrupt Status 4 Mask */ { 0x00000D0C, 0xFEFF }, /* R3340 - Interrupt Status 5 Mask */ + { 0x00000D0D, 0xFFFF }, /* R3341 - Interrupt Status 6 Mask */ { 0x00000D0F, 0x0000 }, /* R3343 - Interrupt Control */ { 0x00000D18, 0xFFFF }, /* R3352 - IRQ2 Status 1 Mask */ { 0x00000D19, 0xFFFF }, /* R3353 - IRQ2 Status 2 Mask */ { 0x00000D1A, 0xFFFF }, /* R3354 - IRQ2 Status 3 Mask */ { 0x00000D1B, 0xFFFF }, /* R3355 - IRQ2 Status 4 Mask */ { 0x00000D1C, 0xFFFF }, /* R3356 - IRQ2 Status 5 Mask */ + { 0x00000D1D, 0xFFFF }, /* R3357 - IRQ2 Status 6 Mask */ { 0x00000D1F, 0x0000 }, /* R3359 - IRQ2 Control */ { 0x00000D53, 0xFFFF }, /* R3411 - AOD IRQ Mask IRQ1 */ { 0x00000D54, 0xFFFF }, /* R3412 - AOD IRQ Mask IRQ2 */ @@ -2323,22 +2528,26 @@ static bool wm5110_readable_register(struct device *dev, unsigned int reg) case ARIZONA_INTERRUPT_STATUS_3: case ARIZONA_INTERRUPT_STATUS_4: case ARIZONA_INTERRUPT_STATUS_5: + case ARIZONA_INTERRUPT_STATUS_6: case ARIZONA_INTERRUPT_STATUS_1_MASK: case ARIZONA_INTERRUPT_STATUS_2_MASK: case ARIZONA_INTERRUPT_STATUS_3_MASK: case ARIZONA_INTERRUPT_STATUS_4_MASK: case ARIZONA_INTERRUPT_STATUS_5_MASK: + case ARIZONA_INTERRUPT_STATUS_6_MASK: case ARIZONA_INTERRUPT_CONTROL: case ARIZONA_IRQ2_STATUS_1: case ARIZONA_IRQ2_STATUS_2: case ARIZONA_IRQ2_STATUS_3: case ARIZONA_IRQ2_STATUS_4: case ARIZONA_IRQ2_STATUS_5: + case ARIZONA_IRQ2_STATUS_6: case ARIZONA_IRQ2_STATUS_1_MASK: case ARIZONA_IRQ2_STATUS_2_MASK: case ARIZONA_IRQ2_STATUS_3_MASK: case ARIZONA_IRQ2_STATUS_4_MASK: case ARIZONA_IRQ2_STATUS_5_MASK: + case ARIZONA_IRQ2_STATUS_6_MASK: case ARIZONA_IRQ2_CONTROL: case ARIZONA_INTERRUPT_RAW_STATUS_2: case ARIZONA_INTERRUPT_RAW_STATUS_3: @@ -2347,6 +2556,7 @@ static bool wm5110_readable_register(struct device *dev, unsigned int reg) case ARIZONA_INTERRUPT_RAW_STATUS_6: case ARIZONA_INTERRUPT_RAW_STATUS_7: case ARIZONA_INTERRUPT_RAW_STATUS_8: + case ARIZONA_INTERRUPT_RAW_STATUS_9: case ARIZONA_IRQ_PIN_STATUS: case ARIZONA_AOD_WKUP_AND_TRIG: case ARIZONA_AOD_IRQ1: @@ -2622,11 +2832,13 @@ static bool wm5110_volatile_register(struct device *dev, unsigned int reg) case ARIZONA_INTERRUPT_STATUS_3: case ARIZONA_INTERRUPT_STATUS_4: case ARIZONA_INTERRUPT_STATUS_5: + case ARIZONA_INTERRUPT_STATUS_6: case ARIZONA_IRQ2_STATUS_1: case ARIZONA_IRQ2_STATUS_2: case ARIZONA_IRQ2_STATUS_3: case ARIZONA_IRQ2_STATUS_4: case ARIZONA_IRQ2_STATUS_5: + case ARIZONA_IRQ2_STATUS_6: case ARIZONA_INTERRUPT_RAW_STATUS_2: case ARIZONA_INTERRUPT_RAW_STATUS_3: case 
ARIZONA_INTERRUPT_RAW_STATUS_4: @@ -2634,6 +2846,7 @@ static bool wm5110_volatile_register(struct device *dev, unsigned int reg) case ARIZONA_INTERRUPT_RAW_STATUS_6: case ARIZONA_INTERRUPT_RAW_STATUS_7: case ARIZONA_INTERRUPT_RAW_STATUS_8: + case ARIZONA_INTERRUPT_RAW_STATUS_9: case ARIZONA_IRQ_PIN_STATUS: case ARIZONA_AOD_WKUP_AND_TRIG: case ARIZONA_AOD_IRQ1: diff --git a/include/linux/mfd/arizona/core.h b/include/linux/mfd/arizona/core.h index 819edf5d1edf..8bc7601cca68 100644 --- a/include/linux/mfd/arizona/core.h +++ b/include/linux/mfd/arizona/core.h @@ -84,8 +84,25 @@ enum arizona_type { #define ARIZONA_IRQ_HP2L_DONE 55 #define ARIZONA_IRQ_HP1R_DONE 56 #define ARIZONA_IRQ_HP1L_DONE 57 - -#define ARIZONA_NUM_IRQ 58 +#define ARIZONA_IRQ_ISRC3_CFG_ERR 58 +#define ARIZONA_IRQ_DSP_SHARED_WR_COLL 59 +#define ARIZONA_IRQ_SPK_SHUTDOWN 60 +#define ARIZONA_IRQ_SPK1R_SHORT 61 +#define ARIZONA_IRQ_SPK1L_SHORT 62 +#define ARIZONA_IRQ_HP3R_SC_NEG 63 +#define ARIZONA_IRQ_HP3R_SC_POS 64 +#define ARIZONA_IRQ_HP3L_SC_NEG 65 +#define ARIZONA_IRQ_HP3L_SC_POS 66 +#define ARIZONA_IRQ_HP2R_SC_NEG 67 +#define ARIZONA_IRQ_HP2R_SC_POS 68 +#define ARIZONA_IRQ_HP2L_SC_NEG 69 +#define ARIZONA_IRQ_HP2L_SC_POS 70 +#define ARIZONA_IRQ_HP1R_SC_NEG 71 +#define ARIZONA_IRQ_HP1R_SC_POS 72 +#define ARIZONA_IRQ_HP1L_SC_NEG 73 +#define ARIZONA_IRQ_HP1L_SC_POS 74 + +#define ARIZONA_NUM_IRQ 75 struct snd_soc_dapm_context; diff --git a/include/linux/mfd/arizona/registers.h b/include/linux/mfd/arizona/registers.h index f7d6f9e91da1..dbd23c36de21 100644 --- a/include/linux/mfd/arizona/registers.h +++ b/include/linux/mfd/arizona/registers.h @@ -878,22 +878,26 @@ #define ARIZONA_INTERRUPT_STATUS_3 0xD02 #define ARIZONA_INTERRUPT_STATUS_4 0xD03 #define ARIZONA_INTERRUPT_STATUS_5 0xD04 +#define ARIZONA_INTERRUPT_STATUS_6 0xD05 #define ARIZONA_INTERRUPT_STATUS_1_MASK 0xD08 #define ARIZONA_INTERRUPT_STATUS_2_MASK 0xD09 #define ARIZONA_INTERRUPT_STATUS_3_MASK 0xD0A #define ARIZONA_INTERRUPT_STATUS_4_MASK 0xD0B #define ARIZONA_INTERRUPT_STATUS_5_MASK 0xD0C +#define ARIZONA_INTERRUPT_STATUS_6_MASK 0xD0D #define ARIZONA_INTERRUPT_CONTROL 0xD0F #define ARIZONA_IRQ2_STATUS_1 0xD10 #define ARIZONA_IRQ2_STATUS_2 0xD11 #define ARIZONA_IRQ2_STATUS_3 0xD12 #define ARIZONA_IRQ2_STATUS_4 0xD13 #define ARIZONA_IRQ2_STATUS_5 0xD14 +#define ARIZONA_IRQ2_STATUS_6 0xD15 #define ARIZONA_IRQ2_STATUS_1_MASK 0xD18 #define ARIZONA_IRQ2_STATUS_2_MASK 0xD19 #define ARIZONA_IRQ2_STATUS_3_MASK 0xD1A #define ARIZONA_IRQ2_STATUS_4_MASK 0xD1B #define ARIZONA_IRQ2_STATUS_5_MASK 0xD1C +#define ARIZONA_IRQ2_STATUS_6_MASK 0xD1D #define ARIZONA_IRQ2_CONTROL 0xD1F #define ARIZONA_INTERRUPT_RAW_STATUS_2 0xD20 #define ARIZONA_INTERRUPT_RAW_STATUS_3 0xD21 @@ -902,6 +906,7 @@ #define ARIZONA_INTERRUPT_RAW_STATUS_6 0xD24 #define ARIZONA_INTERRUPT_RAW_STATUS_7 0xD25 #define ARIZONA_INTERRUPT_RAW_STATUS_8 0xD26 +#define ARIZONA_INTERRUPT_RAW_STATUS_9 0xD28 #define ARIZONA_IRQ_PIN_STATUS 0xD40 #define ARIZONA_ADSP2_IRQ0 0xD41 #define ARIZONA_AOD_WKUP_AND_TRIG 0xD50 @@ -4820,6 +4825,53 @@ #define ARIZONA_HP1L_DONE_EINT1_SHIFT 0 /* HP1L_DONE_EINT1 */ #define ARIZONA_HP1L_DONE_EINT1_WIDTH 1 /* HP1L_DONE_EINT1 */ +/* + * R3331 (0xD03) - Interrupt Status 4 (Alternate layout) + * + * Alternate layout used on later devices, note only fields that have moved + * are specified + */ +#define ARIZONA_V2_AIF3_ERR_EINT1 0x8000 /* AIF3_ERR_EINT1 */ +#define ARIZONA_V2_AIF3_ERR_EINT1_MASK 0x8000 /* AIF3_ERR_EINT1 */ +#define ARIZONA_V2_AIF3_ERR_EINT1_SHIFT 15 /* AIF3_ERR_EINT1 */ +#define 
ARIZONA_V2_AIF3_ERR_EINT1_WIDTH 1 /* AIF3_ERR_EINT1 */ +#define ARIZONA_V2_AIF2_ERR_EINT1 0x4000 /* AIF2_ERR_EINT1 */ +#define ARIZONA_V2_AIF2_ERR_EINT1_MASK 0x4000 /* AIF2_ERR_EINT1 */ +#define ARIZONA_V2_AIF2_ERR_EINT1_SHIFT 14 /* AIF2_ERR_EINT1 */ +#define ARIZONA_V2_AIF2_ERR_EINT1_WIDTH 1 /* AIF2_ERR_EINT1 */ +#define ARIZONA_V2_AIF1_ERR_EINT1 0x2000 /* AIF1_ERR_EINT1 */ +#define ARIZONA_V2_AIF1_ERR_EINT1_MASK 0x2000 /* AIF1_ERR_EINT1 */ +#define ARIZONA_V2_AIF1_ERR_EINT1_SHIFT 13 /* AIF1_ERR_EINT1 */ +#define ARIZONA_V2_AIF1_ERR_EINT1_WIDTH 1 /* AIF1_ERR_EINT1 */ +#define ARIZONA_V2_CTRLIF_ERR_EINT1 0x1000 /* CTRLIF_ERR_EINT1 */ +#define ARIZONA_V2_CTRLIF_ERR_EINT1_MASK 0x1000 /* CTRLIF_ERR_EINT1 */ +#define ARIZONA_V2_CTRLIF_ERR_EINT1_SHIFT 12 /* CTRLIF_ERR_EINT1 */ +#define ARIZONA_V2_CTRLIF_ERR_EINT1_WIDTH 1 /* CTRLIF_ERR_EINT1 */ +#define ARIZONA_V2_MIXER_DROPPED_SAMPLE_EINT1 0x0800 /* MIXER_DROPPED_SAMPLE_EINT1 */ +#define ARIZONA_V2_MIXER_DROPPED_SAMPLE_EINT1_MASK 0x0800 /* MIXER_DROPPED_SAMPLE_EINT1 */ +#define ARIZONA_V2_MIXER_DROPPED_SAMPLE_EINT1_SHIFT 11 /* MIXER_DROPPED_SAMPLE_EINT1 */ +#define ARIZONA_V2_MIXER_DROPPED_SAMPLE_EINT1_WIDTH 1 /* MIXER_DROPPED_SAMPLE_EINT1 */ +#define ARIZONA_V2_ASYNC_CLK_ENA_LOW_EINT1 0x0400 /* ASYNC_CLK_ENA_LOW_EINT1 */ +#define ARIZONA_V2_ASYNC_CLK_ENA_LOW_EINT1_MASK 0x0400 /* ASYNC_CLK_ENA_LOW_EINT1 */ +#define ARIZONA_V2_ASYNC_CLK_ENA_LOW_EINT1_SHIFT 10 /* ASYNC_CLK_ENA_LOW_EINT1 */ +#define ARIZONA_V2_ASYNC_CLK_ENA_LOW_EINT1_WIDTH 1 /* ASYNC_CLK_ENA_LOW_EINT1 */ +#define ARIZONA_V2_SYSCLK_ENA_LOW_EINT1 0x0200 /* SYSCLK_ENA_LOW_EINT1 */ +#define ARIZONA_V2_SYSCLK_ENA_LOW_EINT1_MASK 0x0200 /* SYSCLK_ENA_LOW_EINT1 */ +#define ARIZONA_V2_SYSCLK_ENA_LOW_EINT1_SHIFT 9 /* SYSCLK_ENA_LOW_EINT1 */ +#define ARIZONA_V2_SYSCLK_ENA_LOW_EINT1_WIDTH 1 /* SYSCLK_ENA_LOW_EINT1 */ +#define ARIZONA_V2_ISRC1_CFG_ERR_EINT1 0x0100 /* ISRC1_CFG_ERR_EINT1 */ +#define ARIZONA_V2_ISRC1_CFG_ERR_EINT1_MASK 0x0100 /* ISRC1_CFG_ERR_EINT1 */ +#define ARIZONA_V2_ISRC1_CFG_ERR_EINT1_SHIFT 8 /* ISRC1_CFG_ERR_EINT1 */ +#define ARIZONA_V2_ISRC1_CFG_ERR_EINT1_WIDTH 1 /* ISRC1_CFG_ERR_EINT1 */ +#define ARIZONA_V2_ISRC2_CFG_ERR_EINT1 0x0080 /* ISRC2_CFG_ERR_EINT1 */ +#define ARIZONA_V2_ISRC2_CFG_ERR_EINT1_MASK 0x0080 /* ISRC2_CFG_ERR_EINT1 */ +#define ARIZONA_V2_ISRC2_CFG_ERR_EINT1_SHIFT 7 /* ISRC2_CFG_ERR_EINT1 */ +#define ARIZONA_V2_ISRC2_CFG_ERR_EINT1_WIDTH 1 /* ISRC2_CFG_ERR_EINT1 */ +#define ARIZONA_V2_ISRC3_CFG_ERR_EINT1 0x0040 /* ISRC3_CFG_ERR_EINT1 */ +#define ARIZONA_V2_ISRC3_CFG_ERR_EINT1_MASK 0x0040 /* ISRC3_CFG_ERR_EINT1 */ +#define ARIZONA_V2_ISRC3_CFG_ERR_EINT1_SHIFT 6 /* ISRC3_CFG_ERR_EINT1 */ +#define ARIZONA_V2_ISRC3_CFG_ERR_EINT1_WIDTH 1 /* ISRC3_CFG_ERR_EINT1 */ + /* * R3332 (0xD04) - Interrupt Status 5 */ @@ -4844,6 +4896,85 @@ #define ARIZONA_FLL1_CLOCK_OK_EINT1_SHIFT 0 /* FLL1_CLOCK_OK_EINT1 */ #define ARIZONA_FLL1_CLOCK_OK_EINT1_WIDTH 1 /* FLL1_CLOCK_OK_EINT1 */ +/* + * R3332 (0xD05) - Interrupt Status 5 (Alternate layout) + * + * Alternate layout used on later devices, note only fields that have moved + * are specified + */ +#define ARIZONA_V2_ASRC_CFG_ERR_EINT1 0x0008 /* ASRC_CFG_ERR_EINT1 */ +#define ARIZONA_V2_ASRC_CFG_ERR_EINT1_MASK 0x0008 /* ASRC_CFG_ERR_EINT1 */ +#define ARIZONA_V2_ASRC_CFG_ERR_EINT1_SHIFT 3 /* ASRC_CFG_ERR_EINT1 */ +#define ARIZONA_V2_ASRC_CFG_ERR_EINT1_WIDTH 1 /* ASRC_CFG_ERR_EINT1 */ + +/* + * R3333 (0xD05) - Interrupt Status 6 + */ +#define ARIZONA_DSP_SHARED_WR_COLL_EINT1 0x8000 /* DSP_SHARED_WR_COLL_EINT1 */ +#define 
ARIZONA_DSP_SHARED_WR_COLL_EINT1_MASK 0x8000 /* DSP_SHARED_WR_COLL_EINT1 */ +#define ARIZONA_DSP_SHARED_WR_COLL_EINT1_SHIFT 15 /* DSP_SHARED_WR_COLL_EINT1 */ +#define ARIZONA_DSP_SHARED_WR_COLL_EINT1_WIDTH 1 /* DSP_SHARED_WR_COLL_EINT1 */ +#define ARIZONA_SPK_SHUTDOWN_EINT1 0x4000 /* SPK_SHUTDOWN_EINT1 */ +#define ARIZONA_SPK_SHUTDOWN_EINT1_MASK 0x4000 /* SPK_SHUTDOWN_EINT1 */ +#define ARIZONA_SPK_SHUTDOWN_EINT1_SHIFT 14 /* SPK_SHUTDOWN_EINT1 */ +#define ARIZONA_SPK_SHUTDOWN_EINT1_WIDTH 1 /* SPK_SHUTDOWN_EINT1 */ +#define ARIZONA_SPK1R_SHORT_EINT1 0x2000 /* SPK1R_SHORT_EINT1 */ +#define ARIZONA_SPK1R_SHORT_EINT1_MASK 0x2000 /* SPK1R_SHORT_EINT1 */ +#define ARIZONA_SPK1R_SHORT_EINT1_SHIFT 13 /* SPK1R_SHORT_EINT1 */ +#define ARIZONA_SPK1R_SHORT_EINT1_WIDTH 1 /* SPK1R_SHORT_EINT1 */ +#define ARIZONA_SPK1L_SHORT_EINT1 0x1000 /* SPK1L_SHORT_EINT1 */ +#define ARIZONA_SPK1L_SHORT_EINT1_MASK 0x1000 /* SPK1L_SHORT_EINT1 */ +#define ARIZONA_SPK1L_SHORT_EINT1_SHIFT 12 /* SPK1L_SHORT_EINT1 */ +#define ARIZONA_SPK1L_SHORT_EINT1_WIDTH 1 /* SPK1L_SHORT_EINT1 */ +#define ARIZONA_HP3R_SC_NEG_EINT1 0x0800 /* HP3R_SC_NEG_EINT1 */ +#define ARIZONA_HP3R_SC_NEG_EINT1_MASK 0x0800 /* HP3R_SC_NEG_EINT1 */ +#define ARIZONA_HP3R_SC_NEG_EINT1_SHIFT 11 /* HP3R_SC_NEG_EINT1 */ +#define ARIZONA_HP3R_SC_NEG_EINT1_WIDTH 1 /* HP3R_SC_NEG_EINT1 */ +#define ARIZONA_HP3R_SC_POS_EINT1 0x0400 /* HP3R_SC_POS_EINT1 */ +#define ARIZONA_HP3R_SC_POS_EINT1_MASK 0x0400 /* HP3R_SC_POS_EINT1 */ +#define ARIZONA_HP3R_SC_POS_EINT1_SHIFT 10 /* HP3R_SC_POS_EINT1 */ +#define ARIZONA_HP3R_SC_POS_EINT1_WIDTH 1 /* HP3R_SC_POS_EINT1 */ +#define ARIZONA_HP3L_SC_NEG_EINT1 0x0200 /* HP3L_SC_NEG_EINT1 */ +#define ARIZONA_HP3L_SC_NEG_EINT1_MASK 0x0200 /* HP3L_SC_NEG_EINT1 */ +#define ARIZONA_HP3L_SC_NEG_EINT1_SHIFT 9 /* HP3L_SC_NEG_EINT1 */ +#define ARIZONA_HP3L_SC_NEG_EINT1_WIDTH 1 /* HP3L_SC_NEG_EINT1 */ +#define ARIZONA_HP3L_SC_POS_EINT1 0x0100 /* HP3L_SC_POS_EINT1 */ +#define ARIZONA_HP3L_SC_POS_EINT1_MASK 0x0100 /* HP3L_SC_POS_EINT1 */ +#define ARIZONA_HP3L_SC_POS_EINT1_SHIFT 8 /* HP3L_SC_POS_EINT1 */ +#define ARIZONA_HP3L_SC_POS_EINT1_WIDTH 1 /* HP3L_SC_POS_EINT1 */ +#define ARIZONA_HP2R_SC_NEG_EINT1 0x0080 /* HP2R_SC_NEG_EINT1 */ +#define ARIZONA_HP2R_SC_NEG_EINT1_MASK 0x0080 /* HP2R_SC_NEG_EINT1 */ +#define ARIZONA_HP2R_SC_NEG_EINT1_SHIFT 7 /* HP2R_SC_NEG_EINT1 */ +#define ARIZONA_HP2R_SC_NEG_EINT1_WIDTH 1 /* HP2R_SC_NEG_EINT1 */ +#define ARIZONA_HP2R_SC_POS_EINT1 0x0040 /* HP2R_SC_POS_EINT1 */ +#define ARIZONA_HP2R_SC_POS_EINT1_MASK 0x0040 /* HP2R_SC_POS_EINT1 */ +#define ARIZONA_HP2R_SC_POS_EINT1_SHIFT 6 /* HP2R_SC_POS_EINT1 */ +#define ARIZONA_HP2R_SC_POS_EINT1_WIDTH 1 /* HP2R_SC_POS_EINT1 */ +#define ARIZONA_HP2L_SC_NEG_EINT1 0x0020 /* HP2L_SC_NEG_EINT1 */ +#define ARIZONA_HP2L_SC_NEG_EINT1_MASK 0x0020 /* HP2L_SC_NEG_EINT1 */ +#define ARIZONA_HP2L_SC_NEG_EINT1_SHIFT 5 /* HP2L_SC_NEG_EINT1 */ +#define ARIZONA_HP2L_SC_NEG_EINT1_WIDTH 1 /* HP2L_SC_NEG_EINT1 */ +#define ARIZONA_HP2L_SC_POS_EINT1 0x0010 /* HP2L_SC_POS_EINT1 */ +#define ARIZONA_HP2L_SC_POS_EINT1_MASK 0x0010 /* HP2L_SC_POS_EINT1 */ +#define ARIZONA_HP2L_SC_POS_EINT1_SHIFT 4 /* HP2L_SC_POS_EINT1 */ +#define ARIZONA_HP2L_SC_POS_EINT1_WIDTH 1 /* HP2L_SC_POS_EINT1 */ +#define ARIZONA_HP1R_SC_NEG_EINT1 0x0008 /* HP1R_SC_NEG_EINT1 */ +#define ARIZONA_HP1R_SC_NEG_EINT1_MASK 0x0008 /* HP1R_SC_NEG_EINT1 */ +#define ARIZONA_HP1R_SC_NEG_EINT1_SHIFT 3 /* HP1R_SC_NEG_EINT1 */ +#define ARIZONA_HP1R_SC_NEG_EINT1_WIDTH 1 /* HP1R_SC_NEG_EINT1 */ +#define ARIZONA_HP1R_SC_POS_EINT1 0x0004 /* 
HP1R_SC_POS_EINT1 */ +#define ARIZONA_HP1R_SC_POS_EINT1_MASK 0x0004 /* HP1R_SC_POS_EINT1 */ +#define ARIZONA_HP1R_SC_POS_EINT1_SHIFT 2 /* HP1R_SC_POS_EINT1 */ +#define ARIZONA_HP1R_SC_POS_EINT1_WIDTH 1 /* HP1R_SC_POS_EINT1 */ +#define ARIZONA_HP1L_SC_NEG_EINT1 0x0002 /* HP1L_SC_NEG_EINT1 */ +#define ARIZONA_HP1L_SC_NEG_EINT1_MASK 0x0002 /* HP1L_SC_NEG_EINT1 */ +#define ARIZONA_HP1L_SC_NEG_EINT1_SHIFT 1 /* HP1L_SC_NEG_EINT1 */ +#define ARIZONA_HP1L_SC_NEG_EINT1_WIDTH 1 /* HP1L_SC_NEG_EINT1 */ +#define ARIZONA_HP1L_SC_POS_EINT1 0x0001 /* HP1L_SC_POS_EINT1 */ +#define ARIZONA_HP1L_SC_POS_EINT1_MASK 0x0001 /* HP1L_SC_POS_EINT1 */ +#define ARIZONA_HP1L_SC_POS_EINT1_SHIFT 0 /* HP1L_SC_POS_EINT1 */ +#define ARIZONA_HP1L_SC_POS_EINT1_WIDTH 1 /* HP1L_SC_POS_EINT1 */ + /* * R3336 (0xD08) - Interrupt Status 1 Mask */ @@ -5012,6 +5143,53 @@ #define ARIZONA_IM_HP1L_DONE_EINT1_SHIFT 0 /* IM_HP1L_DONE_EINT1 */ #define ARIZONA_IM_HP1L_DONE_EINT1_WIDTH 1 /* IM_HP1L_DONE_EINT1 */ +/* + * R3339 (0xD0B) - Interrupt Status 4 Mask (Alternate layout) + * + * Alternate layout used on later devices, note only fields that have moved + * are specified + */ +#define ARIZONA_V2_IM_AIF3_ERR_EINT1 0x8000 /* IM_AIF3_ERR_EINT1 */ +#define ARIZONA_V2_IM_AIF3_ERR_EINT1_MASK 0x8000 /* IM_AIF3_ERR_EINT1 */ +#define ARIZONA_V2_IM_AIF3_ERR_EINT1_SHIFT 15 /* IM_AIF3_ERR_EINT1 */ +#define ARIZONA_V2_IM_AIF3_ERR_EINT1_WIDTH 1 /* IM_AIF3_ERR_EINT1 */ +#define ARIZONA_V2_IM_AIF2_ERR_EINT1 0x4000 /* IM_AIF2_ERR_EINT1 */ +#define ARIZONA_V2_IM_AIF2_ERR_EINT1_MASK 0x4000 /* IM_AIF2_ERR_EINT1 */ +#define ARIZONA_V2_IM_AIF2_ERR_EINT1_SHIFT 14 /* IM_AIF2_ERR_EINT1 */ +#define ARIZONA_V2_IM_AIF2_ERR_EINT1_WIDTH 1 /* IM_AIF2_ERR_EINT1 */ +#define ARIZONA_V2_IM_AIF1_ERR_EINT1 0x2000 /* IM_AIF1_ERR_EINT1 */ +#define ARIZONA_V2_IM_AIF1_ERR_EINT1_MASK 0x2000 /* IM_AIF1_ERR_EINT1 */ +#define ARIZONA_V2_IM_AIF1_ERR_EINT1_SHIFT 13 /* IM_AIF1_ERR_EINT1 */ +#define ARIZONA_V2_IM_AIF1_ERR_EINT1_WIDTH 1 /* IM_AIF1_ERR_EINT1 */ +#define ARIZONA_V2_IM_CTRLIF_ERR_EINT1 0x1000 /* IM_CTRLIF_ERR_EINT1 */ +#define ARIZONA_V2_IM_CTRLIF_ERR_EINT1_MASK 0x1000 /* IM_CTRLIF_ERR_EINT1 */ +#define ARIZONA_V2_IM_CTRLIF_ERR_EINT1_SHIFT 12 /* IM_CTRLIF_ERR_EINT1 */ +#define ARIZONA_V2_IM_CTRLIF_ERR_EINT1_WIDTH 1 /* IM_CTRLIF_ERR_EINT1 */ +#define ARIZONA_V2_IM_MIXER_DROPPED_SAMPLE_EINT1 0x0800 /* IM_MIXER_DROPPED_SAMPLE_EINT1 */ +#define ARIZONA_V2_IM_MIXER_DROPPED_SAMPLE_EINT1_MASK 0x0800 /* IM_MIXER_DROPPED_SAMPLE_EINT1 */ +#define ARIZONA_V2_IM_MIXER_DROPPED_SAMPLE_EINT1_SHIFT 11 /* IM_MIXER_DROPPED_SAMPLE_EINT1 */ +#define ARIZONA_V2_IM_MIXER_DROPPED_SAMPLE_EINT1_WIDTH 1 /* IM_MIXER_DROPPED_SAMPLE_EINT1 */ +#define ARIZONA_V2_IM_ASYNC_CLK_ENA_LOW_EINT1 0x0400 /* IM_ASYNC_CLK_ENA_LOW_EINT1 */ +#define ARIZONA_V2_IM_ASYNC_CLK_ENA_LOW_EINT1_MASK 0x0400 /* IM_ASYNC_CLK_ENA_LOW_EINT1 */ +#define ARIZONA_V2_IM_ASYNC_CLK_ENA_LOW_EINT1_SHIFT 10 /* IM_ASYNC_CLK_ENA_LOW_EINT1 */ +#define ARIZONA_V2_IM_ASYNC_CLK_ENA_LOW_EINT1_WIDTH 1 /* IM_ASYNC_CLK_ENA_LOW_EINT1 */ +#define ARIZONA_V2_IM_SYSCLK_ENA_LOW_EINT1 0x0200 /* IM_SYSCLK_ENA_LOW_EINT1 */ +#define ARIZONA_V2_IM_SYSCLK_ENA_LOW_EINT1_MASK 0x0200 /* IM_SYSCLK_ENA_LOW_EINT1 */ +#define ARIZONA_V2_IM_SYSCLK_ENA_LOW_EINT1_SHIFT 9 /* IM_SYSCLK_ENA_LOW_EINT1 */ +#define ARIZONA_V2_IM_SYSCLK_ENA_LOW_EINT1_WIDTH 1 /* IM_SYSCLK_ENA_LOW_EINT1 */ +#define ARIZONA_V2_IM_ISRC1_CFG_ERR_EINT1 0x0100 /* IM_ISRC1_CFG_ERR_EINT1 */ +#define ARIZONA_V2_IM_ISRC1_CFG_ERR_EINT1_MASK 0x0100 /* IM_ISRC1_CFG_ERR_EINT1 */ +#define 
ARIZONA_V2_IM_ISRC1_CFG_ERR_EINT1_SHIFT 8 /* IM_ISRC1_CFG_ERR_EINT1 */ +#define ARIZONA_V2_IM_ISRC1_CFG_ERR_EINT1_WIDTH 1 /* IM_ISRC1_CFG_ERR_EINT1 */ +#define ARIZONA_V2_IM_ISRC2_CFG_ERR_EINT1 0x0080 /* IM_ISRC2_CFG_ERR_EINT1 */ +#define ARIZONA_V2_IM_ISRC2_CFG_ERR_EINT1_MASK 0x0080 /* IM_ISRC2_CFG_ERR_EINT1 */ +#define ARIZONA_V2_IM_ISRC2_CFG_ERR_EINT1_SHIFT 7 /* IM_ISRC2_CFG_ERR_EINT1 */ +#define ARIZONA_V2_IM_ISRC2_CFG_ERR_EINT1_WIDTH 1 /* IM_ISRC2_CFG_ERR_EINT1 */ +#define ARIZONA_V2_IM_ISRC3_CFG_ERR_EINT1 0x0040 /* IM_ISRC3_CFG_ERR_EINT1 */ +#define ARIZONA_V2_IM_ISRC3_CFG_ERR_EINT1_MASK 0x0040 /* IM_ISRC3_CFG_ERR_EINT1 */ +#define ARIZONA_V2_IM_ISRC3_CFG_ERR_EINT1_SHIFT 6 /* IM_ISRC3_CFG_ERR_EINT1 */ +#define ARIZONA_V2_IM_ISRC3_CFG_ERR_EINT1_WIDTH 1 /* IM_ISRC3_CFG_ERR_EINT1 */ + /* * R3340 (0xD0C) - Interrupt Status 5 Mask */ @@ -5036,6 +5214,85 @@ #define ARIZONA_IM_FLL1_CLOCK_OK_EINT1_SHIFT 0 /* IM_FLL1_CLOCK_OK_EINT1 */ #define ARIZONA_IM_FLL1_CLOCK_OK_EINT1_WIDTH 1 /* IM_FLL1_CLOCK_OK_EINT1 */ +/* + * R3340 (0xD0C) - Interrupt Status 5 Mask (Alternate layout) + * + * Alternate layout used on later devices, note only fields that have moved + * are specified + */ +#define ARIZONA_V2_IM_ASRC_CFG_ERR_EINT1 0x0008 /* IM_ASRC_CFG_ERR_EINT1 */ +#define ARIZONA_V2_IM_ASRC_CFG_ERR_EINT1_MASK 0x0008 /* IM_ASRC_CFG_ERR_EINT1 */ +#define ARIZONA_V2_IM_ASRC_CFG_ERR_EINT1_SHIFT 3 /* IM_ASRC_CFG_ERR_EINT1 */ +#define ARIZONA_V2_IM_ASRC_CFG_ERR_EINT1_WIDTH 1 /* IM_ASRC_CFG_ERR_EINT1 */ + +/* + * R3341 (0xD0D) - Interrupt Status 6 Mask + */ +#define ARIZONA_IM_DSP_SHARED_WR_COLL_EINT1 0x8000 /* IM_DSP_SHARED_WR_COLL_EINT1 */ +#define ARIZONA_IM_DSP_SHARED_WR_COLL_EINT1_MASK 0x8000 /* IM_DSP_SHARED_WR_COLL_EINT1 */ +#define ARIZONA_IM_DSP_SHARED_WR_COLL_EINT1_SHIFT 15 /* IM_DSP_SHARED_WR_COLL_EINT1 */ +#define ARIZONA_IM_DSP_SHARED_WR_COLL_EINT1_WIDTH 1 /* IM_DSP_SHARED_WR_COLL_EINT1 */ +#define ARIZONA_IM_SPK_SHUTDOWN_EINT1 0x4000 /* IM_SPK_SHUTDOWN_EINT1 */ +#define ARIZONA_IM_SPK_SHUTDOWN_EINT1_MASK 0x4000 /* IM_SPK_SHUTDOWN_EINT1 */ +#define ARIZONA_IM_SPK_SHUTDOWN_EINT1_SHIFT 14 /* IM_SPK_SHUTDOWN_EINT1 */ +#define ARIZONA_IM_SPK_SHUTDOWN_EINT1_WIDTH 1 /* IM_SPK_SHUTDOWN_EINT1 */ +#define ARIZONA_IM_SPK1R_SHORT_EINT1 0x2000 /* IM_SPK1R_SHORT_EINT1 */ +#define ARIZONA_IM_SPK1R_SHORT_EINT1_MASK 0x2000 /* IM_SPK1R_SHORT_EINT1 */ +#define ARIZONA_IM_SPK1R_SHORT_EINT1_SHIFT 13 /* IM_SPK1R_SHORT_EINT1 */ +#define ARIZONA_IM_SPK1R_SHORT_EINT1_WIDTH 1 /* IM_SPK1R_SHORT_EINT1 */ +#define ARIZONA_IM_SPK1L_SHORT_EINT1 0x1000 /* IM_SPK1L_SHORT_EINT1 */ +#define ARIZONA_IM_SPK1L_SHORT_EINT1_MASK 0x1000 /* IM_SPK1L_SHORT_EINT1 */ +#define ARIZONA_IM_SPK1L_SHORT_EINT1_SHIFT 12 /* IM_SPK1L_SHORT_EINT1 */ +#define ARIZONA_IM_SPK1L_SHORT_EINT1_WIDTH 1 /* IM_SPK1L_SHORT_EINT1 */ +#define ARIZONA_IM_HP3R_SC_NEG_EINT1 0x0800 /* IM_HP3R_SC_NEG_EINT1 */ +#define ARIZONA_IM_HP3R_SC_NEG_EINT1_MASK 0x0800 /* IM_HP3R_SC_NEG_EINT1 */ +#define ARIZONA_IM_HP3R_SC_NEG_EINT1_SHIFT 11 /* IM_HP3R_SC_NEG_EINT1 */ +#define ARIZONA_IM_HP3R_SC_NEG_EINT1_WIDTH 1 /* IM_HP3R_SC_NEG_EINT1 */ +#define ARIZONA_IM_HP3R_SC_POS_EINT1 0x0400 /* IM_HP3R_SC_POS_EINT1 */ +#define ARIZONA_IM_HP3R_SC_POS_EINT1_MASK 0x0400 /* IM_HP3R_SC_POS_EINT1 */ +#define ARIZONA_IM_HP3R_SC_POS_EINT1_SHIFT 10 /* IM_HP3R_SC_POS_EINT1 */ +#define ARIZONA_IM_HP3R_SC_POS_EINT1_WIDTH 1 /* IM_HP3R_SC_POS_EINT1 */ +#define ARIZONA_IM_HP3L_SC_NEG_EINT1 0x0200 /* IM_HP3L_SC_NEG_EINT1 */ +#define ARIZONA_IM_HP3L_SC_NEG_EINT1_MASK 0x0200 /* IM_HP3L_SC_NEG_EINT1 */ 
+#define ARIZONA_IM_HP3L_SC_NEG_EINT1_SHIFT 9 /* IM_HP3L_SC_NEG_EINT1 */ +#define ARIZONA_IM_HP3L_SC_NEG_EINT1_WIDTH 1 /* IM_HP3L_SC_NEG_EINT1 */ +#define ARIZONA_IM_HP3L_SC_POS_EINT1 0x0100 /* IM_HP3L_SC_POS_EINT1 */ +#define ARIZONA_IM_HP3L_SC_POS_EINT1_MASK 0x0100 /* IM_HP3L_SC_POS_EINT1 */ +#define ARIZONA_IM_HP3L_SC_POS_EINT1_SHIFT 8 /* IM_HP3L_SC_POS_EINT1 */ +#define ARIZONA_IM_HP3L_SC_POS_EINT1_WIDTH 1 /* IM_HP3L_SC_POS_EINT1 */ +#define ARIZONA_IM_HP2R_SC_NEG_EINT1 0x0080 /* IM_HP2R_SC_NEG_EINT1 */ +#define ARIZONA_IM_HP2R_SC_NEG_EINT1_MASK 0x0080 /* IM_HP2R_SC_NEG_EINT1 */ +#define ARIZONA_IM_HP2R_SC_NEG_EINT1_SHIFT 7 /* IM_HP2R_SC_NEG_EINT1 */ +#define ARIZONA_IM_HP2R_SC_NEG_EINT1_WIDTH 1 /* IM_HP2R_SC_NEG_EINT1 */ +#define ARIZONA_IM_HP2R_SC_POS_EINT1 0x0040 /* IM_HP2R_SC_POS_EINT1 */ +#define ARIZONA_IM_HP2R_SC_POS_EINT1_MASK 0x0040 /* IM_HP2R_SC_POS_EINT1 */ +#define ARIZONA_IM_HP2R_SC_POS_EINT1_SHIFT 6 /* IM_HP2R_SC_POS_EINT1 */ +#define ARIZONA_IM_HP2R_SC_POS_EINT1_WIDTH 1 /* IM_HP2R_SC_POS_EINT1 */ +#define ARIZONA_IM_HP2L_SC_NEG_EINT1 0x0020 /* IM_HP2L_SC_NEG_EINT1 */ +#define ARIZONA_IM_HP2L_SC_NEG_EINT1_MASK 0x0020 /* IM_HP2L_SC_NEG_EINT1 */ +#define ARIZONA_IM_HP2L_SC_NEG_EINT1_SHIFT 5 /* IM_HP2L_SC_NEG_EINT1 */ +#define ARIZONA_IM_HP2L_SC_NEG_EINT1_WIDTH 1 /* IM_HP2L_SC_NEG_EINT1 */ +#define ARIZONA_IM_HP2L_SC_POS_EINT1 0x0010 /* IM_HP2L_SC_POS_EINT1 */ +#define ARIZONA_IM_HP2L_SC_POS_EINT1_MASK 0x0010 /* IM_HP2L_SC_POS_EINT1 */ +#define ARIZONA_IM_HP2L_SC_POS_EINT1_SHIFT 4 /* IM_HP2L_SC_POS_EINT1 */ +#define ARIZONA_IM_HP2L_SC_POS_EINT1_WIDTH 1 /* IM_HP2L_SC_POS_EINT1 */ +#define ARIZONA_IM_HP1R_SC_NEG_EINT1 0x0008 /* IM_HP1R_SC_NEG_EINT1 */ +#define ARIZONA_IM_HP1R_SC_NEG_EINT1_MASK 0x0008 /* IM_HP1R_SC_NEG_EINT1 */ +#define ARIZONA_IM_HP1R_SC_NEG_EINT1_SHIFT 3 /* IM_HP1R_SC_NEG_EINT1 */ +#define ARIZONA_IM_HP1R_SC_NEG_EINT1_WIDTH 1 /* IM_HP1R_SC_NEG_EINT1 */ +#define ARIZONA_IM_HP1R_SC_POS_EINT1 0x0004 /* IM_HP1R_SC_POS_EINT1 */ +#define ARIZONA_IM_HP1R_SC_POS_EINT1_MASK 0x0004 /* IM_HP1R_SC_POS_EINT1 */ +#define ARIZONA_IM_HP1R_SC_POS_EINT1_SHIFT 2 /* IM_HP1R_SC_POS_EINT1 */ +#define ARIZONA_IM_HP1R_SC_POS_EINT1_WIDTH 1 /* IM_HP1R_SC_POS_EINT1 */ +#define ARIZONA_IM_HP1L_SC_NEG_EINT1 0x0002 /* IM_HP1L_SC_NEG_EINT1 */ +#define ARIZONA_IM_HP1L_SC_NEG_EINT1_MASK 0x0002 /* IM_HP1L_SC_NEG_EINT1 */ +#define ARIZONA_IM_HP1L_SC_NEG_EINT1_SHIFT 1 /* IM_HP1L_SC_NEG_EINT1 */ +#define ARIZONA_IM_HP1L_SC_NEG_EINT1_WIDTH 1 /* IM_HP1L_SC_NEG_EINT1 */ +#define ARIZONA_IM_HP1L_SC_POS_EINT1 0x0001 /* IM_HP1L_SC_POS_EINT1 */ +#define ARIZONA_IM_HP1L_SC_POS_EINT1_MASK 0x0001 /* IM_HP1L_SC_POS_EINT1 */ +#define ARIZONA_IM_HP1L_SC_POS_EINT1_SHIFT 0 /* IM_HP1L_SC_POS_EINT1 */ +#define ARIZONA_IM_HP1L_SC_POS_EINT1_WIDTH 1 /* IM_HP1L_SC_POS_EINT1 */ + /* * R3343 (0xD0F) - Interrupt Control */ @@ -5212,6 +5469,53 @@ #define ARIZONA_HP1L_DONE_EINT2_SHIFT 0 /* HP1L_DONE_EINT2 */ #define ARIZONA_HP1L_DONE_EINT2_WIDTH 1 /* HP1L_DONE_EINT2 */ +/* + * R3347 (0xD13) - IRQ2 Status 4 (Alternate layout) + * + * Alternate layout used on later devices, note only fields that have moved + * are specified + */ +#define ARIZONA_V2_AIF3_ERR_EINT2 0x8000 /* AIF3_ERR_EINT2 */ +#define ARIZONA_V2_AIF3_ERR_EINT2_MASK 0x8000 /* AIF3_ERR_EINT2 */ +#define ARIZONA_V2_AIF3_ERR_EINT2_SHIFT 15 /* AIF3_ERR_EINT2 */ +#define ARIZONA_V2_AIF3_ERR_EINT2_WIDTH 1 /* AIF3_ERR_EINT2 */ +#define ARIZONA_V2_AIF2_ERR_EINT2 0x4000 /* AIF2_ERR_EINT2 */ +#define ARIZONA_V2_AIF2_ERR_EINT2_MASK 0x4000 /* AIF2_ERR_EINT2 */ +#define 
ARIZONA_V2_AIF2_ERR_EINT2_SHIFT 14 /* AIF2_ERR_EINT2 */ +#define ARIZONA_V2_AIF2_ERR_EINT2_WIDTH 1 /* AIF2_ERR_EINT2 */ +#define ARIZONA_V2_AIF1_ERR_EINT2 0x2000 /* AIF1_ERR_EINT2 */ +#define ARIZONA_V2_AIF1_ERR_EINT2_MASK 0x2000 /* AIF1_ERR_EINT2 */ +#define ARIZONA_V2_AIF1_ERR_EINT2_SHIFT 13 /* AIF1_ERR_EINT2 */ +#define ARIZONA_V2_AIF1_ERR_EINT2_WIDTH 1 /* AIF1_ERR_EINT2 */ +#define ARIZONA_V2_CTRLIF_ERR_EINT2 0x1000 /* CTRLIF_ERR_EINT2 */ +#define ARIZONA_V2_CTRLIF_ERR_EINT2_MASK 0x1000 /* CTRLIF_ERR_EINT2 */ +#define ARIZONA_V2_CTRLIF_ERR_EINT2_SHIFT 12 /* CTRLIF_ERR_EINT2 */ +#define ARIZONA_V2_CTRLIF_ERR_EINT2_WIDTH 1 /* CTRLIF_ERR_EINT2 */ +#define ARIZONA_V2_MIXER_DROPPED_SAMPLE_EINT2 0x0800 /* MIXER_DROPPED_SAMPLE_EINT2 */ +#define ARIZONA_V2_MIXER_DROPPED_SAMPLE_EINT2_MASK 0x0800 /* MIXER_DROPPED_SAMPLE_EINT2 */ +#define ARIZONA_V2_MIXER_DROPPED_SAMPLE_EINT2_SHIFT 11 /* MIXER_DROPPED_SAMPLE_EINT2 */ +#define ARIZONA_V2_MIXER_DROPPED_SAMPLE_EINT2_WIDTH 1 /* MIXER_DROPPED_SAMPLE_EINT2 */ +#define ARIZONA_V2_ASYNC_CLK_ENA_LOW_EINT2 0x0400 /* ASYNC_CLK_ENA_LOW_EINT2 */ +#define ARIZONA_V2_ASYNC_CLK_ENA_LOW_EINT2_MASK 0x0400 /* ASYNC_CLK_ENA_LOW_EINT2 */ +#define ARIZONA_V2_ASYNC_CLK_ENA_LOW_EINT2_SHIFT 10 /* ASYNC_CLK_ENA_LOW_EINT2 */ +#define ARIZONA_V2_ASYNC_CLK_ENA_LOW_EINT2_WIDTH 1 /* ASYNC_CLK_ENA_LOW_EINT2 */ +#define ARIZONA_V2_SYSCLK_ENA_LOW_EINT2 0x0200 /* SYSCLK_ENA_LOW_EINT2 */ +#define ARIZONA_V2_SYSCLK_ENA_LOW_EINT2_MASK 0x0200 /* SYSCLK_ENA_LOW_EINT2 */ +#define ARIZONA_V2_SYSCLK_ENA_LOW_EINT2_SHIFT 9 /* SYSCLK_ENA_LOW_EINT2 */ +#define ARIZONA_V2_SYSCLK_ENA_LOW_EINT2_WIDTH 1 /* SYSCLK_ENA_LOW_EINT2 */ +#define ARIZONA_V2_ISRC1_CFG_ERR_EINT2 0x0100 /* ISRC1_CFG_ERR_EINT2 */ +#define ARIZONA_V2_ISRC1_CFG_ERR_EINT2_MASK 0x0100 /* ISRC1_CFG_ERR_EINT2 */ +#define ARIZONA_V2_ISRC1_CFG_ERR_EINT2_SHIFT 8 /* ISRC1_CFG_ERR_EINT2 */ +#define ARIZONA_V2_ISRC1_CFG_ERR_EINT2_WIDTH 1 /* ISRC1_CFG_ERR_EINT2 */ +#define ARIZONA_V2_ISRC2_CFG_ERR_EINT2 0x0080 /* ISRC2_CFG_ERR_EINT2 */ +#define ARIZONA_V2_ISRC2_CFG_ERR_EINT2_MASK 0x0080 /* ISRC2_CFG_ERR_EINT2 */ +#define ARIZONA_V2_ISRC2_CFG_ERR_EINT2_SHIFT 7 /* ISRC2_CFG_ERR_EINT2 */ +#define ARIZONA_V2_ISRC2_CFG_ERR_EINT2_WIDTH 1 /* ISRC2_CFG_ERR_EINT2 */ +#define ARIZONA_V2_ISRC3_CFG_ERR_EINT2 0x0040 /* ISRC3_CFG_ERR_EINT2 */ +#define ARIZONA_V2_ISRC3_CFG_ERR_EINT2_MASK 0x0040 /* ISRC3_CFG_ERR_EINT2 */ +#define ARIZONA_V2_ISRC3_CFG_ERR_EINT2_SHIFT 6 /* ISRC3_CFG_ERR_EINT2 */ +#define ARIZONA_V2_ISRC3_CFG_ERR_EINT2_WIDTH 1 /* ISRC3_CFG_ERR_EINT2 */ + /* * R3348 (0xD14) - IRQ2 Status 5 */ @@ -5236,6 +5540,85 @@ #define ARIZONA_FLL1_CLOCK_OK_EINT2_SHIFT 0 /* FLL1_CLOCK_OK_EINT2 */ #define ARIZONA_FLL1_CLOCK_OK_EINT2_WIDTH 1 /* FLL1_CLOCK_OK_EINT2 */ +/* + * R3348 (0xD14) - IRQ2 Status 5 (Alternate layout) + * + * Alternate layout used on later devices, note only fields that have moved + * are specified + */ +#define ARIZONA_V2_ASRC_CFG_ERR_EINT2 0x0008 /* ASRC_CFG_ERR_EINT2 */ +#define ARIZONA_V2_ASRC_CFG_ERR_EINT2_MASK 0x0008 /* ASRC_CFG_ERR_EINT2 */ +#define ARIZONA_V2_ASRC_CFG_ERR_EINT2_SHIFT 3 /* ASRC_CFG_ERR_EINT2 */ +#define ARIZONA_V2_ASRC_CFG_ERR_EINT2_WIDTH 1 /* ASRC_CFG_ERR_EINT2 */ + +/* + * R3349 (0xD15) - IRQ2 Status 6 + */ +#define ARIZONA_DSP_SHARED_WR_COLL_EINT2 0x8000 /* DSP_SHARED_WR_COLL_EINT2 */ +#define ARIZONA_DSP_SHARED_WR_COLL_EINT2_MASK 0x8000 /* DSP_SHARED_WR_COLL_EINT2 */ +#define ARIZONA_DSP_SHARED_WR_COLL_EINT2_SHIFT 15 /* DSP_SHARED_WR_COLL_EINT2 */ +#define ARIZONA_DSP_SHARED_WR_COLL_EINT2_WIDTH 1 /* 
DSP_SHARED_WR_COLL_EINT2 */ +#define ARIZONA_SPK_SHUTDOWN_EINT2 0x4000 /* SPK_SHUTDOWN_EINT2 */ +#define ARIZONA_SPK_SHUTDOWN_EINT2_MASK 0x4000 /* SPK_SHUTDOWN_EINT2 */ +#define ARIZONA_SPK_SHUTDOWN_EINT2_SHIFT 14 /* SPK_SHUTDOWN_EINT2 */ +#define ARIZONA_SPK_SHUTDOWN_EINT2_WIDTH 1 /* SPK_SHUTDOWN_EINT2 */ +#define ARIZONA_SPK1R_SHORT_EINT2 0x2000 /* SPK1R_SHORT_EINT2 */ +#define ARIZONA_SPK1R_SHORT_EINT2_MASK 0x2000 /* SPK1R_SHORT_EINT2 */ +#define ARIZONA_SPK1R_SHORT_EINT2_SHIFT 13 /* SPK1R_SHORT_EINT2 */ +#define ARIZONA_SPK1R_SHORT_EINT2_WIDTH 1 /* SPK1R_SHORT_EINT2 */ +#define ARIZONA_SPK1L_SHORT_EINT2 0x1000 /* SPK1L_SHORT_EINT2 */ +#define ARIZONA_SPK1L_SHORT_EINT2_MASK 0x1000 /* SPK1L_SHORT_EINT2 */ +#define ARIZONA_SPK1L_SHORT_EINT2_SHIFT 12 /* SPK1L_SHORT_EINT2 */ +#define ARIZONA_SPK1L_SHORT_EINT2_WIDTH 1 /* SPK1L_SHORT_EINT2 */ +#define ARIZONA_HP3R_SC_NEG_EINT2 0x0800 /* HP3R_SC_NEG_EINT2 */ +#define ARIZONA_HP3R_SC_NEG_EINT2_MASK 0x0800 /* HP3R_SC_NEG_EINT2 */ +#define ARIZONA_HP3R_SC_NEG_EINT2_SHIFT 11 /* HP3R_SC_NEG_EINT2 */ +#define ARIZONA_HP3R_SC_NEG_EINT2_WIDTH 1 /* HP3R_SC_NEG_EINT2 */ +#define ARIZONA_HP3R_SC_POS_EINT2 0x0400 /* HP3R_SC_POS_EINT2 */ +#define ARIZONA_HP3R_SC_POS_EINT2_MASK 0x0400 /* HP3R_SC_POS_EINT2 */ +#define ARIZONA_HP3R_SC_POS_EINT2_SHIFT 10 /* HP3R_SC_POS_EINT2 */ +#define ARIZONA_HP3R_SC_POS_EINT2_WIDTH 1 /* HP3R_SC_POS_EINT2 */ +#define ARIZONA_HP3L_SC_NEG_EINT2 0x0200 /* HP3L_SC_NEG_EINT2 */ +#define ARIZONA_HP3L_SC_NEG_EINT2_MASK 0x0200 /* HP3L_SC_NEG_EINT2 */ +#define ARIZONA_HP3L_SC_NEG_EINT2_SHIFT 9 /* HP3L_SC_NEG_EINT2 */ +#define ARIZONA_HP3L_SC_NEG_EINT2_WIDTH 1 /* HP3L_SC_NEG_EINT2 */ +#define ARIZONA_HP3L_SC_POS_EINT2 0x0100 /* HP3L_SC_POS_EINT2 */ +#define ARIZONA_HP3L_SC_POS_EINT2_MASK 0x0100 /* HP3L_SC_POS_EINT2 */ +#define ARIZONA_HP3L_SC_POS_EINT2_SHIFT 8 /* HP3L_SC_POS_EINT2 */ +#define ARIZONA_HP3L_SC_POS_EINT2_WIDTH 1 /* HP3L_SC_POS_EINT2 */ +#define ARIZONA_HP2R_SC_NEG_EINT2 0x0080 /* HP2R_SC_NEG_EINT2 */ +#define ARIZONA_HP2R_SC_NEG_EINT2_MASK 0x0080 /* HP2R_SC_NEG_EINT2 */ +#define ARIZONA_HP2R_SC_NEG_EINT2_SHIFT 7 /* HP2R_SC_NEG_EINT2 */ +#define ARIZONA_HP2R_SC_NEG_EINT2_WIDTH 1 /* HP2R_SC_NEG_EINT2 */ +#define ARIZONA_HP2R_SC_POS_EINT2 0x0040 /* HP2R_SC_POS_EINT2 */ +#define ARIZONA_HP2R_SC_POS_EINT2_MASK 0x0040 /* HP2R_SC_POS_EINT2 */ +#define ARIZONA_HP2R_SC_POS_EINT2_SHIFT 6 /* HP2R_SC_POS_EINT2 */ +#define ARIZONA_HP2R_SC_POS_EINT2_WIDTH 1 /* HP2R_SC_POS_EINT2 */ +#define ARIZONA_HP2L_SC_NEG_EINT2 0x0020 /* HP2L_SC_NEG_EINT2 */ +#define ARIZONA_HP2L_SC_NEG_EINT2_MASK 0x0020 /* HP2L_SC_NEG_EINT2 */ +#define ARIZONA_HP2L_SC_NEG_EINT2_SHIFT 5 /* HP2L_SC_NEG_EINT2 */ +#define ARIZONA_HP2L_SC_NEG_EINT2_WIDTH 1 /* HP2L_SC_NEG_EINT2 */ +#define ARIZONA_HP2L_SC_POS_EINT2 0x0010 /* HP2L_SC_POS_EINT2 */ +#define ARIZONA_HP2L_SC_POS_EINT2_MASK 0x0010 /* HP2L_SC_POS_EINT2 */ +#define ARIZONA_HP2L_SC_POS_EINT2_SHIFT 4 /* HP2L_SC_POS_EINT2 */ +#define ARIZONA_HP2L_SC_POS_EINT2_WIDTH 1 /* HP2L_SC_POS_EINT2 */ +#define ARIZONA_HP1R_SC_NEG_EINT2 0x0008 /* HP1R_SC_NEG_EINT2 */ +#define ARIZONA_HP1R_SC_NEG_EINT2_MASK 0x0008 /* HP1R_SC_NEG_EINT2 */ +#define ARIZONA_HP1R_SC_NEG_EINT2_SHIFT 3 /* HP1R_SC_NEG_EINT2 */ +#define ARIZONA_HP1R_SC_NEG_EINT2_WIDTH 1 /* HP1R_SC_NEG_EINT2 */ +#define ARIZONA_HP1R_SC_POS_EINT2 0x0004 /* HP1R_SC_POS_EINT2 */ +#define ARIZONA_HP1R_SC_POS_EINT2_MASK 0x0004 /* HP1R_SC_POS_EINT2 */ +#define ARIZONA_HP1R_SC_POS_EINT2_SHIFT 2 /* HP1R_SC_POS_EINT2 */ +#define ARIZONA_HP1R_SC_POS_EINT2_WIDTH 1 /* 
HP1R_SC_POS_EINT2 */ +#define ARIZONA_HP1L_SC_NEG_EINT2 0x0002 /* HP1L_SC_NEG_EINT2 */ +#define ARIZONA_HP1L_SC_NEG_EINT2_MASK 0x0002 /* HP1L_SC_NEG_EINT2 */ +#define ARIZONA_HP1L_SC_NEG_EINT2_SHIFT 1 /* HP1L_SC_NEG_EINT2 */ +#define ARIZONA_HP1L_SC_NEG_EINT2_WIDTH 1 /* HP1L_SC_NEG_EINT2 */ +#define ARIZONA_HP1L_SC_POS_EINT2 0x0001 /* HP1L_SC_POS_EINT2 */ +#define ARIZONA_HP1L_SC_POS_EINT2_MASK 0x0001 /* HP1L_SC_POS_EINT2 */ +#define ARIZONA_HP1L_SC_POS_EINT2_SHIFT 0 /* HP1L_SC_POS_EINT2 */ +#define ARIZONA_HP1L_SC_POS_EINT2_WIDTH 1 /* HP1L_SC_POS_EINT2 */ + /* * R3352 (0xD18) - IRQ2 Status 1 Mask */ @@ -5404,6 +5787,53 @@ #define ARIZONA_IM_HP1L_DONE_EINT2_SHIFT 0 /* IM_HP1L_DONE_EINT2 */ #define ARIZONA_IM_HP1L_DONE_EINT2_WIDTH 1 /* IM_HP1L_DONE_EINT2 */ +/* + * R3355 (0xD1B) - IRQ2 Status 4 Mask (Alternate layout) + * + * Alternate layout used on later devices, note only fields that have moved + * are specified + */ +#define ARIZONA_V2_IM_AIF3_ERR_EINT2 0x8000 /* IM_AIF3_ERR_EINT2 */ +#define ARIZONA_V2_IM_AIF3_ERR_EINT2_MASK 0x8000 /* IM_AIF3_ERR_EINT2 */ +#define ARIZONA_V2_IM_AIF3_ERR_EINT2_SHIFT 15 /* IM_AIF3_ERR_EINT2 */ +#define ARIZONA_V2_IM_AIF3_ERR_EINT2_WIDTH 1 /* IM_AIF3_ERR_EINT2 */ +#define ARIZONA_V2_IM_AIF2_ERR_EINT2 0x4000 /* IM_AIF2_ERR_EINT2 */ +#define ARIZONA_V2_IM_AIF2_ERR_EINT2_MASK 0x4000 /* IM_AIF2_ERR_EINT2 */ +#define ARIZONA_V2_IM_AIF2_ERR_EINT2_SHIFT 14 /* IM_AIF2_ERR_EINT2 */ +#define ARIZONA_V2_IM_AIF2_ERR_EINT2_WIDTH 1 /* IM_AIF2_ERR_EINT2 */ +#define ARIZONA_V2_IM_AIF1_ERR_EINT2 0x2000 /* IM_AIF1_ERR_EINT2 */ +#define ARIZONA_V2_IM_AIF1_ERR_EINT2_MASK 0x2000 /* IM_AIF1_ERR_EINT2 */ +#define ARIZONA_V2_IM_AIF1_ERR_EINT2_SHIFT 13 /* IM_AIF1_ERR_EINT2 */ +#define ARIZONA_V2_IM_AIF1_ERR_EINT2_WIDTH 1 /* IM_AIF1_ERR_EINT2 */ +#define ARIZONA_V2_IM_CTRLIF_ERR_EINT2 0x1000 /* IM_CTRLIF_ERR_EINT2 */ +#define ARIZONA_V2_IM_CTRLIF_ERR_EINT2_MASK 0x1000 /* IM_CTRLIF_ERR_EINT2 */ +#define ARIZONA_V2_IM_CTRLIF_ERR_EINT2_SHIFT 12 /* IM_CTRLIF_ERR_EINT2 */ +#define ARIZONA_V2_IM_CTRLIF_ERR_EINT2_WIDTH 1 /* IM_CTRLIF_ERR_EINT2 */ +#define ARIZONA_V2_IM_MIXER_DROPPED_SAMPLE_EINT2 0x0800 /* IM_MIXER_DROPPED_SAMPLE_EINT2 */ +#define ARIZONA_V2_IM_MIXER_DROPPED_SAMPLE_EINT2_MASK 0x0800 /* IM_MIXER_DROPPED_SAMPLE_EINT2 */ +#define ARIZONA_V2_IM_MIXER_DROPPED_SAMPLE_EINT2_SHIFT 11 /* IM_MIXER_DROPPED_SAMPLE_EINT2 */ +#define ARIZONA_V2_IM_MIXER_DROPPED_SAMPLE_EINT2_WIDTH 1 /* IM_MIXER_DROPPED_SAMPLE_EINT2 */ +#define ARIZONA_V2_IM_ASYNC_CLK_ENA_LOW_EINT2 0x0400 /* IM_ASYNC_CLK_ENA_LOW_EINT2 */ +#define ARIZONA_V2_IM_ASYNC_CLK_ENA_LOW_EINT2_MASK 0x0400 /* IM_ASYNC_CLK_ENA_LOW_EINT2 */ +#define ARIZONA_V2_IM_ASYNC_CLK_ENA_LOW_EINT2_SHIFT 10 /* IM_ASYNC_CLK_ENA_LOW_EINT2 */ +#define ARIZONA_V2_IM_ASYNC_CLK_ENA_LOW_EINT2_WIDTH 1 /* IM_ASYNC_CLK_ENA_LOW_EINT2 */ +#define ARIZONA_V2_IM_SYSCLK_ENA_LOW_EINT2 0x0200 /* IM_SYSCLK_ENA_LOW_EINT2 */ +#define ARIZONA_V2_IM_SYSCLK_ENA_LOW_EINT2_MASK 0x0200 /* IM_SYSCLK_ENA_LOW_EINT2 */ +#define ARIZONA_V2_IM_SYSCLK_ENA_LOW_EINT2_SHIFT 9 /* IM_SYSCLK_ENA_LOW_EINT2 */ +#define ARIZONA_V2_IM_SYSCLK_ENA_LOW_EINT2_WIDTH 1 /* IM_SYSCLK_ENA_LOW_EINT2 */ +#define ARIZONA_V2_IM_ISRC1_CFG_ERR_EINT2 0x0100 /* IM_ISRC1_CFG_ERR_EINT2 */ +#define ARIZONA_V2_IM_ISRC1_CFG_ERR_EINT2_MASK 0x0100 /* IM_ISRC1_CFG_ERR_EINT2 */ +#define ARIZONA_V2_IM_ISRC1_CFG_ERR_EINT2_SHIFT 8 /* IM_ISRC1_CFG_ERR_EINT2 */ +#define ARIZONA_V2_IM_ISRC1_CFG_ERR_EINT2_WIDTH 1 /* IM_ISRC1_CFG_ERR_EINT2 */ +#define ARIZONA_V2_IM_ISRC2_CFG_ERR_EINT2 0x0080 /* IM_ISRC2_CFG_ERR_EINT2 */ 
+#define ARIZONA_V2_IM_ISRC2_CFG_ERR_EINT2_MASK 0x0080 /* IM_ISRC2_CFG_ERR_EINT2 */ +#define ARIZONA_V2_IM_ISRC2_CFG_ERR_EINT2_SHIFT 7 /* IM_ISRC2_CFG_ERR_EINT2 */ +#define ARIZONA_V2_IM_ISRC2_CFG_ERR_EINT2_WIDTH 1 /* IM_ISRC2_CFG_ERR_EINT2 */ +#define ARIZONA_V2_IM_ISRC3_CFG_ERR_EINT2 0x0040 /* IM_ISRC3_CFG_ERR_EINT2 */ +#define ARIZONA_V2_IM_ISRC3_CFG_ERR_EINT2_MASK 0x0040 /* IM_ISRC3_CFG_ERR_EINT2 */ +#define ARIZONA_V2_IM_ISRC3_CFG_ERR_EINT2_SHIFT 6 /* IM_ISRC3_CFG_ERR_EINT2 */ +#define ARIZONA_V2_IM_ISRC3_CFG_ERR_EINT2_WIDTH 1 /* IM_ISRC3_CFG_ERR_EINT2 */ + /* * R3356 (0xD1C) - IRQ2 Status 5 Mask */ @@ -5429,6 +5859,85 @@ #define ARIZONA_IM_FLL1_CLOCK_OK_EINT2_SHIFT 0 /* IM_FLL1_CLOCK_OK_EINT2 */ #define ARIZONA_IM_FLL1_CLOCK_OK_EINT2_WIDTH 1 /* IM_FLL1_CLOCK_OK_EINT2 */ +/* + * R3340 (0xD0C) - Interrupt Status 5 Mask (Alternate layout) + * + * Alternate layout used on later devices, note only fields that have moved + * are specified + */ +#define ARIZONA_V2_IM_ASRC_CFG_ERR_EINT2 0x0008 /* IM_ASRC_CFG_ERR_EINT2 */ +#define ARIZONA_V2_IM_ASRC_CFG_ERR_EINT2_MASK 0x0008 /* IM_ASRC_CFG_ERR_EINT2 */ +#define ARIZONA_V2_IM_ASRC_CFG_ERR_EINT2_SHIFT 3 /* IM_ASRC_CFG_ERR_EINT2 */ +#define ARIZONA_V2_IM_ASRC_CFG_ERR_EINT2_WIDTH 1 /* IM_ASRC_CFG_ERR_EINT2 */ + +/* + * R3357 (0xD1D) - IRQ2 Status 6 Mask + */ +#define ARIZONA_IM_DSP_SHARED_WR_COLL_EINT2 0x8000 /* IM_DSP_SHARED_WR_COLL_EINT2 */ +#define ARIZONA_IM_DSP_SHARED_WR_COLL_EINT2_MASK 0x8000 /* IM_DSP_SHARED_WR_COLL_EINT2 */ +#define ARIZONA_IM_DSP_SHARED_WR_COLL_EINT2_SHIFT 15 /* IM_DSP_SHARED_WR_COLL_EINT2 */ +#define ARIZONA_IM_DSP_SHARED_WR_COLL_EINT2_WIDTH 1 /* IM_DSP_SHARED_WR_COLL_EINT2 */ +#define ARIZONA_IM_SPK_SHUTDOWN_EINT2 0x4000 /* IM_SPK_SHUTDOWN_EINT2 */ +#define ARIZONA_IM_SPK_SHUTDOWN_EINT2_MASK 0x4000 /* IM_SPK_SHUTDOWN_EINT2 */ +#define ARIZONA_IM_SPK_SHUTDOWN_EINT2_SHIFT 14 /* IM_SPK_SHUTDOWN_EINT2 */ +#define ARIZONA_IM_SPK_SHUTDOWN_EINT2_WIDTH 1 /* IM_SPK_SHUTDOWN_EINT2 */ +#define ARIZONA_IM_SPK1R_SHORT_EINT2 0x2000 /* IM_SPK1R_SHORT_EINT2 */ +#define ARIZONA_IM_SPK1R_SHORT_EINT2_MASK 0x2000 /* IM_SPK1R_SHORT_EINT2 */ +#define ARIZONA_IM_SPK1R_SHORT_EINT2_SHIFT 13 /* IM_SPK1R_SHORT_EINT2 */ +#define ARIZONA_IM_SPK1R_SHORT_EINT2_WIDTH 1 /* IM_SPK1R_SHORT_EINT2 */ +#define ARIZONA_IM_SPK1L_SHORT_EINT2 0x1000 /* IM_SPK1L_SHORT_EINT2 */ +#define ARIZONA_IM_SPK1L_SHORT_EINT2_MASK 0x1000 /* IM_SPK1L_SHORT_EINT2 */ +#define ARIZONA_IM_SPK1L_SHORT_EINT2_SHIFT 12 /* IM_SPK1L_SHORT_EINT2 */ +#define ARIZONA_IM_SPK1L_SHORT_EINT2_WIDTH 1 /* IM_SPK1L_SHORT_EINT2 */ +#define ARIZONA_IM_HP3R_SC_NEG_EINT2 0x0800 /* IM_HP3R_SC_NEG_EINT2 */ +#define ARIZONA_IM_HP3R_SC_NEG_EINT2_MASK 0x0800 /* IM_HP3R_SC_NEG_EINT2 */ +#define ARIZONA_IM_HP3R_SC_NEG_EINT2_SHIFT 11 /* IM_HP3R_SC_NEG_EINT2 */ +#define ARIZONA_IM_HP3R_SC_NEG_EINT2_WIDTH 1 /* IM_HP3R_SC_NEG_EINT2 */ +#define ARIZONA_IM_HP3R_SC_POS_EINT2 0x0400 /* IM_HP3R_SC_POS_EINT2 */ +#define ARIZONA_IM_HP3R_SC_POS_EINT2_MASK 0x0400 /* IM_HP3R_SC_POS_EINT2 */ +#define ARIZONA_IM_HP3R_SC_POS_EINT2_SHIFT 10 /* IM_HP3R_SC_POS_EINT2 */ +#define ARIZONA_IM_HP3R_SC_POS_EINT2_WIDTH 1 /* IM_HP3R_SC_POS_EINT2 */ +#define ARIZONA_IM_HP3L_SC_NEG_EINT2 0x0200 /* IM_HP3L_SC_NEG_EINT2 */ +#define ARIZONA_IM_HP3L_SC_NEG_EINT2_MASK 0x0200 /* IM_HP3L_SC_NEG_EINT2 */ +#define ARIZONA_IM_HP3L_SC_NEG_EINT2_SHIFT 9 /* IM_HP3L_SC_NEG_EINT2 */ +#define ARIZONA_IM_HP3L_SC_NEG_EINT2_WIDTH 1 /* IM_HP3L_SC_NEG_EINT2 */ +#define ARIZONA_IM_HP3L_SC_POS_EINT2 0x0100 /* IM_HP3L_SC_POS_EINT2 */ +#define 
ARIZONA_IM_HP3L_SC_POS_EINT2_MASK 0x0100 /* IM_HP3L_SC_POS_EINT2 */ +#define ARIZONA_IM_HP3L_SC_POS_EINT2_SHIFT 8 /* IM_HP3L_SC_POS_EINT2 */ +#define ARIZONA_IM_HP3L_SC_POS_EINT2_WIDTH 1 /* IM_HP3L_SC_POS_EINT2 */ +#define ARIZONA_IM_HP2R_SC_NEG_EINT2 0x0080 /* IM_HP2R_SC_NEG_EINT2 */ +#define ARIZONA_IM_HP2R_SC_NEG_EINT2_MASK 0x0080 /* IM_HP2R_SC_NEG_EINT2 */ +#define ARIZONA_IM_HP2R_SC_NEG_EINT2_SHIFT 7 /* IM_HP2R_SC_NEG_EINT2 */ +#define ARIZONA_IM_HP2R_SC_NEG_EINT2_WIDTH 1 /* IM_HP2R_SC_NEG_EINT2 */ +#define ARIZONA_IM_HP2R_SC_POS_EINT2 0x0040 /* IM_HP2R_SC_POS_EINT2 */ +#define ARIZONA_IM_HP2R_SC_POS_EINT2_MASK 0x0040 /* IM_HP2R_SC_POS_EINT2 */ +#define ARIZONA_IM_HP2R_SC_POS_EINT2_SHIFT 6 /* IM_HP2R_SC_POS_EINT2 */ +#define ARIZONA_IM_HP2R_SC_POS_EINT2_WIDTH 1 /* IM_HP2R_SC_POS_EINT2 */ +#define ARIZONA_IM_HP2L_SC_NEG_EINT2 0x0020 /* IM_HP2L_SC_NEG_EINT2 */ +#define ARIZONA_IM_HP2L_SC_NEG_EINT2_MASK 0x0020 /* IM_HP2L_SC_NEG_EINT2 */ +#define ARIZONA_IM_HP2L_SC_NEG_EINT2_SHIFT 5 /* IM_HP2L_SC_NEG_EINT2 */ +#define ARIZONA_IM_HP2L_SC_NEG_EINT2_WIDTH 1 /* IM_HP2L_SC_NEG_EINT2 */ +#define ARIZONA_IM_HP2L_SC_POS_EINT2 0x0010 /* IM_HP2L_SC_POS_EINT2 */ +#define ARIZONA_IM_HP2L_SC_POS_EINT2_MASK 0x0010 /* IM_HP2L_SC_POS_EINT2 */ +#define ARIZONA_IM_HP2L_SC_POS_EINT2_SHIFT 4 /* IM_HP2L_SC_POS_EINT2 */ +#define ARIZONA_IM_HP2L_SC_POS_EINT2_WIDTH 1 /* IM_HP2L_SC_POS_EINT2 */ +#define ARIZONA_IM_HP1R_SC_NEG_EINT2 0x0008 /* IM_HP1R_SC_NEG_EINT2 */ +#define ARIZONA_IM_HP1R_SC_NEG_EINT2_MASK 0x0008 /* IM_HP1R_SC_NEG_EINT2 */ +#define ARIZONA_IM_HP1R_SC_NEG_EINT2_SHIFT 3 /* IM_HP1R_SC_NEG_EINT2 */ +#define ARIZONA_IM_HP1R_SC_NEG_EINT2_WIDTH 1 /* IM_HP1R_SC_NEG_EINT2 */ +#define ARIZONA_IM_HP1R_SC_POS_EINT2 0x0004 /* IM_HP1R_SC_POS_EINT2 */ +#define ARIZONA_IM_HP1R_SC_POS_EINT2_MASK 0x0004 /* IM_HP1R_SC_POS_EINT2 */ +#define ARIZONA_IM_HP1R_SC_POS_EINT2_SHIFT 2 /* IM_HP1R_SC_POS_EINT2 */ +#define ARIZONA_IM_HP1R_SC_POS_EINT2_WIDTH 1 /* IM_HP1R_SC_POS_EINT2 */ +#define ARIZONA_IM_HP1L_SC_NEG_EINT2 0x0002 /* IM_HP1L_SC_NEG_EINT2 */ +#define ARIZONA_IM_HP1L_SC_NEG_EINT2_MASK 0x0002 /* IM_HP1L_SC_NEG_EINT2 */ +#define ARIZONA_IM_HP1L_SC_NEG_EINT2_SHIFT 1 /* IM_HP1L_SC_NEG_EINT2 */ +#define ARIZONA_IM_HP1L_SC_NEG_EINT2_WIDTH 1 /* IM_HP1L_SC_NEG_EINT2 */ +#define ARIZONA_IM_HP1L_SC_POS_EINT2 0x0001 /* IM_HP1L_SC_POS_EINT2 */ +#define ARIZONA_IM_HP1L_SC_POS_EINT2_MASK 0x0001 /* IM_HP1L_SC_POS_EINT2 */ +#define ARIZONA_IM_HP1L_SC_POS_EINT2_SHIFT 0 /* IM_HP1L_SC_POS_EINT2 */ +#define ARIZONA_IM_HP1L_SC_POS_EINT2_WIDTH 1 /* IM_HP1L_SC_POS_EINT2 */ + /* * R3359 (0xD1F) - IRQ2 Control */ @@ -5700,6 +6209,10 @@ #define ARIZONA_ADSP2_1_OVERCLOCKED_STS_MASK 0x0008 /* ADSP2_1_OVERCLOCKED_STS */ #define ARIZONA_ADSP2_1_OVERCLOCKED_STS_SHIFT 3 /* ADSP2_1_OVERCLOCKED_STS */ #define ARIZONA_ADSP2_1_OVERCLOCKED_STS_WIDTH 1 /* ADSP2_1_OVERCLOCKED_STS */ +#define ARIZONA_ISRC3_OVERCLOCKED_STS 0x0004 /* ISRC3_OVERCLOCKED_STS */ +#define ARIZONA_ISRC3_OVERCLOCKED_STS_MASK 0x0004 /* ISRC3_OVERCLOCKED_STS */ +#define ARIZONA_ISRC3_OVERCLOCKED_STS_SHIFT 2 /* ISRC3_OVERCLOCKED_STS */ +#define ARIZONA_ISRC3_OVERCLOCKED_STS_WIDTH 1 /* ISRC3_OVERCLOCKED_STS */ #define ARIZONA_ISRC2_OVERCLOCKED_STS 0x0002 /* ISRC2_OVERCLOCKED_STS */ #define ARIZONA_ISRC2_OVERCLOCKED_STS_MASK 0x0002 /* ISRC2_OVERCLOCKED_STS */ #define ARIZONA_ISRC2_OVERCLOCKED_STS_SHIFT 1 /* ISRC2_OVERCLOCKED_STS */ @@ -5724,6 +6237,10 @@ #define ARIZONA_AIF1_UNDERCLOCKED_STS_MASK 0x0100 /* AIF1_UNDERCLOCKED_STS */ #define ARIZONA_AIF1_UNDERCLOCKED_STS_SHIFT 8 /* 
AIF1_UNDERCLOCKED_STS */ #define ARIZONA_AIF1_UNDERCLOCKED_STS_WIDTH 1 /* AIF1_UNDERCLOCKED_STS */ +#define ARIZONA_ISRC3_UNDERCLOCKED_STS 0x0080 /* ISRC3_UNDERCLOCKED_STS */ +#define ARIZONA_ISRC3_UNDERCLOCKED_STS_MASK 0x0080 /* ISRC3_UNDERCLOCKED_STS */ +#define ARIZONA_ISRC3_UNDERCLOCKED_STS_SHIFT 7 /* ISRC3_UNDERCLOCKED_STS */ +#define ARIZONA_ISRC3_UNDERCLOCKED_STS_WIDTH 1 /* ISRC3_UNDERCLOCKED_STS */ #define ARIZONA_ISRC2_UNDERCLOCKED_STS 0x0040 /* ISRC2_UNDERCLOCKED_STS */ #define ARIZONA_ISRC2_UNDERCLOCKED_STS_MASK 0x0040 /* ISRC2_UNDERCLOCKED_STS */ #define ARIZONA_ISRC2_UNDERCLOCKED_STS_SHIFT 6 /* ISRC2_UNDERCLOCKED_STS */ @@ -5753,6 +6270,74 @@ #define ARIZONA_MIXER_UNDERCLOCKED_STS_SHIFT 0 /* MIXER_UNDERCLOCKED_STS */ #define ARIZONA_MIXER_UNDERCLOCKED_STS_WIDTH 1 /* MIXER_UNDERCLOCKED_STS */ +/* + * R3368 (0xD28) - Interrupt Raw Status 9 + */ +#define ARIZONA_DSP_SHARED_WR_COLL_STS 0x8000 /* DSP_SHARED_WR_COLL_STS */ +#define ARIZONA_DSP_SHARED_WR_COLL_STS_MASK 0x8000 /* DSP_SHARED_WR_COLL_STS */ +#define ARIZONA_DSP_SHARED_WR_COLL_STS_SHIFT 15 /* DSP_SHARED_WR_COLL_STS */ +#define ARIZONA_DSP_SHARED_WR_COLL_STS_WIDTH 1 /* DSP_SHARED_WR_COLL_STS */ +#define ARIZONA_SPK_SHUTDOWN_STS 0x4000 /* SPK_SHUTDOWN_STS */ +#define ARIZONA_SPK_SHUTDOWN_STS_MASK 0x4000 /* SPK_SHUTDOWN_STS */ +#define ARIZONA_SPK_SHUTDOWN_STS_SHIFT 14 /* SPK_SHUTDOWN_STS */ +#define ARIZONA_SPK_SHUTDOWN_STS_WIDTH 1 /* SPK_SHUTDOWN_STS */ +#define ARIZONA_SPK1R_SHORT_STS 0x2000 /* SPK1R_SHORT_STS */ +#define ARIZONA_SPK1R_SHORT_STS_MASK 0x2000 /* SPK1R_SHORT_STS */ +#define ARIZONA_SPK1R_SHORT_STS_SHIFT 13 /* SPK1R_SHORT_STS */ +#define ARIZONA_SPK1R_SHORT_STS_WIDTH 1 /* SPK1R_SHORT_STS */ +#define ARIZONA_SPK1L_SHORT_STS 0x1000 /* SPK1L_SHORT_STS */ +#define ARIZONA_SPK1L_SHORT_STS_MASK 0x1000 /* SPK1L_SHORT_STS */ +#define ARIZONA_SPK1L_SHORT_STS_SHIFT 12 /* SPK1L_SHORT_STS */ +#define ARIZONA_SPK1L_SHORT_STS_WIDTH 1 /* SPK1L_SHORT_STS */ +#define ARIZONA_HP3R_SC_NEG_STS 0x0800 /* HP3R_SC_NEG_STS */ +#define ARIZONA_HP3R_SC_NEG_STS_MASK 0x0800 /* HP3R_SC_NEG_STS */ +#define ARIZONA_HP3R_SC_NEG_STS_SHIFT 11 /* HP3R_SC_NEG_STS */ +#define ARIZONA_HP3R_SC_NEG_STS_WIDTH 1 /* HP3R_SC_NEG_STS */ +#define ARIZONA_HP3R_SC_POS_STS 0x0400 /* HP3R_SC_POS_STS */ +#define ARIZONA_HP3R_SC_POS_STS_MASK 0x0400 /* HP3R_SC_POS_STS */ +#define ARIZONA_HP3R_SC_POS_STS_SHIFT 10 /* HP3R_SC_POS_STS */ +#define ARIZONA_HP3R_SC_POS_STS_WIDTH 1 /* HP3R_SC_POS_STS */ +#define ARIZONA_HP3L_SC_NEG_STS 0x0200 /* HP3L_SC_NEG_STS */ +#define ARIZONA_HP3L_SC_NEG_STS_MASK 0x0200 /* HP3L_SC_NEG_STS */ +#define ARIZONA_HP3L_SC_NEG_STS_SHIFT 9 /* HP3L_SC_NEG_STS */ +#define ARIZONA_HP3L_SC_NEG_STS_WIDTH 1 /* HP3L_SC_NEG_STS */ +#define ARIZONA_HP3L_SC_POS_STS 0x0100 /* HP3L_SC_POS_STS */ +#define ARIZONA_HP3L_SC_POS_STS_MASK 0x0100 /* HP3L_SC_POS_STS */ +#define ARIZONA_HP3L_SC_POS_STS_SHIFT 8 /* HP3L_SC_POS_STS */ +#define ARIZONA_HP3L_SC_POS_STS_WIDTH 1 /* HP3L_SC_POS_STS */ +#define ARIZONA_HP2R_SC_NEG_STS 0x0080 /* HP2R_SC_NEG_STS */ +#define ARIZONA_HP2R_SC_NEG_STS_MASK 0x0080 /* HP2R_SC_NEG_STS */ +#define ARIZONA_HP2R_SC_NEG_STS_SHIFT 7 /* HP2R_SC_NEG_STS */ +#define ARIZONA_HP2R_SC_NEG_STS_WIDTH 1 /* HP2R_SC_NEG_STS */ +#define ARIZONA_HP2R_SC_POS_STS 0x0040 /* HP2R_SC_POS_STS */ +#define ARIZONA_HP2R_SC_POS_STS_MASK 0x0040 /* HP2R_SC_POS_STS */ +#define ARIZONA_HP2R_SC_POS_STS_SHIFT 6 /* HP2R_SC_POS_STS */ +#define ARIZONA_HP2R_SC_POS_STS_WIDTH 1 /* HP2R_SC_POS_STS */ +#define ARIZONA_HP2L_SC_NEG_STS 0x0020 /* HP2L_SC_NEG_STS */ 
+#define ARIZONA_HP2L_SC_NEG_STS_MASK 0x0020 /* HP2L_SC_NEG_STS */ +#define ARIZONA_HP2L_SC_NEG_STS_SHIFT 5 /* HP2L_SC_NEG_STS */ +#define ARIZONA_HP2L_SC_NEG_STS_WIDTH 1 /* HP2L_SC_NEG_STS */ +#define ARIZONA_HP2L_SC_POS_STS 0x0010 /* HP2L_SC_POS_STS */ +#define ARIZONA_HP2L_SC_POS_STS_MASK 0x0010 /* HP2L_SC_POS_STS */ +#define ARIZONA_HP2L_SC_POS_STS_SHIFT 4 /* HP2L_SC_POS_STS */ +#define ARIZONA_HP2L_SC_POS_STS_WIDTH 1 /* HP2L_SC_POS_STS */ +#define ARIZONA_HP1R_SC_NEG_STS 0x0008 /* HP1R_SC_NEG_STS */ +#define ARIZONA_HP1R_SC_NEG_STS_MASK 0x0008 /* HP1R_SC_NEG_STS */ +#define ARIZONA_HP1R_SC_NEG_STS_SHIFT 3 /* HP1R_SC_NEG_STS */ +#define ARIZONA_HP1R_SC_NEG_STS_WIDTH 1 /* HP1R_SC_NEG_STS */ +#define ARIZONA_HP1R_SC_POS_STS 0x0004 /* HP1R_SC_POS_STS */ +#define ARIZONA_HP1R_SC_POS_STS_MASK 0x0004 /* HP1R_SC_POS_STS */ +#define ARIZONA_HP1R_SC_POS_STS_SHIFT 2 /* HP1R_SC_POS_STS */ +#define ARIZONA_HP1R_SC_POS_STS_WIDTH 1 /* HP1R_SC_POS_STS */ +#define ARIZONA_HP1L_SC_NEG_STS 0x0002 /* HP1L_SC_NEG_STS */ +#define ARIZONA_HP1L_SC_NEG_STS_MASK 0x0002 /* HP1L_SC_NEG_STS */ +#define ARIZONA_HP1L_SC_NEG_STS_SHIFT 1 /* HP1L_SC_NEG_STS */ +#define ARIZONA_HP1L_SC_NEG_STS_WIDTH 1 /* HP1L_SC_NEG_STS */ +#define ARIZONA_HP1L_SC_POS_STS 0x0001 /* HP1L_SC_POS_STS */ +#define ARIZONA_HP1L_SC_POS_STS_MASK 0x0001 /* HP1L_SC_POS_STS */ +#define ARIZONA_HP1L_SC_POS_STS_SHIFT 0 /* HP1L_SC_POS_STS */ +#define ARIZONA_HP1L_SC_POS_STS_WIDTH 1 /* HP1L_SC_POS_STS */ + /* * R3392 (0xD40) - IRQ Pin Status */ -- cgit v1.2.3-59-g8ed1b From 30a2af3a320d5c0598cde08ba6e5d22a724f82e4 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Tue, 15 Jul 2014 11:21:50 +0100 Subject: mfd: arizona: Only free the CTRLIF_ERR IRQ if we requested it We only request the control interface error IRQ if we set ctrlif_error, as such we should only free it in that situation. Otherwise we will attempt to free an IRQ we never requested and get a warning from the IRQ core. This patch moves the ctrlif_error variable into the arizona structure and checks it in all cases we free the control interface error IRQ. 
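In outline, the request and every free stay guarded by the same flag. The two helpers below are a condensed, hypothetical sketch of that symmetry (they are not added by this patch; only calls already visible in arizona-irq.c are used):

	/* Hypothetical helpers illustrating the request/free symmetry. */
	static int arizona_ctrlif_irq_request(struct arizona *arizona)
	{
		if (!arizona->ctrlif_error)
			return 0;	/* never requested, so never freed */

		return request_threaded_irq(arizona_map_irq(arizona,
							    ARIZONA_IRQ_CTRLIF_ERR),
					    NULL, arizona_ctrlif_err, IRQF_ONESHOT,
					    "Control interface error", arizona);
	}

	static void arizona_ctrlif_irq_free(struct arizona *arizona)
	{
		if (arizona->ctrlif_error)
			free_irq(arizona_map_irq(arizona, ARIZONA_IRQ_CTRLIF_ERR),
				 arizona);
	}
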
Signed-off-by: Charles Keepax Signed-off-by: Lee Jones --- drivers/mfd/arizona-irq.c | 19 ++++++++++++------- include/linux/mfd/arizona/core.h | 2 ++ 2 files changed, 14 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/arizona-irq.c b/drivers/mfd/arizona-irq.c index e780bc40165d..d420dbc0e2b0 100644 --- a/drivers/mfd/arizona-irq.c +++ b/drivers/mfd/arizona-irq.c @@ -188,16 +188,17 @@ int arizona_irq_init(struct arizona *arizona) int flags = IRQF_ONESHOT; int ret, i; const struct regmap_irq_chip *aod, *irq; - bool ctrlif_error = true; struct irq_data *irq_data; + arizona->ctrlif_error = true; + switch (arizona->type) { #ifdef CONFIG_MFD_WM5102 case WM5102: aod = &wm5102_aod; irq = &wm5102_irq; - ctrlif_error = false; + arizona->ctrlif_error = false; break; #endif #ifdef CONFIG_MFD_WM5110 @@ -213,7 +214,7 @@ int arizona_irq_init(struct arizona *arizona) break; } - ctrlif_error = false; + arizona->ctrlif_error = false; break; #endif #ifdef CONFIG_MFD_WM8997 @@ -221,7 +222,7 @@ int arizona_irq_init(struct arizona *arizona) aod = &wm8997_aod; irq = &wm8997_irq; - ctrlif_error = false; + arizona->ctrlif_error = false; break; #endif default: @@ -308,7 +309,7 @@ int arizona_irq_init(struct arizona *arizona) } /* Handle control interface errors in the core */ - if (ctrlif_error) { + if (arizona->ctrlif_error) { i = arizona_map_irq(arizona, ARIZONA_IRQ_CTRLIF_ERR); ret = request_threaded_irq(i, NULL, arizona_ctrlif_err, IRQF_ONESHOT, @@ -353,7 +354,9 @@ int arizona_irq_init(struct arizona *arizona) return 0; err_main_irq: - free_irq(arizona_map_irq(arizona, ARIZONA_IRQ_CTRLIF_ERR), arizona); + if (arizona->ctrlif_error) + free_irq(arizona_map_irq(arizona, ARIZONA_IRQ_CTRLIF_ERR), + arizona); err_ctrlif: free_irq(arizona_map_irq(arizona, ARIZONA_IRQ_BOOT_DONE), arizona); err_boot_done: @@ -369,7 +372,9 @@ err: int arizona_irq_exit(struct arizona *arizona) { - free_irq(arizona_map_irq(arizona, ARIZONA_IRQ_CTRLIF_ERR), arizona); + if (arizona->ctrlif_error) + free_irq(arizona_map_irq(arizona, ARIZONA_IRQ_CTRLIF_ERR), + arizona); free_irq(arizona_map_irq(arizona, ARIZONA_IRQ_BOOT_DONE), arizona); regmap_del_irq_chip(irq_create_mapping(arizona->virq, 1), arizona->irq_chip); diff --git a/include/linux/mfd/arizona/core.h b/include/linux/mfd/arizona/core.h index 8bc7601cca68..fdd8b7b82db5 100644 --- a/include/linux/mfd/arizona/core.h +++ b/include/linux/mfd/arizona/core.h @@ -132,6 +132,8 @@ struct arizona { struct mutex clk_lock; int clk32k_ref; + bool ctrlif_error; + struct snd_soc_dapm_context *dapm; }; -- cgit v1.2.3-59-g8ed1b From 0a6d315827eedc733d404ecff3cd4cc0e6437865 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 24 Jul 2014 20:08:55 +0200 Subject: gpio: split gpiod board registration into machine header As per example from the regulator subsystem: put all defines and functions related to registering board info for GPIO descriptors into a separate header. 
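For a board file, the registration flow behind the new header looks like this minimal sketch (the chip label, pin numbers and dev_id are illustrative placeholders, not taken from any in-tree board):

	#include <linux/gpio/machine.h>

	/* Hypothetical lookup table; "gpiochip0" and "foo-device" are example names. */
	static struct gpiod_lookup_table foo_gpios_table = {
		.dev_id = "foo-device",		/* must match dev_name() of the consumer */
		.table = {
			GPIO_LOOKUP("gpiochip0", 12, "reset", GPIO_ACTIVE_LOW),
			GPIO_LOOKUP_IDX("gpiochip0", 13, "led", 0, GPIO_ACTIVE_HIGH),
			GPIO_LOOKUP_IDX("gpiochip0", 14, "led", 1, GPIO_ACTIVE_HIGH),
			{ },			/* terminator */
		},
	};

	static void foo_board_register_gpios(void)
	{
		gpiod_add_lookup_table(&foo_gpios_table);
	}

Consumers keep calling gpiod_get()/gpiod_get_index() unchanged; only the board-side definitions move out of <linux/gpio/driver.h> into <linux/gpio/machine.h>.
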
Cc: Andrew Victor Cc: Nicolas Ferre Cc: Jean-Christophe Plagniol-Villard Cc: Ralf Baechle Cc: Thierry Reding Acked-by: Stephen Warren Reviewed-by: Alexandre Courbot Signed-off-by: Linus Walleij --- Documentation/gpio/board.txt | 2 +- arch/arm/mach-at91/at91rm9200_devices.c | 2 +- arch/arm/mach-tegra/board-paz00.c | 2 +- arch/mips/jz4740/board-qi_lb60.c | 1 + drivers/gpio/gpiolib.c | 1 + include/linux/gpio/driver.h | 54 ------------------------------ include/linux/gpio/machine.h | 58 +++++++++++++++++++++++++++++++++ 7 files changed, 63 insertions(+), 57 deletions(-) create mode 100644 include/linux/gpio/machine.h (limited to 'include/linux') diff --git a/Documentation/gpio/board.txt b/Documentation/gpio/board.txt index ba169faad5c6..4452786225b8 100644 --- a/Documentation/gpio/board.txt +++ b/Documentation/gpio/board.txt @@ -60,7 +60,7 @@ Platform Data Finally, GPIOs can be bound to devices and functions using platform data. Board files that desire to do so need to include the following header: - #include + #include GPIOs are mapped by the means of tables of lookups, containing instances of the gpiod_lookup structure. Two macros are defined to help declaring such mappings: diff --git a/arch/arm/mach-at91/at91rm9200_devices.c b/arch/arm/mach-at91/at91rm9200_devices.c index 3f4bb58aea54..74f1eaf97801 100644 --- a/arch/arm/mach-at91/at91rm9200_devices.c +++ b/arch/arm/mach-at91/at91rm9200_devices.c @@ -15,7 +15,7 @@ #include #include -#include +#include #include #include diff --git a/arch/arm/mach-tegra/board-paz00.c b/arch/arm/mach-tegra/board-paz00.c index 9c6029ba526f..91fd858ced0d 100644 --- a/arch/arm/mach-tegra/board-paz00.c +++ b/arch/arm/mach-tegra/board-paz00.c @@ -18,7 +18,7 @@ */ #include -#include +#include #include #include "board.h" diff --git a/arch/mips/jz4740/board-qi_lb60.c b/arch/mips/jz4740/board-qi_lb60.c index 088e92a79ae6..c454525e7695 100644 --- a/arch/mips/jz4740/board-qi_lb60.c +++ b/arch/mips/jz4740/board-qi_lb60.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 768f0831db18..18b069e6ba03 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "gpiolib.h" diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 4c463fb0155e..e78a2373e374 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -151,60 +151,6 @@ void gpio_unlock_as_irq(struct gpio_chip *chip, unsigned int offset); struct gpio_chip *gpiod_to_chip(const struct gpio_desc *desc); -enum gpio_lookup_flags { - GPIO_ACTIVE_HIGH = (0 << 0), - GPIO_ACTIVE_LOW = (1 << 0), - GPIO_OPEN_DRAIN = (1 << 1), - GPIO_OPEN_SOURCE = (1 << 2), -}; - -/** - * struct gpiod_lookup - lookup table - * @chip_label: name of the chip the GPIO belongs to - * @chip_hwnum: hardware number (i.e. relative to the chip) of the GPIO - * @con_id: name of the GPIO from the device's point of view - * @idx: index of the GPIO in case several GPIOs share the same name - * @flags: mask of GPIO_* values - * - * gpiod_lookup is a lookup table for associating GPIOs to specific devices and - * functions using platform data. 
- */ -struct gpiod_lookup { - const char *chip_label; - u16 chip_hwnum; - const char *con_id; - unsigned int idx; - enum gpio_lookup_flags flags; -}; - -struct gpiod_lookup_table { - struct list_head list; - const char *dev_id; - struct gpiod_lookup table[]; -}; - -/* - * Simple definition of a single GPIO under a con_id - */ -#define GPIO_LOOKUP(_chip_label, _chip_hwnum, _con_id, _flags) \ - GPIO_LOOKUP_IDX(_chip_label, _chip_hwnum, _con_id, 0, _flags) - -/* - * Use this macro if you need to have several GPIOs under the same con_id. - * Each GPIO needs to use a different index and can be accessed using - * gpiod_get_index() - */ -#define GPIO_LOOKUP_IDX(_chip_label, _chip_hwnum, _con_id, _idx, _flags) \ -{ \ - .chip_label = _chip_label, \ - .chip_hwnum = _chip_hwnum, \ - .con_id = _con_id, \ - .idx = _idx, \ - .flags = _flags, \ -} - -void gpiod_add_lookup_table(struct gpiod_lookup_table *table); - #ifdef CONFIG_GPIOLIB_IRQCHIP void gpiochip_set_chained_irqchip(struct gpio_chip *gpiochip, diff --git a/include/linux/gpio/machine.h b/include/linux/gpio/machine.h new file mode 100644 index 000000000000..b8ad87fab4ce --- /dev/null +++ b/include/linux/gpio/machine.h @@ -0,0 +1,58 @@ +#ifndef __LINUX_GPIO_MACHINE_H +#define __LINUX_GPIO_MACHINE_H + +enum gpio_lookup_flags { + GPIO_ACTIVE_HIGH = (0 << 0), + GPIO_ACTIVE_LOW = (1 << 0), + GPIO_OPEN_DRAIN = (1 << 1), + GPIO_OPEN_SOURCE = (1 << 2), +}; + +/** + * struct gpiod_lookup - lookup table + * @chip_label: name of the chip the GPIO belongs to + * @chip_hwnum: hardware number (i.e. relative to the chip) of the GPIO + * @con_id: name of the GPIO from the device's point of view + * @idx: index of the GPIO in case several GPIOs share the same name + * @flags: mask of GPIO_* values + * + * gpiod_lookup is a lookup table for associating GPIOs to specific devices and + * functions using platform data. + */ +struct gpiod_lookup { + const char *chip_label; + u16 chip_hwnum; + const char *con_id; + unsigned int idx; + enum gpio_lookup_flags flags; +}; + +struct gpiod_lookup_table { + struct list_head list; + const char *dev_id; + struct gpiod_lookup table[]; +}; + +/* + * Simple definition of a single GPIO under a con_id + */ +#define GPIO_LOOKUP(_chip_label, _chip_hwnum, _con_id, _flags) \ + GPIO_LOOKUP_IDX(_chip_label, _chip_hwnum, _con_id, 0, _flags) + +/* + * Use this macro if you need to have several GPIOs under the same con_id. + * Each GPIO needs to use a different index and can be accessed using + * gpiod_get_index() + */ +#define GPIO_LOOKUP_IDX(_chip_label, _chip_hwnum, _con_id, _idx, _flags) \ +{ \ + .chip_label = _chip_label, \ + .chip_hwnum = _chip_hwnum, \ + .con_id = _con_id, \ + .idx = _idx, \ + .flags = _flags, \ +} + +void gpiod_add_lookup_table(struct gpiod_lookup_table *table); + +#endif /* __LINUX_GPIO_MACHINE_H */ -- cgit v1.2.3-59-g8ed1b From 39b2bbe3d715cf5013b5c48695ccdd25bd3bf120 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Fri, 25 Jul 2014 23:38:36 +0900 Subject: gpio: add flags argument to gpiod_get*() functions The huge majority of GPIOs have their direction and initial value set right after being obtained by one of the gpiod_get() functions. The integer GPIO API had gpio_request_one() that took a convenience flags parameter allowing to specify an direction and value applied to the returned GPIO. This feature greatly simplifies client code and ensures errors are always handled properly. A similar feature has been requested for the gpiod API. 
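As a purely hypothetical consumer sketch (the function name and "reset" con_id are made up), the extended call replaces the usual get-then-set-direction pair:

	#include <linux/err.h>
	#include <linux/gpio/consumer.h>

	static int foo_request_reset_gpio(struct device *dev)
	{
		struct gpio_desc *reset;

		/* Obtain "reset" and configure it as an output driven low in one call. */
		reset = devm_gpiod_get(dev, "reset", GPIOD_OUT_LOW);
		if (IS_ERR(reset))
			return PTR_ERR(reset);

		/* Logical value; any ACTIVE_LOW inversion is handled by gpiolib. */
		gpiod_set_value(reset, 1);

		return 0;
	}
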
Since setting the direction of a GPIO is so often the very next action done after obtaining its descriptor, we prefer to extend the existing functions instead of introducing new functions that would raise the number of gpiod getters to 16 (!). The drawback of this approach is that all gpiod clients need to be updated. To limit the pain, temporary macros are introduced that allow gpiod_get*() to be called with or without the extra flags argument. They will be removed once all consumer code has been updated. Signed-off-by: Alexandre Courbot Reviewed-by: Mark Brown Signed-off-by: Linus Walleij --- Documentation/gpio/consumer.txt | 26 ++++++++++--- drivers/gpio/devres.c | 40 ++++++++++++-------- drivers/gpio/gpiolib.c | 67 +++++++++++++++++++++++----------- include/linux/gpio/consumer.h | 81 +++++++++++++++++++++++++++++++++-------- 4 files changed, 155 insertions(+), 59 deletions(-) (limited to 'include/linux') diff --git a/Documentation/gpio/consumer.txt b/Documentation/gpio/consumer.txt index d8abfc31abbe..76546324e968 100644 --- a/Documentation/gpio/consumer.txt +++ b/Documentation/gpio/consumer.txt @@ -29,13 +29,24 @@ gpiod_get() functions. Like many other kernel subsystems, gpiod_get() takes the device that will use the GPIO and the function the requested GPIO is supposed to fulfill: - struct gpio_desc *gpiod_get(struct device *dev, const char *con_id) + struct gpio_desc *gpiod_get(struct device *dev, const char *con_id, + enum gpiod_flags flags) If a function is implemented by using several GPIOs together (e.g. a simple LED device that displays digits), an additional index argument can be specified: struct gpio_desc *gpiod_get_index(struct device *dev, - const char *con_id, unsigned int idx) + const char *con_id, unsigned int idx, + enum gpiod_flags flags) + +The flags parameter is used to optionally specify a direction and initial value +for the GPIO. Values can be: + +* GPIOD_ASIS or 0 to not initialize the GPIO at all. The direction must be set + later with one of the dedicated functions. +* GPIOD_IN to initialize the GPIO as input. +* GPIOD_OUT_LOW to initialize the GPIO as output with a value of 0. +* GPIOD_OUT_HIGH to initialize the GPIO as output with a value of 1. Both functions return either a valid GPIO descriptor, or an error code checkable with IS_ERR() (they will never return a NULL pointer). -ENOENT will be returned @@ -46,11 +57,13 @@ errors and an absence of GPIO for optional GPIO parameters. Device-managed variants of these functions are also defined: - struct gpio_desc *devm_gpiod_get(struct device *dev, const char *con_id) + struct gpio_desc *devm_gpiod_get(struct device *dev, const char *con_id, + enum gpiod_flags flags) struct gpio_desc *devm_gpiod_get_index(struct device *dev, const char *con_id, - unsigned int idx) + unsigned int idx, + enum gpiod_flags flags) A GPIO descriptor can be disposed of using the gpiod_put() function: @@ -67,8 +80,9 @@ Using GPIOs Setting Direction ----------------- -The first thing a driver must do with a GPIO is setting its direction. This is -done by invoking one of the gpiod_direction_*() functions: +The first thing a driver must do with a GPIO is setting its direction. 
If no +direction-setting flags have been given to gpiod_get*(), this is done by +invoking one of the gpiod_direction_*() functions: int gpiod_direction_input(struct gpio_desc *desc) int gpiod_direction_output(struct gpio_desc *desc, int value) diff --git a/drivers/gpio/devres.c b/drivers/gpio/devres.c index 65978cf85f79..41b2f40578d5 100644 --- a/drivers/gpio/devres.c +++ b/drivers/gpio/devres.c @@ -39,47 +39,53 @@ static int devm_gpiod_match(struct device *dev, void *res, void *data) * devm_gpiod_get - Resource-managed gpiod_get() * @dev: GPIO consumer * @con_id: function within the GPIO consumer + * @flags: optional GPIO initialization flags * * Managed gpiod_get(). GPIO descriptors returned from this function are * automatically disposed on driver detach. See gpiod_get() for detailed * information about behavior and return values. */ -struct gpio_desc *__must_check devm_gpiod_get(struct device *dev, - const char *con_id) +struct gpio_desc *__must_check __devm_gpiod_get(struct device *dev, + const char *con_id, + enum gpiod_flags flags) { - return devm_gpiod_get_index(dev, con_id, 0); + return devm_gpiod_get_index(dev, con_id, 0, flags); } -EXPORT_SYMBOL(devm_gpiod_get); +EXPORT_SYMBOL(__devm_gpiod_get); /** * devm_gpiod_get_optional - Resource-managed gpiod_get_optional() * @dev: GPIO consumer * @con_id: function within the GPIO consumer + * @flags: optional GPIO initialization flags * * Managed gpiod_get_optional(). GPIO descriptors returned from this function * are automatically disposed on driver detach. See gpiod_get_optional() for * detailed information about behavior and return values. */ -struct gpio_desc *__must_check devm_gpiod_get_optional(struct device *dev, - const char *con_id) +struct gpio_desc *__must_check __devm_gpiod_get_optional(struct device *dev, + const char *con_id, + enum gpiod_flags flags) { - return devm_gpiod_get_index_optional(dev, con_id, 0); + return devm_gpiod_get_index_optional(dev, con_id, 0, flags); } -EXPORT_SYMBOL(devm_gpiod_get_optional); +EXPORT_SYMBOL(__devm_gpiod_get_optional); /** * devm_gpiod_get_index - Resource-managed gpiod_get_index() * @dev: GPIO consumer * @con_id: function within the GPIO consumer * @idx: index of the GPIO to obtain in the consumer + * @flags: optional GPIO initialization flags * * Managed gpiod_get_index(). GPIO descriptors returned from this function are * automatically disposed on driver detach. See gpiod_get_index() for detailed * information about behavior and return values. */ -struct gpio_desc *__must_check devm_gpiod_get_index(struct device *dev, +struct gpio_desc *__must_check __devm_gpiod_get_index(struct device *dev, const char *con_id, - unsigned int idx) + unsigned int idx, + enum gpiod_flags flags) { struct gpio_desc **dr; struct gpio_desc *desc; @@ -89,7 +95,7 @@ struct gpio_desc *__must_check devm_gpiod_get_index(struct device *dev, if (!dr) return ERR_PTR(-ENOMEM); - desc = gpiod_get_index(dev, con_id, idx); + desc = gpiod_get_index(dev, con_id, idx, flags); if (IS_ERR(desc)) { devres_free(dr); return desc; @@ -100,26 +106,28 @@ struct gpio_desc *__must_check devm_gpiod_get_index(struct device *dev, return desc; } -EXPORT_SYMBOL(devm_gpiod_get_index); +EXPORT_SYMBOL(__devm_gpiod_get_index); /** * devm_gpiod_get_index_optional - Resource-managed gpiod_get_index_optional() * @dev: GPIO consumer * @con_id: function within the GPIO consumer * @index: index of the GPIO to obtain in the consumer + * @flags: optional GPIO initialization flags * * Managed gpiod_get_index_optional(). 
GPIO descriptors returned from this * function are automatically disposed on driver detach. See * gpiod_get_index_optional() for detailed information about behavior and * return values. */ -struct gpio_desc *__must_check devm_gpiod_get_index_optional(struct device *dev, +struct gpio_desc *__must_check __devm_gpiod_get_index_optional(struct device *dev, const char *con_id, - unsigned int index) + unsigned int index, + enum gpiod_flags flags) { struct gpio_desc *desc; - desc = devm_gpiod_get_index(dev, con_id, index); + desc = devm_gpiod_get_index(dev, con_id, index, flags); if (IS_ERR(desc)) { if (PTR_ERR(desc) == -ENOENT) return NULL; @@ -127,7 +135,7 @@ struct gpio_desc *__must_check devm_gpiod_get_index_optional(struct device *dev, return desc; } -EXPORT_SYMBOL(devm_gpiod_get_index_optional); +EXPORT_SYMBOL(__devm_gpiod_get_index_optional); /** * devm_gpiod_put - Resource-managed gpiod_put() diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 330227581a25..15cc0bb65dda 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1582,38 +1582,43 @@ static struct gpio_desc *gpiod_find(struct device *dev, const char *con_id, * gpiod_get - obtain a GPIO for a given GPIO function * @dev: GPIO consumer, can be NULL for system-global GPIOs * @con_id: function within the GPIO consumer + * @flags: optional GPIO initialization flags * * Return the GPIO descriptor corresponding to the function con_id of device * dev, -ENOENT if no GPIO has been assigned to the requested function, or * another IS_ERR() code if an error occured while trying to acquire the GPIO. */ -struct gpio_desc *__must_check gpiod_get(struct device *dev, const char *con_id) +struct gpio_desc *__must_check __gpiod_get(struct device *dev, const char *con_id, + enum gpiod_flags flags) { - return gpiod_get_index(dev, con_id, 0); + return gpiod_get_index(dev, con_id, 0, flags); } -EXPORT_SYMBOL_GPL(gpiod_get); +EXPORT_SYMBOL_GPL(__gpiod_get); /** * gpiod_get_optional - obtain an optional GPIO for a given GPIO function * @dev: GPIO consumer, can be NULL for system-global GPIOs * @con_id: function within the GPIO consumer + * @flags: optional GPIO initialization flags * * This is equivalent to gpiod_get(), except that when no GPIO was assigned to * the requested function it will return NULL. This is convenient for drivers * that need to handle optional GPIOs. */ -struct gpio_desc *__must_check gpiod_get_optional(struct device *dev, - const char *con_id) +struct gpio_desc *__must_check __gpiod_get_optional(struct device *dev, + const char *con_id, + enum gpiod_flags flags) { - return gpiod_get_index_optional(dev, con_id, 0); + return gpiod_get_index_optional(dev, con_id, 0, flags); } -EXPORT_SYMBOL_GPL(gpiod_get_optional); +EXPORT_SYMBOL_GPL(__gpiod_get_optional); /** * gpiod_get_index - obtain a GPIO from a multi-index GPIO function * @dev: GPIO consumer, can be NULL for system-global GPIOs * @con_id: function within the GPIO consumer * @idx: index of the GPIO to obtain in the consumer + * @flags: optional GPIO initialization flags * * This variant of gpiod_get() allows to access GPIOs other than the first * defined one for functions that define several GPIOs. @@ -1622,23 +1627,24 @@ EXPORT_SYMBOL_GPL(gpiod_get_optional); * requested function and/or index, or another IS_ERR() code if an error * occured while trying to acquire the GPIO. 
*/ -struct gpio_desc *__must_check gpiod_get_index(struct device *dev, +struct gpio_desc *__must_check __gpiod_get_index(struct device *dev, const char *con_id, - unsigned int idx) + unsigned int idx, + enum gpiod_flags flags) { struct gpio_desc *desc = NULL; int status; - enum gpio_lookup_flags flags = 0; + enum gpio_lookup_flags lookupflags = 0; dev_dbg(dev, "GPIO lookup for consumer %s\n", con_id); /* Using device tree? */ if (IS_ENABLED(CONFIG_OF) && dev && dev->of_node) { dev_dbg(dev, "using device tree for GPIO lookup\n"); - desc = of_find_gpio(dev, con_id, idx, &flags); + desc = of_find_gpio(dev, con_id, idx, &lookupflags); } else if (IS_ENABLED(CONFIG_ACPI) && dev && ACPI_HANDLE(dev)) { dev_dbg(dev, "using ACPI for GPIO lookup\n"); - desc = acpi_find_gpio(dev, con_id, idx, &flags); + desc = acpi_find_gpio(dev, con_id, idx, &lookupflags); } /* @@ -1647,7 +1653,7 @@ struct gpio_desc *__must_check gpiod_get_index(struct device *dev, */ if (!desc || desc == ERR_PTR(-ENOENT)) { dev_dbg(dev, "using lookup tables for GPIO lookup"); - desc = gpiod_find(dev, con_id, idx, &flags); + desc = gpiod_find(dev, con_id, idx, &lookupflags); } if (IS_ERR(desc)) { @@ -1660,16 +1666,33 @@ struct gpio_desc *__must_check gpiod_get_index(struct device *dev, if (status < 0) return ERR_PTR(status); - if (flags & GPIO_ACTIVE_LOW) + if (lookupflags & GPIO_ACTIVE_LOW) set_bit(FLAG_ACTIVE_LOW, &desc->flags); - if (flags & GPIO_OPEN_DRAIN) + if (lookupflags & GPIO_OPEN_DRAIN) set_bit(FLAG_OPEN_DRAIN, &desc->flags); - if (flags & GPIO_OPEN_SOURCE) + if (lookupflags & GPIO_OPEN_SOURCE) set_bit(FLAG_OPEN_SOURCE, &desc->flags); + /* No particular flag request, return here... */ + if (flags & GPIOD_FLAGS_BIT_DIR_SET) + return desc; + + /* Process flags */ + if (flags & GPIOD_FLAGS_BIT_DIR_OUT) + status = gpiod_direction_output(desc, + flags & GPIOD_FLAGS_BIT_DIR_VAL); + else + status = gpiod_direction_input(desc); + + if (status < 0) { + dev_dbg(dev, "setup of GPIO %s failed\n", con_id); + gpiod_put(desc); + return ERR_PTR(status); + } + return desc; } -EXPORT_SYMBOL_GPL(gpiod_get_index); +EXPORT_SYMBOL_GPL(__gpiod_get_index); /** * gpiod_get_index_optional - obtain an optional GPIO from a multi-index GPIO @@ -1677,18 +1700,20 @@ EXPORT_SYMBOL_GPL(gpiod_get_index); * @dev: GPIO consumer, can be NULL for system-global GPIOs * @con_id: function within the GPIO consumer * @index: index of the GPIO to obtain in the consumer + * @flags: optional GPIO initialization flags * * This is equivalent to gpiod_get_index(), except that when no GPIO with the * specified index was assigned to the requested function it will return NULL. * This is convenient for drivers that need to handle optional GPIOs. 
*/ -struct gpio_desc *__must_check gpiod_get_index_optional(struct device *dev, +struct gpio_desc *__must_check __gpiod_get_index_optional(struct device *dev, const char *con_id, - unsigned int index) + unsigned int index, + enum gpiod_flags flags) { struct gpio_desc *desc; - desc = gpiod_get_index(dev, con_id, index); + desc = gpiod_get_index(dev, con_id, index, flags); if (IS_ERR(desc)) { if (PTR_ERR(desc) == -ENOENT) return NULL; @@ -1696,7 +1721,7 @@ struct gpio_desc *__must_check gpiod_get_index_optional(struct device *dev, return desc; } -EXPORT_SYMBOL_GPL(gpiod_get_index_optional); +EXPORT_SYMBOL_GPL(__gpiod_get_index_optional); /** * gpiod_put - dispose of a GPIO descriptor diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index 05e53ccb708b..b7ce0c64c6f3 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -18,30 +18,79 @@ struct gpio_desc; #ifdef CONFIG_GPIOLIB +#define GPIOD_FLAGS_BIT_DIR_SET BIT(0) +#define GPIOD_FLAGS_BIT_DIR_OUT BIT(1) +#define GPIOD_FLAGS_BIT_DIR_VAL BIT(2) + +/** + * Optional flags that can be passed to one of gpiod_* to configure direction + * and output value. These values cannot be OR'd. + */ +enum gpiod_flags { + GPIOD_ASIS = 0, + GPIOD_IN = GPIOD_FLAGS_BIT_DIR_SET, + GPIOD_OUT_LOW = GPIOD_FLAGS_BIT_DIR_SET | GPIOD_FLAGS_BIT_DIR_OUT, + GPIOD_OUT_HIGH = GPIOD_FLAGS_BIT_DIR_SET | GPIOD_FLAGS_BIT_DIR_OUT | + GPIOD_FLAGS_BIT_DIR_VAL, +}; + /* Acquire and dispose GPIOs */ -struct gpio_desc *__must_check gpiod_get(struct device *dev, - const char *con_id); -struct gpio_desc *__must_check gpiod_get_index(struct device *dev, +struct gpio_desc *__must_check __gpiod_get(struct device *dev, + const char *con_id, + enum gpiod_flags flags); +#define __gpiod_get(dev, con_id, flags, ...) __gpiod_get(dev, con_id, flags) +#define gpiod_get(varargs...) __gpiod_get(varargs, 0) +struct gpio_desc *__must_check __gpiod_get_index(struct device *dev, const char *con_id, - unsigned int idx); -struct gpio_desc *__must_check gpiod_get_optional(struct device *dev, - const char *con_id); -struct gpio_desc *__must_check gpiod_get_index_optional(struct device *dev, + unsigned int idx, + enum gpiod_flags flags); +#define __gpiod_get_index(dev, con_id, index, flags, ...) \ + __gpiod_get_index(dev, con_id, index, flags) +#define gpiod_get_index(varargs...) __gpiod_get_index(varargs, 0) +struct gpio_desc *__must_check __gpiod_get_optional(struct device *dev, + const char *con_id, + enum gpiod_flags flags); +#define __gpiod_get_optional(dev, con_id, flags, ...) \ + __gpiod_get_optional(dev, con_id, flags) +#define gpiod_get_optional(varargs...) __gpiod_get_optional(varargs, 0) +struct gpio_desc *__must_check __gpiod_get_index_optional(struct device *dev, const char *con_id, - unsigned int index); + unsigned int index, + enum gpiod_flags flags); +#define __gpiod_get_index_optional(dev, con_id, index, flags, ...) \ + __gpiod_get_index_optional(dev, con_id, index, flags) +#define gpiod_get_index_optional(varargs...) \ + __gpiod_get_index_optional(varargs, 0) void gpiod_put(struct gpio_desc *desc); -struct gpio_desc *__must_check devm_gpiod_get(struct device *dev, - const char *con_id); -struct gpio_desc *__must_check devm_gpiod_get_index(struct device *dev, +struct gpio_desc *__must_check __devm_gpiod_get(struct device *dev, + const char *con_id, + enum gpiod_flags flags); +#define __devm_gpiod_get(dev, con_id, flags, ...) \ + __devm_gpiod_get(dev, con_id, flags) +#define devm_gpiod_get(varargs...) 
__devm_gpiod_get(varargs, 0) +struct gpio_desc *__must_check __devm_gpiod_get_index(struct device *dev, const char *con_id, - unsigned int idx); -struct gpio_desc *__must_check devm_gpiod_get_optional(struct device *dev, - const char *con_id); + unsigned int idx, + enum gpiod_flags flags); +#define __devm_gpiod_get_index(dev, con_id, index, flags, ...) \ + __devm_gpiod_get_index(dev, con_id, index, flags) +#define devm_gpiod_get_index(varargs...) __devm_gpiod_get_index(varargs, 0) +struct gpio_desc *__must_check __devm_gpiod_get_optional(struct device *dev, + const char *con_id, + enum gpiod_flags flags); +#define __devm_gpiod_get_optional(dev, con_id, flags, ...) \ + __devm_gpiod_get_optional(dev, con_id, flags) +#define devm_gpiod_get_optional(varargs...) \ + __devm_gpiod_get_optional(varargs, 0) struct gpio_desc *__must_check -devm_gpiod_get_index_optional(struct device *dev, const char *con_id, - unsigned int index); +__devm_gpiod_get_index_optional(struct device *dev, const char *con_id, + unsigned int index, enum gpiod_flags flags); +#define __devm_gpiod_get_index_optional(dev, con_id, index, flags, ...) \ + __devm_gpiod_get_index_optional(dev, con_id, index, flags) +#define devm_gpiod_get_index_optional(varargs...) \ + __devm_gpiod_get_index_optional(varargs, 0) void devm_gpiod_put(struct device *dev, struct gpio_desc *desc); -- cgit v1.2.3-59-g8ed1b From eb3fe7def66511120766c8fc05ee9631cce7fe6f Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Tue, 8 Jul 2014 13:46:37 +0300 Subject: ARM: edma: Add edma_assign_channel_eventq() to move channel to a give queue In some cases it is desired to move a channel to a specific event queue. Such a use case is audio, where it is preferred that it is served with highest priority compared to other DMA clients. Signed-off-by: Peter Ujfalusi Acked-by: Sekhar Nori Signed-off-by: Vinod Koul --- arch/arm/common/edma.c | 28 ++++++++++++++++++++++++++++ include/linux/platform_data/edma.h | 2 ++ 2 files changed, 30 insertions(+) (limited to 'include/linux') diff --git a/arch/arm/common/edma.c b/arch/arm/common/edma.c index f834aae7720f..88099175fc56 100644 --- a/arch/arm/common/edma.c +++ b/arch/arm/common/edma.c @@ -1414,6 +1414,34 @@ void edma_clear_event(unsigned channel) } EXPORT_SYMBOL(edma_clear_event); +/* + * edma_assign_channel_eventq - move given channel to desired eventq + * Arguments: + * channel - channel number + * eventq_no - queue to move the channel + * + * Can be used to move a channel to a selected event queue. 
+ */ +void edma_assign_channel_eventq(unsigned channel, enum dma_event_q eventq_no) +{ + unsigned ctlr; + + ctlr = EDMA_CTLR(channel); + channel = EDMA_CHAN_SLOT(channel); + + if (channel >= edma_cc[ctlr]->num_channels) + return; + + /* default to low priority queue */ + if (eventq_no == EVENTQ_DEFAULT) + eventq_no = edma_cc[ctlr]->default_queue; + if (eventq_no >= edma_cc[ctlr]->num_tc) + return; + + map_dmach_queue(ctlr, channel, eventq_no); +} +EXPORT_SYMBOL(edma_assign_channel_eventq); + static int edma_setup_from_hw(struct device *dev, struct edma_soc_info *pdata, struct edma *edma_cc) { diff --git a/include/linux/platform_data/edma.h b/include/linux/platform_data/edma.h index eb8d5627d080..bdb2710e2aab 100644 --- a/include/linux/platform_data/edma.h +++ b/include/linux/platform_data/edma.h @@ -150,6 +150,8 @@ void edma_clear_event(unsigned channel); void edma_pause(unsigned channel); void edma_resume(unsigned channel); +void edma_assign_channel_eventq(unsigned channel, enum dma_event_q eventq_no); + struct edma_rsv_info { const s16 (*rsv_chans)[2]; -- cgit v1.2.3-59-g8ed1b From 784aa3d7fb6f729c06d5836c9d9569f58e4d05ae Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 14 Jul 2014 18:27:35 +0200 Subject: KVM: Rename and add argument to check_extension In preparation to make the check_extension function available to VM scope we add a struct kvm * argument to the function header and rename the function accordingly. It will still be called from the /dev/kvm fd, but with a NULL argument for struct kvm *. Signed-off-by: Alexander Graf Acked-by: Paolo Bonzini --- arch/arm/kvm/arm.c | 2 +- arch/ia64/kvm/kvm-ia64.c | 2 +- arch/mips/kvm/mips.c | 2 +- arch/powerpc/kvm/powerpc.c | 2 +- arch/s390/kvm/kvm-s390.c | 2 +- arch/x86/kvm/x86.c | 2 +- include/linux/kvm_host.h | 2 +- virt/kvm/kvm_main.c | 6 +++--- 8 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 3c82b37c0f9e..cb77f999badd 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -184,7 +184,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm) } } -int kvm_dev_ioctl_check_extension(long ext) +int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) { int r; switch (ext) { diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 6a4309bb821a..0729ba6acddf 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -190,7 +190,7 @@ void kvm_arch_check_processor_compat(void *rtn) *(int *)rtn = 0; } -int kvm_dev_ioctl_check_extension(long ext) +int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) { int r; diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index d687c6e3258d..3ca79aa011df 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -885,7 +885,7 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) return VM_FAULT_SIGBUS; } -int kvm_dev_ioctl_check_extension(long ext) +int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) { int r; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 8e0356835960..d870bacc2f75 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -391,7 +391,7 @@ void kvm_arch_sync_events(struct kvm *kvm) { } -int kvm_dev_ioctl_check_extension(long ext) +int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) { int r; /* FIXME!! 
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 2f3e14fe91a4..00268cacdf4c 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -146,7 +146,7 @@ long kvm_arch_dev_ioctl(struct file *filp, return -EINVAL; } -int kvm_dev_ioctl_check_extension(long ext) +int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) { int r; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5a8691b0ed76..5a62d91c96e7 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2616,7 +2616,7 @@ out: return r; } -int kvm_dev_ioctl_check_extension(long ext) +int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) { int r; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ec4e3bd83d47..5065b953e6e8 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -602,7 +602,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg); int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf); -int kvm_dev_ioctl_check_extension(long ext); +int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext); int kvm_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log, int *is_dirty); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 4b6c01b477f9..e28f3caa539d 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2571,7 +2571,7 @@ static int kvm_dev_ioctl_create_vm(unsigned long type) return r; } -static long kvm_dev_ioctl_check_extension_generic(long arg) +static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) { switch (arg) { case KVM_CAP_USER_MEMORY: @@ -2595,7 +2595,7 @@ static long kvm_dev_ioctl_check_extension_generic(long arg) default: break; } - return kvm_dev_ioctl_check_extension(arg); + return kvm_vm_ioctl_check_extension(kvm, arg); } static long kvm_dev_ioctl(struct file *filp, @@ -2614,7 +2614,7 @@ static long kvm_dev_ioctl(struct file *filp, r = kvm_dev_ioctl_create_vm(arg); break; case KVM_CHECK_EXTENSION: - r = kvm_dev_ioctl_check_extension_generic(arg); + r = kvm_vm_ioctl_check_extension_generic(NULL, arg); break; case KVM_GET_VCPU_MMAP_SIZE: r = -EINVAL; -- cgit v1.2.3-59-g8ed1b From 2873ead7e46694910ac49c3a8ee0f54956f96e0c Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Mon, 28 Jul 2014 10:42:48 -0400 Subject: Revert "selinux: fix the default socket labeling in sock_graft()" This reverts commit 4da6daf4d3df5a977e4623963f141a627fd2efce. Unfortunately, the commit in question caused problems with Bluetooth devices, specifically it caused them to get caught in the newly created BUG_ON() check. The AF_ALG problem still exists, but will be addressed in a future patch. Cc: stable@vger.kernel.org Signed-off-by: Paul Moore --- include/linux/security.h | 5 +---- security/selinux/hooks.c | 13 ++----------- 2 files changed, 3 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index 794be735ff4b..6478ce3252c7 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -987,10 +987,7 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * Retrieve the LSM-specific secid for the sock to enable caching of network * authorizations. * @sock_graft: - * This hook is called in response to a newly created sock struct being - * grafted onto an existing socket and allows the security module to - * perform whatever security attribute management is necessary for both - * the sock and socket. + * Sets the socket's isec sid to the sock's sid. 
* @inet_conn_request: * Sets the openreq's sid to socket's sid with MLS portion taken from peer sid. * @inet_csk_clone: diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index b3a6754e932b..336f0a04450e 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -4499,18 +4499,9 @@ static void selinux_sock_graft(struct sock *sk, struct socket *parent) struct inode_security_struct *isec = SOCK_INODE(parent)->i_security; struct sk_security_struct *sksec = sk->sk_security; - switch (sk->sk_family) { - case PF_INET: - case PF_INET6: - case PF_UNIX: + if (sk->sk_family == PF_INET || sk->sk_family == PF_INET6 || + sk->sk_family == PF_UNIX) isec->sid = sksec->sid; - break; - default: - /* by default there is no special labeling mechanism for the - * sksec label so inherit the label from the parent socket */ - BUG_ON(sksec->sid != SECINITSID_UNLABELED); - sksec->sid = isec->sid; - } sksec->sclass = isec->sclass; } -- cgit v1.2.3-59-g8ed1b From 95847e1bd34c0de86039408b24a05f07e788061d Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Sat, 26 Jul 2014 12:04:00 +0800 Subject: kthread_work: remove the unused wait_queue_head The wait_queue_head_t kthread_work->done is unused since flush_kthread_work() has been re-implemented. Let's remove it including the initialization code. This makes DEFINE_KTHREAD_WORK_ONSTACK() unnecessary, removed. tj: Updated description. Removed DEFINE_KTHREAD_WORK_ONSTACK(). Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo --- include/linux/kthread.h | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kthread.h b/include/linux/kthread.h index 7dcef3317689..13d55206ccf6 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -73,7 +73,6 @@ struct kthread_worker { struct kthread_work { struct list_head node; kthread_work_func_t func; - wait_queue_head_t done; struct kthread_worker *worker; }; @@ -85,7 +84,6 @@ struct kthread_work { #define KTHREAD_WORK_INIT(work, fn) { \ .node = LIST_HEAD_INIT((work).node), \ .func = (fn), \ - .done = __WAIT_QUEUE_HEAD_INITIALIZER((work).done), \ } #define DEFINE_KTHREAD_WORKER(worker) \ @@ -95,22 +93,16 @@ struct kthread_work { struct kthread_work work = KTHREAD_WORK_INIT(work, fn) /* - * kthread_worker.lock and kthread_work.done need their own lockdep class - * keys if they are defined on stack with lockdep enabled. Use the - * following macros when defining them on stack. + * kthread_worker.lock needs its own lockdep class key when defined on + * stack with lockdep enabled. Use the following macros in such cases. 
*/ #ifdef CONFIG_LOCKDEP # define KTHREAD_WORKER_INIT_ONSTACK(worker) \ ({ init_kthread_worker(&worker); worker; }) # define DEFINE_KTHREAD_WORKER_ONSTACK(worker) \ struct kthread_worker worker = KTHREAD_WORKER_INIT_ONSTACK(worker) -# define KTHREAD_WORK_INIT_ONSTACK(work, fn) \ - ({ init_kthread_work((&work), fn); work; }) -# define DEFINE_KTHREAD_WORK_ONSTACK(work, fn) \ - struct kthread_work work = KTHREAD_WORK_INIT_ONSTACK(work, fn) #else # define DEFINE_KTHREAD_WORKER_ONSTACK(worker) DEFINE_KTHREAD_WORKER(worker) -# define DEFINE_KTHREAD_WORK_ONSTACK(work, fn) DEFINE_KTHREAD_WORK(work, fn) #endif extern void __init_kthread_worker(struct kthread_worker *worker, @@ -127,7 +119,6 @@ extern void __init_kthread_worker(struct kthread_worker *worker, memset((work), 0, sizeof(struct kthread_work)); \ INIT_LIST_HEAD(&(work)->node); \ (work)->func = (fn); \ - init_waitqueue_head(&(work)->done); \ } while (0) int kthread_worker_fn(void *worker_ptr); -- cgit v1.2.3-59-g8ed1b From f3d4ff0e04cc4450bdc7a4140020913b1280d205 Mon Sep 17 00:00:00 2001 From: Jamie Lentin Date: Wed, 23 Jul 2014 23:30:48 +0100 Subject: HID: lenovo: Add support for Compact (BT|USB) keyboard Add support for both ThinkPad Compact Bluetooth Keyboard with TrackPoint and ThinkPad Compact USB Keyboard with TrackPoint. Signed-off-by: Jamie Lentin Reviewed-by: Antonio Ospite Signed-off-by: Jiri Kosina --- Documentation/ABI/testing/sysfs-driver-hid-lenovo | 12 ++ drivers/hid/Kconfig | 2 + drivers/hid/hid-core.c | 2 + drivers/hid/hid-ids.h | 2 + drivers/hid/hid-lenovo.c | 208 ++++++++++++++++++++++ include/linux/hid.h | 1 + 6 files changed, 227 insertions(+) (limited to 'include/linux') diff --git a/Documentation/ABI/testing/sysfs-driver-hid-lenovo b/Documentation/ABI/testing/sysfs-driver-hid-lenovo index 57b92cbdceae..53a0725962e1 100644 --- a/Documentation/ABI/testing/sysfs-driver-hid-lenovo +++ b/Documentation/ABI/testing/sysfs-driver-hid-lenovo @@ -4,18 +4,21 @@ Contact: linux-input@vger.kernel.org Description: This controls if mouse clicks should be generated if the trackpoint is quickly pressed. How fast this press has to be is being controlled by press_speed. Values are 0 or 1. + Applies to Thinkpad USB Keyboard with TrackPoint. What: /sys/bus/usb/devices/-:./::./dragging Date: July 2011 Contact: linux-input@vger.kernel.org Description: If this setting is enabled, it is possible to do dragging by pressing the trackpoint. This requires press_to_select to be enabled. Values are 0 or 1. + Applies to Thinkpad USB Keyboard with TrackPoint. What: /sys/bus/usb/devices/-:./::./release_to_select Date: July 2011 Contact: linux-input@vger.kernel.org Description: For details regarding this setting please refer to http://www.pc.ibm.com/ww/healthycomputing/trkpntb.html Values are 0 or 1. + Applies to Thinkpad USB Keyboard with TrackPoint. What: /sys/bus/usb/devices/-:./::./select_right Date: July 2011 @@ -23,16 +26,25 @@ Contact: linux-input@vger.kernel.org Description: This setting controls if the mouse click events generated by pressing the trackpoint (if press_to_select is enabled) generate a left or right mouse button click. Values are 0 or 1. + Applies to Thinkpad USB Keyboard with TrackPoint. What: /sys/bus/usb/devices/-:./::./sensitivity Date: July 2011 Contact: linux-input@vger.kernel.org Description: This file contains the trackpoint sensitivity. Values are decimal integers from 1 (lowest sensitivity) to 255 (highest sensitivity). + Applies to Thinkpad USB Keyboard with TrackPoint. 
What: /sys/bus/usb/devices/-:./::./press_speed Date: July 2011 Contact: linux-input@vger.kernel.org Description: This setting controls how fast the trackpoint needs to be pressed to generate a mouse click if press_to_select is enabled. Values are decimal integers from 1 (slowest) to 255 (fastest). + Applies to Thinkpad USB Keyboard with TrackPoint. +What: /sys/bus/usb/devices/-:./::./fn_lock +Date: July 2014 +Contact: linux-input@vger.kernel.org +Description: This setting controls whether Fn Lock is enabled on the keyboard (i.e. if F1 is Mute or F1) + Values are 0 or 1 + Applies to ThinkPad Compact (USB|Bluetooth) Keyboard with TrackPoint. diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index 687d7a1b6bed..21cce19d301e 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -343,6 +343,8 @@ config HID_LENOVO Thinkpad standalone keyboards, e.g: - ThinkPad USB Keyboard with TrackPoint (supports extra LEDs and trackpoint configuration) + - ThinkPad Compact Bluetooth Keyboard with TrackPoint (supports Fn keys) + - ThinkPad Compact USB Keyboard with TrackPoint (supports Fn keys) config HID_LOGITECH tristate "Logitech devices" if EXPERT diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 55841bd5b461..81b3bb6c4fd1 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -1798,6 +1798,8 @@ static const struct hid_device_id hid_have_special_driver[] = { { HID_USB_DEVICE(USB_VENDOR_ID_LCPOWER, USB_DEVICE_ID_LCPOWER_LC1000 ) }, #if IS_ENABLED(CONFIG_HID_LENOVO) { HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_TPKBD) }, + { HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_CUSBKBD) }, + { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_CBTKBD) }, #endif { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_MX3000_RECEIVER) }, { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_S510_RECEIVER) }, diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 48b66bbffc94..315891d07521 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -561,6 +561,8 @@ #define USB_VENDOR_ID_LENOVO 0x17ef #define USB_DEVICE_ID_LENOVO_TPKBD 0x6009 +#define USB_DEVICE_ID_LENOVO_CUSBKBD 0x6047 +#define USB_DEVICE_ID_LENOVO_CBTKBD 0x6048 #define USB_VENDOR_ID_LG 0x1fd2 #define USB_DEVICE_ID_LG_MULTITOUCH 0x0064 diff --git a/drivers/hid/hid-lenovo.c b/drivers/hid/hid-lenovo.c index f96bf095670a..bf227f7679af 100644 --- a/drivers/hid/hid-lenovo.c +++ b/drivers/hid/hid-lenovo.c @@ -1,8 +1,11 @@ /* * HID driver for Lenovo: * - ThinkPad USB Keyboard with TrackPoint (tpkbd) + * - ThinkPad Compact Bluetooth Keyboard with TrackPoint (cptkbd) + * - ThinkPad Compact USB Keyboard with TrackPoint (cptkbd) * * Copyright (c) 2012 Bernhard Seibold + * Copyright (c) 2014 Jamie Lentin */ /* @@ -33,6 +36,10 @@ struct lenovo_drvdata_tpkbd { int press_speed; }; +struct lenovo_drvdata_cptkbd { + bool fn_lock; +}; + #define map_key_clear(c) hid_map_usage_clear(hi, usage, bit, max, EV_KEY, (c)) static int lenovo_input_mapping_tpkbd(struct hid_device *hdev, @@ -48,6 +55,49 @@ static int lenovo_input_mapping_tpkbd(struct hid_device *hdev, return 0; } +static int lenovo_input_mapping_cptkbd(struct hid_device *hdev, + struct hid_input *hi, struct hid_field *field, + struct hid_usage *usage, unsigned long **bit, int *max) +{ + /* HID_UP_LNVENDOR = USB, HID_UP_MSVENDOR = BT */ + if ((usage->hid & HID_USAGE_PAGE) == HID_UP_MSVENDOR || + (usage->hid & HID_USAGE_PAGE) == HID_UP_LNVENDOR) { + set_bit(EV_REP, hi->input->evbit); + switch (usage->hid & HID_USAGE) { + 
case 0x00f1: /* Fn-F4: Mic mute */ + map_key_clear(KEY_MICMUTE); + return 1; + case 0x00f2: /* Fn-F5: Brightness down */ + map_key_clear(KEY_BRIGHTNESSDOWN); + return 1; + case 0x00f3: /* Fn-F6: Brightness up */ + map_key_clear(KEY_BRIGHTNESSUP); + return 1; + case 0x00f4: /* Fn-F7: External display (projector) */ + map_key_clear(KEY_SWITCHVIDEOMODE); + return 1; + case 0x00f5: /* Fn-F8: Wireless */ + map_key_clear(KEY_WLAN); + return 1; + case 0x00f6: /* Fn-F9: Control panel */ + map_key_clear(KEY_CONFIG); + return 1; + case 0x00f8: /* Fn-F11: View open applications (3 boxes) */ + map_key_clear(KEY_SCALE); + return 1; + case 0x00fa: /* Fn-Esc: Fn-lock toggle */ + map_key_clear(KEY_FN_ESC); + return 1; + case 0x00fb: /* Fn-F12: Open My computer (6 boxes) USB-only */ + /* NB: This mapping is invented in raw_event below */ + map_key_clear(KEY_FILE); + return 1; + } + } + + return 0; +} + static int lenovo_input_mapping(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) @@ -56,6 +106,10 @@ static int lenovo_input_mapping(struct hid_device *hdev, case USB_DEVICE_ID_LENOVO_TPKBD: return lenovo_input_mapping_tpkbd(hdev, hi, field, usage, bit, max); + case USB_DEVICE_ID_LENOVO_CUSBKBD: + case USB_DEVICE_ID_LENOVO_CBTKBD: + return lenovo_input_mapping_cptkbd(hdev, hi, field, + usage, bit, max); default: return 0; } @@ -63,6 +117,103 @@ static int lenovo_input_mapping(struct hid_device *hdev, #undef map_key_clear +/* Send a config command to the keyboard */ +static int lenovo_send_cmd_cptkbd(struct hid_device *hdev, + unsigned char byte2, unsigned char byte3) +{ + int ret; + unsigned char buf[] = {0x18, byte2, byte3}; + + switch (hdev->product) { + case USB_DEVICE_ID_LENOVO_CUSBKBD: + ret = hid_hw_raw_request(hdev, 0x13, buf, sizeof(buf), + HID_FEATURE_REPORT, HID_REQ_SET_REPORT); + break; + case USB_DEVICE_ID_LENOVO_CBTKBD: + ret = hid_hw_output_report(hdev, buf, sizeof(buf)); + break; + default: + ret = -EINVAL; + break; + } + + return ret < 0 ? 
ret : 0; /* BT returns 0, USB returns sizeof(buf) */ +} + +static void lenovo_features_set_cptkbd(struct hid_device *hdev) +{ + int ret; + struct lenovo_drvdata_cptkbd *cptkbd_data = hid_get_drvdata(hdev); + + ret = lenovo_send_cmd_cptkbd(hdev, 0x05, cptkbd_data->fn_lock); + if (ret) + hid_err(hdev, "Fn-lock setting failed: %d\n", ret); +} + +static ssize_t attr_fn_lock_show_cptkbd(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct hid_device *hdev = container_of(dev, struct hid_device, dev); + struct lenovo_drvdata_cptkbd *cptkbd_data = hid_get_drvdata(hdev); + + return snprintf(buf, PAGE_SIZE, "%u\n", cptkbd_data->fn_lock); +} + +static ssize_t attr_fn_lock_store_cptkbd(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct hid_device *hdev = container_of(dev, struct hid_device, dev); + struct lenovo_drvdata_cptkbd *cptkbd_data = hid_get_drvdata(hdev); + int value; + + if (kstrtoint(buf, 10, &value)) + return -EINVAL; + if (value < 0 || value > 1) + return -EINVAL; + + cptkbd_data->fn_lock = !!value; + lenovo_features_set_cptkbd(hdev); + + return count; +} + +static struct device_attribute dev_attr_fn_lock_cptkbd = + __ATTR(fn_lock, S_IWUSR | S_IRUGO, + attr_fn_lock_show_cptkbd, + attr_fn_lock_store_cptkbd); + +static struct attribute *lenovo_attributes_cptkbd[] = { + &dev_attr_fn_lock_cptkbd.attr, + NULL +}; + +static const struct attribute_group lenovo_attr_group_cptkbd = { + .attrs = lenovo_attributes_cptkbd, +}; + +static int lenovo_raw_event(struct hid_device *hdev, + struct hid_report *report, u8 *data, int size) +{ + /* + * Compact USB keyboard's Fn-F12 report holds down many other keys, and + * its own key is outside the usage page range. Remove extra + * keypresses and remap to inside usage page. 
+ */ + if (unlikely(hdev->product == USB_DEVICE_ID_LENOVO_CUSBKBD + && size == 3 + && data[0] == 0x15 + && data[1] == 0x94 + && data[2] == 0x01)) { + data[1] = 0x0; + data[2] = 0x4; + } + + return 0; +} + static int lenovo_features_set_tpkbd(struct hid_device *hdev) { struct hid_report *report; @@ -415,6 +566,46 @@ static int lenovo_probe_tpkbd(struct hid_device *hdev) return 0; } +static int lenovo_probe_cptkbd(struct hid_device *hdev) +{ + int ret; + struct lenovo_drvdata_cptkbd *cptkbd_data; + + /* All the custom action happens on the USBMOUSE device for USB */ + if (hdev->product == USB_DEVICE_ID_LENOVO_CUSBKBD + && hdev->type != HID_TYPE_USBMOUSE) { + hid_dbg(hdev, "Ignoring keyboard half of device\n"); + return 0; + } + + cptkbd_data = devm_kzalloc(&hdev->dev, + sizeof(*cptkbd_data), + GFP_KERNEL); + if (cptkbd_data == NULL) { + hid_err(hdev, "can't alloc keyboard descriptor\n"); + return -ENOMEM; + } + hid_set_drvdata(hdev, cptkbd_data); + + /* + * Tell the keyboard a driver understands it, and turn F7, F9, F11 into + * regular keys + */ + ret = lenovo_send_cmd_cptkbd(hdev, 0x01, 0x03); + if (ret) + hid_warn(hdev, "Failed to switch F7/9/11 mode: %d\n", ret); + + /* Turn Fn-Lock on by default */ + cptkbd_data->fn_lock = true; + lenovo_features_set_cptkbd(hdev); + + ret = sysfs_create_group(&hdev->dev.kobj, &lenovo_attr_group_cptkbd); + if (ret) + hid_warn(hdev, "Could not create sysfs group: %d\n", ret); + + return 0; +} + static int lenovo_probe(struct hid_device *hdev, const struct hid_device_id *id) { @@ -436,6 +627,10 @@ static int lenovo_probe(struct hid_device *hdev, case USB_DEVICE_ID_LENOVO_TPKBD: ret = lenovo_probe_tpkbd(hdev); break; + case USB_DEVICE_ID_LENOVO_CUSBKBD: + case USB_DEVICE_ID_LENOVO_CBTKBD: + ret = lenovo_probe_cptkbd(hdev); + break; default: ret = 0; break; @@ -470,12 +665,22 @@ static void lenovo_remove_tpkbd(struct hid_device *hdev) hid_set_drvdata(hdev, NULL); } +static void lenovo_remove_cptkbd(struct hid_device *hdev) +{ + sysfs_remove_group(&hdev->dev.kobj, + &lenovo_attr_group_cptkbd); +} + static void lenovo_remove(struct hid_device *hdev) { switch (hdev->product) { case USB_DEVICE_ID_LENOVO_TPKBD: lenovo_remove_tpkbd(hdev); break; + case USB_DEVICE_ID_LENOVO_CUSBKBD: + case USB_DEVICE_ID_LENOVO_CBTKBD: + lenovo_remove_cptkbd(hdev); + break; } hid_hw_stop(hdev); @@ -483,6 +688,8 @@ static void lenovo_remove(struct hid_device *hdev) static const struct hid_device_id lenovo_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_TPKBD) }, + { HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_CUSBKBD) }, + { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_CBTKBD) }, { } }; @@ -494,6 +701,7 @@ static struct hid_driver lenovo_driver = { .input_mapping = lenovo_input_mapping, .probe = lenovo_probe, .remove = lenovo_remove, + .raw_event = lenovo_raw_event, }; module_hid_driver(lenovo_driver); diff --git a/include/linux/hid.h b/include/linux/hid.h index 77632cf159c0..fca74f1d5c84 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -167,6 +167,7 @@ struct hid_item { #define HID_UP_MSVENDOR 0xff000000 #define HID_UP_CUSTOM 0x00ff0000 #define HID_UP_LOGIVENDOR 0xffbc0000 +#define HID_UP_LNVENDOR 0xffa00000 #define HID_UP_SENSOR 0x00200000 #define HID_USAGE 0x0000ffff -- cgit v1.2.3-59-g8ed1b From baaa7b5d4f1e515a39f1eebd5fb16b67e00b22fb Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Fri, 18 Jul 2014 12:49:55 +0200 Subject: iommu/omap: Remove virtual memory manager The OMAP3 ISP driver was the only user 
of the OMAP IOVMM API. Now that is has been ported to the DMA API, remove the unused virtual memory manager. Signed-off-by: Laurent Pinchart Signed-off-by: Joerg Roedel --- drivers/iommu/Kconfig | 10 +- drivers/iommu/Makefile | 1 - drivers/iommu/omap-iommu-debug.c | 114 ------ drivers/iommu/omap-iommu.c | 13 - drivers/iommu/omap-iommu.h | 8 +- drivers/iommu/omap-iovmm.c | 791 --------------------------------------- include/linux/omap-iommu.h | 37 +- 7 files changed, 8 insertions(+), 966 deletions(-) delete mode 100644 drivers/iommu/omap-iovmm.c (limited to 'include/linux') diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index d260605e6d5f..154e5a838257 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -143,16 +143,12 @@ config OMAP_IOMMU depends on ARCH_OMAP2PLUS select IOMMU_API -config OMAP_IOVMM - tristate "OMAP IO Virtual Memory Manager Support" - depends on OMAP_IOMMU - config OMAP_IOMMU_DEBUG - tristate "Export OMAP IOMMU/IOVMM internals in DebugFS" - depends on OMAP_IOVMM && DEBUG_FS + tristate "Export OMAP IOMMU internals in DebugFS" + depends on OMAP_IOMMU && DEBUG_FS help Select this to see extensive information about - the internal state of OMAP IOMMU/IOVMM in debugfs. + the internal state of OMAP IOMMU in debugfs. Say N unless you know you need this. diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile index 8893bad048e0..6a4a00ef088b 100644 --- a/drivers/iommu/Makefile +++ b/drivers/iommu/Makefile @@ -11,7 +11,6 @@ obj-$(CONFIG_IPMMU_VMSA) += ipmmu-vmsa.o obj-$(CONFIG_IRQ_REMAP) += intel_irq_remapping.o irq_remapping.o obj-$(CONFIG_OMAP_IOMMU) += omap-iommu.o obj-$(CONFIG_OMAP_IOMMU) += omap-iommu2.o -obj-$(CONFIG_OMAP_IOVMM) += omap-iovmm.o obj-$(CONFIG_OMAP_IOMMU_DEBUG) += omap-iommu-debug.o obj-$(CONFIG_TEGRA_IOMMU_GART) += tegra-gart.o obj-$(CONFIG_TEGRA_IOMMU_SMMU) += tegra-smmu.o diff --git a/drivers/iommu/omap-iommu-debug.c b/drivers/iommu/omap-iommu-debug.c index 80fffba7f12d..531658d17333 100644 --- a/drivers/iommu/omap-iommu-debug.c +++ b/drivers/iommu/omap-iommu-debug.c @@ -213,116 +213,6 @@ static ssize_t debug_read_pagetable(struct file *file, char __user *userbuf, return bytes; } -static ssize_t debug_read_mmap(struct file *file, char __user *userbuf, - size_t count, loff_t *ppos) -{ - struct device *dev = file->private_data; - struct omap_iommu *obj = dev_to_omap_iommu(dev); - char *p, *buf; - struct iovm_struct *tmp; - int uninitialized_var(i); - ssize_t bytes; - - buf = (char *)__get_free_page(GFP_KERNEL); - if (!buf) - return -ENOMEM; - p = buf; - - p += sprintf(p, "%-3s %-8s %-8s %6s %8s\n", - "No", "start", "end", "size", "flags"); - p += sprintf(p, "-------------------------------------------------\n"); - - mutex_lock(&iommu_debug_lock); - - list_for_each_entry(tmp, &obj->mmap, list) { - size_t len; - const char *str = "%3d %08x-%08x %6x %8x\n"; - const int maxcol = 39; - - len = tmp->da_end - tmp->da_start; - p += snprintf(p, maxcol, str, - i, tmp->da_start, tmp->da_end, len, tmp->flags); - - if (PAGE_SIZE - (p - buf) < maxcol) - break; - i++; - } - - bytes = simple_read_from_buffer(userbuf, count, ppos, buf, p - buf); - - mutex_unlock(&iommu_debug_lock); - free_page((unsigned long)buf); - - return bytes; -} - -static ssize_t debug_read_mem(struct file *file, char __user *userbuf, - size_t count, loff_t *ppos) -{ - struct device *dev = file->private_data; - char *p, *buf; - struct iovm_struct *area; - ssize_t bytes; - - count = min_t(ssize_t, count, PAGE_SIZE); - - buf = (char *)__get_free_page(GFP_KERNEL); - if (!buf) - 
return -ENOMEM; - p = buf; - - mutex_lock(&iommu_debug_lock); - - area = omap_find_iovm_area(dev, (u32)ppos); - if (!area) { - bytes = -EINVAL; - goto err_out; - } - memcpy(p, area->va, count); - p += count; - - bytes = simple_read_from_buffer(userbuf, count, ppos, buf, p - buf); -err_out: - mutex_unlock(&iommu_debug_lock); - free_page((unsigned long)buf); - - return bytes; -} - -static ssize_t debug_write_mem(struct file *file, const char __user *userbuf, - size_t count, loff_t *ppos) -{ - struct device *dev = file->private_data; - struct iovm_struct *area; - char *p, *buf; - - count = min_t(size_t, count, PAGE_SIZE); - - buf = (char *)__get_free_page(GFP_KERNEL); - if (!buf) - return -ENOMEM; - p = buf; - - mutex_lock(&iommu_debug_lock); - - if (copy_from_user(p, userbuf, count)) { - count = -EFAULT; - goto err_out; - } - - area = omap_find_iovm_area(dev, (u32)ppos); - if (!area) { - count = -EINVAL; - goto err_out; - } - memcpy(area->va, p, count); -err_out: - mutex_unlock(&iommu_debug_lock); - free_page((unsigned long)buf); - - return count; -} - #define DEBUG_FOPS(name) \ static const struct file_operations debug_##name##_fops = { \ .open = simple_open, \ @@ -342,8 +232,6 @@ DEBUG_FOPS_RO(ver); DEBUG_FOPS_RO(regs); DEBUG_FOPS_RO(tlb); DEBUG_FOPS(pagetable); -DEBUG_FOPS_RO(mmap); -DEBUG_FOPS(mem); #define __DEBUG_ADD_FILE(attr, mode) \ { \ @@ -389,8 +277,6 @@ static int iommu_debug_register(struct device *dev, void *data) DEBUG_ADD_FILE_RO(regs); DEBUG_ADD_FILE_RO(tlb); DEBUG_ADD_FILE(pagetable); - DEBUG_ADD_FILE_RO(mmap); - DEBUG_ADD_FILE(mem); return 0; diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index 895af06a667f..61599e2b33ca 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -959,31 +959,18 @@ static int omap_iommu_probe(struct platform_device *pdev) return err; if (obj->nr_tlb_entries != 32 && obj->nr_tlb_entries != 8) return -EINVAL; - /* - * da_start and da_end are needed for omap-iovmm, so hardcode - * these values as used by OMAP3 ISP - the only user for - * omap-iovmm - */ - obj->da_start = 0; - obj->da_end = 0xfffff000; if (of_find_property(of, "ti,iommu-bus-err-back", NULL)) obj->has_bus_err_back = MMU_GP_REG_BUS_ERR_BACK_EN; } else { obj->nr_tlb_entries = pdata->nr_tlb_entries; obj->name = pdata->name; - obj->da_start = pdata->da_start; - obj->da_end = pdata->da_end; } - if (obj->da_end <= obj->da_start) - return -EINVAL; obj->dev = &pdev->dev; obj->ctx = (void *)obj + sizeof(*obj); spin_lock_init(&obj->iommu_lock); - mutex_init(&obj->mmap_lock); spin_lock_init(&obj->page_table_lock); - INIT_LIST_HEAD(&obj->mmap); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); obj->regbase = devm_ioremap_resource(obj->dev, res); diff --git a/drivers/iommu/omap-iommu.h b/drivers/iommu/omap-iommu.h index ea920c3e94ff..1275a822934b 100644 --- a/drivers/iommu/omap-iommu.h +++ b/drivers/iommu/omap-iommu.h @@ -46,12 +46,7 @@ struct omap_iommu { int nr_tlb_entries; - struct list_head mmap; - struct mutex mmap_lock; /* protect mmap */ - void *ctx; /* iommu context: registres saved area */ - u32 da_start; - u32 da_end; int has_bus_err_back; }; @@ -154,9 +149,12 @@ static inline struct omap_iommu *dev_to_omap_iommu(struct device *dev) #define MMU_RAM_PADDR_MASK \ ((~0UL >> MMU_RAM_PADDR_SHIFT) << MMU_RAM_PADDR_SHIFT) +#define MMU_RAM_ENDIAN_SHIFT 9 #define MMU_RAM_ENDIAN_MASK (1 << MMU_RAM_ENDIAN_SHIFT) +#define MMU_RAM_ENDIAN_LITTLE (0 << MMU_RAM_ENDIAN_SHIFT) #define MMU_RAM_ENDIAN_BIG (1 << MMU_RAM_ENDIAN_SHIFT) +#define 
MMU_RAM_ELSZ_SHIFT 7 #define MMU_RAM_ELSZ_MASK (3 << MMU_RAM_ELSZ_SHIFT) #define MMU_RAM_ELSZ_8 (0 << MMU_RAM_ELSZ_SHIFT) #define MMU_RAM_ELSZ_16 (1 << MMU_RAM_ELSZ_SHIFT) diff --git a/drivers/iommu/omap-iovmm.c b/drivers/iommu/omap-iovmm.c deleted file mode 100644 index f583ba049168..000000000000 --- a/drivers/iommu/omap-iovmm.c +++ /dev/null @@ -1,791 +0,0 @@ -/* - * omap iommu: simple virtual address space management - * - * Copyright (C) 2008-2009 Nokia Corporation - * - * Written by Hiroshi DOYU - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include "omap-iopgtable.h" -#include "omap-iommu.h" - -/* - * IOVMF_FLAGS: attribute for iommu virtual memory area(iovma) - * - * lower 16 bit is used for h/w and upper 16 bit is for s/w. - */ -#define IOVMF_SW_SHIFT 16 - -/* - * iovma: h/w flags derived from cam and ram attribute - */ -#define IOVMF_CAM_MASK (~((1 << 10) - 1)) -#define IOVMF_RAM_MASK (~IOVMF_CAM_MASK) - -#define IOVMF_PGSZ_MASK (3 << 0) -#define IOVMF_PGSZ_1M MMU_CAM_PGSZ_1M -#define IOVMF_PGSZ_64K MMU_CAM_PGSZ_64K -#define IOVMF_PGSZ_4K MMU_CAM_PGSZ_4K -#define IOVMF_PGSZ_16M MMU_CAM_PGSZ_16M - -#define IOVMF_ENDIAN_MASK (1 << 9) -#define IOVMF_ENDIAN_BIG MMU_RAM_ENDIAN_BIG - -#define IOVMF_ELSZ_MASK (3 << 7) -#define IOVMF_ELSZ_16 MMU_RAM_ELSZ_16 -#define IOVMF_ELSZ_32 MMU_RAM_ELSZ_32 -#define IOVMF_ELSZ_NONE MMU_RAM_ELSZ_NONE - -#define IOVMF_MIXED_MASK (1 << 6) -#define IOVMF_MIXED MMU_RAM_MIXED - -/* - * iovma: s/w flags, used for mapping and umapping internally. - */ -#define IOVMF_MMIO (1 << IOVMF_SW_SHIFT) -#define IOVMF_ALLOC (2 << IOVMF_SW_SHIFT) -#define IOVMF_ALLOC_MASK (3 << IOVMF_SW_SHIFT) - -/* "superpages" is supported just with physically linear pages */ -#define IOVMF_DISCONT (1 << (2 + IOVMF_SW_SHIFT)) -#define IOVMF_LINEAR (2 << (2 + IOVMF_SW_SHIFT)) -#define IOVMF_LINEAR_MASK (3 << (2 + IOVMF_SW_SHIFT)) - -#define IOVMF_DA_FIXED (1 << (4 + IOVMF_SW_SHIFT)) - -static struct kmem_cache *iovm_area_cachep; - -/* return the offset of the first scatterlist entry in a sg table */ -static unsigned int sgtable_offset(const struct sg_table *sgt) -{ - if (!sgt || !sgt->nents) - return 0; - - return sgt->sgl->offset; -} - -/* return total bytes of sg buffers */ -static size_t sgtable_len(const struct sg_table *sgt) -{ - unsigned int i, total = 0; - struct scatterlist *sg; - - if (!sgt) - return 0; - - for_each_sg(sgt->sgl, sg, sgt->nents, i) { - size_t bytes; - - bytes = sg->length + sg->offset; - - if (!iopgsz_ok(bytes)) { - pr_err("%s: sg[%d] not iommu pagesize(%u %u)\n", - __func__, i, bytes, sg->offset); - return 0; - } - - if (i && sg->offset) { - pr_err("%s: sg[%d] offset not allowed in internal entries\n", - __func__, i); - return 0; - } - - total += bytes; - } - - return total; -} -#define sgtable_ok(x) (!!sgtable_len(x)) - -static unsigned max_alignment(u32 addr) -{ - int i; - unsigned pagesize[] = { SZ_16M, SZ_1M, SZ_64K, SZ_4K, }; - for (i = 0; i < ARRAY_SIZE(pagesize) && addr & (pagesize[i] - 1); i++) - ; - return (i < ARRAY_SIZE(pagesize)) ? 
pagesize[i] : 0; -} - -/* - * calculate the optimal number sg elements from total bytes based on - * iommu superpages - */ -static unsigned sgtable_nents(size_t bytes, u32 da, u32 pa) -{ - unsigned nr_entries = 0, ent_sz; - - if (!PAGE_ALIGNED(bytes)) { - pr_err("%s: wrong size %08x\n", __func__, bytes); - return 0; - } - - while (bytes) { - ent_sz = max_alignment(da | pa); - ent_sz = min_t(unsigned, ent_sz, iopgsz_max(bytes)); - nr_entries++; - da += ent_sz; - pa += ent_sz; - bytes -= ent_sz; - } - - return nr_entries; -} - -/* allocate and initialize sg_table header(a kind of 'superblock') */ -static struct sg_table *sgtable_alloc(const size_t bytes, u32 flags, - u32 da, u32 pa) -{ - unsigned int nr_entries; - int err; - struct sg_table *sgt; - - if (!bytes) - return ERR_PTR(-EINVAL); - - if (!PAGE_ALIGNED(bytes)) - return ERR_PTR(-EINVAL); - - if (flags & IOVMF_LINEAR) { - nr_entries = sgtable_nents(bytes, da, pa); - if (!nr_entries) - return ERR_PTR(-EINVAL); - } else - nr_entries = bytes / PAGE_SIZE; - - sgt = kzalloc(sizeof(*sgt), GFP_KERNEL); - if (!sgt) - return ERR_PTR(-ENOMEM); - - err = sg_alloc_table(sgt, nr_entries, GFP_KERNEL); - if (err) { - kfree(sgt); - return ERR_PTR(err); - } - - pr_debug("%s: sgt:%p(%d entries)\n", __func__, sgt, nr_entries); - - return sgt; -} - -/* free sg_table header(a kind of superblock) */ -static void sgtable_free(struct sg_table *sgt) -{ - if (!sgt) - return; - - sg_free_table(sgt); - kfree(sgt); - - pr_debug("%s: sgt:%p\n", __func__, sgt); -} - -/* map 'sglist' to a contiguous mpu virtual area and return 'va' */ -static void *vmap_sg(const struct sg_table *sgt) -{ - u32 va; - size_t total; - unsigned int i; - struct scatterlist *sg; - struct vm_struct *new; - const struct mem_type *mtype; - - mtype = get_mem_type(MT_DEVICE); - if (!mtype) - return ERR_PTR(-EINVAL); - - total = sgtable_len(sgt); - if (!total) - return ERR_PTR(-EINVAL); - - new = __get_vm_area(total, VM_IOREMAP, VMALLOC_START, VMALLOC_END); - if (!new) - return ERR_PTR(-ENOMEM); - va = (u32)new->addr; - - for_each_sg(sgt->sgl, sg, sgt->nents, i) { - size_t bytes; - u32 pa; - int err; - - pa = sg_phys(sg) - sg->offset; - bytes = sg->length + sg->offset; - - BUG_ON(bytes != PAGE_SIZE); - - err = ioremap_page(va, pa, mtype); - if (err) - goto err_out; - - va += bytes; - } - - flush_cache_vmap((unsigned long)new->addr, - (unsigned long)(new->addr + total)); - return new->addr; - -err_out: - WARN_ON(1); /* FIXME: cleanup some mpu mappings */ - vunmap(new->addr); - return ERR_PTR(-EAGAIN); -} - -static inline void vunmap_sg(const void *va) -{ - vunmap(va); -} - -static struct iovm_struct *__find_iovm_area(struct omap_iommu *obj, - const u32 da) -{ - struct iovm_struct *tmp; - - list_for_each_entry(tmp, &obj->mmap, list) { - if ((da >= tmp->da_start) && (da < tmp->da_end)) { - size_t len; - - len = tmp->da_end - tmp->da_start; - - dev_dbg(obj->dev, "%s: %08x-%08x-%08x(%x) %08x\n", - __func__, tmp->da_start, da, tmp->da_end, len, - tmp->flags); - - return tmp; - } - } - - return NULL; -} - -/** - * omap_find_iovm_area - find iovma which includes @da - * @dev: client device - * @da: iommu device virtual address - * - * Find the existing iovma starting at @da - */ -struct iovm_struct *omap_find_iovm_area(struct device *dev, u32 da) -{ - struct omap_iommu *obj = dev_to_omap_iommu(dev); - struct iovm_struct *area; - - mutex_lock(&obj->mmap_lock); - area = __find_iovm_area(obj, da); - mutex_unlock(&obj->mmap_lock); - - return area; -} -EXPORT_SYMBOL_GPL(omap_find_iovm_area); - -/* - * This 
finds the hole(area) which fits the requested address and len - * in iovmas mmap, and returns the new allocated iovma. - */ -static struct iovm_struct *alloc_iovm_area(struct omap_iommu *obj, u32 da, - size_t bytes, u32 flags) -{ - struct iovm_struct *new, *tmp; - u32 start, prev_end, alignment; - - if (!obj || !bytes) - return ERR_PTR(-EINVAL); - - start = da; - alignment = PAGE_SIZE; - - if (~flags & IOVMF_DA_FIXED) { - /* Don't map address 0 */ - start = obj->da_start ? obj->da_start : alignment; - - if (flags & IOVMF_LINEAR) - alignment = iopgsz_max(bytes); - start = roundup(start, alignment); - } else if (start < obj->da_start || start > obj->da_end || - obj->da_end - start < bytes) { - return ERR_PTR(-EINVAL); - } - - tmp = NULL; - if (list_empty(&obj->mmap)) - goto found; - - prev_end = 0; - list_for_each_entry(tmp, &obj->mmap, list) { - - if (prev_end > start) - break; - - if (tmp->da_start > start && (tmp->da_start - start) >= bytes) - goto found; - - if (tmp->da_end >= start && ~flags & IOVMF_DA_FIXED) - start = roundup(tmp->da_end + 1, alignment); - - prev_end = tmp->da_end; - } - - if ((start >= prev_end) && (obj->da_end - start >= bytes)) - goto found; - - dev_dbg(obj->dev, "%s: no space to fit %08x(%x) flags: %08x\n", - __func__, da, bytes, flags); - - return ERR_PTR(-EINVAL); - -found: - new = kmem_cache_zalloc(iovm_area_cachep, GFP_KERNEL); - if (!new) - return ERR_PTR(-ENOMEM); - - new->iommu = obj; - new->da_start = start; - new->da_end = start + bytes; - new->flags = flags; - - /* - * keep ascending order of iovmas - */ - if (tmp) - list_add_tail(&new->list, &tmp->list); - else - list_add(&new->list, &obj->mmap); - - dev_dbg(obj->dev, "%s: found %08x-%08x-%08x(%x) %08x\n", - __func__, new->da_start, start, new->da_end, bytes, flags); - - return new; -} - -static void free_iovm_area(struct omap_iommu *obj, struct iovm_struct *area) -{ - size_t bytes; - - BUG_ON(!obj || !area); - - bytes = area->da_end - area->da_start; - - dev_dbg(obj->dev, "%s: %08x-%08x(%x) %08x\n", - __func__, area->da_start, area->da_end, bytes, area->flags); - - list_del(&area->list); - kmem_cache_free(iovm_area_cachep, area); -} - -/** - * omap_da_to_va - convert (d) to (v) - * @dev: client device - * @da: iommu device virtual address - * @va: mpu virtual address - * - * Returns mpu virtual addr which corresponds to a given device virtual addr - */ -void *omap_da_to_va(struct device *dev, u32 da) -{ - struct omap_iommu *obj = dev_to_omap_iommu(dev); - void *va = NULL; - struct iovm_struct *area; - - mutex_lock(&obj->mmap_lock); - - area = __find_iovm_area(obj, da); - if (!area) { - dev_dbg(obj->dev, "%s: no da area(%08x)\n", __func__, da); - goto out; - } - va = area->va; -out: - mutex_unlock(&obj->mmap_lock); - - return va; -} -EXPORT_SYMBOL_GPL(omap_da_to_va); - -static void sgtable_fill_vmalloc(struct sg_table *sgt, void *_va) -{ - unsigned int i; - struct scatterlist *sg; - void *va = _va; - void *va_end; - - for_each_sg(sgt->sgl, sg, sgt->nents, i) { - struct page *pg; - const size_t bytes = PAGE_SIZE; - - /* - * iommu 'superpage' isn't supported with 'omap_iommu_vmalloc()' - */ - pg = vmalloc_to_page(va); - BUG_ON(!pg); - sg_set_page(sg, pg, bytes, 0); - - va += bytes; - } - - va_end = _va + PAGE_SIZE * i; -} - -static inline void sgtable_drain_vmalloc(struct sg_table *sgt) -{ - /* - * Actually this is not necessary at all, just exists for - * consistency of the code readability. 
- */ - BUG_ON(!sgt); -} - -/* create 'da' <-> 'pa' mapping from 'sgt' */ -static int map_iovm_area(struct iommu_domain *domain, struct iovm_struct *new, - const struct sg_table *sgt, u32 flags) -{ - int err; - unsigned int i, j; - struct scatterlist *sg; - u32 da = new->da_start; - - if (!domain || !sgt) - return -EINVAL; - - BUG_ON(!sgtable_ok(sgt)); - - for_each_sg(sgt->sgl, sg, sgt->nents, i) { - u32 pa; - size_t bytes; - - pa = sg_phys(sg) - sg->offset; - bytes = sg->length + sg->offset; - - flags &= ~IOVMF_PGSZ_MASK; - - if (bytes_to_iopgsz(bytes) < 0) - goto err_out; - - pr_debug("%s: [%d] %08x %08x(%x)\n", __func__, - i, da, pa, bytes); - - err = iommu_map(domain, da, pa, bytes, flags); - if (err) - goto err_out; - - da += bytes; - } - return 0; - -err_out: - da = new->da_start; - - for_each_sg(sgt->sgl, sg, i, j) { - size_t bytes; - - bytes = sg->length + sg->offset; - - /* ignore failures.. we're already handling one */ - iommu_unmap(domain, da, bytes); - - da += bytes; - } - return err; -} - -/* release 'da' <-> 'pa' mapping */ -static void unmap_iovm_area(struct iommu_domain *domain, struct omap_iommu *obj, - struct iovm_struct *area) -{ - u32 start; - size_t total = area->da_end - area->da_start; - const struct sg_table *sgt = area->sgt; - struct scatterlist *sg; - int i; - size_t unmapped; - - BUG_ON(!sgtable_ok(sgt)); - BUG_ON((!total) || !PAGE_ALIGNED(total)); - - start = area->da_start; - for_each_sg(sgt->sgl, sg, sgt->nents, i) { - size_t bytes; - - bytes = sg->length + sg->offset; - - unmapped = iommu_unmap(domain, start, bytes); - if (unmapped < bytes) - break; - - dev_dbg(obj->dev, "%s: unmap %08x(%x) %08x\n", - __func__, start, bytes, area->flags); - - BUG_ON(!PAGE_ALIGNED(bytes)); - - total -= bytes; - start += bytes; - } - BUG_ON(total); -} - -/* template function for all unmapping */ -static struct sg_table *unmap_vm_area(struct iommu_domain *domain, - struct omap_iommu *obj, const u32 da, - void (*fn)(const void *), u32 flags) -{ - struct sg_table *sgt = NULL; - struct iovm_struct *area; - - if (!PAGE_ALIGNED(da)) { - dev_err(obj->dev, "%s: alignment err(%08x)\n", __func__, da); - return NULL; - } - - mutex_lock(&obj->mmap_lock); - - area = __find_iovm_area(obj, da); - if (!area) { - dev_dbg(obj->dev, "%s: no da area(%08x)\n", __func__, da); - goto out; - } - - if ((area->flags & flags) != flags) { - dev_err(obj->dev, "%s: wrong flags(%08x)\n", __func__, - area->flags); - goto out; - } - sgt = (struct sg_table *)area->sgt; - - unmap_iovm_area(domain, obj, area); - - fn(area->va); - - dev_dbg(obj->dev, "%s: %08x-%08x-%08x(%x) %08x\n", __func__, - area->da_start, da, area->da_end, - area->da_end - area->da_start, area->flags); - - free_iovm_area(obj, area); -out: - mutex_unlock(&obj->mmap_lock); - - return sgt; -} - -static u32 map_iommu_region(struct iommu_domain *domain, struct omap_iommu *obj, - u32 da, const struct sg_table *sgt, void *va, - size_t bytes, u32 flags) -{ - int err = -ENOMEM; - struct iovm_struct *new; - - mutex_lock(&obj->mmap_lock); - - new = alloc_iovm_area(obj, da, bytes, flags); - if (IS_ERR(new)) { - err = PTR_ERR(new); - goto err_alloc_iovma; - } - new->va = va; - new->sgt = sgt; - - if (map_iovm_area(domain, new, sgt, new->flags)) - goto err_map; - - mutex_unlock(&obj->mmap_lock); - - dev_dbg(obj->dev, "%s: da:%08x(%x) flags:%08x va:%p\n", - __func__, new->da_start, bytes, new->flags, va); - - return new->da_start; - -err_map: - free_iovm_area(obj, new); -err_alloc_iovma: - mutex_unlock(&obj->mmap_lock); - return err; -} - -static inline u32 
-__iommu_vmap(struct iommu_domain *domain, struct omap_iommu *obj, - u32 da, const struct sg_table *sgt, - void *va, size_t bytes, u32 flags) -{ - return map_iommu_region(domain, obj, da, sgt, va, bytes, flags); -} - -/** - * omap_iommu_vmap - (d)-(p)-(v) address mapper - * @domain: iommu domain - * @dev: client device - * @sgt: address of scatter gather table - * @flags: iovma and page property - * - * Creates 1-n-1 mapping with given @sgt and returns @da. - * All @sgt element must be io page size aligned. - */ -u32 omap_iommu_vmap(struct iommu_domain *domain, struct device *dev, u32 da, - const struct sg_table *sgt, u32 flags) -{ - struct omap_iommu *obj = dev_to_omap_iommu(dev); - size_t bytes; - void *va = NULL; - - if (!obj || !obj->dev || !sgt) - return -EINVAL; - - bytes = sgtable_len(sgt); - if (!bytes) - return -EINVAL; - bytes = PAGE_ALIGN(bytes); - - if (flags & IOVMF_MMIO) { - va = vmap_sg(sgt); - if (IS_ERR(va)) - return PTR_ERR(va); - } - - flags |= IOVMF_DISCONT; - flags |= IOVMF_MMIO; - - da = __iommu_vmap(domain, obj, da, sgt, va, bytes, flags); - if (IS_ERR_VALUE(da)) - vunmap_sg(va); - - return da + sgtable_offset(sgt); -} -EXPORT_SYMBOL_GPL(omap_iommu_vmap); - -/** - * omap_iommu_vunmap - release virtual mapping obtained by 'omap_iommu_vmap()' - * @domain: iommu domain - * @dev: client device - * @da: iommu device virtual address - * - * Free the iommu virtually contiguous memory area starting at - * @da, which was returned by 'omap_iommu_vmap()'. - */ -struct sg_table * -omap_iommu_vunmap(struct iommu_domain *domain, struct device *dev, u32 da) -{ - struct omap_iommu *obj = dev_to_omap_iommu(dev); - struct sg_table *sgt; - /* - * 'sgt' is allocated before 'omap_iommu_vmalloc()' is called. - * Just returns 'sgt' to the caller to free - */ - da &= PAGE_MASK; - sgt = unmap_vm_area(domain, obj, da, vunmap_sg, - IOVMF_DISCONT | IOVMF_MMIO); - if (!sgt) - dev_dbg(obj->dev, "%s: No sgt\n", __func__); - return sgt; -} -EXPORT_SYMBOL_GPL(omap_iommu_vunmap); - -/** - * omap_iommu_vmalloc - (d)-(p)-(v) address allocator and mapper - * @dev: client device - * @da: contiguous iommu virtual memory - * @bytes: allocation size - * @flags: iovma and page property - * - * Allocate @bytes linearly and creates 1-n-1 mapping and returns - * @da again, which might be adjusted if 'IOVMF_DA_FIXED' is not set. - */ -u32 -omap_iommu_vmalloc(struct iommu_domain *domain, struct device *dev, u32 da, - size_t bytes, u32 flags) -{ - struct omap_iommu *obj = dev_to_omap_iommu(dev); - void *va; - struct sg_table *sgt; - - if (!obj || !obj->dev || !bytes) - return -EINVAL; - - bytes = PAGE_ALIGN(bytes); - - va = vmalloc(bytes); - if (!va) - return -ENOMEM; - - flags |= IOVMF_DISCONT; - flags |= IOVMF_ALLOC; - - sgt = sgtable_alloc(bytes, flags, da, 0); - if (IS_ERR(sgt)) { - da = PTR_ERR(sgt); - goto err_sgt_alloc; - } - sgtable_fill_vmalloc(sgt, va); - - da = __iommu_vmap(domain, obj, da, sgt, va, bytes, flags); - if (IS_ERR_VALUE(da)) - goto err_iommu_vmap; - - return da; - -err_iommu_vmap: - sgtable_drain_vmalloc(sgt); - sgtable_free(sgt); -err_sgt_alloc: - vfree(va); - return da; -} -EXPORT_SYMBOL_GPL(omap_iommu_vmalloc); - -/** - * omap_iommu_vfree - release memory allocated by 'omap_iommu_vmalloc()' - * @dev: client device - * @da: iommu device virtual address - * - * Frees the iommu virtually continuous memory area starting at - * @da, as obtained from 'omap_iommu_vmalloc()'. 
- */ -void omap_iommu_vfree(struct iommu_domain *domain, struct device *dev, - const u32 da) -{ - struct omap_iommu *obj = dev_to_omap_iommu(dev); - struct sg_table *sgt; - - sgt = unmap_vm_area(domain, obj, da, vfree, - IOVMF_DISCONT | IOVMF_ALLOC); - if (!sgt) - dev_dbg(obj->dev, "%s: No sgt\n", __func__); - sgtable_free(sgt); -} -EXPORT_SYMBOL_GPL(omap_iommu_vfree); - -static int __init iovmm_init(void) -{ - const unsigned long flags = SLAB_HWCACHE_ALIGN; - struct kmem_cache *p; - - p = kmem_cache_create("iovm_area_cache", sizeof(struct iovm_struct), 0, - flags, NULL); - if (!p) - return -ENOMEM; - iovm_area_cachep = p; - - return 0; -} -module_init(iovmm_init); - -static void __exit iovmm_exit(void) -{ - kmem_cache_destroy(iovm_area_cachep); -} -module_exit(iovmm_exit); - -MODULE_DESCRIPTION("omap iommu: simple virtual address space management"); -MODULE_AUTHOR("Hiroshi DOYU "); -MODULE_LICENSE("GPL v2"); diff --git a/include/linux/omap-iommu.h b/include/linux/omap-iommu.h index cac78de09c07..c1aede46718b 100644 --- a/include/linux/omap-iommu.h +++ b/include/linux/omap-iommu.h @@ -10,41 +10,8 @@ * published by the Free Software Foundation. */ -#ifndef _INTEL_IOMMU_H_ -#define _INTEL_IOMMU_H_ - -struct iovm_struct { - struct omap_iommu *iommu; /* iommu object which this belongs to */ - u32 da_start; /* area definition */ - u32 da_end; - u32 flags; /* IOVMF_: see below */ - struct list_head list; /* linked in ascending order */ - const struct sg_table *sgt; /* keep 'page' <-> 'da' mapping */ - void *va; /* mpu side mapped address */ -}; - -#define MMU_RAM_ENDIAN_SHIFT 9 -#define MMU_RAM_ENDIAN_LITTLE (0 << MMU_RAM_ENDIAN_SHIFT) -#define MMU_RAM_ELSZ_8 (0 << MMU_RAM_ELSZ_SHIFT) -#define IOVMF_ENDIAN_LITTLE MMU_RAM_ENDIAN_LITTLE -#define MMU_RAM_ELSZ_SHIFT 7 -#define IOVMF_ELSZ_8 MMU_RAM_ELSZ_8 - -struct iommu_domain; - -extern struct iovm_struct *omap_find_iovm_area(struct device *dev, u32 da); -extern u32 -omap_iommu_vmap(struct iommu_domain *domain, struct device *dev, u32 da, - const struct sg_table *sgt, u32 flags); -extern struct sg_table *omap_iommu_vunmap(struct iommu_domain *domain, - struct device *dev, u32 da); -extern u32 -omap_iommu_vmalloc(struct iommu_domain *domain, struct device *dev, - u32 da, size_t bytes, u32 flags); -extern void -omap_iommu_vfree(struct iommu_domain *domain, struct device *dev, - const u32 da); -extern void *omap_da_to_va(struct device *dev, u32 da); +#ifndef _OMAP_IOMMU_H_ +#define _OMAP_IOMMU_H_ extern void omap_iommu_save_ctx(struct device *dev); extern void omap_iommu_restore_ctx(struct device *dev); -- cgit v1.2.3-59-g8ed1b From e8a6ebc71e40ec02cd0113c803e919c8329b411b Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Fri, 18 Jul 2014 12:49:57 +0200 Subject: iommu/omap: Remove platform data da_start and da_end fields The fields were used by the now gone omap-iovmm driver. They're not used anymore, remove them. Signed-off-by: Laurent Pinchart Acked-by: Tony Lindgren Signed-off-by: Joerg Roedel --- include/linux/platform_data/iommu-omap.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/iommu-omap.h b/include/linux/platform_data/iommu-omap.h index 5b429c43a297..54a0a9582fad 100644 --- a/include/linux/platform_data/iommu-omap.h +++ b/include/linux/platform_data/iommu-omap.h @@ -31,14 +31,10 @@ struct omap_iommu_arch_data { /** * struct omap_mmu_dev_attr - OMAP mmu device attributes for omap_hwmod - * @da_start: device address where the va space starts. 
- * @da_end: device address where the va space ends. * @nr_tlb_entries: number of entries supported by the translation * look-aside buffer (TLB). */ struct omap_mmu_dev_attr { - u32 da_start; - u32 da_end; int nr_tlb_entries; }; @@ -46,8 +42,6 @@ struct iommu_platform_data { const char *name; const char *reset_name; int nr_tlb_entries; - u32 da_start; - u32 da_end; int (*assert_reset)(struct platform_device *pdev, const char *name); int (*deassert_reset)(struct platform_device *pdev, const char *name); -- cgit v1.2.3-59-g8ed1b From 3bc0312e67f0be679039980291f88405f20b0a95 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 25 Jul 2014 19:07:11 +0100 Subject: regulator: Add missing statics and inlines for stub functions So we don't get multiple definitions. Signed-off-by: Mark Brown --- include/linux/regulator/consumer.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h index 0b1c8d09a6b1..d9e6c20ab9bb 100644 --- a/include/linux/regulator/consumer.h +++ b/include/linux/regulator/consumer.h @@ -460,20 +460,20 @@ static inline int regulator_allow_bypass(struct regulator *regulator, return 0; } -struct regmap *regulator_get_regmap(struct regulator *regulator) +static inline struct regmap *regulator_get_regmap(struct regulator *regulator) { return ERR_PTR(-EOPNOTSUPP); } -int regulator_get_hardware_vsel_register(struct regulator *regulator, - unsigned *vsel_reg, - unsigned *vsel_mask) +static inline int regulator_get_hardware_vsel_register(struct regulator *regulator, + unsigned *vsel_reg, + unsigned *vsel_mask) { return -EOPNOTSUPP; } -int regulator_list_hardware_vsel(struct regulator *regulator, - unsigned selector) +static inline int regulator_list_hardware_vsel(struct regulator *regulator, + unsigned selector) { return -EOPNOTSUPP; } -- cgit v1.2.3-59-g8ed1b From a67d19d4c5b92853550dc20f4afce8c914a8ea0b Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Thu, 24 Jul 2014 15:29:18 +0200 Subject: b43: add support for BCM43131 chipset with N-PHY rev 17 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It contains radio 0x2057 rev 14 just like a BCM43217, so it doesn't require any magic. The main difference is that BCM4313 is 1x1:1. Signed-off-by: Rafał Miłecki Signed-off-by: John W. 
Linville --- drivers/net/wireless/b43/main.c | 3 ++- drivers/net/wireless/b43/phy_n.c | 3 ++- include/linux/bcma/bcma.h | 1 + 3 files changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c index d7055febe119..2af1ac396eb4 100644 --- a/drivers/net/wireless/b43/main.c +++ b/drivers/net/wireless/b43/main.c @@ -2985,7 +2985,8 @@ void b43_mac_switch_freq(struct b43_wldev *dev, u8 spurmode) { u16 chip_id = dev->dev->chip_id; - if (chip_id == BCMA_CHIP_ID_BCM43217 || + if (chip_id == BCMA_CHIP_ID_BCM43131 || + chip_id == BCMA_CHIP_ID_BCM43217 || chip_id == BCMA_CHIP_ID_BCM43222 || chip_id == BCMA_CHIP_ID_BCM43224 || chip_id == BCMA_CHIP_ID_BCM43225 || diff --git a/drivers/net/wireless/b43/phy_n.c b/drivers/net/wireless/b43/phy_n.c index d269fbb27b9e..1eead7af6899 100644 --- a/drivers/net/wireless/b43/phy_n.c +++ b/drivers/net/wireless/b43/phy_n.c @@ -4982,7 +4982,8 @@ static void b43_nphy_int_pa_set_tx_dig_filters(struct b43_wldev *dev) if (dev->phy.rev == 16) b43_nphy_pa_set_tx_dig_filter(dev, 0x186, dig_filter_phy_rev16); - if (dev->dev->chip_id == BCMA_CHIP_ID_BCM43217) { + /* Verified with BCM43131 and BCM43217 */ + if (dev->phy.rev == 17) { b43_nphy_pa_set_tx_dig_filter(dev, 0x186, dig_filter_phy_rev16); b43_nphy_pa_set_tx_dig_filter(dev, 0x195, tbl_tx_filter_coef_rev4[1]); diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h index 969af0f2bdf9..70b8d88b3982 100644 --- a/include/linux/bcma/bcma.h +++ b/include/linux/bcma/bcma.h @@ -158,6 +158,7 @@ struct bcma_host_ops { /* Chip IDs of PCIe devices */ #define BCMA_CHIP_ID_BCM4313 0x4313 #define BCMA_CHIP_ID_BCM43142 43142 +#define BCMA_CHIP_ID_BCM43131 43131 #define BCMA_CHIP_ID_BCM43217 43217 #define BCMA_CHIP_ID_BCM43222 43222 #define BCMA_CHIP_ID_BCM43224 43224 -- cgit v1.2.3-59-g8ed1b From 94b912e42829b25d97b6b1f2be66c6aa81ac125f Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 29 Jul 2014 18:08:52 +0800 Subject: ARM: imx: Add the secondary request into the structure for imx-sdma SDMA supports device-to-device (per_2_per) scripts to handle DMA transfers between two peripheral devices. The per_2_per script, however, needs two DMA requests from the two sides, while the current structure only defines one request. So this patch simply adds the secondary request so as to let SDMA and its users add their implementations later. [ Both the change in the SDMA driver and the changes in its users, like the Freescale ASRC ASoC driver, should be taken along with this change in order to truly support per_2_per scripts. However, we make an expedient choice here by adding this first so that we can add either side later, since this patch won't break any function and meanwhile it makes the merge window go more smoothly: we don't need to apply the change inside the dmaengine branch via the ASoC tree any more.
-- Nicolin ] Signed-off-by: Nicolin Chen Acked-by: Shawn Guo Signed-off-by: Mark Brown --- include/linux/platform_data/dma-imx.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/dma-imx.h b/include/linux/platform_data/dma-imx.h index bcbc6c3c14c0..d05542aafa3e 100644 --- a/include/linux/platform_data/dma-imx.h +++ b/include/linux/platform_data/dma-imx.h @@ -50,6 +50,7 @@ enum imx_dma_prio { struct imx_dma_data { int dma_request; /* DMA request line */ + int dma_request2; /* secondary DMA request line */ enum sdma_peripheral_type peripheral_type; int priority; }; -- cgit v1.2.3-59-g8ed1b From 4d276eb6a478307a28ae843836c455bf04b37a3c Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 25 Jul 2014 18:01:32 -0400 Subject: net: remove deprecated syststamp timestamp The SO_TIMESTAMPING API defines three types of timestamps: software, hardware in raw format (hwtstamp) and hardware converted to system format (syststamp). The last has been deprecated in favor of combining hwtstamp with a PTP clock driver. There are no active users in the kernel. The option was device driver dependent. If set, but without hardware support, the correct behavior is to return zero in the relevant field in the SCM_TIMESTAMPING ancillary message. Without device drivers implementing the option, this field is effectively always zero. Remove the internal plumbing to dissuage new drivers from implementing the feature. Keep the SOF_TIMESTAMPING_SYS_HARDWARE flag, however, to avoid breaking existing applications that request the timestamp. Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- Documentation/networking/timestamping.txt | 12 ++---------- Documentation/networking/timestamping/timestamping.c | 7 +------ include/linux/skbuff.h | 14 +------------- include/net/sock.h | 11 +++-------- net/core/sock.c | 4 ---- net/socket.c | 12 ++++-------- 6 files changed, 11 insertions(+), 49 deletions(-) (limited to 'include/linux') diff --git a/Documentation/networking/timestamping.txt b/Documentation/networking/timestamping.txt index 8b4ad809df27..897f942b976b 100644 --- a/Documentation/networking/timestamping.txt +++ b/Documentation/networking/timestamping.txt @@ -88,15 +88,8 @@ hwtimeraw is the original hardware time stamp. Filled in if SOF_TIMESTAMPING_RAW_HARDWARE is set. No assumptions about its relation to system time should be made. -hwtimetrans is the hardware time stamp transformed so that it -corresponds as good as possible to system time. This correlation is -not perfect; as a consequence, sorting packets received via different -NICs by their hwtimetrans may differ from the order in which they were -received. hwtimetrans may be non-monotonic even for the same NIC. -Filled in if SOF_TIMESTAMPING_SYS_HARDWARE is set. Requires support -by the network device and will be empty without that support. This -field is DEPRECATED. Only one driver computes this value. New device -drivers must leave this zero. Instead, they can expose the hardware +hwtimetrans is always zero. This field is deprecated. It used to hold +hw timestamps converted to system time. Instead, expose the hardware clock device on the NIC directly as a HW PTP clock source, to allow time conversion in userspace and optionally synchronize system time with a userspace PTP stack such as linuxptp. 
For the PTP clock API, @@ -191,7 +184,6 @@ struct skb_shared_hwtstamps { * since arbitrary point in time */ ktime_t hwtstamp; - ktime_t syststamp; /* hwtstamp transformed to system time base */ }; Time stamps for outgoing packets are to be generated as follows: diff --git a/Documentation/networking/timestamping/timestamping.c b/Documentation/networking/timestamping/timestamping.c index 8ba82bfe6a33..5cdfd743447b 100644 --- a/Documentation/networking/timestamping/timestamping.c +++ b/Documentation/networking/timestamping/timestamping.c @@ -76,7 +76,6 @@ static void usage(const char *error) " SOF_TIMESTAMPING_RX_HARDWARE - hardware time stamping of incoming packets\n" " SOF_TIMESTAMPING_RX_SOFTWARE - software fallback for incoming packets\n" " SOF_TIMESTAMPING_SOFTWARE - request reporting of software time stamps\n" - " SOF_TIMESTAMPING_SYS_HARDWARE - request reporting of transformed HW time stamps\n" " SOF_TIMESTAMPING_RAW_HARDWARE - request reporting of raw HW time stamps\n" " SIOCGSTAMP - check last socket time stamp\n" " SIOCGSTAMPNS - more accurate socket time stamp\n"); @@ -202,9 +201,7 @@ static void printpacket(struct msghdr *msg, int res, (long)stamp->tv_sec, (long)stamp->tv_nsec); stamp++; - printf("HW transformed %ld.%09ld ", - (long)stamp->tv_sec, - (long)stamp->tv_nsec); + /* skip deprecated HW transformed */ stamp++; printf("HW raw %ld.%09ld", (long)stamp->tv_sec, @@ -361,8 +358,6 @@ int main(int argc, char **argv) so_timestamping_flags |= SOF_TIMESTAMPING_RX_SOFTWARE; else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_SOFTWARE")) so_timestamping_flags |= SOF_TIMESTAMPING_SOFTWARE; - else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_SYS_HARDWARE")) - so_timestamping_flags |= SOF_TIMESTAMPING_SYS_HARDWARE; else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_RAW_HARDWARE")) so_timestamping_flags |= SOF_TIMESTAMPING_RAW_HARDWARE; else diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index b613557132b9..281deced7469 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -210,20 +210,9 @@ static inline void skb_frag_size_sub(skb_frag_t *frag, int delta) * struct skb_shared_hwtstamps - hardware time stamps * @hwtstamp: hardware time stamp transformed into duration * since arbitrary point in time - * @syststamp: hwtstamp transformed to system time base (deprecated) * * Software time stamps generated by ktime_get_real() are stored in - * skb->tstamp. The relation between the different kinds of time - * stamps is as follows: - * - * syststamp and tstamp can be compared against each other in - * arbitrary combinations. The accuracy of a - * syststamp/tstamp/"syststamp from other device" comparison is - * limited by the accuracy of the transformation into system time - * base. This depends on the device driver and its underlying - * hardware. The syststamp implementation is deprecated in favor - * of hwtstamps and hw PTP clock sources exposed directly to - * userspace. + * skb->tstamp. * * hwtstamps can only be compared against other hwtstamps from * the same device. 
@@ -233,7 +222,6 @@ static inline void skb_frag_size_sub(skb_frag_t *frag, int delta) */ struct skb_shared_hwtstamps { ktime_t hwtstamp; - ktime_t syststamp; }; /* Definitions for tx_flags in struct skb_shared_info */ diff --git a/include/net/sock.h b/include/net/sock.h index 720773304a85..b91c8868ab8d 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -707,7 +707,6 @@ enum sock_flags { SOCK_TIMESTAMPING_RX_SOFTWARE, /* %SOF_TIMESTAMPING_RX_SOFTWARE */ SOCK_TIMESTAMPING_SOFTWARE, /* %SOF_TIMESTAMPING_SOFTWARE */ SOCK_TIMESTAMPING_RAW_HARDWARE, /* %SOF_TIMESTAMPING_RAW_HARDWARE */ - SOCK_TIMESTAMPING_SYS_HARDWARE, /* %SOF_TIMESTAMPING_SYS_HARDWARE */ SOCK_FASYNC, /* fasync() active */ SOCK_RXQ_OVFL, SOCK_ZEROCOPY, /* buffers from userspace */ @@ -2166,16 +2165,13 @@ sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) * - software time stamp available and wanted * (SOCK_TIMESTAMPING_SOFTWARE) * - hardware time stamps available and wanted - * (SOCK_TIMESTAMPING_SYS_HARDWARE or - * SOCK_TIMESTAMPING_RAW_HARDWARE) + * SOCK_TIMESTAMPING_RAW_HARDWARE */ if (sock_flag(sk, SOCK_RCVTSTAMP) || sock_flag(sk, SOCK_TIMESTAMPING_RX_SOFTWARE) || (kt.tv64 && sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE)) || (hwtstamps->hwtstamp.tv64 && - sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE)) || - (hwtstamps->syststamp.tv64 && - sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE))) + sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE))) __sock_recv_timestamp(msg, sk, skb); else sk->sk_stamp = kt; @@ -2193,8 +2189,7 @@ static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, #define FLAGS_TS_OR_DROPS ((1UL << SOCK_RXQ_OVFL) | \ (1UL << SOCK_RCVTSTAMP) | \ (1UL << SOCK_TIMESTAMPING_SOFTWARE) | \ - (1UL << SOCK_TIMESTAMPING_RAW_HARDWARE) | \ - (1UL << SOCK_TIMESTAMPING_SYS_HARDWARE)) + (1UL << SOCK_TIMESTAMPING_RAW_HARDWARE)) if (sk->sk_flags & FLAGS_TS_OR_DROPS) __sock_recv_ts_and_drops(msg, sk, skb); diff --git a/net/core/sock.c b/net/core/sock.c index ca9b65199d28..134291d73fcd 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -862,8 +862,6 @@ set_rcvbuf: (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE)); sock_valbool_flag(sk, SOCK_TIMESTAMPING_SOFTWARE, val & SOF_TIMESTAMPING_SOFTWARE); - sock_valbool_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE, - val & SOF_TIMESTAMPING_SYS_HARDWARE); sock_valbool_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE, val & SOF_TIMESTAMPING_RAW_HARDWARE); break; @@ -1102,8 +1100,6 @@ int sock_getsockopt(struct socket *sock, int level, int optname, v.val |= SOF_TIMESTAMPING_RX_SOFTWARE; if (sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE)) v.val |= SOF_TIMESTAMPING_SOFTWARE; - if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE)) - v.val |= SOF_TIMESTAMPING_SYS_HARDWARE; if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE)) v.val |= SOF_TIMESTAMPING_RAW_HARDWARE; break; diff --git a/net/socket.c b/net/socket.c index abf56b2a14f9..d8222c025061 100644 --- a/net/socket.c +++ b/net/socket.c @@ -725,14 +725,10 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, if (sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE) && ktime_to_timespec_cond(skb->tstamp, ts + 0)) empty = 0; - if (shhwtstamps) { - if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE) && - ktime_to_timespec_cond(shhwtstamps->syststamp, ts + 1)) - empty = 0; - if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE) && - ktime_to_timespec_cond(shhwtstamps->hwtstamp, ts + 2)) - empty = 0; - } + if (shhwtstamps && + sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE) && + ktime_to_timespec_cond(shhwtstamps->hwtstamp, ts + 2)) + 
empty = 0; if (!empty) put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING, sizeof(ts), &ts); -- cgit v1.2.3-59-g8ed1b From 518776800c094a518ae6d303660b57f1400eb1eb Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 24 Jul 2014 23:59:33 -0400 Subject: SUNRPC: Allow svc_reserve() to notify TCP socket that space has been freed Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_xprt.h | 1 + net/sunrpc/svc_xprt.c | 2 ++ net/sunrpc/svcsock.c | 9 +++++++++ 3 files changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 5d9d6f84b382..ce6e4182a5b2 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -25,6 +25,7 @@ struct svc_xprt_ops { void (*xpo_detach)(struct svc_xprt *); void (*xpo_free)(struct svc_xprt *); int (*xpo_secure_port)(struct svc_rqst *); + void (*xpo_adjust_wspace)(struct svc_xprt *); }; struct svc_xprt_class { diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 9cfa391e2bd0..6666c6745858 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -448,6 +448,8 @@ void svc_reserve(struct svc_rqst *rqstp, int space) atomic_sub((rqstp->rq_reserved - space), &xprt->xpt_reserved); rqstp->rq_reserved = space; + if (xprt->xpt_ops->xpo_adjust_wspace) + xprt->xpt_ops->xpo_adjust_wspace(xprt); svc_xprt_enqueue(xprt); } } diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 88db211d4264..c24a8ff33f8f 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -475,6 +475,14 @@ static void svc_tcp_write_space(struct sock *sk) svc_write_space(sk); } +static void svc_tcp_adjust_wspace(struct svc_xprt *xprt) +{ + struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); + + if (svc_tcp_has_wspace(xprt)) + clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); +} + /* * See net/ipv6/ip_sockglue.c : ip_cmsg_recv_pktinfo */ @@ -1289,6 +1297,7 @@ static struct svc_xprt_ops svc_tcp_ops = { .xpo_has_wspace = svc_tcp_has_wspace, .xpo_accept = svc_tcp_accept, .xpo_secure_port = svc_sock_secure_port, + .xpo_adjust_wspace = svc_tcp_adjust_wspace, }; static struct svc_xprt_class svc_tcp_class = { -- cgit v1.2.3-59-g8ed1b From 8e54caf407b98efa05409e1fee0e5381abd2b088 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 21 May 2014 18:26:44 -0600 Subject: tpm: Provide a generic means to override the chip returned timeouts Some Atmel TPMs provide completely wrong timeouts from their TPM_CAP_PROP_TIS_TIMEOUT query. This patch detects that and returns new correct values via a DID/VID table in the TIS driver. 
Tested on ARM using an AT97SC3204T FW version 37.16 Cc: [PHuewe: without this fix these 'broken' Atmel TPMs won't function on older kernels] Signed-off-by: "Berg, Christopher" Signed-off-by: Jason Gunthorpe Signed-off-by: Peter Huewe --- drivers/char/tpm/tpm-interface.c | 62 ++++++++++++++++++++++++++-------------- drivers/char/tpm/tpm_tis.c | 31 ++++++++++++++++++++ include/linux/tpm.h | 3 ++ 3 files changed, 75 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c index bef6cceffc3a..6af17002a115 100644 --- a/drivers/char/tpm/tpm-interface.c +++ b/drivers/char/tpm/tpm-interface.c @@ -491,11 +491,10 @@ static int tpm_startup(struct tpm_chip *chip, __be16 startup_type) int tpm_get_timeouts(struct tpm_chip *chip) { struct tpm_cmd_t tpm_cmd; - struct timeout_t *timeout_cap; + unsigned long new_timeout[4]; + unsigned long old_timeout[4]; struct duration_t *duration_cap; ssize_t rc; - u32 timeout; - unsigned int scale = 1; tpm_cmd.header.in = tpm_getcap_header; tpm_cmd.params.getcap_in.cap = TPM_CAP_PROP; @@ -529,25 +528,46 @@ int tpm_get_timeouts(struct tpm_chip *chip) != sizeof(tpm_cmd.header.out) + sizeof(u32) + 4 * sizeof(u32)) return -EINVAL; - timeout_cap = &tpm_cmd.params.getcap_out.cap.timeout; - /* Don't overwrite default if value is 0 */ - timeout = be32_to_cpu(timeout_cap->a); - if (timeout && timeout < 1000) { - /* timeouts in msec rather usec */ - scale = 1000; - chip->vendor.timeout_adjusted = true; + old_timeout[0] = be32_to_cpu(tpm_cmd.params.getcap_out.cap.timeout.a); + old_timeout[1] = be32_to_cpu(tpm_cmd.params.getcap_out.cap.timeout.b); + old_timeout[2] = be32_to_cpu(tpm_cmd.params.getcap_out.cap.timeout.c); + old_timeout[3] = be32_to_cpu(tpm_cmd.params.getcap_out.cap.timeout.d); + memcpy(new_timeout, old_timeout, sizeof(new_timeout)); + + /* + * Provide ability for vendor overrides of timeout values in case + * of misreporting. 
+ */ + if (chip->ops->update_timeouts != NULL) + chip->vendor.timeout_adjusted = + chip->ops->update_timeouts(chip, new_timeout); + + if (!chip->vendor.timeout_adjusted) { + /* Don't overwrite default if value is 0 */ + if (new_timeout[0] != 0 && new_timeout[0] < 1000) { + int i; + + /* timeouts in msec rather usec */ + for (i = 0; i != ARRAY_SIZE(new_timeout); i++) + new_timeout[i] *= 1000; + chip->vendor.timeout_adjusted = true; + } } - if (timeout) - chip->vendor.timeout_a = usecs_to_jiffies(timeout * scale); - timeout = be32_to_cpu(timeout_cap->b); - if (timeout) - chip->vendor.timeout_b = usecs_to_jiffies(timeout * scale); - timeout = be32_to_cpu(timeout_cap->c); - if (timeout) - chip->vendor.timeout_c = usecs_to_jiffies(timeout * scale); - timeout = be32_to_cpu(timeout_cap->d); - if (timeout) - chip->vendor.timeout_d = usecs_to_jiffies(timeout * scale); + + /* Report adjusted timeouts */ + if (chip->vendor.timeout_adjusted) { + dev_info(chip->dev, + HW_ERR "Adjusting reported timeouts: A %lu->%luus B %lu->%luus C %lu->%luus D %lu->%luus\n", + old_timeout[0], new_timeout[0], + old_timeout[1], new_timeout[1], + old_timeout[2], new_timeout[2], + old_timeout[3], new_timeout[3]); + } + + chip->vendor.timeout_a = usecs_to_jiffies(new_timeout[0]); + chip->vendor.timeout_b = usecs_to_jiffies(new_timeout[1]); + chip->vendor.timeout_c = usecs_to_jiffies(new_timeout[2]); + chip->vendor.timeout_d = usecs_to_jiffies(new_timeout[3]); duration: tpm_cmd.header.in = tpm_getcap_header; diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c index a9ed2270c25d..2c46734b266d 100644 --- a/drivers/char/tpm/tpm_tis.c +++ b/drivers/char/tpm/tpm_tis.c @@ -373,6 +373,36 @@ out_err: return rc; } +struct tis_vendor_timeout_override { + u32 did_vid; + unsigned long timeout_us[4]; +}; + +static const struct tis_vendor_timeout_override vendor_timeout_overrides[] = { + /* Atmel 3204 */ + { 0x32041114, { (TIS_SHORT_TIMEOUT*1000), (TIS_LONG_TIMEOUT*1000), + (TIS_SHORT_TIMEOUT*1000), (TIS_SHORT_TIMEOUT*1000) } }, +}; + +static bool tpm_tis_update_timeouts(struct tpm_chip *chip, + unsigned long *timeout_cap) +{ + int i; + u32 did_vid; + + did_vid = ioread32(chip->vendor.iobase + TPM_DID_VID(0)); + + for (i = 0; i != ARRAY_SIZE(vendor_timeout_overrides); i++) { + if (vendor_timeout_overrides[i].did_vid != did_vid) + continue; + memcpy(timeout_cap, vendor_timeout_overrides[i].timeout_us, + sizeof(vendor_timeout_overrides[i].timeout_us)); + return true; + } + + return false; +} + /* * Early probing for iTPM with STS_DATA_EXPECT flaw. * Try sending command without itpm flag set and if that @@ -437,6 +467,7 @@ static const struct tpm_class_ops tpm_tis = { .recv = tpm_tis_recv, .send = tpm_tis_send, .cancel = tpm_tis_ready, + .update_timeouts = tpm_tis_update_timeouts, .req_complete_mask = TPM_STS_DATA_AVAIL | TPM_STS_VALID, .req_complete_val = TPM_STS_DATA_AVAIL | TPM_STS_VALID, .req_canceled = tpm_tis_req_canceled, diff --git a/include/linux/tpm.h b/include/linux/tpm.h index fff1d0976f80..8350c538b486 100644 --- a/include/linux/tpm.h +++ b/include/linux/tpm.h @@ -39,6 +39,9 @@ struct tpm_class_ops { int (*send) (struct tpm_chip *chip, u8 *buf, size_t len); void (*cancel) (struct tpm_chip *chip); u8 (*status) (struct tpm_chip *chip); + bool (*update_timeouts)(struct tpm_chip *chip, + unsigned long *timeout_cap); + }; #if defined(CONFIG_TCG_TPM) || defined(CONFIG_TCG_TPM_MODULE) -- cgit v1.2.3-59-g8ed1b From 728dba3a39c66b3d8ac889ddbe38b5b1c264aec3 Mon Sep 17 00:00:00 2001 From: "Eric W. 
Biederman" Date: Mon, 3 Feb 2014 19:13:49 -0800 Subject: namespaces: Use task_lock and not rcu to protect nsproxy The synchronous syncrhonize_rcu in switch_task_namespaces makes setns a sufficiently expensive system call that people have complained. Upon inspect nsproxy no longer needs rcu protection for remote reads. remote reads are rare. So optimize for same process reads and write by switching using rask_lock instead. This yields a simpler to understand lock, and a faster setns system call. In particular this fixes a performance regression observed by Rafael David Tinoco . This is effectively a revert of Pavel Emelyanov's commit cf7b708c8d1d7a27736771bcf4c457b332b0f818 Make access to task's nsproxy lighter from 2007. The race this originialy fixed no longer exists as do_notify_parent uses task_active_pid_ns(parent) instead of parent->nsproxy. Signed-off-by: "Eric W. Biederman" --- fs/namespace.c | 6 +++--- fs/proc/proc_net.c | 4 +++- fs/proc_namespace.c | 8 +++----- include/linux/nsproxy.h | 16 ++++++---------- ipc/namespace.c | 6 +++--- kernel/nsproxy.c | 15 ++++----------- kernel/utsname.c | 6 +++--- net/core/net_namespace.c | 10 ++++++---- 8 files changed, 31 insertions(+), 40 deletions(-) (limited to 'include/linux') diff --git a/fs/namespace.c b/fs/namespace.c index 182bc41cd887..7187d01329c3 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2972,13 +2972,13 @@ static void *mntns_get(struct task_struct *task) struct mnt_namespace *ns = NULL; struct nsproxy *nsproxy; - rcu_read_lock(); - nsproxy = task_nsproxy(task); + task_lock(task); + nsproxy = task->nsproxy; if (nsproxy) { ns = nsproxy->mnt_ns; get_mnt_ns(ns); } - rcu_read_unlock(); + task_unlock(task); return ns; } diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index 4677bb7dc7c2..a63af3e0a612 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -113,9 +113,11 @@ static struct net *get_proc_task_net(struct inode *dir) rcu_read_lock(); task = pid_task(proc_pid(dir), PIDTYPE_PID); if (task != NULL) { - ns = task_nsproxy(task); + task_lock(task); + ns = task->nsproxy; if (ns != NULL) net = get_net(ns->net_ns); + task_unlock(task); } rcu_read_unlock(); diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c index 1a81373947f3..73ca1740d839 100644 --- a/fs/proc_namespace.c +++ b/fs/proc_namespace.c @@ -232,17 +232,15 @@ static int mounts_open_common(struct inode *inode, struct file *file, if (!task) goto err; - rcu_read_lock(); - nsp = task_nsproxy(task); + task_lock(task); + nsp = task->nsproxy; if (!nsp || !nsp->mnt_ns) { - rcu_read_unlock(); + task_unlock(task); put_task_struct(task); goto err; } ns = nsp->mnt_ns; get_mnt_ns(ns); - rcu_read_unlock(); - task_lock(task); if (!task->fs) { task_unlock(task); put_task_struct(task); diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h index b4ec59d159ac..35fa08fd7739 100644 --- a/include/linux/nsproxy.h +++ b/include/linux/nsproxy.h @@ -40,32 +40,28 @@ extern struct nsproxy init_nsproxy; * the namespaces access rules are: * * 1. only current task is allowed to change tsk->nsproxy pointer or - * any pointer on the nsproxy itself + * any pointer on the nsproxy itself. Current must hold the task_lock + * when changing tsk->nsproxy. * * 2. when accessing (i.e. reading) current task's namespaces - no * precautions should be taken - just dereference the pointers * * 3. 
the access to other task namespaces is performed like this - * rcu_read_lock(); - * nsproxy = task_nsproxy(tsk); + * task_lock(task); + * nsproxy = task->nsproxy; * if (nsproxy != NULL) { * / * * * work with the namespaces here * * e.g. get the reference on one of them * * / * } / * - * * NULL task_nsproxy() means that this task is + * * NULL task->nsproxy means that this task is * * almost dead (zombie) * * / - * rcu_read_unlock(); + * task_unlock(task); * */ -static inline struct nsproxy *task_nsproxy(struct task_struct *tsk) -{ - return rcu_dereference(tsk->nsproxy); -} - int copy_namespaces(unsigned long flags, struct task_struct *tsk); void exit_task_namespaces(struct task_struct *tsk); void switch_task_namespaces(struct task_struct *tsk, struct nsproxy *new); diff --git a/ipc/namespace.c b/ipc/namespace.c index 59451c1e214d..b54468e48e32 100644 --- a/ipc/namespace.c +++ b/ipc/namespace.c @@ -154,11 +154,11 @@ static void *ipcns_get(struct task_struct *task) struct ipc_namespace *ns = NULL; struct nsproxy *nsproxy; - rcu_read_lock(); - nsproxy = task_nsproxy(task); + task_lock(task); + nsproxy = task->nsproxy; if (nsproxy) ns = get_ipc_ns(nsproxy->ipc_ns); - rcu_read_unlock(); + task_unlock(task); return ns; } diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index 8e7811086b82..ef42d0ab3115 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c @@ -204,20 +204,13 @@ void switch_task_namespaces(struct task_struct *p, struct nsproxy *new) might_sleep(); + task_lock(p); ns = p->nsproxy; + p->nsproxy = new; + task_unlock(p); - rcu_assign_pointer(p->nsproxy, new); - - if (ns && atomic_dec_and_test(&ns->count)) { - /* - * wait for others to get what they want from this nsproxy. - * - * cannot release this nsproxy via the call_rcu() since - * put_mnt_ns() will want to sleep - */ - synchronize_rcu(); + if (ns && atomic_dec_and_test(&ns->count)) free_nsproxy(ns); - } } void exit_task_namespaces(struct task_struct *p) diff --git a/kernel/utsname.c b/kernel/utsname.c index fd393124e507..883aaaa7de8a 100644 --- a/kernel/utsname.c +++ b/kernel/utsname.c @@ -93,13 +93,13 @@ static void *utsns_get(struct task_struct *task) struct uts_namespace *ns = NULL; struct nsproxy *nsproxy; - rcu_read_lock(); - nsproxy = task_nsproxy(task); + task_lock(task); + nsproxy = task->nsproxy; if (nsproxy) { ns = nsproxy->uts_ns; get_uts_ns(ns); } - rcu_read_unlock(); + task_unlock(task); return ns; } diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 85b62691f4f2..7c6b51a58968 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -373,9 +373,11 @@ struct net *get_net_ns_by_pid(pid_t pid) tsk = find_task_by_vpid(pid); if (tsk) { struct nsproxy *nsproxy; - nsproxy = task_nsproxy(tsk); + task_lock(tsk); + nsproxy = tsk->nsproxy; if (nsproxy) net = get_net(nsproxy->net_ns); + task_unlock(tsk); } rcu_read_unlock(); return net; @@ -632,11 +634,11 @@ static void *netns_get(struct task_struct *task) struct net *net = NULL; struct nsproxy *nsproxy; - rcu_read_lock(); - nsproxy = task_nsproxy(task); + task_lock(task); + nsproxy = task->nsproxy; if (nsproxy) net = get_net(nsproxy->net_ns); - rcu_read_unlock(); + task_unlock(task); return net; } -- cgit v1.2.3-59-g8ed1b From 4972a74b888c6b52ca41fae6076786dbbeb746d5 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Tue, 15 Jul 2014 10:03:34 -0700 Subject: of: Split early_init_dt_scan into two parts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, early_init_dt_scan validates the header, sets 
the boot params, and scans for chosen/memory all in one function. Split this up into two separate functions (validation/setting boot params in one, scanning in another) to allow for additional setup between boot params and scanning the memory. Signed-off-by: Laura Abbott Tested-by: Andreas Färber [glikely: s/early_init_dt_scan_all/early_init_dt_scan_nodes/] Signed-off-by: Grant Likely --- drivers/of/fdt.c | 18 +++++++++++++++++- include/linux/of_fdt.h | 2 ++ 2 files changed, 19 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index b777d8f46bd5..ecc7a02d868e 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -937,7 +937,7 @@ int __init __weak early_init_dt_reserve_memory_arch(phys_addr_t base, } #endif -bool __init early_init_dt_scan(void *params) +bool __init early_init_dt_verify(void *params) { if (!params) return false; @@ -951,6 +951,12 @@ bool __init early_init_dt_scan(void *params) return false; } + return true; +} + + +void __init early_init_dt_scan_nodes(void) +{ /* Retrieve various information from the /chosen node */ of_scan_flat_dt(early_init_dt_scan_chosen, boot_command_line); @@ -959,7 +965,17 @@ bool __init early_init_dt_scan(void *params) /* Setup memory, calling early_init_dt_add_memory_arch */ of_scan_flat_dt(early_init_dt_scan_memory, NULL); +} + +bool __init early_init_dt_scan(void *params) +{ + bool status; + + status = early_init_dt_verify(params); + if (!status) + return false; + early_init_dt_scan_nodes(); return true; } diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h index 05117899fcb4..ebb2449082fb 100644 --- a/include/linux/of_fdt.h +++ b/include/linux/of_fdt.h @@ -73,6 +73,8 @@ extern int early_init_dt_scan_root(unsigned long node, const char *uname, int depth, void *data); extern bool early_init_dt_scan(void *params); +extern bool early_init_dt_verify(void *params); +extern void early_init_dt_scan_nodes(void); extern const char *of_flat_dt_get_machine_name(void); extern const void *of_flat_dt_match_machine(const void *default_match, -- cgit v1.2.3-59-g8ed1b From 704033cee2e5b3c1c6eaf5bb398e465a9c3667b5 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Tue, 15 Jul 2014 10:03:35 -0700 Subject: of: Add memory limiting function for flattened devicetrees MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Buggy bootloaders may pass bogus memory entries in the devicetree. Add of_fdt_limit_memory to add an upper bound on the number of entries that can be present in the devicetree. Signed-off-by: Laura Abbott Tested-by: Andreas Färber Signed-off-by: Grant Likely --- drivers/of/fdt.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/of_fdt.h | 1 + 2 files changed, 49 insertions(+) (limited to 'include/linux') diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index ecc7a02d868e..9aa012e6ea0a 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -26,6 +26,54 @@ #include /* for COMMAND_LINE_SIZE */ #include +/* + * of_fdt_limit_memory - limit the number of regions in the /memory node + * @limit: maximum entries + * + * Adjust the flattened device tree to have at most 'limit' number of + * memory entries in the /memory node. This function may be called + * any time after initial_boot_param is set. 
+ */ +void of_fdt_limit_memory(int limit) +{ + int memory; + int len; + const void *val; + int nr_address_cells = OF_ROOT_NODE_ADDR_CELLS_DEFAULT; + int nr_size_cells = OF_ROOT_NODE_SIZE_CELLS_DEFAULT; + const uint32_t *addr_prop; + const uint32_t *size_prop; + int root_offset; + int cell_size; + + root_offset = fdt_path_offset(initial_boot_params, "/"); + if (root_offset < 0) + return; + + addr_prop = fdt_getprop(initial_boot_params, root_offset, + "#address-cells", NULL); + if (addr_prop) + nr_address_cells = fdt32_to_cpu(*addr_prop); + + size_prop = fdt_getprop(initial_boot_params, root_offset, + "#size-cells", NULL); + if (size_prop) + nr_size_cells = fdt32_to_cpu(*size_prop); + + cell_size = sizeof(uint32_t)*(nr_address_cells + nr_size_cells); + + memory = fdt_path_offset(initial_boot_params, "/memory"); + if (memory > 0) { + val = fdt_getprop(initial_boot_params, memory, "reg", &len); + if (len > limit*cell_size) { + len = limit*cell_size; + pr_debug("Limiting number of entries to %d\n", limit); + fdt_setprop(initial_boot_params, memory, "reg", val, + len); + } + } +} + /** * of_fdt_is_compatible - Return true if given node from the given blob has * compat in its compatible list diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h index ebb2449082fb..0ff360d5b3b3 100644 --- a/include/linux/of_fdt.h +++ b/include/linux/of_fdt.h @@ -86,6 +86,7 @@ extern void unflatten_and_copy_device_tree(void); extern void early_init_devtree(void *); extern void early_get_first_memblock_info(void *, phys_addr_t *); extern u64 fdt_translate_address(const void *blob, int node_offset); +extern void of_fdt_limit_memory(int limit); #else /* CONFIG_OF_FLATTREE */ static inline void early_init_fdt_scan_reserved_mem(void) {} static inline const char *of_flat_dt_get_machine_name(void) { return NULL; } -- cgit v1.2.3-59-g8ed1b From 953dec21aed4038464fec02f96a2f1b8701a5bce Mon Sep 17 00:00:00 2001 From: John Stultz Date: Fri, 25 Jul 2014 21:37:19 -0700 Subject: timekeeping: Fixup typo in update_vsyscall_old definition In commit 4a0e637738f0 ("clocksource: Get rid of cycle_last"), currently in the -tip tree, there was a small typo where cycles_t was used instead of cycle_t. This broke ppc64 builds. Fix this by using the proper cycle_t type for this usage, in both the definition and the ia64 implementation. Now, having both cycle_t and cycles_t types seems like a very bad idea just asking for these sorts of issues. But that will be a cleanup for another day. Reported-by: Stephen Rothwell Signed-off-by: John Stultz Cc: Ingo Molnar Cc: "H.
Peter Anvin" Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1406349439-11785-1-git-send-email-john.stultz@linaro.org Signed-off-by: Thomas Gleixner --- arch/ia64/kernel/time.c | 2 +- include/linux/timekeeper_internal.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index 11dc42da7daf..3e71ef85e439 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -441,7 +441,7 @@ void update_vsyscall_tz(void) } void update_vsyscall_old(struct timespec *wall, struct timespec *wtm, - struct clocksource *c, u32 mult, cycles_t cycle_last) + struct clocksource *c, u32 mult, cycle_t cycle_last) { write_seqcount_begin(&fsyscall_gtod_data.seq); diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index e9660e52dc09..95640dcd1899 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -113,7 +113,7 @@ extern void update_vsyscall_tz(void); extern void update_vsyscall_old(struct timespec *ts, struct timespec *wtm, struct clocksource *c, u32 mult, - cycles_t cycle_last); + cycle_t cycle_last); extern void update_vsyscall_tz(void); #else -- cgit v1.2.3-59-g8ed1b From 5ccb8225abf2ac51cd023a99f28366ac9823bd0d Mon Sep 17 00:00:00 2001 From: Mike Qiu Date: Tue, 29 Jul 2014 10:49:25 -0700 Subject: x86/ras: Fix build warnings in <linux/aer.h> MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix build warning due to a missing forward declaration in <linux/aer.h>. We need struct pci_dev to be forward declared so we can define pointers to it, but we don't need to pull in the whole definition. build log: In file included from include/ras/ras_event.h:11:0, from drivers/ras/ras.c:13: include/linux/aer.h:42:129: warning: ‘struct pci_dev’ declared inside parameter list [enabled by default] include/linux/aer.h:42:129: warning: its scope is only this definition or declaration, which is probably not what you want [enabled by default] include/linux/aer.h:46:130: warning: ‘struct pci_dev’ declared inside parameter list [enabled by default] include/linux/aer.h:50:136: warning: ‘struct pci_dev’ declared inside parameter list [enabled by default] include/linux/aer.h:57:14: warning: ‘struct pci_dev’ declared inside parameter list [enabled by default] Signed-off-by: Mike Qiu Link: http://lkml.kernel.org/r/53d7dea511471321bb@agluck-desk.sc.intel.com Acked-by: Randy Dunlap Tested-by: Randy Dunlap Acked-by: Bjorn Helgaas Signed-off-by: Tony Luck Signed-off-by: H. Peter Anvin --- include/linux/aer.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/aer.h b/include/linux/aer.h index 4dbaa7081530..c826d1c28f9c 100644 --- a/include/linux/aer.h +++ b/include/linux/aer.h @@ -11,6 +11,8 @@ #define AER_FATAL 1 #define AER_CORRECTABLE 2 +struct pci_dev; + struct aer_header_log_regs { unsigned int dw0; unsigned int dw1; -- cgit v1.2.3-59-g8ed1b From 725c7b570fda4207e465ff8856c2c12c2645a685 Mon Sep 17 00:00:00 2001 From: Antoine Ténart Date: Wed, 30 Jul 2014 20:13:56 +0200 Subject: ata: libahci_platform: move port_map parameters into the AHCI structure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch moves force_port_map and mask_port_map into the ahci_host_priv structure. This allows them to be modified from within the AHCI framework. This is needed by the new dt bindings representing ports, as the port_map mask is computed automatically.
Parameters modifying force_port_map, mask_port_map and flags have been removed from the ahci_platform_init_host() function, and the input fields in the ahci_host_priv structure are now filled in directly. Signed-off-by: Antoine Ténart Signed-off-by: Tejun Heo --- drivers/ata/acard-ahci.c | 2 +- drivers/ata/ahci.c | 3 +-- drivers/ata/ahci.h | 10 ++++++---- drivers/ata/ahci_da850.c | 3 +-- drivers/ata/ahci_imx.c | 3 +-- drivers/ata/ahci_mvebu.c | 3 +-- drivers/ata/ahci_platform.c | 6 ++---- drivers/ata/ahci_st.c | 2 +- drivers/ata/ahci_sunxi.c | 8 +++----- drivers/ata/ahci_tegra.c | 3 +-- drivers/ata/ahci_xgene.c | 6 ++---- drivers/ata/libahci.c | 19 +++++++------------ drivers/ata/libahci_platform.c | 13 +++---------- drivers/ata/sata_highbank.c | 2 +- include/linux/ahci_platform.h | 5 +---- 15 files changed, 32 insertions(+), 56 deletions(-) (limited to 'include/linux') diff --git a/drivers/ata/acard-ahci.c b/drivers/ata/acard-ahci.c index 0cd7c7a39e5b..25d0ac32e721 100644 --- a/drivers/ata/acard-ahci.c +++ b/drivers/ata/acard-ahci.c @@ -441,7 +441,7 @@ static int acard_ahci_init_one(struct pci_dev *pdev, const struct pci_device_id hpriv->mmio = pcim_iomap_table(pdev)[AHCI_PCI_BAR]; /* save initial config */ - ahci_save_initial_config(&pdev->dev, hpriv, 0, 0); + ahci_save_initial_config(&pdev->dev, hpriv); /* prepare host */ if (hpriv->cap & HOST_CAP_NCQ) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 4cd52a4541a9..a29f8012fb08 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -526,8 +526,7 @@ static void ahci_pci_save_initial_config(struct pci_dev *pdev, "Disabling your PATA port. Use the boot option 'ahci.marvell_enable=0' to avoid this.\n"); } - ahci_save_initial_config(&pdev->dev, hpriv, force_port_map, - mask_port_map); + ahci_save_initial_config(&pdev->dev, hpriv); } static int ahci_pci_reset_controller(struct ata_host *host) diff --git a/drivers/ata/ahci.h b/drivers/ata/ahci.h index 2ed84e1c70ea..15396f0330ae 100644 --- a/drivers/ata/ahci.h +++ b/drivers/ata/ahci.h @@ -316,8 +316,12 @@ struct ahci_port_priv { }; struct ahci_host_priv { - void __iomem * mmio; /* bus-independent mem map */ + /* Input fields */ unsigned int flags; /* AHCI_HFLAG_* */ + u32 force_port_map; /* force port map */ + u32 mask_port_map; /* mask out particular bits */ + + void __iomem * mmio; /* bus-independent mem map */ u32 cap; /* cap to use */ u32 cap2; /* cap2 to use */ u32 port_map; /* port map to use */ @@ -361,9 +365,7 @@ unsigned int ahci_dev_classify(struct ata_port *ap); void ahci_fill_cmd_slot(struct ahci_port_priv *pp, unsigned int tag, u32 opts); void ahci_save_initial_config(struct device *dev, - struct ahci_host_priv *hpriv, - unsigned int force_port_map, - unsigned int mask_port_map); + struct ahci_host_priv *hpriv); void ahci_init_controller(struct ata_host *host); int ahci_reset_controller(struct ata_host *host); diff --git a/drivers/ata/ahci_da850.c b/drivers/ata/ahci_da850.c index 2b77d53bccf8..ad1e71ec10cf 100644 --- a/drivers/ata/ahci_da850.c +++ b/drivers/ata/ahci_da850.c @@ -85,8 +85,7 @@ static int ahci_da850_probe(struct platform_device *pdev) da850_sata_init(dev, pwrdn_reg, hpriv->mmio); - rc = ahci_platform_init_host(pdev, hpriv, &ahci_da850_port_info, - 0, 0, 0); + rc = ahci_platform_init_host(pdev, hpriv, &ahci_da850_port_info); if (rc) goto disable_resources; diff --git a/drivers/ata/ahci_imx.c b/drivers/ata/ahci_imx.c index 1e5fa5f21aff..f3970b4ed889 100644 --- a/drivers/ata/ahci_imx.c +++ b/drivers/ata/ahci_imx.c @@ -620,8 +620,7 @@ static int imx_ahci_probe(struct
platform_device *pdev) reg_val = clk_get_rate(imxpriv->ahb_clk) / 1000; writel(reg_val, hpriv->mmio + IMX_TIMER1MS); - ret = ahci_platform_init_host(pdev, hpriv, &ahci_imx_port_info, - 0, 0, 0); + ret = ahci_platform_init_host(pdev, hpriv, &ahci_imx_port_info); if (ret) goto disable_sata; diff --git a/drivers/ata/ahci_mvebu.c b/drivers/ata/ahci_mvebu.c index fd3dfd733b84..68672d2692ee 100644 --- a/drivers/ata/ahci_mvebu.c +++ b/drivers/ata/ahci_mvebu.c @@ -88,8 +88,7 @@ static int ahci_mvebu_probe(struct platform_device *pdev) ahci_mvebu_mbus_config(hpriv, dram); ahci_mvebu_regret_option(hpriv); - rc = ahci_platform_init_host(pdev, hpriv, &ahci_mvebu_port_info, - 0, 0, 0); + rc = ahci_platform_init_host(pdev, hpriv, &ahci_mvebu_port_info); if (rc) goto disable_resources; diff --git a/drivers/ata/ahci_platform.c b/drivers/ata/ahci_platform.c index b10d81ddb528..fb3eca5cd66c 100644 --- a/drivers/ata/ahci_platform.c +++ b/drivers/ata/ahci_platform.c @@ -34,7 +34,6 @@ static int ahci_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; struct ahci_platform_data *pdata = dev_get_platdata(dev); struct ahci_host_priv *hpriv; - unsigned long hflags = 0; int rc; hpriv = ahci_platform_get_resources(pdev); @@ -58,10 +57,9 @@ static int ahci_probe(struct platform_device *pdev) } if (of_device_is_compatible(dev->of_node, "hisilicon,hisi-ahci")) - hflags |= AHCI_HFLAG_NO_FBS | AHCI_HFLAG_NO_NCQ; + hpriv->flags |= AHCI_HFLAG_NO_FBS | AHCI_HFLAG_NO_NCQ; - rc = ahci_platform_init_host(pdev, hpriv, &ahci_port_info, - hflags, 0, 0); + rc = ahci_platform_init_host(pdev, hpriv, &ahci_port_info); if (rc) goto pdata_exit; diff --git a/drivers/ata/ahci_st.c b/drivers/ata/ahci_st.c index 29821b9fd13d..835d6eea84fd 100644 --- a/drivers/ata/ahci_st.c +++ b/drivers/ata/ahci_st.c @@ -166,7 +166,7 @@ static int st_ahci_probe(struct platform_device *pdev) if (err) return err; - err = ahci_platform_init_host(pdev, hpriv, &st_ahci_port_info, 0, 0, 0); + err = ahci_platform_init_host(pdev, hpriv, &st_ahci_port_info); if (err) { ahci_platform_disable_resources(hpriv); return err; diff --git a/drivers/ata/ahci_sunxi.c b/drivers/ata/ahci_sunxi.c index 02002f125bd4..e44d675a30ec 100644 --- a/drivers/ata/ahci_sunxi.c +++ b/drivers/ata/ahci_sunxi.c @@ -167,7 +167,6 @@ static int ahci_sunxi_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct ahci_host_priv *hpriv; - unsigned long hflags; int rc; hpriv = ahci_platform_get_resources(pdev); @@ -184,11 +183,10 @@ static int ahci_sunxi_probe(struct platform_device *pdev) if (rc) goto disable_resources; - hflags = AHCI_HFLAG_32BIT_ONLY | AHCI_HFLAG_NO_MSI | - AHCI_HFLAG_NO_PMP | AHCI_HFLAG_YES_NCQ; + hpriv->flags = AHCI_HFLAG_32BIT_ONLY | AHCI_HFLAG_NO_MSI | + AHCI_HFLAG_NO_PMP | AHCI_HFLAG_YES_NCQ; - rc = ahci_platform_init_host(pdev, hpriv, &ahci_sunxi_port_info, - hflags, 0, 0); + rc = ahci_platform_init_host(pdev, hpriv, &ahci_sunxi_port_info); if (rc) goto disable_resources; diff --git a/drivers/ata/ahci_tegra.c b/drivers/ata/ahci_tegra.c index d30bb21afd67..fc3df47fca35 100644 --- a/drivers/ata/ahci_tegra.c +++ b/drivers/ata/ahci_tegra.c @@ -348,8 +348,7 @@ static int tegra_ahci_probe(struct platform_device *pdev) if (ret) return ret; - ret = ahci_platform_init_host(pdev, hpriv, &ahci_tegra_port_info, - 0, 0, 0); + ret = ahci_platform_init_host(pdev, hpriv, &ahci_tegra_port_info); if (ret) goto deinit_controller; diff --git a/drivers/ata/ahci_xgene.c b/drivers/ata/ahci_xgene.c index 1cfbdca638d2..bc281115490b 100644 --- 
a/drivers/ata/ahci_xgene.c +++ b/drivers/ata/ahci_xgene.c @@ -422,7 +422,6 @@ static int xgene_ahci_probe(struct platform_device *pdev) struct ahci_host_priv *hpriv; struct xgene_ahci_context *ctx; struct resource *res; - unsigned long hflags; int rc; hpriv = ahci_platform_get_resources(pdev); @@ -481,10 +480,9 @@ static int xgene_ahci_probe(struct platform_device *pdev) /* Configure the host controller */ xgene_ahci_hw_init(hpriv); - hflags = AHCI_HFLAG_NO_PMP | AHCI_HFLAG_YES_NCQ; + hpriv->flags = AHCI_HFLAG_NO_PMP | AHCI_HFLAG_YES_NCQ; - rc = ahci_platform_init_host(pdev, hpriv, &xgene_ahci_port_info, - hflags, 0, 0); + rc = ahci_platform_init_host(pdev, hpriv, &xgene_ahci_port_info); if (rc) goto disable_resources; diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c index d72ce0470309..b784e9de426a 100644 --- a/drivers/ata/libahci.c +++ b/drivers/ata/libahci.c @@ -382,8 +382,6 @@ static ssize_t ahci_show_em_supported(struct device *dev, * ahci_save_initial_config - Save and fixup initial config values * @dev: target AHCI device * @hpriv: host private area to store config values - * @force_port_map: force port map to a specified value - * @mask_port_map: mask out particular bits from port map * * Some registers containing configuration info might be setup by * BIOS and might be cleared on reset. This function saves the @@ -398,10 +396,7 @@ static ssize_t ahci_show_em_supported(struct device *dev, * LOCKING: * None. */ -void ahci_save_initial_config(struct device *dev, - struct ahci_host_priv *hpriv, - unsigned int force_port_map, - unsigned int mask_port_map) +void ahci_save_initial_config(struct device *dev, struct ahci_host_priv *hpriv) { void __iomem *mmio = hpriv->mmio; u32 cap, cap2, vers, port_map; @@ -468,17 +463,17 @@ void ahci_save_initial_config(struct device *dev, cap &= ~HOST_CAP_FBS; } - if (force_port_map && port_map != force_port_map) { + if (hpriv->force_port_map && port_map != hpriv->force_port_map) { dev_info(dev, "forcing port_map 0x%x -> 0x%x\n", - port_map, force_port_map); - port_map = force_port_map; + port_map, hpriv->force_port_map); + port_map = hpriv->force_port_map; } - if (mask_port_map) { + if (hpriv->mask_port_map) { dev_warn(dev, "masking port_map 0x%x -> 0x%x\n", port_map, - port_map & mask_port_map); - port_map &= mask_port_map; + port_map & hpriv->mask_port_map); + port_map &= hpriv->mask_port_map; } /* cross check port_map and cap.n_ports */ diff --git a/drivers/ata/libahci_platform.c b/drivers/ata/libahci_platform.c index 28840a2f470f..00582d3a46a4 100644 --- a/drivers/ata/libahci_platform.c +++ b/drivers/ata/libahci_platform.c @@ -288,9 +288,6 @@ EXPORT_SYMBOL_GPL(ahci_platform_get_resources); * @pdev: platform device pointer for the host * @hpriv: ahci-host private data for the host * @pi_template: template for the ata_port_info to use - * @host_flags: ahci host flags used in ahci_host_priv - * @force_port_map: param passed to ahci_save_initial_config - * @mask_port_map: param passed to ahci_save_initial_config * * This function does all the usual steps needed to bring up an * ahci-platform host, note any necessary resources (ie clks, phy, etc.) 
@@ -301,10 +298,7 @@ EXPORT_SYMBOL_GPL(ahci_platform_get_resources); */ int ahci_platform_init_host(struct platform_device *pdev, struct ahci_host_priv *hpriv, - const struct ata_port_info *pi_template, - unsigned long host_flags, - unsigned int force_port_map, - unsigned int mask_port_map) + const struct ata_port_info *pi_template) { struct device *dev = &pdev->dev; struct ata_port_info pi = *pi_template; @@ -319,10 +313,9 @@ int ahci_platform_init_host(struct platform_device *pdev, } /* prepare host */ - pi.private_data = (void *)host_flags; - hpriv->flags |= host_flags; + pi.private_data = (void *)hpriv->flags; - ahci_save_initial_config(dev, hpriv, force_port_map, mask_port_map); + ahci_save_initial_config(dev, hpriv); if (hpriv->cap & HOST_CAP_NCQ) pi.flags |= ATA_FLAG_NCQ; diff --git a/drivers/ata/sata_highbank.c b/drivers/ata/sata_highbank.c index 65965cf5af06..da3bc2709c63 100644 --- a/drivers/ata/sata_highbank.c +++ b/drivers/ata/sata_highbank.c @@ -512,7 +512,7 @@ static int ahci_highbank_probe(struct platform_device *pdev) return rc; - ahci_save_initial_config(dev, hpriv, 0, 0); + ahci_save_initial_config(dev, hpriv); /* prepare host */ if (hpriv->cap & HOST_CAP_NCQ) diff --git a/include/linux/ahci_platform.h b/include/linux/ahci_platform.h index 6dfd51a04d77..09a947e8bc87 100644 --- a/include/linux/ahci_platform.h +++ b/include/linux/ahci_platform.h @@ -43,10 +43,7 @@ struct ahci_host_priv *ahci_platform_get_resources( struct platform_device *pdev); int ahci_platform_init_host(struct platform_device *pdev, struct ahci_host_priv *hpriv, - const struct ata_port_info *pi_template, - unsigned long host_flags, - unsigned int force_port_map, - unsigned int mask_port_map); + const struct ata_port_info *pi_template); int ahci_platform_suspend_host(struct device *dev); int ahci_platform_resume_host(struct device *dev); -- cgit v1.2.3-59-g8ed1b From 9603b61de1eee92977d74ff42541be20c0c5b1a7 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Mon, 28 Jul 2014 23:30:22 +0300 Subject: mlx5: Move pci device handling from mlx5_ib to mlx5_core In preparation for a new mlx5 device which is VPI (i.e., ports can be either IB or ETH), move the pci device functionality from mlx5_ib to mlx5_core. This involves the following changes: 1. Move mlx5_core_dev struct out of mlx5_ib_dev. mlx5_core_dev is now an independent structure maintained by mlx5_core. mlx5_ib_dev now has a pointer to that struct. This requires changing a lot of places where the core_dev struct was accessed via mlx5_ib_dev (now, this needs to be a pointer dereference). 2. All PCI initializations are now done in mlx5_core. Thus, it is now mlx5_core which does pci_register_device (and not mlx5_ib, as was previously). 3. mlx5_ib now registers itself with mlx5_core as an "interface" driver. This is very similar to the mechanism employed for the mlx4 (ConnectX) driver. Once the HCA is initialized (by mlx5_core), it invokes the interface drivers to do their initializations. 4. There is a new event handler which the core registers: mlx5_core_event(). This event handler invokes the event handlers registered by the interfaces. Based on a patch by Eli Cohen Signed-off-by: Jack Morgenstein Signed-off-by: Eli Cohen Signed-off-by: David S. 
Miller --- drivers/infiniband/hw/mlx5/cq.c | 46 ++-- drivers/infiniband/hw/mlx5/mad.c | 4 +- drivers/infiniband/hw/mlx5/main.c | 281 ++++++++---------------- drivers/infiniband/hw/mlx5/mlx5_ib.h | 12 +- drivers/infiniband/hw/mlx5/mr.c | 48 ++-- drivers/infiniband/hw/mlx5/qp.c | 84 +++---- drivers/infiniband/hw/mlx5/srq.c | 26 +-- drivers/net/ethernet/mellanox/mlx5/core/main.c | 290 ++++++++++++++++++++++++- include/linux/mlx5/driver.h | 17 +- 9 files changed, 498 insertions(+), 310 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 8ae4f896cb41..3b4dc858cef9 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -180,7 +180,7 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe, struct mlx5_core_srq *msrq = NULL; if (qp->ibqp.xrcd) { - msrq = mlx5_core_get_srq(&dev->mdev, + msrq = mlx5_core_get_srq(dev->mdev, be32_to_cpu(cqe->srqn)); srq = to_mibsrq(msrq); } else { @@ -364,7 +364,7 @@ static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64, static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf) { - mlx5_buf_free(&dev->mdev, &buf->buf); + mlx5_buf_free(dev->mdev, &buf->buf); } static void get_sig_err_item(struct mlx5_sig_err_cqe *cqe, @@ -450,7 +450,7 @@ repoll: * because CQs will be locked while QPs are removed * from the table. */ - mqp = __mlx5_qp_lookup(&dev->mdev, qpn); + mqp = __mlx5_qp_lookup(dev->mdev, qpn); if (unlikely(!mqp)) { mlx5_ib_warn(dev, "CQE@CQ %06x for unknown QPN %6x\n", cq->mcq.cqn, qpn); @@ -514,11 +514,11 @@ repoll: case MLX5_CQE_SIG_ERR: sig_err_cqe = (struct mlx5_sig_err_cqe *)cqe64; - read_lock(&dev->mdev.priv.mr_table.lock); - mmr = __mlx5_mr_lookup(&dev->mdev, + read_lock(&dev->mdev->priv.mr_table.lock); + mmr = __mlx5_mr_lookup(dev->mdev, mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey))); if (unlikely(!mmr)) { - read_unlock(&dev->mdev.priv.mr_table.lock); + read_unlock(&dev->mdev->priv.mr_table.lock); mlx5_ib_warn(dev, "CQE@CQ %06x for unknown MR %6x\n", cq->mcq.cqn, be32_to_cpu(sig_err_cqe->mkey)); return -EINVAL; @@ -536,7 +536,7 @@ repoll: mr->sig->err_item.expected, mr->sig->err_item.actual); - read_unlock(&dev->mdev.priv.mr_table.lock); + read_unlock(&dev->mdev->priv.mr_table.lock); goto repoll; } @@ -575,8 +575,8 @@ int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) mlx5_cq_arm(&to_mcq(ibcq)->mcq, (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ? 
MLX5_CQ_DB_REQ_NOT_SOL : MLX5_CQ_DB_REQ_NOT, - to_mdev(ibcq->device)->mdev.priv.uuari.uars[0].map, - MLX5_GET_DOORBELL_LOCK(&to_mdev(ibcq->device)->mdev.priv.cq_uar_lock)); + to_mdev(ibcq->device)->mdev->priv.uuari.uars[0].map, + MLX5_GET_DOORBELL_LOCK(&to_mdev(ibcq->device)->mdev->priv.cq_uar_lock)); return 0; } @@ -586,7 +586,7 @@ static int alloc_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf, { int err; - err = mlx5_buf_alloc(&dev->mdev, nent * cqe_size, + err = mlx5_buf_alloc(dev->mdev, nent * cqe_size, PAGE_SIZE * 2, &buf->buf); if (err) return err; @@ -691,7 +691,7 @@ static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, { int err; - err = mlx5_db_alloc(&dev->mdev, &cq->db); + err = mlx5_db_alloc(dev->mdev, &cq->db); if (err) return err; @@ -716,7 +716,7 @@ static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, mlx5_fill_page_array(&cq->buf.buf, (*cqb)->pas); (*cqb)->ctx.log_pg_sz = cq->buf.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT; - *index = dev->mdev.priv.uuari.uars[0].index; + *index = dev->mdev->priv.uuari.uars[0].index; return 0; @@ -724,14 +724,14 @@ err_buf: free_cq_buf(dev, &cq->buf); err_db: - mlx5_db_free(&dev->mdev, &cq->db); + mlx5_db_free(dev->mdev, &cq->db); return err; } static void destroy_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq) { free_cq_buf(dev, &cq->buf); - mlx5_db_free(&dev->mdev, &cq->db); + mlx5_db_free(dev->mdev, &cq->db); } struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, int entries, @@ -752,7 +752,7 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, int entries, return ERR_PTR(-EINVAL); entries = roundup_pow_of_two(entries + 1); - if (entries > dev->mdev.caps.max_cqes) + if (entries > dev->mdev->caps.max_cqes) return ERR_PTR(-EINVAL); cq = kzalloc(sizeof(*cq), GFP_KERNEL); @@ -789,7 +789,7 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, int entries, cqb->ctx.c_eqn = cpu_to_be16(eqn); cqb->ctx.db_record_addr = cpu_to_be64(cq->db.dma); - err = mlx5_core_create_cq(&dev->mdev, &cq->mcq, cqb, inlen); + err = mlx5_core_create_cq(dev->mdev, &cq->mcq, cqb, inlen); if (err) goto err_cqb; @@ -809,7 +809,7 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, int entries, return &cq->ibcq; err_cmd: - mlx5_core_destroy_cq(&dev->mdev, &cq->mcq); + mlx5_core_destroy_cq(dev->mdev, &cq->mcq); err_cqb: mlx5_vfree(cqb); @@ -834,7 +834,7 @@ int mlx5_ib_destroy_cq(struct ib_cq *cq) if (cq->uobject) context = cq->uobject->context; - mlx5_core_destroy_cq(&dev->mdev, &mcq->mcq); + mlx5_core_destroy_cq(dev->mdev, &mcq->mcq); if (context) destroy_cq_user(mcq, context); else @@ -919,7 +919,7 @@ int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) int err; u32 fsel; - if (!(dev->mdev.caps.flags & MLX5_DEV_CAP_FLAG_CQ_MODER)) + if (!(dev->mdev->caps.flags & MLX5_DEV_CAP_FLAG_CQ_MODER)) return -ENOSYS; in = kzalloc(sizeof(*in), GFP_KERNEL); @@ -931,7 +931,7 @@ int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) in->ctx.cq_period = cpu_to_be16(cq_period); in->ctx.cq_max_count = cpu_to_be16(cq_count); in->field_select = cpu_to_be32(fsel); - err = mlx5_core_modify_cq(&dev->mdev, &mcq->mcq, in, sizeof(*in)); + err = mlx5_core_modify_cq(dev->mdev, &mcq->mcq, in, sizeof(*in)); kfree(in); if (err) @@ -1074,7 +1074,7 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) int uninitialized_var(cqe_size); unsigned long flags; - if (!(dev->mdev.caps.flags & MLX5_DEV_CAP_FLAG_RESIZE_CQ)) { + if (!(dev->mdev->caps.flags & 
MLX5_DEV_CAP_FLAG_RESIZE_CQ)) { pr_info("Firmware does not support resize CQ\n"); return -ENOSYS; } @@ -1083,7 +1083,7 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) return -EINVAL; entries = roundup_pow_of_two(entries + 1); - if (entries > dev->mdev.caps.max_cqes + 1) + if (entries > dev->mdev->caps.max_cqes + 1) return -EINVAL; if (entries == ibcq->cqe + 1) @@ -1128,7 +1128,7 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) in->hdr.opmod = cpu_to_be16(MLX5_CQ_OPMOD_RESIZE); in->cqn = cpu_to_be32(cq->mcq.cqn); - err = mlx5_core_modify_cq(&dev->mdev, &cq->mcq, in, inlen); + err = mlx5_core_modify_cq(dev->mdev, &cq->mcq, in, inlen); if (err) goto ex_alloc; diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c index 5c8938be0e08..e259e7393152 100644 --- a/drivers/infiniband/hw/mlx5/mad.c +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -54,7 +54,7 @@ int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey, if (ignore_bkey || !in_wc) op_modifier |= 0x2; - return mlx5_core_mad_ifc(&dev->mdev, in_mad, response_mad, op_modifier, port); + return mlx5_core_mad_ifc(dev->mdev, in_mad, response_mad, op_modifier, port); } int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, @@ -129,7 +129,7 @@ int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port) packet_error = be16_to_cpu(out_mad->status); - dev->mdev.caps.ext_port_cap[port - 1] = (!err && !packet_error) ? + dev->mdev->caps.ext_port_cap[port - 1] = (!err && !packet_error) ? MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO : 0; out: diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 364d4b6937f5..f2cfd363a705 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -54,96 +54,17 @@ MODULE_DESCRIPTION("Mellanox Connect-IB HCA IB driver"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_VERSION(DRIVER_VERSION); -static int prof_sel = 2; -module_param_named(prof_sel, prof_sel, int, 0444); -MODULE_PARM_DESC(prof_sel, "profile selector. Valid range 0 - 2"); +static int deprecated_prof_sel = 2; +module_param_named(prof_sel, deprecated_prof_sel, int, 0444); +MODULE_PARM_DESC(prof_sel, "profile selector. Deprecated here. 
Moved to module mlx5_core"); static char mlx5_version[] = DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v" DRIVER_VERSION " (" DRIVER_RELDATE ")\n"; -static struct mlx5_profile profile[] = { - [0] = { - .mask = 0, - }, - [1] = { - .mask = MLX5_PROF_MASK_QP_SIZE, - .log_max_qp = 12, - }, - [2] = { - .mask = MLX5_PROF_MASK_QP_SIZE | - MLX5_PROF_MASK_MR_CACHE, - .log_max_qp = 17, - .mr_cache[0] = { - .size = 500, - .limit = 250 - }, - .mr_cache[1] = { - .size = 500, - .limit = 250 - }, - .mr_cache[2] = { - .size = 500, - .limit = 250 - }, - .mr_cache[3] = { - .size = 500, - .limit = 250 - }, - .mr_cache[4] = { - .size = 500, - .limit = 250 - }, - .mr_cache[5] = { - .size = 500, - .limit = 250 - }, - .mr_cache[6] = { - .size = 500, - .limit = 250 - }, - .mr_cache[7] = { - .size = 500, - .limit = 250 - }, - .mr_cache[8] = { - .size = 500, - .limit = 250 - }, - .mr_cache[9] = { - .size = 500, - .limit = 250 - }, - .mr_cache[10] = { - .size = 500, - .limit = 250 - }, - .mr_cache[11] = { - .size = 500, - .limit = 250 - }, - .mr_cache[12] = { - .size = 64, - .limit = 32 - }, - .mr_cache[13] = { - .size = 32, - .limit = 16 - }, - .mr_cache[14] = { - .size = 16, - .limit = 8 - }, - .mr_cache[15] = { - .size = 8, - .limit = 4 - }, - }, -}; - int mlx5_vector2eqn(struct mlx5_ib_dev *dev, int vector, int *eqn, int *irqn) { - struct mlx5_eq_table *table = &dev->mdev.priv.eq_table; + struct mlx5_eq_table *table = &dev->mdev->priv.eq_table; struct mlx5_eq *eq, *n; int err = -ENOENT; @@ -163,7 +84,7 @@ int mlx5_vector2eqn(struct mlx5_ib_dev *dev, int vector, int *eqn, int *irqn) static int alloc_comp_eqs(struct mlx5_ib_dev *dev) { - struct mlx5_eq_table *table = &dev->mdev.priv.eq_table; + struct mlx5_eq_table *table = &dev->mdev->priv.eq_table; char name[MLX5_MAX_EQ_NAME]; struct mlx5_eq *eq, *n; int ncomp_vec; @@ -182,9 +103,9 @@ static int alloc_comp_eqs(struct mlx5_ib_dev *dev) } snprintf(name, MLX5_MAX_EQ_NAME, "mlx5_comp%d", i); - err = mlx5_create_map_eq(&dev->mdev, eq, + err = mlx5_create_map_eq(dev->mdev, eq, i + MLX5_EQ_VEC_COMP_BASE, nent, 0, - name, &dev->mdev.priv.uuari.uars[0]); + name, &dev->mdev->priv.uuari.uars[0]); if (err) { kfree(eq); goto clean; @@ -204,7 +125,7 @@ clean: list_for_each_entry_safe(eq, n, &dev->eqs_list, list) { list_del(&eq->list); spin_unlock(&table->lock); - if (mlx5_destroy_unmap_eq(&dev->mdev, eq)) + if (mlx5_destroy_unmap_eq(dev->mdev, eq)) mlx5_ib_warn(dev, "failed to destroy EQ 0x%x\n", eq->eqn); kfree(eq); spin_lock(&table->lock); @@ -215,14 +136,14 @@ clean: static void free_comp_eqs(struct mlx5_ib_dev *dev) { - struct mlx5_eq_table *table = &dev->mdev.priv.eq_table; + struct mlx5_eq_table *table = &dev->mdev->priv.eq_table; struct mlx5_eq *eq, *n; spin_lock(&table->lock); list_for_each_entry_safe(eq, n, &dev->eqs_list, list) { list_del(&eq->list); spin_unlock(&table->lock); - if (mlx5_destroy_unmap_eq(&dev->mdev, eq)) + if (mlx5_destroy_unmap_eq(dev->mdev, eq)) mlx5_ib_warn(dev, "failed to destroy EQ 0x%x\n", eq->eqn); kfree(eq); spin_lock(&table->lock); @@ -255,14 +176,14 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, memset(props, 0, sizeof(*props)); - props->fw_ver = ((u64)fw_rev_maj(&dev->mdev) << 32) | - (fw_rev_min(&dev->mdev) << 16) | - fw_rev_sub(&dev->mdev); + props->fw_ver = ((u64)fw_rev_maj(dev->mdev) << 32) | + (fw_rev_min(dev->mdev) << 16) | + fw_rev_sub(dev->mdev); props->device_cap_flags = IB_DEVICE_CHANGE_PHY_PORT | IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN; - flags = 
dev->mdev.caps.flags; + flags = dev->mdev->caps.flags; if (flags & MLX5_DEV_CAP_FLAG_BAD_PKEY_CNTR) props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR; if (flags & MLX5_DEV_CAP_FLAG_BAD_QKEY_CNTR) @@ -292,30 +213,30 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, memcpy(&props->sys_image_guid, out_mad->data + 4, 8); props->max_mr_size = ~0ull; - props->page_size_cap = dev->mdev.caps.min_page_sz; - props->max_qp = 1 << dev->mdev.caps.log_max_qp; - props->max_qp_wr = dev->mdev.caps.max_wqes; - max_rq_sg = dev->mdev.caps.max_rq_desc_sz / sizeof(struct mlx5_wqe_data_seg); - max_sq_sg = (dev->mdev.caps.max_sq_desc_sz - sizeof(struct mlx5_wqe_ctrl_seg)) / + props->page_size_cap = dev->mdev->caps.min_page_sz; + props->max_qp = 1 << dev->mdev->caps.log_max_qp; + props->max_qp_wr = dev->mdev->caps.max_wqes; + max_rq_sg = dev->mdev->caps.max_rq_desc_sz / sizeof(struct mlx5_wqe_data_seg); + max_sq_sg = (dev->mdev->caps.max_sq_desc_sz - sizeof(struct mlx5_wqe_ctrl_seg)) / sizeof(struct mlx5_wqe_data_seg); props->max_sge = min(max_rq_sg, max_sq_sg); - props->max_cq = 1 << dev->mdev.caps.log_max_cq; - props->max_cqe = dev->mdev.caps.max_cqes - 1; - props->max_mr = 1 << dev->mdev.caps.log_max_mkey; - props->max_pd = 1 << dev->mdev.caps.log_max_pd; - props->max_qp_rd_atom = dev->mdev.caps.max_ra_req_qp; - props->max_qp_init_rd_atom = dev->mdev.caps.max_ra_res_qp; + props->max_cq = 1 << dev->mdev->caps.log_max_cq; + props->max_cqe = dev->mdev->caps.max_cqes - 1; + props->max_mr = 1 << dev->mdev->caps.log_max_mkey; + props->max_pd = 1 << dev->mdev->caps.log_max_pd; + props->max_qp_rd_atom = dev->mdev->caps.max_ra_req_qp; + props->max_qp_init_rd_atom = dev->mdev->caps.max_ra_res_qp; props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp; - props->max_srq = 1 << dev->mdev.caps.log_max_srq; - props->max_srq_wr = dev->mdev.caps.max_srq_wqes - 1; + props->max_srq = 1 << dev->mdev->caps.log_max_srq; + props->max_srq_wr = dev->mdev->caps.max_srq_wqes - 1; props->max_srq_sge = max_rq_sg - 1; props->max_fast_reg_page_list_len = (unsigned int)-1; - props->local_ca_ack_delay = dev->mdev.caps.local_ca_ack_delay; + props->local_ca_ack_delay = dev->mdev->caps.local_ca_ack_delay; props->atomic_cap = IB_ATOMIC_NONE; props->masked_atomic_cap = IB_ATOMIC_NONE; props->max_pkeys = be16_to_cpup((__be16 *)(out_mad->data + 28)); - props->max_mcast_grp = 1 << dev->mdev.caps.log_max_mcg; - props->max_mcast_qp_attach = dev->mdev.caps.max_qp_mcg; + props->max_mcast_grp = 1 << dev->mdev->caps.log_max_mcg; + props->max_mcast_qp_attach = dev->mdev->caps.max_qp_mcg; props->max_total_mcast_qp_attach = props->max_mcast_qp_attach * props->max_mcast_grp; props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */ @@ -336,7 +257,7 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u8 port, int ext_active_speed; int err = -ENOMEM; - if (port < 1 || port > dev->mdev.caps.num_ports) { + if (port < 1 || port > dev->mdev->caps.num_ports) { mlx5_ib_warn(dev, "invalid port number %d\n", port); return -EINVAL; } @@ -367,8 +288,8 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u8 port, props->phys_state = out_mad->data[33] >> 4; props->port_cap_flags = be32_to_cpup((__be32 *)(out_mad->data + 20)); props->gid_tbl_len = out_mad->data[50]; - props->max_msg_sz = 1 << to_mdev(ibdev)->mdev.caps.log_max_msg; - props->pkey_tbl_len = to_mdev(ibdev)->mdev.caps.port[port - 1].pkey_table_len; + props->max_msg_sz = 1 << to_mdev(ibdev)->mdev->caps.log_max_msg; + props->pkey_tbl_len = to_mdev(ibdev)->mdev->caps.port[port - 1].pkey_table_len; 
props->bad_pkey_cntr = be16_to_cpup((__be16 *)(out_mad->data + 46)); props->qkey_viol_cntr = be16_to_cpup((__be16 *)(out_mad->data + 48)); props->active_width = out_mad->data[31] & 0xf; @@ -395,7 +316,7 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u8 port, /* If reported active speed is QDR, check if is FDR-10 */ if (props->active_speed == 4) { - if (dev->mdev.caps.ext_port_cap[port - 1] & + if (dev->mdev->caps.ext_port_cap[port - 1] & MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO) { init_query_mad(in_mad); in_mad->attr_id = MLX5_ATTR_EXTENDED_PORT_INFO; @@ -508,7 +429,7 @@ static int mlx5_ib_modify_device(struct ib_device *ibdev, int mask, * a 144 trap. If cmd fails, just ignore. */ memcpy(&in, props->node_desc, 64); - err = mlx5_core_access_reg(&dev->mdev, &in, sizeof(in), &out, + err = mlx5_core_access_reg(dev->mdev, &in, sizeof(in), &out, sizeof(out), MLX5_REG_NODE_DESC, 0, 1); if (err) return err; @@ -535,7 +456,7 @@ static int mlx5_ib_modify_port(struct ib_device *ibdev, u8 port, int mask, tmp = (attr.port_cap_flags | props->set_port_cap_mask) & ~props->clr_port_cap_mask; - err = mlx5_set_port_caps(&dev->mdev, port, tmp); + err = mlx5_set_port_caps(dev->mdev, port, tmp); out: mutex_unlock(&dev->cap_mask_mutex); @@ -591,14 +512,14 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, num_uars = req.total_num_uuars / MLX5_NON_FP_BF_REGS_PER_PAGE; gross_uuars = num_uars * MLX5_BF_REGS_PER_PAGE; - resp.qp_tab_size = 1 << dev->mdev.caps.log_max_qp; - resp.bf_reg_size = dev->mdev.caps.bf_reg_size; + resp.qp_tab_size = 1 << dev->mdev->caps.log_max_qp; + resp.bf_reg_size = dev->mdev->caps.bf_reg_size; resp.cache_line_size = L1_CACHE_BYTES; - resp.max_sq_desc_sz = dev->mdev.caps.max_sq_desc_sz; - resp.max_rq_desc_sz = dev->mdev.caps.max_rq_desc_sz; - resp.max_send_wqebb = dev->mdev.caps.max_wqes; - resp.max_recv_wr = dev->mdev.caps.max_wqes; - resp.max_srq_recv_wr = dev->mdev.caps.max_srq_wqes; + resp.max_sq_desc_sz = dev->mdev->caps.max_sq_desc_sz; + resp.max_rq_desc_sz = dev->mdev->caps.max_rq_desc_sz; + resp.max_send_wqebb = dev->mdev->caps.max_wqes; + resp.max_recv_wr = dev->mdev->caps.max_wqes; + resp.max_srq_recv_wr = dev->mdev->caps.max_srq_wqes; context = kzalloc(sizeof(*context), GFP_KERNEL); if (!context) @@ -635,7 +556,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, } for (i = 0; i < num_uars; i++) { - err = mlx5_cmd_alloc_uar(&dev->mdev, &uars[i].index); + err = mlx5_cmd_alloc_uar(dev->mdev, &uars[i].index); if (err) goto out_count; } @@ -644,7 +565,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, mutex_init(&context->db_page_mutex); resp.tot_uuars = req.total_num_uuars; - resp.num_ports = dev->mdev.caps.num_ports; + resp.num_ports = dev->mdev->caps.num_ports; err = ib_copy_to_udata(udata, &resp, sizeof(resp) - sizeof(resp.reserved)); if (err) @@ -658,7 +579,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, out_uars: for (i--; i >= 0; i--) - mlx5_cmd_free_uar(&dev->mdev, uars[i].index); + mlx5_cmd_free_uar(dev->mdev, uars[i].index); out_count: kfree(uuari->count); @@ -681,7 +602,7 @@ static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext) int i; for (i = 0; i < uuari->num_uars; i++) { - if (mlx5_cmd_free_uar(&dev->mdev, uuari->uars[i].index)) + if (mlx5_cmd_free_uar(dev->mdev, uuari->uars[i].index)) mlx5_ib_warn(dev, "failed to free UAR 0x%x\n", uuari->uars[i].index); } @@ -695,7 +616,7 @@ static int mlx5_ib_dealloc_ucontext(struct ib_ucontext 
*ibcontext) static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev, int index) { - return (pci_resource_start(dev->mdev.pdev, 0) >> PAGE_SHIFT) + index; + return (pci_resource_start(dev->mdev->pdev, 0) >> PAGE_SHIFT) + index; } static int get_command(unsigned long offset) @@ -773,7 +694,7 @@ static int alloc_pa_mkey(struct mlx5_ib_dev *dev, u32 *key, u32 pdn) seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); seg->start_addr = 0; - err = mlx5_core_create_mkey(&dev->mdev, &mr, in, sizeof(*in), + err = mlx5_core_create_mkey(dev->mdev, &mr, in, sizeof(*in), NULL, NULL, NULL); if (err) { mlx5_ib_warn(dev, "failed to create mkey, %d\n", err); @@ -798,7 +719,7 @@ static void free_pa_mkey(struct mlx5_ib_dev *dev, u32 key) memset(&mr, 0, sizeof(mr)); mr.key = key; - err = mlx5_core_destroy_mkey(&dev->mdev, &mr); + err = mlx5_core_destroy_mkey(dev->mdev, &mr); if (err) mlx5_ib_warn(dev, "failed to destroy mkey 0x%x\n", key); } @@ -815,7 +736,7 @@ static struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev, if (!pd) return ERR_PTR(-ENOMEM); - err = mlx5_core_alloc_pd(&to_mdev(ibdev)->mdev, &pd->pdn); + err = mlx5_core_alloc_pd(to_mdev(ibdev)->mdev, &pd->pdn); if (err) { kfree(pd); return ERR_PTR(err); @@ -824,14 +745,14 @@ static struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev, if (context) { resp.pdn = pd->pdn; if (ib_copy_to_udata(udata, &resp, sizeof(resp))) { - mlx5_core_dealloc_pd(&to_mdev(ibdev)->mdev, pd->pdn); + mlx5_core_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn); kfree(pd); return ERR_PTR(-EFAULT); } } else { err = alloc_pa_mkey(to_mdev(ibdev), &pd->pa_lkey, pd->pdn); if (err) { - mlx5_core_dealloc_pd(&to_mdev(ibdev)->mdev, pd->pdn); + mlx5_core_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn); kfree(pd); return ERR_PTR(err); } @@ -848,7 +769,7 @@ static int mlx5_ib_dealloc_pd(struct ib_pd *pd) if (!pd->uobject) free_pa_mkey(mdev, mpd->pa_lkey); - mlx5_core_dealloc_pd(&mdev->mdev, mpd->pdn); + mlx5_core_dealloc_pd(mdev->mdev, mpd->pdn); kfree(mpd); return 0; @@ -859,7 +780,7 @@ static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) struct mlx5_ib_dev *dev = to_mdev(ibqp->device); int err; - err = mlx5_core_attach_mcg(&dev->mdev, gid, ibqp->qp_num); + err = mlx5_core_attach_mcg(dev->mdev, gid, ibqp->qp_num); if (err) mlx5_ib_warn(dev, "failed attaching QPN 0x%x, MGID %pI6\n", ibqp->qp_num, gid->raw); @@ -872,7 +793,7 @@ static int mlx5_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) struct mlx5_ib_dev *dev = to_mdev(ibqp->device); int err; - err = mlx5_core_detach_mcg(&dev->mdev, gid, ibqp->qp_num); + err = mlx5_core_detach_mcg(dev->mdev, gid, ibqp->qp_num); if (err) mlx5_ib_warn(dev, "failed detaching QPN 0x%x, MGID %pI6\n", ibqp->qp_num, gid->raw); @@ -906,7 +827,7 @@ static int init_node_data(struct mlx5_ib_dev *dev) if (err) goto out; - dev->mdev.rev_id = be32_to_cpup((__be32 *)(out_mad->data + 32)); + dev->mdev->rev_id = be32_to_cpup((__be32 *)(out_mad->data + 32)); memcpy(&dev->ib_dev.node_guid, out_mad->data + 12, 8); out: @@ -921,7 +842,7 @@ static ssize_t show_fw_pages(struct device *device, struct device_attribute *att struct mlx5_ib_dev *dev = container_of(device, struct mlx5_ib_dev, ib_dev.dev); - return sprintf(buf, "%d\n", dev->mdev.priv.fw_pages); + return sprintf(buf, "%d\n", dev->mdev->priv.fw_pages); } static ssize_t show_reg_pages(struct device *device, @@ -930,7 +851,7 @@ static ssize_t show_reg_pages(struct device *device, struct mlx5_ib_dev *dev = container_of(device, struct mlx5_ib_dev, ib_dev.dev); - return sprintf(buf, "%d\n", 
dev->mdev.priv.reg_pages); + return sprintf(buf, "%d\n", dev->mdev->priv.reg_pages); } static ssize_t show_hca(struct device *device, struct device_attribute *attr, @@ -938,7 +859,7 @@ static ssize_t show_hca(struct device *device, struct device_attribute *attr, { struct mlx5_ib_dev *dev = container_of(device, struct mlx5_ib_dev, ib_dev.dev); - return sprintf(buf, "MT%d\n", dev->mdev.pdev->device); + return sprintf(buf, "MT%d\n", dev->mdev->pdev->device); } static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr, @@ -946,8 +867,8 @@ static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr, { struct mlx5_ib_dev *dev = container_of(device, struct mlx5_ib_dev, ib_dev.dev); - return sprintf(buf, "%d.%d.%d\n", fw_rev_maj(&dev->mdev), - fw_rev_min(&dev->mdev), fw_rev_sub(&dev->mdev)); + return sprintf(buf, "%d.%d.%d\n", fw_rev_maj(dev->mdev), + fw_rev_min(dev->mdev), fw_rev_sub(dev->mdev)); } static ssize_t show_rev(struct device *device, struct device_attribute *attr, @@ -955,7 +876,7 @@ static ssize_t show_rev(struct device *device, struct device_attribute *attr, { struct mlx5_ib_dev *dev = container_of(device, struct mlx5_ib_dev, ib_dev.dev); - return sprintf(buf, "%x\n", dev->mdev.rev_id); + return sprintf(buf, "%x\n", dev->mdev->rev_id); } static ssize_t show_board(struct device *device, struct device_attribute *attr, @@ -964,7 +885,7 @@ static ssize_t show_board(struct device *device, struct device_attribute *attr, struct mlx5_ib_dev *dev = container_of(device, struct mlx5_ib_dev, ib_dev.dev); return sprintf(buf, "%.*s\n", MLX5_BOARD_ID_LEN, - dev->mdev.board_id); + dev->mdev->board_id); } static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); @@ -983,11 +904,12 @@ static struct device_attribute *mlx5_class_attributes[] = { &dev_attr_reg_pages, }; -static void mlx5_ib_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, - void *data) +static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context, + enum mlx5_dev_event event, void *data) { - struct mlx5_ib_dev *ibdev = container_of(dev, struct mlx5_ib_dev, mdev); + struct mlx5_ib_dev *ibdev = (struct mlx5_ib_dev *)context; struct ib_event ibev; + u8 port = 0; switch (event) { @@ -1047,7 +969,7 @@ static void get_ext_port_caps(struct mlx5_ib_dev *dev) { int port; - for (port = 1; port <= dev->mdev.caps.num_ports; port++) + for (port = 1; port <= dev->mdev->caps.num_ports; port++) mlx5_query_ext_port_caps(dev, port); } @@ -1072,14 +994,14 @@ static int get_port_caps(struct mlx5_ib_dev *dev) goto out; } - for (port = 1; port <= dev->mdev.caps.num_ports; port++) { + for (port = 1; port <= dev->mdev->caps.num_ports; port++) { err = mlx5_ib_query_port(&dev->ib_dev, port, pprops); if (err) { mlx5_ib_warn(dev, "query_port %d failed %d\n", port, err); break; } - dev->mdev.caps.port[port - 1].pkey_table_len = dprops->max_pkeys; - dev->mdev.caps.port[port - 1].gid_table_len = pprops->gid_tbl_len; + dev->mdev->caps.port[port - 1].pkey_table_len = dprops->max_pkeys; + dev->mdev->caps.port[port - 1].gid_table_len = pprops->gid_tbl_len; mlx5_ib_dbg(dev, "pkey_table_len %d, gid_table_len %d\n", dprops->max_pkeys, pprops->gid_tbl_len); } @@ -1328,10 +1250,8 @@ static void destroy_dev_resources(struct mlx5_ib_resources *devr) mlx5_ib_dealloc_pd(devr->p0); } -static int init_one(struct pci_dev *pdev, - const struct pci_device_id *id) +static void *mlx5_ib_add(struct mlx5_core_dev *mdev) { - struct mlx5_core_dev *mdev; struct mlx5_ib_dev *dev; int err; int i; @@ -1340,28 +1260,19 @@ static int 
init_one(struct pci_dev *pdev, dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev)); if (!dev) - return -ENOMEM; + return NULL; - mdev = &dev->mdev; - mdev->event = mlx5_ib_event; - if (prof_sel >= ARRAY_SIZE(profile)) { - pr_warn("selected pofile out of range, selceting default\n"); - prof_sel = 0; - } - mdev->profile = &profile[prof_sel]; - err = mlx5_dev_init(mdev, pdev); - if (err) - goto err_free; + dev->mdev = mdev; err = get_port_caps(dev); if (err) - goto err_cleanup; + goto err_dealloc; get_ext_port_caps(dev); err = alloc_comp_eqs(dev); if (err) - goto err_cleanup; + goto err_dealloc; MLX5_INIT_DOORBELL_LOCK(&dev->uar_lock); @@ -1480,7 +1391,7 @@ static int init_one(struct pci_dev *pdev, dev->ib_active = true; - return 0; + return dev; err_umrc: destroy_umrc_res(dev); @@ -1494,49 +1405,39 @@ err_rsrc: err_eqs: free_comp_eqs(dev); -err_cleanup: - mlx5_dev_cleanup(mdev); - -err_free: +err_dealloc: ib_dealloc_device((struct ib_device *)dev); - return err; + return NULL; } -static void remove_one(struct pci_dev *pdev) +static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context) { - struct mlx5_ib_dev *dev = mlx5_pci2ibdev(pdev); - + struct mlx5_ib_dev *dev = context; destroy_umrc_res(dev); ib_unregister_device(&dev->ib_dev); destroy_dev_resources(&dev->devr); free_comp_eqs(dev); - mlx5_dev_cleanup(&dev->mdev); ib_dealloc_device(&dev->ib_dev); } -static DEFINE_PCI_DEVICE_TABLE(mlx5_ib_pci_table) = { - { PCI_VDEVICE(MELLANOX, 4113) }, /* MT4113 Connect-IB */ - { 0, } -}; - -MODULE_DEVICE_TABLE(pci, mlx5_ib_pci_table); - -static struct pci_driver mlx5_ib_driver = { - .name = DRIVER_NAME, - .id_table = mlx5_ib_pci_table, - .probe = init_one, - .remove = remove_one +static struct mlx5_interface mlx5_ib_interface = { + .add = mlx5_ib_add, + .remove = mlx5_ib_remove, + .event = mlx5_ib_event, }; static int __init mlx5_ib_init(void) { - return pci_register_driver(&mlx5_ib_driver); + if (deprecated_prof_sel != 2) + pr_warn("prof_sel is deprecated for mlx5_ib, set it for mlx5_core\n"); + + return mlx5_register_interface(&mlx5_ib_interface); } static void __exit mlx5_ib_cleanup(void) { - pci_unregister_driver(&mlx5_ib_driver); + mlx5_unregister_interface(&mlx5_ib_interface); } module_init(mlx5_ib_init); diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index f2ccf1a5a291..a0e204ffe367 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -360,7 +360,7 @@ struct mlx5_ib_resources { struct mlx5_ib_dev { struct ib_device ib_dev; - struct mlx5_core_dev mdev; + struct mlx5_core_dev *mdev; MLX5_DECLARE_DOORBELL_LOCK(uar_lock); struct list_head eqs_list; int num_ports; @@ -454,16 +454,6 @@ static inline struct mlx5_ib_ah *to_mah(struct ib_ah *ibah) return container_of(ibah, struct mlx5_ib_ah, ibah); } -static inline struct mlx5_ib_dev *mlx5_core2ibdev(struct mlx5_core_dev *dev) -{ - return container_of(dev, struct mlx5_ib_dev, mdev); -} - -static inline struct mlx5_ib_dev *mlx5_pci2ibdev(struct pci_dev *pdev) -{ - return mlx5_core2ibdev(pci2mlx5_core_dev(pdev)); -} - int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt, struct mlx5_db *db); void mlx5_ib_db_unmap_user(struct mlx5_ib_ucontext *context, struct mlx5_db *db); diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index afa873bd028e..80b3c63eab5d 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -73,7 +73,7 @@ static void reg_mr_callback(int status, void *context) 
struct mlx5_cache_ent *ent = &cache->ent[c]; u8 key; unsigned long flags; - struct mlx5_mr_table *table = &dev->mdev.priv.mr_table; + struct mlx5_mr_table *table = &dev->mdev->priv.mr_table; int err; spin_lock_irqsave(&ent->lock, flags); @@ -97,9 +97,9 @@ static void reg_mr_callback(int status, void *context) return; } - spin_lock_irqsave(&dev->mdev.priv.mkey_lock, flags); - key = dev->mdev.priv.mkey_key++; - spin_unlock_irqrestore(&dev->mdev.priv.mkey_lock, flags); + spin_lock_irqsave(&dev->mdev->priv.mkey_lock, flags); + key = dev->mdev->priv.mkey_key++; + spin_unlock_irqrestore(&dev->mdev->priv.mkey_lock, flags); mr->mmr.key = mlx5_idx_to_mkey(be32_to_cpu(mr->out.mkey) & 0xffffff) | key; cache->last_add = jiffies; @@ -155,7 +155,7 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num) spin_lock_irq(&ent->lock); ent->pending++; spin_unlock_irq(&ent->lock); - err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, + err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in, sizeof(*in), reg_mr_callback, mr, &mr->out); if (err) { @@ -188,7 +188,7 @@ static void remove_keys(struct mlx5_ib_dev *dev, int c, int num) ent->cur--; ent->size--; spin_unlock_irq(&ent->lock); - err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr); + err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmr); if (err) mlx5_ib_warn(dev, "failed destroy mkey\n"); else @@ -479,7 +479,7 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c) ent->cur--; ent->size--; spin_unlock_irq(&ent->lock); - err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr); + err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmr); if (err) mlx5_ib_warn(dev, "failed destroy mkey\n"); else @@ -496,7 +496,7 @@ static int mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev) if (!mlx5_debugfs_root) return 0; - cache->root = debugfs_create_dir("mr_cache", dev->mdev.priv.dbg_root); + cache->root = debugfs_create_dir("mr_cache", dev->mdev->priv.dbg_root); if (!cache->root) return -ENOMEM; @@ -571,8 +571,8 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) ent->order = i + 2; ent->dev = dev; - if (dev->mdev.profile->mask & MLX5_PROF_MASK_MR_CACHE) - limit = dev->mdev.profile->mr_cache[i].limit; + if (dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) + limit = dev->mdev->profile->mr_cache[i].limit; else limit = 0; @@ -610,7 +610,7 @@ int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev) struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc) { struct mlx5_ib_dev *dev = to_mdev(pd->device); - struct mlx5_core_dev *mdev = &dev->mdev; + struct mlx5_core_dev *mdev = dev->mdev; struct mlx5_create_mkey_mbox_in *in; struct mlx5_mkey_seg *seg; struct mlx5_ib_mr *mr; @@ -846,7 +846,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr, in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); in->xlat_oct_act_size = cpu_to_be32(get_octo_len(virt_addr, length, 1 << page_shift)); - err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, inlen, NULL, + err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in, inlen, NULL, NULL, NULL); if (err) { mlx5_ib_warn(dev, "create mkey failed\n"); @@ -923,7 +923,7 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, mr->umem = umem; mr->npages = npages; spin_lock(&dev->mr_lock); - dev->mdev.priv.reg_pages += npages; + dev->mdev->priv.reg_pages += npages; spin_unlock(&dev->mr_lock); mr->ibmr.lkey = mr->mmr.key; mr->ibmr.rkey = mr->mmr.key; @@ -978,7 +978,7 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr) int err; if (!umred) { - err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr); + err = 
mlx5_core_destroy_mkey(dev->mdev, &mr->mmr); if (err) { mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n", mr->mmr.key, err); @@ -996,7 +996,7 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr) if (umem) { ib_umem_release(umem); spin_lock(&dev->mr_lock); - dev->mdev.priv.reg_pages -= npages; + dev->mdev->priv.reg_pages -= npages; spin_unlock(&dev->mr_lock); } @@ -1044,7 +1044,7 @@ struct ib_mr *mlx5_ib_create_mr(struct ib_pd *pd, } /* create mem & wire PSVs */ - err = mlx5_core_create_psv(&dev->mdev, to_mpd(pd)->pdn, + err = mlx5_core_create_psv(dev->mdev, to_mpd(pd)->pdn, 2, psv_index); if (err) goto err_free_sig; @@ -1060,7 +1060,7 @@ struct ib_mr *mlx5_ib_create_mr(struct ib_pd *pd, } in->seg.flags = MLX5_PERM_UMR_EN | access_mode; - err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, sizeof(*in), + err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in, sizeof(*in), NULL, NULL, NULL); if (err) goto err_destroy_psv; @@ -1074,11 +1074,11 @@ struct ib_mr *mlx5_ib_create_mr(struct ib_pd *pd, err_destroy_psv: if (mr->sig) { - if (mlx5_core_destroy_psv(&dev->mdev, + if (mlx5_core_destroy_psv(dev->mdev, mr->sig->psv_memory.psv_idx)) mlx5_ib_warn(dev, "failed to destroy mem psv %d\n", mr->sig->psv_memory.psv_idx); - if (mlx5_core_destroy_psv(&dev->mdev, + if (mlx5_core_destroy_psv(dev->mdev, mr->sig->psv_wire.psv_idx)) mlx5_ib_warn(dev, "failed to destroy wire psv %d\n", mr->sig->psv_wire.psv_idx); @@ -1099,18 +1099,18 @@ int mlx5_ib_destroy_mr(struct ib_mr *ibmr) int err; if (mr->sig) { - if (mlx5_core_destroy_psv(&dev->mdev, + if (mlx5_core_destroy_psv(dev->mdev, mr->sig->psv_memory.psv_idx)) mlx5_ib_warn(dev, "failed to destroy mem psv %d\n", mr->sig->psv_memory.psv_idx); - if (mlx5_core_destroy_psv(&dev->mdev, + if (mlx5_core_destroy_psv(dev->mdev, mr->sig->psv_wire.psv_idx)) mlx5_ib_warn(dev, "failed to destroy wire psv %d\n", mr->sig->psv_wire.psv_idx); kfree(mr->sig); } - err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr); + err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmr); if (err) { mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n", mr->mmr.key, err); @@ -1149,7 +1149,7 @@ struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd, * TBD not needed - issue 197292 */ in->seg.log2_page_size = PAGE_SHIFT; - err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, sizeof(*in), NULL, + err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in, sizeof(*in), NULL, NULL, NULL); kfree(in); if (err) @@ -1202,7 +1202,7 @@ void mlx5_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list) struct mlx5_ib_dev *dev = to_mdev(page_list->device); int size = page_list->max_page_list_len * sizeof(u64); - dma_free_coherent(&dev->mdev.pdev->dev, size, mfrpl->mapped_page_list, + dma_free_coherent(&dev->mdev->pdev->dev, size, mfrpl->mapped_page_list, mfrpl->map); kfree(mfrpl->ibfrpl.page_list); kfree(mfrpl); diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index bbbcf389272c..b8bb6ad6350c 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -162,7 +162,7 @@ static int set_rq_size(struct mlx5_ib_dev *dev, struct ib_qp_cap *cap, int wq_size; /* Sanity check RQ size before proceeding */ - if (cap->max_recv_wr > dev->mdev.caps.max_wqes) + if (cap->max_recv_wr > dev->mdev->caps.max_wqes) return -EINVAL; if (!has_rq) { @@ -182,10 +182,10 @@ static int set_rq_size(struct mlx5_ib_dev *dev, struct ib_qp_cap *cap, wq_size = roundup_pow_of_two(cap->max_recv_wr) * wqe_size; wq_size = max_t(int, wq_size, MLX5_SEND_WQE_BB); qp->rq.wqe_cnt = wq_size / 
wqe_size; - if (wqe_size > dev->mdev.caps.max_rq_desc_sz) { + if (wqe_size > dev->mdev->caps.max_rq_desc_sz) { mlx5_ib_dbg(dev, "wqe_size %d, max %d\n", wqe_size, - dev->mdev.caps.max_rq_desc_sz); + dev->mdev->caps.max_rq_desc_sz); return -EINVAL; } qp->rq.wqe_shift = ilog2(wqe_size); @@ -277,9 +277,9 @@ static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr, if (wqe_size < 0) return wqe_size; - if (wqe_size > dev->mdev.caps.max_sq_desc_sz) { + if (wqe_size > dev->mdev->caps.max_sq_desc_sz) { mlx5_ib_dbg(dev, "wqe_size(%d) > max_sq_desc_sz(%d)\n", - wqe_size, dev->mdev.caps.max_sq_desc_sz); + wqe_size, dev->mdev->caps.max_sq_desc_sz); return -EINVAL; } @@ -292,9 +292,9 @@ static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr, wq_size = roundup_pow_of_two(attr->cap.max_send_wr * wqe_size); qp->sq.wqe_cnt = wq_size / MLX5_SEND_WQE_BB; - if (qp->sq.wqe_cnt > dev->mdev.caps.max_wqes) { + if (qp->sq.wqe_cnt > dev->mdev->caps.max_wqes) { mlx5_ib_dbg(dev, "wqe count(%d) exceeds limits(%d)\n", - qp->sq.wqe_cnt, dev->mdev.caps.max_wqes); + qp->sq.wqe_cnt, dev->mdev->caps.max_wqes); return -ENOMEM; } qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB); @@ -311,9 +311,9 @@ static int set_user_buf_size(struct mlx5_ib_dev *dev, { int desc_sz = 1 << qp->sq.wqe_shift; - if (desc_sz > dev->mdev.caps.max_sq_desc_sz) { + if (desc_sz > dev->mdev->caps.max_sq_desc_sz) { mlx5_ib_warn(dev, "desc_sz %d, max_sq_desc_sz %d\n", - desc_sz, dev->mdev.caps.max_sq_desc_sz); + desc_sz, dev->mdev->caps.max_sq_desc_sz); return -EINVAL; } @@ -325,9 +325,9 @@ static int set_user_buf_size(struct mlx5_ib_dev *dev, qp->sq.wqe_cnt = ucmd->sq_wqe_count; - if (qp->sq.wqe_cnt > dev->mdev.caps.max_wqes) { + if (qp->sq.wqe_cnt > dev->mdev->caps.max_wqes) { mlx5_ib_warn(dev, "wqe_cnt %d, max_wqes %d\n", - qp->sq.wqe_cnt, dev->mdev.caps.max_wqes); + qp->sq.wqe_cnt, dev->mdev->caps.max_wqes); return -EINVAL; } @@ -674,7 +674,7 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, int uuarn; int err; - uuari = &dev->mdev.priv.uuari; + uuari = &dev->mdev->priv.uuari; if (init_attr->create_flags & ~(IB_QP_CREATE_SIGNATURE_EN | IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)) return -EINVAL; @@ -700,7 +700,7 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift; qp->buf_size = err + (qp->rq.wqe_cnt << qp->rq.wqe_shift); - err = mlx5_buf_alloc(&dev->mdev, qp->buf_size, PAGE_SIZE * 2, &qp->buf); + err = mlx5_buf_alloc(dev->mdev, qp->buf_size, PAGE_SIZE * 2, &qp->buf); if (err) { mlx5_ib_dbg(dev, "err %d\n", err); goto err_uuar; @@ -722,7 +722,7 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, mlx5_fill_page_array(&qp->buf, (*in)->pas); - err = mlx5_db_alloc(&dev->mdev, &qp->db); + err = mlx5_db_alloc(dev->mdev, &qp->db); if (err) { mlx5_ib_dbg(dev, "err %d\n", err); goto err_free; @@ -747,7 +747,7 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, return 0; err_wrid: - mlx5_db_free(&dev->mdev, &qp->db); + mlx5_db_free(dev->mdev, &qp->db); kfree(qp->sq.wqe_head); kfree(qp->sq.w_list); kfree(qp->sq.wrid); @@ -758,23 +758,23 @@ err_free: mlx5_vfree(*in); err_buf: - mlx5_buf_free(&dev->mdev, &qp->buf); + mlx5_buf_free(dev->mdev, &qp->buf); err_uuar: - free_uuar(&dev->mdev.priv.uuari, uuarn); + free_uuar(&dev->mdev->priv.uuari, uuarn); return err; } static void destroy_qp_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) { - mlx5_db_free(&dev->mdev, &qp->db); + mlx5_db_free(dev->mdev, &qp->db); kfree(qp->sq.wqe_head); kfree(qp->sq.w_list); 
kfree(qp->sq.wrid); kfree(qp->sq.wr_data); kfree(qp->rq.wrid); - mlx5_buf_free(&dev->mdev, &qp->buf); - free_uuar(&dev->mdev.priv.uuari, qp->bf->uuarn); + mlx5_buf_free(dev->mdev, &qp->buf); + free_uuar(&dev->mdev->priv.uuari, qp->bf->uuarn); } static __be32 get_rx_type(struct mlx5_ib_qp *qp, struct ib_qp_init_attr *attr) @@ -812,7 +812,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, spin_lock_init(&qp->rq.lock); if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) { - if (!(dev->mdev.caps.flags & MLX5_DEV_CAP_FLAG_BLOCK_MCAST)) { + if (!(dev->mdev->caps.flags & MLX5_DEV_CAP_FLAG_BLOCK_MCAST)) { mlx5_ib_dbg(dev, "block multicast loopback isn't supported\n"); return -EINVAL; } else { @@ -851,9 +851,9 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, mlx5_ib_dbg(dev, "invalid rq params\n"); return -EINVAL; } - if (ucmd.sq_wqe_count > dev->mdev.caps.max_wqes) { + if (ucmd.sq_wqe_count > dev->mdev->caps.max_wqes) { mlx5_ib_dbg(dev, "requested sq_wqe_count (%d) > max allowed (%d)\n", - ucmd.sq_wqe_count, dev->mdev.caps.max_wqes); + ucmd.sq_wqe_count, dev->mdev->caps.max_wqes); return -EINVAL; } err = create_user_qp(dev, pd, qp, udata, &in, &resp, &inlen); @@ -957,7 +957,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, in->ctx.db_rec_addr = cpu_to_be64(qp->db.dma); - err = mlx5_core_create_qp(&dev->mdev, &qp->mqp, in, inlen); + err = mlx5_core_create_qp(dev->mdev, &qp->mqp, in, inlen); if (err) { mlx5_ib_dbg(dev, "create qp failed\n"); goto err_create; @@ -1081,7 +1081,7 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) if (!in) return; if (qp->state != IB_QPS_RESET) - if (mlx5_core_qp_modify(&dev->mdev, to_mlx5_state(qp->state), + if (mlx5_core_qp_modify(dev->mdev, to_mlx5_state(qp->state), MLX5_QP_STATE_RST, in, sizeof(*in), &qp->mqp)) mlx5_ib_warn(dev, "mlx5_ib: modify QP %06x to RESET failed\n", qp->mqp.qpn); @@ -1097,7 +1097,7 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) mlx5_ib_unlock_cqs(send_cq, recv_cq); } - err = mlx5_core_destroy_qp(&dev->mdev, &qp->mqp); + err = mlx5_core_destroy_qp(dev->mdev, &qp->mqp); if (err) mlx5_ib_warn(dev, "failed to destroy QP 0x%x\n", qp->mqp.qpn); kfree(in); @@ -1165,7 +1165,7 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, switch (init_attr->qp_type) { case IB_QPT_XRC_TGT: case IB_QPT_XRC_INI: - if (!(dev->mdev.caps.flags & MLX5_DEV_CAP_FLAG_XRC)) { + if (!(dev->mdev->caps.flags & MLX5_DEV_CAP_FLAG_XRC)) { mlx5_ib_dbg(dev, "XRC not supported\n"); return ERR_PTR(-ENOSYS); } @@ -1279,7 +1279,7 @@ static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate) } else { while (rate != IB_RATE_2_5_GBPS && !(1 << (rate + MLX5_STAT_RATE_OFFSET) & - dev->mdev.caps.stat_rate_support)) + dev->mdev->caps.stat_rate_support)) --rate; } @@ -1318,9 +1318,9 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, const struct ib_ah_attr *ah, path->port = port; if (ah->ah_flags & IB_AH_GRH) { - if (ah->grh.sgid_index >= dev->mdev.caps.port[port - 1].gid_table_len) { + if (ah->grh.sgid_index >= dev->mdev->caps.port[port - 1].gid_table_len) { pr_err(KERN_ERR "sgid_index (%u) too large. 
max is %d\n", - ah->grh.sgid_index, dev->mdev.caps.port[port - 1].gid_table_len); + ah->grh.sgid_index, dev->mdev->caps.port[port - 1].gid_table_len); return -EINVAL; } @@ -1539,7 +1539,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, err = -EINVAL; goto out; } - context->mtu_msgmax = (attr->path_mtu << 5) | dev->mdev.caps.log_max_msg; + context->mtu_msgmax = (attr->path_mtu << 5) | dev->mdev->caps.log_max_msg; } if (attr_mask & IB_QP_DEST_QPN) @@ -1637,7 +1637,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, optpar = ib_mask_to_mlx5_opt(attr_mask); optpar &= opt_mask[mlx5_cur][mlx5_new][mlx5_st]; in->optparam = cpu_to_be32(optpar); - err = mlx5_core_qp_modify(&dev->mdev, to_mlx5_state(cur_state), + err = mlx5_core_qp_modify(dev->mdev, to_mlx5_state(cur_state), to_mlx5_state(new_state), in, sqd_event, &qp->mqp); if (err) @@ -1699,21 +1699,21 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, goto out; if ((attr_mask & IB_QP_PORT) && - (attr->port_num == 0 || attr->port_num > dev->mdev.caps.num_ports)) + (attr->port_num == 0 || attr->port_num > dev->mdev->caps.num_ports)) goto out; if (attr_mask & IB_QP_PKEY_INDEX) { port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port; - if (attr->pkey_index >= dev->mdev.caps.port[port - 1].pkey_table_len) + if (attr->pkey_index >= dev->mdev->caps.port[port - 1].pkey_table_len) goto out; } if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && - attr->max_rd_atomic > dev->mdev.caps.max_ra_res_qp) + attr->max_rd_atomic > dev->mdev->caps.max_ra_res_qp) goto out; if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC && - attr->max_dest_rd_atomic > dev->mdev.caps.max_ra_req_qp) + attr->max_dest_rd_atomic > dev->mdev->caps.max_ra_req_qp) goto out; if (cur_state == new_state && cur_state == IB_QPS_RESET) { @@ -2479,7 +2479,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, { struct mlx5_wqe_ctrl_seg *ctrl = NULL; /* compiler warning */ struct mlx5_ib_dev *dev = to_mdev(ibqp->device); - struct mlx5_core_dev *mdev = &dev->mdev; + struct mlx5_core_dev *mdev = dev->mdev; struct mlx5_ib_qp *qp = to_mqp(ibqp); struct mlx5_ib_mr *mr; struct mlx5_wqe_data_seg *dpseg; @@ -2888,7 +2888,7 @@ static int to_ib_qp_access_flags(int mlx5_flags) static void to_ib_ah_attr(struct mlx5_ib_dev *ibdev, struct ib_ah_attr *ib_ah_attr, struct mlx5_qp_path *path) { - struct mlx5_core_dev *dev = &ibdev->mdev; + struct mlx5_core_dev *dev = ibdev->mdev; memset(ib_ah_attr, 0, sizeof(*ib_ah_attr)); ib_ah_attr->port_num = path->port; @@ -2931,7 +2931,7 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr goto out; } context = &outb->ctx; - err = mlx5_core_qp_query(&dev->mdev, &qp->mqp, outb, sizeof(*outb)); + err = mlx5_core_qp_query(dev->mdev, &qp->mqp, outb, sizeof(*outb)); if (err) goto out_free; @@ -3014,14 +3014,14 @@ struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev, struct mlx5_ib_xrcd *xrcd; int err; - if (!(dev->mdev.caps.flags & MLX5_DEV_CAP_FLAG_XRC)) + if (!(dev->mdev->caps.flags & MLX5_DEV_CAP_FLAG_XRC)) return ERR_PTR(-ENOSYS); xrcd = kmalloc(sizeof(*xrcd), GFP_KERNEL); if (!xrcd) return ERR_PTR(-ENOMEM); - err = mlx5_core_xrcd_alloc(&dev->mdev, &xrcd->xrcdn); + err = mlx5_core_xrcd_alloc(dev->mdev, &xrcd->xrcdn); if (err) { kfree(xrcd); return ERR_PTR(-ENOMEM); @@ -3036,7 +3036,7 @@ int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd) u32 xrcdn = to_mxrcd(xrcd)->xrcdn; int err; - err = mlx5_core_xrcd_dealloc(&dev->mdev, xrcdn); + err = mlx5_core_xrcd_dealloc(dev->mdev, xrcdn); if (err) { mlx5_ib_warn(dev, "failed 
to dealloc xrcdn 0x%x\n", xrcdn); return err; diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index 384af6dec5eb..70bd131ba646 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -159,7 +159,7 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq, int page_shift; int npages; - err = mlx5_db_alloc(&dev->mdev, &srq->db); + err = mlx5_db_alloc(dev->mdev, &srq->db); if (err) { mlx5_ib_warn(dev, "alloc dbell rec failed\n"); return err; @@ -167,7 +167,7 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq, *srq->db.db = 0; - if (mlx5_buf_alloc(&dev->mdev, buf_size, PAGE_SIZE * 2, &srq->buf)) { + if (mlx5_buf_alloc(dev->mdev, buf_size, PAGE_SIZE * 2, &srq->buf)) { mlx5_ib_dbg(dev, "buf alloc failed\n"); err = -ENOMEM; goto err_db; @@ -212,10 +212,10 @@ err_in: mlx5_vfree(*in); err_buf: - mlx5_buf_free(&dev->mdev, &srq->buf); + mlx5_buf_free(dev->mdev, &srq->buf); err_db: - mlx5_db_free(&dev->mdev, &srq->db); + mlx5_db_free(dev->mdev, &srq->db); return err; } @@ -229,8 +229,8 @@ static void destroy_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq) static void destroy_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq) { kfree(srq->wrid); - mlx5_buf_free(&dev->mdev, &srq->buf); - mlx5_db_free(&dev->mdev, &srq->db); + mlx5_buf_free(dev->mdev, &srq->buf); + mlx5_db_free(dev->mdev, &srq->db); } struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, @@ -248,10 +248,10 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, u32 flgs, xrcdn; /* Sanity check SRQ size before proceeding */ - if (init_attr->attr.max_wr >= dev->mdev.caps.max_srq_wqes) { + if (init_attr->attr.max_wr >= dev->mdev->caps.max_srq_wqes) { mlx5_ib_dbg(dev, "max_wr %d, cap %d\n", init_attr->attr.max_wr, - dev->mdev.caps.max_srq_wqes); + dev->mdev->caps.max_srq_wqes); return ERR_PTR(-EINVAL); } @@ -303,7 +303,7 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, in->ctx.pd = cpu_to_be32(to_mpd(pd)->pdn); in->ctx.db_record = cpu_to_be64(srq->db.dma); - err = mlx5_core_create_srq(&dev->mdev, &srq->msrq, in, inlen); + err = mlx5_core_create_srq(dev->mdev, &srq->msrq, in, inlen); mlx5_vfree(in); if (err) { mlx5_ib_dbg(dev, "create SRQ failed, err %d\n", err); @@ -327,7 +327,7 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, return &srq->ibsrq; err_core: - mlx5_core_destroy_srq(&dev->mdev, &srq->msrq); + mlx5_core_destroy_srq(dev->mdev, &srq->msrq); err_usr_kern_srq: if (pd->uobject) @@ -357,7 +357,7 @@ int mlx5_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, return -EINVAL; mutex_lock(&srq->mutex); - ret = mlx5_core_arm_srq(&dev->mdev, &srq->msrq, attr->srq_limit, 1); + ret = mlx5_core_arm_srq(dev->mdev, &srq->msrq, attr->srq_limit, 1); mutex_unlock(&srq->mutex); if (ret) @@ -378,7 +378,7 @@ int mlx5_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr) if (!out) return -ENOMEM; - ret = mlx5_core_query_srq(&dev->mdev, &srq->msrq, out); + ret = mlx5_core_query_srq(dev->mdev, &srq->msrq, out); if (ret) goto out_box; @@ -396,7 +396,7 @@ int mlx5_ib_destroy_srq(struct ib_srq *srq) struct mlx5_ib_dev *dev = to_mdev(srq->device); struct mlx5_ib_srq *msrq = to_msrq(srq); - mlx5_core_destroy_srq(&dev->mdev, &msrq->msrq); + mlx5_core_destroy_srq(dev->mdev, &msrq->msrq); if (srq->uobject) { mlx5_ib_db_unmap_user(to_mucontext(srq->uobject->context), &msrq->db); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 
ee24f132e319..4b7f9da4bf11 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -58,7 +58,100 @@ int mlx5_core_debug_mask; module_param_named(debug_mask, mlx5_core_debug_mask, int, 0644); MODULE_PARM_DESC(debug_mask, "debug mask: 1 = dump cmd data, 2 = dump cmd exec time, 3 = both. Default=0"); +#define MLX5_DEFAULT_PROF 2 +static int prof_sel = MLX5_DEFAULT_PROF; +module_param_named(prof_sel, prof_sel, int, 0444); +MODULE_PARM_DESC(prof_sel, "profile selector. Valid range 0 - 2"); + struct workqueue_struct *mlx5_core_wq; +static LIST_HEAD(intf_list); +static LIST_HEAD(dev_list); +static DEFINE_MUTEX(intf_mutex); + +struct mlx5_device_context { + struct list_head list; + struct mlx5_interface *intf; + void *context; +}; + +static struct mlx5_profile profile[] = { + [0] = { + .mask = 0, + }, + [1] = { + .mask = MLX5_PROF_MASK_QP_SIZE, + .log_max_qp = 12, + }, + [2] = { + .mask = MLX5_PROF_MASK_QP_SIZE | + MLX5_PROF_MASK_MR_CACHE, + .log_max_qp = 17, + .mr_cache[0] = { + .size = 500, + .limit = 250 + }, + .mr_cache[1] = { + .size = 500, + .limit = 250 + }, + .mr_cache[2] = { + .size = 500, + .limit = 250 + }, + .mr_cache[3] = { + .size = 500, + .limit = 250 + }, + .mr_cache[4] = { + .size = 500, + .limit = 250 + }, + .mr_cache[5] = { + .size = 500, + .limit = 250 + }, + .mr_cache[6] = { + .size = 500, + .limit = 250 + }, + .mr_cache[7] = { + .size = 500, + .limit = 250 + }, + .mr_cache[8] = { + .size = 500, + .limit = 250 + }, + .mr_cache[9] = { + .size = 500, + .limit = 250 + }, + .mr_cache[10] = { + .size = 500, + .limit = 250 + }, + .mr_cache[11] = { + .size = 500, + .limit = 250 + }, + .mr_cache[12] = { + .size = 64, + .limit = 32 + }, + .mr_cache[13] = { + .size = 32, + .limit = 16 + }, + .mr_cache[14] = { + .size = 16, + .limit = 8 + }, + .mr_cache[15] = { + .size = 8, + .limit = 4 + }, + }, +}; static int set_dma_caps(struct pci_dev *pdev) { @@ -299,7 +392,7 @@ static int mlx5_core_disable_hca(struct mlx5_core_dev *dev) return 0; } -int mlx5_dev_init(struct mlx5_core_dev *dev, struct pci_dev *pdev) +static int mlx5_dev_init(struct mlx5_core_dev *dev, struct pci_dev *pdev) { struct mlx5_priv *priv = &dev->priv; int err; @@ -489,7 +582,7 @@ err_dbg: } EXPORT_SYMBOL(mlx5_dev_init); -void mlx5_dev_cleanup(struct mlx5_core_dev *dev) +static void mlx5_dev_cleanup(struct mlx5_core_dev *dev) { struct mlx5_priv *priv = &dev->priv; @@ -516,7 +609,190 @@ void mlx5_dev_cleanup(struct mlx5_core_dev *dev) pci_disable_device(dev->pdev); debugfs_remove(priv->dbg_root); } -EXPORT_SYMBOL(mlx5_dev_cleanup); + +static void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv) +{ + struct mlx5_device_context *dev_ctx; + struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv); + + dev_ctx = kmalloc(sizeof(*dev_ctx), GFP_KERNEL); + if (!dev_ctx) { + pr_warn("mlx5_add_device: alloc context failed\n"); + return; + } + + dev_ctx->intf = intf; + dev_ctx->context = intf->add(dev); + + if (dev_ctx->context) { + spin_lock_irq(&priv->ctx_lock); + list_add_tail(&dev_ctx->list, &priv->ctx_list); + spin_unlock_irq(&priv->ctx_lock); + } else { + kfree(dev_ctx); + } +} + +static void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv) +{ + struct mlx5_device_context *dev_ctx; + struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv); + + list_for_each_entry(dev_ctx, &priv->ctx_list, list) + if (dev_ctx->intf == intf) { + spin_lock_irq(&priv->ctx_lock); + list_del(&dev_ctx->list); + 
spin_unlock_irq(&priv->ctx_lock); + + intf->remove(dev, dev_ctx->context); + kfree(dev_ctx); + return; + } +} +static int mlx5_register_device(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv = &dev->priv; + struct mlx5_interface *intf; + + mutex_lock(&intf_mutex); + list_add_tail(&priv->dev_list, &dev_list); + list_for_each_entry(intf, &intf_list, list) + mlx5_add_device(intf, priv); + mutex_unlock(&intf_mutex); + + return 0; +} +static void mlx5_unregister_device(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv = &dev->priv; + struct mlx5_interface *intf; + + mutex_lock(&intf_mutex); + list_for_each_entry(intf, &intf_list, list) + mlx5_remove_device(intf, priv); + list_del(&priv->dev_list); + mutex_unlock(&intf_mutex); +} + +int mlx5_register_interface(struct mlx5_interface *intf) +{ + struct mlx5_priv *priv; + + if (!intf->add || !intf->remove) + return -EINVAL; + + mutex_lock(&intf_mutex); + list_add_tail(&intf->list, &intf_list); + list_for_each_entry(priv, &dev_list, dev_list) + mlx5_add_device(intf, priv); + mutex_unlock(&intf_mutex); + + return 0; +} +EXPORT_SYMBOL(mlx5_register_interface); + +void mlx5_unregister_interface(struct mlx5_interface *intf) +{ + struct mlx5_priv *priv; + + mutex_lock(&intf_mutex); + list_for_each_entry(priv, &dev_list, dev_list) + mlx5_remove_device(intf, priv); + list_del(&intf->list); + mutex_unlock(&intf_mutex); +} +EXPORT_SYMBOL(mlx5_unregister_interface); + +static void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, + void *data) +{ + struct mlx5_priv *priv = &dev->priv; + struct mlx5_device_context *dev_ctx; + unsigned long flags; + + spin_lock_irqsave(&priv->ctx_lock, flags); + + list_for_each_entry(dev_ctx, &priv->ctx_list, list) + if (dev_ctx->intf->event) + dev_ctx->intf->event(dev, dev_ctx->context, event, data); + + spin_unlock_irqrestore(&priv->ctx_lock, flags); +} + +struct mlx5_core_event_handler { + void (*event)(struct mlx5_core_dev *dev, + enum mlx5_dev_event event, + void *data); +}; + +static int init_one(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + struct mlx5_core_dev *dev; + struct mlx5_priv *priv; + int err; + + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) { + dev_err(&pdev->dev, "kzalloc failed\n"); + return -ENOMEM; + } + priv = &dev->priv; + + pci_set_drvdata(pdev, dev); + + if (prof_sel < 0 || prof_sel >= ARRAY_SIZE(profile)) { + pr_warn("selected profile out of range, selecting default (%d)\n", + MLX5_DEFAULT_PROF); + prof_sel = MLX5_DEFAULT_PROF; + } + dev->profile = &profile[prof_sel]; + dev->event = mlx5_core_event; + + err = mlx5_dev_init(dev, pdev); + if (err) { + dev_err(&pdev->dev, "mlx5_dev_init failed %d\n", err); + goto out; + } + + INIT_LIST_HEAD(&priv->ctx_list); + spin_lock_init(&priv->ctx_lock); + err = mlx5_register_device(dev); + if (err) { + dev_err(&pdev->dev, "mlx5_register_device failed %d\n", err); + goto out_init; + } + + return 0; + +out_init: + mlx5_dev_cleanup(dev); +out: + kfree(dev); + return err; +} +static void remove_one(struct pci_dev *pdev) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + + mlx5_unregister_device(dev); + mlx5_dev_cleanup(dev); + kfree(dev); +} + +static const struct pci_device_id mlx5_core_pci_table[] = { + { PCI_VDEVICE(MELLANOX, 4113) }, /* MT4113 Connect-IB */ + { 0, } +}; + +MODULE_DEVICE_TABLE(pci, mlx5_core_pci_table); + +static struct pci_driver mlx5_core_driver = { + .name = DRIVER_NAME, + .id_table = mlx5_core_pci_table, + .probe = init_one, + .remove = remove_one +}; static int __init init(void) { @@ 
-530,8 +806,15 @@ static int __init init(void) } mlx5_health_init(); + err = pci_register_driver(&mlx5_core_driver); + if (err) + goto err_health; + return 0; +err_health: + mlx5_health_cleanup(); + destroy_workqueue(mlx5_core_wq); err_debug: mlx5_unregister_debugfs(); return err; @@ -539,6 +822,7 @@ err_debug: static void __exit cleanup(void) { + pci_unregister_driver(&mlx5_core_driver); mlx5_health_cleanup(); destroy_workqueue(mlx5_core_wq); mlx5_unregister_debugfs(); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 2bce4aad2570..d0cb5984a45f 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -543,6 +543,10 @@ struct mlx5_priv { /* protect mkey key part */ spinlock_t mkey_lock; u8 mkey_key; + + struct list_head dev_list; + struct list_head ctx_list; + spinlock_t ctx_lock; }; struct mlx5_core_dev { @@ -686,8 +690,6 @@ static inline u32 mlx5_base_mkey(const u32 key) return key & 0xffffff00u; } -int mlx5_dev_init(struct mlx5_core_dev *dev, struct pci_dev *pdev); -void mlx5_dev_cleanup(struct mlx5_core_dev *dev); int mlx5_cmd_init(struct mlx5_core_dev *dev); void mlx5_cmd_cleanup(struct mlx5_core_dev *dev); void mlx5_cmd_use_events(struct mlx5_core_dev *dev); @@ -811,6 +813,17 @@ enum { MAX_MR_CACHE_ENTRIES = 16, }; +struct mlx5_interface { + void * (*add)(struct mlx5_core_dev *dev); + void (*remove)(struct mlx5_core_dev *dev, void *context); + void (*event)(struct mlx5_core_dev *dev, void *context, + enum mlx5_dev_event event, void *data); + struct list_head list; +}; + +int mlx5_register_interface(struct mlx5_interface *intf); +void mlx5_unregister_interface(struct mlx5_interface *intf); + struct mlx5_profile { u64 mask; u32 log_max_qp; -- cgit v1.2.3-59-g8ed1b From f241e7497ec2d22b83002b17ae91a851d4034cb7 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Mon, 28 Jul 2014 23:30:23 +0300 Subject: mlx5: minor fixes (mainly avoidance of hidden casts) There were many places where parameters which should be u8/u16 were integer type. Additionally, in 2 places, a check for a non-null pointer was added before dereferencing the pointer (this is actually a bug fix). Signed-off-by: Jack Morgenstein Signed-off-by: Eli Cohen Signed-off-by: David S. 
Miller --- drivers/infiniband/hw/mlx5/cq.c | 2 +- drivers/infiniband/hw/mlx5/mad.c | 2 +- drivers/infiniband/hw/mlx5/main.c | 2 +- drivers/infiniband/hw/mlx5/mem.c | 2 +- drivers/infiniband/hw/mlx5/mlx5_ib.h | 2 +- drivers/infiniband/hw/mlx5/qp.c | 4 ++-- drivers/net/ethernet/mellanox/mlx5/core/alloc.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 3 ++- drivers/net/ethernet/mellanox/mlx5/core/mad.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/main.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/port.c | 2 +- include/linux/mlx5/device.h | 4 ---- include/linux/mlx5/driver.h | 8 ++++---- 15 files changed, 19 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 3b4dc858cef9..e4056279166d 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -348,7 +348,7 @@ static void handle_atomic(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64, static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64, u16 tail, u16 head) { - int idx; + u16 idx; do { idx = tail & (qp->sq.wqe_cnt - 1); diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c index e259e7393152..b514bbb5610f 100644 --- a/drivers/infiniband/hw/mlx5/mad.c +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -41,7 +41,7 @@ enum { }; int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey, - int port, struct ib_wc *in_wc, struct ib_grh *in_grh, + u8 port, struct ib_wc *in_wc, struct ib_grh *in_grh, void *in_mad, void *response_mad) { u8 op_modifier = 0; diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index f2cfd363a705..166335a95c59 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -478,7 +478,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, int uuarn; int err; int i; - int reqlen; + size_t reqlen; if (!dev->ib_active) return ERR_PTR(-EAGAIN); diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c index 8499aec94db6..a3e81444c825 100644 --- a/drivers/infiniband/hw/mlx5/mem.c +++ b/drivers/infiniband/hw/mlx5/mem.c @@ -148,7 +148,7 @@ int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset) u64 off_mask; u64 buf_off; - page_size = 1 << page_shift; + page_size = (u64)1 << page_shift; page_mask = page_size - 1; buf_off = addr & page_mask; off_size = page_size >> 6; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index a0e204ffe367..386780f0d1e1 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -461,7 +461,7 @@ void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq) void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq); void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index); int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey, - int port, struct ib_wc *in_wc, struct ib_grh *in_grh, + u8 port, struct ib_wc *in_wc, struct ib_grh *in_grh, void *in_mad, void *response_mad); struct ib_ah *create_ib_ah(struct ib_ah_attr *ah_attr, struct mlx5_ib_ah *ah); diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index b8bb6ad6350c..7efe6e3f3542 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -2539,7 
+2539,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, case IB_WR_RDMA_WRITE_WITH_IMM: set_raddr_seg(seg, wr->wr.rdma.remote_addr, wr->wr.rdma.rkey); - seg += sizeof(struct mlx5_wqe_raddr_seg); + seg += sizeof(struct mlx5_wqe_raddr_seg); size += sizeof(struct mlx5_wqe_raddr_seg) / 16; break; @@ -2668,7 +2668,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, case IB_QPT_SMI: case IB_QPT_GSI: set_datagram_seg(seg, wr); - seg += sizeof(struct mlx5_wqe_datagram_seg); + seg += sizeof(struct mlx5_wqe_datagram_seg); size += sizeof(struct mlx5_wqe_datagram_seg) / 16; if (unlikely((seg == qend))) seg = mlx5_get_send_wqe(qp, 0); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c index b215742b842f..56779c1c7811 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c @@ -56,7 +56,7 @@ int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, int max_direct, if (size <= max_direct) { buf->nbufs = 1; buf->npages = 1; - buf->page_shift = get_order(size) + PAGE_SHIFT; + buf->page_shift = (u8)get_order(size) + PAGE_SHIFT; buf->direct.buf = dma_zalloc_coherent(&dev->pdev->dev, size, &t, GFP_KERNEL); if (!buf->direct.buf) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 87d1b018a9c3..4671747dd365 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -464,7 +464,7 @@ static void dump_command(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg = input ? ent->in : ent->out; struct mlx5_cmd_mailbox *next = msg->next; int data_only; - int offset = 0; + u32 offset = 0; int dump_len; data_only = !!(mlx5_core_debug_mask & (1 << MLX5_CMD_DATA)); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 7f39ebcd6ad0..67cead2c079e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -252,7 +252,8 @@ static int mlx5_eq_int(struct mlx5_core_dev *dev, struct mlx5_eq *eq) case MLX5_PORT_CHANGE_SUBTYPE_GUID: case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG: case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED: - dev->event(dev, port_subtype_event(eqe->sub_type), &port); + if (dev->event) + dev->event(dev, port_subtype_event(eqe->sub_type), &port); break; default: mlx5_core_warn(dev, "Port event with unrecognized subtype: port %d, sub_type %d\n", diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mad.c b/drivers/net/ethernet/mellanox/mlx5/core/mad.c index 18d6fd5dd90b..fd80ecfa7195 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mad.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/mad.c @@ -37,7 +37,7 @@ #include "mlx5_core.h" int mlx5_core_mad_ifc(struct mlx5_core_dev *dev, void *inb, void *outb, - u16 opmod, int port) + u16 opmod, u8 port) { struct mlx5_mad_ifc_mbox_in *in = NULL; struct mlx5_mad_ifc_mbox_out *out = NULL; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 4b7f9da4bf11..fd782bf49dc6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -311,7 +311,7 @@ static int handle_hca_cap(struct mlx5_core_dev *dev) copy_rw_fields(&set_ctx->hca_cap, &query_out->hca_cap); - if (dev->profile->mask & MLX5_PROF_MASK_QP_SIZE) + if (dev->profile && dev->profile->mask & MLX5_PROF_MASK_QP_SIZE) set_ctx->hca_cap.log_max_qp = 
dev->profile->log_max_qp; flags = be64_to_cpu(query_out->hca_cap.flags); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index c2a953ef0e67..d476918ef269 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -51,7 +51,7 @@ enum { struct mlx5_pages_req { struct mlx5_core_dev *dev; - u32 func_id; + u16 func_id; s32 npages; struct work_struct work; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index 8c9ac870ecb1..313965853e10 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -86,7 +86,7 @@ struct mlx5_reg_pcap { __be32 caps_31_0; }; -int mlx5_set_port_caps(struct mlx5_core_dev *dev, int port_num, u32 caps) +int mlx5_set_port_caps(struct mlx5_core_dev *dev, u8 port_num, u32 caps) { struct mlx5_reg_pcap in; struct mlx5_reg_pcap out; diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 3406cfb1267a..334947151dfc 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -456,9 +456,6 @@ struct mlx5_eqe_cq_err { u8 syndrome; }; -struct mlx5_eqe_dropped_packet { -}; - struct mlx5_eqe_port_state { u8 reserved0[8]; u8 port; @@ -498,7 +495,6 @@ union ev_data { struct mlx5_eqe_comp comp; struct mlx5_eqe_qp_srq qp_srq; struct mlx5_eqe_cq_err cq_err; - struct mlx5_eqe_dropped_packet dp; struct mlx5_eqe_port_state port; struct mlx5_eqe_gpio gpio; struct mlx5_eqe_congestion cong; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index d0cb5984a45f..76de0cc41640 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -381,8 +381,8 @@ struct mlx5_buf { struct mlx5_buf_list *page_list; int nbufs; int npages; - int page_shift; int size; + u8 page_shift; }; struct mlx5_eq { @@ -736,7 +736,7 @@ int mlx5_core_dump_fill_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, int mlx5_core_alloc_pd(struct mlx5_core_dev *dev, u32 *pdn); int mlx5_core_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn); int mlx5_core_mad_ifc(struct mlx5_core_dev *dev, void *inb, void *outb, - u16 opmod, int port); + u16 opmod, u8 port); void mlx5_pagealloc_init(struct mlx5_core_dev *dev); void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev); int mlx5_pagealloc_start(struct mlx5_core_dev *dev); @@ -769,7 +769,7 @@ void mlx5_qp_debugfs_cleanup(struct mlx5_core_dev *dev); int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in, int size_in, void *data_out, int size_out, u16 reg_num, int arg, int write); -int mlx5_set_port_caps(struct mlx5_core_dev *dev, int port_num, u32 caps); +int mlx5_set_port_caps(struct mlx5_core_dev *dev, u8 port_num, u32 caps); int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq); void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq); @@ -826,7 +826,7 @@ void mlx5_unregister_interface(struct mlx5_interface *intf); struct mlx5_profile { u64 mask; - u32 log_max_qp; + u8 log_max_qp; struct { int size; int limit; -- cgit v1.2.3-59-g8ed1b From 4d2f9bbb654b91a262638ac2c84dcb169d014aa6 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Mon, 28 Jul 2014 23:30:24 +0300 Subject: mlx5: Adjust events to use unsigned long param instead of void * In the event flow, we currently pass only a port number in the void *data argument. 
Rather than pass a pointer to the event handlers, we should use an "unsigned long" parameter, and pass the port number value directly. In the future, if necessary for some events, we can use the unsigned long parameter to pass a pointer. Based on a patch by Eli Cohen Signed-off-by: Jack Morgenstein Signed-off-by: Eli Cohen Signed-off-by: David S. Miller --- drivers/infiniband/hw/mlx5/main.c | 14 +++++++------- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 3 ++- drivers/net/ethernet/mellanox/mlx5/core/main.c | 4 ++-- include/linux/mlx5/driver.h | 4 ++-- 4 files changed, 13 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 166335a95c59..d8907b20522a 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -905,7 +905,7 @@ static struct device_attribute *mlx5_class_attributes[] = { }; static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context, - enum mlx5_dev_event event, void *data) + enum mlx5_dev_event event, unsigned long param) { struct mlx5_ib_dev *ibdev = (struct mlx5_ib_dev *)context; struct ib_event ibev; @@ -920,12 +920,12 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context, case MLX5_DEV_EVENT_PORT_UP: ibev.event = IB_EVENT_PORT_ACTIVE; - port = *(u8 *)data; + port = (u8)param; break; case MLX5_DEV_EVENT_PORT_DOWN: ibev.event = IB_EVENT_PORT_ERR; - port = *(u8 *)data; + port = (u8)param; break; case MLX5_DEV_EVENT_PORT_INITIALIZED: @@ -934,22 +934,22 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context, case MLX5_DEV_EVENT_LID_CHANGE: ibev.event = IB_EVENT_LID_CHANGE; - port = *(u8 *)data; + port = (u8)param; break; case MLX5_DEV_EVENT_PKEY_CHANGE: ibev.event = IB_EVENT_PKEY_CHANGE; - port = *(u8 *)data; + port = (u8)param; break; case MLX5_DEV_EVENT_GUID_CHANGE: ibev.event = IB_EVENT_GID_CHANGE; - port = *(u8 *)data; + port = (u8)param; break; case MLX5_DEV_EVENT_CLIENT_REREG: ibev.event = IB_EVENT_CLIENT_REREGISTER; - port = *(u8 *)data; + port = (u8)param; break; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 67cead2c079e..4e8bd0b34bb0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -253,7 +253,8 @@ static int mlx5_eq_int(struct mlx5_core_dev *dev, struct mlx5_eq *eq) case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG: case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED: if (dev->event) - dev->event(dev, port_subtype_event(eqe->sub_type), &port); + dev->event(dev, port_subtype_event(eqe->sub_type), + (unsigned long)port); break; default: mlx5_core_warn(dev, "Port event with unrecognized subtype: port %d, sub_type %d\n", diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index fd782bf49dc6..f2716cc1f51d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -704,7 +704,7 @@ void mlx5_unregister_interface(struct mlx5_interface *intf) EXPORT_SYMBOL(mlx5_unregister_interface); static void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, - void *data) + unsigned long param) { struct mlx5_priv *priv = &dev->priv; struct mlx5_device_context *dev_ctx; @@ -714,7 +714,7 @@ static void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event list_for_each_entry(dev_ctx, &priv->ctx_list, list) if (dev_ctx->intf->event) - dev_ctx->intf->event(dev, 
dev_ctx->context, event, data); + dev_ctx->intf->event(dev, dev_ctx->context, event, param); spin_unlock_irqrestore(&priv->ctx_lock, flags); } diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 76de0cc41640..9f3a5476bb71 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -559,7 +559,7 @@ struct mlx5_core_dev { struct mlx5_init_seg __iomem *iseg; void (*event) (struct mlx5_core_dev *dev, enum mlx5_dev_event event, - void *data); + unsigned long param); struct mlx5_priv priv; struct mlx5_profile *profile; atomic_t num_qps; @@ -817,7 +817,7 @@ struct mlx5_interface { void * (*add)(struct mlx5_core_dev *dev); void (*remove)(struct mlx5_core_dev *dev, void *context); void (*event)(struct mlx5_core_dev *dev, void *context, - enum mlx5_dev_event event, void *data); + enum mlx5_dev_event event, unsigned long param); struct list_head list; }; -- cgit v1.2.3-59-g8ed1b From 8f1d26d0e59b9676587c54578f976709b625d6e9 Mon Sep 17 00:00:00 2001 From: Atsushi Kumagai Date: Wed, 30 Jul 2014 16:08:39 -0700 Subject: kexec: export free_huge_page to VMCOREINFO PG_head_mask was added into VMCOREINFO to filter huge pages in b3acc56bfe1 ("kexec: save PG_head_mask in VMCOREINFO"), but makedumpfile still needs another symbol to filter *hugetlbfs* pages. If a user hopes to filter user pages, makedumpfile tries to exclude them by checking whether the page is anonymous, but hugetlbfs pages aren't anonymous even though they are also user pages. We know it's possible to detect them in the same way as PageHuge(), so we need the start address of free_huge_page(): int PageHuge(struct page *page) { if (!PageCompound(page)) return 0; page = compound_head(page); return get_compound_page_dtor(page) == free_huge_page; } For that reason, this patch makes free_huge_page() public so that its address can be exported to VMCOREINFO.
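As an illustration (not part of the patch), this is roughly how a dump-filtering tool could mirror the PageHuge() test from user space once the symbol is visible in VMCOREINFO. It is only a sketch: the vmcoreinfo_symbol() and dump_* helpers are hypothetical stand-ins for whatever the tool provides to read the crash image, not actual makedumpfile code.

/* Hypothetical helpers assumed to be provided by the dump-filtering tool. */
extern unsigned long vmcoreinfo_symbol(const char *name);    /* symbol address from VMCOREINFO */
extern int dump_page_is_compound(unsigned long page);        /* PageCompound() on the dumped page */
extern unsigned long dump_compound_head(unsigned long page); /* compound_head() on the dumped page */
extern unsigned long dump_compound_dtor(unsigned long page); /* get_compound_page_dtor() equivalent */

static int dump_page_is_hugetlb(unsigned long page)
{
	/* Address made visible by VMCOREINFO_SYMBOL(free_huge_page) in this patch */
	unsigned long free_huge_page_addr = vmcoreinfo_symbol("free_huge_page");

	if (!dump_page_is_compound(page))
		return 0;

	page = dump_compound_head(page);

	/* Same test as PageHuge(): a hugetlbfs page's compound destructor
	 * is free_huge_page(), so compare against the exported address. */
	return dump_compound_dtor(page) == free_huge_page_addr;
}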
Signed-off-by: Atsushi Kumagai Acked-by: Baoquan He Cc: Vivek Goyal Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 1 + kernel/kexec.c | 2 ++ mm/hugetlb.c | 2 +- 3 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 255cd5cc0754..a23c096b3080 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -80,6 +80,7 @@ int dequeue_hwpoisoned_huge_page(struct page *page); bool isolate_huge_page(struct page *page, struct list_head *list); void putback_active_hugepage(struct page *page); bool is_hugepage_active(struct page *page); +void free_huge_page(struct page *page); #ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud); diff --git a/kernel/kexec.c b/kernel/kexec.c index 369f41a94124..23a088fec3c0 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include @@ -1619,6 +1620,7 @@ static int __init crash_save_vmcoreinfo_init(void) #endif VMCOREINFO_NUMBER(PG_head_mask); VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE); + VMCOREINFO_SYMBOL(free_huge_page); arch_crash_save_vmcoreinfo(); update_vmcoreinfo_note(); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 9221c02ed9e2..7a0a73d2fcff 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -856,7 +856,7 @@ struct hstate *size_to_hstate(unsigned long size) return NULL; } -static void free_huge_page(struct page *page) +void free_huge_page(struct page *page) { /* * Can't pass hstate in here because it is called from the -- cgit v1.2.3-59-g8ed1b From 0c1d77dfb56660329d639090352bf690d3c33466 Mon Sep 17 00:00:00 2001 From: Vince Bridgers Date: Tue, 29 Jul 2014 15:19:57 -0500 Subject: net: libphy: Add phy specific function to access mmd phy registers libphy was originally written assuming all phy devices support clause 45 access extensions to the mmd registers through the indirection registers located within the first 16 phy registers. This assumption is not true in all cases, and one specific example is the Micrel ksz9021 10/100/1000 Mbps phy. Using the stmmac driver, accessing the mmd registers to query and configure energy efficient Ethernet (EEE) features yielded unexpected behavior. This patch adds mmd access functions to the phy driver that can be overriden by the phy specific driver if the phy does not support this mechanism or uses it's own non-standard access mechanism. By default, the IEEE Compatible clause 45 access mechanism described in clause 22 is used. With this patch, EEE query/configure functions as expected using the stmmac and the Micrel ksz9021 phy. Signed-off-by: Vince Bridgers Reviewed-by: Florian Fainelli Signed-off-by: David S. 
Miller --- drivers/net/phy/phy.c | 61 ++++++++++++++++++++++++++++++++------------------- include/linux/phy.h | 18 +++++++++++++++ 2 files changed, 56 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index e56e269a6eb3..c94e2a27446a 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -942,7 +942,7 @@ static inline void mmd_phy_indirect(struct mii_bus *bus, int prtad, int devad, /** * phy_read_mmd_indirect - reads data from the MMD registers - * @bus: the target MII bus + * @phydev: The PHY device bus * @prtad: MMD Address * @devad: MMD DEVAD * @addr: PHY address on the MII bus @@ -955,18 +955,26 @@ static inline void mmd_phy_indirect(struct mii_bus *bus, int prtad, int devad, * 3) Write reg 13 // MMD Data Command for MMD DEVAD * 3) Read reg 14 // Read MMD data */ -static int phy_read_mmd_indirect(struct mii_bus *bus, int prtad, int devad, - int addr) +static int phy_read_mmd_indirect(struct phy_device *phydev, int prtad, + int devad, int addr) { - mmd_phy_indirect(bus, prtad, devad, addr); + struct phy_driver *phydrv = phydev->drv; + int value = -1; - /* Read the content of the MMD's selected register */ - return bus->read(bus, addr, MII_MMD_DATA); + if (phydrv->read_mmd_indirect == NULL) { + mmd_phy_indirect(phydev->bus, prtad, devad, addr); + + /* Read the content of the MMD's selected register */ + value = phydev->bus->read(phydev->bus, addr, MII_MMD_DATA); + } else { + value = phydrv->read_mmd_indirect(phydev, prtad, devad, addr); + } + return value; } /** * phy_write_mmd_indirect - writes data to the MMD registers - * @bus: the target MII bus + * @phydev: The PHY device * @prtad: MMD Address * @devad: MMD DEVAD * @addr: PHY address on the MII bus @@ -980,13 +988,19 @@ static int phy_read_mmd_indirect(struct mii_bus *bus, int prtad, int devad, * 3) Write reg 13 // MMD Data Command for MMD DEVAD * 3) Write reg 14 // Write MMD data */ -static void phy_write_mmd_indirect(struct mii_bus *bus, int prtad, int devad, - int addr, u32 data) +static void phy_write_mmd_indirect(struct phy_device *phydev, int prtad, + int devad, int addr, u32 data) { - mmd_phy_indirect(bus, prtad, devad, addr); + struct phy_driver *phydrv = phydev->drv; - /* Write the data into MMD's selected register */ - bus->write(bus, addr, MII_MMD_DATA, data); + if (phydrv->write_mmd_indirect == NULL) { + mmd_phy_indirect(phydev->bus, prtad, devad, addr); + + /* Write the data into MMD's selected register */ + phydev->bus->write(phydev->bus, addr, MII_MMD_DATA, data); + } else { + phydrv->write_mmd_indirect(phydev, prtad, devad, addr, data); + } } /** @@ -1020,7 +1034,7 @@ int phy_init_eee(struct phy_device *phydev, bool clk_stop_enable) return status; /* First check if the EEE ability is supported */ - eee_cap = phy_read_mmd_indirect(phydev->bus, MDIO_PCS_EEE_ABLE, + eee_cap = phy_read_mmd_indirect(phydev, MDIO_PCS_EEE_ABLE, MDIO_MMD_PCS, phydev->addr); if (eee_cap < 0) return eee_cap; @@ -1032,12 +1046,12 @@ int phy_init_eee(struct phy_device *phydev, bool clk_stop_enable) /* Check which link settings negotiated and verify it in * the EEE advertising registers. 
*/ - eee_lp = phy_read_mmd_indirect(phydev->bus, MDIO_AN_EEE_LPABLE, + eee_lp = phy_read_mmd_indirect(phydev, MDIO_AN_EEE_LPABLE, MDIO_MMD_AN, phydev->addr); if (eee_lp < 0) return eee_lp; - eee_adv = phy_read_mmd_indirect(phydev->bus, MDIO_AN_EEE_ADV, + eee_adv = phy_read_mmd_indirect(phydev, MDIO_AN_EEE_ADV, MDIO_MMD_AN, phydev->addr); if (eee_adv < 0) return eee_adv; @@ -1052,15 +1066,16 @@ int phy_init_eee(struct phy_device *phydev, bool clk_stop_enable) /* Configure the PHY to stop receiving xMII * clock while it is signaling LPI. */ - int val = phy_read_mmd_indirect(phydev->bus, MDIO_CTRL1, + int val = phy_read_mmd_indirect(phydev, MDIO_CTRL1, MDIO_MMD_PCS, phydev->addr); if (val < 0) return val; val |= MDIO_PCS_CTRL1_CLKSTOP_EN; - phy_write_mmd_indirect(phydev->bus, MDIO_CTRL1, - MDIO_MMD_PCS, phydev->addr, val); + phy_write_mmd_indirect(phydev, MDIO_CTRL1, + MDIO_MMD_PCS, phydev->addr, + val); } return 0; /* EEE supported */ @@ -1079,7 +1094,7 @@ EXPORT_SYMBOL(phy_init_eee); */ int phy_get_eee_err(struct phy_device *phydev) { - return phy_read_mmd_indirect(phydev->bus, MDIO_PCS_EEE_WK_ERR, + return phy_read_mmd_indirect(phydev, MDIO_PCS_EEE_WK_ERR, MDIO_MMD_PCS, phydev->addr); } EXPORT_SYMBOL(phy_get_eee_err); @@ -1097,21 +1112,21 @@ int phy_ethtool_get_eee(struct phy_device *phydev, struct ethtool_eee *data) int val; /* Get Supported EEE */ - val = phy_read_mmd_indirect(phydev->bus, MDIO_PCS_EEE_ABLE, + val = phy_read_mmd_indirect(phydev, MDIO_PCS_EEE_ABLE, MDIO_MMD_PCS, phydev->addr); if (val < 0) return val; data->supported = mmd_eee_cap_to_ethtool_sup_t(val); /* Get advertisement EEE */ - val = phy_read_mmd_indirect(phydev->bus, MDIO_AN_EEE_ADV, + val = phy_read_mmd_indirect(phydev, MDIO_AN_EEE_ADV, MDIO_MMD_AN, phydev->addr); if (val < 0) return val; data->advertised = mmd_eee_adv_to_ethtool_adv_t(val); /* Get LP advertisement EEE */ - val = phy_read_mmd_indirect(phydev->bus, MDIO_AN_EEE_LPABLE, + val = phy_read_mmd_indirect(phydev, MDIO_AN_EEE_LPABLE, MDIO_MMD_AN, phydev->addr); if (val < 0) return val; @@ -1132,7 +1147,7 @@ int phy_ethtool_set_eee(struct phy_device *phydev, struct ethtool_eee *data) { int val = ethtool_adv_to_mmd_eee_adv_t(data->advertised); - phy_write_mmd_indirect(phydev->bus, MDIO_AN_EEE_ADV, MDIO_MMD_AN, + phy_write_mmd_indirect(phydev, MDIO_AN_EEE_ADV, MDIO_MMD_AN, phydev->addr, val); return 0; diff --git a/include/linux/phy.h b/include/linux/phy.h index 68041446c450..ed39956b5613 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -545,6 +545,24 @@ struct phy_driver { */ void (*link_change_notify)(struct phy_device *dev); + /* A function provided by a phy specific driver to override the + * the PHY driver framework support for reading a MMD register + * from the PHY. If not supported, return -1. This function is + * optional for PHY specific drivers, if not provided then the + * default MMD read function is used by the PHY framework. + */ + int (*read_mmd_indirect)(struct phy_device *dev, int ptrad, + int devnum, int regnum); + + /* A function provided by a phy specific driver to override the + * the PHY driver framework support for writing a MMD register + * from the PHY. This function is optional for PHY specific drivers, + * if not provided then the default MMD read function is used by + * the PHY framework. 
+ */ + void (*write_mmd_indirect)(struct phy_device *dev, int ptrad, + int devnum, int regnum, u32 val); + struct device_driver driver; }; #define to_phy_driver(d) container_of(d, struct phy_driver, driver) -- cgit v1.2.3-59-g8ed1b From d17d8f9dedb9dd76fd540a5c497101529d9eb25a Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Thu, 31 Jul 2014 08:40:59 -0700 Subject: x86/mm: Add tracepoints for TLB flushes We don't have any good way to figure out what kinds of flushes are being attempted. Right now, we can try to use the vm counters, but those only tell us what we actually did with the hardware (one-by-one vs full) and don't tell us what was actually _requested_. This allows us to select out "interesting" TLB flushes that we might want to optimize (like the ranged ones) and ignore the ones that we have very little control over (the ones at context switch). Signed-off-by: Dave Hansen Link: http://lkml.kernel.org/r/20140731154059.4C96CBA5@viggo.jf.intel.com Acked-by: Rik van Riel Cc: Mel Gorman Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/mmu_context.h | 6 ++++++ arch/x86/mm/init.c | 7 +++++++ arch/x86/mm/tlb.c | 11 +++++++++-- include/linux/mm_types.h | 8 ++++++++ include/trace/events/tlb.h | 40 ++++++++++++++++++++++++++++++++++++++ 5 files changed, 70 insertions(+), 2 deletions(-) create mode 100644 include/trace/events/tlb.h (limited to 'include/linux') diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index be12c534fd59..166af2a8e865 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -3,6 +3,10 @@ #include #include +#include + +#include + #include #include #include @@ -44,6 +48,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, /* Re-load page tables */ load_cr3(next->pgd); + trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); /* Stop flush ipis for the previous mm */ cpumask_clear_cpu(cpu, mm_cpumask(prev)); @@ -71,6 +76,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, * to make sure to use no freed page tables. */ load_cr3(next->pgd); + trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); load_LDT_nolock(&next->context); } } diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index f97130618113..66dba36f2343 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -18,6 +18,13 @@ #include /* for MAX_DMA_PFN */ #include +/* + * We need to define the tracepoints somewhere, and tlb.c + * is only compied when SMP=y. 
+ */ +#define CREATE_TRACE_POINTS +#include + #include "mm_internal.h" static unsigned long __initdata pgt_buf_start; diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index add5a0fc3c5f..6f00ecb9feeb 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -49,6 +49,7 @@ void leave_mm(int cpu) if (cpumask_test_cpu(cpu, mm_cpumask(active_mm))) { cpumask_clear_cpu(cpu, mm_cpumask(active_mm)); load_cr3(swapper_pg_dir); + trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); } } EXPORT_SYMBOL_GPL(leave_mm); @@ -107,15 +108,19 @@ static void flush_tlb_func(void *info) count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED); if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) { - if (f->flush_end == TLB_FLUSH_ALL) + if (f->flush_end == TLB_FLUSH_ALL) { local_flush_tlb(); - else { + trace_tlb_flush(TLB_REMOTE_SHOOTDOWN, TLB_FLUSH_ALL); + } else { unsigned long addr; + unsigned long nr_pages = + f->flush_end - f->flush_start / PAGE_SIZE; addr = f->flush_start; while (addr < f->flush_end) { __flush_tlb_single(addr); addr += PAGE_SIZE; } + trace_tlb_flush(TLB_REMOTE_SHOOTDOWN, nr_pages); } } else leave_mm(smp_processor_id()); @@ -153,6 +158,7 @@ void flush_tlb_current_task(void) count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); local_flush_tlb(); + trace_tlb_flush(TLB_LOCAL_SHOOTDOWN, TLB_FLUSH_ALL); if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL); preempt_enable(); @@ -191,6 +197,7 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, __flush_tlb_single(addr); } } + trace_tlb_flush(TLB_LOCAL_MM_SHOOTDOWN, base_pages_to_flush); out: if (base_pages_to_flush == TLB_FLUSH_ALL) { start = 0UL; diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 96c5750e3110..796deac19fcf 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -516,4 +516,12 @@ struct vm_special_mapping struct page **pages; }; +enum tlb_flush_reason { + TLB_FLUSH_ON_TASK_SWITCH, + TLB_REMOTE_SHOOTDOWN, + TLB_LOCAL_SHOOTDOWN, + TLB_LOCAL_MM_SHOOTDOWN, + NR_TLB_FLUSH_REASONS, +}; + #endif /* _LINUX_MM_TYPES_H */ diff --git a/include/trace/events/tlb.h b/include/trace/events/tlb.h new file mode 100644 index 000000000000..13391d288107 --- /dev/null +++ b/include/trace/events/tlb.h @@ -0,0 +1,40 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM tlb + +#if !defined(_TRACE_TLB_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_TLB_H + +#include +#include + +#define TLB_FLUSH_REASON \ + { TLB_FLUSH_ON_TASK_SWITCH, "flush on task switch" }, \ + { TLB_REMOTE_SHOOTDOWN, "remote shootdown" }, \ + { TLB_LOCAL_SHOOTDOWN, "local shootdown" }, \ + { TLB_LOCAL_MM_SHOOTDOWN, "local mm shootdown" } + +TRACE_EVENT(tlb_flush, + + TP_PROTO(int reason, unsigned long pages), + TP_ARGS(reason, pages), + + TP_STRUCT__entry( + __field( int, reason) + __field(unsigned long, pages) + ), + + TP_fast_assign( + __entry->reason = reason; + __entry->pages = pages; + ), + + TP_printk("pages:%ld reason:%s (%d)", + __entry->pages, + __print_symbolic(__entry->reason, TLB_FLUSH_REASON), + __entry->reason) +); + +#endif /* _TRACE_TLB_H */ + +/* This part must be outside protection */ +#include -- cgit v1.2.3-59-g8ed1b From dc6be9f54a4ecb0a09765d1f515ed947d86b7528 Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Wed, 30 Jul 2014 23:21:06 +0200 Subject: bcma: use NS prefix for names of Northstar specific cores MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's cleaner and we don't have quite identical names 
like BCMA_CORE_PCIEG2 and BCMA_CORE_PCIE2. Signed-off-by: Rafał Miłecki Signed-off-by: John W. Linville --- drivers/bcma/scan.c | 22 +++++++++++----------- include/linux/bcma/bcma.h | 22 +++++++++++----------- 2 files changed, 22 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/drivers/bcma/scan.c b/drivers/bcma/scan.c index 37768401d113..b4764c6bcf17 100644 --- a/drivers/bcma/scan.c +++ b/drivers/bcma/scan.c @@ -32,17 +32,17 @@ static const struct bcma_device_id_name bcma_bcm_device_names[] = { { BCMA_CORE_4706_CHIPCOMMON, "BCM4706 ChipCommon" }, { BCMA_CORE_4706_SOC_RAM, "BCM4706 SOC RAM" }, { BCMA_CORE_4706_MAC_GBIT, "BCM4706 GBit MAC" }, - { BCMA_CORE_PCIEG2, "PCIe Gen 2" }, - { BCMA_CORE_DMA, "DMA" }, - { BCMA_CORE_SDIO3, "SDIO3" }, - { BCMA_CORE_USB20, "USB 2.0" }, - { BCMA_CORE_USB30, "USB 3.0" }, - { BCMA_CORE_A9JTAG, "ARM Cortex A9 JTAG" }, - { BCMA_CORE_DDR23, "Denali DDR2/DDR3 memory controller" }, - { BCMA_CORE_ROM, "ROM" }, - { BCMA_CORE_NAND, "NAND flash controller" }, - { BCMA_CORE_QSPI, "SPI flash controller" }, - { BCMA_CORE_CHIPCOMMON_B, "Chipcommon B" }, + { BCMA_CORE_NS_PCIEG2, "PCIe Gen 2" }, + { BCMA_CORE_NS_DMA, "DMA" }, + { BCMA_CORE_NS_SDIO3, "SDIO3" }, + { BCMA_CORE_NS_USB20, "USB 2.0" }, + { BCMA_CORE_NS_USB30, "USB 3.0" }, + { BCMA_CORE_NS_A9JTAG, "ARM Cortex A9 JTAG" }, + { BCMA_CORE_NS_DDR23, "Denali DDR2/DDR3 memory controller" }, + { BCMA_CORE_NS_ROM, "ROM" }, + { BCMA_CORE_NS_NAND, "NAND flash controller" }, + { BCMA_CORE_NS_QSPI, "SPI flash controller" }, + { BCMA_CORE_NS_CHIPCOMMON_B, "Chipcommon B" }, { BCMA_CORE_ARMCA9, "ARM Cortex A9 core (ihost)" }, { BCMA_CORE_AMEMC, "AMEMC (DDR)" }, { BCMA_CORE_ALTA, "ALTA (I2S)" }, diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h index 70b8d88b3982..0272e49135d0 100644 --- a/include/linux/bcma/bcma.h +++ b/include/linux/bcma/bcma.h @@ -73,17 +73,17 @@ struct bcma_host_ops { /* Core-ID values. */ #define BCMA_CORE_OOB_ROUTER 0x367 /* Out of band */ #define BCMA_CORE_4706_CHIPCOMMON 0x500 -#define BCMA_CORE_PCIEG2 0x501 -#define BCMA_CORE_DMA 0x502 -#define BCMA_CORE_SDIO3 0x503 -#define BCMA_CORE_USB20 0x504 -#define BCMA_CORE_USB30 0x505 -#define BCMA_CORE_A9JTAG 0x506 -#define BCMA_CORE_DDR23 0x507 -#define BCMA_CORE_ROM 0x508 -#define BCMA_CORE_NAND 0x509 -#define BCMA_CORE_QSPI 0x50A -#define BCMA_CORE_CHIPCOMMON_B 0x50B +#define BCMA_CORE_NS_PCIEG2 0x501 +#define BCMA_CORE_NS_DMA 0x502 +#define BCMA_CORE_NS_SDIO3 0x503 +#define BCMA_CORE_NS_USB20 0x504 +#define BCMA_CORE_NS_USB30 0x505 +#define BCMA_CORE_NS_A9JTAG 0x506 +#define BCMA_CORE_NS_DDR23 0x507 +#define BCMA_CORE_NS_ROM 0x508 +#define BCMA_CORE_NS_NAND 0x509 +#define BCMA_CORE_NS_QSPI 0x50A +#define BCMA_CORE_NS_CHIPCOMMON_B 0x50B #define BCMA_CORE_4706_SOC_RAM 0x50E #define BCMA_CORE_ARMCA9 0x510 #define BCMA_CORE_4706_MAC_GBIT 0x52D -- cgit v1.2.3-59-g8ed1b From a779ca5fa766e270b9e11c162d877295e2904f4e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 29 Jul 2014 17:26:04 -0400 Subject: xprtrdma: Remove RPCRDMA_PERSISTENT_REGISTRATION macro Clean up. RPCRDMA_PERSISTENT_REGISTRATION was a compile-time switch between RPCRDMA_REGISTER mode and RPCRDMA_ALLPHYSICAL mode. Since RPCRDMA_REGISTER has been removed, there's no need for the extra conditional compilation. 
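For readability, the shape of the cleanup is sketched below before the real verbs.c hunks; the enum and function here are invented purely to show the conditional-compilation pattern, only the macro name and the ALLPHYSICAL/MTHCAFMR/FRMR mode names come from the driver.

/* Before: xprtrdma.h defined the switch, and every use of the
 * ALLPHYSICAL mode was wrapped in #if/#endif guards like this. */
#define RPCRDMA_PERSISTENT_REGISTRATION (1)

enum memreg_sketch {
	SKETCH_ALLPHYSICAL,
	SKETCH_MTHCAFMR,
	SKETCH_FRMR,
};

static int strategy_is_supported(enum memreg_sketch strategy)
{
	switch (strategy) {
#if RPCRDMA_PERSISTENT_REGISTRATION
	case SKETCH_ALLPHYSICAL:	/* built only when the macro is set */
		return 1;
#endif
	case SKETCH_MTHCAFMR:
	case SKETCH_FRMR:
		return 1;
	default:
		return 0;
	}
}

With RPCRDMA_REGISTER gone there is no configuration left in which the guard should evaluate to 0, so the patch drops the #if/#endif pairs and the macro definition itself, leaving the ALLPHYSICAL cases unconditionally compiled, which is what the hunks below do.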
Signed-off-by: Chuck Lever Tested-by: Steve Wise Tested-by: Shirley Ma Tested-by: Devesh Sharma Signed-off-by: Anna Schumaker --- include/linux/sunrpc/xprtrdma.h | 2 -- net/sunrpc/xprtrdma/verbs.c | 13 ------------- 2 files changed, 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprtrdma.h b/include/linux/sunrpc/xprtrdma.h index c2f04e1ae159..64a0a0a97b23 100644 --- a/include/linux/sunrpc/xprtrdma.h +++ b/include/linux/sunrpc/xprtrdma.h @@ -62,8 +62,6 @@ #define RPCRDMA_INLINE_PAD_THRESH (512)/* payload threshold to pad (bytes) */ /* memory registration strategies */ -#define RPCRDMA_PERSISTENT_REGISTRATION (1) - enum rpcrdma_memreg { RPCRDMA_BOUNCEBUFFERS = 0, RPCRDMA_REGISTER, diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 1208ab2e655f..c2253d4c64df 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -561,12 +561,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) if (!ia->ri_id->device->alloc_fmr) { dprintk("RPC: %s: MTHCAFMR registration " "not supported by HCA\n", __func__); -#if RPCRDMA_PERSISTENT_REGISTRATION memreg = RPCRDMA_ALLPHYSICAL; -#else - rc = -ENOMEM; - goto out2; -#endif } } @@ -581,20 +576,16 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) switch (memreg) { case RPCRDMA_FRMR: break; -#if RPCRDMA_PERSISTENT_REGISTRATION case RPCRDMA_ALLPHYSICAL: mem_priv = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ; goto register_setup; -#endif case RPCRDMA_MTHCAFMR: if (ia->ri_have_dma_lkey) break; mem_priv = IB_ACCESS_LOCAL_WRITE; -#if RPCRDMA_PERSISTENT_REGISTRATION register_setup: -#endif ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv); if (IS_ERR(ia->ri_bind_mem)) { printk(KERN_ALERT "%s: ib_get_dma_mr for " @@ -1905,7 +1896,6 @@ rpcrdma_register_external(struct rpcrdma_mr_seg *seg, switch (ia->ri_memreg_strategy) { -#if RPCRDMA_PERSISTENT_REGISTRATION case RPCRDMA_ALLPHYSICAL: rpcrdma_map_one(ia, seg, writing); seg->mr_rkey = ia->ri_bind_mem->rkey; @@ -1913,7 +1903,6 @@ rpcrdma_register_external(struct rpcrdma_mr_seg *seg, seg->mr_nsegs = 1; nsegs = 1; break; -#endif /* Registration using frmr registration */ case RPCRDMA_FRMR: @@ -1943,13 +1932,11 @@ rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg, switch (ia->ri_memreg_strategy) { -#if RPCRDMA_PERSISTENT_REGISTRATION case RPCRDMA_ALLPHYSICAL: read_lock(&ia->ri_qplock); rpcrdma_unmap_one(ia, seg); read_unlock(&ia->ri_qplock); break; -#endif case RPCRDMA_FRMR: rc = rpcrdma_deregister_frmr_external(seg, ia, r_xprt); -- cgit v1.2.3-59-g8ed1b From 3b57de958e2aa39abe020eb31bf19000d5899389 Mon Sep 17 00:00:00 2001 From: Vince Bridgers Date: Thu, 31 Jul 2014 15:49:17 -0500 Subject: net: stmmac: Support devicetree configs for mcast and ucast filter entries This patch adds and modifies code to support multiple Multicast and Unicast Synopsys MAC filter configurations. The default configuration is defined to support legacy driver behavior, which is 64 Multicast bins. The Unicast filter code previously assumed all controllers support 32 or 16 Unicast addresses based on controller version number, but this has been corrected to support a default of 1 Unicast address. The filter configuration may be specified through the devicetree using a Synopsys specific device tree entry. This information was verified with Synopsys through Synopsys Support Case #8000684337 and shared with the maintainer. Signed-off-by: Vince Bridgers Signed-off-by: David S. 
Miller --- drivers/net/ethernet/stmicro/stmmac/common.h | 8 ++- drivers/net/ethernet/stmicro/stmmac/dwmac1000.h | 1 + .../net/ethernet/stmicro/stmmac/dwmac1000_core.c | 66 +++++++++++++++------ .../net/ethernet/stmicro/stmmac/dwmac100_core.c | 3 +- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 6 +- .../net/ethernet/stmicro/stmmac/stmmac_platform.c | 67 ++++++++++++++++++++++ include/linux/stmmac.h | 2 + 7 files changed, 132 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index 49f72e1ffbef..de507c32036c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -381,7 +381,7 @@ struct stmmac_ops { int (*host_irq_status)(struct mac_device_info *hw, struct stmmac_extra_stats *x); /* Multicast filter setting */ - void (*set_filter)(struct net_device *dev); + void (*set_filter)(struct mac_device_info *hw, struct net_device *dev); /* Flow control setting */ void (*flow_ctrl)(struct mac_device_info *hw, unsigned int duplex, unsigned int fc, unsigned int pause_time); @@ -442,9 +442,13 @@ struct mac_device_info { struct mac_link link; unsigned int synopsys_uid; void __iomem *pcsr; /* vpointer to device CSRs */ + int multicast_filter_bins; + int unicast_filter_entries; + int mcast_bits_log2; }; -struct mac_device_info *dwmac1000_setup(void __iomem *ioaddr); +struct mac_device_info *dwmac1000_setup(void __iomem *ioaddr, int mcbins, + int perfect_uc_entries); struct mac_device_info *dwmac100_setup(void __iomem *ioaddr); void stmmac_set_mac_addr(void __iomem *ioaddr, u8 addr[6], diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h index 285e3056f362..71b5419256c1 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h @@ -261,6 +261,7 @@ enum rtc_control { #define GMAC_MMC_RX_INTR 0x104 #define GMAC_MMC_TX_INTR 0x108 #define GMAC_MMC_RX_CSUM_OFFLOAD 0x208 +#define GMAC_EXTHASH_BASE 0x500 extern const struct stmmac_dma_ops dwmac1000_dma_ops; #endif /* __DWMAC1000_H__ */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c index cdcbad1f1ac0..d8ef18786a1c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c @@ -97,12 +97,41 @@ static void dwmac1000_get_umac_addr(struct mac_device_info *hw, GMAC_ADDR_LOW(reg_n)); } -static void dwmac1000_set_filter(struct net_device *dev) +static void dwmac1000_set_mchash(void __iomem *ioaddr, u32 *mcfilterbits, + int mcbitslog2) +{ + int numhashregs, regs; + + switch (mcbitslog2) { + case 6: + writel(mcfilterbits[0], ioaddr + GMAC_HASH_LOW); + writel(mcfilterbits[1], ioaddr + GMAC_HASH_HIGH); + return; + break; + case 7: + numhashregs = 4; + break; + case 8: + numhashregs = 8; + break; + default: + pr_debug("STMMAC: err in setting mulitcast filter\n"); + return; + break; + } + for (regs = 0; regs < numhashregs; regs++) + writel(mcfilterbits[regs], + ioaddr + GMAC_EXTHASH_BASE + regs * 4); +} + +static void dwmac1000_set_filter(struct mac_device_info *hw, + struct net_device *dev) { void __iomem *ioaddr = (void __iomem *)dev->base_addr; unsigned int value = 0; - unsigned int perfect_addr_number; + unsigned int perfect_addr_number = hw->unicast_filter_entries; u32 mc_filter[2]; + int mcbitslog2 = hw->mcast_bits_log2; pr_debug("%s: # mcasts %d, # 
unicast %d\n", __func__, netdev_mc_count(dev), netdev_uc_count(dev)); @@ -120,10 +149,14 @@ static void dwmac1000_set_filter(struct net_device *dev) value = GMAC_FRAME_FILTER_HMC; netdev_for_each_mc_addr(ha, dev) { - /* The upper 6 bits of the calculated CRC are used to - * index the contens of the hash table + /* The upper n bits of the calculated CRC are used to + * index the contents of the hash table. The number of + * bits used depends on the hardware configuration + * selected at core configuration time. */ - int bit_nr = bitrev32(~crc32_le(~0, ha->addr, 6)) >> 26; + int bit_nr = bitrev32(~crc32_le(~0, ha->addr, + ETH_ALEN)) >> + (32 - mcbitslog2); /* The most significant bit determines the register to * use (H/L) while the other 5 bits determine the bit * within the register. @@ -132,15 +165,12 @@ static void dwmac1000_set_filter(struct net_device *dev) } } - writel(mc_filter[0], ioaddr + GMAC_HASH_LOW); - writel(mc_filter[1], ioaddr + GMAC_HASH_HIGH); - - perfect_addr_number = GMAC_MAX_PERFECT_ADDRESSES; + dwmac1000_set_mchash(ioaddr, mc_filter, mcbitslog2); /* Handle multiple unicast addresses (perfect filtering) */ if (netdev_uc_count(dev) > perfect_addr_number) - /* Switch to promiscuous mode if more than 16 addrs - * are required + /* Switch to promiscuous mode if more than unicast + * addresses are requested than supported by hardware. */ value |= GMAC_FRAME_FILTER_PR; else { @@ -160,10 +190,6 @@ static void dwmac1000_set_filter(struct net_device *dev) value |= GMAC_FRAME_FILTER_RA; #endif writel(value, ioaddr + GMAC_FRAME_FILTER); - - pr_debug("\tFilter: 0x%08x\n\tHash: HI 0x%08x, LO 0x%08x\n", - readl(ioaddr + GMAC_FRAME_FILTER), - readl(ioaddr + GMAC_HASH_HIGH), readl(ioaddr + GMAC_HASH_LOW)); } @@ -382,7 +408,8 @@ static const struct stmmac_ops dwmac1000_ops = { .get_adv = dwmac1000_get_adv, }; -struct mac_device_info *dwmac1000_setup(void __iomem *ioaddr) +struct mac_device_info *dwmac1000_setup(void __iomem *ioaddr, int mcbins, + int perfect_uc_entries) { struct mac_device_info *mac; u32 hwid = readl(ioaddr + GMAC_VERSION); @@ -392,6 +419,13 @@ struct mac_device_info *dwmac1000_setup(void __iomem *ioaddr) return NULL; mac->pcsr = ioaddr; + mac->multicast_filter_bins = mcbins; + mac->unicast_filter_entries = perfect_uc_entries; + mac->mcast_bits_log2 = 0; + + if (mac->multicast_filter_bins) + mac->mcast_bits_log2 = ilog2(mac->multicast_filter_bins); + mac->mac = &dwmac1000_ops; mac->dma = &dwmac1000_dma_ops; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c index 3a2d63388f8d..f8dd773f246c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c @@ -95,7 +95,8 @@ static void dwmac100_get_umac_addr(struct mac_device_info *hw, stmmac_get_mac_addr(ioaddr, addr, MAC_ADDR_HIGH, MAC_ADDR_LOW); } -static void dwmac100_set_filter(struct net_device *dev) +static void dwmac100_set_filter(struct mac_device_info *hw, + struct net_device *dev) { void __iomem *ioaddr = (void __iomem *)dev->base_addr; u32 value = readl(ioaddr + MAC_CONTROL); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index cff2b69e62ee..08addd653728 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2225,7 +2225,7 @@ static void stmmac_set_rx_mode(struct net_device *dev) struct stmmac_priv *priv = netdev_priv(dev); spin_lock(&priv->lock); - 
priv->hw->mac->set_filter(dev); + priv->hw->mac->set_filter(priv->hw, dev); spin_unlock(&priv->lock); } @@ -2598,7 +2598,9 @@ static int stmmac_hw_init(struct stmmac_priv *priv) /* Identify the MAC HW device */ if (priv->plat->has_gmac) { priv->dev->priv_flags |= IFF_UNICAST_FLT; - mac = dwmac1000_setup(priv->ioaddr); + mac = dwmac1000_setup(priv->ioaddr, + priv->plat->multicast_filter_bins, + priv->plat->unicast_filter_entries); } else { mac = dwmac100_setup(priv->ioaddr); } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index ea7a65be1f9a..bb524a932be4 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -52,6 +52,59 @@ static const struct of_device_id stmmac_dt_ids[] = { MODULE_DEVICE_TABLE(of, stmmac_dt_ids); #ifdef CONFIG_OF + +/* This function validates the number of Multicast filtering bins specified + * by the configuration through the device tree. The Synopsys GMAC supports + * 64 bins, 128 bins, or 256 bins. "bins" refer to the division of CRC + * number space. 64 bins correspond to 6 bits of the CRC, 128 corresponds + * to 7 bits, and 256 refers to 8 bits of the CRC. Any other setting is + * invalid and will cause the filtering algorithm to use Multicast + * promiscuous mode. + */ +static int dwmac1000_validate_mcast_bins(int mcast_bins) +{ + int x = mcast_bins; + + switch (x) { + case HASH_TABLE_SIZE: + case 128: + case 256: + break; + default: + x = 0; + pr_info("Hash table entries set to unexpected value %d", + mcast_bins); + break; + } + return x; +} + +/* This function validates the number of Unicast address entries supported + * by a particular Synopsys 10/100/1000 controller. The Synopsys controller + * supports 1, 32, 64, or 128 Unicast filter entries for it's Unicast filter + * logic. This function validates a valid, supported configuration is + * selected, and defaults to 1 Unicast address if an unsupported + * configuration is selected. + */ +static int dwmac1000_validate_ucast_entries(int ucast_entries) +{ + int x = ucast_entries; + + switch (x) { + case 1: + case 32: + case 64: + case 128: + break; + default: + x = 1; + pr_info("Unicast table entries set to unexpected value %d\n", + ucast_entries); + break; + } + return x; +} + static int stmmac_probe_config_dt(struct platform_device *pdev, struct plat_stmmacenet_data *plat, const char **mac) @@ -115,6 +168,12 @@ static int stmmac_probe_config_dt(struct platform_device *pdev, */ plat->maxmtu = JUMBO_LEN; + /* Set default value for multicast hash bins */ + plat->multicast_filter_bins = HASH_TABLE_SIZE; + + /* Set default value for unicast filter entries */ + plat->unicast_filter_entries = 1; + /* * Currently only the properties needed on SPEAr600 * are provided. 
All other properties should be added @@ -131,6 +190,14 @@ static int stmmac_probe_config_dt(struct platform_device *pdev, * are clearly MTUs */ of_property_read_u32(np, "max-frame-size", &plat->maxmtu); + of_property_read_u32(np, "snps,multicast-filter-bins", + &plat->multicast_filter_bins); + of_property_read_u32(np, "snps,perfect-filter-entries", + &plat->unicast_filter_entries); + plat->unicast_filter_entries = dwmac1000_validate_ucast_entries( + plat->unicast_filter_entries); + plat->multicast_filter_bins = dwmac1000_validate_mcast_bins( + plat->multicast_filter_bins); plat->has_gmac = 1; plat->pmt = 1; } diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 6f27d4f957bd..cd63851b57f2 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -112,6 +112,8 @@ struct plat_stmmacenet_data { int riwt_off; int max_speed; int maxmtu; + int multicast_filter_bins; + int unicast_filter_entries; void (*fix_mac_speed)(void *priv, unsigned int speed); void (*bus_setup)(void __iomem *ioaddr); void *(*setup)(struct platform_device *pdev); -- cgit v1.2.3-59-g8ed1b From a6138db815df5ee542d848318e5dae681590fccd Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 28 Jul 2014 16:26:53 -0700 Subject: mnt: Only change user settable mount flags in remount Kenton Varda discovered that by remounting a read-only bind mount read-only in a user namespace the MNT_LOCK_READONLY bit would be cleared, allowing an unprivileged user to the remount a read-only mount read-write. Correct this by replacing the mask of mount flags to preserve with a mask of mount flags that may be changed, and preserve all others. This ensures that any future bugs with this mask and remount will fail in an easy to detect way where new mount flags simply won't change. Cc: stable@vger.kernel.org Acked-by: Serge E. Hallyn Signed-off-by: "Eric W. Biederman" --- fs/namespace.c | 2 +- include/linux/mount.h | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/fs/namespace.c b/fs/namespace.c index 7187d01329c3..cb40449ea0df 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1937,7 +1937,7 @@ static int do_remount(struct path *path, int flags, int mnt_flags, err = do_remount_sb(sb, flags, data, 0); if (!err) { lock_mount_hash(); - mnt_flags |= mnt->mnt.mnt_flags & MNT_PROPAGATION_MASK; + mnt_flags |= mnt->mnt.mnt_flags & ~MNT_USER_SETTABLE_MASK; mnt->mnt.mnt_flags = mnt_flags; touch_mnt_namespace(mnt->mnt_ns); unlock_mount_hash(); diff --git a/include/linux/mount.h b/include/linux/mount.h index 839bac270904..b637a89e1fae 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -42,7 +42,9 @@ struct mnt_namespace; * flag, consider how it interacts with shared mounts. */ #define MNT_SHARED_MASK (MNT_UNBINDABLE) -#define MNT_PROPAGATION_MASK (MNT_SHARED | MNT_UNBINDABLE) +#define MNT_USER_SETTABLE_MASK (MNT_NOSUID | MNT_NODEV | MNT_NOEXEC \ + | MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME \ + | MNT_READONLY) #define MNT_INTERNAL_FLAGS (MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | \ MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED) -- cgit v1.2.3-59-g8ed1b From 9566d6742852c527bf5af38af5cbb878dad75705 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 28 Jul 2014 17:26:07 -0700 Subject: mnt: Correct permission checks in do_remount While invesgiating the issue where in "mount --bind -oremount,ro ..." 
would result in later "mount --bind -oremount,rw" succeeding even if the mount started off locked I realized that there are several additional mount flags that should be locked and are not. In particular MNT_NOSUID, MNT_NODEV, MNT_NOEXEC, and the atime flags in addition to MNT_READONLY should all be locked. These flags are all per superblock, can all be changed with MS_BIND, and should not be changable if set by a more privileged user. The following additions to the current logic are added in this patch. - nosuid may not be clearable by a less privileged user. - nodev may not be clearable by a less privielged user. - noexec may not be clearable by a less privileged user. - atime flags may not be changeable by a less privileged user. The logic with atime is that always setting atime on access is a global policy and backup software and auditing software could break if atime bits are not updated (when they are configured to be updated), and serious performance degradation could result (DOS attack) if atime updates happen when they have been explicitly disabled. Therefore an unprivileged user should not be able to mess with the atime bits set by a more privileged user. The additional restrictions are implemented with the addition of MNT_LOCK_NOSUID, MNT_LOCK_NODEV, MNT_LOCK_NOEXEC, and MNT_LOCK_ATIME mnt flags. Taken together these changes and the fixes for MNT_LOCK_READONLY should make it safe for an unprivileged user to create a user namespace and to call "mount --bind -o remount,... ..." without the danger of mount flags being changed maliciously. Cc: stable@vger.kernel.org Acked-by: Serge E. Hallyn Signed-off-by: "Eric W. Biederman" --- fs/namespace.c | 36 +++++++++++++++++++++++++++++++++--- include/linux/mount.h | 5 +++++ 2 files changed, 38 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/fs/namespace.c b/fs/namespace.c index 1105a577a14f..dd9c93b5a9d5 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -890,8 +890,21 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root, mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~(MNT_WRITE_HOLD|MNT_MARKED); /* Don't allow unprivileged users to change mount flags */ - if ((flag & CL_UNPRIVILEGED) && (mnt->mnt.mnt_flags & MNT_READONLY)) - mnt->mnt.mnt_flags |= MNT_LOCK_READONLY; + if (flag & CL_UNPRIVILEGED) { + mnt->mnt.mnt_flags |= MNT_LOCK_ATIME; + + if (mnt->mnt.mnt_flags & MNT_READONLY) + mnt->mnt.mnt_flags |= MNT_LOCK_READONLY; + + if (mnt->mnt.mnt_flags & MNT_NODEV) + mnt->mnt.mnt_flags |= MNT_LOCK_NODEV; + + if (mnt->mnt.mnt_flags & MNT_NOSUID) + mnt->mnt.mnt_flags |= MNT_LOCK_NOSUID; + + if (mnt->mnt.mnt_flags & MNT_NOEXEC) + mnt->mnt.mnt_flags |= MNT_LOCK_NOEXEC; + } /* Don't allow unprivileged users to reveal what is under a mount */ if ((flag & CL_UNPRIVILEGED) && list_empty(&old->mnt_expire)) @@ -1931,6 +1944,23 @@ static int do_remount(struct path *path, int flags, int mnt_flags, !(mnt_flags & MNT_READONLY)) { return -EPERM; } + if ((mnt->mnt.mnt_flags & MNT_LOCK_NODEV) && + !(mnt_flags & MNT_NODEV)) { + return -EPERM; + } + if ((mnt->mnt.mnt_flags & MNT_LOCK_NOSUID) && + !(mnt_flags & MNT_NOSUID)) { + return -EPERM; + } + if ((mnt->mnt.mnt_flags & MNT_LOCK_NOEXEC) && + !(mnt_flags & MNT_NOEXEC)) { + return -EPERM; + } + if ((mnt->mnt.mnt_flags & MNT_LOCK_ATIME) && + ((mnt->mnt.mnt_flags & MNT_ATIME_MASK) != (mnt_flags & MNT_ATIME_MASK))) { + return -EPERM; + } + err = security_sb_remount(sb, data); if (err) return err; @@ -2129,7 +2159,7 @@ static int do_new_mount(struct path *path, const char 
*fstype, int flags, */ if (!(type->fs_flags & FS_USERNS_DEV_MOUNT)) { flags |= MS_NODEV; - mnt_flags |= MNT_NODEV; + mnt_flags |= MNT_NODEV | MNT_LOCK_NODEV; } } diff --git a/include/linux/mount.h b/include/linux/mount.h index b637a89e1fae..b0c1e6574e7f 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -45,12 +45,17 @@ struct mnt_namespace; #define MNT_USER_SETTABLE_MASK (MNT_NOSUID | MNT_NODEV | MNT_NOEXEC \ | MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME \ | MNT_READONLY) +#define MNT_ATIME_MASK (MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME ) #define MNT_INTERNAL_FLAGS (MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | \ MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED) #define MNT_INTERNAL 0x4000 +#define MNT_LOCK_ATIME 0x040000 +#define MNT_LOCK_NOEXEC 0x080000 +#define MNT_LOCK_NOSUID 0x100000 +#define MNT_LOCK_NODEV 0x200000 #define MNT_LOCK_READONLY 0x400000 #define MNT_LOCKED 0x800000 #define MNT_DOOMED 0x1000000 -- cgit v1.2.3-59-g8ed1b From 536721b1cb3fb50034bf6f6c7a7ea16166970e69 Mon Sep 17 00:00:00 2001 From: Karoly Kemeny Date: Wed, 30 Jul 2014 20:27:36 +0200 Subject: net: kernel-doc compliant documentation for net_device Net_device is a vast and important structure, but it has no kernel-doc compliant documentation. This patch extracts the comments from the structure to clean it up, and let the scripts extract documentation from it. I know that the patch is big, but it's just reordering of comments into the appropriate form, and adding a few more, for the missing members. Signed-off-by: Karoly Kemeny Acked-by: Randy Dunlap Signed-off-by: David S. Miller --- include/linux/netdevice.h | 381 ++++++++++++++++++++++++++++++---------------- 1 file changed, 250 insertions(+), 131 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 8e8fb3ed574b..38377392d082 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1231,42 +1231,228 @@ enum netdev_priv_flags { #define IFF_LIVE_ADDR_CHANGE IFF_LIVE_ADDR_CHANGE #define IFF_MACVLAN IFF_MACVLAN -/* - * The DEVICE structure. - * Actually, this whole structure is a big mistake. It mixes I/O - * data with strictly "high-level" data, and it has to know about - * almost every data structure used in the INET module. +/** + * struct net_device - The DEVICE structure. + * Actually, this whole structure is a big mistake. It mixes I/O + * data with strictly "high-level" data, and it has to know about + * almost every data structure used in the INET module. + * + * @name: This is the first field of the "visible" part of this structure + * (i.e. as seen by users in the "Space.c" file). It is the name + * of the interface. 
+ * + * @name_hlist: Device name hash chain, please keep it close to name[] + * @ifalias: SNMP alias + * @mem_end: Shared memory end + * @mem_start: Shared memory start + * @base_addr: Device I/O address + * @irq: Device IRQ number + * + * @state: Generic network queuing layer state, see netdev_state_t + * @dev_list: The global list of network devices + * @napi_list: List entry, that is used for polling napi devices + * @unreg_list: List entry, that is used, when we are unregistering the + * device, see the function unregister_netdev + * @close_list: List entry, that is used, when we are closing the device + * + * @adj_list: Directly linked devices, like slaves for bonding + * @all_adj_list: All linked devices, *including* neighbours + * @features: Currently active device features + * @hw_features: User-changeable features + * + * @wanted_features: User-requested features + * @vlan_features: Mask of features inheritable by VLAN devices + * + * @hw_enc_features: Mask of features inherited by encapsulating devices + * This field indicates what encapsulation + * offloads the hardware is capable of doing, + * and drivers will need to set them appropriately. + * + * @mpls_features: Mask of features inheritable by MPLS + * + * @ifindex: interface index + * @iflink: unique device identifier + * + * @stats: Statistics struct, which was left as a legacy, use + * rtnl_link_stats64 instead + * + * @rx_dropped: Dropped packets by core network, + * do not use this in drivers + * @tx_dropped: Dropped packets by core network, + * do not use this in drivers + * + * @carrier_changes: Stats to monitor carrier on<->off transitions + * + * @wireless_handlers: List of functions to handle Wireless Extensions, + * instead of ioctl, + * see for details. + * @wireless_data: Instance data managed by the core of wireless extensions + * + * @netdev_ops: Includes several pointers to callbacks, + * if one wants to override the ndo_*() functions + * @ethtool_ops: Management operations + * @fwd_ops: Management operations + * @header_ops: Includes callbacks for creating,parsing,rebuilding,etc + * of Layer 2 headers. + * + * @flags: Interface flags (a la BSD) + * @priv_flags: Like 'flags' but invisible to userspace, + * see if.h for the definitions + * @gflags: Global flags ( kept as legacy ) + * @padded: How much padding added by alloc_netdev() + * @operstate: RFC2863 operstate + * @link_mode: Mapping policy to operstate + * @if_port: Selectable AUI, TP, ... + * @dma: DMA channel + * @mtu: Interface MTU value + * @type: Interface hardware type + * @hard_header_len: Hardware header length + * + * @needed_headroom: Extra headroom the hardware may need, but not in all + * cases can this be guaranteed + * @needed_tailroom: Extra tailroom the hardware may need, but not in all + * cases can this be guaranteed. 
Some cases also use + * LL_MAX_HEADER instead to allocate the skb + * + * interface address info: + * + * @perm_addr: Permanent hw address + * @addr_assign_type: Hw address assignment type + * @addr_len: Hardware address length + * @neigh_priv_len; Used in neigh_alloc(), + * initialized only in atm/clip.c + * @dev_id: Used to differentiate devices that share + * the same link layer address + * @dev_port: Used to differentiate devices that share + * the same function + * @addr_list_lock: XXX: need comments on this one + * @uc: unicast mac addresses + * @mc: multicast mac addresses + * @dev_addrs: list of device hw addresses + * @queues_kset: Group of all Kobjects in the Tx and RX queues + * @uc_promisc: Counter, that indicates, that promiscuous mode + * has been enabled due to the need to listen to + * additional unicast addresses in a device that + * does not implement ndo_set_rx_mode() + * @promiscuity: Number of times, the NIC is told to work in + * Promiscuous mode, if it becomes 0 the NIC will + * exit from working in Promiscuous mode + * @allmulti: Counter, enables or disables allmulticast mode + * + * @vlan_info: VLAN info + * @dsa_ptr: dsa specific data + * @tipc_ptr: TIPC specific data + * @atalk_ptr: AppleTalk link + * @ip_ptr: IPv4 specific data + * @dn_ptr: DECnet specific data + * @ip6_ptr: IPv6 specific data + * @ax25_ptr: AX.25 specific data + * @ieee80211_ptr: IEEE 802.11 specific data, assign before registering + * + * @last_rx: Time of last Rx + * @dev_addr: Hw address (before bcast, + * because most packets are unicast) + * + * @_rx: Array of RX queues + * @num_rx_queues: Number of RX queues + * allocated at register_netdev() time + * @real_num_rx_queues: Number of RX queues currently active in device + * + * @rx_handler: handler for received packets + * @rx_handler_data: XXX: need comments on this one + * @ingress_queue: XXX: need comments on this one + * @broadcast: hw bcast address + * + * @_tx: Array of TX queues + * @num_tx_queues: Number of TX queues allocated at alloc_netdev_mq() time + * @real_num_tx_queues: Number of TX queues currently active in device + * @qdisc: Root qdisc from userspace point of view + * @tx_queue_len: Max frames per queue allowed + * @tx_global_lock: XXX: need comments on this one + * + * @xps_maps: XXX: need comments on this one + * + * @rx_cpu_rmap: CPU reverse-mapping for RX completion interrupts, + * indexed by RX queue number. Assigned by driver. 
+ * This must only be set if the ndo_rx_flow_steer + * operation is defined + * + * @trans_start: Time (in jiffies) of last Tx + * @watchdog_timeo: Represents the timeout that is used by + * the watchdog ( see dev_watchdog() ) + * @watchdog_timer: List of timers + * + * @pcpu_refcnt: Number of references to this device + * @todo_list: Delayed register/unregister + * @index_hlist: Device index hash chain + * @link_watch_list: XXX: need comments on this one + * + * @reg_state: Register/unregister state machine + * @dismantle: Device is going to be freed + * @rtnl_link_state: This enum represents the phases of creating + * a new link + * + * @destructor: Called from unregister, + * can be used to call free_netdev + * @npinfo: XXX: need comments on this one + * @nd_net: Network namespace this network device is inside + * + * @ml_priv: Mid-layer private + * @lstats: Loopback statistics + * @tstats: Tunnel statistics + * @dstats: Dummy statistics + * @vstats: Virtual ethernet statistics + * + * @garp_port: GARP + * @mrp_port: MRP + * + * @dev: Class/net/name entry + * @sysfs_groups: Space for optional device, statistics and wireless + * sysfs groups + * + * @sysfs_rx_queue_group: Space for optional per-rx queue attributes + * @rtnl_link_ops: Rtnl_link_ops + * + * @gso_max_size: Maximum size of generic segmentation offload + * @gso_max_segs: Maximum number of segments that can be passed to the + * NIC for GSO + * + * @dcbnl_ops: Data Center Bridging netlink ops + * @num_tc: Number of traffic classes in the net device + * @tc_to_txq: XXX: need comments on this one + * @prio_tc_map XXX: need comments on this one + * + * @fcoe_ddp_xid: Max exchange id for FCoE LRO by ddp + * + * @priomap: XXX: need comments on this one + * @phydev: Physical device may attach itself + * for hardware timestamping + * + * @qdisc_tx_busylock: XXX: need comments on this one + * + * @group: The group, that the device belongs to + * @pm_qos_req: Power Management QoS object * * FIXME: cleanup struct net_device such that network protocol info * moves out. */ struct net_device { - - /* - * This is the first field of the "visible" part of this structure - * (i.e. as seen by users in the "Space.c" file). It is the name - * of the interface. - */ char name[IFNAMSIZ]; - - /* device name hash chain, please keep it close to name[] */ struct hlist_node name_hlist; - - /* snmp alias */ char *ifalias; - /* * I/O specific fields * FIXME: Merge these and struct ifmap into one */ - unsigned long mem_end; /* shared mem end */ - unsigned long mem_start; /* shared mem start */ - unsigned long base_addr; /* device I/O address */ - int irq; /* device IRQ number */ + unsigned long mem_end; + unsigned long mem_start; + unsigned long base_addr; + int irq; /* - * Some hardware also needs these fields, but they are not + * Some hardware also needs these fields (state,dev_list, + * napi_list,unreg_list,close_list) but they are not * part of the usual set specified in Space.c. 
*/ @@ -1277,106 +1463,74 @@ struct net_device { struct list_head unreg_list; struct list_head close_list; - /* directly linked devices, like slaves for bonding */ struct { struct list_head upper; struct list_head lower; } adj_list; - /* all linked devices, *including* neighbours */ struct { struct list_head upper; struct list_head lower; } all_adj_list; - - /* currently active device features */ netdev_features_t features; - /* user-changeable features */ netdev_features_t hw_features; - /* user-requested features */ netdev_features_t wanted_features; - /* mask of features inheritable by VLAN devices */ netdev_features_t vlan_features; - /* mask of features inherited by encapsulating devices - * This field indicates what encapsulation offloads - * the hardware is capable of doing, and drivers will - * need to set them appropriately. - */ netdev_features_t hw_enc_features; - /* mask of fetures inheritable by MPLS */ netdev_features_t mpls_features; - /* Interface index. Unique device identifier */ int ifindex; int iflink; struct net_device_stats stats; - /* dropped packets by core network, Do not use this in drivers */ atomic_long_t rx_dropped; atomic_long_t tx_dropped; - /* Stats to monitor carrier on<->off transitions */ atomic_t carrier_changes; #ifdef CONFIG_WIRELESS_EXT - /* List of functions to handle Wireless Extensions (instead of ioctl). - * See for details. Jean II */ const struct iw_handler_def * wireless_handlers; - /* Instance data managed by the core of Wireless Extensions. */ struct iw_public_data * wireless_data; #endif - /* Management operations */ const struct net_device_ops *netdev_ops; const struct ethtool_ops *ethtool_ops; const struct forwarding_accel_ops *fwd_ops; - /* Hardware header description */ const struct header_ops *header_ops; - unsigned int flags; /* interface flags (a la BSD) */ - unsigned int priv_flags; /* Like 'flags' but invisible to userspace. - * See if.h for definitions. */ + unsigned int flags; + unsigned int priv_flags; + unsigned short gflags; - unsigned short padded; /* How much padding added by alloc_netdev() */ + unsigned short padded; - unsigned char operstate; /* RFC2863 operstate */ - unsigned char link_mode; /* mapping policy to operstate */ + unsigned char operstate; + unsigned char link_mode; - unsigned char if_port; /* Selectable AUI, TP,..*/ - unsigned char dma; /* DMA channel */ + unsigned char if_port; + unsigned char dma; - unsigned int mtu; /* interface MTU value */ - unsigned short type; /* interface hardware type */ - unsigned short hard_header_len; /* hardware hdr length */ + unsigned int mtu; + unsigned short type; + unsigned short hard_header_len; - /* extra head- and tailroom the hardware may need, but not in all cases - * can this be guaranteed, especially tailroom. Some cases also use - * LL_MAX_HEADER instead to allocate the skb. - */ unsigned short needed_headroom; unsigned short needed_tailroom; /* Interface address info. 
*/ - unsigned char perm_addr[MAX_ADDR_LEN]; /* permanent hw address */ - unsigned char addr_assign_type; /* hw address assignment type */ - unsigned char addr_len; /* hardware address length */ + unsigned char perm_addr[MAX_ADDR_LEN]; + unsigned char addr_assign_type; + unsigned char addr_len; unsigned short neigh_priv_len; - unsigned short dev_id; /* Used to differentiate devices - * that share the same link - * layer address - */ - unsigned short dev_port; /* Used to differentiate - * devices that share the same - * function - */ + unsigned short dev_id; + unsigned short dev_port; spinlock_t addr_list_lock; - struct netdev_hw_addr_list uc; /* Unicast mac addresses */ - struct netdev_hw_addr_list mc; /* Multicast mac addresses */ - struct netdev_hw_addr_list dev_addrs; /* list of device - * hw addresses - */ + struct netdev_hw_addr_list uc; + struct netdev_hw_addr_list mc; + struct netdev_hw_addr_list dev_addrs; + #ifdef CONFIG_SYSFS struct kset *queues_kset; #endif @@ -1391,40 +1545,34 @@ struct net_device { /* Protocol specific pointers */ #if IS_ENABLED(CONFIG_VLAN_8021Q) - struct vlan_info __rcu *vlan_info; /* VLAN info */ + struct vlan_info __rcu *vlan_info; #endif #if IS_ENABLED(CONFIG_NET_DSA) - struct dsa_switch_tree *dsa_ptr; /* dsa specific data */ + struct dsa_switch_tree *dsa_ptr; #endif #if IS_ENABLED(CONFIG_TIPC) - struct tipc_bearer __rcu *tipc_ptr; /* TIPC specific data */ + struct tipc_bearer __rcu *tipc_ptr; #endif - void *atalk_ptr; /* AppleTalk link */ - struct in_device __rcu *ip_ptr; /* IPv4 specific data */ - struct dn_dev __rcu *dn_ptr; /* DECnet specific data */ - struct inet6_dev __rcu *ip6_ptr; /* IPv6 specific data */ - void *ax25_ptr; /* AX.25 specific data */ - struct wireless_dev *ieee80211_ptr; /* IEEE 802.11 specific data, - assign before registering */ + void *atalk_ptr; + struct in_device __rcu *ip_ptr; + struct dn_dev __rcu *dn_ptr; + struct inet6_dev __rcu *ip6_ptr; + void *ax25_ptr; + struct wireless_dev *ieee80211_ptr; /* * Cache lines mostly used on receive path (including eth_type_trans()) */ - unsigned long last_rx; /* Time of last Rx */ + unsigned long last_rx; /* Interface address info used in eth_type_trans() */ - unsigned char *dev_addr; /* hw address, (before bcast - because most packets are - unicast) */ + unsigned char *dev_addr; #ifdef CONFIG_SYSFS struct netdev_rx_queue *_rx; - /* Number of RX queues allocated at register_netdev() time */ unsigned int num_rx_queues; - - /* Number of RX queues currently active in device */ unsigned int real_num_rx_queues; #endif @@ -1433,33 +1581,23 @@ struct net_device { void __rcu *rx_handler_data; struct netdev_queue __rcu *ingress_queue; - unsigned char broadcast[MAX_ADDR_LEN]; /* hw bcast add */ + unsigned char broadcast[MAX_ADDR_LEN]; /* * Cache lines mostly used on transmit path */ struct netdev_queue *_tx ____cacheline_aligned_in_smp; - - /* Number of TX queues allocated at alloc_netdev_mq() time */ unsigned int num_tx_queues; - - /* Number of TX queues currently active in device */ unsigned int real_num_tx_queues; - - /* root qdisc from userspace point of view */ struct Qdisc *qdisc; - - unsigned long tx_queue_len; /* Max frames per queue allowed */ + unsigned long tx_queue_len; spinlock_t tx_global_lock; #ifdef CONFIG_XPS struct xps_dev_maps __rcu *xps_maps; #endif #ifdef CONFIG_RFS_ACCEL - /* CPU reverse-mapping for RX completion interrupts, indexed - * by RX queue number. Assigned by driver. This must only be - * set if the ndo_rx_flow_steer operation is defined. 
*/ struct cpu_rmap *rx_cpu_rmap; #endif @@ -1469,22 +1607,17 @@ struct net_device { * trans_start here is expensive for high speed devices on SMP, * please use netdev_queue->trans_start instead. */ - unsigned long trans_start; /* Time (in jiffies) of last Tx */ + unsigned long trans_start; - int watchdog_timeo; /* used by dev_watchdog() */ + int watchdog_timeo; struct timer_list watchdog_timer; - /* Number of references to this device */ int __percpu *pcpu_refcnt; - - /* delayed register/unregister */ struct list_head todo_list; - /* device index hash chain */ - struct hlist_node index_hlist; + struct hlist_node index_hlist; struct list_head link_watch_list; - /* register/unregister state machine */ enum { NETREG_UNINITIALIZED=0, NETREG_REGISTERED, /* completed register_netdevice */ NETREG_UNREGISTERING, /* called unregister_netdevice */ @@ -1493,14 +1626,13 @@ struct net_device { NETREG_DUMMY, /* dummy device for NAPI poll */ } reg_state:8; - bool dismantle; /* device is going do be freed */ + bool dismantle; enum { RTNL_LINK_INITIALIZED, RTNL_LINK_INITIALIZING, } rtnl_link_state:16; - /* Called from unregister, can be used to call free_netdev */ void (*destructor)(struct net_device *dev); #ifdef CONFIG_NETPOLL @@ -1508,31 +1640,25 @@ struct net_device { #endif #ifdef CONFIG_NET_NS - /* Network namespace this network device is inside */ struct net *nd_net; #endif /* mid-layer private */ union { - void *ml_priv; - struct pcpu_lstats __percpu *lstats; /* loopback stats */ + void *ml_priv; + struct pcpu_lstats __percpu *lstats; struct pcpu_sw_netstats __percpu *tstats; - struct pcpu_dstats __percpu *dstats; /* dummy stats */ - struct pcpu_vstats __percpu *vstats; /* veth stats */ + struct pcpu_dstats __percpu *dstats; + struct pcpu_vstats __percpu *vstats; }; - /* GARP */ + struct garp_port __rcu *garp_port; - /* MRP */ struct mrp_port __rcu *mrp_port; - /* class/net/name entry */ - struct device dev; - /* space for optional device, statistics, and wireless sysfs groups */ + struct device dev; const struct attribute_group *sysfs_groups[4]; - /* space for optional per-rx queue attributes */ const struct attribute_group *sysfs_rx_queue_group; - /* rtnetlink link ops */ const struct rtnl_link_ops *rtnl_link_ops; /* for setting kernel sock attribute on TCP connection setup */ @@ -1542,7 +1668,6 @@ struct net_device { u16 gso_max_segs; #ifdef CONFIG_DCB - /* Data Center Bridging netlink ops */ const struct dcbnl_rtnl_ops *dcbnl_ops; #endif u8 num_tc; @@ -1550,20 +1675,14 @@ struct net_device { u8 prio_tc_map[TC_BITMASK + 1]; #if IS_ENABLED(CONFIG_FCOE) - /* max exchange id for FCoE LRO by ddp */ unsigned int fcoe_ddp_xid; #endif #if IS_ENABLED(CONFIG_CGROUP_NET_PRIO) struct netprio_map __rcu *priomap; #endif - /* phy device may attach itself for hardware timestamping */ struct phy_device *phydev; - struct lock_class_key *qdisc_tx_busylock; - - /* group the device belongs to */ int group; - struct pm_qos_request pm_qos_req; }; #define to_net_dev(d) container_of(d, struct net_device, dev) -- cgit v1.2.3-59-g8ed1b From 3e3dc25fe7d5e33026bdfca5e8fab08be6a8729c Mon Sep 17 00:00:00 2001 From: Mark Rustad Date: Fri, 25 Jul 2014 02:53:38 -0700 Subject: crypto: Resolve shadow warnings Change formal parameters to not clash with global names to eliminate many W=2 warnings. 
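The global name being shadowed is the completion primitive from <linux/completion.h>. A minimal illustration of the pattern that a W=2 (-Wshadow) build complains about, distilled from the callback setters touched below; the function and its name are invented for illustration, only the types come from the patch:

	#include <linux/completion.h>	/* declares void complete(struct completion *) */
	#include <linux/crypto.h>	/* crypto_completion_t, struct crypto_async_request */

	/* The parameter named "complete" hides the global complete() above,
	 * so W=2 warns along the lines of
	 * "declaration of 'complete' shadows a global declaration".
	 * Renaming the parameter to "compl", as the patch does, silences
	 * the warning without changing behaviour.
	 */
	static void set_callback(struct crypto_async_request *base,
				 crypto_completion_t complete, void *data)
	{
		base->complete = complete;	/* the parameter, not complete(), is meant */
		base->data = data;
	}
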
Signed-off-by: Mark Rustad Signed-off-by: Jeff Kirsher Signed-off-by: Herbert Xu --- crypto/cryptd.c | 12 ++++---- crypto/eseqiv.c | 8 ++--- crypto/gcm.c | 30 +++++++++--------- crypto/seqiv.c | 16 +++++----- crypto/tcrypt.c | 77 ++++++++++++++++++++++++----------------------- include/crypto/aead.h | 4 +-- include/crypto/hash.h | 4 +-- include/crypto/skcipher.h | 4 +-- include/linux/crypto.h | 8 ++--- 9 files changed, 82 insertions(+), 81 deletions(-) (limited to 'include/linux') diff --git a/crypto/cryptd.c b/crypto/cryptd.c index 7bdd61b867c8..e592c90abebb 100644 --- a/crypto/cryptd.c +++ b/crypto/cryptd.c @@ -233,7 +233,7 @@ static void cryptd_blkcipher_decrypt(struct crypto_async_request *req, int err) } static int cryptd_blkcipher_enqueue(struct ablkcipher_request *req, - crypto_completion_t complete) + crypto_completion_t compl) { struct cryptd_blkcipher_request_ctx *rctx = ablkcipher_request_ctx(req); struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); @@ -241,7 +241,7 @@ static int cryptd_blkcipher_enqueue(struct ablkcipher_request *req, queue = cryptd_get_queue(crypto_ablkcipher_tfm(tfm)); rctx->complete = req->base.complete; - req->base.complete = complete; + req->base.complete = compl; return cryptd_enqueue_request(queue, &req->base); } @@ -414,7 +414,7 @@ static int cryptd_hash_setkey(struct crypto_ahash *parent, } static int cryptd_hash_enqueue(struct ahash_request *req, - crypto_completion_t complete) + crypto_completion_t compl) { struct cryptd_hash_request_ctx *rctx = ahash_request_ctx(req); struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); @@ -422,7 +422,7 @@ static int cryptd_hash_enqueue(struct ahash_request *req, cryptd_get_queue(crypto_ahash_tfm(tfm)); rctx->complete = req->base.complete; - req->base.complete = complete; + req->base.complete = compl; return cryptd_enqueue_request(queue, &req->base); } @@ -667,14 +667,14 @@ static void cryptd_aead_decrypt(struct crypto_async_request *areq, int err) } static int cryptd_aead_enqueue(struct aead_request *req, - crypto_completion_t complete) + crypto_completion_t compl) { struct cryptd_aead_request_ctx *rctx = aead_request_ctx(req); struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct cryptd_queue *queue = cryptd_get_queue(crypto_aead_tfm(tfm)); rctx->complete = req->base.complete; - req->base.complete = complete; + req->base.complete = compl; return cryptd_enqueue_request(queue, &req->base); } diff --git a/crypto/eseqiv.c b/crypto/eseqiv.c index 42ce9f570aec..bf7ab4a89493 100644 --- a/crypto/eseqiv.c +++ b/crypto/eseqiv.c @@ -68,7 +68,7 @@ static int eseqiv_givencrypt(struct skcipher_givcrypt_request *req) struct eseqiv_ctx *ctx = crypto_ablkcipher_ctx(geniv); struct eseqiv_request_ctx *reqctx = skcipher_givcrypt_reqctx(req); struct ablkcipher_request *subreq; - crypto_completion_t complete; + crypto_completion_t compl; void *data; struct scatterlist *osrc, *odst; struct scatterlist *dst; @@ -86,7 +86,7 @@ static int eseqiv_givencrypt(struct skcipher_givcrypt_request *req) ablkcipher_request_set_tfm(subreq, skcipher_geniv_cipher(geniv)); giv = req->giv; - complete = req->creq.base.complete; + compl = req->creq.base.complete; data = req->creq.base.data; osrc = req->creq.src; @@ -101,11 +101,11 @@ static int eseqiv_givencrypt(struct skcipher_givcrypt_request *req) if (vsrc != giv + ivsize && vdst != giv + ivsize) { giv = PTR_ALIGN((u8 *)reqctx->tail, crypto_ablkcipher_alignmask(geniv) + 1); - complete = eseqiv_complete; + compl = eseqiv_complete; data = req; } - ablkcipher_request_set_callback(subreq, 
req->creq.base.flags, complete, + ablkcipher_request_set_callback(subreq, req->creq.base.flags, compl, data); sg_init_table(reqctx->src, 2); diff --git a/crypto/gcm.c b/crypto/gcm.c index b4f017939004..276cdac567b6 100644 --- a/crypto/gcm.c +++ b/crypto/gcm.c @@ -228,14 +228,14 @@ static void gcm_hash_final_done(struct crypto_async_request *areq, int err); static int gcm_hash_update(struct aead_request *req, struct crypto_gcm_req_priv_ctx *pctx, - crypto_completion_t complete, + crypto_completion_t compl, struct scatterlist *src, unsigned int len) { struct ahash_request *ahreq = &pctx->u.ahreq; ahash_request_set_callback(ahreq, aead_request_flags(req), - complete, req); + compl, req); ahash_request_set_crypt(ahreq, src, NULL, len); return crypto_ahash_update(ahreq); @@ -244,12 +244,12 @@ static int gcm_hash_update(struct aead_request *req, static int gcm_hash_remain(struct aead_request *req, struct crypto_gcm_req_priv_ctx *pctx, unsigned int remain, - crypto_completion_t complete) + crypto_completion_t compl) { struct ahash_request *ahreq = &pctx->u.ahreq; ahash_request_set_callback(ahreq, aead_request_flags(req), - complete, req); + compl, req); sg_init_one(pctx->src, gcm_zeroes, remain); ahash_request_set_crypt(ahreq, pctx->src, NULL, remain); @@ -375,14 +375,14 @@ static void __gcm_hash_assoc_remain_done(struct aead_request *req, int err) { struct crypto_gcm_req_priv_ctx *pctx = crypto_gcm_reqctx(req); struct crypto_gcm_ghash_ctx *gctx = &pctx->ghash_ctx; - crypto_completion_t complete; + crypto_completion_t compl; unsigned int remain = 0; if (!err && gctx->cryptlen) { remain = gcm_remain(gctx->cryptlen); - complete = remain ? gcm_hash_crypt_done : + compl = remain ? gcm_hash_crypt_done : gcm_hash_crypt_remain_done; - err = gcm_hash_update(req, pctx, complete, + err = gcm_hash_update(req, pctx, compl, gctx->src, gctx->cryptlen); if (err == -EINPROGRESS || err == -EBUSY) return; @@ -429,14 +429,14 @@ static void gcm_hash_assoc_done(struct crypto_async_request *areq, int err) static void __gcm_hash_init_done(struct aead_request *req, int err) { struct crypto_gcm_req_priv_ctx *pctx = crypto_gcm_reqctx(req); - crypto_completion_t complete; + crypto_completion_t compl; unsigned int remain = 0; if (!err && req->assoclen) { remain = gcm_remain(req->assoclen); - complete = remain ? gcm_hash_assoc_done : + compl = remain ? gcm_hash_assoc_done : gcm_hash_assoc_remain_done; - err = gcm_hash_update(req, pctx, complete, + err = gcm_hash_update(req, pctx, compl, req->assoc, req->assoclen); if (err == -EINPROGRESS || err == -EBUSY) return; @@ -462,7 +462,7 @@ static int gcm_hash(struct aead_request *req, struct crypto_gcm_ghash_ctx *gctx = &pctx->ghash_ctx; struct crypto_gcm_ctx *ctx = crypto_tfm_ctx(req->base.tfm); unsigned int remain; - crypto_completion_t complete; + crypto_completion_t compl; int err; ahash_request_set_tfm(ahreq, ctx->ghash); @@ -473,8 +473,8 @@ static int gcm_hash(struct aead_request *req, if (err) return err; remain = gcm_remain(req->assoclen); - complete = remain ? gcm_hash_assoc_done : gcm_hash_assoc_remain_done; - err = gcm_hash_update(req, pctx, complete, req->assoc, req->assoclen); + compl = remain ? gcm_hash_assoc_done : gcm_hash_assoc_remain_done; + err = gcm_hash_update(req, pctx, compl, req->assoc, req->assoclen); if (err) return err; if (remain) { @@ -484,8 +484,8 @@ static int gcm_hash(struct aead_request *req, return err; } remain = gcm_remain(gctx->cryptlen); - complete = remain ? 
gcm_hash_crypt_done : gcm_hash_crypt_remain_done; - err = gcm_hash_update(req, pctx, complete, gctx->src, gctx->cryptlen); + compl = remain ? gcm_hash_crypt_done : gcm_hash_crypt_remain_done; + err = gcm_hash_update(req, pctx, compl, gctx->src, gctx->cryptlen); if (err) return err; if (remain) { diff --git a/crypto/seqiv.c b/crypto/seqiv.c index f2cba4ed6f25..ee190fcedcd2 100644 --- a/crypto/seqiv.c +++ b/crypto/seqiv.c @@ -100,7 +100,7 @@ static int seqiv_givencrypt(struct skcipher_givcrypt_request *req) struct crypto_ablkcipher *geniv = skcipher_givcrypt_reqtfm(req); struct seqiv_ctx *ctx = crypto_ablkcipher_ctx(geniv); struct ablkcipher_request *subreq = skcipher_givcrypt_reqctx(req); - crypto_completion_t complete; + crypto_completion_t compl; void *data; u8 *info; unsigned int ivsize; @@ -108,7 +108,7 @@ static int seqiv_givencrypt(struct skcipher_givcrypt_request *req) ablkcipher_request_set_tfm(subreq, skcipher_geniv_cipher(geniv)); - complete = req->creq.base.complete; + compl = req->creq.base.complete; data = req->creq.base.data; info = req->creq.info; @@ -122,11 +122,11 @@ static int seqiv_givencrypt(struct skcipher_givcrypt_request *req) if (!info) return -ENOMEM; - complete = seqiv_complete; + compl = seqiv_complete; data = req; } - ablkcipher_request_set_callback(subreq, req->creq.base.flags, complete, + ablkcipher_request_set_callback(subreq, req->creq.base.flags, compl, data); ablkcipher_request_set_crypt(subreq, req->creq.src, req->creq.dst, req->creq.nbytes, info); @@ -146,7 +146,7 @@ static int seqiv_aead_givencrypt(struct aead_givcrypt_request *req) struct seqiv_ctx *ctx = crypto_aead_ctx(geniv); struct aead_request *areq = &req->areq; struct aead_request *subreq = aead_givcrypt_reqctx(req); - crypto_completion_t complete; + crypto_completion_t compl; void *data; u8 *info; unsigned int ivsize; @@ -154,7 +154,7 @@ static int seqiv_aead_givencrypt(struct aead_givcrypt_request *req) aead_request_set_tfm(subreq, aead_geniv_base(geniv)); - complete = areq->base.complete; + compl = areq->base.complete; data = areq->base.data; info = areq->iv; @@ -168,11 +168,11 @@ static int seqiv_aead_givencrypt(struct aead_givcrypt_request *req) if (!info) return -ENOMEM; - complete = seqiv_aead_complete; + compl = seqiv_aead_complete; data = req; } - aead_request_set_callback(subreq, areq->base.flags, complete, data); + aead_request_set_callback(subreq, areq->base.flags, compl, data); aead_request_set_crypt(subreq, areq->src, areq->dst, areq->cryptlen, info); aead_request_set_assoc(subreq, areq->assoc, areq->assoclen); diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index c48d07857a2b..890449e6e7ef 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -73,13 +73,13 @@ static char *check[] = { }; static int test_cipher_jiffies(struct blkcipher_desc *desc, int enc, - struct scatterlist *sg, int blen, int sec) + struct scatterlist *sg, int blen, int secs) { unsigned long start, end; int bcount; int ret; - for (start = jiffies, end = start + sec * HZ, bcount = 0; + for (start = jiffies, end = start + secs * HZ, bcount = 0; time_before(jiffies, end); bcount++) { if (enc) ret = crypto_blkcipher_encrypt(desc, sg, sg, blen); @@ -91,7 +91,7 @@ static int test_cipher_jiffies(struct blkcipher_desc *desc, int enc, } printk("%d operations in %d seconds (%ld bytes)\n", - bcount, sec, (long)bcount * blen); + bcount, secs, (long)bcount * blen); return 0; } @@ -143,13 +143,13 @@ out: } static int test_aead_jiffies(struct aead_request *req, int enc, - int blen, int sec) + int blen, int secs) { unsigned long 
start, end; int bcount; int ret; - for (start = jiffies, end = start + sec * HZ, bcount = 0; + for (start = jiffies, end = start + secs * HZ, bcount = 0; time_before(jiffies, end); bcount++) { if (enc) ret = crypto_aead_encrypt(req); @@ -161,7 +161,7 @@ static int test_aead_jiffies(struct aead_request *req, int enc, } printk("%d operations in %d seconds (%ld bytes)\n", - bcount, sec, (long)bcount * blen); + bcount, secs, (long)bcount * blen); return 0; } @@ -265,7 +265,7 @@ static void sg_init_aead(struct scatterlist *sg, char *xbuf[XBUFSIZE], } } -static void test_aead_speed(const char *algo, int enc, unsigned int sec, +static void test_aead_speed(const char *algo, int enc, unsigned int secs, struct aead_speed_template *template, unsigned int tcount, u8 authsize, unsigned int aad_size, u8 *keysize) @@ -379,8 +379,9 @@ static void test_aead_speed(const char *algo, int enc, unsigned int sec, aead_request_set_crypt(req, sg, sgout, *b_size, iv); aead_request_set_assoc(req, asg, aad_size); - if (sec) - ret = test_aead_jiffies(req, enc, *b_size, sec); + if (secs) + ret = test_aead_jiffies(req, enc, *b_size, + secs); else ret = test_aead_cycles(req, enc, *b_size); @@ -410,7 +411,7 @@ out_noxbuf: return; } -static void test_cipher_speed(const char *algo, int enc, unsigned int sec, +static void test_cipher_speed(const char *algo, int enc, unsigned int secs, struct cipher_speed_template *template, unsigned int tcount, u8 *keysize) { @@ -489,9 +490,9 @@ static void test_cipher_speed(const char *algo, int enc, unsigned int sec, crypto_blkcipher_set_iv(tfm, iv, iv_len); } - if (sec) + if (secs) ret = test_cipher_jiffies(&desc, enc, sg, - *b_size, sec); + *b_size, secs); else ret = test_cipher_cycles(&desc, enc, sg, *b_size); @@ -512,13 +513,13 @@ out: static int test_hash_jiffies_digest(struct hash_desc *desc, struct scatterlist *sg, int blen, - char *out, int sec) + char *out, int secs) { unsigned long start, end; int bcount; int ret; - for (start = jiffies, end = start + sec * HZ, bcount = 0; + for (start = jiffies, end = start + secs * HZ, bcount = 0; time_before(jiffies, end); bcount++) { ret = crypto_hash_digest(desc, sg, blen, out); if (ret) @@ -526,22 +527,22 @@ static int test_hash_jiffies_digest(struct hash_desc *desc, } printk("%6u opers/sec, %9lu bytes/sec\n", - bcount / sec, ((long)bcount * blen) / sec); + bcount / secs, ((long)bcount * blen) / secs); return 0; } static int test_hash_jiffies(struct hash_desc *desc, struct scatterlist *sg, - int blen, int plen, char *out, int sec) + int blen, int plen, char *out, int secs) { unsigned long start, end; int bcount, pcount; int ret; if (plen == blen) - return test_hash_jiffies_digest(desc, sg, blen, out, sec); + return test_hash_jiffies_digest(desc, sg, blen, out, secs); - for (start = jiffies, end = start + sec * HZ, bcount = 0; + for (start = jiffies, end = start + secs * HZ, bcount = 0; time_before(jiffies, end); bcount++) { ret = crypto_hash_init(desc); if (ret) @@ -558,7 +559,7 @@ static int test_hash_jiffies(struct hash_desc *desc, struct scatterlist *sg, } printk("%6u opers/sec, %9lu bytes/sec\n", - bcount / sec, ((long)bcount * blen) / sec); + bcount / secs, ((long)bcount * blen) / secs); return 0; } @@ -679,7 +680,7 @@ static void test_hash_sg_init(struct scatterlist *sg) } } -static void test_hash_speed(const char *algo, unsigned int sec, +static void test_hash_speed(const char *algo, unsigned int secs, struct hash_speed *speed) { struct scatterlist sg[TVMEMSIZE]; @@ -725,9 +726,9 @@ static void test_hash_speed(const char *algo, 
unsigned int sec, "(%5u byte blocks,%5u bytes per update,%4u updates): ", i, speed[i].blen, speed[i].plen, speed[i].blen / speed[i].plen); - if (sec) + if (secs) ret = test_hash_jiffies(&desc, sg, speed[i].blen, - speed[i].plen, output, sec); + speed[i].plen, output, secs); else ret = test_hash_cycles(&desc, sg, speed[i].blen, speed[i].plen, output); @@ -772,13 +773,13 @@ static inline int do_one_ahash_op(struct ahash_request *req, int ret) } static int test_ahash_jiffies_digest(struct ahash_request *req, int blen, - char *out, int sec) + char *out, int secs) { unsigned long start, end; int bcount; int ret; - for (start = jiffies, end = start + sec * HZ, bcount = 0; + for (start = jiffies, end = start + secs * HZ, bcount = 0; time_before(jiffies, end); bcount++) { ret = do_one_ahash_op(req, crypto_ahash_digest(req)); if (ret) @@ -786,22 +787,22 @@ static int test_ahash_jiffies_digest(struct ahash_request *req, int blen, } printk("%6u opers/sec, %9lu bytes/sec\n", - bcount / sec, ((long)bcount * blen) / sec); + bcount / secs, ((long)bcount * blen) / secs); return 0; } static int test_ahash_jiffies(struct ahash_request *req, int blen, - int plen, char *out, int sec) + int plen, char *out, int secs) { unsigned long start, end; int bcount, pcount; int ret; if (plen == blen) - return test_ahash_jiffies_digest(req, blen, out, sec); + return test_ahash_jiffies_digest(req, blen, out, secs); - for (start = jiffies, end = start + sec * HZ, bcount = 0; + for (start = jiffies, end = start + secs * HZ, bcount = 0; time_before(jiffies, end); bcount++) { ret = crypto_ahash_init(req); if (ret) @@ -818,7 +819,7 @@ static int test_ahash_jiffies(struct ahash_request *req, int blen, } pr_cont("%6u opers/sec, %9lu bytes/sec\n", - bcount / sec, ((long)bcount * blen) / sec); + bcount / secs, ((long)bcount * blen) / secs); return 0; } @@ -918,7 +919,7 @@ out: return 0; } -static void test_ahash_speed(const char *algo, unsigned int sec, +static void test_ahash_speed(const char *algo, unsigned int secs, struct hash_speed *speed) { struct scatterlist sg[TVMEMSIZE]; @@ -968,9 +969,9 @@ static void test_ahash_speed(const char *algo, unsigned int sec, ahash_request_set_crypt(req, sg, output, speed[i].plen); - if (sec) + if (secs) ret = test_ahash_jiffies(req, speed[i].blen, - speed[i].plen, output, sec); + speed[i].plen, output, secs); else ret = test_ahash_cycles(req, speed[i].blen, speed[i].plen, output); @@ -1002,13 +1003,13 @@ static inline int do_one_acipher_op(struct ablkcipher_request *req, int ret) } static int test_acipher_jiffies(struct ablkcipher_request *req, int enc, - int blen, int sec) + int blen, int secs) { unsigned long start, end; int bcount; int ret; - for (start = jiffies, end = start + sec * HZ, bcount = 0; + for (start = jiffies, end = start + secs * HZ, bcount = 0; time_before(jiffies, end); bcount++) { if (enc) ret = do_one_acipher_op(req, @@ -1022,7 +1023,7 @@ static int test_acipher_jiffies(struct ablkcipher_request *req, int enc, } pr_cont("%d operations in %d seconds (%ld bytes)\n", - bcount, sec, (long)bcount * blen); + bcount, secs, (long)bcount * blen); return 0; } @@ -1073,7 +1074,7 @@ out: return ret; } -static void test_acipher_speed(const char *algo, int enc, unsigned int sec, +static void test_acipher_speed(const char *algo, int enc, unsigned int secs, struct cipher_speed_template *template, unsigned int tcount, u8 *keysize) { @@ -1177,9 +1178,9 @@ static void test_acipher_speed(const char *algo, int enc, unsigned int sec, ablkcipher_request_set_crypt(req, sg, sg, *b_size, iv); - if 
(sec) + if (secs) ret = test_acipher_jiffies(req, enc, - *b_size, sec); + *b_size, secs); else ret = test_acipher_cycles(req, enc, *b_size); diff --git a/include/crypto/aead.h b/include/crypto/aead.h index 0edf949f6369..94b19be67574 100644 --- a/include/crypto/aead.h +++ b/include/crypto/aead.h @@ -75,9 +75,9 @@ static inline void aead_givcrypt_free(struct aead_givcrypt_request *req) static inline void aead_givcrypt_set_callback( struct aead_givcrypt_request *req, u32 flags, - crypto_completion_t complete, void *data) + crypto_completion_t compl, void *data) { - aead_request_set_callback(&req->areq, flags, complete, data); + aead_request_set_callback(&req->areq, flags, compl, data); } static inline void aead_givcrypt_set_crypt(struct aead_givcrypt_request *req, diff --git a/include/crypto/hash.h b/include/crypto/hash.h index 26cb1eb16f4c..a39195539601 100644 --- a/include/crypto/hash.h +++ b/include/crypto/hash.h @@ -238,10 +238,10 @@ static inline struct ahash_request *ahash_request_cast( static inline void ahash_request_set_callback(struct ahash_request *req, u32 flags, - crypto_completion_t complete, + crypto_completion_t compl, void *data) { - req->base.complete = complete; + req->base.complete = compl; req->base.data = data; req->base.flags = flags; } diff --git a/include/crypto/skcipher.h b/include/crypto/skcipher.h index 25fd6126522d..07d245f073d1 100644 --- a/include/crypto/skcipher.h +++ b/include/crypto/skcipher.h @@ -86,9 +86,9 @@ static inline void skcipher_givcrypt_free(struct skcipher_givcrypt_request *req) static inline void skcipher_givcrypt_set_callback( struct skcipher_givcrypt_request *req, u32 flags, - crypto_completion_t complete, void *data) + crypto_completion_t compl, void *data) { - ablkcipher_request_set_callback(&req->creq, flags, complete, data); + ablkcipher_request_set_callback(&req->creq, flags, compl, data); } static inline void skcipher_givcrypt_set_crypt( diff --git a/include/linux/crypto.h b/include/linux/crypto.h index b92eadf92d72..d45e949699ea 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -710,9 +710,9 @@ static inline void ablkcipher_request_free(struct ablkcipher_request *req) static inline void ablkcipher_request_set_callback( struct ablkcipher_request *req, - u32 flags, crypto_completion_t complete, void *data) + u32 flags, crypto_completion_t compl, void *data) { - req->base.complete = complete; + req->base.complete = compl; req->base.data = data; req->base.flags = flags; } @@ -841,10 +841,10 @@ static inline void aead_request_free(struct aead_request *req) static inline void aead_request_set_callback(struct aead_request *req, u32 flags, - crypto_completion_t complete, + crypto_completion_t compl, void *data) { - req->base.complete = complete; + req->base.complete = compl; req->base.data = data; req->base.flags = flags; } -- cgit v1.2.3-59-g8ed1b From 9dcfee01930e6cc1e84d28c232664f0c19a1f86c Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Mon, 14 Jul 2014 10:28:04 +0200 Subject: drivers: of: add automated assignment of reserved regions to client devices This patch adds code for automated assignment of reserved memory regions to struct device. 
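As a minimal sketch of the consumer side of this interface (the "foo" driver, its probe/remove functions and the "vendor,foo" compatible string below are purely illustrative and not part of this patch), a client driver whose device node carries a "memory-region" phandle would call the new helpers from its probe and remove paths:

#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/of.h>
#include <linux/of_reserved_mem.h>

static int foo_probe(struct platform_device *pdev)
{
	/* Looks up the "memory-region" phandle of pdev->dev.of_node and,
	 * if a reserved region with matching ops is found, runs its
	 * device_init() callback to attach the region to this device.
	 */
	of_reserved_mem_device_init(&pdev->dev);

	return 0;
}

static int foo_remove(struct platform_device *pdev)
{
	/* Runs the region's device_release() callback, if any. */
	of_reserved_mem_device_release(&pdev->dev);
	return 0;
}

static const struct of_device_id foo_of_match[] = {
	{ .compatible = "vendor,foo" },	/* illustrative compatible string */
	{ }
};

static struct platform_driver foo_driver = {
	.probe	= foo_probe,
	.remove	= foo_remove,
	.driver	= {
		.name		= "foo",
		.of_match_table	= foo_of_match,
	},
};
module_platform_driver(foo_driver);

MODULE_LICENSE("GPL");
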
reserved_mem->ops->device_init()/device_cleanup() callbacks are called to perform reserved memory driver specific initialization and cleanup Based on previous code provided by Josh Cartwright Signed-off-by: Marek Szyprowski Signed-off-by: Grant Likely --- drivers/of/of_reserved_mem.c | 70 +++++++++++++++++++++++++++++++++++++++++ include/linux/of_reserved_mem.h | 7 +++++ 2 files changed, 77 insertions(+) (limited to 'include/linux') diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c index 632aae861375..59fb12e84e6b 100644 --- a/drivers/of/of_reserved_mem.c +++ b/drivers/of/of_reserved_mem.c @@ -206,8 +206,16 @@ void __init fdt_init_reserved_mem(void) for (i = 0; i < reserved_mem_count; i++) { struct reserved_mem *rmem = &reserved_mem[i]; unsigned long node = rmem->fdt_node; + int len; + const __be32 *prop; int err = 0; + prop = of_get_flat_dt_prop(node, "phandle", &len); + if (!prop) + prop = of_get_flat_dt_prop(node, "linux,phandle", &len); + if (prop) + rmem->phandle = of_read_number(prop, len/4); + if (rmem->size == 0) err = __reserved_mem_alloc_size(node, rmem->name, &rmem->base, &rmem->size); @@ -215,3 +223,65 @@ void __init fdt_init_reserved_mem(void) __reserved_mem_init_node(rmem); } } + +static inline struct reserved_mem *__find_rmem(struct device_node *node) +{ + unsigned int i; + + if (!node->phandle) + return NULL; + + for (i = 0; i < reserved_mem_count; i++) + if (reserved_mem[i].phandle == node->phandle) + return &reserved_mem[i]; + return NULL; +} + +/** + * of_reserved_mem_device_init() - assign reserved memory region to given device + * + * This function assign memory region pointed by "memory-region" device tree + * property to the given device. + */ +void of_reserved_mem_device_init(struct device *dev) +{ + struct reserved_mem *rmem; + struct device_node *np; + + np = of_parse_phandle(dev->of_node, "memory-region", 0); + if (!np) + return; + + rmem = __find_rmem(np); + of_node_put(np); + + if (!rmem || !rmem->ops || !rmem->ops->device_init) + return; + + rmem->ops->device_init(rmem, dev); + dev_info(dev, "assigned reserved memory node %s\n", rmem->name); +} + +/** + * of_reserved_mem_device_release() - release reserved memory device structures + * + * This function releases structures allocated for memory region handling for + * the given device. 
+ */ +void of_reserved_mem_device_release(struct device *dev) +{ + struct reserved_mem *rmem; + struct device_node *np; + + np = of_parse_phandle(dev->of_node, "memory-region", 0); + if (!np) + return; + + rmem = __find_rmem(np); + of_node_put(np); + + if (!rmem || !rmem->ops || !rmem->ops->device_release) + return; + + rmem->ops->device_release(rmem, dev); +} diff --git a/include/linux/of_reserved_mem.h b/include/linux/of_reserved_mem.h index 4669ddfdd5af..5b5efae09135 100644 --- a/include/linux/of_reserved_mem.h +++ b/include/linux/of_reserved_mem.h @@ -8,6 +8,7 @@ struct reserved_mem_ops; struct reserved_mem { const char *name; unsigned long fdt_node; + unsigned long phandle; const struct reserved_mem_ops *ops; phys_addr_t base; phys_addr_t size; @@ -27,10 +28,16 @@ typedef int (*reservedmem_of_init_fn)(struct reserved_mem *rmem); _OF_DECLARE(reservedmem, name, compat, init, reservedmem_of_init_fn) #ifdef CONFIG_OF_RESERVED_MEM +void of_reserved_mem_device_init(struct device *dev); +void of_reserved_mem_device_release(struct device *dev); + void fdt_init_reserved_mem(void); void fdt_reserved_mem_save_node(unsigned long node, const char *uname, phys_addr_t base, phys_addr_t size); #else +static inline void of_reserved_mem_device_init(struct device *dev) { } +static inline void of_reserved_mem_device_release(struct device *pdev) { } + static inline void fdt_init_reserved_mem(void) { } static inline void fdt_reserved_mem_save_node(unsigned long node, const char *uname, phys_addr_t base, phys_addr_t size) { } -- cgit v1.2.3-59-g8ed1b From e630664c8383f300c4146d7613d61e5a8eb1f8e3 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 31 Jul 2014 11:01:29 +0300 Subject: mlx4_core: Add helper functions to support MR re-registration Add few helper functions to support a mechanism of getting an MPT, modifying it and updating the HCA with the modified object. The code takes 2 paths, one for directly changing the MPT (and sometimes its related MTTs) and another one which queries the MPT and updates the HCA via fw command SW2HW_MPT. The first path is used in native mode; the second path is slower and is used only in SRIOV. 
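A condensed sketch of how a caller is expected to chain the new helpers, for example to move an existing MR to a new protection domain (the wrapper function below is illustrative only; real callers such as the mlx4 IB rereg path add more steps and error handling):

#include <linux/mlx4/device.h>

static int example_change_mr_pd(struct mlx4_dev *dev, struct mlx4_mr *mmr,
				u32 new_pdn)
{
	struct mlx4_mpt_entry **mpt_entry;
	int err;

	/* Take the MPT out of HW ownership: HW2SW_MPT, then either a direct
	 * dmpt table lookup (native) or QUERY_MPT into a mailbox (SRIOV).
	 */
	err = mlx4_mr_hw_get_mpt(dev, mmr, &mpt_entry);
	if (err)
		return err;

	/* Modify the software copy of the entry. */
	err = mlx4_mr_hw_change_pd(dev, *mpt_entry, new_pdn);
	if (!err)
		/* Hand it back to HW: SYNC_TPT natively, SW2HW_MPT under SRIOV. */
		err = mlx4_mr_hw_write_mpt(dev, mmr, mpt_entry);

	/* Frees the QUERY_MPT mailbox in the SRIOV case; no-op otherwise. */
	mlx4_mr_hw_put_mpt(dev, mpt_entry);

	return err;
}
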
Signed-off-by: Jack Morgenstein Signed-off-by: Matan Barak Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/net/ethernet/mellanox/mlx4/mlx4.h | 2 + drivers/net/ethernet/mellanox/mlx4/mr.c | 160 +++++++++++++++++++++ .../net/ethernet/mellanox/mlx4/resource_tracker.c | 26 +++- include/linux/mlx4/device.h | 16 +++ 4 files changed, 202 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index 1d8af7336807..b40d587974fa 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -279,6 +279,8 @@ struct mlx4_icm_table { #define MLX4_MPT_FLAG_PHYSICAL (1 << 9) #define MLX4_MPT_FLAG_REGION (1 << 8) +#define MLX4_MPT_PD_MASK (0x1FFFFUL) +#define MLX4_MPT_PD_VF_MASK (0xFE0000UL) #define MLX4_MPT_PD_FLAG_FAST_REG (1 << 27) #define MLX4_MPT_PD_FLAG_RAE (1 << 28) #define MLX4_MPT_PD_FLAG_EN_INV (3 << 24) diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c index 2839abb878a6..7d717eccb7b0 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mr.c +++ b/drivers/net/ethernet/mellanox/mlx4/mr.c @@ -298,6 +298,131 @@ static int mlx4_HW2SW_MPT(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED); } +int mlx4_mr_hw_get_mpt(struct mlx4_dev *dev, struct mlx4_mr *mmr, + struct mlx4_mpt_entry ***mpt_entry) +{ + int err; + int key = key_to_hw_index(mmr->key) & (dev->caps.num_mpts - 1); + struct mlx4_cmd_mailbox *mailbox = NULL; + + /* Make sure that at this point we have single-threaded access only */ + + if (mmr->enabled != MLX4_MPT_EN_HW) + return -EINVAL; + + err = mlx4_HW2SW_MPT(dev, NULL, key); + + if (err) { + mlx4_warn(dev, "HW2SW_MPT failed (%d).", err); + mlx4_warn(dev, "Most likely the MR has MWs bound to it.\n"); + return err; + } + + mmr->enabled = MLX4_MPT_EN_SW; + + if (!mlx4_is_mfunc(dev)) { + **mpt_entry = mlx4_table_find( + &mlx4_priv(dev)->mr_table.dmpt_table, + key, NULL); + } else { + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR_OR_NULL(mailbox)) + return PTR_ERR(mailbox); + + err = mlx4_cmd_box(dev, 0, mailbox->dma, key, + 0, MLX4_CMD_QUERY_MPT, + MLX4_CMD_TIME_CLASS_B, + MLX4_CMD_WRAPPED); + + if (err) + goto free_mailbox; + + *mpt_entry = (struct mlx4_mpt_entry **)&mailbox->buf; + } + + if (!(*mpt_entry) || !(**mpt_entry)) { + err = -ENOMEM; + goto free_mailbox; + } + + return 0; + +free_mailbox: + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} +EXPORT_SYMBOL_GPL(mlx4_mr_hw_get_mpt); + +int mlx4_mr_hw_write_mpt(struct mlx4_dev *dev, struct mlx4_mr *mmr, + struct mlx4_mpt_entry **mpt_entry) +{ + int err; + + if (!mlx4_is_mfunc(dev)) { + /* Make sure any changes to this entry are flushed */ + wmb(); + + *(u8 *)(*mpt_entry) = MLX4_MPT_STATUS_HW; + + /* Make sure the new status is written */ + wmb(); + + err = mlx4_SYNC_TPT(dev); + } else { + int key = key_to_hw_index(mmr->key) & (dev->caps.num_mpts - 1); + + struct mlx4_cmd_mailbox *mailbox = + container_of((void *)mpt_entry, struct mlx4_cmd_mailbox, + buf); + + err = mlx4_SW2HW_MPT(dev, mailbox, key); + } + + mmr->pd = be32_to_cpu((*mpt_entry)->pd_flags) & MLX4_MPT_PD_MASK; + if (!err) + mmr->enabled = MLX4_MPT_EN_HW; + return err; +} +EXPORT_SYMBOL_GPL(mlx4_mr_hw_write_mpt); + +void mlx4_mr_hw_put_mpt(struct mlx4_dev *dev, + struct mlx4_mpt_entry **mpt_entry) +{ + if (mlx4_is_mfunc(dev)) { + struct mlx4_cmd_mailbox *mailbox = + container_of((void *)mpt_entry, struct mlx4_cmd_mailbox, 
+ buf); + mlx4_free_cmd_mailbox(dev, mailbox); + } +} +EXPORT_SYMBOL_GPL(mlx4_mr_hw_put_mpt); + +int mlx4_mr_hw_change_pd(struct mlx4_dev *dev, struct mlx4_mpt_entry *mpt_entry, + u32 pdn) +{ + u32 pd_flags = be32_to_cpu(mpt_entry->pd_flags); + /* The wrapper function will put the slave's id here */ + if (mlx4_is_mfunc(dev)) + pd_flags &= ~MLX4_MPT_PD_VF_MASK; + mpt_entry->pd_flags = cpu_to_be32((pd_flags & ~MLX4_MPT_PD_MASK) | + (pdn & MLX4_MPT_PD_MASK) + | MLX4_MPT_PD_FLAG_EN_INV); + return 0; +} +EXPORT_SYMBOL_GPL(mlx4_mr_hw_change_pd); + +int mlx4_mr_hw_change_access(struct mlx4_dev *dev, + struct mlx4_mpt_entry *mpt_entry, + u32 access) +{ + u32 flags = (be32_to_cpu(mpt_entry->flags) & ~MLX4_PERM_MASK) | + (access & MLX4_PERM_MASK); + + mpt_entry->flags = cpu_to_be32(flags); + return 0; +} +EXPORT_SYMBOL_GPL(mlx4_mr_hw_change_access); + static int mlx4_mr_alloc_reserved(struct mlx4_dev *dev, u32 mridx, u32 pd, u64 iova, u64 size, u32 access, int npages, int page_shift, struct mlx4_mr *mr) @@ -463,6 +588,41 @@ int mlx4_mr_free(struct mlx4_dev *dev, struct mlx4_mr *mr) } EXPORT_SYMBOL_GPL(mlx4_mr_free); +void mlx4_mr_rereg_mem_cleanup(struct mlx4_dev *dev, struct mlx4_mr *mr) +{ + mlx4_mtt_cleanup(dev, &mr->mtt); +} +EXPORT_SYMBOL_GPL(mlx4_mr_rereg_mem_cleanup); + +int mlx4_mr_rereg_mem_write(struct mlx4_dev *dev, struct mlx4_mr *mr, + u64 iova, u64 size, int npages, + int page_shift, struct mlx4_mpt_entry *mpt_entry) +{ + int err; + + mpt_entry->start = cpu_to_be64(mr->iova); + mpt_entry->length = cpu_to_be64(mr->size); + mpt_entry->entity_size = cpu_to_be32(mr->mtt.page_shift); + + err = mlx4_mtt_init(dev, npages, page_shift, &mr->mtt); + if (err) + return err; + + if (mr->mtt.order < 0) { + mpt_entry->flags |= cpu_to_be32(MLX4_MPT_FLAG_PHYSICAL); + mpt_entry->mtt_addr = 0; + } else { + mpt_entry->mtt_addr = cpu_to_be64(mlx4_mtt_addr(dev, + &mr->mtt)); + if (mr->mtt.page_shift == 0) + mpt_entry->mtt_sz = cpu_to_be32(1 << mr->mtt.order); + } + mr->enabled = MLX4_MPT_EN_SW; + + return 0; +} +EXPORT_SYMBOL_GPL(mlx4_mr_rereg_mem_write); + int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr) { struct mlx4_cmd_mailbox *mailbox; diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 0efc1368e5a8..1089367fed22 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -2613,12 +2613,34 @@ int mlx4_QUERY_MPT_wrapper(struct mlx4_dev *dev, int slave, if (err) return err; - if (mpt->com.from_state != RES_MPT_HW) { + if (mpt->com.from_state == RES_MPT_MAPPED) { + /* In order to allow rereg in SRIOV, we need to alter the MPT entry. To do + * that, the VF must read the MPT. But since the MPT entry memory is not + * in the VF's virtual memory space, it must use QUERY_MPT to obtain the + * entry contents. To guarantee that the MPT cannot be changed, the driver + * must perform HW2SW_MPT before this query and return the MPT entry to HW + * ownership fofollowing the change. The change here allows the VF to + * perform QUERY_MPT also when the entry is in SW ownership. 
+ */ + struct mlx4_mpt_entry *mpt_entry = mlx4_table_find( + &mlx4_priv(dev)->mr_table.dmpt_table, + mpt->key, NULL); + + if (NULL == mpt_entry || NULL == outbox->buf) { + err = -EINVAL; + goto out; + } + + memcpy(outbox->buf, mpt_entry, sizeof(*mpt_entry)); + + err = 0; + } else if (mpt->com.from_state == RES_MPT_HW) { + err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); + } else { err = -EBUSY; goto out; } - err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); out: put_res(dev, slave, id, RES_MPT); diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 35b51e7af886..bac002167ace 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -262,6 +262,7 @@ enum { MLX4_PERM_REMOTE_WRITE = 1 << 13, MLX4_PERM_ATOMIC = 1 << 14, MLX4_PERM_BIND_MW = 1 << 15, + MLX4_PERM_MASK = 0xFC00 }; enum { @@ -1243,4 +1244,19 @@ int mlx4_vf_smi_enabled(struct mlx4_dev *dev, int slave, int port); int mlx4_vf_get_enable_smi_admin(struct mlx4_dev *dev, int slave, int port); int mlx4_vf_set_enable_smi_admin(struct mlx4_dev *dev, int slave, int port, int enable); +int mlx4_mr_hw_get_mpt(struct mlx4_dev *dev, struct mlx4_mr *mmr, + struct mlx4_mpt_entry ***mpt_entry); +int mlx4_mr_hw_write_mpt(struct mlx4_dev *dev, struct mlx4_mr *mmr, + struct mlx4_mpt_entry **mpt_entry); +int mlx4_mr_hw_change_pd(struct mlx4_dev *dev, struct mlx4_mpt_entry *mpt_entry, + u32 pdn); +int mlx4_mr_hw_change_access(struct mlx4_dev *dev, + struct mlx4_mpt_entry *mpt_entry, + u32 access); +void mlx4_mr_hw_put_mpt(struct mlx4_dev *dev, + struct mlx4_mpt_entry **mpt_entry); +void mlx4_mr_rereg_mem_cleanup(struct mlx4_dev *dev, struct mlx4_mr *mr); +int mlx4_mr_rereg_mem_write(struct mlx4_dev *dev, struct mlx4_mr *mr, + u64 iova, u64 size, int npages, + int page_shift, struct mlx4_mpt_entry *mpt_entry); #endif /* MLX4_DEVICE_H */ -- cgit v1.2.3-59-g8ed1b From cd83ce9e6195aa3ea15ab4db92892802c20df5d0 Mon Sep 17 00:00:00 2001 From: James P Michels III Date: Sun, 27 Jul 2014 13:28:04 -0400 Subject: usb-core bInterval quirk This patch adds a usb quirk to support devices with interupt endpoints and bInterval values expressed as microframes. The quirk causes the parse endpoint function to modify the reported bInterval to a standards conforming value. There is currently code in the endpoint parser that checks for bIntervals that are outside of the valid range (1-16 for USB 2+ high speed and super speed interupt endpoints). In this case, the code assumes the bInterval is being reported in 1ms frames. As well, the correction is only applied if the original bInterval value is out of the 1-16 range. With this quirk applied to the device, the bInterval will be accurately adjusted from microframes to an exponent. Signed-off-by: James P Michels III Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/config.c | 11 +++++++++++ drivers/usb/core/quirks.c | 4 ++++ include/linux/usb/quirks.h | 11 +++++++++++ 3 files changed, 26 insertions(+) (limited to 'include/linux') diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c index 1ab4df1de2da..b2a540b43f97 100644 --- a/drivers/usb/core/config.c +++ b/drivers/usb/core/config.c @@ -199,6 +199,17 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum, if (n == 0) n = 9; /* 32 ms = 2^(9-1) uframes */ j = 16; + + /* + * Adjust bInterval for quirked devices. + * This quirk fixes bIntervals reported in + * linear microframes. 
+ */ + if (to_usb_device(ddev)->quirks & + USB_QUIRK_LINEAR_UFRAME_INTR_BINTERVAL) { + n = clamp(fls(d->bInterval), i, j); + i = j = n; + } break; default: /* USB_SPEED_FULL or _LOW */ /* For low-speed, 10 ms is the official minimum. diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 2c9ba4077075..bae636e2a1a3 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -145,6 +145,10 @@ static const struct usb_device_id usb_quirk_list[] = { /* SKYMEDI USB_DRIVE */ { USB_DEVICE(0x1516, 0x8628), .driver_info = USB_QUIRK_RESET_RESUME }, + /* Razer - Razer Blade Keyboard */ + { USB_DEVICE(0x1532, 0x0116), .driver_info = + USB_QUIRK_LINEAR_UFRAME_INTR_BINTERVAL }, + /* BUILDWIN Photo Frame */ { USB_DEVICE(0x1908, 0x1315), .driver_info = USB_QUIRK_HONOR_BNUMINTERFACES }, diff --git a/include/linux/usb/quirks.h b/include/linux/usb/quirks.h index 52f944dfe2fd..55a17b188daa 100644 --- a/include/linux/usb/quirks.h +++ b/include/linux/usb/quirks.h @@ -30,4 +30,15 @@ descriptor */ #define USB_QUIRK_DELAY_INIT 0x00000040 +/* + * For high speed and super speed interupt endpoints, the USB 2.0 and + * USB 3.0 spec require the interval in microframes + * (1 microframe = 125 microseconds) to be calculated as + * interval = 2 ^ (bInterval-1). + * + * Devices with this quirk report their bInterval as the result of this + * calculation instead of the exponent variable used in the calculation. + */ +#define USB_QUIRK_LINEAR_UFRAME_INTR_BINTERVAL 0x00000080 + #endif /* __LINUX_USB_QUIRKS_H */ -- cgit v1.2.3-59-g8ed1b From 278571baca2aecf5fb5cb5c8b002dbfa0a6c524c Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 30 Jul 2014 20:34:12 -0700 Subject: net: filter: simplify socket charging attaching bpf program to a socket involves multiple socket memory arithmetic, since size of 'sk_filter' is changing when classic BPF is converted to eBPF. Also common path of program creation has to deal with two ways of freeing the memory. Simplify the code by delaying socket charging until program is ready and its size is known Signed-off-by: Alexei Starovoitov Signed-off-by: David S. 
Miller --- include/linux/filter.h | 2 +- net/core/filter.c | 87 +++++++++++++++++++++----------------------------- net/core/sock.c | 9 ++++-- 3 files changed, 45 insertions(+), 53 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 20dd50ef7271..00640edc166f 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -366,7 +366,7 @@ int sk_chk_filter(const struct sock_filter *filter, unsigned int flen); int sk_get_filter(struct sock *sk, struct sock_filter __user *filter, unsigned int len); -void sk_filter_charge(struct sock *sk, struct sk_filter *fp); +bool sk_filter_charge(struct sock *sk, struct sk_filter *fp); void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp); u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); diff --git a/net/core/filter.c b/net/core/filter.c index 42c1944b0c63..5a6aeb1d40b8 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -872,41 +872,30 @@ static void sk_filter_release(struct sk_filter *fp) void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp) { - atomic_sub(sk_filter_size(fp->len), &sk->sk_omem_alloc); - sk_filter_release(fp); -} + u32 filter_size = sk_filter_size(fp->len); -void sk_filter_charge(struct sock *sk, struct sk_filter *fp) -{ - atomic_inc(&fp->refcnt); - atomic_add(sk_filter_size(fp->len), &sk->sk_omem_alloc); + atomic_sub(filter_size, &sk->sk_omem_alloc); + sk_filter_release(fp); } -static struct sk_filter *__sk_migrate_realloc(struct sk_filter *fp, - struct sock *sk, - unsigned int len) +/* try to charge the socket memory if there is space available + * return true on success + */ +bool sk_filter_charge(struct sock *sk, struct sk_filter *fp) { - struct sk_filter *fp_new; - - if (sk == NULL) - return krealloc(fp, len, GFP_KERNEL); - - fp_new = sock_kmalloc(sk, len, GFP_KERNEL); - if (fp_new) { - *fp_new = *fp; - /* As we're keeping orig_prog in fp_new along, - * we need to make sure we're not evicting it - * from the old fp. - */ - fp->orig_prog = NULL; - sk_filter_uncharge(sk, fp); + u32 filter_size = sk_filter_size(fp->len); + + /* same check as in sock_kmalloc() */ + if (filter_size <= sysctl_optmem_max && + atomic_read(&sk->sk_omem_alloc) + filter_size < sysctl_optmem_max) { + atomic_inc(&fp->refcnt); + atomic_add(filter_size, &sk->sk_omem_alloc); + return true; } - - return fp_new; + return false; } -static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp, - struct sock *sk) +static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp) { struct sock_filter *old_prog; struct sk_filter *old_fp; @@ -938,7 +927,7 @@ static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp, /* Expand fp for appending the new filter representation. */ old_fp = fp; - fp = __sk_migrate_realloc(old_fp, sk, sk_filter_size(new_len)); + fp = krealloc(old_fp, sk_filter_size(new_len), GFP_KERNEL); if (!fp) { /* The old_fp is still around in case we couldn't * allocate new memory, so uncharge on that one. @@ -956,7 +945,7 @@ static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp, /* 2nd sk_convert_filter() can fail only if it fails * to allocate memory, remapping must succeed. Note, * that at this time old_fp has already been released - * by __sk_migrate_realloc(). + * by krealloc(). */ goto out_err_free; @@ -968,16 +957,11 @@ static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp, out_err_free: kfree(old_prog); out_err: - /* Rollback filter setup. 
*/ - if (sk != NULL) - sk_filter_uncharge(sk, fp); - else - kfree(fp); + __sk_filter_release(fp); return ERR_PTR(err); } -static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp, - struct sock *sk) +static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp) { int err; @@ -986,10 +970,7 @@ static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp, err = sk_chk_filter(fp->insns, fp->len); if (err) { - if (sk != NULL) - sk_filter_uncharge(sk, fp); - else - kfree(fp); + __sk_filter_release(fp); return ERR_PTR(err); } @@ -1002,7 +983,7 @@ static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp, * internal BPF translation for the optimized interpreter. */ if (!fp->jited) - fp = __sk_migrate_filter(fp, sk); + fp = __sk_migrate_filter(fp); return fp; } @@ -1041,10 +1022,10 @@ int sk_unattached_filter_create(struct sk_filter **pfp, */ fp->orig_prog = NULL; - /* __sk_prepare_filter() already takes care of uncharging + /* __sk_prepare_filter() already takes care of freeing * memory in case something goes wrong. */ - fp = __sk_prepare_filter(fp, NULL); + fp = __sk_prepare_filter(fp); if (IS_ERR(fp)) return PTR_ERR(fp); @@ -1083,31 +1064,37 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) if (fprog->filter == NULL) return -EINVAL; - fp = sock_kmalloc(sk, sk_fsize, GFP_KERNEL); + fp = kmalloc(sk_fsize, GFP_KERNEL); if (!fp) return -ENOMEM; if (copy_from_user(fp->insns, fprog->filter, fsize)) { - sock_kfree_s(sk, fp, sk_fsize); + kfree(fp); return -EFAULT; } - atomic_set(&fp->refcnt, 1); fp->len = fprog->len; err = sk_store_orig_filter(fp, fprog); if (err) { - sk_filter_uncharge(sk, fp); + kfree(fp); return -ENOMEM; } - /* __sk_prepare_filter() already takes care of uncharging + /* __sk_prepare_filter() already takes care of freeing * memory in case something goes wrong. 
*/ - fp = __sk_prepare_filter(fp, sk); + fp = __sk_prepare_filter(fp); if (IS_ERR(fp)) return PTR_ERR(fp); + atomic_set(&fp->refcnt, 0); + + if (!sk_filter_charge(sk, fp)) { + __sk_filter_release(fp); + return -ENOMEM; + } + old_fp = rcu_dereference_protected(sk->sk_filter, sock_owned_by_user(sk)); rcu_assign_pointer(sk->sk_filter, fp); diff --git a/net/core/sock.c b/net/core/sock.c index 134291d73fcd..a741163568fa 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1474,6 +1474,7 @@ static void sk_update_clone(const struct sock *sk, struct sock *newsk) struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) { struct sock *newsk; + bool is_charged = true; newsk = sk_prot_alloc(sk->sk_prot, priority, sk->sk_family); if (newsk != NULL) { @@ -1518,9 +1519,13 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) filter = rcu_dereference_protected(newsk->sk_filter, 1); if (filter != NULL) - sk_filter_charge(newsk, filter); + /* though it's an empty new sock, the charging may fail + * if sysctl_optmem_max was changed between creation of + * original socket and cloning + */ + is_charged = sk_filter_charge(newsk, filter); - if (unlikely(xfrm_sk_clone_policy(newsk))) { + if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk))) { /* It is still raw copy of parent, so invalidate * destructor and make plain sk_free() */ newsk->sk_destruct = NULL; -- cgit v1.2.3-59-g8ed1b From 009937e78a45553a86d26654f192b2fd9ebe289d Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 30 Jul 2014 20:34:13 -0700 Subject: net: filter: rename sk_filter_proglen -> bpf_classic_proglen trivial rename to better match semantics of macro Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/filter.h | 3 +-- net/core/filter.c | 8 ++++---- net/core/sock_diag.c | 2 +- 3 files changed, 6 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 00640edc166f..3769341a745d 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -344,8 +344,7 @@ static inline unsigned int sk_filter_size(unsigned int proglen) offsetof(struct sk_filter, insns[proglen])); } -#define sk_filter_proglen(fprog) \ - (fprog->len * sizeof(fprog->filter[0])) +#define bpf_classic_proglen(fprog) (fprog->len * sizeof(fprog->filter[0])) int sk_filter(struct sock *sk, struct sk_buff *skb); diff --git a/net/core/filter.c b/net/core/filter.c index 5a6aeb1d40b8..d6cb287e4f59 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -813,7 +813,7 @@ EXPORT_SYMBOL(sk_chk_filter); static int sk_store_orig_filter(struct sk_filter *fp, const struct sock_fprog *fprog) { - unsigned int fsize = sk_filter_proglen(fprog); + unsigned int fsize = bpf_classic_proglen(fprog); struct sock_fprog_kern *fkprog; fp->orig_prog = kmalloc(sizeof(*fkprog), GFP_KERNEL); @@ -1001,7 +1001,7 @@ static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp) int sk_unattached_filter_create(struct sk_filter **pfp, struct sock_fprog_kern *fprog) { - unsigned int fsize = sk_filter_proglen(fprog); + unsigned int fsize = bpf_classic_proglen(fprog); struct sk_filter *fp; /* Make sure new filter is there and in the right amounts. 
*/ @@ -1053,7 +1053,7 @@ EXPORT_SYMBOL_GPL(sk_unattached_filter_destroy); int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) { struct sk_filter *fp, *old_fp; - unsigned int fsize = sk_filter_proglen(fprog); + unsigned int fsize = bpf_classic_proglen(fprog); unsigned int sk_fsize = sk_filter_size(fprog->len); int err; @@ -1154,7 +1154,7 @@ int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, goto out; ret = -EFAULT; - if (copy_to_user(ubuf, fprog->filter, sk_filter_proglen(fprog))) + if (copy_to_user(ubuf, fprog->filter, bpf_classic_proglen(fprog))) goto out; /* Instead of bytes, the API requests to return the number diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index a4216a4c9572..57d922320c59 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -69,7 +69,7 @@ int sock_diag_put_filterinfo(bool may_report_filterinfo, struct sock *sk, goto out; fprog = filter->orig_prog; - flen = sk_filter_proglen(fprog); + flen = bpf_classic_proglen(fprog); attr = nla_reserve(skb, attrtype, flen); if (attr == NULL) { -- cgit v1.2.3-59-g8ed1b From 4df95ff488eb796aab9566652c250330179def17 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 30 Jul 2014 20:34:14 -0700 Subject: net: filter: rename sk_chk_filter() -> bpf_check_classic() trivial rename to indicate that this functions performs classic BPF checking Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- Documentation/networking/filter.txt | 2 +- include/linux/filter.h | 2 +- kernel/bpf/core.c | 2 +- kernel/seccomp.c | 4 ++-- net/core/filter.c | 10 +++++----- 5 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt index ee78eba78a9d..712068be8171 100644 --- a/Documentation/networking/filter.txt +++ b/Documentation/networking/filter.txt @@ -591,7 +591,7 @@ sk_unattached_filter_destroy() for destroying it. The macro SK_RUN_FILTER(filter, ctx) transparently invokes eBPF interpreter or JITed code to run the filter. 'filter' is a pointer to struct sk_filter that we got from sk_unattached_filter_create(), and 'ctx' the given context (e.g. -skb pointer). All constraints and restrictions from sk_chk_filter() apply +skb pointer). All constraints and restrictions from bpf_check_classic() apply before a conversion to the new layout is being done behind the scenes! Currently, the classic BPF format is being used for JITing on most of the diff --git a/include/linux/filter.h b/include/linux/filter.h index 3769341a745d..c4d0be4c5e75 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -361,7 +361,7 @@ void sk_unattached_filter_destroy(struct sk_filter *fp); int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); int sk_detach_filter(struct sock *sk); -int sk_chk_filter(const struct sock_filter *filter, unsigned int flen); +int bpf_check_classic(const struct sock_filter *filter, unsigned int flen); int sk_get_filter(struct sock *sk, struct sock_filter __user *filter, unsigned int len); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 265a02cc822d..b479807ec383 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -18,7 +18,7 @@ * 2 of the License, or (at your option) any later version. * * Andi Kleen - Fix a few bad bugs and races. 
- * Kris Katterjohn - Added many additional checks in sk_chk_filter() + * Kris Katterjohn - Added many additional checks in bpf_check_classic() */ #include #include diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 565743db5384..f4a77d23f209 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -87,7 +87,7 @@ static void populate_seccomp_data(struct seccomp_data *sd) * @filter: filter to verify * @flen: length of filter * - * Takes a previously checked filter (by sk_chk_filter) and + * Takes a previously checked filter (by bpf_check_classic) and * redirects all filter code that loads struct sk_buff data * and related data through seccomp_bpf_load. It also * enforces length and alignment checking of those loads. @@ -239,7 +239,7 @@ static long seccomp_attach_filter(struct sock_fprog *fprog) goto free_prog; /* Check and rewrite the fprog via the skb checker */ - ret = sk_chk_filter(fp, fprog->len); + ret = bpf_check_classic(fp, fprog->len); if (ret) goto free_prog; diff --git a/net/core/filter.c b/net/core/filter.c index d6cb287e4f59..5740ea08a3ad 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -18,7 +18,7 @@ * 2 of the License, or (at your option) any later version. * * Andi Kleen - Fix a few bad bugs and races. - * Kris Katterjohn - Added many additional checks in sk_chk_filter() + * Kris Katterjohn - Added many additional checks in bpf_check_classic() */ #include @@ -721,7 +721,7 @@ static bool chk_code_allowed(u16 code_to_probe) } /** - * sk_chk_filter - verify socket filter code + * bpf_check_classic - verify socket filter code * @filter: filter to verify * @flen: length of filter * @@ -734,7 +734,7 @@ static bool chk_code_allowed(u16 code_to_probe) * * Returns 0 if the rule set is legal or -EINVAL if not. */ -int sk_chk_filter(const struct sock_filter *filter, unsigned int flen) +int bpf_check_classic(const struct sock_filter *filter, unsigned int flen) { bool anc_found; int pc; @@ -808,7 +808,7 @@ int sk_chk_filter(const struct sock_filter *filter, unsigned int flen) return -EINVAL; } -EXPORT_SYMBOL(sk_chk_filter); +EXPORT_SYMBOL(bpf_check_classic); static int sk_store_orig_filter(struct sk_filter *fp, const struct sock_fprog *fprog) @@ -968,7 +968,7 @@ static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp) fp->bpf_func = NULL; fp->jited = 0; - err = sk_chk_filter(fp->insns, fp->len); + err = bpf_check_classic(fp->insns, fp->len); if (err) { __sk_filter_release(fp); return ERR_PTR(err); -- cgit v1.2.3-59-g8ed1b From 8fb575ca396bc31d9fa99c26336e2432b41d1bfc Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 30 Jul 2014 20:34:15 -0700 Subject: net: filter: rename sk_convert_filter() -> bpf_convert_filter() to indicate that this function is converting classic BPF into eBPF and not related to sockets Signed-off-by: Alexei Starovoitov Signed-off-by: David S. 
Miller --- arch/x86/net/bpf_jit_comp.c | 2 +- include/linux/filter.h | 4 ++-- kernel/bpf/core.c | 2 +- kernel/seccomp.c | 4 ++-- net/core/filter.c | 16 ++++++++-------- 5 files changed, 14 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 71737a83f022..e2ecc1380b3d 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -235,7 +235,7 @@ static int do_jit(struct sk_filter *bpf_prog, int *addrs, u8 *image, /* mov qword ptr [rbp-X],rbx */ EMIT3_off32(0x48, 0x89, 0x9D, -stacksize); - /* sk_convert_filter() maps classic BPF register X to R7 and uses R8 + /* bpf_convert_filter() maps classic BPF register X to R7 and uses R8 * as temporary, so all tcpdump filters need to spill/fill R7(r13) and * R8(r14). R9(r15) spill could be made conditional, but there is only * one 'bpf_error' return path out of helper functions inside bpf_jit.S diff --git a/include/linux/filter.h b/include/linux/filter.h index c4d0be4c5e75..7cb9b40e9a2f 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -351,8 +351,8 @@ int sk_filter(struct sock *sk, struct sk_buff *skb); void sk_filter_select_runtime(struct sk_filter *fp); void sk_filter_free(struct sk_filter *fp); -int sk_convert_filter(struct sock_filter *prog, int len, - struct bpf_insn *new_prog, int *new_len); +int bpf_convert_filter(struct sock_filter *prog, int len, + struct bpf_insn *new_prog, int *new_len); int sk_unattached_filter_create(struct sk_filter **pfp, struct sock_fprog_kern *fprog); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index b479807ec383..188ac5ba3900 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -446,7 +446,7 @@ load_word: /* BPF_LD + BPD_ABS and BPF_LD + BPF_IND insns are * only appearing in the programs where ctx == * skb. All programs keep 'ctx' in regs[BPF_REG_CTX] - * == BPF_R6, sk_convert_filter() saves it in BPF_R6, + * == BPF_R6, bpf_convert_filter() saves it in BPF_R6, * internal BPF verifier will check that BPF_R6 == * ctx. 
* diff --git a/kernel/seccomp.c b/kernel/seccomp.c index f4a77d23f209..33a3a97e2b58 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -249,7 +249,7 @@ static long seccomp_attach_filter(struct sock_fprog *fprog) goto free_prog; /* Convert 'sock_filter' insns to 'bpf_insn' insns */ - ret = sk_convert_filter(fp, fprog->len, NULL, &new_len); + ret = bpf_convert_filter(fp, fprog->len, NULL, &new_len); if (ret) goto free_prog; @@ -265,7 +265,7 @@ static long seccomp_attach_filter(struct sock_fprog *fprog) if (!filter->prog) goto free_filter; - ret = sk_convert_filter(fp, fprog->len, filter->prog->insnsi, &new_len); + ret = bpf_convert_filter(fp, fprog->len, filter->prog->insnsi, &new_len); if (ret) goto free_filter_prog; kfree(fp); diff --git a/net/core/filter.c b/net/core/filter.c index 5740ea08a3ad..6ac901613bee 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -312,7 +312,7 @@ static bool convert_bpf_extensions(struct sock_filter *fp, } /** - * sk_convert_filter - convert filter program + * bpf_convert_filter - convert filter program * @prog: the user passed filter program * @len: the length of the user passed filter program * @new_prog: buffer where converted program will be stored @@ -322,12 +322,12 @@ static bool convert_bpf_extensions(struct sock_filter *fp, * Conversion workflow: * * 1) First pass for calculating the new program length: - * sk_convert_filter(old_prog, old_len, NULL, &new_len) + * bpf_convert_filter(old_prog, old_len, NULL, &new_len) * * 2) 2nd pass to remap in two passes: 1st pass finds new * jump offsets, 2nd pass remapping: * new_prog = kmalloc(sizeof(struct bpf_insn) * new_len); - * sk_convert_filter(old_prog, old_len, new_prog, &new_len); + * bpf_convert_filter(old_prog, old_len, new_prog, &new_len); * * User BPF's register A is mapped to our BPF register 6, user BPF * register X is mapped to BPF register 7; frame pointer is always @@ -335,8 +335,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp, * for socket filters: ctx == 'struct sk_buff *', for seccomp: * ctx == 'struct seccomp_data *'. */ -int sk_convert_filter(struct sock_filter *prog, int len, - struct bpf_insn *new_prog, int *new_len) +int bpf_convert_filter(struct sock_filter *prog, int len, + struct bpf_insn *new_prog, int *new_len) { int new_flen = 0, pass = 0, target, i; struct bpf_insn *new_insn; @@ -921,7 +921,7 @@ static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp) } /* 1st pass: calculate the new program length. */ - err = sk_convert_filter(old_prog, old_len, NULL, &new_len); + err = bpf_convert_filter(old_prog, old_len, NULL, &new_len); if (err) goto out_err_free; @@ -940,9 +940,9 @@ static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp) fp->len = new_len; /* 2nd pass: remap sock_filter insns into bpf_insn insns. */ - err = sk_convert_filter(old_prog, old_len, fp->insnsi, &new_len); + err = bpf_convert_filter(old_prog, old_len, fp->insnsi, &new_len); if (err) - /* 2nd sk_convert_filter() can fail only if it fails + /* 2nd bpf_convert_filter() can fail only if it fails * to allocate memory, remapping must succeed. Note, * that at this time old_fp has already been released * by krealloc(). 
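The two-pass calling convention documented in bpf_convert_filter()'s own comment above comes down to the following pattern (a minimal sketch; the classic_to_ebpf() helper name is invented for illustration):

#include <linux/filter.h>
#include <linux/slab.h>

static struct bpf_insn *classic_to_ebpf(struct sock_filter *old_prog,
					int old_len, int *new_len)
{
	struct bpf_insn *new_prog;

	/* Pass 1: only compute how many eBPF instructions are needed. */
	if (bpf_convert_filter(old_prog, old_len, NULL, new_len))
		return NULL;

	new_prog = kmalloc(sizeof(struct bpf_insn) * *new_len, GFP_KERNEL);
	if (!new_prog)
		return NULL;

	/* Pass 2: emit the instructions and resolve the new jump offsets. */
	if (bpf_convert_filter(old_prog, old_len, new_prog, new_len)) {
		kfree(new_prog);
		return NULL;
	}

	return new_prog;
}

Both call sites touched by this rename, seccomp_attach_filter() and __sk_migrate_filter(), follow this shape.
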
-- cgit v1.2.3-59-g8ed1b From 7ae457c1e5b45a1b826fad9d62b32191d2bdcfdb Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 30 Jul 2014 20:34:16 -0700 Subject: net: filter: split 'struct sk_filter' into socket and bpf parts clean up names related to socket filtering and bpf in the following way: - everything that deals with sockets keeps 'sk_*' prefix - everything that is pure BPF is changed to 'bpf_*' prefix split 'struct sk_filter' into struct sk_filter { atomic_t refcnt; struct rcu_head rcu; struct bpf_prog *prog; }; and struct bpf_prog { u32 jited:1, len:31; struct sock_fprog_kern *orig_prog; unsigned int (*bpf_func)(const struct sk_buff *skb, const struct bpf_insn *filter); union { struct sock_filter insns[0]; struct bpf_insn insnsi[0]; struct work_struct work; }; }; so that 'struct bpf_prog' can be used independent of sockets and cleans up 'unattached' bpf use cases split SK_RUN_FILTER macro into: SK_RUN_FILTER to be used with 'struct sk_filter *' and BPF_PROG_RUN to be used with 'struct bpf_prog *' __sk_filter_release(struct sk_filter *) gains __bpf_prog_release(struct bpf_prog *) helper function also perform related renames for the functions that work with 'struct bpf_prog *', since they're on the same lines: sk_filter_size -> bpf_prog_size sk_filter_select_runtime -> bpf_prog_select_runtime sk_filter_free -> bpf_prog_free sk_unattached_filter_create -> bpf_prog_create sk_unattached_filter_destroy -> bpf_prog_destroy sk_store_orig_filter -> bpf_prog_store_orig_filter sk_release_orig_filter -> bpf_release_orig_filter __sk_migrate_filter -> bpf_migrate_filter __sk_prepare_filter -> bpf_prepare_filter API for attaching classic BPF to a socket stays the same: sk_attach_filter(prog, struct sock *)/sk_detach_filter(struct sock *) and SK_RUN_FILTER(struct sk_filter *, ctx) to execute a program which is used by sockets, tun, af_packet API for 'unattached' BPF programs becomes: bpf_prog_create(struct bpf_prog **)/bpf_prog_destroy(struct bpf_prog *) and BPF_PROG_RUN(struct bpf_prog *, ctx) to execute a program which is used by isdn, ppp, team, seccomp, ptp, xt_bpf, cls_bpf, test_bpf Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- Documentation/networking/filter.txt | 10 ++-- arch/arm/net/bpf_jit_32.c | 8 +-- arch/mips/net/bpf_jit.c | 8 +-- arch/powerpc/net/bpf_jit_comp.c | 8 +-- arch/s390/net/bpf_jit_comp.c | 4 +- arch/sparc/net/bpf_jit_comp.c | 4 +- arch/x86/net/bpf_jit_comp.c | 12 ++--- drivers/isdn/i4l/isdn_ppp.c | 26 +++++---- drivers/net/ppp/ppp_generic.c | 28 +++++----- drivers/net/team/team_mode_loadbalance.c | 14 ++--- include/linux/filter.h | 40 ++++++++------ include/linux/isdn_ppp.h | 4 +- include/uapi/linux/netfilter/xt_bpf.h | 4 +- kernel/bpf/core.c | 30 +++++------ kernel/seccomp.c | 10 ++-- lib/test_bpf.c | 24 ++++----- net/core/filter.c | 92 ++++++++++++++++++-------------- net/core/ptp_classifier.c | 6 +-- net/core/sock_diag.c | 2 +- net/netfilter/xt_bpf.c | 6 +-- net/sched/cls_bpf.c | 12 ++--- 21 files changed, 183 insertions(+), 169 deletions(-) (limited to 'include/linux') diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt index 712068be8171..c48a9704bda8 100644 --- a/Documentation/networking/filter.txt +++ b/Documentation/networking/filter.txt @@ -586,11 +586,11 @@ team driver's classifier for its load-balancing mode, netfilter's xt_bpf extension, PTP dissector/classifier, and much more. 
They are all internally converted by the kernel into the new instruction set representation and run in the eBPF interpreter. For in-kernel handlers, this all works transparently -by using sk_unattached_filter_create() for setting up the filter, resp. -sk_unattached_filter_destroy() for destroying it. The macro -SK_RUN_FILTER(filter, ctx) transparently invokes eBPF interpreter or JITed -code to run the filter. 'filter' is a pointer to struct sk_filter that we -got from sk_unattached_filter_create(), and 'ctx' the given context (e.g. +by using bpf_prog_create() for setting up the filter, resp. +bpf_prog_destroy() for destroying it. The macro +BPF_PROG_RUN(filter, ctx) transparently invokes eBPF interpreter or JITed +code to run the filter. 'filter' is a pointer to struct bpf_prog that we +got from bpf_prog_create(), and 'ctx' the given context (e.g. skb pointer). All constraints and restrictions from bpf_check_classic() apply before a conversion to the new layout is being done behind the scenes! diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index fb5503ce016f..a37b989a2f91 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -56,7 +56,7 @@ #define FLAG_NEED_X_RESET (1 << 0) struct jit_ctx { - const struct sk_filter *skf; + const struct bpf_prog *skf; unsigned idx; unsigned prologue_bytes; int ret0_fp_idx; @@ -465,7 +465,7 @@ static inline void update_on_xread(struct jit_ctx *ctx) static int build_body(struct jit_ctx *ctx) { void *load_func[] = {jit_get_skb_b, jit_get_skb_h, jit_get_skb_w}; - const struct sk_filter *prog = ctx->skf; + const struct bpf_prog *prog = ctx->skf; const struct sock_filter *inst; unsigned i, load_order, off, condt; int imm12; @@ -857,7 +857,7 @@ b_epilogue: } -void bpf_jit_compile(struct sk_filter *fp) +void bpf_jit_compile(struct bpf_prog *fp) { struct jit_ctx ctx; unsigned tmp_idx; @@ -926,7 +926,7 @@ out: return; } -void bpf_jit_free(struct sk_filter *fp) +void bpf_jit_free(struct bpf_prog *fp) { if (fp->jited) module_free(NULL, fp->bpf_func); diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c index b87390a56a2f..05a56619ece2 100644 --- a/arch/mips/net/bpf_jit.c +++ b/arch/mips/net/bpf_jit.c @@ -131,7 +131,7 @@ * @target: Memory location for the compiled filter */ struct jit_ctx { - const struct sk_filter *skf; + const struct bpf_prog *skf; unsigned int prologue_bytes; u32 idx; u32 flags; @@ -789,7 +789,7 @@ static int pkt_type_offset(void) static int build_body(struct jit_ctx *ctx) { void *load_func[] = {jit_get_skb_b, jit_get_skb_h, jit_get_skb_w}; - const struct sk_filter *prog = ctx->skf; + const struct bpf_prog *prog = ctx->skf; const struct sock_filter *inst; unsigned int i, off, load_order, condt; u32 k, b_off __maybe_unused; @@ -1369,7 +1369,7 @@ jmp_cmp: int bpf_jit_enable __read_mostly; -void bpf_jit_compile(struct sk_filter *fp) +void bpf_jit_compile(struct bpf_prog *fp) { struct jit_ctx ctx; unsigned int alloc_size, tmp_idx; @@ -1423,7 +1423,7 @@ out: kfree(ctx.offsets); } -void bpf_jit_free(struct sk_filter *fp) +void bpf_jit_free(struct bpf_prog *fp) { if (fp->jited) module_free(NULL, fp->bpf_func); diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 82e82cadcde5..3afa6f4c1957 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -25,7 +25,7 @@ static inline void bpf_flush_icache(void *start, void *end) flush_icache_range((unsigned long)start, (unsigned long)end); } -static void bpf_jit_build_prologue(struct sk_filter *fp, u32 
*image, +static void bpf_jit_build_prologue(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx) { int i; @@ -121,7 +121,7 @@ static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset) /* Assemble the body code between the prologue & epilogue. */ -static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, +static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx, unsigned int *addrs) { @@ -569,7 +569,7 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, return 0; } -void bpf_jit_compile(struct sk_filter *fp) +void bpf_jit_compile(struct bpf_prog *fp) { unsigned int proglen; unsigned int alloclen; @@ -693,7 +693,7 @@ out: return; } -void bpf_jit_free(struct sk_filter *fp) +void bpf_jit_free(struct bpf_prog *fp) { if (fp->jited) module_free(NULL, fp->bpf_func); diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index a2cbd875543a..61e45b7c04d7 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -812,7 +812,7 @@ static struct bpf_binary_header *bpf_alloc_binary(unsigned int bpfsize, return header; } -void bpf_jit_compile(struct sk_filter *fp) +void bpf_jit_compile(struct bpf_prog *fp) { struct bpf_binary_header *header = NULL; unsigned long size, prg_len, lit_len; @@ -875,7 +875,7 @@ out: kfree(addrs); } -void bpf_jit_free(struct sk_filter *fp) +void bpf_jit_free(struct bpf_prog *fp) { unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK; struct bpf_binary_header *header = (void *)addr; diff --git a/arch/sparc/net/bpf_jit_comp.c b/arch/sparc/net/bpf_jit_comp.c index 892a102671ad..1f76c22a6a75 100644 --- a/arch/sparc/net/bpf_jit_comp.c +++ b/arch/sparc/net/bpf_jit_comp.c @@ -354,7 +354,7 @@ do { *prog++ = BR_OPC | WDISP22(OFF); \ * emit_jump() calls with adjusted offsets. 
*/ -void bpf_jit_compile(struct sk_filter *fp) +void bpf_jit_compile(struct bpf_prog *fp) { unsigned int cleanup_addr, proglen, oldproglen = 0; u32 temp[8], *prog, *func, seen = 0, pass; @@ -808,7 +808,7 @@ out: return; } -void bpf_jit_free(struct sk_filter *fp) +void bpf_jit_free(struct bpf_prog *fp) { if (fp->jited) module_free(NULL, fp->bpf_func); diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index e2ecc1380b3d..5c8cb8043c5a 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -211,7 +211,7 @@ struct jit_context { bool seen_ld_abs; }; -static int do_jit(struct sk_filter *bpf_prog, int *addrs, u8 *image, +static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, int oldproglen, struct jit_context *ctx) { struct bpf_insn *insn = bpf_prog->insnsi; @@ -841,7 +841,7 @@ common_load: ctx->seen_ld_abs = true; /* By design x64 JIT should support all BPF instructions * This error will be seen if new instruction was added * to interpreter, but not to JIT - * or if there is junk in sk_filter + * or if there is junk in bpf_prog */ pr_err("bpf_jit: unknown opcode %02x\n", insn->code); return -EINVAL; @@ -862,11 +862,11 @@ common_load: ctx->seen_ld_abs = true; return proglen; } -void bpf_jit_compile(struct sk_filter *prog) +void bpf_jit_compile(struct bpf_prog *prog) { } -void bpf_int_jit_compile(struct sk_filter *prog) +void bpf_int_jit_compile(struct bpf_prog *prog) { struct bpf_binary_header *header = NULL; int proglen, oldproglen = 0; @@ -932,7 +932,7 @@ out: static void bpf_jit_free_deferred(struct work_struct *work) { - struct sk_filter *fp = container_of(work, struct sk_filter, work); + struct bpf_prog *fp = container_of(work, struct bpf_prog, work); unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK; struct bpf_binary_header *header = (void *)addr; @@ -941,7 +941,7 @@ static void bpf_jit_free_deferred(struct work_struct *work) kfree(fp); } -void bpf_jit_free(struct sk_filter *fp) +void bpf_jit_free(struct bpf_prog *fp) { if (fp->jited) { INIT_WORK(&fp->work, bpf_jit_free_deferred); diff --git a/drivers/isdn/i4l/isdn_ppp.c b/drivers/isdn/i4l/isdn_ppp.c index 62f0688d45a5..c4198fa490bf 100644 --- a/drivers/isdn/i4l/isdn_ppp.c +++ b/drivers/isdn/i4l/isdn_ppp.c @@ -379,12 +379,12 @@ isdn_ppp_release(int min, struct file *file) #endif #ifdef CONFIG_IPPP_FILTER if (is->pass_filter) { - sk_unattached_filter_destroy(is->pass_filter); + bpf_prog_destroy(is->pass_filter); is->pass_filter = NULL; } if (is->active_filter) { - sk_unattached_filter_destroy(is->active_filter); + bpf_prog_destroy(is->active_filter); is->active_filter = NULL; } #endif @@ -639,12 +639,11 @@ isdn_ppp_ioctl(int min, struct file *file, unsigned int cmd, unsigned long arg) fprog.filter = code; if (is->pass_filter) { - sk_unattached_filter_destroy(is->pass_filter); + bpf_prog_destroy(is->pass_filter); is->pass_filter = NULL; } if (fprog.filter != NULL) - err = sk_unattached_filter_create(&is->pass_filter, - &fprog); + err = bpf_prog_create(&is->pass_filter, &fprog); else err = 0; kfree(code); @@ -664,12 +663,11 @@ isdn_ppp_ioctl(int min, struct file *file, unsigned int cmd, unsigned long arg) fprog.filter = code; if (is->active_filter) { - sk_unattached_filter_destroy(is->active_filter); + bpf_prog_destroy(is->active_filter); is->active_filter = NULL; } if (fprog.filter != NULL) - err = sk_unattached_filter_create(&is->active_filter, - &fprog); + err = bpf_prog_create(&is->active_filter, &fprog); else err = 0; kfree(code); @@ -1174,14 +1172,14 @@ 
isdn_ppp_push_higher(isdn_net_dev *net_dev, isdn_net_local *lp, struct sk_buff * } if (is->pass_filter - && SK_RUN_FILTER(is->pass_filter, skb) == 0) { + && BPF_PROG_RUN(is->pass_filter, skb) == 0) { if (is->debug & 0x2) printk(KERN_DEBUG "IPPP: inbound frame filtered.\n"); kfree_skb(skb); return; } if (!(is->active_filter - && SK_RUN_FILTER(is->active_filter, skb) == 0)) { + && BPF_PROG_RUN(is->active_filter, skb) == 0)) { if (is->debug & 0x2) printk(KERN_DEBUG "IPPP: link-active filter: resetting huptimer.\n"); lp->huptimer = 0; @@ -1320,14 +1318,14 @@ isdn_ppp_xmit(struct sk_buff *skb, struct net_device *netdev) } if (ipt->pass_filter - && SK_RUN_FILTER(ipt->pass_filter, skb) == 0) { + && BPF_PROG_RUN(ipt->pass_filter, skb) == 0) { if (ipt->debug & 0x4) printk(KERN_DEBUG "IPPP: outbound frame filtered.\n"); kfree_skb(skb); goto unlock; } if (!(ipt->active_filter - && SK_RUN_FILTER(ipt->active_filter, skb) == 0)) { + && BPF_PROG_RUN(ipt->active_filter, skb) == 0)) { if (ipt->debug & 0x4) printk(KERN_DEBUG "IPPP: link-active filter: resetting huptimer.\n"); lp->huptimer = 0; @@ -1517,9 +1515,9 @@ int isdn_ppp_autodial_filter(struct sk_buff *skb, isdn_net_local *lp) } drop |= is->pass_filter - && SK_RUN_FILTER(is->pass_filter, skb) == 0; + && BPF_PROG_RUN(is->pass_filter, skb) == 0; drop |= is->active_filter - && SK_RUN_FILTER(is->active_filter, skb) == 0; + && BPF_PROG_RUN(is->active_filter, skb) == 0; skb_push(skb, IPPP_MAX_HEADER - 4); return drop; diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index 765248b42a0a..fa0d71727894 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -143,8 +143,8 @@ struct ppp { struct sk_buff_head mrq; /* MP: receive reconstruction queue */ #endif /* CONFIG_PPP_MULTILINK */ #ifdef CONFIG_PPP_FILTER - struct sk_filter *pass_filter; /* filter for packets to pass */ - struct sk_filter *active_filter;/* filter for pkts to reset idle */ + struct bpf_prog *pass_filter; /* filter for packets to pass */ + struct bpf_prog *active_filter; /* filter for pkts to reset idle */ #endif /* CONFIG_PPP_FILTER */ struct net *ppp_net; /* the net we belong to */ struct ppp_link_stats stats64; /* 64 bit network stats */ @@ -762,12 +762,12 @@ static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg) ppp_lock(ppp); if (ppp->pass_filter) { - sk_unattached_filter_destroy(ppp->pass_filter); + bpf_prog_destroy(ppp->pass_filter); ppp->pass_filter = NULL; } if (fprog.filter != NULL) - err = sk_unattached_filter_create(&ppp->pass_filter, - &fprog); + err = bpf_prog_create(&ppp->pass_filter, + &fprog); else err = 0; kfree(code); @@ -788,12 +788,12 @@ static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg) ppp_lock(ppp); if (ppp->active_filter) { - sk_unattached_filter_destroy(ppp->active_filter); + bpf_prog_destroy(ppp->active_filter); ppp->active_filter = NULL; } if (fprog.filter != NULL) - err = sk_unattached_filter_create(&ppp->active_filter, - &fprog); + err = bpf_prog_create(&ppp->active_filter, + &fprog); else err = 0; kfree(code); @@ -1205,7 +1205,7 @@ ppp_send_frame(struct ppp *ppp, struct sk_buff *skb) a four-byte PPP header on each packet */ *skb_push(skb, 2) = 1; if (ppp->pass_filter && - SK_RUN_FILTER(ppp->pass_filter, skb) == 0) { + BPF_PROG_RUN(ppp->pass_filter, skb) == 0) { if (ppp->debug & 1) netdev_printk(KERN_DEBUG, ppp->dev, "PPP: outbound frame " @@ -1215,7 +1215,7 @@ ppp_send_frame(struct ppp *ppp, struct sk_buff *skb) } /* if this packet passes the active filter, 
record the time */ if (!(ppp->active_filter && - SK_RUN_FILTER(ppp->active_filter, skb) == 0)) + BPF_PROG_RUN(ppp->active_filter, skb) == 0)) ppp->last_xmit = jiffies; skb_pull(skb, 2); #else @@ -1839,7 +1839,7 @@ ppp_receive_nonmp_frame(struct ppp *ppp, struct sk_buff *skb) *skb_push(skb, 2) = 0; if (ppp->pass_filter && - SK_RUN_FILTER(ppp->pass_filter, skb) == 0) { + BPF_PROG_RUN(ppp->pass_filter, skb) == 0) { if (ppp->debug & 1) netdev_printk(KERN_DEBUG, ppp->dev, "PPP: inbound frame " @@ -1848,7 +1848,7 @@ ppp_receive_nonmp_frame(struct ppp *ppp, struct sk_buff *skb) return; } if (!(ppp->active_filter && - SK_RUN_FILTER(ppp->active_filter, skb) == 0)) + BPF_PROG_RUN(ppp->active_filter, skb) == 0)) ppp->last_recv = jiffies; __skb_pull(skb, 2); } else @@ -2829,12 +2829,12 @@ static void ppp_destroy_interface(struct ppp *ppp) #endif /* CONFIG_PPP_MULTILINK */ #ifdef CONFIG_PPP_FILTER if (ppp->pass_filter) { - sk_unattached_filter_destroy(ppp->pass_filter); + bpf_prog_destroy(ppp->pass_filter); ppp->pass_filter = NULL; } if (ppp->active_filter) { - sk_unattached_filter_destroy(ppp->active_filter); + bpf_prog_destroy(ppp->active_filter); ppp->active_filter = NULL; } #endif /* CONFIG_PPP_FILTER */ diff --git a/drivers/net/team/team_mode_loadbalance.c b/drivers/net/team/team_mode_loadbalance.c index d7be9b36bce6..a1536d0d83a9 100644 --- a/drivers/net/team/team_mode_loadbalance.c +++ b/drivers/net/team/team_mode_loadbalance.c @@ -58,7 +58,7 @@ struct lb_priv_ex { }; struct lb_priv { - struct sk_filter __rcu *fp; + struct bpf_prog __rcu *fp; lb_select_tx_port_func_t __rcu *select_tx_port_func; struct lb_pcpu_stats __percpu *pcpu_stats; struct lb_priv_ex *ex; /* priv extension */ @@ -174,14 +174,14 @@ static lb_select_tx_port_func_t *lb_select_tx_port_get_func(const char *name) static unsigned int lb_get_skb_hash(struct lb_priv *lb_priv, struct sk_buff *skb) { - struct sk_filter *fp; + struct bpf_prog *fp; uint32_t lhash; unsigned char *c; fp = rcu_dereference_bh(lb_priv->fp); if (unlikely(!fp)) return 0; - lhash = SK_RUN_FILTER(fp, skb); + lhash = BPF_PROG_RUN(fp, skb); c = (char *) &lhash; return c[0] ^ c[1] ^ c[2] ^ c[3]; } @@ -271,8 +271,8 @@ static void __fprog_destroy(struct sock_fprog_kern *fprog) static int lb_bpf_func_set(struct team *team, struct team_gsetter_ctx *ctx) { struct lb_priv *lb_priv = get_lb_priv(team); - struct sk_filter *fp = NULL; - struct sk_filter *orig_fp = NULL; + struct bpf_prog *fp = NULL; + struct bpf_prog *orig_fp = NULL; struct sock_fprog_kern *fprog = NULL; int err; @@ -281,7 +281,7 @@ static int lb_bpf_func_set(struct team *team, struct team_gsetter_ctx *ctx) ctx->data.bin_val.ptr); if (err) return err; - err = sk_unattached_filter_create(&fp, fprog); + err = bpf_prog_create(&fp, fprog); if (err) { __fprog_destroy(fprog); return err; @@ -300,7 +300,7 @@ static int lb_bpf_func_set(struct team *team, struct team_gsetter_ctx *ctx) if (orig_fp) { synchronize_rcu(); - sk_unattached_filter_destroy(orig_fp); + bpf_prog_destroy(orig_fp); } return 0; } diff --git a/include/linux/filter.h b/include/linux/filter.h index 7cb9b40e9a2f..a5227ab8ccb1 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -296,7 +296,8 @@ enum { }) /* Macro to invoke filter function. 
*/ -#define SK_RUN_FILTER(filter, ctx) (*filter->bpf_func)(ctx, filter->insnsi) +#define SK_RUN_FILTER(filter, ctx) \ + (*filter->prog->bpf_func)(ctx, filter->prog->insnsi) struct bpf_insn { __u8 code; /* opcode */ @@ -323,12 +324,10 @@ struct sk_buff; struct sock; struct seccomp_data; -struct sk_filter { - atomic_t refcnt; +struct bpf_prog { u32 jited:1, /* Is our filter JIT'ed? */ len:31; /* Number of filter blocks */ struct sock_fprog_kern *orig_prog; /* Original BPF program */ - struct rcu_head rcu; unsigned int (*bpf_func)(const struct sk_buff *skb, const struct bpf_insn *filter); union { @@ -338,25 +337,32 @@ struct sk_filter { }; }; -static inline unsigned int sk_filter_size(unsigned int proglen) +struct sk_filter { + atomic_t refcnt; + struct rcu_head rcu; + struct bpf_prog *prog; +}; + +#define BPF_PROG_RUN(filter, ctx) (*filter->bpf_func)(ctx, filter->insnsi) + +static inline unsigned int bpf_prog_size(unsigned int proglen) { - return max(sizeof(struct sk_filter), - offsetof(struct sk_filter, insns[proglen])); + return max(sizeof(struct bpf_prog), + offsetof(struct bpf_prog, insns[proglen])); } #define bpf_classic_proglen(fprog) (fprog->len * sizeof(fprog->filter[0])) int sk_filter(struct sock *sk, struct sk_buff *skb); -void sk_filter_select_runtime(struct sk_filter *fp); -void sk_filter_free(struct sk_filter *fp); +void bpf_prog_select_runtime(struct bpf_prog *fp); +void bpf_prog_free(struct bpf_prog *fp); int bpf_convert_filter(struct sock_filter *prog, int len, struct bpf_insn *new_prog, int *new_len); -int sk_unattached_filter_create(struct sk_filter **pfp, - struct sock_fprog_kern *fprog); -void sk_unattached_filter_destroy(struct sk_filter *fp); +int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog); +void bpf_prog_destroy(struct bpf_prog *fp); int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); int sk_detach_filter(struct sock *sk); @@ -369,7 +375,7 @@ bool sk_filter_charge(struct sock *sk, struct sk_filter *fp); void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp); u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); -void bpf_int_jit_compile(struct sk_filter *fp); +void bpf_int_jit_compile(struct bpf_prog *fp); #define BPF_ANC BIT(15) @@ -423,8 +429,8 @@ static inline void *bpf_load_pointer(const struct sk_buff *skb, int k, #include #include -void bpf_jit_compile(struct sk_filter *fp); -void bpf_jit_free(struct sk_filter *fp); +void bpf_jit_compile(struct bpf_prog *fp); +void bpf_jit_free(struct bpf_prog *fp); static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen, u32 pass, void *image) @@ -438,11 +444,11 @@ static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen, #else #include -static inline void bpf_jit_compile(struct sk_filter *fp) +static inline void bpf_jit_compile(struct bpf_prog *fp) { } -static inline void bpf_jit_free(struct sk_filter *fp) +static inline void bpf_jit_free(struct bpf_prog *fp) { kfree(fp); } diff --git a/include/linux/isdn_ppp.h b/include/linux/isdn_ppp.h index 8e10f57f109f..a0070c6dfaf8 100644 --- a/include/linux/isdn_ppp.h +++ b/include/linux/isdn_ppp.h @@ -180,8 +180,8 @@ struct ippp_struct { struct slcompress *slcomp; #endif #ifdef CONFIG_IPPP_FILTER - struct sk_filter *pass_filter; /* filter for packets to pass */ - struct sk_filter *active_filter; /* filter for pkts to reset idle */ + struct bpf_prog *pass_filter; /* filter for packets to pass */ + struct bpf_prog *active_filter; /* filter for pkts to reset idle */ #endif unsigned long debug; struct 
isdn_ppp_compressor *compressor,*decompressor; diff --git a/include/uapi/linux/netfilter/xt_bpf.h b/include/uapi/linux/netfilter/xt_bpf.h index 2ec9fbcd06f9..1fad2c27ac32 100644 --- a/include/uapi/linux/netfilter/xt_bpf.h +++ b/include/uapi/linux/netfilter/xt_bpf.h @@ -6,14 +6,14 @@ #define XT_BPF_MAX_NUM_INSTR 64 -struct sk_filter; +struct bpf_prog; struct xt_bpf_info { __u16 bpf_program_num_elem; struct sock_filter bpf_program[XT_BPF_MAX_NUM_INSTR]; /* only used in the kernel */ - struct sk_filter *filter __attribute__((aligned(8))); + struct bpf_prog *filter __attribute__((aligned(8))); }; #endif /*_XT_BPF_H */ diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 188ac5ba3900..7f0dbcbb34af 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -73,15 +73,13 @@ noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) } /** - * __sk_run_filter - run a filter on a given context - * @ctx: buffer to run the filter on - * @insn: filter to apply + * __bpf_prog_run - run eBPF program on a given context + * @ctx: is the data we are operating on + * @insn: is the array of eBPF instructions * - * Decode and apply filter instructions to the skb->data. Return length to - * keep, 0 for none. @ctx is the data we are operating on, @insn is the - * array of filter instructions. + * Decode and execute eBPF instructions. */ -static unsigned int __sk_run_filter(void *ctx, const struct bpf_insn *insn) +static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn) { u64 stack[MAX_BPF_STACK / sizeof(u64)]; u64 regs[MAX_BPF_REG], tmp; @@ -508,29 +506,29 @@ load_byte: return 0; } -void __weak bpf_int_jit_compile(struct sk_filter *prog) +void __weak bpf_int_jit_compile(struct bpf_prog *prog) { } /** - * sk_filter_select_runtime - select execution runtime for BPF program - * @fp: sk_filter populated with internal BPF program + * bpf_prog_select_runtime - select execution runtime for BPF program + * @fp: bpf_prog populated with internal BPF program * * try to JIT internal BPF program, if JIT is not available select interpreter - * BPF program will be executed via SK_RUN_FILTER() macro + * BPF program will be executed via BPF_PROG_RUN() macro */ -void sk_filter_select_runtime(struct sk_filter *fp) +void bpf_prog_select_runtime(struct bpf_prog *fp) { - fp->bpf_func = (void *) __sk_run_filter; + fp->bpf_func = (void *) __bpf_prog_run; /* Probe if internal BPF can be JITed */ bpf_int_jit_compile(fp); } -EXPORT_SYMBOL_GPL(sk_filter_select_runtime); +EXPORT_SYMBOL_GPL(bpf_prog_select_runtime); /* free internal BPF program */ -void sk_filter_free(struct sk_filter *fp) +void bpf_prog_free(struct bpf_prog *fp) { bpf_jit_free(fp); } -EXPORT_SYMBOL_GPL(sk_filter_free); +EXPORT_SYMBOL_GPL(bpf_prog_free); diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 33a3a97e2b58..2f3fa2cc2eac 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -54,7 +54,7 @@ struct seccomp_filter { atomic_t usage; struct seccomp_filter *prev; - struct sk_filter *prog; + struct bpf_prog *prog; }; /* Limit any path through the tree to 256KB worth of instructions. */ @@ -187,7 +187,7 @@ static u32 seccomp_run_filters(int syscall) * value always takes priority (ignoring the DATA). 
*/ for (f = current->seccomp.filter; f; f = f->prev) { - u32 cur_ret = SK_RUN_FILTER(f->prog, (void *)&sd); + u32 cur_ret = BPF_PROG_RUN(f->prog, (void *)&sd); if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION)) ret = cur_ret; @@ -260,7 +260,7 @@ static long seccomp_attach_filter(struct sock_fprog *fprog) if (!filter) goto free_prog; - filter->prog = kzalloc(sk_filter_size(new_len), + filter->prog = kzalloc(bpf_prog_size(new_len), GFP_KERNEL|__GFP_NOWARN); if (!filter->prog) goto free_filter; @@ -273,7 +273,7 @@ static long seccomp_attach_filter(struct sock_fprog *fprog) atomic_set(&filter->usage, 1); filter->prog->len = new_len; - sk_filter_select_runtime(filter->prog); + bpf_prog_select_runtime(filter->prog); /* * If there is an existing filter, make it the prev and don't drop its @@ -337,7 +337,7 @@ void put_seccomp_filter(struct task_struct *tsk) while (orig && atomic_dec_and_test(&orig->usage)) { struct seccomp_filter *freeme = orig; orig = orig->prev; - sk_filter_free(freeme->prog); + bpf_prog_free(freeme->prog); kfree(freeme); } } diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 5f48623ee1a7..89e0345733bd 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -1761,9 +1761,9 @@ static int probe_filter_length(struct sock_filter *fp) return len + 1; } -static struct sk_filter *generate_filter(int which, int *err) +static struct bpf_prog *generate_filter(int which, int *err) { - struct sk_filter *fp; + struct bpf_prog *fp; struct sock_fprog_kern fprog; unsigned int flen = probe_filter_length(tests[which].u.insns); __u8 test_type = tests[which].aux & TEST_TYPE_MASK; @@ -1773,7 +1773,7 @@ static struct sk_filter *generate_filter(int which, int *err) fprog.filter = tests[which].u.insns; fprog.len = flen; - *err = sk_unattached_filter_create(&fp, &fprog); + *err = bpf_prog_create(&fp, &fprog); if (tests[which].aux & FLAG_EXPECTED_FAIL) { if (*err == -EINVAL) { pr_cont("PASS\n"); @@ -1798,7 +1798,7 @@ static struct sk_filter *generate_filter(int which, int *err) break; case INTERNAL: - fp = kzalloc(sk_filter_size(flen), GFP_KERNEL); + fp = kzalloc(bpf_prog_size(flen), GFP_KERNEL); if (fp == NULL) { pr_cont("UNEXPECTED_FAIL no memory left\n"); *err = -ENOMEM; @@ -1809,7 +1809,7 @@ static struct sk_filter *generate_filter(int which, int *err) memcpy(fp->insnsi, tests[which].u.insns_int, fp->len * sizeof(struct bpf_insn)); - sk_filter_select_runtime(fp); + bpf_prog_select_runtime(fp); break; } @@ -1817,21 +1817,21 @@ static struct sk_filter *generate_filter(int which, int *err) return fp; } -static void release_filter(struct sk_filter *fp, int which) +static void release_filter(struct bpf_prog *fp, int which) { __u8 test_type = tests[which].aux & TEST_TYPE_MASK; switch (test_type) { case CLASSIC: - sk_unattached_filter_destroy(fp); + bpf_prog_destroy(fp); break; case INTERNAL: - sk_filter_free(fp); + bpf_prog_free(fp); break; } } -static int __run_one(const struct sk_filter *fp, const void *data, +static int __run_one(const struct bpf_prog *fp, const void *data, int runs, u64 *duration) { u64 start, finish; @@ -1840,7 +1840,7 @@ static int __run_one(const struct sk_filter *fp, const void *data, start = ktime_to_us(ktime_get()); for (i = 0; i < runs; i++) - ret = SK_RUN_FILTER(fp, data); + ret = BPF_PROG_RUN(fp, data); finish = ktime_to_us(ktime_get()); @@ -1850,7 +1850,7 @@ static int __run_one(const struct sk_filter *fp, const void *data, return ret; } -static int run_one(const struct sk_filter *fp, struct bpf_test *test) +static int run_one(const struct bpf_prog *fp, struct 
bpf_test *test) { int err_cnt = 0, i, runs = MAX_TESTRUNS; @@ -1884,7 +1884,7 @@ static __init int test_bpf(void) int i, err_cnt = 0, pass_cnt = 0; for (i = 0; i < ARRAY_SIZE(tests); i++) { - struct sk_filter *fp; + struct bpf_prog *fp; int err; pr_info("#%d %s ", i, tests[i].descr); diff --git a/net/core/filter.c b/net/core/filter.c index 6ac901613bee..d814b8a89d0f 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -810,8 +810,8 @@ int bpf_check_classic(const struct sock_filter *filter, unsigned int flen) } EXPORT_SYMBOL(bpf_check_classic); -static int sk_store_orig_filter(struct sk_filter *fp, - const struct sock_fprog *fprog) +static int bpf_prog_store_orig_filter(struct bpf_prog *fp, + const struct sock_fprog *fprog) { unsigned int fsize = bpf_classic_proglen(fprog); struct sock_fprog_kern *fkprog; @@ -831,7 +831,7 @@ static int sk_store_orig_filter(struct sk_filter *fp, return 0; } -static void sk_release_orig_filter(struct sk_filter *fp) +static void bpf_release_orig_filter(struct bpf_prog *fp) { struct sock_fprog_kern *fprog = fp->orig_prog; @@ -841,10 +841,16 @@ static void sk_release_orig_filter(struct sk_filter *fp) } } +static void __bpf_prog_release(struct bpf_prog *prog) +{ + bpf_release_orig_filter(prog); + bpf_prog_free(prog); +} + static void __sk_filter_release(struct sk_filter *fp) { - sk_release_orig_filter(fp); - sk_filter_free(fp); + __bpf_prog_release(fp->prog); + kfree(fp); } /** @@ -872,7 +878,7 @@ static void sk_filter_release(struct sk_filter *fp) void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp) { - u32 filter_size = sk_filter_size(fp->len); + u32 filter_size = bpf_prog_size(fp->prog->len); atomic_sub(filter_size, &sk->sk_omem_alloc); sk_filter_release(fp); @@ -883,7 +889,7 @@ void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp) */ bool sk_filter_charge(struct sock *sk, struct sk_filter *fp) { - u32 filter_size = sk_filter_size(fp->len); + u32 filter_size = bpf_prog_size(fp->prog->len); /* same check as in sock_kmalloc() */ if (filter_size <= sysctl_optmem_max && @@ -895,10 +901,10 @@ bool sk_filter_charge(struct sock *sk, struct sk_filter *fp) return false; } -static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp) +static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp) { struct sock_filter *old_prog; - struct sk_filter *old_fp; + struct bpf_prog *old_fp; int err, new_len, old_len = fp->len; /* We are free to overwrite insns et al right here as it @@ -927,7 +933,7 @@ static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp) /* Expand fp for appending the new filter representation. */ old_fp = fp; - fp = krealloc(old_fp, sk_filter_size(new_len), GFP_KERNEL); + fp = krealloc(old_fp, bpf_prog_size(new_len), GFP_KERNEL); if (!fp) { /* The old_fp is still around in case we couldn't * allocate new memory, so uncharge on that one. 
@@ -949,7 +955,7 @@ static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp) */ goto out_err_free; - sk_filter_select_runtime(fp); + bpf_prog_select_runtime(fp); kfree(old_prog); return fp; @@ -957,11 +963,11 @@ static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp) out_err_free: kfree(old_prog); out_err: - __sk_filter_release(fp); + __bpf_prog_release(fp); return ERR_PTR(err); } -static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp) +static struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp) { int err; @@ -970,7 +976,7 @@ static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp) err = bpf_check_classic(fp->insns, fp->len); if (err) { - __sk_filter_release(fp); + __bpf_prog_release(fp); return ERR_PTR(err); } @@ -983,13 +989,13 @@ static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp) * internal BPF translation for the optimized interpreter. */ if (!fp->jited) - fp = __sk_migrate_filter(fp); + fp = bpf_migrate_filter(fp); return fp; } /** - * sk_unattached_filter_create - create an unattached filter + * bpf_prog_create - create an unattached filter * @pfp: the unattached filter that is created * @fprog: the filter program * @@ -998,23 +1004,21 @@ static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp) * If an error occurs or there is insufficient memory for the filter * a negative errno code is returned. On success the return is zero. */ -int sk_unattached_filter_create(struct sk_filter **pfp, - struct sock_fprog_kern *fprog) +int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog) { unsigned int fsize = bpf_classic_proglen(fprog); - struct sk_filter *fp; + struct bpf_prog *fp; /* Make sure new filter is there and in the right amounts. */ if (fprog->filter == NULL) return -EINVAL; - fp = kmalloc(sk_filter_size(fprog->len), GFP_KERNEL); + fp = kmalloc(bpf_prog_size(fprog->len), GFP_KERNEL); if (!fp) return -ENOMEM; memcpy(fp->insns, fprog->filter, fsize); - atomic_set(&fp->refcnt, 1); fp->len = fprog->len; /* Since unattached filters are not copied back to user * space through sk_get_filter(), we do not need to hold @@ -1022,23 +1026,23 @@ int sk_unattached_filter_create(struct sk_filter **pfp, */ fp->orig_prog = NULL; - /* __sk_prepare_filter() already takes care of freeing + /* bpf_prepare_filter() already takes care of freeing * memory in case something goes wrong. 
*/ - fp = __sk_prepare_filter(fp); + fp = bpf_prepare_filter(fp); if (IS_ERR(fp)) return PTR_ERR(fp); *pfp = fp; return 0; } -EXPORT_SYMBOL_GPL(sk_unattached_filter_create); +EXPORT_SYMBOL_GPL(bpf_prog_create); -void sk_unattached_filter_destroy(struct sk_filter *fp) +void bpf_prog_destroy(struct bpf_prog *fp) { - __sk_filter_release(fp); + __bpf_prog_release(fp); } -EXPORT_SYMBOL_GPL(sk_unattached_filter_destroy); +EXPORT_SYMBOL_GPL(bpf_prog_destroy); /** * sk_attach_filter - attach a socket filter @@ -1054,7 +1058,8 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) { struct sk_filter *fp, *old_fp; unsigned int fsize = bpf_classic_proglen(fprog); - unsigned int sk_fsize = sk_filter_size(fprog->len); + unsigned int bpf_fsize = bpf_prog_size(fprog->len); + struct bpf_prog *prog; int err; if (sock_flag(sk, SOCK_FILTER_LOCKED)) @@ -1064,29 +1069,36 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) if (fprog->filter == NULL) return -EINVAL; - fp = kmalloc(sk_fsize, GFP_KERNEL); - if (!fp) + prog = kmalloc(bpf_fsize, GFP_KERNEL); + if (!prog) return -ENOMEM; - if (copy_from_user(fp->insns, fprog->filter, fsize)) { - kfree(fp); + if (copy_from_user(prog->insns, fprog->filter, fsize)) { + kfree(prog); return -EFAULT; } - fp->len = fprog->len; + prog->len = fprog->len; - err = sk_store_orig_filter(fp, fprog); + err = bpf_prog_store_orig_filter(prog, fprog); if (err) { - kfree(fp); + kfree(prog); return -ENOMEM; } - /* __sk_prepare_filter() already takes care of freeing + /* bpf_prepare_filter() already takes care of freeing * memory in case something goes wrong. */ - fp = __sk_prepare_filter(fp); - if (IS_ERR(fp)) - return PTR_ERR(fp); + prog = bpf_prepare_filter(prog); + if (IS_ERR(prog)) + return PTR_ERR(prog); + + fp = kmalloc(sizeof(*fp), GFP_KERNEL); + if (!fp) { + __bpf_prog_release(prog); + return -ENOMEM; + } + fp->prog = prog; atomic_set(&fp->refcnt, 0); @@ -1142,7 +1154,7 @@ int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, /* We're copying the filter that has been originally attached, * so no conversion/decode needed anymore. 
*/ - fprog = filter->orig_prog; + fprog = filter->prog->orig_prog; ret = fprog->len; if (!len) diff --git a/net/core/ptp_classifier.c b/net/core/ptp_classifier.c index 12ab7b4be609..4eab4a94a59d 100644 --- a/net/core/ptp_classifier.c +++ b/net/core/ptp_classifier.c @@ -107,11 +107,11 @@ #include #include -static struct sk_filter *ptp_insns __read_mostly; +static struct bpf_prog *ptp_insns __read_mostly; unsigned int ptp_classify_raw(const struct sk_buff *skb) { - return SK_RUN_FILTER(ptp_insns, skb); + return BPF_PROG_RUN(ptp_insns, skb); } EXPORT_SYMBOL_GPL(ptp_classify_raw); @@ -189,5 +189,5 @@ void __init ptp_classifier_init(void) .len = ARRAY_SIZE(ptp_filter), .filter = ptp_filter, }; - BUG_ON(sk_unattached_filter_create(&ptp_insns, &ptp_prog)); + BUG_ON(bpf_prog_create(&ptp_insns, &ptp_prog)); } diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index 57d922320c59..ad704c757bb4 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -68,7 +68,7 @@ int sock_diag_put_filterinfo(bool may_report_filterinfo, struct sock *sk, if (!filter) goto out; - fprog = filter->orig_prog; + fprog = filter->prog->orig_prog; flen = bpf_classic_proglen(fprog); attr = nla_reserve(skb, attrtype, flen); diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c index bbffdbdaf603..dffee9d47ec4 100644 --- a/net/netfilter/xt_bpf.c +++ b/net/netfilter/xt_bpf.c @@ -28,7 +28,7 @@ static int bpf_mt_check(const struct xt_mtchk_param *par) program.len = info->bpf_program_num_elem; program.filter = info->bpf_program; - if (sk_unattached_filter_create(&info->filter, &program)) { + if (bpf_prog_create(&info->filter, &program)) { pr_info("bpf: check failed: parse error\n"); return -EINVAL; } @@ -40,13 +40,13 @@ static bool bpf_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_bpf_info *info = par->matchinfo; - return SK_RUN_FILTER(info->filter, skb); + return BPF_PROG_RUN(info->filter, skb); } static void bpf_mt_destroy(const struct xt_mtdtor_param *par) { const struct xt_bpf_info *info = par->matchinfo; - sk_unattached_filter_destroy(info->filter); + bpf_prog_destroy(info->filter); } static struct xt_match bpf_mt_reg __read_mostly = { diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 13f64df2c710..0e30d58149da 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -30,7 +30,7 @@ struct cls_bpf_head { }; struct cls_bpf_prog { - struct sk_filter *filter; + struct bpf_prog *filter; struct sock_filter *bpf_ops; struct tcf_exts exts; struct tcf_result res; @@ -54,7 +54,7 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp, int ret; list_for_each_entry(prog, &head->plist, link) { - int filter_res = SK_RUN_FILTER(prog->filter, skb); + int filter_res = BPF_PROG_RUN(prog->filter, skb); if (filter_res == 0) continue; @@ -92,7 +92,7 @@ static void cls_bpf_delete_prog(struct tcf_proto *tp, struct cls_bpf_prog *prog) tcf_unbind_filter(tp, &prog->res); tcf_exts_destroy(tp, &prog->exts); - sk_unattached_filter_destroy(prog->filter); + bpf_prog_destroy(prog->filter); kfree(prog->bpf_ops); kfree(prog); @@ -161,7 +161,7 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, struct sock_filter *bpf_ops, *bpf_old; struct tcf_exts exts; struct sock_fprog_kern tmp; - struct sk_filter *fp, *fp_old; + struct bpf_prog *fp, *fp_old; u16 bpf_size, bpf_len; u32 classid; int ret; @@ -193,7 +193,7 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, tmp.len = bpf_len; tmp.filter = bpf_ops; - ret = 
sk_unattached_filter_create(&fp, &tmp); + ret = bpf_prog_create(&fp, &tmp); if (ret) goto errout_free; @@ -211,7 +211,7 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, tcf_exts_change(tp, &prog->exts, &exts); if (fp_old) - sk_unattached_filter_destroy(fp_old); + bpf_prog_destroy(fp_old); if (bpf_old) kfree(bpf_old); -- cgit v1.2.3-59-g8ed1b From dbcdd4d58c7230bea3157d56d6ef77c493b3865b Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Fri, 1 Aug 2014 14:01:51 +0200 Subject: cdc_subset: deal with a device that needs reset for timeout This device needs to be reset to recover from a timeout. Unfortunately this can be handled only at the level of the subdrivers. Signed-off-by: Oliver Neukum Signed-off-by: David S. Miller --- drivers/net/usb/cdc_subset.c | 27 ++++++++++++++++++++++++++- drivers/net/usb/usbnet.c | 8 ++++++-- include/linux/usb/usbnet.h | 3 +++ 3 files changed, 35 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/usb/cdc_subset.c b/drivers/net/usb/cdc_subset.c index 91f0919fe278..6ea98cff2d3b 100644 --- a/drivers/net/usb/cdc_subset.c +++ b/drivers/net/usb/cdc_subset.c @@ -85,14 +85,28 @@ static int always_connected (struct usbnet *dev) * *-------------------------------------------------------------------------*/ +static void m5632_recover(struct usbnet *dev) +{ + struct usb_device *udev = dev->udev; + struct usb_interface *intf = dev->intf; + int r; + + r = usb_lock_device_for_reset(udev, intf); + if (r < 0) + return; + + usb_reset_device(udev); + usb_unlock_device(udev); +} + static const struct driver_info ali_m5632_info = { .description = "ALi M5632", .flags = FLAG_POINTTOPOINT, + .recover = m5632_recover, }; #endif - #ifdef CONFIG_USB_AN2720 #define HAVE_HARDWARE @@ -326,12 +340,23 @@ static const struct usb_device_id products [] = { MODULE_DEVICE_TABLE(usb, products); /*-------------------------------------------------------------------------*/ +static int dummy_prereset(struct usb_interface *intf) +{ + return 0; +} + +static int dummy_postreset(struct usb_interface *intf) +{ + return 0; +} static struct usb_driver cdc_subset_driver = { .name = "cdc_subset", .probe = usbnet_probe, .suspend = usbnet_suspend, .resume = usbnet_resume, + .pre_reset = dummy_prereset, + .post_reset = dummy_postreset, .disconnect = usbnet_disconnect, .id_table = products, .disable_hub_initiated_lpm = 1, diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index f9e96c427558..5173821a9575 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -1218,8 +1218,12 @@ void usbnet_tx_timeout (struct net_device *net) unlink_urbs (dev, &dev->txq); tasklet_schedule (&dev->bh); - - // FIXME: device recovery -- reset? + /* this needs to be handled individually because the generic layer + * doesn't know what is sufficient and could not restore private + * information if a remedy of an unconditional reset were used. + */ + if (dev->driver_info->recover) + (dev->driver_info->recover)(dev); } EXPORT_SYMBOL_GPL(usbnet_tx_timeout); diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index 0662e98fef72..26088feb6608 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -148,6 +148,9 @@ struct driver_info { struct sk_buff *(*tx_fixup)(struct usbnet *dev, struct sk_buff *skb, gfp_t flags); + /* recover from timeout */ + void (*recover)(struct usbnet *dev); + /* early initialization code, can sleep. 
This is for minidrivers * having 'subminidrivers' that need to do extra initialization * right after minidriver have initialized hardware. */ -- cgit v1.2.3-59-g8ed1b From 7e1e77636e36075ebf118298855268468f1028e8 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Sat, 2 Aug 2014 11:47:44 +0200 Subject: lib: Resizable, Scalable, Concurrent Hash Table Generic implementation of a resizable, scalable, concurrent hash table based on [0]. The implementation supports both, fixed size keys specified via an offset and length, or arbitrary keys via own hash and compare functions. Lookups are lockless and protected as RCU read side critical sections. Automatic growing/shrinking based on user configurable watermarks is available while allowing concurrent lookups to take place. Objects to be hashed must include a struct rhash_head. The reason for not using the existing struct hlist_head is that the expansion and shrinking will have two buckets point to a single entry which would lead in obscure reverse chaining behaviour. Code includes a boot selftest if CONFIG_TEST_RHASHTABLE is defined. [0] https://www.usenix.org/legacy/event/atc11/tech/final_files/Triplett.pdf Signed-off-by: Thomas Graf Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 213 ++++++++++++ lib/Kconfig.debug | 8 + lib/Makefile | 2 +- lib/rhashtable.c | 797 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 1019 insertions(+), 1 deletion(-) create mode 100644 include/linux/rhashtable.h create mode 100644 lib/rhashtable.c (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h new file mode 100644 index 000000000000..9cda293c867d --- /dev/null +++ b/include/linux/rhashtable.h @@ -0,0 +1,213 @@ +/* + * Resizable, Scalable, Concurrent Hash Table + * + * Copyright (c) 2014 Thomas Graf + * Copyright (c) 2008-2014 Patrick McHardy + * + * Based on the following paper by Josh Triplett, Paul E. McKenney + * and Jonathan Walpole: + * https://www.usenix.org/legacy/event/atc11/tech/final_files/Triplett.pdf + * + * Code partially derived from nft_hash + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#ifndef _LINUX_RHASHTABLE_H +#define _LINUX_RHASHTABLE_H + +#include + +struct rhash_head { + struct rhash_head *next; +}; + +#define INIT_HASH_HEAD(ptr) ((ptr)->next = NULL) + +struct bucket_table { + size_t size; + struct rhash_head __rcu *buckets[]; +}; + +typedef u32 (*rht_hashfn_t)(const void *data, u32 len, u32 seed); +typedef u32 (*rht_obj_hashfn_t)(const void *data, u32 seed); + +struct rhashtable; + +/** + * struct rhashtable_params - Hash table construction parameters + * @nelem_hint: Hint on number of elements, should be 75% of desired size + * @key_len: Length of key + * @key_offset: Offset of key in struct to be hashed + * @head_offset: Offset of rhash_head in struct to be hashed + * @hash_rnd: Seed to use while hashing + * @max_shift: Maximum number of shifts while expanding + * @hashfn: Function to hash key + * @obj_hashfn: Function to hash object + * @grow_decision: If defined, may return true if table should expand + * @shrink_decision: If defined, may return true if table should shrink + * @mutex_is_held: Must return true if protecting mutex is held + */ +struct rhashtable_params { + size_t nelem_hint; + size_t key_len; + size_t key_offset; + size_t head_offset; + u32 hash_rnd; + size_t max_shift; + rht_hashfn_t hashfn; + rht_obj_hashfn_t obj_hashfn; + bool (*grow_decision)(const struct rhashtable *ht, + size_t new_size); + bool (*shrink_decision)(const struct rhashtable *ht, + size_t new_size); + int (*mutex_is_held)(void); +}; + +/** + * struct rhashtable - Hash table handle + * @tbl: Bucket table + * @nelems: Number of elements in table + * @shift: Current size (1 << shift) + * @p: Configuration parameters + */ +struct rhashtable { + struct bucket_table __rcu *tbl; + size_t nelems; + size_t shift; + struct rhashtable_params p; +}; + +#ifdef CONFIG_PROVE_LOCKING +int lockdep_rht_mutex_is_held(const struct rhashtable *ht); +#else +static inline int lockdep_rht_mutex_is_held(const struct rhashtable *ht) +{ + return 1; +} +#endif /* CONFIG_PROVE_LOCKING */ + +int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params); + +u32 rhashtable_hashfn(const struct rhashtable *ht, const void *key, u32 len); +u32 rhashtable_obj_hashfn(const struct rhashtable *ht, void *ptr); + +void rhashtable_insert(struct rhashtable *ht, struct rhash_head *node, gfp_t); +bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *node, gfp_t); +void rhashtable_remove_pprev(struct rhashtable *ht, struct rhash_head *obj, + struct rhash_head **pprev, gfp_t flags); + +bool rht_grow_above_75(const struct rhashtable *ht, size_t new_size); +bool rht_shrink_below_30(const struct rhashtable *ht, size_t new_size); + +int rhashtable_expand(struct rhashtable *ht, gfp_t flags); +int rhashtable_shrink(struct rhashtable *ht, gfp_t flags); + +void *rhashtable_lookup(const struct rhashtable *ht, const void *key); +void *rhashtable_lookup_compare(const struct rhashtable *ht, u32 hash, + bool (*compare)(void *, void *), void *arg); + +void rhashtable_destroy(const struct rhashtable *ht); + +#define rht_dereference(p, ht) \ + rcu_dereference_protected(p, lockdep_rht_mutex_is_held(ht)) + +#define rht_dereference_rcu(p, ht) \ + rcu_dereference_check(p, lockdep_rht_mutex_is_held(ht)) + +/* Internal, use rht_obj() instead */ +#define rht_entry(ptr, type, member) container_of(ptr, type, member) +#define rht_entry_safe(ptr, type, member) \ +({ \ + typeof(ptr) __ptr = (ptr); \ + __ptr ? 
rht_entry(__ptr, type, member) : NULL; \ +}) +#define rht_entry_safe_rcu(ptr, type, member) \ +({ \ + typeof(*ptr) __rcu *__ptr = (typeof(*ptr) __rcu __force *)ptr; \ + __ptr ? container_of((typeof(ptr))rcu_dereference_raw(__ptr), type, member) : NULL; \ +}) + +#define rht_next_entry_safe(pos, ht, member) \ +({ \ + pos ? rht_entry_safe(rht_dereference((pos)->member.next, ht), \ + typeof(*(pos)), member) : NULL; \ +}) + +/** + * rht_for_each - iterate over hash chain + * @pos: &struct rhash_head to use as a loop cursor. + * @head: head of the hash chain (struct rhash_head *) + * @ht: pointer to your struct rhashtable + */ +#define rht_for_each(pos, head, ht) \ + for (pos = rht_dereference(head, ht); \ + pos; \ + pos = rht_dereference((pos)->next, ht)) + +/** + * rht_for_each_entry - iterate over hash chain of given type + * @pos: type * to use as a loop cursor. + * @head: head of the hash chain (struct rhash_head *) + * @ht: pointer to your struct rhashtable + * @member: name of the rhash_head within the hashable struct. + */ +#define rht_for_each_entry(pos, head, ht, member) \ + for (pos = rht_entry_safe(rht_dereference(head, ht), \ + typeof(*(pos)), member); \ + pos; \ + pos = rht_next_entry_safe(pos, ht, member)) + +/** + * rht_for_each_entry_safe - safely iterate over hash chain of given type + * @pos: type * to use as a loop cursor. + * @n: type * to use for temporary next object storage + * @head: head of the hash chain (struct rhash_head *) + * @ht: pointer to your struct rhashtable + * @member: name of the rhash_head within the hashable struct. + * + * This hash chain list-traversal primitive allows for the looped code to + * remove the loop cursor from the list. + */ +#define rht_for_each_entry_safe(pos, n, head, ht, member) \ + for (pos = rht_entry_safe(rht_dereference(head, ht), \ + typeof(*(pos)), member), \ + n = rht_next_entry_safe(pos, ht, member); \ + pos; \ + pos = n, \ + n = rht_next_entry_safe(pos, ht, member)) + +/** + * rht_for_each_rcu - iterate over rcu hash chain + * @pos: &struct rhash_head to use as a loop cursor. + * @head: head of the hash chain (struct rhash_head *) + * @ht: pointer to your struct rhashtable + * + * This hash chain list-traversal primitive may safely run concurrently with + * the _rcu fkht mutation primitives such as rht_insert() as long as the + * traversal is guarded by rcu_read_lock(). + */ +#define rht_for_each_rcu(pos, head, ht) \ + for (pos = rht_dereference_rcu(head, ht); \ + pos; \ + pos = rht_dereference_rcu((pos)->next, ht)) + +/** + * rht_for_each_entry_rcu - iterate over rcu hash chain of given type + * @pos: type * to use as a loop cursor. + * @head: head of the hash chain (struct rhash_head *) + * @member: name of the rhash_head within the hashable struct. + * + * This hash chain list-traversal primitive may safely run concurrently with + * the _rcu fkht mutation primitives such as rht_insert() as long as the + * traversal is guarded by rcu_read_lock(). 
+ */ +#define rht_for_each_entry_rcu(pos, head, member) \ + for (pos = rht_entry_safe_rcu(head, typeof(*(pos)), member); \ + pos; \ + pos = rht_entry_safe_rcu((pos)->member.next, \ + typeof(*(pos)), member)) + +#endif /* _LINUX_RHASHTABLE_H */ diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 7a638aa3545b..f11a2e8f6157 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1550,6 +1550,14 @@ config TEST_STRING_HELPERS config TEST_KSTRTOX tristate "Test kstrto*() family of functions at runtime" +config TEST_RHASHTABLE + bool "Perform selftest on resizable hash table" + default n + help + Enable this option to test the rhashtable functions at boot. + + If unsure, say N. + endmenu # runtime tests config PROVIDE_OHCI1394_DMA_INIT diff --git a/lib/Makefile b/lib/Makefile index ba967a19edba..fd248e4c05ad 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -26,7 +26,7 @@ obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \ bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \ gcd.o lcm.o list_sort.o uuid.o flex_array.o iovec.o clz_ctz.o \ bsearch.o find_last_bit.o find_next_bit.o llist.o memweight.o kfifo.o \ - percpu-refcount.o percpu_ida.o hash.o + percpu-refcount.o percpu_ida.o hash.o rhashtable.o obj-y += string_helpers.o obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o obj-y += kstrtox.o diff --git a/lib/rhashtable.c b/lib/rhashtable.c new file mode 100644 index 000000000000..e6940cf16628 --- /dev/null +++ b/lib/rhashtable.c @@ -0,0 +1,797 @@ +/* + * Resizable, Scalable, Concurrent Hash Table + * + * Copyright (c) 2014 Thomas Graf + * Copyright (c) 2008-2014 Patrick McHardy + * + * Based on the following paper: + * https://www.usenix.org/legacy/event/atc11/tech/final_files/Triplett.pdf + * + * Code partially derived from nft_hash + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define HASH_DEFAULT_SIZE 64UL +#define HASH_MIN_SIZE 4UL + +#define ASSERT_RHT_MUTEX(HT) BUG_ON(!lockdep_rht_mutex_is_held(HT)) + +#ifdef CONFIG_PROVE_LOCKING +int lockdep_rht_mutex_is_held(const struct rhashtable *ht) +{ + return ht->p.mutex_is_held(); +} +EXPORT_SYMBOL_GPL(lockdep_rht_mutex_is_held); +#endif + +/** + * rht_obj - cast hash head to outer object + * @ht: hash table + * @he: hashed node + */ +void *rht_obj(const struct rhashtable *ht, const struct rhash_head *he) +{ + return (void *) he - ht->p.head_offset; +} +EXPORT_SYMBOL_GPL(rht_obj); + +static u32 __hashfn(const struct rhashtable *ht, const void *key, + u32 len, u32 hsize) +{ + u32 h; + + h = ht->p.hashfn(key, len, ht->p.hash_rnd); + + return h & (hsize - 1); +} + +/** + * rhashtable_hashfn - compute hash for key of given length + * @ht: hash table to compuate for + * @key: pointer to key + * @len: length of key + * + * Computes the hash value using the hash function provided in the 'hashfn' + * of struct rhashtable_params. The returned value is guaranteed to be + * smaller than the number of buckets in the hash table. 
+ */ +u32 rhashtable_hashfn(const struct rhashtable *ht, const void *key, u32 len) +{ + struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); + + return __hashfn(ht, key, len, tbl->size); +} +EXPORT_SYMBOL_GPL(rhashtable_hashfn); + +static u32 obj_hashfn(const struct rhashtable *ht, const void *ptr, u32 hsize) +{ + if (unlikely(!ht->p.key_len)) { + u32 h; + + h = ht->p.obj_hashfn(ptr, ht->p.hash_rnd); + + return h & (hsize - 1); + } + + return __hashfn(ht, ptr + ht->p.key_offset, ht->p.key_len, hsize); +} + +/** + * rhashtable_obj_hashfn - compute hash for hashed object + * @ht: hash table to compuate for + * @ptr: pointer to hashed object + * + * Computes the hash value using the hash function `hashfn` respectively + * 'obj_hashfn' depending on whether the hash table is set up to work with + * a fixed length key. The returned value is guaranteed to be smaller than + * the number of buckets in the hash table. + */ +u32 rhashtable_obj_hashfn(const struct rhashtable *ht, void *ptr) +{ + struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); + + return obj_hashfn(ht, ptr, tbl->size); +} +EXPORT_SYMBOL_GPL(rhashtable_obj_hashfn); + +static u32 head_hashfn(const struct rhashtable *ht, + const struct rhash_head *he, u32 hsize) +{ + return obj_hashfn(ht, rht_obj(ht, he), hsize); +} + +static struct bucket_table *bucket_table_alloc(size_t nbuckets, gfp_t flags) +{ + struct bucket_table *tbl; + size_t size; + + size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]); + tbl = kzalloc(size, flags); + if (tbl == NULL) + tbl = vzalloc(size); + + if (tbl == NULL) + return NULL; + + tbl->size = nbuckets; + + return tbl; +} + +static void bucket_table_free(const struct bucket_table *tbl) +{ + kvfree(tbl); +} + +/** + * rht_grow_above_75 - returns true if nelems > 0.75 * table-size + * @ht: hash table + * @new_size: new table size + */ +bool rht_grow_above_75(const struct rhashtable *ht, size_t new_size) +{ + /* Expand table when exceeding 75% load */ + return ht->nelems > (new_size / 4 * 3); +} +EXPORT_SYMBOL_GPL(rht_grow_above_75); + +/** + * rht_shrink_below_30 - returns true if nelems < 0.3 * table-size + * @ht: hash table + * @new_size: new table size + */ +bool rht_shrink_below_30(const struct rhashtable *ht, size_t new_size) +{ + /* Shrink table beneath 30% load */ + return ht->nelems < (new_size * 3 / 10); +} +EXPORT_SYMBOL_GPL(rht_shrink_below_30); + +static void hashtable_chain_unzip(const struct rhashtable *ht, + const struct bucket_table *new_tbl, + struct bucket_table *old_tbl, size_t n) +{ + struct rhash_head *he, *p, *next; + unsigned int h; + + /* Old bucket empty, no work needed. */ + p = rht_dereference(old_tbl->buckets[n], ht); + if (!p) + return; + + /* Advance the old bucket pointer one or more times until it + * reaches a node that doesn't hash to the same bucket as the + * previous node p. Call the previous node p; + */ + h = head_hashfn(ht, p, new_tbl->size); + rht_for_each(he, p->next, ht) { + if (head_hashfn(ht, he, new_tbl->size) != h) + break; + p = he; + } + RCU_INIT_POINTER(old_tbl->buckets[n], p->next); + + /* Find the subsequent node which does hash to the same + * bucket as node P, or NULL if no such node exists. 
+ */ + next = NULL; + if (he) { + rht_for_each(he, he->next, ht) { + if (head_hashfn(ht, he, new_tbl->size) == h) { + next = he; + break; + } + } + } + + /* Set p's next pointer to that subsequent node pointer, + * bypassing the nodes which do not hash to p's bucket + */ + RCU_INIT_POINTER(p->next, next); +} + +/** + * rhashtable_expand - Expand hash table while allowing concurrent lookups + * @ht: the hash table to expand + * @flags: allocation flags + * + * A secondary bucket array is allocated and the hash entries are migrated + * while keeping them on both lists until the end of the RCU grace period. + * + * This function may only be called in a context where it is safe to call + * synchronize_rcu(), e.g. not within a rcu_read_lock() section. + * + * The caller must ensure that no concurrent table mutations take place. + * It is however valid to have concurrent lookups if they are RCU protected. + */ +int rhashtable_expand(struct rhashtable *ht, gfp_t flags) +{ + struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht); + struct rhash_head *he; + unsigned int i, h; + bool complete; + + ASSERT_RHT_MUTEX(ht); + + if (ht->p.max_shift && ht->shift >= ht->p.max_shift) + return 0; + + new_tbl = bucket_table_alloc(old_tbl->size * 2, flags); + if (new_tbl == NULL) + return -ENOMEM; + + ht->shift++; + + /* For each new bucket, search the corresponding old bucket + * for the first entry that hashes to the new bucket, and + * link the new bucket to that entry. Since all the entries + * which will end up in the new bucket appear in the same + * old bucket, this constructs an entirely valid new hash + * table, but with multiple buckets "zipped" together into a + * single imprecise chain. + */ + for (i = 0; i < new_tbl->size; i++) { + h = i & (old_tbl->size - 1); + rht_for_each(he, old_tbl->buckets[h], ht) { + if (head_hashfn(ht, he, new_tbl->size) == i) { + RCU_INIT_POINTER(new_tbl->buckets[i], he); + break; + } + } + } + + /* Publish the new table pointer. Lookups may now traverse + * the new table, but they will not benefit from any + * additional efficiency until later steps unzip the buckets. + */ + rcu_assign_pointer(ht->tbl, new_tbl); + + /* Unzip interleaved hash chains */ + do { + /* Wait for readers. All new readers will see the new + * table, and thus no references to the old table will + * remain. + */ + synchronize_rcu(); + + /* For each bucket in the old table (each of which + * contains items from multiple buckets of the new + * table): ... + */ + complete = true; + for (i = 0; i < old_tbl->size; i++) { + hashtable_chain_unzip(ht, new_tbl, old_tbl, i); + if (old_tbl->buckets[i] != NULL) + complete = false; + } + } while (!complete); + + bucket_table_free(old_tbl); + return 0; +} +EXPORT_SYMBOL_GPL(rhashtable_expand); + +/** + * rhashtable_shrink - Shrink hash table while allowing concurrent lookups + * @ht: the hash table to shrink + * @flags: allocation flags + * + * This function may only be called in a context where it is safe to call + * synchronize_rcu(), e.g. not within a rcu_read_lock() section. + * + * The caller must ensure that no concurrent table mutations take place. + * It is however valid to have concurrent lookups if they are RCU protected. 
+ */ +int rhashtable_shrink(struct rhashtable *ht, gfp_t flags) +{ + struct bucket_table *ntbl, *tbl = rht_dereference(ht->tbl, ht); + struct rhash_head __rcu **pprev; + unsigned int i; + + ASSERT_RHT_MUTEX(ht); + + if (tbl->size <= HASH_MIN_SIZE) + return 0; + + ntbl = bucket_table_alloc(tbl->size / 2, flags); + if (ntbl == NULL) + return -ENOMEM; + + ht->shift--; + + /* Link each bucket in the new table to the first bucket + * in the old table that contains entries which will hash + * to the new bucket. + */ + for (i = 0; i < ntbl->size; i++) { + ntbl->buckets[i] = tbl->buckets[i]; + + /* Link each bucket in the new table to the first bucket + * in the old table that contains entries which will hash + * to the new bucket. + */ + for (pprev = &ntbl->buckets[i]; *pprev != NULL; + pprev = &rht_dereference(*pprev, ht)->next) + ; + RCU_INIT_POINTER(*pprev, tbl->buckets[i + ntbl->size]); + } + + /* Publish the new, valid hash table */ + rcu_assign_pointer(ht->tbl, ntbl); + + /* Wait for readers. No new readers will have references to the + * old hash table. + */ + synchronize_rcu(); + + bucket_table_free(tbl); + + return 0; +} +EXPORT_SYMBOL_GPL(rhashtable_shrink); + +/** + * rhashtable_insert - insert object into hash hash table + * @ht: hash table + * @obj: pointer to hash head inside object + * @flags: allocation flags (table expansion) + * + * Will automatically grow the table via rhashtable_expand() if the the + * grow_decision function specified at rhashtable_init() returns true. + * + * The caller must ensure that no concurrent table mutations occur. It is + * however valid to have concurrent lookups if they are RCU protected. + */ +void rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj, + gfp_t flags) +{ + struct bucket_table *tbl = rht_dereference(ht->tbl, ht); + u32 hash; + + ASSERT_RHT_MUTEX(ht); + + hash = head_hashfn(ht, obj, tbl->size); + RCU_INIT_POINTER(obj->next, tbl->buckets[hash]); + rcu_assign_pointer(tbl->buckets[hash], obj); + ht->nelems++; + + if (ht->p.grow_decision && ht->p.grow_decision(ht, tbl->size)) + rhashtable_expand(ht, flags); +} +EXPORT_SYMBOL_GPL(rhashtable_insert); + +/** + * rhashtable_remove_pprev - remove object from hash table given previous element + * @ht: hash table + * @obj: pointer to hash head inside object + * @pprev: pointer to previous element + * @flags: allocation flags (table expansion) + * + * Identical to rhashtable_remove() but caller is alreayd aware of the element + * in front of the element to be deleted. This is in particular useful for + * deletion when combined with walking or lookup. + */ +void rhashtable_remove_pprev(struct rhashtable *ht, struct rhash_head *obj, + struct rhash_head **pprev, gfp_t flags) +{ + struct bucket_table *tbl = rht_dereference(ht->tbl, ht); + + ASSERT_RHT_MUTEX(ht); + + RCU_INIT_POINTER(*pprev, obj->next); + ht->nelems--; + + if (ht->p.shrink_decision && + ht->p.shrink_decision(ht, tbl->size)) + rhashtable_shrink(ht, flags); +} +EXPORT_SYMBOL_GPL(rhashtable_remove_pprev); + +/** + * rhashtable_remove - remove object from hash table + * @ht: hash table + * @obj: pointer to hash head inside object + * @flags: allocation flags (table expansion) + * + * Since the hash chain is single linked, the removal operation needs to + * walk the bucket chain upon removal. The removal operation is thus + * considerable slow if the hash table is not correctly sized. 
+ * + * Will automatically shrink the table via rhashtable_expand() if the the + * shrink_decision function specified at rhashtable_init() returns true. + * + * The caller must ensure that no concurrent table mutations occur. It is + * however valid to have concurrent lookups if they are RCU protected. + */ +bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *obj, + gfp_t flags) +{ + struct bucket_table *tbl = rht_dereference(ht->tbl, ht); + struct rhash_head __rcu **pprev; + struct rhash_head *he; + u32 h; + + ASSERT_RHT_MUTEX(ht); + + h = head_hashfn(ht, obj, tbl->size); + + pprev = &tbl->buckets[h]; + rht_for_each(he, tbl->buckets[h], ht) { + if (he != obj) { + pprev = &he->next; + continue; + } + + rhashtable_remove_pprev(ht, he, pprev, flags); + return true; + } + + return false; +} +EXPORT_SYMBOL_GPL(rhashtable_remove); + +/** + * rhashtable_lookup - lookup key in hash table + * @ht: hash table + * @key: pointer to key + * + * Computes the hash value for the key and traverses the bucket chain looking + * for a entry with an identical key. The first matching entry is returned. + * + * This lookup function may only be used for fixed key hash table (key_len + * paramter set). It will BUG() if used inappropriately. + * + * Lookups may occur in parallel with hash mutations as long as the lookup is + * guarded by rcu_read_lock(). The caller must take care of this. + */ +void *rhashtable_lookup(const struct rhashtable *ht, const void *key) +{ + const struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); + struct rhash_head *he; + u32 h; + + BUG_ON(!ht->p.key_len); + + h = __hashfn(ht, key, ht->p.key_len, tbl->size); + rht_for_each_rcu(he, tbl->buckets[h], ht) { + if (memcmp(rht_obj(ht, he) + ht->p.key_offset, key, + ht->p.key_len)) + continue; + return (void *) he - ht->p.head_offset; + } + + return NULL; +} +EXPORT_SYMBOL_GPL(rhashtable_lookup); + +/** + * rhashtable_lookup_compare - search hash table with compare function + * @ht: hash table + * @hash: hash value of desired entry + * @compare: compare function, must return true on match + * @arg: argument passed on to compare function + * + * Traverses the bucket chain behind the provided hash value and calls the + * specified compare function for each entry. + * + * Lookups may occur in parallel with hash mutations as long as the lookup is + * guarded by rcu_read_lock(). The caller must take care of this. + * + * Returns the first entry on which the compare function returned true. + */ +void *rhashtable_lookup_compare(const struct rhashtable *ht, u32 hash, + bool (*compare)(void *, void *), void *arg) +{ + const struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); + struct rhash_head *he; + + if (unlikely(hash >= tbl->size)) + return NULL; + + rht_for_each_rcu(he, tbl->buckets[hash], ht) { + if (!compare(rht_obj(ht, he), arg)) + continue; + return (void *) he - ht->p.head_offset; + } + + return NULL; +} +EXPORT_SYMBOL_GPL(rhashtable_lookup_compare); + +static size_t rounded_hashtable_size(unsigned int nelem) +{ + return max(roundup_pow_of_two(nelem * 4 / 3), HASH_MIN_SIZE); +} + +/** + * rhashtable_init - initialize a new hash table + * @ht: hash table to be initialized + * @params: configuration parameters + * + * Initializes a new hash table based on the provided configuration + * parameters. 
A table can be configured either with a variable or + * fixed length key: + * + * Configuration Example 1: Fixed length keys + * struct test_obj { + * int key; + * void * my_member; + * struct rhash_head node; + * }; + * + * struct rhashtable_params params = { + * .head_offset = offsetof(struct test_obj, node), + * .key_offset = offsetof(struct test_obj, key), + * .key_len = sizeof(int), + * .hashfn = arch_fast_hash, + * .mutex_is_held = &my_mutex_is_held, + * }; + * + * Configuration Example 2: Variable length keys + * struct test_obj { + * [...] + * struct rhash_head node; + * }; + * + * u32 my_hash_fn(const void *data, u32 seed) + * { + * struct test_obj *obj = data; + * + * return [... hash ...]; + * } + * + * struct rhashtable_params params = { + * .head_offset = offsetof(struct test_obj, node), + * .hashfn = arch_fast_hash, + * .obj_hashfn = my_hash_fn, + * .mutex_is_held = &my_mutex_is_held, + * }; + */ +int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params) +{ + struct bucket_table *tbl; + size_t size; + + size = HASH_DEFAULT_SIZE; + + if ((params->key_len && !params->hashfn) || + (!params->key_len && !params->obj_hashfn)) + return -EINVAL; + + if (params->nelem_hint) + size = rounded_hashtable_size(params->nelem_hint); + + tbl = bucket_table_alloc(size, GFP_KERNEL); + if (tbl == NULL) + return -ENOMEM; + + memset(ht, 0, sizeof(*ht)); + ht->shift = ilog2(tbl->size); + memcpy(&ht->p, params, sizeof(*params)); + RCU_INIT_POINTER(ht->tbl, tbl); + + if (!ht->p.hash_rnd) + get_random_bytes(&ht->p.hash_rnd, sizeof(ht->p.hash_rnd)); + + return 0; +} +EXPORT_SYMBOL_GPL(rhashtable_init); + +/** + * rhashtable_destroy - destroy hash table + * @ht: the hash table to destroy + * + * Frees the bucket array. + */ +void rhashtable_destroy(const struct rhashtable *ht) +{ + const struct bucket_table *tbl = rht_dereference(ht->tbl, ht); + + bucket_table_free(tbl); +} +EXPORT_SYMBOL_GPL(rhashtable_destroy); + +/************************************************************************** + * Self Test + **************************************************************************/ + +#ifdef CONFIG_TEST_RHASHTABLE + +#define TEST_HT_SIZE 8 +#define TEST_ENTRIES 2048 +#define TEST_PTR ((void *) 0xdeadbeef) +#define TEST_NEXPANDS 4 + +static int test_mutex_is_held(void) +{ + return 1; +} + +struct test_obj { + void *ptr; + int value; + struct rhash_head node; +}; + +static int __init test_rht_lookup(struct rhashtable *ht) +{ + unsigned int i; + + for (i = 0; i < TEST_ENTRIES * 2; i++) { + struct test_obj *obj; + bool expected = !(i % 2); + u32 key = i; + + obj = rhashtable_lookup(ht, &key); + + if (expected && !obj) { + pr_warn("Test failed: Could not find key %u\n", key); + return -ENOENT; + } else if (!expected && obj) { + pr_warn("Test failed: Unexpected entry found for key %u\n", + key); + return -EEXIST; + } else if (expected && obj) { + if (obj->ptr != TEST_PTR || obj->value != i) { + pr_warn("Test failed: Lookup value mismatch %p!=%p, %u!=%u\n", + obj->ptr, TEST_PTR, obj->value, i); + return -EINVAL; + } + } + } + + return 0; +} + +static void test_bucket_stats(struct rhashtable *ht, + struct bucket_table *tbl, + bool quiet) +{ + unsigned int cnt, i, total = 0; + struct test_obj *obj; + + for (i = 0; i < tbl->size; i++) { + cnt = 0; + + if (!quiet) + pr_info(" [%#4x/%zu]", i, tbl->size); + + rht_for_each_entry_rcu(obj, tbl->buckets[i], node) { + cnt++; + total++; + if (!quiet) + pr_cont(" [%p],", obj); + } + + if (!quiet) + pr_cont("\n [%#x] first element: %p, chain length: %u\n", 
+ i, tbl->buckets[i], cnt); + } + + pr_info(" Traversal complete: counted=%u, nelems=%zu, entries=%d\n", + total, ht->nelems, TEST_ENTRIES); +} + +static int __init test_rhashtable(struct rhashtable *ht) +{ + struct bucket_table *tbl; + struct test_obj *obj, *next; + int err; + unsigned int i; + + /* + * Insertion Test: + * Insert TEST_ENTRIES into table with all keys even numbers + */ + pr_info(" Adding %d keys\n", TEST_ENTRIES); + for (i = 0; i < TEST_ENTRIES; i++) { + struct test_obj *obj; + + obj = kzalloc(sizeof(*obj), GFP_KERNEL); + if (!obj) { + err = -ENOMEM; + goto error; + } + + obj->ptr = TEST_PTR; + obj->value = i * 2; + + rhashtable_insert(ht, &obj->node, GFP_KERNEL); + } + + rcu_read_lock(); + tbl = rht_dereference_rcu(ht->tbl, ht); + test_bucket_stats(ht, tbl, true); + test_rht_lookup(ht); + rcu_read_unlock(); + + for (i = 0; i < TEST_NEXPANDS; i++) { + pr_info(" Table expansion iteration %u...\n", i); + rhashtable_expand(ht, GFP_KERNEL); + + rcu_read_lock(); + pr_info(" Verifying lookups...\n"); + test_rht_lookup(ht); + rcu_read_unlock(); + } + + for (i = 0; i < TEST_NEXPANDS; i++) { + pr_info(" Table shrinkage iteration %u...\n", i); + rhashtable_shrink(ht, GFP_KERNEL); + + rcu_read_lock(); + pr_info(" Verifying lookups...\n"); + test_rht_lookup(ht); + rcu_read_unlock(); + } + + pr_info(" Deleting %d keys\n", TEST_ENTRIES); + for (i = 0; i < TEST_ENTRIES; i++) { + u32 key = i * 2; + + obj = rhashtable_lookup(ht, &key); + BUG_ON(!obj); + + rhashtable_remove(ht, &obj->node, GFP_KERNEL); + kfree(obj); + } + + return 0; + +error: + tbl = rht_dereference_rcu(ht->tbl, ht); + for (i = 0; i < tbl->size; i++) + rht_for_each_entry_safe(obj, next, tbl->buckets[i], ht, node) + kfree(obj); + + return err; +} + +static int __init test_rht_init(void) +{ + struct rhashtable ht; + struct rhashtable_params params = { + .nelem_hint = TEST_HT_SIZE, + .head_offset = offsetof(struct test_obj, node), + .key_offset = offsetof(struct test_obj, value), + .key_len = sizeof(int), + .hashfn = arch_fast_hash, + .mutex_is_held = &test_mutex_is_held, + .grow_decision = rht_grow_above_75, + .shrink_decision = rht_shrink_below_30, + }; + int err; + + pr_info("Running resizable hashtable tests...\n"); + + err = rhashtable_init(&ht, ¶ms); + if (err < 0) { + pr_warn("Test failed: Unable to initialize hashtable: %d\n", + err); + return err; + } + + err = test_rhashtable(&ht); + + rhashtable_destroy(&ht); + + return err; +} + +subsys_initcall(test_rht_init); + +#endif /* CONFIG_TEST_RHASHTABLE */ -- cgit v1.2.3-59-g8ed1b From a3b255717fed1cad0dd4ed5be77114d32ef22a6d Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 16 Jul 2014 06:52:18 -0400 Subject: sunrpc: remove __rcu annotation from struct gss_cl_ctx->gc_gss_ctx Commit 5b22216e11f7 (nfs: __rcu annotations) added a __rcu annotation to the gc_gss_ctx field. I see no rationale for adding that though, as that field does not seem to be managed via RCU at all. 
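For readers unfamiliar with the annotation, the sketch below is illustrative only and not part of the patch (the struct and field names are made up). It shows the convention the change restores: __rcu marks pointers that are published with rcu_assign_pointer() and read with rcu_dereference(), so sparse (CONFIG_SPARSE_RCU_POINTER) can flag mismatched accesses, while a field that is only ever touched under an ordinary lock stays unannotated.

#include <linux/rcupdate.h>
#include <linux/spinlock.h>

struct item;				/* opaque payload, hypothetical */

struct example {
	struct item __rcu *rcu_ptr;	/* published with rcu_assign_pointer() */
	struct item *plain_ptr;		/* only accessed under ->lock */
	spinlock_t lock;
};

/* Reader side for the RCU-managed field: accessor matches annotation. */
static struct item *example_get_rcu(struct example *e)
{
	return rcu_dereference(e->rcu_ptr);	/* caller holds rcu_read_lock() */
}

/* The plain field needs no __rcu and no RCU accessor, just its lock. */
static struct item *example_get_plain(struct example *e)
{
	lockdep_assert_held(&e->lock);
	return e->plain_ptr;
}
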
Cc: Arnd Bergmann Cc: Paul McKenney Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- include/linux/sunrpc/auth_gss.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/auth_gss.h b/include/linux/sunrpc/auth_gss.h index cbc6875fb9cf..36eebc451b41 100644 --- a/include/linux/sunrpc/auth_gss.h +++ b/include/linux/sunrpc/auth_gss.h @@ -69,7 +69,7 @@ struct gss_cl_ctx { enum rpc_gss_proc gc_proc; u32 gc_seq; spinlock_t gc_seq_lock; - struct gss_ctx __rcu *gc_gss_ctx; + struct gss_ctx *gc_gss_ctx; struct xdr_netobj gc_wire_ctx; struct xdr_netobj gc_acceptor; u32 gc_win; -- cgit v1.2.3-59-g8ed1b From ec25422c669d38f4e8a83da7f77950094349de48 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 16 Jul 2014 06:52:22 -0400 Subject: sunrpc: remove "ec" argument from encrypt_v2 operation It's always 0. Signed-off-by: Jeff Layton Reviewed-by: Christoph Hellwig Signed-off-by: Trond Myklebust --- include/linux/sunrpc/gss_krb5.h | 4 ++-- net/sunrpc/auth_gss/gss_krb5_crypto.c | 9 ++------- net/sunrpc/auth_gss/gss_krb5_wrap.c | 2 +- 3 files changed, 5 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h index 5af2931cf58d..df02a4188487 100644 --- a/include/linux/sunrpc/gss_krb5.h +++ b/include/linux/sunrpc/gss_krb5.h @@ -81,7 +81,7 @@ struct gss_krb5_enctype { struct xdr_netobj *in, struct xdr_netobj *out); /* complete key generation */ u32 (*encrypt_v2) (struct krb5_ctx *kctx, u32 offset, - struct xdr_buf *buf, int ec, + struct xdr_buf *buf, struct page **pages); /* v2 encryption function */ u32 (*decrypt_v2) (struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf, u32 *headskip, @@ -310,7 +310,7 @@ gss_krb5_aes_make_key(const struct gss_krb5_enctype *gk5e, u32 gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset, - struct xdr_buf *buf, int ec, + struct xdr_buf *buf, struct page **pages); u32 diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index 0f43e894bc0a..f5ed9f6ece06 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -641,7 +641,7 @@ out: u32 gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset, - struct xdr_buf *buf, int ec, struct page **pages) + struct xdr_buf *buf, struct page **pages) { u32 err; struct xdr_netobj hmac; @@ -684,13 +684,8 @@ gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset, ecptr = buf->tail[0].iov_base; } - memset(ecptr, 'X', ec); - buf->tail[0].iov_len += ec; - buf->len += ec; - /* copy plaintext gss token header after filler (if any) */ - memcpy(ecptr + ec, buf->head[0].iov_base + offset, - GSS_KRB5_TOK_HDR_LEN); + memcpy(ecptr, buf->head[0].iov_base + offset, GSS_KRB5_TOK_HDR_LEN); buf->tail[0].iov_len += GSS_KRB5_TOK_HDR_LEN; buf->len += GSS_KRB5_TOK_HDR_LEN; diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c index 88cd24aacddc..4b614c604fe0 100644 --- a/net/sunrpc/auth_gss/gss_krb5_wrap.c +++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c @@ -483,7 +483,7 @@ gss_wrap_kerberos_v2(struct krb5_ctx *kctx, u32 offset, *be64ptr = cpu_to_be64(kctx->seq_send64++); spin_unlock(&krb5_seq_lock); - err = (*kctx->gk5e->encrypt_v2)(kctx, offset, buf, 0, pages); + err = (*kctx->gk5e->encrypt_v2)(kctx, offset, buf, pages); if (err) return err; -- cgit v1.2.3-59-g8ed1b From e7029206ff43f6cf7d6fcb741adb126f47200516 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Thu, 17 Jul 2014 20:42:15 -0400 
Subject: nfs: check wait_on_bit_lock err in page_group_lock Return errors from wait_on_bit_lock from nfs_page_group_lock. Add a bool argument @wait to nfs_page_group_lock. If true, loop over wait_on_bit_lock until it returns cleanly. If false, return the error from wait_on_bit_lock. Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 29 +++++++++++++++++++++++------ fs/nfs/write.c | 6 ++++-- include/linux/nfs_page.h | 2 +- 3 files changed, 28 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index e76a40e298f2..9425118e91d7 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -147,17 +147,25 @@ static int nfs_wait_bit_uninterruptible(void *word) * @req - request in group that is to be locked * * this lock must be held if modifying the page group list + * + * returns result from wait_on_bit_lock: 0 on success, < 0 on error */ -void -nfs_page_group_lock(struct nfs_page *req) +int +nfs_page_group_lock(struct nfs_page *req, bool wait) { struct nfs_page *head = req->wb_head; + int ret; WARN_ON_ONCE(head != head->wb_head); - wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK, + do { + ret = wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK, nfs_wait_bit_uninterruptible, TASK_UNINTERRUPTIBLE); + } while (wait && ret != 0); + + WARN_ON_ONCE(ret > 0); + return ret; } /* @@ -218,7 +226,7 @@ bool nfs_page_group_sync_on_bit(struct nfs_page *req, unsigned int bit) { bool ret; - nfs_page_group_lock(req); + nfs_page_group_lock(req, true); ret = nfs_page_group_sync_on_bit_locked(req, bit); nfs_page_group_unlock(req); @@ -858,8 +866,13 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, struct nfs_page *subreq; unsigned int bytes_left = 0; unsigned int offset, pgbase; + int ret; - nfs_page_group_lock(req); + ret = nfs_page_group_lock(req, false); + if (ret < 0) { + desc->pg_error = ret; + return 0; + } subreq = req; bytes_left = subreq->wb_bytes; @@ -881,7 +894,11 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, if (desc->pg_recoalesce) return 0; /* retry add_request for this subreq */ - nfs_page_group_lock(req); + ret = nfs_page_group_lock(req, false); + if (ret < 0) { + desc->pg_error = ret; + return 0; + } continue; } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index d357728ed8ba..8d1ed2b9c16c 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -216,7 +216,7 @@ static bool nfs_page_group_covers_page(struct nfs_page *req) unsigned int pos = 0; unsigned int len = nfs_page_length(req->wb_page); - nfs_page_group_lock(req); + nfs_page_group_lock(req, true); do { tmp = nfs_page_group_search_locked(req->wb_head, pos); @@ -456,7 +456,9 @@ try_again: } /* lock each request in the page group */ - nfs_page_group_lock(head); + ret = nfs_page_group_lock(head, false); + if (ret < 0) + return ERR_PTR(ret); subreq = head; do { /* diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 4b48548e700e..291924ca9517 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -122,7 +122,7 @@ extern size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, extern int nfs_wait_on_request(struct nfs_page *); extern void nfs_unlock_request(struct nfs_page *req); extern void nfs_unlock_and_release_request(struct nfs_page *); -extern void nfs_page_group_lock(struct nfs_page *); +extern int nfs_page_group_lock(struct nfs_page *, bool); extern void nfs_page_group_unlock(struct nfs_page *); extern bool nfs_page_group_sync_on_bit(struct nfs_page *, unsigned 
int); -- cgit v1.2.3-59-g8ed1b From b412ddf0661e11485876a202c48868143e3a01cf Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Thu, 17 Jul 2014 20:42:16 -0400 Subject: nfs: fix comment and add warn_on for PG_INODE_REF Fix the comment in nfs_page.h for PG_INODE_REF to reflect that it's no longer set only on head requests. Also add a WARN_ON_ONCE in nfs_inode_remove_request as PG_INODE_REF should always be set. Suggested-by: Peng Tao Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 2 ++ include/linux/nfs_page.h | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 8d1ed2b9c16c..e6bc5b51f325 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -707,6 +707,8 @@ static void nfs_inode_remove_request(struct nfs_page *req) if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags)) nfs_release_request(req); + else + WARN_ON_ONCE(1); } static void diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 291924ca9517..6ad2bbcad405 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -26,7 +26,7 @@ enum { PG_MAPPED, /* page private set for buffered io */ PG_CLEAN, /* write succeeded */ PG_COMMIT_TO_DS, /* used by pnfs layouts */ - PG_INODE_REF, /* extra ref held by inode (head req only) */ + PG_INODE_REF, /* extra ref held by inode when in writeback */ PG_HEADLOCK, /* page group lock of wb_head */ PG_TEARDOWN, /* page group sync for destroy */ PG_UNLOCKPAGE, /* page group sync bit in read path */ -- cgit v1.2.3-59-g8ed1b From bd95608053b7f7813351b0defc0e3e7ef8cf2803 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 14 Jul 2014 11:28:20 +1000 Subject: sunrpc/auth: allow lockless (rcu) lookup of credential cache. The new flag RPCAUTH_LOOKUP_RCU to credential lookup avoids locking, does not take a reference on the returned credential, and returns -ECHILD if a simple lookup was not possible. The returned value can only be used within an rcu_read_lock protected region. The main user of this is the new rpc_lookup_cred_nonblock() which returns a pointer to the current credential which is only rcu-safe (no ref-count held), and might return -ECHILD if allocation was required. 
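To illustrate the intended calling convention (a sketch only; check_access() is a hypothetical non-sleeping consumer, not part of this series):

#include <linux/fs.h>
#include <linux/rcupdate.h>
#include <linux/sunrpc/auth.h>

static int check_access(struct inode *inode, struct rpc_cred *cred, int mask); /* hypothetical */

static int example_permission(struct inode *inode, int mask)
{
        struct rpc_cred *cred;
        int ret;

        if (mask & MAY_NOT_BLOCK) {
                rcu_read_lock();
                cred = rpc_lookup_cred_nonblock();      /* no reference taken */
                if (IS_ERR(cred))
                        ret = PTR_ERR(cred);            /* typically -ECHILD */
                else
                        ret = check_access(inode, cred, mask);
                rcu_read_unlock();
                return ret;     /* -ECHILD tells the caller to retry outside RCU-walk */
        }

        cred = rpc_lookup_cred();                       /* may sleep, takes a reference */
        if (IS_ERR(cred))
                return PTR_ERR(cred);
        ret = check_access(inode, cred, mask);
        put_rpccred(cred);
        return ret;
}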
Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust --- include/linux/sunrpc/auth.h | 2 ++ net/sunrpc/auth.c | 17 +++++++++++++++-- net/sunrpc/auth_generic.c | 6 ++++++ net/sunrpc/auth_null.c | 2 ++ 4 files changed, 25 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index c683b9a06913..8e030075fe79 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -103,6 +103,7 @@ struct rpc_auth_create_args { /* Flags for rpcauth_lookupcred() */ #define RPCAUTH_LOOKUP_NEW 0x01 /* Accept an uninitialised cred */ +#define RPCAUTH_LOOKUP_RCU 0x02 /* lock-less lookup */ /* * Client authentication ops @@ -154,6 +155,7 @@ void rpc_destroy_generic_auth(void); void rpc_destroy_authunix(void); struct rpc_cred * rpc_lookup_cred(void); +struct rpc_cred * rpc_lookup_cred_nonblock(void); struct rpc_cred * rpc_lookup_machine_cred(const char *service_name); int rpcauth_register(const struct rpc_authops *); int rpcauth_unregister(const struct rpc_authops *); diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 360decdddc78..24fcbd23ae6c 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -557,6 +557,12 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred, hlist_for_each_entry_rcu(entry, &cache->hashtable[nr], cr_hash) { if (!entry->cr_ops->crmatch(acred, entry, flags)) continue; + if (flags & RPCAUTH_LOOKUP_RCU) { + if (test_bit(RPCAUTH_CRED_HASHED, &entry->cr_flags) && + !test_bit(RPCAUTH_CRED_NEW, &entry->cr_flags)) + cred = entry; + break; + } spin_lock(&cache->lock); if (test_bit(RPCAUTH_CRED_HASHED, &entry->cr_flags) == 0) { spin_unlock(&cache->lock); @@ -571,6 +577,9 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred, if (cred != NULL) goto found; + if (flags & RPCAUTH_LOOKUP_RCU) + return ERR_PTR(-ECHILD); + new = auth->au_ops->crcreate(auth, acred, flags); if (IS_ERR(new)) { cred = new; @@ -621,10 +630,14 @@ rpcauth_lookupcred(struct rpc_auth *auth, int flags) memset(&acred, 0, sizeof(acred)); acred.uid = cred->fsuid; acred.gid = cred->fsgid; - acred.group_info = get_group_info(((struct cred *)cred)->group_info); + if (flags & RPCAUTH_LOOKUP_RCU) + acred.group_info = rcu_dereference(cred->group_info); + else + acred.group_info = get_group_info(((struct cred *)cred)->group_info); ret = auth->au_ops->lookup_cred(auth, &acred, flags); - put_group_info(acred.group_info); + if (!(flags & RPCAUTH_LOOKUP_RCU)) + put_group_info(acred.group_info); return ret; } EXPORT_SYMBOL_GPL(rpcauth_lookupcred); diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c index ed04869b2d4f..6f6b829c9e8e 100644 --- a/net/sunrpc/auth_generic.c +++ b/net/sunrpc/auth_generic.c @@ -38,6 +38,12 @@ struct rpc_cred *rpc_lookup_cred(void) } EXPORT_SYMBOL_GPL(rpc_lookup_cred); +struct rpc_cred *rpc_lookup_cred_nonblock(void) +{ + return rpcauth_lookupcred(&generic_auth, RPCAUTH_LOOKUP_RCU); +} +EXPORT_SYMBOL_GPL(rpc_lookup_cred_nonblock); + /* * Public call interface for looking up machine creds. 
*/ diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c index f0ebe07978a2..712c123e04e9 100644 --- a/net/sunrpc/auth_null.c +++ b/net/sunrpc/auth_null.c @@ -35,6 +35,8 @@ nul_destroy(struct rpc_auth *auth) static struct rpc_cred * nul_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) { + if (flags & RPCAUTH_LOOKUP_RCU) + return &null_cred; return get_rpccred(&null_cred); } -- cgit v1.2.3-59-g8ed1b From 912a108da767ae75cc929d2854e698aff527ec5d Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 14 Jul 2014 11:28:20 +1000 Subject: NFS: teach nfs_neg_need_reval to understand LOOKUP_RCU This requires nfs_check_verifier to take an rcu_walk flag, and requires an rcu version of nfs_revalidate_inode which returns -ECHILD rather than making an RPC call. With this, nfs_lookup_revalidate can call nfs_neg_need_reval in RCU-walk mode. We can also move the LOOKUP_RCU check past the nfs_check_verifier() call in nfs_lookup_revalidate. If RCU_WALK prevents nfs_check_verifier or nfs_neg_need_reval from doing a full check, they return a status indicating that a revalidation is required. As this revalidation will not be possible in RCU_WALK mode, -ECHILD will ultimately be returned, which is the desired result. Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 59 +++++++++++++++++++++++++++++++++++--------------- fs/nfs/inode.c | 9 ++++++++ include/linux/nfs_fs.h | 1 + 3 files changed, 52 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 8a3c36984fc4..dcd4fe5831d6 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -988,9 +988,13 @@ EXPORT_SYMBOL_GPL(nfs_force_lookup_revalidate); * A check for whether or not the parent directory has changed. * In the case it has, we assume that the dentries are untrustworthy * and may need to be looked up again. + * If rcu_walk prevents us from performing a full check, return 0. */ -static int nfs_check_verifier(struct inode *dir, struct dentry *dentry) +static int nfs_check_verifier(struct inode *dir, struct dentry *dentry, + int rcu_walk) { + int ret; + if (IS_ROOT(dentry)) return 1; if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONE) @@ -998,7 +1002,11 @@ static int nfs_check_verifier(struct inode *dir, struct dentry *dentry) if (!nfs_verify_change_attribute(dir, dentry->d_time)) return 0; /* Revalidate nfsi->cache_change_attribute before we declare a match */ - if (nfs_revalidate_inode(NFS_SERVER(dir), dir) < 0) + if (rcu_walk) + ret = nfs_revalidate_inode_rcu(NFS_SERVER(dir), dir); + else + ret = nfs_revalidate_inode(NFS_SERVER(dir), dir); + if (ret < 0) return 0; if (!nfs_verify_change_attribute(dir, dentry->d_time)) return 0; @@ -1054,6 +1062,9 @@ out_force: * * If parent mtime has changed, we revalidate, else we wait for a * period corresponding to the parent's attribute cache timeout value. + * + * If LOOKUP_RCU prevents us from performing a full check, return 1 + * suggesting a reval is needed. 
*/ static inline int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry, @@ -1064,7 +1075,7 @@ int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry, return 0; if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG) return 1; - return !nfs_check_verifier(dir, dentry); + return !nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU); } /* @@ -1101,11 +1112,11 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags) inode = dentry->d_inode; if (!inode) { - if (flags & LOOKUP_RCU) - return -ECHILD; - - if (nfs_neg_need_reval(dir, dentry, flags)) + if (nfs_neg_need_reval(dir, dentry, flags)) { + if (flags & LOOKUP_RCU) + return -ECHILD; goto out_bad; + } goto out_valid_noent; } @@ -1120,16 +1131,21 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags) if (NFS_PROTO(dir)->have_delegation(inode, FMODE_READ)) goto out_set_verifier; - if (flags & LOOKUP_RCU) - return -ECHILD; - /* Force a full look up iff the parent directory has changed */ - if (!nfs_is_exclusive_create(dir, flags) && nfs_check_verifier(dir, dentry)) { + if (!nfs_is_exclusive_create(dir, flags) && + nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU)) { + + if (flags & LOOKUP_RCU) + return -ECHILD; + if (nfs_lookup_verify_inode(inode, flags)) goto out_zap_parent; goto out_valid; } + if (flags & LOOKUP_RCU) + return -ECHILD; + if (NFS_STALE(inode)) goto out_bad; @@ -1566,14 +1582,23 @@ static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags) struct dentry *parent; struct inode *dir; - if (flags & LOOKUP_RCU) - return -ECHILD; - - parent = dget_parent(dentry); - dir = parent->d_inode; + if (flags & LOOKUP_RCU) { + parent = rcu_dereference(dentry); + dir = ACCESS_ONCE(parent->d_inode); + if (!dir) + return -ECHILD; + } else { + parent = dget_parent(dentry); + dir = parent->d_inode; + } if (!nfs_neg_need_reval(dir, dentry, flags)) ret = 1; - dput(parent); + else if (flags & LOOKUP_RCU) + ret = -ECHILD; + if (!(flags & LOOKUP_RCU)) + dput(parent); + else if (parent != rcu_dereference(dentry)) + return -ECHILD; goto out; } diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 9927913c97c2..147fd17e7920 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1002,6 +1002,15 @@ int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) } EXPORT_SYMBOL_GPL(nfs_revalidate_inode); +int nfs_revalidate_inode_rcu(struct nfs_server *server, struct inode *inode) +{ + if (!(NFS_I(inode)->cache_validity & + (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_LABEL)) + && !nfs_attribute_cache_expired(inode)) + return NFS_STALE(inode) ? 
-ESTALE : 0; + return -ECHILD; +} + static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping) { struct nfs_inode *nfsi = NFS_I(inode); diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index e30f6059ecd6..60cd9e377926 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -352,6 +352,7 @@ extern int nfs_release(struct inode *, struct file *); extern int nfs_attribute_timeout(struct inode *inode); extern int nfs_attribute_cache_expired(struct inode *inode); extern int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode); +extern int nfs_revalidate_inode_rcu(struct nfs_server *server, struct inode *inode); extern int __nfs_revalidate_inode(struct nfs_server *, struct inode *); extern int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping); extern int nfs_setattr(struct dentry *, struct iattr *); -- cgit v1.2.3-59-g8ed1b From f682a398b2e24ae0a775ddf37cced83b897198ee Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 14 Jul 2014 11:28:20 +1000 Subject: NFS: allow lockless access to access_cache The access cache is used during RCU-walk path lookups, so it is best to avoid locking if possible as taking a lock kills concurrency. The rbtree is not rcu-safe and cannot easily be made so. Instead we simply check the last (i.e. most recent) entry on the LRU list. If this doesn't match, then we return -ECHILD and retry in lock/refcount mode. This requires freeing the nfs_access_entry struct with rcu, and requires using rcu access primitives when adding entries to the lru, and when examining the last entry. Calling put_rpccred before kfree_rcu looks a bit odd, but as put_rpccred already provides rcu protection, we know that the cred will not actually be freed until the next grace period, so any concurrent access will be safe. This patch provides about 5% performance improvement on a stat-heavy synthetic work load with 4 threads on a 2-core CPU. Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 43 +++++++++++++++++++++++++++++++++++++++++-- include/linux/nfs_fs.h | 1 + 2 files changed, 42 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 2bfbde0f7176..1b5f38f48dab 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2079,7 +2079,7 @@ MODULE_PARM_DESC(nfs_access_max_cachesize, "NFS access maximum total cache lengt static void nfs_access_free_entry(struct nfs_access_entry *entry) { put_rpccred(entry->cred); - kfree(entry); + kfree_rcu(entry, rcu_head); smp_mb__before_atomic(); atomic_long_dec(&nfs_access_nr_entries); smp_mb__after_atomic(); @@ -2257,6 +2257,38 @@ out_zap: return -ENOENT; } +static int nfs_access_get_cached_rcu(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res) +{ + /* Only check the most recently returned cache entry, + * but do it without locking.
+ */ + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs_access_entry *cache; + int err = -ECHILD; + struct list_head *lh; + + rcu_read_lock(); + if (nfsi->cache_validity & NFS_INO_INVALID_ACCESS) + goto out; + lh = rcu_dereference(nfsi->access_cache_entry_lru.prev); + cache = list_entry(lh, struct nfs_access_entry, lru); + if (lh == &nfsi->access_cache_entry_lru || + cred != cache->cred) + cache = NULL; + if (cache == NULL) + goto out; + if (!nfs_have_delegated_attributes(inode) && + !time_in_range_open(jiffies, cache->jiffies, cache->jiffies + nfsi->attrtimeo)) + goto out; + res->jiffies = cache->jiffies; + res->cred = cache->cred; + res->mask = cache->mask; + err = 0; +out: + rcu_read_unlock(); + return err; +} + static void nfs_access_add_rbtree(struct inode *inode, struct nfs_access_entry *set) { struct nfs_inode *nfsi = NFS_I(inode); @@ -2300,6 +2332,11 @@ void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set) cache->cred = get_rpccred(set->cred); cache->mask = set->mask; + /* The above field assignments must be visible + * before this item appears on the lru. We cannot easily + * use rcu_assign_pointer, so just force the memory barrier. + */ + smp_wmb(); nfs_access_add_rbtree(inode, cache); /* Update accounting */ @@ -2339,7 +2376,9 @@ static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask) trace_nfs_access_enter(inode); - status = nfs_access_get_cached(inode, cred, &cache); + status = nfs_access_get_cached_rcu(inode, cred, &cache); + if (status != 0) + status = nfs_access_get_cached(inode, cred, &cache); if (status == 0) goto out_cached; diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 60cd9e377926..5180a7ededec 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -52,6 +52,7 @@ struct nfs_access_entry { unsigned long jiffies; struct rpc_cred * cred; int mask; + struct rcu_head rcu_head; }; struct nfs_lockowner { -- cgit v1.2.3-59-g8ed1b From 961e3beae3b29ae9463631415342244cdaf1cd47 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Tue, 10 Jun 2014 10:25:00 +0200 Subject: drm/tegra: Make job submission 64-bit safe Job submission currently relies on the fact that struct drm_tegra_reloc and struct host1x_reloc are the same size and uses a simple call to the copy_from_user() function to copy them to kernel space. This causes the handle to be stored in the buffer object field, which then needs a cast to a 32 bit integer to resolve it to a proper buffer object pointer and store it back in the buffer object field. On 64-bit architectures that will no longer work, since pointers are 64 bits wide whereas handles will remain 32 bits. This causes the sizes of both structures to become different and copying will no longer work. Fix this by adding a new function, host1x_reloc_copy_from_user(), that copies the structures field by field. While at it, use substructures for the command and target buffers in struct host1x_reloc for better readability. Also use unsized types to make it more obvious that this isn't part of userspace ABI.
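The layout problem can be sketched with simplified stand-ins for the two structures (hypothetical names, not the real UAPI): the userspace side keeps fixed 32-bit fields while the kernel side carries pointers, so the only safe approach is the field-by-field copy with get_user() that the patch below adopts.

#include <linux/errno.h>
#include <linux/host1x.h>
#include <linux/types.h>
#include <linux/uaccess.h>

struct example_reloc_user {             /* fixed-size userspace ABI view */
        __u32 cmdbuf_handle;
        __u32 cmdbuf_offset;
};

struct example_reloc_kernel {           /* kernel view, pointer-sized members */
        struct host1x_bo *cmdbuf_bo;
        unsigned long cmdbuf_offset;
};

static struct host1x_bo *example_lookup_bo(u32 handle); /* hypothetical handle -> BO lookup */

static int example_get_reloc(struct example_reloc_kernel *dest,
                             struct example_reloc_user __user *src)
{
        u32 handle;
        int err;

        err = get_user(handle, &src->cmdbuf_handle);
        if (err < 0)
                return err;

        err = get_user(dest->cmdbuf_offset, &src->cmdbuf_offset);
        if (err < 0)
                return err;

        dest->cmdbuf_bo = example_lookup_bo(handle);
        return dest->cmdbuf_bo ? 0 : -ENOENT;
}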
Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/drm.c | 62 ++++++++++++++++++++++++++++++++------------- drivers/gpu/host1x/job.c | 22 ++++++++-------- include/linux/host1x.h | 15 ++++++----- 3 files changed, 64 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c index 5cba5e736130..59736bb810cd 100644 --- a/drivers/gpu/drm/tegra/drm.c +++ b/drivers/gpu/drm/tegra/drm.c @@ -132,6 +132,45 @@ host1x_bo_lookup(struct drm_device *drm, struct drm_file *file, u32 handle) return &bo->base; } +static int host1x_reloc_copy_from_user(struct host1x_reloc *dest, + struct drm_tegra_reloc __user *src, + struct drm_device *drm, + struct drm_file *file) +{ + u32 cmdbuf, target; + int err; + + err = get_user(cmdbuf, &src->cmdbuf.handle); + if (err < 0) + return err; + + err = get_user(dest->cmdbuf.offset, &src->cmdbuf.offset); + if (err < 0) + return err; + + err = get_user(target, &src->target.handle); + if (err < 0) + return err; + + err = get_user(dest->target.offset, &src->cmdbuf.offset); + if (err < 0) + return err; + + err = get_user(dest->shift, &src->shift); + if (err < 0) + return err; + + dest->cmdbuf.bo = host1x_bo_lookup(drm, file, cmdbuf); + if (!dest->cmdbuf.bo) + return -ENOENT; + + dest->target.bo = host1x_bo_lookup(drm, file, target); + if (!dest->target.bo) + return -ENOENT; + + return 0; +} + int tegra_drm_submit(struct tegra_drm_context *context, struct drm_tegra_submit *args, struct drm_device *drm, struct drm_file *file) @@ -184,26 +223,13 @@ int tegra_drm_submit(struct tegra_drm_context *context, cmdbufs++; } - if (copy_from_user(job->relocarray, relocs, - sizeof(*relocs) * num_relocs)) { - err = -EFAULT; - goto fail; - } - + /* copy and resolve relocations from submit */ while (num_relocs--) { - struct host1x_reloc *reloc = &job->relocarray[num_relocs]; - struct host1x_bo *cmdbuf, *target; - - cmdbuf = host1x_bo_lookup(drm, file, (u32)reloc->cmdbuf); - target = host1x_bo_lookup(drm, file, (u32)reloc->target); - - reloc->cmdbuf = cmdbuf; - reloc->target = target; - - if (!reloc->target || !reloc->cmdbuf) { - err = -ENOENT; + err = host1x_reloc_copy_from_user(&job->relocarray[num_relocs], + &relocs[num_relocs], drm, + file); + if (err < 0) goto fail; - } } if (copy_from_user(job->waitchk, waitchks, diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c index 112f27e51bc7..63bd63f3c7df 100644 --- a/drivers/gpu/host1x/job.c +++ b/drivers/gpu/host1x/job.c @@ -185,16 +185,16 @@ static unsigned int pin_job(struct host1x_job *job) struct sg_table *sgt; dma_addr_t phys_addr; - reloc->target = host1x_bo_get(reloc->target); - if (!reloc->target) + reloc->target.bo = host1x_bo_get(reloc->target.bo); + if (!reloc->target.bo) goto unpin; - phys_addr = host1x_bo_pin(reloc->target, &sgt); + phys_addr = host1x_bo_pin(reloc->target.bo, &sgt); if (!phys_addr) goto unpin; job->addr_phys[job->num_unpins] = phys_addr; - job->unpins[job->num_unpins].bo = reloc->target; + job->unpins[job->num_unpins].bo = reloc->target.bo; job->unpins[job->num_unpins].sgt = sgt; job->num_unpins++; } @@ -235,21 +235,21 @@ static unsigned int do_relocs(struct host1x_job *job, struct host1x_bo *cmdbuf) for (i = 0; i < job->num_relocs; i++) { struct host1x_reloc *reloc = &job->relocarray[i]; u32 reloc_addr = (job->reloc_addr_phys[i] + - reloc->target_offset) >> reloc->shift; + reloc->target.offset) >> reloc->shift; u32 *target; /* skip all other gathers */ - if (cmdbuf != reloc->cmdbuf) + if (cmdbuf != reloc->cmdbuf.bo) continue; - 
if (last_page != reloc->cmdbuf_offset >> PAGE_SHIFT) { + if (last_page != reloc->cmdbuf.offset >> PAGE_SHIFT) { if (cmdbuf_page_addr) host1x_bo_kunmap(cmdbuf, last_page, cmdbuf_page_addr); cmdbuf_page_addr = host1x_bo_kmap(cmdbuf, - reloc->cmdbuf_offset >> PAGE_SHIFT); - last_page = reloc->cmdbuf_offset >> PAGE_SHIFT; + reloc->cmdbuf.offset >> PAGE_SHIFT); + last_page = reloc->cmdbuf.offset >> PAGE_SHIFT; if (unlikely(!cmdbuf_page_addr)) { pr_err("Could not map cmdbuf for relocation\n"); @@ -257,7 +257,7 @@ static unsigned int do_relocs(struct host1x_job *job, struct host1x_bo *cmdbuf) } } - target = cmdbuf_page_addr + (reloc->cmdbuf_offset & ~PAGE_MASK); + target = cmdbuf_page_addr + (reloc->cmdbuf.offset & ~PAGE_MASK); *target = reloc_addr; } @@ -272,7 +272,7 @@ static bool check_reloc(struct host1x_reloc *reloc, struct host1x_bo *cmdbuf, { offset *= sizeof(u32); - if (reloc->cmdbuf != cmdbuf || reloc->cmdbuf_offset != offset) + if (reloc->cmdbuf.bo != cmdbuf || reloc->cmdbuf.offset != offset) return false; return true; diff --git a/include/linux/host1x.h b/include/linux/host1x.h index d2b52999e771..bb9840fd1e18 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -164,12 +164,15 @@ int host1x_job_submit(struct host1x_job *job); */ struct host1x_reloc { - struct host1x_bo *cmdbuf; - u32 cmdbuf_offset; - struct host1x_bo *target; - u32 target_offset; - u32 shift; - u32 pad; + struct { + struct host1x_bo *bo; + unsigned long offset; + } cmdbuf; + struct { + struct host1x_bo *bo; + unsigned long offset; + } target; + unsigned long shift; }; struct host1x_job { -- cgit v1.2.3-59-g8ed1b From 31c1e5a1350ae8d1bc2018f5de8264266d9773e1 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Fri, 1 Aug 2014 12:20:10 +0200 Subject: dmaengine: Remove the context argument to the prep_dma_cyclic operation The argument is always set to NULL and never used. Remove it. 
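For context, a sketch of the client side (hypothetical names and parameters): consumers go through the dmaengine_prep_dma_cyclic() wrapper, which never exposed the context pointer, so only the provider callbacks touched here change shape.

#include <linux/dmaengine.h>

static void example_period_done(void *param);   /* hypothetical per-period callback */

static int example_start_cyclic(struct dma_chan *chan, dma_addr_t buf,
                                size_t buf_len, size_t period_len, void *priv)
{
        struct dma_async_tx_descriptor *desc;

        desc = dmaengine_prep_dma_cyclic(chan, buf, buf_len, period_len,
                                         DMA_DEV_TO_MEM, DMA_PREP_INTERRUPT);
        if (!desc)
                return -EBUSY;

        desc->callback = example_period_done;
        desc->callback_param = priv;

        dmaengine_submit(desc);
        dma_async_issue_pending(chan);

        return 0;
}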
Signed-off-by: Laurent Pinchart Signed-off-by: Vinod Koul --- drivers/dma/amba-pl08x.c | 2 +- drivers/dma/at_hdmac.c | 3 +-- drivers/dma/bcm2835-dma.c | 2 +- drivers/dma/dma-jz4740.c | 2 +- drivers/dma/edma.c | 2 +- drivers/dma/ep93xx_dma.c | 4 +--- drivers/dma/fsl-edma.c | 2 +- drivers/dma/imx-dma.c | 2 +- drivers/dma/imx-sdma.c | 2 +- drivers/dma/mmp_pdma.c | 2 +- drivers/dma/mmp_tdma.c | 2 +- drivers/dma/mxs-dma.c | 2 +- drivers/dma/omap-dma.c | 3 +-- drivers/dma/pl330.c | 2 +- drivers/dma/s3c24xx-dma.c | 3 +-- drivers/dma/sa11x0-dma.c | 2 +- drivers/dma/sh/shdma-base.c | 2 +- drivers/dma/sirf-dma.c | 2 +- drivers/dma/ste_dma40.c | 3 +-- drivers/dma/tegra20-apb-dma.c | 2 +- include/linux/dmaengine.h | 4 ++-- 21 files changed, 22 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c index 8114731a1c62..48ec81fe1eac 100644 --- a/drivers/dma/amba-pl08x.c +++ b/drivers/dma/amba-pl08x.c @@ -1653,7 +1653,7 @@ static struct dma_async_tx_descriptor *pl08x_prep_slave_sg( static struct dma_async_tx_descriptor *pl08x_prep_dma_cyclic( struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len, size_t period_len, enum dma_transfer_direction direction, - unsigned long flags, void *context) + unsigned long flags) { struct pl08x_dma_chan *plchan = to_pl08x_chan(chan); struct pl08x_driver_data *pl08x = plchan->host; diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index c13a3bb0f594..d20ab1b73a3a 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -893,12 +893,11 @@ atc_dma_cyclic_fill_desc(struct dma_chan *chan, struct at_desc *desc, * @period_len: number of bytes for each period * @direction: transfer direction, to or from device * @flags: tx descriptor status flags - * @context: transfer context (ignored) */ static struct dma_async_tx_descriptor * atc_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len, size_t period_len, enum dma_transfer_direction direction, - unsigned long flags, void *context) + unsigned long flags) { struct at_dma_chan *atchan = to_at_dma_chan(chan); struct at_dma_slave *atslave = chan->private; diff --git a/drivers/dma/bcm2835-dma.c b/drivers/dma/bcm2835-dma.c index a03602164e3e..68007974961a 100644 --- a/drivers/dma/bcm2835-dma.c +++ b/drivers/dma/bcm2835-dma.c @@ -335,7 +335,7 @@ static void bcm2835_dma_issue_pending(struct dma_chan *chan) static struct dma_async_tx_descriptor *bcm2835_dma_prep_dma_cyclic( struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len, size_t period_len, enum dma_transfer_direction direction, - unsigned long flags, void *context) + unsigned long flags) { struct bcm2835_chan *c = to_bcm2835_dma_chan(chan); enum dma_slave_buswidth dev_width; diff --git a/drivers/dma/dma-jz4740.c b/drivers/dma/dma-jz4740.c index bfbce6b07902..6a9d89c93b1f 100644 --- a/drivers/dma/dma-jz4740.c +++ b/drivers/dma/dma-jz4740.c @@ -433,7 +433,7 @@ static struct dma_async_tx_descriptor *jz4740_dma_prep_slave_sg( static struct dma_async_tx_descriptor *jz4740_dma_prep_dma_cyclic( struct dma_chan *c, dma_addr_t buf_addr, size_t buf_len, size_t period_len, enum dma_transfer_direction direction, - unsigned long flags, void *context) + unsigned long flags) { struct jz4740_dmaengine_chan *chan = to_jz4740_dma_chan(c); struct jz4740_dma_desc *desc; diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c index a13f37f719ed..d566650abf62 100644 --- a/drivers/dma/edma.c +++ b/drivers/dma/edma.c @@ -598,7 +598,7 @@ struct dma_async_tx_descriptor 
*edma_prep_dma_memcpy( static struct dma_async_tx_descriptor *edma_prep_dma_cyclic( struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len, size_t period_len, enum dma_transfer_direction direction, - unsigned long tx_flags, void *context) + unsigned long tx_flags) { struct edma_chan *echan = to_edma_chan(chan); struct device *dev = chan->device->dev; diff --git a/drivers/dma/ep93xx_dma.c b/drivers/dma/ep93xx_dma.c index cb4bf682a708..7650470196c4 100644 --- a/drivers/dma/ep93xx_dma.c +++ b/drivers/dma/ep93xx_dma.c @@ -1092,7 +1092,6 @@ fail: * @period_len: length of a single period * @dir: direction of the operation * @flags: tx descriptor status flags - * @context: operation context (ignored) * * Prepares a descriptor for cyclic DMA operation. This means that once the * descriptor is submitted, we will be submitting in a @period_len sized @@ -1105,8 +1104,7 @@ fail: static struct dma_async_tx_descriptor * ep93xx_dma_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len, size_t period_len, - enum dma_transfer_direction dir, unsigned long flags, - void *context) + enum dma_transfer_direction dir, unsigned long flags) { struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan); struct ep93xx_dma_desc *desc, *first; diff --git a/drivers/dma/fsl-edma.c b/drivers/dma/fsl-edma.c index 24ab3d371954..3c5711d5fe97 100644 --- a/drivers/dma/fsl-edma.c +++ b/drivers/dma/fsl-edma.c @@ -517,7 +517,7 @@ err: static struct dma_async_tx_descriptor *fsl_edma_prep_dma_cyclic( struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len, size_t period_len, enum dma_transfer_direction direction, - unsigned long flags, void *context) + unsigned long flags) { struct fsl_edma_chan *fsl_chan = to_fsl_edma_chan(chan); struct fsl_edma_desc *fsl_desc; diff --git a/drivers/dma/imx-dma.c b/drivers/dma/imx-dma.c index 286660a12cc6..9d2c9e7374dc 100644 --- a/drivers/dma/imx-dma.c +++ b/drivers/dma/imx-dma.c @@ -866,7 +866,7 @@ static struct dma_async_tx_descriptor *imxdma_prep_slave_sg( static struct dma_async_tx_descriptor *imxdma_prep_dma_cyclic( struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len, size_t period_len, enum dma_transfer_direction direction, - unsigned long flags, void *context) + unsigned long flags) { struct imxdma_channel *imxdmac = to_imxdma_chan(chan); struct imxdma_engine *imxdma = imxdmac->imxdma; diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c index de584e605db5..f7626e37d0b8 100644 --- a/drivers/dma/imx-sdma.c +++ b/drivers/dma/imx-sdma.c @@ -1125,7 +1125,7 @@ err_out: static struct dma_async_tx_descriptor *sdma_prep_dma_cyclic( struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len, size_t period_len, enum dma_transfer_direction direction, - unsigned long flags, void *context) + unsigned long flags) { struct sdma_channel *sdmac = to_sdma_chan(chan); struct sdma_engine *sdma = sdmac->sdma; diff --git a/drivers/dma/mmp_pdma.c b/drivers/dma/mmp_pdma.c index a7b186d536b3..a1a4db5721b8 100644 --- a/drivers/dma/mmp_pdma.c +++ b/drivers/dma/mmp_pdma.c @@ -601,7 +601,7 @@ static struct dma_async_tx_descriptor * mmp_pdma_prep_dma_cyclic(struct dma_chan *dchan, dma_addr_t buf_addr, size_t len, size_t period_len, enum dma_transfer_direction direction, - unsigned long flags, void *context) + unsigned long flags) { struct mmp_pdma_chan *chan; struct mmp_pdma_desc_sw *first = NULL, *prev = NULL, *new; diff --git a/drivers/dma/mmp_tdma.c b/drivers/dma/mmp_tdma.c index 724f7f4c9720..6ad30e2c5038 100644 --- a/drivers/dma/mmp_tdma.c +++ b/drivers/dma/mmp_tdma.c @@ 
-389,7 +389,7 @@ struct mmp_tdma_desc *mmp_tdma_alloc_descriptor(struct mmp_tdma_chan *tdmac) static struct dma_async_tx_descriptor *mmp_tdma_prep_dma_cyclic( struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len, size_t period_len, enum dma_transfer_direction direction, - unsigned long flags, void *context) + unsigned long flags) { struct mmp_tdma_chan *tdmac = to_mmp_tdma_chan(chan); struct mmp_tdma_desc *desc; diff --git a/drivers/dma/mxs-dma.c b/drivers/dma/mxs-dma.c index dc1dba78e529..5ea61201dbf0 100644 --- a/drivers/dma/mxs-dma.c +++ b/drivers/dma/mxs-dma.c @@ -589,7 +589,7 @@ err_out: static struct dma_async_tx_descriptor *mxs_dma_prep_dma_cyclic( struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len, size_t period_len, enum dma_transfer_direction direction, - unsigned long flags, void *context) + unsigned long flags) { struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan); struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma; diff --git a/drivers/dma/omap-dma.c b/drivers/dma/omap-dma.c index b19f04f4390b..4cf7d9a950d7 100644 --- a/drivers/dma/omap-dma.c +++ b/drivers/dma/omap-dma.c @@ -853,8 +853,7 @@ static struct dma_async_tx_descriptor *omap_dma_prep_slave_sg( static struct dma_async_tx_descriptor *omap_dma_prep_dma_cyclic( struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len, - size_t period_len, enum dma_transfer_direction dir, unsigned long flags, - void *context) + size_t period_len, enum dma_transfer_direction dir, unsigned long flags) { struct omap_dmadev *od = to_omap_dma_dev(chan->device); struct omap_chan *c = to_omap_dma_chan(chan); diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c index a55d75498098..d5149aacd2fe 100644 --- a/drivers/dma/pl330.c +++ b/drivers/dma/pl330.c @@ -2362,7 +2362,7 @@ static inline int get_burst_len(struct dma_pl330_desc *desc, size_t len) static struct dma_async_tx_descriptor *pl330_prep_dma_cyclic( struct dma_chan *chan, dma_addr_t dma_addr, size_t len, size_t period_len, enum dma_transfer_direction direction, - unsigned long flags, void *context) + unsigned long flags) { struct dma_pl330_desc *desc = NULL, *first = NULL; struct dma_pl330_chan *pch = to_pchan(chan); diff --git a/drivers/dma/s3c24xx-dma.c b/drivers/dma/s3c24xx-dma.c index 012520c9fd79..7416572d1e40 100644 --- a/drivers/dma/s3c24xx-dma.c +++ b/drivers/dma/s3c24xx-dma.c @@ -889,8 +889,7 @@ static struct dma_async_tx_descriptor *s3c24xx_dma_prep_memcpy( static struct dma_async_tx_descriptor *s3c24xx_dma_prep_dma_cyclic( struct dma_chan *chan, dma_addr_t addr, size_t size, size_t period, - enum dma_transfer_direction direction, unsigned long flags, - void *context) + enum dma_transfer_direction direction, unsigned long flags) { struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan); struct s3c24xx_dma_engine *s3cdma = s3cchan->host; diff --git a/drivers/dma/sa11x0-dma.c b/drivers/dma/sa11x0-dma.c index 5ebdfbc1051e..4b0ef043729a 100644 --- a/drivers/dma/sa11x0-dma.c +++ b/drivers/dma/sa11x0-dma.c @@ -612,7 +612,7 @@ static struct dma_async_tx_descriptor *sa11x0_dma_prep_slave_sg( static struct dma_async_tx_descriptor *sa11x0_dma_prep_dma_cyclic( struct dma_chan *chan, dma_addr_t addr, size_t size, size_t period, - enum dma_transfer_direction dir, unsigned long flags, void *context) + enum dma_transfer_direction dir, unsigned long flags) { struct sa11x0_dma_chan *c = to_sa11x0_dma_chan(chan); struct sa11x0_dma_desc *txd; diff --git a/drivers/dma/sh/shdma-base.c b/drivers/dma/sh/shdma-base.c index e427a03a0e8b..42d497416196 100644 --- 
a/drivers/dma/sh/shdma-base.c +++ b/drivers/dma/sh/shdma-base.c @@ -668,7 +668,7 @@ static struct dma_async_tx_descriptor *shdma_prep_slave_sg( static struct dma_async_tx_descriptor *shdma_prep_dma_cyclic( struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len, size_t period_len, enum dma_transfer_direction direction, - unsigned long flags, void *context) + unsigned long flags) { struct shdma_chan *schan = to_shdma_chan(chan); struct shdma_dev *sdev = to_shdma_dev(schan->dma_chan.device); diff --git a/drivers/dma/sirf-dma.c b/drivers/dma/sirf-dma.c index 03f7820fa333..aac03ab10c54 100644 --- a/drivers/dma/sirf-dma.c +++ b/drivers/dma/sirf-dma.c @@ -580,7 +580,7 @@ err_dir: static struct dma_async_tx_descriptor * sirfsoc_dma_prep_cyclic(struct dma_chan *chan, dma_addr_t addr, size_t buf_len, size_t period_len, - enum dma_transfer_direction direction, unsigned long flags, void *context) + enum dma_transfer_direction direction, unsigned long flags) { struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(chan); struct sirfsoc_dma_desc *sdesc = NULL; diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index c7984459ede7..5fe59335e247 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -2531,8 +2531,7 @@ d40_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, static struct dma_async_tx_descriptor * dma40_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len, size_t period_len, - enum dma_transfer_direction direction, unsigned long flags, - void *context) + enum dma_transfer_direction direction, unsigned long flags) { unsigned int periods = buf_len / period_len; struct dma_async_tx_descriptor *txd; diff --git a/drivers/dma/tegra20-apb-dma.c b/drivers/dma/tegra20-apb-dma.c index 03ad64ecaaf0..16efa603ff65 100644 --- a/drivers/dma/tegra20-apb-dma.c +++ b/drivers/dma/tegra20-apb-dma.c @@ -1055,7 +1055,7 @@ static struct dma_async_tx_descriptor *tegra_dma_prep_slave_sg( static struct dma_async_tx_descriptor *tegra_dma_prep_dma_cyclic( struct dma_chan *dc, dma_addr_t buf_addr, size_t buf_len, size_t period_len, enum dma_transfer_direction direction, - unsigned long flags, void *context) + unsigned long flags) { struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc); struct tegra_dma_desc *dma_desc = NULL; diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 4eb2f82aed1d..94ddccd706fc 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -669,7 +669,7 @@ struct dma_device { struct dma_async_tx_descriptor *(*device_prep_dma_cyclic)( struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len, size_t period_len, enum dma_transfer_direction direction, - unsigned long flags, void *context); + unsigned long flags); struct dma_async_tx_descriptor *(*device_prep_interleaved_dma)( struct dma_chan *chan, struct dma_interleaved_template *xt, unsigned long flags); @@ -744,7 +744,7 @@ static inline struct dma_async_tx_descriptor *dmaengine_prep_dma_cyclic( unsigned long flags) { return chan->device->device_prep_dma_cyclic(chan, buf_addr, buf_len, - period_len, dir, flags, NULL); + period_len, dir, flags); } static inline struct dma_async_tx_descriptor *dmaengine_prep_interleaved_dma( -- cgit v1.2.3-59-g8ed1b From ed67f0872be1aa516831332c732752022d4edc7c Mon Sep 17 00:00:00 2001 From: Johannes Pointner Date: Tue, 1 Jul 2014 08:05:52 +0200 Subject: hwmon: (ntc_thermistor) Support B57330V2103 from EPCOS This patch adds support for the ntc thermistor B57330V2103 from EPCOS. 
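For background, a sketch (not the driver's exact code) of how such a compensation table is used: the measured resistance is located in the table and the temperature is linearly interpolated, in millidegrees Celsius, between the two neighbouring entries.

struct example_comp {
        int temp_c;
        unsigned int ohm;
};

/* Entries are ordered from cold (high resistance) to hot (low resistance). */
static int example_ohm_to_temp_mc(const struct example_comp *t, int n,
                                  unsigned int ohm)
{
        int i;

        if (ohm >= t[0].ohm)
                return t[0].temp_c * 1000;
        if (ohm <= t[n - 1].ohm)
                return t[n - 1].temp_c * 1000;

        for (i = 1; i < n; i++)
                if (ohm >= t[i].ohm)
                        break;

        /* linear interpolation between t[i - 1] (colder) and t[i] (hotter) */
        return t[i - 1].temp_c * 1000 +
               (int)(t[i - 1].ohm - ohm) * 1000 *
               (t[i].temp_c - t[i - 1].temp_c) /
               (int)(t[i - 1].ohm - t[i].ohm);
}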
Signed-off-by: Johannes Pointner Signed-off-by: Guenter Roeck --- .../devicetree/bindings/hwmon/ntc_thermistor.txt | 1 + Documentation/hwmon/ntc_thermistor | 5 +++ drivers/hwmon/Kconfig | 2 +- drivers/hwmon/ntc_thermistor.c | 50 +++++++++++++++++++++- include/linux/platform_data/ntc_thermistor.h | 1 + 5 files changed, 57 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/hwmon/ntc_thermistor.txt b/Documentation/devicetree/bindings/hwmon/ntc_thermistor.txt index b117b2e9e1a7..2391e5c41999 100644 --- a/Documentation/devicetree/bindings/hwmon/ntc_thermistor.txt +++ b/Documentation/devicetree/bindings/hwmon/ntc_thermistor.txt @@ -3,6 +3,7 @@ NTC Thermistor hwmon sensors Requires node properties: - "compatible" value : one of + "epcos,b57330v2103" "murata,ncp15wb473" "murata,ncp18wb473" "murata,ncp21wb473" diff --git a/Documentation/hwmon/ntc_thermistor b/Documentation/hwmon/ntc_thermistor index 057b77029f26..c5e05e2900a3 100644 --- a/Documentation/hwmon/ntc_thermistor +++ b/Documentation/hwmon/ntc_thermistor @@ -6,6 +6,11 @@ Supported thermistors from Murata: Prefixes: 'ncp15wb473', 'ncp18wb473', 'ncp21wb473', 'ncp03wb473', 'ncp15wl333' Datasheet: Publicly available at Murata +Supported thermistors from EPCOS: +* EPCOS NTC Thermistors B57330V2103 + Prefixes: b57330v2103 + Datasheet: Publicly available at EPCOS + Other NTC thermistors can be supported simply by adding compensation tables; e.g., NCP15WL333 support is added by the table ncpXXwl333. diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig index 0c7359c5c317..ae1e2160c1b5 100644 --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig @@ -1073,7 +1073,7 @@ config SENSORS_NTC_THERMISTOR Currently, this driver supports NCP15WB473, NCP18WB473, NCP21WB473, NCP03WB473, and NCP15WL333 - from Murata. + from Murata and B57330V2103 from EPCOS. This driver can also be built as a module. If so, the module will be called ntc-thermistor. 
diff --git a/drivers/hwmon/ntc_thermistor.c b/drivers/hwmon/ntc_thermistor.c index ae66f42c4d6d..bd410722cd4b 100644 --- a/drivers/hwmon/ntc_thermistor.c +++ b/drivers/hwmon/ntc_thermistor.c @@ -51,6 +51,7 @@ static const struct platform_device_id ntc_thermistor_id[] = { { "ncp21wb473", TYPE_NCPXXWB473 }, { "ncp03wb473", TYPE_NCPXXWB473 }, { "ncp15wl333", TYPE_NCPXXWL333 }, + { "b57330v2103", TYPE_B57330V2103}, { }, }; @@ -133,6 +134,47 @@ static const struct ntc_compensation ncpXXwl333[] = { { .temp_c = 125, .ohm = 707 }, }; +/* + * The following compensation table is from the specification of EPCOS NTC + * Thermistors Datasheet + */ +static const struct ntc_compensation b57330v2103[] = { + { .temp_c = -40, .ohm = 190030 }, + { .temp_c = -35, .ohm = 145360 }, + { .temp_c = -30, .ohm = 112060 }, + { .temp_c = -25, .ohm = 87041 }, + { .temp_c = -20, .ohm = 68104 }, + { .temp_c = -15, .ohm = 53665 }, + { .temp_c = -10, .ohm = 42576 }, + { .temp_c = -5, .ohm = 34001 }, + { .temp_c = 0, .ohm = 27326 }, + { .temp_c = 5, .ohm = 22096 }, + { .temp_c = 10, .ohm = 17973 }, + { .temp_c = 15, .ohm = 14703 }, + { .temp_c = 20, .ohm = 12090 }, + { .temp_c = 25, .ohm = 10000 }, + { .temp_c = 30, .ohm = 8311 }, + { .temp_c = 35, .ohm = 6941 }, + { .temp_c = 40, .ohm = 5825 }, + { .temp_c = 45, .ohm = 4911 }, + { .temp_c = 50, .ohm = 4158 }, + { .temp_c = 55, .ohm = 3536 }, + { .temp_c = 60, .ohm = 3019 }, + { .temp_c = 65, .ohm = 2588 }, + { .temp_c = 70, .ohm = 2227 }, + { .temp_c = 75, .ohm = 1924 }, + { .temp_c = 80, .ohm = 1668 }, + { .temp_c = 85, .ohm = 1451 }, + { .temp_c = 90, .ohm = 1266 }, + { .temp_c = 95, .ohm = 1108 }, + { .temp_c = 100, .ohm = 973 }, + { .temp_c = 105, .ohm = 857 }, + { .temp_c = 110, .ohm = 757 }, + { .temp_c = 115, .ohm = 671 }, + { .temp_c = 120, .ohm = 596 }, + { .temp_c = 125, .ohm = 531 }, +}; + struct ntc_data { struct device *hwmon_dev; struct ntc_thermistor_platform_data *pdata; @@ -173,6 +215,8 @@ static const struct of_device_id ntc_match[] = { .data = &ntc_thermistor_id[3] }, { .compatible = "murata,ncp15wl333", .data = &ntc_thermistor_id[4] }, + { .compatible = "epcos,b57330v2103", + .data = &ntc_thermistor_id[5]}, /* Usage of vendor name "ntc" is deprecated */ { .compatible = "ntc,ncp15wb473", @@ -490,6 +534,10 @@ static int ntc_thermistor_probe(struct platform_device *pdev) data->comp = ncpXXwl333; data->n_comp = ARRAY_SIZE(ncpXXwl333); break; + case TYPE_B57330V2103: + data->comp = b57330v2103; + data->n_comp = ARRAY_SIZE(b57330v2103); + break; default: dev_err(&pdev->dev, "Unknown device type: %lu(%s)\n", pdev_id->driver_data, pdev_id->name); @@ -546,7 +594,7 @@ static struct platform_driver ntc_thermistor_driver = { module_platform_driver(ntc_thermistor_driver); -MODULE_DESCRIPTION("NTC Thermistor Driver from Murata"); +MODULE_DESCRIPTION("NTC Thermistor Driver"); MODULE_AUTHOR("MyungJoo Ham "); MODULE_LICENSE("GPL"); MODULE_ALIAS("platform:ntc-thermistor"); diff --git a/include/linux/platform_data/ntc_thermistor.h b/include/linux/platform_data/ntc_thermistor.h index c7285b575462..0a6de4ca4930 100644 --- a/include/linux/platform_data/ntc_thermistor.h +++ b/include/linux/platform_data/ntc_thermistor.h @@ -26,6 +26,7 @@ struct iio_channel; enum ntc_thermistor_type { TYPE_NCPXXWB473, TYPE_NCPXXWL333, + TYPE_B57330V2103, }; struct ntc_thermistor_platform_data { -- cgit v1.2.3-59-g8ed1b From 0097875bd41528922fb3bb5f348c53f17e00e2fd Mon Sep 17 00:00:00 2001 From: "Eric W. 
Biederman" Date: Thu, 31 Jul 2014 03:10:50 -0700 Subject: proc: Implement /proc/thread-self to point at the directory of the current thread /proc/thread-self is derived from /proc/self. /proc/thread-self points to the directory in proc containing information about the current thread. This functionality has been missing for a long time, and is tricky to implement in userspace as gettid() is not exported by glibc. More importantly this allows fixing defects in /proc/mounts and /proc/net where in a threaded application today they wind up being empty files when only the initial pthread has exited, causing problems for other threads. Signed-off-by: "Eric W. Biederman" --- fs/proc/Makefile | 1 + fs/proc/base.c | 15 +++++--- fs/proc/inode.c | 7 +++- fs/proc/internal.h | 6 +++ fs/proc/root.c | 3 ++ fs/proc/thread_self.c | 85 +++++++++++++++++++++++++++++++++++++++++++ include/linux/pid_namespace.h | 1 + 7 files changed, 112 insertions(+), 6 deletions(-) create mode 100644 fs/proc/thread_self.c (limited to 'include/linux') diff --git a/fs/proc/Makefile b/fs/proc/Makefile index 239493ec718e..7151ea428041 100644 --- a/fs/proc/Makefile +++ b/fs/proc/Makefile @@ -23,6 +23,7 @@ proc-y += version.o proc-y += softirqs.o proc-y += namespaces.o proc-y += self.o +proc-y += thread_self.o proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o proc-$(CONFIG_NET) += proc_net.o proc-$(CONFIG_PROC_KCORE) += kcore.o diff --git a/fs/proc/base.c b/fs/proc/base.c index ed34e405c6b9..0131156ce7c9 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -2847,7 +2847,7 @@ retry: return iter; } -#define TGID_OFFSET (FIRST_PROCESS_ENTRY + 1) +#define TGID_OFFSET (FIRST_PROCESS_ENTRY + 2) /* for the /proc/ directory itself, after non-process stuff has been done */ int proc_pid_readdir(struct file *file, struct dir_context *ctx) @@ -2859,14 +2859,19 @@ int proc_pid_readdir(struct file *file, struct dir_context *ctx) if (pos >= PID_MAX_LIMIT + TGID_OFFSET) return 0; - if (pos == TGID_OFFSET - 1) { + if (pos == TGID_OFFSET - 2) { struct inode *inode = ns->proc_self->d_inode; if (!dir_emit(ctx, "self", 4, inode->i_ino, DT_LNK)) return 0; - iter.tgid = 0; - } else { - iter.tgid = pos - TGID_OFFSET; + ctx->pos = pos = pos + 1; + } + if (pos == TGID_OFFSET - 1) { + struct inode *inode = ns->proc_thread_self->d_inode; + if (!dir_emit(ctx, "thread-self", 11, inode->i_ino, DT_LNK)) + return 0; + ctx->pos = pos = pos + 1; } + iter.tgid = pos - TGID_OFFSET; iter.task = NULL; for (iter = next_tgid(ns, iter); iter.task; diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 0adbc02d60e3..333080d7a671 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -442,6 +442,7 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de) int proc_fill_super(struct super_block *s) { struct inode *root_inode; + int ret; s->s_flags |= MS_NODIRATIME | MS_NOSUID | MS_NOEXEC; s->s_blocksize = 1024; @@ -463,5 +464,9 @@ int proc_fill_super(struct super_block *s) return -ENOMEM; } - return proc_setup_self(s); + ret = proc_setup_self(s); + if (ret) { + return ret; + } + return proc_setup_thread_self(s); } diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 3ab6d14e71c5..ee04619173b2 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -233,6 +233,12 @@ static inline int proc_net_init(void) { return 0; } */ extern int proc_setup_self(struct super_block *); +/* + * proc_thread_self.c + */ +extern int proc_setup_thread_self(struct super_block *); +extern void proc_thread_self_init(void); + /* * proc_sysctl.c */ diff --git a/fs/proc/root.c
b/fs/proc/root.c index 5dbadecb234d..48f1c03bc7ed 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -149,6 +149,8 @@ static void proc_kill_sb(struct super_block *sb) ns = (struct pid_namespace *)sb->s_fs_info; if (ns->proc_self) dput(ns->proc_self); + if (ns->proc_thread_self) + dput(ns->proc_thread_self); kill_anon_super(sb); put_pid_ns(ns); } @@ -170,6 +172,7 @@ void __init proc_root_init(void) return; proc_self_init(); + proc_thread_self_init(); proc_symlink("mounts", NULL, "self/mounts"); proc_net_init(); diff --git a/fs/proc/thread_self.c b/fs/proc/thread_self.c new file mode 100644 index 000000000000..59075b509df3 --- /dev/null +++ b/fs/proc/thread_self.c @@ -0,0 +1,85 @@ +#include +#include +#include +#include +#include "internal.h" + +/* + * /proc/thread_self: + */ +static int proc_thread_self_readlink(struct dentry *dentry, char __user *buffer, + int buflen) +{ + struct pid_namespace *ns = dentry->d_sb->s_fs_info; + pid_t tgid = task_tgid_nr_ns(current, ns); + pid_t pid = task_pid_nr_ns(current, ns); + char tmp[PROC_NUMBUF + 6 + PROC_NUMBUF]; + if (!pid) + return -ENOENT; + sprintf(tmp, "%d/task/%d", tgid, pid); + return readlink_copy(buffer, buflen, tmp); +} + +static void *proc_thread_self_follow_link(struct dentry *dentry, struct nameidata *nd) +{ + struct pid_namespace *ns = dentry->d_sb->s_fs_info; + pid_t tgid = task_tgid_nr_ns(current, ns); + pid_t pid = task_pid_nr_ns(current, ns); + char *name = ERR_PTR(-ENOENT); + if (pid) { + name = kmalloc(PROC_NUMBUF + 6 + PROC_NUMBUF, GFP_KERNEL); + if (!name) + name = ERR_PTR(-ENOMEM); + else + sprintf(name, "%d/task/%d", tgid, pid); + } + nd_set_link(nd, name); + return NULL; +} + +static const struct inode_operations proc_thread_self_inode_operations = { + .readlink = proc_thread_self_readlink, + .follow_link = proc_thread_self_follow_link, + .put_link = kfree_put_link, +}; + +static unsigned thread_self_inum; + +int proc_setup_thread_self(struct super_block *s) +{ + struct inode *root_inode = s->s_root->d_inode; + struct pid_namespace *ns = s->s_fs_info; + struct dentry *thread_self; + + mutex_lock(&root_inode->i_mutex); + thread_self = d_alloc_name(s->s_root, "thread-self"); + if (thread_self) { + struct inode *inode = new_inode_pseudo(s); + if (inode) { + inode->i_ino = thread_self_inum; + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; + inode->i_mode = S_IFLNK | S_IRWXUGO; + inode->i_uid = GLOBAL_ROOT_UID; + inode->i_gid = GLOBAL_ROOT_GID; + inode->i_op = &proc_thread_self_inode_operations; + d_add(thread_self, inode); + } else { + dput(thread_self); + thread_self = ERR_PTR(-ENOMEM); + } + } else { + thread_self = ERR_PTR(-ENOMEM); + } + mutex_unlock(&root_inode->i_mutex); + if (IS_ERR(thread_self)) { + pr_err("proc_fill_super: can't allocate /proc/thread_self\n"); + return PTR_ERR(thread_self); + } + ns->proc_thread_self = thread_self; + return 0; +} + +void __init proc_thread_self_init(void) +{ + proc_alloc_inum(&thread_self_inum); +} diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index 7246ef3d4455..1997ffc295a7 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -33,6 +33,7 @@ struct pid_namespace { #ifdef CONFIG_PROC_FS struct vfsmount *proc_mnt; struct dentry *proc_self; + struct dentry *proc_thread_self; #endif #ifdef CONFIG_BSD_PROCESS_ACCT struct bsd_acct_struct *bacct; -- cgit v1.2.3-59-g8ed1b From 1b69be5e8afc634f39ad695a6ab6aad0cf0975c7 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 10 Jun 2014 11:41:57 +1000 Subject: drivers/vfio: 
EEH support for VFIO PCI device The patch adds new IOCTL commands for sPAPR VFIO container device to support EEH functionality for PCI devices, which have been passed through from host to somebody else via VFIO. Signed-off-by: Gavin Shan Acked-by: Alexander Graf Acked-by: Alex Williamson Signed-off-by: Benjamin Herrenschmidt --- Documentation/vfio.txt | 87 +++++++++++++++++++++++++++++++++++-- drivers/vfio/Makefile | 1 + drivers/vfio/pci/vfio_pci.c | 18 ++++++-- drivers/vfio/vfio_iommu_spapr_tce.c | 17 +++++++- drivers/vfio/vfio_spapr_eeh.c | 87 +++++++++++++++++++++++++++++++++++++ include/linux/vfio.h | 23 ++++++++++ include/uapi/linux/vfio.h | 34 +++++++++++++++ 7 files changed, 259 insertions(+), 8 deletions(-) create mode 100644 drivers/vfio/vfio_spapr_eeh.c (limited to 'include/linux') diff --git a/Documentation/vfio.txt b/Documentation/vfio.txt index b9ca02370d46..96978eced341 100644 --- a/Documentation/vfio.txt +++ b/Documentation/vfio.txt @@ -305,7 +305,15 @@ faster, the map/unmap handling has been implemented in real mode which provides an excellent performance which has limitations such as inability to do locked pages accounting in real time. -So 3 additional ioctls have been added: +4) According to sPAPR specification, A Partitionable Endpoint (PE) is an I/O +subtree that can be treated as a unit for the purposes of partitioning and +error recovery. A PE may be a single or multi-function IOA (IO Adapter), a +function of a multi-function IOA, or multiple IOAs (possibly including switch +and bridge structures above the multiple IOAs). PPC64 guests detect PCI errors +and recover from them via EEH RTAS services, which works on the basis of +additional ioctl commands. + +So 4 additional ioctls have been added: VFIO_IOMMU_SPAPR_TCE_GET_INFO - returns the size and the start of the DMA window on the PCI bus. @@ -316,9 +324,12 @@ So 3 additional ioctls have been added: VFIO_IOMMU_DISABLE - disables the container. + VFIO_EEH_PE_OP - provides an API for EEH setup, error detection and recovery. The code flow from the example above should be slightly changed: + struct vfio_eeh_pe_op pe_op = { .argsz = sizeof(pe_op), .flags = 0 }; + ..... /* Add the group to the container */ ioctl(group, VFIO_GROUP_SET_CONTAINER, &container); @@ -342,9 +353,79 @@ The code flow from the example above should be slightly changed: dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE; /* Check here is .iova/.size are within DMA window from spapr_iommu_info */ - ioctl(container, VFIO_IOMMU_MAP_DMA, &dma_map); - ..... + + /* Get a file descriptor for the device */ + device = ioctl(group, VFIO_GROUP_GET_DEVICE_FD, "0000:06:0d.0"); + + .... + + /* Gratuitous device reset and go... */ + ioctl(device, VFIO_DEVICE_RESET); + + /* Make sure EEH is supported */ + ioctl(container, VFIO_CHECK_EXTENSION, VFIO_EEH); + + /* Enable the EEH functionality on the device */ + pe_op.op = VFIO_EEH_PE_ENABLE; + ioctl(container, VFIO_EEH_PE_OP, &pe_op); + + /* You're suggested to create additional data struct to represent + * PE, and put child devices belonging to same IOMMU group to the + * PE instance for later reference. + */ + + /* Check the PE's state and make sure it's in functional state */ + pe_op.op = VFIO_EEH_PE_GET_STATE; + ioctl(container, VFIO_EEH_PE_OP, &pe_op); + + /* Save device state using pci_save_state(). + * EEH should be enabled on the specified device. + */ + + .... + + /* When 0xFF's returned from reading PCI config space or IO BARs + * of the PCI device. 
Check the PE's state to see if that has been + * frozen. + */ + ioctl(container, VFIO_EEH_PE_OP, &pe_op); + + /* Waiting for pending PCI transactions to be completed and don't + * produce any more PCI traffic from/to the affected PE until + * recovery is finished. + */ + + /* Enable IO for the affected PE and collect logs. Usually, the + * standard part of PCI config space, AER registers are dumped + * as logs for further analysis. + */ + pe_op.op = VFIO_EEH_PE_UNFREEZE_IO; + ioctl(container, VFIO_EEH_PE_OP, &pe_op); + + /* + * Issue PE reset: hot or fundamental reset. Usually, hot reset + * is enough. However, the firmware of some PCI adapters would + * require fundamental reset. + */ + pe_op.op = VFIO_EEH_PE_RESET_HOT; + ioctl(container, VFIO_EEH_PE_OP, &pe_op); + pe_op.op = VFIO_EEH_PE_RESET_DEACTIVATE; + ioctl(container, VFIO_EEH_PE_OP, &pe_op); + + /* Configure the PCI bridges for the affected PE */ + pe_op.op = VFIO_EEH_PE_CONFIGURE; + ioctl(container, VFIO_EEH_PE_OP, &pe_op); + + /* Restored state we saved at initialization time. pci_restore_state() + * is good enough as an example. + */ + + /* Hopefully, error is recovered successfully. Now, you can resume to + * start PCI traffic to/from the affected PE. + */ + + .... ------------------------------------------------------------------------------- diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile index 72bfabc8629e..50e30bc75e85 100644 --- a/drivers/vfio/Makefile +++ b/drivers/vfio/Makefile @@ -1,4 +1,5 @@ obj-$(CONFIG_VFIO) += vfio.o obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o +obj-$(CONFIG_EEH) += vfio_spapr_eeh.o obj-$(CONFIG_VFIO_PCI) += pci/ diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 010e0f8b8e4f..e2ee80f36e3e 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -157,8 +157,10 @@ static void vfio_pci_release(void *device_data) { struct vfio_pci_device *vdev = device_data; - if (atomic_dec_and_test(&vdev->refcnt)) + if (atomic_dec_and_test(&vdev->refcnt)) { + vfio_spapr_pci_eeh_release(vdev->pdev); vfio_pci_disable(vdev); + } module_put(THIS_MODULE); } @@ -166,19 +168,27 @@ static void vfio_pci_release(void *device_data) static int vfio_pci_open(void *device_data) { struct vfio_pci_device *vdev = device_data; + int ret; if (!try_module_get(THIS_MODULE)) return -ENODEV; if (atomic_inc_return(&vdev->refcnt) == 1) { - int ret = vfio_pci_enable(vdev); + ret = vfio_pci_enable(vdev); + if (ret) + goto error; + + ret = vfio_spapr_pci_eeh_open(vdev->pdev); if (ret) { - module_put(THIS_MODULE); - return ret; + vfio_pci_disable(vdev); + goto error; } } return 0; +error: + module_put(THIS_MODULE); + return ret; } static int vfio_pci_get_irq_count(struct vfio_pci_device *vdev, int irq_type) diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index a84788ba662c..730b4ef3e0cc 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -156,7 +156,16 @@ static long tce_iommu_ioctl(void *iommu_data, switch (cmd) { case VFIO_CHECK_EXTENSION: - return (arg == VFIO_SPAPR_TCE_IOMMU) ? 1 : 0; + switch (arg) { + case VFIO_SPAPR_TCE_IOMMU: + ret = 1; + break; + default: + ret = vfio_spapr_iommu_eeh_ioctl(NULL, cmd, arg); + break; + } + + return (ret < 0) ? 
0 : ret; case VFIO_IOMMU_SPAPR_TCE_GET_INFO: { struct vfio_iommu_spapr_tce_info info; @@ -283,6 +292,12 @@ static long tce_iommu_ioctl(void *iommu_data, tce_iommu_disable(container); mutex_unlock(&container->lock); return 0; + case VFIO_EEH_PE_OP: + if (!container->tbl || !container->tbl->it_group) + return -ENODEV; + + return vfio_spapr_iommu_eeh_ioctl(container->tbl->it_group, + cmd, arg); } return -ENOTTY; diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c new file mode 100644 index 000000000000..f834b4ce1431 --- /dev/null +++ b/drivers/vfio/vfio_spapr_eeh.c @@ -0,0 +1,87 @@ +/* + * EEH functionality support for VFIO devices. The feature is only + * available on sPAPR compatible platforms. + * + * Copyright Gavin Shan, IBM Corporation 2014. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include + +/* We might build address mapping here for "fast" path later */ +int vfio_spapr_pci_eeh_open(struct pci_dev *pdev) +{ + return eeh_dev_open(pdev); +} + +void vfio_spapr_pci_eeh_release(struct pci_dev *pdev) +{ + eeh_dev_release(pdev); +} + +long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group, + unsigned int cmd, unsigned long arg) +{ + struct eeh_pe *pe; + struct vfio_eeh_pe_op op; + unsigned long minsz; + long ret = -EINVAL; + + switch (cmd) { + case VFIO_CHECK_EXTENSION: + if (arg == VFIO_EEH) + ret = eeh_enabled() ? 1 : 0; + else + ret = 0; + break; + case VFIO_EEH_PE_OP: + pe = eeh_iommu_group_to_pe(group); + if (!pe) + return -ENODEV; + + minsz = offsetofend(struct vfio_eeh_pe_op, op); + if (copy_from_user(&op, (void __user *)arg, minsz)) + return -EFAULT; + if (op.argsz < minsz || op.flags) + return -EINVAL; + + switch (op.op) { + case VFIO_EEH_PE_DISABLE: + ret = eeh_pe_set_option(pe, EEH_OPT_DISABLE); + break; + case VFIO_EEH_PE_ENABLE: + ret = eeh_pe_set_option(pe, EEH_OPT_ENABLE); + break; + case VFIO_EEH_PE_UNFREEZE_IO: + ret = eeh_pe_set_option(pe, EEH_OPT_THAW_MMIO); + break; + case VFIO_EEH_PE_UNFREEZE_DMA: + ret = eeh_pe_set_option(pe, EEH_OPT_THAW_DMA); + break; + case VFIO_EEH_PE_GET_STATE: + ret = eeh_pe_get_state(pe); + break; + case VFIO_EEH_PE_RESET_DEACTIVATE: + ret = eeh_pe_reset(pe, EEH_RESET_DEACTIVATE); + break; + case VFIO_EEH_PE_RESET_HOT: + ret = eeh_pe_reset(pe, EEH_RESET_HOT); + break; + case VFIO_EEH_PE_RESET_FUNDAMENTAL: + ret = eeh_pe_reset(pe, EEH_RESET_FUNDAMENTAL); + break; + case VFIO_EEH_PE_CONFIGURE: + ret = eeh_pe_configure(pe); + break; + default: + ret = -EINVAL; + } + } + + return ret; +} diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 8ec980b5e3af..25a0fbd4b998 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -98,4 +98,27 @@ extern int vfio_external_user_iommu_id(struct vfio_group *group); extern long vfio_external_check_extension(struct vfio_group *group, unsigned long arg); +#ifdef CONFIG_EEH +extern int vfio_spapr_pci_eeh_open(struct pci_dev *pdev); +extern void vfio_spapr_pci_eeh_release(struct pci_dev *pdev); +extern long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group, + unsigned int cmd, + unsigned long arg); +#else +static inline int vfio_spapr_pci_eeh_open(struct pci_dev *pdev) +{ + return 0; +} + +static inline void vfio_spapr_pci_eeh_release(struct pci_dev *pdev) +{ +} + +static inline long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group, + unsigned int cmd, + unsigned long arg) +{ + return 
-ENOTTY; +} +#endif /* CONFIG_EEH */ #endif /* VFIO_H */ diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index cb9023d4f063..6612974c64bf 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -30,6 +30,9 @@ */ #define VFIO_DMA_CC_IOMMU 4 +/* Check if EEH is supported */ +#define VFIO_EEH 5 + /* * The IOCTL interface is designed for extensibility by embedding the * structure length (argsz) and flags into structures passed between @@ -455,6 +458,37 @@ struct vfio_iommu_spapr_tce_info { #define VFIO_IOMMU_SPAPR_TCE_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12) +/* + * EEH PE operation struct provides ways to: + * - enable/disable EEH functionality; + * - unfreeze IO/DMA for frozen PE; + * - read PE state; + * - reset PE; + * - configure PE. + */ +struct vfio_eeh_pe_op { + __u32 argsz; + __u32 flags; + __u32 op; +}; + +#define VFIO_EEH_PE_DISABLE 0 /* Disable EEH functionality */ +#define VFIO_EEH_PE_ENABLE 1 /* Enable EEH functionality */ +#define VFIO_EEH_PE_UNFREEZE_IO 2 /* Enable IO for frozen PE */ +#define VFIO_EEH_PE_UNFREEZE_DMA 3 /* Enable DMA for frozen PE */ +#define VFIO_EEH_PE_GET_STATE 4 /* PE state retrieval */ +#define VFIO_EEH_PE_STATE_NORMAL 0 /* PE in functional state */ +#define VFIO_EEH_PE_STATE_RESET 1 /* PE reset in progress */ +#define VFIO_EEH_PE_STATE_STOPPED 2 /* Stopped DMA and IO */ +#define VFIO_EEH_PE_STATE_STOPPED_DMA 4 /* Stopped DMA only */ +#define VFIO_EEH_PE_STATE_UNAVAIL 5 /* State unavailable */ +#define VFIO_EEH_PE_RESET_DEACTIVATE 5 /* Deassert PE reset */ +#define VFIO_EEH_PE_RESET_HOT 6 /* Assert hot reset */ +#define VFIO_EEH_PE_RESET_FUNDAMENTAL 7 /* Assert fundamental reset */ +#define VFIO_EEH_PE_CONFIGURE 8 /* PE configuration */ + +#define VFIO_EEH_PE_OP _IO(VFIO_TYPE, VFIO_BASE + 21) + /* ***************************************************************** */ #endif /* _UAPIVFIO_H */ -- cgit v1.2.3-59-g8ed1b From 8ba918d488caded2c4368b0b922eb905fe3bb101 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 30 Jun 2014 20:51:10 +1000 Subject: KVM: irqchip: Provide and use accessors for irq routing table This provides accessor functions for the KVM interrupt mappings, in order to reduce the amount of code that accesses the fields of the kvm_irq_routing_table struct, and restrict that code to one file, virt/kvm/irqchip.c. The new functions are kvm_irq_map_gsi(), which maps from a global interrupt number to a set of IRQ routing entries, and kvm_irq_map_chip_pin, which maps from IRQ chip and pin numbers to a global interrupt number. This also moves the update of kvm_irq_routing_table::chip[][] into irqchip.c, out of the various kvm_set_routing_entry implementations. That means that none of the kvm_set_routing_entry implementations need the kvm_irq_routing_table argument anymore, so this removes it. This does not change any locking or data lifetime rules. 
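For illustration only (not part of this patch): a caller that previously walked irq_rt->map[] by hand would now copy the routing entries out through the new accessor and iterate over the local copy, roughly along these lines. The function name and its behaviour below are made up; it assumes <linux/kvm_host.h> and that the caller holds an srcu read lock on kvm->irq_srcu, as the existing callers converted by this patch do.

    /* Hypothetical sketch, not from this series. */
    static bool example_gsi_has_msi_route(struct kvm_irq_routing_table *irq_rt,
                                          int gsi)
    {
            struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS];
            int i, n;

            /* Copy the routing entries for this GSI into a local array
             * instead of walking irq_rt->map[gsi] directly. */
            n = kvm_irq_map_gsi(entries, irq_rt, gsi);

            for (i = 0; i < n; i++) {
                    if (entries[i].type == KVM_IRQ_ROUTING_MSI)
                            return true;
            }

            return false;
    }

Direct reads of irq_rt->chip[irqchip][pin] are likewise replaced by kvm_irq_map_chip_pin(irq_rt, irqchip, pin), as the hunks below show.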
Signed-off-by: Paul Mackerras Tested-by: Eric Auger Tested-by: Cornelia Huck Signed-off-by: Paolo Bonzini --- arch/powerpc/kvm/mpic.c | 4 +--- arch/s390/kvm/interrupt.c | 3 +-- include/linux/kvm_host.h | 8 ++++++-- virt/kvm/eventfd.c | 10 ++++++---- virt/kvm/irq_comm.c | 20 +++++++++----------- virt/kvm/irqchip.c | 42 ++++++++++++++++++++++++++++++++++-------- 6 files changed, 57 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c index b68d0dc9479a..39b3a8f816f2 100644 --- a/arch/powerpc/kvm/mpic.c +++ b/arch/powerpc/kvm/mpic.c @@ -1826,8 +1826,7 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, return 0; } -int kvm_set_routing_entry(struct kvm_irq_routing_table *rt, - struct kvm_kernel_irq_routing_entry *e, +int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e, const struct kvm_irq_routing_entry *ue) { int r = -EINVAL; @@ -1839,7 +1838,6 @@ int kvm_set_routing_entry(struct kvm_irq_routing_table *rt, e->irqchip.pin = ue->u.irqchip.pin; if (e->irqchip.pin >= KVM_IRQCHIP_NUM_PINS) goto out; - rt->chip[ue->u.irqchip.irqchip][e->irqchip.pin] = ue->gsi; break; case KVM_IRQ_ROUTING_MSI: e->set = kvm_set_msi; diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 92528a0bdda6..f4c819bfc193 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -1556,8 +1556,7 @@ static int set_adapter_int(struct kvm_kernel_irq_routing_entry *e, return ret; } -int kvm_set_routing_entry(struct kvm_irq_routing_table *rt, - struct kvm_kernel_irq_routing_entry *e, +int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e, const struct kvm_irq_routing_entry *ue) { int ret; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 5065b953e6e8..4956149e962a 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -752,6 +752,11 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, bool mask); +int kvm_irq_map_gsi(struct kvm_kernel_irq_routing_entry *entries, + struct kvm_irq_routing_table *irq_rt, int gsi); +int kvm_irq_map_chip_pin(struct kvm_irq_routing_table *irq_rt, + unsigned irqchip, unsigned pin); + int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, bool line_status); int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level); @@ -942,8 +947,7 @@ int kvm_set_irq_routing(struct kvm *kvm, const struct kvm_irq_routing_entry *entries, unsigned nr, unsigned flags); -int kvm_set_routing_entry(struct kvm_irq_routing_table *rt, - struct kvm_kernel_irq_routing_entry *e, +int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e, const struct kvm_irq_routing_entry *ue); void kvm_free_irq_routing(struct kvm *kvm); diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index bae593a545c5..15fa9488b2d0 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -282,20 +282,22 @@ static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd, struct kvm_irq_routing_table *irq_rt) { struct kvm_kernel_irq_routing_entry *e; + struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS]; + int i, n_entries; + + n_entries = kvm_irq_map_gsi(entries, irq_rt, irqfd->gsi); write_seqcount_begin(&irqfd->irq_entry_sc); irqfd->irq_entry.type = 0; - if (irqfd->gsi >= irq_rt->nr_rt_entries) - goto out; - hlist_for_each_entry(e, &irq_rt->map[irqfd->gsi], link) { + e = entries; + for (i = 0; i < n_entries; ++i, ++e) { /* Only fast-path MSI. 
*/ if (e->type == KVM_IRQ_ROUTING_MSI) irqfd->irq_entry = *e; } - out: write_seqcount_end(&irqfd->irq_entry_sc); } diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index a228ee82bad2..175844593243 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -160,6 +160,7 @@ static int kvm_set_msi_inatomic(struct kvm_kernel_irq_routing_entry *e, */ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level) { + struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS]; struct kvm_kernel_irq_routing_entry *e; int ret = -EINVAL; struct kvm_irq_routing_table *irq_rt; @@ -177,14 +178,13 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level) */ idx = srcu_read_lock(&kvm->irq_srcu); irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); - if (irq < irq_rt->nr_rt_entries) - hlist_for_each_entry(e, &irq_rt->map[irq], link) { - if (likely(e->type == KVM_IRQ_ROUTING_MSI)) - ret = kvm_set_msi_inatomic(e, kvm); - else - ret = -EWOULDBLOCK; - break; - } + if (kvm_irq_map_gsi(entries, irq_rt, irq) > 0) { + e = &entries[0]; + if (likely(e->type == KVM_IRQ_ROUTING_MSI)) + ret = kvm_set_msi_inatomic(e, kvm); + else + ret = -EWOULDBLOCK; + } srcu_read_unlock(&kvm->irq_srcu, idx); return ret; } @@ -272,8 +272,7 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, srcu_read_unlock(&kvm->irq_srcu, idx); } -int kvm_set_routing_entry(struct kvm_irq_routing_table *rt, - struct kvm_kernel_irq_routing_entry *e, +int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e, const struct kvm_irq_routing_entry *ue) { int r = -EINVAL; @@ -304,7 +303,6 @@ int kvm_set_routing_entry(struct kvm_irq_routing_table *rt, e->irqchip.pin = ue->u.irqchip.pin + delta; if (e->irqchip.pin >= max_pin) goto out; - rt->chip[ue->u.irqchip.irqchip][e->irqchip.pin] = ue->gsi; break; case KVM_IRQ_ROUTING_MSI: e->set = kvm_set_msi; diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c index b43c275775cd..f4648dd94888 100644 --- a/virt/kvm/irqchip.c +++ b/virt/kvm/irqchip.c @@ -31,13 +31,37 @@ #include #include "irq.h" +int kvm_irq_map_gsi(struct kvm_kernel_irq_routing_entry *entries, + struct kvm_irq_routing_table *irq_rt, int gsi) +{ + struct kvm_kernel_irq_routing_entry *e; + int n = 0; + + if (gsi < irq_rt->nr_rt_entries) { + hlist_for_each_entry(e, &irq_rt->map[gsi], link) { + entries[n] = *e; + ++n; + } + } + + return n; +} + +int kvm_irq_map_chip_pin(struct kvm_irq_routing_table *irq_rt, + unsigned irqchip, unsigned pin) +{ + return irq_rt->chip[irqchip][pin]; +} + bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin) { + struct kvm_irq_routing_table *irq_rt; struct kvm_irq_ack_notifier *kian; int gsi, idx; idx = srcu_read_lock(&kvm->irq_srcu); - gsi = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu)->chip[irqchip][pin]; + irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); + gsi = kvm_irq_map_chip_pin(irq_rt, irqchip, pin); if (gsi != -1) hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, link) @@ -54,13 +78,15 @@ EXPORT_SYMBOL_GPL(kvm_irq_has_notifier); void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) { + struct kvm_irq_routing_table *irq_rt; struct kvm_irq_ack_notifier *kian; int gsi, idx; trace_kvm_ack_irq(irqchip, pin); idx = srcu_read_lock(&kvm->irq_srcu); - gsi = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu)->chip[irqchip][pin]; + irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); + gsi = kvm_irq_map_chip_pin(irq_rt, irqchip, pin); if (gsi != -1) 
hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, link) @@ -115,8 +141,8 @@ int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi) int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, bool line_status) { - struct kvm_kernel_irq_routing_entry *e, irq_set[KVM_NR_IRQCHIPS]; - int ret = -1, i = 0, idx; + struct kvm_kernel_irq_routing_entry irq_set[KVM_NR_IRQCHIPS]; + int ret = -1, i, idx; struct kvm_irq_routing_table *irq_rt; trace_kvm_set_irq(irq, level, irq_source_id); @@ -127,9 +153,7 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, */ idx = srcu_read_lock(&kvm->irq_srcu); irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); - if (irq < irq_rt->nr_rt_entries) - hlist_for_each_entry(e, &irq_rt->map[irq], link) - irq_set[i++] = *e; + i = kvm_irq_map_gsi(irq_set, irq_rt, irq); srcu_read_unlock(&kvm->irq_srcu, idx); while(i--) { @@ -171,9 +195,11 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt, e->gsi = ue->gsi; e->type = ue->type; - r = kvm_set_routing_entry(rt, e, ue); + r = kvm_set_routing_entry(e, ue); if (r) goto out; + if (e->type == KVM_IRQ_ROUTING_IRQCHIP) + rt->chip[e->irqchip.irqchip][e->irqchip.pin] = e->gsi; hlist_add_head(&e->link, &rt->map[e->gsi]); r = 0; -- cgit v1.2.3-59-g8ed1b From 9957c86d659a4d5a2bed25ccbd3bfc9c3f25e658 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 30 Jun 2014 20:51:11 +1000 Subject: KVM: Move all accesses to kvm::irq_routing into irqchip.c Now that struct _irqfd does not keep a reference to storage pointed to by the irq_routing field of struct kvm, we can move the statement that updates it out from under the irqfds.lock and put it in kvm_set_irq_routing() instead. That means we then have to take a srcu_read_lock on kvm->irq_srcu around the irqfd_update call in kvm_irqfd_assign(), since holding the kvm->irqfds.lock no longer ensures that that the routing can't change. Combined with changing kvm_irq_map_gsi() and kvm_irq_map_chip_pin() to take a struct kvm * argument instead of the pointer to the routing table, this allows us to to move all references to kvm->irq_routing into irqchip.c. That in turn allows us to move the definition of the kvm_irq_routing_table struct into irqchip.c as well. Signed-off-by: Paul Mackerras Tested-by: Eric Auger Tested-by: Cornelia Huck Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 35 +++++++---------------------------- virt/kvm/eventfd.c | 22 +++++++++------------- virt/kvm/irq_comm.c | 6 ++---- virt/kvm/irqchip.c | 39 +++++++++++++++++++++++++-------------- 4 files changed, 43 insertions(+), 59 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 4956149e962a..ddd33e1aeee1 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -325,24 +325,7 @@ struct kvm_kernel_irq_routing_entry { struct hlist_node link; }; -#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING - -struct kvm_irq_routing_table { - int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS]; - struct kvm_kernel_irq_routing_entry *rt_entries; - u32 nr_rt_entries; - /* - * Array indexed by gsi. Each entry contains list of irq chips - * the gsi is connected to. 
- */ - struct hlist_head map[0]; -}; - -#else - -struct kvm_irq_routing_table {}; - -#endif +struct kvm_irq_routing_table; #ifndef KVM_PRIVATE_MEM_SLOTS #define KVM_PRIVATE_MEM_SLOTS 0 @@ -401,8 +384,7 @@ struct kvm { struct mutex irq_lock; #ifdef CONFIG_HAVE_KVM_IRQCHIP /* - * Update side is protected by irq_lock and, - * if configured, irqfds.lock. + * Update side is protected by irq_lock. */ struct kvm_irq_routing_table __rcu *irq_routing; struct hlist_head mask_notifier_list; @@ -752,10 +734,9 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, bool mask); -int kvm_irq_map_gsi(struct kvm_kernel_irq_routing_entry *entries, - struct kvm_irq_routing_table *irq_rt, int gsi); -int kvm_irq_map_chip_pin(struct kvm_irq_routing_table *irq_rt, - unsigned irqchip, unsigned pin); +int kvm_irq_map_gsi(struct kvm *kvm, + struct kvm_kernel_irq_routing_entry *entries, int gsi); +int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin); int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, bool line_status); @@ -967,7 +948,7 @@ int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args); #ifdef CONFIG_HAVE_KVM_IRQCHIP int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args); void kvm_irqfd_release(struct kvm *kvm); -void kvm_irq_routing_update(struct kvm *, struct kvm_irq_routing_table *); +void kvm_irq_routing_update(struct kvm *); #else static inline int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args) { @@ -989,10 +970,8 @@ static inline int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args) static inline void kvm_irqfd_release(struct kvm *kvm) {} #ifdef CONFIG_HAVE_KVM_IRQCHIP -static inline void kvm_irq_routing_update(struct kvm *kvm, - struct kvm_irq_routing_table *irq_rt) +static inline void kvm_irq_routing_update(struct kvm *kvm) { - rcu_assign_pointer(kvm->irq_routing, irq_rt); } #endif diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 15fa9488b2d0..f0075ffb0c35 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -278,14 +278,13 @@ irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh, } /* Must be called under irqfds.lock */ -static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd, - struct kvm_irq_routing_table *irq_rt) +static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd) { struct kvm_kernel_irq_routing_entry *e; struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS]; int i, n_entries; - n_entries = kvm_irq_map_gsi(entries, irq_rt, irqfd->gsi); + n_entries = kvm_irq_map_gsi(kvm, entries, irqfd->gsi); write_seqcount_begin(&irqfd->irq_entry_sc); @@ -304,12 +303,12 @@ static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd, static int kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) { - struct kvm_irq_routing_table *irq_rt; struct _irqfd *irqfd, *tmp; struct fd f; struct eventfd_ctx *eventfd = NULL, *resamplefd = NULL; int ret; unsigned int events; + int idx; irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL); if (!irqfd) @@ -403,9 +402,9 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) goto fail; } - irq_rt = rcu_dereference_protected(kvm->irq_routing, - lockdep_is_held(&kvm->irqfds.lock)); - irqfd_update(kvm, irqfd, irq_rt); + idx = srcu_read_lock(&kvm->irq_srcu); + irqfd_update(kvm, irqfd); + srcu_read_unlock(&kvm->irq_srcu, idx); list_add_tail(&irqfd->list, &kvm->irqfds.items); @@ -539,20 +538,17 @@ kvm_irqfd_release(struct kvm *kvm) } /* - * Change irq_routing and irqfd. 
+ * Take note of a change in irq routing. * Caller must invoke synchronize_srcu(&kvm->irq_srcu) afterwards. */ -void kvm_irq_routing_update(struct kvm *kvm, - struct kvm_irq_routing_table *irq_rt) +void kvm_irq_routing_update(struct kvm *kvm) { struct _irqfd *irqfd; spin_lock_irq(&kvm->irqfds.lock); - rcu_assign_pointer(kvm->irq_routing, irq_rt); - list_for_each_entry(irqfd, &kvm->irqfds.items, list) - irqfd_update(kvm, irqfd, irq_rt); + irqfd_update(kvm, irqfd); spin_unlock_irq(&kvm->irqfds.lock); } diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index 175844593243..963b8995a9e8 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -163,7 +163,6 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level) struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS]; struct kvm_kernel_irq_routing_entry *e; int ret = -EINVAL; - struct kvm_irq_routing_table *irq_rt; int idx; trace_kvm_set_irq(irq, level, irq_source_id); @@ -177,8 +176,7 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level) * which is limited to 1:1 GSI mapping. */ idx = srcu_read_lock(&kvm->irq_srcu); - irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); - if (kvm_irq_map_gsi(entries, irq_rt, irq) > 0) { + if (kvm_irq_map_gsi(kvm, entries, irq) > 0) { e = &entries[0]; if (likely(e->type == KVM_IRQ_ROUTING_MSI)) ret = kvm_set_msi_inatomic(e, kvm); @@ -264,7 +262,7 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, int idx, gsi; idx = srcu_read_lock(&kvm->irq_srcu); - gsi = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu)->chip[irqchip][pin]; + gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); if (gsi != -1) hlist_for_each_entry_rcu(kimn, &kvm->mask_notifier_list, link) if (kimn->irq == gsi) diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c index f4648dd94888..04faac50cef5 100644 --- a/virt/kvm/irqchip.c +++ b/virt/kvm/irqchip.c @@ -31,12 +31,26 @@ #include #include "irq.h" -int kvm_irq_map_gsi(struct kvm_kernel_irq_routing_entry *entries, - struct kvm_irq_routing_table *irq_rt, int gsi) +struct kvm_irq_routing_table { + int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS]; + struct kvm_kernel_irq_routing_entry *rt_entries; + u32 nr_rt_entries; + /* + * Array indexed by gsi. Each entry contains list of irq chips + * the gsi is connected to. 
+ */ + struct hlist_head map[0]; +}; + +int kvm_irq_map_gsi(struct kvm *kvm, + struct kvm_kernel_irq_routing_entry *entries, int gsi) { + struct kvm_irq_routing_table *irq_rt; struct kvm_kernel_irq_routing_entry *e; int n = 0; + irq_rt = srcu_dereference_check(kvm->irq_routing, &kvm->irq_srcu, + lockdep_is_held(&kvm->irq_lock)); if (gsi < irq_rt->nr_rt_entries) { hlist_for_each_entry(e, &irq_rt->map[gsi], link) { entries[n] = *e; @@ -47,21 +61,21 @@ int kvm_irq_map_gsi(struct kvm_kernel_irq_routing_entry *entries, return n; } -int kvm_irq_map_chip_pin(struct kvm_irq_routing_table *irq_rt, - unsigned irqchip, unsigned pin) +int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin) { + struct kvm_irq_routing_table *irq_rt; + + irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); return irq_rt->chip[irqchip][pin]; } bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin) { - struct kvm_irq_routing_table *irq_rt; struct kvm_irq_ack_notifier *kian; int gsi, idx; idx = srcu_read_lock(&kvm->irq_srcu); - irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); - gsi = kvm_irq_map_chip_pin(irq_rt, irqchip, pin); + gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); if (gsi != -1) hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, link) @@ -78,15 +92,13 @@ EXPORT_SYMBOL_GPL(kvm_irq_has_notifier); void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) { - struct kvm_irq_routing_table *irq_rt; struct kvm_irq_ack_notifier *kian; int gsi, idx; trace_kvm_ack_irq(irqchip, pin); idx = srcu_read_lock(&kvm->irq_srcu); - irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); - gsi = kvm_irq_map_chip_pin(irq_rt, irqchip, pin); + gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); if (gsi != -1) hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, link) @@ -143,7 +155,6 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, { struct kvm_kernel_irq_routing_entry irq_set[KVM_NR_IRQCHIPS]; int ret = -1, i, idx; - struct kvm_irq_routing_table *irq_rt; trace_kvm_set_irq(irq, level, irq_source_id); @@ -152,8 +163,7 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, * writes to the unused one. */ idx = srcu_read_lock(&kvm->irq_srcu); - irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); - i = kvm_irq_map_gsi(irq_set, irq_rt, irq); + i = kvm_irq_map_gsi(kvm, irq_set, irq); srcu_read_unlock(&kvm->irq_srcu, idx); while(i--) { @@ -250,7 +260,8 @@ int kvm_set_irq_routing(struct kvm *kvm, mutex_lock(&kvm->irq_lock); old = kvm->irq_routing; - kvm_irq_routing_update(kvm, new); + rcu_assign_pointer(kvm->irq_routing, new); + kvm_irq_routing_update(kvm); mutex_unlock(&kvm->irq_lock); synchronize_srcu_expedited(&kvm->irq_srcu); -- cgit v1.2.3-59-g8ed1b From 297e21053a52f060944e9f0de4c64fad9bcd72fc Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 30 Jun 2014 20:51:13 +1000 Subject: KVM: Give IRQFD its own separate enabling Kconfig option Currently, the IRQFD code is conditional on CONFIG_HAVE_KVM_IRQ_ROUTING. So that we can have the IRQFD code compiled in without having the IRQ routing code, this creates a new CONFIG_HAVE_KVM_IRQFD, makes the IRQFD code conditional on it instead of CONFIG_HAVE_KVM_IRQ_ROUTING, and makes all the platforms that currently select HAVE_KVM_IRQ_ROUTING also select HAVE_KVM_IRQFD. 
Signed-off-by: Paul Mackerras Tested-by: Eric Auger Tested-by: Cornelia Huck Signed-off-by: Paolo Bonzini --- arch/ia64/kvm/Kconfig | 1 + arch/powerpc/kvm/Kconfig | 1 + arch/s390/kvm/Kconfig | 1 + arch/x86/kvm/Kconfig | 1 + include/linux/kvm_host.h | 8 ++++---- virt/kvm/Kconfig | 3 +++ virt/kvm/eventfd.c | 6 +++--- virt/kvm/kvm_main.c | 2 +- 8 files changed, 15 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/arch/ia64/kvm/Kconfig b/arch/ia64/kvm/Kconfig index 990b86420cc6..3d50ea955c4c 100644 --- a/arch/ia64/kvm/Kconfig +++ b/arch/ia64/kvm/Kconfig @@ -25,6 +25,7 @@ config KVM select PREEMPT_NOTIFIERS select ANON_INODES select HAVE_KVM_IRQCHIP + select HAVE_KVM_IRQFD select HAVE_KVM_IRQ_ROUTING select KVM_APIC_ARCHITECTURE select KVM_MMIO diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index 8f104a6879f0..d4741dba91af 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -158,6 +158,7 @@ config KVM_MPIC bool "KVM in-kernel MPIC emulation" depends on KVM && E500 select HAVE_KVM_IRQCHIP + select HAVE_KVM_IRQFD select HAVE_KVM_IRQ_ROUTING select HAVE_KVM_MSI help diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig index 10d529ac9821..646db9c467d1 100644 --- a/arch/s390/kvm/Kconfig +++ b/arch/s390/kvm/Kconfig @@ -26,6 +26,7 @@ config KVM select KVM_ASYNC_PF select KVM_ASYNC_PF_SYNC select HAVE_KVM_IRQCHIP + select HAVE_KVM_IRQFD select HAVE_KVM_IRQ_ROUTING ---help--- Support hosting paravirtualized guest machines using the SIE diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 287e4c85fff9..f9d16ff56c6b 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -27,6 +27,7 @@ config KVM select MMU_NOTIFIER select ANON_INODES select HAVE_KVM_IRQCHIP + select HAVE_KVM_IRQFD select HAVE_KVM_IRQ_ROUTING select HAVE_KVM_EVENTFD select KVM_APIC_ARCHITECTURE diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ddd33e1aeee1..8593d2e61cbf 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -437,7 +437,7 @@ void kvm_vcpu_uninit(struct kvm_vcpu *vcpu); int __must_check vcpu_load(struct kvm_vcpu *vcpu); void vcpu_put(struct kvm_vcpu *vcpu); -#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING +#ifdef CONFIG_HAVE_KVM_IRQFD int kvm_irqfd_init(void); void kvm_irqfd_exit(void); #else @@ -932,20 +932,20 @@ int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e, const struct kvm_irq_routing_entry *ue); void kvm_free_irq_routing(struct kvm *kvm); -int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi); - #else static inline void kvm_free_irq_routing(struct kvm *kvm) {} #endif +int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi); + #ifdef CONFIG_HAVE_KVM_EVENTFD void kvm_eventfd_init(struct kvm *kvm); int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args); -#ifdef CONFIG_HAVE_KVM_IRQCHIP +#ifdef CONFIG_HAVE_KVM_IRQFD int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args); void kvm_irqfd_release(struct kvm *kvm); void kvm_irq_routing_update(struct kvm *); diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index 13f2d19793e3..fc0c5e603eb4 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -6,6 +6,9 @@ config HAVE_KVM config HAVE_KVM_IRQCHIP bool +config HAVE_KVM_IRQFD + bool + config HAVE_KVM_IRQ_ROUTING bool diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 99957df69cf2..f5f61548f60d 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -39,7 +39,7 @@ #include "irq.h" #include "iodev.h" -#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING +#ifdef CONFIG_HAVE_KVM_IRQFD /* 
* -------------------------------------------------------------------- * irqfd: Allows an fd to be used to inject an interrupt to the guest @@ -450,7 +450,7 @@ out: void kvm_eventfd_init(struct kvm *kvm) { -#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING +#ifdef CONFIG_HAVE_KVM_IRQFD spin_lock_init(&kvm->irqfds.lock); INIT_LIST_HEAD(&kvm->irqfds.items); INIT_LIST_HEAD(&kvm->irqfds.resampler_list); @@ -459,7 +459,7 @@ kvm_eventfd_init(struct kvm *kvm) INIT_LIST_HEAD(&kvm->ioeventfds); } -#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING +#ifdef CONFIG_HAVE_KVM_IRQFD /* * shutdown any irqfd's that match fd+gsi */ diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 1b95cc926cfc..a69a623938b8 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2337,7 +2337,7 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) #ifdef CONFIG_HAVE_KVM_MSI case KVM_CAP_SIGNAL_MSI: #endif -#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING +#ifdef CONFIG_HAVE_KVM_IRQFD case KVM_CAP_IRQFD_RESAMPLE: #endif case KVM_CAP_CHECK_EXTENSION_VM: -- cgit v1.2.3-59-g8ed1b From 114840c3d29b9cbd867faa69595a2aee6f6b54a2 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Sun, 1 Jun 2014 11:53:50 +0300 Subject: mlx4_core: Add support for secure-host and SMP firewall Secure-host is the general term for the capability of a device to protect itself and the subnet from malicious host software. This is achieved by: 1. Not allowing un-trusted entities to access device configuration registers, directly (through pci_cr or pci_conf) and indirectly (through MADs). 2. Hiding M_Key from untrusted entities. 3. Preventing the modification of GUID0 by un-trusted entities 4. Not allowing drivers on untrusted hosts to receive nor to transmit packets over QP0 (SMP Firewall). The secure-host capability depends on firmware handling all QP0 packets, and not passing these packets up to the driver. Any information required by the driver for proper operation (e.g., SM lid) is passed via events generated by the firmware while processing QP0 MADs. Driver support mainly requires using the MAD_DEMUX FW command at startup, where the feature is enabled/disabled through a procedure described in the Mellanox HCA tools package. Signed-off-by: Jack Morgenstein Signed-off-by: Or Gerlitz [ Fix error path in mlx4_setup_hca to go to err_mcg_table_free. 
- Roland ] Signed-off-by: Roland Dreier --- drivers/net/ethernet/mellanox/mlx4/cmd.c | 9 +++ drivers/net/ethernet/mellanox/mlx4/fw.c | 91 ++++++++++++++++++++++++++++++- drivers/net/ethernet/mellanox/mlx4/main.c | 5 ++ drivers/net/ethernet/mellanox/mlx4/mlx4.h | 1 + include/linux/mlx4/cmd.h | 7 +++ include/linux/mlx4/device.h | 1 + 6 files changed, 113 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 5d940a26055c..65a4a0f88ea0 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -1310,6 +1310,15 @@ static struct mlx4_cmd_info cmd_info[] = { .verify = NULL, .wrapper = mlx4_MAD_IFC_wrapper }, + { + .opcode = MLX4_CMD_MAD_DEMUX, + .has_inbox = false, + .has_outbox = false, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, + .wrapper = mlx4_CMD_EPERM_wrapper + }, { .opcode = MLX4_CMD_QUERY_IF_STAT, .has_inbox = false, diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 688e1eabab29..494753e44ae3 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -136,7 +136,8 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags) [7] = "FSM (MAC anti-spoofing) support", [8] = "Dynamic QP updates support", [9] = "Device managed flow steering IPoIB support", - [10] = "TCP/IP offloads/flow-steering for VXLAN support" + [10] = "TCP/IP offloads/flow-steering for VXLAN support", + [11] = "MAD DEMUX (Secure-Host) support" }; int i; @@ -571,6 +572,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) #define QUERY_DEV_CAP_MAX_ICM_SZ_OFFSET 0xa0 #define QUERY_DEV_CAP_FW_REASSIGN_MAC 0x9d #define QUERY_DEV_CAP_VXLAN 0x9e +#define QUERY_DEV_CAP_MAD_DEMUX_OFFSET 0xb0 dev_cap->flags2 = 0; mailbox = mlx4_alloc_cmd_mailbox(dev); @@ -748,6 +750,11 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) MLX4_GET(dev_cap->max_counters, outbox, QUERY_DEV_CAP_MAX_COUNTERS_OFFSET); + MLX4_GET(field32, outbox, + QUERY_DEV_CAP_MAD_DEMUX_OFFSET); + if (field32 & (1 << 0)) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_MAD_DEMUX; + MLX4_GET(field32, outbox, QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET); if (field32 & (1 << 16)) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_UPDATE_QP; @@ -2016,3 +2023,85 @@ void mlx4_opreq_action(struct work_struct *work) out: mlx4_free_cmd_mailbox(dev, mailbox); } + +static int mlx4_check_smp_firewall_active(struct mlx4_dev *dev, + struct mlx4_cmd_mailbox *mailbox) +{ +#define MLX4_CMD_MAD_DEMUX_SET_ATTR_OFFSET 0x10 +#define MLX4_CMD_MAD_DEMUX_GETRESP_ATTR_OFFSET 0x20 +#define MLX4_CMD_MAD_DEMUX_TRAP_ATTR_OFFSET 0x40 +#define MLX4_CMD_MAD_DEMUX_TRAP_REPRESS_ATTR_OFFSET 0x70 + + u32 set_attr_mask, getresp_attr_mask; + u32 trap_attr_mask, traprepress_attr_mask; + + MLX4_GET(set_attr_mask, mailbox->buf, + MLX4_CMD_MAD_DEMUX_SET_ATTR_OFFSET); + mlx4_dbg(dev, "SMP firewall set_attribute_mask = 0x%x\n", + set_attr_mask); + + MLX4_GET(getresp_attr_mask, mailbox->buf, + MLX4_CMD_MAD_DEMUX_GETRESP_ATTR_OFFSET); + mlx4_dbg(dev, "SMP firewall getresp_attribute_mask = 0x%x\n", + getresp_attr_mask); + + MLX4_GET(trap_attr_mask, mailbox->buf, + MLX4_CMD_MAD_DEMUX_TRAP_ATTR_OFFSET); + mlx4_dbg(dev, "SMP firewall trap_attribute_mask = 0x%x\n", + trap_attr_mask); + + MLX4_GET(traprepress_attr_mask, mailbox->buf, + MLX4_CMD_MAD_DEMUX_TRAP_REPRESS_ATTR_OFFSET); + mlx4_dbg(dev, "SMP firewall 
traprepress_attribute_mask = 0x%x\n", + traprepress_attr_mask); + + if (set_attr_mask && getresp_attr_mask && trap_attr_mask && + traprepress_attr_mask) + return 1; + + return 0; +} + +int mlx4_config_mad_demux(struct mlx4_dev *dev) +{ + struct mlx4_cmd_mailbox *mailbox; + int secure_host_active; + int err; + + /* Check if mad_demux is supported */ + if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_MAD_DEMUX)) + return 0; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) { + mlx4_warn(dev, "Failed to allocate mailbox for cmd MAD_DEMUX"); + return -ENOMEM; + } + + /* Query mad_demux to find out which MADs are handled by internal sma */ + err = mlx4_cmd_box(dev, 0, mailbox->dma, 0x01 /* subn mgmt class */, + MLX4_CMD_MAD_DEMUX_QUERY_RESTR, MLX4_CMD_MAD_DEMUX, + MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); + if (err) { + mlx4_warn(dev, "MLX4_CMD_MAD_DEMUX: query restrictions failed (%d)\n", + err); + goto out; + } + + secure_host_active = mlx4_check_smp_firewall_active(dev, mailbox); + + /* Config mad_demux to handle all MADs returned by the query above */ + err = mlx4_cmd(dev, mailbox->dma, 0x01 /* subn mgmt class */, + MLX4_CMD_MAD_DEMUX_CONFIG, MLX4_CMD_MAD_DEMUX, + MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); + if (err) { + mlx4_warn(dev, "MLX4_CMD_MAD_DEMUX: configure failed (%d)\n", err); + goto out; + } + + if (secure_host_active) + mlx4_warn(dev, "HCA operating in secure-host mode. SMP firewall activated.\n"); +out: + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 82ab427290c3..f2c8e8ba23fe 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -1831,6 +1831,11 @@ static int mlx4_setup_hca(struct mlx4_dev *dev) mlx4_err(dev, "Failed to initialize multicast group table, aborting\n"); goto err_mr_table_free; } + err = mlx4_config_mad_demux(dev); + if (err) { + mlx4_err(dev, "Failed in config_mad_demux, aborting\n"); + goto err_mcg_table_free; + } } err = mlx4_init_eq_table(dev); diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index 1d8af7336807..310b30b4682c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -1311,5 +1311,6 @@ void mlx4_init_quotas(struct mlx4_dev *dev); int mlx4_get_slave_num_gids(struct mlx4_dev *dev, int slave, int port); /* Returns the VF index of slave */ int mlx4_get_vf_indx(struct mlx4_dev *dev, int slave); +int mlx4_config_mad_demux(struct mlx4_dev *dev); #endif /* MLX4_H */ diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index c8450366c130..379c02648ab3 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -116,6 +116,7 @@ enum { /* special QP and management commands */ MLX4_CMD_CONF_SPECIAL_QP = 0x23, MLX4_CMD_MAD_IFC = 0x24, + MLX4_CMD_MAD_DEMUX = 0x203, /* multicast commands */ MLX4_CMD_READ_MCG = 0x25, @@ -185,6 +186,12 @@ enum { MLX4_SET_PORT_VXLAN = 0xB }; +enum { + MLX4_CMD_MAD_DEMUX_CONFIG = 0, + MLX4_CMD_MAD_DEMUX_QUERY_STATE = 1, + MLX4_CMD_MAD_DEMUX_QUERY_RESTR = 2, /* Query mad demux restrictions */ +}; + enum { MLX4_CMD_WRAPPED, MLX4_CMD_NATIVE diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 35b51e7af886..cee9561e8ef6 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -172,6 +172,7 @@ enum { MLX4_DEV_CAP_FLAG2_UPDATE_QP = 1LL << 8, MLX4_DEV_CAP_FLAG2_DMFS_IPOIB = 1LL << 9, 
MLX4_DEV_CAP_FLAG2_VXLAN_OFFLOADS = 1LL << 10, + MLX4_DEV_CAP_FLAG2_MAD_DEMUX = 1LL << 11, }; enum { -- cgit v1.2.3-59-g8ed1b From 50a03e35b09ea5098ca42a59ad3fdb56a965f2dd Mon Sep 17 00:00:00 2001 From: Beniamino Galvani Date: Sat, 5 Jul 2014 15:20:54 +0200 Subject: regulator: act8865: prepare support for other act88xx devices This patch prepares support for other devices in the act88xx family of PMUs manufactured by Active-Semi. http://www.active-semi.com/products/power-management-units/act88xx/ Signed-off-by: Beniamino Galvani Tested-by: Wenyou Yang Reviewed-by: Axel Lin Signed-off-by: Mark Brown --- drivers/regulator/act8865-regulator.c | 211 +++++++++++++--------------------- include/linux/regulator/act8865.h | 6 +- 2 files changed, 88 insertions(+), 129 deletions(-) (limited to 'include/linux') diff --git a/drivers/regulator/act8865-regulator.c b/drivers/regulator/act8865-regulator.c index 52d1b6cad76f..b26eaf7580fc 100644 --- a/drivers/regulator/act8865-regulator.c +++ b/drivers/regulator/act8865-regulator.c @@ -1,6 +1,7 @@ /* - * act8865-regulator.c - Voltage regulation for the active-semi ACT8865 - * http://www.active-semi.com/sheets/ACT8865_Datasheet.pdf + * act8865-regulator.c - Voltage regulation for active-semi ACT88xx PMUs + * + * http://www.active-semi.com/products/power-management-units/act88xx/ * * Copyright (C) 2013 Atmel Corporation * @@ -70,7 +71,7 @@ static const struct regmap_config act8865_regmap_config = { .val_bits = 8, }; -static const struct regulator_linear_range act8865_volatge_ranges[] = { +static const struct regulator_linear_range act8865_voltage_ranges[] = { REGULATOR_LINEAR_RANGE(600000, 0, 23, 25000), REGULATOR_LINEAR_RANGE(1200000, 24, 47, 50000), REGULATOR_LINEAR_RANGE(2400000, 48, 63, 100000), @@ -86,110 +87,35 @@ static struct regulator_ops act8865_ops = { .is_enabled = regulator_is_enabled_regmap, }; -static const struct regulator_desc act8865_reg[] = { - { - .name = "DCDC_REG1", - .id = ACT8865_ID_DCDC1, - .ops = &act8865_ops, - .type = REGULATOR_VOLTAGE, - .n_voltages = ACT8865_VOLTAGE_NUM, - .linear_ranges = act8865_volatge_ranges, - .n_linear_ranges = ARRAY_SIZE(act8865_volatge_ranges), - .vsel_reg = ACT8865_DCDC1_VSET1, - .vsel_mask = ACT8865_VSEL_MASK, - .enable_reg = ACT8865_DCDC1_CTRL, - .enable_mask = ACT8865_ENA, - .owner = THIS_MODULE, - }, - { - .name = "DCDC_REG2", - .id = ACT8865_ID_DCDC2, - .ops = &act8865_ops, - .type = REGULATOR_VOLTAGE, - .n_voltages = ACT8865_VOLTAGE_NUM, - .linear_ranges = act8865_volatge_ranges, - .n_linear_ranges = ARRAY_SIZE(act8865_volatge_ranges), - .vsel_reg = ACT8865_DCDC2_VSET1, - .vsel_mask = ACT8865_VSEL_MASK, - .enable_reg = ACT8865_DCDC2_CTRL, - .enable_mask = ACT8865_ENA, - .owner = THIS_MODULE, - }, - { - .name = "DCDC_REG3", - .id = ACT8865_ID_DCDC3, - .ops = &act8865_ops, - .type = REGULATOR_VOLTAGE, - .n_voltages = ACT8865_VOLTAGE_NUM, - .linear_ranges = act8865_volatge_ranges, - .n_linear_ranges = ARRAY_SIZE(act8865_volatge_ranges), - .vsel_reg = ACT8865_DCDC3_VSET1, - .vsel_mask = ACT8865_VSEL_MASK, - .enable_reg = ACT8865_DCDC3_CTRL, - .enable_mask = ACT8865_ENA, - .owner = THIS_MODULE, - }, - { - .name = "LDO_REG1", - .id = ACT8865_ID_LDO1, - .ops = &act8865_ops, - .type = REGULATOR_VOLTAGE, - .n_voltages = ACT8865_VOLTAGE_NUM, - .linear_ranges = act8865_volatge_ranges, - .n_linear_ranges = ARRAY_SIZE(act8865_volatge_ranges), - .vsel_reg = ACT8865_LDO1_VSET, - .vsel_mask = ACT8865_VSEL_MASK, - .enable_reg = ACT8865_LDO1_CTRL, - .enable_mask = ACT8865_ENA, - .owner = THIS_MODULE, - }, - { - 
.name = "LDO_REG2", - .id = ACT8865_ID_LDO2, - .ops = &act8865_ops, - .type = REGULATOR_VOLTAGE, - .n_voltages = ACT8865_VOLTAGE_NUM, - .linear_ranges = act8865_volatge_ranges, - .n_linear_ranges = ARRAY_SIZE(act8865_volatge_ranges), - .vsel_reg = ACT8865_LDO2_VSET, - .vsel_mask = ACT8865_VSEL_MASK, - .enable_reg = ACT8865_LDO2_CTRL, - .enable_mask = ACT8865_ENA, - .owner = THIS_MODULE, - }, - { - .name = "LDO_REG3", - .id = ACT8865_ID_LDO3, - .ops = &act8865_ops, - .type = REGULATOR_VOLTAGE, - .n_voltages = ACT8865_VOLTAGE_NUM, - .linear_ranges = act8865_volatge_ranges, - .n_linear_ranges = ARRAY_SIZE(act8865_volatge_ranges), - .vsel_reg = ACT8865_LDO3_VSET, - .vsel_mask = ACT8865_VSEL_MASK, - .enable_reg = ACT8865_LDO3_CTRL, - .enable_mask = ACT8865_ENA, - .owner = THIS_MODULE, - }, - { - .name = "LDO_REG4", - .id = ACT8865_ID_LDO4, - .ops = &act8865_ops, - .type = REGULATOR_VOLTAGE, - .n_voltages = ACT8865_VOLTAGE_NUM, - .linear_ranges = act8865_volatge_ranges, - .n_linear_ranges = ARRAY_SIZE(act8865_volatge_ranges), - .vsel_reg = ACT8865_LDO4_VSET, - .vsel_mask = ACT8865_VSEL_MASK, - .enable_reg = ACT8865_LDO4_CTRL, - .enable_mask = ACT8865_ENA, - .owner = THIS_MODULE, - }, +#define ACT88xx_REG(_name, _family, _id, _vsel_reg) \ + [_family##_ID_##_id] = { \ + .name = _name, \ + .id = _family##_ID_##_id, \ + .type = REGULATOR_VOLTAGE, \ + .ops = &act8865_ops, \ + .n_voltages = ACT8865_VOLTAGE_NUM, \ + .linear_ranges = act8865_voltage_ranges, \ + .n_linear_ranges = ARRAY_SIZE(act8865_voltage_ranges), \ + .vsel_reg = _family##_##_id##_##_vsel_reg, \ + .vsel_mask = ACT8865_VSEL_MASK, \ + .enable_reg = _family##_##_id##_CTRL, \ + .enable_mask = ACT8865_ENA, \ + .owner = THIS_MODULE, \ + } + +static const struct regulator_desc act8865_regulators[] = { + ACT88xx_REG("DCDC_REG1", ACT8865, DCDC1, VSET1), + ACT88xx_REG("DCDC_REG2", ACT8865, DCDC2, VSET1), + ACT88xx_REG("DCDC_REG3", ACT8865, DCDC3, VSET1), + ACT88xx_REG("LDO_REG1", ACT8865, LDO1, VSET), + ACT88xx_REG("LDO_REG2", ACT8865, LDO2, VSET), + ACT88xx_REG("LDO_REG3", ACT8865, LDO3, VSET), + ACT88xx_REG("LDO_REG4", ACT8865, LDO4, VSET), }; #ifdef CONFIG_OF static const struct of_device_id act8865_dt_ids[] = { - { .compatible = "active-semi,act8865" }, + { .compatible = "active-semi,act8865", .data = (void *)ACT8865 }, { } }; MODULE_DEVICE_TABLE(of, act8865_dt_ids); @@ -206,7 +132,9 @@ static struct of_regulator_match act8865_matches[] = { static int act8865_pdata_from_dt(struct device *dev, struct device_node **of_node, - struct act8865_platform_data *pdata) + struct act8865_platform_data *pdata, + struct of_regulator_match *matches, + int num_matches) { int matched, i; struct device_node *np; @@ -218,26 +146,25 @@ static int act8865_pdata_from_dt(struct device *dev, return -EINVAL; } - matched = of_regulator_match(dev, np, - act8865_matches, ARRAY_SIZE(act8865_matches)); + matched = of_regulator_match(dev, np, matches, num_matches); of_node_put(np); if (matched <= 0) return matched; pdata->regulators = devm_kzalloc(dev, - sizeof(struct act8865_regulator_data) * - ARRAY_SIZE(act8865_matches), GFP_KERNEL); + sizeof(struct act8865_regulator_data) * + num_matches, GFP_KERNEL); if (!pdata->regulators) return -ENOMEM; - pdata->num_regulators = ARRAY_SIZE(act8865_matches); + pdata->num_regulators = num_matches; regulator = pdata->regulators; - for (i = 0; i < ARRAY_SIZE(act8865_matches); i++) { + for (i = 0; i < num_matches; i++) { regulator->id = i; - regulator->name = act8865_matches[i].name; - regulator->platform_data = 
act8865_matches[i].init_data; - of_node[i] = act8865_matches[i].of_node; + regulator->name = matches[i].name; + regulator->platform_data = matches[i].init_data; + of_node[i] = matches[i].of_node; regulator++; } @@ -269,34 +196,59 @@ static struct regulator_init_data } static int act8865_pmic_probe(struct i2c_client *client, - const struct i2c_device_id *i2c_id) + const struct i2c_device_id *i2c_id) { - struct regulator_dev *rdev; + static const struct regulator_desc *regulators; + struct act8865_platform_data pdata_of, *pdata; + struct of_regulator_match *matches; struct device *dev = &client->dev; - struct act8865_platform_data *pdata = dev_get_platdata(dev); - struct regulator_config config = { }; + struct device_node **of_node; + int i, ret, num_regulators; struct act8865 *act8865; - struct device_node *of_node[ACT8865_REG_NUM]; - int i; - int ret; + unsigned long type; + + pdata = dev_get_platdata(dev); if (dev->of_node && !pdata) { const struct of_device_id *id; - struct act8865_platform_data pdata_of; id = of_match_device(of_match_ptr(act8865_dt_ids), dev); if (!id) return -ENODEV; - ret = act8865_pdata_from_dt(dev, of_node, &pdata_of); + type = (unsigned long) id->data; + } else { + type = i2c_id->driver_data; + } + + switch (type) { + case ACT8865: + matches = act8865_matches; + regulators = act8865_regulators; + num_regulators = ARRAY_SIZE(act8865_regulators); + break; + default: + dev_err(dev, "invalid device id %lu\n", type); + return -EINVAL; + } + + of_node = devm_kzalloc(dev, sizeof(struct device_node *) * + num_regulators, GFP_KERNEL); + if (!of_node) + return -ENOMEM; + + if (dev->of_node && !pdata) { + ret = act8865_pdata_from_dt(dev, of_node, &pdata_of, matches, + num_regulators); if (ret < 0) return ret; pdata = &pdata_of; } - if (pdata->num_regulators > ACT8865_REG_NUM) { - dev_err(dev, "Too many regulators found!\n"); + if (pdata->num_regulators > num_regulators) { + dev_err(dev, "too many regulators: %d\n", + pdata->num_regulators); return -EINVAL; } @@ -313,8 +265,10 @@ static int act8865_pmic_probe(struct i2c_client *client, } /* Finally register devices */ - for (i = 0; i < ACT8865_REG_NUM; i++) { - const struct regulator_desc *desc = &act8865_reg[i]; + for (i = 0; i < num_regulators; i++) { + const struct regulator_desc *desc = ®ulators[i]; + struct regulator_config config = { }; + struct regulator_dev *rdev; config.dev = dev; config.init_data = act8865_get_init_data(desc->id, pdata); @@ -330,12 +284,13 @@ static int act8865_pmic_probe(struct i2c_client *client, } i2c_set_clientdata(client, act8865); + devm_kfree(dev, of_node); return 0; } static const struct i2c_device_id act8865_ids[] = { - { "act8865", 0 }, + { .name = "act8865", .driver_data = ACT8865 }, { }, }; MODULE_DEVICE_TABLE(i2c, act8865_ids); @@ -351,6 +306,6 @@ static struct i2c_driver act8865_pmic_driver = { module_i2c_driver(act8865_pmic_driver); -MODULE_DESCRIPTION("active-semi act8865 voltage regulator driver"); +MODULE_DESCRIPTION("active-semi act88xx voltage regulator driver"); MODULE_AUTHOR("Wenyou Yang "); MODULE_LICENSE("GPL v2"); diff --git a/include/linux/regulator/act8865.h b/include/linux/regulator/act8865.h index 49206c1b4905..b49be816fc54 100644 --- a/include/linux/regulator/act8865.h +++ b/include/linux/regulator/act8865.h @@ -1,5 +1,5 @@ /* - * act8865.h -- Voltage regulation for the active-semi act8865 + * act8865.h -- Voltage regulation for active-semi act88xx PMUs * * Copyright (C) 2013 Atmel Corporation. 
* @@ -29,6 +29,10 @@ enum { ACT8865_REG_NUM, }; +enum { + ACT8865, +}; + /** * act8865_regulator_data - regulator data * @id: regulator id -- cgit v1.2.3-59-g8ed1b From ac0c0ea8b62949eb371b3227481385362a1425c6 Mon Sep 17 00:00:00 2001 From: Beniamino Galvani Date: Sat, 5 Jul 2014 15:20:55 +0200 Subject: regulator: act8865: add support for act8846 Add device id and definition of registers and regulators to support the act8846 PMU. Signed-off-by: Beniamino Galvani Tested-by: Wenyou Yang Reviewed-by: Axel Lin Signed-off-by: Mark Brown --- drivers/regulator/act8865-regulator.c | 71 +++++++++++++++++++++++++++++++++++ include/linux/regulator/act8865.h | 17 +++++++++ 2 files changed, 88 insertions(+) (limited to 'include/linux') diff --git a/drivers/regulator/act8865-regulator.c b/drivers/regulator/act8865-regulator.c index b26eaf7580fc..a5ad69a0aca9 100644 --- a/drivers/regulator/act8865-regulator.c +++ b/drivers/regulator/act8865-regulator.c @@ -28,6 +28,40 @@ #include #include +/* + * ACT8846 Global Register Map. + */ +#define ACT8846_SYS0 0x00 +#define ACT8846_SYS1 0x01 +#define ACT8846_REG1_VSET 0x10 +#define ACT8846_REG1_CTRL 0x12 +#define ACT8846_REG2_VSET0 0x20 +#define ACT8846_REG2_VSET1 0x21 +#define ACT8846_REG2_CTRL 0x22 +#define ACT8846_REG3_VSET0 0x30 +#define ACT8846_REG3_VSET1 0x31 +#define ACT8846_REG3_CTRL 0x32 +#define ACT8846_REG4_VSET0 0x40 +#define ACT8846_REG4_VSET1 0x41 +#define ACT8846_REG4_CTRL 0x42 +#define ACT8846_REG5_VSET 0x50 +#define ACT8846_REG5_CTRL 0x51 +#define ACT8846_REG6_VSET 0x58 +#define ACT8846_REG6_CTRL 0x59 +#define ACT8846_REG7_VSET 0x60 +#define ACT8846_REG7_CTRL 0x61 +#define ACT8846_REG8_VSET 0x68 +#define ACT8846_REG8_CTRL 0x69 +#define ACT8846_REG9_VSET 0x70 +#define ACT8846_REG9_CTRL 0x71 +#define ACT8846_REG10_VSET 0x80 +#define ACT8846_REG10_CTRL 0x81 +#define ACT8846_REG11_VSET 0x90 +#define ACT8846_REG11_CTRL 0x91 +#define ACT8846_REG12_VSET 0xa0 +#define ACT8846_REG12_CTRL 0xa1 +#define ACT8846_REG13_CTRL 0xb1 + /* * ACT8865 Global Register Map. 
*/ @@ -103,6 +137,21 @@ static struct regulator_ops act8865_ops = { .owner = THIS_MODULE, \ } +static const struct regulator_desc act8846_regulators[] = { + ACT88xx_REG("REG1", ACT8846, REG1, VSET), + ACT88xx_REG("REG2", ACT8846, REG2, VSET0), + ACT88xx_REG("REG3", ACT8846, REG3, VSET0), + ACT88xx_REG("REG4", ACT8846, REG4, VSET0), + ACT88xx_REG("REG5", ACT8846, REG5, VSET), + ACT88xx_REG("REG6", ACT8846, REG6, VSET), + ACT88xx_REG("REG7", ACT8846, REG7, VSET), + ACT88xx_REG("REG8", ACT8846, REG8, VSET), + ACT88xx_REG("REG9", ACT8846, REG9, VSET), + ACT88xx_REG("REG10", ACT8846, REG10, VSET), + ACT88xx_REG("REG11", ACT8846, REG11, VSET), + ACT88xx_REG("REG12", ACT8846, REG12, VSET), +}; + static const struct regulator_desc act8865_regulators[] = { ACT88xx_REG("DCDC_REG1", ACT8865, DCDC1, VSET1), ACT88xx_REG("DCDC_REG2", ACT8865, DCDC2, VSET1), @@ -115,11 +164,27 @@ static const struct regulator_desc act8865_regulators[] = { #ifdef CONFIG_OF static const struct of_device_id act8865_dt_ids[] = { + { .compatible = "active-semi,act8846", .data = (void *)ACT8846 }, { .compatible = "active-semi,act8865", .data = (void *)ACT8865 }, { } }; MODULE_DEVICE_TABLE(of, act8865_dt_ids); +static struct of_regulator_match act8846_matches[] = { + [ACT8846_ID_REG1] = { .name = "REG1" }, + [ACT8846_ID_REG2] = { .name = "REG2" }, + [ACT8846_ID_REG3] = { .name = "REG3" }, + [ACT8846_ID_REG4] = { .name = "REG4" }, + [ACT8846_ID_REG5] = { .name = "REG5" }, + [ACT8846_ID_REG6] = { .name = "REG6" }, + [ACT8846_ID_REG7] = { .name = "REG7" }, + [ACT8846_ID_REG8] = { .name = "REG8" }, + [ACT8846_ID_REG9] = { .name = "REG9" }, + [ACT8846_ID_REG10] = { .name = "REG10" }, + [ACT8846_ID_REG11] = { .name = "REG11" }, + [ACT8846_ID_REG12] = { .name = "REG12" }, +}; + static struct of_regulator_match act8865_matches[] = { [ACT8865_ID_DCDC1] = { .name = "DCDC_REG1"}, [ACT8865_ID_DCDC2] = { .name = "DCDC_REG2"}, @@ -222,6 +287,11 @@ static int act8865_pmic_probe(struct i2c_client *client, } switch (type) { + case ACT8846: + matches = act8846_matches; + regulators = act8846_regulators; + num_regulators = ARRAY_SIZE(act8846_regulators); + break; case ACT8865: matches = act8865_matches; regulators = act8865_regulators; @@ -290,6 +360,7 @@ static int act8865_pmic_probe(struct i2c_client *client, } static const struct i2c_device_id act8865_ids[] = { + { .name = "act8846", .driver_data = ACT8846 }, { .name = "act8865", .driver_data = ACT8865 }, { }, }; diff --git a/include/linux/regulator/act8865.h b/include/linux/regulator/act8865.h index b49be816fc54..b6c4909b33af 100644 --- a/include/linux/regulator/act8865.h +++ b/include/linux/regulator/act8865.h @@ -29,8 +29,25 @@ enum { ACT8865_REG_NUM, }; +enum { + ACT8846_ID_REG1, + ACT8846_ID_REG2, + ACT8846_ID_REG3, + ACT8846_ID_REG4, + ACT8846_ID_REG5, + ACT8846_ID_REG6, + ACT8846_ID_REG7, + ACT8846_ID_REG8, + ACT8846_ID_REG9, + ACT8846_ID_REG10, + ACT8846_ID_REG11, + ACT8846_ID_REG12, + ACT8846_REG_NUM, +}; + enum { ACT8865, + ACT8846, }; /** -- cgit v1.2.3-59-g8ed1b From c6e9d6f38894798696f23c8084ca7edbf16ee895 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 17 Jul 2014 04:13:05 -0400 Subject: random: introduce getrandom(2) system call The getrandom(2) system call was requested by the LibreSSL Portable developers. It is analoguous to the getentropy(2) system call in OpenBSD. 
The rationale of this system call is to provide resiliance against file descriptor exhaustion attacks, where the attacker consumes all available file descriptors, forcing the use of the fallback code where /dev/[u]random is not available. Since the fallback code is often not well-tested, it is better to eliminate this potential failure mode entirely. The other feature provided by this new system call is the ability to request randomness from the /dev/urandom entropy pool, but to block until at least 128 bits of entropy has been accumulated in the /dev/urandom entropy pool. Historically, the emphasis in the /dev/urandom development has been to ensure that urandom pool is initialized as quickly as possible after system boot, and preferably before the init scripts start execution. This is because changing /dev/urandom reads to block represents an interface change that could potentially break userspace which is not acceptable. In practice, on most x86 desktop and server systems, in general the entropy pool can be initialized before it is needed (and in modern kernels, we will printk a warning message if not). However, on an embedded system, this may not be the case. And so with this new interface, we can provide the functionality of blocking until the urandom pool has been initialized. Any userspace program which uses this new functionality must take care to assure that if it is used during the boot process, that it will not cause the init scripts or other portions of the system startup to hang indefinitely. SYNOPSIS #include int getrandom(void *buf, size_t buflen, unsigned int flags); DESCRIPTION The system call getrandom() fills the buffer pointed to by buf with up to buflen random bytes which can be used to seed user space random number generators (i.e., DRBG's) or for other cryptographic uses. It should not be used for Monte Carlo simulations or other programs/algorithms which are doing probabilistic sampling. If the GRND_RANDOM flags bit is set, then draw from the /dev/random pool instead of the /dev/urandom pool. The /dev/random pool is limited based on the entropy that can be obtained from environmental noise, so if there is insufficient entropy, the requested number of bytes may not be returned. If there is no entropy available at all, getrandom(2) will either block, or return an error with errno set to EAGAIN if the GRND_NONBLOCK bit is set in flags. If the GRND_RANDOM bit is not set, then the /dev/urandom pool will be used. Unlike using read(2) to fetch data from /dev/urandom, if the urandom pool has not been sufficiently initialized, getrandom(2) will block (or return -1 with the errno set to EAGAIN if the GRND_NONBLOCK bit is set in flags). The getentropy(2) system call in OpenBSD can be emulated using the following function: int getentropy(void *buf, size_t buflen) { int ret; if (buflen > 256) goto failure; ret = getrandom(buf, buflen, 0); if (ret < 0) return ret; if (ret == buflen) return 0; failure: errno = EIO; return -1; } RETURN VALUE On success, the number of bytes that was filled in the buf is returned. This may not be all the bytes requested by the caller via buflen if insufficient entropy was present in the /dev/random pool, or if the system call was interrupted by a signal. On error, -1 is returned, and errno is set appropriately. ERRORS EINVAL An invalid flag was passed to getrandom(2) EFAULT buf is outside the accessible address space. EAGAIN The requested entropy was not available, and getentropy(2) would have blocked if the GRND_NONBLOCK flag was not set. 
	EINTR	While blocked waiting for entropy, the call was interrupted by a signal handler; see the description of how interrupted read(2) calls on "slow" devices are handled with and without the SA_RESTART flag in the signal(7) man page.

NOTES
	For small requests (buflen <= 256), getrandom(2) will not return EINTR when reading from the urandom pool once the entropy pool has been initialized, and it will return all of the bytes that have been requested. This is the recommended way to use getrandom(2), and is designed for compatibility with OpenBSD's getentropy() system call.

	However, if you are using GRND_RANDOM, then getrandom(2) may block until the entropy accounting determines that sufficient environmental noise has been gathered, such that getrandom(2) will be operating as an NRBG instead of a DRBG for those people who are working in the NIST SP 800-90 regime. Since it may block for a long time, these guarantees do *not* apply. The user may want to interrupt a hanging process using a signal, so blocking until all of the requested bytes are returned would be unfriendly.

	For this reason, the user of getrandom(2) MUST always check the return value, in case it returns some error, or in case fewer bytes than requested were returned. In the case of !GRND_RANDOM and a small request, the latter should never happen, but careful userspace code (and all crypto code should be careful) should check for this anyway.

	Finally, unless you are doing long-term key generation (and perhaps not even then), you probably shouldn't be using GRND_RANDOM. The cryptographic algorithms used for /dev/urandom are quite conservative, and so should be sufficient for all purposes. The disadvantages of GRND_RANDOM are that it can block, and that it adds the complexity of dealing with partially fulfilled getrandom(2) requests.
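	As a rough sketch of the recommended usage above (a small, !GRND_RANDOM request whose return value is always checked), the fragment below fills a seed buffer through the raw system call. It is illustrative only and not part of the patch: it assumes uapi headers from a kernel carrying this change (for the GRND_* flags) and a libc new enough to expose the syscall number via <sys/syscall.h>; there is no libc wrapper at this point, so syscall(2) is used directly, and get_seed() is a made-up helper name for the example.

	/*
	 * Illustrative sketch only: seed a small buffer from the urandom pool.
	 * Assumes SYS_getrandom is known to libc headers; otherwise
	 * __NR_getrandom from the kernel headers must be used instead.
	 */
	#include <errno.h>
	#include <stdio.h>
	#include <unistd.h>
	#include <sys/syscall.h>	/* SYS_getrandom */
	#include <linux/random.h>	/* GRND_NONBLOCK, GRND_RANDOM (unused in this minimal sketch) */

	static int get_seed(void *buf, size_t len)
	{
		long ret;

		if (len > 256)		/* stay inside the "small request" guarantee */
			return -1;

		do {
			ret = syscall(SYS_getrandom, buf, len, 0);
		} while (ret < 0 && errno == EINTR);	/* only possible before the pool is initialized */

		return ret == (long)len ? 0 : -1;	/* always check the return value */
	}

	int main(void)
	{
		unsigned char seed[16];

		if (get_seed(seed, sizeof(seed)) != 0) {
			perror("getrandom");
			return 1;
		}
		printf("seeded with %zu bytes\n", sizeof(seed));
		return 0;
	}

	With GRND_RANDOM the same loop would additionally have to handle short reads by advancing the buffer pointer and retrying, which is exactly the extra complexity the paragraph above recommends avoiding.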
Signed-off-by: Theodore Ts'o Reviewed-by: Zach Brown --- arch/x86/syscalls/syscall_32.tbl | 1 + arch/x86/syscalls/syscall_64.tbl | 1 + drivers/char/random.c | 40 ++++++++++++++++++++++++++++++++++++--- include/linux/syscalls.h | 3 +++ include/uapi/asm-generic/unistd.h | 4 +++- include/uapi/linux/random.h | 9 +++++++++ 6 files changed, 54 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl index d6b867921612..5b46a618aeb1 100644 --- a/arch/x86/syscalls/syscall_32.tbl +++ b/arch/x86/syscalls/syscall_32.tbl @@ -360,3 +360,4 @@ 351 i386 sched_setattr sys_sched_setattr 352 i386 sched_getattr sys_sched_getattr 353 i386 renameat2 sys_renameat2 +355 i386 getrandom sys_getrandom diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl index ec255a1646d2..0dc4bf891460 100644 --- a/arch/x86/syscalls/syscall_64.tbl +++ b/arch/x86/syscalls/syscall_64.tbl @@ -323,6 +323,7 @@ 314 common sched_setattr sys_sched_setattr 315 common sched_getattr sys_sched_getattr 316 common renameat2 sys_renameat2 +318 common getrandom sys_getrandom # # x32-specific system call numbers start at 512 to avoid cache impact diff --git a/drivers/char/random.c b/drivers/char/random.c index aa22fe551c2a..7d1682ea1e86 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -258,6 +258,8 @@ #include #include #include +#include +#include #include #include @@ -404,6 +406,7 @@ static struct poolinfo { */ static DECLARE_WAIT_QUEUE_HEAD(random_read_wait); static DECLARE_WAIT_QUEUE_HEAD(random_write_wait); +static DECLARE_WAIT_QUEUE_HEAD(urandom_init_wait); static struct fasync_struct *fasync; /********************************************************************** @@ -657,6 +660,7 @@ retry: r->entropy_total = 0; if (r == &nonblocking_pool) { prandom_reseed_late(); + wake_up_interruptible(&urandom_init_wait); pr_notice("random: %s pool is initialized\n", r->name); } } @@ -1174,13 +1178,14 @@ static ssize_t extract_entropy_user(struct entropy_store *r, void __user *buf, { ssize_t ret = 0, i; __u8 tmp[EXTRACT_SIZE]; + int large_request = (nbytes > 256); trace_extract_entropy_user(r->name, nbytes, ENTROPY_BITS(r), _RET_IP_); xfer_secondary_pool(r, nbytes); nbytes = account(r, nbytes, 0, 0); while (nbytes) { - if (need_resched()) { + if (large_request && need_resched()) { if (signal_pending(current)) { if (ret == 0) ret = -ERESTARTSYS; @@ -1355,7 +1360,7 @@ static int arch_random_refill(void) } static ssize_t -random_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) +_random_read(int nonblock, char __user *buf, size_t nbytes) { ssize_t n; @@ -1379,7 +1384,7 @@ random_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) if (arch_random_refill()) continue; - if (file->f_flags & O_NONBLOCK) + if (nonblock) return -EAGAIN; wait_event_interruptible(random_read_wait, @@ -1390,6 +1395,12 @@ random_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) } } +static ssize_t +random_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) +{ + return _random_read(file->f_flags & O_NONBLOCK, buf, nbytes); +} + static ssize_t urandom_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) { @@ -1533,6 +1544,29 @@ const struct file_operations urandom_fops = { .llseek = noop_llseek, }; +SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count, + unsigned int, flags) +{ + if (flags & ~(GRND_NONBLOCK|GRND_RANDOM)) + return -EINVAL; + + if (count > 
INT_MAX) + count = INT_MAX; + + if (flags & GRND_RANDOM) + return _random_read(flags & GRND_NONBLOCK, buf, count); + + if (unlikely(nonblocking_pool.initialized == 0)) { + if (flags & GRND_NONBLOCK) + return -EAGAIN; + wait_event_interruptible(urandom_init_wait, + nonblocking_pool.initialized); + if (signal_pending(current)) + return -ERESTARTSYS; + } + return urandom_read(NULL, buf, count, NULL); +} + /*************************************************************** * Random UUID interface * diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index b0881a0ed322..43324a897cf2 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -866,4 +866,7 @@ asmlinkage long sys_process_vm_writev(pid_t pid, asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type, unsigned long idx1, unsigned long idx2); asmlinkage long sys_finit_module(int fd, const char __user *uargs, int flags); +asmlinkage long sys_getrandom(char __user *buf, size_t count, + unsigned int flags); + #endif diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 333640608087..1d104a2ca643 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -699,9 +699,11 @@ __SYSCALL(__NR_sched_setattr, sys_sched_setattr) __SYSCALL(__NR_sched_getattr, sys_sched_getattr) #define __NR_renameat2 276 __SYSCALL(__NR_renameat2, sys_renameat2) +#define __NR_getrandom 278 +__SYSCALL(__NR_getrandom, sys_getrandom) #undef __NR_syscalls -#define __NR_syscalls 277 +#define __NR_syscalls 279 /* * All syscalls below here should go away really, diff --git a/include/uapi/linux/random.h b/include/uapi/linux/random.h index fff3528a078f..3f93d1695e7f 100644 --- a/include/uapi/linux/random.h +++ b/include/uapi/linux/random.h @@ -40,4 +40,13 @@ struct rand_pool_info { __u32 buf[0]; }; +/* + * Flags for getrandom(2) + * + * GRND_NONBLOCK Don't block and return EAGAIN instead + * GRND_RANDOM Use the /dev/random pool instead of /dev/urandom + */ +#define GRND_NONBLOCK 0x0001 +#define GRND_RANDOM 0x0002 + #endif /* _UAPI_LINUX_RANDOM_H */ -- cgit v1.2.3-59-g8ed1b From f24b9be5957b38bb420b838115040dc2031b7d0c Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 4 Aug 2014 22:11:45 -0400 Subject: net-timestamp: extend SCM_TIMESTAMPING ancillary data struct Applications that request kernel tx timestamps with SO_TIMESTAMPING read timestamps as recvmsg() ancillary data. The response is defined implicitly as timespec[3]. 1) define struct scm_timestamping explicitly and 2) add support for new tstamp types. On tx, scm_timestamping always accompanies a sock_extended_err. Define previously unused field ee_info to signal the type of ts[0]. Introduce SCM_TSTAMP_SND to define the existing behavior. The reception path is not modified. On rx, no struct similar to sock_extended_err is passed along with SCM_TIMESTAMPING. Signed-off-by: Willem de Bruijn Signed-off-by: David S. 
Miller --- include/linux/skbuff.h | 3 +++ include/net/sock.h | 4 +++- include/uapi/linux/errqueue.h | 18 ++++++++++++++++++ net/core/skbuff.c | 1 + net/socket.c | 20 +++++++++++--------- 5 files changed, 36 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 281deced7469..477f0f60db45 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -249,6 +249,9 @@ enum { SKBTX_SHARED_FRAG = 1 << 5, }; +#define SKBTX_ANY_SW_TSTAMP SKBTX_SW_TSTAMP +#define SKBTX_ANY_TSTAMP (SKBTX_HW_TSTAMP | SKBTX_ANY_SW_TSTAMP) + /* * The callback notifies userspace to release buffers when skb DMA is done in * lower device, the skb last reference should be 0 when calling this. diff --git a/include/net/sock.h b/include/net/sock.h index b91c8868ab8d..02f5b35e65f1 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2169,7 +2169,9 @@ sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) */ if (sock_flag(sk, SOCK_RCVTSTAMP) || sock_flag(sk, SOCK_TIMESTAMPING_RX_SOFTWARE) || - (kt.tv64 && sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE)) || + (kt.tv64 && + (sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE) || + skb_shinfo(skb)->tx_flags & SKBTX_ANY_SW_TSTAMP)) || (hwtstamps->hwtstamp.tv64 && sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE))) __sock_recv_timestamp(msg, sk, skb); diff --git a/include/uapi/linux/errqueue.h b/include/uapi/linux/errqueue.h index aacd4fb7102a..accee72cae7c 100644 --- a/include/uapi/linux/errqueue.h +++ b/include/uapi/linux/errqueue.h @@ -22,5 +22,23 @@ struct sock_extended_err { #define SO_EE_OFFENDER(ee) ((struct sockaddr*)((ee)+1)) +/** + * struct scm_timestamping - timestamps exposed through cmsg + * + * The timestamping interfaces SO_TIMESTAMPING, MSG_TSTAMP_* + * communicate network timestamps by passing this struct in a cmsg with + * recvmsg(). See Documentation/networking/timestamping.txt for details. + */ +struct scm_timestamping { + struct timespec ts[3]; +}; + +/* The type of scm_timestamping, passed in sock_extended_err ee_info. + * This defines the type of ts[0]. For SCM_TSTAMP_SND only, if ts[0] + * is zero, then this is a hardware timestamp and recorded in ts[2]. 
+ */ +enum { + SCM_TSTAMP_SND, /* driver passed skb to NIC, or HW */ +}; #endif /* _UAPI_LINUX_ERRQUEUE_H */ diff --git a/net/core/skbuff.c b/net/core/skbuff.c index c1a33033cbe2..c9f68802e992 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3521,6 +3521,7 @@ void skb_tstamp_tx(struct sk_buff *orig_skb, memset(serr, 0, sizeof(*serr)); serr->ee.ee_errno = ENOMSG; serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING; + serr->ee.ee_info = SCM_TSTAMP_SND; err = sock_queue_err_skb(sk, skb); diff --git a/net/socket.c b/net/socket.c index d8222c025061..dc0cc5d95ee5 100644 --- a/net/socket.c +++ b/net/socket.c @@ -106,6 +106,7 @@ #include #include #include +#include #ifdef CONFIG_NET_RX_BUSY_POLL unsigned int sysctl_net_busy_read __read_mostly; @@ -697,7 +698,7 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) { int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP); - struct timespec ts[3]; + struct scm_timestamping tss; int empty = 1; struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb); @@ -714,24 +715,25 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP, sizeof(tv), &tv); } else { - skb_get_timestampns(skb, &ts[0]); + struct timespec ts; + skb_get_timestampns(skb, &ts); put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS, - sizeof(ts[0]), &ts[0]); + sizeof(ts), &ts); } } - - memset(ts, 0, sizeof(ts)); - if (sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE) && - ktime_to_timespec_cond(skb->tstamp, ts + 0)) + memset(&tss, 0, sizeof(tss)); + if ((sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE) || + skb_shinfo(skb)->tx_flags & SKBTX_ANY_SW_TSTAMP) && + ktime_to_timespec_cond(skb->tstamp, tss.ts + 0)) empty = 0; if (shhwtstamps && sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE) && - ktime_to_timespec_cond(shhwtstamps->hwtstamp, ts + 2)) + ktime_to_timespec_cond(shhwtstamps->hwtstamp, tss.ts + 2)) empty = 0; if (!empty) put_cmsg(msg, SOL_SOCKET, - SCM_TIMESTAMPING, sizeof(ts), &ts); + SCM_TIMESTAMPING, sizeof(tss), &tss); } EXPORT_SYMBOL_GPL(__sock_recv_timestamp); -- cgit v1.2.3-59-g8ed1b From 09c2d251b70723650ba47e83571ff49281320f7c Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 4 Aug 2014 22:11:47 -0400 Subject: net-timestamp: add key to disambiguate concurrent datagrams Datagrams timestamped on transmission can coexist in the kernel stack and be reordered in packet scheduling. When reading looped datagrams from the socket error queue it is not always possible to unique correlate looped data with original send() call (for application level retransmits). Even if possible, it may be expensive and complex, requiring packet inspection. Introduce a data-independent ID mechanism to associate timestamps with send calls. Pass an ID alongside the timestamp in field ee_data of sock_extended_err. The ID is a simple 32 bit unsigned int that is associated with the socket and incremented on each send() call for which software tx timestamp generation is enabled. The feature is enabled only if SOF_TIMESTAMPING_OPT_ID is set, to avoid changing ee_data for existing applications that expect it 0. The counter is reset each time the flag is reenabled. Reenabling does not change the ID of already submitted data. It is possible to receive out of order IDs if the timestamp stream is not quiesced first. Signed-off-by: Willem de Bruijn Signed-off-by: David S. 
Miller --- include/linux/skbuff.h | 1 + include/net/sock.h | 2 ++ include/uapi/linux/net_tstamp.h | 8 +++++--- net/core/skbuff.c | 2 ++ net/core/sock.c | 3 +++ net/ipv4/ip_output.c | 6 ++++++ net/ipv6/ip6_output.c | 9 ++++++++- 7 files changed, 27 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 477f0f60db45..0e35b3af7317 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -278,6 +278,7 @@ struct skb_shared_info { unsigned short gso_type; struct sk_buff *frag_list; struct skb_shared_hwtstamps hwtstamps; + u32 tskey; __be32 ip6_frag_id; /* diff --git a/include/net/sock.h b/include/net/sock.h index a21129716aae..52fe0bc5598a 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -280,6 +280,7 @@ struct cg_proto; * @sk_timer: sock cleanup timer * @sk_stamp: time stamp of last packet received * @sk_tsflags: SO_TIMESTAMPING socket options + * @sk_tskey: counter to disambiguate concurrent tstamp requests * @sk_socket: Identd and reporting IO signals * @sk_user_data: RPC layer private data * @sk_frag: cached page frag @@ -414,6 +415,7 @@ struct sock { struct timer_list sk_timer; ktime_t sk_stamp; u16 sk_tsflags; + u32 sk_tskey; struct socket *sk_socket; void *sk_user_data; struct page_frag sk_frag; diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h index f53879c0f590..1e861d2e1a31 100644 --- a/include/uapi/linux/net_tstamp.h +++ b/include/uapi/linux/net_tstamp.h @@ -20,9 +20,11 @@ enum { SOF_TIMESTAMPING_SOFTWARE = (1<<4), SOF_TIMESTAMPING_SYS_HARDWARE = (1<<5), SOF_TIMESTAMPING_RAW_HARDWARE = (1<<6), - SOF_TIMESTAMPING_MASK = - (SOF_TIMESTAMPING_RAW_HARDWARE - 1) | - SOF_TIMESTAMPING_RAW_HARDWARE + SOF_TIMESTAMPING_OPT_ID = (1<<7), + + SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_ID, + SOF_TIMESTAMPING_MASK = (SOF_TIMESTAMPING_LAST - 1) | + SOF_TIMESTAMPING_LAST }; /** diff --git a/net/core/skbuff.c b/net/core/skbuff.c index c9f68802e992..0df4f1d14c5a 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3522,6 +3522,8 @@ void skb_tstamp_tx(struct sk_buff *orig_skb, serr->ee.ee_errno = ENOMSG; serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING; serr->ee.ee_info = SCM_TSTAMP_SND; + if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) + serr->ee.ee_data = skb_shinfo(skb)->tskey; err = sock_queue_err_skb(sk, skb); diff --git a/net/core/sock.c b/net/core/sock.c index 47c9377e14b9..1e0f1c63ad6b 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -848,6 +848,9 @@ set_rcvbuf: ret = -EINVAL; break; } + if (val & SOF_TIMESTAMPING_OPT_ID && + !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) + sk->sk_tskey = 0; sk->sk_tsflags = val; if (val & SOF_TIMESTAMPING_RX_SOFTWARE) sock_enable_timestamp(sk, diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index b16556836d66..215af2b155cb 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -855,11 +855,15 @@ static int __ip_append_data(struct sock *sk, unsigned int maxfraglen, fragheaderlen, maxnonfragsize; int csummode = CHECKSUM_NONE; struct rtable *rt = (struct rtable *)cork->dst; + u32 tskey = 0; skb = skb_peek_tail(queue); exthdrlen = !skb ? 
rt->dst.header_len : 0; mtu = cork->fragsize; + if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP && + sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) + tskey = sk->sk_tskey++; hh_len = LL_RESERVED_SPACE(rt->dst.dev); @@ -976,6 +980,8 @@ alloc_new_skb: /* only the initial fragment is time stamped */ skb_shinfo(skb)->tx_flags = cork->tx_flags; cork->tx_flags = 0; + skb_shinfo(skb)->tskey = tskey; + tskey = 0; /* * Find where to start putting bytes. diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index f5dafe609f8b..315a55d66079 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1157,6 +1157,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int err; int offset = 0; __u8 tx_flags = 0; + u32 tskey = 0; if (flags&MSG_PROBE) return 0; @@ -1272,8 +1273,12 @@ emsgsize: } } - if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) + if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) { sock_tx_timestamp(sk, &tx_flags); + if (tx_flags & SKBTX_ANY_SW_TSTAMP && + sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) + tskey = sk->sk_tskey++; + } /* * Let's try using as much space as possible. @@ -1397,6 +1402,8 @@ alloc_new_skb: /* Only the initial fragment is time stamped */ skb_shinfo(skb)->tx_flags = tx_flags; tx_flags = 0; + skb_shinfo(skb)->tskey = tskey; + tskey = 0; /* * Find where to start putting bytes -- cgit v1.2.3-59-g8ed1b From e7fd2885385157d46c85f282fc6d7d297db43e1f Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 4 Aug 2014 22:11:48 -0400 Subject: net-timestamp: SCHED timestamp on entering packet scheduler Kernel transmit latency is often incurred in the packet scheduler. Introduce a new timestamp on transmission just before entering the scheduler. When data travels through multiple devices (bonding, tunneling, ...) each device will export an individual timestamp. Signed-off-by: Willem de Bruijn Signed-off-by: David S. 
Miller --- include/linux/skbuff.h | 11 +++++++++-- include/uapi/linux/errqueue.h | 1 + include/uapi/linux/net_tstamp.h | 3 ++- net/core/dev.c | 4 ++++ net/core/skbuff.c | 16 ++++++++++++---- net/socket.c | 3 +++ 6 files changed, 31 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 0e35b3af7317..50e1e9b3a5a5 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -229,7 +229,7 @@ enum { /* generate hardware time stamp */ SKBTX_HW_TSTAMP = 1 << 0, - /* generate software time stamp */ + /* generate software time stamp when queueing packet to NIC */ SKBTX_SW_TSTAMP = 1 << 1, /* device driver is going to provide hardware time stamp */ @@ -247,9 +247,12 @@ enum { * all frags to avoid possible bad checksum */ SKBTX_SHARED_FRAG = 1 << 5, + + /* generate software time stamp when entering packet scheduling */ + SKBTX_SCHED_TSTAMP = 1 << 6, }; -#define SKBTX_ANY_SW_TSTAMP SKBTX_SW_TSTAMP +#define SKBTX_ANY_SW_TSTAMP (SKBTX_SW_TSTAMP | SKBTX_SCHED_TSTAMP) #define SKBTX_ANY_TSTAMP (SKBTX_HW_TSTAMP | SKBTX_ANY_SW_TSTAMP) /* @@ -2695,6 +2698,10 @@ static inline bool skb_defer_rx_timestamp(struct sk_buff *skb) void skb_complete_tx_timestamp(struct sk_buff *skb, struct skb_shared_hwtstamps *hwtstamps); +void __skb_tstamp_tx(struct sk_buff *orig_skb, + struct skb_shared_hwtstamps *hwtstamps, + struct sock *sk, int tstype); + /** * skb_tstamp_tx - queue clone of skb with send time stamps * @orig_skb: the original outgoing packet diff --git a/include/uapi/linux/errqueue.h b/include/uapi/linux/errqueue.h index accee72cae7c..17437cf297b7 100644 --- a/include/uapi/linux/errqueue.h +++ b/include/uapi/linux/errqueue.h @@ -39,6 +39,7 @@ struct scm_timestamping { */ enum { SCM_TSTAMP_SND, /* driver passed skb to NIC, or HW */ + SCM_TSTAMP_SCHED, /* data entered the packet scheduler */ }; #endif /* _UAPI_LINUX_ERRQUEUE_H */ diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h index 1e861d2e1a31..60733845fcdd 100644 --- a/include/uapi/linux/net_tstamp.h +++ b/include/uapi/linux/net_tstamp.h @@ -21,8 +21,9 @@ enum { SOF_TIMESTAMPING_SYS_HARDWARE = (1<<5), SOF_TIMESTAMPING_RAW_HARDWARE = (1<<6), SOF_TIMESTAMPING_OPT_ID = (1<<7), + SOF_TIMESTAMPING_TX_SCHED = (1<<8), - SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_ID, + SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_TX_SCHED, SOF_TIMESTAMPING_MASK = (SOF_TIMESTAMPING_LAST - 1) | SOF_TIMESTAMPING_LAST }; diff --git a/net/core/dev.c b/net/core/dev.c index b370230fe1d3..1c15b189c52b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -132,6 +132,7 @@ #include #include #include +#include #include "net-sysfs.h" @@ -2876,6 +2877,9 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv) skb_reset_mac_header(skb); + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_SCHED_TSTAMP)) + __skb_tstamp_tx(skb, NULL, skb->sk, SCM_TSTAMP_SCHED); + /* Disable soft irqs for various locks below. Also * stops preemption for RCU. 
*/ diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 0df4f1d14c5a..9705c0732aab 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3490,10 +3490,10 @@ int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb) } EXPORT_SYMBOL(sock_queue_err_skb); -void skb_tstamp_tx(struct sk_buff *orig_skb, - struct skb_shared_hwtstamps *hwtstamps) +void __skb_tstamp_tx(struct sk_buff *orig_skb, + struct skb_shared_hwtstamps *hwtstamps, + struct sock *sk, int tstype) { - struct sock *sk = orig_skb->sk; struct sock_exterr_skb *serr; struct sk_buff *skb; int err; @@ -3521,7 +3521,7 @@ void skb_tstamp_tx(struct sk_buff *orig_skb, memset(serr, 0, sizeof(*serr)); serr->ee.ee_errno = ENOMSG; serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING; - serr->ee.ee_info = SCM_TSTAMP_SND; + serr->ee.ee_info = tstype; if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) serr->ee.ee_data = skb_shinfo(skb)->tskey; @@ -3530,6 +3530,14 @@ void skb_tstamp_tx(struct sk_buff *orig_skb, if (err) kfree_skb(skb); } +EXPORT_SYMBOL_GPL(__skb_tstamp_tx); + +void skb_tstamp_tx(struct sk_buff *orig_skb, + struct skb_shared_hwtstamps *hwtstamps) +{ + return __skb_tstamp_tx(orig_skb, hwtstamps, orig_skb->sk, + SCM_TSTAMP_SND); +} EXPORT_SYMBOL_GPL(skb_tstamp_tx); void skb_complete_wifi_ack(struct sk_buff *skb, bool acked) diff --git a/net/socket.c b/net/socket.c index 255d9b802723..3a2778d71631 100644 --- a/net/socket.c +++ b/net/socket.c @@ -617,6 +617,9 @@ void sock_tx_timestamp(struct sock *sk, __u8 *tx_flags) *tx_flags |= SKBTX_HW_TSTAMP; if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_SOFTWARE) *tx_flags |= SKBTX_SW_TSTAMP; + if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_SCHED) + *tx_flags |= SKBTX_SCHED_TSTAMP; + if (sock_flag(sk, SOCK_WIFI_STATUS)) *tx_flags |= SKBTX_WIFI_STATUS; } -- cgit v1.2.3-59-g8ed1b From e1c8a607b28190cd09a271508aa3025d3c2f312e Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 4 Aug 2014 22:11:50 -0400 Subject: net-timestamp: ACK timestamp for bytestreams Add SOF_TIMESTAMPING_TX_ACK, a request for a tstamp when the last byte in the send() call is acknowledged. It implements the feature for TCP. The timestamp is generated when the TCP socket cumulative ACK is moved beyond the tracked seqno for the first time. The feature ignores SACK and FACK, because those acknowledge the specific byte, but not necessarily the entire contents of the buffer up to that byte. Signed-off-by: Willem de Bruijn Signed-off-by: David S. 
Miller --- include/linux/skbuff.h | 7 ++++++- include/uapi/linux/errqueue.h | 1 + include/uapi/linux/net_tstamp.h | 3 ++- net/ipv4/tcp_input.c | 6 ++++++ net/socket.c | 2 ++ 5 files changed, 17 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 50e1e9b3a5a5..11c270551d25 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -250,9 +250,14 @@ enum { /* generate software time stamp when entering packet scheduling */ SKBTX_SCHED_TSTAMP = 1 << 6, + + /* generate software timestamp on peer data acknowledgment */ + SKBTX_ACK_TSTAMP = 1 << 7, }; -#define SKBTX_ANY_SW_TSTAMP (SKBTX_SW_TSTAMP | SKBTX_SCHED_TSTAMP) +#define SKBTX_ANY_SW_TSTAMP (SKBTX_SW_TSTAMP | \ + SKBTX_SCHED_TSTAMP | \ + SKBTX_ACK_TSTAMP) #define SKBTX_ANY_TSTAMP (SKBTX_HW_TSTAMP | SKBTX_ANY_SW_TSTAMP) /* diff --git a/include/uapi/linux/errqueue.h b/include/uapi/linux/errqueue.h index 17437cf297b7..07bdce1f444a 100644 --- a/include/uapi/linux/errqueue.h +++ b/include/uapi/linux/errqueue.h @@ -40,6 +40,7 @@ struct scm_timestamping { enum { SCM_TSTAMP_SND, /* driver passed skb to NIC, or HW */ SCM_TSTAMP_SCHED, /* data entered the packet scheduler */ + SCM_TSTAMP_ACK, /* data acknowledged by peer */ }; #endif /* _UAPI_LINUX_ERRQUEUE_H */ diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h index 60733845fcdd..ff354021bb69 100644 --- a/include/uapi/linux/net_tstamp.h +++ b/include/uapi/linux/net_tstamp.h @@ -22,8 +22,9 @@ enum { SOF_TIMESTAMPING_RAW_HARDWARE = (1<<6), SOF_TIMESTAMPING_OPT_ID = (1<<7), SOF_TIMESTAMPING_TX_SCHED = (1<<8), + SOF_TIMESTAMPING_TX_ACK = (1<<9), - SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_TX_SCHED, + SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_TX_ACK, SOF_TIMESTAMPING_MASK = (SOF_TIMESTAMPING_LAST - 1) | SOF_TIMESTAMPING_LAST }; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 6a2984507755..a3d47af01906 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -74,6 +74,7 @@ #include #include #include +#include int sysctl_tcp_timestamps __read_mostly = 1; int sysctl_tcp_window_scaling __read_mostly = 1; @@ -3106,6 +3107,11 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, tp->retrans_stamp = 0; } + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_ACK_TSTAMP) && + between(skb_shinfo(skb)->tskey, prior_snd_una, + tp->snd_una + 1)) + __skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK); + if (!fully_acked) break; diff --git a/net/socket.c b/net/socket.c index 3a2778d71631..ae89569a2db5 100644 --- a/net/socket.c +++ b/net/socket.c @@ -619,6 +619,8 @@ void sock_tx_timestamp(struct sock *sk, __u8 *tx_flags) *tx_flags |= SKBTX_SW_TSTAMP; if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_SCHED) *tx_flags |= SKBTX_SCHED_TSTAMP; + if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK) + *tx_flags |= SKBTX_ACK_TSTAMP; if (sock_flag(sk, SOCK_WIFI_STATUS)) *tx_flags |= SKBTX_WIFI_STATUS; -- cgit v1.2.3-59-g8ed1b From df5601f9c3d831b4c478b004a1ed90a18643adbe Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Mon, 7 Oct 2013 15:37:19 +0200 Subject: tracehook_signal_handler: Remove sig, info, ka and regs These parameters are nowhere used, so we can remove them. 
Signed-off-by: Richard Weinberger --- include/linux/tracehook.h | 8 +------- kernel/signal.c | 2 +- 2 files changed, 2 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index 6f8ab7da27c4..84d497297c5f 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -133,10 +133,6 @@ static inline void tracehook_report_syscall_exit(struct pt_regs *regs, int step) /** * tracehook_signal_handler - signal handler setup is complete - * @sig: number of signal being delivered - * @info: siginfo_t of signal being delivered - * @ka: sigaction setting that chose the handler - * @regs: user register state * @stepping: nonzero if debugger single-step or block-step in use * * Called by the arch code after a signal handler has been set up. @@ -146,9 +142,7 @@ static inline void tracehook_report_syscall_exit(struct pt_regs *regs, int step) * Called without locks, shortly before returning to user mode * (or handling more signals). */ -static inline void tracehook_signal_handler(int sig, siginfo_t *info, - const struct k_sigaction *ka, - struct pt_regs *regs, int stepping) +static inline void tracehook_signal_handler(int stepping) { if (stepping) ptrace_notify(SIGTRAP); diff --git a/kernel/signal.c b/kernel/signal.c index a4077e90f19f..c4d47661cc86 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2379,7 +2379,7 @@ void signal_delivered(int sig, siginfo_t *info, struct k_sigaction *ka, if (!(ka->sa.sa_flags & SA_NODEFER)) sigaddset(&blocked, sig); set_current_blocked(&blocked); - tracehook_signal_handler(sig, info, ka, regs, stepping); + tracehook_signal_handler(stepping); } void signal_setup_done(int failed, struct ksignal *ksig, int stepping) -- cgit v1.2.3-59-g8ed1b From 10b1c7ac8bfed429cf3dcb0225482c8dc1485d8e Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Sun, 13 Jul 2014 13:36:04 +0200 Subject: Clean up signal_delivered() - Pass a ksignal struct to it - Remove unused regs parameter - Make it private as it's nowhere outside of kernel/signal.c is used Signed-off-by: Richard Weinberger --- include/linux/signal.h | 1 - kernel/signal.c | 21 ++++++++------------- 2 files changed, 8 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/signal.h b/include/linux/signal.h index c9e65360c49a..b005cc3dc1dc 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -282,7 +282,6 @@ struct ksignal { extern int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka, struct pt_regs *regs, void *cookie); extern void signal_setup_done(int failed, struct ksignal *ksig, int stepping); -extern void signal_delivered(int sig, siginfo_t *info, struct k_sigaction *ka, struct pt_regs *regs, int stepping); extern void exit_signals(struct task_struct *tsk); extern void kernel_sigaction(int, __sighandler_t); diff --git a/kernel/signal.c b/kernel/signal.c index c4d47661cc86..0d75cf875d44 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2353,19 +2353,15 @@ relock: /** * signal_delivered - - * @sig: number of signal being delivered - * @info: siginfo_t of signal being delivered - * @ka: sigaction setting that chose the handler - * @regs: user register state + * @ksig: kernel signal struct * @stepping: nonzero if debugger single-step or block-step in use * * This function should be called when a signal has successfully been - * delivered. It updates the blocked signals accordingly (@ka->sa.sa_mask + * delivered. 
It updates the blocked signals accordingly (@ksig->ka.sa.sa_mask * is always blocked, and the signal itself is blocked unless %SA_NODEFER - * is set in @ka->sa.sa_flags. Tracing is notified. + * is set in @ksig->ka.sa.sa_flags. Tracing is notified. */ -void signal_delivered(int sig, siginfo_t *info, struct k_sigaction *ka, - struct pt_regs *regs, int stepping) +static void signal_delivered(struct ksignal *ksig, int stepping) { sigset_t blocked; @@ -2375,9 +2371,9 @@ void signal_delivered(int sig, siginfo_t *info, struct k_sigaction *ka, simply clear the restore sigmask flag. */ clear_restore_sigmask(); - sigorsets(&blocked, ¤t->blocked, &ka->sa.sa_mask); - if (!(ka->sa.sa_flags & SA_NODEFER)) - sigaddset(&blocked, sig); + sigorsets(&blocked, ¤t->blocked, &ksig->ka.sa.sa_mask); + if (!(ksig->ka.sa.sa_flags & SA_NODEFER)) + sigaddset(&blocked, ksig->sig); set_current_blocked(&blocked); tracehook_signal_handler(stepping); } @@ -2387,8 +2383,7 @@ void signal_setup_done(int failed, struct ksignal *ksig, int stepping) if (failed) force_sigsegv(ksig->sig, current); else - signal_delivered(ksig->sig, &ksig->info, &ksig->ka, - signal_pt_regs(), stepping); + signal_delivered(ksig, stepping); } /* -- cgit v1.2.3-59-g8ed1b From 828b1f65d23cf8a68795739f6dd08fc8abd9ee64 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Mon, 7 Oct 2013 15:26:57 +0200 Subject: Rip out get_signal_to_deliver() Now we can turn get_signal() to the main function. Signed-off-by: Richard Weinberger --- include/linux/signal.h | 14 +------------- kernel/signal.c | 23 ++++++++++++----------- 2 files changed, 13 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/signal.h b/include/linux/signal.h index b005cc3dc1dc..750196fcc0a5 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -280,7 +280,7 @@ struct ksignal { int sig; }; -extern int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka, struct pt_regs *regs, void *cookie); +extern int get_signal(struct ksignal *ksig); extern void signal_setup_done(int failed, struct ksignal *ksig, int stepping); extern void exit_signals(struct task_struct *tsk); extern void kernel_sigaction(int, __sighandler_t); @@ -300,18 +300,6 @@ static inline void disallow_signal(int sig) kernel_sigaction(sig, SIG_IGN); } -/* - * Eventually that'll replace get_signal_to_deliver(); macro for now, - * to avoid nastiness with include order. 
- */ -#define get_signal(ksig) \ -({ \ - struct ksignal *p = (ksig); \ - p->sig = get_signal_to_deliver(&p->info, &p->ka, \ - signal_pt_regs(), NULL);\ - p->sig > 0; \ -}) - extern struct kmem_cache *sighand_cachep; int unhandled_signal(struct task_struct *tsk, int sig); diff --git a/kernel/signal.c b/kernel/signal.c index 0d75cf875d44..5c6020040388 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2166,8 +2166,7 @@ static int ptrace_signal(int signr, siginfo_t *info) return signr; } -int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka, - struct pt_regs *regs, void *cookie) +int get_signal(struct ksignal *ksig) { struct sighand_struct *sighand = current->sighand; struct signal_struct *signal = current->signal; @@ -2237,13 +2236,13 @@ relock: goto relock; } - signr = dequeue_signal(current, ¤t->blocked, info); + signr = dequeue_signal(current, ¤t->blocked, &ksig->info); if (!signr) break; /* will return 0 */ if (unlikely(current->ptrace) && signr != SIGKILL) { - signr = ptrace_signal(signr, info); + signr = ptrace_signal(signr, &ksig->info); if (!signr) continue; } @@ -2251,13 +2250,13 @@ relock: ka = &sighand->action[signr-1]; /* Trace actually delivered signals. */ - trace_signal_deliver(signr, info, ka); + trace_signal_deliver(signr, &ksig->info, ka); if (ka->sa.sa_handler == SIG_IGN) /* Do nothing. */ continue; if (ka->sa.sa_handler != SIG_DFL) { /* Run the handler. */ - *return_ka = *ka; + ksig->ka = *ka; if (ka->sa.sa_flags & SA_ONESHOT) ka->sa.sa_handler = SIG_DFL; @@ -2307,7 +2306,7 @@ relock: spin_lock_irq(&sighand->siglock); } - if (likely(do_signal_stop(info->si_signo))) { + if (likely(do_signal_stop(ksig->info.si_signo))) { /* It released the siglock. */ goto relock; } @@ -2328,7 +2327,7 @@ relock: if (sig_kernel_coredump(signr)) { if (print_fatal_signals) - print_fatal_signal(info->si_signo); + print_fatal_signal(ksig->info.si_signo); proc_coredump_connector(current); /* * If it was able to dump core, this kills all @@ -2338,17 +2337,19 @@ relock: * first and our do_group_exit call below will use * that value and ignore the one we pass it. */ - do_coredump(info); + do_coredump(&ksig->info); } /* * Death signals, no core dump. */ - do_group_exit(info->si_signo); + do_group_exit(ksig->info.si_signo); /* NOTREACHED */ } spin_unlock_irq(&sighand->siglock); - return signr; + + ksig->sig = signr; + return ksig->sig > 0; } /** -- cgit v1.2.3-59-g8ed1b From 72f15c03977acc8f06080e6c8a91d93bfc655a65 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Wed, 5 Mar 2014 15:15:22 +0100 Subject: sas_ss_flags: Remove nested ternary if ...to make it readable. Signed-off-by: Richard Weinberger --- include/linux/sched.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 0376b054a0d0..795ea2bc3d4f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2360,8 +2360,10 @@ static inline int on_sig_stack(unsigned long sp) static inline int sas_ss_flags(unsigned long sp) { - return (current->sas_ss_size == 0 ? SS_DISABLE - : on_sig_stack(sp) ? SS_ONSTACK : 0); + if (!current->sas_ss_size) + return SS_DISABLE; + + return on_sig_stack(sp) ? 
SS_ONSTACK : 0; } static inline unsigned long sigsp(unsigned long sp, struct ksignal *ksig) -- cgit v1.2.3-59-g8ed1b From c77dcacb397519b6ade8f08201a4a90a7f4f751e Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 6 Aug 2014 14:24:45 +0200 Subject: KVM: Move more code under CONFIG_HAVE_KVM_IRQFD Commits e4d57e1ee1ab (KVM: Move irq notifier implementation into eventfd.c, 2014-06-30) included the irq notifier code unconditionally in eventfd.c, while it was under CONFIG_HAVE_KVM_IRQCHIP before. Similarly, commit 297e21053a52 (KVM: Give IRQFD its own separate enabling Kconfig option, 2014-06-30) moved code from CONFIG_HAVE_IRQ_ROUTING to CONFIG_HAVE_KVM_IRQFD but forgot to move the pieces that used to be under CONFIG_HAVE_KVM_IRQCHIP. Together, this broke compilation without CONFIG_KVM_XICS. Fix by adding or changing the #ifdefs so that they point at CONFIG_HAVE_KVM_IRQFD. Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 2 + include/trace/events/kvm.h | 8 +-- virt/kvm/eventfd.c | 122 ++++++++++++++++++++++----------------------- virt/kvm/kvm_main.c | 2 + 4 files changed, 69 insertions(+), 65 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 8593d2e61cbf..a4c33b34fe3f 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -388,6 +388,8 @@ struct kvm { */ struct kvm_irq_routing_table __rcu *irq_routing; struct hlist_head mask_notifier_list; +#endif +#ifdef CONFIG_HAVE_KVM_IRQFD struct hlist_head irq_ack_notifier_list; #endif diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h index 131a0bda7aec..908925ace776 100644 --- a/include/trace/events/kvm.h +++ b/include/trace/events/kvm.h @@ -37,7 +37,7 @@ TRACE_EVENT(kvm_userspace_exit, __entry->errno < 0 ? 
-__entry->errno : __entry->reason) ); -#if defined(CONFIG_HAVE_KVM_IRQCHIP) +#if defined(CONFIG_HAVE_KVM_IRQFD) TRACE_EVENT(kvm_set_irq, TP_PROTO(unsigned int gsi, int level, int irq_source_id), TP_ARGS(gsi, level, irq_source_id), @@ -57,7 +57,7 @@ TRACE_EVENT(kvm_set_irq, TP_printk("gsi %u level %d source %d", __entry->gsi, __entry->level, __entry->irq_source_id) ); -#endif +#endif /* defined(CONFIG_HAVE_KVM_IRQFD) */ #if defined(__KVM_HAVE_IOAPIC) #define kvm_deliver_mode \ @@ -124,7 +124,7 @@ TRACE_EVENT(kvm_msi_set_irq, #endif /* defined(__KVM_HAVE_IOAPIC) */ -#if defined(CONFIG_HAVE_KVM_IRQCHIP) +#if defined(CONFIG_HAVE_KVM_IRQFD) TRACE_EVENT(kvm_ack_irq, TP_PROTO(unsigned int irqchip, unsigned int pin), @@ -149,7 +149,7 @@ TRACE_EVENT(kvm_ack_irq, #endif ); -#endif /* defined(CONFIG_HAVE_KVM_IRQCHIP) */ +#endif /* defined(CONFIG_HAVE_KVM_IRQFD) */ diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index f5f61548f60d..3c5981c87c3f 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -445,6 +445,67 @@ out: kfree(irqfd); return ret; } + +bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin) +{ + struct kvm_irq_ack_notifier *kian; + int gsi, idx; + + idx = srcu_read_lock(&kvm->irq_srcu); + gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); + if (gsi != -1) + hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, + link) + if (kian->gsi == gsi) { + srcu_read_unlock(&kvm->irq_srcu, idx); + return true; + } + + srcu_read_unlock(&kvm->irq_srcu, idx); + + return false; +} +EXPORT_SYMBOL_GPL(kvm_irq_has_notifier); + +void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) +{ + struct kvm_irq_ack_notifier *kian; + int gsi, idx; + + trace_kvm_ack_irq(irqchip, pin); + + idx = srcu_read_lock(&kvm->irq_srcu); + gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); + if (gsi != -1) + hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, + link) + if (kian->gsi == gsi) + kian->irq_acked(kian); + srcu_read_unlock(&kvm->irq_srcu, idx); +} + +void kvm_register_irq_ack_notifier(struct kvm *kvm, + struct kvm_irq_ack_notifier *kian) +{ + mutex_lock(&kvm->irq_lock); + hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list); + mutex_unlock(&kvm->irq_lock); +#ifdef __KVM_HAVE_IOAPIC + kvm_vcpu_request_scan_ioapic(kvm); +#endif +} + +void kvm_unregister_irq_ack_notifier(struct kvm *kvm, + struct kvm_irq_ack_notifier *kian) +{ + mutex_lock(&kvm->irq_lock); + hlist_del_init_rcu(&kian->link); + mutex_unlock(&kvm->irq_lock); + synchronize_srcu(&kvm->irq_srcu); +#ifdef __KVM_HAVE_IOAPIC + kvm_vcpu_request_scan_ioapic(kvm); +#endif +} #endif void @@ -867,64 +928,3 @@ kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) return kvm_assign_ioeventfd(kvm, args); } - -bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin) -{ - struct kvm_irq_ack_notifier *kian; - int gsi, idx; - - idx = srcu_read_lock(&kvm->irq_srcu); - gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); - if (gsi != -1) - hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, - link) - if (kian->gsi == gsi) { - srcu_read_unlock(&kvm->irq_srcu, idx); - return true; - } - - srcu_read_unlock(&kvm->irq_srcu, idx); - - return false; -} -EXPORT_SYMBOL_GPL(kvm_irq_has_notifier); - -void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) -{ - struct kvm_irq_ack_notifier *kian; - int gsi, idx; - - trace_kvm_ack_irq(irqchip, pin); - - idx = srcu_read_lock(&kvm->irq_srcu); - gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); - if (gsi != -1) - 
hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, - link) - if (kian->gsi == gsi) - kian->irq_acked(kian); - srcu_read_unlock(&kvm->irq_srcu, idx); -} - -void kvm_register_irq_ack_notifier(struct kvm *kvm, - struct kvm_irq_ack_notifier *kian) -{ - mutex_lock(&kvm->irq_lock); - hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list); - mutex_unlock(&kvm->irq_lock); -#ifdef __KVM_HAVE_IOAPIC - kvm_vcpu_request_scan_ioapic(kvm); -#endif -} - -void kvm_unregister_irq_ack_notifier(struct kvm *kvm, - struct kvm_irq_ack_notifier *kian) -{ - mutex_lock(&kvm->irq_lock); - hlist_del_init_rcu(&kian->link); - mutex_unlock(&kvm->irq_lock); - synchronize_srcu(&kvm->irq_srcu); -#ifdef __KVM_HAVE_IOAPIC - kvm_vcpu_request_scan_ioapic(kvm); -#endif -} diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index a69a623938b8..33712fb26eb1 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -465,6 +465,8 @@ static struct kvm *kvm_create_vm(unsigned long type) #ifdef CONFIG_HAVE_KVM_IRQCHIP INIT_HLIST_HEAD(&kvm->mask_notifier_list); +#endif +#ifdef CONFIG_HAVE_KVM_IRQFD INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list); #endif -- cgit v1.2.3-59-g8ed1b From 372ba8cb46b271a7662b92cbefedee56725f6bd0 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 6 Aug 2014 14:19:21 +0100 Subject: cpuidle: menu: Lookup CPU runqueues less The menu governer makes separate lookups of the CPU runqueue to get load and number of IO waiters but it can be done with a single lookup. Signed-off-by: Mel Gorman Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/governors/menu.c | 17 +++++++---------- include/linux/sched.h | 3 +-- kernel/sched/core.c | 7 +++++++ kernel/sched/proc.c | 7 ------- 4 files changed, 15 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index f55d8260ec43..27702742b319 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -134,12 +134,9 @@ struct menu_device { #define LOAD_INT(x) ((x) >> FSHIFT) #define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100) -static int get_loadavg(void) +static inline int get_loadavg(unsigned long load) { - unsigned long this = this_cpu_load(); - - - return LOAD_INT(this) * 10 + LOAD_FRAC(this) / 10; + return LOAD_INT(load) * 10 + LOAD_FRAC(load) / 10; } static inline int which_bucket(unsigned int duration, unsigned long nr_iowaiters) @@ -175,13 +172,13 @@ static inline int which_bucket(unsigned int duration, unsigned long nr_iowaiters * to be, the higher this multiplier, and thus the higher * the barrier to go to an expensive C state. */ -static inline int performance_multiplier(unsigned long nr_iowaiters) +static inline int performance_multiplier(unsigned long nr_iowaiters, unsigned long load) { int mult = 1; /* for higher loadavg, we are more reluctant */ - mult += 2 * get_loadavg(); + mult += 2 * get_loadavg(load); /* for IO wait tasks (per cpu!) 
we add 5x each */ mult += 10 * nr_iowaiters; @@ -296,7 +293,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) int latency_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY); int i; unsigned int interactivity_req; - unsigned long nr_iowaiters; + unsigned long nr_iowaiters, cpu_load; if (data->needs_update) { menu_update(drv, dev); @@ -312,7 +309,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) /* determine the expected residency time, round up */ data->next_timer_us = ktime_to_us(tick_nohz_get_sleep_length()); - nr_iowaiters = nr_iowait_cpu(smp_processor_id()); + get_iowait_load(&nr_iowaiters, &cpu_load); data->bucket = which_bucket(data->next_timer_us, nr_iowaiters); /* @@ -331,7 +328,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) * duration / latency ratio. Adjust the latency limit if * necessary. */ - interactivity_req = data->predicted_us / performance_multiplier(nr_iowaiters); + interactivity_req = data->predicted_us / performance_multiplier(nr_iowaiters, cpu_load); if (latency_req > interactivity_req) latency_req = interactivity_req; diff --git a/include/linux/sched.h b/include/linux/sched.h index 306f4f0c987a..641bd954bb5d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -168,8 +168,7 @@ extern int nr_processes(void); extern unsigned long nr_running(void); extern unsigned long nr_iowait(void); extern unsigned long nr_iowait_cpu(int cpu); -extern unsigned long this_cpu_load(void); - +extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load); extern void calc_global_load(unsigned long ticks); extern void update_cpu_load_nohz(void); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 3bdf01b494fe..863ef1d19563 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2385,6 +2385,13 @@ unsigned long nr_iowait_cpu(int cpu) return atomic_read(&this->nr_iowait); } +void get_iowait_load(unsigned long *nr_waiters, unsigned long *load) +{ + struct rq *this = this_rq(); + *nr_waiters = atomic_read(&this->nr_iowait); + *load = this->cpu_load[0]; +} + #ifdef CONFIG_SMP /* diff --git a/kernel/sched/proc.c b/kernel/sched/proc.c index 16f5a30f9c88..8ecd552fe4f2 100644 --- a/kernel/sched/proc.c +++ b/kernel/sched/proc.c @@ -8,13 +8,6 @@ #include "sched.h" -unsigned long this_cpu_load(void) -{ - struct rq *this = this_rq(); - return this->cpu_load[0]; -} - - /* * Global load-average calculations * -- cgit v1.2.3-59-g8ed1b From 8ba8fa917093510cdcb4ec8ff8b9603e1b525658 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 6 Aug 2014 16:03:26 -0700 Subject: fsnotify: rename event handling functions Rename fsnotify_add_notify_event() to fsnotify_add_event() since the "notify" part is duplicit. Rename fsnotify_remove_notify_event() and fsnotify_peek_notify_event() to fsnotify_remove_first_event() and fsnotify_peek_first_event() respectively since "notify" part is duplicit and they really look at the first event in the queue. 
[akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Jan Kara Cc: Eric Paris Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/notify/fanotify/fanotify.c | 2 +- fs/notify/fanotify/fanotify_user.c | 2 +- fs/notify/inotify/inotify_fsnotify.c | 2 +- fs/notify/inotify/inotify_user.c | 4 ++-- fs/notify/notification.c | 19 ++++++++++--------- include/linux/fsnotify_backend.h | 12 ++++++------ 6 files changed, 21 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index ee9cb3795c2b..fdeb36b70c65 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -210,7 +210,7 @@ static int fanotify_handle_event(struct fsnotify_group *group, return -ENOMEM; fsn_event = &event->fse; - ret = fsnotify_add_notify_event(group, fsn_event, fanotify_merge); + ret = fsnotify_add_event(group, fsn_event, fanotify_merge); if (ret) { /* Permission events shouldn't be merged */ BUG_ON(ret == 1 && mask & FAN_ALL_PERM_EVENTS); diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 3fdc8a3e1134..fbf2210823ab 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -66,7 +66,7 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group, /* held the notification_mutex the whole time, so this is the * same event we peeked above */ - return fsnotify_remove_notify_event(group); + return fsnotify_remove_first_event(group); } static int create_fd(struct fsnotify_group *group, diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c index 43ab1e1a07a2..0f88bc0b4e6c 100644 --- a/fs/notify/inotify/inotify_fsnotify.c +++ b/fs/notify/inotify/inotify_fsnotify.c @@ -108,7 +108,7 @@ int inotify_handle_event(struct fsnotify_group *group, if (len) strcpy(event->name, file_name); - ret = fsnotify_add_notify_event(group, fsn_event, inotify_merge); + ret = fsnotify_add_event(group, fsn_event, inotify_merge); if (ret) { /* Our event wasn't used in the end. Free it. */ fsnotify_destroy_event(group, fsn_event); diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index cc423a30a0c8..daf76652fe58 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -149,7 +149,7 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group, if (fsnotify_notify_queue_is_empty(group)) return NULL; - event = fsnotify_peek_notify_event(group); + event = fsnotify_peek_first_event(group); pr_debug("%s: group=%p event=%p\n", __func__, group, event); @@ -159,7 +159,7 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group, /* held the notification_mutex the whole time, so this is the * same event we peeked above */ - fsnotify_remove_notify_event(group); + fsnotify_remove_first_event(group); return event; } diff --git a/fs/notify/notification.c b/fs/notify/notification.c index 1e58402171a5..1d394220acbe 100644 --- a/fs/notify/notification.c +++ b/fs/notify/notification.c @@ -83,10 +83,10 @@ void fsnotify_destroy_event(struct fsnotify_group *group, * added to the queue, 1 if the event was merged with some other queued event, * 2 if the queue of events has overflown. 
*/ -int fsnotify_add_notify_event(struct fsnotify_group *group, - struct fsnotify_event *event, - int (*merge)(struct list_head *, - struct fsnotify_event *)) +int fsnotify_add_event(struct fsnotify_group *group, + struct fsnotify_event *event, + int (*merge)(struct list_head *, + struct fsnotify_event *)) { int ret = 0; struct list_head *list = &group->notification_list; @@ -128,7 +128,7 @@ queue: * Remove and return the first event from the notification list. It is the * responsibility of the caller to destroy the obtained event */ -struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group) +struct fsnotify_event *fsnotify_remove_first_event(struct fsnotify_group *group) { struct fsnotify_event *event; @@ -140,7 +140,7 @@ struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group struct fsnotify_event, list); /* * We need to init list head for the case of overflow event so that - * check in fsnotify_add_notify_events() works + * check in fsnotify_add_event() works */ list_del_init(&event->list); group->q_len--; @@ -149,9 +149,10 @@ struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group } /* - * This will not remove the event, that must be done with fsnotify_remove_notify_event() + * This will not remove the event, that must be done with + * fsnotify_remove_first_event() */ -struct fsnotify_event *fsnotify_peek_notify_event(struct fsnotify_group *group) +struct fsnotify_event *fsnotify_peek_first_event(struct fsnotify_group *group) { BUG_ON(!mutex_is_locked(&group->notification_mutex)); @@ -169,7 +170,7 @@ void fsnotify_flush_notify(struct fsnotify_group *group) mutex_lock(&group->notification_mutex); while (!fsnotify_notify_queue_is_empty(group)) { - event = fsnotify_remove_notify_event(group); + event = fsnotify_remove_first_event(group); fsnotify_destroy_event(group, event); } mutex_unlock(&group->notification_mutex); diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index fc7718c6bd3e..a6e899943363 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -322,16 +322,16 @@ extern int fsnotify_fasync(int fd, struct file *file, int on); extern void fsnotify_destroy_event(struct fsnotify_group *group, struct fsnotify_event *event); /* attach the event to the group notification queue */ -extern int fsnotify_add_notify_event(struct fsnotify_group *group, - struct fsnotify_event *event, - int (*merge)(struct list_head *, - struct fsnotify_event *)); +extern int fsnotify_add_event(struct fsnotify_group *group, + struct fsnotify_event *event, + int (*merge)(struct list_head *, + struct fsnotify_event *)); /* true if the group notification queue is empty */ extern bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group); /* return, but do not dequeue the first event on the notification queue */ -extern struct fsnotify_event *fsnotify_peek_notify_event(struct fsnotify_group *group); +extern struct fsnotify_event *fsnotify_peek_first_event(struct fsnotify_group *group); /* return AND dequeue the first event on the notification queue */ -extern struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group); +extern struct fsnotify_event *fsnotify_remove_first_event(struct fsnotify_group *group); /* functions used to manipulate the marks attached to inodes */ -- cgit v1.2.3-59-g8ed1b From 5838d4442bd5971687b72221736222637e03140d Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 6 Aug 2014 16:03:28 -0700 Subject: fanotify: fix double free 
of pending permission events Commit 85816794240b ("fanotify: Fix use after free for permission events") introduced a double free issue for permission events which are pending in group's notification queue while group is being destroyed. These events are freed from fanotify_handle_event() but they are not removed from groups notification queue and thus they get freed again from fsnotify_flush_notify(). Fix the problem by removing permission events from notification queue before freeing them if we skip processing access response. Also expand comments in fanotify_release() to explain group shutdown in detail. Fixes: 85816794240b9659e66e4d9b0df7c6e814e5f603 Signed-off-by: Jan Kara Reported-by: Douglas Leeder Tested-by: Douglas Leeder Reported-by: Heinrich Schuchard Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/notify/fanotify/fanotify.c | 9 ++++++++- fs/notify/fanotify/fanotify_user.c | 12 ++++++++++++ fs/notify/notification.c | 18 +++++++++++++++++- include/linux/fsnotify_backend.h | 2 ++ 4 files changed, 39 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index fdeb36b70c65..30d3addfad75 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -70,8 +70,15 @@ static int fanotify_get_response(struct fsnotify_group *group, wait_event(group->fanotify_data.access_waitq, event->response || atomic_read(&group->fanotify_data.bypass_perm)); - if (!event->response) /* bypass_perm set */ + if (!event->response) { /* bypass_perm set */ + /* + * Event was canceled because group is being destroyed. Remove + * it from group's event list because we are responsible for + * freeing the permission event. + */ + fsnotify_remove_event(group, &event->fae.fse); return 0; + } /* userspace responded, convert to something usable */ switch (event->response) { diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index fbf2210823ab..b13992a41bd9 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -359,6 +359,11 @@ static int fanotify_release(struct inode *ignored, struct file *file) #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS struct fanotify_perm_event_info *event, *next; + /* + * There may be still new events arriving in the notification queue + * but since userspace cannot use fanotify fd anymore, no event can + * enter or leave access_list by now. + */ spin_lock(&group->fanotify_data.access_lock); atomic_inc(&group->fanotify_data.bypass_perm); @@ -373,6 +378,13 @@ static int fanotify_release(struct inode *ignored, struct file *file) } spin_unlock(&group->fanotify_data.access_lock); + /* + * Since bypass_perm is set, newly queued events will not wait for + * access response. Wake up the already sleeping ones now. + * synchronize_srcu() in fsnotify_destroy_group() will wait for all + * processes sleeping in fanotify_handle_event() waiting for access + * response and thus also for all permission events to be freed. + */ wake_up(&group->fanotify_data.access_waitq); #endif diff --git a/fs/notify/notification.c b/fs/notify/notification.c index 1d394220acbe..a95d8e037aeb 100644 --- a/fs/notify/notification.c +++ b/fs/notify/notification.c @@ -73,7 +73,8 @@ void fsnotify_destroy_event(struct fsnotify_group *group, /* Overflow events are per-group and we don't want to free them */ if (!event || event->mask == FS_Q_OVERFLOW) return; - + /* If the event is still queued, we have a problem... 
*/ + WARN_ON(!list_empty(&event->list)); group->ops->free_event(event); } @@ -124,6 +125,21 @@ queue: return ret; } +/* + * Remove @event from group's notification queue. It is the responsibility of + * the caller to destroy the event. + */ +void fsnotify_remove_event(struct fsnotify_group *group, + struct fsnotify_event *event) +{ + mutex_lock(&group->notification_mutex); + if (!list_empty(&event->list)) { + list_del_init(&event->list); + group->q_len--; + } + mutex_unlock(&group->notification_mutex); +} + /* * Remove and return the first event from the notification list. It is the * responsibility of the caller to destroy the obtained event diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index a6e899943363..ca060d7c4fa6 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -326,6 +326,8 @@ extern int fsnotify_add_event(struct fsnotify_group *group, struct fsnotify_event *event, int (*merge)(struct list_head *, struct fsnotify_event *)); +/* Remove passed event from groups notification queue */ +extern void fsnotify_remove_event(struct fsnotify_group *group, struct fsnotify_event *event); /* true if the group notification queue is empty */ extern bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group); /* return, but do not dequeue the first event on the notification queue */ -- cgit v1.2.3-59-g8ed1b From e19318116048d5fbdb8d230d6d37625834b503cd Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Wed, 6 Aug 2014 16:04:59 -0700 Subject: mm/page_alloc.c: add __meminit to alloc_pages_exact_nid() alloc_pages_exact_nid() is only called by __meminit alloc_page_cgroup() Signed-off-by: Fabian Frederick Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 2 +- mm/page_alloc.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 6eb1fb37de9a..5e7219dc0fae 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -360,7 +360,7 @@ extern unsigned long get_zeroed_page(gfp_t gfp_mask); void *alloc_pages_exact(size_t size, gfp_t gfp_mask); void free_pages_exact(void *virt, size_t size); /* This is different from alloc_pages_exact_node !!! */ -void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask); +void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask); #define __get_free_page(gfp_mask) \ __get_free_pages((gfp_mask), 0) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index ef44ad736ca1..fd4322cc096d 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2962,7 +2962,7 @@ EXPORT_SYMBOL(alloc_pages_exact); * Note this is not alloc_pages_exact_node() which allocates on a specific node, * but is not exact. */ -void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask) +void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask) { unsigned order = get_order(size); struct page *p = alloc_pages_node(nid, gfp_mask, order); -- cgit v1.2.3-59-g8ed1b From 2cfb3665e864755400dc57b6ceee2ebb6b382910 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Wed, 6 Aug 2014 16:05:03 -0700 Subject: include/linux/memblock.h: add __init to memblock_set_bottom_up() memblock_set_bottom_up() is only called by __init cmdline_parse_movable_node() and __init numa_init(). 
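Both of these annotation patches follow the same pattern: a function reachable only from initialisation code is moved into an init section so its text can be discarded once that phase is over, and any accidental late caller then shows up as a section-mismatch warning from modpost. A minimal sketch of the pattern, with invented function names for illustration only:

#include <linux/init.h>
#include <linux/gfp.h>

/* Only called from __init code, so it is discarded with .init.text after boot. */
static int __init example_parse_early_param(char *arg)
{
        return (arg && *arg) ? 0 : -EINVAL;
}

/*
 * Only called while memory initialisation (including hotplug) can still run;
 * without CONFIG_MEMORY_HOTPLUG this is dropped just like __init code.
 */
static void * __meminit example_alloc_node_table(int nid, size_t size)
{
        return alloc_pages_exact_nid(nid, size, GFP_KERNEL);
}
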
Signed-off-by: Fabian Frederick Reviewed-by: Tang Chen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memblock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index b660e05b63d4..e8cc45307f8f 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -249,7 +249,7 @@ phys_addr_t memblock_alloc(phys_addr_t size, phys_addr_t align); /* * Set the allocation direction to bottom-up or top-down. */ -static inline void memblock_set_bottom_up(bool enable) +static inline void __init memblock_set_bottom_up(bool enable) { memblock.bottom_up = enable; } @@ -264,7 +264,7 @@ static inline bool memblock_bottom_up(void) return memblock.bottom_up; } #else -static inline void memblock_set_bottom_up(bool enable) {} +static inline void __init memblock_set_bottom_up(bool enable) {} static inline bool memblock_bottom_up(void) { return false; } #endif -- cgit v1.2.3-59-g8ed1b From 4f7c6b49c45a398d72763d1f0e64ddff8b3653c7 Mon Sep 17 00:00:00 2001 From: Tang Chen Date: Wed, 6 Aug 2014 16:05:13 -0700 Subject: mem-hotplug: introduce MMOP_OFFLINE to replace the hard coding -1 In store_mem_state(), we have: ... 334 else if (!strncmp(buf, "offline", min_t(int, count, 7))) 335 online_type = -1; ... 355 case -1: 356 ret = device_offline(&mem->dev); 357 break; ... Here, "offline" is hard coded as -1. This patch does the following renaming: ONLINE_KEEP -> MMOP_ONLINE_KEEP ONLINE_KERNEL -> MMOP_ONLINE_KERNEL ONLINE_MOVABLE -> MMOP_ONLINE_MOVABLE and introduces MMOP_OFFLINE = -1 to avoid hard coding. Signed-off-by: Tang Chen Cc: Hu Tao Cc: Greg Kroah-Hartman Cc: Lai Jiangshan Cc: Yasuaki Ishimatsu Cc: Gu Zheng Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/memory.c | 18 +++++++++--------- include/linux/memory_hotplug.h | 9 +++++---- mm/memory_hotplug.c | 9 ++++++--- 3 files changed, 20 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/memory.c b/drivers/base/memory.c index c6707dfb5284..7c60ed27e711 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -284,7 +284,7 @@ static int memory_subsys_online(struct device *dev) * attribute and need to set the online_type. */ if (mem->online_type < 0) - mem->online_type = ONLINE_KEEP; + mem->online_type = MMOP_ONLINE_KEEP; ret = memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE); @@ -316,22 +316,22 @@ store_mem_state(struct device *dev, return ret; if (sysfs_streq(buf, "online_kernel")) - online_type = ONLINE_KERNEL; + online_type = MMOP_ONLINE_KERNEL; else if (sysfs_streq(buf, "online_movable")) - online_type = ONLINE_MOVABLE; + online_type = MMOP_ONLINE_MOVABLE; else if (sysfs_streq(buf, "online")) - online_type = ONLINE_KEEP; + online_type = MMOP_ONLINE_KEEP; else if (sysfs_streq(buf, "offline")) - online_type = -1; + online_type = MMOP_OFFLINE; else { ret = -EINVAL; goto err; } switch (online_type) { - case ONLINE_KERNEL: - case ONLINE_MOVABLE: - case ONLINE_KEEP: + case MMOP_ONLINE_KERNEL: + case MMOP_ONLINE_MOVABLE: + case MMOP_ONLINE_KEEP: /* * mem->online_type is not protected so there can be a * race here. 
However, when racing online, the first @@ -342,7 +342,7 @@ store_mem_state(struct device *dev, mem->online_type = online_type; ret = device_online(&mem->dev); break; - case -1: + case MMOP_OFFLINE: ret = device_offline(&mem->dev); break; default: diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 010d125bffbf..79dd9eca054f 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -26,11 +26,12 @@ enum { MEMORY_HOTPLUG_MAX_BOOTMEM_TYPE = NODE_INFO, }; -/* Types for control the zone type of onlined memory */ +/* Types for control the zone type of onlined and offlined memory */ enum { - ONLINE_KEEP, - ONLINE_KERNEL, - ONLINE_MOVABLE, + MMOP_OFFLINE = -1, + MMOP_ONLINE_KEEP, + MMOP_ONLINE_KERNEL, + MMOP_ONLINE_MOVABLE, }; /* diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 3557e8c9e8de..a3797d3fd8a4 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -977,15 +977,18 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ zone = page_zone(pfn_to_page(pfn)); ret = -EINVAL; - if ((zone_idx(zone) > ZONE_NORMAL || online_type == ONLINE_MOVABLE) && + if ((zone_idx(zone) > ZONE_NORMAL || + online_type == MMOP_ONLINE_MOVABLE) && !can_online_high_movable(zone)) goto out; - if (online_type == ONLINE_KERNEL && zone_idx(zone) == ZONE_MOVABLE) { + if (online_type == MMOP_ONLINE_KERNEL && + zone_idx(zone) == ZONE_MOVABLE) { if (move_pfn_range_left(zone - 1, zone, pfn, pfn + nr_pages)) goto out; } - if (online_type == ONLINE_MOVABLE && zone_idx(zone) == ZONE_MOVABLE - 1) { + if (online_type == MMOP_ONLINE_MOVABLE && + zone_idx(zone) == ZONE_MOVABLE - 1) { if (move_pfn_range_right(zone, zone + 1, pfn, pfn + nr_pages)) goto out; } -- cgit v1.2.3-59-g8ed1b From a254129e8686bff7a340b58f35241b04927e81c0 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Wed, 6 Aug 2014 16:05:25 -0700 Subject: CMA: generalize CMA reserved area management functionality Currently, there are two users on CMA functionality, one is the DMA subsystem and the other is the KVM on powerpc. They have their own code to manage CMA reserved area even if they looks really similar. From my guess, it is caused by some needs on bitmap management. KVM side wants to maintain bitmap not for 1 page, but for more size. Eventually it use bitmap where one bit represents 64 pages. When I implement CMA related patches, I should change those two places to apply my change and it seem to be painful to me. I want to change this situation and reduce future code management overhead through this patch. This change could also help developer who want to use CMA in their new feature development, since they can use CMA easily without copying & pasting this reserved area management code. In previous patches, we have prepared some features to generalize CMA reserved area management and now it's time to do it. This patch moves core functions to mm/cma.c and change DMA APIs to use these functions. There is no functional change in DMA APIs. 
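Once the generic helpers exist, any subsystem can drive a reserved area through the same few entry points the DMA layer now uses. A rough sketch of a hypothetical client follows; all example_* names are invented, and the struct cma pointer itself would come from an early cma_declare_contiguous() call in arch setup code:

#include <linux/cma.h>
#include <linux/mm.h>
#include <linux/kernel.h>

static struct cma *example_cma;        /* filled in by early reservation code */

static struct page *example_get_buffer(size_t size)
{
        int count = PAGE_ALIGN(size) >> PAGE_SHIFT;

        /* 'align' is an order in pages; 0 means nothing beyond page alignment. */
        return cma_alloc(example_cma, count, 0);
}

static void example_put_buffer(struct page *pages, size_t size)
{
        int count = PAGE_ALIGN(size) >> PAGE_SHIFT;

        /* cma_release() refuses pages that do not belong to the area. */
        if (!cma_release(example_cma, pages, count))
                pr_warn("example: pages %p are not from the example CMA area\n",
                        pages);
}

With the area factored out this way, dma_alloc_from_contiguous() and dma_release_from_contiguous() in the hunks above reduce to exactly this pattern, with dev_get_cma_area(dev) supplying the area.
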
Signed-off-by: Joonsoo Kim Acked-by: Michal Nazarewicz Acked-by: Zhang Yanfei Acked-by: Minchan Kim Reviewed-by: Aneesh Kumar K.V Cc: Alexander Graf Cc: Aneesh Kumar K.V Cc: Gleb Natapov Acked-by: Marek Szyprowski Tested-by: Marek Szyprowski Cc: Paolo Bonzini Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/mm/dma-mapping.c | 1 + drivers/base/Kconfig | 10 -- drivers/base/dma-contiguous.c | 280 +--------------------------------- include/linux/cma.h | 27 ++++ include/linux/dma-contiguous.h | 11 +- mm/Kconfig | 11 ++ mm/Makefile | 1 + mm/cma.c | 333 +++++++++++++++++++++++++++++++++++++++++ 8 files changed, 383 insertions(+), 291 deletions(-) create mode 100644 include/linux/cma.h create mode 100644 mm/cma.c (limited to 'include/linux') diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 1f88db06b133..7a996aaa061e 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig index 88500fed3c7a..4e7f0ff83ae7 100644 --- a/drivers/base/Kconfig +++ b/drivers/base/Kconfig @@ -289,16 +289,6 @@ config CMA_ALIGNMENT If unsure, leave the default value "8". -config CMA_AREAS - int "Maximum count of the CMA device-private areas" - default 7 - help - CMA allows to create CMA areas for particular devices. This parameter - sets the maximum number of such device private CMA areas in the - system. - - If unsure, leave the default value "7". - endif endmenu diff --git a/drivers/base/dma-contiguous.c b/drivers/base/dma-contiguous.c index ad8a85bf852f..0411c1c57005 100644 --- a/drivers/base/dma-contiguous.c +++ b/drivers/base/dma-contiguous.c @@ -24,25 +24,9 @@ #include #include -#include -#include -#include #include -#include -#include -#include #include -#include - -struct cma { - unsigned long base_pfn; - unsigned long count; - unsigned long *bitmap; - unsigned int order_per_bit; /* Order of pages represented by one bit */ - struct mutex lock; -}; - -struct cma *dma_contiguous_default_area; +#include #ifdef CONFIG_CMA_SIZE_MBYTES #define CMA_SIZE_MBYTES CONFIG_CMA_SIZE_MBYTES @@ -50,6 +34,8 @@ struct cma *dma_contiguous_default_area; #define CMA_SIZE_MBYTES 0 #endif +struct cma *dma_contiguous_default_area; + /* * Default global CMA area size can be defined in kernel's .config. 
* This is useful mainly for distro maintainers to create a kernel @@ -156,169 +142,6 @@ void __init dma_contiguous_reserve(phys_addr_t limit) } } -static DEFINE_MUTEX(cma_mutex); - -static unsigned long cma_bitmap_aligned_mask(struct cma *cma, int align_order) -{ - return (1UL << (align_order >> cma->order_per_bit)) - 1; -} - -static unsigned long cma_bitmap_maxno(struct cma *cma) -{ - return cma->count >> cma->order_per_bit; -} - -static unsigned long cma_bitmap_pages_to_bits(struct cma *cma, - unsigned long pages) -{ - return ALIGN(pages, 1UL << cma->order_per_bit) >> cma->order_per_bit; -} - -static void cma_clear_bitmap(struct cma *cma, unsigned long pfn, int count) -{ - unsigned long bitmap_no, bitmap_count; - - bitmap_no = (pfn - cma->base_pfn) >> cma->order_per_bit; - bitmap_count = cma_bitmap_pages_to_bits(cma, count); - - mutex_lock(&cma->lock); - bitmap_clear(cma->bitmap, bitmap_no, bitmap_count); - mutex_unlock(&cma->lock); -} - -static int __init cma_activate_area(struct cma *cma) -{ - int bitmap_size = BITS_TO_LONGS(cma_bitmap_maxno(cma)) * sizeof(long); - unsigned long base_pfn = cma->base_pfn, pfn = base_pfn; - unsigned i = cma->count >> pageblock_order; - struct zone *zone; - - cma->bitmap = kzalloc(bitmap_size, GFP_KERNEL); - - if (!cma->bitmap) - return -ENOMEM; - - WARN_ON_ONCE(!pfn_valid(pfn)); - zone = page_zone(pfn_to_page(pfn)); - - do { - unsigned j; - base_pfn = pfn; - for (j = pageblock_nr_pages; j; --j, pfn++) { - WARN_ON_ONCE(!pfn_valid(pfn)); - /* - * alloc_contig_range requires the pfn range - * specified to be in the same zone. Make this - * simple by forcing the entire CMA resv range - * to be in the same zone. - */ - if (page_zone(pfn_to_page(pfn)) != zone) - goto err; - } - init_cma_reserved_pageblock(pfn_to_page(base_pfn)); - } while (--i); - - mutex_init(&cma->lock); - return 0; - -err: - kfree(cma->bitmap); - return -EINVAL; -} - -static struct cma cma_areas[MAX_CMA_AREAS]; -static unsigned cma_area_count; - -static int __init cma_init_reserved_areas(void) -{ - int i; - - for (i = 0; i < cma_area_count; i++) { - int ret = cma_activate_area(&cma_areas[i]); - if (ret) - return ret; - } - - return 0; -} -core_initcall(cma_init_reserved_areas); - -static int __init __dma_contiguous_reserve_area(phys_addr_t size, - phys_addr_t base, phys_addr_t limit, - phys_addr_t alignment, unsigned int order_per_bit, - struct cma **res_cma, bool fixed) -{ - struct cma *cma = &cma_areas[cma_area_count]; - int ret = 0; - - pr_debug("%s(size %lx, base %08lx, limit %08lx alignment %08lx)\n", - __func__, (unsigned long)size, (unsigned long)base, - (unsigned long)limit, (unsigned long)alignment); - - if (cma_area_count == ARRAY_SIZE(cma_areas)) { - pr_err("Not enough slots for CMA reserved regions!\n"); - return -ENOSPC; - } - - if (!size) - return -EINVAL; - - if (alignment && !is_power_of_2(alignment)) - return -EINVAL; - - /* - * Sanitise input arguments. - * Pages both ends in CMA area could be merged into adjacent unmovable - * migratetype page by page allocator's buddy algorithm. In the case, - * you couldn't get a contiguous memory, which is not what we want. 
- */ - alignment = max(alignment, - (phys_addr_t)PAGE_SIZE << max(MAX_ORDER - 1, pageblock_order)); - base = ALIGN(base, alignment); - size = ALIGN(size, alignment); - limit &= ~(alignment - 1); - - /* size should be aligned with order_per_bit */ - if (!IS_ALIGNED(size >> PAGE_SHIFT, 1 << order_per_bit)) - return -EINVAL; - - /* Reserve memory */ - if (base && fixed) { - if (memblock_is_region_reserved(base, size) || - memblock_reserve(base, size) < 0) { - ret = -EBUSY; - goto err; - } - } else { - phys_addr_t addr = memblock_alloc_range(size, alignment, base, - limit); - if (!addr) { - ret = -ENOMEM; - goto err; - } else { - base = addr; - } - } - - /* - * Each reserved area must be initialised later, when more kernel - * subsystems (like slab allocator) are available. - */ - cma->base_pfn = PFN_DOWN(base); - cma->count = size >> PAGE_SHIFT; - cma->order_per_bit = order_per_bit; - *res_cma = cma; - cma_area_count++; - - pr_info("CMA: reserved %ld MiB at %08lx\n", (unsigned long)size / SZ_1M, - (unsigned long)base); - return 0; - -err: - pr_err("CMA: failed to reserve %ld MiB\n", (unsigned long)size / SZ_1M); - return ret; -} - /** * dma_contiguous_reserve_area() - reserve custom contiguous area * @size: Size of the reserved area (in bytes), @@ -342,77 +165,17 @@ int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base, { int ret; - ret = __dma_contiguous_reserve_area(size, base, limit, 0, 0, - res_cma, fixed); + ret = cma_declare_contiguous(size, base, limit, 0, 0, res_cma, fixed); if (ret) return ret; /* Architecture specific contiguous memory fixup. */ - dma_contiguous_early_fixup(PFN_PHYS((*res_cma)->base_pfn), - (*res_cma)->count << PAGE_SHIFT); + dma_contiguous_early_fixup(cma_get_base(*res_cma), + cma_get_size(*res_cma)); return 0; } -static struct page *__dma_alloc_from_contiguous(struct cma *cma, int count, - unsigned int align) -{ - unsigned long mask, pfn, start = 0; - unsigned long bitmap_maxno, bitmap_no, bitmap_count; - struct page *page = NULL; - int ret; - - if (!cma || !cma->count) - return NULL; - - pr_debug("%s(cma %p, count %d, align %d)\n", __func__, (void *)cma, - count, align); - - if (!count) - return NULL; - - mask = cma_bitmap_aligned_mask(cma, align); - bitmap_maxno = cma_bitmap_maxno(cma); - bitmap_count = cma_bitmap_pages_to_bits(cma, count); - - for (;;) { - mutex_lock(&cma->lock); - bitmap_no = bitmap_find_next_zero_area(cma->bitmap, - bitmap_maxno, start, bitmap_count, mask); - if (bitmap_no >= bitmap_maxno) { - mutex_unlock(&cma->lock); - break; - } - bitmap_set(cma->bitmap, bitmap_no, bitmap_count); - /* - * It's safe to drop the lock here. We've marked this region for - * our exclusive use. If the migration fails we will take the - * lock again and unmark it. - */ - mutex_unlock(&cma->lock); - - pfn = cma->base_pfn + (bitmap_no << cma->order_per_bit); - mutex_lock(&cma_mutex); - ret = alloc_contig_range(pfn, pfn + count, MIGRATE_CMA); - mutex_unlock(&cma_mutex); - if (ret == 0) { - page = pfn_to_page(pfn); - break; - } else if (ret != -EBUSY) { - cma_clear_bitmap(cma, pfn, count); - break; - } - cma_clear_bitmap(cma, pfn, count); - pr_debug("%s(): memory range at %p is busy, retrying\n", - __func__, pfn_to_page(pfn)); - /* try again with a bit different memory target */ - start = bitmap_no + mask + 1; - } - - pr_debug("%s(): returned %p\n", __func__, page); - return page; -} - /** * dma_alloc_from_contiguous() - allocate pages from contiguous area * @dev: Pointer to device for which the allocation is performed. 
@@ -427,35 +190,10 @@ static struct page *__dma_alloc_from_contiguous(struct cma *cma, int count, struct page *dma_alloc_from_contiguous(struct device *dev, int count, unsigned int align) { - struct cma *cma = dev_get_cma_area(dev); - if (align > CONFIG_CMA_ALIGNMENT) align = CONFIG_CMA_ALIGNMENT; - return __dma_alloc_from_contiguous(cma, count, align); -} - -static bool __dma_release_from_contiguous(struct cma *cma, struct page *pages, - int count) -{ - unsigned long pfn; - - if (!cma || !pages) - return false; - - pr_debug("%s(page %p)\n", __func__, (void *)pages); - - pfn = page_to_pfn(pages); - - if (pfn < cma->base_pfn || pfn >= cma->base_pfn + cma->count) - return false; - - VM_BUG_ON(pfn + count > cma->base_pfn + cma->count); - - free_contig_range(pfn, count); - cma_clear_bitmap(cma, pfn, count); - - return true; + return cma_alloc(dev_get_cma_area(dev), count, align); } /** @@ -471,7 +209,5 @@ static bool __dma_release_from_contiguous(struct cma *cma, struct page *pages, bool dma_release_from_contiguous(struct device *dev, struct page *pages, int count) { - struct cma *cma = dev_get_cma_area(dev); - - return __dma_release_from_contiguous(cma, pages, count); + return cma_release(dev_get_cma_area(dev), pages, count); } diff --git a/include/linux/cma.h b/include/linux/cma.h new file mode 100644 index 000000000000..f6f7809acb98 --- /dev/null +++ b/include/linux/cma.h @@ -0,0 +1,27 @@ +#ifndef __CMA_H__ +#define __CMA_H__ + +/* + * There is always at least global CMA area and a few optional + * areas configured in kernel .config. + */ +#ifdef CONFIG_CMA_AREAS +#define MAX_CMA_AREAS (1 + CONFIG_CMA_AREAS) + +#else +#define MAX_CMA_AREAS (0) + +#endif + +struct cma; + +extern phys_addr_t cma_get_base(struct cma *cma); +extern unsigned long cma_get_size(struct cma *cma); + +extern int __init cma_declare_contiguous(phys_addr_t size, + phys_addr_t base, phys_addr_t limit, + phys_addr_t alignment, unsigned int order_per_bit, + struct cma **res_cma, bool fixed); +extern struct page *cma_alloc(struct cma *cma, int count, unsigned int align); +extern bool cma_release(struct cma *cma, struct page *pages, int count); +#endif diff --git a/include/linux/dma-contiguous.h b/include/linux/dma-contiguous.h index 772eab5d524a..569bbd039896 100644 --- a/include/linux/dma-contiguous.h +++ b/include/linux/dma-contiguous.h @@ -53,18 +53,13 @@ #ifdef __KERNEL__ +#include + struct cma; struct page; -struct device; #ifdef CONFIG_DMA_CMA -/* - * There is always at least global CMA area and a few optional device - * private areas configured in kernel .config. - */ -#define MAX_CMA_AREAS (1 + CONFIG_CMA_AREAS) - extern struct cma *dma_contiguous_default_area; static inline struct cma *dev_get_cma_area(struct device *dev) @@ -123,8 +118,6 @@ bool dma_release_from_contiguous(struct device *dev, struct page *pages, #else -#define MAX_CMA_AREAS (0) - static inline struct cma *dev_get_cma_area(struct device *dev) { return NULL; diff --git a/mm/Kconfig b/mm/Kconfig index 3e9977a9d657..f4899ec39cf4 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -508,6 +508,17 @@ config CMA_DEBUG processing calls such as dma_alloc_from_contiguous(). This option does not affect warning and error messages. +config CMA_AREAS + int "Maximum count of the CMA areas" + depends on CMA + default 7 + help + CMA allows to create CMA areas for particular purpose, mainly, + used as device private area. This parameter sets the maximum + number of CMA area in the system. + + If unsure, leave the default value "7". 
+ config ZBUD tristate default n diff --git a/mm/Makefile b/mm/Makefile index 4064f3ec145e..8338473c329a 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -62,3 +62,4 @@ obj-$(CONFIG_MEMORY_ISOLATION) += page_isolation.o obj-$(CONFIG_ZBUD) += zbud.o obj-$(CONFIG_ZSMALLOC) += zsmalloc.o obj-$(CONFIG_GENERIC_EARLY_IOREMAP) += early_ioremap.o +obj-$(CONFIG_CMA) += cma.o diff --git a/mm/cma.c b/mm/cma.c new file mode 100644 index 000000000000..656004216953 --- /dev/null +++ b/mm/cma.c @@ -0,0 +1,333 @@ +/* + * Contiguous Memory Allocator + * + * Copyright (c) 2010-2011 by Samsung Electronics. + * Copyright IBM Corporation, 2013 + * Copyright LG Electronics Inc., 2014 + * Written by: + * Marek Szyprowski + * Michal Nazarewicz + * Aneesh Kumar K.V + * Joonsoo Kim + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License or (at your optional) any later version of the license. + */ + +#define pr_fmt(fmt) "cma: " fmt + +#ifdef CONFIG_CMA_DEBUG +#ifndef DEBUG +# define DEBUG +#endif +#endif + +#include +#include +#include +#include +#include +#include +#include +#include + +struct cma { + unsigned long base_pfn; + unsigned long count; + unsigned long *bitmap; + unsigned int order_per_bit; /* Order of pages represented by one bit */ + struct mutex lock; +}; + +static struct cma cma_areas[MAX_CMA_AREAS]; +static unsigned cma_area_count; +static DEFINE_MUTEX(cma_mutex); + +phys_addr_t cma_get_base(struct cma *cma) +{ + return PFN_PHYS(cma->base_pfn); +} + +unsigned long cma_get_size(struct cma *cma) +{ + return cma->count << PAGE_SHIFT; +} + +static unsigned long cma_bitmap_aligned_mask(struct cma *cma, int align_order) +{ + return (1UL << (align_order >> cma->order_per_bit)) - 1; +} + +static unsigned long cma_bitmap_maxno(struct cma *cma) +{ + return cma->count >> cma->order_per_bit; +} + +static unsigned long cma_bitmap_pages_to_bits(struct cma *cma, + unsigned long pages) +{ + return ALIGN(pages, 1UL << cma->order_per_bit) >> cma->order_per_bit; +} + +static void cma_clear_bitmap(struct cma *cma, unsigned long pfn, int count) +{ + unsigned long bitmap_no, bitmap_count; + + bitmap_no = (pfn - cma->base_pfn) >> cma->order_per_bit; + bitmap_count = cma_bitmap_pages_to_bits(cma, count); + + mutex_lock(&cma->lock); + bitmap_clear(cma->bitmap, bitmap_no, bitmap_count); + mutex_unlock(&cma->lock); +} + +static int __init cma_activate_area(struct cma *cma) +{ + int bitmap_size = BITS_TO_LONGS(cma_bitmap_maxno(cma)) * sizeof(long); + unsigned long base_pfn = cma->base_pfn, pfn = base_pfn; + unsigned i = cma->count >> pageblock_order; + struct zone *zone; + + cma->bitmap = kzalloc(bitmap_size, GFP_KERNEL); + + if (!cma->bitmap) + return -ENOMEM; + + WARN_ON_ONCE(!pfn_valid(pfn)); + zone = page_zone(pfn_to_page(pfn)); + + do { + unsigned j; + + base_pfn = pfn; + for (j = pageblock_nr_pages; j; --j, pfn++) { + WARN_ON_ONCE(!pfn_valid(pfn)); + /* + * alloc_contig_range requires the pfn range + * specified to be in the same zone. Make this + * simple by forcing the entire CMA resv range + * to be in the same zone. 
+ */ + if (page_zone(pfn_to_page(pfn)) != zone) + goto err; + } + init_cma_reserved_pageblock(pfn_to_page(base_pfn)); + } while (--i); + + mutex_init(&cma->lock); + return 0; + +err: + kfree(cma->bitmap); + return -EINVAL; +} + +static int __init cma_init_reserved_areas(void) +{ + int i; + + for (i = 0; i < cma_area_count; i++) { + int ret = cma_activate_area(&cma_areas[i]); + + if (ret) + return ret; + } + + return 0; +} +core_initcall(cma_init_reserved_areas); + +/** + * cma_declare_contiguous() - reserve custom contiguous area + * @size: Size of the reserved area (in bytes), + * @base: Base address of the reserved area optional, use 0 for any + * @limit: End address of the reserved memory (optional, 0 for any). + * @alignment: Alignment for the CMA area, should be power of 2 or zero + * @order_per_bit: Order of pages represented by one bit on bitmap. + * @res_cma: Pointer to store the created cma region. + * @fixed: hint about where to place the reserved area + * + * This function reserves memory from early allocator. It should be + * called by arch specific code once the early allocator (memblock or bootmem) + * has been activated and all other subsystems have already allocated/reserved + * memory. This function allows to create custom reserved areas. + * + * If @fixed is true, reserve contiguous area at exactly @base. If false, + * reserve in range from @base to @limit. + */ +int __init cma_declare_contiguous(phys_addr_t size, + phys_addr_t base, phys_addr_t limit, + phys_addr_t alignment, unsigned int order_per_bit, + struct cma **res_cma, bool fixed) +{ + struct cma *cma = &cma_areas[cma_area_count]; + int ret = 0; + + pr_debug("%s(size %lx, base %08lx, limit %08lx alignment %08lx)\n", + __func__, (unsigned long)size, (unsigned long)base, + (unsigned long)limit, (unsigned long)alignment); + + if (cma_area_count == ARRAY_SIZE(cma_areas)) { + pr_err("Not enough slots for CMA reserved regions!\n"); + return -ENOSPC; + } + + if (!size) + return -EINVAL; + + if (alignment && !is_power_of_2(alignment)) + return -EINVAL; + + /* + * Sanitise input arguments. + * Pages both ends in CMA area could be merged into adjacent unmovable + * migratetype page by page allocator's buddy algorithm. In the case, + * you couldn't get a contiguous memory, which is not what we want. + */ + alignment = max(alignment, + (phys_addr_t)PAGE_SIZE << max(MAX_ORDER - 1, pageblock_order)); + base = ALIGN(base, alignment); + size = ALIGN(size, alignment); + limit &= ~(alignment - 1); + + /* size should be aligned with order_per_bit */ + if (!IS_ALIGNED(size >> PAGE_SHIFT, 1 << order_per_bit)) + return -EINVAL; + + /* Reserve memory */ + if (base && fixed) { + if (memblock_is_region_reserved(base, size) || + memblock_reserve(base, size) < 0) { + ret = -EBUSY; + goto err; + } + } else { + phys_addr_t addr = memblock_alloc_range(size, alignment, base, + limit); + if (!addr) { + ret = -ENOMEM; + goto err; + } else { + base = addr; + } + } + + /* + * Each reserved area must be initialised later, when more kernel + * subsystems (like slab allocator) are available. 
+ */ + cma->base_pfn = PFN_DOWN(base); + cma->count = size >> PAGE_SHIFT; + cma->order_per_bit = order_per_bit; + *res_cma = cma; + cma_area_count++; + + pr_info("CMA: reserved %ld MiB at %08lx\n", (unsigned long)size / SZ_1M, + (unsigned long)base); + return 0; + +err: + pr_err("CMA: failed to reserve %ld MiB\n", (unsigned long)size / SZ_1M); + return ret; +} + +/** + * cma_alloc() - allocate pages from contiguous area + * @cma: Contiguous memory region for which the allocation is performed. + * @count: Requested number of pages. + * @align: Requested alignment of pages (in PAGE_SIZE order). + * + * This function allocates part of contiguous memory on specific + * contiguous memory area. + */ +struct page *cma_alloc(struct cma *cma, int count, unsigned int align) +{ + unsigned long mask, pfn, start = 0; + unsigned long bitmap_maxno, bitmap_no, bitmap_count; + struct page *page = NULL; + int ret; + + if (!cma || !cma->count) + return NULL; + + pr_debug("%s(cma %p, count %d, align %d)\n", __func__, (void *)cma, + count, align); + + if (!count) + return NULL; + + mask = cma_bitmap_aligned_mask(cma, align); + bitmap_maxno = cma_bitmap_maxno(cma); + bitmap_count = cma_bitmap_pages_to_bits(cma, count); + + for (;;) { + mutex_lock(&cma->lock); + bitmap_no = bitmap_find_next_zero_area(cma->bitmap, + bitmap_maxno, start, bitmap_count, mask); + if (bitmap_no >= bitmap_maxno) { + mutex_unlock(&cma->lock); + break; + } + bitmap_set(cma->bitmap, bitmap_no, bitmap_count); + /* + * It's safe to drop the lock here. We've marked this region for + * our exclusive use. If the migration fails we will take the + * lock again and unmark it. + */ + mutex_unlock(&cma->lock); + + pfn = cma->base_pfn + (bitmap_no << cma->order_per_bit); + mutex_lock(&cma_mutex); + ret = alloc_contig_range(pfn, pfn + count, MIGRATE_CMA); + mutex_unlock(&cma_mutex); + if (ret == 0) { + page = pfn_to_page(pfn); + break; + } else if (ret != -EBUSY) { + cma_clear_bitmap(cma, pfn, count); + break; + } + cma_clear_bitmap(cma, pfn, count); + pr_debug("%s(): memory range at %p is busy, retrying\n", + __func__, pfn_to_page(pfn)); + /* try again with a bit different memory target */ + start = bitmap_no + mask + 1; + } + + pr_debug("%s(): returned %p\n", __func__, page); + return page; +} + +/** + * cma_release() - release allocated pages + * @cma: Contiguous memory region for which the allocation is performed. + * @pages: Allocated pages. + * @count: Number of allocated pages. + * + * This function releases memory allocated by alloc_cma(). + * It returns false when provided pages do not belong to contiguous area and + * true otherwise. + */ +bool cma_release(struct cma *cma, struct page *pages, int count) +{ + unsigned long pfn; + + if (!cma || !pages) + return false; + + pr_debug("%s(page %p)\n", __func__, (void *)pages); + + pfn = page_to_pfn(pages); + + if (pfn < cma->base_pfn || pfn >= cma->base_pfn + cma->count) + return false; + + VM_BUG_ON(pfn + count > cma->base_pfn + cma->count); + + free_contig_range(pfn, count); + cma_clear_bitmap(cma, pfn, count); + + return true; +} -- cgit v1.2.3-59-g8ed1b From c1f733aaaf30a0068a3126d5aa9d5b4c25ba4c0c Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Wed, 6 Aug 2014 16:05:32 -0700 Subject: mm, CMA: change cma_declare_contiguous() to obey coding convention Conventionally, we put output param to the end of param list and put the 'base' ahead of 'size', but cma_declare_contiguous() doesn't look like that, so change it. 
Additionally, move down cma_areas reference code to the position where it is really needed. Signed-off-by: Joonsoo Kim Acked-by: Michal Nazarewicz Reviewed-by: Aneesh Kumar K.V Cc: Alexander Graf Cc: Aneesh Kumar K.V Cc: Gleb Natapov Acked-by: Marek Szyprowski Tested-by: Marek Szyprowski Cc: Minchan Kim Cc: Paolo Bonzini Cc: Zhang Yanfei Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/kvm/book3s_hv_builtin.c | 4 ++-- drivers/base/dma-contiguous.c | 2 +- include/linux/cma.h | 2 +- mm/cma.c | 13 +++++++------ 4 files changed, 11 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 3960e0bceaf2..6cf498a9bc98 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -185,8 +185,8 @@ void __init kvm_cma_reserve(void) align_size = HPT_ALIGN_PAGES << PAGE_SHIFT; align_size = max(kvm_rma_pages << PAGE_SHIFT, align_size); - cma_declare_contiguous(selected_size, 0, 0, align_size, - KVM_CMA_CHUNK_ORDER - PAGE_SHIFT, &kvm_cma, false); + cma_declare_contiguous(0, selected_size, 0, align_size, + KVM_CMA_CHUNK_ORDER - PAGE_SHIFT, false, &kvm_cma); } } diff --git a/drivers/base/dma-contiguous.c b/drivers/base/dma-contiguous.c index 0411c1c57005..6606abdf880c 100644 --- a/drivers/base/dma-contiguous.c +++ b/drivers/base/dma-contiguous.c @@ -165,7 +165,7 @@ int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base, { int ret; - ret = cma_declare_contiguous(size, base, limit, 0, 0, res_cma, fixed); + ret = cma_declare_contiguous(base, size, limit, 0, 0, fixed, res_cma); if (ret) return ret; diff --git a/include/linux/cma.h b/include/linux/cma.h index f6f7809acb98..371b93042520 100644 --- a/include/linux/cma.h +++ b/include/linux/cma.h @@ -21,7 +21,7 @@ extern unsigned long cma_get_size(struct cma *cma); extern int __init cma_declare_contiguous(phys_addr_t size, phys_addr_t base, phys_addr_t limit, phys_addr_t alignment, unsigned int order_per_bit, - struct cma **res_cma, bool fixed); + bool fixed, struct cma **res_cma); extern struct page *cma_alloc(struct cma *cma, int count, unsigned int align); extern bool cma_release(struct cma *cma, struct page *pages, int count); #endif diff --git a/mm/cma.c b/mm/cma.c index 103a6663b7c7..488e50810ed1 100644 --- a/mm/cma.c +++ b/mm/cma.c @@ -141,13 +141,13 @@ core_initcall(cma_init_reserved_areas); /** * cma_declare_contiguous() - reserve custom contiguous area - * @size: Size of the reserved area (in bytes), * @base: Base address of the reserved area optional, use 0 for any + * @size: Size of the reserved area (in bytes), * @limit: End address of the reserved memory (optional, 0 for any). * @alignment: Alignment for the CMA area, should be power of 2 or zero * @order_per_bit: Order of pages represented by one bit on bitmap. - * @res_cma: Pointer to store the created cma region. * @fixed: hint about where to place the reserved area + * @res_cma: Pointer to store the created cma region. * * This function reserves memory from early allocator. It should be * called by arch specific code once the early allocator (memblock or bootmem) @@ -157,12 +157,12 @@ core_initcall(cma_init_reserved_areas); * If @fixed is true, reserve contiguous area at exactly @base. If false, * reserve in range from @base to @limit. 
*/ -int __init cma_declare_contiguous(phys_addr_t size, - phys_addr_t base, phys_addr_t limit, +int __init cma_declare_contiguous(phys_addr_t base, + phys_addr_t size, phys_addr_t limit, phys_addr_t alignment, unsigned int order_per_bit, - struct cma **res_cma, bool fixed) + bool fixed, struct cma **res_cma) { - struct cma *cma = &cma_areas[cma_area_count]; + struct cma *cma; int ret = 0; pr_debug("%s(size %lx, base %08lx, limit %08lx alignment %08lx)\n", @@ -218,6 +218,7 @@ int __init cma_declare_contiguous(phys_addr_t size, * Each reserved area must be initialised later, when more kernel * subsystems (like slab allocator) are available. */ + cma = &cma_areas[cma_area_count]; cma->base_pfn = PFN_DOWN(base); cma->count = size >> PAGE_SHIFT; cma->order_per_bit = order_per_bit; -- cgit v1.2.3-59-g8ed1b From 2f3e442ccceb85c51c7dffd3799bfd84de213874 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 6 Aug 2014 16:05:40 -0700 Subject: mm: page-flags: clean up the page flag test, set, clear macros - PAGEFLAG_FALSE only defines TEST, make it define SET and CLEAR as well, analogous to PAGEFLAG. - Define TESTSETFLAG_FALSE, analogous to TESTSETFLAG. - Define TESTSCFLAG_FALSE, analogous to TESTSCFLAG - Make PG_mlocked accessors the same on both MMU and !MMU setups Signed-off-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page-flags.h | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 8304959ad336..e1f5fcd79792 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -171,13 +171,12 @@ static inline int __TestClearPage##uname(struct page *page) \ #define __PAGEFLAG(uname, lname) TESTPAGEFLAG(uname, lname) \ __SETPAGEFLAG(uname, lname) __CLEARPAGEFLAG(uname, lname) -#define PAGEFLAG_FALSE(uname) \ -static inline int Page##uname(const struct page *page) \ - { return 0; } - #define TESTSCFLAG(uname, lname) \ TESTSETFLAG(uname, lname) TESTCLEARFLAG(uname, lname) +#define TESTPAGEFLAG_FALSE(uname) \ +static inline int Page##uname(const struct page *page) { return 0; } + #define SETPAGEFLAG_NOOP(uname) \ static inline void SetPage##uname(struct page *page) { } @@ -187,12 +186,21 @@ static inline void ClearPage##uname(struct page *page) { } #define __CLEARPAGEFLAG_NOOP(uname) \ static inline void __ClearPage##uname(struct page *page) { } +#define TESTSETFLAG_FALSE(uname) \ +static inline int TestSetPage##uname(struct page *page) { return 0; } + #define TESTCLEARFLAG_FALSE(uname) \ static inline int TestClearPage##uname(struct page *page) { return 0; } #define __TESTCLEARFLAG_FALSE(uname) \ static inline int __TestClearPage##uname(struct page *page) { return 0; } +#define PAGEFLAG_FALSE(uname) TESTPAGEFLAG_FALSE(uname) \ + SETPAGEFLAG_NOOP(uname) CLEARPAGEFLAG_NOOP(uname) + +#define TESTSCFLAG_FALSE(uname) \ + TESTSETFLAG_FALSE(uname) TESTCLEARFLAG_FALSE(uname) + struct page; /* forward declaration */ TESTPAGEFLAG(Locked, locked) @@ -248,7 +256,6 @@ PAGEFLAG_FALSE(HighMem) PAGEFLAG(SwapCache, swapcache) #else PAGEFLAG_FALSE(SwapCache) - SETPAGEFLAG_NOOP(SwapCache) CLEARPAGEFLAG_NOOP(SwapCache) #endif PAGEFLAG(Unevictable, unevictable) __CLEARPAGEFLAG(Unevictable, unevictable) @@ -258,8 +265,8 @@ PAGEFLAG(Unevictable, unevictable) __CLEARPAGEFLAG(Unevictable, unevictable) PAGEFLAG(Mlocked, mlocked) __CLEARPAGEFLAG(Mlocked, mlocked) TESTSCFLAG(Mlocked, mlocked) __TESTCLEARFLAG(Mlocked, mlocked) #else 
-PAGEFLAG_FALSE(Mlocked) SETPAGEFLAG_NOOP(Mlocked) - TESTCLEARFLAG_FALSE(Mlocked) __TESTCLEARFLAG_FALSE(Mlocked) +PAGEFLAG_FALSE(Mlocked) __CLEARPAGEFLAG_NOOP(Mlocked) + TESTSCFLAG_FALSE(Mlocked) __TESTCLEARFLAG_FALSE(Mlocked) #endif #ifdef CONFIG_ARCH_USES_PG_UNCACHED -- cgit v1.2.3-59-g8ed1b From 1a4dc5bc7cb5659a8004d105afeb0571126f8f56 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Wed, 6 Aug 2014 16:06:08 -0700 Subject: mem-hotplug: improve zone_movable_is_highmem logic In original code, zone_movable_is_highmem() assumes ZONE_MOVABLE not highmem if CONFIG_HAVE_MEMBLOCK_NODE_MAP is not set. In online_pages, it extracts pages from the previous zone before ZONE_MOVABLE. Which is logically inconsistent: If HAVE_MEMBLOCK_NODE_MAP is turned off but HIGHMEM is on, zone_movable_is_highmem() makes movable zone not highmem, but online_pages() extracts pages from ZONE_HIGHMEM. This inconsistency doesn't cause real problem currently, because all architectures support online_pages also have HAVE_MEMBLOCK_NODE_MAP. However, fixing it makes code clear, and also helps futher coding. Signed-off-by: Wang Nan Cc: Zhang Zhen Cc: Mel Gorman Cc: Jiang Liu Cc: Li Zefan Cc: Yinghai Lu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 6cbd1b6c3d20..559e659288fc 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -872,6 +872,8 @@ static inline int zone_movable_is_highmem(void) { #if defined(CONFIG_HIGHMEM) && defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP) return movable_zone == ZONE_HIGHMEM; +#elif defined(CONFIG_HIGHMEM) + return (ZONE_MOVABLE - 1) == ZONE_HIGHMEM; #else return 0; #endif -- cgit v1.2.3-59-g8ed1b From ef6b571fb8920d5006349a6e29ac47c4817e9691 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 6 Aug 2014 16:06:30 -0700 Subject: include/linux/mmdebug.h: add VM_WARN_ONCE() It was missing... Cc: Konstantin Khlebnikov Cc: Dave Hansen Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmdebug.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h index edd82a105220..2f348d02f640 100644 --- a/include/linux/mmdebug.h +++ b/include/linux/mmdebug.h @@ -20,11 +20,13 @@ extern void dump_page_badflags(struct page *page, const char *reason, } while (0) #define VM_WARN_ON(cond) WARN_ON(cond) #define VM_WARN_ON_ONCE(cond) WARN_ON_ONCE(cond) +#define VM_WARN_ONCE(cond, format...) WARN_ONCE(cond, format) #else #define VM_BUG_ON(cond) BUILD_BUG_ON_INVALID(cond) #define VM_BUG_ON_PAGE(cond, page) VM_BUG_ON(cond) #define VM_WARN_ON(cond) BUILD_BUG_ON_INVALID(cond) #define VM_WARN_ON_ONCE(cond) BUILD_BUG_ON_INVALID(cond) +#define VM_WARN_ONCE(cond, format...) BUILD_BUG_ON_INVALID(cond) #endif #ifdef CONFIG_DEBUG_VIRTUAL -- cgit v1.2.3-59-g8ed1b From eb39d618f9e80f81cfc5788cf1b252d141c2f0c3 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Wed, 6 Aug 2014 16:06:43 -0700 Subject: mm: replace init_page_accessed by __SetPageReferenced Do we really need an exported alias for __SetPageReferenced()? Its callers better know what they're doing, in which case the page would not be already marked referenced. Kill init_page_accessed(), just __SetPageReferenced() inline. 
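The substitution is only valid while the page is still private to the allocating context; once other CPUs can observe it, the atomic mark_page_accessed() must be used instead. A condensed sketch of the pattern the page-cache callers follow, as a hypothetical wrapper with error handling trimmed to the essentials:

#include <linux/pagemap.h>

static int example_add_new_page(struct address_space *mapping,
                                pgoff_t offset, gfp_t gfp_mask)
{
        struct page *page = __page_cache_alloc(gfp_mask);
        int err;

        if (!page)
                return -ENOMEM;

        /*
         * Nobody else can see this page yet, so the non-atomic setter is
         * safe and avoids a later atomic mark_page_accessed().
         */
        __SetPageReferenced(page);

        err = add_to_page_cache_lru(page, mapping, offset, gfp_mask);
        if (err)
                page_cache_release(page);
        return err;
}
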
Signed-off-by: Hugh Dickins Acked-by: Mel Gorman Cc: Johannes Weiner Cc: Vlastimil Babka Cc: Michal Hocko Cc: Dave Hansen Cc: Prabhakar Lad Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 1 - mm/filemap.c | 4 ++-- mm/shmem.c | 2 +- mm/swap.c | 14 +++----------- 4 files changed, 6 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 4bdbee80eede..1eb64043c076 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -311,7 +311,6 @@ extern void lru_add_page_tail(struct page *page, struct page *page_tail, struct lruvec *lruvec, struct list_head *head); extern void activate_page(struct page *); extern void mark_page_accessed(struct page *); -extern void init_page_accessed(struct page *page); extern void lru_add_drain(void); extern void lru_add_drain_cpu(int cpu); extern void lru_add_drain_all(void); diff --git a/mm/filemap.c b/mm/filemap.c index 65d44fd88c78..7e85c8147e1b 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1091,9 +1091,9 @@ no_page: if (WARN_ON_ONCE(!(fgp_flags & FGP_LOCK))) fgp_flags |= FGP_LOCK; - /* Init accessed so avoit atomic mark_page_accessed later */ + /* Init accessed so avoid atomic mark_page_accessed later */ if (fgp_flags & FGP_ACCESSED) - init_page_accessed(page); + __SetPageReferenced(page); err = add_to_page_cache_lru(page, mapping, offset, radix_gfp_mask); if (unlikely(err)) { diff --git a/mm/shmem.c b/mm/shmem.c index 57fd82a5af7a..fe15d96c3166 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1166,7 +1166,7 @@ repeat: __SetPageSwapBacked(page); __set_page_locked(page); if (sgp == SGP_WRITE) - init_page_accessed(page); + __SetPageReferenced(page); error = mem_cgroup_charge_file(page, current->mm, gfp & GFP_RECLAIM_MASK); diff --git a/mm/swap.c b/mm/swap.c index 9e8e3472248b..d8eb4d09ffa2 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -589,6 +589,9 @@ static void __lru_cache_activate_page(struct page *page) * inactive,unreferenced -> inactive,referenced * inactive,referenced -> active,unreferenced * active,unreferenced -> active,referenced + * + * When a newly allocated page is not yet visible, so safe for non-atomic ops, + * __SetPageReferenced(page) may be substituted for mark_page_accessed(page). */ void mark_page_accessed(struct page *page) { @@ -614,17 +617,6 @@ void mark_page_accessed(struct page *page) } EXPORT_SYMBOL(mark_page_accessed); -/* - * Used to mark_page_accessed(page) that is not visible yet and when it is - * still safe to use non-atomic ops - */ -void init_page_accessed(struct page *page) -{ - if (!PageReferenced(page)) - __SetPageReferenced(page); -} -EXPORT_SYMBOL(init_page_accessed); - static void __lru_cache_add(struct page *page) { struct pagevec *pvec = &get_cpu_var(lru_add_pvec); -- cgit v1.2.3-59-g8ed1b From ed4d4902ebdd7ca8b5a51daaf6bebf4b172895cc Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Wed, 6 Aug 2014 16:06:54 -0700 Subject: mm, hugetlb: remove hugetlb_zero and hugetlb_infinity They are unnecessary: "zero" can be used in place of "hugetlb_zero" and passing extra2 == NULL is equivalent to infinity. Signed-off-by: David Rientjes Cc: Joonsoo Kim Reviewed-by: Naoya Horiguchi Reviewed-by: Luiz Capitulino Cc: "Kirill A. 
Shutemov" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 1 - kernel/sysctl.c | 9 +++------ mm/hugetlb.c | 1 - 3 files changed, 3 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index a23c096b3080..6e6d338641fe 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -87,7 +87,6 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud); #endif extern unsigned long hugepages_treat_as_movable; -extern const unsigned long hugetlb_zero, hugetlb_infinity; extern int sysctl_hugetlb_shm_group; extern struct list_head huge_boot_pages; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 75b22e22a72c..75875a741b5e 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1240,8 +1240,7 @@ static struct ctl_table vm_table[] = { .maxlen = sizeof(unsigned long), .mode = 0644, .proc_handler = hugetlb_sysctl_handler, - .extra1 = (void *)&hugetlb_zero, - .extra2 = (void *)&hugetlb_infinity, + .extra1 = &zero, }, #ifdef CONFIG_NUMA { @@ -1250,8 +1249,7 @@ static struct ctl_table vm_table[] = { .maxlen = sizeof(unsigned long), .mode = 0644, .proc_handler = &hugetlb_mempolicy_sysctl_handler, - .extra1 = (void *)&hugetlb_zero, - .extra2 = (void *)&hugetlb_infinity, + .extra1 = &zero, }, #endif { @@ -1274,8 +1272,7 @@ static struct ctl_table vm_table[] = { .maxlen = sizeof(unsigned long), .mode = 0644, .proc_handler = hugetlb_overcommit_handler, - .extra1 = (void *)&hugetlb_zero, - .extra2 = (void *)&hugetlb_infinity, + .extra1 = &zero, }, #endif { diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 7a0fcb33973e..d9ad93b55585 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -35,7 +35,6 @@ #include #include "internal.h" -const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL; unsigned long hugepages_treat_as_movable; int hugetlb_max_hstate __read_mostly; -- cgit v1.2.3-59-g8ed1b From 21bda264f4243f61dfcc485174055f12ad0530b4 Mon Sep 17 00:00:00 2001 From: Jerome Marchand Date: Wed, 6 Aug 2014 16:06:56 -0700 Subject: mm: make copy_pte_range static again Commit 71e3aac0724f ("thp: transparent hugepage core") adds copy_pte_range prototype to huge_mm.h. I'm not sure why (or if) this function have been used outside of memory.c, but it currently isn't. This patch makes copy_pte_range() static again. 
Signed-off-by: Jerome Marchand Acked-by: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/huge_mm.h | 4 ---- mm/memory.c | 2 +- 2 files changed, 1 insertion(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index b826239bdce0..63579cb8d3dc 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -93,10 +93,6 @@ extern bool is_vma_temporary_stack(struct vm_area_struct *vma); #endif /* CONFIG_DEBUG_VM */ extern unsigned long transparent_hugepage_flags; -extern int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, - pmd_t *dst_pmd, pmd_t *src_pmd, - struct vm_area_struct *vma, - unsigned long addr, unsigned long end); extern int split_huge_page_to_list(struct page *page, struct list_head *list); static inline int split_huge_page(struct page *page) { diff --git a/mm/memory.c b/mm/memory.c index 06ff0720d75a..01d0289f30a7 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -884,7 +884,7 @@ out_set_pte: return 0; } -int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, +static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, pmd_t *dst_pmd, pmd_t *src_pmd, struct vm_area_struct *vma, unsigned long addr, unsigned long end) { -- cgit v1.2.3-59-g8ed1b From f6f8ed47353597dcb895eb4a15a28af657392e72 Mon Sep 17 00:00:00 2001 From: WANG Chao Date: Wed, 6 Aug 2014 16:06:58 -0700 Subject: mm/vmalloc.c: clean up map_vm_area third argument Currently map_vm_area() takes (struct page *** pages) as third argument, and after mapping, it moves (*pages) to point to (*pages + nr_mappped_pages). It looks like this kind of increment is useless to its caller these days. The callers don't care about the increments and actually they're trying to avoid this by passing another copy to map_vm_area(). The caller can always guarantee all the pages can be mapped into vm_area as specified in first argument and the caller only cares about whether map_vm_area() fails or not. This patch cleans up the pointer movement in map_vm_area() and updates its callers accordingly. Signed-off-by: WANG Chao Cc: Zhang Yanfei Acked-by: Greg Kroah-Hartman Cc: Minchan Kim Cc: Nitin Gupta Cc: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/tile/kernel/module.c | 2 +- drivers/lguest/core.c | 7 ++----- drivers/staging/android/binder.c | 4 +--- include/linux/vmalloc.h | 2 +- mm/vmalloc.c | 14 +++++--------- mm/zsmalloc.c | 2 +- 6 files changed, 11 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/arch/tile/kernel/module.c b/arch/tile/kernel/module.c index 4918d91bc3a6..d19b13e3a59f 100644 --- a/arch/tile/kernel/module.c +++ b/arch/tile/kernel/module.c @@ -58,7 +58,7 @@ void *module_alloc(unsigned long size) area->nr_pages = npages; area->pages = pages; - if (map_vm_area(area, prot_rwx, &pages)) { + if (map_vm_area(area, prot_rwx, pages)) { vunmap(area->addr); goto error; } diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c index 0bf1e4edf04d..6590558d1d31 100644 --- a/drivers/lguest/core.c +++ b/drivers/lguest/core.c @@ -42,7 +42,6 @@ DEFINE_MUTEX(lguest_lock); static __init int map_switcher(void) { int i, err; - struct page **pagep; /* * Map the Switcher in to high memory. @@ -110,11 +109,9 @@ static __init int map_switcher(void) * This code actually sets up the pages we've allocated to appear at * switcher_addr. 
map_vm_area() takes the vma we allocated above, the * kind of pages we're mapping (kernel pages), and a pointer to our - * array of struct pages. It increments that pointer, but we don't - * care. + * array of struct pages. */ - pagep = lg_switcher_pages; - err = map_vm_area(switcher_vma, PAGE_KERNEL_EXEC, &pagep); + err = map_vm_area(switcher_vma, PAGE_KERNEL_EXEC, lg_switcher_pages); if (err) { printk("lguest: map_vm_area failed: %i\n", err); goto free_vma; diff --git a/drivers/staging/android/binder.c b/drivers/staging/android/binder.c index 02b0379ae550..4f34dc0095b5 100644 --- a/drivers/staging/android/binder.c +++ b/drivers/staging/android/binder.c @@ -585,7 +585,6 @@ static int binder_update_page_range(struct binder_proc *proc, int allocate, for (page_addr = start; page_addr < end; page_addr += PAGE_SIZE) { int ret; - struct page **page_array_ptr; page = &proc->pages[(page_addr - proc->buffer) / PAGE_SIZE]; @@ -598,8 +597,7 @@ static int binder_update_page_range(struct binder_proc *proc, int allocate, } tmp_area.addr = page_addr; tmp_area.size = PAGE_SIZE + PAGE_SIZE /* guard page? */; - page_array_ptr = page; - ret = map_vm_area(&tmp_area, PAGE_KERNEL, &page_array_ptr); + ret = map_vm_area(&tmp_area, PAGE_KERNEL, page); if (ret) { pr_err("%d: binder_alloc_buf failed to map page at %p in kernel\n", proc->pid, page_addr); diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 4b8a89189a29..b87696fdf06a 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -113,7 +113,7 @@ extern struct vm_struct *remove_vm_area(const void *addr); extern struct vm_struct *find_vm_area(const void *addr); extern int map_vm_area(struct vm_struct *area, pgprot_t prot, - struct page ***pages); + struct page **pages); #ifdef CONFIG_MMU extern int map_kernel_range_noflush(unsigned long start, unsigned long size, pgprot_t prot, struct page **pages); diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 9ec4173f48a8..2b0aa5486092 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1270,19 +1270,15 @@ void unmap_kernel_range(unsigned long addr, unsigned long size) } EXPORT_SYMBOL_GPL(unmap_kernel_range); -int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages) +int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page **pages) { unsigned long addr = (unsigned long)area->addr; unsigned long end = addr + get_vm_area_size(area); int err; - err = vmap_page_range(addr, end, prot, *pages); - if (err > 0) { - *pages += err; - err = 0; - } + err = vmap_page_range(addr, end, prot, pages); - return err; + return err > 0 ? 
0 : err; } EXPORT_SYMBOL_GPL(map_vm_area); @@ -1548,7 +1544,7 @@ void *vmap(struct page **pages, unsigned int count, if (!area) return NULL; - if (map_vm_area(area, prot, &pages)) { + if (map_vm_area(area, prot, pages)) { vunmap(area->addr); return NULL; } @@ -1606,7 +1602,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, cond_resched(); } - if (map_vm_area(area, prot, &pages)) + if (map_vm_area(area, prot, pages)) goto fail; return area->addr; diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index fe78189624cf..bb62a4adc328 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -690,7 +690,7 @@ static inline void __zs_cpu_down(struct mapping_area *area) static inline void *__zs_map_object(struct mapping_area *area, struct page *pages[2], int off, int size) { - BUG_ON(map_vm_area(area->vm, PAGE_KERNEL, &pages)); + BUG_ON(map_vm_area(area->vm, PAGE_KERNEL, pages)); area->vm_addr = area->vm->addr; return area->vm_addr + off; } -- cgit v1.2.3-59-g8ed1b From 3484b2de9499df23c4604a513b36f96326ae81ad Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 6 Aug 2014 16:07:14 -0700 Subject: mm: rearrange zone fields into read-only, page alloc, statistics and page reclaim lines The arrangement of struct zone has changed over time and now it has reached the point where there is some inappropriate sharing going on. On x86-64 for example o The zone->node field is shared with the zone lock and zone->node is accessed frequently from the page allocator due to the fair zone allocation policy. o span_seqlock is almost never used by shares a line with free_area o Some zone statistics share a cache line with the LRU lock so reclaim-intensive and allocator-intensive workloads can bounce the cache line on a stat update This patch rearranges struct zone to put read-only and read-mostly fields together and then splits the page allocator intensive fields, the zone statistics and the page reclaim intensive fields into their own cache lines. Note that the type of lowmem_reserve changes due to the watermark calculations being signed and avoiding a signed/unsigned conversion there. On the test configuration I used the overall size of struct zone shrunk by one cache line. On smaller machines, this is not likely to be noticable. However, on a 4-node NUMA machine running tiobench the system CPU overhead is reduced by this patch. 
3.16.0-rc3 3.16.0-rc3 vanillarearrange-v5r9 User 746.94 759.78 System 65336.22 58350.98 Elapsed 27553.52 27282.02 Signed-off-by: Mel Gorman Acked-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 211 +++++++++++++++++++++++++------------------------ mm/page_alloc.c | 7 +- mm/vmstat.c | 4 +- 3 files changed, 113 insertions(+), 109 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 559e659288fc..ed0876bb902c 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -324,18 +324,11 @@ enum zone_type { #ifndef __GENERATING_BOUNDS_H struct zone { - /* Fields commonly accessed by the page allocator */ + /* Read-mostly fields */ /* zone watermarks, access with *_wmark_pages(zone) macros */ unsigned long watermark[NR_WMARK]; - /* - * When free pages are below this point, additional steps are taken - * when reading the number of free pages to avoid per-cpu counter - * drift allowing watermarks to be breached - */ - unsigned long percpu_drift_mark; - /* * We don't know if the memory that we're going to allocate will be freeable * or/and it will be released eventually, so to avoid totally wasting several @@ -344,41 +337,26 @@ struct zone { * on the higher zones). This array is recalculated at runtime if the * sysctl_lowmem_reserve_ratio sysctl changes. */ - unsigned long lowmem_reserve[MAX_NR_ZONES]; - - /* - * This is a per-zone reserve of pages that should not be - * considered dirtyable memory. - */ - unsigned long dirty_balance_reserve; + long lowmem_reserve[MAX_NR_ZONES]; #ifdef CONFIG_NUMA int node; +#endif + /* - * zone reclaim becomes active if more unmapped pages exist. + * The target ratio of ACTIVE_ANON to INACTIVE_ANON pages on + * this zone's LRU. Maintained by the pageout code. */ - unsigned long min_unmapped_pages; - unsigned long min_slab_pages; -#endif + unsigned int inactive_ratio; + + struct pglist_data *zone_pgdat; struct per_cpu_pageset __percpu *pageset; + /* - * free areas of different sizes + * This is a per-zone reserve of pages that should not be + * considered dirtyable memory. */ - spinlock_t lock; -#if defined CONFIG_COMPACTION || defined CONFIG_CMA - /* Set to true when the PG_migrate_skip bits should be cleared */ - bool compact_blockskip_flush; - - /* pfn where compaction free scanner should start */ - unsigned long compact_cached_free_pfn; - /* pfn where async and sync compaction migration scanner should start */ - unsigned long compact_cached_migrate_pfn[2]; -#endif -#ifdef CONFIG_MEMORY_HOTPLUG - /* see spanned/present_pages for more description */ - seqlock_t span_seqlock; -#endif - struct free_area free_area[MAX_ORDER]; + unsigned long dirty_balance_reserve; #ifndef CONFIG_SPARSEMEM /* @@ -388,74 +366,14 @@ struct zone { unsigned long *pageblock_flags; #endif /* CONFIG_SPARSEMEM */ -#ifdef CONFIG_COMPACTION - /* - * On compaction failure, 1<> PAGE_SHIFT */ unsigned long zone_start_pfn; @@ -500,9 +418,11 @@ struct zone { * adjust_managed_page_count() should be used instead of directly * touching zone->managed_pages and totalram_pages. */ + unsigned long managed_pages; unsigned long spanned_pages; unsigned long present_pages; - unsigned long managed_pages; + + const char *name; /* * Number of MIGRATE_RESEVE page block. 
To maintain for just @@ -510,10 +430,95 @@ struct zone { */ int nr_migrate_reserve_block; +#ifdef CONFIG_MEMORY_HOTPLUG + /* see spanned/present_pages for more description */ + seqlock_t span_seqlock; +#endif + /* - * rarely used fields: + * wait_table -- the array holding the hash table + * wait_table_hash_nr_entries -- the size of the hash table array + * wait_table_bits -- wait_table_size == (1 << wait_table_bits) + * + * The purpose of all these is to keep track of the people + * waiting for a page to become available and make them + * runnable again when possible. The trouble is that this + * consumes a lot of space, especially when so few things + * wait on pages at a given time. So instead of using + * per-page waitqueues, we use a waitqueue hash table. + * + * The bucket discipline is to sleep on the same queue when + * colliding and wake all in that wait queue when removing. + * When something wakes, it must check to be sure its page is + * truly available, a la thundering herd. The cost of a + * collision is great, but given the expected load of the + * table, they should be so rare as to be outweighed by the + * benefits from the saved space. + * + * __wait_on_page_locked() and unlock_page() in mm/filemap.c, are the + * primary users of these fields, and in mm/page_alloc.c + * free_area_init_core() performs the initialization of them. */ - const char *name; + wait_queue_head_t *wait_table; + unsigned long wait_table_hash_nr_entries; + unsigned long wait_table_bits; + + ZONE_PADDING(_pad1_) + + /* Write-intensive fields used from the page allocator */ + spinlock_t lock; + + /* free areas of different sizes */ + struct free_area free_area[MAX_ORDER]; + + /* zone flags, see below */ + unsigned long flags; + + ZONE_PADDING(_pad2_) + + /* Write-intensive fields used by page reclaim */ + + /* Fields commonly accessed by the page reclaim scanner */ + spinlock_t lru_lock; + unsigned long pages_scanned; /* since last reclaim */ + struct lruvec lruvec; + + /* Evictions & activations on the inactive file list */ + atomic_long_t inactive_age; + + /* + * When free pages are below this point, additional steps are taken + * when reading the number of free pages to avoid per-cpu counter + * drift allowing watermarks to be breached + */ + unsigned long percpu_drift_mark; + +#if defined CONFIG_COMPACTION || defined CONFIG_CMA + /* pfn where compaction free scanner should start */ + unsigned long compact_cached_free_pfn; + /* pfn where async and sync compaction migration scanner should start */ + unsigned long compact_cached_migrate_pfn[2]; +#endif + +#ifdef CONFIG_COMPACTION + /* + * On compaction failure, 1<lowmem_reserve[classzone_idx]; int o; long free_cma = 0; @@ -1723,7 +1722,7 @@ static bool __zone_watermark_ok(struct zone *z, unsigned int order, free_cma = zone_page_state(z, NR_FREE_CMA_PAGES); #endif - if (free_pages - free_cma <= min + lowmem_reserve) + if (free_pages - free_cma <= min + z->lowmem_reserve[classzone_idx]) return false; for (o = 0; o < order; o++) { /* At the next order, this order's pages become unavailable */ @@ -3254,7 +3253,7 @@ void show_free_areas(unsigned int filter) ); printk("lowmem_reserve[]:"); for (i = 0; i < MAX_NR_ZONES; i++) - printk(" %lu", zone->lowmem_reserve[i]); + printk(" %ld", zone->lowmem_reserve[i]); printk("\n"); } @@ -5575,7 +5574,7 @@ static void calculate_totalreserve_pages(void) for_each_online_pgdat(pgdat) { for (i = 0; i < MAX_NR_ZONES; i++) { struct zone *zone = pgdat->node_zones + i; - unsigned long max = 0; + long max = 0; /* Find valid 
and maximum lowmem_reserve in the zone */ for (j = i; j < MAX_NR_ZONES; j++) { diff --git a/mm/vmstat.c b/mm/vmstat.c index b37bd49bfd55..8267f77d1875 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1077,10 +1077,10 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, zone_page_state(zone, i)); seq_printf(m, - "\n protection: (%lu", + "\n protection: (%ld", zone->lowmem_reserve[0]); for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++) - seq_printf(m, ", %lu", zone->lowmem_reserve[i]); + seq_printf(m, ", %ld", zone->lowmem_reserve[i]); seq_printf(m, ")" "\n pagesets"); -- cgit v1.2.3-59-g8ed1b From 0d5d823ab4e608ec7b52ac4410de4cb74bbe0edd Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 6 Aug 2014 16:07:16 -0700 Subject: mm: move zone->pages_scanned into a vmstat counter zone->pages_scanned is a write-intensive cache line during page reclaim and it's also updated during page free. Move the counter into vmstat to take advantage of the per-cpu updates and do not update it in the free paths unless necessary. On a small UMA machine running tiobench the difference is marginal. On a 4-node machine the overhead is more noticable. Note that automatic NUMA balancing was disabled for this test as otherwise the system CPU overhead is unpredictable. 3.16.0-rc3 3.16.0-rc3 3.16.0-rc3 vanillarearrange-v5 vmstat-v5 User 746.94 759.78 774.56 System 65336.22 58350.98 32847.27 Elapsed 27553.52 27282.02 27415.04 Note that the overhead reduction will vary depending on where exactly pages are allocated and freed. Signed-off-by: Mel Gorman Acked-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 2 +- mm/page_alloc.c | 12 +++++++++--- mm/vmscan.c | 7 ++++--- mm/vmstat.c | 3 ++- 4 files changed, 16 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index ed0876bb902c..0bd77f730b38 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -143,6 +143,7 @@ enum zone_stat_item { NR_SHMEM, /* shmem pages (included tmpfs/GEM pages) */ NR_DIRTIED, /* page dirtyings since bootup */ NR_WRITTEN, /* page writings since bootup */ + NR_PAGES_SCANNED, /* pages scanned since last reclaim */ #ifdef CONFIG_NUMA NUMA_HIT, /* allocated in intended node */ NUMA_MISS, /* allocated in non intended node */ @@ -480,7 +481,6 @@ struct zone { /* Fields commonly accessed by the page reclaim scanner */ spinlock_t lru_lock; - unsigned long pages_scanned; /* since last reclaim */ struct lruvec lruvec; /* Evictions & activations on the inactive file list */ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index b7381d11f021..daa016063793 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -680,9 +680,12 @@ static void free_pcppages_bulk(struct zone *zone, int count, int migratetype = 0; int batch_free = 0; int to_free = count; + unsigned long nr_scanned; spin_lock(&zone->lock); - zone->pages_scanned = 0; + nr_scanned = zone_page_state(zone, NR_PAGES_SCANNED); + if (nr_scanned) + __mod_zone_page_state(zone, NR_PAGES_SCANNED, -nr_scanned); while (to_free) { struct page *page; @@ -731,8 +734,11 @@ static void free_one_page(struct zone *zone, unsigned int order, int migratetype) { + unsigned long nr_scanned; spin_lock(&zone->lock); - zone->pages_scanned = 0; + nr_scanned = zone_page_state(zone, NR_PAGES_SCANNED); + if (nr_scanned) + __mod_zone_page_state(zone, NR_PAGES_SCANNED, -nr_scanned); __free_one_page(page, pfn, zone, order, migratetype); if 
(unlikely(!is_migrate_isolate(migratetype))) @@ -3248,7 +3254,7 @@ void show_free_areas(unsigned int filter) K(zone_page_state(zone, NR_BOUNCE)), K(zone_page_state(zone, NR_FREE_CMA_PAGES)), K(zone_page_state(zone, NR_WRITEBACK_TEMP)), - zone->pages_scanned, + K(zone_page_state(zone, NR_PAGES_SCANNED)), (!zone_reclaimable(zone) ? "yes" : "no") ); printk("lowmem_reserve[]:"); diff --git a/mm/vmscan.c b/mm/vmscan.c index 5fec1ba9951f..9c8222b499b4 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -174,7 +174,8 @@ static unsigned long zone_reclaimable_pages(struct zone *zone) bool zone_reclaimable(struct zone *zone) { - return zone->pages_scanned < zone_reclaimable_pages(zone) * 6; + return zone_page_state(zone, NR_PAGES_SCANNED) < + zone_reclaimable_pages(zone) * 6; } static unsigned long get_lru_size(struct lruvec *lruvec, enum lru_list lru) @@ -1508,7 +1509,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, nr_taken); if (global_reclaim(sc)) { - zone->pages_scanned += nr_scanned; + __mod_zone_page_state(zone, NR_PAGES_SCANNED, nr_scanned); if (current_is_kswapd()) __count_zone_vm_events(PGSCAN_KSWAPD, zone, nr_scanned); else @@ -1698,7 +1699,7 @@ static void shrink_active_list(unsigned long nr_to_scan, nr_taken = isolate_lru_pages(nr_to_scan, lruvec, &l_hold, &nr_scanned, sc, isolate_mode, lru); if (global_reclaim(sc)) - zone->pages_scanned += nr_scanned; + __mod_zone_page_state(zone, NR_PAGES_SCANNED, nr_scanned); reclaim_stat->recent_scanned[file] += nr_taken; diff --git a/mm/vmstat.c b/mm/vmstat.c index 8267f77d1875..e574e883fa70 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -763,6 +763,7 @@ const char * const vmstat_text[] = { "nr_shmem", "nr_dirtied", "nr_written", + "nr_pages_scanned", #ifdef CONFIG_NUMA "numa_hit", @@ -1067,7 +1068,7 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, min_wmark_pages(zone), low_wmark_pages(zone), high_wmark_pages(zone), - zone->pages_scanned, + zone_page_state(zone, NR_PAGES_SCANNED), zone->spanned_pages, zone->present_pages, zone->managed_pages); -- cgit v1.2.3-59-g8ed1b From 4ffeaf3560a52b4a69cc7909873d08c0ef5909d4 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 6 Aug 2014 16:07:22 -0700 Subject: mm: page_alloc: reduce cost of the fair zone allocation policy The fair zone allocation policy round-robins allocations between zones within a node to avoid age inversion problems during reclaim. If the first allocation fails, the batch counts are reset and a second attempt made before entering the slow path. One assumption made with this scheme is that batches expire at roughly the same time and the resets each time are justified. This assumption does not hold when zones reach their low watermark as the batches will be consumed at uneven rates. Allocation failure due to watermark depletion result in additional zonelist scans for the reset and another watermark check before hitting the slowpath. On UMA, the benefit is negligible -- around 0.25%. On 4-socket NUMA machine it's variable due to the variability of measuring overhead with the vmstat changes. The system CPU overhead comparison looks like 3.16.0-rc3 3.16.0-rc3 3.16.0-rc3 vanilla vmstat-v5 lowercost-v5 User 746.94 774.56 802.00 System 65336.22 32847.27 40852.33 Elapsed 27553.52 27415.04 27368.46 However it is worth noting that the overall benchmark still completed faster and intuitively it makes sense to take as few passes as possible through the zonelists. 
Signed-off-by: Mel Gorman Acked-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 6 +++ mm/page_alloc.c | 101 ++++++++++++++++++++++++++----------------------- 2 files changed, 59 insertions(+), 48 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 0bd77f730b38..318df7051850 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -534,6 +534,7 @@ typedef enum { ZONE_WRITEBACK, /* reclaim scanning has recently found * many pages under writeback */ + ZONE_FAIR_DEPLETED, /* fair zone policy batch depleted */ } zone_flags_t; static inline void zone_set_flag(struct zone *zone, zone_flags_t flag) @@ -571,6 +572,11 @@ static inline int zone_is_reclaim_locked(const struct zone *zone) return test_bit(ZONE_RECLAIM_LOCKED, &zone->flags); } +static inline int zone_is_fair_depleted(const struct zone *zone) +{ + return test_bit(ZONE_FAIR_DEPLETED, &zone->flags); +} + static inline int zone_is_oom_locked(const struct zone *zone) { return test_bit(ZONE_OOM_LOCKED, &zone->flags); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 6e5e8f762532..fb9908148474 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1612,6 +1612,9 @@ again: } __mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order)); + if (zone_page_state(zone, NR_ALLOC_BATCH) == 0 && + !zone_is_fair_depleted(zone)) + zone_set_flag(zone, ZONE_FAIR_DEPLETED); __count_zone_vm_events(PGALLOC, zone, 1 << order); zone_statistics(preferred_zone, zone, gfp_flags); @@ -1923,6 +1926,18 @@ static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone) #endif /* CONFIG_NUMA */ +static void reset_alloc_batches(struct zone *preferred_zone) +{ + struct zone *zone = preferred_zone->zone_pgdat->node_zones; + + do { + mod_zone_page_state(zone, NR_ALLOC_BATCH, + high_wmark_pages(zone) - low_wmark_pages(zone) - + atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH])); + zone_clear_flag(zone, ZONE_FAIR_DEPLETED); + } while (zone++ != preferred_zone); +} + /* * get_page_from_freelist goes through the zonelist trying to allocate * a page. @@ -1940,8 +1955,12 @@ get_page_from_freelist(gfp_t gfp_mask, nodemask_t *nodemask, unsigned int order, int did_zlc_setup = 0; /* just call zlc_setup() one time */ bool consider_zone_dirty = (alloc_flags & ALLOC_WMARK_LOW) && (gfp_mask & __GFP_WRITE); + int nr_fair_skipped = 0; + bool zonelist_rescan; zonelist_scan: + zonelist_rescan = false; + /* * Scan zonelist, looking for a zone with enough free. * See also __cpuset_node_allowed_softwall() comment in kernel/cpuset.c. @@ -1966,8 +1985,10 @@ zonelist_scan: if (alloc_flags & ALLOC_FAIR) { if (!zone_local(preferred_zone, zone)) break; - if (zone_page_state(zone, NR_ALLOC_BATCH) <= 0) + if (zone_is_fair_depleted(zone)) { + nr_fair_skipped++; continue; + } } /* * When allocating a page cache page for writing, we @@ -2073,13 +2094,7 @@ this_zone_full: zlc_mark_zone_full(zonelist, z); } - if (unlikely(IS_ENABLED(CONFIG_NUMA) && page == NULL && zlc_active)) { - /* Disable zlc cache for second zonelist scan */ - zlc_active = 0; - goto zonelist_scan; - } - - if (page) + if (page) { /* * page->pfmemalloc is set when ALLOC_NO_WATERMARKS was * necessary to allocate the page. The expectation is @@ -2088,8 +2103,37 @@ this_zone_full: * for !PFMEMALLOC purposes. */ page->pfmemalloc = !!(alloc_flags & ALLOC_NO_WATERMARKS); + return page; + } - return page; + /* + * The first pass makes sure allocations are spread fairly within the + * local node. 
However, the local node might have free pages left + * after the fairness batches are exhausted, and remote zones haven't + * even been considered yet. Try once more without fairness, and + * include remote zones now, before entering the slowpath and waking + * kswapd: prefer spilling to a remote zone over swapping locally. + */ + if (alloc_flags & ALLOC_FAIR) { + alloc_flags &= ~ALLOC_FAIR; + if (nr_fair_skipped) { + zonelist_rescan = true; + reset_alloc_batches(preferred_zone); + } + if (nr_online_nodes > 1) + zonelist_rescan = true; + } + + if (unlikely(IS_ENABLED(CONFIG_NUMA) && zlc_active)) { + /* Disable zlc cache for second zonelist scan */ + zlc_active = 0; + zonelist_rescan = true; + } + + if (zonelist_rescan) + goto zonelist_scan; + + return NULL; } /* @@ -2410,28 +2454,6 @@ __alloc_pages_high_priority(gfp_t gfp_mask, unsigned int order, return page; } -static void reset_alloc_batches(struct zonelist *zonelist, - enum zone_type high_zoneidx, - struct zone *preferred_zone) -{ - struct zoneref *z; - struct zone *zone; - - for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) { - /* - * Only reset the batches of zones that were actually - * considered in the fairness pass, we don't want to - * trash fairness information for zones that are not - * actually part of this zonelist's round-robin cycle. - */ - if (!zone_local(preferred_zone, zone)) - continue; - mod_zone_page_state(zone, NR_ALLOC_BATCH, - high_wmark_pages(zone) - low_wmark_pages(zone) - - atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH])); - } -} - static void wake_all_kswapds(unsigned int order, struct zonelist *zonelist, enum zone_type high_zoneidx, @@ -2767,28 +2789,11 @@ retry_cpuset: if (allocflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE) alloc_flags |= ALLOC_CMA; #endif -retry: /* First allocation attempt */ page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order, zonelist, high_zoneidx, alloc_flags, preferred_zone, classzone_idx, migratetype); if (unlikely(!page)) { - /* - * The first pass makes sure allocations are spread - * fairly within the local node. However, the local - * node might have free pages left after the fairness - * batches are exhausted, and remote zones haven't - * even been considered yet. Try once more without - * fairness, and include remote zones now, before - * entering the slowpath and waking kswapd: prefer - * spilling to a remote zone over swapping locally. - */ - if (alloc_flags & ALLOC_FAIR) { - reset_alloc_batches(zonelist, high_zoneidx, - preferred_zone); - alloc_flags &= ~ALLOC_FAIR; - goto retry; - } /* * Runtime PM, block IO and its error handling path * can deadlock because I/O on the device might not -- cgit v1.2.3-59-g8ed1b From 9a95f3cf7b33d66fa64727cff8cd2f2a9d09f335 Mon Sep 17 00:00:00 2001 From: Paul Cassella Date: Wed, 6 Aug 2014 16:07:24 -0700 Subject: mm: describe mmap_sem rules for __lock_page_or_retry() and callers Add a comment describing the circumstances in which __lock_page_or_retry() will or will not release the mmap_sem when returning 0. Add comments to lock_page_or_retry()'s callers (filemap_fault(), do_swap_page()) noting the impact on VM_FAULT_RETRY returns. Add comments on up the call tree, particularly replacing the false "We return with mmap_sem still held" comments. 
Signed-off-by: Paul Cassella Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/mm/fault.c | 3 ++- include/linux/pagemap.h | 3 +++ mm/filemap.c | 23 +++++++++++++++++++++++ mm/gup.c | 18 +++++++++++++++--- mm/memory.c | 34 +++++++++++++++++++++++++++++++--- mm/mlock.c | 9 ++++++++- 6 files changed, 82 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 1dbade870f90..a24194681513 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -1218,7 +1218,8 @@ good_area: /* * If for any reason at all we couldn't handle the fault, * make sure we exit gracefully rather than endlessly redo - * the fault: + * the fault. Since we never set FAULT_FLAG_RETRY_NOWAIT, if + * we get VM_FAULT_RETRY back, the mmap_sem has been unlocked. */ fault = handle_mm_fault(mm, vma, address, flags); diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index e1474ae18c88..3df8c7db7a4e 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -484,6 +484,9 @@ static inline int lock_page_killable(struct page *page) /* * lock_page_or_retry - Lock the page, unless this would block and the * caller indicated that it can handle a retry. + * + * Return value and mmap_sem implications depend on flags; see + * __lock_page_or_retry(). */ static inline int lock_page_or_retry(struct page *page, struct mm_struct *mm, unsigned int flags) diff --git a/mm/filemap.c b/mm/filemap.c index 7e85c8147e1b..af19a6b079f5 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -808,6 +808,17 @@ int __lock_page_killable(struct page *page) } EXPORT_SYMBOL_GPL(__lock_page_killable); +/* + * Return values: + * 1 - page is locked; mmap_sem is still held. + * 0 - page is not locked. + * mmap_sem has been released (up_read()), unless flags had both + * FAULT_FLAG_ALLOW_RETRY and FAULT_FLAG_RETRY_NOWAIT set, in + * which case mmap_sem is still held. + * + * If neither ALLOW_RETRY nor KILLABLE are set, will always return 1 + * with the page locked and the mmap_sem unperturbed. + */ int __lock_page_or_retry(struct page *page, struct mm_struct *mm, unsigned int flags) { @@ -1827,6 +1838,18 @@ static void do_async_mmap_readahead(struct vm_area_struct *vma, * The goto's are kind of ugly, but this streamlines the normal case of having * it in the page cache, and handles the special cases reasonably without * having a lot of duplicated code. + * + * vma->vm_mm->mmap_sem must be held on entry. + * + * If our return value has VM_FAULT_RETRY set, it's because + * lock_page_or_retry() returned 0. + * The mmap_sem has usually been released in this case. + * See __lock_page_or_retry() for the exception. + * + * If our return value does not have VM_FAULT_RETRY set, the mmap_sem + * has not been released. + * + * We never return with VM_FAULT_RETRY and a bit from VM_FAULT_ERROR set. */ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) { diff --git a/mm/gup.c b/mm/gup.c index cc5a9e7adea7..91d044b1600d 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -258,6 +258,11 @@ unmap: return ret; } +/* + * mmap_sem must be held on entry. If @nonblocking != NULL and + * *@flags does not include FOLL_NOWAIT, the mmap_sem may be released. + * If it is, *@nonblocking will be set to 0 and -EBUSY returned. 
+ */ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma, unsigned long address, unsigned int *flags, int *nonblocking) { @@ -373,7 +378,7 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags) * with a put_page() call when it is finished with. vmas will only * remain valid while mmap_sem is held. * - * Must be called with mmap_sem held for read or write. + * Must be called with mmap_sem held. It may be released. See below. * * __get_user_pages walks a process's page tables and takes a reference to * each struct page that each user address corresponds to at a given @@ -396,7 +401,14 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags) * * If @nonblocking != NULL, __get_user_pages will not wait for disk IO * or mmap_sem contention, and if waiting is needed to pin all pages, - * *@nonblocking will be set to 0. + * *@nonblocking will be set to 0. Further, if @gup_flags does not + * include FOLL_NOWAIT, the mmap_sem will be released via up_read() in + * this case. + * + * A caller using such a combination of @nonblocking and @gup_flags + * must therefore hold the mmap_sem for reading only, and recognize + * when it's been released. Otherwise, it must be held for either + * reading or writing and will not be released. * * In most cases, get_user_pages or get_user_pages_fast should be used * instead of __get_user_pages. __get_user_pages should be used only if @@ -528,7 +540,7 @@ EXPORT_SYMBOL(__get_user_pages); * such architectures, gup() will not be enough to make a subsequent access * succeed. * - * This should be called with the mm_sem held for read. + * This has the same semantics wrt the @mm->mmap_sem as does filemap_fault(). */ int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm, unsigned long address, unsigned int fault_flags) diff --git a/mm/memory.c b/mm/memory.c index 7e131325bdf8..4d0a543f3bb3 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2399,7 +2399,10 @@ EXPORT_SYMBOL(unmap_mapping_range); /* * We enter with non-exclusive mmap_sem (to exclude vma changes, * but allow concurrent faults), and pte mapped but not yet locked. - * We return with mmap_sem still held, but pte unmapped and unlocked. + * We return with pte unmapped and unlocked. + * + * We return with the mmap_sem locked or unlocked in the same cases + * as does filemap_fault(). */ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, pte_t *page_table, pmd_t *pmd, @@ -2688,6 +2691,11 @@ oom: return VM_FAULT_OOM; } +/* + * The mmap_sem must have been held on entry, and may have been + * released depending on flags and vma->vm_ops->fault() return value. + * See filemap_fault() and __lock_page_retry(). + */ static int __do_fault(struct vm_area_struct *vma, unsigned long address, pgoff_t pgoff, unsigned int flags, struct page **page) { @@ -3016,6 +3024,12 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma, return ret; } +/* + * We enter with non-exclusive mmap_sem (to exclude vma changes, + * but allow concurrent faults). + * The mmap_sem may have been released depending on flags and our + * return value. See filemap_fault() and __lock_page_or_retry(). 
+ */ static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, pte_t *page_table, pmd_t *pmd, unsigned int flags, pte_t orig_pte) @@ -3040,7 +3054,9 @@ static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma, * * We enter with non-exclusive mmap_sem (to exclude vma changes, * but allow concurrent faults), and pte mapped but not yet locked. - * We return with mmap_sem still held, but pte unmapped and unlocked. + * We return with pte unmapped and unlocked. + * The mmap_sem may have been released depending on flags and our + * return value. See filemap_fault() and __lock_page_or_retry(). */ static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, pte_t *page_table, pmd_t *pmd, @@ -3172,7 +3188,10 @@ out: * * We enter with non-exclusive mmap_sem (to exclude vma changes, * but allow concurrent faults), and pte mapped but not yet locked. - * We return with mmap_sem still held, but pte unmapped and unlocked. + * We return with pte unmapped and unlocked. + * + * The mmap_sem may have been released depending on flags and our + * return value. See filemap_fault() and __lock_page_or_retry(). */ static int handle_pte_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, @@ -3232,6 +3251,9 @@ unlock: /* * By the time we get here, we already hold the mm semaphore + * + * The mmap_sem may have been released depending on flags and our + * return value. See filemap_fault() and __lock_page_or_retry(). */ static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags) @@ -3313,6 +3335,12 @@ static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, return handle_pte_fault(mm, vma, address, pte, pmd, flags); } +/* + * By the time we get here, we already hold the mm semaphore + * + * The mmap_sem may have been released depending on flags and our + * return value. See filemap_fault() and __lock_page_or_retry(). + */ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags) { diff --git a/mm/mlock.c b/mm/mlock.c index b1eb53634005..ce84cb0b83ef 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -210,12 +210,19 @@ out: * @vma: target vma * @start: start address * @end: end address + * @nonblocking: * * This takes care of making the pages present too. * * return 0 on success, negative error code on error. * - * vma->vm_mm->mmap_sem must be held for at least read. + * vma->vm_mm->mmap_sem must be held. + * + * If @nonblocking is NULL, it may be held for read or write and will + * be unperturbed. + * + * If @nonblocking is non-NULL, it must held for read only and may be + * released. If it's released, *@nonblocking will be set to 0. */ long __mlock_vma_pages_range(struct vm_area_struct *vma, unsigned long start, unsigned long end, int *nonblocking) -- cgit v1.2.3-59-g8ed1b From 6326440077a48d2c3b2993f3b3f2d969f09b6917 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Wed, 6 Aug 2014 16:07:36 -0700 Subject: memory-hotplug: add zone_for_memory() for selecting zone for new memory This series of patches fixes a problem when adding memory in bad manner. 
For example: for a x86_64 machine booted with "mem=400M" and with 2GiB memory installed, following commands cause problem: # echo 0x40000000 > /sys/devices/system/memory/probe [ 28.613895] init_memory_mapping: [mem 0x40000000-0x47ffffff] # echo 0x48000000 > /sys/devices/system/memory/probe [ 28.693675] init_memory_mapping: [mem 0x48000000-0x4fffffff] # echo online_movable > /sys/devices/system/memory/memory9/state # echo 0x50000000 > /sys/devices/system/memory/probe [ 29.084090] init_memory_mapping: [mem 0x50000000-0x57ffffff] # echo 0x58000000 > /sys/devices/system/memory/probe [ 29.151880] init_memory_mapping: [mem 0x58000000-0x5fffffff] # echo online_movable > /sys/devices/system/memory/memory11/state # echo online> /sys/devices/system/memory/memory8/state # echo online> /sys/devices/system/memory/memory10/state # echo offline> /sys/devices/system/memory/memory9/state [ 30.558819] Offlined Pages 32768 # free total used free shared buffers cached Mem: 780588 18014398509432020 830552 0 0 51180 -/+ buffers/cache: 18014398509380840 881732 Swap: 0 0 0 This is because the above commands probe higher memory after online a section with online_movable, which causes ZONE_HIGHMEM (or ZONE_NORMAL for systems without ZONE_HIGHMEM) overlaps ZONE_MOVABLE. After the second online_movable, the problem can be observed from zoneinfo: # cat /proc/zoneinfo ... Node 0, zone Movable pages free 65491 min 250 low 312 high 375 scanned 0 spanned 18446744073709518848 present 65536 managed 65536 ... This series of patches solve the problem by checking ZONE_MOVABLE when choosing zone for new memory. If new memory is inside or higher than ZONE_MOVABLE, makes it go there instead. After applying this series of patches, following are free and zoneinfo result (after offlining memory9): bash-4.2# free total used free shared buffers cached Mem: 780956 80112 700844 0 0 51180 -/+ buffers/cache: 28932 752024 Swap: 0 0 0 bash-4.2# cat /proc/zoneinfo Node 0, zone DMA pages free 3389 min 14 low 17 high 21 scanned 0 spanned 4095 present 3998 managed 3977 nr_free_pages 3389 ... start_pfn: 1 inactive_ratio: 1 Node 0, zone DMA32 pages free 73724 min 341 low 426 high 511 scanned 0 spanned 98304 present 98304 managed 92958 nr_free_pages 73724 ... start_pfn: 4096 inactive_ratio: 1 Node 0, zone Normal pages free 32630 min 120 low 150 high 180 scanned 0 spanned 32768 present 32768 managed 32768 nr_free_pages 32630 ... start_pfn: 262144 inactive_ratio: 1 Node 0, zone Movable pages free 65476 min 241 low 301 high 361 scanned 0 spanned 98304 present 65536 managed 65536 nr_free_pages 65476 ... start_pfn: 294912 inactive_ratio: 1 This patch (of 7): Introduce zone_for_memory() in arch independent code for arch_add_memory() use. Many arch_add_memory() function simply selects ZONE_HIGHMEM or ZONE_NORMAL and add new memory into it. However, with the existance of ZONE_MOVABLE, the selection method should be carefully considered: if new, higher memory is added after ZONE_MOVABLE is setup, the default zone and ZONE_MOVABLE may overlap each other. should_add_memory_movable() checks the status of ZONE_MOVABLE. If it has already contain memory, compare the address of new memory and movable memory. If new memory is higher than movable, it should be added into ZONE_MOVABLE instead of default zone. Signed-off-by: Wang Nan Cc: Zhang Yanfei Cc: Dave Hansen Cc: Ingo Molnar Cc: Yinghai Lu Cc: "Mel Gorman" Cc: Thomas Gleixner Cc: "H. 
Peter Anvin" Cc: "Luck, Tony" Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Chris Metcalf Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memory_hotplug.h | 1 + mm/memory_hotplug.c | 28 ++++++++++++++++++++++++++++ 2 files changed, 29 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 79dd9eca054f..d9524c49d767 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -259,6 +259,7 @@ static inline void remove_memory(int nid, u64 start, u64 size) {} extern int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn, void *arg, int (*func)(struct memory_block *, void *)); extern int add_memory(int nid, u64 start, u64 size); +extern int zone_for_memory(int nid, u64 start, u64 size, int zone_default); extern int arch_add_memory(int nid, u64 start, u64 size); extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages); extern bool is_memblock_offlined(struct memory_block *mem); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index a3797d3fd8a4..2ff8c2325e96 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1159,6 +1159,34 @@ static int check_hotplug_memory_range(u64 start, u64 size) return 0; } +/* + * If movable zone has already been setup, newly added memory should be check. + * If its address is higher than movable zone, it should be added as movable. + * Without this check, movable zone may overlap with other zone. + */ +static int should_add_memory_movable(int nid, u64 start, u64 size) +{ + unsigned long start_pfn = start >> PAGE_SHIFT; + pg_data_t *pgdat = NODE_DATA(nid); + struct zone *movable_zone = pgdat->node_zones + ZONE_MOVABLE; + + if (zone_is_empty(movable_zone)) + return 0; + + if (movable_zone->zone_start_pfn <= start_pfn) + return 1; + + return 0; +} + +int zone_for_memory(int nid, u64 start, u64 size, int zone_default) +{ + if (should_add_memory_movable(nid, start, size)) + return ZONE_MOVABLE; + + return zone_default; +} + /* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */ int __ref add_memory(int nid, u64 start, u64 size) { -- cgit v1.2.3-59-g8ed1b From 8d060bf490930f305c4efc45724e861a268f4d2f Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Wed, 6 Aug 2014 16:07:50 -0700 Subject: mm, oom: ensure memoryless node zonelist always includes zones With memoryless node support being worked on, it's possible that for optimizations that a node may not have a non-NULL zonelist. When CONFIG_NUMA is enabled and node 0 is memoryless, this means the zonelist for first_online_node may become NULL. The oom killer requires a zonelist that includes all memory zones for the sysrq trigger and pagefault out of memory handler. Ensure that a non-NULL zonelist is always passed to the oom killer. [akpm@linux-foundation.org: fix non-numa build] Signed-off-by: David Rientjes Cc: "Kirill A. 
Shutemov" Cc: Johannes Weiner Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/tty/sysrq.c | 2 +- include/linux/nodemask.h | 11 ++++++++++- mm/oom_kill.c | 2 +- 3 files changed, 12 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c index 454b65898e2c..42bad18c66c9 100644 --- a/drivers/tty/sysrq.c +++ b/drivers/tty/sysrq.c @@ -355,7 +355,7 @@ static struct sysrq_key_op sysrq_term_op = { static void moom_callback(struct work_struct *ignored) { - out_of_memory(node_zonelist(first_online_node, GFP_KERNEL), GFP_KERNEL, + out_of_memory(node_zonelist(first_memory_node, GFP_KERNEL), GFP_KERNEL, 0, NULL, true); } diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h index 58b9a02c38d2..83a6aeda899d 100644 --- a/include/linux/nodemask.h +++ b/include/linux/nodemask.h @@ -430,7 +430,15 @@ static inline int num_node_state(enum node_states state) for_each_node_mask((__node), node_states[__state]) #define first_online_node first_node(node_states[N_ONLINE]) -#define next_online_node(nid) next_node((nid), node_states[N_ONLINE]) +#define first_memory_node first_node(node_states[N_MEMORY]) +static inline int next_online_node(int nid) +{ + return next_node(nid, node_states[N_ONLINE]); +} +static inline int next_memory_node(int nid) +{ + return next_node(nid, node_states[N_MEMORY]); +} extern int nr_node_ids; extern int nr_online_nodes; @@ -471,6 +479,7 @@ static inline int num_node_state(enum node_states state) for ( (node) = 0; (node) == 0; (node) = 1) #define first_online_node 0 +#define first_memory_node 0 #define next_online_node(nid) (MAX_NUMNODES) #define nr_node_ids 1 #define nr_online_nodes 1 diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 3291e82d4352..b0a1e1ff0353 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -694,7 +694,7 @@ void pagefault_out_of_memory(void) if (mem_cgroup_oom_synchronize(true)) return; - zonelist = node_zonelist(first_online_node, GFP_KERNEL); + zonelist = node_zonelist(first_memory_node, GFP_KERNEL); if (try_set_zonelist_oom(zonelist, GFP_KERNEL)) { out_of_memory(NULL, 0, 0, NULL, false); clear_zonelist_oom(zonelist, GFP_KERNEL); -- cgit v1.2.3-59-g8ed1b From e972a070e2d3296cd2e2cc2fd0561ce89a1d5ebf Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Wed, 6 Aug 2014 16:07:52 -0700 Subject: mm, oom: rename zonelist locking functions try_set_zonelist_oom() and clear_zonelist_oom() are not named properly to imply that they require locking semantics to avoid out_of_memory() being reordered. zone_scan_lock is required for both functions to ensure that there is proper locking synchronization. Rename try_set_zonelist_oom() to oom_zonelist_trylock() and rename clear_zonelist_oom() to oom_zonelist_unlock() to imply there is proper locking semantics. At the same time, convert oom_zonelist_trylock() to return bool instead of int since only success and failure are tested. Signed-off-by: David Rientjes Cc: "Kirill A. 
Shutemov" Cc: Johannes Weiner Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/oom.h | 4 ++-- mm/oom_kill.c | 30 +++++++++++++----------------- mm/page_alloc.c | 6 +++--- 3 files changed, 18 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/oom.h b/include/linux/oom.h index 4cd62677feb9..647395a1a550 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -55,8 +55,8 @@ extern void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, struct mem_cgroup *memcg, nodemask_t *nodemask, const char *message); -extern int try_set_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_flags); -extern void clear_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_flags); +extern bool oom_zonelist_trylock(struct zonelist *zonelist, gfp_t gfp_flags); +extern void oom_zonelist_unlock(struct zonelist *zonelist, gfp_t gfp_flags); extern void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask, int order, const nodemask_t *nodemask); diff --git a/mm/oom_kill.c b/mm/oom_kill.c index b0a1e1ff0353..d33aca1552ad 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -559,28 +559,25 @@ EXPORT_SYMBOL_GPL(unregister_oom_notifier); * if a parallel OOM killing is already taking place that includes a zone in * the zonelist. Otherwise, locks all zones in the zonelist and returns 1. */ -int try_set_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_mask) +bool oom_zonelist_trylock(struct zonelist *zonelist, gfp_t gfp_mask) { struct zoneref *z; struct zone *zone; - int ret = 1; + bool ret = true; spin_lock(&zone_scan_lock); - for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) { + for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) if (zone_is_oom_locked(zone)) { - ret = 0; + ret = false; goto out; } - } - for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) { - /* - * Lock each zone in the zonelist under zone_scan_lock so a - * parallel invocation of try_set_zonelist_oom() doesn't succeed - * when it shouldn't. - */ + /* + * Lock each zone in the zonelist under zone_scan_lock so a parallel + * call to oom_zonelist_trylock() doesn't succeed when it shouldn't. + */ + for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) zone_set_flag(zone, ZONE_OOM_LOCKED); - } out: spin_unlock(&zone_scan_lock); @@ -592,15 +589,14 @@ out: * allocation attempts with zonelists containing them may now recall the OOM * killer, if necessary. 
*/ -void clear_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_mask) +void oom_zonelist_unlock(struct zonelist *zonelist, gfp_t gfp_mask) { struct zoneref *z; struct zone *zone; spin_lock(&zone_scan_lock); - for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) { + for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) zone_clear_flag(zone, ZONE_OOM_LOCKED); - } spin_unlock(&zone_scan_lock); } @@ -695,8 +691,8 @@ void pagefault_out_of_memory(void) return; zonelist = node_zonelist(first_memory_node, GFP_KERNEL); - if (try_set_zonelist_oom(zonelist, GFP_KERNEL)) { + if (oom_zonelist_trylock(zonelist, GFP_KERNEL)) { out_of_memory(NULL, 0, 0, NULL, false); - clear_zonelist_oom(zonelist, GFP_KERNEL); + oom_zonelist_unlock(zonelist, GFP_KERNEL); } } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index fb9908148474..578236089ec1 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2246,8 +2246,8 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order, { struct page *page; - /* Acquire the OOM killer lock for the zones in zonelist */ - if (!try_set_zonelist_oom(zonelist, gfp_mask)) { + /* Acquire the per-zone oom lock for each zone */ + if (!oom_zonelist_trylock(zonelist, gfp_mask)) { schedule_timeout_uninterruptible(1); return NULL; } @@ -2285,7 +2285,7 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order, out_of_memory(zonelist, gfp_mask, order, nodemask, false); out: - clear_zonelist_oom(zonelist, gfp_mask); + oom_zonelist_unlock(zonelist, gfp_mask); return page; } -- cgit v1.2.3-59-g8ed1b From 1d352bfd41e8219cdf9bebe79677700bdc38b540 Mon Sep 17 00:00:00 2001 From: Chintan Pandya Date: Wed, 6 Aug 2014 16:08:18 -0700 Subject: mm: BUG when __kmap_atomic_idx equals KM_TYPE_NR __kmap_atomic_idx is per_cpu variable. Each CPU can use KM_TYPE_NR entries from FIXMAP i.e. from 0 to KM_TYPE_NR - 1. Allowing __kmap_atomic_idx to over- shoot to KM_TYPE_NR can mess up with next CPU's 0th entry which is a bug. Hence BUG_ON if __kmap_atomic_idx >= KM_TYPE_NR. Fix the off-by-on in this test. Signed-off-by: Chintan Pandya Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/highmem.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 7fb31da45d03..9286a46b7d69 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -93,7 +93,7 @@ static inline int kmap_atomic_idx_push(void) #ifdef CONFIG_DEBUG_HIGHMEM WARN_ON_ONCE(in_irq() && !irqs_disabled()); - BUG_ON(idx > KM_TYPE_NR); + BUG_ON(idx >= KM_TYPE_NR); #endif return idx; } -- cgit v1.2.3-59-g8ed1b From b972216e27d1c853eced33f8638926636c606341 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 6 Aug 2014 16:08:20 -0700 Subject: mmu_notifier: add call_srcu and sync function for listener to delay call and sync MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When kernel device drivers or subsystems want to bind their lifespan to t= he lifespan of the mm_struct, they usually use one of the following methods: 1. Manually calling a function in the interested kernel module. The funct= ion call needs to be placed in mmput. This method was rejected by several ker= nel maintainers. 2. Registering to the mmu notifier release mechanism. The problem with the latter approach is that the mmu_notifier_release cal= lback is called from__mmu_notifier_release (called from exit_mmap). 
That functi= on iterates over the list of mmu notifiers and don't expect the release call= back function to remove itself from the list. Therefore, the callback function= in the kernel module can't release the mmu_notifier_object, which is actuall= y the kernel module's object itself. As a result, the destruction of the kernel module's object must to be done in a delayed fashion. This patch adds support for this delayed callback, by adding a new mmu_notifier_call_srcu function that receives a function ptr and calls th= at function with call_srcu. In that function, the kernel module releases its object. To use mmu_notifier_call_srcu, the calling module needs to call b= efore that a new function called mmu_notifier_unregister_no_release that as its= name implies, unregisters a notifier without calling its notifier release call= back. This patch also adds a function that will call barrier_srcu so those kern= el modules can sync with mmu_notifier. Signed-off-by: Peter Zijlstra Signed-off-by: Jérôme Glisse Signed-off-by: Oded Gabbay Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmu_notifier.h | 6 ++++++ mm/mmu_notifier.c | 40 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 45 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index deca87452528..27288692241e 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -170,6 +170,8 @@ extern int __mmu_notifier_register(struct mmu_notifier *mn, struct mm_struct *mm); extern void mmu_notifier_unregister(struct mmu_notifier *mn, struct mm_struct *mm); +extern void mmu_notifier_unregister_no_release(struct mmu_notifier *mn, + struct mm_struct *mm); extern void __mmu_notifier_mm_destroy(struct mm_struct *mm); extern void __mmu_notifier_release(struct mm_struct *mm); extern int __mmu_notifier_clear_flush_young(struct mm_struct *mm, @@ -288,6 +290,10 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm) set_pte_at(___mm, ___address, __ptep, ___pte); \ }) +extern void mmu_notifier_call_srcu(struct rcu_head *rcu, + void (*func)(struct rcu_head *rcu)); +extern void mmu_notifier_synchronize(void); + #else /* CONFIG_MMU_NOTIFIER */ static inline void mmu_notifier_release(struct mm_struct *mm) diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c index 41cefdf0aadd..950813b1eb36 100644 --- a/mm/mmu_notifier.c +++ b/mm/mmu_notifier.c @@ -22,6 +22,25 @@ /* global SRCU for all MMs */ static struct srcu_struct srcu; +/* + * This function allows mmu_notifier::release callback to delay a call to + * a function that will free appropriate resources. The function must be + * quick and must not block. + */ +void mmu_notifier_call_srcu(struct rcu_head *rcu, + void (*func)(struct rcu_head *rcu)) +{ + call_srcu(&srcu, rcu, func); +} +EXPORT_SYMBOL_GPL(mmu_notifier_call_srcu); + +void mmu_notifier_synchronize(void) +{ + /* Wait for any running method to finish. 
*/ + srcu_barrier(&srcu); +} +EXPORT_SYMBOL_GPL(mmu_notifier_synchronize); + /* * This function can't run concurrently against mmu_notifier_register * because mm->mm_users > 0 during mmu_notifier_register and exit_mmap @@ -53,7 +72,6 @@ void __mmu_notifier_release(struct mm_struct *mm) */ if (mn->ops->release) mn->ops->release(mn, mm); - srcu_read_unlock(&srcu, id); spin_lock(&mm->mmu_notifier_mm->lock); while (unlikely(!hlist_empty(&mm->mmu_notifier_mm->list))) { @@ -69,6 +87,7 @@ void __mmu_notifier_release(struct mm_struct *mm) hlist_del_init_rcu(&mn->hlist); } spin_unlock(&mm->mmu_notifier_mm->lock); + srcu_read_unlock(&srcu, id); /* * synchronize_srcu here prevents mmu_notifier_release from returning to @@ -325,6 +344,25 @@ void mmu_notifier_unregister(struct mmu_notifier *mn, struct mm_struct *mm) } EXPORT_SYMBOL_GPL(mmu_notifier_unregister); +/* + * Same as mmu_notifier_unregister but no callback and no srcu synchronization. + */ +void mmu_notifier_unregister_no_release(struct mmu_notifier *mn, + struct mm_struct *mm) +{ + spin_lock(&mm->mmu_notifier_mm->lock); + /* + * Can not use list_del_rcu() since __mmu_notifier_release + * can delete it before we hold the lock. + */ + hlist_del_init_rcu(&mn->hlist); + spin_unlock(&mm->mmu_notifier_mm->lock); + + BUG_ON(atomic_read(&mm->mm_count) <= 0); + mmdrop(mm); +} +EXPORT_SYMBOL_GPL(mmu_notifier_unregister_no_release); + static int __init mmu_notifier_init(void) { return init_srcu_struct(&srcu); -- cgit v1.2.3-59-g8ed1b From 99eef8e9369abe009006b4fa7f6ca5086c09cf46 Mon Sep 17 00:00:00 2001 From: Dan Streetman Date: Wed, 6 Aug 2014 16:08:33 -0700 Subject: mm/zbud: change zbud_alloc size type to size_t Change the type of the zbud_alloc() size param from unsigned int to size_t. Technically, this should not make any difference, as the zbud implementation already restricts the size to well within either type's limits; but as zsmalloc (and kmalloc) use size_t, and zpool will use size_t, this brings the size parameter type in line with zsmalloc/zpool. Signed-off-by: Dan Streetman Acked-by: Seth Jennings Tested-by: Seth Jennings Cc: Weijie Yang Cc: Minchan Kim Cc: Nitin Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/zbud.h | 2 +- mm/zbud.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/zbud.h b/include/linux/zbud.h index 13af0d450bf6..f9d41a6e361f 100644 --- a/include/linux/zbud.h +++ b/include/linux/zbud.h @@ -11,7 +11,7 @@ struct zbud_ops { struct zbud_pool *zbud_create_pool(gfp_t gfp, struct zbud_ops *ops); void zbud_destroy_pool(struct zbud_pool *pool); -int zbud_alloc(struct zbud_pool *pool, unsigned int size, gfp_t gfp, +int zbud_alloc(struct zbud_pool *pool, size_t size, gfp_t gfp, unsigned long *handle); void zbud_free(struct zbud_pool *pool, unsigned long handle); int zbud_reclaim_page(struct zbud_pool *pool, unsigned int retries); diff --git a/mm/zbud.c b/mm/zbud.c index 01df13a7e2e1..d01226117b8d 100644 --- a/mm/zbud.c +++ b/mm/zbud.c @@ -122,7 +122,7 @@ enum buddy { }; /* Converts an allocation size in bytes to size in zbud chunks */ -static int size_to_chunks(int size) +static int size_to_chunks(size_t size) { return (size + CHUNK_SIZE - 1) >> CHUNK_SHIFT; } @@ -247,7 +247,7 @@ void zbud_destroy_pool(struct zbud_pool *pool) * gfp arguments are invalid or -ENOMEM if the pool was unable to allocate * a new page. 
*/ -int zbud_alloc(struct zbud_pool *pool, unsigned int size, gfp_t gfp, +int zbud_alloc(struct zbud_pool *pool, size_t size, gfp_t gfp, unsigned long *handle) { int chunks, i, freechunks; -- cgit v1.2.3-59-g8ed1b From af8d417a04564bca0348e7e3c749ab12a3e837ad Mon Sep 17 00:00:00 2001 From: Dan Streetman Date: Wed, 6 Aug 2014 16:08:36 -0700 Subject: mm/zpool: implement common zpool api to zbud/zsmalloc Add zpool api. zpool provides an interface for memory storage, typically of compressed memory. Users can select what backend to use; currently the only implementations are zbud, a low density implementation with up to two compressed pages per storage page, and zsmalloc, a higher density implementation with multiple compressed pages per storage page. Signed-off-by: Dan Streetman Tested-by: Seth Jennings Cc: Minchan Kim Cc: Nitin Gupta Cc: Weijie Yang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/zpool.h | 106 +++++++++++++++ mm/Kconfig | 41 +++--- mm/Makefile | 1 + mm/zpool.c | 364 ++++++++++++++++++++++++++++++++++++++++++++++++++ mm/zsmalloc.c | 1 - 5 files changed, 495 insertions(+), 18 deletions(-) create mode 100644 include/linux/zpool.h create mode 100644 mm/zpool.c (limited to 'include/linux') diff --git a/include/linux/zpool.h b/include/linux/zpool.h new file mode 100644 index 000000000000..f14bd75f08b3 --- /dev/null +++ b/include/linux/zpool.h @@ -0,0 +1,106 @@ +/* + * zpool memory storage api + * + * Copyright (C) 2014 Dan Streetman + * + * This is a common frontend for the zbud and zsmalloc memory + * storage pool implementations. Typically, this is used to + * store compressed memory. + */ + +#ifndef _ZPOOL_H_ +#define _ZPOOL_H_ + +struct zpool; + +struct zpool_ops { + int (*evict)(struct zpool *pool, unsigned long handle); +}; + +/* + * Control how a handle is mapped. It will be ignored if the + * implementation does not support it. Its use is optional. + * Note that this does not refer to memory protection, it + * refers to how the memory will be copied in/out if copying + * is necessary during mapping; read-write is the safest as + * it copies the existing memory in on map, and copies the + * changed memory back out on unmap. Write-only does not copy + * in the memory and should only be used for initialization. + * If in doubt, use ZPOOL_MM_DEFAULT which is read-write. + */ +enum zpool_mapmode { + ZPOOL_MM_RW, /* normal read-write mapping */ + ZPOOL_MM_RO, /* read-only (no copy-out at unmap time) */ + ZPOOL_MM_WO, /* write-only (no copy-in at map time) */ + + ZPOOL_MM_DEFAULT = ZPOOL_MM_RW +}; + +struct zpool *zpool_create_pool(char *type, gfp_t gfp, struct zpool_ops *ops); + +char *zpool_get_type(struct zpool *pool); + +void zpool_destroy_pool(struct zpool *pool); + +int zpool_malloc(struct zpool *pool, size_t size, gfp_t gfp, + unsigned long *handle); + +void zpool_free(struct zpool *pool, unsigned long handle); + +int zpool_shrink(struct zpool *pool, unsigned int pages, + unsigned int *reclaimed); + +void *zpool_map_handle(struct zpool *pool, unsigned long handle, + enum zpool_mapmode mm); + +void zpool_unmap_handle(struct zpool *pool, unsigned long handle); + +u64 zpool_get_total_size(struct zpool *pool); + + +/** + * struct zpool_driver - driver implementation for zpool + * @type: name of the driver. + * @list: entry in the list of zpool drivers. + * @create: create a new pool. + * @destroy: destroy a pool. + * @malloc: allocate mem from a pool. + * @free: free mem from a pool. + * @shrink: shrink the pool. + * @map: map a handle. 
+ * @unmap: unmap a handle. + * @total_size: get total size of a pool. + * + * This is created by a zpool implementation and registered + * with zpool. + */ +struct zpool_driver { + char *type; + struct module *owner; + atomic_t refcount; + struct list_head list; + + void *(*create)(gfp_t gfp, struct zpool_ops *ops); + void (*destroy)(void *pool); + + int (*malloc)(void *pool, size_t size, gfp_t gfp, + unsigned long *handle); + void (*free)(void *pool, unsigned long handle); + + int (*shrink)(void *pool, unsigned int pages, + unsigned int *reclaimed); + + void *(*map)(void *pool, unsigned long handle, + enum zpool_mapmode mm); + void (*unmap)(void *pool, unsigned long handle); + + u64 (*total_size)(void *pool); +}; + +void zpool_register_driver(struct zpool_driver *driver); + +int zpool_unregister_driver(struct zpool_driver *driver); + +int zpool_evict(void *pool, unsigned long handle); + +#endif diff --git a/mm/Kconfig b/mm/Kconfig index f4899ec39cf4..12179b8c3b89 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -519,15 +519,17 @@ config CMA_AREAS If unsure, leave the default value "7". -config ZBUD - tristate - default n +config MEM_SOFT_DIRTY + bool "Track memory changes" + depends on CHECKPOINT_RESTORE && HAVE_ARCH_SOFT_DIRTY && PROC_FS + select PROC_PAGE_MONITOR help - A special purpose allocator for storing compressed pages. - It is designed to store up to two compressed pages per physical - page. While this design limits storage density, it has simple and - deterministic reclaim properties that make it preferable to a higher - density approach when reclaim will be used. + This option enables memory changes tracking by introducing a + soft-dirty bit on pte-s. This bit it set when someone writes + into a page just as regular dirty bit, but unlike the latter + it can be cleared by hands. + + See Documentation/vm/soft-dirty.txt for more details. config ZSWAP bool "Compressed cache for swap pages (EXPERIMENTAL)" @@ -549,17 +551,22 @@ config ZSWAP they have not be fully explored on the large set of potential configurations and workloads that exist. -config MEM_SOFT_DIRTY - bool "Track memory changes" - depends on CHECKPOINT_RESTORE && HAVE_ARCH_SOFT_DIRTY && PROC_FS - select PROC_PAGE_MONITOR +config ZPOOL + tristate "Common API for compressed memory storage" + default n help - This option enables memory changes tracking by introducing a - soft-dirty bit on pte-s. This bit it set when someone writes - into a page just as regular dirty bit, but unlike the latter - it can be cleared by hands. + Compressed memory storage API. This allows using either zbud or + zsmalloc. - See Documentation/vm/soft-dirty.txt for more details. +config ZBUD + tristate "Low density storage for compressed pages" + default n + help + A special purpose allocator for storing compressed pages. + It is designed to store up to two compressed pages per physical + page. While this design limits storage density, it has simple and + deterministic reclaim properties that make it preferable to a higher + density approach when reclaim will be used. 
config ZSMALLOC tristate "Memory allocator for compressed pages" diff --git a/mm/Makefile b/mm/Makefile index 8338473c329a..632ae77e6070 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -59,6 +59,7 @@ obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak-test.o obj-$(CONFIG_CLEANCACHE) += cleancache.o obj-$(CONFIG_MEMORY_ISOLATION) += page_isolation.o +obj-$(CONFIG_ZPOOL) += zpool.o obj-$(CONFIG_ZBUD) += zbud.o obj-$(CONFIG_ZSMALLOC) += zsmalloc.o obj-$(CONFIG_GENERIC_EARLY_IOREMAP) += early_ioremap.o diff --git a/mm/zpool.c b/mm/zpool.c new file mode 100644 index 000000000000..e40612a1df00 --- /dev/null +++ b/mm/zpool.c @@ -0,0 +1,364 @@ +/* + * zpool memory storage api + * + * Copyright (C) 2014 Dan Streetman + * + * This is a common frontend for memory storage pool implementations. + * Typically, this is used to store compressed memory. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include + +struct zpool { + char *type; + + struct zpool_driver *driver; + void *pool; + struct zpool_ops *ops; + + struct list_head list; +}; + +static LIST_HEAD(drivers_head); +static DEFINE_SPINLOCK(drivers_lock); + +static LIST_HEAD(pools_head); +static DEFINE_SPINLOCK(pools_lock); + +/** + * zpool_register_driver() - register a zpool implementation. + * @driver: driver to register + */ +void zpool_register_driver(struct zpool_driver *driver) +{ + spin_lock(&drivers_lock); + atomic_set(&driver->refcount, 0); + list_add(&driver->list, &drivers_head); + spin_unlock(&drivers_lock); +} +EXPORT_SYMBOL(zpool_register_driver); + +/** + * zpool_unregister_driver() - unregister a zpool implementation. + * @driver: driver to unregister. + * + * Module usage counting is used to prevent using a driver + * while/after unloading, so if this is called from module + * exit function, this should never fail; if called from + * other than the module exit function, and this returns + * failure, the driver is in use and must remain available. + */ +int zpool_unregister_driver(struct zpool_driver *driver) +{ + int ret = 0, refcount; + + spin_lock(&drivers_lock); + refcount = atomic_read(&driver->refcount); + WARN_ON(refcount < 0); + if (refcount > 0) + ret = -EBUSY; + else + list_del(&driver->list); + spin_unlock(&drivers_lock); + + return ret; +} +EXPORT_SYMBOL(zpool_unregister_driver); + +/** + * zpool_evict() - evict callback from a zpool implementation. + * @pool: pool to evict from. + * @handle: handle to evict. + * + * This can be used by zpool implementations to call the + * user's evict zpool_ops struct evict callback. + */ +int zpool_evict(void *pool, unsigned long handle) +{ + struct zpool *zpool; + + spin_lock(&pools_lock); + list_for_each_entry(zpool, &pools_head, list) { + if (zpool->pool == pool) { + spin_unlock(&pools_lock); + if (!zpool->ops || !zpool->ops->evict) + return -EINVAL; + return zpool->ops->evict(zpool, handle); + } + } + spin_unlock(&pools_lock); + + return -ENOENT; +} +EXPORT_SYMBOL(zpool_evict); + +static struct zpool_driver *zpool_get_driver(char *type) +{ + struct zpool_driver *driver; + + spin_lock(&drivers_lock); + list_for_each_entry(driver, &drivers_head, list) { + if (!strcmp(driver->type, type)) { + bool got = try_module_get(driver->owner); + + if (got) + atomic_inc(&driver->refcount); + spin_unlock(&drivers_lock); + return got ? 
driver : NULL; + } + } + + spin_unlock(&drivers_lock); + return NULL; +} + +static void zpool_put_driver(struct zpool_driver *driver) +{ + atomic_dec(&driver->refcount); + module_put(driver->owner); +} + +/** + * zpool_create_pool() - Create a new zpool + * @type The type of the zpool to create (e.g. zbud, zsmalloc) + * @gfp The GFP flags to use when allocating the pool. + * @ops The optional ops callback. + * + * This creates a new zpool of the specified type. The gfp flags will be + * used when allocating memory, if the implementation supports it. If the + * ops param is NULL, then the created zpool will not be shrinkable. + * + * Implementations must guarantee this to be thread-safe. + * + * Returns: New zpool on success, NULL on failure. + */ +struct zpool *zpool_create_pool(char *type, gfp_t gfp, struct zpool_ops *ops) +{ + struct zpool_driver *driver; + struct zpool *zpool; + + pr_info("creating pool type %s\n", type); + + driver = zpool_get_driver(type); + + if (!driver) { + request_module(type); + driver = zpool_get_driver(type); + } + + if (!driver) { + pr_err("no driver for type %s\n", type); + return NULL; + } + + zpool = kmalloc(sizeof(*zpool), gfp); + if (!zpool) { + pr_err("couldn't create zpool - out of memory\n"); + zpool_put_driver(driver); + return NULL; + } + + zpool->type = driver->type; + zpool->driver = driver; + zpool->pool = driver->create(gfp, ops); + zpool->ops = ops; + + if (!zpool->pool) { + pr_err("couldn't create %s pool\n", type); + zpool_put_driver(driver); + kfree(zpool); + return NULL; + } + + pr_info("created %s pool\n", type); + + spin_lock(&pools_lock); + list_add(&zpool->list, &pools_head); + spin_unlock(&pools_lock); + + return zpool; +} + +/** + * zpool_destroy_pool() - Destroy a zpool + * @pool The zpool to destroy. + * + * Implementations must guarantee this to be thread-safe, + * however only when destroying different pools. The same + * pool should only be destroyed once, and should not be used + * after it is destroyed. + * + * This destroys an existing zpool. The zpool should not be in use. + */ +void zpool_destroy_pool(struct zpool *zpool) +{ + pr_info("destroying pool type %s\n", zpool->type); + + spin_lock(&pools_lock); + list_del(&zpool->list); + spin_unlock(&pools_lock); + zpool->driver->destroy(zpool->pool); + zpool_put_driver(zpool->driver); + kfree(zpool); +} + +/** + * zpool_get_type() - Get the type of the zpool + * @pool The zpool to check + * + * This returns the type of the pool. + * + * Implementations must guarantee this to be thread-safe. + * + * Returns: The type of zpool. + */ +char *zpool_get_type(struct zpool *zpool) +{ + return zpool->type; +} + +/** + * zpool_malloc() - Allocate memory + * @pool The zpool to allocate from. + * @size The amount of memory to allocate. + * @gfp The GFP flags to use when allocating memory. + * @handle Pointer to the handle to set + * + * This allocates the requested amount of memory from the pool. + * The gfp flags will be used when allocating memory, if the + * implementation supports it. The provided @handle will be + * set to the allocated object handle. + * + * Implementations must guarantee this to be thread-safe. + * + * Returns: 0 on success, negative value on error. + */ +int zpool_malloc(struct zpool *zpool, size_t size, gfp_t gfp, + unsigned long *handle) +{ + return zpool->driver->malloc(zpool->pool, size, gfp, handle); +} + +/** + * zpool_free() - Free previously allocated memory + * @pool The zpool that allocated the memory. + * @handle The handle to the memory to free. 
+ * + * This frees previously allocated memory. This does not guarantee + * that the pool will actually free memory, only that the memory + * in the pool will become available for use by the pool. + * + * Implementations must guarantee this to be thread-safe, + * however only when freeing different handles. The same + * handle should only be freed once, and should not be used + * after freeing. + */ +void zpool_free(struct zpool *zpool, unsigned long handle) +{ + zpool->driver->free(zpool->pool, handle); +} + +/** + * zpool_shrink() - Shrink the pool size + * @pool The zpool to shrink. + * @pages The number of pages to shrink the pool. + * @reclaimed The number of pages successfully evicted. + * + * This attempts to shrink the actual memory size of the pool + * by evicting currently used handle(s). If the pool was + * created with no zpool_ops, or the evict call fails for any + * of the handles, this will fail. If non-NULL, the @reclaimed + * parameter will be set to the number of pages reclaimed, + * which may be more than the number of pages requested. + * + * Implementations must guarantee this to be thread-safe. + * + * Returns: 0 on success, negative value on error/failure. + */ +int zpool_shrink(struct zpool *zpool, unsigned int pages, + unsigned int *reclaimed) +{ + return zpool->driver->shrink(zpool->pool, pages, reclaimed); +} + +/** + * zpool_map_handle() - Map a previously allocated handle into memory + * @pool The zpool that the handle was allocated from + * @handle The handle to map + * @mm How the memory should be mapped + * + * This maps a previously allocated handle into memory. The @mm + * param indicates to the implementation how the memory will be + * used, i.e. read-only, write-only, read-write. If the + * implementation does not support it, the memory will be treated + * as read-write. + * + * This may hold locks, disable interrupts, and/or preemption, + * and the zpool_unmap_handle() must be called to undo those + * actions. The code that uses the mapped handle should complete + * its operatons on the mapped handle memory quickly and unmap + * as soon as possible. As the implementation may use per-cpu + * data, multiple handles should not be mapped concurrently on + * any cpu. + * + * Returns: A pointer to the handle's mapped memory area. + */ +void *zpool_map_handle(struct zpool *zpool, unsigned long handle, + enum zpool_mapmode mapmode) +{ + return zpool->driver->map(zpool->pool, handle, mapmode); +} + +/** + * zpool_unmap_handle() - Unmap a previously mapped handle + * @pool The zpool that the handle was allocated from + * @handle The handle to unmap + * + * This unmaps a previously mapped handle. Any locks or other + * actions that the implementation took in zpool_map_handle() + * will be undone here. The memory area returned from + * zpool_map_handle() should no longer be used after this. + */ +void zpool_unmap_handle(struct zpool *zpool, unsigned long handle) +{ + zpool->driver->unmap(zpool->pool, handle); +} + +/** + * zpool_get_total_size() - The total size of the pool + * @pool The zpool to check + * + * This returns the total size in bytes of the pool. + * + * Returns: Total size of the zpool in bytes. 
+ */ +u64 zpool_get_total_size(struct zpool *zpool) +{ + return zpool->driver->total_size(zpool->pool); +} + +static int __init init_zpool(void) +{ + pr_info("loaded\n"); + return 0; +} + +static void __exit exit_zpool(void) +{ + pr_info("unloaded\n"); +} + +module_init(init_zpool); +module_exit(exit_zpool); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Dan Streetman "); +MODULE_DESCRIPTION("Common API for compressed memory storage"); diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index bb62a4adc328..6a1827d3d231 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -240,7 +240,6 @@ struct mapping_area { enum zs_mapmode vm_mm; /* mapping mode */ }; - /* per-cpu VM mapping areas for zspage accesses that cross page boundaries */ static DEFINE_PER_CPU(struct mapping_area, zs_map_area); -- cgit v1.2.3-59-g8ed1b From 68be302963230fa76600cd598935a830ac95dca2 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 6 Aug 2014 16:08:45 -0700 Subject: fs.h, drivers/hwmon/asus_atk0110.c: fix DEFINE_SIMPLE_ATTRIBUTE semicolon definition and use The DEFINE_SIMPLE_ATTRIBUTE macro should not end in a ; Fix the one use in the kernel tree that did not have a semicolon. Signed-off-by: Joe Perches Acked-by: Guenter Roeck Acked-by: Luca Tettamanti Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/hwmon/asus_atk0110.c | 2 +- include/linux/fs.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/hwmon/asus_atk0110.c b/drivers/hwmon/asus_atk0110.c index ae208f612198..cccef87963e0 100644 --- a/drivers/hwmon/asus_atk0110.c +++ b/drivers/hwmon/asus_atk0110.c @@ -688,7 +688,7 @@ static int atk_debugfs_gitm_get(void *p, u64 *val) DEFINE_SIMPLE_ATTRIBUTE(atk_debugfs_gitm, atk_debugfs_gitm_get, NULL, - "0x%08llx\n") + "0x%08llx\n"); static int atk_acpi_print(char *buf, size_t sz, union acpi_object *obj) { diff --git a/include/linux/fs.h b/include/linux/fs.h index 2daccaf4b547..1ab6c6913040 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2688,7 +2688,7 @@ static const struct file_operations __fops = { \ .read = simple_attr_read, \ .write = simple_attr_write, \ .llseek = generic_file_llseek, \ -}; +} static inline __printf(1, 2) void __simple_attr_check_format(const char *fmt, ...) -- cgit v1.2.3-59-g8ed1b From 90a856436ddafbe0c6f8c18d7fc21aed3784e227 Mon Sep 17 00:00:00 2001 From: Geoff Levand Date: Wed, 6 Aug 2014 16:08:47 -0700 Subject: include/linux/byteorder/generic.h: minor comment fix Signed-off-by: Geoff Levand Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/byteorder/generic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/byteorder/generic.h b/include/linux/byteorder/generic.h index 0846e6b931ce..89f67c1c3160 100644 --- a/include/linux/byteorder/generic.h +++ b/include/linux/byteorder/generic.h @@ -2,7 +2,7 @@ #define _LINUX_BYTEORDER_GENERIC_H /* - * linux/byteorder_generic.h + * linux/byteorder/generic.h * Generic Byte-reordering support * * The "... p" macros, like le64_to_cpup, can be used with pointers -- cgit v1.2.3-59-g8ed1b From 42a9dc0b3d0f749375c767c7d5cab56e89160576 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Wed, 6 Aug 2014 16:09:01 -0700 Subject: printk: rename DEFAULT_MESSAGE_LOGLEVEL Commit a8fe19ebfbfd ("kernel/printk: use symbolic defines for console loglevels") makes consistent use of symbolic values for printk() log levels. The naming scheme used is different from the one used for DEFAULT_MESSAGE_LOGLEVEL though. 
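For readers unfamiliar with the constant being renamed, a brief hedged illustration of its role (editorial, not part of the patch): printk() calls without an explicit level prefix are logged at this default message loglevel, the second entry of the console_printk array, while an explicit KERN_<level> prefix overrides it.

static void loglevel_demo(void)
{
	/* Falls back to the default message loglevel. */
	printk("no level prefix: logged at the default message loglevel\n");
	/* Explicit level, independent of the default. */
	printk(KERN_WARNING "explicit level: unaffected by the default\n");
}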
Change that symbol name to be MESSAGE_LOGLEVEL_DEFAULT for consistency. And because the value of that symbol comes from a similarly-named config option, rename CONFIG_DEFAULT_MESSAGE_LOGLEVEL as well. Signed-off-by: Alex Elder Cc: Andi Kleen Cc: Borislav Petkov Cc: Jan Kara Cc: John Stultz Cc: Petr Mladek Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/printk.h | 2 +- kernel/printk/printk.c | 2 +- lib/Kconfig.debug | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/printk.h b/include/linux/printk.h index 319ff7e53efb..0990997a5304 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -31,7 +31,7 @@ static inline const char *printk_skip_level(const char *buffer) } /* printk's without a loglevel use this.. */ -#define DEFAULT_MESSAGE_LOGLEVEL CONFIG_DEFAULT_MESSAGE_LOGLEVEL +#define MESSAGE_LOGLEVEL_DEFAULT CONFIG_MESSAGE_LOGLEVEL_DEFAULT /* We show everything that is MORE important than this.. */ #define CONSOLE_LOGLEVEL_SILENT 0 /* Mum's the word */ diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index ec3bfb0b1f62..770ed4821ba9 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -56,7 +56,7 @@ int console_printk[4] = { CONSOLE_LOGLEVEL_DEFAULT, /* console_loglevel */ - DEFAULT_MESSAGE_LOGLEVEL, /* default_message_loglevel */ + MESSAGE_LOGLEVEL_DEFAULT, /* default_message_loglevel */ CONSOLE_LOGLEVEL_MIN, /* minimum_console_loglevel */ CONSOLE_LOGLEVEL_DEFAULT, /* default_console_loglevel */ }; diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index cfe7df8f62cc..cb45f59685e6 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -15,7 +15,7 @@ config PRINTK_TIME The behavior is also controlled by the kernel command line parameter printk.time=1. See Documentation/kernel-parameters.txt -config DEFAULT_MESSAGE_LOGLEVEL +config MESSAGE_LOGLEVEL_DEFAULT int "Default message log level (1-7)" range 1 7 default "4" -- cgit v1.2.3-59-g8ed1b From bc18dd335a161f9229ed3aaab88ce0706cbd9867 Mon Sep 17 00:00:00 2001 From: Ken Helias Date: Wed, 6 Aug 2014 16:09:14 -0700 Subject: list: make hlist_add_after() argument names match hlist_add_after_rcu() The argument names for hlist_add_after() are poorly chosen because they look the same as the ones for hlist_add_before() but have to be used differently. hlist_add_after_rcu() has made a better choice. Signed-off-by: Ken Helias Cc: "Paul E. 
McKenney" Cc: Christoph Hellwig Cc: Hugh Dickins Cc: Jeff Kirsher Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/list.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/list.h b/include/linux/list.h index ef9594171062..624ec7f48293 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -654,15 +654,15 @@ static inline void hlist_add_before(struct hlist_node *n, *(n->pprev) = n; } -static inline void hlist_add_after(struct hlist_node *n, - struct hlist_node *next) +static inline void hlist_add_after(struct hlist_node *prev, + struct hlist_node *n) { - next->next = n->next; - n->next = next; - next->pprev = &n->next; + n->next = prev->next; + prev->next = n; + n->pprev = &prev->next; - if(next->next) - next->next->pprev = &next->next; + if (n->next) + n->next->pprev = &n->next; } /* after that we'll appear to be on some hlist and hlist_del will work */ -- cgit v1.2.3-59-g8ed1b From 1d023284c31a4e40a94d5bbcb7dbb7a35ee0bcbc Mon Sep 17 00:00:00 2001 From: Ken Helias Date: Wed, 6 Aug 2014 16:09:16 -0700 Subject: list: fix order of arguments for hlist_add_after(_rcu) All other add functions for lists have the new item as first argument and the position where it is added as second argument. This was changed for no good reason in this function and makes using it unnecessary confusing. The name was changed to hlist_add_behind() to cause unconverted code to generate a compile error instead of using the wrong parameter order. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Ken Helias Cc: "Paul E. McKenney" Acked-by: Jeff Kirsher [intel driver bits] Cc: Hugh Dickins Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/RCU/whatisRCU.txt | 2 +- drivers/gpu/drm/drm_hashtab.c | 2 +- drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 2 +- drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c | 2 +- drivers/staging/lustre/lustre/libcfs/hash.c | 4 ++-- fs/namespace.c | 2 +- fs/notify/inode_mark.c | 2 +- fs/notify/vfsmount_mark.c | 2 +- include/linux/list.h | 4 ++-- include/linux/rculist.h | 8 ++++---- net/batman-adv/fragmentation.c | 2 +- net/bridge/br_multicast.c | 2 +- net/ipv4/fib_trie.c | 2 +- net/ipv6/addrlabel.c | 2 +- net/xfrm/xfrm_policy.c | 4 ++-- 15 files changed, 21 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt index 49b8551a3b68..e48c57f1943b 100644 --- a/Documentation/RCU/whatisRCU.txt +++ b/Documentation/RCU/whatisRCU.txt @@ -818,7 +818,7 @@ RCU pointer/list update: list_add_tail_rcu list_del_rcu list_replace_rcu - hlist_add_after_rcu + hlist_add_behind_rcu hlist_add_before_rcu hlist_add_head_rcu hlist_del_rcu diff --git a/drivers/gpu/drm/drm_hashtab.c b/drivers/gpu/drm/drm_hashtab.c index 7e4bae760e27..c3b80fd65d62 100644 --- a/drivers/gpu/drm/drm_hashtab.c +++ b/drivers/gpu/drm/drm_hashtab.c @@ -125,7 +125,7 @@ int drm_ht_insert_item(struct drm_open_hash *ht, struct drm_hash_item *item) parent = &entry->head; } if (parent) { - hlist_add_after_rcu(parent, &item->head); + hlist_add_behind_rcu(&item->head, parent); } else { hlist_add_head_rcu(&item->head, h_list); } diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 681a9e81ff51..e8ba7470700a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -1948,7 +1948,7 @@ 
static int i40e_update_ethtool_fdir_entry(struct i40e_vsi *vsi, /* add filter to the list */ if (parent) - hlist_add_after(&parent->fdir_node, &input->fdir_node); + hlist_add_behind(&input->fdir_node, &parent->fdir_node); else hlist_add_head(&input->fdir_node, &pf->fdir_filter_list); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index 94a1c07efeb0..e4100b5737b6 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -2517,7 +2517,7 @@ static int ixgbe_update_ethtool_fdir_entry(struct ixgbe_adapter *adapter, /* add filter to the list */ if (parent) - hlist_add_after(&parent->fdir_node, &input->fdir_node); + hlist_add_behind(&input->fdir_node, &parent->fdir_node); else hlist_add_head(&input->fdir_node, &adapter->fdir_filter_list); diff --git a/drivers/staging/lustre/lustre/libcfs/hash.c b/drivers/staging/lustre/lustre/libcfs/hash.c index 5dde79418297..8ef1deb59d4a 100644 --- a/drivers/staging/lustre/lustre/libcfs/hash.c +++ b/drivers/staging/lustre/lustre/libcfs/hash.c @@ -351,7 +351,7 @@ cfs_hash_dh_hnode_add(struct cfs_hash *hs, struct cfs_hash_bd *bd, cfs_hash_dhead_t, dh_head); if (dh->dh_tail != NULL) /* not empty */ - hlist_add_after(dh->dh_tail, hnode); + hlist_add_behind(hnode, dh->dh_tail); else /* empty list */ hlist_add_head(hnode, &dh->dh_head); dh->dh_tail = hnode; @@ -406,7 +406,7 @@ cfs_hash_dd_hnode_add(struct cfs_hash *hs, struct cfs_hash_bd *bd, cfs_hash_dhead_dep_t, dd_head); if (dh->dd_tail != NULL) /* not empty */ - hlist_add_after(dh->dd_tail, hnode); + hlist_add_behind(hnode, dh->dd_tail); else /* empty list */ hlist_add_head(hnode, &dh->dd_head); dh->dd_tail = hnode; diff --git a/fs/namespace.c b/fs/namespace.c index 182bc41cd887..2a1447c946e7 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -798,7 +798,7 @@ static void commit_tree(struct mount *mnt, struct mount *shadows) list_splice(&head, n->list.prev); if (shadows) - hlist_add_after_rcu(&shadows->mnt_hash, &mnt->mnt_hash); + hlist_add_behind_rcu(&mnt->mnt_hash, &shadows->mnt_hash); else hlist_add_head_rcu(&mnt->mnt_hash, m_hash(&parent->mnt, mnt->mnt_mountpoint)); diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c index 74825be65b7b..9ce062218de9 100644 --- a/fs/notify/inode_mark.c +++ b/fs/notify/inode_mark.c @@ -232,7 +232,7 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark, BUG_ON(last == NULL); /* mark should be the last entry. last is the current last entry */ - hlist_add_after_rcu(&last->i.i_list, &mark->i.i_list); + hlist_add_behind_rcu(&mark->i.i_list, &last->i.i_list); out: fsnotify_recalc_inode_mask_locked(inode); spin_unlock(&inode->i_lock); diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c index 68ca5a8704b5..ac851e8376b1 100644 --- a/fs/notify/vfsmount_mark.c +++ b/fs/notify/vfsmount_mark.c @@ -191,7 +191,7 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark, BUG_ON(last == NULL); /* mark should be the last entry. 
last is the current last entry */ - hlist_add_after_rcu(&last->m.m_list, &mark->m.m_list); + hlist_add_behind_rcu(&mark->m.m_list, &last->m.m_list); out: fsnotify_recalc_vfsmount_mask_locked(mnt); spin_unlock(&mnt->mnt_root->d_lock); diff --git a/include/linux/list.h b/include/linux/list.h index 624ec7f48293..cbbb96fcead9 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -654,8 +654,8 @@ static inline void hlist_add_before(struct hlist_node *n, *(n->pprev) = n; } -static inline void hlist_add_after(struct hlist_node *prev, - struct hlist_node *n) +static inline void hlist_add_behind(struct hlist_node *n, + struct hlist_node *prev) { n->next = prev->next; prev->next = n; diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 8183b46fbaa2..372ad5e0dcb8 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -432,9 +432,9 @@ static inline void hlist_add_before_rcu(struct hlist_node *n, } /** - * hlist_add_after_rcu - * @prev: the existing element to add the new element after. + * hlist_add_behind_rcu * @n: the new element to add to the hash list. + * @prev: the existing element to add the new element after. * * Description: * Adds the specified element to the specified hlist @@ -449,8 +449,8 @@ static inline void hlist_add_before_rcu(struct hlist_node *n, * hlist_for_each_entry_rcu(), used to prevent memory-consistency * problems on Alpha CPUs. */ -static inline void hlist_add_after_rcu(struct hlist_node *prev, - struct hlist_node *n) +static inline void hlist_add_behind_rcu(struct hlist_node *n, + struct hlist_node *prev) { n->next = prev->next; n->pprev = &prev->next; diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c index 022d18ab27a6..52c43f904220 100644 --- a/net/batman-adv/fragmentation.c +++ b/net/batman-adv/fragmentation.c @@ -188,7 +188,7 @@ static bool batadv_frag_insert_packet(struct batadv_orig_node *orig_node, /* Reached the end of the list, so insert after 'frag_entry_last'. 
*/ if (likely(frag_entry_last)) { - hlist_add_after(&frag_entry_last->list, &frag_entry_new->list); + hlist_add_behind(&frag_entry_last->list, &frag_entry_new->list); chain->size += skb->len - hdr_size; chain->timestamp = jiffies; ret = true; diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index b4845f4b2bb4..7751c92c8c57 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1174,7 +1174,7 @@ static void br_multicast_add_router(struct net_bridge *br, } if (slot) - hlist_add_after_rcu(slot, &port->rlist); + hlist_add_behind_rcu(&port->rlist, slot); else hlist_add_head_rcu(&port->rlist, &br->router_list); } diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 5afeb5aa4c7c..e9cb2588e416 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -940,7 +940,7 @@ static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new) last = li; } if (last) - hlist_add_after_rcu(&last->hlist, &new->hlist); + hlist_add_behind_rcu(&new->hlist, &last->hlist); else hlist_add_before_rcu(&new->hlist, &li->hlist); } diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index 731e1e1722d9..fd0dc47f471d 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -277,7 +277,7 @@ static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace) last = p; } if (last) - hlist_add_after_rcu(&last->list, &newp->list); + hlist_add_behind_rcu(&newp->list, &last->list); else hlist_add_head_rcu(&newp->list, &ip6addrlbl_table.head); out: diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 0525d78ba328..beeed602aeb3 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -389,7 +389,7 @@ redo: if (h != h0) continue; hlist_del(&pol->bydst); - hlist_add_after(entry0, &pol->bydst); + hlist_add_behind(&pol->bydst, entry0); } entry0 = &pol->bydst; } @@ -654,7 +654,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) break; } if (newpos) - hlist_add_after(newpos, &policy->bydst); + hlist_add_behind(&policy->bydst, newpos); else hlist_add_head(&policy->bydst, chain); xfrm_pol_hold(policy); -- cgit v1.2.3-59-g8ed1b From 0f9859ca92c9182bcb8f18c55cae1a04627cbb59 Mon Sep 17 00:00:00 2001 From: Ken Helias Date: Wed, 6 Aug 2014 16:09:18 -0700 Subject: klist: use same naming scheme as hlist for klist_add_after() The name was modified from hlist_add_after() to hlist_add_behind() when adjusting the order of arguments to match the one with klist_add_after(). This is necessary to break old code when it would use it the wrong way. Make klist follow this naming scheme for consistency. Signed-off-by: Ken Helias Cc: "Paul E. 
McKenney" Cc: Christoph Hellwig Cc: Hugh Dickins Cc: Jeff Kirsher Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/klist.h | 2 +- lib/klist.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/klist.h b/include/linux/klist.h index a370ce57cf1d..61e5b723ae73 100644 --- a/include/linux/klist.h +++ b/include/linux/klist.h @@ -44,7 +44,7 @@ struct klist_node { extern void klist_add_tail(struct klist_node *n, struct klist *k); extern void klist_add_head(struct klist_node *n, struct klist *k); -extern void klist_add_after(struct klist_node *n, struct klist_node *pos); +extern void klist_add_behind(struct klist_node *n, struct klist_node *pos); extern void klist_add_before(struct klist_node *n, struct klist_node *pos); extern void klist_del(struct klist_node *n); diff --git a/lib/klist.c b/lib/klist.c index 358a368a2947..89b485a2a58d 100644 --- a/lib/klist.c +++ b/lib/klist.c @@ -140,11 +140,11 @@ void klist_add_tail(struct klist_node *n, struct klist *k) EXPORT_SYMBOL_GPL(klist_add_tail); /** - * klist_add_after - Init a klist_node and add it after an existing node + * klist_add_behind - Init a klist_node and add it after an existing node * @n: node we're adding. * @pos: node to put @n after */ -void klist_add_after(struct klist_node *n, struct klist_node *pos) +void klist_add_behind(struct klist_node *n, struct klist_node *pos) { struct klist *k = knode_klist(pos); @@ -153,7 +153,7 @@ void klist_add_after(struct klist_node *n, struct klist_node *pos) list_add(&n->n_node, &pos->n_node); spin_unlock(&k->k_lock); } -EXPORT_SYMBOL_GPL(klist_add_after); +EXPORT_SYMBOL_GPL(klist_add_behind); /** * klist_add_before - Init a klist_node and add it before an existing node -- cgit v1.2.3-59-g8ed1b From 62e7ca5280fd8cbf523970757e13f0324ce0daa0 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Wed, 6 Aug 2014 16:09:21 -0700 Subject: zlib: clean up some dead code Cleanup unused `if 0'-ed functions, which have been dead since 2006 (commits 87c2ce3b9305 ("lib/zlib*: cleanups") by Adrian Bunk and 4f3865fb57a0 ("zlib_inflate: Upgrade library code to a recent version") by Richard Purdie): - zlib_deflateSetDictionary - zlib_deflateParams - zlib_deflateCopy - zlib_inflateSync - zlib_syncsearch - zlib_inflateSetDictionary - zlib_inflatePrime Signed-off-by: Sergey Senozhatsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/zlib.h | 118 ------------------------------------- lib/zlib_deflate/deflate.c | 143 --------------------------------------------- lib/zlib_inflate/inflate.c | 132 ----------------------------------------- 3 files changed, 393 deletions(-) (limited to 'include/linux') diff --git a/include/linux/zlib.h b/include/linux/zlib.h index 9c5a6b4de0a3..197abb2a54c5 100644 --- a/include/linux/zlib.h +++ b/include/linux/zlib.h @@ -493,64 +493,6 @@ extern int deflateInit2 (z_streamp strm, method). msg is set to null if there is no error message. deflateInit2 does not perform any compression: this will be done by deflate(). */ - -#if 0 -extern int zlib_deflateSetDictionary (z_streamp strm, - const Byte *dictionary, - uInt dictLength); -#endif -/* - Initializes the compression dictionary from the given byte sequence - without producing any compressed output. This function must be called - immediately after deflateInit, deflateInit2 or deflateReset, before any - call of deflate. The compressor and decompressor must use exactly the same - dictionary (see inflateSetDictionary). 
- - The dictionary should consist of strings (byte sequences) that are likely - to be encountered later in the data to be compressed, with the most commonly - used strings preferably put towards the end of the dictionary. Using a - dictionary is most useful when the data to be compressed is short and can be - predicted with good accuracy; the data can then be compressed better than - with the default empty dictionary. - - Depending on the size of the compression data structures selected by - deflateInit or deflateInit2, a part of the dictionary may in effect be - discarded, for example if the dictionary is larger than the window size in - deflate or deflate2. Thus the strings most likely to be useful should be - put at the end of the dictionary, not at the front. - - Upon return of this function, strm->adler is set to the Adler32 value - of the dictionary; the decompressor may later use this value to determine - which dictionary has been used by the compressor. (The Adler32 value - applies to the whole dictionary even if only a subset of the dictionary is - actually used by the compressor.) - - deflateSetDictionary returns Z_OK if success, or Z_STREAM_ERROR if a - parameter is invalid (such as NULL dictionary) or the stream state is - inconsistent (for example if deflate has already been called for this stream - or if the compression method is bsort). deflateSetDictionary does not - perform any compression: this will be done by deflate(). -*/ - -#if 0 -extern int zlib_deflateCopy (z_streamp dest, z_streamp source); -#endif - -/* - Sets the destination stream as a complete copy of the source stream. - - This function can be useful when several compression strategies will be - tried, for example when there are several ways of pre-processing the input - data with a filter. The streams that will be discarded should then be freed - by calling deflateEnd. Note that deflateCopy duplicates the internal - compression state which can be quite large, so this strategy is slow and - can consume lots of memory. - - deflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not - enough memory, Z_STREAM_ERROR if the source stream state was inconsistent - (such as zalloc being NULL). msg is left unchanged in both source and - destination. -*/ extern int zlib_deflateReset (z_streamp strm); /* @@ -568,27 +510,6 @@ static inline unsigned long deflateBound(unsigned long s) return s + ((s + 7) >> 3) + ((s + 63) >> 6) + 11; } -#if 0 -extern int zlib_deflateParams (z_streamp strm, int level, int strategy); -#endif -/* - Dynamically update the compression level and compression strategy. The - interpretation of level and strategy is as in deflateInit2. This can be - used to switch between compression and straight copy of the input data, or - to switch to a different kind of input data requiring a different - strategy. If the compression level is changed, the input available so far - is compressed with the old level (and may be flushed); the new level will - take effect only at the next call of deflate(). - - Before the call of deflateParams, the stream state must be set as for - a call of deflate(), since the currently available input may have to - be compressed and flushed. In particular, strm->avail_out must be non-zero. - - deflateParams returns Z_OK if success, Z_STREAM_ERROR if the source - stream state was inconsistent or if a parameter was invalid, Z_BUF_ERROR - if strm->avail_out was zero. 
-*/ - /* extern int inflateInit2 (z_streamp strm, int windowBits); @@ -631,45 +552,6 @@ extern int inflateInit2 (z_streamp strm, int windowBits); and avail_out are unchanged.) */ -extern int zlib_inflateSetDictionary (z_streamp strm, - const Byte *dictionary, - uInt dictLength); -/* - Initializes the decompression dictionary from the given uncompressed byte - sequence. This function must be called immediately after a call of inflate, - if that call returned Z_NEED_DICT. The dictionary chosen by the compressor - can be determined from the adler32 value returned by that call of inflate. - The compressor and decompressor must use exactly the same dictionary (see - deflateSetDictionary). For raw inflate, this function can be called - immediately after inflateInit2() or inflateReset() and before any call of - inflate() to set the dictionary. The application must insure that the - dictionary that was used for compression is provided. - - inflateSetDictionary returns Z_OK if success, Z_STREAM_ERROR if a - parameter is invalid (such as NULL dictionary) or the stream state is - inconsistent, Z_DATA_ERROR if the given dictionary doesn't match the - expected one (incorrect adler32 value). inflateSetDictionary does not - perform any decompression: this will be done by subsequent calls of - inflate(). -*/ - -#if 0 -extern int zlib_inflateSync (z_streamp strm); -#endif -/* - Skips invalid compressed data until a full flush point (see above the - description of deflate with Z_FULL_FLUSH) can be found, or until all - available input is skipped. No output is provided. - - inflateSync returns Z_OK if a full flush point has been found, Z_BUF_ERROR - if no more input was provided, Z_DATA_ERROR if no flush point has been found, - or Z_STREAM_ERROR if the stream structure was inconsistent. In the success - case, the application may save the current current value of total_in which - indicates where valid compressed data was found. In the error case, the - application may repeatedly call inflateSync, providing more input each time, - until success or end of the input data. -*/ - extern int zlib_inflateReset (z_streamp strm); /* This function is equivalent to inflateEnd followed by inflateInit, diff --git a/lib/zlib_deflate/deflate.c b/lib/zlib_deflate/deflate.c index d63381e8e333..d20ef458f137 100644 --- a/lib/zlib_deflate/deflate.c +++ b/lib/zlib_deflate/deflate.c @@ -249,52 +249,6 @@ int zlib_deflateInit2( return zlib_deflateReset(strm); } -/* ========================================================================= */ -#if 0 -int zlib_deflateSetDictionary( - z_streamp strm, - const Byte *dictionary, - uInt dictLength -) -{ - deflate_state *s; - uInt length = dictLength; - uInt n; - IPos hash_head = 0; - - if (strm == NULL || strm->state == NULL || dictionary == NULL) - return Z_STREAM_ERROR; - - s = (deflate_state *) strm->state; - if (s->status != INIT_STATE) return Z_STREAM_ERROR; - - strm->adler = zlib_adler32(strm->adler, dictionary, dictLength); - - if (length < MIN_MATCH) return Z_OK; - if (length > MAX_DIST(s)) { - length = MAX_DIST(s); -#ifndef USE_DICT_HEAD - dictionary += dictLength - length; /* use the tail of the dictionary */ -#endif - } - memcpy((char *)s->window, dictionary, length); - s->strstart = length; - s->block_start = (long)length; - - /* Insert all strings in the hash table (except for the last two bytes). - * s->lookahead stays null, so s->ins_h will be recomputed at the next - * call of fill_window. 
- */ - s->ins_h = s->window[0]; - UPDATE_HASH(s, s->ins_h, s->window[1]); - for (n = 0; n <= length - MIN_MATCH; n++) { - INSERT_STRING(s, n, hash_head); - } - if (hash_head) hash_head = 0; /* to make compiler happy */ - return Z_OK; -} -#endif /* 0 */ - /* ========================================================================= */ int zlib_deflateReset( z_streamp strm @@ -326,45 +280,6 @@ int zlib_deflateReset( return Z_OK; } -/* ========================================================================= */ -#if 0 -int zlib_deflateParams( - z_streamp strm, - int level, - int strategy -) -{ - deflate_state *s; - compress_func func; - int err = Z_OK; - - if (strm == NULL || strm->state == NULL) return Z_STREAM_ERROR; - s = (deflate_state *) strm->state; - - if (level == Z_DEFAULT_COMPRESSION) { - level = 6; - } - if (level < 0 || level > 9 || strategy < 0 || strategy > Z_HUFFMAN_ONLY) { - return Z_STREAM_ERROR; - } - func = configuration_table[s->level].func; - - if (func != configuration_table[level].func && strm->total_in != 0) { - /* Flush the last buffer: */ - err = zlib_deflate(strm, Z_PARTIAL_FLUSH); - } - if (s->level != level) { - s->level = level; - s->max_lazy_match = configuration_table[level].max_lazy; - s->good_match = configuration_table[level].good_length; - s->nice_match = configuration_table[level].nice_length; - s->max_chain_length = configuration_table[level].max_chain; - } - s->strategy = strategy; - return err; -} -#endif /* 0 */ - /* ========================================================================= * Put a short in the pending buffer. The 16-bit value is put in MSB order. * IN assertion: the stream state is correct and there is enough room in @@ -568,64 +483,6 @@ int zlib_deflateEnd( return status == BUSY_STATE ? Z_DATA_ERROR : Z_OK; } -/* ========================================================================= - * Copy the source state to the destination state. - */ -#if 0 -int zlib_deflateCopy ( - z_streamp dest, - z_streamp source -) -{ -#ifdef MAXSEG_64K - return Z_STREAM_ERROR; -#else - deflate_state *ds; - deflate_state *ss; - ush *overlay; - deflate_workspace *mem; - - - if (source == NULL || dest == NULL || source->state == NULL) { - return Z_STREAM_ERROR; - } - - ss = (deflate_state *) source->state; - - *dest = *source; - - mem = (deflate_workspace *) dest->workspace; - - ds = &(mem->deflate_memory); - - dest->state = (struct internal_state *) ds; - *ds = *ss; - ds->strm = dest; - - ds->window = (Byte *) mem->window_memory; - ds->prev = (Pos *) mem->prev_memory; - ds->head = (Pos *) mem->head_memory; - overlay = (ush *) mem->overlay_memory; - ds->pending_buf = (uch *) overlay; - - memcpy(ds->window, ss->window, ds->w_size * 2 * sizeof(Byte)); - memcpy(ds->prev, ss->prev, ds->w_size * sizeof(Pos)); - memcpy(ds->head, ss->head, ds->hash_size * sizeof(Pos)); - memcpy(ds->pending_buf, ss->pending_buf, (uInt)ds->pending_buf_size); - - ds->pending_out = ds->pending_buf + (ss->pending_out - ss->pending_buf); - ds->d_buf = overlay + ds->lit_bufsize/sizeof(ush); - ds->l_buf = ds->pending_buf + (1+sizeof(ush))*ds->lit_bufsize; - - ds->l_desc.dyn_tree = ds->dyn_ltree; - ds->d_desc.dyn_tree = ds->dyn_dtree; - ds->bl_desc.dyn_tree = ds->bl_tree; - - return Z_OK; -#endif -} -#endif /* 0 */ - /* =========================================================================== * Read a new buffer from the current input stream, update the adler32 * and total number of bytes read. 
All deflate() input goes through diff --git a/lib/zlib_inflate/inflate.c b/lib/zlib_inflate/inflate.c index f5ce87b0800e..58a733b10387 100644 --- a/lib/zlib_inflate/inflate.c +++ b/lib/zlib_inflate/inflate.c @@ -45,21 +45,6 @@ int zlib_inflateReset(z_streamp strm) return Z_OK; } -#if 0 -int zlib_inflatePrime(z_streamp strm, int bits, int value) -{ - struct inflate_state *state; - - if (strm == NULL || strm->state == NULL) return Z_STREAM_ERROR; - state = (struct inflate_state *)strm->state; - if (bits > 16 || state->bits + bits > 32) return Z_STREAM_ERROR; - value &= (1L << bits) - 1; - state->hold += value << state->bits; - state->bits += bits; - return Z_OK; -} -#endif - int zlib_inflateInit2(z_streamp strm, int windowBits) { struct inflate_state *state; @@ -761,123 +746,6 @@ int zlib_inflateEnd(z_streamp strm) return Z_OK; } -#if 0 -int zlib_inflateSetDictionary(z_streamp strm, const Byte *dictionary, - uInt dictLength) -{ - struct inflate_state *state; - unsigned long id; - - /* check state */ - if (strm == NULL || strm->state == NULL) return Z_STREAM_ERROR; - state = (struct inflate_state *)strm->state; - if (state->wrap != 0 && state->mode != DICT) - return Z_STREAM_ERROR; - - /* check for correct dictionary id */ - if (state->mode == DICT) { - id = zlib_adler32(0L, NULL, 0); - id = zlib_adler32(id, dictionary, dictLength); - if (id != state->check) - return Z_DATA_ERROR; - } - - /* copy dictionary to window */ - zlib_updatewindow(strm, strm->avail_out); - - if (dictLength > state->wsize) { - memcpy(state->window, dictionary + dictLength - state->wsize, - state->wsize); - state->whave = state->wsize; - } - else { - memcpy(state->window + state->wsize - dictLength, dictionary, - dictLength); - state->whave = dictLength; - } - state->havedict = 1; - return Z_OK; -} -#endif - -#if 0 -/* - Search buf[0..len-1] for the pattern: 0, 0, 0xff, 0xff. Return when found - or when out of input. When called, *have is the number of pattern bytes - found in order so far, in 0..3. On return *have is updated to the new - state. If on return *have equals four, then the pattern was found and the - return value is how many bytes were read including the last byte of the - pattern. If *have is less than four, then the pattern has not been found - yet and the return value is len. In the latter case, zlib_syncsearch() can be - called again with more data and the *have state. *have is initialized to - zero for the first call. - */ -static unsigned zlib_syncsearch(unsigned *have, unsigned char *buf, - unsigned len) -{ - unsigned got; - unsigned next; - - got = *have; - next = 0; - while (next < len && got < 4) { - if ((int)(buf[next]) == (got < 2 ? 
0 : 0xff)) - got++; - else if (buf[next]) - got = 0; - else - got = 4 - got; - next++; - } - *have = got; - return next; -} -#endif - -#if 0 -int zlib_inflateSync(z_streamp strm) -{ - unsigned len; /* number of bytes to look at or looked at */ - unsigned long in, out; /* temporary to save total_in and total_out */ - unsigned char buf[4]; /* to restore bit buffer to byte string */ - struct inflate_state *state; - - /* check parameters */ - if (strm == NULL || strm->state == NULL) return Z_STREAM_ERROR; - state = (struct inflate_state *)strm->state; - if (strm->avail_in == 0 && state->bits < 8) return Z_BUF_ERROR; - - /* if first time, start search in bit buffer */ - if (state->mode != SYNC) { - state->mode = SYNC; - state->hold <<= state->bits & 7; - state->bits -= state->bits & 7; - len = 0; - while (state->bits >= 8) { - buf[len++] = (unsigned char)(state->hold); - state->hold >>= 8; - state->bits -= 8; - } - state->have = 0; - zlib_syncsearch(&(state->have), buf, len); - } - - /* search available input */ - len = zlib_syncsearch(&(state->have), strm->next_in, strm->avail_in); - strm->avail_in -= len; - strm->next_in += len; - strm->total_in += len; - - /* return no joy or set up to restart inflate() on a new block */ - if (state->have != 4) return Z_DATA_ERROR; - in = strm->total_in; out = strm->total_out; - zlib_inflateReset(strm); - strm->total_in = in; strm->total_out = out; - state->mode = TYPE; - return Z_OK; -} -#endif - /* * This subroutine adds the data at next_in/avail_in to the output history * without performing any output. The output buffer must be "caught up"; -- cgit v1.2.3-59-g8ed1b From b01250856b25f4417c51aa33afc451fbf7da1484 Mon Sep 17 00:00:00 2001 From: George Spelvin Date: Wed, 6 Aug 2014 16:09:23 -0700 Subject: lib: add lib/glob.c This is a helper function from drivers/ata/libata_core.c, where it is used to blacklist particular device models. It's being moved to lib/ so other drivers may use it for the same purpose. This implementation in non-recursive, so is safe for the kernel stack. [akpm@linux-foundation.org: fix sparse warning] Signed-off-by: George Spelvin Cc: Randy Dunlap Cc: Tejun Heo Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/glob.h | 9 ++++ lib/Kconfig | 19 ++++++++ lib/Makefile | 2 + lib/glob.c | 123 +++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 153 insertions(+) create mode 100644 include/linux/glob.h create mode 100644 lib/glob.c (limited to 'include/linux') diff --git a/include/linux/glob.h b/include/linux/glob.h new file mode 100644 index 000000000000..861d8347d08e --- /dev/null +++ b/include/linux/glob.h @@ -0,0 +1,9 @@ +#ifndef _LINUX_GLOB_H +#define _LINUX_GLOB_H + +#include /* For bool */ +#include /* For __pure */ + +bool __pure glob_match(char const *pat, char const *str); + +#endif /* _LINUX_GLOB_H */ diff --git a/lib/Kconfig b/lib/Kconfig index a8a775730c09..41bfeec72e40 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -396,6 +396,25 @@ config CPU_RMAP config DQL bool +config GLOB + bool +# This actually supports modular compilation, but the module overhead +# is ridiculous for the amount of code involved. Until an out-of-tree +# driver asks for it, we'll just link it directly it into the kernel +# when required. Since we're ignoring out-of-tree users, there's also +# no need bother prompting for a manual decision: +# prompt "glob_match() function" + help + This option provides a glob_match function for performing + simple text pattern matching. 
It originated in the ATA code + to blacklist particular drive models, but other device drivers + may need similar functionality. + + All drivers in the Linux kernel tree that require this function + should automatically select this option. Say N unless you + are compiling an out-of tree driver which tells you that it + depends on this. + # # Netlink attribute parsing support is select'ed if needed # diff --git a/lib/Makefile b/lib/Makefile index 8427df95dade..d6b4bc496408 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -137,6 +137,8 @@ obj-$(CONFIG_CORDIC) += cordic.o obj-$(CONFIG_DQL) += dynamic_queue_limits.o +obj-$(CONFIG_GLOB) += glob.o + obj-$(CONFIG_MPILIB) += mpi/ obj-$(CONFIG_SIGNATURE) += digsig.o diff --git a/lib/glob.c b/lib/glob.c new file mode 100644 index 000000000000..0ba3ea86b546 --- /dev/null +++ b/lib/glob.c @@ -0,0 +1,123 @@ +#include +#include + +/* + * The only reason this code can be compiled as a module is because the + * ATA code that depends on it can be as well. In practice, they're + * both usually compiled in and the module overhead goes away. + */ +MODULE_DESCRIPTION("glob(7) matching"); +MODULE_LICENSE("Dual MIT/GPL"); + +/** + * glob_match - Shell-style pattern matching, like !fnmatch(pat, str, 0) + * @pat: Shell-style pattern to match, e.g. "*.[ch]". + * @str: String to match. The pattern must match the entire string. + * + * Perform shell-style glob matching, returning true (1) if the match + * succeeds, or false (0) if it fails. Equivalent to !fnmatch(@pat, @str, 0). + * + * Pattern metacharacters are ?, *, [ and \. + * (And, inside character classes, !, - and ].) + * + * This is small and simple implementation intended for device blacklists + * where a string is matched against a number of patterns. Thus, it + * does not preprocess the patterns. It is non-recursive, and run-time + * is at most quadratic: strlen(@str)*strlen(@pat). + * + * An example of the worst case is glob_match("*aaaaa", "aaaaaaaaaa"); + * it takes 6 passes over the pattern before matching the string. + * + * Like !fnmatch(@pat, @str, 0) and unlike the shell, this does NOT + * treat / or leading . specially; it isn't actually used for pathnames. + * + * Note that according to glob(7) (and unlike bash), character classes + * are complemented by a leading !; this does not support the regex-style + * [^a-z] syntax. + * + * An opening bracket without a matching close is matched literally. + */ +bool __pure glob_match(char const *pat, char const *str) +{ + /* + * Backtrack to previous * on mismatch and retry starting one + * character later in the string. Because * matches all characters + * (no exception for /), it can be easily proved that there's + * never a need to backtrack multiple levels. + */ + char const *back_pat = NULL, *back_str = back_str; + + /* + * Loop over each token (character or class) in pat, matching + * it against the remaining unmatched tail of str. Return false + * on mismatch, or true after matching the trailing nul bytes. 
+ */ + for (;;) { + unsigned char c = *str++; + unsigned char d = *pat++; + + switch (d) { + case '?': /* Wildcard: anything but nul */ + if (c == '\0') + return false; + break; + case '*': /* Any-length wildcard */ + if (*pat == '\0') /* Optimize trailing * case */ + return true; + back_pat = pat; + back_str = --str; /* Allow zero-length match */ + break; + case '[': { /* Character class */ + bool match = false, inverted = (*pat == '!'); + char const *class = pat + inverted; + unsigned char a = *class++; + + /* + * Iterate over each span in the character class. + * A span is either a single character a, or a + * range a-b. The first span may begin with ']'. + */ + do { + unsigned char b = a; + + if (a == '\0') /* Malformed */ + goto literal; + + if (class[0] == '-' && class[1] != ']') { + b = class[1]; + + if (b == '\0') + goto literal; + + class += 2; + /* Any special action if a > b? */ + } + match |= (a <= c && c <= b); + } while ((a = *class++) != ']'); + + if (match == inverted) + goto backtrack; + pat = class; + } + break; + case '\\': + d = *pat++; + /*FALLTHROUGH*/ + default: /* Literal character */ +literal: + if (c == d) { + if (d == '\0') + return true; + break; + } +backtrack: + if (c == '\0' || !back_pat) + return false; /* No point continuing */ + /* Try again from last *, one character later in str. */ + pat = back_pat; + str = ++back_str; + break; + } + } +} +EXPORT_SYMBOL(glob_match); -- cgit v1.2.3-59-g8ed1b From 087face5265026d4fe664bdb580f4904bd10cfbf Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 6 Aug 2014 16:09:36 -0700 Subject: kernel.h: remove deprecated pack_hex_byte It's been nearly 3 years now since commit 55036ba76b2d ("lib: rename pack_hex_byte() to hex_byte_pack()") so it's time to remove this deprecated and unused static inline. Signed-off-by: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index a9e2268ecccb..3dc22abbc68a 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -493,11 +493,6 @@ static inline char *hex_byte_pack_upper(char *buf, u8 byte) return buf; } -static inline char * __deprecated pack_hex_byte(char *buf, u8 byte) -{ - return hex_byte_pack(buf, byte); -} - extern int hex_to_bin(char ch); extern int __must_check hex2bin(u8 *dst, const char *src, size_t count); -- cgit v1.2.3-59-g8ed1b From 0679cc483669d08153d158273455398a389ee9ca Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Wed, 6 Aug 2014 16:09:49 -0700 Subject: lib: bitmap: make nbits parameter of bitmap_empty unsigned Many functions in lib/bitmap.c start with an expression such as lim = bits/BITS_PER_LONG. Since bits has type (signed) int, and since gcc cannot know that it is in fact non-negative, it generates worse code than it could. 
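To illustrate the codegen point just made, an editorial sketch (not part of the series): with a signed count the compiler must emit round-toward-zero fixups for the division and modulo, whereas the unsigned version reduces to a plain shift and mask.

static int words_signed(int nbits)
{
	return nbits / BITS_PER_LONG;	/* signed division: extra fixup instructions */
}

static unsigned int words_unsigned(unsigned int nbits)
{
	return nbits / BITS_PER_LONG;	/* unsigned: compiles to a simple right shift */
}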
These patches, mostly consisting of changing various parameters to unsigned, gives a slight overall code reduction:

add/remove: 1/1 grow/shrink: 8/16 up/down: 251/-414 (-163)
function                                old     new   delta
tick_device_uses_broadcast              335     425     +90
__irq_alloc_descs                       498     554     +56
__bitmap_andnot                          73     115     +42
__bitmap_and                             70     101     +31
bitmap_weight                             -      11     +11
copy_hugetlb_page_range                 752     762     +10
follow_hugetlb_page                     846     854      +8
hugetlb_init                           1415    1417      +2
hugetlb_nrpages_setup                   130     131      +1
hugetlb_add_hstate                      377     376      -1
bitmap_allocate_region                   82      80      -2
select_task_rq_fair                    2202    2191     -11
hweight_long                             66      55     -11
__reg_op                                230     219     -11
dm_stats_message                       2849    2833     -16
bitmap_parselist                         92      74     -18
__bitmap_weight                         115      97     -18
__bitmap_subset                         153     129     -24
__bitmap_full                           128     104     -24
__bitmap_empty                          120      96     -24
bitmap_set                              179     149     -30
bitmap_clear                            185     155     -30
__bitmap_equal                          136     105     -31
__bitmap_intersects                     148     108     -40
__bitmap_complement                     109      67     -42
tick_device_setup_broadcast_func.isra    81       -     -81

[The increases in __bitmap_and{,not} are due to bug fixes 17/18,18/18. No idea why bitmap_weight suddenly appears.]

While 163 bytes treewide is insignificant, I believe the bitmap functions are often called with locks held, so saving even a few cycles might be worth it.

While making these changes, I found a few other things that might be worth including. 16,17,18 are actual bug fixes. The rest shouldn't change the behaviour of any of the functions, provided no-one passed negative nbits values. If something should come up, it should be fairly bisectable.

A few issues I thought about, but didn't know what to do with:

* Many of the functions misbehave if nbits is compile-time 0; the out-of-line functions generally handle 0 correctly. bitmap_fill() is particularly bad, whether the 0 is known at compile time or not. It would probably be nice to add detection of at least compile-time 0 and handle that appropriately.

* I didn't change __bitmap_shift_{left,right} to use unsigned because I want to fully understand why the algorithm works before making that change. However, AFAICT, they behave correctly for all (positive) shift amounts. This is not the case for the small_const_nbits versions. If for example nbits = n = BITS_PER_LONG, the shift operators turn into no-ops (at least on x86), so one get *dst = *src, whereas one would expect to get *dst=0. That difference in behaviour is somewhat annoying.

This patch (of 18):

The compiler can generate slightly smaller and simpler code when it knows that "nbits" is non-negative. Since no-one passes a negative bit-count, this shouldn't affect the semantics.
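As a usage sketch of the function being converted (editorial illustration, not from the patch): small constant sizes take the inline single-word test, while larger bitmaps go through the out-of-line __bitmap_empty().

static bool both_empty(void)
{
	DECLARE_BITMAP(small, BITS_PER_LONG);	/* one word: inline fast path */
	DECLARE_BITMAP(large, 256);		/* several words: calls __bitmap_empty() */

	bitmap_zero(small, BITS_PER_LONG);
	bitmap_zero(large, 256);

	return bitmap_empty(small, BITS_PER_LONG) && bitmap_empty(large, 256);
}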
Signed-off-by: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitmap.h | 4 ++-- lib/bitmap.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 7ad634501e48..3d3fd6b2f157 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -88,7 +88,7 @@ * lib/bitmap.c provides these functions: */ -extern int __bitmap_empty(const unsigned long *bitmap, int bits); +extern int __bitmap_empty(const unsigned long *bitmap, unsigned int nbits); extern int __bitmap_full(const unsigned long *bitmap, int bits); extern int __bitmap_equal(const unsigned long *bitmap1, const unsigned long *bitmap2, int bits); @@ -257,7 +257,7 @@ static inline int bitmap_subset(const unsigned long *src1, return __bitmap_subset(src1, src2, nbits); } -static inline int bitmap_empty(const unsigned long *src, int nbits) +static inline int bitmap_empty(const unsigned long *src, unsigned nbits) { if (small_const_nbits(nbits)) return ! (*src & BITMAP_LAST_WORD_MASK(nbits)); diff --git a/lib/bitmap.c b/lib/bitmap.c index 06f7e4fe8d2d..378911001442 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -40,9 +40,9 @@ * for the best explanations of this ordering. */ -int __bitmap_empty(const unsigned long *bitmap, int bits) +int __bitmap_empty(const unsigned long *bitmap, unsigned int bits) { - int k, lim = bits/BITS_PER_LONG; + unsigned int k, lim = bits/BITS_PER_LONG; for (k = 0; k < lim; ++k) if (bitmap[k]) return 0; -- cgit v1.2.3-59-g8ed1b From 8397927c8045c58afc68ef839855eb5505259df3 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Wed, 6 Aug 2014 16:09:51 -0700 Subject: lib: bitmap: make nbits parameter of bitmap_full unsigned The compiler can generate slightly smaller and simpler code when it knows that "nbits" is non-negative. Since no-one passes a negative bit-count, this shouldn't affect the semantics. Signed-off-by: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitmap.h | 4 ++-- lib/bitmap.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 3d3fd6b2f157..bc7e520d3f78 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -89,7 +89,7 @@ */ extern int __bitmap_empty(const unsigned long *bitmap, unsigned int nbits); -extern int __bitmap_full(const unsigned long *bitmap, int bits); +extern int __bitmap_full(const unsigned long *bitmap, unsigned int nbits); extern int __bitmap_equal(const unsigned long *bitmap1, const unsigned long *bitmap2, int bits); extern void __bitmap_complement(unsigned long *dst, const unsigned long *src, @@ -265,7 +265,7 @@ static inline int bitmap_empty(const unsigned long *src, unsigned nbits) return __bitmap_empty(src, nbits); } -static inline int bitmap_full(const unsigned long *src, int nbits) +static inline int bitmap_full(const unsigned long *src, unsigned int nbits) { if (small_const_nbits(nbits)) return ! 
(~(*src) & BITMAP_LAST_WORD_MASK(nbits)); diff --git a/lib/bitmap.c b/lib/bitmap.c index 378911001442..9859f38660f9 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -55,9 +55,9 @@ int __bitmap_empty(const unsigned long *bitmap, unsigned int bits) } EXPORT_SYMBOL(__bitmap_empty); -int __bitmap_full(const unsigned long *bitmap, int bits) +int __bitmap_full(const unsigned long *bitmap, unsigned int bits) { - int k, lim = bits/BITS_PER_LONG; + unsigned int k, lim = bits/BITS_PER_LONG; for (k = 0; k < lim; ++k) if (~bitmap[k]) return 0; -- cgit v1.2.3-59-g8ed1b From 5e068069319a9fb02fb14337c2cedeae5f16d812 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Wed, 6 Aug 2014 16:09:53 -0700 Subject: lib: bitmap: make nbits parameter of bitmap_equal unsigned The compiler can generate slightly smaller and simpler code when it knows that "nbits" is non-negative. Since no-one passes a negative bit-count, this shouldn't affect the semantics. Signed-off-by: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitmap.h | 2 +- lib/bitmap.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index bc7e520d3f78..1e0f46c91125 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -91,7 +91,7 @@ extern int __bitmap_empty(const unsigned long *bitmap, unsigned int nbits); extern int __bitmap_full(const unsigned long *bitmap, unsigned int nbits); extern int __bitmap_equal(const unsigned long *bitmap1, - const unsigned long *bitmap2, int bits); + const unsigned long *bitmap2, unsigned int nbits); extern void __bitmap_complement(unsigned long *dst, const unsigned long *src, int bits); extern void __bitmap_shift_right(unsigned long *dst, diff --git a/lib/bitmap.c b/lib/bitmap.c index 9859f38660f9..d6bb955e71cb 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -71,9 +71,9 @@ int __bitmap_full(const unsigned long *bitmap, unsigned int bits) EXPORT_SYMBOL(__bitmap_full); int __bitmap_equal(const unsigned long *bitmap1, - const unsigned long *bitmap2, int bits) + const unsigned long *bitmap2, unsigned int bits) { - int k, lim = bits/BITS_PER_LONG; + unsigned int k, lim = bits/BITS_PER_LONG; for (k = 0; k < lim; ++k) if (bitmap1[k] != bitmap2[k]) return 0; -- cgit v1.2.3-59-g8ed1b From 3d6684f4e6a46f3a8263f5681e093bccbb767a1c Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Wed, 6 Aug 2014 16:09:55 -0700 Subject: lib: bitmap: make nbits parameter of bitmap_complement unsigned The compiler can generate slightly smaller and simpler code when it knows that "nbits" is non-negative. Since no-one passes a negative bit-count, this shouldn't affect the semantics. 
Signed-off-by: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitmap.h | 6 +++--- lib/bitmap.c | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 1e0f46c91125..21fb52ffe444 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -93,7 +93,7 @@ extern int __bitmap_full(const unsigned long *bitmap, unsigned int nbits); extern int __bitmap_equal(const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); extern void __bitmap_complement(unsigned long *dst, const unsigned long *src, - int bits); + unsigned int nbits); extern void __bitmap_shift_right(unsigned long *dst, const unsigned long *src, int shift, int bits); extern void __bitmap_shift_left(unsigned long *dst, @@ -222,7 +222,7 @@ static inline int bitmap_andnot(unsigned long *dst, const unsigned long *src1, } static inline void bitmap_complement(unsigned long *dst, const unsigned long *src, - int nbits) + unsigned int nbits) { if (small_const_nbits(nbits)) *dst = ~(*src) & BITMAP_LAST_WORD_MASK(nbits); @@ -231,7 +231,7 @@ static inline void bitmap_complement(unsigned long *dst, const unsigned long *sr } static inline int bitmap_equal(const unsigned long *src1, - const unsigned long *src2, int nbits) + const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) return ! ((*src1 ^ *src2) & BITMAP_LAST_WORD_MASK(nbits)); diff --git a/lib/bitmap.c b/lib/bitmap.c index d6bb955e71cb..0f2f845702eb 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -86,9 +86,9 @@ int __bitmap_equal(const unsigned long *bitmap1, } EXPORT_SYMBOL(__bitmap_equal); -void __bitmap_complement(unsigned long *dst, const unsigned long *src, int bits) +void __bitmap_complement(unsigned long *dst, const unsigned long *src, unsigned int bits) { - int k, lim = bits/BITS_PER_LONG; + unsigned int k, lim = bits/BITS_PER_LONG; for (k = 0; k < lim; ++k) dst[k] = ~src[k]; -- cgit v1.2.3-59-g8ed1b From 65b4ee62c9cd10640f0054f47fd84c7920e8c118 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Wed, 6 Aug 2014 16:09:57 -0700 Subject: lib: bitmap: remove unnecessary mask from bitmap_complement Since the extra bits are "don't care", there is no reason to mask the last word to the used bits when complementing. This shaves off yet a few bytes. 
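A short usage sketch of the "don't care" point (illustrative, not from the patch): bits at positions >= nbits are undefined for every bitmap operation, so after this change a caller may observe them set, but no correct caller looks at them.

	DECLARE_BITMAP(map, 5);

	bitmap_zero(map, 5);
	bitmap_complement(map, map, 5);
	/* map[0] may now read as ~0UL rather than 0x1f; only bits 0..4 are meaningful */
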
Signed-off-by: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitmap.h | 2 +- lib/bitmap.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 21fb52ffe444..f42d72d5fe82 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -225,7 +225,7 @@ static inline void bitmap_complement(unsigned long *dst, const unsigned long *sr unsigned int nbits) { if (small_const_nbits(nbits)) - *dst = ~(*src) & BITMAP_LAST_WORD_MASK(nbits); + *dst = ~(*src); else __bitmap_complement(dst, src, nbits); } diff --git a/lib/bitmap.c b/lib/bitmap.c index 0f2f845702eb..4387e3c092fd 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -93,7 +93,7 @@ void __bitmap_complement(unsigned long *dst, const unsigned long *src, unsigned dst[k] = ~src[k]; if (bits % BITS_PER_LONG) - dst[k] = ~src[k] & BITMAP_LAST_WORD_MASK(bits); + dst[k] = ~src[k]; } EXPORT_SYMBOL(__bitmap_complement); -- cgit v1.2.3-59-g8ed1b From 2f9305eb31097fdd3dc86daca65d8097d1fcf2ff Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Wed, 6 Aug 2014 16:09:59 -0700 Subject: lib: bitmap: make nbits parameter of bitmap_{and,or,xor,andnot} unsigned This change is only for consistency with the changes to the other bitmap_* functions; it doesn't change the size of the generated code: inside BITS_TO_LONGS there is a sizeof(long), which causes bits to be interpreted as unsigned anyway. Signed-off-by: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitmap.h | 16 ++++++++-------- lib/bitmap.c | 24 ++++++++++++------------ 2 files changed, 20 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index f42d72d5fe82..7048782fe5b9 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -99,13 +99,13 @@ extern void __bitmap_shift_right(unsigned long *dst, extern void __bitmap_shift_left(unsigned long *dst, const unsigned long *src, int shift, int bits); extern int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1, - const unsigned long *bitmap2, int bits); + const unsigned long *bitmap2, unsigned int nbits); extern void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1, - const unsigned long *bitmap2, int bits); + const unsigned long *bitmap2, unsigned int nbits); extern void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1, - const unsigned long *bitmap2, int bits); + const unsigned long *bitmap2, unsigned int nbits); extern int __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1, - const unsigned long *bitmap2, int bits); + const unsigned long *bitmap2, unsigned int nbits); extern int __bitmap_intersects(const unsigned long *bitmap1, const unsigned long *bitmap2, int bits); extern int __bitmap_subset(const unsigned long *bitmap1, @@ -188,7 +188,7 @@ static inline void bitmap_copy(unsigned long *dst, const unsigned long *src, } static inline int bitmap_and(unsigned long *dst, const unsigned long *src1, - const unsigned long *src2, int nbits) + const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) return (*dst = *src1 & *src2) != 0; @@ -196,7 +196,7 @@ static inline int bitmap_and(unsigned long *dst, const unsigned long *src1, } static inline void bitmap_or(unsigned long *dst, const unsigned long *src1, - const unsigned long *src2, int nbits) + const unsigned long *src2, unsigned int nbits) { if 
(small_const_nbits(nbits)) *dst = *src1 | *src2; @@ -205,7 +205,7 @@ static inline void bitmap_or(unsigned long *dst, const unsigned long *src1, } static inline void bitmap_xor(unsigned long *dst, const unsigned long *src1, - const unsigned long *src2, int nbits) + const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) *dst = *src1 ^ *src2; @@ -214,7 +214,7 @@ static inline void bitmap_xor(unsigned long *dst, const unsigned long *src1, } static inline int bitmap_andnot(unsigned long *dst, const unsigned long *src1, - const unsigned long *src2, int nbits) + const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) return (*dst = *src1 & ~(*src2)) != 0; diff --git a/lib/bitmap.c b/lib/bitmap.c index 4387e3c092fd..03207373cc5a 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -182,10 +182,10 @@ void __bitmap_shift_left(unsigned long *dst, EXPORT_SYMBOL(__bitmap_shift_left); int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1, - const unsigned long *bitmap2, int bits) + const unsigned long *bitmap2, unsigned int bits) { - int k; - int nr = BITS_TO_LONGS(bits); + unsigned int k; + unsigned int nr = BITS_TO_LONGS(bits); unsigned long result = 0; for (k = 0; k < nr; k++) @@ -195,10 +195,10 @@ int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1, EXPORT_SYMBOL(__bitmap_and); void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1, - const unsigned long *bitmap2, int bits) + const unsigned long *bitmap2, unsigned int bits) { - int k; - int nr = BITS_TO_LONGS(bits); + unsigned int k; + unsigned int nr = BITS_TO_LONGS(bits); for (k = 0; k < nr; k++) dst[k] = bitmap1[k] | bitmap2[k]; @@ -206,10 +206,10 @@ void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1, EXPORT_SYMBOL(__bitmap_or); void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1, - const unsigned long *bitmap2, int bits) + const unsigned long *bitmap2, unsigned int bits) { - int k; - int nr = BITS_TO_LONGS(bits); + unsigned int k; + unsigned int nr = BITS_TO_LONGS(bits); for (k = 0; k < nr; k++) dst[k] = bitmap1[k] ^ bitmap2[k]; @@ -217,10 +217,10 @@ void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1, EXPORT_SYMBOL(__bitmap_xor); int __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1, - const unsigned long *bitmap2, int bits) + const unsigned long *bitmap2, unsigned int bits) { - int k; - int nr = BITS_TO_LONGS(bits); + unsigned int k; + unsigned int nr = BITS_TO_LONGS(bits); unsigned long result = 0; for (k = 0; k < nr; k++) -- cgit v1.2.3-59-g8ed1b From 6dfe9799c2a03d225316a3e959b0447f3f50303e Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Wed, 6 Aug 2014 16:10:01 -0700 Subject: lib: bitmap: make nbits parameter of bitmap_intersects unsigned The compiler can generate slightly smaller and simpler code when it knows that "nbits" is non-negative. Since no-one passes a negative bit-count, this shouldn't affect the semantics. 
Signed-off-by: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitmap.h | 4 ++-- lib/bitmap.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 7048782fe5b9..2f3f3a4d5996 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -107,7 +107,7 @@ extern void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1, extern int __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); extern int __bitmap_intersects(const unsigned long *bitmap1, - const unsigned long *bitmap2, int bits); + const unsigned long *bitmap2, unsigned int nbits); extern int __bitmap_subset(const unsigned long *bitmap1, const unsigned long *bitmap2, int bits); extern int __bitmap_weight(const unsigned long *bitmap, int bits); @@ -240,7 +240,7 @@ static inline int bitmap_equal(const unsigned long *src1, } static inline int bitmap_intersects(const unsigned long *src1, - const unsigned long *src2, int nbits) + const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) return ((*src1 & *src2) & BITMAP_LAST_WORD_MASK(nbits)) != 0; diff --git a/lib/bitmap.c b/lib/bitmap.c index 03207373cc5a..e85daa90b237 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -230,9 +230,9 @@ int __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1, EXPORT_SYMBOL(__bitmap_andnot); int __bitmap_intersects(const unsigned long *bitmap1, - const unsigned long *bitmap2, int bits) + const unsigned long *bitmap2, unsigned int bits) { - int k, lim = bits/BITS_PER_LONG; + unsigned int k, lim = bits/BITS_PER_LONG; for (k = 0; k < lim; ++k) if (bitmap1[k] & bitmap2[k]) return 1; -- cgit v1.2.3-59-g8ed1b From 5be20213e855550de2b32fde6fc116f74bab86a6 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Wed, 6 Aug 2014 16:10:03 -0700 Subject: lib: bitmap: make nbits parameter of bitmap_subset unsigned The compiler can generate slightly smaller and simpler code when it knows that "nbits" is non-negative. Since no-one passes a negative bit-count, this shouldn't affect the semantics. Signed-off-by: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitmap.h | 4 ++-- lib/bitmap.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 2f3f3a4d5996..87e88f79def1 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -109,7 +109,7 @@ extern int __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1, extern int __bitmap_intersects(const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); extern int __bitmap_subset(const unsigned long *bitmap1, - const unsigned long *bitmap2, int bits); + const unsigned long *bitmap2, unsigned int nbits); extern int __bitmap_weight(const unsigned long *bitmap, int bits); extern void bitmap_set(unsigned long *map, int i, int len); @@ -249,7 +249,7 @@ static inline int bitmap_intersects(const unsigned long *src1, } static inline int bitmap_subset(const unsigned long *src1, - const unsigned long *src2, int nbits) + const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) return ! 
((*src1 & ~(*src2)) & BITMAP_LAST_WORD_MASK(nbits)); diff --git a/lib/bitmap.c b/lib/bitmap.c index e85daa90b237..c9bff5379795 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -245,9 +245,9 @@ int __bitmap_intersects(const unsigned long *bitmap1, EXPORT_SYMBOL(__bitmap_intersects); int __bitmap_subset(const unsigned long *bitmap1, - const unsigned long *bitmap2, int bits) + const unsigned long *bitmap2, unsigned int bits) { - int k, lim = bits/BITS_PER_LONG; + unsigned int k, lim = bits/BITS_PER_LONG; for (k = 0; k < lim; ++k) if (bitmap1[k] & ~bitmap2[k]) return 0; -- cgit v1.2.3-59-g8ed1b From 877d9f3b63ac2e5dbc51cbcdff156433f03b3a32 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Wed, 6 Aug 2014 16:10:05 -0700 Subject: lib: bitmap: make nbits parameter of bitmap_weight unsigned The compiler can generate slightly smaller and simpler code when it knows that "nbits" is non-negative. Since no-one passes a negative bit-count, this shouldn't affect the semantics. I didn't change the return type, since that might change the semantics of some expression containing a call to bitmap_weight(). Certainly an int is capable of holding the result. Signed-off-by: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitmap.h | 4 ++-- lib/bitmap.c | 5 +++-- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 87e88f79def1..64b0ebe9f9a8 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -110,7 +110,7 @@ extern int __bitmap_intersects(const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); extern int __bitmap_subset(const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); -extern int __bitmap_weight(const unsigned long *bitmap, int bits); +extern int __bitmap_weight(const unsigned long *bitmap, unsigned int nbits); extern void bitmap_set(unsigned long *map, int i, int len); extern void bitmap_clear(unsigned long *map, int start, int nr); @@ -273,7 +273,7 @@ static inline int bitmap_full(const unsigned long *src, unsigned int nbits) return __bitmap_full(src, nbits); } -static inline int bitmap_weight(const unsigned long *src, int nbits) +static inline int bitmap_weight(const unsigned long *src, unsigned int nbits) { if (small_const_nbits(nbits)) return hweight_long(*src & BITMAP_LAST_WORD_MASK(nbits)); diff --git a/lib/bitmap.c b/lib/bitmap.c index c9bff5379795..f69435c23f9c 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -259,9 +259,10 @@ int __bitmap_subset(const unsigned long *bitmap1, } EXPORT_SYMBOL(__bitmap_subset); -int __bitmap_weight(const unsigned long *bitmap, int bits) +int __bitmap_weight(const unsigned long *bitmap, unsigned int bits) { - int k, w = 0, lim = bits/BITS_PER_LONG; + unsigned int k, lim = bits/BITS_PER_LONG; + int w = 0; for (k = 0; k < lim; k++) w += hweight_long(bitmap[k]); -- cgit v1.2.3-59-g8ed1b From fb5ac54263ef3fcb5c469a61e0ab6b06e45e2307 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Wed, 6 Aug 2014 16:10:07 -0700 Subject: lib: bitmap: make the start index of bitmap_set unsigned The compiler can generate slightly smaller and simpler code when it knows that "start" is non-negative. Also, use the names "start" and "len" for the two parameters in both header file and implementation, instead of the previous mix. 
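A small usage sketch (added for illustration, not part of the patch): "start" is a plain bit index, so nothing is lost by making it unsigned, while "len" stays a signed int because the implementation's "len - bits_to_set >= 0" loop test relies on it going negative.

	DECLARE_BITMAP(map, 128);

	bitmap_zero(map, 128);
	bitmap_set(map, 70, 20);	/* set bits 70..89, crossing a word boundary */
	bitmap_clear(map, 75, 5);	/* clear bits 75..79 again */
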
Signed-off-by: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitmap.h | 2 +- lib/bitmap.c | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 64b0ebe9f9a8..ad2c67d3583e 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -112,7 +112,7 @@ extern int __bitmap_subset(const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); extern int __bitmap_weight(const unsigned long *bitmap, unsigned int nbits); -extern void bitmap_set(unsigned long *map, int i, int len); +extern void bitmap_set(unsigned long *map, unsigned int start, int len); extern void bitmap_clear(unsigned long *map, int start, int nr); extern unsigned long bitmap_find_next_zero_area(unsigned long *map, unsigned long size, diff --git a/lib/bitmap.c b/lib/bitmap.c index f69435c23f9c..2a3a92fc3355 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -274,21 +274,21 @@ int __bitmap_weight(const unsigned long *bitmap, unsigned int bits) } EXPORT_SYMBOL(__bitmap_weight); -void bitmap_set(unsigned long *map, int start, int nr) +void bitmap_set(unsigned long *map, unsigned int start, int len) { unsigned long *p = map + BIT_WORD(start); - const int size = start + nr; + const unsigned int size = start + len; int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG); unsigned long mask_to_set = BITMAP_FIRST_WORD_MASK(start); - while (nr - bits_to_set >= 0) { + while (len - bits_to_set >= 0) { *p |= mask_to_set; - nr -= bits_to_set; + len -= bits_to_set; bits_to_set = BITS_PER_LONG; mask_to_set = ~0UL; p++; } - if (nr) { + if (len) { mask_to_set &= BITMAP_LAST_WORD_MASK(size); *p |= mask_to_set; } -- cgit v1.2.3-59-g8ed1b From 154f5e38f30f262025c8c2e825376f6eb51e8bcb Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Wed, 6 Aug 2014 16:10:10 -0700 Subject: lib: bitmap: make the start index of bitmap_clear unsigned The compiler can generate slightly smaller and simpler code when it knows that "start" is non-negative. Also, use the names "start" and "len" for the two parameters for consistency with bitmap_set. 
Signed-off-by: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitmap.h | 2 +- lib/bitmap.c | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index ad2c67d3583e..83c1c7d25073 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -113,7 +113,7 @@ extern int __bitmap_subset(const unsigned long *bitmap1, extern int __bitmap_weight(const unsigned long *bitmap, unsigned int nbits); extern void bitmap_set(unsigned long *map, unsigned int start, int len); -extern void bitmap_clear(unsigned long *map, int start, int nr); +extern void bitmap_clear(unsigned long *map, unsigned int start, int len); extern unsigned long bitmap_find_next_zero_area(unsigned long *map, unsigned long size, unsigned long start, diff --git a/lib/bitmap.c b/lib/bitmap.c index 2a3a92fc3355..5d2540396300 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -295,21 +295,21 @@ void bitmap_set(unsigned long *map, unsigned int start, int len) } EXPORT_SYMBOL(bitmap_set); -void bitmap_clear(unsigned long *map, int start, int nr) +void bitmap_clear(unsigned long *map, unsigned int start, int len) { unsigned long *p = map + BIT_WORD(start); - const int size = start + nr; + const unsigned int size = start + len; int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG); unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start); - while (nr - bits_to_clear >= 0) { + while (len - bits_to_clear >= 0) { *p &= ~mask_to_clear; - nr -= bits_to_clear; + len -= bits_to_clear; bits_to_clear = BITS_PER_LONG; mask_to_clear = ~0UL; p++; } - if (nr) { + if (len) { mask_to_clear &= BITMAP_LAST_WORD_MASK(size); *p &= ~mask_to_clear; } -- cgit v1.2.3-59-g8ed1b From 9279d3286e10736766edcaf815ae10e00856e448 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Wed, 6 Aug 2014 16:10:16 -0700 Subject: lib: bitmap: change parameter of bitmap_*_region to unsigned Changing the pos parameter of __reg_op to unsigned allows the compiler to generate slightly smaller and simpler code. Also update its callers bitmap_*_region to receive and pass unsigned int. The return types of bitmap_find_free_region and bitmap_allocate_region are still int to allow a negative error code to be returned. An int is certainly capable of representing any realistic return value. 
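A usage sketch (illustrative, not from the patch) of why the return types stay int even though positions are now unsigned: the bit offset and the error code travel in the same return value.

	DECLARE_BITMAP(map, 64);
	int pos;

	bitmap_zero(map, 64);
	pos = bitmap_find_free_region(map, 64, 2);	/* claim an aligned run of 1 << 2 bits */
	if (pos < 0)
		return pos;				/* -ENOMEM: no suitably aligned free region */
	/* ... use bits pos .. pos + 3 ... */
	bitmap_release_region(map, pos, 2);
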
Signed-off-by: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitmap.h | 6 +++--- lib/bitmap.c | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 83c1c7d25073..210037833356 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -140,9 +140,9 @@ extern void bitmap_onto(unsigned long *dst, const unsigned long *orig, const unsigned long *relmap, int bits); extern void bitmap_fold(unsigned long *dst, const unsigned long *orig, int sz, int bits); -extern int bitmap_find_free_region(unsigned long *bitmap, int bits, int order); -extern void bitmap_release_region(unsigned long *bitmap, int pos, int order); -extern int bitmap_allocate_region(unsigned long *bitmap, int pos, int order); +extern int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order); +extern void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order); +extern int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order); extern void bitmap_copy_le(void *dst, const unsigned long *src, int nbits); extern int bitmap_ord_to_pos(const unsigned long *bitmap, int n, int bits); diff --git a/lib/bitmap.c b/lib/bitmap.c index 2714df9f5cdb..c2f3807b3601 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -1042,7 +1042,7 @@ enum { REG_OP_RELEASE, /* clear all bits in region */ }; -static int __reg_op(unsigned long *bitmap, int pos, int order, int reg_op) +static int __reg_op(unsigned long *bitmap, unsigned int pos, int order, int reg_op) { int nbits_reg; /* number of bits in region */ int index; /* index first long of region in bitmap */ @@ -1108,11 +1108,11 @@ done: * Return the bit offset in bitmap of the allocated region, * or -errno on failure. */ -int bitmap_find_free_region(unsigned long *bitmap, int bits, int order) +int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order) { - int pos, end; /* scans bitmap by regions of size order */ + unsigned int pos, end; /* scans bitmap by regions of size order */ - for (pos = 0 ; (end = pos + (1 << order)) <= bits; pos = end) { + for (pos = 0 ; (end = pos + (1U << order)) <= bits; pos = end) { if (!__reg_op(bitmap, pos, order, REG_OP_ISFREE)) continue; __reg_op(bitmap, pos, order, REG_OP_ALLOC); @@ -1133,7 +1133,7 @@ EXPORT_SYMBOL(bitmap_find_free_region); * * No return value. */ -void bitmap_release_region(unsigned long *bitmap, int pos, int order) +void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order) { __reg_op(bitmap, pos, order, REG_OP_RELEASE); } @@ -1150,7 +1150,7 @@ EXPORT_SYMBOL(bitmap_release_region); * Return 0 on success, or %-EBUSY if specified region wasn't * free (not all bits were zero). */ -int bitmap_allocate_region(unsigned long *bitmap, int pos, int order) +int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order) { if (!__reg_op(bitmap, pos, order, REG_OP_ISFREE)) return -EBUSY; -- cgit v1.2.3-59-g8ed1b From c5341ec8904ebff50f365a2626da6ab525d63b9e Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Wed, 6 Aug 2014 16:10:20 -0700 Subject: lib: bitmap: add missing mask in bitmap_shift_right There is no guarantee that *src does not contain garbage bits outside the lower nbits, so we need to mask it before the shift-and-assign. 
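A concrete illustration of the bug (a sketch, not code from the patch; it assumes a 64-bit long, nbits == 8 and a shift of 4): without the mask, garbage above bit 7 in the source word is shifted down into the defined part of the result.

/* old single-word inline */
static unsigned long shift_right_old(unsigned long src)
{
	return src >> 4;					/* bits 8..63 leak into bits 4..59 */
}

/* single-word inline after this patch */
static unsigned long shift_right_fixed(unsigned long src)
{
	return (src & BITMAP_LAST_WORD_MASK(8)) >> 4;		/* only bits 0..7 contribute */
}
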
Signed-off-by: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitmap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 210037833356..75df61d9ecfb 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -284,7 +284,7 @@ static inline void bitmap_shift_right(unsigned long *dst, const unsigned long *src, int n, int nbits) { if (small_const_nbits(nbits)) - *dst = *src >> n; + *dst = (*src & BITMAP_LAST_WORD_MASK(nbits)) >> n; else __bitmap_shift_right(dst, src, n, nbits); } -- cgit v1.2.3-59-g8ed1b From 7e5f97d1927f41affa21aa5b321865ceab1994ce Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Wed, 6 Aug 2014 16:10:22 -0700 Subject: lib: bitmap: add missing mask in bitmap_and Apparently, bitmap_and is supposed to return whether the new bitmap is empty. But it didn't take potential garbage bits in the last word into account. Signed-off-by: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitmap.h | 2 +- lib/bitmap.c | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 75df61d9ecfb..3399a9ecd991 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -191,7 +191,7 @@ static inline int bitmap_and(unsigned long *dst, const unsigned long *src1, const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) - return (*dst = *src1 & *src2) != 0; + return (*dst = *src1 & *src2 & BITMAP_LAST_WORD_MASK(nbits)) != 0; return __bitmap_and(dst, src1, src2, nbits); } diff --git a/lib/bitmap.c b/lib/bitmap.c index faaf7206d4cf..ce2ec80bf431 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -185,11 +185,14 @@ int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int bits) { unsigned int k; - unsigned int nr = BITS_TO_LONGS(bits); + unsigned int lim = bits/BITS_PER_LONG; unsigned long result = 0; - for (k = 0; k < nr; k++) + for (k = 0; k < lim; k++) result |= (dst[k] = bitmap1[k] & bitmap2[k]); + if (bits % BITS_PER_LONG) + result |= (dst[k] = bitmap1[k] & bitmap2[k] & + BITMAP_LAST_WORD_MASK(bits)); return result != 0; } EXPORT_SYMBOL(__bitmap_and); -- cgit v1.2.3-59-g8ed1b From 74e765319084bd2940a9612ada961f0f7385936c Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Wed, 6 Aug 2014 16:10:24 -0700 Subject: lib: bitmap: add missing mask in bitmap_andnot Apparently, bitmap_andnot is supposed to return whether the new bitmap is empty. But it didn't take potential garbage bits in the last word into account. 
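Both this fix and the bitmap_and one above can be shown with a tiny example (illustrative, assuming nbits == 4): a stray bit above the defined range flips the emptiness answer.

	unsigned long a = 0x10;	/* bit 4 set: garbage, since only bits 0..3 are defined */
	unsigned long b = 0;
	unsigned long dst;
	int old_ret = ((dst = a & ~b) != 0);				/* 1: wrongly "not empty" */
	int new_ret = ((dst = a & ~b & BITMAP_LAST_WORD_MASK(4)) != 0);	/* 0: correctly "empty" */
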
Signed-off-by: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitmap.h | 2 +- lib/bitmap.c | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 3399a9ecd991..e1c8d080c427 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -217,7 +217,7 @@ static inline int bitmap_andnot(unsigned long *dst, const unsigned long *src1, const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) - return (*dst = *src1 & ~(*src2)) != 0; + return (*dst = *src1 & ~(*src2) & BITMAP_LAST_WORD_MASK(nbits)) != 0; return __bitmap_andnot(dst, src1, src2, nbits); } diff --git a/lib/bitmap.c b/lib/bitmap.c index ce2ec80bf431..1e031f2c9aba 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -223,11 +223,14 @@ int __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int bits) { unsigned int k; - unsigned int nr = BITS_TO_LONGS(bits); + unsigned int lim = bits/BITS_PER_LONG; unsigned long result = 0; - for (k = 0; k < nr; k++) + for (k = 0; k < lim; k++) result |= (dst[k] = bitmap1[k] & ~bitmap2[k]); + if (bits % BITS_PER_LONG) + result |= (dst[k] = bitmap1[k] & ~bitmap2[k] & + BITMAP_LAST_WORD_MASK(bits)); return result != 0; } EXPORT_SYMBOL(__bitmap_andnot); -- cgit v1.2.3-59-g8ed1b From b3ea074fd3c798bee861aa076dc2f873461ae26f Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Mon, 4 Aug 2014 13:05:56 +0900 Subject: gpio: add missing includes in machine.h linux/types.h and linux/list.h should be included so the types used in the header file are always properly declared. Reported-by: Stephen Rothwell Reported-by: Thierry Reding Signed-off-by: Alexandre Courbot Signed-off-by: Linus Walleij --- include/linux/gpio/machine.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/gpio/machine.h b/include/linux/gpio/machine.h index b8ad87fab4ce..e2706140eaff 100644 --- a/include/linux/gpio/machine.h +++ b/include/linux/gpio/machine.h @@ -1,6 +1,9 @@ #ifndef __LINUX_GPIO_MACHINE_H #define __LINUX_GPIO_MACHINE_H +#include +#include + enum gpio_lookup_flags { GPIO_ACTIVE_HIGH = (0 << 0), GPIO_ACTIVE_LOW = (1 << 0), -- cgit v1.2.3-59-g8ed1b From ed44724b79d8e03a40665436019cf22baba80d30 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 19 Apr 2014 14:37:20 -0400 Subject: acct: switch to __kernel_write() Signed-off-by: Al Viro --- fs/internal.h | 1 - include/linux/fs.h | 1 + kernel/acct.c | 31 ++++++++++++------------------- 3 files changed, 13 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/fs/internal.h b/fs/internal.h index 465742407466..9a2edba87c2b 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -131,7 +131,6 @@ extern long prune_dcache_sb(struct super_block *sb, unsigned long nr_to_scan, /* * read_write.c */ -extern ssize_t __kernel_write(struct file *, const char *, size_t, loff_t *); extern int rw_verify_area(int, struct file *, const loff_t *, size_t); /* diff --git a/include/linux/fs.h b/include/linux/fs.h index e11d60cc867b..4b7d57cf7863 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2335,6 +2335,7 @@ extern int do_pipe_flags(int *, int); extern int kernel_read(struct file *, loff_t, char *, unsigned long); extern ssize_t kernel_write(struct file *, const char *, size_t, loff_t); +extern ssize_t __kernel_write(struct file *, const char *, size_t, loff_t *); extern struct file * open_exec(const char *); /* 
fs/dcache.c -- generic fs support functions */ diff --git a/kernel/acct.c b/kernel/acct.c index 807ebc5d8333..8082d9875d6b 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -456,12 +456,16 @@ static void do_acct_process(struct bsd_acct_struct *acct, { struct pacct_struct *pacct = ¤t->signal->pacct; acct_t ac; - mm_segment_t fs; unsigned long flim; u64 elapsed, run_time; struct tty_struct *tty; const struct cred *orig_cred; + /* + * Accounting records are not subject to resource limits. + */ + flim = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; + current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY; /* Perform file operations on behalf of whoever enabled accounting */ orig_cred = override_creds(file->f_cred); @@ -536,25 +540,14 @@ static void do_acct_process(struct bsd_acct_struct *acct, * Get freeze protection. If the fs is frozen, just skip the write * as we could deadlock the system otherwise. */ - if (!file_start_write_trylock(file)) - goto out; - /* - * Kernel segment override to datasegment and write it - * to the accounting file. - */ - fs = get_fs(); - set_fs(KERNEL_DS); - /* - * Accounting records are not subject to resource limits. - */ - flim = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; - current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY; - file->f_op->write(file, (char *)&ac, - sizeof(acct_t), &file->f_pos); - current->signal->rlim[RLIMIT_FSIZE].rlim_cur = flim; - set_fs(fs); - file_end_write(file); + if (file_start_write_trylock(file)) { + /* it's been opened O_APPEND, so position is irrelevant */ + loff_t pos = 0; + __kernel_write(file, (char *)&ac, sizeof(acct_t), &pos); + file_end_write(file); + } out: + current->signal->rlim[RLIMIT_FSIZE].rlim_cur = flim; revert_creds(orig_cred); } -- cgit v1.2.3-59-g8ed1b From 215752fce31c80f3b3a1530bc7cddb3ba6a69b3a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 7 Aug 2014 06:23:41 -0400 Subject: acct: get rid of acct_list Put these suckers on per-vfsmount and per-superblock lists instead. Note: right now it's still acct_lock for everything, but that's going to change. 
Signed-off-by: Al Viro --- fs/mount.h | 1 + fs/namespace.c | 2 +- fs/super.c | 2 +- include/linux/acct.h | 6 +-- include/linux/fs.h | 1 + kernel/acct.c | 135 +++++++++++++++++++++------------------------------ 6 files changed, 62 insertions(+), 85 deletions(-) (limited to 'include/linux') diff --git a/fs/mount.h b/fs/mount.h index d55297f2fa05..0a2d1458681f 100644 --- a/fs/mount.h +++ b/fs/mount.h @@ -56,6 +56,7 @@ struct mount { int mnt_group_id; /* peer group identifier */ int mnt_expiry_mark; /* true if marked for expiry */ int mnt_pinned; + struct hlist_head mnt_pins; struct path mnt_ex_mountpoint; }; diff --git a/fs/namespace.c b/fs/namespace.c index 182bc41cd887..22e530addfaf 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -956,7 +956,7 @@ put_again: mnt->mnt_pinned = 0; rcu_read_unlock(); unlock_mount_hash(); - acct_auto_close_mnt(&mnt->mnt); + acct_auto_close_mnt(&mnt->mnt_pins); goto put_again; } if (unlikely(mnt->mnt.mnt_flags & MNT_DOOMED)) { diff --git a/fs/super.c b/fs/super.c index d20d5b11dedf..52ed93eb63df 100644 --- a/fs/super.c +++ b/fs/super.c @@ -703,7 +703,7 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force) #endif if (flags & MS_RDONLY) - acct_auto_close(sb); + acct_auto_close(&sb->s_pins); shrink_dcache_sb(sb); remount_ro = (flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY); diff --git a/include/linux/acct.h b/include/linux/acct.h index 4a5b7cb56079..65a4f889182e 100644 --- a/include/linux/acct.h +++ b/include/linux/acct.h @@ -24,14 +24,14 @@ struct super_block; struct pacct_struct; struct pid_namespace; extern int acct_parm[]; /* for sysctl */ -extern void acct_auto_close_mnt(struct vfsmount *m); -extern void acct_auto_close(struct super_block *sb); +extern void acct_auto_close(struct hlist_head *); +extern void acct_auto_close_mnt(struct hlist_head *); extern void acct_collect(long exitcode, int group_dead); extern void acct_process(void); extern void acct_exit_ns(struct pid_namespace *); #else -#define acct_auto_close_mnt(x) do { } while (0) #define acct_auto_close(x) do { } while (0) +#define acct_auto_close_mnt(x) do { } while (0) #define acct_collect(x,y) do { } while (0) #define acct_process() do { } while (0) #define acct_exit_ns(ns) do { } while (0) diff --git a/include/linux/fs.h b/include/linux/fs.h index 4b7d57cf7863..17f70872a4a5 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1250,6 +1250,7 @@ struct super_block { /* AIO completions deferred from interrupt context */ struct workqueue_struct *s_dio_done_wq; + struct hlist_head s_pins; /* * Keep the lru lists last in the structure so they always sit on their diff --git a/kernel/acct.c b/kernel/acct.c index 019f012a3c6f..21fbb3c27c2a 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -59,6 +59,7 @@ #include #include /* sector_div */ #include +#include <../fs/mount.h> /* will go away when we refactor */ /* * These constants control the amount of freespace that suspend and @@ -79,16 +80,16 @@ static void do_acct_process(struct bsd_acct_struct *acct); struct bsd_acct_struct { long count; + struct hlist_node s_list; + struct hlist_node m_list; struct mutex lock; int active; unsigned long needcheck; struct file *file; struct pid_namespace *ns; - struct list_head list; }; static DEFINE_SPINLOCK(acct_lock); -static LIST_HEAD(acct_list); /* * Check the amount of free space and suspend/resume accordingly. 
@@ -133,25 +134,33 @@ static void acct_put(struct bsd_acct_struct *p) spin_unlock(&acct_lock); } -static struct bsd_acct_struct *acct_get(struct bsd_acct_struct **p) +static struct bsd_acct_struct *__acct_get(struct bsd_acct_struct *res) +{ + res->count++; + spin_unlock(&acct_lock); + mutex_lock(&res->lock); + if (!res->ns) { + mutex_unlock(&res->lock); + spin_lock(&acct_lock); + if (!--res->count) + kfree(res); + return NULL; + } + return res; +} + +static struct bsd_acct_struct *acct_get(struct pid_namespace *ns) { struct bsd_acct_struct *res; spin_lock(&acct_lock); again: - res = *p; - if (res) - res->count++; - spin_unlock(&acct_lock); - if (res) { - mutex_lock(&res->lock); - if (!res->ns) { - mutex_unlock(&res->lock); - spin_lock(&acct_lock); - if (!--res->count) - kfree(res); - goto again; - } + if (!ns->bacct) { + spin_unlock(&acct_lock); + return NULL; } + res = __acct_get(ns->bacct); + if (!res) + goto again; return res; } @@ -162,7 +171,8 @@ static void acct_kill(struct bsd_acct_struct *acct, struct file *file = acct->file; struct pid_namespace *ns = acct->ns; spin_lock(&acct_lock); - list_del(&acct->list); + hlist_del(&acct->m_list); + hlist_del(&acct->s_list); mnt_unpin(file->f_path.mnt); spin_unlock(&acct_lock); do_acct_process(acct); @@ -170,8 +180,10 @@ static void acct_kill(struct bsd_acct_struct *acct, spin_lock(&acct_lock); ns->bacct = new; if (new) { - mnt_pin(new->file->f_path.mnt); - list_add(&new->list, &acct_list); + struct vfsmount *m = new->file->f_path.mnt; + mnt_pin(m); + hlist_add_head(&new->s_list, &m->mnt_sb->s_pins); + hlist_add_head(&new->m_list, &real_mount(m)->mnt_pins); } acct->ns = NULL; mutex_unlock(&acct->lock); @@ -218,14 +230,15 @@ static int acct_on(struct filename *pathname) mutex_init(&acct->lock); mnt = file->f_path.mnt; - old = acct_get(&ns->bacct); + old = acct_get(ns); if (old) { acct_kill(old, acct); } else { spin_lock(&acct_lock); ns->bacct = acct; mnt_pin(mnt); - list_add(&acct->list, &acct_list); + hlist_add_head(&acct->s_list, &mnt->mnt_sb->s_pins); + hlist_add_head(&acct->m_list, &real_mount(mnt)->mnt_pins); spin_unlock(&acct_lock); } mntput(mnt); /* it's pinned, now give up active reference */ @@ -261,79 +274,41 @@ SYSCALL_DEFINE1(acct, const char __user *, name) mutex_unlock(&acct_on_mutex); putname(tmp); } else { - acct_kill(acct_get(&task_active_pid_ns(current)->bacct), NULL); + acct_kill(acct_get(task_active_pid_ns(current)), NULL); } return error; } -/** - * acct_auto_close - turn off a filesystem's accounting if it is on - * @m: vfsmount being shut down - * - * If the accounting is turned on for a file in the subtree pointed to - * to by m, turn accounting off. Done when m is about to die. 
- */ -void acct_auto_close_mnt(struct vfsmount *m) +void acct_auto_close_mnt(struct hlist_head *list) { - struct bsd_acct_struct *acct; - - spin_lock(&acct_lock); -restart: - list_for_each_entry(acct, &acct_list, list) - if (acct->file->f_path.mnt == m) { - acct->count++; - spin_unlock(&acct_lock); - mutex_lock(&acct->lock); - if (!acct->ns) { - mutex_unlock(&acct->lock); - spin_lock(&acct_lock); - if (!--acct->count) - kfree(acct); - goto restart; - } - acct_kill(acct, NULL); - spin_lock(&acct_lock); - goto restart; - } + while (1) { + spin_lock(&acct_lock); + if (!list->first) + break; + acct_kill(__acct_get(hlist_entry(list->first, + struct bsd_acct_struct, + m_list)), NULL); + } spin_unlock(&acct_lock); } -/** - * acct_auto_close - turn off a filesystem's accounting if it is on - * @sb: super block for the filesystem - * - * If the accounting is turned on for a file in the filesystem pointed - * to by sb, turn accounting off. - */ -void acct_auto_close(struct super_block *sb) +void acct_auto_close(struct hlist_head *list) { - struct bsd_acct_struct *acct; - - spin_lock(&acct_lock); -restart: - list_for_each_entry(acct, &acct_list, list) - if (acct->file->f_path.dentry->d_sb == sb) { - acct->count++; - spin_unlock(&acct_lock); - mutex_lock(&acct->lock); - if (!acct->ns) { - mutex_unlock(&acct->lock); - spin_lock(&acct_lock); - if (!--acct->count) - kfree(acct); - goto restart; - } - acct_kill(acct, NULL); - spin_lock(&acct_lock); - goto restart; - } + while (1) { + spin_lock(&acct_lock); + if (!list->first) + break; + acct_kill(__acct_get(hlist_entry(list->first, + struct bsd_acct_struct, + s_list)), NULL); + } spin_unlock(&acct_lock); } void acct_exit_ns(struct pid_namespace *ns) { - acct_kill(acct_get(&ns->bacct), NULL); + acct_kill(acct_get(ns), NULL); } /* @@ -602,7 +577,7 @@ void acct_collect(long exitcode, int group_dead) static void slow_acct_process(struct pid_namespace *ns) { for ( ; ns; ns = ns->parent) { - struct bsd_acct_struct *acct = acct_get(&ns->bacct); + struct bsd_acct_struct *acct = acct_get(ns); if (acct) { do_acct_process(acct); mutex_unlock(&acct->lock); -- cgit v1.2.3-59-g8ed1b From efb170c22867cdc6f770de441bdefecec6712199 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 7 Aug 2014 08:39:04 -0400 Subject: take fs_pin stuff to fs/* Add a new field to fs_pin - kill(pin). That's what umount and r/o remount will be calling for all pins attached to vfsmount and superblock resp. Called after bumping the refcount, so it won't go away under us. Dropping the refcount is responsibility of the instance. All generic stuff moved to fs/fs_pin.c; the next step will rip all the knowledge of kernel/acct.c from fs/super.c and fs/namespace.c. After that - death to mnt_pin(); it was intended to be usable as generic mechanism for code that wants to attach objects to vfsmount, so that they would not make the sucker busy and would get killed on umount. Never got it right; it remained acct.c-specific all along. Now it's very close to being killable. 
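A condensed sketch of the resulting usage pattern (a hypothetical consumer, modelled on what kernel/acct.c does in the diff below): the owner embeds an fs_pin, supplies the kill() callback, and attaches the pin to a vfsmount; umount and read-only remount then call kill() with the pin's refcount already raised.

#include <linux/fs_pin.h>
#include <linux/mount.h>
#include <linux/kernel.h>

struct my_pin {				/* hypothetical consumer */
	struct fs_pin pin;
	/* private state that keeps the mount busy ... */
};

static void my_pin_kill(struct fs_pin *p)
{
	struct my_pin *mp = container_of(p, struct my_pin, pin);

	/* flush/close whatever holds the mount, then detach and drop our reference */
	pin_remove(&mp->pin);
	pin_put(&mp->pin);
}

static void my_pin_register(struct my_pin *mp, struct vfsmount *mnt)
{
	atomic_long_set(&mp->pin.count, 1);	/* reference owned by the pin lists */
	mp->pin.kill = my_pin_kill;
	pin_insert(&mp->pin, mnt);		/* hook onto the mount's and superblock's pin lists */
}
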
Signed-off-by: Al Viro --- fs/Makefile | 2 +- fs/fs_pin.c | 77 ++++++++++++++++++++++++++++++ include/linux/acct.h | 6 +-- include/linux/fs_pin.h | 17 +++++++ kernel/acct.c | 127 +++++++++++++------------------------------------ 5 files changed, 129 insertions(+), 100 deletions(-) create mode 100644 fs/fs_pin.c create mode 100644 include/linux/fs_pin.h (limited to 'include/linux') diff --git a/fs/Makefile b/fs/Makefile index 4030cbfbc9af..90c88529892b 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -11,7 +11,7 @@ obj-y := open.o read_write.o file_table.o super.o \ attr.o bad_inode.o file.o filesystems.o namespace.o \ seq_file.o xattr.o libfs.o fs-writeback.o \ pnode.o splice.o sync.o utimes.o \ - stack.o fs_struct.o statfs.o + stack.o fs_struct.o statfs.o fs_pin.o ifeq ($(CONFIG_BLOCK),y) obj-y += buffer.o block_dev.o direct-io.o mpage.o diff --git a/fs/fs_pin.c b/fs/fs_pin.c new file mode 100644 index 000000000000..f3ce0b874a44 --- /dev/null +++ b/fs/fs_pin.c @@ -0,0 +1,77 @@ +#include +#include +#include +#include "mount.h" + +static void pin_free_rcu(struct rcu_head *head) +{ + kfree(container_of(head, struct fs_pin, rcu)); +} + +static DEFINE_SPINLOCK(pin_lock); + +void pin_put(struct fs_pin *p) +{ + if (atomic_long_dec_and_test(&p->count)) + call_rcu(&p->rcu, pin_free_rcu); +} + +void pin_remove(struct fs_pin *pin) +{ + spin_lock(&pin_lock); + hlist_del(&pin->m_list); + hlist_del(&pin->s_list); + spin_unlock(&pin_lock); +} + +void pin_insert(struct fs_pin *pin, struct vfsmount *m) +{ + spin_lock(&pin_lock); + hlist_add_head(&pin->s_list, &m->mnt_sb->s_pins); + hlist_add_head(&pin->m_list, &real_mount(m)->mnt_pins); + spin_unlock(&pin_lock); +} + +void acct_auto_close_mnt(struct hlist_head *list) +{ + while (1) { + struct hlist_node *p; + struct fs_pin *pin; + rcu_read_lock(); + p = ACCESS_ONCE(list->first); + if (!p) { + rcu_read_unlock(); + break; + } + pin = hlist_entry(p, struct fs_pin, m_list); + if (!atomic_long_inc_not_zero(&pin->count)) { + rcu_read_unlock(); + cpu_relax(); + continue; + } + rcu_read_unlock(); + pin->kill(pin); + } +} + +void acct_auto_close(struct hlist_head *list) +{ + while (1) { + struct hlist_node *p; + struct fs_pin *pin; + rcu_read_lock(); + p = ACCESS_ONCE(list->first); + if (!p) { + rcu_read_unlock(); + break; + } + pin = hlist_entry(p, struct fs_pin, s_list); + if (!atomic_long_inc_not_zero(&pin->count)) { + rcu_read_unlock(); + cpu_relax(); + continue; + } + rcu_read_unlock(); + pin->kill(pin); + } +} diff --git a/include/linux/acct.h b/include/linux/acct.h index 65a4f889182e..137837929dbe 100644 --- a/include/linux/acct.h +++ b/include/linux/acct.h @@ -24,18 +24,16 @@ struct super_block; struct pacct_struct; struct pid_namespace; extern int acct_parm[]; /* for sysctl */ -extern void acct_auto_close(struct hlist_head *); -extern void acct_auto_close_mnt(struct hlist_head *); extern void acct_collect(long exitcode, int group_dead); extern void acct_process(void); extern void acct_exit_ns(struct pid_namespace *); #else -#define acct_auto_close(x) do { } while (0) -#define acct_auto_close_mnt(x) do { } while (0) #define acct_collect(x,y) do { } while (0) #define acct_process() do { } while (0) #define acct_exit_ns(ns) do { } while (0) #endif +extern void acct_auto_close(struct hlist_head *); +extern void acct_auto_close_mnt(struct hlist_head *); /* * ACCT_VERSION numbers as yet defined: diff --git a/include/linux/fs_pin.h b/include/linux/fs_pin.h new file mode 100644 index 000000000000..f66525e72ccf --- /dev/null +++ b/include/linux/fs_pin.h @@ -0,0 +1,17 
@@ +#include + +struct fs_pin { + atomic_long_t count; + union { + struct { + struct hlist_node s_list; + struct hlist_node m_list; + }; + struct rcu_head rcu; + }; + void (*kill)(struct fs_pin *); +}; + +void pin_put(struct fs_pin *); +void pin_remove(struct fs_pin *); +void pin_insert(struct fs_pin *, struct vfsmount *); diff --git a/kernel/acct.c b/kernel/acct.c index afeaaa6f49bf..a7993a6cb604 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -59,7 +59,7 @@ #include #include /* sector_div */ #include -#include <../fs/mount.h> /* will go away when we refactor */ +#include /* * These constants control the amount of freespace that suspend and @@ -78,17 +78,6 @@ int acct_parm[3] = {4, 2, 30}; */ static void do_acct_process(struct bsd_acct_struct *acct); -struct fs_pin { - atomic_long_t count; - union { - struct { - struct hlist_node s_list; - struct hlist_node m_list; - }; - struct rcu_head rcu; - }; -}; - struct bsd_acct_struct { struct fs_pin pin; struct mutex lock; @@ -100,13 +89,6 @@ struct bsd_acct_struct { struct completion done; }; -static void pin_free_rcu(struct rcu_head *head) -{ - kfree(container_of(head, struct fs_pin, rcu)); -} - -static DEFINE_SPINLOCK(acct_lock); - /* * Check the amount of free space and suspend/resume accordingly. */ @@ -142,29 +124,6 @@ out: return acct->active; } -static void pin_put(struct fs_pin *p) -{ - if (atomic_long_dec_and_test(&p->count)) - call_rcu(&p->rcu, pin_free_rcu); -} - -static struct bsd_acct_struct *__acct_get(struct bsd_acct_struct *res) -{ - if (!atomic_long_inc_not_zero(&res->pin.count)) { - rcu_read_unlock(); - cpu_relax(); - return NULL; - } - rcu_read_unlock(); - mutex_lock(&res->lock); - if (!res->ns) { - mutex_unlock(&res->lock); - pin_put(&res->pin); - return NULL; - } - return res; -} - static struct bsd_acct_struct *acct_get(struct pid_namespace *ns) { struct bsd_acct_struct *res; @@ -176,9 +135,18 @@ again: rcu_read_unlock(); return NULL; } - res = __acct_get(res); - if (!res) + if (!atomic_long_inc_not_zero(&res->pin.count)) { + rcu_read_unlock(); + cpu_relax(); goto again; + } + rcu_read_unlock(); + mutex_lock(&res->lock); + if (!res->ns) { + mutex_unlock(&res->lock); + pin_put(&res->pin); + goto again; + } return res; } @@ -203,19 +171,8 @@ static void acct_kill(struct bsd_acct_struct *acct, init_completion(&acct->done); schedule_work(&acct->work); wait_for_completion(&acct->done); - spin_lock(&acct_lock); - hlist_del(&acct->pin.m_list); - hlist_del(&acct->pin.s_list); - spin_unlock(&acct_lock); + pin_remove(&acct->pin); ns->bacct = new; - if (new) { - struct vfsmount *m = new->file->f_path.mnt; - spin_lock(&acct_lock); - hlist_add_head(&new->pin.s_list, &m->mnt_sb->s_pins); - hlist_add_head(&new->pin.m_list, &real_mount(m)->mnt_pins); - spin_unlock(&acct_lock); - mutex_unlock(&new->lock); - } acct->ns = NULL; atomic_long_dec(&acct->pin.count); mutex_unlock(&acct->lock); @@ -223,6 +180,19 @@ static void acct_kill(struct bsd_acct_struct *acct, } } +static void acct_pin_kill(struct fs_pin *pin) +{ + struct bsd_acct_struct *acct; + acct = container_of(pin, struct bsd_acct_struct, pin); + mutex_lock(&acct->lock); + if (!acct->ns) { + mutex_unlock(&acct->lock); + pin_put(pin); + acct = NULL; + } + acct_kill(acct, NULL); +} + static int acct_on(struct filename *pathname) { struct file *file; @@ -254,25 +224,22 @@ static int acct_on(struct filename *pathname) } atomic_long_set(&acct->pin.count, 1); + acct->pin.kill = acct_pin_kill; acct->file = file; acct->needcheck = jiffies; acct->ns = ns; mutex_init(&acct->lock); mnt = 
file->f_path.mnt; mnt_pin(mnt); + mutex_lock_nested(&acct->lock, 1); /* nobody has seen it yet */ + pin_insert(&acct->pin, mnt); old = acct_get(ns); - mutex_lock_nested(&acct->lock, 1); /* nobody has seen it yet */ - if (old) { + if (old) acct_kill(old, acct); - } else { + else ns->bacct = acct; - spin_lock(&acct_lock); - hlist_add_head(&acct->pin.s_list, &mnt->mnt_sb->s_pins); - hlist_add_head(&acct->pin.m_list, &real_mount(mnt)->mnt_pins); - spin_unlock(&acct_lock); - mutex_unlock(&acct->lock); - } + mutex_unlock(&acct->lock); mntput(mnt); /* it's pinned, now give up active reference */ return 0; } @@ -312,36 +279,6 @@ SYSCALL_DEFINE1(acct, const char __user *, name) return error; } -void acct_auto_close_mnt(struct hlist_head *list) -{ - rcu_read_lock(); - while (1) { - struct hlist_node *p = ACCESS_ONCE(list->first); - if (!p) - break; - acct_kill(__acct_get(hlist_entry(p, - struct bsd_acct_struct, - pin.m_list)), NULL); - rcu_read_lock(); - } - rcu_read_unlock(); -} - -void acct_auto_close(struct hlist_head *list) -{ - rcu_read_lock(); - while (1) { - struct hlist_node *p = ACCESS_ONCE(list->first); - if (!p) - break; - acct_kill(__acct_get(hlist_entry(p, - struct bsd_acct_struct, - pin.s_list)), NULL); - rcu_read_lock(); - } - rcu_read_unlock(); -} - void acct_exit_ns(struct pid_namespace *ns) { acct_kill(acct_get(ns), NULL); -- cgit v1.2.3-59-g8ed1b From 8fa1f1c2bd86007beb4a4845e6087ac4a704dc80 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 21 May 2014 18:22:52 -0400 Subject: make fs/{namespace,super}.c forget about acct.h These externs belong in fs/internal.h. Rename (they are not acct-specific anymore) and move them over there. Signed-off-by: Al Viro --- fs/fs_pin.c | 9 +++++---- fs/internal.h | 6 ++++++ fs/namespace.c | 3 +-- fs/super.c | 3 +-- include/linux/acct.h | 2 -- 5 files changed, 13 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/fs/fs_pin.c b/fs/fs_pin.c index f3ce0b874a44..9368236ca100 100644 --- a/fs/fs_pin.c +++ b/fs/fs_pin.c @@ -1,6 +1,7 @@ #include #include #include +#include "internal.h" #include "mount.h" static void pin_free_rcu(struct rcu_head *head) @@ -32,13 +33,13 @@ void pin_insert(struct fs_pin *pin, struct vfsmount *m) spin_unlock(&pin_lock); } -void acct_auto_close_mnt(struct hlist_head *list) +void mnt_pin_kill(struct mount *m) { while (1) { struct hlist_node *p; struct fs_pin *pin; rcu_read_lock(); - p = ACCESS_ONCE(list->first); + p = ACCESS_ONCE(m->mnt_pins.first); if (!p) { rcu_read_unlock(); break; @@ -54,13 +55,13 @@ void acct_auto_close_mnt(struct hlist_head *list) } } -void acct_auto_close(struct hlist_head *list) +void sb_pin_kill(struct super_block *sb) { while (1) { struct hlist_node *p; struct fs_pin *pin; rcu_read_lock(); - p = ACCESS_ONCE(list->first); + p = ACCESS_ONCE(sb->s_pins.first); if (!p) { rcu_read_unlock(); break; diff --git a/fs/internal.h b/fs/internal.h index 9a2edba87c2b..e325b4f9c799 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -143,3 +143,9 @@ extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, * pipe.c */ extern const struct file_operations pipefifo_fops; + +/* + * fs_pin.c + */ +extern void sb_pin_kill(struct super_block *sb); +extern void mnt_pin_kill(struct mount *m); diff --git a/fs/namespace.c b/fs/namespace.c index 22e530addfaf..0e4ce51c5277 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -16,7 +16,6 @@ #include #include #include -#include /* acct_auto_close_mnt */ #include /* init_rootfs */ #include /* get_fs_root et.al. 
*/ #include /* fsnotify_vfsmount_delete */ @@ -956,7 +955,7 @@ put_again: mnt->mnt_pinned = 0; rcu_read_unlock(); unlock_mount_hash(); - acct_auto_close_mnt(&mnt->mnt_pins); + mnt_pin_kill(mnt); goto put_again; } if (unlikely(mnt->mnt.mnt_flags & MNT_DOOMED)) { diff --git a/fs/super.c b/fs/super.c index a369f8964dc1..a371ce6aa919 100644 --- a/fs/super.c +++ b/fs/super.c @@ -22,7 +22,6 @@ #include #include -#include #include #include #include @@ -707,7 +706,7 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force) if (remount_ro) { if (sb->s_pins.first) { up_write(&sb->s_umount); - acct_auto_close(&sb->s_pins); + sb_pin_kill(sb); down_write(&sb->s_umount); if (!sb->s_root) return 0; diff --git a/include/linux/acct.h b/include/linux/acct.h index 137837929dbe..dccc2d4fe7de 100644 --- a/include/linux/acct.h +++ b/include/linux/acct.h @@ -32,8 +32,6 @@ extern void acct_exit_ns(struct pid_namespace *); #define acct_process() do { } while (0) #define acct_exit_ns(ns) do { } while (0) #endif -extern void acct_auto_close(struct hlist_head *); -extern void acct_auto_close_mnt(struct hlist_head *); /* * ACCT_VERSION numbers as yet defined: -- cgit v1.2.3-59-g8ed1b From 3064c3563ba4c23e2c7a47254ec056ed9ba0098a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 7 Aug 2014 09:12:31 -0400 Subject: death to mnt_pinned Rather than playing silly buggers with vfsmount refcounts, just have acct_on() ask fs/namespace.c for internal clone of file->f_path.mnt and replace it with said clone. Then attach the pin to original vfsmount. Voila - the clone will be alive until the file gets closed, making sure that underlying superblock remains active, etc., and we can drop the original vfsmount, so that it's not kept busy. If the file lives until the final mntput of the original vfsmount, we'll notice that there's an fs_pin (one in bsd_acct_struct that holds that file) and mnt_pin_kill() will take it out. Since ->kill() is synchronous, we won't proceed past that point until these files are closed (and private clones of our vfsmount are gone), so we get the same ordering warranties we used to get. mnt_pin()/mnt_unpin()/->mnt_pinned is gone now, and good riddance - it never became usable outside of kernel/acct.c (and racy wrt umount even there). 
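The heart of the new scheme, condensed from the acct_on() hunk below (write-access handling and most error paths omitted): the open file is switched to a private internal clone of its mount, and the pin is attached to the original mount, which can then be released without staying busy.

	internal = mnt_clone_internal(&file->f_path);	/* private MNT_INTERNAL clone */
	if (IS_ERR(internal))
		return PTR_ERR(internal);

	mnt = file->f_path.mnt;		/* the caller-visible mount */
	file->f_path.mnt = internal;	/* the file now references only the clone */

	pin_insert(&acct->pin, mnt);	/* pin hangs off the original mount and its superblock */
	mntput(mnt);			/* accounting no longer keeps the original mount busy */
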
Signed-off-by: Al Viro --- fs/mount.h | 1 - fs/namespace.c | 35 +++++++++-------------------------- include/linux/mount.h | 4 ++-- kernel/acct.c | 24 +++++++++++++++++++----- 4 files changed, 30 insertions(+), 34 deletions(-) (limited to 'include/linux') diff --git a/fs/mount.h b/fs/mount.h index 0a2d1458681f..6740a6215529 100644 --- a/fs/mount.h +++ b/fs/mount.h @@ -55,7 +55,6 @@ struct mount { int mnt_id; /* mount identifier */ int mnt_group_id; /* peer group identifier */ int mnt_expiry_mark; /* true if marked for expiry */ - int mnt_pinned; struct hlist_head mnt_pins; struct path mnt_ex_mountpoint; }; diff --git a/fs/namespace.c b/fs/namespace.c index 0e4ce51c5277..65af9d0e0d67 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -937,7 +937,6 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root, static void mntput_no_expire(struct mount *mnt) { -put_again: rcu_read_lock(); mnt_add_count(mnt, -1); if (likely(mnt->mnt_ns)) { /* shouldn't be the last one */ @@ -950,14 +949,6 @@ put_again: unlock_mount_hash(); return; } - if (unlikely(mnt->mnt_pinned)) { - mnt_add_count(mnt, mnt->mnt_pinned + 1); - mnt->mnt_pinned = 0; - rcu_read_unlock(); - unlock_mount_hash(); - mnt_pin_kill(mnt); - goto put_again; - } if (unlikely(mnt->mnt.mnt_flags & MNT_DOOMED)) { rcu_read_unlock(); unlock_mount_hash(); @@ -980,6 +971,8 @@ put_again: * so mnt_get_writers() below is safe. */ WARN_ON(mnt_get_writers(mnt)); + if (unlikely(mnt->mnt_pins.first)) + mnt_pin_kill(mnt); fsnotify_vfsmount_delete(&mnt->mnt); dput(mnt->mnt.mnt_root); deactivate_super(mnt->mnt.mnt_sb); @@ -1007,25 +1000,15 @@ struct vfsmount *mntget(struct vfsmount *mnt) } EXPORT_SYMBOL(mntget); -void mnt_pin(struct vfsmount *mnt) +struct vfsmount *mnt_clone_internal(struct path *path) { - lock_mount_hash(); - real_mount(mnt)->mnt_pinned++; - unlock_mount_hash(); -} -EXPORT_SYMBOL(mnt_pin); - -void mnt_unpin(struct vfsmount *m) -{ - struct mount *mnt = real_mount(m); - lock_mount_hash(); - if (mnt->mnt_pinned) { - mnt_add_count(mnt, 1); - mnt->mnt_pinned--; - } - unlock_mount_hash(); + struct mount *p; + p = clone_mnt(real_mount(path->mnt), path->dentry, CL_PRIVATE); + if (IS_ERR(p)) + return ERR_CAST(p); + p->mnt.mnt_flags |= MNT_INTERNAL; + return &p->mnt; } -EXPORT_SYMBOL(mnt_unpin); static inline void mangle(struct seq_file *m, const char *s) { diff --git a/include/linux/mount.h b/include/linux/mount.h index 839bac270904..864b120c1345 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -62,6 +62,7 @@ struct vfsmount { }; struct file; /* forward dec */ +struct path; extern int mnt_want_write(struct vfsmount *mnt); extern int mnt_want_write_file(struct file *file); @@ -70,8 +71,7 @@ extern void mnt_drop_write(struct vfsmount *mnt); extern void mnt_drop_write_file(struct file *file); extern void mntput(struct vfsmount *mnt); extern struct vfsmount *mntget(struct vfsmount *mnt); -extern void mnt_pin(struct vfsmount *mnt); -extern void mnt_unpin(struct vfsmount *mnt); +extern struct vfsmount *mnt_clone_internal(struct path *path); extern int __mnt_is_readonly(struct vfsmount *mnt); struct file_system_type; diff --git a/kernel/acct.c b/kernel/acct.c index a7993a6cb604..2e6cf818021d 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -154,7 +154,6 @@ static void close_work(struct work_struct *work) { struct bsd_acct_struct *acct = container_of(work, struct bsd_acct_struct, work); struct file *file = acct->file; - mnt_unpin(file->f_path.mnt); if (file->f_op->flush) file->f_op->flush(file, NULL); __fput_sync(file); @@ 
-196,9 +195,10 @@ static void acct_pin_kill(struct fs_pin *pin) static int acct_on(struct filename *pathname) { struct file *file; - struct vfsmount *mnt; + struct vfsmount *mnt, *internal; struct pid_namespace *ns = task_active_pid_ns(current); struct bsd_acct_struct *acct, *old; + int err; acct = kzalloc(sizeof(struct bsd_acct_struct), GFP_KERNEL); if (!acct) @@ -222,6 +222,21 @@ static int acct_on(struct filename *pathname) filp_close(file, NULL); return -EIO; } + internal = mnt_clone_internal(&file->f_path); + if (IS_ERR(internal)) { + kfree(acct); + filp_close(file, NULL); + return PTR_ERR(internal); + } + err = mnt_want_write(internal); + if (err) { + mntput(internal); + kfree(acct); + filp_close(file, NULL); + return err; + } + mnt = file->f_path.mnt; + file->f_path.mnt = internal; atomic_long_set(&acct->pin.count, 1); acct->pin.kill = acct_pin_kill; @@ -229,8 +244,6 @@ static int acct_on(struct filename *pathname) acct->needcheck = jiffies; acct->ns = ns; mutex_init(&acct->lock); - mnt = file->f_path.mnt; - mnt_pin(mnt); mutex_lock_nested(&acct->lock, 1); /* nobody has seen it yet */ pin_insert(&acct->pin, mnt); @@ -240,7 +253,8 @@ static int acct_on(struct filename *pathname) else ns->bacct = acct; mutex_unlock(&acct->lock); - mntput(mnt); /* it's pinned, now give up active reference */ + mnt_drop_write(mnt); + mntput(mnt); return 0; } -- cgit v1.2.3-59-g8ed1b From 1a0a397e41cb1bf70cfe45fd0eeff08c7c501ec0 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 14 Feb 2014 17:35:37 -0500 Subject: dcache: d_obtain_alias callers don't all want DISCONNECTED There are a few d_obtain_alias callers that are using it to get the root of a filesystem which may already have an alias somewhere else. This is not the same as the filehandle-lookup case, and none of them actually need DCACHE_DISCONNECTED set. It isn't really a serious problem, but it would really be clearer if we reserved DCACHE_DISCONNECTED for those cases where it's actually needed. In the btrfs case this was causing a spurious printk from nfsd/nfsfh.c:fh_verify when it found an unexpected DCACHE_DISCONNECTED dentry. Josef worked around this by unsetting DCACHE_DISCONNECTED manually in 3a0dfa6a12e "Btrfs: unset DCACHE_DISCONNECTED when mounting default subvol", and this replaces that workaround. Cc: Josef Bacik Signed-off-by: J. 
Bruce Fields Signed-off-by: Al Viro --- fs/btrfs/super.c | 9 +------ fs/ceph/super.c | 2 +- fs/dcache.c | 69 +++++++++++++++++++++++++++++++++++--------------- fs/nfs/getroot.c | 2 +- fs/nilfs2/super.c | 2 +- include/linux/dcache.h | 1 + 6 files changed, 54 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 8e16bca69c56..67b48b9a03e0 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -851,7 +851,6 @@ static struct dentry *get_default_root(struct super_block *sb, struct btrfs_path *path; struct btrfs_key location; struct inode *inode; - struct dentry *dentry; u64 dir_id; int new = 0; @@ -922,13 +921,7 @@ setup_root: return dget(sb->s_root); } - dentry = d_obtain_alias(inode); - if (!IS_ERR(dentry)) { - spin_lock(&dentry->d_lock); - dentry->d_flags &= ~DCACHE_DISCONNECTED; - spin_unlock(&dentry->d_lock); - } - return dentry; + return d_obtain_root(inode); } static int btrfs_fill_super(struct super_block *sb, diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 06150fd745ac..f6e12377335c 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -755,7 +755,7 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc, goto out; } } else { - root = d_obtain_alias(inode); + root = d_obtain_root(inode); } ceph_init_dentry(root); dout("open_root_inode success, root dentry is %p\n", root); diff --git a/fs/dcache.c b/fs/dcache.c index 3ed095363997..63d556c0e698 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1781,25 +1781,7 @@ struct dentry *d_find_any_alias(struct inode *inode) } EXPORT_SYMBOL(d_find_any_alias); -/** - * d_obtain_alias - find or allocate a dentry for a given inode - * @inode: inode to allocate the dentry for - * - * Obtain a dentry for an inode resulting from NFS filehandle conversion or - * similar open by handle operations. The returned dentry may be anonymous, - * or may have a full name (if the inode was already in the cache). - * - * When called on a directory inode, we must ensure that the inode only ever - * has one dentry. If a dentry is found, that is returned instead of - * allocating a new one. - * - * On successful return, the reference to the inode has been transferred - * to the dentry. In case of an error the reference on the inode is released. - * To make it easier to use in export operations a %NULL or IS_ERR inode may - * be passed in and will be the error will be propagate to the return value, - * with a %NULL @inode replaced by ERR_PTR(-ESTALE). - */ -struct dentry *d_obtain_alias(struct inode *inode) +struct dentry *__d_obtain_alias(struct inode *inode, int disconnected) { static const struct qstr anonstring = QSTR_INIT("/", 1); struct dentry *tmp; @@ -1830,7 +1812,10 @@ struct dentry *d_obtain_alias(struct inode *inode) } /* attach a disconnected dentry */ - add_flags = d_flags_for_inode(inode) | DCACHE_DISCONNECTED; + add_flags = d_flags_for_inode(inode); + + if (disconnected) + add_flags |= DCACHE_DISCONNECTED; spin_lock(&tmp->d_lock); tmp->d_inode = inode; @@ -1851,8 +1836,52 @@ struct dentry *d_obtain_alias(struct inode *inode) iput(inode); return res; } + +/** + * d_obtain_alias - find or allocate a DISCONNECTED dentry for a given inode + * @inode: inode to allocate the dentry for + * + * Obtain a dentry for an inode resulting from NFS filehandle conversion or + * similar open by handle operations. The returned dentry may be anonymous, + * or may have a full name (if the inode was already in the cache). 
+ * + * When called on a directory inode, we must ensure that the inode only ever + * has one dentry. If a dentry is found, that is returned instead of + * allocating a new one. + * + * On successful return, the reference to the inode has been transferred + * to the dentry. In case of an error the reference on the inode is released. + * To make it easier to use in export operations a %NULL or IS_ERR inode may + * be passed in and the error will be propagated to the return value, + * with a %NULL @inode replaced by ERR_PTR(-ESTALE). + */ +struct dentry *d_obtain_alias(struct inode *inode) +{ + return __d_obtain_alias(inode, 1); +} EXPORT_SYMBOL(d_obtain_alias); +/** + * d_obtain_root - find or allocate a dentry for a given inode + * @inode: inode to allocate the dentry for + * + * Obtain an IS_ROOT dentry for the root of a filesystem. + * + * We must ensure that directory inodes only ever have one dentry. If a + * dentry is found, that is returned instead of allocating a new one. + * + * On successful return, the reference to the inode has been transferred + * to the dentry. In case of an error the reference on the inode is + * released. A %NULL or IS_ERR inode may be passed in and will be the + * error will be propagate to the return value, with a %NULL @inode + * replaced by ERR_PTR(-ESTALE). + */ +struct dentry *d_obtain_root(struct inode *inode) +{ + return __d_obtain_alias(inode, 0); +} +EXPORT_SYMBOL(d_obtain_root); + /** * d_add_ci - lookup or allocate new dentry with case-exact name * @inode: the inode case-insensitive lookup has found diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index b94f80420a58..880618a8b048 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c @@ -112,7 +112,7 @@ struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh, * if the dentry tree reaches them; however if the dentry already * exists, we'll pick it up at this point and use it as the root */ - ret = d_obtain_alias(inode); + ret = d_obtain_root(inode); if (IS_ERR(ret)) { dprintk("nfs_get_root: get root dentry failed\n"); goto out; diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 8c532b2ca3ab..ac914994dfed 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -942,7 +942,7 @@ static int nilfs_get_root_dentry(struct super_block *sb, iput(inode); } } else { - dentry = d_obtain_alias(inode); + dentry = d_obtain_root(inode); if (IS_ERR(dentry)) { ret = PTR_ERR(dentry); goto failed_dentry; diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 3c7ec327ebd2..e4ae2ad48d07 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -249,6 +249,7 @@ extern struct dentry * d_splice_alias(struct inode *, struct dentry *); extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *); extern struct dentry *d_find_any_alias(struct inode *inode); extern struct dentry * d_obtain_alias(struct inode *); +extern struct dentry * d_obtain_root(struct inode *); extern void shrink_dcache_sb(struct super_block *); extern void shrink_dcache_parent(struct dentry *); extern void shrink_dcache_for_umount(struct super_block *); -- cgit v1.2.3-59-g8ed1b From c7f3888ad7f0932a87fb76e6e4edff2a90cc7920 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 18 Jun 2014 20:34:33 -0400 Subject: switch iov_iter_get_pages() to passing maximal number of pages ... instead of maximal size. 
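As an illustration only, a hedged sketch of a caller under the new convention (fill_pages() and NR_PAGES are hypothetical; the fs/direct-io.c hunk below makes the same change, passing DIO_PAGES instead of DIO_PAGES * PAGE_SIZE):

	#define NR_PAGES 64	/* hypothetical bound on the number of page pointers */

	static ssize_t fill_pages(struct iov_iter *iter, struct page **pages)
	{
		size_t offset;	/* offset of the data within the first page */
		ssize_t bytes;

		/* old API: iov_iter_get_pages(iter, pages, NR_PAGES * PAGE_SIZE, &offset);
		 * new API: the third argument is the maximal number of pages, not a size */
		bytes = iov_iter_get_pages(iter, pages, NR_PAGES, &offset);
		if (bytes < 0)
			return bytes;

		/* the return value is still a byte count; only the limit argument
		 * changed meaning */
		return bytes;
	}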
Signed-off-by: Al Viro --- fs/direct-io.c | 2 +- fs/fuse/file.c | 4 ++-- include/linux/uio.h | 2 +- mm/iov_iter.c | 17 ++++++++--------- 4 files changed, 12 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/fs/direct-io.c b/fs/direct-io.c index 17e39b047de5..c3116404ab49 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -158,7 +158,7 @@ static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio) { ssize_t ret; - ret = iov_iter_get_pages(sdio->iter, dio->pages, DIO_PAGES * PAGE_SIZE, + ret = iov_iter_get_pages(sdio->iter, dio->pages, DIO_PAGES, &sdio->from); if (ret < 0 && sdio->blocks_available && (dio->rw & WRITE)) { diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 40ac2628ddcf..912061ac4baf 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1303,10 +1303,10 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii, while (nbytes < *nbytesp && req->num_pages < req->max_pages) { unsigned npages; size_t start; - unsigned n = req->max_pages - req->num_pages; ssize_t ret = iov_iter_get_pages(ii, &req->pages[req->num_pages], - n * PAGE_SIZE, &start); + req->max_pages - req->num_pages, + &start); if (ret < 0) return ret; diff --git a/include/linux/uio.h b/include/linux/uio.h index 09a7cffc224e..48d64e6ab292 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -84,7 +84,7 @@ unsigned long iov_iter_alignment(const struct iov_iter *i); void iov_iter_init(struct iov_iter *i, int direction, const struct iovec *iov, unsigned long nr_segs, size_t count); ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages, - size_t maxsize, size_t *start); + unsigned maxpages, size_t *start); ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages, size_t maxsize, size_t *start); int iov_iter_npages(const struct iov_iter *i, int maxpages); diff --git a/mm/iov_iter.c b/mm/iov_iter.c index 7b5dbd1517b5..ab88dc0ea1d3 100644 --- a/mm/iov_iter.c +++ b/mm/iov_iter.c @@ -310,7 +310,7 @@ void iov_iter_init(struct iov_iter *i, int direction, EXPORT_SYMBOL(iov_iter_init); static ssize_t get_pages_iovec(struct iov_iter *i, - struct page **pages, size_t maxsize, + struct page **pages, unsigned maxpages, size_t *start) { size_t offset = i->iov_offset; @@ -323,10 +323,10 @@ static ssize_t get_pages_iovec(struct iov_iter *i, len = iov->iov_len - offset; if (len > i->count) len = i->count; - if (len > maxsize) - len = maxsize; addr = (unsigned long)iov->iov_base + offset; len += *start = addr & (PAGE_SIZE - 1); + if (len > maxpages * PAGE_SIZE) + len = maxpages * PAGE_SIZE; addr &= ~(PAGE_SIZE - 1); n = (len + PAGE_SIZE - 1) / PAGE_SIZE; res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, pages); @@ -588,15 +588,14 @@ static unsigned long alignment_bvec(const struct iov_iter *i) } static ssize_t get_pages_bvec(struct iov_iter *i, - struct page **pages, size_t maxsize, + struct page **pages, unsigned maxpages, size_t *start) { const struct bio_vec *bvec = i->bvec; size_t len = bvec->bv_len - i->iov_offset; if (len > i->count) len = i->count; - if (len > maxsize) - len = maxsize; + /* can't be more than PAGE_SIZE */ *start = bvec->bv_offset + i->iov_offset; get_page(*pages = bvec->bv_page); @@ -712,13 +711,13 @@ unsigned long iov_iter_alignment(const struct iov_iter *i) EXPORT_SYMBOL(iov_iter_alignment); ssize_t iov_iter_get_pages(struct iov_iter *i, - struct page **pages, size_t maxsize, + struct page **pages, unsigned maxpages, size_t *start) { if (i->type & ITER_BVEC) - return get_pages_bvec(i, pages, maxsize, start); 
+ return get_pages_bvec(i, pages, maxpages, start); else - return get_pages_iovec(i, pages, maxsize, start); + return get_pages_iovec(i, pages, maxpages, start); } EXPORT_SYMBOL(iov_iter_get_pages); -- cgit v1.2.3-59-g8ed1b From 3c49b52b155d0f723792377e1a4480a0e7ca0ba2 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 25 Jul 2014 16:05:29 -0400 Subject: tracing: Do not do anything special with tracepoint_string when tracing is disabled When CONFIG_TRACING is not enabled, there's no reason to save the trace strings either by the linker or as a static variable that can be referenced later. Simply pass back the string that is given to tracepoint_string(). Had to move the define to include/linux/tracepoint.h so that it is still visible when CONFIG_TRACING is not set. Link: http://lkml.kernel.org/p/1406318733-26754-2-git-send-email-nicolas.pitre@linaro.org Suggested-by: Nicolas Pitre Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 34 ---------------------------------- include/linux/tracepoint.h | 44 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 34 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index cff3106ffe2c..c9f619a2070f 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -574,40 +574,6 @@ do { \ __trace_printk(ip, fmt, ##args); \ } while (0) -/** - * tracepoint_string - register constant persistent string to trace system - * @str - a constant persistent string that will be referenced in tracepoints - * - * If constant strings are being used in tracepoints, it is faster and - * more efficient to just save the pointer to the string and reference - * that with a printf "%s" instead of saving the string in the ring buffer - * and wasting space and time. - * - * The problem with the above approach is that userspace tools that read - * the binary output of the trace buffers do not have access to the string. - * Instead they just show the address of the string which is not very - * useful to users. - * - * With tracepoint_string(), the string will be registered to the tracing - * system and exported to userspace via the debugfs/tracing/printk_formats - * file that maps the string address to the string text. This way userspace - * tools that read the binary buffers have a way to map the pointers to - * the ASCII strings they represent. - * - * The @str used must be a constant string and persistent as it would not - * make sense to show a string that no longer exists. But it is still fine - * to be used with modules, because when modules are unloaded, if they - * had tracepoints, the ring buffers are cleared too. As long as the string - * does not change during the life of the module, it is fine to use - * tracepoint_string() within a module. 
- */ -#define tracepoint_string(str) \ - ({ \ - static const char *___tp_str __tracepoint_string = str; \ - ___tp_str; \ - }) -#define __tracepoint_string __attribute__((section("__tracepoint_str"))) - #ifdef CONFIG_PERF_EVENTS struct perf_event; diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 2e2a5f7717e5..b1293f15f592 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -249,6 +249,50 @@ extern void syscall_unregfunc(void); #endif /* CONFIG_TRACEPOINTS */ +#ifdef CONFIG_TRACING +/** + * tracepoint_string - register constant persistent string to trace system + * @str - a constant persistent string that will be referenced in tracepoints + * + * If constant strings are being used in tracepoints, it is faster and + * more efficient to just save the pointer to the string and reference + * that with a printf "%s" instead of saving the string in the ring buffer + * and wasting space and time. + * + * The problem with the above approach is that userspace tools that read + * the binary output of the trace buffers do not have access to the string. + * Instead they just show the address of the string which is not very + * useful to users. + * + * With tracepoint_string(), the string will be registered to the tracing + * system and exported to userspace via the debugfs/tracing/printk_formats + * file that maps the string address to the string text. This way userspace + * tools that read the binary buffers have a way to map the pointers to + * the ASCII strings they represent. + * + * The @str used must be a constant string and persistent as it would not + * make sense to show a string that no longer exists. But it is still fine + * to be used with modules, because when modules are unloaded, if they + * had tracepoints, the ring buffers are cleared too. As long as the string + * does not change during the life of the module, it is fine to use + * tracepoint_string() within a module. + */ +#define tracepoint_string(str) \ + ({ \ + static const char *___tp_str __tracepoint_string = str; \ + ___tp_str; \ + }) +#define __tracepoint_string __attribute__((section("__tracepoint_str"))) +#else +/* + * tracepoint_string() is used to save the string address for userspace + * tracing tools. When tracing isn't configured, there's no need to save + * anything. + */ +# define tracepoint_string(str) str +# define __tracepoint_string +#endif + /* * The need for the DECLARE_TRACE_NOARGS() is to handle the prototype * (void). "void" is a special value in a function prototype and can -- cgit v1.2.3-59-g8ed1b From 92d18a6851fb6295466657ad1cf7fe88c2054ffa Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 8 Aug 2014 10:36:20 -0600 Subject: drivers/vfio: Fix EEH build error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The VFIO related components could be built as dynamic modules. Unfortunately, CONFIG_EEH can't be configured to "m". The patch fixes the build errors when configuring VFIO related components as dynamic modules as follows: CC [M] drivers/vfio/vfio_iommu_spapr_tce.o In file included from drivers/vfio/vfio.c:33:0: include/linux/vfio.h:101:43: warning: ‘struct pci_dev’ declared \ inside parameter list [enabled by default] : WRAP arch/powerpc/boot/zImage.pseries WRAP arch/powerpc/boot/zImage.maple WRAP arch/powerpc/boot/zImage.pmac WRAP arch/powerpc/boot/zImage.epapr MODPOST 1818 modules ERROR: ".vfio_spapr_iommu_eeh_ioctl" [drivers/vfio/vfio_iommu_spapr_tce.ko]\ undefined! 
ERROR: ".vfio_spapr_pci_eeh_open" [drivers/vfio/pci/vfio-pci.ko] undefined! ERROR: ".vfio_spapr_pci_eeh_release" [drivers/vfio/pci/vfio-pci.ko] undefined! Reported-by: Alexey Kardashevskiy Signed-off-by: Gavin Shan Signed-off-by: Alexey Kardashevskiy Signed-off-by: Alex Williamson --- drivers/vfio/Kconfig | 6 ++++++ drivers/vfio/Makefile | 2 +- drivers/vfio/vfio_spapr_eeh.c | 3 +++ include/linux/vfio.h | 1 + 4 files changed, 11 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig index af7b204b9215..d8c57636b9ce 100644 --- a/drivers/vfio/Kconfig +++ b/drivers/vfio/Kconfig @@ -8,11 +8,17 @@ config VFIO_IOMMU_SPAPR_TCE depends on VFIO && SPAPR_TCE_IOMMU default n +config VFIO_SPAPR_EEH + tristate + depends on EEH && VFIO_IOMMU_SPAPR_TCE + default n + menuconfig VFIO tristate "VFIO Non-Privileged userspace driver framework" depends on IOMMU_API select VFIO_IOMMU_TYPE1 if X86 select VFIO_IOMMU_SPAPR_TCE if (PPC_POWERNV || PPC_PSERIES) + select VFIO_SPAPR_EEH if (PPC_POWERNV || PPC_PSERIES) select ANON_INODES help VFIO provides a framework for secure userspace device drivers. diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile index 50e30bc75e85..0b035b12600a 100644 --- a/drivers/vfio/Makefile +++ b/drivers/vfio/Makefile @@ -1,5 +1,5 @@ obj-$(CONFIG_VFIO) += vfio.o obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o -obj-$(CONFIG_EEH) += vfio_spapr_eeh.o +obj-$(CONFIG_VFIO_SPAPR_EEH) += vfio_spapr_eeh.o obj-$(CONFIG_VFIO_PCI) += pci/ diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c index f834b4ce1431..949f98e997af 100644 --- a/drivers/vfio/vfio_spapr_eeh.c +++ b/drivers/vfio/vfio_spapr_eeh.c @@ -18,11 +18,13 @@ int vfio_spapr_pci_eeh_open(struct pci_dev *pdev) { return eeh_dev_open(pdev); } +EXPORT_SYMBOL_GPL(vfio_spapr_pci_eeh_open); void vfio_spapr_pci_eeh_release(struct pci_dev *pdev) { eeh_dev_release(pdev); } +EXPORT_SYMBOL_GPL(vfio_spapr_pci_eeh_release); long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group, unsigned int cmd, unsigned long arg) @@ -85,3 +87,4 @@ long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group, return ret; } +EXPORT_SYMBOL(vfio_spapr_iommu_eeh_ioctl); diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 25a0fbd4b998..224128a96b7f 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -98,6 +98,7 @@ extern int vfio_external_user_iommu_id(struct vfio_group *group); extern long vfio_external_check_extension(struct vfio_group *group, unsigned long arg); +struct pci_dev; #ifdef CONFIG_EEH extern int vfio_spapr_pci_eeh_open(struct pci_dev *pdev); extern void vfio_spapr_pci_eeh_release(struct pci_dev *pdev); -- cgit v1.2.3-59-g8ed1b From 9b936c960f22954bfb89f2fefd8f96916bb42908 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 8 Aug 2014 10:39:16 -0600 Subject: drivers/vfio: Enable VFIO if EEH is not supported The existing vfio_pci_open() fails upon error returned from vfio_spapr_pci_eeh_open(), which breaks POWER7's P5IOC2 PHB support which this patch brings back. The patch fixes the issue by dropping the return value of vfio_spapr_pci_eeh_open(). 
Signed-off-by: Alexey Kardashevskiy Signed-off-by: Gavin Shan Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci.c | 6 +----- drivers/vfio/vfio_spapr_eeh.c | 4 ++-- include/linux/vfio.h | 5 ++--- 3 files changed, 5 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 1651c0769b72..f7825332a325 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -200,11 +200,7 @@ static int vfio_pci_open(void *device_data) if (ret) goto error; - ret = vfio_spapr_pci_eeh_open(vdev->pdev); - if (ret) { - vfio_pci_disable(vdev); - goto error; - } + vfio_spapr_pci_eeh_open(vdev->pdev); } vdev->refcnt++; error: diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c index 4779cace8036..86dfceb9201f 100644 --- a/drivers/vfio/vfio_spapr_eeh.c +++ b/drivers/vfio/vfio_spapr_eeh.c @@ -19,9 +19,9 @@ #define DRIVER_DESC "VFIO IOMMU SPAPR EEH" /* We might build address mapping here for "fast" path later */ -int vfio_spapr_pci_eeh_open(struct pci_dev *pdev) +void vfio_spapr_pci_eeh_open(struct pci_dev *pdev) { - return eeh_dev_open(pdev); + eeh_dev_open(pdev); } EXPORT_SYMBOL_GPL(vfio_spapr_pci_eeh_open); diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 224128a96b7f..d3204115f15d 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -100,15 +100,14 @@ extern long vfio_external_check_extension(struct vfio_group *group, struct pci_dev; #ifdef CONFIG_EEH -extern int vfio_spapr_pci_eeh_open(struct pci_dev *pdev); +extern void vfio_spapr_pci_eeh_open(struct pci_dev *pdev); extern void vfio_spapr_pci_eeh_release(struct pci_dev *pdev); extern long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group, unsigned int cmd, unsigned long arg); #else -static inline int vfio_spapr_pci_eeh_open(struct pci_dev *pdev) +static inline void vfio_spapr_pci_eeh_open(struct pci_dev *pdev) { - return 0; } static inline void vfio_spapr_pci_eeh_release(struct pci_dev *pdev) -- cgit v1.2.3-59-g8ed1b From 00501b531c4723972aa11d6d4ebcf8d6552007c8 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Fri, 8 Aug 2014 14:19:20 -0700 Subject: mm: memcontrol: rewrite charge API These patches rework memcg charge lifetime to integrate more naturally with the lifetime of user pages. This drastically simplifies the code and reduces charging and uncharging overhead. The most expensive part of charging and uncharging is the page_cgroup bit spinlock, which is removed entirely after this series. Here are the top-10 profile entries of a stress test that reads a 128G sparse file on a freshly booted box, without even a dedicated cgroup (i.e. executing in the root memcg). 
Before: 15.36% cat [kernel.kallsyms] [k] copy_user_generic_string 13.31% cat [kernel.kallsyms] [k] memset 11.48% cat [kernel.kallsyms] [k] do_mpage_readpage 4.23% cat [kernel.kallsyms] [k] get_page_from_freelist 2.38% cat [kernel.kallsyms] [k] put_page 2.32% cat [kernel.kallsyms] [k] __mem_cgroup_commit_charge 2.18% kswapd0 [kernel.kallsyms] [k] __mem_cgroup_uncharge_common 1.92% kswapd0 [kernel.kallsyms] [k] shrink_page_list 1.86% cat [kernel.kallsyms] [k] __radix_tree_lookup 1.62% cat [kernel.kallsyms] [k] __pagevec_lru_add_fn After: 15.67% cat [kernel.kallsyms] [k] copy_user_generic_string 13.48% cat [kernel.kallsyms] [k] memset 11.42% cat [kernel.kallsyms] [k] do_mpage_readpage 3.98% cat [kernel.kallsyms] [k] get_page_from_freelist 2.46% cat [kernel.kallsyms] [k] put_page 2.13% kswapd0 [kernel.kallsyms] [k] shrink_page_list 1.88% cat [kernel.kallsyms] [k] __radix_tree_lookup 1.67% cat [kernel.kallsyms] [k] __pagevec_lru_add_fn 1.39% kswapd0 [kernel.kallsyms] [k] free_pcppages_bulk 1.30% cat [kernel.kallsyms] [k] kfree As you can see, the memcg footprint has shrunk quite a bit. text data bss dec hex filename 37970 9892 400 48262 bc86 mm/memcontrol.o.old 35239 9892 400 45531 b1db mm/memcontrol.o This patch (of 4): The memcg charge API charges pages before they are rmapped - i.e. have an actual "type" - and so every callsite needs its own set of charge and uncharge functions to know what type is being operated on. Worse, uncharge has to happen from a context that is still type-specific, rather than at the end of the page's lifetime with exclusive access, and so requires a lot of synchronization. Rewrite the charge API to provide a generic set of try_charge(), commit_charge() and cancel_charge() transaction operations, much like what's currently done for swap-in: mem_cgroup_try_charge() attempts to reserve a charge, reclaiming pages from the memcg if necessary. mem_cgroup_commit_charge() commits the page to the charge once it has a valid page->mapping and PageAnon() reliably tells the type. mem_cgroup_cancel_charge() aborts the transaction. This reduces the charge API and enables subsequent patches to drastically simplify uncharging. As pages need to be committed after rmap is established but before they are added to the LRU, page_add_new_anon_rmap() must stop doing LRU additions again. Revive lru_cache_add_active_or_unevictable(). [hughd@google.com: fix shmem_unuse] [hughd@google.com: Add comments on the private use of -EAGAIN] Signed-off-by: Johannes Weiner Acked-by: Michal Hocko Cc: Tejun Heo Cc: Vladimir Davydov Signed-off-by: Hugh Dickins Cc: Naoya Horiguchi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/cgroups/memcg_test.txt | 32 +-- include/linux/memcontrol.h | 53 ++--- include/linux/swap.h | 3 + kernel/events/uprobes.c | 15 +- mm/filemap.c | 21 +- mm/huge_memory.c | 57 +++-- mm/memcontrol.c | 407 ++++++++++++++--------------------- mm/memory.c | 41 ++-- mm/rmap.c | 19 -- mm/shmem.c | 37 ++-- mm/swap.c | 34 +++ mm/swapfile.c | 14 +- 12 files changed, 338 insertions(+), 395 deletions(-) (limited to 'include/linux') diff --git a/Documentation/cgroups/memcg_test.txt b/Documentation/cgroups/memcg_test.txt index 80ac454704b8..bcf750d3cecd 100644 --- a/Documentation/cgroups/memcg_test.txt +++ b/Documentation/cgroups/memcg_test.txt @@ -24,24 +24,7 @@ Please note that implementation details can be changed. a page/swp_entry may be charged (usage += PAGE_SIZE) at - mem_cgroup_charge_anon() - Called at new page fault and Copy-On-Write. 
- - mem_cgroup_try_charge_swapin() - Called at do_swap_page() (page fault on swap entry) and swapoff. - Followed by charge-commit-cancel protocol. (With swap accounting) - At commit, a charge recorded in swap_cgroup is removed. - - mem_cgroup_charge_file() - Called at add_to_page_cache() - - mem_cgroup_cache_charge_swapin() - Called at shmem's swapin. - - mem_cgroup_prepare_migration() - Called before migration. "extra" charge is done and followed by - charge-commit-cancel protocol. - At commit, charge against oldpage or newpage will be committed. + mem_cgroup_try_charge() 2. Uncharge a page/swp_entry may be uncharged (usage -= PAGE_SIZE) by @@ -69,19 +52,14 @@ Please note that implementation details can be changed. to new page is committed. At failure, charge to old page is committed. 3. charge-commit-cancel - In some case, we can't know this "charge" is valid or not at charging - (because of races). - To handle such case, there are charge-commit-cancel functions. - mem_cgroup_try_charge_XXX - mem_cgroup_commit_charge_XXX - mem_cgroup_cancel_charge_XXX - these are used in swap-in and migration. + Memcg pages are charged in two steps: + mem_cgroup_try_charge() + mem_cgroup_commit_charge() or mem_cgroup_cancel_charge() At try_charge(), there are no flags to say "this page is charged". at this point, usage += PAGE_SIZE. - At commit(), the function checks the page should be charged or not - and set flags or avoid charging.(usage -= PAGE_SIZE) + At commit(), the page is associated with the memcg. At cancel(), simply usage -= PAGE_SIZE. diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index eb65d29516ca..1a9a096858e0 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -54,28 +54,11 @@ struct mem_cgroup_reclaim_cookie { }; #ifdef CONFIG_MEMCG -/* - * All "charge" functions with gfp_mask should use GFP_KERNEL or - * (gfp_mask & GFP_RECLAIM_MASK). In current implementatin, memcg doesn't - * alloc memory but reclaims memory from all available zones. So, "where I want - * memory from" bits of gfp_mask has no meaning. So any bits of that field is - * available but adding a rule is better. charge functions' gfp_mask should - * be set to GFP_KERNEL or gfp_mask & GFP_RECLAIM_MASK for avoiding ambiguous - * codes. - * (Of course, if memcg does memory allocation in future, GFP_KERNEL is sane.) 
- */ - -extern int mem_cgroup_charge_anon(struct page *page, struct mm_struct *mm, - gfp_t gfp_mask); -/* for swap handling */ -extern int mem_cgroup_try_charge_swapin(struct mm_struct *mm, - struct page *page, gfp_t mask, struct mem_cgroup **memcgp); -extern void mem_cgroup_commit_charge_swapin(struct page *page, - struct mem_cgroup *memcg); -extern void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *memcg); - -extern int mem_cgroup_charge_file(struct page *page, struct mm_struct *mm, - gfp_t gfp_mask); +int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm, + gfp_t gfp_mask, struct mem_cgroup **memcgp); +void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg, + bool lrucare); +void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg); struct lruvec *mem_cgroup_zone_lruvec(struct zone *, struct mem_cgroup *); struct lruvec *mem_cgroup_page_lruvec(struct page *, struct zone *); @@ -233,30 +216,22 @@ void mem_cgroup_print_bad_page(struct page *page); #else /* CONFIG_MEMCG */ struct mem_cgroup; -static inline int mem_cgroup_charge_anon(struct page *page, - struct mm_struct *mm, gfp_t gfp_mask) -{ - return 0; -} - -static inline int mem_cgroup_charge_file(struct page *page, - struct mm_struct *mm, gfp_t gfp_mask) -{ - return 0; -} - -static inline int mem_cgroup_try_charge_swapin(struct mm_struct *mm, - struct page *page, gfp_t gfp_mask, struct mem_cgroup **memcgp) +static inline int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm, + gfp_t gfp_mask, + struct mem_cgroup **memcgp) { + *memcgp = NULL; return 0; } -static inline void mem_cgroup_commit_charge_swapin(struct page *page, - struct mem_cgroup *memcg) +static inline void mem_cgroup_commit_charge(struct page *page, + struct mem_cgroup *memcg, + bool lrucare) { } -static inline void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *memcg) +static inline void mem_cgroup_cancel_charge(struct page *page, + struct mem_cgroup *memcg) { } diff --git a/include/linux/swap.h b/include/linux/swap.h index 1eb64043c076..46a649e4e8cd 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -320,6 +320,9 @@ extern void swap_setup(void); extern void add_page_to_unevictable_list(struct page *page); +extern void lru_cache_add_active_or_unevictable(struct page *page, + struct vm_area_struct *vma); + /* linux/mm/vmscan.c */ extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, gfp_t gfp_mask, nodemask_t *mask); diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 6f3254e8c137..1d0af8a2c646 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -167,6 +167,11 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr, /* For mmu_notifiers */ const unsigned long mmun_start = addr; const unsigned long mmun_end = addr + PAGE_SIZE; + struct mem_cgroup *memcg; + + err = mem_cgroup_try_charge(kpage, vma->vm_mm, GFP_KERNEL, &memcg); + if (err) + return err; /* For try_to_free_swap() and munlock_vma_page() below */ lock_page(page); @@ -179,6 +184,8 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr, get_page(kpage); page_add_new_anon_rmap(kpage, vma, addr); + mem_cgroup_commit_charge(kpage, memcg, false); + lru_cache_add_active_or_unevictable(kpage, vma); if (!PageAnon(page)) { dec_mm_counter(mm, MM_FILEPAGES); @@ -200,6 +207,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr, err = 0; unlock: + mem_cgroup_cancel_charge(kpage, memcg); mmu_notifier_invalidate_range_end(mm, 
mmun_start, mmun_end); unlock_page(page); return err; @@ -315,18 +323,11 @@ retry: if (!new_page) goto put_old; - if (mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL)) - goto put_new; - __SetPageUptodate(new_page); copy_highpage(new_page, old_page); copy_to_page(new_page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE); ret = __replace_page(vma, vaddr, old_page, new_page); - if (ret) - mem_cgroup_uncharge_page(new_page); - -put_new: page_cache_release(new_page); put_old: put_page(old_page); diff --git a/mm/filemap.c b/mm/filemap.c index af19a6b079f5..349a40e35545 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -31,6 +31,7 @@ #include #include #include /* for BUG_ON(!in_atomic()) only */ +#include #include #include #include @@ -548,19 +549,24 @@ static int __add_to_page_cache_locked(struct page *page, pgoff_t offset, gfp_t gfp_mask, void **shadowp) { + int huge = PageHuge(page); + struct mem_cgroup *memcg; int error; VM_BUG_ON_PAGE(!PageLocked(page), page); VM_BUG_ON_PAGE(PageSwapBacked(page), page); - error = mem_cgroup_charge_file(page, current->mm, - gfp_mask & GFP_RECLAIM_MASK); - if (error) - return error; + if (!huge) { + error = mem_cgroup_try_charge(page, current->mm, + gfp_mask, &memcg); + if (error) + return error; + } error = radix_tree_maybe_preload(gfp_mask & ~__GFP_HIGHMEM); if (error) { - mem_cgroup_uncharge_cache_page(page); + if (!huge) + mem_cgroup_cancel_charge(page, memcg); return error; } @@ -575,13 +581,16 @@ static int __add_to_page_cache_locked(struct page *page, goto err_insert; __inc_zone_page_state(page, NR_FILE_PAGES); spin_unlock_irq(&mapping->tree_lock); + if (!huge) + mem_cgroup_commit_charge(page, memcg, false); trace_mm_filemap_add_to_page_cache(page); return 0; err_insert: page->mapping = NULL; /* Leave page->index set: truncation relies upon it */ spin_unlock_irq(&mapping->tree_lock); - mem_cgroup_uncharge_cache_page(page); + if (!huge) + mem_cgroup_cancel_charge(page, memcg); page_cache_release(page); return error; } diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 3630d577e987..d9a21d06b862 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -715,13 +715,20 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm, unsigned long haddr, pmd_t *pmd, struct page *page) { + struct mem_cgroup *memcg; pgtable_t pgtable; spinlock_t *ptl; VM_BUG_ON_PAGE(!PageCompound(page), page); + + if (mem_cgroup_try_charge(page, mm, GFP_TRANSHUGE, &memcg)) + return VM_FAULT_OOM; + pgtable = pte_alloc_one(mm, haddr); - if (unlikely(!pgtable)) + if (unlikely(!pgtable)) { + mem_cgroup_cancel_charge(page, memcg); return VM_FAULT_OOM; + } clear_huge_page(page, haddr, HPAGE_PMD_NR); /* @@ -734,7 +741,7 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm, ptl = pmd_lock(mm, pmd); if (unlikely(!pmd_none(*pmd))) { spin_unlock(ptl); - mem_cgroup_uncharge_page(page); + mem_cgroup_cancel_charge(page, memcg); put_page(page); pte_free(mm, pgtable); } else { @@ -742,6 +749,8 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm, entry = mk_huge_pmd(page, vma->vm_page_prot); entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); page_add_new_anon_rmap(page, vma, haddr); + mem_cgroup_commit_charge(page, memcg, false); + lru_cache_add_active_or_unevictable(page, vma); pgtable_trans_huge_deposit(mm, pmd, pgtable); set_pmd_at(mm, haddr, pmd, entry); add_mm_counter(mm, MM_ANONPAGES, HPAGE_PMD_NR); @@ -827,13 +836,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, count_vm_event(THP_FAULT_FALLBACK); return VM_FAULT_FALLBACK; } - if 
(unlikely(mem_cgroup_charge_anon(page, mm, GFP_TRANSHUGE))) { - put_page(page); - count_vm_event(THP_FAULT_FALLBACK); - return VM_FAULT_FALLBACK; - } if (unlikely(__do_huge_pmd_anonymous_page(mm, vma, haddr, pmd, page))) { - mem_cgroup_uncharge_page(page); put_page(page); count_vm_event(THP_FAULT_FALLBACK); return VM_FAULT_FALLBACK; @@ -979,6 +982,7 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm, struct page *page, unsigned long haddr) { + struct mem_cgroup *memcg; spinlock_t *ptl; pgtable_t pgtable; pmd_t _pmd; @@ -999,20 +1003,21 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm, __GFP_OTHER_NODE, vma, address, page_to_nid(page)); if (unlikely(!pages[i] || - mem_cgroup_charge_anon(pages[i], mm, - GFP_KERNEL))) { + mem_cgroup_try_charge(pages[i], mm, GFP_KERNEL, + &memcg))) { if (pages[i]) put_page(pages[i]); - mem_cgroup_uncharge_start(); while (--i >= 0) { - mem_cgroup_uncharge_page(pages[i]); + memcg = (void *)page_private(pages[i]); + set_page_private(pages[i], 0); + mem_cgroup_cancel_charge(pages[i], memcg); put_page(pages[i]); } - mem_cgroup_uncharge_end(); kfree(pages); ret |= VM_FAULT_OOM; goto out; } + set_page_private(pages[i], (unsigned long)memcg); } for (i = 0; i < HPAGE_PMD_NR; i++) { @@ -1041,7 +1046,11 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm, pte_t *pte, entry; entry = mk_pte(pages[i], vma->vm_page_prot); entry = maybe_mkwrite(pte_mkdirty(entry), vma); + memcg = (void *)page_private(pages[i]); + set_page_private(pages[i], 0); page_add_new_anon_rmap(pages[i], vma, haddr); + mem_cgroup_commit_charge(pages[i], memcg, false); + lru_cache_add_active_or_unevictable(pages[i], vma); pte = pte_offset_map(&_pmd, haddr); VM_BUG_ON(!pte_none(*pte)); set_pte_at(mm, haddr, pte, entry); @@ -1065,12 +1074,12 @@ out: out_free_pages: spin_unlock(ptl); mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); - mem_cgroup_uncharge_start(); for (i = 0; i < HPAGE_PMD_NR; i++) { - mem_cgroup_uncharge_page(pages[i]); + memcg = (void *)page_private(pages[i]); + set_page_private(pages[i], 0); + mem_cgroup_cancel_charge(pages[i], memcg); put_page(pages[i]); } - mem_cgroup_uncharge_end(); kfree(pages); goto out; } @@ -1081,6 +1090,7 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, spinlock_t *ptl; int ret = 0; struct page *page = NULL, *new_page; + struct mem_cgroup *memcg; unsigned long haddr; unsigned long mmun_start; /* For mmu_notifiers */ unsigned long mmun_end; /* For mmu_notifiers */ @@ -1132,7 +1142,8 @@ alloc: goto out; } - if (unlikely(mem_cgroup_charge_anon(new_page, mm, GFP_TRANSHUGE))) { + if (unlikely(mem_cgroup_try_charge(new_page, mm, + GFP_TRANSHUGE, &memcg))) { put_page(new_page); if (page) { split_huge_page(page); @@ -1161,7 +1172,7 @@ alloc: put_user_huge_page(page); if (unlikely(!pmd_same(*pmd, orig_pmd))) { spin_unlock(ptl); - mem_cgroup_uncharge_page(new_page); + mem_cgroup_cancel_charge(new_page, memcg); put_page(new_page); goto out_mn; } else { @@ -1170,6 +1181,8 @@ alloc: entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); pmdp_clear_flush(vma, haddr, pmd); page_add_new_anon_rmap(new_page, vma, haddr); + mem_cgroup_commit_charge(new_page, memcg, false); + lru_cache_add_active_or_unevictable(new_page, vma); set_pmd_at(mm, haddr, pmd, entry); update_mmu_cache_pmd(vma, address, pmd); if (!page) { @@ -2413,6 +2426,7 @@ static void collapse_huge_page(struct mm_struct *mm, spinlock_t *pmd_ptl, *pte_ptl; int isolated; unsigned long hstart, hend; + struct mem_cgroup *memcg; unsigned long 
mmun_start; /* For mmu_notifiers */ unsigned long mmun_end; /* For mmu_notifiers */ @@ -2423,7 +2437,8 @@ static void collapse_huge_page(struct mm_struct *mm, if (!new_page) return; - if (unlikely(mem_cgroup_charge_anon(new_page, mm, GFP_TRANSHUGE))) + if (unlikely(mem_cgroup_try_charge(new_page, mm, + GFP_TRANSHUGE, &memcg))) return; /* @@ -2510,6 +2525,8 @@ static void collapse_huge_page(struct mm_struct *mm, spin_lock(pmd_ptl); BUG_ON(!pmd_none(*pmd)); page_add_new_anon_rmap(new_page, vma, address); + mem_cgroup_commit_charge(new_page, memcg, false); + lru_cache_add_active_or_unevictable(new_page, vma); pgtable_trans_huge_deposit(mm, pmd, pgtable); set_pmd_at(mm, address, pmd, _pmd); update_mmu_cache_pmd(vma, address, pmd); @@ -2523,7 +2540,7 @@ out_up_write: return; out: - mem_cgroup_uncharge_page(new_page); + mem_cgroup_cancel_charge(new_page, memcg); goto out_up_write; } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 90dc501eaf3f..1cbe1e54ff5f 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2551,17 +2551,8 @@ static int memcg_cpu_hotplug_callback(struct notifier_block *nb, return NOTIFY_OK; } -/** - * mem_cgroup_try_charge - try charging a memcg - * @memcg: memcg to charge - * @nr_pages: number of pages to charge - * - * Returns 0 if @memcg was charged successfully, -EINTR if the charge - * was bypassed to root_mem_cgroup, and -ENOMEM if the charge failed. - */ -static int mem_cgroup_try_charge(struct mem_cgroup *memcg, - gfp_t gfp_mask, - unsigned int nr_pages) +static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask, + unsigned int nr_pages) { unsigned int batch = max(CHARGE_BATCH, nr_pages); int nr_retries = MEM_CGROUP_RECLAIM_RETRIES; @@ -2660,41 +2651,7 @@ done: return ret; } -/** - * mem_cgroup_try_charge_mm - try charging a mm - * @mm: mm_struct to charge - * @nr_pages: number of pages to charge - * @oom: trigger OOM if reclaim fails - * - * Returns the charged mem_cgroup associated with the given mm_struct or - * NULL the charge failed. - */ -static struct mem_cgroup *mem_cgroup_try_charge_mm(struct mm_struct *mm, - gfp_t gfp_mask, - unsigned int nr_pages) - -{ - struct mem_cgroup *memcg; - int ret; - - memcg = get_mem_cgroup_from_mm(mm); - ret = mem_cgroup_try_charge(memcg, gfp_mask, nr_pages); - css_put(&memcg->css); - if (ret == -EINTR) - memcg = root_mem_cgroup; - else if (ret) - memcg = NULL; - - return memcg; -} - -/* - * Somemtimes we have to undo a charge we got by try_charge(). - * This function is for that and do uncharge, put css's refcnt. - * gotten by try_charge(). 
- */ -static void __mem_cgroup_cancel_charge(struct mem_cgroup *memcg, - unsigned int nr_pages) +static void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages) { unsigned long bytes = nr_pages * PAGE_SIZE; @@ -2760,17 +2717,13 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page) return memcg; } -static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg, - struct page *page, - unsigned int nr_pages, - enum charge_type ctype, - bool lrucare) +static void commit_charge(struct page *page, struct mem_cgroup *memcg, + unsigned int nr_pages, bool anon, bool lrucare) { struct page_cgroup *pc = lookup_page_cgroup(page); struct zone *uninitialized_var(zone); struct lruvec *lruvec; bool was_on_lru = false; - bool anon; lock_page_cgroup(pc); VM_BUG_ON_PAGE(PageCgroupUsed(pc), page); @@ -2807,11 +2760,6 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg, spin_unlock_irq(&zone->lru_lock); } - if (ctype == MEM_CGROUP_CHARGE_TYPE_ANON) - anon = true; - else - anon = false; - mem_cgroup_charge_statistics(memcg, page, anon, nr_pages); unlock_page_cgroup(pc); @@ -2882,21 +2830,21 @@ static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size) if (ret) return ret; - ret = mem_cgroup_try_charge(memcg, gfp, size >> PAGE_SHIFT); + ret = try_charge(memcg, gfp, size >> PAGE_SHIFT); if (ret == -EINTR) { /* - * mem_cgroup_try_charge() chosed to bypass to root due to - * OOM kill or fatal signal. Since our only options are to - * either fail the allocation or charge it to this cgroup, do - * it as a temporary condition. But we can't fail. From a - * kmem/slab perspective, the cache has already been selected, - * by mem_cgroup_kmem_get_cache(), so it is too late to change + * try_charge() chose to bypass to root due to OOM kill or + * fatal signal. Since our only options are to either fail + * the allocation or charge it to this cgroup, do it as a + * temporary condition. But we can't fail. From a kmem/slab + * perspective, the cache has already been selected, by + * mem_cgroup_kmem_get_cache(), so it is too late to change * our minds. * * This condition will only trigger if the task entered - * memcg_charge_kmem in a sane state, but was OOM-killed during - * mem_cgroup_try_charge() above. Tasks that were already - * dying when the allocation triggers should have been already + * memcg_charge_kmem in a sane state, but was OOM-killed + * during try_charge() above. Tasks that were already dying + * when the allocation triggers should have been already * directed to the root cgroup in memcontrol.h */ res_counter_charge_nofail(&memcg->res, size, &fail_res); @@ -3618,164 +3566,6 @@ out: return ret; } -int mem_cgroup_charge_anon(struct page *page, - struct mm_struct *mm, gfp_t gfp_mask) -{ - unsigned int nr_pages = 1; - struct mem_cgroup *memcg; - - if (mem_cgroup_disabled()) - return 0; - - VM_BUG_ON_PAGE(page_mapped(page), page); - VM_BUG_ON_PAGE(page->mapping && !PageAnon(page), page); - VM_BUG_ON(!mm); - - if (PageTransHuge(page)) { - nr_pages <<= compound_order(page); - VM_BUG_ON_PAGE(!PageTransHuge(page), page); - } - - memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, nr_pages); - if (!memcg) - return -ENOMEM; - __mem_cgroup_commit_charge(memcg, page, nr_pages, - MEM_CGROUP_CHARGE_TYPE_ANON, false); - return 0; -} - -/* - * While swap-in, try_charge -> commit or cancel, the page is locked. - * And when try_charge() successfully returns, one refcnt to memcg without - * struct page_cgroup is acquired. 
This refcnt will be consumed by - * "commit()" or removed by "cancel()" - */ -static int __mem_cgroup_try_charge_swapin(struct mm_struct *mm, - struct page *page, - gfp_t mask, - struct mem_cgroup **memcgp) -{ - struct mem_cgroup *memcg = NULL; - struct page_cgroup *pc; - int ret; - - pc = lookup_page_cgroup(page); - /* - * Every swap fault against a single page tries to charge the - * page, bail as early as possible. shmem_unuse() encounters - * already charged pages, too. The USED bit is protected by - * the page lock, which serializes swap cache removal, which - * in turn serializes uncharging. - */ - if (PageCgroupUsed(pc)) - goto out; - if (do_swap_account) - memcg = try_get_mem_cgroup_from_page(page); - if (!memcg) - memcg = get_mem_cgroup_from_mm(mm); - ret = mem_cgroup_try_charge(memcg, mask, 1); - css_put(&memcg->css); - if (ret == -EINTR) - memcg = root_mem_cgroup; - else if (ret) - return ret; -out: - *memcgp = memcg; - return 0; -} - -int mem_cgroup_try_charge_swapin(struct mm_struct *mm, struct page *page, - gfp_t gfp_mask, struct mem_cgroup **memcgp) -{ - if (mem_cgroup_disabled()) { - *memcgp = NULL; - return 0; - } - /* - * A racing thread's fault, or swapoff, may have already - * updated the pte, and even removed page from swap cache: in - * those cases unuse_pte()'s pte_same() test will fail; but - * there's also a KSM case which does need to charge the page. - */ - if (!PageSwapCache(page)) { - struct mem_cgroup *memcg; - - memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, 1); - if (!memcg) - return -ENOMEM; - *memcgp = memcg; - return 0; - } - return __mem_cgroup_try_charge_swapin(mm, page, gfp_mask, memcgp); -} - -void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *memcg) -{ - if (mem_cgroup_disabled()) - return; - if (!memcg) - return; - __mem_cgroup_cancel_charge(memcg, 1); -} - -static void -__mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *memcg, - enum charge_type ctype) -{ - if (mem_cgroup_disabled()) - return; - if (!memcg) - return; - - __mem_cgroup_commit_charge(memcg, page, 1, ctype, true); - /* - * Now swap is on-memory. This means this page may be - * counted both as mem and swap....double count. - * Fix it by uncharging from memsw. Basically, this SwapCache is stable - * under lock_page(). But in do_swap_page()::memory.c, reuse_swap_page() - * may call delete_from_swap_cache() before reach here. 
- */ - if (do_swap_account && PageSwapCache(page)) { - swp_entry_t ent = {.val = page_private(page)}; - mem_cgroup_uncharge_swap(ent); - } -} - -void mem_cgroup_commit_charge_swapin(struct page *page, - struct mem_cgroup *memcg) -{ - __mem_cgroup_commit_charge_swapin(page, memcg, - MEM_CGROUP_CHARGE_TYPE_ANON); -} - -int mem_cgroup_charge_file(struct page *page, struct mm_struct *mm, - gfp_t gfp_mask) -{ - enum charge_type type = MEM_CGROUP_CHARGE_TYPE_CACHE; - struct mem_cgroup *memcg; - int ret; - - if (mem_cgroup_disabled()) - return 0; - if (PageCompound(page)) - return 0; - - if (PageSwapCache(page)) { /* shmem */ - ret = __mem_cgroup_try_charge_swapin(mm, page, - gfp_mask, &memcg); - if (ret) - return ret; - __mem_cgroup_commit_charge_swapin(page, memcg, type); - return 0; - } - - memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, 1); - if (!memcg) - return -ENOMEM; - __mem_cgroup_commit_charge(memcg, page, 1, type, false); - return 0; -} - static void mem_cgroup_do_uncharge(struct mem_cgroup *memcg, unsigned int nr_pages, const enum charge_type ctype) @@ -4122,7 +3912,6 @@ void mem_cgroup_prepare_migration(struct page *page, struct page *newpage, struct mem_cgroup *memcg = NULL; unsigned int nr_pages = 1; struct page_cgroup *pc; - enum charge_type ctype; *memcgp = NULL; @@ -4184,16 +3973,12 @@ void mem_cgroup_prepare_migration(struct page *page, struct page *newpage, * page. In the case new page is migrated but not remapped, new page's * mapcount will be finally 0 and we call uncharge in end_migration(). */ - if (PageAnon(page)) - ctype = MEM_CGROUP_CHARGE_TYPE_ANON; - else - ctype = MEM_CGROUP_CHARGE_TYPE_CACHE; /* * The page is committed to the memcg, but it's not actually * charged to the res_counter since we plan on replacing the * old one and only one page is going to be left afterwards. */ - __mem_cgroup_commit_charge(memcg, newpage, nr_pages, ctype, false); + commit_charge(newpage, memcg, nr_pages, PageAnon(page), false); } /* remove redundant charge if migration failed*/ @@ -4252,7 +4037,6 @@ void mem_cgroup_replace_page_cache(struct page *oldpage, { struct mem_cgroup *memcg = NULL; struct page_cgroup *pc; - enum charge_type type = MEM_CGROUP_CHARGE_TYPE_CACHE; if (mem_cgroup_disabled()) return; @@ -4278,7 +4062,7 @@ void mem_cgroup_replace_page_cache(struct page *oldpage, * the newpage may be on LRU(or pagevec for LRU) already. We lock * LRU while we overwrite pc->mem_cgroup. */ - __mem_cgroup_commit_charge(memcg, newpage, 1, type, true); + commit_charge(newpage, memcg, 1, false, true); } #ifdef CONFIG_DEBUG_VM @@ -6319,20 +6103,19 @@ static int mem_cgroup_do_precharge(unsigned long count) int ret; /* Try a single bulk charge without reclaim first */ - ret = mem_cgroup_try_charge(mc.to, GFP_KERNEL & ~__GFP_WAIT, count); + ret = try_charge(mc.to, GFP_KERNEL & ~__GFP_WAIT, count); if (!ret) { mc.precharge += count; return ret; } if (ret == -EINTR) { - __mem_cgroup_cancel_charge(root_mem_cgroup, count); + cancel_charge(root_mem_cgroup, count); return ret; } /* Try charges one by one with reclaim */ while (count--) { - ret = mem_cgroup_try_charge(mc.to, - GFP_KERNEL & ~__GFP_NORETRY, 1); + ret = try_charge(mc.to, GFP_KERNEL & ~__GFP_NORETRY, 1); /* * In case of failure, any residual charges against * mc.to will be dropped by mem_cgroup_clear_mc() @@ -6340,7 +6123,7 @@ static int mem_cgroup_do_precharge(unsigned long count) * bypassed to root right away or they'll be lost. 
*/ if (ret == -EINTR) - __mem_cgroup_cancel_charge(root_mem_cgroup, 1); + cancel_charge(root_mem_cgroup, 1); if (ret) return ret; mc.precharge++; @@ -6609,7 +6392,7 @@ static void __mem_cgroup_clear_mc(void) /* we must uncharge all the leftover precharges from mc.to */ if (mc.precharge) { - __mem_cgroup_cancel_charge(mc.to, mc.precharge); + cancel_charge(mc.to, mc.precharge); mc.precharge = 0; } /* @@ -6617,7 +6400,7 @@ static void __mem_cgroup_clear_mc(void) * we must uncharge here. */ if (mc.moved_charge) { - __mem_cgroup_cancel_charge(mc.from, mc.moved_charge); + cancel_charge(mc.from, mc.moved_charge); mc.moved_charge = 0; } /* we must fixup refcnts and charges */ @@ -6946,6 +6729,150 @@ static void __init enable_swap_cgroup(void) } #endif +/** + * mem_cgroup_try_charge - try charging a page + * @page: page to charge + * @mm: mm context of the victim + * @gfp_mask: reclaim mode + * @memcgp: charged memcg return + * + * Try to charge @page to the memcg that @mm belongs to, reclaiming + * pages according to @gfp_mask if necessary. + * + * Returns 0 on success, with *@memcgp pointing to the charged memcg. + * Otherwise, an error code is returned. + * + * After page->mapping has been set up, the caller must finalize the + * charge with mem_cgroup_commit_charge(). Or abort the transaction + * with mem_cgroup_cancel_charge() in case page instantiation fails. + */ +int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm, + gfp_t gfp_mask, struct mem_cgroup **memcgp) +{ + struct mem_cgroup *memcg = NULL; + unsigned int nr_pages = 1; + int ret = 0; + + if (mem_cgroup_disabled()) + goto out; + + if (PageSwapCache(page)) { + struct page_cgroup *pc = lookup_page_cgroup(page); + /* + * Every swap fault against a single page tries to charge the + * page, bail as early as possible. shmem_unuse() encounters + * already charged pages, too. The USED bit is protected by + * the page lock, which serializes swap cache removal, which + * in turn serializes uncharging. + */ + if (PageCgroupUsed(pc)) + goto out; + } + + if (PageTransHuge(page)) { + nr_pages <<= compound_order(page); + VM_BUG_ON_PAGE(!PageTransHuge(page), page); + } + + if (do_swap_account && PageSwapCache(page)) + memcg = try_get_mem_cgroup_from_page(page); + if (!memcg) + memcg = get_mem_cgroup_from_mm(mm); + + ret = try_charge(memcg, gfp_mask, nr_pages); + + css_put(&memcg->css); + + if (ret == -EINTR) { + memcg = root_mem_cgroup; + ret = 0; + } +out: + *memcgp = memcg; + return ret; +} + +/** + * mem_cgroup_commit_charge - commit a page charge + * @page: page to charge + * @memcg: memcg to charge the page to + * @lrucare: page might be on LRU already + * + * Finalize a charge transaction started by mem_cgroup_try_charge(), + * after page->mapping has been set up. This must happen atomically + * as part of the page instantiation, i.e. under the page table lock + * for anonymous pages, under the page lock for page and swap cache. + * + * In addition, the page must not be on the LRU during the commit, to + * prevent racing with task migration. If it might be, use @lrucare. + * + * Use mem_cgroup_cancel_charge() to cancel the transaction instead. + */ +void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg, + bool lrucare) +{ + unsigned int nr_pages = 1; + + VM_BUG_ON_PAGE(!page->mapping, page); + VM_BUG_ON_PAGE(PageLRU(page) && !lrucare, page); + + if (mem_cgroup_disabled()) + return; + /* + * Swap faults will attempt to charge the same page multiple + * times. 
But reuse_swap_page() might have removed the page + * from swapcache already, so we can't check PageSwapCache(). + */ + if (!memcg) + return; + + if (PageTransHuge(page)) { + nr_pages <<= compound_order(page); + VM_BUG_ON_PAGE(!PageTransHuge(page), page); + } + + commit_charge(page, memcg, nr_pages, PageAnon(page), lrucare); + + if (do_swap_account && PageSwapCache(page)) { + swp_entry_t entry = { .val = page_private(page) }; + /* + * The swap entry might not get freed for a long time, + * let's not wait for it. The page already received a + * memory+swap charge, drop the swap entry duplicate. + */ + mem_cgroup_uncharge_swap(entry); + } +} + +/** + * mem_cgroup_cancel_charge - cancel a page charge + * @page: page to charge + * @memcg: memcg to charge the page to + * + * Cancel a charge transaction started by mem_cgroup_try_charge(). + */ +void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg) +{ + unsigned int nr_pages = 1; + + if (mem_cgroup_disabled()) + return; + /* + * Swap faults will attempt to charge the same page multiple + * times. But reuse_swap_page() might have removed the page + * from swapcache already, so we can't check PageSwapCache(). + */ + if (!memcg) + return; + + if (PageTransHuge(page)) { + nr_pages <<= compound_order(page); + VM_BUG_ON_PAGE(!PageTransHuge(page), page); + } + + cancel_charge(memcg, nr_pages); +} + /* * subsys_initcall() for memory controller. * diff --git a/mm/memory.c b/mm/memory.c index 5c55270729f7..6d7648773dc4 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2049,6 +2049,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, struct page *dirty_page = NULL; unsigned long mmun_start = 0; /* For mmu_notifiers */ unsigned long mmun_end = 0; /* For mmu_notifiers */ + struct mem_cgroup *memcg; old_page = vm_normal_page(vma, address, orig_pte); if (!old_page) { @@ -2204,7 +2205,7 @@ gotten: } __SetPageUptodate(new_page); - if (mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL)) + if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg)) goto oom_free_new; mmun_start = address & PAGE_MASK; @@ -2234,6 +2235,8 @@ gotten: */ ptep_clear_flush(vma, address, page_table); page_add_new_anon_rmap(new_page, vma, address); + mem_cgroup_commit_charge(new_page, memcg, false); + lru_cache_add_active_or_unevictable(new_page, vma); /* * We call the notify macro here because, when using secondary * mmu page tables (such as kvm shadow page tables), we want the @@ -2271,7 +2274,7 @@ gotten: new_page = old_page; ret |= VM_FAULT_WRITE; } else - mem_cgroup_uncharge_page(new_page); + mem_cgroup_cancel_charge(new_page, memcg); if (new_page) page_cache_release(new_page); @@ -2410,10 +2413,10 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, { spinlock_t *ptl; struct page *page, *swapcache; + struct mem_cgroup *memcg; swp_entry_t entry; pte_t pte; int locked; - struct mem_cgroup *ptr; int exclusive = 0; int ret = 0; @@ -2489,7 +2492,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, goto out_page; } - if (mem_cgroup_try_charge_swapin(mm, page, GFP_KERNEL, &ptr)) { + if (mem_cgroup_try_charge(page, mm, GFP_KERNEL, &memcg)) { ret = VM_FAULT_OOM; goto out_page; } @@ -2514,10 +2517,6 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, * while the page is counted on swap but not yet in mapcount i.e. * before page_add_anon_rmap() and swap_free(); try_to_free_swap() * must be called after the swap_free(), or it will never succeed. 
- * Because delete_from_swap_page() may be called by reuse_swap_page(), - * mem_cgroup_commit_charge_swapin() may not be able to find swp_entry - * in page->private. In this case, a record in swap_cgroup is silently - * discarded at swap_free(). */ inc_mm_counter_fast(mm, MM_ANONPAGES); @@ -2533,12 +2532,14 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, if (pte_swp_soft_dirty(orig_pte)) pte = pte_mksoft_dirty(pte); set_pte_at(mm, address, page_table, pte); - if (page == swapcache) + if (page == swapcache) { do_page_add_anon_rmap(page, vma, address, exclusive); - else /* ksm created a completely new copy */ + mem_cgroup_commit_charge(page, memcg, true); + } else { /* ksm created a completely new copy */ page_add_new_anon_rmap(page, vma, address); - /* It's better to call commit-charge after rmap is established */ - mem_cgroup_commit_charge_swapin(page, ptr); + mem_cgroup_commit_charge(page, memcg, false); + lru_cache_add_active_or_unevictable(page, vma); + } swap_free(entry); if (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page)) @@ -2571,7 +2572,7 @@ unlock: out: return ret; out_nomap: - mem_cgroup_cancel_charge_swapin(ptr); + mem_cgroup_cancel_charge(page, memcg); pte_unmap_unlock(page_table, ptl); out_page: unlock_page(page); @@ -2627,6 +2628,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, pte_t *page_table, pmd_t *pmd, unsigned int flags) { + struct mem_cgroup *memcg; struct page *page; spinlock_t *ptl; pte_t entry; @@ -2660,7 +2662,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, */ __SetPageUptodate(page); - if (mem_cgroup_charge_anon(page, mm, GFP_KERNEL)) + if (mem_cgroup_try_charge(page, mm, GFP_KERNEL, &memcg)) goto oom_free_page; entry = mk_pte(page, vma->vm_page_prot); @@ -2673,6 +2675,8 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, inc_mm_counter_fast(mm, MM_ANONPAGES); page_add_new_anon_rmap(page, vma, address); + mem_cgroup_commit_charge(page, memcg, false); + lru_cache_add_active_or_unevictable(page, vma); setpte: set_pte_at(mm, address, page_table, entry); @@ -2682,7 +2686,7 @@ unlock: pte_unmap_unlock(page_table, ptl); return 0; release: - mem_cgroup_uncharge_page(page); + mem_cgroup_cancel_charge(page, memcg); page_cache_release(page); goto unlock; oom_free_page: @@ -2919,6 +2923,7 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma, pgoff_t pgoff, unsigned int flags, pte_t orig_pte) { struct page *fault_page, *new_page; + struct mem_cgroup *memcg; spinlock_t *ptl; pte_t *pte; int ret; @@ -2930,7 +2935,7 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma, if (!new_page) return VM_FAULT_OOM; - if (mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL)) { + if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg)) { page_cache_release(new_page); return VM_FAULT_OOM; } @@ -2950,12 +2955,14 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma, goto uncharge_out; } do_set_pte(vma, address, new_page, pte, true, true); + mem_cgroup_commit_charge(new_page, memcg, false); + lru_cache_add_active_or_unevictable(new_page, vma); pte_unmap_unlock(pte, ptl); unlock_page(fault_page); page_cache_release(fault_page); return ret; uncharge_out: - mem_cgroup_uncharge_page(new_page); + mem_cgroup_cancel_charge(new_page, memcg); page_cache_release(new_page); return ret; } diff --git a/mm/rmap.c b/mm/rmap.c index 22a4a7699cdb..f56b5ed78128 100644 --- 
a/mm/rmap.c +++ b/mm/rmap.c @@ -1032,25 +1032,6 @@ void page_add_new_anon_rmap(struct page *page, __mod_zone_page_state(page_zone(page), NR_ANON_PAGES, hpage_nr_pages(page)); __page_set_anon_rmap(page, vma, address, 1); - - VM_BUG_ON_PAGE(PageLRU(page), page); - if (likely((vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) != VM_LOCKED)) { - SetPageActive(page); - lru_cache_add(page); - return; - } - - if (!TestSetPageMlocked(page)) { - /* - * We use the irq-unsafe __mod_zone_page_stat because this - * counter is not modified from interrupt context, and the pte - * lock is held(spinlock), which implies preemption disabled. - */ - __mod_zone_page_state(page_zone(page), NR_MLOCK, - hpage_nr_pages(page)); - count_vm_event(UNEVICTABLE_PGMLOCKED); - } - add_page_to_unevictable_list(page); } /** diff --git a/mm/shmem.c b/mm/shmem.c index 302d1cf7ad07..1f1a8085538b 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -621,7 +621,7 @@ static int shmem_unuse_inode(struct shmem_inode_info *info, radswap = swp_to_radix_entry(swap); index = radix_tree_locate_item(&mapping->page_tree, radswap); if (index == -1) - return 0; + return -EAGAIN; /* tell shmem_unuse we found nothing */ /* * Move _head_ to start search for next from here. @@ -680,7 +680,6 @@ static int shmem_unuse_inode(struct shmem_inode_info *info, spin_unlock(&info->lock); swap_free(swap); } - error = 1; /* not an error, but entry was found */ } return error; } @@ -692,7 +691,7 @@ int shmem_unuse(swp_entry_t swap, struct page *page) { struct list_head *this, *next; struct shmem_inode_info *info; - int found = 0; + struct mem_cgroup *memcg; int error = 0; /* @@ -707,26 +706,32 @@ int shmem_unuse(swp_entry_t swap, struct page *page) * the shmem_swaplist_mutex which might hold up shmem_writepage(). * Charged back to the user (not to caller) when swap account is used. */ - error = mem_cgroup_charge_file(page, current->mm, GFP_KERNEL); + error = mem_cgroup_try_charge(page, current->mm, GFP_KERNEL, &memcg); if (error) goto out; /* No radix_tree_preload: swap entry keeps a place for page in tree */ + error = -EAGAIN; mutex_lock(&shmem_swaplist_mutex); list_for_each_safe(this, next, &shmem_swaplist) { info = list_entry(this, struct shmem_inode_info, swaplist); if (info->swapped) - found = shmem_unuse_inode(info, swap, &page); + error = shmem_unuse_inode(info, swap, &page); else list_del_init(&info->swaplist); cond_resched(); - if (found) + if (error != -EAGAIN) break; + /* found nothing in this: move on to search the next */ } mutex_unlock(&shmem_swaplist_mutex); - if (found < 0) - error = found; + if (error) { + if (error != -ENOMEM) + error = 0; + mem_cgroup_cancel_charge(page, memcg); + } else + mem_cgroup_commit_charge(page, memcg, true); out: unlock_page(page); page_cache_release(page); @@ -1030,6 +1035,7 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index, struct address_space *mapping = inode->i_mapping; struct shmem_inode_info *info; struct shmem_sb_info *sbinfo; + struct mem_cgroup *memcg; struct page *page; swp_entry_t swap; int error; @@ -1108,8 +1114,7 @@ repeat: goto failed; } - error = mem_cgroup_charge_file(page, current->mm, - gfp & GFP_RECLAIM_MASK); + error = mem_cgroup_try_charge(page, current->mm, gfp, &memcg); if (!error) { error = shmem_add_to_page_cache(page, mapping, index, swp_to_radix_entry(swap)); @@ -1125,12 +1130,16 @@ repeat: * Reset swap.val? No, leave it so "failed" goes back to * "repeat": reading a hole and writing should succeed. 
*/ - if (error) + if (error) { + mem_cgroup_cancel_charge(page, memcg); delete_from_swap_cache(page); + } } if (error) goto failed; + mem_cgroup_commit_charge(page, memcg, true); + spin_lock(&info->lock); info->swapped--; shmem_recalc_inode(inode); @@ -1168,8 +1177,7 @@ repeat: if (sgp == SGP_WRITE) __SetPageReferenced(page); - error = mem_cgroup_charge_file(page, current->mm, - gfp & GFP_RECLAIM_MASK); + error = mem_cgroup_try_charge(page, current->mm, gfp, &memcg); if (error) goto decused; error = radix_tree_maybe_preload(gfp & GFP_RECLAIM_MASK); @@ -1179,9 +1187,10 @@ repeat: radix_tree_preload_end(); } if (error) { - mem_cgroup_uncharge_cache_page(page); + mem_cgroup_cancel_charge(page, memcg); goto decused; } + mem_cgroup_commit_charge(page, memcg, false); lru_cache_add_anon(page); spin_lock(&info->lock); diff --git a/mm/swap.c b/mm/swap.c index c789d01c9ec3..3baca701bb78 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -687,6 +687,40 @@ void add_page_to_unevictable_list(struct page *page) spin_unlock_irq(&zone->lru_lock); } +/** + * lru_cache_add_active_or_unevictable + * @page: the page to be added to LRU + * @vma: vma in which page is mapped for determining reclaimability + * + * Place @page on the active or unevictable LRU list, depending on its + * evictability. Note that if the page is not evictable, it goes + * directly back onto it's zone's unevictable list, it does NOT use a + * per cpu pagevec. + */ +void lru_cache_add_active_or_unevictable(struct page *page, + struct vm_area_struct *vma) +{ + VM_BUG_ON_PAGE(PageLRU(page), page); + + if (likely((vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) != VM_LOCKED)) { + SetPageActive(page); + lru_cache_add(page); + return; + } + + if (!TestSetPageMlocked(page)) { + /* + * We use the irq-unsafe __mod_zone_page_stat because this + * counter is not modified from interrupt context, and the pte + * lock is held(spinlock), which implies preemption disabled. + */ + __mod_zone_page_state(page_zone(page), NR_MLOCK, + hpage_nr_pages(page)); + count_vm_event(UNEVICTABLE_PGMLOCKED); + } + add_page_to_unevictable_list(page); +} + /* * If the page can not be invalidated, it is moved to the * inactive list to speed up its reclaim. 
It is moved to the diff --git a/mm/swapfile.c b/mm/swapfile.c index 4c524f7bd0bf..0883b4912ff7 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1106,15 +1106,14 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd, if (unlikely(!page)) return -ENOMEM; - if (mem_cgroup_try_charge_swapin(vma->vm_mm, page, - GFP_KERNEL, &memcg)) { + if (mem_cgroup_try_charge(page, vma->vm_mm, GFP_KERNEL, &memcg)) { ret = -ENOMEM; goto out_nolock; } pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); if (unlikely(!maybe_same_pte(*pte, swp_entry_to_pte(entry)))) { - mem_cgroup_cancel_charge_swapin(memcg); + mem_cgroup_cancel_charge(page, memcg); ret = 0; goto out; } @@ -1124,11 +1123,14 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd, get_page(page); set_pte_at(vma->vm_mm, addr, pte, pte_mkold(mk_pte(page, vma->vm_page_prot))); - if (page == swapcache) + if (page == swapcache) { page_add_anon_rmap(page, vma, addr); - else /* ksm created a completely new copy */ + mem_cgroup_commit_charge(page, memcg, true); + } else { /* ksm created a completely new copy */ page_add_new_anon_rmap(page, vma, addr); - mem_cgroup_commit_charge_swapin(page, memcg); + mem_cgroup_commit_charge(page, memcg, false); + lru_cache_add_active_or_unevictable(page, vma); + } swap_free(entry); /* * Move the page to the active list so it is not -- cgit v1.2.3-59-g8ed1b From 0a31bc97c80c3fa87b32c091d9a930ac19cd0c40 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Fri, 8 Aug 2014 14:19:22 -0700 Subject: mm: memcontrol: rewrite uncharge API The memcg uncharging code that is involved towards the end of a page's lifetime - truncation, reclaim, swapout, migration - is impressively complicated and fragile. Because anonymous and file pages were always charged before they had their page->mapping established, uncharges had to happen when the page type could still be known from the context; as in unmap for anonymous, page cache removal for file and shmem pages, and swap cache truncation for swap pages. However, these operations happen well before the page is actually freed, and so a lot of synchronization is necessary: - Charging, uncharging, page migration, and charge migration all need to take a per-page bit spinlock as they could race with uncharging. - Swap cache truncation happens during both swap-in and swap-out, and possibly repeatedly before the page is actually freed. This means that the memcg swapout code is called from many contexts that make no sense and it has to figure out the direction from page state to make sure memory and memory+swap are always correctly charged. - On page migration, the old page might be unmapped but then reused, so memcg code has to prevent untimely uncharging in that case. Because this code - which should be a simple charge transfer - is so special-cased, it is not reusable for replace_page_cache(). But now that charged pages always have a page->mapping, introduce mem_cgroup_uncharge(), which is called after the final put_page(), when we know for sure that nobody is looking at the page anymore. For page migration, introduce mem_cgroup_migrate(), which is called after the migration is successful and the new page is fully rmapped. Because the old page is no longer uncharged after migration, prevent double charges by decoupling the page's memcg association (PCG_USED and pc->mem_cgroup) from the page holding an actual charge. The new bits PCG_MEM and PCG_MEMSW represent the respective charges and are transferred to the new page during migration. 
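For illustration, a minimal caller-side sketch of how the new hook slots into a migration path. The helper name and the surrounding plumbing are hypothetical; only the mem_cgroup_migrate() and mem_cgroup_uncharge() behaviour described above comes from this patch:

	/*
	 * Sketch only: after migration has succeeded and the new page is
	 * fully rmapped, transfer the memcg association.  Both pages are
	 * locked; migration proper has them isolated from the LRU, so
	 * lrucare is false here (replace_page_cache() passes true).
	 */
	static void migrate_charge_sketch(struct page *oldpage,
					  struct page *newpage)
	{
		mem_cgroup_migrate(oldpage, newpage, false);
		/*
		 * The old page no longer carries PCG_USED, so when its
		 * last reference goes away, mem_cgroup_uncharge() finds
		 * it already uncharged and does nothing.
		 */
		put_page(oldpage);
	}
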
mem_cgroup_migrate() is suitable for replace_page_cache() as well, which gets rid of mem_cgroup_replace_page_cache(). However, care needs to be taken because both the source and the target page can already be charged and on the LRU when fuse is splicing: grab the page lock on the charge moving side to prevent changing pc->mem_cgroup of a page under migration. Also, the lruvecs of both pages change as we uncharge the old and charge the new during migration, and putback may race with us, so grab the lru lock and isolate the pages iff on LRU to prevent races and ensure the pages are on the right lruvec afterward. Swap accounting is massively simplified: because the page is no longer uncharged as early as swap cache deletion, a new mem_cgroup_swapout() can transfer the page's memory+swap charge (PCG_MEMSW) to the swap entry before the final put_page() in page reclaim. Finally, page_cgroup changes are now protected by whatever protection the page itself offers: anonymous pages are charged under the page table lock, whereas page cache insertions, swapin, and migration hold the page lock. Uncharging happens under full exclusion with no outstanding references. Charging and uncharging also ensure that the page is off-LRU, which serializes against charge migration. Remove the very costly page_cgroup lock and set pc->flags non-atomically. [mhocko@suse.cz: mem_cgroup_charge_statistics needs preempt_disable] [vdavydov@parallels.com: fix flags definition] Signed-off-by: Johannes Weiner Cc: Hugh Dickins Cc: Tejun Heo Cc: Vladimir Davydov Tested-by: Jet Chen Acked-by: Michal Hocko Tested-by: Felipe Balbi Signed-off-by: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/cgroups/memcg_test.txt | 128 +----- include/linux/memcontrol.h | 49 +-- include/linux/page_cgroup.h | 43 +- include/linux/swap.h | 12 +- mm/filemap.c | 4 +- mm/memcontrol.c | 828 ++++++++++++++--------------------- mm/memory.c | 2 - mm/migrate.c | 38 +- mm/rmap.c | 1 - mm/shmem.c | 8 +- mm/swap.c | 6 + mm/swap_state.c | 8 +- mm/swapfile.c | 7 +- mm/truncate.c | 9 - mm/vmscan.c | 12 +- mm/zswap.c | 2 +- 16 files changed, 389 insertions(+), 768 deletions(-) (limited to 'include/linux') diff --git a/Documentation/cgroups/memcg_test.txt b/Documentation/cgroups/memcg_test.txt index bcf750d3cecd..8870b0212150 100644 --- a/Documentation/cgroups/memcg_test.txt +++ b/Documentation/cgroups/memcg_test.txt @@ -29,28 +29,13 @@ Please note that implementation details can be changed. 2. Uncharge a page/swp_entry may be uncharged (usage -= PAGE_SIZE) by - mem_cgroup_uncharge_page() - Called when an anonymous page is fully unmapped. I.e., mapcount goes - to 0. If the page is SwapCache, uncharge is delayed until - mem_cgroup_uncharge_swapcache(). - - mem_cgroup_uncharge_cache_page() - Called when a page-cache is deleted from radix-tree. If the page is - SwapCache, uncharge is delayed until mem_cgroup_uncharge_swapcache(). - - mem_cgroup_uncharge_swapcache() - Called when SwapCache is removed from radix-tree. The charge itself - is moved to swap_cgroup. (If mem+swap controller is disabled, no - charge to swap occurs.) + mem_cgroup_uncharge() + Called when a page's refcount goes down to 0. mem_cgroup_uncharge_swap() Called when swp_entry's refcnt goes down to 0. A charge against swap disappears. - mem_cgroup_end_migration(old, new) - At success of migration old is uncharged (if necessary), a charge - to new page is committed. At failure, charge to old page is committed. - 3. 
charge-commit-cancel Memcg pages are charged in two steps: mem_cgroup_try_charge() @@ -69,18 +54,6 @@ Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y. Anonymous page is newly allocated at - page fault into MAP_ANONYMOUS mapping. - Copy-On-Write. - It is charged right after it's allocated before doing any page table - related operations. Of course, it's uncharged when another page is used - for the fault address. - - At freeing anonymous page (by exit() or munmap()), zap_pte() is called - and pages for ptes are freed one by one.(see mm/memory.c). Uncharges - are done at page_remove_rmap() when page_mapcount() goes down to 0. - - Another page freeing is by page-reclaim (vmscan.c) and anonymous - pages are swapped out. In this case, the page is marked as - PageSwapCache(). uncharge() routine doesn't uncharge the page marked - as SwapCache(). It's delayed until __delete_from_swap_cache(). 4.1 Swap-in. At swap-in, the page is taken from swap-cache. There are 2 cases. @@ -89,41 +62,6 @@ Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y. (b) If the SwapCache has been mapped by processes, it has been charged already. - This swap-in is one of the most complicated work. In do_swap_page(), - following events occur when pte is unchanged. - - (1) the page (SwapCache) is looked up. - (2) lock_page() - (3) try_charge_swapin() - (4) reuse_swap_page() (may call delete_swap_cache()) - (5) commit_charge_swapin() - (6) swap_free(). - - Considering following situation for example. - - (A) The page has not been charged before (2) and reuse_swap_page() - doesn't call delete_from_swap_cache(). - (B) The page has not been charged before (2) and reuse_swap_page() - calls delete_from_swap_cache(). - (C) The page has been charged before (2) and reuse_swap_page() doesn't - call delete_from_swap_cache(). - (D) The page has been charged before (2) and reuse_swap_page() calls - delete_from_swap_cache(). - - memory.usage/memsw.usage changes to this page/swp_entry will be - Case (A) (B) (C) (D) - Event - Before (2) 0/ 1 0/ 1 1/ 1 1/ 1 - =========================================== - (3) +1/+1 +1/+1 +1/+1 +1/+1 - (4) - 0/ 0 - -1/ 0 - (5) 0/-1 0/ 0 -1/-1 0/ 0 - (6) - 0/-1 - 0/-1 - =========================================== - Result 1/ 1 1/ 1 1/ 1 1/ 1 - - In any cases, charges to this page should be 1/ 1. - 4.2 Swap-out. At swap-out, typical state transition is below. @@ -136,28 +74,20 @@ Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y. swp_entry's refcnt -= 1. - At (b), the page is marked as SwapCache and not uncharged. - At (d), the page is removed from SwapCache and a charge in page_cgroup - is moved to swap_cgroup. - Finally, at task exit, (e) zap_pte() is called and swp_entry's refcnt -=1 -> 0. - Here, a charge in swap_cgroup disappears. 5. Page Cache Page Cache is charged at - add_to_page_cache_locked(). - uncharged at - - __remove_from_page_cache(). - The logic is very clear. (About migration, see below) Note: __remove_from_page_cache() is called by remove_from_page_cache() and __remove_mapping(). 6. Shmem(tmpfs) Page Cache - Memcg's charge/uncharge have special handlers of shmem. The best way - to understand shmem's page state transition is to read mm/shmem.c. + The best way to understand shmem's page state transition is to read + mm/shmem.c. But brief explanation of the behavior of memcg around shmem will be helpful to understand the logic. @@ -170,56 +100,10 @@ Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y. It's charged when... 
- A new page is added to shmem's radix-tree. - A swp page is read. (move a charge from swap_cgroup to page_cgroup) - It's uncharged when - - A page is removed from radix-tree and not SwapCache. - - When SwapCache is removed, a charge is moved to swap_cgroup. - - When swp_entry's refcnt goes down to 0, a charge in swap_cgroup - disappears. 7. Page Migration - One of the most complicated functions is page-migration-handler. - Memcg has 2 routines. Assume that we are migrating a page's contents - from OLDPAGE to NEWPAGE. - - Usual migration logic is.. - (a) remove the page from LRU. - (b) allocate NEWPAGE (migration target) - (c) lock by lock_page(). - (d) unmap all mappings. - (e-1) If necessary, replace entry in radix-tree. - (e-2) move contents of a page. - (f) map all mappings again. - (g) pushback the page to LRU. - (-) OLDPAGE will be freed. - - Before (g), memcg should complete all necessary charge/uncharge to - NEWPAGE/OLDPAGE. - - The point is.... - - If OLDPAGE is anonymous, all charges will be dropped at (d) because - try_to_unmap() drops all mapcount and the page will not be - SwapCache. - - - If OLDPAGE is SwapCache, charges will be kept at (g) because - __delete_from_swap_cache() isn't called at (e-1) - - - If OLDPAGE is page-cache, charges will be kept at (g) because - remove_from_swap_cache() isn't called at (e-1) - - memcg provides following hooks. - - - mem_cgroup_prepare_migration(OLDPAGE) - Called after (b) to account a charge (usage += PAGE_SIZE) against - memcg which OLDPAGE belongs to. - - - mem_cgroup_end_migration(OLDPAGE, NEWPAGE) - Called after (f) before (g). - If OLDPAGE is used, commit OLDPAGE again. If OLDPAGE is already - charged, a charge by prepare_migration() is automatically canceled. - If NEWPAGE is used, commit NEWPAGE and uncharge OLDPAGE. - - But zap_pte() (by exit or munmap) can be called while migration, - we have to check if OLDPAGE/NEWPAGE is a valid page after commit(). + + mem_cgroup_migrate() 8. LRU Each memcg has its own private LRU. 
Now, its handling is under global diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 1a9a096858e0..806b8fa15c5f 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -60,15 +60,17 @@ void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg, bool lrucare); void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg); -struct lruvec *mem_cgroup_zone_lruvec(struct zone *, struct mem_cgroup *); -struct lruvec *mem_cgroup_page_lruvec(struct page *, struct zone *); +void mem_cgroup_uncharge(struct page *page); + +/* Batched uncharging */ +void mem_cgroup_uncharge_start(void); +void mem_cgroup_uncharge_end(void); -/* For coalescing uncharge for reducing memcg' overhead*/ -extern void mem_cgroup_uncharge_start(void); -extern void mem_cgroup_uncharge_end(void); +void mem_cgroup_migrate(struct page *oldpage, struct page *newpage, + bool lrucare); -extern void mem_cgroup_uncharge_page(struct page *page); -extern void mem_cgroup_uncharge_cache_page(struct page *page); +struct lruvec *mem_cgroup_zone_lruvec(struct zone *, struct mem_cgroup *); +struct lruvec *mem_cgroup_page_lruvec(struct page *, struct zone *); bool __mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg, struct mem_cgroup *memcg); @@ -96,12 +98,6 @@ bool mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *memcg) extern struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *memcg); -extern void -mem_cgroup_prepare_migration(struct page *page, struct page *newpage, - struct mem_cgroup **memcgp); -extern void mem_cgroup_end_migration(struct mem_cgroup *memcg, - struct page *oldpage, struct page *newpage, bool migration_ok); - struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *, struct mem_cgroup *, struct mem_cgroup_reclaim_cookie *); @@ -116,8 +112,6 @@ unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list); void mem_cgroup_update_lru_size(struct lruvec *, enum lru_list, int); extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p); -extern void mem_cgroup_replace_page_cache(struct page *oldpage, - struct page *newpage); static inline void mem_cgroup_oom_enable(void) { @@ -235,19 +229,21 @@ static inline void mem_cgroup_cancel_charge(struct page *page, { } -static inline void mem_cgroup_uncharge_start(void) +static inline void mem_cgroup_uncharge(struct page *page) { } -static inline void mem_cgroup_uncharge_end(void) +static inline void mem_cgroup_uncharge_start(void) { } -static inline void mem_cgroup_uncharge_page(struct page *page) +static inline void mem_cgroup_uncharge_end(void) { } -static inline void mem_cgroup_uncharge_cache_page(struct page *page) +static inline void mem_cgroup_migrate(struct page *oldpage, + struct page *newpage, + bool lrucare) { } @@ -286,17 +282,6 @@ static inline struct cgroup_subsys_state return NULL; } -static inline void -mem_cgroup_prepare_migration(struct page *page, struct page *newpage, - struct mem_cgroup **memcgp) -{ -} - -static inline void mem_cgroup_end_migration(struct mem_cgroup *memcg, - struct page *oldpage, struct page *newpage, bool migration_ok) -{ -} - static inline struct mem_cgroup * mem_cgroup_iter(struct mem_cgroup *root, struct mem_cgroup *prev, @@ -392,10 +377,6 @@ static inline void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx) { } -static inline void mem_cgroup_replace_page_cache(struct page *oldpage, - struct page *newpage) -{ -} #endif /* CONFIG_MEMCG */ #if !defined(CONFIG_MEMCG) || 
!defined(CONFIG_DEBUG_VM) diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h index 777a524716db..9bfb8e68a595 100644 --- a/include/linux/page_cgroup.h +++ b/include/linux/page_cgroup.h @@ -3,9 +3,9 @@ enum { /* flags for mem_cgroup */ - PCG_LOCK, /* Lock for pc->mem_cgroup and following bits. */ - PCG_USED, /* this object is in use. */ - PCG_MIGRATION, /* under page migration */ + PCG_USED = 0x01, /* This page is charged to a memcg */ + PCG_MEM = 0x02, /* This page holds a memory charge */ + PCG_MEMSW = 0x04, /* This page holds a memory+swap charge */ __NR_PCG_FLAGS, }; @@ -44,42 +44,9 @@ static inline void __init page_cgroup_init(void) struct page_cgroup *lookup_page_cgroup(struct page *page); struct page *lookup_cgroup_page(struct page_cgroup *pc); -#define TESTPCGFLAG(uname, lname) \ -static inline int PageCgroup##uname(struct page_cgroup *pc) \ - { return test_bit(PCG_##lname, &pc->flags); } - -#define SETPCGFLAG(uname, lname) \ -static inline void SetPageCgroup##uname(struct page_cgroup *pc)\ - { set_bit(PCG_##lname, &pc->flags); } - -#define CLEARPCGFLAG(uname, lname) \ -static inline void ClearPageCgroup##uname(struct page_cgroup *pc) \ - { clear_bit(PCG_##lname, &pc->flags); } - -#define TESTCLEARPCGFLAG(uname, lname) \ -static inline int TestClearPageCgroup##uname(struct page_cgroup *pc) \ - { return test_and_clear_bit(PCG_##lname, &pc->flags); } - -TESTPCGFLAG(Used, USED) -CLEARPCGFLAG(Used, USED) -SETPCGFLAG(Used, USED) - -SETPCGFLAG(Migration, MIGRATION) -CLEARPCGFLAG(Migration, MIGRATION) -TESTPCGFLAG(Migration, MIGRATION) - -static inline void lock_page_cgroup(struct page_cgroup *pc) -{ - /* - * Don't take this lock in IRQ context. - * This lock is for pc->mem_cgroup, USED, MIGRATION - */ - bit_spin_lock(PCG_LOCK, &pc->flags); -} - -static inline void unlock_page_cgroup(struct page_cgroup *pc) +static inline int PageCgroupUsed(struct page_cgroup *pc) { - bit_spin_unlock(PCG_LOCK, &pc->flags); + return !!(pc->flags & PCG_USED); } #else /* CONFIG_MEMCG */ diff --git a/include/linux/swap.h b/include/linux/swap.h index 46a649e4e8cd..1b72060f093a 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -381,9 +381,13 @@ static inline int mem_cgroup_swappiness(struct mem_cgroup *mem) } #endif #ifdef CONFIG_MEMCG_SWAP -extern void mem_cgroup_uncharge_swap(swp_entry_t ent); +extern void mem_cgroup_swapout(struct page *page, swp_entry_t entry); +extern void mem_cgroup_uncharge_swap(swp_entry_t entry); #else -static inline void mem_cgroup_uncharge_swap(swp_entry_t ent) +static inline void mem_cgroup_swapout(struct page *page, swp_entry_t entry) +{ +} +static inline void mem_cgroup_uncharge_swap(swp_entry_t entry) { } #endif @@ -443,7 +447,7 @@ extern void swap_shmem_alloc(swp_entry_t); extern int swap_duplicate(swp_entry_t); extern int swapcache_prepare(swp_entry_t); extern void swap_free(swp_entry_t); -extern void swapcache_free(swp_entry_t, struct page *page); +extern void swapcache_free(swp_entry_t); extern int free_swap_and_cache(swp_entry_t); extern int swap_type_of(dev_t, sector_t, struct block_device **); extern unsigned int count_swap_pages(int, int); @@ -507,7 +511,7 @@ static inline void swap_free(swp_entry_t swp) { } -static inline void swapcache_free(swp_entry_t swp, struct page *page) +static inline void swapcache_free(swp_entry_t swp) { } diff --git a/mm/filemap.c b/mm/filemap.c index 349a40e35545..f501b56ec2c6 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -234,7 +234,6 @@ void delete_from_page_cache(struct page *page) 
spin_lock_irq(&mapping->tree_lock); __delete_from_page_cache(page, NULL); spin_unlock_irq(&mapping->tree_lock); - mem_cgroup_uncharge_cache_page(page); if (freepage) freepage(page); @@ -490,8 +489,7 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask) if (PageSwapBacked(new)) __inc_zone_page_state(new, NR_SHMEM); spin_unlock_irq(&mapping->tree_lock); - /* mem_cgroup codes must not be called under tree_lock */ - mem_cgroup_replace_page_cache(old, new); + mem_cgroup_migrate(old, new, true); radix_tree_preload_end(); if (freepage) freepage(old); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 1cbe1e54ff5f..9106f1b12f56 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -754,9 +754,11 @@ static void __mem_cgroup_remove_exceeded(struct mem_cgroup_per_zone *mz, static void mem_cgroup_remove_exceeded(struct mem_cgroup_per_zone *mz, struct mem_cgroup_tree_per_zone *mctz) { - spin_lock(&mctz->lock); + unsigned long flags; + + spin_lock_irqsave(&mctz->lock, flags); __mem_cgroup_remove_exceeded(mz, mctz); - spin_unlock(&mctz->lock); + spin_unlock_irqrestore(&mctz->lock, flags); } @@ -779,7 +781,9 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page) * mem is over its softlimit. */ if (excess || mz->on_tree) { - spin_lock(&mctz->lock); + unsigned long flags; + + spin_lock_irqsave(&mctz->lock, flags); /* if on-tree, remove it */ if (mz->on_tree) __mem_cgroup_remove_exceeded(mz, mctz); @@ -788,7 +792,7 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page) * If excess is 0, no tree ops. */ __mem_cgroup_insert_exceeded(mz, mctz, excess); - spin_unlock(&mctz->lock); + spin_unlock_irqrestore(&mctz->lock, flags); } } } @@ -839,9 +843,9 @@ mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz) { struct mem_cgroup_per_zone *mz; - spin_lock(&mctz->lock); + spin_lock_irq(&mctz->lock); mz = __mem_cgroup_largest_soft_limit_node(mctz); - spin_unlock(&mctz->lock); + spin_unlock_irq(&mctz->lock); return mz; } @@ -882,13 +886,6 @@ static long mem_cgroup_read_stat(struct mem_cgroup *memcg, return val; } -static void mem_cgroup_swap_statistics(struct mem_cgroup *memcg, - bool charge) -{ - int val = (charge) ? 1 : -1; - this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_SWAP], val); -} - static unsigned long mem_cgroup_read_events(struct mem_cgroup *memcg, enum mem_cgroup_events_index idx) { @@ -909,13 +906,13 @@ static unsigned long mem_cgroup_read_events(struct mem_cgroup *memcg, static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg, struct page *page, - bool anon, int nr_pages) + int nr_pages) { /* * Here, RSS means 'mapped anon' and anon's SwapCache. Shmem/tmpfs is * counted as CACHE even if it's on ANON LRU. 
*/ - if (anon) + if (PageAnon(page)) __this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_RSS], nr_pages); else @@ -1013,7 +1010,6 @@ static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg, */ static void memcg_check_events(struct mem_cgroup *memcg, struct page *page) { - preempt_disable(); /* threshold event is triggered in finer grain than soft limit */ if (unlikely(mem_cgroup_event_ratelimit(memcg, MEM_CGROUP_TARGET_THRESH))) { @@ -1026,8 +1022,6 @@ static void memcg_check_events(struct mem_cgroup *memcg, struct page *page) do_numainfo = mem_cgroup_event_ratelimit(memcg, MEM_CGROUP_TARGET_NUMAINFO); #endif - preempt_enable(); - mem_cgroup_threshold(memcg); if (unlikely(do_softlimit)) mem_cgroup_update_tree(memcg, page); @@ -1035,8 +1029,7 @@ static void memcg_check_events(struct mem_cgroup *memcg, struct page *page) if (unlikely(do_numainfo)) atomic_inc(&memcg->numainfo_events); #endif - } else - preempt_enable(); + } } struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p) @@ -1347,20 +1340,6 @@ out: return lruvec; } -/* - * Following LRU functions are allowed to be used without PCG_LOCK. - * Operations are called by routine of global LRU independently from memcg. - * What we have to take care of here is validness of pc->mem_cgroup. - * - * Changes to pc->mem_cgroup happens when - * 1. charge - * 2. moving account - * In typical case, "charge" is done before add-to-lru. Exception is SwapCache. - * It is added to LRU before charge. - * If PCG_USED bit is not set, page_cgroup is not added to this private LRU. - * When moving account, the page is not on LRU. It's isolated. - */ - /** * mem_cgroup_page_lruvec - return lruvec for adding an lru page * @page: the page @@ -2261,22 +2240,14 @@ cleanup: * * Notes: Race condition * - * We usually use lock_page_cgroup() for accessing page_cgroup member but - * it tends to be costly. But considering some conditions, we doesn't need - * to do so _always_. - * - * Considering "charge", lock_page_cgroup() is not required because all - * file-stat operations happen after a page is attached to radix-tree. There - * are no race with "charge". + * Charging occurs during page instantiation, while the page is + * unmapped and locked in page migration, or while the page table is + * locked in THP migration. No race is possible. * - * Considering "uncharge", we know that memcg doesn't clear pc->mem_cgroup - * at "uncharge" intentionally. So, we always see valid pc->mem_cgroup even - * if there are race with "uncharge". Statistics itself is properly handled - * by flags. + * Uncharge happens to pages with zero references, no race possible. * - * Considering "move", this is an only case we see a race. To make the race - * small, we check memcg->moving_account and detect there are possibility - * of race or not. If there is, we take a lock. + * Charge moving between groups is protected by checking mm->moving + * account and taking the move_lock in the slowpath. */ void __mem_cgroup_begin_update_page_stat(struct page *page, @@ -2689,6 +2660,16 @@ static struct mem_cgroup *mem_cgroup_lookup(unsigned short id) return mem_cgroup_from_id(id); } +/* + * try_get_mem_cgroup_from_page - look up page's memcg association + * @page: the page + * + * Look up, get a css reference, and return the memcg that owns @page. + * + * The page must be locked to prevent racing with swap-in and page + * cache charges. If coming from an unlocked page table, the caller + * must ensure the page is on the LRU or this can race with charging. 
+ */ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page) { struct mem_cgroup *memcg = NULL; @@ -2699,7 +2680,6 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page) VM_BUG_ON_PAGE(!PageLocked(page), page); pc = lookup_page_cgroup(page); - lock_page_cgroup(pc); if (PageCgroupUsed(pc)) { memcg = pc->mem_cgroup; if (memcg && !css_tryget_online(&memcg->css)) @@ -2713,19 +2693,46 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page) memcg = NULL; rcu_read_unlock(); } - unlock_page_cgroup(pc); return memcg; } +static void lock_page_lru(struct page *page, int *isolated) +{ + struct zone *zone = page_zone(page); + + spin_lock_irq(&zone->lru_lock); + if (PageLRU(page)) { + struct lruvec *lruvec; + + lruvec = mem_cgroup_page_lruvec(page, zone); + ClearPageLRU(page); + del_page_from_lru_list(page, lruvec, page_lru(page)); + *isolated = 1; + } else + *isolated = 0; +} + +static void unlock_page_lru(struct page *page, int isolated) +{ + struct zone *zone = page_zone(page); + + if (isolated) { + struct lruvec *lruvec; + + lruvec = mem_cgroup_page_lruvec(page, zone); + VM_BUG_ON_PAGE(PageLRU(page), page); + SetPageLRU(page); + add_page_to_lru_list(page, lruvec, page_lru(page)); + } + spin_unlock_irq(&zone->lru_lock); +} + static void commit_charge(struct page *page, struct mem_cgroup *memcg, - unsigned int nr_pages, bool anon, bool lrucare) + unsigned int nr_pages, bool lrucare) { struct page_cgroup *pc = lookup_page_cgroup(page); - struct zone *uninitialized_var(zone); - struct lruvec *lruvec; - bool was_on_lru = false; + int isolated; - lock_page_cgroup(pc); VM_BUG_ON_PAGE(PageCgroupUsed(pc), page); /* * we don't need page_cgroup_lock about tail pages, becase they are not @@ -2736,39 +2743,38 @@ static void commit_charge(struct page *page, struct mem_cgroup *memcg, * In some cases, SwapCache and FUSE(splice_buf->radixtree), the page * may already be on some other mem_cgroup's LRU. Take care of it. */ - if (lrucare) { - zone = page_zone(page); - spin_lock_irq(&zone->lru_lock); - if (PageLRU(page)) { - lruvec = mem_cgroup_zone_lruvec(zone, pc->mem_cgroup); - ClearPageLRU(page); - del_page_from_lru_list(page, lruvec, page_lru(page)); - was_on_lru = true; - } - } + if (lrucare) + lock_page_lru(page, &isolated); + /* + * Nobody should be changing or seriously looking at + * pc->mem_cgroup and pc->flags at this point: + * + * - the page is uncharged + * + * - the page is off-LRU + * + * - an anonymous fault has exclusive page access, except for + * a locked page table + * + * - a page cache insertion, a swapin fault, or a migration + * have the page locked + */ pc->mem_cgroup = memcg; - SetPageCgroupUsed(pc); - - if (lrucare) { - if (was_on_lru) { - lruvec = mem_cgroup_zone_lruvec(zone, pc->mem_cgroup); - VM_BUG_ON_PAGE(PageLRU(page), page); - SetPageLRU(page); - add_page_to_lru_list(page, lruvec, page_lru(page)); - } - spin_unlock_irq(&zone->lru_lock); - } + pc->flags = PCG_USED | PCG_MEM | (do_swap_account ? PCG_MEMSW : 0); - mem_cgroup_charge_statistics(memcg, page, anon, nr_pages); - unlock_page_cgroup(pc); + if (lrucare) + unlock_page_lru(page, isolated); + local_irq_disable(); + mem_cgroup_charge_statistics(memcg, page, nr_pages); /* * "charge_statistics" updated event counter. Then, check it. * Insert ancestor (and ancestor's ancestors), to softlimit RB-tree. * if they exceeds softlimit. 
*/ memcg_check_events(memcg, page); + local_irq_enable(); } static DEFINE_MUTEX(set_limit_mutex); @@ -3395,7 +3401,6 @@ static inline void memcg_unregister_all_caches(struct mem_cgroup *memcg) #ifdef CONFIG_TRANSPARENT_HUGEPAGE -#define PCGF_NOCOPY_AT_SPLIT (1 << PCG_LOCK | 1 << PCG_MIGRATION) /* * Because tail pages are not marked as "used", set it. We're under * zone->lru_lock, 'splitting on pmd' and compound_lock. @@ -3416,7 +3421,7 @@ void mem_cgroup_split_huge_fixup(struct page *head) for (i = 1; i < HPAGE_PMD_NR; i++) { pc = head_pc + i; pc->mem_cgroup = memcg; - pc->flags = head_pc->flags & ~PCGF_NOCOPY_AT_SPLIT; + pc->flags = head_pc->flags; } __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS_HUGE], HPAGE_PMD_NR); @@ -3446,7 +3451,6 @@ static int mem_cgroup_move_account(struct page *page, { unsigned long flags; int ret; - bool anon = PageAnon(page); VM_BUG_ON(from == to); VM_BUG_ON_PAGE(PageLRU(page), page); @@ -3460,15 +3464,21 @@ static int mem_cgroup_move_account(struct page *page, if (nr_pages > 1 && !PageTransHuge(page)) goto out; - lock_page_cgroup(pc); + /* + * Prevent mem_cgroup_migrate() from looking at pc->mem_cgroup + * of its source page while we change it: page migration takes + * both pages off the LRU, but page cache replacement doesn't. + */ + if (!trylock_page(page)) + goto out; ret = -EINVAL; if (!PageCgroupUsed(pc) || pc->mem_cgroup != from) - goto unlock; + goto out_unlock; move_lock_mem_cgroup(from, &flags); - if (!anon && page_mapped(page)) { + if (!PageAnon(page) && page_mapped(page)) { __this_cpu_sub(from->stat->count[MEM_CGROUP_STAT_FILE_MAPPED], nr_pages); __this_cpu_add(to->stat->count[MEM_CGROUP_STAT_FILE_MAPPED], @@ -3482,20 +3492,25 @@ static int mem_cgroup_move_account(struct page *page, nr_pages); } - mem_cgroup_charge_statistics(from, page, anon, -nr_pages); + /* + * It is safe to change pc->mem_cgroup here because the page + * is referenced, charged, and isolated - we can't race with + * uncharging, charging, migration, or LRU putback. + */ /* caller should have done css_get */ pc->mem_cgroup = to; - mem_cgroup_charge_statistics(to, page, anon, nr_pages); move_unlock_mem_cgroup(from, &flags); ret = 0; -unlock: - unlock_page_cgroup(pc); - /* - * check events - */ + + local_irq_disable(); + mem_cgroup_charge_statistics(to, page, nr_pages); memcg_check_events(to, page); + mem_cgroup_charge_statistics(from, page, -nr_pages); memcg_check_events(from, page); + local_irq_enable(); +out_unlock: + unlock_page(page); out: return ret; } @@ -3566,193 +3581,6 @@ out: return ret; } -static void mem_cgroup_do_uncharge(struct mem_cgroup *memcg, - unsigned int nr_pages, - const enum charge_type ctype) -{ - struct memcg_batch_info *batch = NULL; - bool uncharge_memsw = true; - - /* If swapout, usage of swap doesn't decrease */ - if (!do_swap_account || ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT) - uncharge_memsw = false; - - batch = ¤t->memcg_batch; - /* - * In usual, we do css_get() when we remember memcg pointer. - * But in this case, we keep res->usage until end of a series of - * uncharges. Then, it's ok to ignore memcg's refcnt. - */ - if (!batch->memcg) - batch->memcg = memcg; - /* - * do_batch > 0 when unmapping pages or inode invalidate/truncate. - * In those cases, all pages freed continuously can be expected to be in - * the same cgroup and we have chance to coalesce uncharges. - * But we do uncharge one by one if this is killed by OOM(TIF_MEMDIE) - * because we want to do uncharge as soon as possible. 
- */ - - if (!batch->do_batch || test_thread_flag(TIF_MEMDIE)) - goto direct_uncharge; - - if (nr_pages > 1) - goto direct_uncharge; - - /* - * In typical case, batch->memcg == mem. This means we can - * merge a series of uncharges to an uncharge of res_counter. - * If not, we uncharge res_counter ony by one. - */ - if (batch->memcg != memcg) - goto direct_uncharge; - /* remember freed charge and uncharge it later */ - batch->nr_pages++; - if (uncharge_memsw) - batch->memsw_nr_pages++; - return; -direct_uncharge: - res_counter_uncharge(&memcg->res, nr_pages * PAGE_SIZE); - if (uncharge_memsw) - res_counter_uncharge(&memcg->memsw, nr_pages * PAGE_SIZE); - if (unlikely(batch->memcg != memcg)) - memcg_oom_recover(memcg); -} - -/* - * uncharge if !page_mapped(page) - */ -static struct mem_cgroup * -__mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype, - bool end_migration) -{ - struct mem_cgroup *memcg = NULL; - unsigned int nr_pages = 1; - struct page_cgroup *pc; - bool anon; - - if (mem_cgroup_disabled()) - return NULL; - - if (PageTransHuge(page)) { - nr_pages <<= compound_order(page); - VM_BUG_ON_PAGE(!PageTransHuge(page), page); - } - /* - * Check if our page_cgroup is valid - */ - pc = lookup_page_cgroup(page); - if (unlikely(!PageCgroupUsed(pc))) - return NULL; - - lock_page_cgroup(pc); - - memcg = pc->mem_cgroup; - - if (!PageCgroupUsed(pc)) - goto unlock_out; - - anon = PageAnon(page); - - switch (ctype) { - case MEM_CGROUP_CHARGE_TYPE_ANON: - /* - * Generally PageAnon tells if it's the anon statistics to be - * updated; but sometimes e.g. mem_cgroup_uncharge_page() is - * used before page reached the stage of being marked PageAnon. - */ - anon = true; - /* fallthrough */ - case MEM_CGROUP_CHARGE_TYPE_DROP: - /* See mem_cgroup_prepare_migration() */ - if (page_mapped(page)) - goto unlock_out; - /* - * Pages under migration may not be uncharged. But - * end_migration() /must/ be the one uncharging the - * unused post-migration page and so it has to call - * here with the migration bit still set. See the - * res_counter handling below. - */ - if (!end_migration && PageCgroupMigration(pc)) - goto unlock_out; - break; - case MEM_CGROUP_CHARGE_TYPE_SWAPOUT: - if (!PageAnon(page)) { /* Shared memory */ - if (page->mapping && !page_is_file_cache(page)) - goto unlock_out; - } else if (page_mapped(page)) /* Anon */ - goto unlock_out; - break; - default: - break; - } - - mem_cgroup_charge_statistics(memcg, page, anon, -nr_pages); - - ClearPageCgroupUsed(pc); - /* - * pc->mem_cgroup is not cleared here. It will be accessed when it's - * freed from LRU. This is safe because uncharged page is expected not - * to be reused (freed soon). Exception is SwapCache, it's handled by - * special functions. - */ - - unlock_page_cgroup(pc); - /* - * even after unlock, we have memcg->res.usage here and this memcg - * will never be freed, so it's safe to call css_get(). - */ - memcg_check_events(memcg, page); - if (do_swap_account && ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT) { - mem_cgroup_swap_statistics(memcg, true); - css_get(&memcg->css); - } - /* - * Migration does not charge the res_counter for the - * replacement page, so leave it alone when phasing out the - * page that is unused after the migration. - */ - if (!end_migration) - mem_cgroup_do_uncharge(memcg, nr_pages, ctype); - - return memcg; - -unlock_out: - unlock_page_cgroup(pc); - return NULL; -} - -void mem_cgroup_uncharge_page(struct page *page) -{ - /* early check. 
*/ - if (page_mapped(page)) - return; - VM_BUG_ON_PAGE(page->mapping && !PageAnon(page), page); - /* - * If the page is in swap cache, uncharge should be deferred - * to the swap path, which also properly accounts swap usage - * and handles memcg lifetime. - * - * Note that this check is not stable and reclaim may add the - * page to swap cache at any time after this. However, if the - * page is not in swap cache by the time page->mapcount hits - * 0, there won't be any page table references to the swap - * slot, and reclaim will free it and not actually write the - * page to disk. - */ - if (PageSwapCache(page)) - return; - __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_ANON, false); -} - -void mem_cgroup_uncharge_cache_page(struct page *page) -{ - VM_BUG_ON_PAGE(page_mapped(page), page); - VM_BUG_ON_PAGE(page->mapping, page); - __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE, false); -} - /* * Batch_start/batch_end is called in unmap_page_range/invlidate/trucate. * In that cases, pages are freed continuously and we can expect pages @@ -3763,6 +3591,9 @@ void mem_cgroup_uncharge_cache_page(struct page *page) void mem_cgroup_uncharge_start(void) { + unsigned long flags; + + local_irq_save(flags); current->memcg_batch.do_batch++; /* We can do nest. */ if (current->memcg_batch.do_batch == 1) { @@ -3770,21 +3601,18 @@ void mem_cgroup_uncharge_start(void) current->memcg_batch.nr_pages = 0; current->memcg_batch.memsw_nr_pages = 0; } + local_irq_restore(flags); } void mem_cgroup_uncharge_end(void) { struct memcg_batch_info *batch = ¤t->memcg_batch; + unsigned long flags; - if (!batch->do_batch) - return; - - batch->do_batch--; - if (batch->do_batch) /* If stacked, do nothing. */ - return; - - if (!batch->memcg) - return; + local_irq_save(flags); + VM_BUG_ON(!batch->do_batch); + if (--batch->do_batch) /* If stacked, do nothing */ + goto out; /* * This "batch->memcg" is valid without any css_get/put etc... * bacause we hide charges behind us. @@ -3796,61 +3624,16 @@ void mem_cgroup_uncharge_end(void) res_counter_uncharge(&batch->memcg->memsw, batch->memsw_nr_pages * PAGE_SIZE); memcg_oom_recover(batch->memcg); - /* forget this pointer (for sanity check) */ - batch->memcg = NULL; -} - -#ifdef CONFIG_SWAP -/* - * called after __delete_from_swap_cache() and drop "page" account. - * memcg information is recorded to swap_cgroup of "ent" - */ -void -mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout) -{ - struct mem_cgroup *memcg; - int ctype = MEM_CGROUP_CHARGE_TYPE_SWAPOUT; - - if (!swapout) /* this was a swap cache but the swap is unused ! */ - ctype = MEM_CGROUP_CHARGE_TYPE_DROP; - - memcg = __mem_cgroup_uncharge_common(page, ctype, false); - - /* - * record memcg information, if swapout && memcg != NULL, - * css_get() was called in uncharge(). - */ - if (do_swap_account && swapout && memcg) - swap_cgroup_record(ent, mem_cgroup_id(memcg)); +out: + local_irq_restore(flags); } -#endif #ifdef CONFIG_MEMCG_SWAP -/* - * called from swap_entry_free(). remove record in swap_cgroup and - * uncharge "memsw" account. - */ -void mem_cgroup_uncharge_swap(swp_entry_t ent) +static void mem_cgroup_swap_statistics(struct mem_cgroup *memcg, + bool charge) { - struct mem_cgroup *memcg; - unsigned short id; - - if (!do_swap_account) - return; - - id = swap_cgroup_record(ent, 0); - rcu_read_lock(); - memcg = mem_cgroup_lookup(id); - if (memcg) { - /* - * We uncharge this because swap is freed. This memcg can - * be obsolete one. 
We avoid calling css_tryget_online(). - */ - res_counter_uncharge(&memcg->memsw, PAGE_SIZE); - mem_cgroup_swap_statistics(memcg, false); - css_put(&memcg->css); - } - rcu_read_unlock(); + int val = (charge) ? 1 : -1; + this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_SWAP], val); } /** @@ -3902,169 +3685,6 @@ static inline int mem_cgroup_move_swap_account(swp_entry_t entry, } #endif -/* - * Before starting migration, account PAGE_SIZE to mem_cgroup that the old - * page belongs to. - */ -void mem_cgroup_prepare_migration(struct page *page, struct page *newpage, - struct mem_cgroup **memcgp) -{ - struct mem_cgroup *memcg = NULL; - unsigned int nr_pages = 1; - struct page_cgroup *pc; - - *memcgp = NULL; - - if (mem_cgroup_disabled()) - return; - - if (PageTransHuge(page)) - nr_pages <<= compound_order(page); - - pc = lookup_page_cgroup(page); - lock_page_cgroup(pc); - if (PageCgroupUsed(pc)) { - memcg = pc->mem_cgroup; - css_get(&memcg->css); - /* - * At migrating an anonymous page, its mapcount goes down - * to 0 and uncharge() will be called. But, even if it's fully - * unmapped, migration may fail and this page has to be - * charged again. We set MIGRATION flag here and delay uncharge - * until end_migration() is called - * - * Corner Case Thinking - * A) - * When the old page was mapped as Anon and it's unmap-and-freed - * while migration was ongoing. - * If unmap finds the old page, uncharge() of it will be delayed - * until end_migration(). If unmap finds a new page, it's - * uncharged when it make mapcount to be 1->0. If unmap code - * finds swap_migration_entry, the new page will not be mapped - * and end_migration() will find it(mapcount==0). - * - * B) - * When the old page was mapped but migraion fails, the kernel - * remaps it. A charge for it is kept by MIGRATION flag even - * if mapcount goes down to 0. We can do remap successfully - * without charging it again. - * - * C) - * The "old" page is under lock_page() until the end of - * migration, so, the old page itself will not be swapped-out. - * If the new page is swapped out before end_migraton, our - * hook to usual swap-out path will catch the event. - */ - if (PageAnon(page)) - SetPageCgroupMigration(pc); - } - unlock_page_cgroup(pc); - /* - * If the page is not charged at this point, - * we return here. - */ - if (!memcg) - return; - - *memcgp = memcg; - /* - * We charge new page before it's used/mapped. So, even if unlock_page() - * is called before end_migration, we can catch all events on this new - * page. In the case new page is migrated but not remapped, new page's - * mapcount will be finally 0 and we call uncharge in end_migration(). - */ - /* - * The page is committed to the memcg, but it's not actually - * charged to the res_counter since we plan on replacing the - * old one and only one page is going to be left afterwards. - */ - commit_charge(newpage, memcg, nr_pages, PageAnon(page), false); -} - -/* remove redundant charge if migration failed*/ -void mem_cgroup_end_migration(struct mem_cgroup *memcg, - struct page *oldpage, struct page *newpage, bool migration_ok) -{ - struct page *used, *unused; - struct page_cgroup *pc; - bool anon; - - if (!memcg) - return; - - if (!migration_ok) { - used = oldpage; - unused = newpage; - } else { - used = newpage; - unused = oldpage; - } - anon = PageAnon(used); - __mem_cgroup_uncharge_common(unused, - anon ? 
MEM_CGROUP_CHARGE_TYPE_ANON - : MEM_CGROUP_CHARGE_TYPE_CACHE, - true); - css_put(&memcg->css); - /* - * We disallowed uncharge of pages under migration because mapcount - * of the page goes down to zero, temporarly. - * Clear the flag and check the page should be charged. - */ - pc = lookup_page_cgroup(oldpage); - lock_page_cgroup(pc); - ClearPageCgroupMigration(pc); - unlock_page_cgroup(pc); - - /* - * If a page is a file cache, radix-tree replacement is very atomic - * and we can skip this check. When it was an Anon page, its mapcount - * goes down to 0. But because we added MIGRATION flage, it's not - * uncharged yet. There are several case but page->mapcount check - * and USED bit check in mem_cgroup_uncharge_page() will do enough - * check. (see prepare_charge() also) - */ - if (anon) - mem_cgroup_uncharge_page(used); -} - -/* - * At replace page cache, newpage is not under any memcg but it's on - * LRU. So, this function doesn't touch res_counter but handles LRU - * in correct way. Both pages are locked so we cannot race with uncharge. - */ -void mem_cgroup_replace_page_cache(struct page *oldpage, - struct page *newpage) -{ - struct mem_cgroup *memcg = NULL; - struct page_cgroup *pc; - - if (mem_cgroup_disabled()) - return; - - pc = lookup_page_cgroup(oldpage); - /* fix accounting on old pages */ - lock_page_cgroup(pc); - if (PageCgroupUsed(pc)) { - memcg = pc->mem_cgroup; - mem_cgroup_charge_statistics(memcg, oldpage, false, -1); - ClearPageCgroupUsed(pc); - } - unlock_page_cgroup(pc); - - /* - * When called from shmem_replace_page(), in some cases the - * oldpage has already been charged, and in some cases not. - */ - if (!memcg) - return; - /* - * Even if newpage->mapping was NULL before starting replacement, - * the newpage may be on LRU(or pagevec for LRU) already. We lock - * LRU while we overwrite pc->mem_cgroup. - */ - commit_charge(newpage, memcg, 1, false, true); -} - #ifdef CONFIG_DEBUG_VM static struct page_cgroup *lookup_page_cgroup_used(struct page *page) { @@ -4263,7 +3883,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, gfp_mask, &nr_scanned); nr_reclaimed += reclaimed; *total_scanned += nr_scanned; - spin_lock(&mctz->lock); + spin_lock_irq(&mctz->lock); /* * If we failed to reclaim anything from this memory cgroup @@ -4303,7 +3923,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, */ /* If excess == 0, no tree ops */ __mem_cgroup_insert_exceeded(mz, mctz, excess); - spin_unlock(&mctz->lock); + spin_unlock_irq(&mctz->lock); css_put(&mz->memcg->css); loop++; /* @@ -6265,9 +5885,9 @@ static enum mc_target_type get_mctgt_type(struct vm_area_struct *vma, if (page) { pc = lookup_page_cgroup(page); /* - * Do only loose check w/o page_cgroup lock. - * mem_cgroup_move_account() checks the pc is valid or not under - * the lock. + * Do only loose check w/o serialization. + * mem_cgroup_move_account() checks the pc is valid or + * not under LRU exclusion. */ if (PageCgroupUsed(pc) && pc->mem_cgroup == mc.from) { ret = MC_TARGET_PAGE; @@ -6729,6 +6349,67 @@ static void __init enable_swap_cgroup(void) } #endif +#ifdef CONFIG_MEMCG_SWAP +/** + * mem_cgroup_swapout - transfer a memsw charge to swap + * @page: page whose memsw charge to transfer + * @entry: swap entry to move the charge to + * + * Transfer the memsw charge of @page to @entry. 
+ */ +void mem_cgroup_swapout(struct page *page, swp_entry_t entry) +{ + struct page_cgroup *pc; + unsigned short oldid; + + VM_BUG_ON_PAGE(PageLRU(page), page); + VM_BUG_ON_PAGE(page_count(page), page); + + if (!do_swap_account) + return; + + pc = lookup_page_cgroup(page); + + /* Readahead page, never charged */ + if (!PageCgroupUsed(pc)) + return; + + VM_BUG_ON_PAGE(!(pc->flags & PCG_MEMSW), page); + + oldid = swap_cgroup_record(entry, mem_cgroup_id(pc->mem_cgroup)); + VM_BUG_ON_PAGE(oldid, page); + + pc->flags &= ~PCG_MEMSW; + css_get(&pc->mem_cgroup->css); + mem_cgroup_swap_statistics(pc->mem_cgroup, true); +} + +/** + * mem_cgroup_uncharge_swap - uncharge a swap entry + * @entry: swap entry to uncharge + * + * Drop the memsw charge associated with @entry. + */ +void mem_cgroup_uncharge_swap(swp_entry_t entry) +{ + struct mem_cgroup *memcg; + unsigned short id; + + if (!do_swap_account) + return; + + id = swap_cgroup_record(entry, 0); + rcu_read_lock(); + memcg = mem_cgroup_lookup(id); + if (memcg) { + res_counter_uncharge(&memcg->memsw, PAGE_SIZE); + mem_cgroup_swap_statistics(memcg, false); + css_put(&memcg->css); + } + rcu_read_unlock(); +} +#endif + /** * mem_cgroup_try_charge - try charging a page * @page: page to charge @@ -6831,7 +6512,7 @@ void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg, VM_BUG_ON_PAGE(!PageTransHuge(page), page); } - commit_charge(page, memcg, nr_pages, PageAnon(page), lrucare); + commit_charge(page, memcg, nr_pages, lrucare); if (do_swap_account && PageSwapCache(page)) { swp_entry_t entry = { .val = page_private(page) }; @@ -6873,6 +6554,139 @@ void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg) cancel_charge(memcg, nr_pages); } +/** + * mem_cgroup_uncharge - uncharge a page + * @page: page to uncharge + * + * Uncharge a page previously charged with mem_cgroup_try_charge() and + * mem_cgroup_commit_charge(). + */ +void mem_cgroup_uncharge(struct page *page) +{ + struct memcg_batch_info *batch; + unsigned int nr_pages = 1; + struct mem_cgroup *memcg; + struct page_cgroup *pc; + unsigned long pc_flags; + unsigned long flags; + + VM_BUG_ON_PAGE(PageLRU(page), page); + VM_BUG_ON_PAGE(page_count(page), page); + + if (mem_cgroup_disabled()) + return; + + pc = lookup_page_cgroup(page); + + /* Every final put_page() ends up here */ + if (!PageCgroupUsed(pc)) + return; + + if (PageTransHuge(page)) { + nr_pages <<= compound_order(page); + VM_BUG_ON_PAGE(!PageTransHuge(page), page); + } + /* + * Nobody should be changing or seriously looking at + * pc->mem_cgroup and pc->flags at this point, we have fully + * exclusive access to the page. 
+ */ + memcg = pc->mem_cgroup; + pc_flags = pc->flags; + pc->flags = 0; + + local_irq_save(flags); + + if (nr_pages > 1) + goto direct; + if (unlikely(test_thread_flag(TIF_MEMDIE))) + goto direct; + batch = ¤t->memcg_batch; + if (!batch->do_batch) + goto direct; + if (batch->memcg && batch->memcg != memcg) + goto direct; + if (!batch->memcg) + batch->memcg = memcg; + if (pc_flags & PCG_MEM) + batch->nr_pages++; + if (pc_flags & PCG_MEMSW) + batch->memsw_nr_pages++; + goto out; +direct: + if (pc_flags & PCG_MEM) + res_counter_uncharge(&memcg->res, nr_pages * PAGE_SIZE); + if (pc_flags & PCG_MEMSW) + res_counter_uncharge(&memcg->memsw, nr_pages * PAGE_SIZE); + memcg_oom_recover(memcg); +out: + mem_cgroup_charge_statistics(memcg, page, -nr_pages); + memcg_check_events(memcg, page); + + local_irq_restore(flags); +} + +/** + * mem_cgroup_migrate - migrate a charge to another page + * @oldpage: currently charged page + * @newpage: page to transfer the charge to + * @lrucare: both pages might be on the LRU already + * + * Migrate the charge from @oldpage to @newpage. + * + * Both pages must be locked, @newpage->mapping must be set up. + */ +void mem_cgroup_migrate(struct page *oldpage, struct page *newpage, + bool lrucare) +{ + unsigned int nr_pages = 1; + struct page_cgroup *pc; + int isolated; + + VM_BUG_ON_PAGE(!PageLocked(oldpage), oldpage); + VM_BUG_ON_PAGE(!PageLocked(newpage), newpage); + VM_BUG_ON_PAGE(!lrucare && PageLRU(oldpage), oldpage); + VM_BUG_ON_PAGE(!lrucare && PageLRU(newpage), newpage); + VM_BUG_ON_PAGE(PageAnon(oldpage) != PageAnon(newpage), newpage); + + if (mem_cgroup_disabled()) + return; + + /* Page cache replacement: new page already charged? */ + pc = lookup_page_cgroup(newpage); + if (PageCgroupUsed(pc)) + return; + + /* Re-entrant migration: old page already uncharged? */ + pc = lookup_page_cgroup(oldpage); + if (!PageCgroupUsed(pc)) + return; + + VM_BUG_ON_PAGE(!(pc->flags & PCG_MEM), oldpage); + VM_BUG_ON_PAGE(do_swap_account && !(pc->flags & PCG_MEMSW), oldpage); + + if (PageTransHuge(oldpage)) { + nr_pages <<= compound_order(oldpage); + VM_BUG_ON_PAGE(!PageTransHuge(oldpage), oldpage); + VM_BUG_ON_PAGE(!PageTransHuge(newpage), newpage); + } + + if (lrucare) + lock_page_lru(oldpage, &isolated); + + pc->flags = 0; + + if (lrucare) + unlock_page_lru(oldpage, isolated); + + local_irq_disable(); + mem_cgroup_charge_statistics(pc->mem_cgroup, oldpage, -nr_pages); + memcg_check_events(pc->mem_cgroup, oldpage); + local_irq_enable(); + + commit_charge(newpage, pc->mem_cgroup, nr_pages, lrucare); +} + /* * subsys_initcall() for memory controller. 
* diff --git a/mm/memory.c b/mm/memory.c index 6d7648773dc4..2a899e4e82ba 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1292,7 +1292,6 @@ static void unmap_page_range(struct mmu_gather *tlb, details = NULL; BUG_ON(addr >= end); - mem_cgroup_uncharge_start(); tlb_start_vma(tlb, vma); pgd = pgd_offset(vma->vm_mm, addr); do { @@ -1302,7 +1301,6 @@ static void unmap_page_range(struct mmu_gather *tlb, next = zap_pud_range(tlb, vma, pgd, addr, next, details); } while (pgd++, addr = next, addr != end); tlb_end_vma(tlb, vma); - mem_cgroup_uncharge_end(); } diff --git a/mm/migrate.c b/mm/migrate.c index be6dbf995c0c..f78ec9bd454d 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -780,6 +780,7 @@ static int move_to_new_page(struct page *newpage, struct page *page, if (rc != MIGRATEPAGE_SUCCESS) { newpage->mapping = NULL; } else { + mem_cgroup_migrate(page, newpage, false); if (remap_swapcache) remove_migration_ptes(page, newpage); page->mapping = NULL; @@ -795,7 +796,6 @@ static int __unmap_and_move(struct page *page, struct page *newpage, { int rc = -EAGAIN; int remap_swapcache = 1; - struct mem_cgroup *mem; struct anon_vma *anon_vma = NULL; if (!trylock_page(page)) { @@ -821,9 +821,6 @@ static int __unmap_and_move(struct page *page, struct page *newpage, lock_page(page); } - /* charge against new page */ - mem_cgroup_prepare_migration(page, newpage, &mem); - if (PageWriteback(page)) { /* * Only in the case of a full synchronous migration is it @@ -833,10 +830,10 @@ static int __unmap_and_move(struct page *page, struct page *newpage, */ if (mode != MIGRATE_SYNC) { rc = -EBUSY; - goto uncharge; + goto out_unlock; } if (!force) - goto uncharge; + goto out_unlock; wait_on_page_writeback(page); } /* @@ -872,7 +869,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage, */ remap_swapcache = 0; } else { - goto uncharge; + goto out_unlock; } } @@ -885,7 +882,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage, * the page migration right away (proteced by page lock). */ rc = balloon_page_migrate(newpage, page, mode); - goto uncharge; + goto out_unlock; } /* @@ -904,7 +901,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage, VM_BUG_ON_PAGE(PageAnon(page), page); if (page_has_private(page)) { try_to_free_buffers(page); - goto uncharge; + goto out_unlock; } goto skip_unmap; } @@ -923,10 +920,7 @@ skip_unmap: if (anon_vma) put_anon_vma(anon_vma); -uncharge: - mem_cgroup_end_migration(mem, page, newpage, - (rc == MIGRATEPAGE_SUCCESS || - rc == MIGRATEPAGE_BALLOON_SUCCESS)); +out_unlock: unlock_page(page); out: return rc; @@ -1786,7 +1780,6 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm, pg_data_t *pgdat = NODE_DATA(node); int isolated = 0; struct page *new_page = NULL; - struct mem_cgroup *memcg = NULL; int page_lru = page_is_file_cache(page); unsigned long mmun_start = address & HPAGE_PMD_MASK; unsigned long mmun_end = mmun_start + HPAGE_PMD_SIZE; @@ -1852,15 +1845,6 @@ fail_putback: goto out_unlock; } - /* - * Traditional migration needs to prepare the memcg charge - * transaction early to prevent the old page from being - * uncharged when installing migration entries. Here we can - * save the potential rollback and start the charge transfer - * only when migration is already known to end successfully. 
- */ - mem_cgroup_prepare_migration(page, new_page, &memcg); - orig_entry = *pmd; entry = mk_pmd(new_page, vma->vm_page_prot); entry = pmd_mkhuge(entry); @@ -1888,14 +1872,10 @@ fail_putback: goto fail_putback; } + mem_cgroup_migrate(page, new_page, false); + page_remove_rmap(page); - /* - * Finish the charge transaction under the page table lock to - * prevent split_huge_page() from dividing up the charge - * before it's fully transferred to the new page. - */ - mem_cgroup_end_migration(memcg, page, new_page, true); spin_unlock(ptl); mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); diff --git a/mm/rmap.c b/mm/rmap.c index f56b5ed78128..3e8491c504f8 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1089,7 +1089,6 @@ void page_remove_rmap(struct page *page) if (unlikely(PageHuge(page))) goto out; if (anon) { - mem_cgroup_uncharge_page(page); if (PageTransHuge(page)) __dec_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES); diff --git a/mm/shmem.c b/mm/shmem.c index 1f1a8085538b..6dc80d298f9d 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -419,7 +419,6 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend, pvec.pages, indices); if (!pvec.nr) break; - mem_cgroup_uncharge_start(); for (i = 0; i < pagevec_count(&pvec); i++) { struct page *page = pvec.pages[i]; @@ -447,7 +446,6 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend, } pagevec_remove_exceptionals(&pvec); pagevec_release(&pvec); - mem_cgroup_uncharge_end(); cond_resched(); index++; } @@ -495,7 +493,6 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend, index = start; continue; } - mem_cgroup_uncharge_start(); for (i = 0; i < pagevec_count(&pvec); i++) { struct page *page = pvec.pages[i]; @@ -531,7 +528,6 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend, } pagevec_remove_exceptionals(&pvec); pagevec_release(&pvec); - mem_cgroup_uncharge_end(); index++; } @@ -835,7 +831,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc) } mutex_unlock(&shmem_swaplist_mutex); - swapcache_free(swap, NULL); + swapcache_free(swap); redirty: set_page_dirty(page); if (wbc->for_reclaim) @@ -1008,7 +1004,7 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp, */ oldpage = newpage; } else { - mem_cgroup_replace_page_cache(oldpage, newpage); + mem_cgroup_migrate(oldpage, newpage, false); lru_cache_add_anon(newpage); *pagep = newpage; } diff --git a/mm/swap.c b/mm/swap.c index 3baca701bb78..00523fffa5ed 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -62,6 +62,7 @@ static void __page_cache_release(struct page *page) del_page_from_lru_list(page, lruvec, page_off_lru(page)); spin_unlock_irqrestore(&zone->lru_lock, flags); } + mem_cgroup_uncharge(page); } static void __put_single_page(struct page *page) @@ -907,6 +908,8 @@ void release_pages(struct page **pages, int nr, bool cold) struct lruvec *lruvec; unsigned long uninitialized_var(flags); + mem_cgroup_uncharge_start(); + for (i = 0; i < nr; i++) { struct page *page = pages[i]; @@ -938,6 +941,7 @@ void release_pages(struct page **pages, int nr, bool cold) __ClearPageLRU(page); del_page_from_lru_list(page, lruvec, page_off_lru(page)); } + mem_cgroup_uncharge(page); /* Clear Active bit in case of parallel mark_page_accessed */ __ClearPageActive(page); @@ -947,6 +951,8 @@ void release_pages(struct page **pages, int nr, bool cold) if (zone) spin_unlock_irqrestore(&zone->lru_lock, flags); + mem_cgroup_uncharge_end(); + free_hot_cold_page_list(&pages_to_free, cold); } 
EXPORT_SYMBOL(release_pages); diff --git a/mm/swap_state.c b/mm/swap_state.c index 2972eee184a4..e160151da6b8 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -176,7 +176,7 @@ int add_to_swap(struct page *page, struct list_head *list) if (unlikely(PageTransHuge(page))) if (unlikely(split_huge_page_to_list(page, list))) { - swapcache_free(entry, NULL); + swapcache_free(entry); return 0; } @@ -202,7 +202,7 @@ int add_to_swap(struct page *page, struct list_head *list) * add_to_swap_cache() doesn't return -EEXIST, so we can safely * clear SWAP_HAS_CACHE flag. */ - swapcache_free(entry, NULL); + swapcache_free(entry); return 0; } } @@ -225,7 +225,7 @@ void delete_from_swap_cache(struct page *page) __delete_from_swap_cache(page); spin_unlock_irq(&address_space->tree_lock); - swapcache_free(entry, page); + swapcache_free(entry); page_cache_release(page); } @@ -386,7 +386,7 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, * add_to_swap_cache() doesn't return -EEXIST, so we can safely * clear SWAP_HAS_CACHE flag. */ - swapcache_free(entry, NULL); + swapcache_free(entry); } while (err != -ENOMEM); if (new_page) diff --git a/mm/swapfile.c b/mm/swapfile.c index 0883b4912ff7..8798b2e0ac59 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -843,16 +843,13 @@ void swap_free(swp_entry_t entry) /* * Called after dropping swapcache to decrease refcnt to swap entries. */ -void swapcache_free(swp_entry_t entry, struct page *page) +void swapcache_free(swp_entry_t entry) { struct swap_info_struct *p; - unsigned char count; p = swap_info_get(entry); if (p) { - count = swap_entry_free(p, entry, SWAP_HAS_CACHE); - if (page) - mem_cgroup_uncharge_swapcache(page, entry, count != 0); + swap_entry_free(p, entry, SWAP_HAS_CACHE); spin_unlock(&p->lock); } } diff --git a/mm/truncate.c b/mm/truncate.c index eda247307164..96d167372d89 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -281,7 +281,6 @@ void truncate_inode_pages_range(struct address_space *mapping, while (index < end && pagevec_lookup_entries(&pvec, mapping, index, min(end - index, (pgoff_t)PAGEVEC_SIZE), indices)) { - mem_cgroup_uncharge_start(); for (i = 0; i < pagevec_count(&pvec); i++) { struct page *page = pvec.pages[i]; @@ -307,7 +306,6 @@ void truncate_inode_pages_range(struct address_space *mapping, } pagevec_remove_exceptionals(&pvec); pagevec_release(&pvec); - mem_cgroup_uncharge_end(); cond_resched(); index++; } @@ -369,7 +367,6 @@ void truncate_inode_pages_range(struct address_space *mapping, pagevec_release(&pvec); break; } - mem_cgroup_uncharge_start(); for (i = 0; i < pagevec_count(&pvec); i++) { struct page *page = pvec.pages[i]; @@ -394,7 +391,6 @@ void truncate_inode_pages_range(struct address_space *mapping, } pagevec_remove_exceptionals(&pvec); pagevec_release(&pvec); - mem_cgroup_uncharge_end(); index++; } cleancache_invalidate_inode(mapping); @@ -493,7 +489,6 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping, while (index <= end && pagevec_lookup_entries(&pvec, mapping, index, min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1, indices)) { - mem_cgroup_uncharge_start(); for (i = 0; i < pagevec_count(&pvec); i++) { struct page *page = pvec.pages[i]; @@ -522,7 +517,6 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping, } pagevec_remove_exceptionals(&pvec); pagevec_release(&pvec); - mem_cgroup_uncharge_end(); cond_resched(); index++; } @@ -553,7 +547,6 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page) BUG_ON(page_has_private(page)); 
__delete_from_page_cache(page, NULL); spin_unlock_irq(&mapping->tree_lock); - mem_cgroup_uncharge_cache_page(page); if (mapping->a_ops->freepage) mapping->a_ops->freepage(page); @@ -602,7 +595,6 @@ int invalidate_inode_pages2_range(struct address_space *mapping, while (index <= end && pagevec_lookup_entries(&pvec, mapping, index, min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1, indices)) { - mem_cgroup_uncharge_start(); for (i = 0; i < pagevec_count(&pvec); i++) { struct page *page = pvec.pages[i]; @@ -655,7 +647,6 @@ int invalidate_inode_pages2_range(struct address_space *mapping, } pagevec_remove_exceptionals(&pvec); pagevec_release(&pvec); - mem_cgroup_uncharge_end(); cond_resched(); index++; } diff --git a/mm/vmscan.c b/mm/vmscan.c index d2f65c856350..7068e838d22b 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -577,9 +577,10 @@ static int __remove_mapping(struct address_space *mapping, struct page *page, if (PageSwapCache(page)) { swp_entry_t swap = { .val = page_private(page) }; + mem_cgroup_swapout(page, swap); __delete_from_swap_cache(page); spin_unlock_irq(&mapping->tree_lock); - swapcache_free(swap, page); + swapcache_free(swap); } else { void (*freepage)(struct page *); void *shadow = NULL; @@ -600,7 +601,6 @@ static int __remove_mapping(struct address_space *mapping, struct page *page, shadow = workingset_eviction(mapping, page); __delete_from_page_cache(page, shadow); spin_unlock_irq(&mapping->tree_lock); - mem_cgroup_uncharge_cache_page(page); if (freepage != NULL) freepage(page); @@ -1103,6 +1103,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, */ __clear_page_locked(page); free_it: + mem_cgroup_uncharge(page); nr_reclaimed++; /* @@ -1132,12 +1133,13 @@ keep: list_add(&page->lru, &ret_pages); VM_BUG_ON_PAGE(PageLRU(page) || PageUnevictable(page), page); } + mem_cgroup_uncharge_end(); free_hot_cold_page_list(&free_pages, true); list_splice(&ret_pages, page_list); count_vm_events(PGACTIVATE, pgactivate); - mem_cgroup_uncharge_end(); + *ret_nr_dirty += nr_dirty; *ret_nr_congested += nr_congested; *ret_nr_unqueued_dirty += nr_unqueued_dirty; @@ -1435,6 +1437,8 @@ putback_inactive_pages(struct lruvec *lruvec, struct list_head *page_list) __ClearPageActive(page); del_page_from_lru_list(page, lruvec, lru); + mem_cgroup_uncharge(page); + if (unlikely(PageCompound(page))) { spin_unlock_irq(&zone->lru_lock); (*get_compound_page_dtor(page))(page); @@ -1656,6 +1660,8 @@ static void move_active_pages_to_lru(struct lruvec *lruvec, __ClearPageActive(page); del_page_from_lru_list(page, lruvec, lru); + mem_cgroup_uncharge(page); + if (unlikely(PageCompound(page))) { spin_unlock_irq(&zone->lru_lock); (*get_compound_page_dtor(page))(page); diff --git a/mm/zswap.c b/mm/zswap.c index 032c21eeab2b..9da56af24df5 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -507,7 +507,7 @@ static int zswap_get_swap_cache_page(swp_entry_t entry, * add_to_swap_cache() doesn't return -EEXIST, so we can safely * clear SWAP_HAS_CACHE flag. */ - swapcache_free(entry, NULL); + swapcache_free(entry); } while (err != -ENOMEM); if (new_page) -- cgit v1.2.3-59-g8ed1b From 747db954cab64c6b7a95b121b517165f34751898 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Fri, 8 Aug 2014 14:19:24 -0700 Subject: mm: memcontrol: use page lists for uncharge batching Pages are now uncharged at release time, and all sources of batched uncharges operate on lists of pages. Directly use those lists, and get rid of the per-task batching state. 
This also batches statistics accounting, in addition to the res counter charges, to reduce IRQ-disabling and re-enabling. Signed-off-by: Johannes Weiner Acked-by: Michal Hocko Cc: Hugh Dickins Cc: Tejun Heo Cc: Vladimir Davydov Cc: Naoya Horiguchi Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 12 +-- include/linux/sched.h | 6 -- kernel/fork.c | 4 - mm/memcontrol.c | 206 ++++++++++++++++++++++++--------------------- mm/swap.c | 6 +- mm/vmscan.c | 12 ++- 6 files changed, 117 insertions(+), 129 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 806b8fa15c5f..e0752d204d9e 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -59,12 +59,8 @@ int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm, void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg, bool lrucare); void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg); - void mem_cgroup_uncharge(struct page *page); - -/* Batched uncharging */ -void mem_cgroup_uncharge_start(void); -void mem_cgroup_uncharge_end(void); +void mem_cgroup_uncharge_list(struct list_head *page_list); void mem_cgroup_migrate(struct page *oldpage, struct page *newpage, bool lrucare); @@ -233,11 +229,7 @@ static inline void mem_cgroup_uncharge(struct page *page) { } -static inline void mem_cgroup_uncharge_start(void) -{ -} - -static inline void mem_cgroup_uncharge_end(void) +static inline void mem_cgroup_uncharge_list(struct list_head *page_list) { } diff --git a/include/linux/sched.h b/include/linux/sched.h index 7c19d552dc3f..4fcf82a4d243 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1628,12 +1628,6 @@ struct task_struct { unsigned long trace_recursion; #endif /* CONFIG_TRACING */ #ifdef CONFIG_MEMCG /* memcg uses this to do batch job */ - struct memcg_batch_info { - int do_batch; /* incremented when batch uncharge started */ - struct mem_cgroup *memcg; /* target memcg of uncharge */ - unsigned long nr_pages; /* uncharged usage */ - unsigned long memsw_nr_pages; /* uncharged mem+swap usage */ - } memcg_batch; unsigned int memcg_kmem_skip_account; struct memcg_oom_info { struct mem_cgroup *memcg; diff --git a/kernel/fork.c b/kernel/fork.c index fbd3497b221f..f6f5086c9e7d 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1346,10 +1346,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, #ifdef CONFIG_DEBUG_MUTEXES p->blocked_on = NULL; /* not blocked yet */ #endif -#ifdef CONFIG_MEMCG - p->memcg_batch.do_batch = 0; - p->memcg_batch.memcg = NULL; -#endif #ifdef CONFIG_BCACHE p->sequential_io = 0; p->sequential_io_avg = 0; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 9106f1b12f56..a6e2be0241af 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3581,53 +3581,6 @@ out: return ret; } -/* - * Batch_start/batch_end is called in unmap_page_range/invlidate/trucate. - * In that cases, pages are freed continuously and we can expect pages - * are in the same memcg. All these calls itself limits the number of - * pages freed at once, then uncharge_start/end() is called properly. - * This may be called prural(2) times in a context, - */ - -void mem_cgroup_uncharge_start(void) -{ - unsigned long flags; - - local_irq_save(flags); - current->memcg_batch.do_batch++; - /* We can do nest. 
*/ - if (current->memcg_batch.do_batch == 1) { - current->memcg_batch.memcg = NULL; - current->memcg_batch.nr_pages = 0; - current->memcg_batch.memsw_nr_pages = 0; - } - local_irq_restore(flags); -} - -void mem_cgroup_uncharge_end(void) -{ - struct memcg_batch_info *batch = ¤t->memcg_batch; - unsigned long flags; - - local_irq_save(flags); - VM_BUG_ON(!batch->do_batch); - if (--batch->do_batch) /* If stacked, do nothing */ - goto out; - /* - * This "batch->memcg" is valid without any css_get/put etc... - * bacause we hide charges behind us. - */ - if (batch->nr_pages) - res_counter_uncharge(&batch->memcg->res, - batch->nr_pages * PAGE_SIZE); - if (batch->memsw_nr_pages) - res_counter_uncharge(&batch->memcg->memsw, - batch->memsw_nr_pages * PAGE_SIZE); - memcg_oom_recover(batch->memcg); -out: - local_irq_restore(flags); -} - #ifdef CONFIG_MEMCG_SWAP static void mem_cgroup_swap_statistics(struct mem_cgroup *memcg, bool charge) @@ -6554,6 +6507,98 @@ void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg) cancel_charge(memcg, nr_pages); } +static void uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout, + unsigned long nr_mem, unsigned long nr_memsw, + unsigned long nr_anon, unsigned long nr_file, + unsigned long nr_huge, struct page *dummy_page) +{ + unsigned long flags; + + if (nr_mem) + res_counter_uncharge(&memcg->res, nr_mem * PAGE_SIZE); + if (nr_memsw) + res_counter_uncharge(&memcg->memsw, nr_memsw * PAGE_SIZE); + + memcg_oom_recover(memcg); + + local_irq_save(flags); + __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS], nr_anon); + __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_CACHE], nr_file); + __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS_HUGE], nr_huge); + __this_cpu_add(memcg->stat->events[MEM_CGROUP_EVENTS_PGPGOUT], pgpgout); + __this_cpu_add(memcg->stat->nr_page_events, nr_anon + nr_file); + memcg_check_events(memcg, dummy_page); + local_irq_restore(flags); +} + +static void uncharge_list(struct list_head *page_list) +{ + struct mem_cgroup *memcg = NULL; + unsigned long nr_memsw = 0; + unsigned long nr_anon = 0; + unsigned long nr_file = 0; + unsigned long nr_huge = 0; + unsigned long pgpgout = 0; + unsigned long nr_mem = 0; + struct list_head *next; + struct page *page; + + next = page_list->next; + do { + unsigned int nr_pages = 1; + struct page_cgroup *pc; + + page = list_entry(next, struct page, lru); + next = page->lru.next; + + VM_BUG_ON_PAGE(PageLRU(page), page); + VM_BUG_ON_PAGE(page_count(page), page); + + pc = lookup_page_cgroup(page); + if (!PageCgroupUsed(pc)) + continue; + + /* + * Nobody should be changing or seriously looking at + * pc->mem_cgroup and pc->flags at this point, we have + * fully exclusive access to the page. 
+ */ + + if (memcg != pc->mem_cgroup) { + if (memcg) { + uncharge_batch(memcg, pgpgout, nr_mem, nr_memsw, + nr_anon, nr_file, nr_huge, page); + pgpgout = nr_mem = nr_memsw = 0; + nr_anon = nr_file = nr_huge = 0; + } + memcg = pc->mem_cgroup; + } + + if (PageTransHuge(page)) { + nr_pages <<= compound_order(page); + VM_BUG_ON_PAGE(!PageTransHuge(page), page); + nr_huge += nr_pages; + } + + if (PageAnon(page)) + nr_anon += nr_pages; + else + nr_file += nr_pages; + + if (pc->flags & PCG_MEM) + nr_mem += nr_pages; + if (pc->flags & PCG_MEMSW) + nr_memsw += nr_pages; + pc->flags = 0; + + pgpgout++; + } while (next != page_list); + + if (memcg) + uncharge_batch(memcg, pgpgout, nr_mem, nr_memsw, + nr_anon, nr_file, nr_huge, page); +} + /** * mem_cgroup_uncharge - uncharge a page * @page: page to uncharge @@ -6563,67 +6608,34 @@ void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg) */ void mem_cgroup_uncharge(struct page *page) { - struct memcg_batch_info *batch; - unsigned int nr_pages = 1; - struct mem_cgroup *memcg; struct page_cgroup *pc; - unsigned long pc_flags; - unsigned long flags; - - VM_BUG_ON_PAGE(PageLRU(page), page); - VM_BUG_ON_PAGE(page_count(page), page); if (mem_cgroup_disabled()) return; + /* Don't touch page->lru of any random page, pre-check: */ pc = lookup_page_cgroup(page); - - /* Every final put_page() ends up here */ if (!PageCgroupUsed(pc)) return; - if (PageTransHuge(page)) { - nr_pages <<= compound_order(page); - VM_BUG_ON_PAGE(!PageTransHuge(page), page); - } - /* - * Nobody should be changing or seriously looking at - * pc->mem_cgroup and pc->flags at this point, we have fully - * exclusive access to the page. - */ - memcg = pc->mem_cgroup; - pc_flags = pc->flags; - pc->flags = 0; - - local_irq_save(flags); + INIT_LIST_HEAD(&page->lru); + uncharge_list(&page->lru); +} - if (nr_pages > 1) - goto direct; - if (unlikely(test_thread_flag(TIF_MEMDIE))) - goto direct; - batch = ¤t->memcg_batch; - if (!batch->do_batch) - goto direct; - if (batch->memcg && batch->memcg != memcg) - goto direct; - if (!batch->memcg) - batch->memcg = memcg; - if (pc_flags & PCG_MEM) - batch->nr_pages++; - if (pc_flags & PCG_MEMSW) - batch->memsw_nr_pages++; - goto out; -direct: - if (pc_flags & PCG_MEM) - res_counter_uncharge(&memcg->res, nr_pages * PAGE_SIZE); - if (pc_flags & PCG_MEMSW) - res_counter_uncharge(&memcg->memsw, nr_pages * PAGE_SIZE); - memcg_oom_recover(memcg); -out: - mem_cgroup_charge_statistics(memcg, page, -nr_pages); - memcg_check_events(memcg, page); +/** + * mem_cgroup_uncharge_list - uncharge a list of page + * @page_list: list of pages to uncharge + * + * Uncharge a list of pages previously charged with + * mem_cgroup_try_charge() and mem_cgroup_commit_charge(). 
+ */ +void mem_cgroup_uncharge_list(struct list_head *page_list) +{ + if (mem_cgroup_disabled()) + return; - local_irq_restore(flags); + if (!list_empty(page_list)) + uncharge_list(page_list); } /** diff --git a/mm/swap.c b/mm/swap.c index 00523fffa5ed..6b2dc3897cd5 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -908,8 +908,6 @@ void release_pages(struct page **pages, int nr, bool cold) struct lruvec *lruvec; unsigned long uninitialized_var(flags); - mem_cgroup_uncharge_start(); - for (i = 0; i < nr; i++) { struct page *page = pages[i]; @@ -941,7 +939,6 @@ void release_pages(struct page **pages, int nr, bool cold) __ClearPageLRU(page); del_page_from_lru_list(page, lruvec, page_off_lru(page)); } - mem_cgroup_uncharge(page); /* Clear Active bit in case of parallel mark_page_accessed */ __ClearPageActive(page); @@ -951,8 +948,7 @@ void release_pages(struct page **pages, int nr, bool cold) if (zone) spin_unlock_irqrestore(&zone->lru_lock, flags); - mem_cgroup_uncharge_end(); - + mem_cgroup_uncharge_list(&pages_to_free); free_hot_cold_page_list(&pages_to_free, cold); } EXPORT_SYMBOL(release_pages); diff --git a/mm/vmscan.c b/mm/vmscan.c index 7068e838d22b..2836b5373b2e 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -822,7 +822,6 @@ static unsigned long shrink_page_list(struct list_head *page_list, cond_resched(); - mem_cgroup_uncharge_start(); while (!list_empty(page_list)) { struct address_space *mapping; struct page *page; @@ -1103,7 +1102,6 @@ static unsigned long shrink_page_list(struct list_head *page_list, */ __clear_page_locked(page); free_it: - mem_cgroup_uncharge(page); nr_reclaimed++; /* @@ -1133,8 +1131,8 @@ keep: list_add(&page->lru, &ret_pages); VM_BUG_ON_PAGE(PageLRU(page) || PageUnevictable(page), page); } - mem_cgroup_uncharge_end(); + mem_cgroup_uncharge_list(&free_pages); free_hot_cold_page_list(&free_pages, true); list_splice(&ret_pages, page_list); @@ -1437,10 +1435,9 @@ putback_inactive_pages(struct lruvec *lruvec, struct list_head *page_list) __ClearPageActive(page); del_page_from_lru_list(page, lruvec, lru); - mem_cgroup_uncharge(page); - if (unlikely(PageCompound(page))) { spin_unlock_irq(&zone->lru_lock); + mem_cgroup_uncharge(page); (*get_compound_page_dtor(page))(page); spin_lock_irq(&zone->lru_lock); } else @@ -1548,6 +1545,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, spin_unlock_irq(&zone->lru_lock); + mem_cgroup_uncharge_list(&page_list); free_hot_cold_page_list(&page_list, true); /* @@ -1660,10 +1658,9 @@ static void move_active_pages_to_lru(struct lruvec *lruvec, __ClearPageActive(page); del_page_from_lru_list(page, lruvec, lru); - mem_cgroup_uncharge(page); - if (unlikely(PageCompound(page))) { spin_unlock_irq(&zone->lru_lock); + mem_cgroup_uncharge(page); (*get_compound_page_dtor(page))(page); spin_lock_irq(&zone->lru_lock); } else @@ -1771,6 +1768,7 @@ static void shrink_active_list(unsigned long nr_to_scan, __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, -nr_taken); spin_unlock_irq(&zone->lru_lock); + mem_cgroup_uncharge_list(&l_hold); free_hot_cold_page_list(&l_hold, true); } -- cgit v1.2.3-59-g8ed1b From 434584fe68155f884b19f32b3befec8972c5d563 Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Fri, 8 Aug 2014 14:19:26 -0700 Subject: page-cgroup: trivial cleanup Add forward declarations for struct pglist_data, mem_cgroup. Remove __init, __meminit from function prototypes and inline functions. Remove redundant inclusion of bit_spinlock.h. 
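The reason forward declarations are enough here is that the header only ever stores and passes pointers to these types, so the compiler never needs their size or members. As a stand-alone illustration of the idiom, with invented names rather than the kernel's own:

struct engine;                          /* forward declaration is sufficient */

struct widget {
        struct engine *owner;           /* pointer only: size of engine not needed */
};

static inline void widget_bind(struct widget *w, struct engine *e)
{
        w->owner = e;                   /* no member access, so no full definition */
}

Only translation units that actually dereference the members need to pull in the real definition, which keeps header dependencies (and rebuild times) down.
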
Signed-off-by: Vladimir Davydov Acked-by: Michal Hocko Acked-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page_cgroup.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h index 9bfb8e68a595..b8f8c9e36a3e 100644 --- a/include/linux/page_cgroup.h +++ b/include/linux/page_cgroup.h @@ -12,8 +12,10 @@ enum { #ifndef __GENERATING_BOUNDS_H #include +struct pglist_data; + #ifdef CONFIG_MEMCG -#include +struct mem_cgroup; /* * Page Cgroup can be considered as an extended mem_map. @@ -27,16 +29,16 @@ struct page_cgroup { struct mem_cgroup *mem_cgroup; }; -void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat); +extern void pgdat_page_cgroup_init(struct pglist_data *pgdat); #ifdef CONFIG_SPARSEMEM -static inline void __init page_cgroup_init_flatmem(void) +static inline void page_cgroup_init_flatmem(void) { } -extern void __init page_cgroup_init(void); +extern void page_cgroup_init(void); #else -void __init page_cgroup_init_flatmem(void); -static inline void __init page_cgroup_init(void) +extern void page_cgroup_init_flatmem(void); +static inline void page_cgroup_init(void) { } #endif @@ -48,11 +50,10 @@ static inline int PageCgroupUsed(struct page_cgroup *pc) { return !!(pc->flags & PCG_USED); } - -#else /* CONFIG_MEMCG */ +#else /* !CONFIG_MEMCG */ struct page_cgroup; -static inline void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat) +static inline void pgdat_page_cgroup_init(struct pglist_data *pgdat) { } @@ -65,10 +66,9 @@ static inline void page_cgroup_init(void) { } -static inline void __init page_cgroup_init_flatmem(void) +static inline void page_cgroup_init_flatmem(void) { } - #endif /* CONFIG_MEMCG */ #include -- cgit v1.2.3-59-g8ed1b From 9a3f4d85d58cb4e02e226f9be946d54c33eb715b Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Fri, 8 Aug 2014 14:19:28 -0700 Subject: page-cgroup: get rid of NR_PCG_FLAGS It's not used anywhere today, so let's remove it. 
Signed-off-by: Vladimir Davydov Acked-by: Michal Hocko Acked-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page_cgroup.h | 6 ------ kernel/bounds.c | 2 -- 2 files changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h index b8f8c9e36a3e..9d9f540658f5 100644 --- a/include/linux/page_cgroup.h +++ b/include/linux/page_cgroup.h @@ -6,12 +6,8 @@ enum { PCG_USED = 0x01, /* This page is charged to a memcg */ PCG_MEM = 0x02, /* This page holds a memory charge */ PCG_MEMSW = 0x04, /* This page holds a memory+swap charge */ - __NR_PCG_FLAGS, }; -#ifndef __GENERATING_BOUNDS_H -#include - struct pglist_data; #ifdef CONFIG_MEMCG @@ -107,6 +103,4 @@ static inline void swap_cgroup_swapoff(int type) #endif /* CONFIG_MEMCG_SWAP */ -#endif /* !__GENERATING_BOUNDS_H */ - #endif /* __LINUX_PAGE_CGROUP_H */ diff --git a/kernel/bounds.c b/kernel/bounds.c index 9fd4246b04b8..e1d1d1952bfa 100644 --- a/kernel/bounds.c +++ b/kernel/bounds.c @@ -9,7 +9,6 @@ #include #include #include -#include #include #include @@ -18,7 +17,6 @@ void foo(void) /* The enum constants to put into include/generated/bounds.h */ DEFINE(NR_PAGEFLAGS, __NR_PAGEFLAGS); DEFINE(MAX_NR_ZONES, __MAX_NR_ZONES); - DEFINE(NR_PCG_FLAGS, __NR_PCG_FLAGS); #ifdef CONFIG_SMP DEFINE(NR_CPUS_BITS, ilog2(CONFIG_NR_CPUS)); #endif -- cgit v1.2.3-59-g8ed1b From 3cbb01871e22709fdd39478eca831de317df332f Mon Sep 17 00:00:00 2001 From: Greg Thelen Date: Fri, 8 Aug 2014 14:19:31 -0700 Subject: memcg: remove lookup_cgroup_page() prototype Commit 6b208e3f6e35 ("mm: memcg: remove unused node/section info from pc->flags") deleted lookup_cgroup_page() but left a prototype for it. Kill the vestigial prototype. Signed-off-by: Greg Thelen Cc: Johannes Weiner Acked-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page_cgroup.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h index 9d9f540658f5..5c831f1eca79 100644 --- a/include/linux/page_cgroup.h +++ b/include/linux/page_cgroup.h @@ -40,7 +40,6 @@ static inline void page_cgroup_init(void) #endif struct page_cgroup *lookup_page_cgroup(struct page *page); -struct page *lookup_cgroup_page(struct page_cgroup *pc); static inline int PageCgroupUsed(struct page_cgroup *pc) { -- cgit v1.2.3-59-g8ed1b From ccf94f1b4a8560ffdc221840535bae5e5a91a53c Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Fri, 8 Aug 2014 14:21:22 -0700 Subject: proc: constify seq_operations proc_uid_seq_operations, proc_gid_seq_operations and proc_projid_seq_operations are only called in proc_id_map_open with seq_open as const struct seq_operations so we can constify the 3 structures and update proc_id_map_open prototype. 
text data bss dec hex filename 6817 404 1984 9205 23f5 kernel/user_namespace.o-before 6913 308 1984 9205 23f5 kernel/user_namespace.o-after Signed-off-by: Fabian Frederick Cc: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/base.c | 2 +- include/linux/user_namespace.h | 6 +++--- kernel/user_namespace.c | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/fs/proc/base.c b/fs/proc/base.c index 2d696b0c93bf..79df9ff71afd 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -2449,7 +2449,7 @@ static int proc_tgid_io_accounting(struct task_struct *task, char *buffer) #ifdef CONFIG_USER_NS static int proc_id_map_open(struct inode *inode, struct file *file, - struct seq_operations *seq_ops) + const struct seq_operations *seq_ops) { struct user_namespace *ns = NULL; struct task_struct *task; diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 4836ba3c1cd8..e95372654f09 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -57,9 +57,9 @@ static inline void put_user_ns(struct user_namespace *ns) } struct seq_operations; -extern struct seq_operations proc_uid_seq_operations; -extern struct seq_operations proc_gid_seq_operations; -extern struct seq_operations proc_projid_seq_operations; +extern const struct seq_operations proc_uid_seq_operations; +extern const struct seq_operations proc_gid_seq_operations; +extern const struct seq_operations proc_projid_seq_operations; extern ssize_t proc_uid_map_write(struct file *, const char __user *, size_t, loff_t *); extern ssize_t proc_gid_map_write(struct file *, const char __user *, size_t, loff_t *); extern ssize_t proc_projid_map_write(struct file *, const char __user *, size_t, loff_t *); diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index fcc02560fd6b..aa312b0dc3ec 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -526,21 +526,21 @@ static void m_stop(struct seq_file *seq, void *v) return; } -struct seq_operations proc_uid_seq_operations = { +const struct seq_operations proc_uid_seq_operations = { .start = uid_m_start, .stop = m_stop, .next = m_next, .show = uid_m_show, }; -struct seq_operations proc_gid_seq_operations = { +const struct seq_operations proc_gid_seq_operations = { .start = gid_m_start, .stop = m_stop, .next = m_next, .show = gid_m_show, }; -struct seq_operations proc_projid_seq_operations = { +const struct seq_operations proc_projid_seq_operations = { .start = projid_m_start, .stop = m_stop, .next = m_next, -- cgit v1.2.3-59-g8ed1b From 41f727fde1fe40efeb4fef6fdce74ff794be5aeb Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Fri, 8 Aug 2014 14:21:56 -0700 Subject: fork/exec: cleanup mm initialization mm initialization on fork/exec is spread all over the place, which makes the code look inconsistent. We have mm_init(), which is supposed to init/nullify mm's internals, but it doesn't init all the fields it should: - on fork ->mmap,mm_rb,vmacache_seqnum,map_count,mm_cpumask,locked_vm are zeroed in dup_mmap(); - on fork ->pmd_huge_pte is zeroed in dup_mm(), immediately before calling mm_init(); - ->cpu_vm_mask_var ptr is initialized by mm_init_cpumask(), which is called before mm_init() on both fork and exec; - ->context is initialized by init_new_context(), which is called after mm_init() on both fork and exec; Let's consolidate all the initializations in mm_init() to make the code look cleaner. 
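Folding init_new_context() into mm_init() also lets the function use the usual step-wise unwind pattern, where every setup step that succeeds gets a matching label on the error path. A minimal, self-contained sketch of that pattern, with placeholder allocations standing in for mm_alloc_pgd() and init_new_context():

#include <stdlib.h>

struct ctx {
        void *pgd;
        void *context;
};

static struct ctx *ctx_init(struct ctx *c)
{
        c->pgd = malloc(64);            /* stands in for mm_alloc_pgd() */
        if (!c->pgd)
                goto fail_nopgd;

        c->context = malloc(64);        /* stands in for init_new_context() */
        if (!c->context)
                goto fail_nocontext;

        return c;                       /* fully set up */

fail_nocontext:
        free(c->pgd);                   /* undo only what already succeeded */
fail_nopgd:
        free(c);                        /* ctx_init() owns c on failure, as mm_init() owns mm */
        return NULL;
}

Each new initialization step slots in before the final return and gains one more label, so the cleanup order always mirrors the setup order.
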
Signed-off-by: Vladimir Davydov Cc: Oleg Nesterov Cc: David Rientjes Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 4 ---- include/linux/mm_types.h | 1 + kernel/fork.c | 47 ++++++++++++++++++++--------------------------- 3 files changed, 21 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/fs/exec.c b/fs/exec.c index ab1f1200ce5d..a2b42a98c743 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -368,10 +368,6 @@ static int bprm_mm_init(struct linux_binprm *bprm) if (!mm) goto err; - err = init_new_context(current, mm); - if (err) - goto err; - err = __bprm_mm_init(bprm); if (err) goto err; diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 796deac19fcf..6e0b286649f1 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -461,6 +461,7 @@ static inline void mm_init_cpumask(struct mm_struct *mm) #ifdef CONFIG_CPUMASK_OFFSTACK mm->cpu_vm_mask_var = &mm->cpumask_allocation; #endif + cpumask_clear(mm->cpu_vm_mask_var); } /* Future-safe accessor for struct mm_struct's cpu_vm_mask. */ diff --git a/kernel/fork.c b/kernel/fork.c index f6f5086c9e7d..418b52a9ec6a 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -374,12 +374,6 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) */ down_write_nested(&mm->mmap_sem, SINGLE_DEPTH_NESTING); - mm->locked_vm = 0; - mm->mmap = NULL; - mm->vmacache_seqnum = 0; - mm->map_count = 0; - cpumask_clear(mm_cpumask(mm)); - mm->mm_rb = RB_ROOT; rb_link = &mm->mm_rb.rb_node; rb_parent = NULL; pprev = &mm->mmap; @@ -538,17 +532,27 @@ static void mm_init_aio(struct mm_struct *mm) static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p) { + mm->mmap = NULL; + mm->mm_rb = RB_ROOT; + mm->vmacache_seqnum = 0; atomic_set(&mm->mm_users, 1); atomic_set(&mm->mm_count, 1); init_rwsem(&mm->mmap_sem); INIT_LIST_HEAD(&mm->mmlist); mm->core_state = NULL; atomic_long_set(&mm->nr_ptes, 0); + mm->map_count = 0; + mm->locked_vm = 0; memset(&mm->rss_stat, 0, sizeof(mm->rss_stat)); spin_lock_init(&mm->page_table_lock); + mm_init_cpumask(mm); mm_init_aio(mm); mm_init_owner(mm, p); + mmu_notifier_mm_init(mm); clear_tlb_flush_pending(mm); +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS + mm->pmd_huge_pte = NULL; +#endif if (current->mm) { mm->flags = current->mm->flags & MMF_INIT_MASK; @@ -558,11 +562,17 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p) mm->def_flags = 0; } - if (likely(!mm_alloc_pgd(mm))) { - mmu_notifier_mm_init(mm); - return mm; - } + if (mm_alloc_pgd(mm)) + goto fail_nopgd; + + if (init_new_context(p, mm)) + goto fail_nocontext; + return mm; + +fail_nocontext: + mm_free_pgd(mm); +fail_nopgd: free_mm(mm); return NULL; } @@ -596,7 +606,6 @@ struct mm_struct *mm_alloc(void) return NULL; memset(mm, 0, sizeof(*mm)); - mm_init_cpumask(mm); return mm_init(mm, current); } @@ -828,17 +837,10 @@ static struct mm_struct *dup_mm(struct task_struct *tsk) goto fail_nomem; memcpy(mm, oldmm, sizeof(*mm)); - mm_init_cpumask(mm); -#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS - mm->pmd_huge_pte = NULL; -#endif if (!mm_init(mm, tsk)) goto fail_nomem; - if (init_new_context(tsk, mm)) - goto fail_nocontext; - dup_mm_exe_file(oldmm, mm); err = dup_mmap(mm, oldmm); @@ -860,15 +862,6 @@ free_pt: fail_nomem: return NULL; - -fail_nocontext: - /* - * If init_new_context() failed, we cannot use mmput() to free the mm - * because it calls destroy_context() - */ - mm_free_pgd(mm); - free_mm(mm); 
- return NULL; } static int copy_mm(unsigned long clone_flags, struct task_struct *tsk) -- cgit v1.2.3-59-g8ed1b From 33144e8429bd7fceacbb869a7f5061db42e13fe6 Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Fri, 8 Aug 2014 14:22:03 -0700 Subject: kernel/fork.c: make mm_init_owner static It's only used in fork.c:mm_init(). Signed-off-by: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 5 ----- kernel/fork.c | 14 +++++++------- 2 files changed, 7 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 4fcf82a4d243..b21e9218c0fd 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2961,15 +2961,10 @@ static inline void inc_syscw(struct task_struct *tsk) #ifdef CONFIG_MEMCG extern void mm_update_next_owner(struct mm_struct *mm); -extern void mm_init_owner(struct mm_struct *mm, struct task_struct *p); #else static inline void mm_update_next_owner(struct mm_struct *mm) { } - -static inline void mm_init_owner(struct mm_struct *mm, struct task_struct *p) -{ -} #endif /* CONFIG_MEMCG */ static inline unsigned long task_rlimit(const struct task_struct *tsk, diff --git a/kernel/fork.c b/kernel/fork.c index aff84f84b0d3..86da59e165ad 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -535,6 +535,13 @@ static void mm_init_aio(struct mm_struct *mm) #endif } +static void mm_init_owner(struct mm_struct *mm, struct task_struct *p) +{ +#ifdef CONFIG_MEMCG + mm->owner = p; +#endif +} + static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p) { mm->mmap = NULL; @@ -1139,13 +1146,6 @@ static void rt_mutex_init_task(struct task_struct *p) #endif } -#ifdef CONFIG_MEMCG -void mm_init_owner(struct mm_struct *mm, struct task_struct *p) -{ - mm->owner = p; -} -#endif /* CONFIG_MEMCG */ - /* * Initialize POSIX timer handling for a single task. */ -- cgit v1.2.3-59-g8ed1b From 4aff1ce7add1c432fe5ea3ae0231155f33e5ef38 Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Fri, 8 Aug 2014 14:22:09 -0700 Subject: rapidio: add new RapidIO DMA interface routines Add RapidIO DMA interface routines that directly use reference to the mport device object and/or target device destination ID as parameters. This allows to perform RapidIO DMA transfer requests by modules that do not have an access to the RapidIO device list. Signed-off-by: Alexandre Bounine Cc: Matt Porter Cc: Andre van Herk Cc: Stef van Os Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rapidio/rio.c | 66 +++++++++++++++++++++++++++++++++++-------------- include/linux/rio_drv.h | 5 ++++ 2 files changed, 52 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/drivers/rapidio/rio.c b/drivers/rapidio/rio.c index a54ba0494dd3..d7b87c64b7cd 100644 --- a/drivers/rapidio/rio.c +++ b/drivers/rapidio/rio.c @@ -1509,30 +1509,39 @@ EXPORT_SYMBOL_GPL(rio_route_clr_table); static bool rio_chan_filter(struct dma_chan *chan, void *arg) { - struct rio_dev *rdev = arg; + struct rio_mport *mport = arg; /* Check that DMA device belongs to the right MPORT */ - return (rdev->net->hport == - container_of(chan->device, struct rio_mport, dma)); + return mport == container_of(chan->device, struct rio_mport, dma); } /** - * rio_request_dma - request RapidIO capable DMA channel that supports - * specified target RapidIO device. - * @rdev: RIO device control structure + * rio_request_mport_dma - request RapidIO capable DMA channel associated + * with specified local RapidIO mport device. 
+ * @mport: RIO mport to perform DMA data transfers * * Returns pointer to allocated DMA channel or NULL if failed. */ -struct dma_chan *rio_request_dma(struct rio_dev *rdev) +struct dma_chan *rio_request_mport_dma(struct rio_mport *mport) { dma_cap_mask_t mask; - struct dma_chan *dchan; dma_cap_zero(mask); dma_cap_set(DMA_SLAVE, mask); - dchan = dma_request_channel(mask, rio_chan_filter, rdev); + return dma_request_channel(mask, rio_chan_filter, mport); +} +EXPORT_SYMBOL_GPL(rio_request_mport_dma); - return dchan; +/** + * rio_request_dma - request RapidIO capable DMA channel that supports + * specified target RapidIO device. + * @rdev: RIO device associated with DMA transfer + * + * Returns pointer to allocated DMA channel or NULL if failed. + */ +struct dma_chan *rio_request_dma(struct rio_dev *rdev) +{ + return rio_request_mport_dma(rdev->net->hport); } EXPORT_SYMBOL_GPL(rio_request_dma); @@ -1547,10 +1556,10 @@ void rio_release_dma(struct dma_chan *dchan) EXPORT_SYMBOL_GPL(rio_release_dma); /** - * rio_dma_prep_slave_sg - RapidIO specific wrapper + * rio_dma_prep_xfer - RapidIO specific wrapper * for device_prep_slave_sg callback defined by DMAENGINE. - * @rdev: RIO device control structure * @dchan: DMA channel to configure + * @destid: target RapidIO device destination ID * @data: RIO specific data descriptor * @direction: DMA data transfer direction (TO or FROM the device) * @flags: dmaengine defined flags @@ -1560,11 +1569,10 @@ EXPORT_SYMBOL_GPL(rio_release_dma); * target RIO device. * Returns pointer to DMA transaction descriptor or NULL if failed. */ -struct dma_async_tx_descriptor *rio_dma_prep_slave_sg(struct rio_dev *rdev, - struct dma_chan *dchan, struct rio_dma_data *data, +struct dma_async_tx_descriptor *rio_dma_prep_xfer(struct dma_chan *dchan, + u16 destid, struct rio_dma_data *data, enum dma_transfer_direction direction, unsigned long flags) { - struct dma_async_tx_descriptor *txd = NULL; struct rio_dma_ext rio_ext; if (dchan->device->device_prep_slave_sg == NULL) { @@ -1572,15 +1580,35 @@ struct dma_async_tx_descriptor *rio_dma_prep_slave_sg(struct rio_dev *rdev, return NULL; } - rio_ext.destid = rdev->destid; + rio_ext.destid = destid; rio_ext.rio_addr_u = data->rio_addr_u; rio_ext.rio_addr = data->rio_addr; rio_ext.wr_type = data->wr_type; - txd = dmaengine_prep_rio_sg(dchan, data->sg, data->sg_len, - direction, flags, &rio_ext); + return dmaengine_prep_rio_sg(dchan, data->sg, data->sg_len, + direction, flags, &rio_ext); +} +EXPORT_SYMBOL_GPL(rio_dma_prep_xfer); - return txd; +/** + * rio_dma_prep_slave_sg - RapidIO specific wrapper + * for device_prep_slave_sg callback defined by DMAENGINE. + * @rdev: RIO device control structure + * @dchan: DMA channel to configure + * @data: RIO specific data descriptor + * @direction: DMA data transfer direction (TO or FROM the device) + * @flags: dmaengine defined flags + * + * Initializes RapidIO capable DMA channel for the specified data transfer. + * Uses DMA channel private extension to pass information related to remote + * target RIO device. + * Returns pointer to DMA transaction descriptor or NULL if failed. 
+ */ +struct dma_async_tx_descriptor *rio_dma_prep_slave_sg(struct rio_dev *rdev, + struct dma_chan *dchan, struct rio_dma_data *data, + enum dma_transfer_direction direction, unsigned long flags) +{ + return rio_dma_prep_xfer(dchan, rdev->destid, data, direction, flags); } EXPORT_SYMBOL_GPL(rio_dma_prep_slave_sg); diff --git a/include/linux/rio_drv.h b/include/linux/rio_drv.h index 5059994fe297..9fc2f213e74f 100644 --- a/include/linux/rio_drv.h +++ b/include/linux/rio_drv.h @@ -384,11 +384,16 @@ void rio_dev_put(struct rio_dev *); #ifdef CONFIG_RAPIDIO_DMA_ENGINE extern struct dma_chan *rio_request_dma(struct rio_dev *rdev); +extern struct dma_chan *rio_request_mport_dma(struct rio_mport *mport); extern void rio_release_dma(struct dma_chan *dchan); extern struct dma_async_tx_descriptor *rio_dma_prep_slave_sg( struct rio_dev *rdev, struct dma_chan *dchan, struct rio_dma_data *data, enum dma_transfer_direction direction, unsigned long flags); +extern struct dma_async_tx_descriptor *rio_dma_prep_xfer( + struct dma_chan *dchan, u16 destid, + struct rio_dma_data *data, + enum dma_transfer_direction direction, unsigned long flags); #endif /** -- cgit v1.2.3-59-g8ed1b From e5eea0981a3840f3f39f43d2d00461c4c24018e7 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Fri, 8 Aug 2014 14:22:16 -0700 Subject: sysctl: remove typedef ctl_table Remove the final user, and the typedef itself. Signed-off-by: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/proc_sysctl.c | 2 +- include/linux/sysctl.h | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 71290463a1d3..f92d5dd578a4 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -632,7 +632,7 @@ out: return ret; } -static int scan(struct ctl_table_header *head, ctl_table *table, +static int scan(struct ctl_table_header *head, struct ctl_table *table, unsigned long *pos, struct file *file, struct dir_context *ctx) { diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 14a8ff2de11e..b7361f831226 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -34,8 +34,6 @@ struct ctl_table_root; struct ctl_table_header; struct ctl_dir; -typedef struct ctl_table ctl_table; - typedef int proc_handler (struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos); -- cgit v1.2.3-59-g8ed1b From 69361eef9056b0babb507798c2135ad1572f0ef7 Mon Sep 17 00:00:00 2001 From: Josh Hunt Date: Fri, 8 Aug 2014 14:22:31 -0700 Subject: panic: add TAINT_SOFTLOCKUP This taint flag will be set if the system has ever entered a softlockup state. Similar to TAINT_WARN it is useful to know whether or not the system has been in a softlockup state when debugging. [akpm@linux-foundation.org: apply the taint before calling panic()] Signed-off-by: Josh Hunt Cc: Jason Baron Cc: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/oops-tracing.txt | 2 ++ Documentation/sysctl/kernel.txt | 1 + include/linux/kernel.h | 1 + kernel/panic.c | 1 + kernel/watchdog.c | 1 + 5 files changed, 6 insertions(+) (limited to 'include/linux') diff --git a/Documentation/oops-tracing.txt b/Documentation/oops-tracing.txt index e3155995ddd8..beefb9f82902 100644 --- a/Documentation/oops-tracing.txt +++ b/Documentation/oops-tracing.txt @@ -268,6 +268,8 @@ characters, each representing a particular tainted value. 14: 'E' if an unsigned module has been loaded in a kernel supporting module signature. 
+ 15: 'L' if a soft lockup has previously occurred on the system. + The primary reason for the 'Tainted: ' string is to tell kernel debuggers if this is a clean kernel or if anything unusual has occurred. Tainting is permanent: even if an offending module is diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index c14374e71775..f79eb9666379 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -826,6 +826,7 @@ can be ORed together: 4096 - An out-of-tree module has been loaded. 8192 - An unsigned module has been loaded in a kernel supporting module signature. +16384 - A soft lockup has previously occurred on the system. ============================================================== diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 3dc22abbc68a..31ae66f34235 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -470,6 +470,7 @@ extern enum system_states { #define TAINT_FIRMWARE_WORKAROUND 11 #define TAINT_OOT_MODULE 12 #define TAINT_UNSIGNED_MODULE 13 +#define TAINT_SOFTLOCKUP 14 extern const char hex_asc[]; #define hex_asc_lo(x) hex_asc[((x) & 0x0f)] diff --git a/kernel/panic.c b/kernel/panic.c index 62e16cef9cc2..d09dc5c32c67 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -224,6 +224,7 @@ static const struct tnt tnts[] = { { TAINT_FIRMWARE_WORKAROUND, 'I', ' ' }, { TAINT_OOT_MODULE, 'O', ' ' }, { TAINT_UNSIGNED_MODULE, 'E', ' ' }, + { TAINT_SOFTLOCKUP, 'L', ' ' }, }; /** diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 51b29e9d2ba6..a8d6914030fe 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -368,6 +368,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) smp_mb__after_atomic(); } + add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK); if (softlockup_panic) panic("softlockup: hung tasks"); __this_cpu_write(soft_watchdog_warn, true); -- cgit v1.2.3-59-g8ed1b From d97b07c54f34e88352ebe676beb798c8f59ac588 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 8 Aug 2014 14:23:14 -0700 Subject: initramfs: support initramfs that is bigger than 2GiB Now with 64bit bzImage and kexec tools, we support ramdisk that size is bigger than 2g, as we could put it above 4G. Found compressed initramfs image could not be decompressed properly. It turns out that image length is int during decompress detection, and it will become < 0 when length is more than 2G. Furthermore, during decompressing len as int is used for inbuf count, that has problem too. Change len to long, that should be ok as on 32 bit platform long is 32bits. Tested with following compressed initramfs image as root with kexec. gzip, bzip2, xz, lzma, lzop, lz4. run time for populate_rootfs(): size name Nehalem-EX Westmere-EX Ivybridge-EX 9034400256 root_img : 26s 24s 30s 3561095057 root_img.lz4 : 28s 27s 27s 3459554629 root_img.lzo : 29s 29s 28s 3219399480 root_img.gz : 64s 62s 49s 2251594592 root_img.xz : 262s 260s 183s 2226366598 root_img.lzma: 386s 376s 277s 2901482513 root_img.bz2 : 635s 599s Signed-off-by: Yinghai Lu Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Rashika Kheria Cc: Josh Triplett Cc: Kyungsik Lee Cc: P J P Cc: Al Viro Cc: Tetsuo Handa Cc: "Daniel M. 
Weeks" Cc: Alexandre Courbot Cc: Jan Beulich Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- crypto/zlib.c | 8 ++++---- fs/isofs/compress.c | 4 ++-- fs/jffs2/compr_zlib.c | 7 ++++--- include/linux/decompress/bunzip2.h | 8 ++++---- include/linux/decompress/generic.h | 10 +++++----- include/linux/decompress/inflate.h | 8 ++++---- include/linux/decompress/unlz4.h | 8 ++++---- include/linux/decompress/unlzma.h | 8 ++++---- include/linux/decompress/unlzo.h | 8 ++++---- include/linux/decompress/unxz.h | 8 ++++---- include/linux/zlib.h | 4 ++-- init/do_mounts_rd.c | 10 +++++----- init/initramfs.c | 22 +++++++++++----------- lib/decompress.c | 2 +- lib/decompress_bunzip2.c | 26 +++++++++++++------------- lib/decompress_inflate.c | 12 ++++++------ lib/decompress_unlz4.c | 18 +++++++++--------- lib/decompress_unlzma.c | 28 ++++++++++++++-------------- lib/decompress_unlzo.c | 12 ++++++------ lib/decompress_unxz.c | 10 +++++----- 20 files changed, 111 insertions(+), 110 deletions(-) (limited to 'include/linux') diff --git a/crypto/zlib.c b/crypto/zlib.c index 06b62e5cdcc7..c9ee681d57fd 100644 --- a/crypto/zlib.c +++ b/crypto/zlib.c @@ -168,7 +168,7 @@ static int zlib_compress_update(struct crypto_pcomp *tfm, } ret = req->avail_out - stream->avail_out; - pr_debug("avail_in %u, avail_out %u (consumed %u, produced %u)\n", + pr_debug("avail_in %lu, avail_out %lu (consumed %lu, produced %u)\n", stream->avail_in, stream->avail_out, req->avail_in - stream->avail_in, ret); req->next_in = stream->next_in; @@ -198,7 +198,7 @@ static int zlib_compress_final(struct crypto_pcomp *tfm, } ret = req->avail_out - stream->avail_out; - pr_debug("avail_in %u, avail_out %u (consumed %u, produced %u)\n", + pr_debug("avail_in %lu, avail_out %lu (consumed %lu, produced %u)\n", stream->avail_in, stream->avail_out, req->avail_in - stream->avail_in, ret); req->next_in = stream->next_in; @@ -283,7 +283,7 @@ static int zlib_decompress_update(struct crypto_pcomp *tfm, } ret = req->avail_out - stream->avail_out; - pr_debug("avail_in %u, avail_out %u (consumed %u, produced %u)\n", + pr_debug("avail_in %lu, avail_out %lu (consumed %lu, produced %u)\n", stream->avail_in, stream->avail_out, req->avail_in - stream->avail_in, ret); req->next_in = stream->next_in; @@ -331,7 +331,7 @@ static int zlib_decompress_final(struct crypto_pcomp *tfm, } ret = req->avail_out - stream->avail_out; - pr_debug("avail_in %u, avail_out %u (consumed %u, produced %u)\n", + pr_debug("avail_in %lu, avail_out %lu (consumed %lu, produced %u)\n", stream->avail_in, stream->avail_out, req->avail_in - stream->avail_in, ret); req->next_in = stream->next_in; diff --git a/fs/isofs/compress.c b/fs/isofs/compress.c index 592e5115a561..f311bf084015 100644 --- a/fs/isofs/compress.c +++ b/fs/isofs/compress.c @@ -158,8 +158,8 @@ static loff_t zisofs_uncompress_block(struct inode *inode, loff_t block_start, "zisofs: zisofs_inflate returned" " %d, inode = %lu," " page idx = %d, bh idx = %d," - " avail_in = %d," - " avail_out = %d\n", + " avail_in = %ld," + " avail_out = %ld\n", zerr, inode->i_ino, curpage, curbh, stream.avail_in, stream.avail_out); diff --git a/fs/jffs2/compr_zlib.c b/fs/jffs2/compr_zlib.c index 0b9a1e44e833..5698dae5d92d 100644 --- a/fs/jffs2/compr_zlib.c +++ b/fs/jffs2/compr_zlib.c @@ -94,11 +94,12 @@ static int jffs2_zlib_compress(unsigned char *data_in, while (def_strm.total_out < *dstlen - STREAM_END_SPACE && def_strm.total_in < *sourcelen) { def_strm.avail_out = *dstlen - (def_strm.total_out + STREAM_END_SPACE); - def_strm.avail_in = 
min((unsigned)(*sourcelen-def_strm.total_in), def_strm.avail_out); - jffs2_dbg(1, "calling deflate with avail_in %d, avail_out %d\n", + def_strm.avail_in = min_t(unsigned long, + (*sourcelen-def_strm.total_in), def_strm.avail_out); + jffs2_dbg(1, "calling deflate with avail_in %ld, avail_out %ld\n", def_strm.avail_in, def_strm.avail_out); ret = zlib_deflate(&def_strm, Z_PARTIAL_FLUSH); - jffs2_dbg(1, "deflate returned with avail_in %d, avail_out %d, total_in %ld, total_out %ld\n", + jffs2_dbg(1, "deflate returned with avail_in %ld, avail_out %ld, total_in %ld, total_out %ld\n", def_strm.avail_in, def_strm.avail_out, def_strm.total_in, def_strm.total_out); if (ret != Z_OK) { diff --git a/include/linux/decompress/bunzip2.h b/include/linux/decompress/bunzip2.h index 115272137a9c..4d683df898e6 100644 --- a/include/linux/decompress/bunzip2.h +++ b/include/linux/decompress/bunzip2.h @@ -1,10 +1,10 @@ #ifndef DECOMPRESS_BUNZIP2_H #define DECOMPRESS_BUNZIP2_H -int bunzip2(unsigned char *inbuf, int len, - int(*fill)(void*, unsigned int), - int(*flush)(void*, unsigned int), +int bunzip2(unsigned char *inbuf, long len, + long (*fill)(void*, unsigned long), + long (*flush)(void*, unsigned long), unsigned char *output, - int *pos, + long *pos, void(*error)(char *x)); #endif diff --git a/include/linux/decompress/generic.h b/include/linux/decompress/generic.h index 0c7111a55a1a..1fcfd64b5076 100644 --- a/include/linux/decompress/generic.h +++ b/include/linux/decompress/generic.h @@ -1,11 +1,11 @@ #ifndef DECOMPRESS_GENERIC_H #define DECOMPRESS_GENERIC_H -typedef int (*decompress_fn) (unsigned char *inbuf, int len, - int(*fill)(void*, unsigned int), - int(*flush)(void*, unsigned int), +typedef int (*decompress_fn) (unsigned char *inbuf, long len, + long (*fill)(void*, unsigned long), + long (*flush)(void*, unsigned long), unsigned char *outbuf, - int *posp, + long *posp, void(*error)(char *x)); /* inbuf - input buffer @@ -33,7 +33,7 @@ typedef int (*decompress_fn) (unsigned char *inbuf, int len, /* Utility routine to detect the decompression method */ -decompress_fn decompress_method(const unsigned char *inbuf, int len, +decompress_fn decompress_method(const unsigned char *inbuf, long len, const char **name); #endif diff --git a/include/linux/decompress/inflate.h b/include/linux/decompress/inflate.h index 1d0aedef9822..e4f411fdbd24 100644 --- a/include/linux/decompress/inflate.h +++ b/include/linux/decompress/inflate.h @@ -1,10 +1,10 @@ #ifndef LINUX_DECOMPRESS_INFLATE_H #define LINUX_DECOMPRESS_INFLATE_H -int gunzip(unsigned char *inbuf, int len, - int(*fill)(void*, unsigned int), - int(*flush)(void*, unsigned int), +int gunzip(unsigned char *inbuf, long len, + long (*fill)(void*, unsigned long), + long (*flush)(void*, unsigned long), unsigned char *output, - int *pos, + long *pos, void(*error_fn)(char *x)); #endif diff --git a/include/linux/decompress/unlz4.h b/include/linux/decompress/unlz4.h index d5b68bf3ec92..3273c2f36496 100644 --- a/include/linux/decompress/unlz4.h +++ b/include/linux/decompress/unlz4.h @@ -1,10 +1,10 @@ #ifndef DECOMPRESS_UNLZ4_H #define DECOMPRESS_UNLZ4_H -int unlz4(unsigned char *inbuf, int len, - int(*fill)(void*, unsigned int), - int(*flush)(void*, unsigned int), +int unlz4(unsigned char *inbuf, long len, + long (*fill)(void*, unsigned long), + long (*flush)(void*, unsigned long), unsigned char *output, - int *pos, + long *pos, void(*error)(char *x)); #endif diff --git a/include/linux/decompress/unlzma.h b/include/linux/decompress/unlzma.h index 7796538f1bf4..8a891a193840 
100644 --- a/include/linux/decompress/unlzma.h +++ b/include/linux/decompress/unlzma.h @@ -1,11 +1,11 @@ #ifndef DECOMPRESS_UNLZMA_H #define DECOMPRESS_UNLZMA_H -int unlzma(unsigned char *, int, - int(*fill)(void*, unsigned int), - int(*flush)(void*, unsigned int), +int unlzma(unsigned char *, long, + long (*fill)(void*, unsigned long), + long (*flush)(void*, unsigned long), unsigned char *output, - int *posp, + long *posp, void(*error)(char *x) ); diff --git a/include/linux/decompress/unlzo.h b/include/linux/decompress/unlzo.h index 987229752519..af18f95d6570 100644 --- a/include/linux/decompress/unlzo.h +++ b/include/linux/decompress/unlzo.h @@ -1,10 +1,10 @@ #ifndef DECOMPRESS_UNLZO_H #define DECOMPRESS_UNLZO_H -int unlzo(unsigned char *inbuf, int len, - int(*fill)(void*, unsigned int), - int(*flush)(void*, unsigned int), +int unlzo(unsigned char *inbuf, long len, + long (*fill)(void*, unsigned long), + long (*flush)(void*, unsigned long), unsigned char *output, - int *pos, + long *pos, void(*error)(char *x)); #endif diff --git a/include/linux/decompress/unxz.h b/include/linux/decompress/unxz.h index 41728fc6c8a1..f764e2a7201e 100644 --- a/include/linux/decompress/unxz.h +++ b/include/linux/decompress/unxz.h @@ -10,10 +10,10 @@ #ifndef DECOMPRESS_UNXZ_H #define DECOMPRESS_UNXZ_H -int unxz(unsigned char *in, int in_size, - int (*fill)(void *dest, unsigned int size), - int (*flush)(void *src, unsigned int size), - unsigned char *out, int *in_used, +int unxz(unsigned char *in, long in_size, + long (*fill)(void *dest, unsigned long size), + long (*flush)(void *src, unsigned long size), + unsigned char *out, long *in_used, void (*error)(char *x)); #endif diff --git a/include/linux/zlib.h b/include/linux/zlib.h index 197abb2a54c5..92dbbd3f6c75 100644 --- a/include/linux/zlib.h +++ b/include/linux/zlib.h @@ -83,11 +83,11 @@ struct internal_state; typedef struct z_stream_s { const Byte *next_in; /* next input byte */ - uInt avail_in; /* number of bytes available at next_in */ + uLong avail_in; /* number of bytes available at next_in */ uLong total_in; /* total nb of input bytes read so far */ Byte *next_out; /* next output byte should be put there */ - uInt avail_out; /* remaining free space at next_out */ + uLong avail_out; /* remaining free space at next_out */ uLong total_out; /* total nb of bytes output so far */ char *msg; /* last error message, NULL if no error */ diff --git a/init/do_mounts_rd.c b/init/do_mounts_rd.c index a8227022e3a0..e5d059e8aa11 100644 --- a/init/do_mounts_rd.c +++ b/init/do_mounts_rd.c @@ -311,9 +311,9 @@ static int exit_code; static int decompress_error; static int crd_infd, crd_outfd; -static int __init compr_fill(void *buf, unsigned int len) +static long __init compr_fill(void *buf, unsigned long len) { - int r = sys_read(crd_infd, buf, len); + long r = sys_read(crd_infd, buf, len); if (r < 0) printk(KERN_ERR "RAMDISK: error while reading compressed data"); else if (r == 0) @@ -321,13 +321,13 @@ static int __init compr_fill(void *buf, unsigned int len) return r; } -static int __init compr_flush(void *window, unsigned int outcnt) +static long __init compr_flush(void *window, unsigned long outcnt) { - int written = sys_write(crd_outfd, window, outcnt); + long written = sys_write(crd_outfd, window, outcnt); if (written != outcnt) { if (decompress_error == 0) printk(KERN_ERR - "RAMDISK: incomplete write (%d != %d)\n", + "RAMDISK: incomplete write (%ld != %ld)\n", written, outcnt); decompress_error = 1; return -1; diff --git a/init/initramfs.c b/init/initramfs.c index 
4f276b6a167b..a7566031242e 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -197,7 +197,7 @@ static __initdata enum state { } state, next_state; static __initdata char *victim; -static __initdata unsigned count; +static unsigned long count __initdata; static __initdata loff_t this_header, next_header; static inline void __init eat(unsigned n) @@ -209,7 +209,7 @@ static inline void __init eat(unsigned n) static __initdata char *vcollected; static __initdata char *collected; -static __initdata int remains; +static long remains __initdata; static __initdata char *collect; static void __init read_into(char *buf, unsigned size, enum state next) @@ -236,7 +236,7 @@ static int __init do_start(void) static int __init do_collect(void) { - unsigned n = remains; + unsigned long n = remains; if (count < n) n = count; memcpy(collect, victim, n); @@ -407,7 +407,7 @@ static __initdata int (*actions[])(void) = { [Reset] = do_reset, }; -static int __init write_buffer(char *buf, unsigned len) +static long __init write_buffer(char *buf, unsigned long len) { count = len; victim = buf; @@ -417,11 +417,11 @@ static int __init write_buffer(char *buf, unsigned len) return len - count; } -static int __init flush_buffer(void *bufv, unsigned len) +static long __init flush_buffer(void *bufv, unsigned long len) { char *buf = (char *) bufv; - int written; - int origLen = len; + long written; + long origLen = len; if (message) return -1; while ((written = write_buffer(buf, len)) < len && !message) { @@ -440,13 +440,13 @@ static int __init flush_buffer(void *bufv, unsigned len) return origLen; } -static unsigned my_inptr; /* index of next byte to be processed in inbuf */ +static unsigned long my_inptr; /* index of next byte to be processed in inbuf */ #include -static char * __init unpack_to_rootfs(char *buf, unsigned len) +static char * __init unpack_to_rootfs(char *buf, unsigned long len) { - int written, res; + long written; decompress_fn decompress; const char *compress_name; static __initdata char msg_buf[64]; @@ -480,7 +480,7 @@ static char * __init unpack_to_rootfs(char *buf, unsigned len) decompress = decompress_method(buf, len, &compress_name); pr_debug("Detected %s compressed data\n", compress_name); if (decompress) { - res = decompress(buf, len, NULL, flush_buffer, NULL, + int res = decompress(buf, len, NULL, flush_buffer, NULL, &my_inptr, error); if (res) error("decompressor failed"); diff --git a/lib/decompress.c b/lib/decompress.c index 86069d74c062..37f3c786348f 100644 --- a/lib/decompress.c +++ b/lib/decompress.c @@ -54,7 +54,7 @@ static const struct compress_format compressed_formats[] __initconst = { { {0, 0}, NULL, NULL } }; -decompress_fn __init decompress_method(const unsigned char *inbuf, int len, +decompress_fn __init decompress_method(const unsigned char *inbuf, long len, const char **name) { const struct compress_format *cf; diff --git a/lib/decompress_bunzip2.c b/lib/decompress_bunzip2.c index 31c5f7675fbf..8290e0bef7ea 100644 --- a/lib/decompress_bunzip2.c +++ b/lib/decompress_bunzip2.c @@ -92,8 +92,8 @@ struct bunzip_data { /* State for interrupting output loop */ int writeCopies, writePos, writeRunCountdown, writeCount, writeCurrent; /* I/O tracking data (file handles, buffers, positions, etc.) 
*/ - int (*fill)(void*, unsigned int); - int inbufCount, inbufPos /*, outbufPos*/; + long (*fill)(void*, unsigned long); + long inbufCount, inbufPos /*, outbufPos*/; unsigned char *inbuf /*,*outbuf*/; unsigned int inbufBitCount, inbufBits; /* The CRC values stored in the block header and calculated from the @@ -617,7 +617,7 @@ decode_next_byte: goto decode_next_byte; } -static int INIT nofill(void *buf, unsigned int len) +static long INIT nofill(void *buf, unsigned long len) { return -1; } @@ -625,8 +625,8 @@ static int INIT nofill(void *buf, unsigned int len) /* Allocate the structure, read file header. If in_fd ==-1, inbuf must contain a complete bunzip file (len bytes long). If in_fd!=-1, inbuf and len are ignored, and data is read from file handle into temporary buffer. */ -static int INIT start_bunzip(struct bunzip_data **bdp, void *inbuf, int len, - int (*fill)(void*, unsigned int)) +static int INIT start_bunzip(struct bunzip_data **bdp, void *inbuf, long len, + long (*fill)(void*, unsigned long)) { struct bunzip_data *bd; unsigned int i, j, c; @@ -675,11 +675,11 @@ static int INIT start_bunzip(struct bunzip_data **bdp, void *inbuf, int len, /* Example usage: decompress src_fd to dst_fd. (Stops at end of bzip2 data, not end of file.) */ -STATIC int INIT bunzip2(unsigned char *buf, int len, - int(*fill)(void*, unsigned int), - int(*flush)(void*, unsigned int), +STATIC int INIT bunzip2(unsigned char *buf, long len, + long (*fill)(void*, unsigned long), + long (*flush)(void*, unsigned long), unsigned char *outbuf, - int *pos, + long *pos, void(*error)(char *x)) { struct bunzip_data *bd; @@ -743,11 +743,11 @@ exit_0: } #ifdef PREBOOT -STATIC int INIT decompress(unsigned char *buf, int len, - int(*fill)(void*, unsigned int), - int(*flush)(void*, unsigned int), +STATIC int INIT decompress(unsigned char *buf, long len, + long (*fill)(void*, unsigned long), + long (*flush)(void*, unsigned long), unsigned char *outbuf, - int *pos, + long *pos, void(*error)(char *x)) { return bunzip2(buf, len - 4, fill, flush, outbuf, pos, error); diff --git a/lib/decompress_inflate.c b/lib/decompress_inflate.c index 0edfd742a154..d4c7891635ec 100644 --- a/lib/decompress_inflate.c +++ b/lib/decompress_inflate.c @@ -27,17 +27,17 @@ #define GZIP_IOBUF_SIZE (16*1024) -static int INIT nofill(void *buffer, unsigned int len) +static long INIT nofill(void *buffer, unsigned long len) { return -1; } /* Included from initramfs et al code */ -STATIC int INIT gunzip(unsigned char *buf, int len, - int(*fill)(void*, unsigned int), - int(*flush)(void*, unsigned int), +STATIC int INIT gunzip(unsigned char *buf, long len, + long (*fill)(void*, unsigned long), + long (*flush)(void*, unsigned long), unsigned char *out_buf, - int *pos, + long *pos, void(*error)(char *x)) { u8 *zbuf; struct z_stream_s *strm; @@ -142,7 +142,7 @@ STATIC int INIT gunzip(unsigned char *buf, int len, /* Write any data generated */ if (flush && strm->next_out > out_buf) { - int l = strm->next_out - out_buf; + long l = strm->next_out - out_buf; if (l != flush(out_buf, l)) { rc = -1; error("write error"); diff --git a/lib/decompress_unlz4.c b/lib/decompress_unlz4.c index 3ad7f3954dfd..40f66ebe57b7 100644 --- a/lib/decompress_unlz4.c +++ b/lib/decompress_unlz4.c @@ -31,10 +31,10 @@ #define LZ4_DEFAULT_UNCOMPRESSED_CHUNK_SIZE (8 << 20) #define ARCHIVE_MAGICNUMBER 0x184C2102 -STATIC inline int INIT unlz4(u8 *input, int in_len, - int (*fill) (void *, unsigned int), - int (*flush) (void *, unsigned int), - u8 *output, int *posp, +STATIC inline int INIT 
unlz4(u8 *input, long in_len, + long (*fill)(void *, unsigned long), + long (*flush)(void *, unsigned long), + u8 *output, long *posp, void (*error) (char *x)) { int ret = -1; @@ -43,7 +43,7 @@ STATIC inline int INIT unlz4(u8 *input, int in_len, u8 *inp; u8 *inp_start; u8 *outp; - int size = in_len; + long size = in_len; #ifdef PREBOOT size_t out_len = get_unaligned_le32(input + in_len); #endif @@ -196,11 +196,11 @@ exit_0: } #ifdef PREBOOT -STATIC int INIT decompress(unsigned char *buf, int in_len, - int(*fill)(void*, unsigned int), - int(*flush)(void*, unsigned int), +STATIC int INIT decompress(unsigned char *buf, long in_len, + long (*fill)(void*, unsigned long), + long (*flush)(void*, unsigned long), unsigned char *output, - int *posp, + long *posp, void(*error)(char *x) ) { diff --git a/lib/decompress_unlzma.c b/lib/decompress_unlzma.c index 32adb73a9038..0be83af62b88 100644 --- a/lib/decompress_unlzma.c +++ b/lib/decompress_unlzma.c @@ -65,11 +65,11 @@ static long long INIT read_int(unsigned char *ptr, int size) #define LZMA_IOBUF_SIZE 0x10000 struct rc { - int (*fill)(void*, unsigned int); + long (*fill)(void*, unsigned long); uint8_t *ptr; uint8_t *buffer; uint8_t *buffer_end; - int buffer_size; + long buffer_size; uint32_t code; uint32_t range; uint32_t bound; @@ -82,7 +82,7 @@ struct rc { #define RC_MODEL_TOTAL_BITS 11 -static int INIT nofill(void *buffer, unsigned int len) +static long INIT nofill(void *buffer, unsigned long len) { return -1; } @@ -99,8 +99,8 @@ static void INIT rc_read(struct rc *rc) /* Called once */ static inline void INIT rc_init(struct rc *rc, - int (*fill)(void*, unsigned int), - char *buffer, int buffer_size) + long (*fill)(void*, unsigned long), + char *buffer, long buffer_size) { if (fill) rc->fill = fill; @@ -280,7 +280,7 @@ struct writer { size_t buffer_pos; int bufsize; size_t global_pos; - int(*flush)(void*, unsigned int); + long (*flush)(void*, unsigned long); struct lzma_header *header; }; @@ -534,11 +534,11 @@ static inline int INIT process_bit1(struct writer *wr, struct rc *rc, -STATIC inline int INIT unlzma(unsigned char *buf, int in_len, - int(*fill)(void*, unsigned int), - int(*flush)(void*, unsigned int), +STATIC inline int INIT unlzma(unsigned char *buf, long in_len, + long (*fill)(void*, unsigned long), + long (*flush)(void*, unsigned long), unsigned char *output, - int *posp, + long *posp, void(*error)(char *x) ) { @@ -667,11 +667,11 @@ exit_0: } #ifdef PREBOOT -STATIC int INIT decompress(unsigned char *buf, int in_len, - int(*fill)(void*, unsigned int), - int(*flush)(void*, unsigned int), +STATIC int INIT decompress(unsigned char *buf, long in_len, + long (*fill)(void*, unsigned long), + long (*flush)(void*, unsigned long), unsigned char *output, - int *posp, + long *posp, void(*error)(char *x) ) { diff --git a/lib/decompress_unlzo.c b/lib/decompress_unlzo.c index 960183d4258f..b94a31bdd87d 100644 --- a/lib/decompress_unlzo.c +++ b/lib/decompress_unlzo.c @@ -51,7 +51,7 @@ static const unsigned char lzop_magic[] = { #define HEADER_SIZE_MIN (9 + 7 + 4 + 8 + 1 + 4) #define HEADER_SIZE_MAX (9 + 7 + 1 + 8 + 8 + 4 + 1 + 255 + 4) -STATIC inline int INIT parse_header(u8 *input, int *skip, int in_len) +STATIC inline long INIT parse_header(u8 *input, long *skip, long in_len) { int l; u8 *parse = input; @@ -108,14 +108,14 @@ STATIC inline int INIT parse_header(u8 *input, int *skip, int in_len) return 1; } -STATIC inline int INIT unlzo(u8 *input, int in_len, - int (*fill) (void *, unsigned int), - int (*flush) (void *, unsigned int), - u8 *output, 
int *posp, +STATIC int INIT unlzo(u8 *input, long in_len, + long (*fill)(void *, unsigned long), + long (*flush)(void *, unsigned long), + u8 *output, long *posp, void (*error) (char *x)) { u8 r = 0; - int skip = 0; + long skip = 0; u32 src_len, dst_len; size_t tmp; u8 *in_buf, *in_buf_save, *out_buf; diff --git a/lib/decompress_unxz.c b/lib/decompress_unxz.c index 9f34eb56854d..b07a78340e9d 100644 --- a/lib/decompress_unxz.c +++ b/lib/decompress_unxz.c @@ -248,10 +248,10 @@ void *memmove(void *dest, const void *src, size_t size) * both input and output buffers are available as a single chunk, i.e. when * fill() and flush() won't be used. */ -STATIC int INIT unxz(unsigned char *in, int in_size, - int (*fill)(void *dest, unsigned int size), - int (*flush)(void *src, unsigned int size), - unsigned char *out, int *in_used, +STATIC int INIT unxz(unsigned char *in, long in_size, + long (*fill)(void *dest, unsigned long size), + long (*flush)(void *src, unsigned long size), + unsigned char *out, long *in_used, void (*error)(char *x)) { struct xz_buf b; @@ -329,7 +329,7 @@ STATIC int INIT unxz(unsigned char *in, int in_size, * returned by xz_dec_run(), but probably * it's not too bad. */ - if (flush(b.out, b.out_pos) != (int)b.out_pos) + if (flush(b.out, b.out_pos) != (long)b.out_pos) ret = XZ_BUF_ERROR; b.out_pos = 0; -- cgit v1.2.3-59-g8ed1b From ab602f799159393143d567e5c04b936fec79d6bd Mon Sep 17 00:00:00 2001 From: Jack Miller Date: Fri, 8 Aug 2014 14:23:19 -0700 Subject: shm: make exit_shm work proportional to task activity This is small set of patches our team has had kicking around for a few versions internally that fixes tasks getting hung on shm_exit when there are many threads hammering it at once. Anton wrote a simple test to cause the issue: http://ozlabs.org/~anton/junkcode/bust_shm_exit.c Before applying this patchset, this test code will cause either hanging tracebacks or pthread out of memory errors. After this patchset, it will still produce output like: root@somehost:~# ./bust_shm_exit 1024 160 ... INFO: rcu_sched detected stalls on CPUs/tasks: {} (detected by 116, t=2111 jiffies, g=241, c=240, q=7113) INFO: Stall ended before state dump start ... But the task will continue to run along happily, so we consider this an improvement over hanging, even if it's a bit noisy. This patch (of 3): exit_shm obtains the ipc_ns shm rwsem for write and holds it while it walks every shared memory segment in the namespace. Thus the amount of work is related to the number of shm segments in the namespace not the number of segments that might need to be cleaned. In addition, this occurs after the task has been notified the thread has exited, so the number of tasks waiting for the ns shm rwsem can grow without bound until memory is exausted. Add a list to the task struct of all shmids allocated by this task. Init the list head in copy_process. Use the ns->rwsem for locking. Add segments after id is added, remove before removing from id. On unshare of NEW_IPCNS orphan any ids as if the task had exited, similar to handling of semaphore undo. I chose a define for the init sequence since its a simple list init, otherwise it would require a function call to avoid include loops between the semaphore code and the task struct. Converting the list_del to list_del_init for the unshare cases would remove the exit followed by init, but I left it blow up if not inited. 
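As a rough illustration of the workload being described (a sketch only, not the referenced bust_shm_exit.c; the thread count, loop structure and segment size are assumptions), a stressor along the following lines keeps many short-lived tasks exiting while the namespace holds live segments, which is exactly when the old namespace-wide scan in exit_shm() becomes expensive:

    #include <pthread.h>
    #include <stdlib.h>
    #include <sys/ipc.h>
    #include <sys/shm.h>

    /* Each thread creates one segment, maps and unmaps it, then exits,
     * so exit_shm() runs for every short-lived task. */
    static void *hammer(void *arg)
    {
            int id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0600);

            if (id >= 0) {
                    void *p = shmat(id, NULL, 0);

                    if (p != (void *)-1)
                            shmdt(p);
                    shmctl(id, IPC_RMID, NULL);
            }
            return NULL;
    }

    int main(int argc, char **argv)
    {
            int nthreads = argc > 1 ? atoi(argv[1]) : 128;
            pthread_t *tids = calloc(nthreads, sizeof(*tids));
            int i;

            if (!tids)
                    return 1;

            for (;;) {              /* run until interrupted */
                    for (i = 0; i < nthreads; i++)
                            pthread_create(&tids[i], NULL, hammer, NULL);
                    for (i = 0; i < nthreads; i++)
                            pthread_join(tids[i], NULL);
            }
            return 0;
    }

With the per-task shm_clist introduced by this patch, each exiting task walks only the segments it created rather than every segment in the namespace, so the cost of exit_shm() tracks the task's own activity.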
Signed-off-by: Milton Miller Signed-off-by: Jack Miller Cc: Davidlohr Bueso Cc: Manfred Spraul Cc: Anton Blanchard Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 2 ++ include/linux/shm.h | 16 +++++++++++++++- ipc/shm.c | 22 +++++++++++----------- kernel/fork.c | 6 ++++++ 4 files changed, 34 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index b21e9218c0fd..db2f6474e95e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -33,6 +33,7 @@ struct sched_param { #include #include +#include #include #include #include @@ -1385,6 +1386,7 @@ struct task_struct { #ifdef CONFIG_SYSVIPC /* ipc stuff */ struct sysv_sem sysvsem; + struct sysv_shm sysvshm; #endif #ifdef CONFIG_DETECT_HUNG_TASK /* hung task detection */ diff --git a/include/linux/shm.h b/include/linux/shm.h index 57d77709fbe2..fd206387048a 100644 --- a/include/linux/shm.h +++ b/include/linux/shm.h @@ -1,6 +1,7 @@ #ifndef _LINUX_SHM_H_ #define _LINUX_SHM_H_ +#include #include #include #include @@ -20,6 +21,7 @@ struct shmid_kernel /* private to the kernel */ /* The task created the shm object. NULL if the task is dead. */ struct task_struct *shm_creator; + struct list_head shm_clist; /* list by creator */ }; /* shm_mode upper byte flags */ @@ -44,11 +46,20 @@ struct shmid_kernel /* private to the kernel */ #define SHM_HUGE_1GB (30 << SHM_HUGE_SHIFT) #ifdef CONFIG_SYSVIPC +struct sysv_shm { + struct list_head shm_clist; +}; + long do_shmat(int shmid, char __user *shmaddr, int shmflg, unsigned long *addr, unsigned long shmlba); extern int is_file_shm_hugepages(struct file *file); -extern void exit_shm(struct task_struct *task); +void exit_shm(struct task_struct *task); +#define shm_init_task(task) INIT_LIST_HEAD(&(task)->sysvshm.shm_clist) #else +struct sysv_shm { + /* empty */ +}; + static inline long do_shmat(int shmid, char __user *shmaddr, int shmflg, unsigned long *addr, unsigned long shmlba) @@ -62,6 +73,9 @@ static inline int is_file_shm_hugepages(struct file *file) static inline void exit_shm(struct task_struct *task) { } +static inline void shm_init_task(struct task_struct *task) +{ +} #endif #endif /* _LINUX_SHM_H_ */ diff --git a/ipc/shm.c b/ipc/shm.c index 89fc354156cb..1fc3a61b443b 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -178,6 +178,7 @@ static void shm_rcu_free(struct rcu_head *head) static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s) { + list_del(&s->shm_clist); ipc_rmid(&shm_ids(ns), &s->shm_perm); } @@ -268,14 +269,10 @@ static void shm_close(struct vm_area_struct *vma) } /* Called with ns->shm_ids(ns).rwsem locked */ -static int shm_try_destroy_current(int id, void *p, void *data) +static void shm_mark_orphan(struct shmid_kernel *shp, struct ipc_namespace *ns) { - struct ipc_namespace *ns = data; - struct kern_ipc_perm *ipcp = p; - struct shmid_kernel *shp = container_of(ipcp, struct shmid_kernel, shm_perm); - - if (shp->shm_creator != current) - return 0; + if (WARN_ON(shp->shm_creator != current)) /* Remove me when it works */ + return; /* * Mark it as orphaned to destroy the segment when @@ -289,13 +286,12 @@ static int shm_try_destroy_current(int id, void *p, void *data) * is not set, it shouldn't be deleted here. 
*/ if (!ns->shm_rmid_forced) - return 0; + return; if (shm_may_destroy(ns, shp)) { shm_lock_by_ptr(shp); shm_destroy(ns, shp); } - return 0; } /* Called with ns->shm_ids(ns).rwsem locked */ @@ -333,14 +329,17 @@ void shm_destroy_orphaned(struct ipc_namespace *ns) void exit_shm(struct task_struct *task) { struct ipc_namespace *ns = task->nsproxy->ipc_ns; + struct shmid_kernel *shp, *n; if (shm_ids(ns).in_use == 0) return; /* Destroy all already created segments, but not mapped yet */ down_write(&shm_ids(ns).rwsem); - if (shm_ids(ns).in_use) - idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_current, ns); + list_for_each_entry_safe(shp, n, &task->sysvshm.shm_clist, shm_clist) + shm_mark_orphan(shp, ns); + /* remove the list head from any segments still attached */ + list_del(&task->sysvshm.shm_clist); up_write(&shm_ids(ns).rwsem); } @@ -561,6 +560,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) shp->shm_nattch = 0; shp->shm_file = file; shp->shm_creator = current; + list_add(&shp->shm_clist, ¤t->sysvshm.shm_clist); /* * shmid gets reported as "inode#" in /proc/pid/maps. diff --git a/kernel/fork.c b/kernel/fork.c index 86da59e165ad..fa9124322cd4 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1362,6 +1362,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, if (retval) goto bad_fork_cleanup_policy; /* copy all the process information */ + shm_init_task(p); retval = copy_semundo(clone_flags, p); if (retval) goto bad_fork_cleanup_audit; @@ -1913,6 +1914,11 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) */ exit_sem(current); } + if (unshare_flags & CLONE_NEWIPC) { + /* Orphan segments in old ns (see sem above). */ + exit_shm(current); + shm_init_task(current); + } if (new_nsproxy) switch_task_namespaces(current, new_nsproxy); -- cgit v1.2.3-59-g8ed1b From 2f137d66fb65ef41df6e558f23d481f07394a424 Mon Sep 17 00:00:00 2001 From: Jack Miller Date: Fri, 8 Aug 2014 14:23:23 -0700 Subject: shm: remove unneeded extern for function A small cleanup while changing adjacent code. Extern is not needed for functions and only one declaration had it so remove it from the odd line. Signed-off-by: Milton Miller Signed-off-by: Jack Miller Cc: Davidlohr Bueso Cc: Manfred Spraul Cc: Anton Blanchard Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/shm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/shm.h b/include/linux/shm.h index fd206387048a..6fb801686ad6 100644 --- a/include/linux/shm.h +++ b/include/linux/shm.h @@ -52,7 +52,7 @@ struct sysv_shm { long do_shmat(int shmid, char __user *shmaddr, int shmflg, unsigned long *addr, unsigned long shmlba); -extern int is_file_shm_hugepages(struct file *file); +int is_file_shm_hugepages(struct file *file); void exit_shm(struct task_struct *task); #define shm_init_task(task) INIT_LIST_HEAD(&(task)->sysvshm.shm_clist) #else -- cgit v1.2.3-59-g8ed1b From 308c09f17da4adc53935115dbeb5bce4f067d8f9 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Fri, 8 Aug 2014 14:23:25 -0700 Subject: lib/scatterlist: make ARCH_HAS_SG_CHAIN an actual Kconfig Rather than have architectures #define ARCH_HAS_SG_CHAIN in an architecture specific scatterlist.h, make it a proper Kconfig option and use that instead. At same time, remove the header files are are now mostly useless and just include asm-generic/scatterlist.h. 
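As a minimal sketch of the consumer-side pattern this conversion enables (illustrative only: the helper below is hypothetical, while CONFIG_ARCH_HAS_SG_CHAIN and sg_chain() are the symbols this patch touches), generic code can now key off the Kconfig symbol instead of a per-arch header define:

    #include <linux/kernel.h>
    #include <linux/scatterlist.h>

    /* Hypothetical helper: chain two tables only where the architecture
     * selects ARCH_HAS_SG_CHAIN; otherwise report the limitation instead
     * of tripping the BUG() in sg_chain(). */
    static void link_sg_tables(struct scatterlist *first, unsigned int nents,
                               struct scatterlist *second)
    {
            if (IS_ENABLED(CONFIG_ARCH_HAS_SG_CHAIN))
                    sg_chain(first, nents, second);
            else
                    pr_warn("scatterlist chaining not supported here\n");
    }

The same symbol is what include/linux/scatterlist.h and include/scsi/scsi.h test below in place of the old ARCH_HAS_SG_CHAIN define.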
[sfr@canb.auug.org.au: powerpc files now need asm/dma.h] Signed-off-by: Laura Abbott Acked-by: Thomas Gleixner [x86] Acked-by: Benjamin Herrenschmidt [powerpc] Acked-by: Heiko Carstens Cc: Russell King Cc: Tony Luck Cc: Fenghua Yu Cc: Paul Mackerras Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: "James E.J. Bottomley" Cc: Martin Schwidefsky Signed-off-by: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/Kconfig | 1 + arch/arm/include/asm/Kbuild | 1 + arch/arm/include/asm/scatterlist.h | 12 ------------ arch/arm64/Kconfig | 1 + arch/ia64/Kconfig | 1 + arch/ia64/include/asm/Kbuild | 1 + arch/ia64/include/asm/scatterlist.h | 7 ------- arch/powerpc/Kconfig | 1 + arch/powerpc/include/asm/Kbuild | 1 + arch/powerpc/include/asm/scatterlist.h | 17 ----------------- arch/powerpc/mm/dma-noncoherent.c | 1 + arch/powerpc/platforms/44x/warp.c | 1 + arch/powerpc/platforms/52xx/efika.c | 1 + arch/powerpc/platforms/amigaone/setup.c | 1 + arch/s390/Kconfig | 1 + arch/s390/include/asm/Kbuild | 1 + arch/s390/include/asm/scatterlist.h | 3 --- arch/sparc/Kconfig | 1 + arch/sparc/include/asm/Kbuild | 1 + arch/sparc/include/asm/scatterlist.h | 8 -------- arch/um/include/asm/Kbuild | 1 + arch/x86/Kconfig | 1 + arch/x86/include/asm/Kbuild | 3 ++- arch/x86/include/asm/scatterlist.h | 8 -------- include/linux/scatterlist.h | 2 +- include/scsi/scsi.h | 2 +- lib/Kconfig | 7 +++++++ lib/scatterlist.c | 4 ++-- 28 files changed, 30 insertions(+), 60 deletions(-) delete mode 100644 arch/arm/include/asm/scatterlist.h delete mode 100644 arch/ia64/include/asm/scatterlist.h delete mode 100644 arch/powerpc/include/asm/scatterlist.h delete mode 100644 arch/s390/include/asm/scatterlist.h delete mode 100644 arch/sparc/include/asm/scatterlist.h delete mode 100644 arch/x86/include/asm/scatterlist.h (limited to 'include/linux') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index d31c500653a2..8e9dbcbcf5af 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -83,6 +83,7 @@ config ARM . 
config ARM_HAS_SG_CHAIN + select ARCH_HAS_SG_CHAIN bool config NEED_SG_DMA_LENGTH diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild index f5a357601983..70cd84eb7fda 100644 --- a/arch/arm/include/asm/Kbuild +++ b/arch/arm/include/asm/Kbuild @@ -22,6 +22,7 @@ generic-y += poll.h generic-y += preempt.h generic-y += resource.h generic-y += rwsem.h +generic-y += scatterlist.h generic-y += sections.h generic-y += segment.h generic-y += sembuf.h diff --git a/arch/arm/include/asm/scatterlist.h b/arch/arm/include/asm/scatterlist.h deleted file mode 100644 index cefdb8f898a1..000000000000 --- a/arch/arm/include/asm/scatterlist.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef _ASMARM_SCATTERLIST_H -#define _ASMARM_SCATTERLIST_H - -#ifdef CONFIG_ARM_HAS_SG_CHAIN -#define ARCH_HAS_SG_CHAIN -#endif - -#include -#include -#include - -#endif /* _ASMARM_SCATTERLIST_H */ diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index b0f9c9db9590..fd4e81a4e1ce 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1,6 +1,7 @@ config ARM64 def_bool y select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE + select ARCH_HAS_SG_CHAIN select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_USE_CMPXCHG_LOCKREF select ARCH_SUPPORTS_ATOMIC_RMW diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 44a6915ab13d..c84c88bbbbd7 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -28,6 +28,7 @@ config IA64 select HAVE_MEMBLOCK select HAVE_MEMBLOCK_NODE_MAP select HAVE_VIRT_CPU_ACCOUNTING + select ARCH_HAS_SG_CHAIN select VIRT_TO_BUS select ARCH_DISCARD_MEMBLOCK select GENERIC_IRQ_PROBE diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild index 0da4aa2602ae..e8317d2d6c8d 100644 --- a/arch/ia64/include/asm/Kbuild +++ b/arch/ia64/include/asm/Kbuild @@ -5,5 +5,6 @@ generic-y += hash.h generic-y += kvm_para.h generic-y += mcs_spinlock.h generic-y += preempt.h +generic-y += scatterlist.h generic-y += trace_clock.h generic-y += vtime.h diff --git a/arch/ia64/include/asm/scatterlist.h b/arch/ia64/include/asm/scatterlist.h deleted file mode 100644 index 08fd93bff1db..000000000000 --- a/arch/ia64/include/asm/scatterlist.h +++ /dev/null @@ -1,7 +0,0 @@ -#ifndef _ASM_IA64_SCATTERLIST_H -#define _ASM_IA64_SCATTERLIST_H - -#include -#define ARCH_HAS_SG_CHAIN - -#endif /* _ASM_IA64_SCATTERLIST_H */ diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 80b94b0add1f..4bc7b62fb4b6 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -111,6 +111,7 @@ config PPC select HAVE_DMA_API_DEBUG select HAVE_OPROFILE select HAVE_DEBUG_KMEMLEAK + select ARCH_HAS_SG_CHAIN select GENERIC_ATOMIC64 if PPC32 select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE select HAVE_PERF_EVENTS diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild index 3fb1bc432f4f..7f23f162ce9c 100644 --- a/arch/powerpc/include/asm/Kbuild +++ b/arch/powerpc/include/asm/Kbuild @@ -4,5 +4,6 @@ generic-y += hash.h generic-y += mcs_spinlock.h generic-y += preempt.h generic-y += rwsem.h +generic-y += scatterlist.h generic-y += trace_clock.h generic-y += vtime.h diff --git a/arch/powerpc/include/asm/scatterlist.h b/arch/powerpc/include/asm/scatterlist.h deleted file mode 100644 index de1f620bd5c9..000000000000 --- a/arch/powerpc/include/asm/scatterlist.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef _ASM_POWERPC_SCATTERLIST_H -#define _ASM_POWERPC_SCATTERLIST_H -/* - * Copyright (C) 2001 PPC64 Team, IBM Corp - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of 
the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include -#include - -#define ARCH_HAS_SG_CHAIN - -#endif /* _ASM_POWERPC_SCATTERLIST_H */ diff --git a/arch/powerpc/mm/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c index 7b6c10750179..d85e86aac7fb 100644 --- a/arch/powerpc/mm/dma-noncoherent.c +++ b/arch/powerpc/mm/dma-noncoherent.c @@ -33,6 +33,7 @@ #include #include +#include #include "mmu_decl.h" diff --git a/arch/powerpc/platforms/44x/warp.c b/arch/powerpc/platforms/44x/warp.c index 534574a97ec9..3a104284b338 100644 --- a/arch/powerpc/platforms/44x/warp.c +++ b/arch/powerpc/platforms/44x/warp.c @@ -25,6 +25,7 @@ #include #include #include +#include static __initdata struct of_device_id warp_of_bus[] = { diff --git a/arch/powerpc/platforms/52xx/efika.c b/arch/powerpc/platforms/52xx/efika.c index 6e19b0ad5d26..3feffde9128d 100644 --- a/arch/powerpc/platforms/52xx/efika.c +++ b/arch/powerpc/platforms/52xx/efika.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/platforms/amigaone/setup.c b/arch/powerpc/platforms/amigaone/setup.c index 03aabc0e16ac..2fe12046279e 100644 --- a/arch/powerpc/platforms/amigaone/setup.c +++ b/arch/powerpc/platforms/amigaone/setup.c @@ -24,6 +24,7 @@ #include #include #include +#include extern void __flush_disable_L1(void); diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 8ca60f8d5683..05c78bb5f570 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -145,6 +145,7 @@ config S390 select TTY select VIRT_CPU_ACCOUNTING select VIRT_TO_BUS + select ARCH_HAS_SG_CHAIN config SCHED_OMIT_FRAME_POINTER def_bool y diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild index 57892a8a9055..b3fea0722ff1 100644 --- a/arch/s390/include/asm/Kbuild +++ b/arch/s390/include/asm/Kbuild @@ -4,4 +4,5 @@ generic-y += clkdev.h generic-y += hash.h generic-y += mcs_spinlock.h generic-y += preempt.h +generic-y += scatterlist.h generic-y += trace_clock.h diff --git a/arch/s390/include/asm/scatterlist.h b/arch/s390/include/asm/scatterlist.h deleted file mode 100644 index 6d45ef6c12a7..000000000000 --- a/arch/s390/include/asm/scatterlist.h +++ /dev/null @@ -1,3 +0,0 @@ -#include - -#define ARCH_HAS_SG_CHAIN diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 4692c90936f1..a537816613f9 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -42,6 +42,7 @@ config SPARC select MODULES_USE_ELF_RELA select ODD_RT_SIGACTION select OLD_SIGSUSPEND + select ARCH_HAS_SG_CHAIN config SPARC32 def_bool !64BIT diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild index a45821818003..cdd1b447bb6c 100644 --- a/arch/sparc/include/asm/Kbuild +++ b/arch/sparc/include/asm/Kbuild @@ -15,6 +15,7 @@ generic-y += mcs_spinlock.h generic-y += module.h generic-y += mutex.h generic-y += preempt.h +generic-y += scatterlist.h generic-y += serial.h generic-y += trace_clock.h generic-y += types.h diff --git a/arch/sparc/include/asm/scatterlist.h b/arch/sparc/include/asm/scatterlist.h deleted file mode 100644 index 92bb638313f8..000000000000 --- a/arch/sparc/include/asm/scatterlist.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef _SPARC_SCATTERLIST_H -#define _SPARC_SCATTERLIST_H - -#include - -#define ARCH_HAS_SG_CHAIN - -#endif /* !(_SPARC_SCATTERLIST_H) */ diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild index a5e4b6068213..7bd64aa2e94a 100644 --- 
a/arch/um/include/asm/Kbuild +++ b/arch/um/include/asm/Kbuild @@ -21,6 +21,7 @@ generic-y += param.h generic-y += pci.h generic-y += percpu.h generic-y += preempt.h +generic-y += scatterlist.h generic-y += sections.h generic-y += switch_to.h generic-y += topology.h diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index bf2405053af5..c915cc6e40be 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -96,6 +96,7 @@ config X86 select IRQ_FORCED_THREADING select HAVE_BPF_JIT if X86_64 select HAVE_ARCH_TRANSPARENT_HUGEPAGE + select ARCH_HAS_SG_CHAIN select CLKEVT_I8253 select ARCH_HAVE_NMI_SAFE_CMPXCHG select GENERIC_IOMAP diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index 3ca9762e1649..3bf000fab0ae 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -5,6 +5,7 @@ genhdr-y += unistd_64.h genhdr-y += unistd_x32.h generic-y += clkdev.h -generic-y += early_ioremap.h generic-y += cputime.h +generic-y += early_ioremap.h generic-y += mcs_spinlock.h +generic-y += scatterlist.h diff --git a/arch/x86/include/asm/scatterlist.h b/arch/x86/include/asm/scatterlist.h deleted file mode 100644 index 4240878b9d76..000000000000 --- a/arch/x86/include/asm/scatterlist.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef _ASM_X86_SCATTERLIST_H -#define _ASM_X86_SCATTERLIST_H - -#include - -#define ARCH_HAS_SG_CHAIN - -#endif /* _ASM_X86_SCATTERLIST_H */ diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index f4ec8bbcb372..ed8f9e70df9b 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -136,7 +136,7 @@ static inline void sg_set_buf(struct scatterlist *sg, const void *buf, static inline void sg_chain(struct scatterlist *prv, unsigned int prv_nents, struct scatterlist *sgl) { -#ifndef ARCH_HAS_SG_CHAIN +#ifndef CONFIG_ARCH_HAS_SG_CHAIN BUG(); #endif diff --git a/include/scsi/scsi.h b/include/scsi/scsi.h index e6df23cae7be..261e708010da 100644 --- a/include/scsi/scsi.h +++ b/include/scsi/scsi.h @@ -31,7 +31,7 @@ enum scsi_timeouts { * Like SCSI_MAX_SG_SEGMENTS, but for archs that have sg chaining. This limit * is totally arbitrary, a setting of 2048 will get you at least 8mb ios. 
*/ -#ifdef ARCH_HAS_SG_CHAIN +#ifdef CONFIG_ARCH_HAS_SG_CHAIN #define SCSI_MAX_SG_CHAIN_SEGMENTS 2048 #else #define SCSI_MAX_SG_CHAIN_SEGMENTS SCSI_MAX_SG_SEGMENTS diff --git a/lib/Kconfig b/lib/Kconfig index df872659ddd3..a5ce0c7f6c30 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -508,4 +508,11 @@ config UCS2_STRING source "lib/fonts/Kconfig" +# +# sg chaining option +# + +config ARCH_HAS_SG_CHAIN + def_bool n + endmenu diff --git a/lib/scatterlist.c b/lib/scatterlist.c index b4415fceb7e7..9cdf62f8accd 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -73,7 +73,7 @@ EXPORT_SYMBOL(sg_nents); **/ struct scatterlist *sg_last(struct scatterlist *sgl, unsigned int nents) { -#ifndef ARCH_HAS_SG_CHAIN +#ifndef CONFIG_ARCH_HAS_SG_CHAIN struct scatterlist *ret = &sgl[nents - 1]; #else struct scatterlist *sg, *ret = NULL; @@ -255,7 +255,7 @@ int __sg_alloc_table(struct sg_table *table, unsigned int nents, if (nents == 0) return -EINVAL; -#ifndef ARCH_HAS_SG_CHAIN +#ifndef CONFIG_ARCH_HAS_SG_CHAIN if (WARN_ON_ONCE(nents > max_ents)) return -EINVAL; #endif -- cgit v1.2.3-59-g8ed1b From a6c19dfe39941a5d3f4d072121c0a4841e7e26fd Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Fri, 8 Aug 2014 14:23:40 -0700 Subject: arm64,ia64,ppc,s390,sh,tile,um,x86,mm: remove default gate area The core mm code will provide a default gate area based on FIXADDR_USER_START and FIXADDR_USER_END if !defined(__HAVE_ARCH_GATE_AREA) && defined(AT_SYSINFO_EHDR). This default is only useful for ia64. arm64, ppc, s390, sh, tile, 64-bit UML, and x86_32 have their own code just to disable it. arm, 32-bit UML, and x86_64 have gate areas, but they have their own implementations. This gets rid of the default and moves the code into ia64. This should save some code on architectures without a gate area: it's now possible to inline the gate_area functions in the default case. Signed-off-by: Andy Lutomirski Acked-by: Nathan Lynch Acked-by: H. Peter Anvin Acked-by: Benjamin Herrenschmidt [in principle] Acked-by: Richard Weinberger [for um] Acked-by: Will Deacon [for arm64] Cc: Catalin Marinas Cc: Will Deacon Cc: Tony Luck Cc: Fenghua Yu Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Chris Metcalf Cc: Jeff Dike Cc: Richard Weinberger Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. 
Peter Anvin" Cc: Nathan Lynch Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm64/include/asm/page.h | 3 --- arch/arm64/kernel/vdso.c | 19 ------------------- arch/ia64/include/asm/page.h | 2 ++ arch/ia64/mm/init.c | 31 +++++++++++++++++++++++++++++++ arch/powerpc/include/asm/page.h | 3 --- arch/powerpc/kernel/vdso.c | 16 ---------------- arch/s390/include/asm/page.h | 2 -- arch/s390/kernel/vdso.c | 15 --------------- arch/sh/include/asm/page.h | 5 ----- arch/sh/kernel/vsyscall/vsyscall.c | 15 --------------- arch/tile/include/asm/page.h | 6 ------ arch/tile/kernel/vdso.c | 15 --------------- arch/um/include/asm/page.h | 5 +++++ arch/x86/include/asm/page.h | 1 - arch/x86/include/asm/page_64.h | 2 ++ arch/x86/um/asm/elf.h | 1 - arch/x86/um/mem_64.c | 15 --------------- arch/x86/vdso/vdso32-setup.c | 19 +------------------ include/linux/mm.h | 17 ++++++++++++----- mm/memory.c | 38 -------------------------------------- mm/nommu.c | 5 ----- 21 files changed, 53 insertions(+), 182 deletions(-) (limited to 'include/linux') diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h index 7a3f462133b0..22b16232bd60 100644 --- a/arch/arm64/include/asm/page.h +++ b/arch/arm64/include/asm/page.h @@ -28,9 +28,6 @@ #define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) #define PAGE_MASK (~(PAGE_SIZE-1)) -/* We do define AT_SYSINFO_EHDR but don't use the gate mechanism */ -#define __HAVE_ARCH_GATE_AREA 1 - /* * The idmap and swapper page tables need some space reserved in the kernel * image. Both require pgd, pud (4 levels only) and pmd tables to (section) diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index a81a446a5786..32aeea083d93 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -194,25 +194,6 @@ up_fail: return PTR_ERR(ret); } -/* - * We define AT_SYSINFO_EHDR, so we need these function stubs to keep - * Linux happy. - */ -int in_gate_area_no_mm(unsigned long addr) -{ - return 0; -} - -int in_gate_area(struct mm_struct *mm, unsigned long addr) -{ - return 0; -} - -struct vm_area_struct *get_gate_vma(struct mm_struct *mm) -{ - return NULL; -} - /* * Update the vDSO data page to keep in sync with kernel timekeeping. 
*/ diff --git a/arch/ia64/include/asm/page.h b/arch/ia64/include/asm/page.h index f1e1b2e3cdb3..1f1bf144fe62 100644 --- a/arch/ia64/include/asm/page.h +++ b/arch/ia64/include/asm/page.h @@ -231,4 +231,6 @@ get_order (unsigned long size) #define PERCPU_ADDR (-PERCPU_PAGE_SIZE) #define LOAD_OFFSET (KERNEL_START - KERNEL_TR_PAGE_SIZE) +#define __HAVE_ARCH_GATE_AREA 1 + #endif /* _ASM_IA64_PAGE_H */ diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 892d43e32f3b..6b3345758d3e 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -278,6 +278,37 @@ setup_gate (void) ia64_patch_gate(); } +static struct vm_area_struct gate_vma; + +static int __init gate_vma_init(void) +{ + gate_vma.vm_mm = NULL; + gate_vma.vm_start = FIXADDR_USER_START; + gate_vma.vm_end = FIXADDR_USER_END; + gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC; + gate_vma.vm_page_prot = __P101; + + return 0; +} +__initcall(gate_vma_init); + +struct vm_area_struct *get_gate_vma(struct mm_struct *mm) +{ + return &gate_vma; +} + +int in_gate_area_no_mm(unsigned long addr) +{ + if ((addr >= FIXADDR_USER_START) && (addr < FIXADDR_USER_END)) + return 1; + return 0; +} + +int in_gate_area(struct mm_struct *mm, unsigned long addr) +{ + return in_gate_area_no_mm(addr); +} + void ia64_mmu_init(void *my_cpu_data) { unsigned long pta, impl_va_bits; diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index 32e4e212b9c1..26fe1ae15212 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -48,9 +48,6 @@ extern unsigned int HPAGE_SHIFT; #define HUGE_MAX_HSTATE (MMU_PAGE_COUNT-1) #endif -/* We do define AT_SYSINFO_EHDR but don't use the gate mechanism */ -#define __HAVE_ARCH_GATE_AREA 1 - /* * Subtle: (1 << PAGE_SHIFT) is an int, not an unsigned long. 
So if we * assign PAGE_MASK to a larger type it gets extended the way we want diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index ce74c335a6a4..f174351842cf 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -840,19 +840,3 @@ static int __init vdso_init(void) return 0; } arch_initcall(vdso_init); - -int in_gate_area_no_mm(unsigned long addr) -{ - return 0; -} - -int in_gate_area(struct mm_struct *mm, unsigned long addr) -{ - return 0; -} - -struct vm_area_struct *get_gate_vma(struct mm_struct *mm) -{ - return NULL; -} - diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 114258eeaacd..7b2ac6e44166 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -162,6 +162,4 @@ static inline int devmem_is_allowed(unsigned long pfn) #include #include -#define __HAVE_ARCH_GATE_AREA 1 - #endif /* _S390_PAGE_H */ diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index 613649096783..0bbb7e027c5a 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -316,18 +316,3 @@ static int __init vdso_init(void) return 0; } early_initcall(vdso_init); - -int in_gate_area_no_mm(unsigned long addr) -{ - return 0; -} - -int in_gate_area(struct mm_struct *mm, unsigned long addr) -{ - return 0; -} - -struct vm_area_struct *get_gate_vma(struct mm_struct *mm) -{ - return NULL; -} diff --git a/arch/sh/include/asm/page.h b/arch/sh/include/asm/page.h index 15d970328f71..fe20d14ae051 100644 --- a/arch/sh/include/asm/page.h +++ b/arch/sh/include/asm/page.h @@ -186,11 +186,6 @@ typedef struct page *pgtable_t; #include #include -/* vDSO support */ -#ifdef CONFIG_VSYSCALL -#define __HAVE_ARCH_GATE_AREA -#endif - /* * Some drivers need to perform DMA into kmalloc'ed buffers * and so we have to increase the kmalloc minalign for this. diff --git a/arch/sh/kernel/vsyscall/vsyscall.c b/arch/sh/kernel/vsyscall/vsyscall.c index 5ca579720a09..ea2aa1393b87 100644 --- a/arch/sh/kernel/vsyscall/vsyscall.c +++ b/arch/sh/kernel/vsyscall/vsyscall.c @@ -92,18 +92,3 @@ const char *arch_vma_name(struct vm_area_struct *vma) return NULL; } - -struct vm_area_struct *get_gate_vma(struct mm_struct *mm) -{ - return NULL; -} - -int in_gate_area(struct mm_struct *mm, unsigned long address) -{ - return 0; -} - -int in_gate_area_no_mm(unsigned long address) -{ - return 0; -} diff --git a/arch/tile/include/asm/page.h b/arch/tile/include/asm/page.h index 672768008618..a213a8d84a95 100644 --- a/arch/tile/include/asm/page.h +++ b/arch/tile/include/asm/page.h @@ -38,12 +38,6 @@ #define PAGE_MASK (~(PAGE_SIZE - 1)) #define HPAGE_MASK (~(HPAGE_SIZE - 1)) -/* - * We do define AT_SYSINFO_EHDR to support vDSO, - * but don't use the gate mechanism. 
- */ -#define __HAVE_ARCH_GATE_AREA 1 - /* * If the Kconfig doesn't specify, set a maximum zone order that * is enough so that we can create huge pages from small pages given diff --git a/arch/tile/kernel/vdso.c b/arch/tile/kernel/vdso.c index 1533af24106e..5bc51d7dfdcb 100644 --- a/arch/tile/kernel/vdso.c +++ b/arch/tile/kernel/vdso.c @@ -121,21 +121,6 @@ const char *arch_vma_name(struct vm_area_struct *vma) return NULL; } -struct vm_area_struct *get_gate_vma(struct mm_struct *mm) -{ - return NULL; -} - -int in_gate_area(struct mm_struct *mm, unsigned long address) -{ - return 0; -} - -int in_gate_area_no_mm(unsigned long address) -{ - return 0; -} - int setup_vdso_pages(void) { struct page **pagelist; diff --git a/arch/um/include/asm/page.h b/arch/um/include/asm/page.h index 5ff53d9185f7..71c5d132062a 100644 --- a/arch/um/include/asm/page.h +++ b/arch/um/include/asm/page.h @@ -119,4 +119,9 @@ extern unsigned long uml_physmem; #include #endif /* __ASSEMBLY__ */ + +#ifdef CONFIG_X86_32 +#define __HAVE_ARCH_GATE_AREA 1 +#endif + #endif /* __UM_PAGE_H */ diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h index 775873d3be55..802dde30c928 100644 --- a/arch/x86/include/asm/page.h +++ b/arch/x86/include/asm/page.h @@ -70,7 +70,6 @@ extern bool __virt_addr_valid(unsigned long kaddr); #include #include -#define __HAVE_ARCH_GATE_AREA 1 #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA #endif /* __KERNEL__ */ diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h index 0f1ddee6a0ce..f408caf73430 100644 --- a/arch/x86/include/asm/page_64.h +++ b/arch/x86/include/asm/page_64.h @@ -39,4 +39,6 @@ void copy_page(void *to, void *from); #endif /* !__ASSEMBLY__ */ +#define __HAVE_ARCH_GATE_AREA 1 + #endif /* _ASM_X86_PAGE_64_H */ diff --git a/arch/x86/um/asm/elf.h b/arch/x86/um/asm/elf.h index 0feee2fd5077..25a1022dd793 100644 --- a/arch/x86/um/asm/elf.h +++ b/arch/x86/um/asm/elf.h @@ -216,6 +216,5 @@ extern long elf_aux_hwcap; #define ELF_HWCAP (elf_aux_hwcap) #define SET_PERSONALITY(ex) do ; while(0) -#define __HAVE_ARCH_GATE_AREA 1 #endif diff --git a/arch/x86/um/mem_64.c b/arch/x86/um/mem_64.c index c6492e75797b..f8fecaddcc0d 100644 --- a/arch/x86/um/mem_64.c +++ b/arch/x86/um/mem_64.c @@ -9,18 +9,3 @@ const char *arch_vma_name(struct vm_area_struct *vma) return NULL; } - -struct vm_area_struct *get_gate_vma(struct mm_struct *mm) -{ - return NULL; -} - -int in_gate_area(struct mm_struct *mm, unsigned long addr) -{ - return 0; -} - -int in_gate_area_no_mm(unsigned long addr) -{ - return 0; -} diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c index e4f7781ee162..e904c270573b 100644 --- a/arch/x86/vdso/vdso32-setup.c +++ b/arch/x86/vdso/vdso32-setup.c @@ -115,23 +115,6 @@ static __init int ia32_binfmt_init(void) return 0; } __initcall(ia32_binfmt_init); -#endif - -#else /* CONFIG_X86_32 */ - -struct vm_area_struct *get_gate_vma(struct mm_struct *mm) -{ - return NULL; -} - -int in_gate_area(struct mm_struct *mm, unsigned long addr) -{ - return 0; -} - -int in_gate_area_no_mm(unsigned long addr) -{ - return 0; -} +#endif /* CONFIG_SYSCTL */ #endif /* CONFIG_X86_64 */ diff --git a/include/linux/mm.h b/include/linux/mm.h index e03dd29145a0..8981cc882ed2 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2014,13 +2014,20 @@ static inline bool kernel_page_present(struct page *page) { return true; } #endif /* CONFIG_HIBERNATION */ #endif +#ifdef __HAVE_ARCH_GATE_AREA extern struct vm_area_struct *get_gate_vma(struct mm_struct *mm); -#ifdef 
__HAVE_ARCH_GATE_AREA -int in_gate_area_no_mm(unsigned long addr); -int in_gate_area(struct mm_struct *mm, unsigned long addr); +extern int in_gate_area_no_mm(unsigned long addr); +extern int in_gate_area(struct mm_struct *mm, unsigned long addr); #else -int in_gate_area_no_mm(unsigned long addr); -#define in_gate_area(mm, addr) ({(void)mm; in_gate_area_no_mm(addr);}) +static inline struct vm_area_struct *get_gate_vma(struct mm_struct *mm) +{ + return NULL; +} +static inline int in_gate_area_no_mm(unsigned long addr) { return 0; } +static inline int in_gate_area(struct mm_struct *mm, unsigned long addr) +{ + return 0; +} #endif /* __HAVE_ARCH_GATE_AREA */ #ifdef CONFIG_SYSCTL diff --git a/mm/memory.c b/mm/memory.c index 2a899e4e82ba..ab3537bcfed2 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3430,44 +3430,6 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) } #endif /* __PAGETABLE_PMD_FOLDED */ -#if !defined(__HAVE_ARCH_GATE_AREA) - -#if defined(AT_SYSINFO_EHDR) -static struct vm_area_struct gate_vma; - -static int __init gate_vma_init(void) -{ - gate_vma.vm_mm = NULL; - gate_vma.vm_start = FIXADDR_USER_START; - gate_vma.vm_end = FIXADDR_USER_END; - gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC; - gate_vma.vm_page_prot = __P101; - - return 0; -} -__initcall(gate_vma_init); -#endif - -struct vm_area_struct *get_gate_vma(struct mm_struct *mm) -{ -#ifdef AT_SYSINFO_EHDR - return &gate_vma; -#else - return NULL; -#endif -} - -int in_gate_area_no_mm(unsigned long addr) -{ -#ifdef AT_SYSINFO_EHDR - if ((addr >= FIXADDR_USER_START) && (addr < FIXADDR_USER_END)) - return 1; -#endif - return 0; -} - -#endif /* __HAVE_ARCH_GATE_AREA */ - static int __follow_pte(struct mm_struct *mm, unsigned long address, pte_t **ptepp, spinlock_t **ptlp) { diff --git a/mm/nommu.c b/mm/nommu.c index 4a852f6c5709..a881d9673c6b 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1981,11 +1981,6 @@ error: return -ENOMEM; } -int in_gate_area_no_mm(unsigned long addr) -{ - return 0; -} - int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) { BUG(); -- cgit v1.2.3-59-g8ed1b From 4bb5f5d9395bc112d93a134d8f5b05611eddc9c0 Mon Sep 17 00:00:00 2001 From: David Herrmann Date: Fri, 8 Aug 2014 14:25:25 -0700 Subject: mm: allow drivers to prevent new writable mappings This patch (of 6): The i_mmap_writable field counts existing writable mappings of an address_space. To allow drivers to prevent new writable mappings, make this counter signed and prevent new writable mappings if it is negative. This is modelled after i_writecount and DENYWRITE. This will be required by the shmem-sealing infrastructure to prevent any new writable mappings after the WRITE seal has been set. In case there exists a writable mapping, this operation will fail with EBUSY. Note that we rely on the fact that iff you already own a writable mapping, you can increase the counter without using the helpers. This is the same that we do for i_writecount. 
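As a hedged sketch of how a caller of this interface might look (the function and the driver state it represents are hypothetical; only the mapping_* helpers come from this patch), a driver that wants to forbid new shared-writable mappings would do roughly:

    #include <linux/fs.h>

    /* Hypothetical "seal writes" operation: refuse if a writable mapping
     * already exists, otherwise leave the counter negative so that new
     * shared-writable mmaps fail with -EPERM via mapping_map_writable(). */
    static int example_deny_new_writable(struct file *file)
    {
            struct address_space *mapping = file->f_mapping;
            int error;

            error = mapping_deny_writable(mapping); /* -EBUSY if writably mapped */
            if (error)
                    return error;

            /* ... record the seal / driver state here ... */

            /* To undo the denial later: mapping_allow_writable(mapping); */
            return 0;
    }

This mirrors the i_writecount/DENYWRITE scheme mentioned above: positive counter values count existing writable mappings, negative values count active denials, and the helpers guarantee the two never coexist.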
Signed-off-by: David Herrmann Acked-by: Hugh Dickins Cc: Michael Kerrisk Cc: Ryan Lortie Cc: Lennart Poettering Cc: Daniel Mack Cc: Andy Lutomirski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/inode.c | 1 + include/linux/fs.h | 29 +++++++++++++++++++++++++++-- kernel/fork.c | 2 +- mm/mmap.c | 30 ++++++++++++++++++++++++------ mm/swap_state.c | 1 + 5 files changed, 54 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/fs/inode.c b/fs/inode.c index 5938f3928944..26753ba7b6d6 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -165,6 +165,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode) mapping->a_ops = &empty_aops; mapping->host = inode; mapping->flags = 0; + atomic_set(&mapping->i_mmap_writable, 0); mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE); mapping->private_data = NULL; mapping->backing_dev_info = &default_backing_dev_info; diff --git a/include/linux/fs.h b/include/linux/fs.h index 1ab6c6913040..f0890e4a7c25 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -387,7 +387,7 @@ struct address_space { struct inode *host; /* owner: inode, block_device */ struct radix_tree_root page_tree; /* radix tree of all pages */ spinlock_t tree_lock; /* and lock protecting it */ - unsigned int i_mmap_writable;/* count VM_SHARED mappings */ + atomic_t i_mmap_writable;/* count VM_SHARED mappings */ struct rb_root i_mmap; /* tree of private and shared mappings */ struct list_head i_mmap_nonlinear;/*list VM_NONLINEAR mappings */ struct mutex i_mmap_mutex; /* protect tree, count, list */ @@ -470,10 +470,35 @@ static inline int mapping_mapped(struct address_space *mapping) * Note that i_mmap_writable counts all VM_SHARED vmas: do_mmap_pgoff * marks vma as VM_SHARED if it is shared, and the file was opened for * writing i.e. vma may be mprotected writable even if now readonly. + * + * If i_mmap_writable is negative, no new writable mappings are allowed. You + * can only deny writable mappings, if none exists right now. */ static inline int mapping_writably_mapped(struct address_space *mapping) { - return mapping->i_mmap_writable != 0; + return atomic_read(&mapping->i_mmap_writable) > 0; +} + +static inline int mapping_map_writable(struct address_space *mapping) +{ + return atomic_inc_unless_negative(&mapping->i_mmap_writable) ? + 0 : -EPERM; +} + +static inline void mapping_unmap_writable(struct address_space *mapping) +{ + atomic_dec(&mapping->i_mmap_writable); +} + +static inline int mapping_deny_writable(struct address_space *mapping) +{ + return atomic_dec_unless_positive(&mapping->i_mmap_writable) ? 
+ 0 : -EBUSY; +} + +static inline void mapping_allow_writable(struct address_space *mapping) +{ + atomic_inc(&mapping->i_mmap_writable); } /* diff --git a/kernel/fork.c b/kernel/fork.c index fa9124322cd4..1380d8ace334 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -429,7 +429,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) atomic_dec(&inode->i_writecount); mutex_lock(&mapping->i_mmap_mutex); if (tmp->vm_flags & VM_SHARED) - mapping->i_mmap_writable++; + atomic_inc(&mapping->i_mmap_writable); flush_dcache_mmap_lock(mapping); /* insert tmp into the share list, just after mpnt */ if (unlikely(tmp->vm_flags & VM_NONLINEAR)) diff --git a/mm/mmap.c b/mm/mmap.c index 64c9d736155c..c1f2ea4a0b99 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -221,7 +221,7 @@ static void __remove_shared_vm_struct(struct vm_area_struct *vma, if (vma->vm_flags & VM_DENYWRITE) atomic_inc(&file_inode(file)->i_writecount); if (vma->vm_flags & VM_SHARED) - mapping->i_mmap_writable--; + mapping_unmap_writable(mapping); flush_dcache_mmap_lock(mapping); if (unlikely(vma->vm_flags & VM_NONLINEAR)) @@ -622,7 +622,7 @@ static void __vma_link_file(struct vm_area_struct *vma) if (vma->vm_flags & VM_DENYWRITE) atomic_dec(&file_inode(file)->i_writecount); if (vma->vm_flags & VM_SHARED) - mapping->i_mmap_writable++; + atomic_inc(&mapping->i_mmap_writable); flush_dcache_mmap_lock(mapping); if (unlikely(vma->vm_flags & VM_NONLINEAR)) @@ -1577,6 +1577,17 @@ munmap_back: if (error) goto free_vma; } + if (vm_flags & VM_SHARED) { + error = mapping_map_writable(file->f_mapping); + if (error) + goto allow_write_and_free_vma; + } + + /* ->mmap() can change vma->vm_file, but must guarantee that + * vma_link() below can deny write-access if VM_DENYWRITE is set + * and map writably if VM_SHARED is set. This usually means the + * new file must not have been exposed to user-space, yet. + */ vma->vm_file = get_file(file); error = file->f_op->mmap(file, vma); if (error) @@ -1616,8 +1627,12 @@ munmap_back: vma_link(mm, vma, prev, rb_link, rb_parent); /* Once vma denies write, undo our temporary denial count */ - if (vm_flags & VM_DENYWRITE) - allow_write_access(file); + if (file) { + if (vm_flags & VM_SHARED) + mapping_unmap_writable(file->f_mapping); + if (vm_flags & VM_DENYWRITE) + allow_write_access(file); + } file = vma->vm_file; out: perf_event_mmap(vma); @@ -1646,14 +1661,17 @@ out: return addr; unmap_and_free_vma: - if (vm_flags & VM_DENYWRITE) - allow_write_access(file); vma->vm_file = NULL; fput(file); /* Undo any partial mapping done by a device driver. */ unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end); charged = 0; + if (vm_flags & VM_SHARED) + mapping_unmap_writable(file->f_mapping); +allow_write_and_free_vma: + if (vm_flags & VM_DENYWRITE) + allow_write_access(file); free_vma: kmem_cache_free(vm_area_cachep, vma); unacct_error: diff --git a/mm/swap_state.c b/mm/swap_state.c index e160151da6b8..3e0ec83d000c 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -39,6 +39,7 @@ static struct backing_dev_info swap_backing_dev_info = { struct address_space swapper_spaces[MAX_SWAPFILES] = { [0 ... 
MAX_SWAPFILES - 1] = { .page_tree = RADIX_TREE_INIT(GFP_ATOMIC|__GFP_NOWARN), + .i_mmap_writable = ATOMIC_INIT(0), .a_ops = &swap_aops, .backing_dev_info = &swap_backing_dev_info, } -- cgit v1.2.3-59-g8ed1b From 40e041a2c858b3caefc757e26cb85bfceae5062b Mon Sep 17 00:00:00 2001 From: David Herrmann Date: Fri, 8 Aug 2014 14:25:27 -0700 Subject: shm: add sealing API If two processes share a common memory region, they usually want some guarantees to allow safe access. This often includes: - one side cannot overwrite data while the other reads it - one side cannot shrink the buffer while the other accesses it - one side cannot grow the buffer beyond previously set boundaries If there is a trust-relationship between both parties, there is no need for policy enforcement. However, if there's no trust relationship (eg., for general-purpose IPC) sharing memory-regions is highly fragile and often not possible without local copies. Look at the following two use-cases: 1) A graphics client wants to share its rendering-buffer with a graphics-server. The memory-region is allocated by the client for read/write access and a second FD is passed to the server. While scanning out from the memory region, the server has no guarantee that the client doesn't shrink the buffer at any time, requiring rather cumbersome SIGBUS handling. 2) A process wants to perform an RPC on another process. To avoid huge bandwidth consumption, zero-copy is preferred. After a message is assembled in-memory and a FD is passed to the remote side, both sides want to be sure that neither modifies this shared copy, anymore. The source may have put sensible data into the message without a separate copy and the target may want to parse the message inline, to avoid a local copy. While SIGBUS handling, POSIX mandatory locking and MAP_DENYWRITE provide ways to achieve most of this, the first one is unproportionally ugly to use in libraries and the latter two are broken/racy or even disabled due to denial of service attacks. This patch introduces the concept of SEALING. If you seal a file, a specific set of operations is blocked on that file forever. Unlike locks, seals can only be set, never removed. Hence, once you verified a specific set of seals is set, you're guaranteed that no-one can perform the blocked operations on this file, anymore. An initial set of SEALS is introduced by this patch: - SHRINK: If SEAL_SHRINK is set, the file in question cannot be reduced in size. This affects ftruncate() and open(O_TRUNC). - GROW: If SEAL_GROW is set, the file in question cannot be increased in size. This affects ftruncate(), fallocate() and write(). - WRITE: If SEAL_WRITE is set, no write operations (besides resizing) are possible. This affects fallocate(PUNCH_HOLE), mmap() and write(). - SEAL: If SEAL_SEAL is set, no further seals can be added to a file. This basically prevents the F_ADD_SEAL operation on a file and can be set to prevent others from adding further seals that you don't want. The described use-cases can easily use these seals to provide safe use without any trust-relationship: 1) The graphics server can verify that a passed file-descriptor has SEAL_SHRINK set. This allows safe scanout, while the client is allowed to increase buffer size for window-resizing on-the-fly. Concurrent writes are explicitly allowed. 2) For general-purpose IPC, both processes can verify that SEAL_SHRINK, SEAL_GROW and SEAL_WRITE are set. This guarantees that neither process can modify the data while the other side parses it. 
Furthermore, it guarantees that even with writable FDs passed to the peer, it cannot increase the size to hit memory-limits of the source process (in case the file-storage is accounted to the source). The new API is an extension to fcntl(), adding two new commands: F_GET_SEALS: Return a bitset describing the seals on the file. This can be called on any FD if the underlying file supports sealing. F_ADD_SEALS: Change the seals of a given file. This requires WRITE access to the file and F_SEAL_SEAL may not already be set. Furthermore, the underlying file must support sealing and there may not be any existing shared mapping of that file. Otherwise, EBADF/EPERM is returned. The given seals are _added_ to the existing set of seals on the file. You cannot remove seals again. The fcntl() handler is currently specific to shmem and disabled on all files. A file needs to explicitly support sealing for this interface to work. A separate syscall is added in a follow-up, which creates files that support sealing. There is no intention to support this on other file-systems. Semantics are unclear for non-volatile files and we lack any use-case right now. Therefore, the implementation is specific to shmem. Signed-off-by: David Herrmann Acked-by: Hugh Dickins Cc: Michael Kerrisk Cc: Ryan Lortie Cc: Lennart Poettering Cc: Daniel Mack Cc: Andy Lutomirski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fcntl.c | 5 ++ include/linux/shmem_fs.h | 17 ++++++ include/uapi/linux/fcntl.h | 15 +++++ mm/shmem.c | 143 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 180 insertions(+) (limited to 'include/linux') diff --git a/fs/fcntl.c b/fs/fcntl.c index 72c82f69b01b..22d1c3df61ac 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -336,6 +337,10 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, case F_GETPIPE_SZ: err = pipe_fcntl(filp, cmd, arg); break; + case F_ADD_SEALS: + case F_GET_SEALS: + err = shmem_fcntl(filp, cmd, arg); + break; default: break; } diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index 4d1771c2d29f..50777b5b1e4c 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -1,6 +1,7 @@ #ifndef __SHMEM_FS_H #define __SHMEM_FS_H +#include #include #include #include @@ -11,6 +12,7 @@ struct shmem_inode_info { spinlock_t lock; + unsigned int seals; /* shmem seals */ unsigned long flags; unsigned long alloced; /* data pages alloced to file */ union { @@ -65,4 +67,19 @@ static inline struct page *shmem_read_mapping_page( mapping_gfp_mask(mapping)); } +#ifdef CONFIG_TMPFS + +extern int shmem_add_seals(struct file *file, unsigned int seals); +extern int shmem_get_seals(struct file *file); +extern long shmem_fcntl(struct file *file, unsigned int cmd, unsigned long arg); + +#else + +static inline long shmem_fcntl(struct file *f, unsigned int c, unsigned long a) +{ + return -EINVAL; +} + +#endif + #endif diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h index 074b886c6be0..beed138bd359 100644 --- a/include/uapi/linux/fcntl.h +++ b/include/uapi/linux/fcntl.h @@ -27,6 +27,21 @@ #define F_SETPIPE_SZ (F_LINUX_SPECIFIC_BASE + 7) #define F_GETPIPE_SZ (F_LINUX_SPECIFIC_BASE + 8) +/* + * Set/Get seals + */ +#define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9) +#define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10) + +/* + * Types of seals + */ +#define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */ +#define F_SEAL_SHRINK 0x0002 /* prevent file from 
shrinking */ +#define F_SEAL_GROW 0x0004 /* prevent file from growing */ +#define F_SEAL_WRITE 0x0008 /* prevent writes */ +/* (1U << 31) is reserved for signed error codes */ + /* * Types of directory notifications that may be requested. */ diff --git a/mm/shmem.c b/mm/shmem.c index 6dc80d298f9d..8b43bb7a4efe 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -66,6 +66,7 @@ static struct vfsmount *shm_mnt; #include #include #include +#include #include #include @@ -547,6 +548,7 @@ EXPORT_SYMBOL_GPL(shmem_truncate_range); static int shmem_setattr(struct dentry *dentry, struct iattr *attr) { struct inode *inode = dentry->d_inode; + struct shmem_inode_info *info = SHMEM_I(inode); int error; error = inode_change_ok(inode, attr); @@ -557,6 +559,11 @@ static int shmem_setattr(struct dentry *dentry, struct iattr *attr) loff_t oldsize = inode->i_size; loff_t newsize = attr->ia_size; + /* protected by i_mutex */ + if ((newsize < oldsize && (info->seals & F_SEAL_SHRINK)) || + (newsize > oldsize && (info->seals & F_SEAL_GROW))) + return -EPERM; + if (newsize != oldsize) { error = shmem_reacct_size(SHMEM_I(inode)->flags, oldsize, newsize); @@ -1412,6 +1419,7 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode info = SHMEM_I(inode); memset(info, 0, (char *)inode - (char *)info); spin_lock_init(&info->lock); + info->seals = F_SEAL_SEAL; info->flags = flags & VM_NORESERVE; INIT_LIST_HEAD(&info->swaplist); simple_xattrs_init(&info->xattrs); @@ -1470,7 +1478,17 @@ shmem_write_begin(struct file *file, struct address_space *mapping, struct page **pagep, void **fsdata) { struct inode *inode = mapping->host; + struct shmem_inode_info *info = SHMEM_I(inode); pgoff_t index = pos >> PAGE_CACHE_SHIFT; + + /* i_mutex is held by caller */ + if (unlikely(info->seals)) { + if (info->seals & F_SEAL_WRITE) + return -EPERM; + if ((info->seals & F_SEAL_GROW) && pos + len > inode->i_size) + return -EPERM; + } + return shmem_getpage(inode, index, pagep, SGP_WRITE, NULL); } @@ -1808,11 +1826,125 @@ static loff_t shmem_file_llseek(struct file *file, loff_t offset, int whence) return offset; } +static int shmem_wait_for_pins(struct address_space *mapping) +{ + return 0; +} + +#define F_ALL_SEALS (F_SEAL_SEAL | \ + F_SEAL_SHRINK | \ + F_SEAL_GROW | \ + F_SEAL_WRITE) + +int shmem_add_seals(struct file *file, unsigned int seals) +{ + struct inode *inode = file_inode(file); + struct shmem_inode_info *info = SHMEM_I(inode); + int error; + + /* + * SEALING + * Sealing allows multiple parties to share a shmem-file but restrict + * access to a specific subset of file operations. Seals can only be + * added, but never removed. This way, mutually untrusted parties can + * share common memory regions with a well-defined policy. A malicious + * peer can thus never perform unwanted operations on a shared object. + * + * Seals are only supported on special shmem-files and always affect + * the whole underlying inode. Once a seal is set, it may prevent some + * kinds of access to the file. Currently, the following seals are + * defined: + * SEAL_SEAL: Prevent further seals from being set on this file + * SEAL_SHRINK: Prevent the file from shrinking + * SEAL_GROW: Prevent the file from growing + * SEAL_WRITE: Prevent write access to the file + * + * As we don't require any trust relationship between two parties, we + * must prevent seals from being removed. Therefore, sealing a file + * only adds a given set of seals to the file, it never touches + * existing seals. 
Furthermore, the "setting seals"-operation can be + * sealed itself, which basically prevents any further seal from being + * added. + * + * Semantics of sealing are only defined on volatile files. Only + * anonymous shmem files support sealing. More importantly, seals are + * never written to disk. Therefore, there's no plan to support it on + * other file types. + */ + + if (file->f_op != &shmem_file_operations) + return -EINVAL; + if (!(file->f_mode & FMODE_WRITE)) + return -EPERM; + if (seals & ~(unsigned int)F_ALL_SEALS) + return -EINVAL; + + mutex_lock(&inode->i_mutex); + + if (info->seals & F_SEAL_SEAL) { + error = -EPERM; + goto unlock; + } + + if ((seals & F_SEAL_WRITE) && !(info->seals & F_SEAL_WRITE)) { + error = mapping_deny_writable(file->f_mapping); + if (error) + goto unlock; + + error = shmem_wait_for_pins(file->f_mapping); + if (error) { + mapping_allow_writable(file->f_mapping); + goto unlock; + } + } + + info->seals |= seals; + error = 0; + +unlock: + mutex_unlock(&inode->i_mutex); + return error; +} +EXPORT_SYMBOL_GPL(shmem_add_seals); + +int shmem_get_seals(struct file *file) +{ + if (file->f_op != &shmem_file_operations) + return -EINVAL; + + return SHMEM_I(file_inode(file))->seals; +} +EXPORT_SYMBOL_GPL(shmem_get_seals); + +long shmem_fcntl(struct file *file, unsigned int cmd, unsigned long arg) +{ + long error; + + switch (cmd) { + case F_ADD_SEALS: + /* disallow upper 32bit */ + if (arg > UINT_MAX) + return -EINVAL; + + error = shmem_add_seals(file, arg); + break; + case F_GET_SEALS: + error = shmem_get_seals(file); + break; + default: + error = -EINVAL; + break; + } + + return error; +} + static long shmem_fallocate(struct file *file, int mode, loff_t offset, loff_t len) { struct inode *inode = file_inode(file); struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); + struct shmem_inode_info *info = SHMEM_I(inode); struct shmem_falloc shmem_falloc; pgoff_t start, index, end; int error; @@ -1828,6 +1960,12 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset, loff_t unmap_end = round_down(offset + len, PAGE_SIZE) - 1; DECLARE_WAIT_QUEUE_HEAD_ONSTACK(shmem_falloc_waitq); + /* protected by i_mutex */ + if (info->seals & F_SEAL_WRITE) { + error = -EPERM; + goto out; + } + shmem_falloc.waitq = &shmem_falloc_waitq; shmem_falloc.start = unmap_start >> PAGE_SHIFT; shmem_falloc.next = (unmap_end + 1) >> PAGE_SHIFT; @@ -1854,6 +1992,11 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset, if (error) goto out; + if ((info->seals & F_SEAL_GROW) && offset + len > inode->i_size) { + error = -EPERM; + goto out; + } + start = offset >> PAGE_CACHE_SHIFT; end = (offset + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; /* Try to avoid a swapstorm if len is impossible to satisfy */ -- cgit v1.2.3-59-g8ed1b From 9183df25fe7b194563db3fec6dc3202a5855839c Mon Sep 17 00:00:00 2001 From: David Herrmann Date: Fri, 8 Aug 2014 14:25:29 -0700 Subject: shm: add memfd_create() syscall memfd_create() is similar to mmap(MAP_ANON), but returns a file-descriptor that you can pass to mmap(). It can support sealing and avoids any connection to user-visible mount-points. Thus, it's not subject to quotas on mounted file-systems, but can be used like malloc()'ed memory, but with a file-descriptor to it. memfd_create() returns the raw shmem file, so calls like ftruncate() can be used to modify the underlying inode. Also calls like fstat() will return proper information and mark the file as regular file. If you want sealing, you can specify MFD_ALLOW_SEALING. 
Otherwise, sealing is not supported (like on all other regular files). Compared to O_TMPFILE, it does not require a tmpfs mount-point and is not subject to a filesystem size limit. It is still properly accounted to memcg limits, though, and to the same overcommit or no-overcommit accounting as all user memory. Signed-off-by: David Herrmann Acked-by: Hugh Dickins Cc: Michael Kerrisk Cc: Ryan Lortie Cc: Lennart Poettering Cc: Daniel Mack Cc: Andy Lutomirski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/syscalls/syscall_32.tbl | 1 + arch/x86/syscalls/syscall_64.tbl | 1 + include/linux/syscalls.h | 1 + include/uapi/linux/memfd.h | 8 +++++ kernel/sys_ni.c | 1 + mm/shmem.c | 73 ++++++++++++++++++++++++++++++++++++++++ 6 files changed, 85 insertions(+) create mode 100644 include/uapi/linux/memfd.h (limited to 'include/linux') diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl index d1b4a119d4a5..028b78168d85 100644 --- a/arch/x86/syscalls/syscall_32.tbl +++ b/arch/x86/syscalls/syscall_32.tbl @@ -362,3 +362,4 @@ 353 i386 renameat2 sys_renameat2 354 i386 seccomp sys_seccomp 355 i386 getrandom sys_getrandom +356 i386 memfd_create sys_memfd_create diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl index 252c804bb1aa..ca2b9aa78c81 100644 --- a/arch/x86/syscalls/syscall_64.tbl +++ b/arch/x86/syscalls/syscall_64.tbl @@ -325,6 +325,7 @@ 316 common renameat2 sys_renameat2 317 common seccomp sys_seccomp 318 common getrandom sys_getrandom +319 common memfd_create sys_memfd_create # # x32-specific system call numbers start at 512 to avoid cache impact diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 701daff5d899..15a069425cbf 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -802,6 +802,7 @@ asmlinkage long sys_timerfd_settime(int ufd, int flags, asmlinkage long sys_timerfd_gettime(int ufd, struct itimerspec __user *otmr); asmlinkage long sys_eventfd(unsigned int count); asmlinkage long sys_eventfd2(unsigned int count, int flags); +asmlinkage long sys_memfd_create(const char __user *uname_ptr, unsigned int flags); asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len); asmlinkage long sys_old_readdir(unsigned int, struct old_linux_dirent __user *, unsigned int); asmlinkage long sys_pselect6(int, fd_set __user *, fd_set __user *, diff --git a/include/uapi/linux/memfd.h b/include/uapi/linux/memfd.h new file mode 100644 index 000000000000..534e364bda92 --- /dev/null +++ b/include/uapi/linux/memfd.h @@ -0,0 +1,8 @@ +#ifndef _UAPI_LINUX_MEMFD_H +#define _UAPI_LINUX_MEMFD_H + +/* flags for memfd_create(2) (unsigned int) */ +#define MFD_CLOEXEC 0x0001U +#define MFD_ALLOW_SEALING 0x0002U + +#endif /* _UAPI_LINUX_MEMFD_H */ diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 2904a2105914..1f79e3714533 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -197,6 +197,7 @@ cond_syscall(compat_sys_timerfd_settime); cond_syscall(compat_sys_timerfd_gettime); cond_syscall(sys_eventfd); cond_syscall(sys_eventfd2); +cond_syscall(sys_memfd_create); /* performance counters: */ cond_syscall(sys_perf_event_open); diff --git a/mm/shmem.c b/mm/shmem.c index 8b43bb7a4efe..4a5498795a2b 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -66,7 +66,9 @@ static struct vfsmount *shm_mnt; #include #include #include +#include #include +#include #include #include @@ -2732,6 +2734,77 @@ static int shmem_show_options(struct seq_file *seq, struct dentry *root) shmem_show_mpol(seq, sbinfo->mpol); return 
0; } + +#define MFD_NAME_PREFIX "memfd:" +#define MFD_NAME_PREFIX_LEN (sizeof(MFD_NAME_PREFIX) - 1) +#define MFD_NAME_MAX_LEN (NAME_MAX - MFD_NAME_PREFIX_LEN) + +#define MFD_ALL_FLAGS (MFD_CLOEXEC | MFD_ALLOW_SEALING) + +SYSCALL_DEFINE2(memfd_create, + const char __user *, uname, + unsigned int, flags) +{ + struct shmem_inode_info *info; + struct file *file; + int fd, error; + char *name; + long len; + + if (flags & ~(unsigned int)MFD_ALL_FLAGS) + return -EINVAL; + + /* length includes terminating zero */ + len = strnlen_user(uname, MFD_NAME_MAX_LEN + 1); + if (len <= 0) + return -EFAULT; + if (len > MFD_NAME_MAX_LEN + 1) + return -EINVAL; + + name = kmalloc(len + MFD_NAME_PREFIX_LEN, GFP_TEMPORARY); + if (!name) + return -ENOMEM; + + strcpy(name, MFD_NAME_PREFIX); + if (copy_from_user(&name[MFD_NAME_PREFIX_LEN], uname, len)) { + error = -EFAULT; + goto err_name; + } + + /* terminating-zero may have changed after strnlen_user() returned */ + if (name[len + MFD_NAME_PREFIX_LEN - 1]) { + error = -EFAULT; + goto err_name; + } + + fd = get_unused_fd_flags((flags & MFD_CLOEXEC) ? O_CLOEXEC : 0); + if (fd < 0) { + error = fd; + goto err_name; + } + + file = shmem_file_setup(name, 0, VM_NORESERVE); + if (IS_ERR(file)) { + error = PTR_ERR(file); + goto err_fd; + } + info = SHMEM_I(file_inode(file)); + file->f_mode |= FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE; + file->f_flags |= O_RDWR | O_LARGEFILE; + if (flags & MFD_ALLOW_SEALING) + info->seals &= ~F_SEAL_SEAL; + + fd_install(fd, file); + kfree(name); + return fd; + +err_fd: + put_unused_fd(fd); +err_name: + kfree(name); + return error; +} + #endif /* CONFIG_TMPFS */ static void shmem_put_super(struct super_block *sb) -- cgit v1.2.3-59-g8ed1b From 7d3e2bca22feb1f4a624009ff6c15e6f724cb4e7 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Fri, 8 Aug 2014 14:25:43 -0700 Subject: kexec: rename unusebale_pages to unusable_pages Let's use the more common "unusable". This patch was originally written and posted by Boris. I am including it in this patch series. Signed-off-by: Borislav Petkov Signed-off-by: Vivek Goyal Cc: Borislav Petkov Cc: Michael Kerrisk Cc: Yinghai Lu Cc: Eric Biederman Cc: H. Peter Anvin Cc: Matthew Garrett Cc: Greg Kroah-Hartman Cc: Dave Young Cc: WANG Chao Cc: Baoquan He Cc: Andy Lutomirski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kexec.h | 2 +- kernel/kexec.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kexec.h b/include/linux/kexec.h index a75641930049..d9bb0a57d208 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -100,7 +100,7 @@ struct kimage { struct list_head control_pages; struct list_head dest_pages; - struct list_head unuseable_pages; + struct list_head unusable_pages; /* Address of next control page to allocate for crash kernels. 
*/ unsigned long control_page; diff --git a/kernel/kexec.c b/kernel/kexec.c index 4b8f0c925884..c7cc2a00181c 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -154,7 +154,7 @@ static int do_kimage_alloc(struct kimage **rimage, unsigned long entry, INIT_LIST_HEAD(&image->dest_pages); /* Initialize the list of unusable pages */ - INIT_LIST_HEAD(&image->unuseable_pages); + INIT_LIST_HEAD(&image->unusable_pages); /* Read in the segments */ image->nr_segments = nr_segments; @@ -609,7 +609,7 @@ static void kimage_free_extra_pages(struct kimage *image) kimage_free_page_list(&image->dest_pages); /* Walk through and free any unusable pages I have cached */ - kimage_free_page_list(&image->unuseable_pages); + kimage_free_page_list(&image->unusable_pages); } static void kimage_terminate(struct kimage *image) @@ -732,7 +732,7 @@ static struct page *kimage_alloc_page(struct kimage *image, /* If the page cannot be used file it away */ if (page_to_pfn(page) > (KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) { - list_add(&page->lru, &image->unuseable_pages); + list_add(&page->lru, &image->unusable_pages); continue; } addr = page_to_pfn(page) << PAGE_SHIFT; -- cgit v1.2.3-59-g8ed1b From 8c86e70acead629aacb4afcd818add66bf6844d9 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Fri, 8 Aug 2014 14:25:50 -0700 Subject: resource: provide new functions to walk through resources I have added two more functions to walk through resources. Currently walk_system_ram_range() deals with pfn and /proc/iomem can contain partial pages. By dealing in pfn, callback function loses the info that last page of a memory range is a partial page and not the full page. So I implemented walk_system_ram_res() which returns u64 values to callback functions and now it properly return start and end address. walk_system_ram_range() uses find_next_system_ram() to find the next ram resource. This in turn only travels through siblings of top level child and does not travers through all the nodes of the resoruce tree. I also need another function where I can walk through all the resources, for example figure out where "GART" aperture is. Figure out where ACPI memory is. So I wrote another function walk_iomem_res() which walks through all /proc/iomem resources and returns matches as asked by caller. Caller can specify "name" of resource, start and end and flags. Got rid of find_next_system_ram_res() and instead implemented more generic find_next_iomem_res() which can be used to traverse top level children only based on an argument. Signed-off-by: Vivek Goyal Cc: Yinghai Lu Cc: Borislav Petkov Cc: Michael Kerrisk Cc: Eric Biederman Cc: H. 
Peter Anvin Cc: Matthew Garrett Cc: Greg Kroah-Hartman Cc: Dave Young Cc: WANG Chao Cc: Baoquan He Cc: Andy Lutomirski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ioport.h | 6 +++ kernel/resource.c | 101 ++++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 98 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 5e3a906cc089..142ec544167c 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -237,6 +237,12 @@ extern int iomem_is_exclusive(u64 addr); extern int walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages, void *arg, int (*func)(unsigned long, unsigned long, void *)); +extern int +walk_system_ram_res(u64 start, u64 end, void *arg, + int (*func)(u64, u64, void *)); +extern int +walk_iomem_res(char *name, unsigned long flags, u64 start, u64 end, void *arg, + int (*func)(u64, u64, void *)); /* True if any part of r1 overlaps r2 */ static inline bool resource_overlaps(struct resource *r1, struct resource *r2) diff --git a/kernel/resource.c b/kernel/resource.c index 3c2237ac32db..da14b8d09296 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -59,10 +59,12 @@ static DEFINE_RWLOCK(resource_lock); static struct resource *bootmem_resource_free; static DEFINE_SPINLOCK(bootmem_resource_lock); -static void *r_next(struct seq_file *m, void *v, loff_t *pos) +static struct resource *next_resource(struct resource *p, bool sibling_only) { - struct resource *p = v; - (*pos)++; + /* Caller wants to traverse through siblings only */ + if (sibling_only) + return p->sibling; + if (p->child) return p->child; while (!p->sibling && p->parent) @@ -70,6 +72,13 @@ static void *r_next(struct seq_file *m, void *v, loff_t *pos) return p->sibling; } +static void *r_next(struct seq_file *m, void *v, loff_t *pos) +{ + struct resource *p = v; + (*pos)++; + return (void *)next_resource(p, false); +} + #ifdef CONFIG_PROC_FS enum { MAX_IORES_LEVEL = 5 }; @@ -322,16 +331,19 @@ int release_resource(struct resource *old) EXPORT_SYMBOL(release_resource); -#if !defined(CONFIG_ARCH_HAS_WALK_MEMORY) /* - * Finds the lowest memory reosurce exists within [res->start.res->end) + * Finds the lowest iomem reosurce exists with-in [res->start.res->end) * the caller must specify res->start, res->end, res->flags and "name". * If found, returns 0, res is overwritten, if not found, returns -1. + * This walks through whole tree and not just first level children + * until and unless first_level_children_only is true. 
*/ -static int find_next_system_ram(struct resource *res, char *name) +static int find_next_iomem_res(struct resource *res, char *name, + bool first_level_children_only) { resource_size_t start, end; struct resource *p; + bool sibling_only = false; BUG_ON(!res); @@ -340,8 +352,14 @@ static int find_next_system_ram(struct resource *res, char *name) BUG_ON(start >= end); read_lock(&resource_lock); - for (p = iomem_resource.child; p ; p = p->sibling) { - /* system ram is just marked as IORESOURCE_MEM */ + + if (first_level_children_only) { + p = iomem_resource.child; + sibling_only = true; + } else + p = &iomem_resource; + + while ((p = next_resource(p, sibling_only))) { if (p->flags != res->flags) continue; if (name && strcmp(p->name, name)) @@ -353,6 +371,7 @@ static int find_next_system_ram(struct resource *res, char *name) if ((p->end >= start) && (p->start < end)) break; } + read_unlock(&resource_lock); if (!p) return -1; @@ -364,6 +383,70 @@ static int find_next_system_ram(struct resource *res, char *name) return 0; } +/* + * Walks through iomem resources and calls func() with matching resource + * ranges. This walks through whole tree and not just first level children. + * All the memory ranges which overlap start,end and also match flags and + * name are valid candidates. + * + * @name: name of resource + * @flags: resource flags + * @start: start addr + * @end: end addr + */ +int walk_iomem_res(char *name, unsigned long flags, u64 start, u64 end, + void *arg, int (*func)(u64, u64, void *)) +{ + struct resource res; + u64 orig_end; + int ret = -1; + + res.start = start; + res.end = end; + res.flags = flags; + orig_end = res.end; + while ((res.start < res.end) && + (!find_next_iomem_res(&res, name, false))) { + ret = (*func)(res.start, res.end, arg); + if (ret) + break; + res.start = res.end + 1; + res.end = orig_end; + } + return ret; +} + +/* + * This function calls callback against all memory range of "System RAM" + * which are marked as IORESOURCE_MEM and IORESOUCE_BUSY. + * Now, this function is only for "System RAM". This function deals with + * full ranges and not pfn. If resources are not pfn aligned, dealing + * with pfn can truncate ranges. + */ +int walk_system_ram_res(u64 start, u64 end, void *arg, + int (*func)(u64, u64, void *)) +{ + struct resource res; + u64 orig_end; + int ret = -1; + + res.start = start; + res.end = end; + res.flags = IORESOURCE_MEM | IORESOURCE_BUSY; + orig_end = res.end; + while ((res.start < res.end) && + (!find_next_iomem_res(&res, "System RAM", true))) { + ret = (*func)(res.start, res.end, arg); + if (ret) + break; + res.start = res.end + 1; + res.end = orig_end; + } + return ret; +} + +#if !defined(CONFIG_ARCH_HAS_WALK_MEMORY) + /* * This function calls callback against all memory range of "System RAM" * which are marked as IORESOURCE_MEM and IORESOUCE_BUSY. 
@@ -382,7 +465,7 @@ int walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages, res.flags = IORESOURCE_MEM | IORESOURCE_BUSY; orig_end = res.end; while ((res.start < res.end) && - (find_next_system_ram(&res, "System RAM") >= 0)) { + (find_next_iomem_res(&res, "System RAM", true) >= 0)) { pfn = (res.start + PAGE_SIZE - 1) >> PAGE_SHIFT; end_pfn = (res.end + 1) >> PAGE_SHIFT; if (end_pfn > pfn) -- cgit v1.2.3-59-g8ed1b From 815d5704a337a662bf960757edbff7a0680d40fd Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Fri, 8 Aug 2014 14:25:52 -0700 Subject: kexec: make kexec_segment user buffer pointer a union So far kexec_segment->buf was always a user space pointer as user space passed the array of kexec_segment structures and kernel copied it. But with new system call, list of kexec segments will be prepared by kernel and kexec_segment->buf will point to a kernel memory. So while I was adding code where I made assumption that ->buf is pointing to kernel memory, sparse started giving warning. Make ->buf a union. And where a user space pointer is expected, access it using ->buf and where a kernel space pointer is expected, access it using ->kbuf. That takes care of sparse warnings. Signed-off-by: Vivek Goyal Cc: Borislav Petkov Cc: Michael Kerrisk Cc: Yinghai Lu Cc: Eric Biederman Cc: H. Peter Anvin Cc: Matthew Garrett Cc: Greg Kroah-Hartman Cc: Dave Young Cc: WANG Chao Cc: Baoquan He Cc: Andy Lutomirski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kexec.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kexec.h b/include/linux/kexec.h index d9bb0a57d208..66d56ac0f64c 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -69,7 +69,18 @@ typedef unsigned long kimage_entry_t; #define IND_SOURCE 0x8 struct kexec_segment { - void __user *buf; + /* + * This pointer can point to user memory if kexec_load() system + * call is used or will point to kernel memory if + * kexec_file_load() system call is used. + * + * Use ->buf when expecting to deal with user memory and use ->kbuf + * when expecting to deal with kernel memory. + */ + union { + void __user *buf; + void *kbuf; + }; size_t bufsz; unsigned long mem; size_t memsz; -- cgit v1.2.3-59-g8ed1b From f0895685c7fd8c938c91a9d8a6f7c11f22df58d2 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Fri, 8 Aug 2014 14:25:55 -0700 Subject: kexec: new syscall kexec_file_load() declaration This is the new syscall kexec_file_load() declaration/interface. I have reserved the syscall number only for x86_64 so far. Other architectures (including i386) can reserve syscall number when they enable the support for this new syscall. Signed-off-by: Vivek Goyal Cc: Michael Kerrisk Cc: Borislav Petkov Cc: Yinghai Lu Cc: Eric Biederman Cc: H. 
Peter Anvin Cc: Matthew Garrett Cc: Greg Kroah-Hartman Cc: Dave Young Cc: WANG Chao Cc: Baoquan He Cc: Andy Lutomirski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/syscalls/syscall_64.tbl | 1 + include/linux/syscalls.h | 4 ++++ kernel/kexec.c | 7 +++++++ kernel/sys_ni.c | 1 + 4 files changed, 13 insertions(+) (limited to 'include/linux') diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl index ca2b9aa78c81..35dd922727b9 100644 --- a/arch/x86/syscalls/syscall_64.tbl +++ b/arch/x86/syscalls/syscall_64.tbl @@ -326,6 +326,7 @@ 317 common seccomp sys_seccomp 318 common getrandom sys_getrandom 319 common memfd_create sys_memfd_create +320 common kexec_file_load sys_kexec_file_load # # x32-specific system call numbers start at 512 to avoid cache impact diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 15a069425cbf..0f86d85a9ce4 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -317,6 +317,10 @@ asmlinkage long sys_restart_syscall(void); asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments, struct kexec_segment __user *segments, unsigned long flags); +asmlinkage long sys_kexec_file_load(int kernel_fd, int initrd_fd, + unsigned long cmdline_len, + const char __user *cmdline_ptr, + unsigned long flags); asmlinkage long sys_exit(int error_code); asmlinkage long sys_exit_group(int error_code); diff --git a/kernel/kexec.c b/kernel/kexec.c index bfdda316697d..ec4386c1b94f 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -1058,6 +1058,13 @@ COMPAT_SYSCALL_DEFINE4(kexec_load, compat_ulong_t, entry, } #endif +SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd, + unsigned long, cmdline_len, const char __user *, cmdline_ptr, + unsigned long, flags) +{ + return -ENOSYS; +} + void crash_kexec(struct pt_regs *regs) { /* Take the kexec_mutex here to prevent sys_kexec_load diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 1f79e3714533..391d4ddb6f4b 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -25,6 +25,7 @@ cond_syscall(sys_swapon); cond_syscall(sys_swapoff); cond_syscall(sys_kexec_load); cond_syscall(compat_sys_kexec_load); +cond_syscall(sys_kexec_file_load); cond_syscall(sys_init_module); cond_syscall(sys_finit_module); cond_syscall(sys_delete_module); -- cgit v1.2.3-59-g8ed1b From cb1052581e2bddd6096544f3f944f4e7fdad4c7f Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Fri, 8 Aug 2014 14:25:57 -0700 Subject: kexec: implementation of new syscall kexec_file_load Previous patch provided the interface definition and this patch prvides implementation of new syscall. Previously segment list was prepared in user space. Now user space just passes kernel fd, initrd fd and command line and kernel will create a segment list internally. This patch contains generic part of the code. Actual segment preparation and loading is done by arch and image specific loader. Which comes in next patch. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Vivek Goyal Cc: Borislav Petkov Cc: Michael Kerrisk Cc: Yinghai Lu Cc: Eric Biederman Cc: H. 
Peter Anvin Cc: Matthew Garrett Cc: Greg Kroah-Hartman Cc: Dave Young Cc: WANG Chao Cc: Baoquan He Cc: Andy Lutomirski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kernel/machine_kexec_64.c | 45 ++++ include/linux/kexec.h | 53 ++++ include/uapi/linux/kexec.h | 11 + kernel/kexec.c | 483 ++++++++++++++++++++++++++++++++++++- 4 files changed, 587 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 679cef0791cd..c8875b5545e1 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -22,6 +22,10 @@ #include #include +static struct kexec_file_ops *kexec_file_loaders[] = { + NULL, +}; + static void free_transition_pgtable(struct kimage *image) { free_page((unsigned long)image->arch.pud); @@ -283,3 +287,44 @@ void arch_crash_save_vmcoreinfo(void) (unsigned long)&_text - __START_KERNEL); } +/* arch-dependent functionality related to kexec file-based syscall */ + +int arch_kexec_kernel_image_probe(struct kimage *image, void *buf, + unsigned long buf_len) +{ + int i, ret = -ENOEXEC; + struct kexec_file_ops *fops; + + for (i = 0; i < ARRAY_SIZE(kexec_file_loaders); i++) { + fops = kexec_file_loaders[i]; + if (!fops || !fops->probe) + continue; + + ret = fops->probe(buf, buf_len); + if (!ret) { + image->fops = fops; + return ret; + } + } + + return ret; +} + +void *arch_kexec_kernel_image_load(struct kimage *image) +{ + if (!image->fops || !image->fops->load) + return ERR_PTR(-ENOEXEC); + + return image->fops->load(image, image->kernel_buf, + image->kernel_buf_len, image->initrd_buf, + image->initrd_buf_len, image->cmdline_buf, + image->cmdline_buf_len); +} + +int arch_kimage_file_post_load_cleanup(struct kimage *image) +{ + if (!image->fops || !image->fops->cleanup) + return 0; + + return image->fops->cleanup(image); +} diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 66d56ac0f64c..8e80901e466f 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -121,13 +121,57 @@ struct kimage { #define KEXEC_TYPE_DEFAULT 0 #define KEXEC_TYPE_CRASH 1 unsigned int preserve_context : 1; + /* If set, we are using file mode kexec syscall */ + unsigned int file_mode:1; #ifdef ARCH_HAS_KIMAGE_ARCH struct kimage_arch arch; #endif + + /* Additional fields for file based kexec syscall */ + void *kernel_buf; + unsigned long kernel_buf_len; + + void *initrd_buf; + unsigned long initrd_buf_len; + + char *cmdline_buf; + unsigned long cmdline_buf_len; + + /* File operations provided by image loader */ + struct kexec_file_ops *fops; + + /* Image loader handling the kernel can store a pointer here */ + void *image_loader_data; }; +/* + * Keeps track of buffer parameters as provided by caller for requesting + * memory placement of buffer. 
+ */ +struct kexec_buf { + struct kimage *image; + char *buffer; + unsigned long bufsz; + unsigned long memsz; + unsigned long buf_align; + unsigned long buf_min; + unsigned long buf_max; + bool top_down; /* allocate from top of memory hole */ +}; +typedef int (kexec_probe_t)(const char *kernel_buf, unsigned long kernel_size); +typedef void *(kexec_load_t)(struct kimage *image, char *kernel_buf, + unsigned long kernel_len, char *initrd, + unsigned long initrd_len, char *cmdline, + unsigned long cmdline_len); +typedef int (kexec_cleanup_t)(struct kimage *image); + +struct kexec_file_ops { + kexec_probe_t *probe; + kexec_load_t *load; + kexec_cleanup_t *cleanup; +}; /* kexec interface functions */ extern void machine_kexec(struct kimage *image); @@ -138,6 +182,11 @@ extern asmlinkage long sys_kexec_load(unsigned long entry, struct kexec_segment __user *segments, unsigned long flags); extern int kernel_kexec(void); +extern int kexec_add_buffer(struct kimage *image, char *buffer, + unsigned long bufsz, unsigned long memsz, + unsigned long buf_align, unsigned long buf_min, + unsigned long buf_max, bool top_down, + unsigned long *load_addr); extern struct page *kimage_alloc_control_pages(struct kimage *image, unsigned int order); extern void crash_kexec(struct pt_regs *); @@ -188,6 +237,10 @@ extern int kexec_load_disabled; #define KEXEC_FLAGS (KEXEC_ON_CRASH | KEXEC_PRESERVE_CONTEXT) #endif +/* List of defined/legal kexec file flags */ +#define KEXEC_FILE_FLAGS (KEXEC_FILE_UNLOAD | KEXEC_FILE_ON_CRASH | \ + KEXEC_FILE_NO_INITRAMFS) + #define VMCOREINFO_BYTES (4096) #define VMCOREINFO_NOTE_NAME "VMCOREINFO" #define VMCOREINFO_NOTE_NAME_BYTES ALIGN(sizeof(VMCOREINFO_NOTE_NAME), 4) diff --git a/include/uapi/linux/kexec.h b/include/uapi/linux/kexec.h index d6629d49a243..6925f5b42f89 100644 --- a/include/uapi/linux/kexec.h +++ b/include/uapi/linux/kexec.h @@ -13,6 +13,17 @@ #define KEXEC_PRESERVE_CONTEXT 0x00000002 #define KEXEC_ARCH_MASK 0xffff0000 +/* + * Kexec file load interface flags. + * KEXEC_FILE_UNLOAD : Unload already loaded kexec/kdump image. + * KEXEC_FILE_ON_CRASH : Load/unload operation belongs to kdump image. + * KEXEC_FILE_NO_INITRAMFS : No initramfs is being loaded. Ignore the initrd + * fd field. + */ +#define KEXEC_FILE_UNLOAD 0x00000001 +#define KEXEC_FILE_ON_CRASH 0x00000002 +#define KEXEC_FILE_NO_INITRAMFS 0x00000004 + /* These values match the ELF architecture values. * Unless there is a good reason that should continue to be the case. */ diff --git a/kernel/kexec.c b/kernel/kexec.c index ec4386c1b94f..9b46219254dd 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -6,6 +6,8 @@ * Version 2. See the file COPYING for more details. */ +#define pr_fmt(fmt) "kexec: " fmt + #include #include #include @@ -327,6 +329,221 @@ out_free_image: return ret; } +static int copy_file_from_fd(int fd, void **buf, unsigned long *buf_len) +{ + struct fd f = fdget(fd); + int ret; + struct kstat stat; + loff_t pos; + ssize_t bytes = 0; + + if (!f.file) + return -EBADF; + + ret = vfs_getattr(&f.file->f_path, &stat); + if (ret) + goto out; + + if (stat.size > INT_MAX) { + ret = -EFBIG; + goto out; + } + + /* Don't hand 0 to vmalloc, it whines. 
*/ + if (stat.size == 0) { + ret = -EINVAL; + goto out; + } + + *buf = vmalloc(stat.size); + if (!*buf) { + ret = -ENOMEM; + goto out; + } + + pos = 0; + while (pos < stat.size) { + bytes = kernel_read(f.file, pos, (char *)(*buf) + pos, + stat.size - pos); + if (bytes < 0) { + vfree(*buf); + ret = bytes; + goto out; + } + + if (bytes == 0) + break; + pos += bytes; + } + + if (pos != stat.size) { + ret = -EBADF; + vfree(*buf); + goto out; + } + + *buf_len = pos; +out: + fdput(f); + return ret; +} + +/* Architectures can provide this probe function */ +int __weak arch_kexec_kernel_image_probe(struct kimage *image, void *buf, + unsigned long buf_len) +{ + return -ENOEXEC; +} + +void * __weak arch_kexec_kernel_image_load(struct kimage *image) +{ + return ERR_PTR(-ENOEXEC); +} + +void __weak arch_kimage_file_post_load_cleanup(struct kimage *image) +{ +} + +/* + * Free up memory used by kernel, initrd, and comand line. This is temporary + * memory allocation which is not needed any more after these buffers have + * been loaded into separate segments and have been copied elsewhere. + */ +static void kimage_file_post_load_cleanup(struct kimage *image) +{ + vfree(image->kernel_buf); + image->kernel_buf = NULL; + + vfree(image->initrd_buf); + image->initrd_buf = NULL; + + kfree(image->cmdline_buf); + image->cmdline_buf = NULL; + + /* See if architecture has anything to cleanup post load */ + arch_kimage_file_post_load_cleanup(image); +} + +/* + * In file mode list of segments is prepared by kernel. Copy relevant + * data from user space, do error checking, prepare segment list + */ +static int +kimage_file_prepare_segments(struct kimage *image, int kernel_fd, int initrd_fd, + const char __user *cmdline_ptr, + unsigned long cmdline_len, unsigned flags) +{ + int ret = 0; + void *ldata; + + ret = copy_file_from_fd(kernel_fd, &image->kernel_buf, + &image->kernel_buf_len); + if (ret) + return ret; + + /* Call arch image probe handlers */ + ret = arch_kexec_kernel_image_probe(image, image->kernel_buf, + image->kernel_buf_len); + + if (ret) + goto out; + + /* It is possible that there no initramfs is being loaded */ + if (!(flags & KEXEC_FILE_NO_INITRAMFS)) { + ret = copy_file_from_fd(initrd_fd, &image->initrd_buf, + &image->initrd_buf_len); + if (ret) + goto out; + } + + if (cmdline_len) { + image->cmdline_buf = kzalloc(cmdline_len, GFP_KERNEL); + if (!image->cmdline_buf) { + ret = -ENOMEM; + goto out; + } + + ret = copy_from_user(image->cmdline_buf, cmdline_ptr, + cmdline_len); + if (ret) { + ret = -EFAULT; + goto out; + } + + image->cmdline_buf_len = cmdline_len; + + /* command line should be a string with last byte null */ + if (image->cmdline_buf[cmdline_len - 1] != '\0') { + ret = -EINVAL; + goto out; + } + } + + /* Call arch image load handlers */ + ldata = arch_kexec_kernel_image_load(image); + + if (IS_ERR(ldata)) { + ret = PTR_ERR(ldata); + goto out; + } + + image->image_loader_data = ldata; +out: + /* In case of error, free up all allocated memory in this function */ + if (ret) + kimage_file_post_load_cleanup(image); + return ret; +} + +static int +kimage_file_alloc_init(struct kimage **rimage, int kernel_fd, + int initrd_fd, const char __user *cmdline_ptr, + unsigned long cmdline_len, unsigned long flags) +{ + int ret; + struct kimage *image; + + image = do_kimage_alloc_init(); + if (!image) + return -ENOMEM; + + image->file_mode = 1; + + ret = kimage_file_prepare_segments(image, kernel_fd, initrd_fd, + cmdline_ptr, cmdline_len, flags); + if (ret) + goto out_free_image; + + ret = 
sanity_check_segment_list(image); + if (ret) + goto out_free_post_load_bufs; + + ret = -ENOMEM; + image->control_code_page = kimage_alloc_control_pages(image, + get_order(KEXEC_CONTROL_PAGE_SIZE)); + if (!image->control_code_page) { + pr_err("Could not allocate control_code_buffer\n"); + goto out_free_post_load_bufs; + } + + image->swap_page = kimage_alloc_control_pages(image, 0); + if (!image->swap_page) { + pr_err(KERN_ERR "Could not allocate swap buffer\n"); + goto out_free_control_pages; + } + + *rimage = image; + return 0; +out_free_control_pages: + kimage_free_page_list(&image->control_pages); +out_free_post_load_bufs: + kimage_file_post_load_cleanup(image); + kfree(image->image_loader_data); +out_free_image: + kfree(image); + return ret; +} + static int kimage_is_destination_range(struct kimage *image, unsigned long start, unsigned long end) @@ -644,6 +861,16 @@ static void kimage_free(struct kimage *image) /* Free the kexec control pages... */ kimage_free_page_list(&image->control_pages); + + kfree(image->image_loader_data); + + /* + * Free up any temporary buffers allocated. This might hit if + * error occurred much later after buffer allocation. + */ + if (image->file_mode) + kimage_file_post_load_cleanup(image); + kfree(image); } @@ -772,10 +999,14 @@ static int kimage_load_normal_segment(struct kimage *image, unsigned long maddr; size_t ubytes, mbytes; int result; - unsigned char __user *buf; + unsigned char __user *buf = NULL; + unsigned char *kbuf = NULL; result = 0; - buf = segment->buf; + if (image->file_mode) + kbuf = segment->kbuf; + else + buf = segment->buf; ubytes = segment->bufsz; mbytes = segment->memsz; maddr = segment->mem; @@ -807,7 +1038,11 @@ static int kimage_load_normal_segment(struct kimage *image, PAGE_SIZE - (maddr & ~PAGE_MASK)); uchunk = min(ubytes, mchunk); - result = copy_from_user(ptr, buf, uchunk); + /* For file based kexec, source pages are in kernel memory */ + if (image->file_mode) + memcpy(ptr, kbuf, uchunk); + else + result = copy_from_user(ptr, buf, uchunk); kunmap(page); if (result) { result = -EFAULT; @@ -815,7 +1050,10 @@ static int kimage_load_normal_segment(struct kimage *image, } ubytes -= uchunk; maddr += mchunk; - buf += mchunk; + if (image->file_mode) + kbuf += mchunk; + else + buf += mchunk; mbytes -= mchunk; } out: @@ -1062,7 +1300,72 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd, unsigned long, cmdline_len, const char __user *, cmdline_ptr, unsigned long, flags) { - return -ENOSYS; + int ret = 0, i; + struct kimage **dest_image, *image; + + /* We only trust the superuser with rebooting the system. */ + if (!capable(CAP_SYS_BOOT) || kexec_load_disabled) + return -EPERM; + + /* Make sure we have a legal set of flags */ + if (flags != (flags & KEXEC_FILE_FLAGS)) + return -EINVAL; + + image = NULL; + + if (!mutex_trylock(&kexec_mutex)) + return -EBUSY; + + dest_image = &kexec_image; + if (flags & KEXEC_FILE_ON_CRASH) + dest_image = &kexec_crash_image; + + if (flags & KEXEC_FILE_UNLOAD) + goto exchange; + + /* + * In case of crash, new kernel gets loaded in reserved region. It is + * same memory where old crash kernel might be loaded. Free any + * current crash dump kernel before we corrupt it. 
+ */ + if (flags & KEXEC_FILE_ON_CRASH) + kimage_free(xchg(&kexec_crash_image, NULL)); + + ret = kimage_file_alloc_init(&image, kernel_fd, initrd_fd, cmdline_ptr, + cmdline_len, flags); + if (ret) + goto out; + + ret = machine_kexec_prepare(image); + if (ret) + goto out; + + for (i = 0; i < image->nr_segments; i++) { + struct kexec_segment *ksegment; + + ksegment = &image->segment[i]; + pr_debug("Loading segment %d: buf=0x%p bufsz=0x%zx mem=0x%lx memsz=0x%zx\n", + i, ksegment->buf, ksegment->bufsz, ksegment->mem, + ksegment->memsz); + + ret = kimage_load_segment(image, &image->segment[i]); + if (ret) + goto out; + } + + kimage_terminate(image); + + /* + * Free up any temporary buffers allocated which are not needed + * after image has been loaded + */ + kimage_file_post_load_cleanup(image); +exchange: + image = xchg(dest_image, image); +out: + mutex_unlock(&kexec_mutex); + kimage_free(image); + return ret; } void crash_kexec(struct pt_regs *regs) @@ -1620,6 +1923,176 @@ static int __init crash_save_vmcoreinfo_init(void) subsys_initcall(crash_save_vmcoreinfo_init); +static int __kexec_add_segment(struct kimage *image, char *buf, + unsigned long bufsz, unsigned long mem, + unsigned long memsz) +{ + struct kexec_segment *ksegment; + + ksegment = &image->segment[image->nr_segments]; + ksegment->kbuf = buf; + ksegment->bufsz = bufsz; + ksegment->mem = mem; + ksegment->memsz = memsz; + image->nr_segments++; + + return 0; +} + +static int locate_mem_hole_top_down(unsigned long start, unsigned long end, + struct kexec_buf *kbuf) +{ + struct kimage *image = kbuf->image; + unsigned long temp_start, temp_end; + + temp_end = min(end, kbuf->buf_max); + temp_start = temp_end - kbuf->memsz; + + do { + /* align down start */ + temp_start = temp_start & (~(kbuf->buf_align - 1)); + + if (temp_start < start || temp_start < kbuf->buf_min) + return 0; + + temp_end = temp_start + kbuf->memsz - 1; + + /* + * Make sure this does not conflict with any of existing + * segments + */ + if (kimage_is_destination_range(image, temp_start, temp_end)) { + temp_start = temp_start - PAGE_SIZE; + continue; + } + + /* We found a suitable memory range */ + break; + } while (1); + + /* If we are here, we found a suitable memory range */ + __kexec_add_segment(image, kbuf->buffer, kbuf->bufsz, temp_start, + kbuf->memsz); + + /* Success, stop navigating through remaining System RAM ranges */ + return 1; +} + +static int locate_mem_hole_bottom_up(unsigned long start, unsigned long end, + struct kexec_buf *kbuf) +{ + struct kimage *image = kbuf->image; + unsigned long temp_start, temp_end; + + temp_start = max(start, kbuf->buf_min); + + do { + temp_start = ALIGN(temp_start, kbuf->buf_align); + temp_end = temp_start + kbuf->memsz - 1; + + if (temp_end > end || temp_end > kbuf->buf_max) + return 0; + /* + * Make sure this does not conflict with any of existing + * segments + */ + if (kimage_is_destination_range(image, temp_start, temp_end)) { + temp_start = temp_start + PAGE_SIZE; + continue; + } + + /* We found a suitable memory range */ + break; + } while (1); + + /* If we are here, we found a suitable memory range */ + __kexec_add_segment(image, kbuf->buffer, kbuf->bufsz, temp_start, + kbuf->memsz); + + /* Success, stop navigating through remaining System RAM ranges */ + return 1; +} + +static int locate_mem_hole_callback(u64 start, u64 end, void *arg) +{ + struct kexec_buf *kbuf = (struct kexec_buf *)arg; + unsigned long sz = end - start + 1; + + /* Returning 0 will take to next memory range */ + if (sz < kbuf->memsz) + return 0; 
+ + if (end < kbuf->buf_min || start > kbuf->buf_max) + return 0; + + /* + * Allocate memory top down with-in ram range. Otherwise bottom up + * allocation. + */ + if (kbuf->top_down) + return locate_mem_hole_top_down(start, end, kbuf); + return locate_mem_hole_bottom_up(start, end, kbuf); +} + +/* + * Helper function for placing a buffer in a kexec segment. This assumes + * that kexec_mutex is held. + */ +int kexec_add_buffer(struct kimage *image, char *buffer, unsigned long bufsz, + unsigned long memsz, unsigned long buf_align, + unsigned long buf_min, unsigned long buf_max, + bool top_down, unsigned long *load_addr) +{ + + struct kexec_segment *ksegment; + struct kexec_buf buf, *kbuf; + int ret; + + /* Currently adding segment this way is allowed only in file mode */ + if (!image->file_mode) + return -EINVAL; + + if (image->nr_segments >= KEXEC_SEGMENT_MAX) + return -EINVAL; + + /* + * Make sure we are not trying to add buffer after allocating + * control pages. All segments need to be placed first before + * any control pages are allocated. As control page allocation + * logic goes through list of segments to make sure there are + * no destination overlaps. + */ + if (!list_empty(&image->control_pages)) { + WARN_ON(1); + return -EINVAL; + } + + memset(&buf, 0, sizeof(struct kexec_buf)); + kbuf = &buf; + kbuf->image = image; + kbuf->buffer = buffer; + kbuf->bufsz = bufsz; + + kbuf->memsz = ALIGN(memsz, PAGE_SIZE); + kbuf->buf_align = max(buf_align, PAGE_SIZE); + kbuf->buf_min = buf_min; + kbuf->buf_max = buf_max; + kbuf->top_down = top_down; + + /* Walk the RAM ranges and allocate a suitable range for the buffer */ + ret = walk_system_ram_res(0, -1, kbuf, locate_mem_hole_callback); + if (ret != 1) { + /* A suitable memory range could not be found for buffer */ + return -EADDRNOTAVAIL; + } + + /* Found a suitable memory range */ + ksegment = &image->segment[image->nr_segments - 1]; + *load_addr = ksegment->mem; + return 0; +} + + /* * Move into place and start executing a preloaded standalone * executable. If nothing was preloaded return an error. -- cgit v1.2.3-59-g8ed1b From 12db5562e0352986a265841638482b84f3a6899b Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Fri, 8 Aug 2014 14:26:04 -0700 Subject: kexec: load and relocate purgatory at kernel load time Load purgatory code in RAM and relocate it based on the location. Relocation code has been inspired by module relocation code and purgatory relocation code in kexec-tools. Also compute the checksums of loaded kexec segments and store them in purgatory. Arch independent code provides this functionality so that arch dependent bootloaders can make use of it. Helper functions are provided to get/set symbol values in purgatory which are used by bootloaders later to set things like stack and entry point of second kernel etc. Signed-off-by: Vivek Goyal Cc: Borislav Petkov Cc: Michael Kerrisk Cc: Yinghai Lu Cc: Eric Biederman Cc: H. 
Peter Anvin Cc: Matthew Garrett Cc: Greg Kroah-Hartman Cc: Dave Young Cc: WANG Chao Cc: Baoquan He Cc: Andy Lutomirski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/Kconfig | 2 + arch/ia64/Kconfig | 2 + arch/m68k/Kconfig | 2 + arch/mips/Kconfig | 2 + arch/powerpc/Kconfig | 2 + arch/s390/Kconfig | 2 + arch/sh/Kconfig | 2 + arch/tile/Kconfig | 2 + arch/x86/Kconfig | 2 + arch/x86/kernel/machine_kexec_64.c | 142 ++++++++++ include/linux/kexec.h | 33 +++ kernel/kexec.c | 544 ++++++++++++++++++++++++++++++++++++- 12 files changed, 736 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 8e9dbcbcf5af..cacc8d5355b3 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -2065,6 +2065,8 @@ config XIP_PHYS_ADDR config KEXEC bool "Kexec system call (EXPERIMENTAL)" depends on (!SMP || PM_SLEEP_SMP) + select CRYPTO + select CRYPTO_SHA256 help kexec is a system call that implements the ability to shutdown your current kernel, and to start another kernel. It is like a reboot diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index c84c88bbbbd7..64aefb76bd69 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -549,6 +549,8 @@ source "drivers/sn/Kconfig" config KEXEC bool "kexec system call" depends on !IA64_HP_SIM && (!SMP || HOTPLUG_CPU) + select CRYPTO + select CRYPTO_SHA256 help kexec is a system call that implements the ability to shutdown your current kernel, and to start another kernel. It is like a reboot diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index 87b7c7581b1d..3ff8c9a25335 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -91,6 +91,8 @@ config MMU_SUN3 config KEXEC bool "kexec system call" depends on M68KCLASSIC + select CRYPTO + select CRYPTO_SHA256 help kexec is a system call that implements the ability to shutdown your current kernel, and to start another kernel. It is like a reboot diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 900c7e5333b6..df51e78a72cc 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -2396,6 +2396,8 @@ source "kernel/Kconfig.preempt" config KEXEC bool "Kexec system call" + select CRYPTO + select CRYPTO_SHA256 help kexec is a system call that implements the ability to shutdown your current kernel, and to start another kernel. It is like a reboot diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 4bc7b62fb4b6..a577609f8ed6 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -399,6 +399,8 @@ config PPC64_SUPPORTS_MEMORY_FAILURE config KEXEC bool "kexec system call" depends on (PPC_BOOK3S || FSL_BOOKE || (44x && !SMP)) + select CRYPTO + select CRYPTO_SHA256 help kexec is a system call that implements the ability to shutdown your current kernel, and to start another kernel. It is like a reboot diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 05c78bb5f570..ab39ceb89ecf 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -48,6 +48,8 @@ config ARCH_SUPPORTS_DEBUG_PAGEALLOC config KEXEC def_bool y + select CRYPTO + select CRYPTO_SHA256 config AUDIT_ARCH def_bool y diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index aa2df3eaeb29..453fa5c09550 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -595,6 +595,8 @@ source kernel/Kconfig.hz config KEXEC bool "kexec system call (EXPERIMENTAL)" depends on SUPERH32 && MMU + select CRYPTO + select CRYPTO_SHA256 help kexec is a system call that implements the ability to shutdown your current kernel, and to start another kernel. 
It is like a reboot diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig index 7fcd492adbfc..a3ffe2dd4832 100644 --- a/arch/tile/Kconfig +++ b/arch/tile/Kconfig @@ -191,6 +191,8 @@ source "kernel/Kconfig.hz" config KEXEC bool "kexec system call" + select CRYPTO + select CRYPTO_SHA256 ---help--- kexec is a system call that implements the ability to shutdown your current kernel, and to start another kernel. It is like a reboot diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 98fe3df6df82..9558b9fcafbf 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1583,6 +1583,8 @@ source kernel/Kconfig.hz config KEXEC bool "kexec system call" select BUILD_BIN2C + select CRYPTO + select CRYPTO_SHA256 ---help--- kexec is a system call that implements the ability to shutdown your current kernel, and to start another kernel. It is like a reboot diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index c8875b5545e1..88404c440727 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -6,6 +6,8 @@ * Version 2. See the file COPYING for more details. */ +#define pr_fmt(fmt) "kexec: " fmt + #include #include #include @@ -328,3 +330,143 @@ int arch_kimage_file_post_load_cleanup(struct kimage *image) return image->fops->cleanup(image); } + +/* + * Apply purgatory relocations. + * + * ehdr: Pointer to elf headers + * sechdrs: Pointer to section headers. + * relsec: section index of SHT_RELA section. + * + * TODO: Some of the code belongs to generic code. Move that in kexec.c. + */ +int arch_kexec_apply_relocations_add(const Elf64_Ehdr *ehdr, + Elf64_Shdr *sechdrs, unsigned int relsec) +{ + unsigned int i; + Elf64_Rela *rel; + Elf64_Sym *sym; + void *location; + Elf64_Shdr *section, *symtabsec; + unsigned long address, sec_base, value; + const char *strtab, *name, *shstrtab; + + /* + * ->sh_offset has been modified to keep the pointer to section + * contents in memory + */ + rel = (void *)sechdrs[relsec].sh_offset; + + /* Section to which relocations apply */ + section = &sechdrs[sechdrs[relsec].sh_info]; + + pr_debug("Applying relocate section %u to %u\n", relsec, + sechdrs[relsec].sh_info); + + /* Associated symbol table */ + symtabsec = &sechdrs[sechdrs[relsec].sh_link]; + + /* String table */ + if (symtabsec->sh_link >= ehdr->e_shnum) { + /* Invalid strtab section number */ + pr_err("Invalid string table section index %d\n", + symtabsec->sh_link); + return -ENOEXEC; + } + + strtab = (char *)sechdrs[symtabsec->sh_link].sh_offset; + + /* section header string table */ + shstrtab = (char *)sechdrs[ehdr->e_shstrndx].sh_offset; + + for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) { + + /* + * rel[i].r_offset contains byte offset from beginning + * of section to the storage unit affected. + * + * This is location to update (->sh_offset). This is temporary + * buffer where section is currently loaded. This will finally + * be loaded to a different address later, pointed to by + * ->sh_addr. kexec takes care of moving it + * (kexec_load_segment()). + */ + location = (void *)(section->sh_offset + rel[i].r_offset); + + /* Final address of the location */ + address = section->sh_addr + rel[i].r_offset; + + /* + * rel[i].r_info contains information about symbol table index + * w.r.t which relocation must be made and type of relocation + * to apply. ELF64_R_SYM() and ELF64_R_TYPE() macros get + * these respectively. 
+ */ + sym = (Elf64_Sym *)symtabsec->sh_offset + + ELF64_R_SYM(rel[i].r_info); + + if (sym->st_name) + name = strtab + sym->st_name; + else + name = shstrtab + sechdrs[sym->st_shndx].sh_name; + + pr_debug("Symbol: %s info: %02x shndx: %02x value=%llx size: %llx\n", + name, sym->st_info, sym->st_shndx, sym->st_value, + sym->st_size); + + if (sym->st_shndx == SHN_UNDEF) { + pr_err("Undefined symbol: %s\n", name); + return -ENOEXEC; + } + + if (sym->st_shndx == SHN_COMMON) { + pr_err("symbol '%s' in common section\n", name); + return -ENOEXEC; + } + + if (sym->st_shndx == SHN_ABS) + sec_base = 0; + else if (sym->st_shndx >= ehdr->e_shnum) { + pr_err("Invalid section %d for symbol %s\n", + sym->st_shndx, name); + return -ENOEXEC; + } else + sec_base = sechdrs[sym->st_shndx].sh_addr; + + value = sym->st_value; + value += sec_base; + value += rel[i].r_addend; + + switch (ELF64_R_TYPE(rel[i].r_info)) { + case R_X86_64_NONE: + break; + case R_X86_64_64: + *(u64 *)location = value; + break; + case R_X86_64_32: + *(u32 *)location = value; + if (value != *(u32 *)location) + goto overflow; + break; + case R_X86_64_32S: + *(s32 *)location = value; + if ((s64)value != *(s32 *)location) + goto overflow; + break; + case R_X86_64_PC32: + value -= (u64)address; + *(u32 *)location = value; + break; + default: + pr_err("Unknown rela relocation: %llu\n", + ELF64_R_TYPE(rel[i].r_info)); + return -ENOEXEC; + } + } + return 0; + +overflow: + pr_err("Overflow in relocation type %d value 0x%lx\n", + (int)ELF64_R_TYPE(rel[i].r_info), value); + return -ENOEXEC; +} diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 8e80901e466f..84f09e9eca26 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -10,6 +10,7 @@ #include #include #include +#include #include /* Verify architecture specific macros are defined */ @@ -95,6 +96,27 @@ struct compat_kexec_segment { }; #endif +struct kexec_sha_region { + unsigned long start; + unsigned long len; +}; + +struct purgatory_info { + /* Pointer to elf header of read only purgatory */ + Elf_Ehdr *ehdr; + + /* Pointer to purgatory sechdrs which are modifiable */ + Elf_Shdr *sechdrs; + /* + * Temporary buffer location where purgatory is loaded and relocated + * This memory can be freed post image load + */ + void *purgatory_buf; + + /* Address where purgatory is finally loaded and is executed from */ + unsigned long purgatory_load_addr; +}; + struct kimage { kimage_entry_t head; kimage_entry_t *entry; @@ -143,6 +165,9 @@ struct kimage { /* Image loader handling the kernel can store a pointer here */ void *image_loader_data; + + /* Information for loading purgatory */ + struct purgatory_info purgatory_info; }; /* @@ -189,6 +214,14 @@ extern int kexec_add_buffer(struct kimage *image, char *buffer, unsigned long *load_addr); extern struct page *kimage_alloc_control_pages(struct kimage *image, unsigned int order); +extern int kexec_load_purgatory(struct kimage *image, unsigned long min, + unsigned long max, int top_down, + unsigned long *load_addr); +extern int kexec_purgatory_get_set_symbol(struct kimage *image, + const char *name, void *buf, + unsigned int size, bool get_value); +extern void *kexec_purgatory_get_symbol_addr(struct kimage *image, + const char *name); extern void crash_kexec(struct pt_regs *); int kexec_should_crash(struct task_struct *); void crash_save_cpu(struct pt_regs *regs, int cpu); diff --git a/kernel/kexec.c b/kernel/kexec.c index 9b46219254dd..669e331aa9ec 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -42,6 +42,9 @@ #include #include 
+#include +#include + /* Per cpu memory for storing cpu states in case of system crash. */ note_buf_t __percpu *crash_notes; @@ -54,6 +57,15 @@ size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data); /* Flag to indicate we are going to kexec a new kernel */ bool kexec_in_progress = false; +/* + * Declare these symbols weak so that if architecture provides a purgatory, + * these will be overridden. + */ +char __weak kexec_purgatory[0]; +size_t __weak kexec_purgatory_size = 0; + +static int kexec_calculate_store_digests(struct kimage *image); + /* Location of the reserved area for the crash kernel */ struct resource crashk_res = { .name = "Crash kernel", @@ -404,6 +416,24 @@ void __weak arch_kimage_file_post_load_cleanup(struct kimage *image) { } +/* Apply relocations of type RELA */ +int __weak +arch_kexec_apply_relocations_add(const Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, + unsigned int relsec) +{ + pr_err("RELA relocation unsupported.\n"); + return -ENOEXEC; +} + +/* Apply relocations of type REL */ +int __weak +arch_kexec_apply_relocations(const Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, + unsigned int relsec) +{ + pr_err("REL relocation unsupported.\n"); + return -ENOEXEC; +} + /* * Free up memory used by kernel, initrd, and comand line. This is temporary * memory allocation which is not needed any more after these buffers have @@ -411,6 +441,8 @@ void __weak arch_kimage_file_post_load_cleanup(struct kimage *image) */ static void kimage_file_post_load_cleanup(struct kimage *image) { + struct purgatory_info *pi = &image->purgatory_info; + vfree(image->kernel_buf); image->kernel_buf = NULL; @@ -420,6 +452,12 @@ static void kimage_file_post_load_cleanup(struct kimage *image) kfree(image->cmdline_buf); image->cmdline_buf = NULL; + vfree(pi->purgatory_buf); + pi->purgatory_buf = NULL; + + vfree(pi->sechdrs); + pi->sechdrs = NULL; + /* See if architecture has anything to cleanup post load */ arch_kimage_file_post_load_cleanup(image); } @@ -1105,7 +1143,7 @@ static int kimage_load_crash_segment(struct kimage *image, } ubytes -= uchunk; maddr += mchunk; - buf += mchunk; + buf += mchunk; mbytes -= mchunk; } out: @@ -1340,6 +1378,10 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd, if (ret) goto out; + ret = kexec_calculate_store_digests(image); + if (ret) + goto out; + for (i = 0; i < image->nr_segments; i++) { struct kexec_segment *ksegment; @@ -2092,6 +2134,506 @@ int kexec_add_buffer(struct kimage *image, char *buffer, unsigned long bufsz, return 0; } +/* Calculate and store the digest of segments */ +static int kexec_calculate_store_digests(struct kimage *image) +{ + struct crypto_shash *tfm; + struct shash_desc *desc; + int ret = 0, i, j, zero_buf_sz, sha_region_sz; + size_t desc_size, nullsz; + char *digest; + void *zero_buf; + struct kexec_sha_region *sha_regions; + struct purgatory_info *pi = &image->purgatory_info; + + zero_buf = __va(page_to_pfn(ZERO_PAGE(0)) << PAGE_SHIFT); + zero_buf_sz = PAGE_SIZE; + + tfm = crypto_alloc_shash("sha256", 0, 0); + if (IS_ERR(tfm)) { + ret = PTR_ERR(tfm); + goto out; + } + + desc_size = crypto_shash_descsize(tfm) + sizeof(*desc); + desc = kzalloc(desc_size, GFP_KERNEL); + if (!desc) { + ret = -ENOMEM; + goto out_free_tfm; + } + + sha_region_sz = KEXEC_SEGMENT_MAX * sizeof(struct kexec_sha_region); + sha_regions = vzalloc(sha_region_sz); + if (!sha_regions) + goto out_free_desc; + + desc->tfm = tfm; + desc->flags = 0; + + ret = crypto_shash_init(desc); + if (ret < 0) + goto out_free_sha_regions; + + digest = kzalloc(SHA256_DIGEST_SIZE, GFP_KERNEL); + 
if (!digest) { + ret = -ENOMEM; + goto out_free_sha_regions; + } + + for (j = i = 0; i < image->nr_segments; i++) { + struct kexec_segment *ksegment; + + ksegment = &image->segment[i]; + /* + * Skip purgatory as it will be modified once we put digest + * info in purgatory. + */ + if (ksegment->kbuf == pi->purgatory_buf) + continue; + + ret = crypto_shash_update(desc, ksegment->kbuf, + ksegment->bufsz); + if (ret) + break; + + /* + * Assume rest of the buffer is filled with zero and + * update digest accordingly. + */ + nullsz = ksegment->memsz - ksegment->bufsz; + while (nullsz) { + unsigned long bytes = nullsz; + + if (bytes > zero_buf_sz) + bytes = zero_buf_sz; + ret = crypto_shash_update(desc, zero_buf, bytes); + if (ret) + break; + nullsz -= bytes; + } + + if (ret) + break; + + sha_regions[j].start = ksegment->mem; + sha_regions[j].len = ksegment->memsz; + j++; + } + + if (!ret) { + ret = crypto_shash_final(desc, digest); + if (ret) + goto out_free_digest; + ret = kexec_purgatory_get_set_symbol(image, "sha_regions", + sha_regions, sha_region_sz, 0); + if (ret) + goto out_free_digest; + + ret = kexec_purgatory_get_set_symbol(image, "sha256_digest", + digest, SHA256_DIGEST_SIZE, 0); + if (ret) + goto out_free_digest; + } + +out_free_digest: + kfree(digest); +out_free_sha_regions: + vfree(sha_regions); +out_free_desc: + kfree(desc); +out_free_tfm: + kfree(tfm); +out: + return ret; +} + +/* Actually load purgatory. Lot of code taken from kexec-tools */ +static int __kexec_load_purgatory(struct kimage *image, unsigned long min, + unsigned long max, int top_down) +{ + struct purgatory_info *pi = &image->purgatory_info; + unsigned long align, buf_align, bss_align, buf_sz, bss_sz, bss_pad; + unsigned long memsz, entry, load_addr, curr_load_addr, bss_addr, offset; + unsigned char *buf_addr, *src; + int i, ret = 0, entry_sidx = -1; + const Elf_Shdr *sechdrs_c; + Elf_Shdr *sechdrs = NULL; + void *purgatory_buf = NULL; + + /* + * sechdrs_c points to section headers in purgatory and are read + * only. No modifications allowed. + */ + sechdrs_c = (void *)pi->ehdr + pi->ehdr->e_shoff; + + /* + * We can not modify sechdrs_c[] and its fields. It is read only. + * Copy it over to a local copy where one can store some temporary + * data and free it at the end. We need to modify ->sh_addr and + * ->sh_offset fields to keep track of permanent and temporary + * locations of sections. + */ + sechdrs = vzalloc(pi->ehdr->e_shnum * sizeof(Elf_Shdr)); + if (!sechdrs) + return -ENOMEM; + + memcpy(sechdrs, sechdrs_c, pi->ehdr->e_shnum * sizeof(Elf_Shdr)); + + /* + * We seem to have multiple copies of sections. First copy is which + * is embedded in kernel in read only section. Some of these sections + * will be copied to a temporary buffer and relocated. And these + * sections will finally be copied to their final destination at + * segment load time. + * + * Use ->sh_offset to reflect section address in memory. It will + * point to original read only copy if section is not allocatable. + * Otherwise it will point to temporary copy which will be relocated. + * + * Use ->sh_addr to contain final address of the section where it + * will go during execution time. + */ + for (i = 0; i < pi->ehdr->e_shnum; i++) { + if (sechdrs[i].sh_type == SHT_NOBITS) + continue; + + sechdrs[i].sh_offset = (unsigned long)pi->ehdr + + sechdrs[i].sh_offset; + } + + /* + * Identify entry point section and make entry relative to section + * start. 
+ */ + entry = pi->ehdr->e_entry; + for (i = 0; i < pi->ehdr->e_shnum; i++) { + if (!(sechdrs[i].sh_flags & SHF_ALLOC)) + continue; + + if (!(sechdrs[i].sh_flags & SHF_EXECINSTR)) + continue; + + /* Make entry section relative */ + if (sechdrs[i].sh_addr <= pi->ehdr->e_entry && + ((sechdrs[i].sh_addr + sechdrs[i].sh_size) > + pi->ehdr->e_entry)) { + entry_sidx = i; + entry -= sechdrs[i].sh_addr; + break; + } + } + + /* Determine how much memory is needed to load relocatable object. */ + buf_align = 1; + bss_align = 1; + buf_sz = 0; + bss_sz = 0; + + for (i = 0; i < pi->ehdr->e_shnum; i++) { + if (!(sechdrs[i].sh_flags & SHF_ALLOC)) + continue; + + align = sechdrs[i].sh_addralign; + if (sechdrs[i].sh_type != SHT_NOBITS) { + if (buf_align < align) + buf_align = align; + buf_sz = ALIGN(buf_sz, align); + buf_sz += sechdrs[i].sh_size; + } else { + /* bss section */ + if (bss_align < align) + bss_align = align; + bss_sz = ALIGN(bss_sz, align); + bss_sz += sechdrs[i].sh_size; + } + } + + /* Determine the bss padding required to align bss properly */ + bss_pad = 0; + if (buf_sz & (bss_align - 1)) + bss_pad = bss_align - (buf_sz & (bss_align - 1)); + + memsz = buf_sz + bss_pad + bss_sz; + + /* Allocate buffer for purgatory */ + purgatory_buf = vzalloc(buf_sz); + if (!purgatory_buf) { + ret = -ENOMEM; + goto out; + } + + if (buf_align < bss_align) + buf_align = bss_align; + + /* Add buffer to segment list */ + ret = kexec_add_buffer(image, purgatory_buf, buf_sz, memsz, + buf_align, min, max, top_down, + &pi->purgatory_load_addr); + if (ret) + goto out; + + /* Load SHF_ALLOC sections */ + buf_addr = purgatory_buf; + load_addr = curr_load_addr = pi->purgatory_load_addr; + bss_addr = load_addr + buf_sz + bss_pad; + + for (i = 0; i < pi->ehdr->e_shnum; i++) { + if (!(sechdrs[i].sh_flags & SHF_ALLOC)) + continue; + + align = sechdrs[i].sh_addralign; + if (sechdrs[i].sh_type != SHT_NOBITS) { + curr_load_addr = ALIGN(curr_load_addr, align); + offset = curr_load_addr - load_addr; + /* We already modifed ->sh_offset to keep src addr */ + src = (char *) sechdrs[i].sh_offset; + memcpy(buf_addr + offset, src, sechdrs[i].sh_size); + + /* Store load address and source address of section */ + sechdrs[i].sh_addr = curr_load_addr; + + /* + * This section got copied to temporary buffer. Update + * ->sh_offset accordingly. + */ + sechdrs[i].sh_offset = (unsigned long)(buf_addr + offset); + + /* Advance to the next address */ + curr_load_addr += sechdrs[i].sh_size; + } else { + bss_addr = ALIGN(bss_addr, align); + sechdrs[i].sh_addr = bss_addr; + bss_addr += sechdrs[i].sh_size; + } + } + + /* Update entry point based on load address of text section */ + if (entry_sidx >= 0) + entry += sechdrs[entry_sidx].sh_addr; + + /* Make kernel jump to purgatory after shutdown */ + image->start = entry; + + /* Used later to get/set symbol values */ + pi->sechdrs = sechdrs; + + /* + * Used later to identify which section is purgatory and skip it + * from checksumming. 
+ */ + pi->purgatory_buf = purgatory_buf; + return ret; +out: + vfree(sechdrs); + vfree(purgatory_buf); + return ret; +} + +static int kexec_apply_relocations(struct kimage *image) +{ + int i, ret; + struct purgatory_info *pi = &image->purgatory_info; + Elf_Shdr *sechdrs = pi->sechdrs; + + /* Apply relocations */ + for (i = 0; i < pi->ehdr->e_shnum; i++) { + Elf_Shdr *section, *symtab; + + if (sechdrs[i].sh_type != SHT_RELA && + sechdrs[i].sh_type != SHT_REL) + continue; + + /* + * For section of type SHT_RELA/SHT_REL, + * ->sh_link contains section header index of associated + * symbol table. And ->sh_info contains section header + * index of section to which relocations apply. + */ + if (sechdrs[i].sh_info >= pi->ehdr->e_shnum || + sechdrs[i].sh_link >= pi->ehdr->e_shnum) + return -ENOEXEC; + + section = &sechdrs[sechdrs[i].sh_info]; + symtab = &sechdrs[sechdrs[i].sh_link]; + + if (!(section->sh_flags & SHF_ALLOC)) + continue; + + /* + * symtab->sh_link contain section header index of associated + * string table. + */ + if (symtab->sh_link >= pi->ehdr->e_shnum) + /* Invalid section number? */ + continue; + + /* + * Respective archicture needs to provide support for applying + * relocations of type SHT_RELA/SHT_REL. + */ + if (sechdrs[i].sh_type == SHT_RELA) + ret = arch_kexec_apply_relocations_add(pi->ehdr, + sechdrs, i); + else if (sechdrs[i].sh_type == SHT_REL) + ret = arch_kexec_apply_relocations(pi->ehdr, + sechdrs, i); + if (ret) + return ret; + } + + return 0; +} + +/* Load relocatable purgatory object and relocate it appropriately */ +int kexec_load_purgatory(struct kimage *image, unsigned long min, + unsigned long max, int top_down, + unsigned long *load_addr) +{ + struct purgatory_info *pi = &image->purgatory_info; + int ret; + + if (kexec_purgatory_size <= 0) + return -EINVAL; + + if (kexec_purgatory_size < sizeof(Elf_Ehdr)) + return -ENOEXEC; + + pi->ehdr = (Elf_Ehdr *)kexec_purgatory; + + if (memcmp(pi->ehdr->e_ident, ELFMAG, SELFMAG) != 0 + || pi->ehdr->e_type != ET_REL + || !elf_check_arch(pi->ehdr) + || pi->ehdr->e_shentsize != sizeof(Elf_Shdr)) + return -ENOEXEC; + + if (pi->ehdr->e_shoff >= kexec_purgatory_size + || (pi->ehdr->e_shnum * sizeof(Elf_Shdr) > + kexec_purgatory_size - pi->ehdr->e_shoff)) + return -ENOEXEC; + + ret = __kexec_load_purgatory(image, min, max, top_down); + if (ret) + return ret; + + ret = kexec_apply_relocations(image); + if (ret) + goto out; + + *load_addr = pi->purgatory_load_addr; + return 0; +out: + vfree(pi->sechdrs); + vfree(pi->purgatory_buf); + return ret; +} + +static Elf_Sym *kexec_purgatory_find_symbol(struct purgatory_info *pi, + const char *name) +{ + Elf_Sym *syms; + Elf_Shdr *sechdrs; + Elf_Ehdr *ehdr; + int i, k; + const char *strtab; + + if (!pi->sechdrs || !pi->ehdr) + return NULL; + + sechdrs = pi->sechdrs; + ehdr = pi->ehdr; + + for (i = 0; i < ehdr->e_shnum; i++) { + if (sechdrs[i].sh_type != SHT_SYMTAB) + continue; + + if (sechdrs[i].sh_link >= ehdr->e_shnum) + /* Invalid strtab section number */ + continue; + strtab = (char *)sechdrs[sechdrs[i].sh_link].sh_offset; + syms = (Elf_Sym *)sechdrs[i].sh_offset; + + /* Go through symbols for a match */ + for (k = 0; k < sechdrs[i].sh_size/sizeof(Elf_Sym); k++) { + if (ELF_ST_BIND(syms[k].st_info) != STB_GLOBAL) + continue; + + if (strcmp(strtab + syms[k].st_name, name) != 0) + continue; + + if (syms[k].st_shndx == SHN_UNDEF || + syms[k].st_shndx >= ehdr->e_shnum) { + pr_debug("Symbol: %s has bad section index %d.\n", + name, syms[k].st_shndx); + return NULL; + } + + /* Found the 
symbol we are looking for */ + return &syms[k]; + } + } + + return NULL; +} + +void *kexec_purgatory_get_symbol_addr(struct kimage *image, const char *name) +{ + struct purgatory_info *pi = &image->purgatory_info; + Elf_Sym *sym; + Elf_Shdr *sechdr; + + sym = kexec_purgatory_find_symbol(pi, name); + if (!sym) + return ERR_PTR(-EINVAL); + + sechdr = &pi->sechdrs[sym->st_shndx]; + + /* + * Returns the address where symbol will finally be loaded after + * kexec_load_segment() + */ + return (void *)(sechdr->sh_addr + sym->st_value); +} + +/* + * Get or set value of a symbol. If "get_value" is true, symbol value is + * returned in buf otherwise symbol value is set based on value in buf. + */ +int kexec_purgatory_get_set_symbol(struct kimage *image, const char *name, + void *buf, unsigned int size, bool get_value) +{ + Elf_Sym *sym; + Elf_Shdr *sechdrs; + struct purgatory_info *pi = &image->purgatory_info; + char *sym_buf; + + sym = kexec_purgatory_find_symbol(pi, name); + if (!sym) + return -EINVAL; + + if (sym->st_size != size) { + pr_err("symbol %s size mismatch: expected %lu actual %u\n", + name, (unsigned long)sym->st_size, size); + return -EINVAL; + } + + sechdrs = pi->sechdrs; + + if (sechdrs[sym->st_shndx].sh_type == SHT_NOBITS) { + pr_err("symbol %s is in a bss section. Cannot %s\n", name, + get_value ? "get" : "set"); + return -EINVAL; + } + + sym_buf = (unsigned char *)sechdrs[sym->st_shndx].sh_offset + + sym->st_value; + + if (get_value) + memcpy((void *)buf, sym_buf, size); + else + memcpy((void *)sym_buf, buf, size); + + return 0; +} /* * Move into place and start executing a preloaded standalone -- cgit v1.2.3-59-g8ed1b From 27f48d3e633be23656a097baa3be336e04a82d84 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Fri, 8 Aug 2014 14:26:06 -0700 Subject: kexec-bzImage64: support for loading bzImage using 64bit entry This is loader specific code which can load bzImage and set it up for 64bit entry. This does not take care of 32bit entry or real mode entry. 32bit mode entry can be implemented if somebody needs it. Signed-off-by: Vivek Goyal Cc: Borislav Petkov Cc: Michael Kerrisk Cc: Yinghai Lu Cc: Eric Biederman Cc: H. Peter Anvin Cc: Matthew Garrett Cc: Greg Kroah-Hartman Cc: Dave Young Cc: WANG Chao Cc: Baoquan He Cc: Andy Lutomirski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/include/asm/kexec-bzimage64.h | 6 + arch/x86/include/asm/kexec.h | 21 ++ arch/x86/kernel/Makefile | 1 + arch/x86/kernel/kexec-bzimage64.c | 375 +++++++++++++++++++++++++++++++++ arch/x86/kernel/machine_kexec_64.c | 5 +- include/linux/kexec.h | 2 +- kernel/kexec.c | 11 +- 7 files changed, 415 insertions(+), 6 deletions(-) create mode 100644 arch/x86/include/asm/kexec-bzimage64.h create mode 100644 arch/x86/kernel/kexec-bzimage64.c (limited to 'include/linux') diff --git a/arch/x86/include/asm/kexec-bzimage64.h b/arch/x86/include/asm/kexec-bzimage64.h new file mode 100644 index 000000000000..d1b5d194e31d --- /dev/null +++ b/arch/x86/include/asm/kexec-bzimage64.h @@ -0,0 +1,6 @@ +#ifndef _ASM_KEXEC_BZIMAGE64_H +#define _ASM_KEXEC_BZIMAGE64_H + +extern struct kexec_file_ops kexec_bzImage64_ops; + +#endif /* _ASM_KEXE_BZIMAGE64_H */ diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h index 17483a492f18..0dfccced4edf 100644 --- a/arch/x86/include/asm/kexec.h +++ b/arch/x86/include/asm/kexec.h @@ -23,6 +23,7 @@ #include #include +#include /* * KEXEC_SOURCE_MEMORY_LIMIT maximum page get_free_page can return. 
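The purgatory get/set helpers above patch global symbols in the purgatory blob by name. The purgatory sources themselves are added by a separate patch, so the snippet below is only an illustrative sketch of the purgatory-side declarations those helpers expect; the symbol names mirror the ones this series references ("sha_regions", "sha256_digest", "stack_end"), while the exact types and array sizes are assumptions.

/*
 * Hypothetical purgatory-side globals, patched by name at image load
 * time via kexec_purgatory_get_set_symbol().
 */
struct kexec_sha_region {
	unsigned long start;
	unsigned long len;
};

/* One entry per loaded segment to be hashed (KEXEC_SEGMENT_MAX is 16) */
struct kexec_sha_region sha_regions[16];

/* Expected SHA-256 of those regions, stored by kexec_calculate_store_digests() */
unsigned char sha256_digest[32];

/* The bzImage64 loader only takes this symbol's address, via
 * kexec_purgatory_get_symbol_addr("stack_end"), to seed the second
 * kernel's stack pointer; the value itself is never read. */
unsigned long stack_end;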
@@ -161,6 +162,26 @@ struct kimage_arch { pmd_t *pmd; pte_t *pte; }; + +struct kexec_entry64_regs { + uint64_t rax; + uint64_t rbx; + uint64_t rcx; + uint64_t rdx; + uint64_t rsi; + uint64_t rdi; + uint64_t rsp; + uint64_t rbp; + uint64_t r8; + uint64_t r9; + uint64_t r10; + uint64_t r11; + uint64_t r12; + uint64_t r13; + uint64_t r14; + uint64_t r15; + uint64_t rip; +}; #endif typedef void crash_vmclear_fn(void); diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index bde3993624f1..b5ea75c4a4b4 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -118,4 +118,5 @@ ifeq ($(CONFIG_X86_64),y) obj-$(CONFIG_PCI_MMCONFIG) += mmconf-fam10h_64.o obj-y += vsmp_64.o + obj-$(CONFIG_KEXEC) += kexec-bzimage64.o endif diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c new file mode 100644 index 000000000000..bcedd100192f --- /dev/null +++ b/arch/x86/kernel/kexec-bzimage64.c @@ -0,0 +1,375 @@ +/* + * Kexec bzImage loader + * + * Copyright (C) 2014 Red Hat Inc. + * Authors: + * Vivek Goyal + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#define pr_fmt(fmt) "kexec-bzImage64: " fmt + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +/* + * Defines lowest physical address for various segments. Not sure where + * exactly these limits came from. Current bzimage64 loader in kexec-tools + * uses these so I am retaining it. It can be changed over time as we gain + * more insight. + */ +#define MIN_PURGATORY_ADDR 0x3000 +#define MIN_BOOTPARAM_ADDR 0x3000 +#define MIN_KERNEL_LOAD_ADDR 0x100000 +#define MIN_INITRD_LOAD_ADDR 0x1000000 + +/* + * This is a place holder for all boot loader specific data structure which + * gets allocated in one call but gets freed much later during cleanup + * time. Right now there is only one field but it can grow as need be. + */ +struct bzimage64_data { + /* + * Temporary buffer to hold bootparams buffer. This should be + * freed once the bootparam segment has been loaded. 
+ */ + void *bootparams_buf; +}; + +static int setup_initrd(struct boot_params *params, + unsigned long initrd_load_addr, unsigned long initrd_len) +{ + params->hdr.ramdisk_image = initrd_load_addr & 0xffffffffUL; + params->hdr.ramdisk_size = initrd_len & 0xffffffffUL; + + params->ext_ramdisk_image = initrd_load_addr >> 32; + params->ext_ramdisk_size = initrd_len >> 32; + + return 0; +} + +static int setup_cmdline(struct boot_params *params, + unsigned long bootparams_load_addr, + unsigned long cmdline_offset, char *cmdline, + unsigned long cmdline_len) +{ + char *cmdline_ptr = ((char *)params) + cmdline_offset; + unsigned long cmdline_ptr_phys; + uint32_t cmdline_low_32, cmdline_ext_32; + + memcpy(cmdline_ptr, cmdline, cmdline_len); + cmdline_ptr[cmdline_len - 1] = '\0'; + + cmdline_ptr_phys = bootparams_load_addr + cmdline_offset; + cmdline_low_32 = cmdline_ptr_phys & 0xffffffffUL; + cmdline_ext_32 = cmdline_ptr_phys >> 32; + + params->hdr.cmd_line_ptr = cmdline_low_32; + if (cmdline_ext_32) + params->ext_cmd_line_ptr = cmdline_ext_32; + + return 0; +} + +static int setup_memory_map_entries(struct boot_params *params) +{ + unsigned int nr_e820_entries; + + nr_e820_entries = e820_saved.nr_map; + + /* TODO: Pass entries more than E820MAX in bootparams setup data */ + if (nr_e820_entries > E820MAX) + nr_e820_entries = E820MAX; + + params->e820_entries = nr_e820_entries; + memcpy(¶ms->e820_map, &e820_saved.map, + nr_e820_entries * sizeof(struct e820entry)); + + return 0; +} + +static int setup_boot_parameters(struct boot_params *params) +{ + unsigned int nr_e820_entries; + unsigned long long mem_k, start, end; + int i; + + /* Get subarch from existing bootparams */ + params->hdr.hardware_subarch = boot_params.hdr.hardware_subarch; + + /* Copying screen_info will do? 
*/ + memcpy(¶ms->screen_info, &boot_params.screen_info, + sizeof(struct screen_info)); + + /* Fill in memsize later */ + params->screen_info.ext_mem_k = 0; + params->alt_mem_k = 0; + + /* Default APM info */ + memset(¶ms->apm_bios_info, 0, sizeof(params->apm_bios_info)); + + /* Default drive info */ + memset(¶ms->hd0_info, 0, sizeof(params->hd0_info)); + memset(¶ms->hd1_info, 0, sizeof(params->hd1_info)); + + /* Default sysdesc table */ + params->sys_desc_table.length = 0; + + setup_memory_map_entries(params); + nr_e820_entries = params->e820_entries; + + for (i = 0; i < nr_e820_entries; i++) { + if (params->e820_map[i].type != E820_RAM) + continue; + start = params->e820_map[i].addr; + end = params->e820_map[i].addr + params->e820_map[i].size - 1; + + if ((start <= 0x100000) && end > 0x100000) { + mem_k = (end >> 10) - (0x100000 >> 10); + params->screen_info.ext_mem_k = mem_k; + params->alt_mem_k = mem_k; + if (mem_k > 0xfc00) + params->screen_info.ext_mem_k = 0xfc00; /* 64M*/ + if (mem_k > 0xffffffff) + params->alt_mem_k = 0xffffffff; + } + } + + /* Setup EDD info */ + memcpy(params->eddbuf, boot_params.eddbuf, + EDDMAXNR * sizeof(struct edd_info)); + params->eddbuf_entries = boot_params.eddbuf_entries; + + memcpy(params->edd_mbr_sig_buffer, boot_params.edd_mbr_sig_buffer, + EDD_MBR_SIG_MAX * sizeof(unsigned int)); + + return 0; +} + +int bzImage64_probe(const char *buf, unsigned long len) +{ + int ret = -ENOEXEC; + struct setup_header *header; + + /* kernel should be atleast two sectors long */ + if (len < 2 * 512) { + pr_err("File is too short to be a bzImage\n"); + return ret; + } + + header = (struct setup_header *)(buf + offsetof(struct boot_params, hdr)); + if (memcmp((char *)&header->header, "HdrS", 4) != 0) { + pr_err("Not a bzImage\n"); + return ret; + } + + if (header->boot_flag != 0xAA55) { + pr_err("No x86 boot sector present\n"); + return ret; + } + + if (header->version < 0x020C) { + pr_err("Must be at least protocol version 2.12\n"); + return ret; + } + + if (!(header->loadflags & LOADED_HIGH)) { + pr_err("zImage not a bzImage\n"); + return ret; + } + + if (!(header->xloadflags & XLF_KERNEL_64)) { + pr_err("Not a bzImage64. 
XLF_KERNEL_64 is not set.\n"); + return ret; + } + + if (!(header->xloadflags & XLF_CAN_BE_LOADED_ABOVE_4G)) { + pr_err("XLF_CAN_BE_LOADED_ABOVE_4G is not set.\n"); + return ret; + } + + /* I've got a bzImage */ + pr_debug("It's a relocatable bzImage64\n"); + ret = 0; + + return ret; +} + +void *bzImage64_load(struct kimage *image, char *kernel, + unsigned long kernel_len, char *initrd, + unsigned long initrd_len, char *cmdline, + unsigned long cmdline_len) +{ + + struct setup_header *header; + int setup_sects, kern16_size, ret = 0; + unsigned long setup_header_size, params_cmdline_sz; + struct boot_params *params; + unsigned long bootparam_load_addr, kernel_load_addr, initrd_load_addr; + unsigned long purgatory_load_addr; + unsigned long kernel_bufsz, kernel_memsz, kernel_align; + char *kernel_buf; + struct bzimage64_data *ldata; + struct kexec_entry64_regs regs64; + void *stack; + unsigned int setup_hdr_offset = offsetof(struct boot_params, hdr); + + header = (struct setup_header *)(kernel + setup_hdr_offset); + setup_sects = header->setup_sects; + if (setup_sects == 0) + setup_sects = 4; + + kern16_size = (setup_sects + 1) * 512; + if (kernel_len < kern16_size) { + pr_err("bzImage truncated\n"); + return ERR_PTR(-ENOEXEC); + } + + if (cmdline_len > header->cmdline_size) { + pr_err("Kernel command line too long\n"); + return ERR_PTR(-EINVAL); + } + + /* + * Load purgatory. For 64bit entry point, purgatory code can be + * anywhere. + */ + ret = kexec_load_purgatory(image, MIN_PURGATORY_ADDR, ULONG_MAX, 1, + &purgatory_load_addr); + if (ret) { + pr_err("Loading purgatory failed\n"); + return ERR_PTR(ret); + } + + pr_debug("Loaded purgatory at 0x%lx\n", purgatory_load_addr); + + /* Load Bootparams and cmdline */ + params_cmdline_sz = sizeof(struct boot_params) + cmdline_len; + params = kzalloc(params_cmdline_sz, GFP_KERNEL); + if (!params) + return ERR_PTR(-ENOMEM); + + /* Copy setup header onto bootparams. Documentation/x86/boot.txt */ + setup_header_size = 0x0202 + kernel[0x0201] - setup_hdr_offset; + + /* Is there a limit on setup header size? */ + memcpy(¶ms->hdr, (kernel + setup_hdr_offset), setup_header_size); + + ret = kexec_add_buffer(image, (char *)params, params_cmdline_sz, + params_cmdline_sz, 16, MIN_BOOTPARAM_ADDR, + ULONG_MAX, 1, &bootparam_load_addr); + if (ret) + goto out_free_params; + pr_debug("Loaded boot_param and command line at 0x%lx bufsz=0x%lx memsz=0x%lx\n", + bootparam_load_addr, params_cmdline_sz, params_cmdline_sz); + + /* Load kernel */ + kernel_buf = kernel + kern16_size; + kernel_bufsz = kernel_len - kern16_size; + kernel_memsz = PAGE_ALIGN(header->init_size); + kernel_align = header->kernel_alignment; + + ret = kexec_add_buffer(image, kernel_buf, + kernel_bufsz, kernel_memsz, kernel_align, + MIN_KERNEL_LOAD_ADDR, ULONG_MAX, 1, + &kernel_load_addr); + if (ret) + goto out_free_params; + + pr_debug("Loaded 64bit kernel at 0x%lx bufsz=0x%lx memsz=0x%lx\n", + kernel_load_addr, kernel_memsz, kernel_memsz); + + /* Load initrd high */ + if (initrd) { + ret = kexec_add_buffer(image, initrd, initrd_len, initrd_len, + PAGE_SIZE, MIN_INITRD_LOAD_ADDR, + ULONG_MAX, 1, &initrd_load_addr); + if (ret) + goto out_free_params; + + pr_debug("Loaded initrd at 0x%lx bufsz=0x%lx memsz=0x%lx\n", + initrd_load_addr, initrd_len, initrd_len); + + setup_initrd(params, initrd_load_addr, initrd_len); + } + + setup_cmdline(params, bootparam_load_addr, sizeof(struct boot_params), + cmdline, cmdline_len); + + /* bootloader info. Do we need a separate ID for kexec kernel loader? 
*/ + params->hdr.type_of_loader = 0x0D << 4; + params->hdr.loadflags = 0; + + /* Setup purgatory regs for entry */ + ret = kexec_purgatory_get_set_symbol(image, "entry64_regs", ®s64, + sizeof(regs64), 1); + if (ret) + goto out_free_params; + + regs64.rbx = 0; /* Bootstrap Processor */ + regs64.rsi = bootparam_load_addr; + regs64.rip = kernel_load_addr + 0x200; + stack = kexec_purgatory_get_symbol_addr(image, "stack_end"); + if (IS_ERR(stack)) { + pr_err("Could not find address of symbol stack_end\n"); + ret = -EINVAL; + goto out_free_params; + } + + regs64.rsp = (unsigned long)stack; + ret = kexec_purgatory_get_set_symbol(image, "entry64_regs", ®s64, + sizeof(regs64), 0); + if (ret) + goto out_free_params; + + setup_boot_parameters(params); + + /* Allocate loader specific data */ + ldata = kzalloc(sizeof(struct bzimage64_data), GFP_KERNEL); + if (!ldata) { + ret = -ENOMEM; + goto out_free_params; + } + + /* + * Store pointer to params so that it could be freed after loading + * params segment has been loaded and contents have been copied + * somewhere else. + */ + ldata->bootparams_buf = params; + return ldata; + +out_free_params: + kfree(params); + return ERR_PTR(ret); +} + +/* This cleanup function is called after various segments have been loaded */ +int bzImage64_cleanup(void *loader_data) +{ + struct bzimage64_data *ldata = loader_data; + + if (!ldata) + return 0; + + kfree(ldata->bootparams_buf); + ldata->bootparams_buf = NULL; + + return 0; +} + +struct kexec_file_ops kexec_bzImage64_ops = { + .probe = bzImage64_probe, + .load = bzImage64_load, + .cleanup = bzImage64_cleanup, +}; diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 88404c440727..18d0f9e0b6da 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -23,9 +23,10 @@ #include #include #include +#include static struct kexec_file_ops *kexec_file_loaders[] = { - NULL, + &kexec_bzImage64_ops, }; static void free_transition_pgtable(struct kimage *image) @@ -328,7 +329,7 @@ int arch_kimage_file_post_load_cleanup(struct kimage *image) if (!image->fops || !image->fops->cleanup) return 0; - return image->fops->cleanup(image); + return image->fops->cleanup(image->image_loader_data); } /* diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 84f09e9eca26..9481703b0e7a 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -190,7 +190,7 @@ typedef void *(kexec_load_t)(struct kimage *image, char *kernel_buf, unsigned long kernel_len, char *initrd, unsigned long initrd_len, char *cmdline, unsigned long cmdline_len); -typedef int (kexec_cleanup_t)(struct kimage *image); +typedef int (kexec_cleanup_t)(void *loader_data); struct kexec_file_ops { kexec_probe_t *probe; diff --git a/kernel/kexec.c b/kernel/kexec.c index 669e331aa9ec..0926f2a3ed03 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -460,6 +460,14 @@ static void kimage_file_post_load_cleanup(struct kimage *image) /* See if architecture has anything to cleanup post load */ arch_kimage_file_post_load_cleanup(image); + + /* + * Above call should have called into bootloader to free up + * any data stored in kimage->image_loader_data. It should + * be ok now to free it up. 
+ */ + kfree(image->image_loader_data); + image->image_loader_data = NULL; } /* @@ -576,7 +584,6 @@ out_free_control_pages: kimage_free_page_list(&image->control_pages); out_free_post_load_bufs: kimage_file_post_load_cleanup(image); - kfree(image->image_loader_data); out_free_image: kfree(image); return ret; @@ -900,8 +907,6 @@ static void kimage_free(struct kimage *image) /* Free the kexec control pages... */ kimage_free_page_list(&image->control_pages); - kfree(image->image_loader_data); - /* * Free up any temporary buffers allocated. This might hit if * error occurred much later after buffer allocation. -- cgit v1.2.3-59-g8ed1b From 6a2c20e7d8900ed273dc34a9af9bf02fc478e427 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Fri, 8 Aug 2014 14:26:11 -0700 Subject: kexec: support kexec/kdump on EFI systems This patch does two things. It passes EFI run time mappings to second kernel in bootparams efi_info. Second kernel parse this info and create new mappings in second kernel. That means mappings in first and second kernel will be same. This paves the way to enable EFI in kexec kernel. This patch also prepares and passes EFI setup data through bootparams. This contains bunch of information about various tables and their addresses. These information gathering and passing has been written along the lines of what current kexec-tools is doing to make kexec work with UEFI. [akpm@linux-foundation.org: s/get_efi/efi_get/g, per Matt] Signed-off-by: Vivek Goyal Cc: Borislav Petkov Cc: Michael Kerrisk Cc: Yinghai Lu Cc: Eric Biederman Cc: H. Peter Anvin Cc: Matthew Garrett Cc: Greg Kroah-Hartman Cc: Dave Young Cc: WANG Chao Cc: Baoquan He Cc: Andy Lutomirski Cc: Matt Fleming Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kernel/kexec-bzimage64.c | 146 ++++++++++++++++++++++++++++++++++--- drivers/firmware/efi/runtime-map.c | 21 ++++++ include/linux/efi.h | 19 +++++ 3 files changed, 174 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c index a8e646458a10..623e6c58081f 100644 --- a/arch/x86/kernel/kexec-bzimage64.c +++ b/arch/x86/kernel/kexec-bzimage64.c @@ -18,10 +18,12 @@ #include #include #include +#include #include #include #include +#include #define MAX_ELFCOREHDR_STR_LEN 30 /* elfcorehdr=0x<64bit-value> */ @@ -90,7 +92,7 @@ static int setup_cmdline(struct kimage *image, struct boot_params *params, return 0; } -static int setup_memory_map_entries(struct boot_params *params) +static int setup_e820_entries(struct boot_params *params) { unsigned int nr_e820_entries; @@ -107,8 +109,93 @@ static int setup_memory_map_entries(struct boot_params *params) return 0; } -static int setup_boot_parameters(struct kimage *image, - struct boot_params *params) +#ifdef CONFIG_EFI +static int setup_efi_info_memmap(struct boot_params *params, + unsigned long params_load_addr, + unsigned int efi_map_offset, + unsigned int efi_map_sz) +{ + void *efi_map = (void *)params + efi_map_offset; + unsigned long efi_map_phys_addr = params_load_addr + efi_map_offset; + struct efi_info *ei = ¶ms->efi_info; + + if (!efi_map_sz) + return 0; + + efi_runtime_map_copy(efi_map, efi_map_sz); + + ei->efi_memmap = efi_map_phys_addr & 0xffffffff; + ei->efi_memmap_hi = efi_map_phys_addr >> 32; + ei->efi_memmap_size = efi_map_sz; + + return 0; +} + +static int +prepare_add_efi_setup_data(struct boot_params *params, + unsigned long params_load_addr, + unsigned int efi_setup_data_offset) +{ + unsigned long setup_data_phys; + 
struct setup_data *sd = (void *)params + efi_setup_data_offset; + struct efi_setup_data *esd = (void *)sd + sizeof(struct setup_data); + + esd->fw_vendor = efi.fw_vendor; + esd->runtime = efi.runtime; + esd->tables = efi.config_table; + esd->smbios = efi.smbios; + + sd->type = SETUP_EFI; + sd->len = sizeof(struct efi_setup_data); + + /* Add setup data */ + setup_data_phys = params_load_addr + efi_setup_data_offset; + sd->next = params->hdr.setup_data; + params->hdr.setup_data = setup_data_phys; + + return 0; +} + +static int +setup_efi_state(struct boot_params *params, unsigned long params_load_addr, + unsigned int efi_map_offset, unsigned int efi_map_sz, + unsigned int efi_setup_data_offset) +{ + struct efi_info *current_ei = &boot_params.efi_info; + struct efi_info *ei = ¶ms->efi_info; + + if (!current_ei->efi_memmap_size) + return 0; + + /* + * If 1:1 mapping is not enabled, second kernel can not setup EFI + * and use EFI run time services. User space will have to pass + * acpi_rsdp= on kernel command line to make second kernel boot + * without efi. + */ + if (efi_enabled(EFI_OLD_MEMMAP)) + return 0; + + ei->efi_loader_signature = current_ei->efi_loader_signature; + ei->efi_systab = current_ei->efi_systab; + ei->efi_systab_hi = current_ei->efi_systab_hi; + + ei->efi_memdesc_version = current_ei->efi_memdesc_version; + ei->efi_memdesc_size = efi_get_runtime_map_desc_size(); + + setup_efi_info_memmap(params, params_load_addr, efi_map_offset, + efi_map_sz); + prepare_add_efi_setup_data(params, params_load_addr, + efi_setup_data_offset); + return 0; +} +#endif /* CONFIG_EFI */ + +static int +setup_boot_parameters(struct kimage *image, struct boot_params *params, + unsigned long params_load_addr, + unsigned int efi_map_offset, unsigned int efi_map_sz, + unsigned int efi_setup_data_offset) { unsigned int nr_e820_entries; unsigned long long mem_k, start, end; @@ -140,7 +227,7 @@ static int setup_boot_parameters(struct kimage *image, if (ret) return ret; } else - setup_memory_map_entries(params); + setup_e820_entries(params); nr_e820_entries = params->e820_entries; @@ -161,6 +248,12 @@ static int setup_boot_parameters(struct kimage *image, } } +#ifdef CONFIG_EFI + /* Setup EFI state */ + setup_efi_state(params, params_load_addr, efi_map_offset, efi_map_sz, + efi_setup_data_offset); +#endif + /* Setup EDD info */ memcpy(params->eddbuf, boot_params.eddbuf, EDDMAXNR * sizeof(struct edd_info)); @@ -214,6 +307,15 @@ int bzImage64_probe(const char *buf, unsigned long len) return ret; } + /* + * Can't handle 32bit EFI as it does not allow loading kernel + * above 4G. This should be handled by 32bit bzImage loader + */ + if (efi_enabled(EFI_RUNTIME_SERVICES) && !efi_enabled(EFI_64BIT)) { + pr_debug("EFI is 32 bit. 
Can't load kernel above 4G.\n"); + return ret; + } + /* I've got a bzImage */ pr_debug("It's a relocatable bzImage64\n"); ret = 0; @@ -229,7 +331,7 @@ void *bzImage64_load(struct kimage *image, char *kernel, struct setup_header *header; int setup_sects, kern16_size, ret = 0; - unsigned long setup_header_size, params_cmdline_sz; + unsigned long setup_header_size, params_cmdline_sz, params_misc_sz; struct boot_params *params; unsigned long bootparam_load_addr, kernel_load_addr, initrd_load_addr; unsigned long purgatory_load_addr; @@ -239,6 +341,7 @@ void *bzImage64_load(struct kimage *image, char *kernel, struct kexec_entry64_regs regs64; void *stack; unsigned int setup_hdr_offset = offsetof(struct boot_params, hdr); + unsigned int efi_map_offset, efi_map_sz, efi_setup_data_offset; header = (struct setup_header *)(kernel + setup_hdr_offset); setup_sects = header->setup_sects; @@ -285,12 +388,29 @@ void *bzImage64_load(struct kimage *image, char *kernel, pr_debug("Loaded purgatory at 0x%lx\n", purgatory_load_addr); - /* Load Bootparams and cmdline */ + + /* + * Load Bootparams and cmdline and space for efi stuff. + * + * Allocate memory together for multiple data structures so + * that they all can go in single area/segment and we don't + * have to create separate segment for each. Keeps things + * little bit simple + */ + efi_map_sz = efi_get_runtime_map_size(); + efi_map_sz = ALIGN(efi_map_sz, 16); params_cmdline_sz = sizeof(struct boot_params) + cmdline_len + MAX_ELFCOREHDR_STR_LEN; - params = kzalloc(params_cmdline_sz, GFP_KERNEL); + params_cmdline_sz = ALIGN(params_cmdline_sz, 16); + params_misc_sz = params_cmdline_sz + efi_map_sz + + sizeof(struct setup_data) + + sizeof(struct efi_setup_data); + + params = kzalloc(params_misc_sz, GFP_KERNEL); if (!params) return ERR_PTR(-ENOMEM); + efi_map_offset = params_cmdline_sz; + efi_setup_data_offset = efi_map_offset + efi_map_sz; /* Copy setup header onto bootparams. Documentation/x86/boot.txt */ setup_header_size = 0x0202 + kernel[0x0201] - setup_hdr_offset; @@ -298,13 +418,13 @@ void *bzImage64_load(struct kimage *image, char *kernel, /* Is there a limit on setup header size? 
*/ memcpy(¶ms->hdr, (kernel + setup_hdr_offset), setup_header_size); - ret = kexec_add_buffer(image, (char *)params, params_cmdline_sz, - params_cmdline_sz, 16, MIN_BOOTPARAM_ADDR, + ret = kexec_add_buffer(image, (char *)params, params_misc_sz, + params_misc_sz, 16, MIN_BOOTPARAM_ADDR, ULONG_MAX, 1, &bootparam_load_addr); if (ret) goto out_free_params; - pr_debug("Loaded boot_param and command line at 0x%lx bufsz=0x%lx memsz=0x%lx\n", - bootparam_load_addr, params_cmdline_sz, params_cmdline_sz); + pr_debug("Loaded boot_param, command line and misc at 0x%lx bufsz=0x%lx memsz=0x%lx\n", + bootparam_load_addr, params_misc_sz, params_misc_sz); /* Load kernel */ kernel_buf = kernel + kern16_size; @@ -365,7 +485,9 @@ void *bzImage64_load(struct kimage *image, char *kernel, if (ret) goto out_free_params; - ret = setup_boot_parameters(image, params); + ret = setup_boot_parameters(image, params, bootparam_load_addr, + efi_map_offset, efi_map_sz, + efi_setup_data_offset); if (ret) goto out_free_params; diff --git a/drivers/firmware/efi/runtime-map.c b/drivers/firmware/efi/runtime-map.c index 97cdd16a2169..018c29a26615 100644 --- a/drivers/firmware/efi/runtime-map.c +++ b/drivers/firmware/efi/runtime-map.c @@ -138,6 +138,27 @@ add_sysfs_runtime_map_entry(struct kobject *kobj, int nr) return entry; } +int efi_get_runtime_map_size(void) +{ + return nr_efi_runtime_map * efi_memdesc_size; +} + +int efi_get_runtime_map_desc_size(void) +{ + return efi_memdesc_size; +} + +int efi_runtime_map_copy(void *buf, size_t bufsz) +{ + size_t sz = efi_get_runtime_map_size(); + + if (sz > bufsz) + sz = bufsz; + + memcpy(buf, efi_runtime_map, sz); + return 0; +} + void efi_runtime_map_setup(void *map, int nr_entries, u32 desc_size) { efi_runtime_map = map; diff --git a/include/linux/efi.h b/include/linux/efi.h index efc681fd5895..45cb4ffdea62 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1156,6 +1156,9 @@ int efivars_sysfs_init(void); #ifdef CONFIG_EFI_RUNTIME_MAP int efi_runtime_map_init(struct kobject *); void efi_runtime_map_setup(void *, int, u32); +int efi_get_runtime_map_size(void); +int efi_get_runtime_map_desc_size(void); +int efi_runtime_map_copy(void *buf, size_t bufsz); #else static inline int efi_runtime_map_init(struct kobject *kobj) { @@ -1164,6 +1167,22 @@ static inline int efi_runtime_map_init(struct kobject *kobj) static inline void efi_runtime_map_setup(void *map, int nr_entries, u32 desc_size) {} + +static inline int efi_get_runtime_map_size(void) +{ + return 0; +} + +static inline int efi_get_runtime_map_desc_size(void) +{ + return 0; +} + +static inline int efi_runtime_map_copy(void *buf, size_t bufsz) +{ + return 0; +} + #endif /* prototypes shared between arch specific and generic stub code */ -- cgit v1.2.3-59-g8ed1b From 8e7d838103feac320baf9e68d73f954840ac1eea Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Fri, 8 Aug 2014 14:26:13 -0700 Subject: kexec: verify the signature of signed PE bzImage This is the final piece of the puzzle of verifying kernel image signature during kexec_file_load() syscall. This patch calls into PE file routines to verify signature of bzImage. If signature are valid, kexec_file_load() succeeds otherwise it fails. Two new config options have been introduced. First one is CONFIG_KEXEC_VERIFY_SIG. This option enforces that kernel has to be validly signed otherwise kernel load will fail. If this option is not set, no signature verification will be done. Only exception will be when secureboot is enabled. 
In that case signature verification should be automatically enforced when secureboot is enabled. But that will happen when secureboot patches are merged. Second config option is CONFIG_KEXEC_BZIMAGE_VERIFY_SIG. This option enables signature verification support on bzImage. If this option is not set and previous one is set, kernel image loading will fail because kernel does not have support to verify signature of bzImage. I tested these patches with both "pesign" and "sbsign" signed bzImages. I used signing_key.priv key and signing_key.x509 cert for signing as generated during kernel build process (if module signing is enabled). Used following method to sign bzImage. pesign ====== - Convert DER format cert to PEM format cert openssl x509 -in signing_key.x509 -inform DER -out signing_key.x509.PEM -outform PEM - Generate a .p12 file from existing cert and private key file openssl pkcs12 -export -out kernel-key.p12 -inkey signing_key.priv -in signing_key.x509.PEM - Import .p12 file into pesign db pk12util -i /tmp/kernel-key.p12 -d /etc/pki/pesign - Sign bzImage pesign -i /boot/vmlinuz-3.16.0-rc3+ -o /boot/vmlinuz-3.16.0-rc3+.signed.pesign -c "Glacier signing key - Magrathea" -s sbsign ====== sbsign --key signing_key.priv --cert signing_key.x509.PEM --output /boot/vmlinuz-3.16.0-rc3+.signed.sbsign /boot/vmlinuz-3.16.0-rc3+ Patch details: Well all the hard work is done in previous patches. Now bzImage loader has just call into that code and verify whether bzImage signature are valid or not. Also create two config options. First one is CONFIG_KEXEC_VERIFY_SIG. This option enforces that kernel has to be validly signed otherwise kernel load will fail. If this option is not set, no signature verification will be done. Only exception will be when secureboot is enabled. In that case signature verification should be automatically enforced when secureboot is enabled. But that will happen when secureboot patches are merged. Second config option is CONFIG_KEXEC_BZIMAGE_VERIFY_SIG. This option enables signature verification support on bzImage. If this option is not set and previous one is set, kernel image loading will fail because kernel does not have support to verify signature of bzImage. Signed-off-by: Vivek Goyal Cc: Borislav Petkov Cc: Michael Kerrisk Cc: Yinghai Lu Cc: Eric Biederman Cc: H. Peter Anvin Cc: Matthew Garrett Cc: Greg Kroah-Hartman Cc: Dave Young Cc: WANG Chao Cc: Baoquan He Cc: Andy Lutomirski Cc: Matt Fleming Cc: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/Kconfig | 22 ++++++++++++++++++++++ arch/x86/kernel/kexec-bzimage64.c | 21 +++++++++++++++++++++ arch/x86/kernel/machine_kexec_64.c | 11 +++++++++++ include/linux/kexec.h | 3 +++ kernel/kexec.c | 15 +++++++++++++++ 5 files changed, 72 insertions(+) (limited to 'include/linux') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 9558b9fcafbf..4aafd322e21e 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1599,6 +1599,28 @@ config KEXEC interface is strongly in flux, so no good recommendation can be made. +config KEXEC_VERIFY_SIG + bool "Verify kernel signature during kexec_file_load() syscall" + depends on KEXEC + ---help--- + This option makes kernel signature verification mandatory for + kexec_file_load() syscall. If kernel is signature can not be + verified, kexec_file_load() will fail. + + This option enforces signature verification at generic level. + One needs to enable signature verification for type of kernel + image being loaded to make sure it works. 
For example, enable + bzImage signature verification option to be able to load and + verify signatures of bzImage. Otherwise kernel loading will fail. + +config KEXEC_BZIMAGE_VERIFY_SIG + bool "Enable bzImage signature verification support" + depends on KEXEC_VERIFY_SIG + depends on SIGNED_PE_FILE_VERIFICATION + select SYSTEM_TRUSTED_KEYRING + ---help--- + Enable bzImage signature verification support. + config CRASH_DUMP bool "kernel crash dumps" depends on X86_64 || (X86_32 && HIGHMEM) diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c index 623e6c58081f..9642b9b33655 100644 --- a/arch/x86/kernel/kexec-bzimage64.c +++ b/arch/x86/kernel/kexec-bzimage64.c @@ -19,6 +19,8 @@ #include #include #include +#include +#include #include #include @@ -525,8 +527,27 @@ int bzImage64_cleanup(void *loader_data) return 0; } +#ifdef CONFIG_KEXEC_BZIMAGE_VERIFY_SIG +int bzImage64_verify_sig(const char *kernel, unsigned long kernel_len) +{ + bool trusted; + int ret; + + ret = verify_pefile_signature(kernel, kernel_len, + system_trusted_keyring, &trusted); + if (ret < 0) + return ret; + if (!trusted) + return -EKEYREJECTED; + return 0; +} +#endif + struct kexec_file_ops kexec_bzImage64_ops = { .probe = bzImage64_probe, .load = bzImage64_load, .cleanup = bzImage64_cleanup, +#ifdef CONFIG_KEXEC_BZIMAGE_VERIFY_SIG + .verify_sig = bzImage64_verify_sig, +#endif }; diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 9330434da777..8b04018e5d1f 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -372,6 +372,17 @@ int arch_kimage_file_post_load_cleanup(struct kimage *image) return image->fops->cleanup(image->image_loader_data); } +int arch_kexec_kernel_verify_sig(struct kimage *image, void *kernel, + unsigned long kernel_len) +{ + if (!image->fops || !image->fops->verify_sig) { + pr_debug("kernel loader does not support signature verification."); + return -EKEYREJECTED; + } + + return image->fops->verify_sig(kernel, kernel_len); +} + /* * Apply purgatory relocations. 
* diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 9481703b0e7a..4b2a0e11cc5b 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -191,11 +191,14 @@ typedef void *(kexec_load_t)(struct kimage *image, char *kernel_buf, unsigned long initrd_len, char *cmdline, unsigned long cmdline_len); typedef int (kexec_cleanup_t)(void *loader_data); +typedef int (kexec_verify_sig_t)(const char *kernel_buf, + unsigned long kernel_len); struct kexec_file_ops { kexec_probe_t *probe; kexec_load_t *load; kexec_cleanup_t *cleanup; + kexec_verify_sig_t *verify_sig; }; /* kexec interface functions */ diff --git a/kernel/kexec.c b/kernel/kexec.c index f18c780f9716..0b49a0a58102 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -416,6 +416,12 @@ void __weak arch_kimage_file_post_load_cleanup(struct kimage *image) { } +int __weak arch_kexec_kernel_verify_sig(struct kimage *image, void *buf, + unsigned long buf_len) +{ + return -EKEYREJECTED; +} + /* Apply relocations of type RELA */ int __weak arch_kexec_apply_relocations_add(const Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, @@ -494,6 +500,15 @@ kimage_file_prepare_segments(struct kimage *image, int kernel_fd, int initrd_fd, if (ret) goto out; +#ifdef CONFIG_KEXEC_VERIFY_SIG + ret = arch_kexec_kernel_verify_sig(image, image->kernel_buf, + image->kernel_buf_len); + if (ret) { + pr_debug("kernel signature verification failed.\n"); + goto out; + } + pr_debug("kernel signature verification successful.\n"); +#endif /* It is possible that there no initramfs is being loaded */ if (!(flags & KEXEC_FILE_NO_INITRAMFS)) { ret = copy_file_from_fd(initrd_fd, &image->initrd_buf, -- cgit v1.2.3-59-g8ed1b From 26375b5c8449927f740ce0e837e23f45c951fb80 Mon Sep 17 00:00:00 2001 From: Jaehoon Chung Date: Thu, 7 Aug 2014 16:37:58 +0900 Subject: mmc: dw_mmc: Slot quirk "disable-wp" is deprecated. Slot quirks "disable-wp" is deprecated. Instead, use the host quirk "disable-wp". (Because the slot-node is removed in dt-file.) 
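For DT users this change simply means that the existing "disable-wp" property now belongs on the controller node itself rather than on a slot sub-node. As a purely illustrative sketch (not part of this patch), a non-DT board could request the same behaviour through the new host-level flag; struct dw_mci_board and its quirks field are assumed from dw_mmc.h:

	static struct dw_mci_board example_mmc_pdata = {
		/* host-level replacement for DW_MCI_SLOT_QUIRK_NO_WRITE_PROTECT */
		.quirks = DW_MCI_QUIRK_NO_WRITE_PROTECT,
	};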
Signed-off-by: Jaehoon Chung Tested-by: Sachin Kamat Acked-by: Seungwon Jeon Reviewed-by: Doug Anderson Tested-by: Doug Anderson Signed-off-by: Ulf Hansson --- drivers/mmc/host/dw_mmc.c | 11 +++++++++-- include/linux/mmc/dw_mmc.h | 2 ++ 2 files changed, 11 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c index 39cf54f479d9..8f216edbdf08 100644 --- a/drivers/mmc/host/dw_mmc.c +++ b/drivers/mmc/host/dw_mmc.c @@ -996,7 +996,8 @@ static int dw_mci_get_ro(struct mmc_host *mmc) int gpio_ro = mmc_gpio_get_ro(mmc); /* Use platform get_ro function, else try on board write protect */ - if (slot->quirks & DW_MCI_SLOT_QUIRK_NO_WRITE_PROTECT) + if ((slot->quirks & DW_MCI_SLOT_QUIRK_NO_WRITE_PROTECT) || + (slot->host->quirks & DW_MCI_QUIRK_NO_WRITE_PROTECT)) read_only = 0; else if (!IS_ERR_VALUE(gpio_ro)) read_only = gpio_ro; @@ -2014,8 +2015,11 @@ static int dw_mci_of_get_slot_quirks(struct device *dev, u8 slot) /* get quirks */ for (idx = 0; idx < ARRAY_SIZE(of_slot_quirks); idx++) - if (of_get_property(np, of_slot_quirks[idx].quirk, NULL)) + if (of_get_property(np, of_slot_quirks[idx].quirk, NULL)) { + dev_warn(dev, "Slot quirk %s is deprecated\n", + of_slot_quirks[idx].quirk); quirks |= of_slot_quirks[idx].id; + } return quirks; } @@ -2279,6 +2283,9 @@ static struct dw_mci_of_quirks { { .quirk = "broken-cd", .id = DW_MCI_QUIRK_BROKEN_CARD_DETECTION, + }, { + .quirk = "disable-wp", + .id = DW_MCI_QUIRK_NO_WRITE_PROTECT, }, }; diff --git a/include/linux/mmc/dw_mmc.h b/include/linux/mmc/dw_mmc.h index babaea93bca6..29ce014ab421 100644 --- a/include/linux/mmc/dw_mmc.h +++ b/include/linux/mmc/dw_mmc.h @@ -213,6 +213,8 @@ struct dw_mci_dma_ops { #define DW_MCI_QUIRK_HIGHSPEED BIT(2) /* Unreliable card detection */ #define DW_MCI_QUIRK_BROKEN_CARD_DETECTION BIT(3) +/* No write protect */ +#define DW_MCI_QUIRK_NO_WRITE_PROTECT BIT(4) /* Slot level quirks */ /* This slot has no write protect */ -- cgit v1.2.3-59-g8ed1b From 0d5501c1c828fb97d02af50aa9d2b1a5498b94e4 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 8 Aug 2014 14:42:13 -0400 Subject: net: Always untag vlan-tagged traffic on input. Currently the functionality to untag traffic on input resides as part of the vlan module and is built only when VLAN support is enabled in the kernel. When VLAN is disabled, the function vlan_untag() turns into a stub and doesn't really untag the packets. This seems to create an interesting interaction between VMs supporting checksum offloading and some network drivers. There are some drivers that do not allow the user to change the tx-vlan-offload feature of the driver. These drivers also seem to assume that any VLAN-tagged traffic they transmit will have the vlan information in the vlan_tci and not in the vlan header already in the skb. When transmitting skbs that already have tagged data with partial checksum set, the checksum doesn't appear to be updated correctly by the card thus resulting in a failure to establish TCP connections. The following is a packet trace taken on the receiver where a sender is a VM with a VLAN configured.
The host the VM is running on does not have VLAN support and the outgoing interface on the host is tg3: 10:12:43.503055 52:54:00:ae:42:3f > 28:d2:44:7d:c2:de, ethertype 802.1Q (0x8100), length 78: vlan 100, p 0, ethertype IPv4, (tos 0x0, ttl 64, id 27243, offset 0, flags [DF], proto TCP (6), length 60) 10.0.100.1.58545 > 10.0.100.10.ircu-2: Flags [S], cksum 0xdc39 (incorrect -> 0x48d9), seq 1069378582, win 29200, options [mss 1460,sackOK,TS val 4294837885 ecr 0,nop,wscale 7], length 0 10:12:44.505556 52:54:00:ae:42:3f > 28:d2:44:7d:c2:de, ethertype 802.1Q (0x8100), length 78: vlan 100, p 0, ethertype IPv4, (tos 0x0, ttl 64, id 27244, offset 0, flags [DF], proto TCP (6), length 60) 10.0.100.1.58545 > 10.0.100.10.ircu-2: Flags [S], cksum 0xdc39 (incorrect -> 0x44ee), seq 1069378582, win 29200, options [mss 1460,sackOK,TS val 4294838888 ecr 0,nop,wscale 7], length 0 This connection finally times out. I only have access to the TG3 hardware in this configuration, thus have only tested this with the TG3 driver. There are a lot of other drivers that do not permit user changes to vlan acceleration features, and I don't know if they all suffer from a similar issue. The patch attempts to fix this another way. It moves the vlan header stripping code out of the vlan module and always builds it into the kernel network core. This way, even if vlan is not supported on a virtualization host, the virtual machines running on top of such a host will still work with VLANs enabled. CC: Patrick McHardy CC: Nithin Nayak Sujir CC: Michael Chan CC: Jiri Pirko Signed-off-by: Vladislav Yasevich Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 6 ------ include/linux/skbuff.h | 1 + net/8021q/vlan_core.c | 53 ------------------------------------------------- net/bridge/br_vlan.c | 2 +- net/core/dev.c | 2 +- net/core/skbuff.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 56 insertions(+), 61 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 4967916fe4ac..d69f0577a319 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -187,7 +187,6 @@ vlan_dev_get_egress_qos_mask(struct net_device *dev, u32 skprio) } extern bool vlan_do_receive(struct sk_buff **skb); -extern struct sk_buff *vlan_untag(struct sk_buff *skb); extern int vlan_vid_add(struct net_device *dev, __be16 proto, u16 vid); extern void vlan_vid_del(struct net_device *dev, __be16 proto, u16 vid); @@ -241,11 +240,6 @@ static inline bool vlan_do_receive(struct sk_buff **skb) return false; } -static inline struct sk_buff *vlan_untag(struct sk_buff *skb) -{ - return skb; -} - static inline int vlan_vid_add(struct net_device *dev, __be16 proto, u16 vid) { return 0; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 11c270551d25..abde271c18ae 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2555,6 +2555,7 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen); void skb_scrub_packet(struct sk_buff *skb, bool xnet); unsigned int skb_gso_transport_seglen(const struct sk_buff *skb); struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features); +struct sk_buff *skb_vlan_untag(struct sk_buff *skb); struct skb_checksum_ops { __wsum (*update)(const void *mem, int len, __wsum wsum); diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 75d427763992..90cc2bdd4064 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -112,59 +112,6 @@ __be16 vlan_dev_vlan_proto(const
struct net_device *dev) } EXPORT_SYMBOL(vlan_dev_vlan_proto); -static struct sk_buff *vlan_reorder_header(struct sk_buff *skb) -{ - if (skb_cow(skb, skb_headroom(skb)) < 0) { - kfree_skb(skb); - return NULL; - } - - memmove(skb->data - ETH_HLEN, skb->data - VLAN_ETH_HLEN, 2 * ETH_ALEN); - skb->mac_header += VLAN_HLEN; - return skb; -} - -struct sk_buff *vlan_untag(struct sk_buff *skb) -{ - struct vlan_hdr *vhdr; - u16 vlan_tci; - - if (unlikely(vlan_tx_tag_present(skb))) { - /* vlan_tci is already set-up so leave this for another time */ - return skb; - } - - skb = skb_share_check(skb, GFP_ATOMIC); - if (unlikely(!skb)) - goto err_free; - - if (unlikely(!pskb_may_pull(skb, VLAN_HLEN))) - goto err_free; - - vhdr = (struct vlan_hdr *) skb->data; - vlan_tci = ntohs(vhdr->h_vlan_TCI); - __vlan_hwaccel_put_tag(skb, skb->protocol, vlan_tci); - - skb_pull_rcsum(skb, VLAN_HLEN); - vlan_set_encap_proto(skb, vhdr); - - skb = vlan_reorder_header(skb); - if (unlikely(!skb)) - goto err_free; - - skb_reset_network_header(skb); - skb_reset_transport_header(skb); - skb_reset_mac_len(skb); - - return skb; - -err_free: - kfree_skb(skb); - return NULL; -} -EXPORT_SYMBOL(vlan_untag); - - /* * vlan info and vid list */ diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index febb0f87fa37..e1bcd653899b 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -181,7 +181,7 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v, */ if (unlikely(!vlan_tx_tag_present(skb) && skb->protocol == proto)) { - skb = vlan_untag(skb); + skb = skb_vlan_untag(skb); if (unlikely(!skb)) return false; } diff --git a/net/core/dev.c b/net/core/dev.c index 1c15b189c52b..b65a5051361f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3602,7 +3602,7 @@ another_round: if (skb->protocol == cpu_to_be16(ETH_P_8021Q) || skb->protocol == cpu_to_be16(ETH_P_8021AD)) { - skb = vlan_untag(skb); + skb = skb_vlan_untag(skb); if (unlikely(!skb)) goto unlock; } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 224506a6fa80..163b673f9e62 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -62,6 +62,7 @@ #include #include #include +#include #include #include @@ -3973,3 +3974,55 @@ unsigned int skb_gso_transport_seglen(const struct sk_buff *skb) return shinfo->gso_size; } EXPORT_SYMBOL_GPL(skb_gso_transport_seglen); + +static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb) +{ + if (skb_cow(skb, skb_headroom(skb)) < 0) { + kfree_skb(skb); + return NULL; + } + + memmove(skb->data - ETH_HLEN, skb->data - VLAN_ETH_HLEN, 2 * ETH_ALEN); + skb->mac_header += VLAN_HLEN; + return skb; +} + +struct sk_buff *skb_vlan_untag(struct sk_buff *skb) +{ + struct vlan_hdr *vhdr; + u16 vlan_tci; + + if (unlikely(vlan_tx_tag_present(skb))) { + /* vlan_tci is already set-up so leave this for another time */ + return skb; + } + + skb = skb_share_check(skb, GFP_ATOMIC); + if (unlikely(!skb)) + goto err_free; + + if (unlikely(!pskb_may_pull(skb, VLAN_HLEN))) + goto err_free; + + vhdr = (struct vlan_hdr *)skb->data; + vlan_tci = ntohs(vhdr->h_vlan_TCI); + __vlan_hwaccel_put_tag(skb, skb->protocol, vlan_tci); + + skb_pull_rcsum(skb, VLAN_HLEN); + vlan_set_encap_proto(skb, vhdr); + + skb = skb_reorder_vlan_header(skb); + if (unlikely(!skb)) + goto err_free; + + skb_reset_network_header(skb); + skb_reset_transport_header(skb); + skb_reset_mac_len(skb); + + return skb; + +err_free: + kfree_skb(skb); + return NULL; +} +EXPORT_SYMBOL(skb_vlan_untag); -- cgit v1.2.3-59-g8ed1b From 14c4000a88afaaa2d0877cc86d42a74fde0f35e0 
Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Sat, 9 Aug 2014 11:15:30 +0530 Subject: printk: Add function to return log buffer address and size Platforms like IBM Power Systems support service processor assisted dump. It provides an interface to add memory regions to be captured when the system crashes. During initialization/runtime we can add kernel memory regions to be collected. Presently we don't have a way to get the log buffer base address and size. This patch adds support for returning the log buffer address and size. Signed-off-by: Vasant Hegde Signed-off-by: Benjamin Herrenschmidt Acked-by: Andrew Morton --- include/linux/printk.h | 3 +++ kernel/printk/printk.c | 12 ++++++++++++ 2 files changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/printk.h b/include/linux/printk.h index 0990997a5304..d78125f73ac4 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -10,6 +10,9 @@ extern const char linux_banner[]; extern const char linux_proc_banner[]; +extern char *log_buf_addr_get(void); +extern u32 log_buf_len_get(void); + static inline int printk_get_level(const char *buffer) { if (buffer[0] == KERN_SOH_ASCII && buffer[1]) { diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index de1a6bb6861d..e04c455a0e38 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -272,6 +272,18 @@ static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN); static char *log_buf = __log_buf; static u32 log_buf_len = __LOG_BUF_LEN; +/* Return log buffer address */ +char *log_buf_addr_get(void) +{ + return log_buf; +} + +/* Return log buffer size */ +u32 log_buf_len_get(void) +{ + return log_buf_len; +} + /* human readable text of the record */ static char *log_text(const struct printk_log *msg) { -- cgit v1.2.3-59-g8ed1b From 2c67568903d6ae1b8cfa343c649029180239418e Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 8 Aug 2014 13:02:36 +0200 Subject: spi: Add missing kerneldoc bits These are all arguments or fields that got added without updating the kerneldoc comments. Signed-off-by: Thierry Reding Signed-off-by: Mark Brown --- drivers/spi/spi.c | 1 + include/linux/spi/spi.h | 7 +++++++ 2 files changed, 8 insertions(+) (limited to 'include/linux') diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index e6f076d5ffd5..f52f3e647ef8 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -843,6 +843,7 @@ out: /** * spi_finalize_current_transfer - report completion of a transfer + * @master: the master reporting completion * * Called by SPI drivers using the core transfer_one_message() * implementation to notify it that the current interrupt driven diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index e713543336f1..46d188a9947c 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -253,6 +253,7 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv) * the device whose settings are being modified. * @transfer: adds a message to the controller's transfer queue.
* @cleanup: frees controller-specific state + * @can_dma: determine whether this master supports DMA * @queued: whether this master is providing an internal message queue * @kworker: thread struct for message pump * @kworker_task: pointer to task for message pump kworker thread @@ -262,6 +263,7 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv) * @cur_msg: the currently in-flight message * @cur_msg_prepared: spi_prepare_message was called for the currently * in-flight message + * @cur_msg_mapped: message has been mapped for DMA * @xfer_completion: used by core transfer_one_message() * @busy: message pump is busy * @running: message pump is running @@ -299,6 +301,10 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv) * @cs_gpios: Array of GPIOs to use as chip select lines; one per CS * number. Any individual value may be -ENOENT for CS lines that * are not GPIOs (driven by the SPI controller itself). + * @dma_tx: DMA transmit channel + * @dma_rx: DMA receive channel + * @dummy_rx: dummy receive buffer for full-duplex devices + * @dummy_tx: dummy transmit buffer for full-duplex devices * * Each SPI master controller can communicate with one or more @spi_device * children. These make a small bus, sharing MOSI, MISO and SCK signals @@ -632,6 +638,7 @@ struct spi_transfer { * addresses for each transfer buffer * @complete: called to report transaction completions * @context: the argument to complete() when it's called + * @frame_length: the total number of bytes in the message * @actual_length: the total number of bytes that were transferred in all * successful segments * @status: zero for success, else negative errno -- cgit v1.2.3-59-g8ed1b From 5300fdcb7b7e97d83033bc7196582705524d35ea Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Wed, 13 Aug 2014 16:38:29 +0200 Subject: rhashtable: RCU annotations for next pointers Properly annotate next pointers as access is RCU protected in the lookup path. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 4 ++-- lib/rhashtable.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 9cda293c867d..8c6048e77f29 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -21,7 +21,7 @@ #include struct rhash_head { - struct rhash_head *next; + struct rhash_head __rcu *next; }; #define INIT_HASH_HEAD(ptr) ((ptr)->next = NULL) @@ -97,7 +97,7 @@ u32 rhashtable_obj_hashfn(const struct rhashtable *ht, void *ptr); void rhashtable_insert(struct rhashtable *ht, struct rhash_head *node, gfp_t); bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *node, gfp_t); void rhashtable_remove_pprev(struct rhashtable *ht, struct rhash_head *obj, - struct rhash_head **pprev, gfp_t flags); + struct rhash_head __rcu **pprev, gfp_t flags); bool rht_grow_above_75(const struct rhashtable *ht, size_t new_size); bool rht_shrink_below_30(const struct rhashtable *ht, size_t new_size); diff --git a/lib/rhashtable.c b/lib/rhashtable.c index e6940cf16628..338dd7aa5e13 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -386,7 +386,7 @@ EXPORT_SYMBOL_GPL(rhashtable_insert); * deletion when combined with walking or lookup. 
*/ void rhashtable_remove_pprev(struct rhashtable *ht, struct rhash_head *obj, - struct rhash_head **pprev, gfp_t flags) + struct rhash_head __rcu **pprev, gfp_t flags) { struct bucket_table *tbl = rht_dereference(ht->tbl, ht); -- cgit v1.2.3-59-g8ed1b From c91eee56dc4f8c3d9ae834bacb835596d47a709e Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Wed, 13 Aug 2014 16:38:30 +0200 Subject: rhashtable: unexport and make rht_obj() static No need to export rht_obj(), all inner to outer object translations occur internally. It was intended to be used with rht_for_each() which now primarily serves as the iterator for rhashtable_remove_pprev() to effectively flush and free the full table. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 1 - lib/rhashtable.c | 8 +------- 2 files changed, 1 insertion(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 8c6048e77f29..af967c4c7591 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -117,7 +117,6 @@ void rhashtable_destroy(const struct rhashtable *ht); #define rht_dereference_rcu(p, ht) \ rcu_dereference_check(p, lockdep_rht_mutex_is_held(ht)) -/* Internal, use rht_obj() instead */ #define rht_entry(ptr, type, member) container_of(ptr, type, member) #define rht_entry_safe(ptr, type, member) \ ({ \ diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 338dd7aa5e13..a2c78810ebc1 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -38,16 +38,10 @@ int lockdep_rht_mutex_is_held(const struct rhashtable *ht) EXPORT_SYMBOL_GPL(lockdep_rht_mutex_is_held); #endif -/** - * rht_obj - cast hash head to outer object - * @ht: hash table - * @he: hashed node - */ -void *rht_obj(const struct rhashtable *ht, const struct rhash_head *he) +static void *rht_obj(const struct rhashtable *ht, const struct rhash_head *he) { return (void *) he - ht->p.head_offset; } -EXPORT_SYMBOL_GPL(rht_obj); static u32 __hashfn(const struct rhashtable *ht, const void *key, u32 len, u32 hsize) -- cgit v1.2.3-59-g8ed1b From 93f560811e80216e98f3fcec220aa0f8836b09af Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Wed, 13 Aug 2014 16:38:31 +0200 Subject: rhashtable: fix annotations for rht_for_each_entry_rcu() Call rcu_deference_raw() directly from within rht_for_each_entry_rcu() as list_for_each_entry_rcu() does. Fixes the following sparse warnings: net/netlink/af_netlink.c:2906:25: expected struct rhash_head const *__mptr net/netlink/af_netlink.c:2906:25: got struct rhash_head [noderef] * Fixes: e341694e3eb57fc ("netlink: Convert netlink_lookup() to use RCU protected hash table") Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index af967c4c7591..36826c0166c5 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -123,11 +123,6 @@ void rhashtable_destroy(const struct rhashtable *ht); typeof(ptr) __ptr = (ptr); \ __ptr ? rht_entry(__ptr, type, member) : NULL; \ }) -#define rht_entry_safe_rcu(ptr, type, member) \ -({ \ - typeof(*ptr) __rcu *__ptr = (typeof(*ptr) __rcu __force *)ptr; \ - __ptr ? 
container_of((typeof(ptr))rcu_dereference_raw(__ptr), type, member) : NULL; \ -}) #define rht_next_entry_safe(pos, ht, member) \ ({ \ @@ -204,9 +199,10 @@ void rhashtable_destroy(const struct rhashtable *ht); * traversal is guarded by rcu_read_lock(). */ #define rht_for_each_entry_rcu(pos, head, member) \ - for (pos = rht_entry_safe_rcu(head, typeof(*(pos)), member); \ + for (pos = rht_entry_safe(rcu_dereference_raw(head), \ + typeof(*(pos)), member); \ pos; \ - pos = rht_entry_safe_rcu((pos)->member.next, \ - typeof(*(pos)), member)) + pos = rht_entry_safe(rcu_dereference_raw((pos)->member.next), \ + typeof(*(pos)), member)) #endif /* _LINUX_RHASHTABLE_H */ -- cgit v1.2.3-59-g8ed1b From 8a58d1f1f373238cb0d6d7f8d3dd723aa164b8ac Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 15 Aug 2014 12:38:41 -0600 Subject: blk-mq: get rid of unused BLK_MQ_F_SHOULD_SORT flag We used to use this for determining whether to sort the dispatch list, but it's unused now. Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index eb726b9c5762..a1e31f274fcd 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -127,10 +127,9 @@ enum { BLK_MQ_RQ_QUEUE_ERROR = 2, /* end IO with error */ BLK_MQ_F_SHOULD_MERGE = 1 << 0, - BLK_MQ_F_SHOULD_SORT = 1 << 1, - BLK_MQ_F_TAG_SHARED = 1 << 2, - BLK_MQ_F_SG_MERGE = 1 << 3, - BLK_MQ_F_SYSFS_UP = 1 << 4, + BLK_MQ_F_TAG_SHARED = 1 << 1, + BLK_MQ_F_SG_MERGE = 1 << 2, + BLK_MQ_F_SYSFS_UP = 1 << 3, BLK_MQ_S_STOPPED = 0, BLK_MQ_S_TAG_ACTIVE = 1, -- cgit v1.2.3-59-g8ed1b From 16466f4284154311f163a58b77379eb186274f87 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 14 Aug 2014 16:52:51 -0700 Subject: net: phy: bcm7xxx: remove 28nm wildcard entry A wildcard entry with the 32-bits OUI 0x600d8400 was added as part of the BCM7xxx internal PHY driver, but that entry might match other PHYs that are not covered by this driver, so let's just remove it. Fixes: b560a58c45c6 ("net: phy: add Broadcom BCM7xxx internal PHY driver") Signed-off-by: Florian Fainelli Signed-off-by: David S. 
Miller --- drivers/net/phy/bcm7xxx.c | 14 -------------- include/linux/brcmphy.h | 1 - 2 files changed, 15 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/bcm7xxx.c b/drivers/net/phy/bcm7xxx.c index 526b94cea569..2b40548c85d5 100644 --- a/drivers/net/phy/bcm7xxx.c +++ b/drivers/net/phy/bcm7xxx.c @@ -286,19 +286,6 @@ static struct phy_driver bcm7xxx_driver[] = { .suspend = bcm7xxx_suspend, .resume = bcm7xxx_28nm_config_init, .driver = { .owner = THIS_MODULE }, -}, { - .name = "Broadcom BCM7XXX 28nm", - .phy_id = PHY_ID_BCM7XXX_28, - .phy_id_mask = PHY_BCM_OUI_MASK, - .features = PHY_GBIT_FEATURES | - SUPPORTED_Pause | SUPPORTED_Asym_Pause, - .flags = PHY_IS_INTERNAL, - .config_init = bcm7xxx_28nm_config_init, - .config_aneg = genphy_config_aneg, - .read_status = genphy_read_status, - .suspend = bcm7xxx_suspend, - .resume = bcm7xxx_28nm_config_init, - .driver = { .owner = THIS_MODULE }, }, { .phy_id = PHY_BCM_OUI_4, .phy_id_mask = 0xffff0000, @@ -331,7 +318,6 @@ static struct mdio_device_id __maybe_unused bcm7xxx_tbl[] = { { PHY_ID_BCM7366, 0xfffffff0, }, { PHY_ID_BCM7439, 0xfffffff0, }, { PHY_ID_BCM7445, 0xfffffff0, }, - { PHY_ID_BCM7XXX_28, 0xfffffc00 }, { PHY_BCM_OUI_4, 0xffff0000 }, { PHY_BCM_OUI_5, 0xffffff00 }, { } diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 6f76277baf39..61219b9b3445 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -16,7 +16,6 @@ #define PHY_ID_BCM7366 0x600d8490 #define PHY_ID_BCM7439 0x600d8480 #define PHY_ID_BCM7445 0x600d8510 -#define PHY_ID_BCM7XXX_28 0x600d8400 #define PHY_BCM_OUI_MASK 0xfffffc00 #define PHY_BCM_OUI_1 0x00206000 -- cgit v1.2.3-59-g8ed1b From 366047515c6eab2ff886bc28d1c2b0ad041d040a Mon Sep 17 00:00:00 2001 From: Lan Tianyu Date: Fri, 15 Aug 2014 13:38:59 +0800 Subject: i2c: rework kernel config I2C_ACPI Commit da3c6647(I2C/ACPI: Clean up I2C ACPI code and Add CONFIG_I2C_ACPI config) adds a new kernel config I2C_ACPI and make I2C core built in when the config is selected. This is wrong because distributions etc generally compile I2C as a module and the commit broken that. This patch is to rename I2C_ACPI to ACPI_I2C_OPREGION. New config only controls ACPI I2C operation region code and depends on I2C=y. Signed-off-by: Lan Tianyu Reviewed-by: Mika Westerberg [wsa: removed unrelated change for Kconfig] Signed-off-by: Wolfram Sang --- drivers/i2c/Kconfig | 15 ++++++--------- drivers/i2c/Makefile | 2 +- drivers/i2c/i2c-acpi.c | 2 ++ include/linux/i2c.h | 12 ++++++++---- 4 files changed, 17 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/drivers/i2c/Kconfig b/drivers/i2c/Kconfig index 3e3b680dc007..b51a402752c4 100644 --- a/drivers/i2c/Kconfig +++ b/drivers/i2c/Kconfig @@ -23,17 +23,14 @@ config I2C This I2C support can also be built as a module. If so, the module will be called i2c-core. -config I2C_ACPI - bool "I2C ACPI support" - select I2C - depends on ACPI +config ACPI_I2C_OPREGION + bool "ACPI I2C Operation region support" + depends on I2C=y && ACPI default y help - Say Y here if you want to enable ACPI I2C support. This includes support - for automatic enumeration of I2C slave devices and support for ACPI I2C - Operation Regions. Operation Regions allow firmware (BIOS) code to - access I2C slave devices, such as smart batteries through an I2C host - controller driver. + Say Y here if you want to enable ACPI I2C operation region support. 
+ Operation Regions allow firmware (BIOS) code to access I2C slave devices, + such as smart batteries through an I2C host controller driver. if I2C diff --git a/drivers/i2c/Makefile b/drivers/i2c/Makefile index a1f590cbb435..e0228b228256 100644 --- a/drivers/i2c/Makefile +++ b/drivers/i2c/Makefile @@ -3,7 +3,7 @@ # i2ccore-y := i2c-core.o -i2ccore-$(CONFIG_I2C_ACPI) += i2c-acpi.o +i2ccore-$(CONFIG_ACPI) += i2c-acpi.o obj-$(CONFIG_I2C_BOARDINFO) += i2c-boardinfo.o obj-$(CONFIG_I2C) += i2ccore.o diff --git a/drivers/i2c/i2c-acpi.c b/drivers/i2c/i2c-acpi.c index e8b61967334b..0dbc18c15c43 100644 --- a/drivers/i2c/i2c-acpi.c +++ b/drivers/i2c/i2c-acpi.c @@ -126,6 +126,7 @@ void acpi_i2c_register_devices(struct i2c_adapter *adap) dev_warn(&adap->dev, "failed to enumerate I2C slaves\n"); } +#ifdef CONFIG_ACPI_I2C_OPREGION static int acpi_gsb_i2c_read_bytes(struct i2c_client *client, u8 cmd, u8 *data, u8 data_len) { @@ -360,3 +361,4 @@ void acpi_i2c_remove_space_handler(struct i2c_adapter *adapter) acpi_bus_detach_private_data(handle); } +#endif diff --git a/include/linux/i2c.h b/include/linux/i2c.h index ea507665896c..a95efeb53a8b 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -577,16 +577,20 @@ static inline struct i2c_adapter *of_find_i2c_adapter_by_node(struct device_node } #endif /* CONFIG_OF */ -#ifdef CONFIG_I2C_ACPI -int acpi_i2c_install_space_handler(struct i2c_adapter *adapter); -void acpi_i2c_remove_space_handler(struct i2c_adapter *adapter); +#ifdef CONFIG_ACPI void acpi_i2c_register_devices(struct i2c_adapter *adap); #else static inline void acpi_i2c_register_devices(struct i2c_adapter *adap) { } +#endif /* CONFIG_ACPI */ + +#ifdef CONFIG_ACPI_I2C_OPREGION +int acpi_i2c_install_space_handler(struct i2c_adapter *adapter); +void acpi_i2c_remove_space_handler(struct i2c_adapter *adapter); +#else static inline void acpi_i2c_remove_space_handler(struct i2c_adapter *adapter) { } static inline int acpi_i2c_install_space_handler(struct i2c_adapter *adapter) { return 0; } -#endif +#endif /* CONFIG_ACPI_I2C_OPREGION */ #endif /* _LINUX_I2C_H */ -- cgit v1.2.3-59-g8ed1b From 58b84f6a97f7f8811e0636836734809ff52cad43 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 19 Aug 2014 12:00:53 -0500 Subject: gpio: move GPIOD flags outside #ifdef The GPIOD flags are defined inside the #ifdef CONFIG_GPIOLIB switch, making the gpiolib stubs fail if these flags are used by a consumer. This is not correct: the stubs should compile fine without GPIOLIB. 
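A minimal, hypothetical consumer sketch of what must keep building when CONFIG_GPIOLIB is disabled; the "reset" line and the function name are made up, and the stubbed gpiod_get() simply returns an error pointer:

	#include <linux/err.h>
	#include <linux/gpio/consumer.h>

	static int example_claim_reset(struct device *dev)
	{
		/* GPIOD_OUT_LOW must be visible even with the !GPIOLIB stubs */
		struct gpio_desc *reset = gpiod_get(dev, "reset", GPIOD_OUT_LOW);

		if (IS_ERR(reset))
			return PTR_ERR(reset);

		gpiod_put(reset);
		return 0;
	}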
Reported-by: Ulf Hansson Reviewed-by: Alexandre Courbot Signed-off-by: Linus Walleij --- include/linux/gpio/consumer.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index b7ce0c64c6f3..c7e17de732f3 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -16,8 +16,6 @@ struct device; */ struct gpio_desc; -#ifdef CONFIG_GPIOLIB - #define GPIOD_FLAGS_BIT_DIR_SET BIT(0) #define GPIOD_FLAGS_BIT_DIR_OUT BIT(1) #define GPIOD_FLAGS_BIT_DIR_VAL BIT(2) @@ -34,6 +32,8 @@ enum gpiod_flags { GPIOD_FLAGS_BIT_DIR_VAL, }; +#ifdef CONFIG_GPIOLIB + /* Acquire and dispose GPIOs */ struct gpio_desc *__must_check __gpiod_get(struct device *dev, const char *con_id, -- cgit v1.2.3-59-g8ed1b From 33b7f99cf003ca6c1d31c42b50e1100ad71aaec0 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Fri, 15 Aug 2014 17:23:02 -0400 Subject: ftrace: Allow ftrace_ops to use the hashes from other ops Currently the top level debug file system function tracer shares its ftrace_ops with the function graph tracer. This was thought to be fine because the tracers are not used together, as one can only enable function or function_graph tracer in the current_tracer file. But that assumption proved to be incorrect. The function profiler can use the function graph tracer when function tracing is enabled. Since all function graph users uses the function tracing ftrace_ops this causes a conflict and when a user enables both function profiling as well as the function tracer it will crash ftrace and disable it. The quick solution so far is to move them as separate ftrace_ops like it was earlier. The problem though is to synchronize the functions that are traced because both function and function_graph tracer are limited by the selections made in the set_ftrace_filter and set_ftrace_notrace files. To handle this, a new structure is made called ftrace_ops_hash. This structure will now hold the filter_hash and notrace_hash, and the ftrace_ops will point to this structure. That will allow two ftrace_ops to share the same hashes. Since most ftrace_ops do not share the hashes, and to keep allocation simple, the ftrace_ops structure will include both a pointer to the ftrace_ops_hash called func_hash, as well as the structure itself, called local_hash. When the ops are registered, the func_hash pointer will be initialized to point to the local_hash within the ftrace_ops structure. Some of the ftrace internal ftrace_ops will be initialized statically. This will allow for the function and function_graph tracer to have separate ops but still share the same hash tables that determine what functions they trace. Cc: stable@vger.kernel.org # 3.16 (apply after 3.17-rc4 is out) Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 14 +++++-- kernel/trace/ftrace.c | 100 +++++++++++++++++++++++++------------------------ 2 files changed, 63 insertions(+), 51 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 6bb5e3f2a3b4..f0b0edbf55a9 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -102,6 +102,15 @@ enum { FTRACE_OPS_FL_DELETED = 1 << 8, }; +#ifdef CONFIG_DYNAMIC_FTRACE +/* The hash used to know what functions callbacks trace */ +struct ftrace_ops_hash { + struct ftrace_hash *notrace_hash; + struct ftrace_hash *filter_hash; + struct mutex regex_lock; +}; +#endif + /* * Note, ftrace_ops can be referenced outside of RCU protection. 
* (Although, for perf, the control ops prevent that). If ftrace_ops is @@ -121,10 +130,9 @@ struct ftrace_ops { int __percpu *disabled; #ifdef CONFIG_DYNAMIC_FTRACE int nr_trampolines; - struct ftrace_hash *notrace_hash; - struct ftrace_hash *filter_hash; + struct ftrace_ops_hash local_hash; + struct ftrace_ops_hash *func_hash; struct ftrace_hash *tramp_hash; - struct mutex regex_lock; unsigned long trampoline; #endif }; diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 1654b12c891a..c92757adba79 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -65,15 +65,17 @@ #define FL_GLOBAL_CONTROL_MASK (FTRACE_OPS_FL_CONTROL) #ifdef CONFIG_DYNAMIC_FTRACE -#define INIT_REGEX_LOCK(opsname) \ - .regex_lock = __MUTEX_INITIALIZER(opsname.regex_lock), +#define INIT_OPS_HASH(opsname) \ + .func_hash = &opsname.local_hash, \ + .local_hash.regex_lock = __MUTEX_INITIALIZER(opsname.local_hash.regex_lock), #else -#define INIT_REGEX_LOCK(opsname) +#define INIT_OPS_HASH(opsname) #endif static struct ftrace_ops ftrace_list_end __read_mostly = { .func = ftrace_stub, .flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_STUB, + INIT_OPS_HASH(ftrace_list_end) }; /* ftrace_enabled is a method to turn ftrace on or off */ @@ -140,7 +142,8 @@ static inline void ftrace_ops_init(struct ftrace_ops *ops) { #ifdef CONFIG_DYNAMIC_FTRACE if (!(ops->flags & FTRACE_OPS_FL_INITIALIZED)) { - mutex_init(&ops->regex_lock); + mutex_init(&ops->local_hash.regex_lock); + ops->func_hash = &ops->local_hash; ops->flags |= FTRACE_OPS_FL_INITIALIZED; } #endif @@ -899,7 +902,7 @@ static void unregister_ftrace_profiler(void) static struct ftrace_ops ftrace_profile_ops __read_mostly = { .func = function_profile_call, .flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_INITIALIZED, - INIT_REGEX_LOCK(ftrace_profile_ops) + INIT_OPS_HASH(ftrace_profile_ops) }; static int register_ftrace_profiler(void) @@ -1081,11 +1084,12 @@ static const struct ftrace_hash empty_hash = { #define EMPTY_HASH ((struct ftrace_hash *)&empty_hash) static struct ftrace_ops global_ops = { - .func = ftrace_stub, - .notrace_hash = EMPTY_HASH, - .filter_hash = EMPTY_HASH, - .flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_INITIALIZED, - INIT_REGEX_LOCK(global_ops) + .func = ftrace_stub, + .local_hash.notrace_hash = EMPTY_HASH, + .local_hash.filter_hash = EMPTY_HASH, + INIT_OPS_HASH(global_ops) + .flags = FTRACE_OPS_FL_RECURSION_SAFE | + FTRACE_OPS_FL_INITIALIZED, }; struct ftrace_page { @@ -1226,8 +1230,8 @@ static void free_ftrace_hash_rcu(struct ftrace_hash *hash) void ftrace_free_filter(struct ftrace_ops *ops) { ftrace_ops_init(ops); - free_ftrace_hash(ops->filter_hash); - free_ftrace_hash(ops->notrace_hash); + free_ftrace_hash(ops->func_hash->filter_hash); + free_ftrace_hash(ops->func_hash->notrace_hash); } static struct ftrace_hash *alloc_ftrace_hash(int size_bits) @@ -1382,8 +1386,8 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, void *regs) return 0; #endif - filter_hash = rcu_dereference_raw_notrace(ops->filter_hash); - notrace_hash = rcu_dereference_raw_notrace(ops->notrace_hash); + filter_hash = rcu_dereference_raw_notrace(ops->func_hash->filter_hash); + notrace_hash = rcu_dereference_raw_notrace(ops->func_hash->notrace_hash); if ((ftrace_hash_empty(filter_hash) || ftrace_lookup_ip(filter_hash, ip)) && @@ -1554,14 +1558,14 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops, * gets inversed. 
*/ if (filter_hash) { - hash = ops->filter_hash; - other_hash = ops->notrace_hash; + hash = ops->func_hash->filter_hash; + other_hash = ops->func_hash->notrace_hash; if (ftrace_hash_empty(hash)) all = 1; } else { inc = !inc; - hash = ops->notrace_hash; - other_hash = ops->filter_hash; + hash = ops->func_hash->notrace_hash; + other_hash = ops->func_hash->filter_hash; /* * If the notrace hash has no items, * then there's nothing to do. @@ -2436,8 +2440,8 @@ static inline int ops_traces_mod(struct ftrace_ops *ops) * Filter_hash being empty will default to trace module. * But notrace hash requires a test of individual module functions. */ - return ftrace_hash_empty(ops->filter_hash) && - ftrace_hash_empty(ops->notrace_hash); + return ftrace_hash_empty(ops->func_hash->filter_hash) && + ftrace_hash_empty(ops->func_hash->notrace_hash); } /* @@ -2459,12 +2463,12 @@ ops_references_rec(struct ftrace_ops *ops, struct dyn_ftrace *rec) return 0; /* The function must be in the filter */ - if (!ftrace_hash_empty(ops->filter_hash) && - !ftrace_lookup_ip(ops->filter_hash, rec->ip)) + if (!ftrace_hash_empty(ops->func_hash->filter_hash) && + !ftrace_lookup_ip(ops->func_hash->filter_hash, rec->ip)) return 0; /* If in notrace hash, we ignore it too */ - if (ftrace_lookup_ip(ops->notrace_hash, rec->ip)) + if (ftrace_lookup_ip(ops->func_hash->notrace_hash, rec->ip)) return 0; return 1; @@ -2785,10 +2789,10 @@ t_next(struct seq_file *m, void *v, loff_t *pos) } else { rec = &iter->pg->records[iter->idx++]; if (((iter->flags & FTRACE_ITER_FILTER) && - !(ftrace_lookup_ip(ops->filter_hash, rec->ip))) || + !(ftrace_lookup_ip(ops->func_hash->filter_hash, rec->ip))) || ((iter->flags & FTRACE_ITER_NOTRACE) && - !ftrace_lookup_ip(ops->notrace_hash, rec->ip)) || + !ftrace_lookup_ip(ops->func_hash->notrace_hash, rec->ip)) || ((iter->flags & FTRACE_ITER_ENABLED) && !(rec->flags & FTRACE_FL_ENABLED))) { @@ -2837,9 +2841,9 @@ static void *t_start(struct seq_file *m, loff_t *pos) * functions are enabled. 
*/ if ((iter->flags & FTRACE_ITER_FILTER && - ftrace_hash_empty(ops->filter_hash)) || + ftrace_hash_empty(ops->func_hash->filter_hash)) || (iter->flags & FTRACE_ITER_NOTRACE && - ftrace_hash_empty(ops->notrace_hash))) { + ftrace_hash_empty(ops->func_hash->notrace_hash))) { if (*pos > 0) return t_hash_start(m, pos); iter->flags |= FTRACE_ITER_PRINTALL; @@ -3001,12 +3005,12 @@ ftrace_regex_open(struct ftrace_ops *ops, int flag, iter->ops = ops; iter->flags = flag; - mutex_lock(&ops->regex_lock); + mutex_lock(&ops->func_hash->regex_lock); if (flag & FTRACE_ITER_NOTRACE) - hash = ops->notrace_hash; + hash = ops->func_hash->notrace_hash; else - hash = ops->filter_hash; + hash = ops->func_hash->filter_hash; if (file->f_mode & FMODE_WRITE) { const int size_bits = FTRACE_HASH_DEFAULT_BITS; @@ -3041,7 +3045,7 @@ ftrace_regex_open(struct ftrace_ops *ops, int flag, file->private_data = iter; out_unlock: - mutex_unlock(&ops->regex_lock); + mutex_unlock(&ops->func_hash->regex_lock); return ret; } @@ -3279,7 +3283,7 @@ static struct ftrace_ops trace_probe_ops __read_mostly = { .func = function_trace_probe_call, .flags = FTRACE_OPS_FL_INITIALIZED, - INIT_REGEX_LOCK(trace_probe_ops) + INIT_OPS_HASH(trace_probe_ops) }; static int ftrace_probe_registered; @@ -3342,7 +3346,7 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, void *data) { struct ftrace_func_probe *entry; - struct ftrace_hash **orig_hash = &trace_probe_ops.filter_hash; + struct ftrace_hash **orig_hash = &trace_probe_ops.func_hash->filter_hash; struct ftrace_hash *hash; struct ftrace_page *pg; struct dyn_ftrace *rec; @@ -3359,7 +3363,7 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, if (WARN_ON(not)) return -EINVAL; - mutex_lock(&trace_probe_ops.regex_lock); + mutex_lock(&trace_probe_ops.func_hash->regex_lock); hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, *orig_hash); if (!hash) { @@ -3428,7 +3432,7 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, out_unlock: mutex_unlock(&ftrace_lock); out: - mutex_unlock(&trace_probe_ops.regex_lock); + mutex_unlock(&trace_probe_ops.func_hash->regex_lock); free_ftrace_hash(hash); return count; @@ -3446,7 +3450,7 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, struct ftrace_func_entry *rec_entry; struct ftrace_func_probe *entry; struct ftrace_func_probe *p; - struct ftrace_hash **orig_hash = &trace_probe_ops.filter_hash; + struct ftrace_hash **orig_hash = &trace_probe_ops.func_hash->filter_hash; struct list_head free_list; struct ftrace_hash *hash; struct hlist_node *tmp; @@ -3468,7 +3472,7 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, return; } - mutex_lock(&trace_probe_ops.regex_lock); + mutex_lock(&trace_probe_ops.func_hash->regex_lock); hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, *orig_hash); if (!hash) @@ -3521,7 +3525,7 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, mutex_unlock(&ftrace_lock); out_unlock: - mutex_unlock(&trace_probe_ops.regex_lock); + mutex_unlock(&trace_probe_ops.func_hash->regex_lock); free_ftrace_hash(hash); } @@ -3717,12 +3721,12 @@ ftrace_set_hash(struct ftrace_ops *ops, unsigned char *buf, int len, if (unlikely(ftrace_disabled)) return -ENODEV; - mutex_lock(&ops->regex_lock); + mutex_lock(&ops->func_hash->regex_lock); if (enable) - orig_hash = &ops->filter_hash; + orig_hash = &ops->func_hash->filter_hash; else - orig_hash = &ops->notrace_hash; + orig_hash = 
&ops->func_hash->notrace_hash; if (reset) hash = alloc_ftrace_hash(FTRACE_HASH_DEFAULT_BITS); @@ -3752,7 +3756,7 @@ ftrace_set_hash(struct ftrace_ops *ops, unsigned char *buf, int len, mutex_unlock(&ftrace_lock); out_regex_unlock: - mutex_unlock(&ops->regex_lock); + mutex_unlock(&ops->func_hash->regex_lock); free_ftrace_hash(hash); return ret; @@ -3975,15 +3979,15 @@ int ftrace_regex_release(struct inode *inode, struct file *file) trace_parser_put(parser); - mutex_lock(&iter->ops->regex_lock); + mutex_lock(&iter->ops->func_hash->regex_lock); if (file->f_mode & FMODE_WRITE) { filter_hash = !!(iter->flags & FTRACE_ITER_FILTER); if (filter_hash) - orig_hash = &iter->ops->filter_hash; + orig_hash = &iter->ops->func_hash->filter_hash; else - orig_hash = &iter->ops->notrace_hash; + orig_hash = &iter->ops->func_hash->notrace_hash; mutex_lock(&ftrace_lock); ret = ftrace_hash_move(iter->ops, filter_hash, @@ -3994,7 +3998,7 @@ int ftrace_regex_release(struct inode *inode, struct file *file) mutex_unlock(&ftrace_lock); } - mutex_unlock(&iter->ops->regex_lock); + mutex_unlock(&iter->ops->func_hash->regex_lock); free_ftrace_hash(iter->hash); kfree(iter); @@ -4611,7 +4615,7 @@ void __init ftrace_init(void) static struct ftrace_ops global_ops = { .func = ftrace_stub, .flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_INITIALIZED, - INIT_REGEX_LOCK(global_ops) + INIT_OPS_HASH(global_ops) }; static int __init ftrace_nodyn_init(void) @@ -4713,7 +4717,7 @@ ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip, static struct ftrace_ops control_ops = { .func = ftrace_ops_control_func, .flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_INITIALIZED, - INIT_REGEX_LOCK(control_ops) + INIT_OPS_HASH(control_ops) }; static inline void -- cgit v1.2.3-59-g8ed1b From 7c3af975257383ece54b83c0505d3e0656cb7daf Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Fri, 8 Aug 2014 11:00:57 -0400 Subject: nfs: don't sleep with inode lock in lock_and_join_requests This handles the 'nonblock=false' case in nfs_lock_and_join_requests. If the group is already locked and blocking is allowed, drop the inode lock and wait for the group lock to be cleared before trying it all again. This should fix warnings found in peterz's tree (sched/wait branch), where might_sleep() checks are added to wait.[ch]. Reported-by: Fengguang Wu Signed-off-by: Weston Andros Adamson Reviewed-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 17 +++++++++++++++++ fs/nfs/write.c | 12 +++++++++++- include/linux/nfs_page.h | 1 + 3 files changed, 29 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 30c9626f96b0..4ec67f8d70aa 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -167,6 +167,23 @@ nfs_page_group_lock(struct nfs_page *req, bool nonblock) return -EAGAIN; } +/* + * nfs_page_group_lock_wait - wait for the lock to clear, but don't grab it + * @req - a request in the group + * + * This is a blocking call to wait for the group lock to be cleared. 
+ */ +void +nfs_page_group_lock_wait(struct nfs_page *req) +{ + struct nfs_page *head = req->wb_head; + + WARN_ON_ONCE(head != head->wb_head); + + wait_on_bit(&head->wb_flags, PG_HEADLOCK, + TASK_UNINTERRUPTIBLE); +} + /* * nfs_page_group_unlock - unlock the head of the page group * @req - request in group that is to be unlocked diff --git a/fs/nfs/write.c b/fs/nfs/write.c index e056f617adf2..175d5d073ccf 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -478,13 +478,23 @@ try_again: return NULL; } - /* lock each request in the page group */ + /* holding inode lock, so always make a non-blocking call to try the + * page group lock */ ret = nfs_page_group_lock(head, true); if (ret < 0) { spin_unlock(&inode->i_lock); + + if (!nonblock && ret == -EAGAIN) { + nfs_page_group_lock_wait(head); + nfs_release_request(head); + goto try_again; + } + nfs_release_request(head); return ERR_PTR(ret); } + + /* lock each request in the page group */ subreq = head; do { /* diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 6ad2bbcad405..6c3e06ee2fb7 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -123,6 +123,7 @@ extern int nfs_wait_on_request(struct nfs_page *); extern void nfs_unlock_request(struct nfs_page *req); extern void nfs_unlock_and_release_request(struct nfs_page *); extern int nfs_page_group_lock(struct nfs_page *, bool); +extern void nfs_page_group_lock_wait(struct nfs_page *); extern void nfs_page_group_unlock(struct nfs_page *); extern bool nfs_page_group_sync_on_bit(struct nfs_page *, unsigned int); -- cgit v1.2.3-59-g8ed1b From 7d5929c1f34304ca5a970cfde8044053e56aa8c9 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Mon, 25 Aug 2014 16:15:32 -0700 Subject: mtd: nand: omap: Revert to using software ECC by default For v3.12 and prior, 1-bit Hamming code ECC via software was the default choice. Commit c66d039197e4 in v3.13 changed the behaviour to use 1-bit Hamming code via Hardware using a different ECC layout i.e. (ROM code layout) than what is used by software ECC. This ECC layout change causes NAND filesystems created in v3.12 and prior to be unusable in v3.13 and later. So revert back to using software ECC by default if an ECC scheme is not explicitely specified. 
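As a sketch of the legacy board-file path affected here (mirroring the board-flash.c hunk below), boards now end up with the pre-v3.13 software ECC layout either by leaving ecc_opt at the new default or by requesting it explicitly:

	board_nand_data.ecc_opt = OMAP_ECC_HAM1_CODE_SW;
	gpmc_nand_init(&board_nand_data, gpmc_t);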
This defect can be observed on the following boards during legacy boot -omap3beagle -omap3touchbook -overo -am3517crane -devkit8000 -ldp -3430sdp Signed-off-by: Roger Quadros Tested-by: Grazvydas Ignotas Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/board-flash.c | 2 +- arch/arm/mach-omap2/gpmc-nand.c | 3 ++- drivers/mtd/nand/omap2.c | 14 +++++++++++--- include/linux/platform_data/mtd-nand-omap2.h | 13 +++++++++++-- 4 files changed, 25 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-omap2/board-flash.c b/arch/arm/mach-omap2/board-flash.c index e87f2a83d6bf..2d245c2e641c 100644 --- a/arch/arm/mach-omap2/board-flash.c +++ b/arch/arm/mach-omap2/board-flash.c @@ -142,7 +142,7 @@ __init board_nand_init(struct mtd_partition *nand_parts, u8 nr_parts, u8 cs, board_nand_data.nr_parts = nr_parts; board_nand_data.devsize = nand_type; - board_nand_data.ecc_opt = OMAP_ECC_HAM1_CODE_HW; + board_nand_data.ecc_opt = OMAP_ECC_HAM1_CODE_SW; gpmc_nand_init(&board_nand_data, gpmc_t); } #endif /* CONFIG_MTD_NAND_OMAP2 || CONFIG_MTD_NAND_OMAP2_MODULE */ diff --git a/arch/arm/mach-omap2/gpmc-nand.c b/arch/arm/mach-omap2/gpmc-nand.c index 8897ad7035fd..cb7764314f17 100644 --- a/arch/arm/mach-omap2/gpmc-nand.c +++ b/arch/arm/mach-omap2/gpmc-nand.c @@ -49,7 +49,8 @@ static bool gpmc_hwecc_bch_capable(enum omap_ecc ecc_opt) return 0; /* legacy platforms support only HAM1 (1-bit Hamming) ECC scheme */ - if (ecc_opt == OMAP_ECC_HAM1_CODE_HW) + if (ecc_opt == OMAP_ECC_HAM1_CODE_HW || + ecc_opt == OMAP_ECC_HAM1_CODE_SW) return 1; else return 0; diff --git a/drivers/mtd/nand/omap2.c b/drivers/mtd/nand/omap2.c index f0ed92e210a1..4dd617897eee 100644 --- a/drivers/mtd/nand/omap2.c +++ b/drivers/mtd/nand/omap2.c @@ -1794,9 +1794,12 @@ static int omap_nand_probe(struct platform_device *pdev) } /* populate MTD interface based on ECC scheme */ - nand_chip->ecc.layout = &omap_oobinfo; ecclayout = &omap_oobinfo; switch (info->ecc_opt) { + case OMAP_ECC_HAM1_CODE_SW: + nand_chip->ecc.mode = NAND_ECC_SOFT; + break; + case OMAP_ECC_HAM1_CODE_HW: pr_info("nand: using OMAP_ECC_HAM1_CODE_HW\n"); nand_chip->ecc.mode = NAND_ECC_HW; @@ -1848,7 +1851,7 @@ static int omap_nand_probe(struct platform_device *pdev) nand_chip->ecc.priv = nand_bch_init(mtd, nand_chip->ecc.size, nand_chip->ecc.bytes, - &nand_chip->ecc.layout); + &ecclayout); if (!nand_chip->ecc.priv) { pr_err("nand: error: unable to use s/w BCH library\n"); err = -EINVAL; @@ -1923,7 +1926,7 @@ static int omap_nand_probe(struct platform_device *pdev) nand_chip->ecc.priv = nand_bch_init(mtd, nand_chip->ecc.size, nand_chip->ecc.bytes, - &nand_chip->ecc.layout); + &ecclayout); if (!nand_chip->ecc.priv) { pr_err("nand: error: unable to use s/w BCH library\n"); err = -EINVAL; @@ -2012,6 +2015,9 @@ static int omap_nand_probe(struct platform_device *pdev) goto return_error; } + if (info->ecc_opt == OMAP_ECC_HAM1_CODE_SW) + goto scan_tail; + /* all OOB bytes from oobfree->offset till end off OOB are free */ ecclayout->oobfree->length = mtd->oobsize - ecclayout->oobfree->offset; /* check if NAND device's OOB is enough to store ECC signatures */ @@ -2021,7 +2027,9 @@ static int omap_nand_probe(struct platform_device *pdev) err = -EINVAL; goto return_error; } + nand_chip->ecc.layout = ecclayout; +scan_tail: /* second phase scan */ if (nand_scan_tail(mtd)) { err = -ENXIO; diff --git a/include/linux/platform_data/mtd-nand-omap2.h b/include/linux/platform_data/mtd-nand-omap2.h index 660c029d694f..16ec262dfcc8 100644 --- 
a/include/linux/platform_data/mtd-nand-omap2.h +++ b/include/linux/platform_data/mtd-nand-omap2.h @@ -21,8 +21,17 @@ enum nand_io { }; enum omap_ecc { - /* 1-bit ECC calculation by GPMC, Error detection by Software */ - OMAP_ECC_HAM1_CODE_HW = 0, + /* + * 1-bit ECC: calculation and correction by SW + * ECC stored at end of spare area + */ + OMAP_ECC_HAM1_CODE_SW = 0, + + /* + * 1-bit ECC: calculation by GPMC, Error detection by Software + * ECC layout compatible with ROM code layout + */ + OMAP_ECC_HAM1_CODE_HW, /* 4-bit ECC calculation by GPMC, Error detection by Software */ OMAP_ECC_BCH4_CODE_HW_DETECTION_SW, /* 4-bit ECC calculation by GPMC, Error detection by ELM */ -- cgit v1.2.3-59-g8ed1b From 1f58d9465c568eb47cab939bbc4f30ae51863295 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 8 Aug 2014 13:06:30 +0200 Subject: dma-buf/fence: Fix one more kerneldoc warning The seqno_fence_init() function's cond argument isn't described in the kerneldoc comment. Fix that to silence a warning when building DocBook documentation. Signed-off-by: Thierry Reding Signed-off-by: Sumit Semwal --- include/linux/seqno-fence.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/seqno-fence.h b/include/linux/seqno-fence.h index 3d6003de4b0d..a1ba6a5ccdd6 100644 --- a/include/linux/seqno-fence.h +++ b/include/linux/seqno-fence.h @@ -62,6 +62,7 @@ to_seqno_fence(struct fence *fence) * @context: the execution context this fence is a part of * @seqno_ofs: the offset within @sync_buf * @seqno: the sequence # to signal on + * @cond: fence wait condition * @ops: the fence_ops for operations on this seqno fence * * This function initializes a struct seqno_fence with passed parameters, -- cgit v1.2.3-59-g8ed1b From db9ee220361de03ee86388f9ea5e529eaad5323c Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 27 Aug 2014 18:40:07 -0400 Subject: jbd2: fix descriptor block size handling errors with journal_csum It turns out that there are some serious problems with the on-disk format of journal checksum v2. The foremost is that the function to calculate descriptor tag size returns sizes that are too big. This causes alignment issues on some architectures and is compounded by the fact that some parts of jbd2 use the structure size (incorrectly) to determine the presence of a 64bit journal instead of checking the feature flags. Therefore, introduce journal checksum v3, which enlarges the descriptor block tag format to allow for full 32-bit checksums of journal blocks, fix the journal tag function to return the correct sizes, and fix the jbd2 recovery code to use feature flags to determine 64bitness. Add a few function helpers so we don't have to open-code quite so many pieces. Switching to a 16-byte block size was found to increase journal size overhead by a maximum of 0.1%, to convert a 32-bit journal with no checksumming to a 32-bit journal with checksum v3 enabled. Signed-off-by: Darrick J. 
Wong Reported-by: TR Reardon Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org --- fs/ext4/super.c | 5 +++-- fs/jbd2/commit.c | 21 +++++++++++--------- fs/jbd2/journal.c | 56 ++++++++++++++++++++++++++++++++++------------------ fs/jbd2/recovery.c | 26 +++++++++++++----------- fs/jbd2/revoke.c | 6 +++--- include/linux/jbd2.h | 30 +++++++++++++++++++++++----- 6 files changed, 95 insertions(+), 49 deletions(-) (limited to 'include/linux') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 32b43ad154b9..0b28b36e7915 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3181,9 +3181,9 @@ static int set_journal_csum_feature_set(struct super_block *sb) if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) { - /* journal checksum v2 */ + /* journal checksum v3 */ compat = 0; - incompat = JBD2_FEATURE_INCOMPAT_CSUM_V2; + incompat = JBD2_FEATURE_INCOMPAT_CSUM_V3; } else { /* journal checksum v1 */ compat = JBD2_FEATURE_COMPAT_CHECKSUM; @@ -3205,6 +3205,7 @@ static int set_journal_csum_feature_set(struct super_block *sb) jbd2_journal_clear_features(sbi->s_journal, JBD2_FEATURE_COMPAT_CHECKSUM, 0, JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT | + JBD2_FEATURE_INCOMPAT_CSUM_V3 | JBD2_FEATURE_INCOMPAT_CSUM_V2); } diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 6fac74349856..b73e0215baa7 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -97,7 +97,7 @@ static void jbd2_commit_block_csum_set(journal_t *j, struct buffer_head *bh) struct commit_header *h; __u32 csum; - if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + if (!jbd2_journal_has_csum_v2or3(j)) return; h = (struct commit_header *)(bh->b_data); @@ -313,11 +313,11 @@ static __u32 jbd2_checksum_data(__u32 crc32_sum, struct buffer_head *bh) return checksum; } -static void write_tag_block(int tag_bytes, journal_block_tag_t *tag, +static void write_tag_block(journal_t *j, journal_block_tag_t *tag, unsigned long long block) { tag->t_blocknr = cpu_to_be32(block & (u32)~0); - if (tag_bytes > JBD2_TAG_SIZE32) + if (JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_64BIT)) tag->t_blocknr_high = cpu_to_be32((block >> 31) >> 1); } @@ -327,7 +327,7 @@ static void jbd2_descr_block_csum_set(journal_t *j, struct jbd2_journal_block_tail *tail; __u32 csum; - if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + if (!jbd2_journal_has_csum_v2or3(j)) return; tail = (struct jbd2_journal_block_tail *)(bh->b_data + j->j_blocksize - @@ -340,12 +340,13 @@ static void jbd2_descr_block_csum_set(journal_t *j, static void jbd2_block_tag_csum_set(journal_t *j, journal_block_tag_t *tag, struct buffer_head *bh, __u32 sequence) { + journal_block_tag3_t *tag3 = (journal_block_tag3_t *)tag; struct page *page = bh->b_page; __u8 *addr; __u32 csum32; __be32 seq; - if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + if (!jbd2_journal_has_csum_v2or3(j)) return; seq = cpu_to_be32(sequence); @@ -355,8 +356,10 @@ static void jbd2_block_tag_csum_set(journal_t *j, journal_block_tag_t *tag, bh->b_size); kunmap_atomic(addr); - /* We only have space to store the lower 16 bits of the crc32c. 
*/ - tag->t_checksum = cpu_to_be16(csum32); + if (JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V3)) + tag3->t_checksum = cpu_to_be32(csum32); + else + tag->t_checksum = cpu_to_be16(csum32); } /* * jbd2_journal_commit_transaction @@ -396,7 +399,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) LIST_HEAD(io_bufs); LIST_HEAD(log_bufs); - if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + if (jbd2_journal_has_csum_v2or3(journal)) csum_size = sizeof(struct jbd2_journal_block_tail); /* @@ -690,7 +693,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) tag_flag |= JBD2_FLAG_SAME_UUID; tag = (journal_block_tag_t *) tagp; - write_tag_block(tag_bytes, tag, jh2bh(jh)->b_blocknr); + write_tag_block(journal, tag, jh2bh(jh)->b_blocknr); tag->t_flags = cpu_to_be16(tag_flag); jbd2_block_tag_csum_set(journal, tag, wbuf[bufs], commit_transaction->t_tid); diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 67b8e303946c..19d74d86d99c 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -124,7 +124,7 @@ EXPORT_SYMBOL(__jbd2_debug); /* Checksumming functions */ static int jbd2_verify_csum_type(journal_t *j, journal_superblock_t *sb) { - if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + if (!jbd2_journal_has_csum_v2or3(j)) return 1; return sb->s_checksum_type == JBD2_CRC32C_CHKSUM; @@ -145,7 +145,7 @@ static __be32 jbd2_superblock_csum(journal_t *j, journal_superblock_t *sb) static int jbd2_superblock_csum_verify(journal_t *j, journal_superblock_t *sb) { - if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + if (!jbd2_journal_has_csum_v2or3(j)) return 1; return sb->s_checksum == jbd2_superblock_csum(j, sb); @@ -153,7 +153,7 @@ static int jbd2_superblock_csum_verify(journal_t *j, journal_superblock_t *sb) static void jbd2_superblock_csum_set(journal_t *j, journal_superblock_t *sb) { - if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + if (!jbd2_journal_has_csum_v2or3(j)) return; sb->s_checksum = jbd2_superblock_csum(j, sb); @@ -1522,21 +1522,29 @@ static int journal_get_superblock(journal_t *journal) goto out; } - if (JBD2_HAS_COMPAT_FEATURE(journal, JBD2_FEATURE_COMPAT_CHECKSUM) && - JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) { + if (jbd2_journal_has_csum_v2or3(journal) && + JBD2_HAS_COMPAT_FEATURE(journal, JBD2_FEATURE_COMPAT_CHECKSUM)) { /* Can't have checksum v1 and v2 on at the same time! */ printk(KERN_ERR "JBD2: Can't enable checksumming v1 and v2 " "at the same time!\n"); goto out; } + if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2) && + JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V3)) { + /* Can't have checksum v2 and v3 at the same time! 
*/ + printk(KERN_ERR "JBD2: Can't enable checksumming v2 and v3 " + "at the same time!\n"); + goto out; + } + if (!jbd2_verify_csum_type(journal, sb)) { printk(KERN_ERR "JBD2: Unknown checksum type\n"); goto out; } /* Load the checksum driver */ - if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) { + if (jbd2_journal_has_csum_v2or3(journal)) { journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0); if (IS_ERR(journal->j_chksum_driver)) { printk(KERN_ERR "JBD2: Cannot load crc32c driver.\n"); @@ -1553,7 +1561,7 @@ static int journal_get_superblock(journal_t *journal) } /* Precompute checksum seed for all metadata */ - if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + if (jbd2_journal_has_csum_v2or3(journal)) journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid, sizeof(sb->s_uuid)); @@ -1813,8 +1821,14 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat, if (!jbd2_journal_check_available_features(journal, compat, ro, incompat)) return 0; - /* Asking for checksumming v2 and v1? Only give them v2. */ - if (incompat & JBD2_FEATURE_INCOMPAT_CSUM_V2 && + /* If enabling v2 checksums, turn on v3 instead */ + if (incompat & JBD2_FEATURE_INCOMPAT_CSUM_V2) { + incompat &= ~JBD2_FEATURE_INCOMPAT_CSUM_V2; + incompat |= JBD2_FEATURE_INCOMPAT_CSUM_V3; + } + + /* Asking for checksumming v3 and v1? Only give them v3. */ + if (incompat & JBD2_FEATURE_INCOMPAT_CSUM_V3 && compat & JBD2_FEATURE_COMPAT_CHECKSUM) compat &= ~JBD2_FEATURE_COMPAT_CHECKSUM; @@ -1823,8 +1837,8 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat, sb = journal->j_superblock; - /* If enabling v2 checksums, update superblock */ - if (INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V2)) { + /* If enabling v3 checksums, update superblock */ + if (INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V3)) { sb->s_checksum_type = JBD2_CRC32C_CHKSUM; sb->s_feature_compat &= ~cpu_to_be32(JBD2_FEATURE_COMPAT_CHECKSUM); @@ -1842,8 +1856,7 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat, } /* Precompute checksum seed for all metadata */ - if (JBD2_HAS_INCOMPAT_FEATURE(journal, - JBD2_FEATURE_INCOMPAT_CSUM_V2)) + if (jbd2_journal_has_csum_v2or3(journal)) journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid, sizeof(sb->s_uuid)); @@ -1852,7 +1865,8 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat, /* If enabling v1 checksums, downgrade superblock */ if (COMPAT_FEATURE_ON(JBD2_FEATURE_COMPAT_CHECKSUM)) sb->s_feature_incompat &= - ~cpu_to_be32(JBD2_FEATURE_INCOMPAT_CSUM_V2); + ~cpu_to_be32(JBD2_FEATURE_INCOMPAT_CSUM_V2 | + JBD2_FEATURE_INCOMPAT_CSUM_V3); sb->s_feature_compat |= cpu_to_be32(compat); sb->s_feature_ro_compat |= cpu_to_be32(ro); @@ -2165,16 +2179,20 @@ int jbd2_journal_blocks_per_page(struct inode *inode) */ size_t journal_tag_bytes(journal_t *journal) { - journal_block_tag_t tag; - size_t x = 0; + size_t sz; + + if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V3)) + return sizeof(journal_block_tag3_t); + + sz = sizeof(journal_block_tag_t); if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) - x += sizeof(tag.t_checksum); + sz += sizeof(__u16); if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) - return x + JBD2_TAG_SIZE64; + return sz; else - return x + JBD2_TAG_SIZE32; + return sz - sizeof(__u32); } /* diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index 00e9703d7dc6..9b329b55ffe3 100644 --- a/fs/jbd2/recovery.c +++ 
b/fs/jbd2/recovery.c @@ -181,7 +181,7 @@ static int jbd2_descr_block_csum_verify(journal_t *j, __be32 provided; __u32 calculated; - if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + if (!jbd2_journal_has_csum_v2or3(j)) return 1; tail = (struct jbd2_journal_block_tail *)(buf + j->j_blocksize - @@ -205,7 +205,7 @@ static int count_tags(journal_t *journal, struct buffer_head *bh) int nr = 0, size = journal->j_blocksize; int tag_bytes = journal_tag_bytes(journal); - if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + if (jbd2_journal_has_csum_v2or3(journal)) size -= sizeof(struct jbd2_journal_block_tail); tagp = &bh->b_data[sizeof(journal_header_t)]; @@ -338,10 +338,11 @@ int jbd2_journal_skip_recovery(journal_t *journal) return err; } -static inline unsigned long long read_tag_block(int tag_bytes, journal_block_tag_t *tag) +static inline unsigned long long read_tag_block(journal_t *journal, + journal_block_tag_t *tag) { unsigned long long block = be32_to_cpu(tag->t_blocknr); - if (tag_bytes > JBD2_TAG_SIZE32) + if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) block |= (u64)be32_to_cpu(tag->t_blocknr_high) << 32; return block; } @@ -384,7 +385,7 @@ static int jbd2_commit_block_csum_verify(journal_t *j, void *buf) __be32 provided; __u32 calculated; - if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + if (!jbd2_journal_has_csum_v2or3(j)) return 1; h = buf; @@ -399,17 +400,21 @@ static int jbd2_commit_block_csum_verify(journal_t *j, void *buf) static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag, void *buf, __u32 sequence) { + journal_block_tag3_t *tag3 = (journal_block_tag3_t *)tag; __u32 csum32; __be32 seq; - if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + if (!jbd2_journal_has_csum_v2or3(j)) return 1; seq = cpu_to_be32(sequence); csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&seq, sizeof(seq)); csum32 = jbd2_chksum(j, csum32, buf, j->j_blocksize); - return tag->t_checksum == cpu_to_be16(csum32); + if (JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V3)) + return tag3->t_checksum == cpu_to_be32(csum32); + else + return tag->t_checksum == cpu_to_be16(csum32); } static int do_one_pass(journal_t *journal, @@ -513,8 +518,7 @@ static int do_one_pass(journal_t *journal, switch(blocktype) { case JBD2_DESCRIPTOR_BLOCK: /* Verify checksum first */ - if (JBD2_HAS_INCOMPAT_FEATURE(journal, - JBD2_FEATURE_INCOMPAT_CSUM_V2)) + if (jbd2_journal_has_csum_v2or3(journal)) descr_csum_size = sizeof(struct jbd2_journal_block_tail); if (descr_csum_size > 0 && @@ -575,7 +579,7 @@ static int do_one_pass(journal_t *journal, unsigned long long blocknr; J_ASSERT(obh != NULL); - blocknr = read_tag_block(tag_bytes, + blocknr = read_tag_block(journal, tag); /* If the block has been @@ -814,7 +818,7 @@ static int jbd2_revoke_block_csum_verify(journal_t *j, __be32 provided; __u32 calculated; - if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + if (!jbd2_journal_has_csum_v2or3(j)) return 1; tail = (struct jbd2_journal_revoke_tail *)(buf + j->j_blocksize - diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index 198c9c10276d..d5e95a175c92 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c @@ -91,8 +91,8 @@ #include #include #include -#endif #include +#endif static struct kmem_cache *jbd2_revoke_record_cache; static struct kmem_cache *jbd2_revoke_table_cache; @@ -597,7 +597,7 @@ static void write_one_revoke_record(journal_t *journal, offset = *offsetp; /* Do we need to leave space at 
the end for a checksum? */ - if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + if (jbd2_journal_has_csum_v2or3(journal)) csum_size = sizeof(struct jbd2_journal_revoke_tail); /* Make sure we have a descriptor with space left for the record */ @@ -644,7 +644,7 @@ static void jbd2_revoke_csum_set(journal_t *j, struct buffer_head *bh) struct jbd2_journal_revoke_tail *tail; __u32 csum; - if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + if (!jbd2_journal_has_csum_v2or3(j)) return; tail = (struct jbd2_journal_revoke_tail *)(bh->b_data + j->j_blocksize - diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index d5b50a19463c..0dae71e9971c 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -159,7 +159,11 @@ typedef struct journal_header_s * journal_block_tag (in the descriptor). The other h_chksum* fields are * not used. * - * Checksum v1 and v2 are mutually exclusive features. + * If FEATURE_INCOMPAT_CSUM_V3 is set, the descriptor block uses + * journal_block_tag3_t to store a full 32-bit checksum. Everything else + * is the same as v2. + * + * Checksum v1, v2, and v3 are mutually exclusive features. */ struct commit_header { __be32 h_magic; @@ -179,6 +183,14 @@ struct commit_header { * raw struct shouldn't be used for pointer math or sizeof() - use * journal_tag_bytes(journal) instead to compute this. */ +typedef struct journal_block_tag3_s +{ + __be32 t_blocknr; /* The on-disk block number */ + __be32 t_flags; /* See below */ + __be32 t_blocknr_high; /* most-significant high 32bits. */ + __be32 t_checksum; /* crc32c(uuid+seq+block) */ +} journal_block_tag3_t; + typedef struct journal_block_tag_s { __be32 t_blocknr; /* The on-disk block number */ @@ -187,9 +199,6 @@ typedef struct journal_block_tag_s __be32 t_blocknr_high; /* most-significant high 32bits. */ } journal_block_tag_t; -#define JBD2_TAG_SIZE32 (offsetof(journal_block_tag_t, t_blocknr_high)) -#define JBD2_TAG_SIZE64 (sizeof(journal_block_tag_t)) - /* Tail of descriptor block, for checksumming */ struct jbd2_journal_block_tail { __be32 t_checksum; /* crc32c(uuid+descr_block) */ @@ -284,6 +293,7 @@ typedef struct journal_superblock_s #define JBD2_FEATURE_INCOMPAT_64BIT 0x00000002 #define JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT 0x00000004 #define JBD2_FEATURE_INCOMPAT_CSUM_V2 0x00000008 +#define JBD2_FEATURE_INCOMPAT_CSUM_V3 0x00000010 /* Features known to this kernel version: */ #define JBD2_KNOWN_COMPAT_FEATURES JBD2_FEATURE_COMPAT_CHECKSUM @@ -291,7 +301,8 @@ typedef struct journal_superblock_s #define JBD2_KNOWN_INCOMPAT_FEATURES (JBD2_FEATURE_INCOMPAT_REVOKE | \ JBD2_FEATURE_INCOMPAT_64BIT | \ JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT | \ - JBD2_FEATURE_INCOMPAT_CSUM_V2) + JBD2_FEATURE_INCOMPAT_CSUM_V2 | \ + JBD2_FEATURE_INCOMPAT_CSUM_V3) #ifdef __KERNEL__ @@ -1296,6 +1307,15 @@ static inline int tid_geq(tid_t x, tid_t y) extern int jbd2_journal_blocks_per_page(struct inode *inode); extern size_t journal_tag_bytes(journal_t *journal); +static inline int jbd2_journal_has_csum_v2or3(journal_t *journal) +{ + if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2) || + JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V3)) + return 1; + + return 0; +} + /* * We reserve t_outstanding_credits >> JBD2_CONTROL_BLOCKS_SHIFT for * transaction control blocks. -- cgit v1.2.3-59-g8ed1b
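
Editor's note: the following is a standalone sketch, not kernel code, that reproduces the descriptor-tag size arithmetic from the jbd2 commit above to show why a 32-bit journal with checksum v2 could be mistaken for a 64-bit one, and why checksum v3 moves to a fixed 16-byte tag. The feature-flag values and the journal_block_tag3 layout are taken from the diff; the pre-v3 tag layout (with the 16-bit truncated checksum folded into the old flags space) is the standard jbd2 layout and is assumed here, and the helper names tag_bytes_old(), tag_bytes_new() and the main() driver are illustrative only.

```c
/*
 * Sketch only: mirrors the tag-size logic touched by the CSUM_V3 patch.
 * Plain C types stand in for __be16/__be32; byte order does not matter
 * for the size arithmetic being demonstrated.
 */
#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

#define JBD2_FEATURE_INCOMPAT_64BIT    0x00000002
#define JBD2_FEATURE_INCOMPAT_CSUM_V2  0x00000008
#define JBD2_FEATURE_INCOMPAT_CSUM_V3  0x00000010

/* Pre-v3 on-disk tag: checksum v2 squeezes a truncated 16-bit checksum
 * into the space of the old 32-bit flags field. */
struct journal_block_tag {
	uint32_t t_blocknr;
	uint16_t t_checksum;      /* truncated crc32c, CSUM_V2 only */
	uint16_t t_flags;
	uint32_t t_blocknr_high;  /* present only on 64-bit journals */
};

/* New v3 tag from the patch: room for a full 32-bit checksum, always 16 bytes. */
struct journal_block_tag3 {
	uint32_t t_blocknr;
	uint32_t t_flags;
	uint32_t t_blocknr_high;
	uint32_t t_checksum;
};

#define JBD2_TAG_SIZE32 (offsetof(struct journal_block_tag, t_blocknr_high))
#define JBD2_TAG_SIZE64 (sizeof(struct journal_block_tag))

/* Old helper: adds 2 bytes for the v2 checksum even though that checksum
 * already lives inside the first 8 bytes of the tag. */
static size_t tag_bytes_old(unsigned incompat)
{
	size_t x = 0;

	if (incompat & JBD2_FEATURE_INCOMPAT_CSUM_V2)
		x += sizeof(uint16_t);
	return x + ((incompat & JBD2_FEATURE_INCOMPAT_64BIT) ?
		    JBD2_TAG_SIZE64 : JBD2_TAG_SIZE32);
}

/* Helper as rewritten by the patch: v3 journals always use the 16-byte tag;
 * v2 keeps its historical (oversized) stride for on-disk compatibility. */
static size_t tag_bytes_new(unsigned incompat)
{
	size_t sz;

	if (incompat & JBD2_FEATURE_INCOMPAT_CSUM_V3)
		return sizeof(struct journal_block_tag3);

	sz = sizeof(struct journal_block_tag);
	if (incompat & JBD2_FEATURE_INCOMPAT_CSUM_V2)
		sz += sizeof(uint16_t);

	if (incompat & JBD2_FEATURE_INCOMPAT_64BIT)
		return sz;
	return sz - sizeof(uint32_t);
}

int main(void)
{
	/* 32-bit journal with checksum v2: the case that used to go wrong. */
	unsigned incompat = JBD2_FEATURE_INCOMPAT_CSUM_V2;
	size_t tb = tag_bytes_old(incompat);

	printf("v2, 32-bit journal: tag stride = %zu bytes\n", tb);

	/* Old 64-bitness test in read_tag_block()/write_tag_block(): it
	 * compared the stride against JBD2_TAG_SIZE32, so the extra checksum
	 * bytes made a 32-bit journal look 64-bit and t_blocknr_high was
	 * used anyway. */
	printf("old test (tag_bytes > TAG_SIZE32): %s\n",
	       tb > JBD2_TAG_SIZE32 ? "treated as 64-bit (wrong)" : "32-bit");

	/* Patched test: trust the INCOMPAT_64BIT feature flag, not the size. */
	printf("new test (INCOMPAT_64BIT flag):    %s\n",
	       (incompat & JBD2_FEATURE_INCOMPAT_64BIT) ?
	       "64-bit" : "32-bit (correct)");

	/* A freshly checksummed journal is steered to v3 instead. */
	printf("v3 tag size: %zu bytes\n",
	       tag_bytes_new(JBD2_FEATURE_INCOMPAT_CSUM_V3));
	return 0;
}
```

Compiling and running this prints a 10-byte stride for the v2/32-bit case, which is why the old size-based test misclassified such journals; the patched kernel instead keys both write_tag_block() and read_tag_block() off the JBD2_FEATURE_INCOMPAT_64BIT flag and gives new journals the fixed-size v3 tag.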