From a13007160f1b9ec7c67e28ec9254f197c5c08d7d Mon Sep 17 00:00:00 2001
From: Amos Kong <akong@redhat.com>
Date: Fri, 9 Mar 2012 12:17:32 +0800
Subject: KVM: resize kvm_io_range array dynamically

This patch makes the kvm_io_range array can be resized dynamically.

Signed-off-by: Amos Kong <akong@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 include/linux/kvm_host.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 665a260c7e09..ba9fb4a9762d 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -68,10 +68,11 @@ struct kvm_io_range {
 	struct kvm_io_device *dev;
 };
 
+#define NR_IOBUS_DEVS 300
+
 struct kvm_io_bus {
 	int                   dev_count;
-#define NR_IOBUS_DEVS 300
-	struct kvm_io_range range[NR_IOBUS_DEVS];
+	struct kvm_io_range range[];
 };
 
 enum kvm_bus {
-- 
cgit v1.3-7-g2ca7


From 786a9f888bfbe70a36d0592b26037ca1e8c8da7f Mon Sep 17 00:00:00 2001
From: Amos Kong <akong@redhat.com>
Date: Fri, 9 Mar 2012 12:17:40 +0800
Subject: KVM: set upper bounds for iobus dev to limit userspace

kvm_io_bus devices are used for ioevent, pit, pic, ioapic,
coalesced_mmio.

Currently Qemu only emulates one PCI bus, it contains 32 slots,
one slot contains 8 functions, maximum of supported PCI devices:
 1 * 32 * 8 = 256. One virtio-blk takes one iobus device,
one virtio-net(vhost=on) takes two iobus devices.
The maximum of coalesced mmio zone is 100, each zone
has an iobus devices. So 300 io_bus devices are not enough.

Set an upper bounds for kvm_io_range to limit userspace.
1000 is a very large limit and not bloat the typical user.

Signed-off-by: Amos Kong <akong@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 include/linux/kvm_host.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index ba9fb4a9762d..3a2cea616283 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -68,7 +68,7 @@ struct kvm_io_range {
 	struct kvm_io_device *dev;
 };
 
-#define NR_IOBUS_DEVS 300
+#define NR_IOBUS_DEVS 1000
 
 struct kvm_io_bus {
 	int                   dev_count;
-- 
cgit v1.3-7-g2ca7


From b6d33834bd4e8bdf4a199812e31b3e36da53c794 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <c.dall@virtualopensystems.com>
Date: Thu, 8 Mar 2012 16:44:24 -0500
Subject: KVM: Factor out kvm_vcpu_kick to arch-generic code

The kvm_vcpu_kick function performs roughly the same funcitonality on
most all architectures, so we shouldn't have separate copies.

PowerPC keeps a pointer to interchanging waitqueues on the vcpu_arch
structure and to accomodate this special need a
__KVM_HAVE_ARCH_VCPU_GET_WQ define and accompanying function
kvm_arch_vcpu_wq have been defined. For all other architectures this
is a generic inline that just returns &vcpu->wq;

Acked-by: Scott Wood <scottwood@freescale.com>
Signed-off-by: Christoffer Dall <c.dall@virtualopensystems.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/ia64/include/asm/kvm_host.h    |  1 +
 arch/ia64/kvm/kvm-ia64.c            | 20 +++++---------------
 arch/powerpc/include/asm/kvm_host.h |  6 ++++++
 arch/powerpc/kvm/powerpc.c          | 21 ++++++---------------
 arch/s390/kvm/kvm-s390.c            |  8 ++++++++
 arch/x86/kvm/x86.c                  | 16 ++--------------
 include/linux/kvm_host.h            |  9 +++++++++
 virt/kvm/kvm_main.c                 | 22 ++++++++++++++++++++++
 8 files changed, 59 insertions(+), 44 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h
index e35b3a84a40b..c4b4bac3d09e 100644
--- a/arch/ia64/include/asm/kvm_host.h
+++ b/arch/ia64/include/asm/kvm_host.h
@@ -365,6 +365,7 @@ struct thash_cb {
 };
 
 struct kvm_vcpu_stat {
+	u32 halt_wakeup;
 };
 
 struct kvm_vcpu_arch {
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index f5104b7c52cd..9d80ff8d9eff 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -1872,21 +1872,6 @@ void kvm_arch_hardware_unsetup(void)
 {
 }
 
-void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
-{
-	int me;
-	int cpu = vcpu->cpu;
-
-	if (waitqueue_active(&vcpu->wq))
-		wake_up_interruptible(&vcpu->wq);
-
-	me = get_cpu();
-	if (cpu != me && (unsigned) cpu < nr_cpu_ids && cpu_online(cpu))
-		if (!test_and_set_bit(KVM_REQ_KICK, &vcpu->requests))
-			smp_send_reschedule(cpu);
-	put_cpu();
-}
-
 int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq)
 {
 	return __apic_accept_irq(vcpu, irq->vector);
@@ -1956,6 +1941,11 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
 		(kvm_highest_pending_irq(vcpu) != -1);
 }
 
+int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
+{
+	return (!test_and_set_bit(KVM_REQ_KICK, &vcpu->requests));
+}
+
 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
 				    struct kvm_mp_state *mp_state)
 {
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 52eb9c1f4fe0..889383735e73 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -498,4 +498,10 @@ struct kvm_vcpu_arch {
 #define KVM_MMIO_REG_QPR	0x0040
 #define KVM_MMIO_REG_FQPR	0x0060
 
+#define __KVM_HAVE_ARCH_VCPU_GET_WQ 1
+static inline wait_queue_head *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.wqp;
+}
+
 #endif /* __POWERPC_KVM_HOST_H__ */
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 00d7e345b3fe..b5e9046462fd 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -43,6 +43,11 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
 	       v->requests;
 }
 
+int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
+{
+	return 1;
+}
+
 int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
 {
 	int nr = kvmppc_get_gpr(vcpu, 11);
@@ -588,21 +593,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	return r;
 }
 
-void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
-{
-	int me;
-	int cpu = vcpu->cpu;
-
-	me = get_cpu();
-	if (waitqueue_active(vcpu->arch.wqp)) {
-		wake_up_interruptible(vcpu->arch.wqp);
-		vcpu->stat.halt_wakeup++;
-	} else if (cpu != me && cpu != -1) {
-		smp_send_reschedule(vcpu->cpu);
-	}
-	put_cpu();
-}
-
 int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq)
 {
 	if (irq->irq == KVM_INTERRUPT_UNSET) {
@@ -611,6 +601,7 @@ int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq)
 	}
 
 	kvmppc_core_queue_external(vcpu, irq);
+
 	kvm_vcpu_kick(vcpu);
 
 	return 0;
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 217ce44395a4..d30c8350b949 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -423,6 +423,14 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
+int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
+{
+	/* kvm common code refers to this, but never calls it */
+	BUG();
+	return 0;
+}
+
+
 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
 {
 	kvm_s390_vcpu_initial_reset(vcpu);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4044ce0bf7c1..511031dcb9cc 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6403,21 +6403,9 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
 		 kvm_cpu_has_interrupt(vcpu));
 }
 
-void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
+int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
 {
-	int me;
-	int cpu = vcpu->cpu;
-
-	if (waitqueue_active(&vcpu->wq)) {
-		wake_up_interruptible(&vcpu->wq);
-		++vcpu->stat.halt_wakeup;
-	}
-
-	me = get_cpu();
-	if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu))
-		if (kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE)
-			smp_send_reschedule(cpu);
-	put_cpu();
+	return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
 }
 
 int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 3a2cea616283..5b624e1ff814 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -439,6 +439,7 @@ void mark_page_dirty_in_slot(struct kvm *kvm, struct kvm_memory_slot *memslot,
 			     gfn_t gfn);
 
 void kvm_vcpu_block(struct kvm_vcpu *vcpu);
+void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
 void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu);
 void kvm_resched(struct kvm_vcpu *vcpu);
 void kvm_load_guest_fpu(struct kvm_vcpu *vcpu);
@@ -507,6 +508,7 @@ int kvm_arch_hardware_setup(void);
 void kvm_arch_hardware_unsetup(void);
 void kvm_arch_check_processor_compat(void *rtn);
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu);
+int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu);
 
 void kvm_free_physmem(struct kvm *kvm);
 
@@ -522,6 +524,13 @@ static inline void kvm_arch_free_vm(struct kvm *kvm)
 }
 #endif
 
+#ifndef __KVM_HAVE_ARCH_VCPU_GET_WQ
+static inline wait_queue_head_t *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu)
+{
+	return &vcpu->wq;
+}
+#endif
+
 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type);
 void kvm_arch_destroy_vm(struct kvm *kvm);
 void kvm_free_all_assigned_devices(struct kvm *kvm);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index a9565e240636..7149a2e65524 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1514,6 +1514,28 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
 	finish_wait(&vcpu->wq, &wait);
 }
 
+/*
+ * Kick a sleeping VCPU, or a guest VCPU in guest mode, into host kernel mode.
+ */
+void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
+{
+	int me;
+	int cpu = vcpu->cpu;
+	wait_queue_head_t *wqp;
+
+	wqp = kvm_arch_vcpu_wq(vcpu);
+	if (waitqueue_active(wqp)) {
+		wake_up_interruptible(wqp);
+		++vcpu->stat.halt_wakeup;
+	}
+
+	me = get_cpu();
+	if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu))
+		if (kvm_arch_vcpu_should_kick(vcpu))
+			smp_send_reschedule(cpu);
+	put_cpu();
+}
+
 void kvm_resched(struct kvm_vcpu *vcpu)
 {
 	if (!need_resched())
-- 
cgit v1.3-7-g2ca7


From 2246f8b56315befa30f3d3d2800e0734c774f70e Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Tue, 13 Mar 2012 22:35:01 +0100
Subject: KVM: PPC: Rework wqp conditional code

On PowerPC, we sometimes use a waitqueue per core, not per thread,
so we can't always use the vcpu internal waitqueue.

This code has been generalized by Christoffer Dall recently, but
unfortunately broke compilation for PowerPC. At the time the helper
function is defined, struct kvm_vcpu is not declared yet, so we can't
dereference it.

This patch moves all logic into the generic inline function, at which
time we have all information necessary.

Signed-off-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/powerpc/include/asm/kvm_host.h | 6 +-----
 include/linux/kvm_host.h            | 6 ++++--
 2 files changed, 5 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 889383735e73..20ab5b2dbd0f 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -498,10 +498,6 @@ struct kvm_vcpu_arch {
 #define KVM_MMIO_REG_QPR	0x0040
 #define KVM_MMIO_REG_FQPR	0x0060
 
-#define __KVM_HAVE_ARCH_VCPU_GET_WQ 1
-static inline wait_queue_head *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu)
-{
-	return vcpu->arch.wqp;
-}
+#define __KVM_HAVE_ARCH_WQP
 
 #endif /* __POWERPC_KVM_HOST_H__ */
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 5b624e1ff814..5184817e714a 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -524,12 +524,14 @@ static inline void kvm_arch_free_vm(struct kvm *kvm)
 }
 #endif
 
-#ifndef __KVM_HAVE_ARCH_VCPU_GET_WQ
 static inline wait_queue_head_t *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu)
 {
+#ifdef __KVM_HAVE_ARCH_WQP
+	return vcpu->arch.wqp;
+#else
 	return &vcpu->wq;
-}
 #endif
+}
 
 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type);
 void kvm_arch_destroy_vm(struct kvm *kvm);
-- 
cgit v1.3-7-g2ca7


From 1c0b28c2a46d98cd258d96b8c222144b22876c46 Mon Sep 17 00:00:00 2001
From: Eric B Munson <emunson@mgebm.net>
Date: Sat, 10 Mar 2012 14:37:27 -0500
Subject: KVM: x86: Add ioctl for KVM_KVMCLOCK_CTRL

Now that we have a flag that will tell the guest it was suspended, create an
interface for that communication using a KVM ioctl.

Signed-off-by: Eric B Munson <emunson@mgebm.net>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 Documentation/virtual/kvm/api.txt | 20 ++++++++++++++++++++
 Documentation/virtual/kvm/msr.txt |  4 ++++
 arch/x86/kvm/x86.c                | 22 ++++++++++++++++++++++
 include/linux/kvm.h               |  3 +++
 4 files changed, 49 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 6386f8c0482e..81ff39f6248d 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1669,6 +1669,26 @@ at the memory location pointed to by "addr".
 The list of registers accessible using this interface is identical to the
 list in 4.64.
 
+4.70 KVM_KVMCLOCK_CTRL
+
+Capability: KVM_CAP_KVMCLOCK_CTRL
+Architectures: Any that implement pvclocks (currently x86 only)
+Type: vcpu ioctl
+Parameters: None
+Returns: 0 on success, -1 on error
+
+This signals to the host kernel that the specified guest is being paused by
+userspace.  The host will set a flag in the pvclock structure that is checked
+from the soft lockup watchdog.  The flag is part of the pvclock structure that
+is shared between guest and host, specifically the second bit of the flags
+field of the pvclock_vcpu_time_info structure.  It will be set exclusively by
+the host and read/cleared exclusively by the guest.  The guest operation of
+checking and clearing the flag must an atomic operation so
+load-link/store-conditional, or equivalent must be used.  There are two cases
+where the guest will clear the flag: when the soft lockup watchdog timer resets
+itself or when a soft lockup is detected.  This ioctl can be called any time
+after pausing the vcpu, but before it is resumed.
+
 5. The kvm_run structure
 
 Application code obtains a pointer to the kvm_run structure by
diff --git a/Documentation/virtual/kvm/msr.txt b/Documentation/virtual/kvm/msr.txt
index 50317809113d..96b41bd97523 100644
--- a/Documentation/virtual/kvm/msr.txt
+++ b/Documentation/virtual/kvm/msr.txt
@@ -108,6 +108,10 @@ MSR_KVM_SYSTEM_TIME_NEW:  0x4b564d01
 			    |	           | time measures taken across
 		     0      |	   24      | multiple cpus are guaranteed to
 			    |		   | be monotonic
+		-------------------------------------------------------------
+			    |		   | guest vcpu has been paused by
+		     1	    |	  N/A	   | the host
+			    |		   | See 4.70 in api.txt
 		-------------------------------------------------------------
 
 	Availability of this MSR must be checked via bit 3 in 0x4000001 cpuid
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 511031dcb9cc..99b738028fc0 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2147,6 +2147,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_ASYNC_PF:
 	case KVM_CAP_GET_TSC_KHZ:
 	case KVM_CAP_PCI_2_3:
+	case KVM_CAP_KVMCLOCK_CTRL:
 		r = 1;
 		break;
 	case KVM_CAP_COALESCED_MMIO:
@@ -2597,6 +2598,23 @@ static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu,
 	return r;
 }
 
+/*
+ * kvm_set_guest_paused() indicates to the guest kernel that it has been
+ * stopped by the hypervisor.  This function will be called from the host only.
+ * EINVAL is returned when the host attempts to set the flag for a guest that
+ * does not support pv clocks.
+ */
+static int kvm_set_guest_paused(struct kvm_vcpu *vcpu)
+{
+	struct pvclock_vcpu_time_info *src = &vcpu->arch.hv_clock;
+	if (!vcpu->arch.time_page)
+		return -EINVAL;
+	src->flags |= PVCLOCK_GUEST_STOPPED;
+	mark_page_dirty(vcpu->kvm, vcpu->arch.time >> PAGE_SHIFT);
+	kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
+	return 0;
+}
+
 long kvm_arch_vcpu_ioctl(struct file *filp,
 			 unsigned int ioctl, unsigned long arg)
 {
@@ -2873,6 +2891,10 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 		r = vcpu->arch.virtual_tsc_khz;
 		goto out;
 	}
+	case KVM_KVMCLOCK_CTRL: {
+		r = kvm_set_guest_paused(vcpu);
+		goto out;
+	}
 	default:
 		r = -EINVAL;
 	}
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 6c322a90b92f..7a9dd4b3dede 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -589,6 +589,7 @@ struct kvm_ppc_pvinfo {
 #define KVM_CAP_S390_UCONTROL 73
 #define KVM_CAP_SYNC_REGS 74
 #define KVM_CAP_PCI_2_3 75
+#define KVM_CAP_KVMCLOCK_CTRL 76
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -859,6 +860,8 @@ struct kvm_s390_ucas_mapping {
 /* Available with KVM_CAP_ONE_REG */
 #define KVM_GET_ONE_REG		  _IOW(KVMIO,  0xab, struct kvm_one_reg)
 #define KVM_SET_ONE_REG		  _IOW(KVMIO,  0xac, struct kvm_one_reg)
+/* VM is being stopped by host */
+#define KVM_KVMCLOCK_CTRL	  _IO(KVMIO,   0xad)
 
 #define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
 #define KVM_DEV_ASSIGN_PCI_2_3		(1 << 1)
-- 
cgit v1.3-7-g2ca7


From 93474b25af1eabf5b14743793156e8d307bfcd6b Mon Sep 17 00:00:00 2001
From: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
Date: Thu, 1 Mar 2012 19:34:45 +0900
Subject: KVM: Remove unused dirty_bitmap_head and nr_dirty_pages

Now that we do neither double buffering nor heuristic selection of the
write protection method these are not needed anymore.

Note: some drivers have their own implementation of set_bit_le() and
making it generic needs a bit of work; so we use test_and_set_bit_le()
and will later replace it with generic set_bit_le().

Signed-off-by: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 include/linux/kvm_host.h |  2 --
 virt/kvm/kvm_main.c      | 14 +++++---------
 2 files changed, 5 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 5184817e714a..49c2f2fd281f 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -179,8 +179,6 @@ struct kvm_memory_slot {
 	unsigned long flags;
 	unsigned long *rmap;
 	unsigned long *dirty_bitmap;
-	unsigned long *dirty_bitmap_head;
-	unsigned long nr_dirty_pages;
 	struct kvm_arch_memory_slot arch;
 	unsigned long userspace_addr;
 	int user_alloc;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index a612bc8c921c..6bd34a6ecca1 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -522,12 +522,11 @@ static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot)
 		return;
 
 	if (2 * kvm_dirty_bitmap_bytes(memslot) > PAGE_SIZE)
-		vfree(memslot->dirty_bitmap_head);
+		vfree(memslot->dirty_bitmap);
 	else
-		kfree(memslot->dirty_bitmap_head);
+		kfree(memslot->dirty_bitmap);
 
 	memslot->dirty_bitmap = NULL;
-	memslot->dirty_bitmap_head = NULL;
 }
 
 /*
@@ -611,8 +610,7 @@ static int kvm_vm_release(struct inode *inode, struct file *filp)
 
 /*
  * Allocation size is twice as large as the actual dirty bitmap size.
- * This makes it possible to do double buffering: see x86's
- * kvm_vm_ioctl_get_dirty_log().
+ * See x86's kvm_vm_ioctl_get_dirty_log() why this is needed.
  */
 static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot)
 {
@@ -627,8 +625,6 @@ static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot)
 	if (!memslot->dirty_bitmap)
 		return -ENOMEM;
 
-	memslot->dirty_bitmap_head = memslot->dirty_bitmap;
-	memslot->nr_dirty_pages = 0;
 #endif /* !CONFIG_S390 */
 	return 0;
 }
@@ -1476,8 +1472,8 @@ void mark_page_dirty_in_slot(struct kvm *kvm, struct kvm_memory_slot *memslot,
 	if (memslot && memslot->dirty_bitmap) {
 		unsigned long rel_gfn = gfn - memslot->base_gfn;
 
-		if (!test_and_set_bit_le(rel_gfn, memslot->dirty_bitmap))
-			memslot->nr_dirty_pages++;
+		/* TODO: introduce set_bit_le() and use it */
+		test_and_set_bit_le(rel_gfn, memslot->dirty_bitmap);
 	}
 }
 
-- 
cgit v1.3-7-g2ca7


From 2db938bee32e7469ca8ed9bfb3a05535f28c680d Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Mon, 21 Feb 2011 17:25:37 +0100
Subject: jbd: Refine commit writeout logic

Currently we write out all journal buffers in WRITE_SYNC mode. This improves
performance for fsync heavy workloads but hinders performance when writes
are mostly asynchronous, most noticably it slows down readers and users
complain about slow desktop response etc.

So submit writes as asynchronous in the normal case and only submit writes as
WRITE_SYNC if we detect someone is waiting for current transaction commit.

I've gathered some numbers to back this change. The first is the read latency
test. It measures time to read 1 MB after several seconds of sleeping in
presence of streaming writes.

Top 10 times (out of 90) in us:
Before		After
2131586		697473
1709932		557487
1564598		535642
1480462		347573
1478579		323153
1408496		222181
1388960		181273
1329565		181070
1252486		172832
1223265		172278

Average:
619377		82180

So the improvement in both maximum and average latency is massive.

I've measured fsync throughput by:
fs_mark -n 100 -t 1 -s 16384 -d /mnt/fsync/ -S 1 -L 4

in presence of streaming reader. The numbers (fsyncs/s) are:
Before		After
9.9		6.3
6.8		6.0
6.3		6.2
5.8		6.1

So fsync performance seems unharmed by this change.

Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/jbd/commit.c            | 10 +++++++---
 fs/jbd/journal.c           |  2 ++
 fs/jbd/transaction.c       |  2 --
 include/linux/jbd.h        | 15 +++++++++------
 include/trace/events/jbd.h | 24 ++++++++----------------
 5 files changed, 26 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index f2b9a571f4cf..9d31e6a39205 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -298,6 +298,7 @@ void journal_commit_transaction(journal_t *journal)
 	int tag_flag;
 	int i;
 	struct blk_plug plug;
+	int write_op = WRITE;
 
 	/*
 	 * First job: lock down the current transaction and wait for
@@ -413,13 +414,16 @@ void journal_commit_transaction(journal_t *journal)
 
 	jbd_debug (3, "JBD: commit phase 2\n");
 
+	if (tid_geq(journal->j_commit_waited, commit_transaction->t_tid))
+		write_op = WRITE_SYNC;
+
 	/*
 	 * Now start flushing things to disk, in the order they appear
 	 * on the transaction lists.  Data blocks go first.
 	 */
 	blk_start_plug(&plug);
 	err = journal_submit_data_buffers(journal, commit_transaction,
-					  WRITE_SYNC);
+					  write_op);
 	blk_finish_plug(&plug);
 
 	/*
@@ -478,7 +482,7 @@ void journal_commit_transaction(journal_t *journal)
 
 	blk_start_plug(&plug);
 
-	journal_write_revoke_records(journal, commit_transaction, WRITE_SYNC);
+	journal_write_revoke_records(journal, commit_transaction, write_op);
 
 	/*
 	 * If we found any dirty or locked buffers, then we should have
@@ -649,7 +653,7 @@ start_journal_io:
 				clear_buffer_dirty(bh);
 				set_buffer_uptodate(bh);
 				bh->b_end_io = journal_end_buffer_io_sync;
-				submit_bh(WRITE_SYNC, bh);
+				submit_bh(write_op, bh);
 			}
 			cond_resched();
 
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 0971e9217808..2047fd77bf38 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -563,6 +563,8 @@ int log_wait_commit(journal_t *journal, tid_t tid)
 	spin_unlock(&journal->j_state_lock);
 #endif
 	spin_lock(&journal->j_state_lock);
+	if (!tid_geq(journal->j_commit_waited, tid))
+		journal->j_commit_waited = tid;
 	while (tid_gt(tid, journal->j_commit_sequence)) {
 		jbd_debug(1, "JBD: want %d, j_commit_sequence=%d\n",
 				  tid, journal->j_commit_sequence);
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index b2a7e5244e39..febc10db5ced 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -1433,8 +1433,6 @@ int journal_stop(handle_t *handle)
 		}
 	}
 
-	if (handle->h_sync)
-		transaction->t_synchronous_commit = 1;
 	current->journal_info = NULL;
 	spin_lock(&journal->j_state_lock);
 	spin_lock(&transaction->t_handle_lock);
diff --git a/include/linux/jbd.h b/include/linux/jbd.h
index d211732b9e99..f265682ae134 100644
--- a/include/linux/jbd.h
+++ b/include/linux/jbd.h
@@ -479,12 +479,6 @@ struct transaction_s
 	 * How many handles used this transaction? [t_handle_lock]
 	 */
 	int t_handle_count;
-
-	/*
-	 * This transaction is being forced and some process is
-	 * waiting for it to finish.
-	 */
-	unsigned int t_synchronous_commit:1;
 };
 
 /**
@@ -531,6 +525,8 @@ struct transaction_s
  *  transaction
  * @j_commit_request: Sequence number of the most recent transaction wanting
  *     commit
+ * @j_commit_waited: Sequence number of the most recent transaction someone
+ *     is waiting for to commit.
  * @j_uuid: Uuid of client object.
  * @j_task: Pointer to the current commit thread for this journal
  * @j_max_transaction_buffers:  Maximum number of metadata buffers to allow in a
@@ -695,6 +691,13 @@ struct journal_s
 	 */
 	tid_t			j_commit_request;
 
+	/*
+	 * Sequence number of the most recent transaction someone is waiting
+	 * for to commit.
+	 * [j_state_lock]
+	 */
+	tid_t                   j_commit_waited;
+
 	/*
 	 * Journal uuid: identifies the object (filesystem, LVM volume etc)
 	 * backed by this journal.  This will eventually be replaced by an array
diff --git a/include/trace/events/jbd.h b/include/trace/events/jbd.h
index aff64d82d713..9305e1b5edc3 100644
--- a/include/trace/events/jbd.h
+++ b/include/trace/events/jbd.h
@@ -36,19 +36,17 @@ DECLARE_EVENT_CLASS(jbd_commit,
 
 	TP_STRUCT__entry(
 		__field(	dev_t,	dev			)
-		__field(	char,	sync_commit		)
 		__field(	int,	transaction		)
 	),
 
 	TP_fast_assign(
 		__entry->dev		= journal->j_fs_dev->bd_dev;
-		__entry->sync_commit = commit_transaction->t_synchronous_commit;
 		__entry->transaction	= commit_transaction->t_tid;
 	),
 
-	TP_printk("dev %d,%d transaction %d sync %d",
+	TP_printk("dev %d,%d transaction %d",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  __entry->transaction, __entry->sync_commit)
+		  __entry->transaction)
 );
 
 DEFINE_EVENT(jbd_commit, jbd_start_commit,
@@ -87,19 +85,17 @@ TRACE_EVENT(jbd_drop_transaction,
 
 	TP_STRUCT__entry(
 		__field(	dev_t,	dev			)
-		__field(	char,	sync_commit		)
 		__field(	int,	transaction		)
 	),
 
 	TP_fast_assign(
 		__entry->dev		= journal->j_fs_dev->bd_dev;
-		__entry->sync_commit = commit_transaction->t_synchronous_commit;
 		__entry->transaction	= commit_transaction->t_tid;
 	),
 
-	TP_printk("dev %d,%d transaction %d sync %d",
+	TP_printk("dev %d,%d transaction %d",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  __entry->transaction, __entry->sync_commit)
+		  __entry->transaction)
 );
 
 TRACE_EVENT(jbd_end_commit,
@@ -109,21 +105,19 @@ TRACE_EVENT(jbd_end_commit,
 
 	TP_STRUCT__entry(
 		__field(	dev_t,	dev			)
-		__field(	char,	sync_commit		)
 		__field(	int,	transaction		)
 		__field(	int,	head			)
 	),
 
 	TP_fast_assign(
 		__entry->dev		= journal->j_fs_dev->bd_dev;
-		__entry->sync_commit = commit_transaction->t_synchronous_commit;
 		__entry->transaction	= commit_transaction->t_tid;
 		__entry->head		= journal->j_tail_sequence;
 	),
 
-	TP_printk("dev %d,%d transaction %d sync %d head %d",
+	TP_printk("dev %d,%d transaction %d head %d",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  __entry->transaction, __entry->sync_commit, __entry->head)
+		  __entry->transaction, __entry->head)
 );
 
 TRACE_EVENT(jbd_do_submit_data,
@@ -133,19 +127,17 @@ TRACE_EVENT(jbd_do_submit_data,
 
 	TP_STRUCT__entry(
 		__field(	dev_t,	dev			)
-		__field(	char,	sync_commit		)
 		__field(	int,	transaction		)
 	),
 
 	TP_fast_assign(
 		__entry->dev		= journal->j_fs_dev->bd_dev;
-		__entry->sync_commit = commit_transaction->t_synchronous_commit;
 		__entry->transaction	= commit_transaction->t_tid;
 	),
 
-	TP_printk("dev %d,%d transaction %d sync %d",
+	TP_printk("dev %d,%d transaction %d",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		   __entry->transaction, __entry->sync_commit)
+		   __entry->transaction)
 );
 
 TRACE_EVENT(jbd_cleanup_journal_tail,
-- 
cgit v1.3-7-g2ca7


From 9fe2a7015393dc0203ac39242ae9c89038994f3c Mon Sep 17 00:00:00 2001
From: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
Date: Fri, 23 Mar 2012 13:36:28 +0530
Subject: debugfs: Add support to print u32 array in debugfs

Move the code from Xen to debugfs to make the code common
for other users as well.

Accked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
Signed-off-by: Suzuki Poulose <suzuki@in.ibm.com>
[v1: Fixed rebase issues]
[v2: Fixed PPC compile issues]
Signed-off-by: Raghavendra K T <raghavendra.kt@linux.vnet.ibm.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/xen/debugfs.c  | 104 ---------------------------------------
 arch/x86/xen/debugfs.h  |   4 --
 arch/x86/xen/spinlock.c |  12 ++---
 fs/debugfs/file.c       | 128 ++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/debugfs.h |  11 +++++
 5 files changed, 145 insertions(+), 114 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/xen/debugfs.c b/arch/x86/xen/debugfs.c
index ef1db1900d86..c8377fb26cdf 100644
--- a/arch/x86/xen/debugfs.c
+++ b/arch/x86/xen/debugfs.c
@@ -19,107 +19,3 @@ struct dentry * __init xen_init_debugfs(void)
 	return d_xen_debug;
 }
 
-struct array_data
-{
-	void *array;
-	unsigned elements;
-};
-
-static int u32_array_open(struct inode *inode, struct file *file)
-{
-	file->private_data = NULL;
-	return nonseekable_open(inode, file);
-}
-
-static size_t format_array(char *buf, size_t bufsize, const char *fmt,
-			   u32 *array, unsigned array_size)
-{
-	size_t ret = 0;
-	unsigned i;
-
-	for(i = 0; i < array_size; i++) {
-		size_t len;
-
-		len = snprintf(buf, bufsize, fmt, array[i]);
-		len++;	/* ' ' or '\n' */
-		ret += len;
-
-		if (buf) {
-			buf += len;
-			bufsize -= len;
-			buf[-1] = (i == array_size-1) ? '\n' : ' ';
-		}
-	}
-
-	ret++;		/* \0 */
-	if (buf)
-		*buf = '\0';
-
-	return ret;
-}
-
-static char *format_array_alloc(const char *fmt, u32 *array, unsigned array_size)
-{
-	size_t len = format_array(NULL, 0, fmt, array, array_size);
-	char *ret;
-
-	ret = kmalloc(len, GFP_KERNEL);
-	if (ret == NULL)
-		return NULL;
-
-	format_array(ret, len, fmt, array, array_size);
-	return ret;
-}
-
-static ssize_t u32_array_read(struct file *file, char __user *buf, size_t len,
-			      loff_t *ppos)
-{
-	struct inode *inode = file->f_path.dentry->d_inode;
-	struct array_data *data = inode->i_private;
-	size_t size;
-
-	if (*ppos == 0) {
-		if (file->private_data) {
-			kfree(file->private_data);
-			file->private_data = NULL;
-		}
-
-		file->private_data = format_array_alloc("%u", data->array, data->elements);
-	}
-
-	size = 0;
-	if (file->private_data)
-		size = strlen(file->private_data);
-
-	return simple_read_from_buffer(buf, len, ppos, file->private_data, size);
-}
-
-static int xen_array_release(struct inode *inode, struct file *file)
-{
-	kfree(file->private_data);
-
-	return 0;
-}
-
-static const struct file_operations u32_array_fops = {
-	.owner	= THIS_MODULE,
-	.open	= u32_array_open,
-	.release= xen_array_release,
-	.read	= u32_array_read,
-	.llseek = no_llseek,
-};
-
-struct dentry *xen_debugfs_create_u32_array(const char *name, umode_t mode,
-					    struct dentry *parent,
-					    u32 *array, unsigned elements)
-{
-	struct array_data *data = kmalloc(sizeof(*data), GFP_KERNEL);
-
-	if (data == NULL)
-		return NULL;
-
-	data->array = array;
-	data->elements = elements;
-
-	return debugfs_create_file(name, mode, parent, data, &u32_array_fops);
-}
diff --git a/arch/x86/xen/debugfs.h b/arch/x86/xen/debugfs.h
index 78d25499be5b..12ebf3325c7b 100644
--- a/arch/x86/xen/debugfs.h
+++ b/arch/x86/xen/debugfs.h
@@ -3,8 +3,4 @@
 
 struct dentry * __init xen_init_debugfs(void);
 
-struct dentry *xen_debugfs_create_u32_array(const char *name, umode_t mode,
-					    struct dentry *parent,
-					    u32 *array, unsigned elements);
-
 #endif /* _XEN_DEBUGFS_H */
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index d69cc6c3f808..83e866d714ce 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -440,12 +440,12 @@ static int __init xen_spinlock_debugfs(void)
 	debugfs_create_u64("time_total", 0444, d_spin_debug,
 			   &spinlock_stats.time_total);
 
-	xen_debugfs_create_u32_array("histo_total", 0444, d_spin_debug,
-				     spinlock_stats.histo_spin_total, HISTO_BUCKETS + 1);
-	xen_debugfs_create_u32_array("histo_spinning", 0444, d_spin_debug,
-				     spinlock_stats.histo_spin_spinning, HISTO_BUCKETS + 1);
-	xen_debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug,
-				     spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1);
+	debugfs_create_u32_array("histo_total", 0444, d_spin_debug,
+				spinlock_stats.histo_spin_total, HISTO_BUCKETS + 1);
+	debugfs_create_u32_array("histo_spinning", 0444, d_spin_debug,
+				spinlock_stats.histo_spin_spinning, HISTO_BUCKETS + 1);
+	debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug,
+				spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1);
 
 	return 0;
 }
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index 5dfafdd1dbd3..2340f6978d6e 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -20,6 +20,7 @@
 #include <linux/namei.h>
 #include <linux/debugfs.h>
 #include <linux/io.h>
+#include <linux/slab.h>
 
 static ssize_t default_read_file(struct file *file, char __user *buf,
 				 size_t count, loff_t *ppos)
@@ -520,6 +521,133 @@ struct dentry *debugfs_create_blob(const char *name, umode_t mode,
 }
 EXPORT_SYMBOL_GPL(debugfs_create_blob);
 
+struct array_data {
+	void *array;
+	u32 elements;
+};
+
+static int u32_array_open(struct inode *inode, struct file *file)
+{
+	file->private_data = NULL;
+	return nonseekable_open(inode, file);
+}
+
+static size_t format_array(char *buf, size_t bufsize, const char *fmt,
+			   u32 *array, u32 array_size)
+{
+	size_t ret = 0;
+	u32 i;
+
+	for (i = 0; i < array_size; i++) {
+		size_t len;
+
+		len = snprintf(buf, bufsize, fmt, array[i]);
+		len++;	/* ' ' or '\n' */
+		ret += len;
+
+		if (buf) {
+			buf += len;
+			bufsize -= len;
+			buf[-1] = (i == array_size-1) ? '\n' : ' ';
+		}
+	}
+
+	ret++;		/* \0 */
+	if (buf)
+		*buf = '\0';
+
+	return ret;
+}
+
+static char *format_array_alloc(const char *fmt, u32 *array,
+						u32 array_size)
+{
+	size_t len = format_array(NULL, 0, fmt, array, array_size);
+	char *ret;
+
+	ret = kmalloc(len, GFP_KERNEL);
+	if (ret == NULL)
+		return NULL;
+
+	format_array(ret, len, fmt, array, array_size);
+	return ret;
+}
+
+static ssize_t u32_array_read(struct file *file, char __user *buf, size_t len,
+			      loff_t *ppos)
+{
+	struct inode *inode = file->f_path.dentry->d_inode;
+	struct array_data *data = inode->i_private;
+	size_t size;
+
+	if (*ppos == 0) {
+		if (file->private_data) {
+			kfree(file->private_data);
+			file->private_data = NULL;
+		}
+
+		file->private_data = format_array_alloc("%u", data->array,
+							      data->elements);
+	}
+
+	size = 0;
+	if (file->private_data)
+		size = strlen(file->private_data);
+
+	return simple_read_from_buffer(buf, len, ppos,
+					file->private_data, size);
+}
+
+static int u32_array_release(struct inode *inode, struct file *file)
+{
+	kfree(file->private_data);
+
+	return 0;
+}
+
+static const struct file_operations u32_array_fops = {
+	.owner	 = THIS_MODULE,
+	.open	 = u32_array_open,
+	.release = u32_array_release,
+	.read	 = u32_array_read,
+	.llseek  = no_llseek,
+};
+
+/**
+ * debugfs_create_u32_array - create a debugfs file that is used to read u32
+ * array.
+ * @name: a pointer to a string containing the name of the file to create.
+ * @mode: the permission that the file should have.
+ * @parent: a pointer to the parent dentry for this file.  This should be a
+ *          directory dentry if set.  If this parameter is %NULL, then the
+ *          file will be created in the root of the debugfs filesystem.
+ * @array: u32 array that provides data.
+ * @elements: total number of elements in the array.
+ *
+ * This function creates a file in debugfs with the given name that exports
+ * @array as data. If the @mode variable is so set it can be read from.
+ * Writing is not supported. Seek within the file is also not supported.
+ * Once array is created its size can not be changed.
+ *
+ * The function returns a pointer to dentry on success. If debugfs is not
+ * enabled in the kernel, the value -%ENODEV will be returned.
+ */
+struct dentry *debugfs_create_u32_array(const char *name, umode_t mode,
+					    struct dentry *parent,
+					    u32 *array, u32 elements)
+{
+	struct array_data *data = kmalloc(sizeof(*data), GFP_KERNEL);
+
+	if (data == NULL)
+		return NULL;
+
+	data->array = array;
+	data->elements = elements;
+
+	return debugfs_create_file(name, mode, parent, data, &u32_array_fops);
+}
+EXPORT_SYMBOL_GPL(debugfs_create_u32_array);
+
 #ifdef CONFIG_HAS_IOMEM
 
 /*
diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h
index ae36b72c22f3..66c434f5dd1e 100644
--- a/include/linux/debugfs.h
+++ b/include/linux/debugfs.h
@@ -93,6 +93,10 @@ struct dentry *debugfs_create_regset32(const char *name, umode_t mode,
 int debugfs_print_regs32(struct seq_file *s, const struct debugfs_reg32 *regs,
 			 int nregs, void __iomem *base, char *prefix);
 
+struct dentry *debugfs_create_u32_array(const char *name, umode_t mode,
+					struct dentry *parent,
+					u32 *array, u32 elements);
+
 bool debugfs_initialized(void);
 
 #else
@@ -219,6 +223,13 @@ static inline bool debugfs_initialized(void)
 	return false;
 }
 
+static inline struct dentry *debugfs_create_u32_array(const char *name, umode_t mode,
+					struct dentry *parent,
+					u32 *array, u32 elements)
+{
+	return ERR_PTR(-ENODEV);
+}
+
 #endif
 
 #endif
-- 
cgit v1.3-7-g2ca7


From f78146b0f9230765c6315b2e14f56112513389ad Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Wed, 18 Apr 2012 19:22:47 +0300
Subject: KVM: Fix page-crossing MMIO

MMIO that are split across a page boundary are currently broken - the
code does not expect to be aborted by the exit to userspace for the
first MMIO fragment.

This patch fixes the problem by generalizing the current code for handling
16-byte MMIOs to handle a number of "fragments", and changes the MMIO
code to create those fragments.

Signed-off-by: Avi Kivity <avi@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/ia64/include/asm/kvm_host.h |   2 +
 arch/ia64/kvm/kvm-ia64.c         |  10 ++--
 arch/x86/kvm/x86.c               | 114 +++++++++++++++++++++++++++------------
 include/linux/kvm_host.h         |  31 +++++++++--
 4 files changed, 115 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h
index c4b4bac3d09e..6d6a5ac48d85 100644
--- a/arch/ia64/include/asm/kvm_host.h
+++ b/arch/ia64/include/asm/kvm_host.h
@@ -449,6 +449,8 @@ struct kvm_vcpu_arch {
 	char log_buf[VMM_LOG_LEN];
 	union context host;
 	union context guest;
+
+	char mmio_data[8];
 };
 
 struct kvm_vm_stat {
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 9d80ff8d9eff..882ab21a8dcd 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -232,12 +232,12 @@ static int handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	if ((p->addr & PAGE_MASK) == IOAPIC_DEFAULT_BASE_ADDRESS)
 		goto mmio;
 	vcpu->mmio_needed = 1;
-	vcpu->mmio_phys_addr = kvm_run->mmio.phys_addr = p->addr;
-	vcpu->mmio_size = kvm_run->mmio.len = p->size;
+	vcpu->mmio_fragments[0].gpa = kvm_run->mmio.phys_addr = p->addr;
+	vcpu->mmio_fragments[0].len = kvm_run->mmio.len = p->size;
 	vcpu->mmio_is_write = kvm_run->mmio.is_write = !p->dir;
 
 	if (vcpu->mmio_is_write)
-		memcpy(vcpu->mmio_data, &p->data, p->size);
+		memcpy(vcpu->arch.mmio_data, &p->data, p->size);
 	memcpy(kvm_run->mmio.data, &p->data, p->size);
 	kvm_run->exit_reason = KVM_EXIT_MMIO;
 	return 0;
@@ -719,7 +719,7 @@ static void kvm_set_mmio_data(struct kvm_vcpu *vcpu)
 	struct kvm_mmio_req *p = kvm_get_vcpu_ioreq(vcpu);
 
 	if (!vcpu->mmio_is_write)
-		memcpy(&p->data, vcpu->mmio_data, 8);
+		memcpy(&p->data, vcpu->arch.mmio_data, 8);
 	p->state = STATE_IORESP_READY;
 }
 
@@ -739,7 +739,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	}
 
 	if (vcpu->mmio_needed) {
-		memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8);
+		memcpy(vcpu->arch.mmio_data, kvm_run->mmio.data, 8);
 		kvm_set_mmio_data(vcpu);
 		vcpu->mmio_read_completed = 1;
 		vcpu->mmio_needed = 0;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0d9a57875f0b..4de705cdcafd 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3718,9 +3718,8 @@ struct read_write_emulator_ops {
 static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes)
 {
 	if (vcpu->mmio_read_completed) {
-		memcpy(val, vcpu->mmio_data, bytes);
 		trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
-			       vcpu->mmio_phys_addr, *(u64 *)val);
+			       vcpu->mmio_fragments[0].gpa, *(u64 *)val);
 		vcpu->mmio_read_completed = 0;
 		return 1;
 	}
@@ -3756,8 +3755,9 @@ static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
 static int write_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
 			   void *val, int bytes)
 {
-	memcpy(vcpu->mmio_data, val, bytes);
-	memcpy(vcpu->run->mmio.data, vcpu->mmio_data, 8);
+	struct kvm_mmio_fragment *frag = &vcpu->mmio_fragments[0];
+
+	memcpy(vcpu->run->mmio.data, frag->data, frag->len);
 	return X86EMUL_CONTINUE;
 }
 
@@ -3784,10 +3784,7 @@ static int emulator_read_write_onepage(unsigned long addr, void *val,
 	gpa_t gpa;
 	int handled, ret;
 	bool write = ops->write;
-
-	if (ops->read_write_prepare &&
-		  ops->read_write_prepare(vcpu, val, bytes))
-		return X86EMUL_CONTINUE;
+	struct kvm_mmio_fragment *frag;
 
 	ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write);
 
@@ -3813,15 +3810,19 @@ mmio:
 	bytes -= handled;
 	val += handled;
 
-	vcpu->mmio_needed = 1;
-	vcpu->run->exit_reason = KVM_EXIT_MMIO;
-	vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa;
-	vcpu->mmio_size = bytes;
-	vcpu->run->mmio.len = min(vcpu->mmio_size, 8);
-	vcpu->run->mmio.is_write = vcpu->mmio_is_write = write;
-	vcpu->mmio_index = 0;
+	while (bytes) {
+		unsigned now = min(bytes, 8U);
 
-	return ops->read_write_exit_mmio(vcpu, gpa, val, bytes);
+		frag = &vcpu->mmio_fragments[vcpu->mmio_nr_fragments++];
+		frag->gpa = gpa;
+		frag->data = val;
+		frag->len = now;
+
+		gpa += now;
+		val += now;
+		bytes -= now;
+	}
+	return X86EMUL_CONTINUE;
 }
 
 int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr,
@@ -3830,10 +3831,18 @@ int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr,
 			struct read_write_emulator_ops *ops)
 {
 	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
+	gpa_t gpa;
+	int rc;
+
+	if (ops->read_write_prepare &&
+		  ops->read_write_prepare(vcpu, val, bytes))
+		return X86EMUL_CONTINUE;
+
+	vcpu->mmio_nr_fragments = 0;
 
 	/* Crossing a page boundary? */
 	if (((addr + bytes - 1) ^ addr) & PAGE_MASK) {
-		int rc, now;
+		int now;
 
 		now = -addr & ~PAGE_MASK;
 		rc = emulator_read_write_onepage(addr, val, now, exception,
@@ -3846,8 +3855,25 @@ int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr,
 		bytes -= now;
 	}
 
-	return emulator_read_write_onepage(addr, val, bytes, exception,
-					   vcpu, ops);
+	rc = emulator_read_write_onepage(addr, val, bytes, exception,
+					 vcpu, ops);
+	if (rc != X86EMUL_CONTINUE)
+		return rc;
+
+	if (!vcpu->mmio_nr_fragments)
+		return rc;
+
+	gpa = vcpu->mmio_fragments[0].gpa;
+
+	vcpu->mmio_needed = 1;
+	vcpu->mmio_cur_fragment = 0;
+
+	vcpu->run->mmio.len = vcpu->mmio_fragments[0].len;
+	vcpu->run->mmio.is_write = vcpu->mmio_is_write = ops->write;
+	vcpu->run->exit_reason = KVM_EXIT_MMIO;
+	vcpu->run->mmio.phys_addr = gpa;
+
+	return ops->read_write_exit_mmio(vcpu, gpa, val, bytes);
 }
 
 static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt,
@@ -5446,33 +5472,55 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
 	return r;
 }
 
+/*
+ * Implements the following, as a state machine:
+ *
+ * read:
+ *   for each fragment
+ *     write gpa, len
+ *     exit
+ *     copy data
+ *   execute insn
+ *
+ * write:
+ *   for each fragment
+ *      write gpa, len
+ *      copy data
+ *      exit
+ */
 static int complete_mmio(struct kvm_vcpu *vcpu)
 {
 	struct kvm_run *run = vcpu->run;
+	struct kvm_mmio_fragment *frag;
 	int r;
 
 	if (!(vcpu->arch.pio.count || vcpu->mmio_needed))
 		return 1;
 
 	if (vcpu->mmio_needed) {
-		vcpu->mmio_needed = 0;
+		/* Complete previous fragment */
+		frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment++];
 		if (!vcpu->mmio_is_write)
-			memcpy(vcpu->mmio_data + vcpu->mmio_index,
-			       run->mmio.data, 8);
-		vcpu->mmio_index += 8;
-		if (vcpu->mmio_index < vcpu->mmio_size) {
-			run->exit_reason = KVM_EXIT_MMIO;
-			run->mmio.phys_addr = vcpu->mmio_phys_addr + vcpu->mmio_index;
-			memcpy(run->mmio.data, vcpu->mmio_data + vcpu->mmio_index, 8);
-			run->mmio.len = min(vcpu->mmio_size - vcpu->mmio_index, 8);
-			run->mmio.is_write = vcpu->mmio_is_write;
-			vcpu->mmio_needed = 1;
-			return 0;
+			memcpy(frag->data, run->mmio.data, frag->len);
+		if (vcpu->mmio_cur_fragment == vcpu->mmio_nr_fragments) {
+			vcpu->mmio_needed = 0;
+			if (vcpu->mmio_is_write)
+				return 1;
+			vcpu->mmio_read_completed = 1;
+			goto done;
 		}
+		/* Initiate next fragment */
+		++frag;
+		run->exit_reason = KVM_EXIT_MMIO;
+		run->mmio.phys_addr = frag->gpa;
 		if (vcpu->mmio_is_write)
-			return 1;
-		vcpu->mmio_read_completed = 1;
+			memcpy(run->mmio.data, frag->data, frag->len);
+		run->mmio.len = frag->len;
+		run->mmio.is_write = vcpu->mmio_is_write;
+		return 0;
+
 	}
+done:
 	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
 	r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
 	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index a2d00b1bbf54..186ffab0b9f0 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -34,6 +34,20 @@
 #define KVM_MMIO_SIZE 8
 #endif
 
+/*
+ * If we support unaligned MMIO, at most one fragment will be split into two:
+ */
+#ifdef KVM_UNALIGNED_MMIO
+#  define KVM_EXTRA_MMIO_FRAGMENTS 1
+#else
+#  define KVM_EXTRA_MMIO_FRAGMENTS 0
+#endif
+
+#define KVM_USER_MMIO_SIZE 8
+
+#define KVM_MAX_MMIO_FRAGMENTS \
+	(KVM_MMIO_SIZE / KVM_USER_MMIO_SIZE + KVM_EXTRA_MMIO_FRAGMENTS)
+
 /*
  * vcpu->requests bit members
  */
@@ -117,6 +131,16 @@ enum {
 	EXITING_GUEST_MODE
 };
 
+/*
+ * Sometimes a large or cross-page mmio needs to be broken up into separate
+ * exits for userspace servicing.
+ */
+struct kvm_mmio_fragment {
+	gpa_t gpa;
+	void *data;
+	unsigned len;
+};
+
 struct kvm_vcpu {
 	struct kvm *kvm;
 #ifdef CONFIG_PREEMPT_NOTIFIERS
@@ -144,10 +168,9 @@ struct kvm_vcpu {
 	int mmio_needed;
 	int mmio_read_completed;
 	int mmio_is_write;
-	int mmio_size;
-	int mmio_index;
-	unsigned char mmio_data[KVM_MMIO_SIZE];
-	gpa_t mmio_phys_addr;
+	int mmio_cur_fragment;
+	int mmio_nr_fragments;
+	struct kvm_mmio_fragment mmio_fragments[KVM_MAX_MMIO_FRAGMENTS];
 #endif
 
 #ifdef CONFIG_KVM_ASYNC_PF
-- 
cgit v1.3-7-g2ca7


From 4ccf4beab8c447f8cd33d46afb6e10e1aa3befc6 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <w.sang@pengutronix.de>
Date: Wed, 31 Aug 2011 20:35:40 +0200
Subject: lib: add support for stmp-style devices

MX23/28 use IP cores which follow a register layout I have first seen on
STMP3xxx SoCs. In this layout, every register actually has four u32:

 1.) to store a value directly
 2.) a SET register where every 1-bit sets the corresponding bit,
     others are unaffected
 3.) same with a CLR register
 4.) same with a TOG (toggle) register

Also, the 2 MSBs in register 0 are always the same and can be used to reset
the IP core.

All this is strictly speaking not mach-specific (but IP core specific) and,
thus, doesn't need to be in mach-mxs/include. At least mx6 also uses IP cores
following this stmp-style. So:

Introduce a stmp-style device, put the code and defines for that in a public
place (lib/), and let drivers for stmp-style devices select that code.
To avoid regressions and ease reviewing, the actual code is simply copied from
mach-mxs. It definately wants updates, but those need a seperate patch series.

Voila, mach dependency gone, reusable code introduced. Note that I didn't
remove the duplicated code from mach-mxs yet, first the drivers have to be
converted.

Signed-off-by: Wolfram Sang <w.sang@pengutronix.de>
Acked-by: Shawn Guo <shawn.guo@linaro.org>
Acked-by: Dong Aisheng <dong.aisheng@linaro.org>
---
 include/linux/stmp_device.h | 20 ++++++++++++
 lib/Kconfig                 |  3 ++
 lib/Makefile                |  2 ++
 lib/stmp_device.c           | 80 +++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 105 insertions(+)
 create mode 100644 include/linux/stmp_device.h
 create mode 100644 lib/stmp_device.c

(limited to 'include/linux')

diff --git a/include/linux/stmp_device.h b/include/linux/stmp_device.h
new file mode 100644
index 000000000000..6cf7ec9547cf
--- /dev/null
+++ b/include/linux/stmp_device.h
@@ -0,0 +1,20 @@
+/*
+ * basic functions for devices following the "stmp" style register layout
+ *
+ * Copyright (C) 2011 Wolfram Sang, Pengutronix e.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef __STMP_DEVICE_H__
+#define __STMP_DEVICE_H__
+
+#define STMP_OFFSET_REG_SET	0x4
+#define STMP_OFFSET_REG_CLR	0x8
+#define STMP_OFFSET_REG_TOG	0xc
+
+extern int stmp_reset_block(void __iomem *);
+#endif /* __STMP_DEVICE_H__ */
diff --git a/lib/Kconfig b/lib/Kconfig
index 4a8aba2e5cc0..c5da1548b964 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -33,6 +33,9 @@ config GENERIC_IO
 	boolean
 	default n
 
+config STMP_DEVICE
+	bool
+
 config CRC_CCITT
 	tristate "CRC-CCITT functions"
 	help
diff --git a/lib/Makefile b/lib/Makefile
index 18515f0267c4..f78dbcdc7e3d 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -123,6 +123,8 @@ obj-$(CONFIG_SIGNATURE) += digsig.o
 
 obj-$(CONFIG_CLZ_TAB) += clz_tab.o
 
+obj-$(CONFIG_STMP_DEVICE) += stmp_device.o
+
 hostprogs-y	:= gen_crc32table
 clean-files	:= crc32table.h
 
diff --git a/lib/stmp_device.c b/lib/stmp_device.c
new file mode 100644
index 000000000000..8ac9bcc4289a
--- /dev/null
+++ b/lib/stmp_device.c
@@ -0,0 +1,80 @@
+/*
+ * Copyright (C) 1999 ARM Limited
+ * Copyright (C) 2000 Deep Blue Solutions Ltd
+ * Copyright 2006-2007,2010 Freescale Semiconductor, Inc. All Rights Reserved.
+ * Copyright 2008 Juergen Beisert, kernel@pengutronix.de
+ * Copyright 2009 Ilya Yanok, Emcraft Systems Ltd, yanok@emcraft.com
+ * Copyright (C) 2011 Wolfram Sang, Pengutronix e.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/io.h>
+#include <linux/errno.h>
+#include <linux/delay.h>
+#include <linux/module.h>
+#include <linux/stmp_device.h>
+
+#define STMP_MODULE_CLKGATE	(1 << 30)
+#define STMP_MODULE_SFTRST	(1 << 31)
+
+/*
+ * Clear the bit and poll it cleared.  This is usually called with
+ * a reset address and mask being either SFTRST(bit 31) or CLKGATE
+ * (bit 30).
+ */
+static int stmp_clear_poll_bit(void __iomem *addr, u32 mask)
+{
+	int timeout = 0x400;
+
+	writel(mask, addr + STMP_OFFSET_REG_CLR);
+	udelay(1);
+	while ((readl(addr) & mask) && --timeout)
+		/* nothing */;
+
+	return !timeout;
+}
+
+int stmp_reset_block(void __iomem *reset_addr)
+{
+	int ret;
+	int timeout = 0x400;
+
+	/* clear and poll SFTRST */
+	ret = stmp_clear_poll_bit(reset_addr, STMP_MODULE_SFTRST);
+	if (unlikely(ret))
+		goto error;
+
+	/* clear CLKGATE */
+	writel(STMP_MODULE_CLKGATE, reset_addr + STMP_OFFSET_REG_CLR);
+
+	/* set SFTRST to reset the block */
+	writel(STMP_MODULE_SFTRST, reset_addr + STMP_OFFSET_REG_SET);
+	udelay(1);
+
+	/* poll CLKGATE becoming set */
+	while ((!(readl(reset_addr) & STMP_MODULE_CLKGATE)) && --timeout)
+		/* nothing */;
+	if (unlikely(!timeout))
+		goto error;
+
+	/* clear and poll SFTRST */
+	ret = stmp_clear_poll_bit(reset_addr, STMP_MODULE_SFTRST);
+	if (unlikely(ret))
+		goto error;
+
+	/* clear and poll CLKGATE */
+	ret = stmp_clear_poll_bit(reset_addr, STMP_MODULE_CLKGATE);
+	if (unlikely(ret))
+		goto error;
+
+	return 0;
+
+error:
+	pr_err("%s(%p): module reset timeout\n", __func__, reset_addr);
+	return -ETIMEDOUT;
+}
+EXPORT_SYMBOL(stmp_reset_block);
-- 
cgit v1.3-7-g2ca7


From bbbc4c4d8c5face097d695f9bf3a39647ba6b7e7 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nicolas.pitre@linaro.org>
Date: Mon, 16 Apr 2012 19:16:54 -0400
Subject: mmc: sdio: avoid spurious calls to interrupt handlers

Commit 06e8935feb ("optimized SDIO IRQ handling for single irq")
introduced some spurious calls to SDIO function interrupt handlers,
such as when the SDIO IRQ thread is started, or the safety check
performed upon a system resume.  Let's add a flag to perform the
optimization only when a real interrupt is signaled by the host
driver and we know there is no point confirming it.

Reported-by: Sujit Reddy Thumma <sthumma@codeaurora.org>
Signed-off-by: Nicolas Pitre <nico@linaro.org>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/core/sdio.c     |  2 +-
 drivers/mmc/core/sdio_irq.c | 11 +++++++----
 include/linux/mmc/host.h    |  2 ++
 3 files changed, 10 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c
index 2c7c83f832d2..13d0e95380ab 100644
--- a/drivers/mmc/core/sdio.c
+++ b/drivers/mmc/core/sdio.c
@@ -947,7 +947,7 @@ static int mmc_sdio_resume(struct mmc_host *host)
 	}
 
 	if (!err && host->sdio_irqs)
-		mmc_signal_sdio_irq(host);
+		wake_up_process(host->sdio_irq_thread);
 	mmc_release_host(host);
 
 	/*
diff --git a/drivers/mmc/core/sdio_irq.c b/drivers/mmc/core/sdio_irq.c
index f573e7f9f740..3d8ceb4084de 100644
--- a/drivers/mmc/core/sdio_irq.c
+++ b/drivers/mmc/core/sdio_irq.c
@@ -28,18 +28,20 @@
 
 #include "sdio_ops.h"
 
-static int process_sdio_pending_irqs(struct mmc_card *card)
+static int process_sdio_pending_irqs(struct mmc_host *host)
 {
+	struct mmc_card *card = host->card;
 	int i, ret, count;
 	unsigned char pending;
 	struct sdio_func *func;
 
 	/*
 	 * Optimization, if there is only 1 function interrupt registered
-	 * call irq handler directly
+	 * and we know an IRQ was signaled then call irq handler directly.
+	 * Otherwise do the full probe.
 	 */
 	func = card->sdio_single_irq;
-	if (func) {
+	if (func && host->sdio_irq_pending) {
 		func->irq_handler(func);
 		return 1;
 	}
@@ -116,7 +118,8 @@ static int sdio_irq_thread(void *_host)
 		ret = __mmc_claim_host(host, &host->sdio_irq_thread_abort);
 		if (ret)
 			break;
-		ret = process_sdio_pending_irqs(host->card);
+		ret = process_sdio_pending_irqs(host);
+		host->sdio_irq_pending = false;
 		mmc_release_host(host);
 
 		/*
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index cbde4b7e675e..0707d228d7f1 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -297,6 +297,7 @@ struct mmc_host {
 
 	unsigned int		sdio_irqs;
 	struct task_struct	*sdio_irq_thread;
+	bool			sdio_irq_pending;
 	atomic_t		sdio_irq_thread_abort;
 
 	mmc_pm_flag_t		pm_flags;	/* requested pm features */
@@ -352,6 +353,7 @@ extern int mmc_cache_ctrl(struct mmc_host *, u8);
 static inline void mmc_signal_sdio_irq(struct mmc_host *host)
 {
 	host->ops->enable_sdio_irq(host, 0);
+	host->sdio_irq_pending = true;
 	wake_up_process(host->sdio_irq_thread);
 }
 
-- 
cgit v1.3-7-g2ca7


From 07975ad3b30579ca27d880491ad992326b930c63 Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Thu, 29 Mar 2012 21:14:12 +0200
Subject: KVM: Introduce direct MSI message injection for in-kernel irqchips

Currently, MSI messages can only be injected to in-kernel irqchips by
defining a corresponding IRQ route for each message. This is not only
unhandy if the MSI messages are generated "on the fly" by user space,
IRQ routes are a limited resource that user space has to manage
carefully.

By providing a direct injection path, we can both avoid using up limited
resources and simplify the necessary steps for user land.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 Documentation/virtual/kvm/api.txt | 21 +++++++++++++++++++++
 arch/x86/kvm/Kconfig              |  1 +
 include/linux/kvm.h               | 11 +++++++++++
 include/linux/kvm_host.h          |  2 ++
 virt/kvm/Kconfig                  |  3 +++
 virt/kvm/irq_comm.c               | 14 ++++++++++++++
 virt/kvm/kvm_main.c               | 14 ++++++++++++++
 7 files changed, 66 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 81ff39f6248d..a1552210b16d 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1689,6 +1689,27 @@ where the guest will clear the flag: when the soft lockup watchdog timer resets
 itself or when a soft lockup is detected.  This ioctl can be called any time
 after pausing the vcpu, but before it is resumed.
 
+4.71 KVM_SIGNAL_MSI
+
+Capability: KVM_CAP_SIGNAL_MSI
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_msi (in)
+Returns: >0 on delivery, 0 if guest blocked the MSI, and -1 on error
+
+Directly inject a MSI message. Only valid with in-kernel irqchip that handles
+MSI messages.
+
+struct kvm_msi {
+	__u32 address_lo;
+	__u32 address_hi;
+	__u32 data;
+	__u32 flags;
+	__u8  pad[16];
+};
+
+No flags are defined so far. The corresponding field must be 0.
+
 5. The kvm_run structure
 
 Application code obtains a pointer to the kvm_run structure by
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 1a7fe868f375..a28f338843ea 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -36,6 +36,7 @@ config KVM
 	select TASKSTATS
 	select TASK_DELAY_ACCT
 	select PERF_EVENTS
+	select HAVE_KVM_MSI
 	---help---
 	  Support hosting fully virtualized guest machines using hardware
 	  virtualization extensions.  You will need a fairly recent
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 7a9dd4b3dede..225b452e1d1d 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -590,6 +590,7 @@ struct kvm_ppc_pvinfo {
 #define KVM_CAP_SYNC_REGS 74
 #define KVM_CAP_PCI_2_3 75
 #define KVM_CAP_KVMCLOCK_CTRL 76
+#define KVM_CAP_SIGNAL_MSI 77
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -715,6 +716,14 @@ struct kvm_one_reg {
 	__u64 addr;
 };
 
+struct kvm_msi {
+	__u32 address_lo;
+	__u32 address_hi;
+	__u32 data;
+	__u32 flags;
+	__u8  pad[16];
+};
+
 /*
  * ioctls for VM fds
  */
@@ -789,6 +798,8 @@ struct kvm_s390_ucas_mapping {
 /* Available with KVM_CAP_PCI_2_3 */
 #define KVM_ASSIGN_SET_INTX_MASK  _IOW(KVMIO,  0xa4, \
 				       struct kvm_assigned_pci_dev)
+/* Available with KVM_CAP_SIGNAL_MSI */
+#define KVM_SIGNAL_MSI            _IOW(KVMIO,  0xa5, struct kvm_msi)
 
 /*
  * ioctls for vcpu fds
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 186ffab0b9f0..6f343307d72b 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -802,6 +802,8 @@ int kvm_set_irq_routing(struct kvm *kvm,
 			unsigned flags);
 void kvm_free_irq_routing(struct kvm *kvm);
 
+int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi);
+
 #else
 
 static inline void kvm_free_irq_routing(struct kvm *kvm) {}
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index f63ccb0a5982..28694f4a9139 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -18,3 +18,6 @@ config KVM_MMIO
 
 config KVM_ASYNC_PF
        bool
+
+config HAVE_KVM_MSI
+       bool
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 9f614b4e365f..a6a0365475ed 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -138,6 +138,20 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
 	return kvm_irq_delivery_to_apic(kvm, NULL, &irq);
 }
 
+int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi)
+{
+	struct kvm_kernel_irq_routing_entry route;
+
+	if (!irqchip_in_kernel(kvm) || msi->flags != 0)
+		return -EINVAL;
+
+	route.msi.address_lo = msi->address_lo;
+	route.msi.address_hi = msi->address_hi;
+	route.msi.data = msi->data;
+
+	return kvm_set_msi(&route, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1);
+}
+
 /*
  * Return value:
  *  < 0   Interrupt was ignored (masked or not delivered for other reasons)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 9eb7936e491d..1847c762d8d9 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2059,6 +2059,17 @@ static long kvm_vm_ioctl(struct file *filp,
 			kvm->bsp_vcpu_id = arg;
 		mutex_unlock(&kvm->lock);
 		break;
+#endif
+#ifdef CONFIG_HAVE_KVM_MSI
+	case KVM_SIGNAL_MSI: {
+		struct kvm_msi msi;
+
+		r = -EFAULT;
+		if (copy_from_user(&msi, argp, sizeof msi))
+			goto out;
+		r = kvm_send_userspace_msi(kvm, &msi);
+		break;
+	}
 #endif
 	default:
 		r = kvm_arch_vm_ioctl(filp, ioctl, arg);
@@ -2188,6 +2199,9 @@ static long kvm_dev_ioctl_check_extension_generic(long arg)
 	case KVM_CAP_SET_BOOT_CPU_ID:
 #endif
 	case KVM_CAP_INTERNAL_ERROR_DATA:
+#ifdef CONFIG_HAVE_KVM_MSI
+	case KVM_CAP_SIGNAL_MSI:
+#endif
 		return 1;
 #ifdef CONFIG_HAVE_KVM_IRQCHIP
 	case KVM_CAP_IRQ_ROUTING:
-- 
cgit v1.3-7-g2ca7


From 822c250e154cd44cf60a4f0d647aa70abea09520 Mon Sep 17 00:00:00 2001
From: Shawn Guo <shawn.guo@linaro.org>
Date: Tue, 27 Mar 2012 15:23:22 +0800
Subject: clk: add "const" for clk_ops of basic clks

The clk_ops of basic clks should have "const" to match the definition
in "struct clk" and clk_register prototype.

Signed-off-by: Shawn Guo <shawn.guo@linaro.org>
Signed-off-by: Mike Turquette <mturquette@linaro.org>
---
 drivers/clk/clk-divider.c    | 2 +-
 drivers/clk/clk-fixed-rate.c | 2 +-
 drivers/clk/clk-gate.c       | 2 +-
 drivers/clk/clk-mux.c        | 2 +-
 include/linux/clk-private.h  | 8 ++++----
 5 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/clk/clk-divider.c b/drivers/clk/clk-divider.c
index 231cd6e89003..b1c4b02aaaf1 100644
--- a/drivers/clk/clk-divider.c
+++ b/drivers/clk/clk-divider.c
@@ -146,7 +146,7 @@ static int clk_divider_set_rate(struct clk_hw *hw, unsigned long rate)
 	return 0;
 }
 
-struct clk_ops clk_divider_ops = {
+const struct clk_ops clk_divider_ops = {
 	.recalc_rate = clk_divider_recalc_rate,
 	.round_rate = clk_divider_round_rate,
 	.set_rate = clk_divider_set_rate,
diff --git a/drivers/clk/clk-fixed-rate.c b/drivers/clk/clk-fixed-rate.c
index 651b06f49e15..027e47704de9 100644
--- a/drivers/clk/clk-fixed-rate.c
+++ b/drivers/clk/clk-fixed-rate.c
@@ -33,7 +33,7 @@ static unsigned long clk_fixed_rate_recalc_rate(struct clk_hw *hw,
 	return to_clk_fixed_rate(hw)->fixed_rate;
 }
 
-struct clk_ops clk_fixed_rate_ops = {
+const struct clk_ops clk_fixed_rate_ops = {
 	.recalc_rate = clk_fixed_rate_recalc_rate,
 };
 EXPORT_SYMBOL_GPL(clk_fixed_rate_ops);
diff --git a/drivers/clk/clk-gate.c b/drivers/clk/clk-gate.c
index b688f4775859..fe2ff9e774c2 100644
--- a/drivers/clk/clk-gate.c
+++ b/drivers/clk/clk-gate.c
@@ -98,7 +98,7 @@ static int clk_gate_is_enabled(struct clk_hw *hw)
 	return reg ? 1 : 0;
 }
 
-struct clk_ops clk_gate_ops = {
+const struct clk_ops clk_gate_ops = {
 	.enable = clk_gate_enable,
 	.disable = clk_gate_disable,
 	.is_enabled = clk_gate_is_enabled,
diff --git a/drivers/clk/clk-mux.c b/drivers/clk/clk-mux.c
index 45cad61600c9..54244889a948 100644
--- a/drivers/clk/clk-mux.c
+++ b/drivers/clk/clk-mux.c
@@ -82,7 +82,7 @@ static int clk_mux_set_parent(struct clk_hw *hw, u8 index)
 	return 0;
 }
 
-struct clk_ops clk_mux_ops = {
+const struct clk_ops clk_mux_ops = {
 	.get_parent = clk_mux_get_parent,
 	.set_parent = clk_mux_set_parent,
 };
diff --git a/include/linux/clk-private.h b/include/linux/clk-private.h
index 5e4312b6f5cc..5f4ccd7cd761 100644
--- a/include/linux/clk-private.h
+++ b/include/linux/clk-private.h
@@ -55,7 +55,7 @@ struct clk {
  * alternative macro for static initialization
  */
 
-extern struct clk_ops clk_fixed_rate_ops;
+extern const struct clk_ops clk_fixed_rate_ops;
 
 #define DEFINE_CLK_FIXED_RATE(_name, _flags, _rate,		\
 				_fixed_rate_flags)		\
@@ -78,7 +78,7 @@ extern struct clk_ops clk_fixed_rate_ops;
 		.flags = _flags,				\
 	};
 
-extern struct clk_ops clk_gate_ops;
+extern const struct clk_ops clk_gate_ops;
 
 #define DEFINE_CLK_GATE(_name, _parent_name, _parent_ptr,	\
 				_flags, _reg, _bit_idx,		\
@@ -110,7 +110,7 @@ extern struct clk_ops clk_gate_ops;
 		.flags = _flags,				\
 	};
 
-extern struct clk_ops clk_divider_ops;
+extern const struct clk_ops clk_divider_ops;
 
 #define DEFINE_CLK_DIVIDER(_name, _parent_name, _parent_ptr,	\
 				_flags, _reg, _shift, _width,	\
@@ -143,7 +143,7 @@ extern struct clk_ops clk_divider_ops;
 		.flags = _flags,				\
 	};
 
-extern struct clk_ops clk_mux_ops;
+extern const struct clk_ops clk_mux_ops;
 
 #define DEFINE_CLK_MUX(_name, _parent_names, _parents, _flags,	\
 				_reg, _shift, _width,		\
-- 
cgit v1.3-7-g2ca7


From bffad66e31fe9d94cd096f2e4de7c683e1ae32ef Mon Sep 17 00:00:00 2001
From: Shawn Guo <shawn.guo@linaro.org>
Date: Tue, 27 Mar 2012 15:23:23 +0800
Subject: clk: declare clk_ops of basic clks in clk-provider.h

Besides the static initialization, the clk_ops of basic clks could
also be used by particular clk type being subclass of the basic clks.

For example, clk_busy_divider has the same clk_ops as clk_divider,
except it has to wait for a busy bit before return success with
.set_rate.  clk_busy_divider will somehow reuse clk_ops of clk_divider.

Since clk-provider.h is included by clk-private.h, it's safe to move
those clk_ops declaration of basic clks form  clk-private.h into
clk-provider.h, so that implementation of clks like clk_busy_divider
above do not need to include clk-private.h to access those clk_ops.

Signed-off-by: Shawn Guo <shawn.guo@linaro.org>
Signed-off-by: Mike Turquette <mturquette@linaro.org>
---
 include/linux/clk-private.h  | 8 --------
 include/linux/clk-provider.h | 4 ++++
 2 files changed, 4 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/clk-private.h b/include/linux/clk-private.h
index 5f4ccd7cd761..f19fee0190cb 100644
--- a/include/linux/clk-private.h
+++ b/include/linux/clk-private.h
@@ -55,8 +55,6 @@ struct clk {
  * alternative macro for static initialization
  */
 
-extern const struct clk_ops clk_fixed_rate_ops;
-
 #define DEFINE_CLK_FIXED_RATE(_name, _flags, _rate,		\
 				_fixed_rate_flags)		\
 	static struct clk _name;				\
@@ -78,8 +76,6 @@ extern const struct clk_ops clk_fixed_rate_ops;
 		.flags = _flags,				\
 	};
 
-extern const struct clk_ops clk_gate_ops;
-
 #define DEFINE_CLK_GATE(_name, _parent_name, _parent_ptr,	\
 				_flags, _reg, _bit_idx,		\
 				_gate_flags, _lock)		\
@@ -110,8 +106,6 @@ extern const struct clk_ops clk_gate_ops;
 		.flags = _flags,				\
 	};
 
-extern const struct clk_ops clk_divider_ops;
-
 #define DEFINE_CLK_DIVIDER(_name, _parent_name, _parent_ptr,	\
 				_flags, _reg, _shift, _width,	\
 				_divider_flags, _lock)		\
@@ -143,8 +137,6 @@ extern const struct clk_ops clk_divider_ops;
 		.flags = _flags,				\
 	};
 
-extern const struct clk_ops clk_mux_ops;
-
 #define DEFINE_CLK_MUX(_name, _parent_names, _parents, _flags,	\
 				_reg, _shift, _width,		\
 				_mux_flags, _lock)		\
diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h
index 5508897ad376..6eb8e5da788e 100644
--- a/include/linux/clk-provider.h
+++ b/include/linux/clk-provider.h
@@ -149,6 +149,7 @@ struct clk_fixed_rate {
 	u8		flags;
 };
 
+extern const struct clk_ops clk_fixed_rate_ops;
 struct clk *clk_register_fixed_rate(struct device *dev, const char *name,
 		const char *parent_name, unsigned long flags,
 		unsigned long fixed_rate);
@@ -180,6 +181,7 @@ struct clk_gate {
 
 #define CLK_GATE_SET_TO_DISABLE		BIT(0)
 
+extern const struct clk_ops clk_gate_ops;
 struct clk *clk_register_gate(struct device *dev, const char *name,
 		const char *parent_name, unsigned long flags,
 		void __iomem *reg, u8 bit_idx,
@@ -218,6 +220,7 @@ struct clk_divider {
 #define CLK_DIVIDER_ONE_BASED		BIT(0)
 #define CLK_DIVIDER_POWER_OF_TWO	BIT(1)
 
+extern const struct clk_ops clk_divider_ops;
 struct clk *clk_register_divider(struct device *dev, const char *name,
 		const char *parent_name, unsigned long flags,
 		void __iomem *reg, u8 shift, u8 width,
@@ -252,6 +255,7 @@ struct clk_mux {
 #define CLK_MUX_INDEX_ONE		BIT(0)
 #define CLK_MUX_INDEX_BIT		BIT(1)
 
+extern const struct clk_ops clk_mux_ops;
 struct clk *clk_register_mux(struct device *dev, const char *name,
 		char **parent_names, u8 num_parents, unsigned long flags,
 		void __iomem *reg, u8 shift, u8 width,
-- 
cgit v1.3-7-g2ca7


From 7e87aed965fa7a642fc299af96d370dad7b5b814 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Sun, 1 Apr 2012 15:31:23 +0100
Subject: clk: Remove comment for end of CONFIG_COMMON_CLK section

The comment is inaccurate (it actually ends the CONFIG_COMMON_CLK
section, there's no else) and given that we've just got a single level
of ifdef isn't really needed anyway.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Mike Turquette <mturquette@linaro.org>
---
 include/linux/clk.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/clk.h b/include/linux/clk.h
index b0252726df61..c9547d99e52c 100644
--- a/include/linux/clk.h
+++ b/include/linux/clk.h
@@ -81,7 +81,7 @@ int clk_notifier_register(struct clk *clk, struct notifier_block *nb);
 
 int clk_notifier_unregister(struct clk *clk, struct notifier_block *nb);
 
-#endif /* !CONFIG_COMMON_CLK */
+#endif
 
 /**
  * clk_get - lookup and obtain a reference to a clock producer.
-- 
cgit v1.3-7-g2ca7


From d305fb78f31209596c9135d396a0d3af7ac86947 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Wed, 21 Mar 2012 20:01:20 +0000
Subject: clk: Constify parent name arrays

Drivers should be able to declare their arrays of parent names as const
so the APIs need to accept const arguments.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
[mturquette@linaro.org: constified gate]
Signed-off-by: Mike Turquette <mturquette@linaro.org>
---
 drivers/clk/clk-mux.c        | 2 +-
 drivers/clk/clk.c            | 2 +-
 include/linux/clk-private.h  | 2 +-
 include/linux/clk-provider.h | 8 ++++----
 4 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/clk/clk-mux.c b/drivers/clk/clk-mux.c
index 54244889a948..bd5e598b9f1e 100644
--- a/drivers/clk/clk-mux.c
+++ b/drivers/clk/clk-mux.c
@@ -89,7 +89,7 @@ const struct clk_ops clk_mux_ops = {
 EXPORT_SYMBOL_GPL(clk_mux_ops);
 
 struct clk *clk_register_mux(struct device *dev, const char *name,
-		char **parent_names, u8 num_parents, unsigned long flags,
+		const char **parent_names, u8 num_parents, unsigned long flags,
 		void __iomem *reg, u8 shift, u8 width,
 		u8 clk_mux_flags, spinlock_t *lock)
 {
diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
index a24b121747ac..ddade8759ea9 100644
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c
@@ -1328,7 +1328,7 @@ out:
  */
 struct clk *clk_register(struct device *dev, const char *name,
 		const struct clk_ops *ops, struct clk_hw *hw,
-		char **parent_names, u8 num_parents, unsigned long flags)
+		const char **parent_names, u8 num_parents, unsigned long flags)
 {
 	struct clk *clk;
 
diff --git a/include/linux/clk-private.h b/include/linux/clk-private.h
index f19fee0190cb..e9c8b9841b16 100644
--- a/include/linux/clk-private.h
+++ b/include/linux/clk-private.h
@@ -30,7 +30,7 @@ struct clk {
 	const struct clk_ops	*ops;
 	struct clk_hw		*hw;
 	struct clk		*parent;
-	char			**parent_names;
+	const char		**parent_names;
 	struct clk		**parents;
 	u8			num_parents;
 	unsigned long		rate;
diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h
index 6eb8e5da788e..8981435f9064 100644
--- a/include/linux/clk-provider.h
+++ b/include/linux/clk-provider.h
@@ -176,7 +176,7 @@ struct clk_gate {
 	u8		bit_idx;
 	u8		flags;
 	spinlock_t	*lock;
-	char		*parent[1];
+	const char	*parent[1];
 };
 
 #define CLK_GATE_SET_TO_DISABLE		BIT(0)
@@ -214,7 +214,7 @@ struct clk_divider {
 	u8		width;
 	u8		flags;
 	spinlock_t	*lock;
-	char		*parent[1];
+	const char	*parent[1];
 };
 
 #define CLK_DIVIDER_ONE_BASED		BIT(0)
@@ -257,7 +257,7 @@ struct clk_mux {
 
 extern const struct clk_ops clk_mux_ops;
 struct clk *clk_register_mux(struct device *dev, const char *name,
-		char **parent_names, u8 num_parents, unsigned long flags,
+		const char **parent_names, u8 num_parents, unsigned long flags,
 		void __iomem *reg, u8 shift, u8 width,
 		u8 clk_mux_flags, spinlock_t *lock);
 
@@ -278,7 +278,7 @@ struct clk *clk_register_mux(struct device *dev, const char *name,
  */
 struct clk *clk_register(struct device *dev, const char *name,
 		const struct clk_ops *ops, struct clk_hw *hw,
-		char **parent_names, u8 num_parents, unsigned long flags);
+		const char **parent_names, u8 num_parents, unsigned long flags);
 
 /* helper functions */
 const char *__clk_get_name(struct clk *clk);
-- 
cgit v1.3-7-g2ca7


From d1302a36a7f1c33d1a8babc6a510e1401a5e5aed Mon Sep 17 00:00:00 2001
From: Mike Turquette <mturquette@linaro.org>
Date: Thu, 29 Mar 2012 14:30:40 -0700
Subject: clk: core: copy parent_names & return error codes

This patch cleans up clk_register and solves a few bugs by teaching
clk_register and __clk_init to return error codes (instead of just NULL)
to better align with the existing clk.h api.

Along with that change this patch also introduces a new behavior whereby
clk_register copies the parent_names array, thus allowing platforms to
declare their parent_names arrays as __initdata.

Signed-off-by: Mike Turquette <mturquette@linaro.org>
---
 drivers/clk/clk.c            | 65 ++++++++++++++++++++++++++++++++++++--------
 include/linux/clk-private.h  |  4 ++-
 include/linux/clk-provider.h |  3 +-
 3 files changed, 58 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
index ddade8759ea9..8f7c3849c8f6 100644
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c
@@ -1185,34 +1185,41 @@ EXPORT_SYMBOL_GPL(clk_set_parent);
  * very large numbers of clocks that need to be statically initialized.  It is
  * a layering violation to include clk-private.h from any code which implements
  * a clock's .ops; as such any statically initialized clock data MUST be in a
- * separate C file from the logic that implements it's operations.
+ * separate C file from the logic that implements it's operations.  Returns 0
+ * on success, otherwise an error code.
  */
-void __clk_init(struct device *dev, struct clk *clk)
+int __clk_init(struct device *dev, struct clk *clk)
 {
-	int i;
+	int i, ret = 0;
 	struct clk *orphan;
 	struct hlist_node *tmp, *tmp2;
 
 	if (!clk)
-		return;
+		return -EINVAL;
 
 	mutex_lock(&prepare_lock);
 
 	/* check to see if a clock with this name is already registered */
-	if (__clk_lookup(clk->name))
+	if (__clk_lookup(clk->name)) {
+		pr_debug("%s: clk %s already initialized\n",
+				__func__, clk->name);
+		ret = -EEXIST;
 		goto out;
+	}
 
 	/* check that clk_ops are sane.  See Documentation/clk.txt */
 	if (clk->ops->set_rate &&
 			!(clk->ops->round_rate && clk->ops->recalc_rate)) {
 		pr_warning("%s: %s must implement .round_rate & .recalc_rate\n",
 				__func__, clk->name);
+		ret = -EINVAL;
 		goto out;
 	}
 
 	if (clk->ops->set_parent && !clk->ops->get_parent) {
 		pr_warning("%s: %s must implement .get_parent & .set_parent\n",
 				__func__, clk->name);
+		ret = -EINVAL;
 		goto out;
 	}
 
@@ -1308,7 +1315,7 @@ void __clk_init(struct device *dev, struct clk *clk)
 out:
 	mutex_unlock(&prepare_lock);
 
-	return;
+	return ret;
 }
 
 /**
@@ -1324,29 +1331,63 @@ out:
  * clk_register is the primary interface for populating the clock tree with new
  * clock nodes.  It returns a pointer to the newly allocated struct clk which
  * cannot be dereferenced by driver code but may be used in conjuction with the
- * rest of the clock API.
+ * rest of the clock API.  In the event of an error clk_register will return an
+ * error code; drivers must test for an error code after calling clk_register.
  */
 struct clk *clk_register(struct device *dev, const char *name,
 		const struct clk_ops *ops, struct clk_hw *hw,
 		const char **parent_names, u8 num_parents, unsigned long flags)
 {
+	int i, ret;
 	struct clk *clk;
 
 	clk = kzalloc(sizeof(*clk), GFP_KERNEL);
-	if (!clk)
-		return NULL;
+	if (!clk) {
+		pr_err("%s: could not allocate clk\n", __func__);
+		ret = -ENOMEM;
+		goto fail_out;
+	}
 
 	clk->name = name;
 	clk->ops = ops;
 	clk->hw = hw;
 	clk->flags = flags;
-	clk->parent_names = parent_names;
 	clk->num_parents = num_parents;
 	hw->clk = clk;
 
-	__clk_init(dev, clk);
+	/* allocate local copy in case parent_names is __initdata */
+	clk->parent_names = kzalloc((sizeof(char*) * num_parents),
+			GFP_KERNEL);
+
+	if (!clk->parent_names) {
+		pr_err("%s: could not allocate clk->parent_names\n", __func__);
+		ret = -ENOMEM;
+		goto fail_parent_names;
+	}
+
+
+	/* copy each string name in case parent_names is __initdata */
+	for (i = 0; i < num_parents; i++) {
+		clk->parent_names[i] = kstrdup(parent_names[i], GFP_KERNEL);
+		if (!clk->parent_names[i]) {
+			pr_err("%s: could not copy parent_names\n", __func__);
+			ret = -ENOMEM;
+			goto fail_parent_names_copy;
+		}
+	}
+
+	ret = __clk_init(dev, clk);
+	if (!ret)
+		return clk;
 
-	return clk;
+fail_parent_names_copy:
+	while (--i >= 0)
+		kfree(clk->parent_names[i]);
+	kfree(clk->parent_names);
+fail_parent_names:
+	kfree(clk);
+fail_out:
+	return ERR_PTR(ret);
 }
 EXPORT_SYMBOL_GPL(clk_register);
 
diff --git a/include/linux/clk-private.h b/include/linux/clk-private.h
index e9c8b9841b16..e7032fdd45eb 100644
--- a/include/linux/clk-private.h
+++ b/include/linux/clk-private.h
@@ -181,8 +181,10 @@ struct clk {
  *
  * It is not necessary to call clk_register if __clk_init is used directly with
  * statically initialized clock data.
+ *
+ * Returns 0 on success, otherwise an error code.
  */
-void __clk_init(struct device *dev, struct clk *clk);
+int __clk_init(struct device *dev, struct clk *clk);
 
 #endif /* CONFIG_COMMON_CLK */
 #endif /* CLK_PRIVATE_H */
diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h
index 8981435f9064..97f9fabf3be2 100644
--- a/include/linux/clk-provider.h
+++ b/include/linux/clk-provider.h
@@ -274,7 +274,8 @@ struct clk *clk_register_mux(struct device *dev, const char *name,
  * clk_register is the primary interface for populating the clock tree with new
  * clock nodes.  It returns a pointer to the newly allocated struct clk which
  * cannot be dereferenced by driver code but may be used in conjuction with the
- * rest of the clock API.
+ * rest of the clock API.  In the event of an error clk_register will return an
+ * error code; drivers must test for an error code after calling clk_register.
  */
 struct clk *clk_register(struct device *dev, const char *name,
 		const struct clk_ops *ops, struct clk_hw *hw,
-- 
cgit v1.3-7-g2ca7


From 27d545915fd49cbe18a3877d82359896e9851efb Mon Sep 17 00:00:00 2001
From: Mike Turquette <mturquette@linaro.org>
Date: Mon, 26 Mar 2012 17:51:03 -0700
Subject: clk: basic: improve parent_names & return errors

This patch is the basic clk version of 'clk: core: copy parent_names &
return error codes'.

The registration functions are changed to allow the core code to copy
the array of strings and allow platforms to declare those arrays as
__initdata.

This patch also converts all of the basic clk registration functions to
return error codes which better aligns them with the existing clk.h api.

Signed-off-by: Mike Turquette <mturquette@linaro.org>
---
 drivers/clk/clk-divider.c    | 34 +++++++++++++++++++---------------
 drivers/clk/clk-fixed-rate.c | 40 ++++++++++++++++++----------------------
 drivers/clk/clk-gate.c       | 34 +++++++++++++++++++---------------
 drivers/clk/clk-mux.c        | 10 ++++++++--
 include/linux/clk-provider.h |  2 --
 5 files changed, 64 insertions(+), 56 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/clk/clk-divider.c b/drivers/clk/clk-divider.c
index b1c4b02aaaf1..5fc541d017f1 100644
--- a/drivers/clk/clk-divider.c
+++ b/drivers/clk/clk-divider.c
@@ -153,6 +153,18 @@ const struct clk_ops clk_divider_ops = {
 };
 EXPORT_SYMBOL_GPL(clk_divider_ops);
 
+/**
+ * clk_register_divider - register a divider clock with the clock framework
+ * @dev: device registering this clock
+ * @name: name of this clock
+ * @parent_name: name of clock's parent
+ * @flags: framework-specific flags
+ * @reg: register address to adjust divider
+ * @shift: number of bits to shift the bitfield
+ * @width: width of the bitfield
+ * @clk_divider_flags: divider-specific flags for this clock
+ * @lock: shared register lock for this clock
+ */
 struct clk *clk_register_divider(struct device *dev, const char *name,
 		const char *parent_name, unsigned long flags,
 		void __iomem *reg, u8 shift, u8 width,
@@ -161,11 +173,11 @@ struct clk *clk_register_divider(struct device *dev, const char *name,
 	struct clk_divider *div;
 	struct clk *clk;
 
+	/* allocate the divider */
 	div = kzalloc(sizeof(struct clk_divider), GFP_KERNEL);
-
 	if (!div) {
 		pr_err("%s: could not allocate divider clk\n", __func__);
-		return NULL;
+		return ERR_PTR(-ENOMEM);
 	}
 
 	/* struct clk_divider assignments */
@@ -175,23 +187,15 @@ struct clk *clk_register_divider(struct device *dev, const char *name,
 	div->flags = clk_divider_flags;
 	div->lock = lock;
 
-	if (parent_name) {
-		div->parent[0] = kstrdup(parent_name, GFP_KERNEL);
-		if (!div->parent[0])
-			goto out;
-	}
-
+	/* register the clock */
 	clk = clk_register(dev, name,
 			&clk_divider_ops, &div->hw,
-			div->parent,
+			(parent_name ? &parent_name: NULL),
 			(parent_name ? 1 : 0),
 			flags);
-	if (clk)
-		return clk;
 
-out:
-	kfree(div->parent[0]);
-	kfree(div);
+	if (IS_ERR(clk))
+		kfree(div);
 
-	return NULL;
+	return clk;
 }
diff --git a/drivers/clk/clk-fixed-rate.c b/drivers/clk/clk-fixed-rate.c
index 027e47704de9..b555a04c8df8 100644
--- a/drivers/clk/clk-fixed-rate.c
+++ b/drivers/clk/clk-fixed-rate.c
@@ -38,16 +38,23 @@ const struct clk_ops clk_fixed_rate_ops = {
 };
 EXPORT_SYMBOL_GPL(clk_fixed_rate_ops);
 
+/**
+ * clk_register_fixed_rate - register fixed-rate clock with the clock framework
+ * @dev: device that is registering this clock
+ * @name: name of this clock
+ * @parent_name: name of clock's parent
+ * @flags: framework-specific flags
+ * @fixed_rate: non-adjustable clock rate
+ */
 struct clk *clk_register_fixed_rate(struct device *dev, const char *name,
 		const char *parent_name, unsigned long flags,
 		unsigned long fixed_rate)
 {
 	struct clk_fixed_rate *fixed;
-	char **parent_names = NULL;
-	u8 len;
+	struct clk *clk;
 
+	/* allocate fixed-rate clock */
 	fixed = kzalloc(sizeof(struct clk_fixed_rate), GFP_KERNEL);
-
 	if (!fixed) {
 		pr_err("%s: could not allocate fixed clk\n", __func__);
 		return ERR_PTR(-ENOMEM);
@@ -56,26 +63,15 @@ struct clk *clk_register_fixed_rate(struct device *dev, const char *name,
 	/* struct clk_fixed_rate assignments */
 	fixed->fixed_rate = fixed_rate;
 
-	if (parent_name) {
-		parent_names = kmalloc(sizeof(char *), GFP_KERNEL);
-
-		if (! parent_names)
-			goto out;
-
-		len = sizeof(char) * strlen(parent_name);
-
-		parent_names[0] = kmalloc(len, GFP_KERNEL);
-
-		if (!parent_names[0])
-			goto out;
-
-		strncpy(parent_names[0], parent_name, len);
-	}
-
-out:
-	return clk_register(dev, name,
+	/* register the clock */
+	clk = clk_register(dev, name,
 			&clk_fixed_rate_ops, &fixed->hw,
-			parent_names,
+			(parent_name ? &parent_name : NULL),
 			(parent_name ? 1 : 0),
 			flags);
+
+	if (IS_ERR(clk))
+		kfree(fixed);
+
+	return clk;
 }
diff --git a/drivers/clk/clk-gate.c b/drivers/clk/clk-gate.c
index fe2ff9e774c2..42a4b941b6e7 100644
--- a/drivers/clk/clk-gate.c
+++ b/drivers/clk/clk-gate.c
@@ -105,6 +105,17 @@ const struct clk_ops clk_gate_ops = {
 };
 EXPORT_SYMBOL_GPL(clk_gate_ops);
 
+/**
+ * clk_register_gate - register a gate clock with the clock framework
+ * @dev: device that is registering this clock
+ * @name: name of this clock
+ * @parent_name: name of this clock's parent
+ * @flags: framework-specific flags for this clock
+ * @reg: register address to control gating of this clock
+ * @bit_idx: which bit in the register controls gating of this clock
+ * @clk_gate_flags: gate-specific flags for this clock
+ * @lock: shared register lock for this clock
+ */
 struct clk *clk_register_gate(struct device *dev, const char *name,
 		const char *parent_name, unsigned long flags,
 		void __iomem *reg, u8 bit_idx,
@@ -113,11 +124,11 @@ struct clk *clk_register_gate(struct device *dev, const char *name,
 	struct clk_gate *gate;
 	struct clk *clk;
 
+	/* allocate the gate */
 	gate = kzalloc(sizeof(struct clk_gate), GFP_KERNEL);
-
 	if (!gate) {
 		pr_err("%s: could not allocate gated clk\n", __func__);
-		return NULL;
+		return ERR_PTR(-ENOMEM);
 	}
 
 	/* struct clk_gate assignments */
@@ -126,22 +137,15 @@ struct clk *clk_register_gate(struct device *dev, const char *name,
 	gate->flags = clk_gate_flags;
 	gate->lock = lock;
 
-	if (parent_name) {
-		gate->parent[0] = kstrdup(parent_name, GFP_KERNEL);
-		if (!gate->parent[0])
-			goto out;
-	}
-
+	/* register the clock */
 	clk = clk_register(dev, name,
 			&clk_gate_ops, &gate->hw,
-			gate->parent,
+			(parent_name ? &parent_name : NULL),
 			(parent_name ? 1 : 0),
 			flags);
-	if (clk)
-		return clk;
-out:
-	kfree(gate->parent[0]);
-	kfree(gate);
 
-	return NULL;
+	if (IS_ERR(clk))
+		kfree(gate);
+
+	return clk;
 }
diff --git a/drivers/clk/clk-mux.c b/drivers/clk/clk-mux.c
index bd5e598b9f1e..6e58f11ab81f 100644
--- a/drivers/clk/clk-mux.c
+++ b/drivers/clk/clk-mux.c
@@ -94,9 +94,10 @@ struct clk *clk_register_mux(struct device *dev, const char *name,
 		u8 clk_mux_flags, spinlock_t *lock)
 {
 	struct clk_mux *mux;
+	struct clk *clk;
 
+	/* allocate the mux */
 	mux = kzalloc(sizeof(struct clk_mux), GFP_KERNEL);
-
 	if (!mux) {
 		pr_err("%s: could not allocate mux clk\n", __func__);
 		return ERR_PTR(-ENOMEM);
@@ -109,6 +110,11 @@ struct clk *clk_register_mux(struct device *dev, const char *name,
 	mux->flags = clk_mux_flags;
 	mux->lock = lock;
 
-	return clk_register(dev, name, &clk_mux_ops, &mux->hw,
+	clk = clk_register(dev, name, &clk_mux_ops, &mux->hw,
 			parent_names, num_parents, flags);
+
+	if (IS_ERR(clk))
+		kfree(mux);
+
+	return clk;
 }
diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h
index 97f9fabf3be2..3323d24a7be4 100644
--- a/include/linux/clk-provider.h
+++ b/include/linux/clk-provider.h
@@ -176,7 +176,6 @@ struct clk_gate {
 	u8		bit_idx;
 	u8		flags;
 	spinlock_t	*lock;
-	const char	*parent[1];
 };
 
 #define CLK_GATE_SET_TO_DISABLE		BIT(0)
@@ -214,7 +213,6 @@ struct clk_divider {
 	u8		width;
 	u8		flags;
 	spinlock_t	*lock;
-	const char	*parent[1];
 };
 
 #define CLK_DIVIDER_ONE_BASED		BIT(0)
-- 
cgit v1.3-7-g2ca7


From 1c0035d710dd3bfa86d58f851b8737c7f11a9bbc Mon Sep 17 00:00:00 2001
From: Shawn Guo <shawn.guo@linaro.org>
Date: Thu, 12 Apr 2012 20:50:18 +0800
Subject: clk: pass parent_rate into .set_rate

For most of .set_rate implementation, parent_rate will be used, so just
like passing parent_rate into .recalc_rate, let's pass parent_rate into
.set_rate too.

It also updates the kernel doc for .set_rate ops.

Signed-off-by: Shawn Guo <shawn.guo@linaro.org>
Signed-off-by: Mike Turquette <mturquette@linaro.org>
---
 drivers/clk/clk-divider.c    |  5 +++--
 drivers/clk/clk.c            |  2 +-
 include/linux/clk-provider.h | 21 +++++++--------------
 3 files changed, 11 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/clk/clk-divider.c b/drivers/clk/clk-divider.c
index 03b127c0313b..90627e4069af 100644
--- a/drivers/clk/clk-divider.c
+++ b/drivers/clk/clk-divider.c
@@ -111,14 +111,15 @@ static long clk_divider_round_rate(struct clk_hw *hw, unsigned long rate,
 	return *prate / div;
 }
 
-static int clk_divider_set_rate(struct clk_hw *hw, unsigned long rate)
+static int clk_divider_set_rate(struct clk_hw *hw, unsigned long rate,
+				unsigned long parent_rate)
 {
 	struct clk_divider *divider = to_clk_divider(hw);
 	unsigned int div;
 	unsigned long flags = 0;
 	u32 val;
 
-	div = __clk_get_rate(__clk_get_parent(hw->clk)) / rate;
+	div = parent_rate / rate;
 
 	if (!(divider->flags & CLK_DIVIDER_ONE_BASED))
 		div--;
diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
index 1ab4f7e5c7ef..62ecac53b0a2 100644
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c
@@ -848,7 +848,7 @@ static void clk_change_rate(struct clk *clk)
 	old_rate = clk->rate;
 
 	if (clk->ops->set_rate)
-		clk->ops->set_rate(clk->hw, clk->new_rate);
+		clk->ops->set_rate(clk->hw, clk->new_rate, clk->parent->rate);
 
 	if (clk->ops->recalc_rate)
 		clk->rate = clk->ops->recalc_rate(clk->hw,
diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h
index 3323d24a7be4..cb82918d8fe0 100644
--- a/include/linux/clk-provider.h
+++ b/include/linux/clk-provider.h
@@ -88,19 +88,11 @@ struct clk_hw {
  * 		array index into the value programmed into the hardware.
  * 		Returns 0 on success, -EERROR otherwise.
  *
- * @set_rate:	Change the rate of this clock. If this callback returns
- * 		CLK_SET_RATE_PARENT, the rate change will be propagated to the
- * 		parent clock (which may propagate again if the parent clock
- * 		also sets this flag). The requested rate of the parent is
- * 		passed back from the callback in the second 'unsigned long *'
- * 		argument.  Note that it is up to the hardware clock's set_rate
- * 		implementation to insure that clocks do not run out of spec
- * 		when propgating the call to set_rate up to the parent.  One way
- * 		to do this is to gate the clock (via clk_disable and/or
- * 		clk_unprepare) before calling clk_set_rate, then ungating it
- * 		afterward.  If your clock also has the CLK_GATE_SET_RATE flag
- * 		set then this will insure safety.  Returns 0 on success,
- * 		-EERROR otherwise.
+ * @set_rate:	Change the rate of this clock. The requested rate is specified
+ *		by the second argument, which should typically be the return
+ *		of .round_rate call.  The third argument gives the parent rate
+ *		which is likely helpful for most .set_rate implementation.
+ *		Returns 0 on success, -EERROR otherwise.
  *
  * The clk_enable/clk_disable and clk_prepare/clk_unprepare pairs allow
  * implementations to split any work between atomic (enable) and sleepable
@@ -125,7 +117,8 @@ struct clk_ops {
 					unsigned long *);
 	int		(*set_parent)(struct clk_hw *hw, u8 index);
 	u8		(*get_parent)(struct clk_hw *hw);
-	int		(*set_rate)(struct clk_hw *hw, unsigned long);
+	int		(*set_rate)(struct clk_hw *hw, unsigned long,
+				    unsigned long);
 	void		(*init)(struct clk_hw *hw);
 };
 
-- 
cgit v1.3-7-g2ca7


From 1f73f31ad6e37df0679f6842b7405d96515ec8b1 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@st.com>
Date: Tue, 17 Apr 2012 16:45:35 +0530
Subject: clk: Fix typo in comment

CLK_MUX_INDEX_BIT is mistakenly written as CLK_MUX_INDEX_BITWISE in comment. Fix
it.

CLK_GATE_SET_TO_DISABLE is mistakenly written as CLK_GATE_SET_DISABLE in
comment. Fix it.

Signed-off-by: Viresh Kumar <viresh.kumar@st.com>
Signed-off-by: Mike Turquette <mturquette@linaro.org>
---
 include/linux/clk-provider.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h
index cb82918d8fe0..8f2148942b87 100644
--- a/include/linux/clk-provider.h
+++ b/include/linux/clk-provider.h
@@ -159,7 +159,7 @@ struct clk *clk_register_fixed_rate(struct device *dev, const char *name,
  * Clock which can gate its output.  Implements .enable & .disable
  *
  * Flags:
- * CLK_GATE_SET_DISABLE - by default this clock sets the bit at bit_idx to
+ * CLK_GATE_SET_TO_DISABLE - by default this clock sets the bit at bit_idx to
  * 	enable the clock.  Setting this flag does the opposite: setting the bit
  * 	disable the clock and clearing it enables the clock
  */
@@ -232,7 +232,7 @@ struct clk *clk_register_divider(struct device *dev, const char *name,
  *
  * Flags:
  * CLK_MUX_INDEX_ONE - register index starts at 1, not 0
- * CLK_MUX_INDEX_BITWISE - register index is a single bit (power of two)
+ * CLK_MUX_INDEX_BIT - register index is a single bit (power of two)
  */
 struct clk_mux {
 	struct clk_hw	hw;
-- 
cgit v1.3-7-g2ca7


From 182f9e8cd5e451911a37f121f942409205ede0d6 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@st.com>
Date: Tue, 17 Apr 2012 16:45:36 +0530
Subject: clk: clk-private: Add DEFINE_CLK macro

All macros used for creating different kind of clocks have similar code for
initializing struct clk. This patch removes those redundant lines and create
another macro DEFINE_CLK.

Signed-off-by: Viresh Kumar <viresh.kumar@st.com>
Signed-off-by: Mike Turquette <mturquette@linaro.org>
---
 include/linux/clk-private.h | 59 +++++++++++++++------------------------------
 1 file changed, 20 insertions(+), 39 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/clk-private.h b/include/linux/clk-private.h
index e7032fdd45eb..eeae7a3cfc45 100644
--- a/include/linux/clk-private.h
+++ b/include/linux/clk-private.h
@@ -55,6 +55,18 @@ struct clk {
  * alternative macro for static initialization
  */
 
+#define DEFINE_CLK(_name, _ops, _flags, _parent_names,		\
+		_parents)					\
+	static struct clk _name = {				\
+		.name = #_name,					\
+		.ops = &_ops,					\
+		.hw = &_name##_hw.hw,				\
+		.parent_names = _parent_names,			\
+		.num_parents = ARRAY_SIZE(_parent_names),	\
+		.parents = _parents,				\
+		.flags = _flags,				\
+	}
+
 #define DEFINE_CLK_FIXED_RATE(_name, _flags, _rate,		\
 				_fixed_rate_flags)		\
 	static struct clk _name;				\
@@ -66,15 +78,8 @@ struct clk {
 		.fixed_rate = _rate,				\
 		.flags = _fixed_rate_flags,			\
 	};							\
-	static struct clk _name = {				\
-		.name = #_name,					\
-		.ops = &clk_fixed_rate_ops,			\
-		.hw = &_name##_hw.hw,				\
-		.parent_names = _name##_parent_names,		\
-		.num_parents =					\
-			ARRAY_SIZE(_name##_parent_names),	\
-		.flags = _flags,				\
-	};
+	DEFINE_CLK(_name, clk_fixed_rate_ops, _flags,		\
+			_name##_parent_names, NULL);
 
 #define DEFINE_CLK_GATE(_name, _parent_name, _parent_ptr,	\
 				_flags, _reg, _bit_idx,		\
@@ -95,16 +100,8 @@ struct clk {
 		.flags = _gate_flags,				\
 		.lock = _lock,					\
 	};							\
-	static struct clk _name = {				\
-		.name = #_name,					\
-		.ops = &clk_gate_ops,				\
-		.hw = &_name##_hw.hw,				\
-		.parent_names = _name##_parent_names,		\
-		.num_parents =					\
-			ARRAY_SIZE(_name##_parent_names),	\
-		.parents = _name##_parents,			\
-		.flags = _flags,				\
-	};
+	DEFINE_CLK(_name, clk_gate_ops, _flags,			\
+			_name##_parent_names, _name##_parents);
 
 #define DEFINE_CLK_DIVIDER(_name, _parent_name, _parent_ptr,	\
 				_flags, _reg, _shift, _width,	\
@@ -126,16 +123,8 @@ struct clk {
 		.flags = _divider_flags,			\
 		.lock = _lock,					\
 	};							\
-	static struct clk _name = {				\
-		.name = #_name,					\
-		.ops = &clk_divider_ops,			\
-		.hw = &_name##_hw.hw,				\
-		.parent_names = _name##_parent_names,		\
-		.num_parents =					\
-			ARRAY_SIZE(_name##_parent_names),	\
-		.parents = _name##_parents,			\
-		.flags = _flags,				\
-	};
+	DEFINE_CLK(_name, clk_divider_ops, _flags,		\
+			_name##_parent_names, _name##_parents);
 
 #define DEFINE_CLK_MUX(_name, _parent_names, _parents, _flags,	\
 				_reg, _shift, _width,		\
@@ -151,16 +140,8 @@ struct clk {
 		.flags = _mux_flags,				\
 		.lock = _lock,					\
 	};							\
-	static struct clk _name = {				\
-		.name = #_name,					\
-		.ops = &clk_mux_ops,				\
-		.hw = &_name##_hw.hw,				\
-		.parent_names = _parent_names,			\
-		.num_parents =					\
-			ARRAY_SIZE(_parent_names),		\
-		.parents = _parents,				\
-		.flags = _flags,				\
-	};
+	DEFINE_CLK(_name, clk_mux_ops, _flags, _parent_names,	\
+			_parents);
 
 /**
  * __clk_init - initialize the data structures in a struct clk
-- 
cgit v1.3-7-g2ca7


From 8b7730ddff5affd623bed2affa0d0fa47ebbad3b Mon Sep 17 00:00:00 2001
From: Rob Herring <rob.herring@calxeda.com>
Date: Mon, 9 Apr 2012 15:24:59 -0500
Subject: clk: remove trailing whitespace from clk.h

Remove trailing whitespace from 2 lines.

Signed-off-by: Rob Herring <rob.herring@calxeda.com>
---
 include/linux/clk.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/clk.h b/include/linux/clk.h
index c9547d99e52c..0e078bdec09f 100644
--- a/include/linux/clk.h
+++ b/include/linux/clk.h
@@ -220,7 +220,7 @@ void clk_put(struct clk *clk);
  * Returns rounded clock rate in Hz, or negative errno.
  */
 long clk_round_rate(struct clk *clk, unsigned long rate);
- 
+
 /**
  * clk_set_rate - set the clock rate for a clock source
  * @clk: clock source
@@ -229,7 +229,7 @@ long clk_round_rate(struct clk *clk, unsigned long rate);
  * Returns success (0) or negative errno.
  */
 int clk_set_rate(struct clk *clk, unsigned long rate);
- 
+
 /**
  * clk_set_parent - set the parent clock source for this clock
  * @clk: clock source
-- 
cgit v1.3-7-g2ca7


From affa115ed365d646ad1a8cc7d2d063b8181cce37 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Thu, 12 Apr 2012 09:01:49 +0200
Subject: dma/amba-pl08x: add support for the Nomadik variant

The Nomadik PL080 variant has some extra protection bits that
may be set, so we need to check these bits to see if the
channels are actually available for the DMAengine to use.

Cc: Russell King <linux@arm.linux.org.uk>
Cc: Alim Akhtar <alim.akhtar@gmail.com>
Cc: Alessandro Rubini <rubini@gnudd.com>
Reviewed-by: Viresh Kumar <viresh.kumar@st.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Vinod Koul <vinod.koul@linux.intel.com>
---
 arch/arm/include/asm/hardware/pl080.h |  2 ++
 drivers/dma/amba-pl08x.c              | 44 +++++++++++++++++++++++++++++------
 include/linux/amba/pl08x.h            |  3 +++
 3 files changed, 42 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/include/asm/hardware/pl080.h b/arch/arm/include/asm/hardware/pl080.h
index 33c78d7af2e1..4eea2107214b 100644
--- a/arch/arm/include/asm/hardware/pl080.h
+++ b/arch/arm/include/asm/hardware/pl080.h
@@ -102,6 +102,8 @@
 #define PL080_WIDTH_16BIT			(0x1)
 #define PL080_WIDTH_32BIT			(0x2)
 
+#define PL080N_CONFIG_ITPROT			(1 << 20)
+#define PL080N_CONFIG_SECPROT			(1 << 19)
 #define PL080_CONFIG_HALT			(1 << 18)
 #define PL080_CONFIG_ACTIVE			(1 << 17)  /* RO */
 #define PL080_CONFIG_LOCK			(1 << 16)
diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c
index 08589c683e2b..629250e36d3b 100644
--- a/drivers/dma/amba-pl08x.c
+++ b/drivers/dma/amba-pl08x.c
@@ -95,10 +95,14 @@ static struct amba_driver pl08x_amba_driver;
  * struct vendor_data - vendor-specific config parameters for PL08x derivatives
  * @channels: the number of channels available in this variant
  * @dualmaster: whether this version supports dual AHB masters or not.
+ * @nomadik: whether the channels have Nomadik security extension bits
+ *	that need to be checked for permission before use and some registers are
+ *	missing
  */
 struct vendor_data {
 	u8 channels;
 	bool dualmaster;
+	bool nomadik;
 };
 
 /*
@@ -385,7 +389,7 @@ pl08x_get_phy_channel(struct pl08x_driver_data *pl08x,
 
 		spin_lock_irqsave(&ch->lock, flags);
 
-		if (!ch->serving) {
+		if (!ch->locked && !ch->serving) {
 			ch->serving = virt_chan;
 			ch->signal = -1;
 			spin_unlock_irqrestore(&ch->lock, flags);
@@ -1483,6 +1487,9 @@ bool pl08x_filter_id(struct dma_chan *chan, void *chan_id)
  */
 static void pl08x_ensure_on(struct pl08x_driver_data *pl08x)
 {
+	/* The Nomadik variant does not have the config register */
+	if (pl08x->vd->nomadik)
+		return;
 	writel(PL080_CONFIG_ENABLE, pl08x->base + PL080_CONFIG);
 }
 
@@ -1772,8 +1779,10 @@ static int pl08x_debugfs_show(struct seq_file *s, void *data)
 		spin_lock_irqsave(&ch->lock, flags);
 		virt_chan = ch->serving;
 
-		seq_printf(s, "%d\t\t%s\n",
-			   ch->id, virt_chan ? virt_chan->name : "(none)");
+		seq_printf(s, "%d\t\t%s%s\n",
+			   ch->id,
+			   virt_chan ? virt_chan->name : "(none)",
+			   ch->locked ? " LOCKED" : "");
 
 		spin_unlock_irqrestore(&ch->lock, flags);
 	}
@@ -1917,7 +1926,7 @@ static int pl08x_probe(struct amba_device *adev, const struct amba_id *id)
 	}
 
 	/* Initialize physical channels */
-	pl08x->phy_chans = kmalloc((vd->channels * sizeof(*pl08x->phy_chans)),
+	pl08x->phy_chans = kzalloc((vd->channels * sizeof(*pl08x->phy_chans)),
 			GFP_KERNEL);
 	if (!pl08x->phy_chans) {
 		dev_err(&adev->dev, "%s failed to allocate "
@@ -1932,8 +1941,23 @@ static int pl08x_probe(struct amba_device *adev, const struct amba_id *id)
 		ch->id = i;
 		ch->base = pl08x->base + PL080_Cx_BASE(i);
 		spin_lock_init(&ch->lock);
-		ch->serving = NULL;
 		ch->signal = -1;
+
+		/*
+		 * Nomadik variants can have channels that are locked
+		 * down for the secure world only. Lock up these channels
+		 * by perpetually serving a dummy virtual channel.
+		 */
+		if (vd->nomadik) {
+			u32 val;
+
+			val = readl(ch->base + PL080_CH_CONFIG);
+			if (val & (PL080N_CONFIG_ITPROT | PL080N_CONFIG_SECPROT)) {
+				dev_info(&adev->dev, "physical channel %d reserved for secure access only\n", i);
+				ch->locked = true;
+			}
+		}
+
 		dev_dbg(&adev->dev, "physical channel %d is %s\n",
 			i, pl08x_phy_channel_busy(ch) ? "BUSY" : "FREE");
 	}
@@ -2016,6 +2040,12 @@ static struct vendor_data vendor_pl080 = {
 	.dualmaster = true,
 };
 
+static struct vendor_data vendor_nomadik = {
+	.channels = 8,
+	.dualmaster = true,
+	.nomadik = true,
+};
+
 static struct vendor_data vendor_pl081 = {
 	.channels = 2,
 	.dualmaster = false,
@@ -2036,9 +2066,9 @@ static struct amba_id pl08x_ids[] = {
 	},
 	/* Nomadik 8815 PL080 variant */
 	{
-		.id	= 0x00280880,
+		.id	= 0x00280080,
 		.mask	= 0x00ffffff,
-		.data	= &vendor_pl080,
+		.data	= &vendor_nomadik,
 	},
 	{ 0, 0 },
 };
diff --git a/include/linux/amba/pl08x.h b/include/linux/amba/pl08x.h
index e64ce2cfee99..02549017212a 100644
--- a/include/linux/amba/pl08x.h
+++ b/include/linux/amba/pl08x.h
@@ -92,6 +92,8 @@ struct pl08x_bus_data {
  * right now
  * @serving: the virtual channel currently being served by this physical
  * channel
+ * @locked: channel unavailable for the system, e.g. dedicated to secure
+ * world
  */
 struct pl08x_phy_chan {
 	unsigned int id;
@@ -99,6 +101,7 @@ struct pl08x_phy_chan {
 	spinlock_t lock;
 	int signal;
 	struct pl08x_dma_chan *serving;
+	bool locked;
 };
 
 /**
-- 
cgit v1.3-7-g2ca7


From 4cd9069a0a0e5fb8b007425c937642682ac96c76 Mon Sep 17 00:00:00 2001
From: Richard Kennedy <richard@rsk.demon.co.uk>
Date: Wed, 25 Apr 2012 14:53:05 +0100
Subject: fs: remove 8 bytes of padding from struct writeback_control on 64 bit
 builds

Reorder structure writeback_control to remove 8 bytes of padding on 64
bit builds, this shrinks its size from 48 to 40 bytes.

This structure is always on the stack and uses C99 named initialisation,
so should be safe and have a small impact on stack usage.

Signed-off-by: Richard Kennedy <richard@rsk.demon.co.uk>
Signed-off-by: Fengguang Wu <fengguang.wu@intel.com>
---
 include/linux/writeback.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index a2b84f598e2b..3309736ff059 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -58,7 +58,6 @@ extern const char *wb_reason_name[];
  * in a manner such that unspecified fields are set to zero.
  */
 struct writeback_control {
-	enum writeback_sync_modes sync_mode;
 	long nr_to_write;		/* Write this many pages, and decrement
 					   this for each page written */
 	long pages_skipped;		/* Pages which were not written */
@@ -71,6 +70,8 @@ struct writeback_control {
 	loff_t range_start;
 	loff_t range_end;
 
+	enum writeback_sync_modes sync_mode;
+
 	unsigned for_kupdate:1;		/* A kupdate writeback */
 	unsigned for_background:1;	/* A background writeback */
 	unsigned tagged_writepages:1;	/* tag-and-write to avoid livelock */
-- 
cgit v1.3-7-g2ca7


From 0b7c01533aa9f4a228d07d2768d084acb3a387bc Mon Sep 17 00:00:00 2001
From: Fred Isaman <iisaman@netapp.com>
Date: Fri, 20 Apr 2012 14:47:39 -0400
Subject: NFS: add a struct nfs_commit_data to replace nfs_write_data in
 commits

Commits don't need the vectors of pages, etc. that writes do. Split out
a separate structure for the commit operation.

Signed-off-by: Fred Isaman <iisaman@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/direct.c         | 17 ++++++-------
 fs/nfs/internal.h       | 13 +++++-----
 fs/nfs/nfs3proc.c       | 10 ++++++--
 fs/nfs/nfs3xdr.c        |  6 ++---
 fs/nfs/nfs4filelayout.c | 65 ++++++++++++++++++++++++++++++++++---------------
 fs/nfs/nfs4proc.c       | 23 ++++++++++++-----
 fs/nfs/nfs4xdr.c        |  8 +++---
 fs/nfs/proc.c           |  8 +++++-
 fs/nfs/write.c          | 50 ++++++++++++++++++++++---------------
 include/linux/nfs_fs.h  |  4 +--
 include/linux/nfs_xdr.h | 45 +++++++++++++++++++++++++++++++---
 11 files changed, 173 insertions(+), 76 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 8a8942326758..5897dfe48118 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -82,7 +82,7 @@ struct nfs_direct_req {
 
 	/* commit state */
 	struct list_head	rewrite_list;	/* saved nfs_write_data structs */
-	struct nfs_write_data *	commit_data;	/* special write_data for commits */
+	struct nfs_commit_data *commit_data;	/* special write_data for commits */
 	int			flags;
 #define NFS_ODIRECT_DO_COMMIT		(1)	/* an unstable reply was received */
 #define NFS_ODIRECT_RESCHED_WRITES	(2)	/* write verification failed */
@@ -524,7 +524,7 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
 
 static void nfs_direct_commit_result(struct rpc_task *task, void *calldata)
 {
-	struct nfs_write_data *data = calldata;
+	struct nfs_commit_data *data = calldata;
 
 	/* Call the NFS version-specific code */
 	NFS_PROTO(data->inode)->commit_done(task, data);
@@ -532,8 +532,8 @@ static void nfs_direct_commit_result(struct rpc_task *task, void *calldata)
 
 static void nfs_direct_commit_release(void *calldata)
 {
-	struct nfs_write_data *data = calldata;
-	struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
+	struct nfs_commit_data *data = calldata;
+	struct nfs_direct_req *dreq = data->dreq;
 	int status = data->task.tk_status;
 
 	if (status < 0) {
@@ -551,14 +551,14 @@ static void nfs_direct_commit_release(void *calldata)
 }
 
 static const struct rpc_call_ops nfs_commit_direct_ops = {
-	.rpc_call_prepare = nfs_write_prepare,
+	.rpc_call_prepare = nfs_commit_prepare,
 	.rpc_call_done = nfs_direct_commit_result,
 	.rpc_release = nfs_direct_commit_release,
 };
 
 static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
 {
-	struct nfs_write_data *data = dreq->commit_data;
+	struct nfs_commit_data *data = dreq->commit_data;
 	struct rpc_task *task;
 	struct rpc_message msg = {
 		.rpc_argp = &data->args,
@@ -581,9 +581,6 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
 	data->args.fh = NFS_FH(data->inode);
 	data->args.offset = 0;
 	data->args.count = 0;
-	data->args.context = dreq->ctx;
-	data->args.lock_context = dreq->l_ctx;
-	data->res.count = 0;
 	data->res.fattr = &data->fattr;
 	data->res.verf = &data->verf;
 	nfs_fattr_init(&data->fattr);
@@ -625,7 +622,7 @@ static void nfs_alloc_commit_data(struct nfs_direct_req *dreq)
 {
 	dreq->commit_data = nfs_commitdata_alloc();
 	if (dreq->commit_data != NULL)
-		dreq->commit_data->req = (struct nfs_page *) dreq;
+		dreq->commit_data->dreq = dreq;
 }
 #else
 static inline void nfs_alloc_commit_data(struct nfs_direct_req *dreq)
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index b777bdaba4c5..29ab441b22b1 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -314,24 +314,25 @@ extern void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio,
 				  struct inode *inode, int ioflags);
 extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio);
 extern void nfs_writedata_release(struct nfs_write_data *wdata);
-extern void nfs_commit_free(struct nfs_write_data *p);
+extern void nfs_commit_free(struct nfs_commit_data *p);
 extern int nfs_initiate_write(struct nfs_write_data *data,
 			      struct rpc_clnt *clnt,
 			      const struct rpc_call_ops *call_ops,
 			      int how);
 extern void nfs_write_prepare(struct rpc_task *task, void *calldata);
-extern int nfs_initiate_commit(struct nfs_write_data *data,
-			       struct rpc_clnt *clnt,
+extern void nfs_commit_prepare(struct rpc_task *task, void *calldata);
+extern int nfs_initiate_commit(struct rpc_clnt *clnt,
+			       struct nfs_commit_data *data,
 			       const struct rpc_call_ops *call_ops,
 			       int how);
-extern void nfs_init_commit(struct nfs_write_data *data,
+extern void nfs_init_commit(struct nfs_commit_data *data,
 			    struct list_head *head,
 			    struct pnfs_layout_segment *lseg);
 void nfs_retry_commit(struct list_head *page_list,
 		      struct pnfs_layout_segment *lseg);
 void nfs_commit_clear_lock(struct nfs_inode *nfsi);
-void nfs_commitdata_release(void *data);
-void nfs_commit_release_pages(struct nfs_write_data *data);
+void nfs_commitdata_release(struct nfs_commit_data *data);
+void nfs_commit_release_pages(struct nfs_commit_data *data);
 void nfs_request_add_commit_list(struct nfs_page *req, struct list_head *head);
 void nfs_request_remove_commit_list(struct nfs_page *req);
 
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 5242eae6711a..b1daca7f0f7b 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -848,7 +848,12 @@ static void nfs3_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_
 	rpc_call_start(task);
 }
 
-static int nfs3_commit_done(struct rpc_task *task, struct nfs_write_data *data)
+static void nfs3_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data)
+{
+	rpc_call_start(task);
+}
+
+static int nfs3_commit_done(struct rpc_task *task, struct nfs_commit_data *data)
 {
 	if (nfs3_async_handle_jukebox(task, data->inode))
 		return -EAGAIN;
@@ -856,7 +861,7 @@ static int nfs3_commit_done(struct rpc_task *task, struct nfs_write_data *data)
 	return 0;
 }
 
-static void nfs3_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg)
+static void nfs3_proc_commit_setup(struct nfs_commit_data *data, struct rpc_message *msg)
 {
 	msg->rpc_proc = &nfs3_procedures[NFS3PROC_COMMIT];
 }
@@ -907,6 +912,7 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
 	.write_rpc_prepare = nfs3_proc_write_rpc_prepare,
 	.write_done	= nfs3_write_done,
 	.commit_setup	= nfs3_proc_commit_setup,
+	.commit_rpc_prepare = nfs3_proc_commit_rpc_prepare,
 	.commit_done	= nfs3_commit_done,
 	.lock		= nfs3_proc_lock,
 	.clear_acl_cache = nfs3_forget_cached_acls,
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index a77cc9a3ce55..01e53e94f53d 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -1287,7 +1287,7 @@ static void nfs3_xdr_enc_readdirplus3args(struct rpc_rqst *req,
  *	};
  */
 static void encode_commit3args(struct xdr_stream *xdr,
-			       const struct nfs_writeargs *args)
+			       const struct nfs_commitargs *args)
 {
 	__be32 *p;
 
@@ -1300,7 +1300,7 @@ static void encode_commit3args(struct xdr_stream *xdr,
 
 static void nfs3_xdr_enc_commit3args(struct rpc_rqst *req,
 				     struct xdr_stream *xdr,
-				     const struct nfs_writeargs *args)
+				     const struct nfs_commitargs *args)
 {
 	encode_commit3args(xdr, args);
 }
@@ -2319,7 +2319,7 @@ out_status:
  */
 static int nfs3_xdr_dec_commit3res(struct rpc_rqst *req,
 				   struct xdr_stream *xdr,
-				   struct nfs_writeres *result)
+				   struct nfs_commitres *result)
 {
 	enum nfs_stat status;
 	int error;
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 15aeba20d57d..675ce3b8663c 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -250,7 +250,7 @@ static int filelayout_write_done_cb(struct rpc_task *task,
 }
 
 /* Fake up some data that will cause nfs_commit_release to retry the writes. */
-static void prepare_to_resend_writes(struct nfs_write_data *data)
+static void prepare_to_resend_writes(struct nfs_commit_data *data)
 {
 	struct nfs_page *first = nfs_list_entry(data->pages.next);
 
@@ -261,11 +261,11 @@ static void prepare_to_resend_writes(struct nfs_write_data *data)
 }
 
 static int filelayout_commit_done_cb(struct rpc_task *task,
-				     struct nfs_write_data *data)
+				     struct nfs_commit_data *data)
 {
 	int reset = 0;
 
-	if (filelayout_async_handle_error(task, data->args.context->state,
+	if (filelayout_async_handle_error(task, data->context->state,
 					  data->ds_clp, &reset) == -EAGAIN) {
 		dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n",
 			__func__, data->ds_clp, data->ds_clp->cl_session);
@@ -315,15 +315,42 @@ static void filelayout_write_release(void *data)
 	wdata->mds_ops->rpc_release(data);
 }
 
-static void filelayout_commit_release(void *data)
+static void filelayout_commit_prepare(struct rpc_task *task, void *data)
 {
-	struct nfs_write_data *wdata = (struct nfs_write_data *)data;
+	struct nfs_commit_data *wdata = data;
 
-	nfs_commit_release_pages(wdata);
-	if (atomic_dec_and_test(&NFS_I(wdata->inode)->commits_outstanding))
-		nfs_commit_clear_lock(NFS_I(wdata->inode));
-	put_lseg(wdata->lseg);
-	nfs_commitdata_release(wdata);
+	if (nfs41_setup_sequence(wdata->ds_clp->cl_session,
+				&wdata->args.seq_args, &wdata->res.seq_res,
+				task))
+		return;
+
+	rpc_call_start(task);
+}
+
+static void filelayout_write_commit_done(struct rpc_task *task, void *data)
+{
+	struct nfs_commit_data *wdata = data;
+
+	/* Note this may cause RPC to be resent */
+	wdata->mds_ops->rpc_call_done(task, data);
+}
+
+static void filelayout_commit_count_stats(struct rpc_task *task, void *data)
+{
+	struct nfs_commit_data *cdata = data;
+
+	rpc_count_iostats(task, NFS_SERVER(cdata->inode)->client->cl_metrics);
+}
+
+static void filelayout_commit_release(void *calldata)
+{
+	struct nfs_commit_data *data = calldata;
+
+	nfs_commit_release_pages(data);
+	if (atomic_dec_and_test(&NFS_I(data->inode)->commits_outstanding))
+		nfs_commit_clear_lock(NFS_I(data->inode));
+	put_lseg(data->lseg);
+	nfs_commitdata_release(data);
 }
 
 static const struct rpc_call_ops filelayout_read_call_ops = {
@@ -341,9 +368,9 @@ static const struct rpc_call_ops filelayout_write_call_ops = {
 };
 
 static const struct rpc_call_ops filelayout_commit_call_ops = {
-	.rpc_call_prepare = filelayout_write_prepare,
-	.rpc_call_done = filelayout_write_call_done,
-	.rpc_count_stats = filelayout_write_count_stats,
+	.rpc_call_prepare = filelayout_commit_prepare,
+	.rpc_call_done = filelayout_write_commit_done,
+	.rpc_count_stats = filelayout_commit_count_stats,
 	.rpc_release = filelayout_commit_release,
 };
 
@@ -922,7 +949,7 @@ select_ds_fh_from_commit(struct pnfs_layout_segment *lseg, u32 i)
 	return flseg->fh_array[i];
 }
 
-static int filelayout_initiate_commit(struct nfs_write_data *data, int how)
+static int filelayout_initiate_commit(struct nfs_commit_data *data, int how)
 {
 	struct pnfs_layout_segment *lseg = data->lseg;
 	struct nfs4_pnfs_ds *ds;
@@ -941,12 +968,12 @@ static int filelayout_initiate_commit(struct nfs_write_data *data, int how)
 		return -EAGAIN;
 	}
 	dprintk("%s ino %lu, how %d\n", __func__, data->inode->i_ino, how);
-	data->write_done_cb = filelayout_commit_done_cb;
+	data->commit_done_cb = filelayout_commit_done_cb;
 	data->ds_clp = ds->ds_clp;
 	fh = select_ds_fh_from_commit(lseg, data->ds_commit_index);
 	if (fh)
 		data->args.fh = fh;
-	return nfs_initiate_commit(data, ds->ds_clp->cl_rpcclient,
+	return nfs_initiate_commit(ds->ds_clp->cl_rpcclient, data,
 				   &filelayout_commit_call_ops, how);
 }
 
@@ -1008,7 +1035,7 @@ alloc_ds_commits(struct inode *inode, struct list_head *list)
 {
 	struct nfs4_fl_commit_info *fl_cinfo;
 	struct nfs4_fl_commit_bucket *bucket;
-	struct nfs_write_data *data;
+	struct nfs_commit_data *data;
 	int i, j;
 	unsigned int nreq = 0;
 
@@ -1044,7 +1071,7 @@ static int
 filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
 			   int how)
 {
-	struct nfs_write_data	*data, *tmp;
+	struct nfs_commit_data *data, *tmp;
 	LIST_HEAD(list);
 	unsigned int nreq = 0;
 
@@ -1071,7 +1098,7 @@ filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
 		list_del_init(&data->pages);
 		if (!data->lseg) {
 			nfs_init_commit(data, mds_pages, NULL);
-			nfs_initiate_commit(data, NFS_CLIENT(inode),
+			nfs_initiate_commit(NFS_CLIENT(inode), data,
 					    data->mds_ops, how);
 		} else {
 			struct nfs4_fl_commit_info *fl_cinfo;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 75eb883ed4ce..cc04b6e409ed 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3468,7 +3468,17 @@ static void nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_
 	rpc_call_start(task);
 }
 
-static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_write_data *data)
+static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data)
+{
+	if (nfs4_setup_sequence(NFS_SERVER(data->inode),
+				&data->args.seq_args,
+				&data->res.seq_res,
+				task))
+		return;
+	rpc_call_start(task);
+}
+
+static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_commit_data *data)
 {
 	struct inode *inode = data->inode;
 
@@ -3480,14 +3490,14 @@ static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_write_data *dat
 	return 0;
 }
 
-static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data)
+static int nfs4_commit_done(struct rpc_task *task, struct nfs_commit_data *data)
 {
 	if (!nfs4_sequence_done(task, &data->res.seq_res))
 		return -EAGAIN;
-	return data->write_done_cb(task, data);
+	return data->commit_done_cb(task, data);
 }
 
-static void nfs4_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg)
+static void nfs4_proc_commit_setup(struct nfs_commit_data *data, struct rpc_message *msg)
 {
 	struct nfs_server *server = NFS_SERVER(data->inode);
 
@@ -3496,8 +3506,8 @@ static void nfs4_proc_commit_setup(struct nfs_write_data *data, struct rpc_messa
 		data->res.fattr = NULL;
 	} else
 		data->args.bitmask = server->cache_consistency_bitmask;
-	if (!data->write_done_cb)
-		data->write_done_cb = nfs4_commit_done_cb;
+	if (data->commit_done_cb == NULL)
+		data->commit_done_cb = nfs4_commit_done_cb;
 	data->res.server = server;
 	msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT];
 	nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1);
@@ -6591,6 +6601,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
 	.write_rpc_prepare = nfs4_proc_write_rpc_prepare,
 	.write_done	= nfs4_write_done,
 	.commit_setup	= nfs4_proc_commit_setup,
+	.commit_rpc_prepare = nfs4_proc_commit_rpc_prepare,
 	.commit_done	= nfs4_commit_done,
 	.lock		= nfs4_proc_lock,
 	.clear_acl_cache = nfs4_zap_acl_attr,
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index c54aae364bee..4c3cc0ed9543 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1103,7 +1103,7 @@ static void encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg
 	encode_nfs4_stateid(xdr, arg->stateid);
 }
 
-static void encode_commit(struct xdr_stream *xdr, const struct nfs_writeargs *args, struct compound_hdr *hdr)
+static void encode_commit(struct xdr_stream *xdr, const struct nfs_commitargs *args, struct compound_hdr *hdr)
 {
 	__be32 *p;
 
@@ -2448,7 +2448,7 @@ static void nfs4_xdr_enc_write(struct rpc_rqst *req, struct xdr_stream *xdr,
  *  a COMMIT request
  */
 static void nfs4_xdr_enc_commit(struct rpc_rqst *req, struct xdr_stream *xdr,
-				struct nfs_writeargs *args)
+				struct nfs_commitargs *args)
 {
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
@@ -4102,7 +4102,7 @@ static int decode_verifier(struct xdr_stream *xdr, void *verifier)
 	return decode_opaque_fixed(xdr, verifier, NFS4_VERIFIER_SIZE);
 }
 
-static int decode_commit(struct xdr_stream *xdr, struct nfs_writeres *res)
+static int decode_commit(struct xdr_stream *xdr, struct nfs_commitres *res)
 {
 	int status;
 
@@ -6353,7 +6353,7 @@ out:
  * Decode COMMIT response
  */
 static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
-			       struct nfs_writeres *res)
+			       struct nfs_commitres *res)
 {
 	struct compound_hdr hdr;
 	int status;
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index b63b6f4d14fb..bf80503200f5 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -688,8 +688,13 @@ static void nfs_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_d
 	rpc_call_start(task);
 }
 
+static void nfs_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data)
+{
+	BUG();
+}
+
 static void
-nfs_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg)
+nfs_proc_commit_setup(struct nfs_commit_data *data, struct rpc_message *msg)
 {
 	BUG();
 }
@@ -764,6 +769,7 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
 	.write_rpc_prepare = nfs_proc_write_rpc_prepare,
 	.write_done	= nfs_write_done,
 	.commit_setup	= nfs_proc_commit_setup,
+	.commit_rpc_prepare = nfs_proc_commit_rpc_prepare,
 	.lock		= nfs_proc_lock,
 	.lock_check_bounds = nfs_lock_check_bounds,
 	.close_context	= nfs_close_context,
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index c07462320f6b..54f7c0ffe5c3 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -48,11 +48,12 @@ static const struct rpc_call_ops nfs_commit_ops;
 
 static struct kmem_cache *nfs_wdata_cachep;
 static mempool_t *nfs_wdata_mempool;
+static struct kmem_cache *nfs_cdata_cachep;
 static mempool_t *nfs_commit_mempool;
 
-struct nfs_write_data *nfs_commitdata_alloc(void)
+struct nfs_commit_data *nfs_commitdata_alloc(void)
 {
-	struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, GFP_NOFS);
+	struct nfs_commit_data *p = mempool_alloc(nfs_commit_mempool, GFP_NOFS);
 
 	if (p) {
 		memset(p, 0, sizeof(*p));
@@ -62,10 +63,8 @@ struct nfs_write_data *nfs_commitdata_alloc(void)
 }
 EXPORT_SYMBOL_GPL(nfs_commitdata_alloc);
 
-void nfs_commit_free(struct nfs_write_data *p)
+void nfs_commit_free(struct nfs_commit_data *p)
 {
-	if (p && (p->pagevec != &p->page_array[0]))
-		kfree(p->pagevec);
 	mempool_free(p, nfs_commit_mempool);
 }
 EXPORT_SYMBOL_GPL(nfs_commit_free);
@@ -1179,6 +1178,13 @@ void nfs_write_prepare(struct rpc_task *task, void *calldata)
 	NFS_PROTO(data->inode)->write_rpc_prepare(task, data);
 }
 
+void nfs_commit_prepare(struct rpc_task *task, void *calldata)
+{
+	struct nfs_commit_data *data = calldata;
+
+	NFS_PROTO(data->inode)->commit_rpc_prepare(task, data);
+}
+
 static const struct rpc_call_ops nfs_write_partial_ops = {
 	.rpc_call_prepare = nfs_write_prepare,
 	.rpc_call_done = nfs_writeback_done_partial,
@@ -1355,16 +1361,14 @@ void nfs_commit_clear_lock(struct nfs_inode *nfsi)
 }
 EXPORT_SYMBOL_GPL(nfs_commit_clear_lock);
 
-void nfs_commitdata_release(void *data)
+void nfs_commitdata_release(struct nfs_commit_data *data)
 {
-	struct nfs_write_data *wdata = data;
-
-	put_nfs_open_context(wdata->args.context);
-	nfs_commit_free(wdata);
+	put_nfs_open_context(data->context);
+	nfs_commit_free(data);
 }
 EXPORT_SYMBOL_GPL(nfs_commitdata_release);
 
-int nfs_initiate_commit(struct nfs_write_data *data, struct rpc_clnt *clnt,
+int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data,
 			const struct rpc_call_ops *call_ops,
 			int how)
 {
@@ -1403,7 +1407,7 @@ EXPORT_SYMBOL_GPL(nfs_initiate_commit);
 /*
  * Set up the argument/result storage required for the RPC call.
  */
-void nfs_init_commit(struct nfs_write_data *data,
+void nfs_init_commit(struct nfs_commit_data *data,
 			    struct list_head *head,
 			    struct pnfs_layout_segment *lseg)
 {
@@ -1424,8 +1428,7 @@ void nfs_init_commit(struct nfs_write_data *data,
 	/* Note: we always request a commit of the entire inode */
 	data->args.offset = 0;
 	data->args.count  = 0;
-	data->args.context = get_nfs_open_context(first->wb_context);
-	data->res.count   = 0;
+	data->context     = get_nfs_open_context(first->wb_context);
 	data->res.fattr   = &data->fattr;
 	data->res.verf    = &data->verf;
 	nfs_fattr_init(&data->fattr);
@@ -1455,7 +1458,7 @@ EXPORT_SYMBOL_GPL(nfs_retry_commit);
 static int
 nfs_commit_list(struct inode *inode, struct list_head *head, int how)
 {
-	struct nfs_write_data	*data;
+	struct nfs_commit_data	*data;
 
 	data = nfs_commitdata_alloc();
 
@@ -1464,7 +1467,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
 
 	/* Set up the argument struct */
 	nfs_init_commit(data, head, NULL);
-	return nfs_initiate_commit(data, NFS_CLIENT(inode), data->mds_ops, how);
+	return nfs_initiate_commit(NFS_CLIENT(inode), data, data->mds_ops, how);
  out_bad:
 	nfs_retry_commit(head, NULL);
 	nfs_commit_clear_lock(NFS_I(inode));
@@ -1476,7 +1479,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
  */
 static void nfs_commit_done(struct rpc_task *task, void *calldata)
 {
-	struct nfs_write_data	*data = calldata;
+	struct nfs_commit_data	*data = calldata;
 
         dprintk("NFS: %5u nfs_commit_done (status %d)\n",
                                 task->tk_pid, task->tk_status);
@@ -1485,7 +1488,7 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata)
 	NFS_PROTO(data->inode)->commit_done(task, data);
 }
 
-void nfs_commit_release_pages(struct nfs_write_data *data)
+void nfs_commit_release_pages(struct nfs_commit_data *data)
 {
 	struct nfs_page	*req;
 	int status = data->task.tk_status;
@@ -1526,7 +1529,7 @@ EXPORT_SYMBOL_GPL(nfs_commit_release_pages);
 
 static void nfs_commit_release(void *calldata)
 {
-	struct nfs_write_data *data = calldata;
+	struct nfs_commit_data *data = calldata;
 
 	nfs_commit_release_pages(data);
 	nfs_commit_clear_lock(NFS_I(data->inode));
@@ -1534,7 +1537,7 @@ static void nfs_commit_release(void *calldata)
 }
 
 static const struct rpc_call_ops nfs_commit_ops = {
-	.rpc_call_prepare = nfs_write_prepare,
+	.rpc_call_prepare = nfs_commit_prepare,
 	.rpc_call_done = nfs_commit_done,
 	.rpc_release = nfs_commit_release,
 };
@@ -1753,6 +1756,13 @@ int __init nfs_init_writepagecache(void)
 	if (nfs_wdata_mempool == NULL)
 		return -ENOMEM;
 
+	nfs_cdata_cachep = kmem_cache_create("nfs_commit_data",
+					     sizeof(struct nfs_commit_data),
+					     0, SLAB_HWCACHE_ALIGN,
+					     NULL);
+	if (nfs_cdata_cachep == NULL)
+		return -ENOMEM;
+
 	nfs_commit_mempool = mempool_create_slab_pool(MIN_POOL_COMMIT,
 						      nfs_wdata_cachep);
 	if (nfs_commit_mempool == NULL)
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 52a1bdb4ee2b..d5d68f322bf0 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -552,8 +552,8 @@ extern int nfs_wb_page(struct inode *inode, struct page* page);
 extern int nfs_wb_page_cancel(struct inode *inode, struct page* page);
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
 extern int  nfs_commit_inode(struct inode *, int);
-extern struct nfs_write_data *nfs_commitdata_alloc(void);
-extern void nfs_commit_free(struct nfs_write_data *wdata);
+extern struct nfs_commit_data *nfs_commitdata_alloc(void);
+extern void nfs_commit_free(struct nfs_commit_data *data);
 #else
 static inline int
 nfs_commit_inode(struct inode *inode, int how)
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 7ba3551a0414..8fb036a0d489 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -518,6 +518,24 @@ struct nfs_writeres {
 	struct nfs4_sequence_res	seq_res;
 };
 
+/*
+ * Arguments to the commit call.
+ */
+struct nfs_commitargs {
+	struct nfs_fh		*fh;
+	__u64			offset;
+	__u32			count;
+	const u32		*bitmask;
+	struct nfs4_sequence_args	seq_args;
+};
+
+struct nfs_commitres {
+	struct nfs_fattr	*fattr;
+	struct nfs_writeverf	*verf;
+	const struct nfs_server *server;
+	struct nfs4_sequence_res	seq_res;
+};
+
 /*
  * Common arguments to the unlink call
  */
@@ -1171,6 +1189,8 @@ struct nfs_read_data {
 	struct page		*page_array[NFS_PAGEVEC_SIZE];
 };
 
+struct nfs_direct_req;
+
 struct nfs_write_data {
 	struct rpc_task		task;
 	struct inode		*inode;
@@ -1186,7 +1206,6 @@ struct nfs_write_data {
 	struct nfs_writeres	res;		/* result struct */
 	struct pnfs_layout_segment *lseg;
 	struct nfs_client	*ds_clp;	/* pNFS data server */
-	int			ds_commit_index;
 	const struct rpc_call_ops *mds_ops;
 	int (*write_done_cb) (struct rpc_task *task, struct nfs_write_data *data);
 #ifdef CONFIG_NFS_V4
@@ -1197,6 +1216,25 @@ struct nfs_write_data {
 	struct page		*page_array[NFS_PAGEVEC_SIZE];
 };
 
+struct nfs_commit_data {
+	struct rpc_task		task;
+	struct inode		*inode;
+	struct rpc_cred		*cred;
+	struct nfs_fattr	fattr;
+	struct nfs_writeverf	verf;
+	struct list_head	pages;		/* Coalesced requests we wish to flush */
+	struct list_head	list;		/* lists of struct nfs_write_data */
+	struct nfs_direct_req	*dreq;		/* O_DIRECT request */
+	struct nfs_commitargs	args;		/* argument struct */
+	struct nfs_commitres	res;		/* result struct */
+	struct nfs_open_context *context;
+	struct pnfs_layout_segment *lseg;
+	struct nfs_client	*ds_clp;	/* pNFS data server */
+	int			ds_commit_index;
+	const struct rpc_call_ops *mds_ops;
+	int (*commit_done_cb) (struct rpc_task *task, struct nfs_commit_data *data);
+};
+
 struct nfs_unlinkdata {
 	struct hlist_node list;
 	struct nfs_removeargs args;
@@ -1277,8 +1315,9 @@ struct nfs_rpc_ops {
 	void	(*write_setup)  (struct nfs_write_data *, struct rpc_message *);
 	void	(*write_rpc_prepare)(struct rpc_task *, struct nfs_write_data *);
 	int	(*write_done)  (struct rpc_task *, struct nfs_write_data *);
-	void	(*commit_setup) (struct nfs_write_data *, struct rpc_message *);
-	int	(*commit_done) (struct rpc_task *, struct nfs_write_data *);
+	void	(*commit_setup) (struct nfs_commit_data *, struct rpc_message *);
+	void	(*commit_rpc_prepare)(struct rpc_task *, struct nfs_commit_data *);
+	int	(*commit_done) (struct rpc_task *, struct nfs_commit_data *);
 	int	(*lock)(struct file *, int, struct file_lock *);
 	int	(*lock_check_bounds)(const struct file_lock *);
 	void	(*clear_acl_cache)(struct inode *);
-- 
cgit v1.3-7-g2ca7


From cd841605f7a721878d8a2d1362484723d8abf569 Mon Sep 17 00:00:00 2001
From: Fred Isaman <iisaman@netapp.com>
Date: Fri, 20 Apr 2012 14:47:44 -0400
Subject: NFS: create common nfs_pgio_header for both read and write

In order to avoid duplicating all the data in nfs_read_data whenever we
split it up into multiple RPC calls (either due to a short read result
or due to rsize < PAGE_SIZE), we split out the bits that are the same
per RPC call into a separate "header" structure.

The goal this patch moves towards is to have a single header
refcounted by several rpc_data structures.  Thus, want to always refer
from rpc_data to the header, and not the other way.  This patch comes
close to that ideal, but the directio code currently needs some
special casing, isolated in the nfs_direct_[read_write]hdr_release()
functions.  This will be dealt with in a future patch.

Signed-off-by: Fred Isaman <iisaman@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/blocklayout/blocklayout.c |  79 +++++++++++++++--------------
 fs/nfs/direct.c                  |  73 ++++++++++++++++++---------
 fs/nfs/internal.h                |   4 ++
 fs/nfs/nfs3proc.c                |  14 ++++--
 fs/nfs/nfs4filelayout.c          |  40 ++++++++-------
 fs/nfs/nfs4proc.c                |  44 ++++++++++-------
 fs/nfs/objlayout/objio_osd.c     |  16 +++---
 fs/nfs/objlayout/objlayout.c     |  19 ++++---
 fs/nfs/pnfs.c                    | 102 ++++++++++++++++++++++----------------
 fs/nfs/proc.c                    |  10 ++--
 fs/nfs/read.c                    |  89 +++++++++++++++++++--------------
 fs/nfs/write.c                   | 104 ++++++++++++++++++++++-----------------
 include/linux/nfs_fs.h           |  12 -----
 include/linux/nfs_xdr.h          |  48 ++++++++++--------
 14 files changed, 376 insertions(+), 278 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 7f6a23f0244e..7a482517f4c6 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -187,7 +187,6 @@ static void bl_end_io_read(struct bio *bio, int err)
 	struct parallel_io *par = bio->bi_private;
 	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
 	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
-	struct nfs_read_data *rdata = (struct nfs_read_data *)par->data;
 
 	do {
 		struct page *page = bvec->bv_page;
@@ -198,9 +197,12 @@ static void bl_end_io_read(struct bio *bio, int err)
 			SetPageUptodate(page);
 	} while (bvec >= bio->bi_io_vec);
 	if (!uptodate) {
-		if (!rdata->pnfs_error)
-			rdata->pnfs_error = -EIO;
-		pnfs_set_lo_fail(rdata->lseg);
+		struct nfs_read_data *rdata = par->data;
+		struct nfs_pgio_header *header = rdata->header;
+
+		if (!header->pnfs_error)
+			header->pnfs_error = -EIO;
+		pnfs_set_lo_fail(header->lseg);
 	}
 	bio_put(bio);
 	put_parallel(par);
@@ -221,7 +223,7 @@ bl_end_par_io_read(void *data, int unused)
 {
 	struct nfs_read_data *rdata = data;
 
-	rdata->task.tk_status = rdata->pnfs_error;
+	rdata->task.tk_status = rdata->header->pnfs_error;
 	INIT_WORK(&rdata->task.u.tk_work, bl_read_cleanup);
 	schedule_work(&rdata->task.u.tk_work);
 }
@@ -229,6 +231,7 @@ bl_end_par_io_read(void *data, int unused)
 static enum pnfs_try_status
 bl_read_pagelist(struct nfs_read_data *rdata)
 {
+	struct nfs_pgio_header *header = rdata->header;
 	int i, hole;
 	struct bio *bio = NULL;
 	struct pnfs_block_extent *be = NULL, *cow_read = NULL;
@@ -256,10 +259,10 @@ bl_read_pagelist(struct nfs_read_data *rdata)
 			bl_put_extent(cow_read);
 			bio = bl_submit_bio(READ, bio);
 			/* Get the next one */
-			be = bl_find_get_extent(BLK_LSEG2EXT(rdata->lseg),
+			be = bl_find_get_extent(BLK_LSEG2EXT(header->lseg),
 					     isect, &cow_read);
 			if (!be) {
-				rdata->pnfs_error = -EIO;
+				header->pnfs_error = -EIO;
 				goto out;
 			}
 			extent_length = be->be_length -
@@ -286,7 +289,7 @@ bl_read_pagelist(struct nfs_read_data *rdata)
 						 isect, pages[i], be_read,
 						 bl_end_io_read, par);
 			if (IS_ERR(bio)) {
-				rdata->pnfs_error = PTR_ERR(bio);
+				header->pnfs_error = PTR_ERR(bio);
 				bio = NULL;
 				goto out;
 			}
@@ -294,9 +297,9 @@ bl_read_pagelist(struct nfs_read_data *rdata)
 		isect += PAGE_CACHE_SECTORS;
 		extent_length -= PAGE_CACHE_SECTORS;
 	}
-	if ((isect << SECTOR_SHIFT) >= rdata->inode->i_size) {
+	if ((isect << SECTOR_SHIFT) >= header->inode->i_size) {
 		rdata->res.eof = 1;
-		rdata->res.count = rdata->inode->i_size - f_offset;
+		rdata->res.count = header->inode->i_size - f_offset;
 	} else {
 		rdata->res.count = (isect << SECTOR_SHIFT) - f_offset;
 	}
@@ -345,7 +348,6 @@ static void bl_end_io_write_zero(struct bio *bio, int err)
 	struct parallel_io *par = bio->bi_private;
 	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
 	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
-	struct nfs_write_data *wdata = (struct nfs_write_data *)par->data;
 
 	do {
 		struct page *page = bvec->bv_page;
@@ -358,9 +360,12 @@ static void bl_end_io_write_zero(struct bio *bio, int err)
 	} while (bvec >= bio->bi_io_vec);
 
 	if (unlikely(!uptodate)) {
-		if (!wdata->pnfs_error)
-			wdata->pnfs_error = -EIO;
-		pnfs_set_lo_fail(wdata->lseg);
+		struct nfs_write_data *data = par->data;
+		struct nfs_pgio_header *header = data->header;
+
+		if (!header->pnfs_error)
+			header->pnfs_error = -EIO;
+		pnfs_set_lo_fail(header->lseg);
 	}
 	bio_put(bio);
 	put_parallel(par);
@@ -370,12 +375,13 @@ static void bl_end_io_write(struct bio *bio, int err)
 {
 	struct parallel_io *par = bio->bi_private;
 	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
-	struct nfs_write_data *wdata = (struct nfs_write_data *)par->data;
+	struct nfs_write_data *data = par->data;
+	struct nfs_pgio_header *header = data->header;
 
 	if (!uptodate) {
-		if (!wdata->pnfs_error)
-			wdata->pnfs_error = -EIO;
-		pnfs_set_lo_fail(wdata->lseg);
+		if (!header->pnfs_error)
+			header->pnfs_error = -EIO;
+		pnfs_set_lo_fail(header->lseg);
 	}
 	bio_put(bio);
 	put_parallel(par);
@@ -391,9 +397,9 @@ static void bl_write_cleanup(struct work_struct *work)
 	dprintk("%s enter\n", __func__);
 	task = container_of(work, struct rpc_task, u.tk_work);
 	wdata = container_of(task, struct nfs_write_data, task);
-	if (likely(!wdata->pnfs_error)) {
+	if (likely(!wdata->header->pnfs_error)) {
 		/* Marks for LAYOUTCOMMIT */
-		mark_extents_written(BLK_LSEG2EXT(wdata->lseg),
+		mark_extents_written(BLK_LSEG2EXT(wdata->header->lseg),
 				     wdata->args.offset, wdata->args.count);
 	}
 	pnfs_ld_write_done(wdata);
@@ -404,12 +410,12 @@ static void bl_end_par_io_write(void *data, int num_se)
 {
 	struct nfs_write_data *wdata = data;
 
-	if (unlikely(wdata->pnfs_error)) {
-		bl_free_short_extents(&BLK_LSEG2EXT(wdata->lseg)->bl_inval,
+	if (unlikely(wdata->header->pnfs_error)) {
+		bl_free_short_extents(&BLK_LSEG2EXT(wdata->header->lseg)->bl_inval,
 					num_se);
 	}
 
-	wdata->task.tk_status = wdata->pnfs_error;
+	wdata->task.tk_status = wdata->header->pnfs_error;
 	wdata->verf.committed = NFS_FILE_SYNC;
 	INIT_WORK(&wdata->task.u.tk_work, bl_write_cleanup);
 	schedule_work(&wdata->task.u.tk_work);
@@ -540,6 +546,7 @@ check_page:
 static enum pnfs_try_status
 bl_write_pagelist(struct nfs_write_data *wdata, int sync)
 {
+	struct nfs_pgio_header *header = wdata->header;
 	int i, ret, npg_zero, pg_index, last = 0;
 	struct bio *bio = NULL;
 	struct pnfs_block_extent *be = NULL, *cow_read = NULL;
@@ -552,7 +559,7 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)
 	pgoff_t index;
 	u64 temp;
 	int npg_per_block =
-	    NFS_SERVER(wdata->inode)->pnfs_blksize >> PAGE_CACHE_SHIFT;
+	    NFS_SERVER(header->inode)->pnfs_blksize >> PAGE_CACHE_SHIFT;
 
 	dprintk("%s enter, %Zu@%lld\n", __func__, count, offset);
 	/* At this point, wdata->pages is a (sequential) list of nfs_pages.
@@ -566,7 +573,7 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)
 	/* At this point, have to be more careful with error handling */
 
 	isect = (sector_t) ((offset & (long)PAGE_CACHE_MASK) >> SECTOR_SHIFT);
-	be = bl_find_get_extent(BLK_LSEG2EXT(wdata->lseg), isect, &cow_read);
+	be = bl_find_get_extent(BLK_LSEG2EXT(header->lseg), isect, &cow_read);
 	if (!be || !is_writable(be, isect)) {
 		dprintk("%s no matching extents!\n", __func__);
 		goto out_mds;
@@ -597,10 +604,10 @@ fill_invalid_ext:
 			dprintk("%s zero %dth page: index %lu isect %llu\n",
 				__func__, npg_zero, index,
 				(unsigned long long)isect);
-			page = bl_find_get_zeroing_page(wdata->inode, index,
+			page = bl_find_get_zeroing_page(header->inode, index,
 							cow_read);
 			if (unlikely(IS_ERR(page))) {
-				wdata->pnfs_error = PTR_ERR(page);
+				header->pnfs_error = PTR_ERR(page);
 				goto out;
 			} else if (page == NULL)
 				goto next_page;
@@ -612,7 +619,7 @@ fill_invalid_ext:
 					__func__, ret);
 				end_page_writeback(page);
 				page_cache_release(page);
-				wdata->pnfs_error = ret;
+				header->pnfs_error = ret;
 				goto out;
 			}
 			if (likely(!bl_push_one_short_extent(be->be_inval)))
@@ -620,11 +627,11 @@ fill_invalid_ext:
 			else {
 				end_page_writeback(page);
 				page_cache_release(page);
-				wdata->pnfs_error = -ENOMEM;
+				header->pnfs_error = -ENOMEM;
 				goto out;
 			}
 			/* FIXME: This should be done in bi_end_io */
-			mark_extents_written(BLK_LSEG2EXT(wdata->lseg),
+			mark_extents_written(BLK_LSEG2EXT(header->lseg),
 					     page->index << PAGE_CACHE_SHIFT,
 					     PAGE_CACHE_SIZE);
 
@@ -632,7 +639,7 @@ fill_invalid_ext:
 						 isect, page, be,
 						 bl_end_io_write_zero, par);
 			if (IS_ERR(bio)) {
-				wdata->pnfs_error = PTR_ERR(bio);
+				header->pnfs_error = PTR_ERR(bio);
 				bio = NULL;
 				goto out;
 			}
@@ -653,10 +660,10 @@ next_page:
 			bl_put_extent(be);
 			bio = bl_submit_bio(WRITE, bio);
 			/* Get the next one */
-			be = bl_find_get_extent(BLK_LSEG2EXT(wdata->lseg),
+			be = bl_find_get_extent(BLK_LSEG2EXT(header->lseg),
 					     isect, NULL);
 			if (!be || !is_writable(be, isect)) {
-				wdata->pnfs_error = -EINVAL;
+				header->pnfs_error = -EINVAL;
 				goto out;
 			}
 			if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
@@ -664,7 +671,7 @@ next_page:
 								be->be_inval)))
 					par->bse_count++;
 				else {
-					wdata->pnfs_error = -ENOMEM;
+					header->pnfs_error = -ENOMEM;
 					goto out;
 				}
 			}
@@ -677,7 +684,7 @@ next_page:
 			if (unlikely(ret)) {
 				dprintk("%s bl_mark_sectors_init fail %d\n",
 					__func__, ret);
-				wdata->pnfs_error = ret;
+				header->pnfs_error = ret;
 				goto out;
 			}
 		}
@@ -685,7 +692,7 @@ next_page:
 					 isect, pages[i], be,
 					 bl_end_io_write, par);
 		if (IS_ERR(bio)) {
-			wdata->pnfs_error = PTR_ERR(bio);
+			header->pnfs_error = PTR_ERR(bio);
 			bio = NULL;
 			goto out;
 		}
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index fb7fbaa79c20..56176af1436f 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -242,7 +242,7 @@ static void nfs_direct_read_release(void *calldata)
 {
 
 	struct nfs_read_data *data = calldata;
-	struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
+	struct nfs_direct_req *dreq = (struct nfs_direct_req *)data->header->req;
 	int status = data->task.tk_status;
 
 	spin_lock(&dreq->lock);
@@ -269,6 +269,15 @@ static const struct rpc_call_ops nfs_read_direct_ops = {
 	.rpc_release = nfs_direct_read_release,
 };
 
+static void nfs_direct_readhdr_release(struct nfs_read_header *rhdr)
+{
+	struct nfs_read_data *data = &rhdr->rpc_data;
+
+	if (data->pagevec != data->page_array)
+		kfree(data->pagevec);
+	nfs_readhdr_free(&rhdr->header);
+}
+
 /*
  * For each rsize'd chunk of the user's buffer, dispatch an NFS READ
  * operation.  If nfs_readdata_alloc() or get_user_pages() fails,
@@ -301,6 +310,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
 	ssize_t started = 0;
 
 	do {
+		struct nfs_read_header *rhdr;
 		struct nfs_read_data *data;
 		size_t bytes;
 
@@ -308,23 +318,24 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
 		bytes = min(rsize,count);
 
 		result = -ENOMEM;
-		data = nfs_readdata_alloc(nfs_page_array_len(pgbase, bytes));
-		if (unlikely(!data))
+		rhdr = nfs_readhdr_alloc(nfs_page_array_len(pgbase, bytes));
+		if (unlikely(!rhdr))
 			break;
+		data = &rhdr->rpc_data;
 
 		down_read(&current->mm->mmap_sem);
 		result = get_user_pages(current, current->mm, user_addr,
 					data->npages, 1, 0, data->pagevec, NULL);
 		up_read(&current->mm->mmap_sem);
 		if (result < 0) {
-			nfs_readdata_free(data);
+			nfs_direct_readhdr_release(rhdr);
 			break;
 		}
 		if ((unsigned)result < data->npages) {
 			bytes = result * PAGE_SIZE;
 			if (bytes <= pgbase) {
 				nfs_direct_release_pages(data->pagevec, result);
-				nfs_readdata_free(data);
+				nfs_direct_readhdr_release(rhdr);
 				break;
 			}
 			bytes -= pgbase;
@@ -333,9 +344,9 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
 
 		get_dreq(dreq);
 
-		data->req = (struct nfs_page *) dreq;
-		data->inode = inode;
-		data->cred = msg.rpc_cred;
+		rhdr->header.req = (struct nfs_page *) dreq;
+		rhdr->header.inode = inode;
+		rhdr->header.cred = msg.rpc_cred;
 		data->args.fh = NFS_FH(inode);
 		data->args.context = get_nfs_open_context(ctx);
 		data->args.lock_context = dreq->l_ctx;
@@ -447,13 +458,23 @@ out:
 	return result;
 }
 
+static void nfs_direct_writehdr_release(struct nfs_write_header *whdr)
+{
+	struct nfs_write_data *data = &whdr->rpc_data;
+
+	if (data->pagevec != data->page_array)
+		kfree(data->pagevec);
+	nfs_writehdr_free(&whdr->header);
+}
+
 static void nfs_direct_free_writedata(struct nfs_direct_req *dreq)
 {
 	while (!list_empty(&dreq->rewrite_list)) {
-		struct nfs_write_data *data = list_entry(dreq->rewrite_list.next, struct nfs_write_data, pages);
-		list_del(&data->pages);
-		nfs_direct_release_pages(data->pagevec, data->npages);
-		nfs_writedata_free(data);
+		struct nfs_pgio_header *hdr = list_entry(dreq->rewrite_list.next, struct nfs_pgio_header, pages);
+		struct nfs_write_header *whdr = container_of(hdr, struct nfs_write_header, header);
+		list_del(&hdr->pages);
+		nfs_direct_release_pages(whdr->rpc_data.pagevec, whdr->rpc_data.npages);
+		nfs_direct_writehdr_release(whdr);
 	}
 }
 
@@ -463,6 +484,7 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
 	struct inode *inode = dreq->inode;
 	struct list_head *p;
 	struct nfs_write_data *data;
+	struct nfs_pgio_header *hdr;
 	struct rpc_task *task;
 	struct rpc_message msg = {
 		.rpc_cred = dreq->ctx->cred,
@@ -479,7 +501,8 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
 	get_dreq(dreq);
 
 	list_for_each(p, &dreq->rewrite_list) {
-		data = list_entry(p, struct nfs_write_data, pages);
+		hdr = list_entry(p, struct nfs_pgio_header, pages);
+		data = &(container_of(hdr, struct nfs_write_header, header))->rpc_data;
 
 		get_dreq(dreq);
 
@@ -652,7 +675,8 @@ static void nfs_direct_write_result(struct rpc_task *task, void *calldata)
 static void nfs_direct_write_release(void *calldata)
 {
 	struct nfs_write_data *data = calldata;
-	struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
+	struct nfs_pgio_header *hdr = data->header;
+	struct nfs_direct_req *dreq = (struct nfs_direct_req *) hdr->req;
 	int status = data->task.tk_status;
 
 	spin_lock(&dreq->lock);
@@ -684,7 +708,7 @@ out_unlock:
 	spin_unlock(&dreq->lock);
 
 	if (put_dreq(dreq))
-		nfs_direct_write_complete(dreq, data->inode);
+		nfs_direct_write_complete(dreq, hdr->inode);
 }
 
 static const struct rpc_call_ops nfs_write_direct_ops = {
@@ -725,6 +749,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
 	ssize_t started = 0;
 
 	do {
+		struct nfs_write_header *whdr;
 		struct nfs_write_data *data;
 		size_t bytes;
 
@@ -732,23 +757,25 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
 		bytes = min(wsize,count);
 
 		result = -ENOMEM;
-		data = nfs_writedata_alloc(nfs_page_array_len(pgbase, bytes));
-		if (unlikely(!data))
+		whdr = nfs_writehdr_alloc(nfs_page_array_len(pgbase, bytes));
+		if (unlikely(!whdr))
 			break;
 
+		data = &whdr->rpc_data;
+
 		down_read(&current->mm->mmap_sem);
 		result = get_user_pages(current, current->mm, user_addr,
 					data->npages, 0, 0, data->pagevec, NULL);
 		up_read(&current->mm->mmap_sem);
 		if (result < 0) {
-			nfs_writedata_free(data);
+			nfs_direct_writehdr_release(whdr);
 			break;
 		}
 		if ((unsigned)result < data->npages) {
 			bytes = result * PAGE_SIZE;
 			if (bytes <= pgbase) {
 				nfs_direct_release_pages(data->pagevec, result);
-				nfs_writedata_free(data);
+				nfs_direct_writehdr_release(whdr);
 				break;
 			}
 			bytes -= pgbase;
@@ -757,11 +784,11 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
 
 		get_dreq(dreq);
 
-		list_move_tail(&data->pages, &dreq->rewrite_list);
+		list_move_tail(&whdr->header.pages, &dreq->rewrite_list);
 
-		data->req = (struct nfs_page *) dreq;
-		data->inode = inode;
-		data->cred = msg.rpc_cred;
+		whdr->header.req = (struct nfs_page *) dreq;
+		whdr->header.inode = inode;
+		whdr->header.cred = msg.rpc_cred;
 		data->args.fh = NFS_FH(inode);
 		data->args.context = ctx;
 		data->args.lock_context = dreq->l_ctx;
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 650127fd24bb..7dc9be1a6e1a 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -296,6 +296,8 @@ extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh);
 
 struct nfs_pageio_descriptor;
 /* read.c */
+extern struct nfs_read_header *nfs_readhdr_alloc(unsigned int npages);
+extern void nfs_readhdr_free(struct nfs_pgio_header *hdr);
 extern int nfs_initiate_read(struct rpc_clnt *clnt,
 			     struct nfs_read_data *data,
 			     const struct rpc_call_ops *call_ops);
@@ -309,6 +311,8 @@ extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio);
 extern void nfs_readdata_release(struct nfs_read_data *rdata);
 
 /* write.c */
+extern struct nfs_write_header *nfs_writehdr_alloc(unsigned int npages);
+extern void nfs_writehdr_free(struct nfs_pgio_header *hdr);
 extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc,
 		struct list_head *head);
 extern void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio,
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index b1daca7f0f7b..56dcefc2f3f7 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -811,11 +811,13 @@ nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
 
 static int nfs3_read_done(struct rpc_task *task, struct nfs_read_data *data)
 {
-	if (nfs3_async_handle_jukebox(task, data->inode))
+	struct inode *inode = data->header->inode;
+
+	if (nfs3_async_handle_jukebox(task, inode))
 		return -EAGAIN;
 
-	nfs_invalidate_atime(data->inode);
-	nfs_refresh_inode(data->inode, &data->fattr);
+	nfs_invalidate_atime(inode);
+	nfs_refresh_inode(inode, &data->fattr);
 	return 0;
 }
 
@@ -831,10 +833,12 @@ static void nfs3_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_da
 
 static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data)
 {
-	if (nfs3_async_handle_jukebox(task, data->inode))
+	struct inode *inode = data->header->inode;
+
+	if (nfs3_async_handle_jukebox(task, inode))
 		return -EAGAIN;
 	if (task->tk_status >= 0)
-		nfs_post_op_update_inode_force_wcc(data->inode, data->res.fattr);
+		nfs_post_op_update_inode_force_wcc(inode, data->res.fattr);
 	return 0;
 }
 
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index c536328557cb..ad1d68013a5b 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -148,6 +148,7 @@ wait_on_recovery:
 static int filelayout_read_done_cb(struct rpc_task *task,
 				struct nfs_read_data *data)
 {
+	struct nfs_pgio_header *hdr = data->header;
 	int reset = 0;
 
 	dprintk("%s DS read\n", __func__);
@@ -157,7 +158,7 @@ static int filelayout_read_done_cb(struct rpc_task *task,
 		dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n",
 			__func__, data->ds_clp, data->ds_clp->cl_session);
 		if (reset) {
-			pnfs_set_lo_fail(data->lseg);
+			pnfs_set_lo_fail(hdr->lseg);
 			nfs4_reset_read(task, data);
 		}
 		rpc_restart_call_prepare(task);
@@ -175,13 +176,15 @@ static int filelayout_read_done_cb(struct rpc_task *task,
 static void
 filelayout_set_layoutcommit(struct nfs_write_data *wdata)
 {
-	if (FILELAYOUT_LSEG(wdata->lseg)->commit_through_mds ||
+	struct nfs_pgio_header *hdr = wdata->header;
+
+	if (FILELAYOUT_LSEG(hdr->lseg)->commit_through_mds ||
 	    wdata->res.verf->committed == NFS_FILE_SYNC)
 		return;
 
 	pnfs_set_layoutcommit(wdata);
-	dprintk("%s ionde %lu pls_end_pos %lu\n", __func__, wdata->inode->i_ino,
-		(unsigned long) NFS_I(wdata->inode)->layout->plh_lwb);
+	dprintk("%s ionde %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino,
+		(unsigned long) NFS_I(hdr->inode)->layout->plh_lwb);
 }
 
 /*
@@ -210,27 +213,28 @@ static void filelayout_read_call_done(struct rpc_task *task, void *data)
 	dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status);
 
 	/* Note this may cause RPC to be resent */
-	rdata->mds_ops->rpc_call_done(task, data);
+	rdata->header->mds_ops->rpc_call_done(task, data);
 }
 
 static void filelayout_read_count_stats(struct rpc_task *task, void *data)
 {
 	struct nfs_read_data *rdata = data;
 
-	rpc_count_iostats(task, NFS_SERVER(rdata->inode)->client->cl_metrics);
+	rpc_count_iostats(task, NFS_SERVER(rdata->header->inode)->client->cl_metrics);
 }
 
 static void filelayout_read_release(void *data)
 {
 	struct nfs_read_data *rdata = data;
 
-	put_lseg(rdata->lseg);
-	rdata->mds_ops->rpc_release(data);
+	put_lseg(rdata->header->lseg);
+	rdata->header->mds_ops->rpc_release(data);
 }
 
 static int filelayout_write_done_cb(struct rpc_task *task,
 				struct nfs_write_data *data)
 {
+	struct nfs_pgio_header *hdr = data->header;
 	int reset = 0;
 
 	if (filelayout_async_handle_error(task, data->args.context->state,
@@ -238,7 +242,7 @@ static int filelayout_write_done_cb(struct rpc_task *task,
 		dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n",
 			__func__, data->ds_clp, data->ds_clp->cl_session);
 		if (reset) {
-			pnfs_set_lo_fail(data->lseg);
+			pnfs_set_lo_fail(hdr->lseg);
 			nfs4_reset_write(task, data);
 		}
 		rpc_restart_call_prepare(task);
@@ -297,22 +301,22 @@ static void filelayout_write_call_done(struct rpc_task *task, void *data)
 	struct nfs_write_data *wdata = data;
 
 	/* Note this may cause RPC to be resent */
-	wdata->mds_ops->rpc_call_done(task, data);
+	wdata->header->mds_ops->rpc_call_done(task, data);
 }
 
 static void filelayout_write_count_stats(struct rpc_task *task, void *data)
 {
 	struct nfs_write_data *wdata = data;
 
-	rpc_count_iostats(task, NFS_SERVER(wdata->inode)->client->cl_metrics);
+	rpc_count_iostats(task, NFS_SERVER(wdata->header->inode)->client->cl_metrics);
 }
 
 static void filelayout_write_release(void *data)
 {
 	struct nfs_write_data *wdata = data;
 
-	put_lseg(wdata->lseg);
-	wdata->mds_ops->rpc_release(data);
+	put_lseg(wdata->header->lseg);
+	wdata->header->mds_ops->rpc_release(data);
 }
 
 static void filelayout_commit_prepare(struct rpc_task *task, void *data)
@@ -377,7 +381,8 @@ static const struct rpc_call_ops filelayout_commit_call_ops = {
 static enum pnfs_try_status
 filelayout_read_pagelist(struct nfs_read_data *data)
 {
-	struct pnfs_layout_segment *lseg = data->lseg;
+	struct nfs_pgio_header *hdr = data->header;
+	struct pnfs_layout_segment *lseg = hdr->lseg;
 	struct nfs4_pnfs_ds *ds;
 	loff_t offset = data->args.offset;
 	u32 j, idx;
@@ -385,7 +390,7 @@ filelayout_read_pagelist(struct nfs_read_data *data)
 	int status;
 
 	dprintk("--> %s ino %lu pgbase %u req %Zu@%llu\n",
-		__func__, data->inode->i_ino,
+		__func__, hdr->inode->i_ino,
 		data->args.pgbase, (size_t)data->args.count, offset);
 
 	if (test_bit(NFS_DEVICEID_INVALID, &FILELAYOUT_DEVID_NODE(lseg)->flags))
@@ -423,7 +428,8 @@ filelayout_read_pagelist(struct nfs_read_data *data)
 static enum pnfs_try_status
 filelayout_write_pagelist(struct nfs_write_data *data, int sync)
 {
-	struct pnfs_layout_segment *lseg = data->lseg;
+	struct nfs_pgio_header *hdr = data->header;
+	struct pnfs_layout_segment *lseg = hdr->lseg;
 	struct nfs4_pnfs_ds *ds;
 	loff_t offset = data->args.offset;
 	u32 j, idx;
@@ -445,7 +451,7 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync)
 		return PNFS_NOT_ATTEMPTED;
 	}
 	dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s\n", __func__,
-		data->inode->i_ino, sync, (size_t) data->args.count, offset,
+		hdr->inode->i_ino, sync, (size_t) data->args.count, offset,
 		ds->ds_remotestr);
 
 	data->write_done_cb = filelayout_write_done_cb;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index cc04b6e409ed..5375862075de 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3336,12 +3336,12 @@ static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
 
 void __nfs4_read_done_cb(struct nfs_read_data *data)
 {
-	nfs_invalidate_atime(data->inode);
+	nfs_invalidate_atime(data->header->inode);
 }
 
 static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data)
 {
-	struct nfs_server *server = NFS_SERVER(data->inode);
+	struct nfs_server *server = NFS_SERVER(data->header->inode);
 
 	if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) {
 		rpc_restart_call_prepare(task);
@@ -3376,7 +3376,7 @@ static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message
 
 static void nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data)
 {
-	if (nfs4_setup_sequence(NFS_SERVER(data->inode),
+	if (nfs4_setup_sequence(NFS_SERVER(data->header->inode),
 				&data->args.seq_args,
 				&data->res.seq_res,
 				task))
@@ -3387,22 +3387,25 @@ static void nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_da
 /* Reset the the nfs_read_data to send the read to the MDS. */
 void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data)
 {
+	struct nfs_pgio_header *hdr = data->header;
+	struct inode *inode = hdr->inode;
+
 	dprintk("%s Reset task for i/o through\n", __func__);
-	put_lseg(data->lseg);
-	data->lseg = NULL;
+	put_lseg(hdr->lseg);
+	hdr->lseg = NULL;
+	data->ds_clp = NULL;
 	/* offsets will differ in the dense stripe case */
 	data->args.offset = data->mds_offset;
-	data->ds_clp = NULL;
-	data->args.fh     = NFS_FH(data->inode);
+	data->args.fh     = NFS_FH(inode);
 	data->read_done_cb = nfs4_read_done_cb;
-	task->tk_ops = data->mds_ops;
-	rpc_task_reset_client(task, NFS_CLIENT(data->inode));
+	task->tk_ops = hdr->mds_ops;
+	rpc_task_reset_client(task, NFS_CLIENT(inode));
 }
 EXPORT_SYMBOL_GPL(nfs4_reset_read);
 
 static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data)
 {
-	struct inode *inode = data->inode;
+	struct inode *inode = data->header->inode;
 	
 	if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) {
 		rpc_restart_call_prepare(task);
@@ -3426,25 +3429,28 @@ static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
 /* Reset the the nfs_write_data to send the write to the MDS. */
 void nfs4_reset_write(struct rpc_task *task, struct nfs_write_data *data)
 {
+	struct nfs_pgio_header *hdr = data->header;
+	struct inode *inode = hdr->inode;
+
 	dprintk("%s Reset task for i/o through\n", __func__);
-	put_lseg(data->lseg);
-	data->lseg          = NULL;
-	data->ds_clp        = NULL;
+	put_lseg(hdr->lseg);
+	hdr->lseg        = NULL;
+	data->ds_clp     = NULL;
 	data->write_done_cb = nfs4_write_done_cb;
-	data->args.fh       = NFS_FH(data->inode);
+	data->args.fh       = NFS_FH(inode);
 	data->args.bitmask  = data->res.server->cache_consistency_bitmask;
 	data->args.offset   = data->mds_offset;
 	data->res.fattr     = &data->fattr;
-	task->tk_ops        = data->mds_ops;
-	rpc_task_reset_client(task, NFS_CLIENT(data->inode));
+	task->tk_ops        = hdr->mds_ops;
+	rpc_task_reset_client(task, NFS_CLIENT(inode));
 }
 EXPORT_SYMBOL_GPL(nfs4_reset_write);
 
 static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg)
 {
-	struct nfs_server *server = NFS_SERVER(data->inode);
+	struct nfs_server *server = NFS_SERVER(data->header->inode);
 
-	if (data->lseg) {
+	if (data->header->lseg) {
 		data->args.bitmask = NULL;
 		data->res.fattr = NULL;
 	} else
@@ -3460,7 +3466,7 @@ static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_messag
 
 static void nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data)
 {
-	if (nfs4_setup_sequence(NFS_SERVER(data->inode),
+	if (nfs4_setup_sequence(NFS_SERVER(data->header->inode),
 				&data->args.seq_args,
 				&data->res.seq_res,
 				task))
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index 4bff4a3dab46..fbf4874ec252 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -440,11 +440,12 @@ static void _read_done(struct ore_io_state *ios, void *private)
 
 int objio_read_pagelist(struct nfs_read_data *rdata)
 {
+	struct nfs_pgio_header *hdr = rdata->header;
 	struct objio_state *objios;
 	int ret;
 
-	ret = objio_alloc_io_state(NFS_I(rdata->inode)->layout, true,
-			rdata->lseg, rdata->args.pages, rdata->args.pgbase,
+	ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, true,
+			hdr->lseg, rdata->args.pages, rdata->args.pgbase,
 			rdata->args.offset, rdata->args.count, rdata,
 			GFP_KERNEL, &objios);
 	if (unlikely(ret))
@@ -483,12 +484,12 @@ static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate)
 {
 	struct objio_state *objios = priv;
 	struct nfs_write_data *wdata = objios->oir.rpcdata;
+	struct address_space *mapping = wdata->header->inode->i_mapping;
 	pgoff_t index = offset / PAGE_SIZE;
-	struct page *page = find_get_page(wdata->inode->i_mapping, index);
+	struct page *page = find_get_page(mapping, index);
 
 	if (!page) {
-		page = find_or_create_page(wdata->inode->i_mapping,
-						index, GFP_NOFS);
+		page = find_or_create_page(mapping, index, GFP_NOFS);
 		if (unlikely(!page)) {
 			dprintk("%s: grab_cache_page Failed index=0x%lx\n",
 				__func__, index);
@@ -518,11 +519,12 @@ static const struct _ore_r4w_op _r4w_op = {
 
 int objio_write_pagelist(struct nfs_write_data *wdata, int how)
 {
+	struct nfs_pgio_header *hdr = wdata->header;
 	struct objio_state *objios;
 	int ret;
 
-	ret = objio_alloc_io_state(NFS_I(wdata->inode)->layout, false,
-			wdata->lseg, wdata->args.pages, wdata->args.pgbase,
+	ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, false,
+			hdr->lseg, wdata->args.pages, wdata->args.pgbase,
 			wdata->args.offset, wdata->args.count, wdata, GFP_NOFS,
 			&objios);
 	if (unlikely(ret))
diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c
index 595c5fc21a19..874613545301 100644
--- a/fs/nfs/objlayout/objlayout.c
+++ b/fs/nfs/objlayout/objlayout.c
@@ -258,7 +258,7 @@ objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
 	if (status >= 0)
 		rdata->res.count = status;
 	else
-		rdata->pnfs_error = status;
+		rdata->header->pnfs_error = status;
 	objlayout_iodone(oir);
 	/* must not use oir after this point */
 
@@ -279,12 +279,14 @@ objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
 enum pnfs_try_status
 objlayout_read_pagelist(struct nfs_read_data *rdata)
 {
+	struct nfs_pgio_header *hdr = rdata->header;
+	struct inode *inode = hdr->inode;
 	loff_t offset = rdata->args.offset;
 	size_t count = rdata->args.count;
 	int err;
 	loff_t eof;
 
-	eof = i_size_read(rdata->inode);
+	eof = i_size_read(inode);
 	if (unlikely(offset + count > eof)) {
 		if (offset >= eof) {
 			err = 0;
@@ -297,17 +299,17 @@ objlayout_read_pagelist(struct nfs_read_data *rdata)
 	}
 
 	rdata->res.eof = (offset + count) >= eof;
-	_fix_verify_io_params(rdata->lseg, &rdata->args.pages,
+	_fix_verify_io_params(hdr->lseg, &rdata->args.pages,
 			      &rdata->args.pgbase,
 			      rdata->args.offset, rdata->args.count);
 
 	dprintk("%s: inode(%lx) offset 0x%llx count 0x%Zx eof=%d\n",
-		__func__, rdata->inode->i_ino, offset, count, rdata->res.eof);
+		__func__, inode->i_ino, offset, count, rdata->res.eof);
 
 	err = objio_read_pagelist(rdata);
  out:
 	if (unlikely(err)) {
-		rdata->pnfs_error = err;
+		hdr->pnfs_error = err;
 		dprintk("%s: Returned Error %d\n", __func__, err);
 		return PNFS_NOT_ATTEMPTED;
 	}
@@ -340,7 +342,7 @@ objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
 		wdata->res.count = status;
 		wdata->verf.committed = oir->committed;
 	} else {
-		wdata->pnfs_error = status;
+		wdata->header->pnfs_error = status;
 	}
 	objlayout_iodone(oir);
 	/* must not use oir after this point */
@@ -363,15 +365,16 @@ enum pnfs_try_status
 objlayout_write_pagelist(struct nfs_write_data *wdata,
 			 int how)
 {
+	struct nfs_pgio_header *hdr = wdata->header;
 	int err;
 
-	_fix_verify_io_params(wdata->lseg, &wdata->args.pages,
+	_fix_verify_io_params(hdr->lseg, &wdata->args.pages,
 			      &wdata->args.pgbase,
 			      wdata->args.offset, wdata->args.count);
 
 	err = objio_write_pagelist(wdata, how);
 	if (unlikely(err)) {
-		wdata->pnfs_error = err;
+		hdr->pnfs_error = err;
 		dprintk("%s: Returned Error %d\n", __func__, err);
 		return PNFS_NOT_ATTEMPTED;
 	}
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 9c4d14a17d49..d705da427e6d 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1191,13 +1191,15 @@ static int pnfs_write_done_resend_to_mds(struct inode *inode, struct list_head *
 
 static void pnfs_ld_handle_write_error(struct nfs_write_data *data)
 {
-	dprintk("pnfs write error = %d\n", data->pnfs_error);
-	if (NFS_SERVER(data->inode)->pnfs_curr_ld->flags &
+	struct nfs_pgio_header *hdr = data->header;
+
+	dprintk("pnfs write error = %d\n", hdr->pnfs_error);
+	if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
 	    PNFS_LAYOUTRET_ON_ERROR) {
-		clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(data->inode)->flags);
-		pnfs_return_layout(data->inode);
+		clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags);
+		pnfs_return_layout(hdr->inode);
 	}
-	data->task.tk_status = pnfs_write_done_resend_to_mds(data->inode, &data->pages);
+	data->task.tk_status = pnfs_write_done_resend_to_mds(hdr->inode, &hdr->pages);
 }
 
 /*
@@ -1205,13 +1207,15 @@ static void pnfs_ld_handle_write_error(struct nfs_write_data *data)
  */
 void pnfs_ld_write_done(struct nfs_write_data *data)
 {
-	if (likely(!data->pnfs_error)) {
+	struct nfs_pgio_header *hdr = data->header;
+
+	if (!hdr->pnfs_error) {
 		pnfs_set_layoutcommit(data);
-		data->mds_ops->rpc_call_done(&data->task, data);
+		hdr->mds_ops->rpc_call_done(&data->task, data);
 	} else
 		pnfs_ld_handle_write_error(data);
-	put_lseg(data->lseg);
-	data->mds_ops->rpc_release(data);
+	put_lseg(hdr->lseg);
+	hdr->mds_ops->rpc_release(data);
 }
 EXPORT_SYMBOL_GPL(pnfs_ld_write_done);
 
@@ -1219,12 +1223,14 @@ static void
 pnfs_write_through_mds(struct nfs_pageio_descriptor *desc,
 		struct nfs_write_data *data)
 {
-	list_splice_tail_init(&data->pages, &desc->pg_list);
-	if (data->req && list_empty(&data->req->wb_list))
-		nfs_list_add_request(data->req, &desc->pg_list);
+	struct nfs_pgio_header *hdr = data->header;
+
+	list_splice_tail_init(&hdr->pages, &desc->pg_list);
+	if (hdr->req && list_empty(&hdr->req->wb_list))
+		nfs_list_add_request(hdr->req, &desc->pg_list);
 	nfs_pageio_reset_write_mds(desc);
 	desc->pg_recoalesce = 1;
-	put_lseg(data->lseg);
+	put_lseg(hdr->lseg);
 	nfs_writedata_release(data);
 }
 
@@ -1234,20 +1240,21 @@ pnfs_try_to_write_data(struct nfs_write_data *wdata,
 			struct pnfs_layout_segment *lseg,
 			int how)
 {
-	struct inode *inode = wdata->inode;
+	struct nfs_pgio_header *hdr = wdata->header;
+	struct inode *inode = hdr->inode;
 	enum pnfs_try_status trypnfs;
 	struct nfs_server *nfss = NFS_SERVER(inode);
 
-	wdata->mds_ops = call_ops;
-	wdata->lseg = get_lseg(lseg);
+	hdr->mds_ops = call_ops;
+	hdr->lseg = get_lseg(lseg);
 
 	dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__,
 		inode->i_ino, wdata->args.count, wdata->args.offset, how);
 
 	trypnfs = nfss->pnfs_curr_ld->write_pagelist(wdata, how);
 	if (trypnfs == PNFS_NOT_ATTEMPTED) {
-		put_lseg(wdata->lseg);
-		wdata->lseg = NULL;
+		put_lseg(hdr->lseg);
+		hdr->lseg = NULL;
 	} else
 		nfs_inc_stats(inode, NFSIOS_PNFS_WRITE);
 
@@ -1318,13 +1325,15 @@ static int pnfs_read_done_resend_to_mds(struct inode *inode, struct list_head *h
 
 static void pnfs_ld_handle_read_error(struct nfs_read_data *data)
 {
-	dprintk("pnfs read error = %d\n", data->pnfs_error);
-	if (NFS_SERVER(data->inode)->pnfs_curr_ld->flags &
+	struct nfs_pgio_header *hdr = data->header;
+
+	dprintk("pnfs read error = %d\n", hdr->pnfs_error);
+	if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
 	    PNFS_LAYOUTRET_ON_ERROR) {
-		clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(data->inode)->flags);
-		pnfs_return_layout(data->inode);
+		clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags);
+		pnfs_return_layout(hdr->inode);
 	}
-	data->task.tk_status = pnfs_read_done_resend_to_mds(data->inode, &data->pages);
+	data->task.tk_status = pnfs_read_done_resend_to_mds(hdr->inode, &hdr->pages);
 }
 
 /*
@@ -1332,13 +1341,15 @@ static void pnfs_ld_handle_read_error(struct nfs_read_data *data)
  */
 void pnfs_ld_read_done(struct nfs_read_data *data)
 {
-	if (likely(!data->pnfs_error)) {
+	struct nfs_pgio_header *hdr = data->header;
+
+	if (likely(!hdr->pnfs_error)) {
 		__nfs4_read_done_cb(data);
-		data->mds_ops->rpc_call_done(&data->task, data);
+		hdr->mds_ops->rpc_call_done(&data->task, data);
 	} else
 		pnfs_ld_handle_read_error(data);
-	put_lseg(data->lseg);
-	data->mds_ops->rpc_release(data);
+	put_lseg(hdr->lseg);
+	hdr->mds_ops->rpc_release(data);
 }
 EXPORT_SYMBOL_GPL(pnfs_ld_read_done);
 
@@ -1346,9 +1357,11 @@ static void
 pnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
 		struct nfs_read_data *data)
 {
-	list_splice_tail_init(&data->pages, &desc->pg_list);
-	if (data->req && list_empty(&data->req->wb_list))
-		nfs_list_add_request(data->req, &desc->pg_list);
+	struct nfs_pgio_header *hdr = data->header;
+
+	list_splice_tail_init(&hdr->pages, &desc->pg_list);
+	if (hdr->req && list_empty(&hdr->req->wb_list))
+		nfs_list_add_request(hdr->req, &desc->pg_list);
 	nfs_pageio_reset_read_mds(desc);
 	desc->pg_recoalesce = 1;
 	nfs_readdata_release(data);
@@ -1362,20 +1375,21 @@ pnfs_try_to_read_data(struct nfs_read_data *rdata,
 		       const struct rpc_call_ops *call_ops,
 		       struct pnfs_layout_segment *lseg)
 {
-	struct inode *inode = rdata->inode;
+	struct nfs_pgio_header *hdr = rdata->header;
+	struct inode *inode = hdr->inode;
 	struct nfs_server *nfss = NFS_SERVER(inode);
 	enum pnfs_try_status trypnfs;
 
-	rdata->mds_ops = call_ops;
-	rdata->lseg = get_lseg(lseg);
+	hdr->mds_ops = call_ops;
+	hdr->lseg = get_lseg(lseg);
 
 	dprintk("%s: Reading ino:%lu %u@%llu\n",
 		__func__, inode->i_ino, rdata->args.count, rdata->args.offset);
 
 	trypnfs = nfss->pnfs_curr_ld->read_pagelist(rdata);
 	if (trypnfs == PNFS_NOT_ATTEMPTED) {
-		put_lseg(rdata->lseg);
-		rdata->lseg = NULL;
+		put_lseg(hdr->lseg);
+		hdr->lseg = NULL;
 	} else {
 		nfs_inc_stats(inode, NFSIOS_PNFS_READ);
 	}
@@ -1450,30 +1464,32 @@ EXPORT_SYMBOL_GPL(pnfs_set_lo_fail);
 void
 pnfs_set_layoutcommit(struct nfs_write_data *wdata)
 {
-	struct nfs_inode *nfsi = NFS_I(wdata->inode);
+	struct nfs_pgio_header *hdr = wdata->header;
+	struct inode *inode = hdr->inode;
+	struct nfs_inode *nfsi = NFS_I(inode);
 	loff_t end_pos = wdata->mds_offset + wdata->res.count;
 	bool mark_as_dirty = false;
 
-	spin_lock(&nfsi->vfs_inode.i_lock);
+	spin_lock(&inode->i_lock);
 	if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) {
 		mark_as_dirty = true;
 		dprintk("%s: Set layoutcommit for inode %lu ",
-			__func__, wdata->inode->i_ino);
+			__func__, inode->i_ino);
 	}
-	if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &wdata->lseg->pls_flags)) {
+	if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &hdr->lseg->pls_flags)) {
 		/* references matched in nfs4_layoutcommit_release */
-		get_lseg(wdata->lseg);
+		get_lseg(hdr->lseg);
 	}
 	if (end_pos > nfsi->layout->plh_lwb)
 		nfsi->layout->plh_lwb = end_pos;
-	spin_unlock(&nfsi->vfs_inode.i_lock);
+	spin_unlock(&inode->i_lock);
 	dprintk("%s: lseg %p end_pos %llu\n",
-		__func__, wdata->lseg, nfsi->layout->plh_lwb);
+		__func__, hdr->lseg, nfsi->layout->plh_lwb);
 
 	/* if pnfs_layoutcommit_inode() runs between inode locks, the next one
 	 * will be a noop because NFS_INO_LAYOUTCOMMIT will not be set */
 	if (mark_as_dirty)
-		mark_inode_dirty_sync(wdata->inode);
+		mark_inode_dirty_sync(inode);
 }
 EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit);
 
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index bf80503200f5..22ee70586875 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -641,12 +641,14 @@ nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
 
 static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data)
 {
+	struct inode *inode = data->header->inode;
+
 	if (nfs_async_handle_expired_key(task))
 		return -EAGAIN;
 
-	nfs_invalidate_atime(data->inode);
+	nfs_invalidate_atime(inode);
 	if (task->tk_status >= 0) {
-		nfs_refresh_inode(data->inode, data->res.fattr);
+		nfs_refresh_inode(inode, data->res.fattr);
 		/* Emulate the eof flag, which isn't normally needed in NFSv2
 		 * as it is guaranteed to always return the file attributes
 		 */
@@ -668,11 +670,13 @@ static void nfs_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_dat
 
 static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data)
 {
+	struct inode *inode = data->header->inode;
+
 	if (nfs_async_handle_expired_key(task))
 		return -EAGAIN;
 
 	if (task->tk_status >= 0)
-		nfs_post_op_update_inode_force_wcc(data->inode, data->res.fattr);
+		nfs_post_op_update_inode_force_wcc(inode, data->res.fattr);
 	return 0;
 }
 
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 4ddba6706347..d6d46823d9e4 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -35,19 +35,24 @@ static const struct rpc_call_ops nfs_read_full_ops;
 
 static struct kmem_cache *nfs_rdata_cachep;
 
-struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
+struct nfs_read_header *nfs_readhdr_alloc(unsigned int pagecount)
 {
-	struct nfs_read_data *p;
+	struct nfs_read_header *p;
 
 	p = kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL);
 	if (p) {
-		INIT_LIST_HEAD(&p->pages);
-		p->npages = pagecount;
-		if (pagecount <= ARRAY_SIZE(p->page_array))
-			p->pagevec = p->page_array;
+		struct nfs_pgio_header *hdr = &p->header;
+		struct nfs_read_data *data = &p->rpc_data;
+
+		INIT_LIST_HEAD(&hdr->pages);
+		INIT_LIST_HEAD(&data->list);
+		data->npages = pagecount;
+		data->header = hdr;
+		if (pagecount <= ARRAY_SIZE(data->page_array))
+			data->pagevec = data->page_array;
 		else {
-			p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL);
-			if (!p->pagevec) {
+			data->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL);
+			if (!data->pagevec) {
 				kmem_cache_free(nfs_rdata_cachep, p);
 				p = NULL;
 			}
@@ -56,17 +61,19 @@ struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
 	return p;
 }
 
-void nfs_readdata_free(struct nfs_read_data *p)
+void nfs_readhdr_free(struct nfs_pgio_header *hdr)
 {
-	if (p && (p->pagevec != &p->page_array[0]))
-		kfree(p->pagevec);
-	kmem_cache_free(nfs_rdata_cachep, p);
+	struct nfs_read_header *rhdr = container_of(hdr, struct nfs_read_header, header);
+
+	kmem_cache_free(nfs_rdata_cachep, rhdr);
 }
 
 void nfs_readdata_release(struct nfs_read_data *rdata)
 {
 	put_nfs_open_context(rdata->args.context);
-	nfs_readdata_free(rdata);
+	if (rdata->pagevec != rdata->page_array)
+		kfree(rdata->pagevec);
+	nfs_readhdr_free(rdata->header);
 }
 
 static
@@ -173,13 +180,13 @@ int nfs_initiate_read(struct rpc_clnt *clnt,
 		      struct nfs_read_data *data,
 		      const struct rpc_call_ops *call_ops)
 {
-	struct inode *inode = data->inode;
+	struct inode *inode = data->header->inode;
 	int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0;
 	struct rpc_task *task;
 	struct rpc_message msg = {
 		.rpc_argp = &data->args,
 		.rpc_resp = &data->res,
-		.rpc_cred = data->cred,
+		.rpc_cred = data->header->cred,
 	};
 	struct rpc_task_setup task_setup_data = {
 		.task = &data->task,
@@ -216,11 +223,11 @@ EXPORT_SYMBOL_GPL(nfs_initiate_read);
 static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
 		unsigned int count, unsigned int offset)
 {
-	struct inode *inode = req->wb_context->dentry->d_inode;
+	struct inode *inode = data->header->inode;
 
-	data->req	  = req;
-	data->inode	  = inode;
-	data->cred	  = req->wb_context->cred;
+	data->header->req	  = req;
+	data->header->inode	  = inode;
+	data->header->cred	  = req->wb_context->cred;
 
 	data->args.fh     = NFS_FH(inode);
 	data->args.offset = req_offset(req) + offset;
@@ -239,7 +246,7 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
 static int nfs_do_read(struct nfs_read_data *data,
 		const struct rpc_call_ops *call_ops)
 {
-	struct inode *inode = data->args.context->dentry->d_inode;
+	struct inode *inode = data->header->inode;
 
 	return nfs_initiate_read(NFS_CLIENT(inode), data, call_ops);
 }
@@ -293,6 +300,7 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, struct list_head
 {
 	struct nfs_page *req = nfs_list_entry(desc->pg_list.next);
 	struct page *page = req->wb_page;
+	struct nfs_read_header *rhdr;
 	struct nfs_read_data *data;
 	size_t rsize = desc->pg_bsize, nbytes;
 	unsigned int offset;
@@ -306,9 +314,10 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, struct list_head
 	do {
 		size_t len = min(nbytes,rsize);
 
-		data = nfs_readdata_alloc(1);
-		if (!data)
+		rhdr = nfs_readhdr_alloc(1);
+		if (!rhdr)
 			goto out_bad;
+		data = &rhdr->rpc_data;
 		data->pagevec[0] = page;
 		nfs_read_rpcsetup(req, data, len, offset);
 		list_add(&data->list, res);
@@ -333,26 +342,28 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, struct list_head *
 {
 	struct nfs_page		*req;
 	struct page		**pages;
+	struct nfs_read_header	*rhdr;
 	struct nfs_read_data	*data;
 	struct list_head *head = &desc->pg_list;
 	int ret = 0;
 
-	data = nfs_readdata_alloc(nfs_page_array_len(desc->pg_base,
-						     desc->pg_count));
-	if (!data) {
+	rhdr = nfs_readhdr_alloc(nfs_page_array_len(desc->pg_base,
+						    desc->pg_count));
+	if (!rhdr) {
 		nfs_async_read_error(head);
 		ret = -ENOMEM;
 		goto out;
 	}
 
+	data = &rhdr->rpc_data;
 	pages = data->pagevec;
 	while (!list_empty(head)) {
 		req = nfs_list_entry(head->next);
 		nfs_list_remove_request(req);
-		nfs_list_add_request(req, &data->pages);
+		nfs_list_add_request(req, &rhdr->header.pages);
 		*pages++ = req->wb_page;
 	}
-	req = nfs_list_entry(data->pages.next);
+	req = nfs_list_entry(rhdr->header.pages.next);
 
 	nfs_read_rpcsetup(req, data, desc->pg_count, 0);
 	list_add(&data->list, res);
@@ -390,20 +401,21 @@ static const struct nfs_pageio_ops nfs_pageio_read_ops = {
  */
 int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
 {
+	struct inode *inode = data->header->inode;
 	int status;
 
 	dprintk("NFS: %s: %5u, (status %d)\n", __func__, task->tk_pid,
 			task->tk_status);
 
-	status = NFS_PROTO(data->inode)->read_done(task, data);
+	status = NFS_PROTO(inode)->read_done(task, data);
 	if (status != 0)
 		return status;
 
-	nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, data->res.count);
+	nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, data->res.count);
 
 	if (task->tk_status == -ESTALE) {
-		set_bit(NFS_INO_STALE, &NFS_I(data->inode)->flags);
-		nfs_mark_for_revalidate(data->inode);
+		set_bit(NFS_INO_STALE, &NFS_I(inode)->flags);
+		nfs_mark_for_revalidate(inode);
 	}
 	return 0;
 }
@@ -417,7 +429,7 @@ static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data
 		return;
 
 	/* This is a short read! */
-	nfs_inc_stats(data->inode, NFSIOS_SHORTREAD);
+	nfs_inc_stats(data->header->inode, NFSIOS_SHORTREAD);
 	/* Has the server at least made some progress? */
 	if (resp->count == 0)
 		return;
@@ -449,7 +461,7 @@ static void nfs_readpage_result_partial(struct rpc_task *task, void *calldata)
 static void nfs_readpage_release_partial(void *calldata)
 {
 	struct nfs_read_data *data = calldata;
-	struct nfs_page *req = data->req;
+	struct nfs_page *req = data->header->req;
 	struct page *page = req->wb_page;
 	int status = data->task.tk_status;
 
@@ -461,13 +473,13 @@ static void nfs_readpage_release_partial(void *calldata)
 			SetPageUptodate(page);
 		nfs_readpage_release(req);
 	}
-	nfs_readdata_release(calldata);
+	nfs_readdata_release(data);
 }
 
 void nfs_read_prepare(struct rpc_task *task, void *calldata)
 {
 	struct nfs_read_data *data = calldata;
-	NFS_PROTO(data->inode)->read_rpc_prepare(task, data);
+	NFS_PROTO(data->header->inode)->read_rpc_prepare(task, data);
 }
 
 static const struct rpc_call_ops nfs_read_partial_ops = {
@@ -524,9 +536,10 @@ static void nfs_readpage_result_full(struct rpc_task *task, void *calldata)
 static void nfs_readpage_release_full(void *calldata)
 {
 	struct nfs_read_data *data = calldata;
+	struct nfs_pgio_header *hdr = data->header;
 
-	while (!list_empty(&data->pages)) {
-		struct nfs_page *req = nfs_list_entry(data->pages.next);
+	while (!list_empty(&hdr->pages)) {
+		struct nfs_page *req = nfs_list_entry(hdr->pages.next);
 
 		nfs_list_remove_request(req);
 		nfs_readpage_release(req);
@@ -685,7 +698,7 @@ out:
 int __init nfs_init_readpagecache(void)
 {
 	nfs_rdata_cachep = kmem_cache_create("nfs_read_data",
-					     sizeof(struct nfs_read_data),
+					     sizeof(struct nfs_read_header),
 					     0, SLAB_HWCACHE_ALIGN,
 					     NULL);
 	if (nfs_rdata_cachep == NULL)
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 76735dd8c9a7..dbb5c0a613b8 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -69,19 +69,24 @@ void nfs_commit_free(struct nfs_commit_data *p)
 }
 EXPORT_SYMBOL_GPL(nfs_commit_free);
 
-struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
+struct nfs_write_header *nfs_writehdr_alloc(unsigned int pagecount)
 {
-	struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, GFP_NOFS);
+	struct nfs_write_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOFS);
 
 	if (p) {
+		struct nfs_pgio_header *hdr = &p->header;
+		struct nfs_write_data *data = &p->rpc_data;
+
 		memset(p, 0, sizeof(*p));
-		INIT_LIST_HEAD(&p->pages);
-		p->npages = pagecount;
-		if (pagecount <= ARRAY_SIZE(p->page_array))
-			p->pagevec = p->page_array;
+		INIT_LIST_HEAD(&hdr->pages);
+		INIT_LIST_HEAD(&data->list);
+		data->npages = pagecount;
+		data->header = hdr;
+		if (pagecount <= ARRAY_SIZE(data->page_array))
+			data->pagevec = data->page_array;
 		else {
-			p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_NOFS);
-			if (!p->pagevec) {
+			data->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_NOFS);
+			if (!data->pagevec) {
 				mempool_free(p, nfs_wdata_mempool);
 				p = NULL;
 			}
@@ -90,17 +95,18 @@ struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
 	return p;
 }
 
-void nfs_writedata_free(struct nfs_write_data *p)
+void nfs_writehdr_free(struct nfs_pgio_header *hdr)
 {
-	if (p && (p->pagevec != &p->page_array[0]))
-		kfree(p->pagevec);
-	mempool_free(p, nfs_wdata_mempool);
+	struct nfs_write_header *whdr = container_of(hdr, struct nfs_write_header, header);
+	mempool_free(whdr, nfs_wdata_mempool);
 }
 
 void nfs_writedata_release(struct nfs_write_data *wdata)
 {
 	put_nfs_open_context(wdata->args.context);
-	nfs_writedata_free(wdata);
+	if (wdata->pagevec != wdata->page_array)
+		kfree(wdata->pagevec);
+	nfs_writehdr_free(wdata->header);
 }
 
 static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
@@ -507,9 +513,8 @@ static inline
 int nfs_write_need_commit(struct nfs_write_data *data)
 {
 	if (data->verf.committed == NFS_DATA_SYNC)
-		return data->lseg == NULL;
-	else
-		return data->verf.committed != NFS_FILE_SYNC;
+		return data->header->lseg == NULL;
+	return data->verf.committed != NFS_FILE_SYNC;
 }
 
 static inline
@@ -517,7 +522,7 @@ int nfs_reschedule_unstable_write(struct nfs_page *req,
 				  struct nfs_write_data *data)
 {
 	if (test_and_clear_bit(PG_NEED_COMMIT, &req->wb_flags)) {
-		nfs_mark_request_commit(req, data->lseg);
+		nfs_mark_request_commit(req, data->header->lseg);
 		return 1;
 	}
 	if (test_and_clear_bit(PG_NEED_RESCHED, &req->wb_flags)) {
@@ -841,13 +846,13 @@ int nfs_initiate_write(struct rpc_clnt *clnt,
 		       const struct rpc_call_ops *call_ops,
 		       int how)
 {
-	struct inode *inode = data->inode;
+	struct inode *inode = data->header->inode;
 	int priority = flush_task_priority(how);
 	struct rpc_task *task;
 	struct rpc_message msg = {
 		.rpc_argp = &data->args,
 		.rpc_resp = &data->res,
-		.rpc_cred = data->cred,
+		.rpc_cred = data->header->cred,
 	};
 	struct rpc_task_setup task_setup_data = {
 		.rpc_client = clnt,
@@ -896,14 +901,15 @@ static void nfs_write_rpcsetup(struct nfs_page *req,
 		unsigned int count, unsigned int offset,
 		int how)
 {
+	struct nfs_pgio_header *hdr = data->header;
 	struct inode *inode = req->wb_context->dentry->d_inode;
 
 	/* Set up the RPC argument and reply structs
 	 * NB: take care not to mess about with data->commit et al. */
 
-	data->req = req;
-	data->inode = inode = req->wb_context->dentry->d_inode;
-	data->cred = req->wb_context->cred;
+	hdr->req = req;
+	hdr->inode = inode = req->wb_context->dentry->d_inode;
+	hdr->cred = req->wb_context->cred;
 
 	data->args.fh     = NFS_FH(inode);
 	data->args.offset = req_offset(req) + offset;
@@ -935,7 +941,7 @@ static int nfs_do_write(struct nfs_write_data *data,
 		const struct rpc_call_ops *call_ops,
 		int how)
 {
-	struct inode *inode = data->args.context->dentry->d_inode;
+	struct inode *inode = data->header->inode;
 
 	return nfs_initiate_write(NFS_CLIENT(inode), data, call_ops, how);
 }
@@ -981,6 +987,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head
 {
 	struct nfs_page *req = nfs_list_entry(desc->pg_list.next);
 	struct page *page = req->wb_page;
+	struct nfs_write_header *whdr;
 	struct nfs_write_data *data;
 	size_t wsize = desc->pg_bsize, nbytes;
 	unsigned int offset;
@@ -1000,9 +1007,10 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head
 	do {
 		size_t len = min(nbytes, wsize);
 
-		data = nfs_writedata_alloc(1);
-		if (!data)
+		whdr = nfs_writehdr_alloc(1);
+		if (!whdr)
 			goto out_bad;
+		data = &whdr->rpc_data;
 		data->pagevec[0] = page;
 		nfs_write_rpcsetup(req, data, len, offset, desc->pg_ioflags);
 		list_add(&data->list, res);
@@ -1036,13 +1044,14 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc, struct list_head *r
 {
 	struct nfs_page		*req;
 	struct page		**pages;
+	struct nfs_write_header	*whdr;
 	struct nfs_write_data	*data;
 	struct list_head *head = &desc->pg_list;
 	int ret = 0;
 
-	data = nfs_writedata_alloc(nfs_page_array_len(desc->pg_base,
-						      desc->pg_count));
-	if (!data) {
+	whdr = nfs_writehdr_alloc(nfs_page_array_len(desc->pg_base,
+						     desc->pg_count));
+	if (!whdr) {
 		while (!list_empty(head)) {
 			req = nfs_list_entry(head->next);
 			nfs_list_remove_request(req);
@@ -1051,14 +1060,15 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc, struct list_head *r
 		ret = -ENOMEM;
 		goto out;
 	}
+	data = &whdr->rpc_data;
 	pages = data->pagevec;
 	while (!list_empty(head)) {
 		req = nfs_list_entry(head->next);
 		nfs_list_remove_request(req);
-		nfs_list_add_request(req, &data->pages);
+		nfs_list_add_request(req, &whdr->header.pages);
 		*pages++ = req->wb_page;
 	}
-	req = nfs_list_entry(data->pages.next);
+	req = nfs_list_entry(whdr->header.pages.next);
 
 	if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
 	    (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit))
@@ -1126,10 +1136,11 @@ static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata)
 
 	dprintk("NFS: %5u write(%s/%lld %d@%lld)",
 		task->tk_pid,
-		data->req->wb_context->dentry->d_inode->i_sb->s_id,
+		data->header->inode->i_sb->s_id,
 		(long long)
-		  NFS_FILEID(data->req->wb_context->dentry->d_inode),
-		data->req->wb_bytes, (long long)req_offset(data->req));
+		  NFS_FILEID(data->header->inode),
+		data->header->req->wb_bytes,
+		(long long)req_offset(data->header->req));
 
 	nfs_writeback_done(task, data);
 }
@@ -1137,7 +1148,7 @@ static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata)
 static void nfs_writeback_release_partial(void *calldata)
 {
 	struct nfs_write_data	*data = calldata;
-	struct nfs_page		*req = data->req;
+	struct nfs_page		*req = data->header->req;
 	struct page		*page = req->wb_page;
 	int status = data->task.tk_status;
 
@@ -1169,13 +1180,13 @@ static void nfs_writeback_release_partial(void *calldata)
 out:
 	if (atomic_dec_and_test(&req->wb_complete))
 		nfs_writepage_release(req, data);
-	nfs_writedata_release(calldata);
+	nfs_writedata_release(data);
 }
 
 void nfs_write_prepare(struct rpc_task *task, void *calldata)
 {
 	struct nfs_write_data *data = calldata;
-	NFS_PROTO(data->inode)->write_rpc_prepare(task, data);
+	NFS_PROTO(data->header->inode)->write_rpc_prepare(task, data);
 }
 
 void nfs_commit_prepare(struct rpc_task *task, void *calldata)
@@ -1208,11 +1219,12 @@ static void nfs_writeback_done_full(struct rpc_task *task, void *calldata)
 static void nfs_writeback_release_full(void *calldata)
 {
 	struct nfs_write_data	*data = calldata;
+	struct nfs_pgio_header *hdr = data->header;
 	int status = data->task.tk_status;
 
 	/* Update attributes as result of writeback. */
-	while (!list_empty(&data->pages)) {
-		struct nfs_page *req = nfs_list_entry(data->pages.next);
+	while (!list_empty(&hdr->pages)) {
+		struct nfs_page *req = nfs_list_entry(hdr->pages.next);
 		struct page *page = req->wb_page;
 
 		nfs_list_remove_request(req);
@@ -1233,7 +1245,7 @@ static void nfs_writeback_release_full(void *calldata)
 
 		if (nfs_write_need_commit(data)) {
 			memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf));
-			nfs_mark_request_commit(req, data->lseg);
+			nfs_mark_request_commit(req, hdr->lseg);
 			dprintk(" marked for commit\n");
 			goto next;
 		}
@@ -1244,7 +1256,7 @@ remove_request:
 		nfs_unlock_request(req);
 		nfs_end_page_writeback(page);
 	}
-	nfs_writedata_release(calldata);
+	nfs_writedata_release(data);
 }
 
 static const struct rpc_call_ops nfs_write_full_ops = {
@@ -1261,6 +1273,7 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
 {
 	struct nfs_writeargs	*argp = &data->args;
 	struct nfs_writeres	*resp = &data->res;
+	struct inode		*inode = data->header->inode;
 	int status;
 
 	dprintk("NFS: %5u nfs_writeback_done (status %d)\n",
@@ -1273,10 +1286,10 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
 	 * another writer had changed the file, but some applications
 	 * depend on tighter cache coherency when writing.
 	 */
-	status = NFS_PROTO(data->inode)->write_done(task, data);
+	status = NFS_PROTO(inode)->write_done(task, data);
 	if (status != 0)
 		return;
-	nfs_add_stats(data->inode, NFSIOS_SERVERWRITTENBYTES, resp->count);
+	nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, resp->count);
 
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
 	if (resp->verf->committed < argp->stable && task->tk_status >= 0) {
@@ -1294,7 +1307,7 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
 		if (time_before(complain, jiffies)) {
 			dprintk("NFS:       faulty NFS server %s:"
 				" (committed = %d) != (stable = %d)\n",
-				NFS_SERVER(data->inode)->nfs_client->cl_hostname,
+				NFS_SERVER(inode)->nfs_client->cl_hostname,
 				resp->verf->committed, argp->stable);
 			complain = jiffies + 300 * HZ;
 		}
@@ -1304,7 +1317,7 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
 	if (task->tk_status >= 0 && resp->count < argp->count) {
 		static unsigned long    complain;
 
-		nfs_inc_stats(data->inode, NFSIOS_SHORTWRITE);
+		nfs_inc_stats(inode, NFSIOS_SHORTWRITE);
 
 		/* Has the server at least made some progress? */
 		if (resp->count != 0) {
@@ -1333,7 +1346,6 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
 		/* Can't do anything about it except throw an error. */
 		task->tk_status = -EIO;
 	}
-	return;
 }
 
 
@@ -1745,7 +1757,7 @@ int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
 int __init nfs_init_writepagecache(void)
 {
 	nfs_wdata_cachep = kmem_cache_create("nfs_write_data",
-					     sizeof(struct nfs_write_data),
+					     sizeof(struct nfs_write_header),
 					     0, SLAB_HWCACHE_ALIGN,
 					     NULL);
 	if (nfs_wdata_cachep == NULL)
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index d5d68f322bf0..8d3a2b804201 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -568,12 +568,6 @@ nfs_have_writebacks(struct inode *inode)
 	return NFS_I(inode)->npages != 0;
 }
 
-/*
- * Allocate nfs_write_data structures
- */
-extern struct nfs_write_data *nfs_writedata_alloc(unsigned int npages);
-extern void nfs_writedata_free(struct nfs_write_data *);
-
 /*
  * linux/fs/nfs/read.c
  */
@@ -584,12 +578,6 @@ extern int  nfs_readpage_result(struct rpc_task *, struct nfs_read_data *);
 extern int  nfs_readpage_async(struct nfs_open_context *, struct inode *,
 			       struct page *);
 
-/*
- * Allocate nfs_read_data structures
- */
-extern struct nfs_read_data *nfs_readdata_alloc(unsigned int npages);
-extern void nfs_readdata_free(struct nfs_read_data *);
-
 /*
  * linux/fs/nfs3proc.c
  */
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 8fb036a0d489..fee324175391 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1168,52 +1168,58 @@ struct nfs_page;
 #define NFS_PAGEVEC_SIZE	(8U)
 
 struct nfs_read_data {
+	struct nfs_pgio_header	*header;
+	struct list_head	list;
 	struct rpc_task		task;
-	struct inode		*inode;
-	struct rpc_cred		*cred;
 	struct nfs_fattr	fattr;	/* fattr storage */
-	struct list_head	pages;	/* Coalesced read requests */
-	struct list_head	list;	/* lists of struct nfs_read_data */
-	struct nfs_page		*req;	/* multi ops per nfs_page */
 	struct page		**pagevec;
 	unsigned int		npages;	/* Max length of pagevec */
 	struct nfs_readargs args;
 	struct nfs_readres  res;
 	unsigned long		timestamp;	/* For lease renewal */
-	struct pnfs_layout_segment *lseg;
-	struct nfs_client	*ds_clp;	/* pNFS data server */
-	const struct rpc_call_ops *mds_ops;
 	int (*read_done_cb) (struct rpc_task *task, struct nfs_read_data *data);
 	__u64			mds_offset;
-	int			pnfs_error;
 	struct page		*page_array[NFS_PAGEVEC_SIZE];
+	struct nfs_client	*ds_clp;	/* pNFS data server */
+};
+
+struct nfs_pgio_header {
+	struct inode		*inode;
+	struct rpc_cred		*cred;
+	struct list_head	pages;
+	struct nfs_page		*req;
+	struct pnfs_layout_segment *lseg;
+	const struct rpc_call_ops *mds_ops;
+	int			pnfs_error;
+};
+
+struct nfs_read_header {
+	struct nfs_pgio_header	header;
+	struct nfs_read_data	rpc_data;
 };
 
 struct nfs_direct_req;
 
 struct nfs_write_data {
+	struct nfs_pgio_header	*header;
+	struct list_head	list;
 	struct rpc_task		task;
-	struct inode		*inode;
-	struct rpc_cred		*cred;
 	struct nfs_fattr	fattr;
 	struct nfs_writeverf	verf;
-	struct list_head	pages;		/* Coalesced requests we wish to flush */
-	struct list_head	list;		/* lists of struct nfs_write_data */
-	struct nfs_page		*req;		/* multi ops per nfs_page */
 	struct page		**pagevec;
 	unsigned int		npages;		/* Max length of pagevec */
 	struct nfs_writeargs	args;		/* argument struct */
 	struct nfs_writeres	res;		/* result struct */
-	struct pnfs_layout_segment *lseg;
-	struct nfs_client	*ds_clp;	/* pNFS data server */
-	const struct rpc_call_ops *mds_ops;
-	int (*write_done_cb) (struct rpc_task *task, struct nfs_write_data *data);
-#ifdef CONFIG_NFS_V4
 	unsigned long		timestamp;	/* For lease renewal */
-#endif
+	int (*write_done_cb) (struct rpc_task *task, struct nfs_write_data *data);
 	__u64			mds_offset;	/* Filelayout dense stripe */
-	int			pnfs_error;
 	struct page		*page_array[NFS_PAGEVEC_SIZE];
+	struct nfs_client	*ds_clp;	/* pNFS data server */
+};
+
+struct nfs_write_header {
+	struct nfs_pgio_header	header;
+	struct nfs_write_data	rpc_data;
 };
 
 struct nfs_commit_data {
-- 
cgit v1.3-7-g2ca7


From 30dd374f6fc1b202db3a1b57b61afff1326bad92 Mon Sep 17 00:00:00 2001
From: Fred Isaman <iisaman@netapp.com>
Date: Fri, 20 Apr 2012 14:47:45 -0400
Subject: NFS: create struct nfs_page_array

Both nfs_read_data and nfs_write_data devote several fields which
can be combined into a single shared struct.

Signed-off-by: Fred Isaman <iisaman@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/blocklayout/blocklayout.c | 11 ++++++-----
 fs/nfs/direct.c                  | 40 +++++++++++++++++++++++-----------------
 fs/nfs/internal.h                |  1 +
 fs/nfs/pagelist.c                | 13 +++++++++++++
 fs/nfs/read.c                    | 22 ++++++++--------------
 fs/nfs/write.c                   | 22 ++++++++--------------
 include/linux/nfs_xdr.h          | 14 ++++++++------
 7 files changed, 67 insertions(+), 56 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 7a482517f4c6..7ae8a608956f 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -242,7 +242,7 @@ bl_read_pagelist(struct nfs_read_data *rdata)
 	int pg_index = rdata->args.pgbase >> PAGE_CACHE_SHIFT;
 
 	dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__,
-	       rdata->npages, f_offset, (unsigned int)rdata->args.count);
+	       rdata->pages.npages, f_offset, (unsigned int)rdata->args.count);
 
 	par = alloc_parallel(rdata);
 	if (!par)
@@ -252,7 +252,7 @@ bl_read_pagelist(struct nfs_read_data *rdata)
 
 	isect = (sector_t) (f_offset >> SECTOR_SHIFT);
 	/* Code assumes extents are page-aligned */
-	for (i = pg_index; i < rdata->npages; i++) {
+	for (i = pg_index; i < rdata->pages.npages; i++) {
 		if (!extent_length) {
 			/* We've used up the previous extent */
 			bl_put_extent(be);
@@ -285,7 +285,8 @@ bl_read_pagelist(struct nfs_read_data *rdata)
 			struct pnfs_block_extent *be_read;
 
 			be_read = (hole && cow_read) ? cow_read : be;
-			bio = bl_add_page_to_bio(bio, rdata->npages - i, READ,
+			bio = bl_add_page_to_bio(bio, rdata->pages.npages - i,
+						 READ,
 						 isect, pages[i], be_read,
 						 bl_end_io_read, par);
 			if (IS_ERR(bio)) {
@@ -654,7 +655,7 @@ next_page:
 
 	/* Middle pages */
 	pg_index = wdata->args.pgbase >> PAGE_CACHE_SHIFT;
-	for (i = pg_index; i < wdata->npages; i++) {
+	for (i = pg_index; i < wdata->pages.npages; i++) {
 		if (!extent_length) {
 			/* We've used up the previous extent */
 			bl_put_extent(be);
@@ -688,7 +689,7 @@ next_page:
 				goto out;
 			}
 		}
-		bio = bl_add_page_to_bio(bio, wdata->npages - i, WRITE,
+		bio = bl_add_page_to_bio(bio, wdata->pages.npages - i, WRITE,
 					 isect, pages[i], be,
 					 bl_end_io_write, par);
 		if (IS_ERR(bio)) {
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 56176af1436f..0faba4cb531d 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -252,11 +252,11 @@ static void nfs_direct_read_release(void *calldata)
 	} else {
 		dreq->count += data->res.count;
 		spin_unlock(&dreq->lock);
-		nfs_direct_dirty_pages(data->pagevec,
+		nfs_direct_dirty_pages(data->pages.pagevec,
 				data->args.pgbase,
 				data->res.count);
 	}
-	nfs_direct_release_pages(data->pagevec, data->npages);
+	nfs_direct_release_pages(data->pages.pagevec, data->pages.npages);
 
 	if (put_dreq(dreq))
 		nfs_direct_complete(dreq);
@@ -273,8 +273,8 @@ static void nfs_direct_readhdr_release(struct nfs_read_header *rhdr)
 {
 	struct nfs_read_data *data = &rhdr->rpc_data;
 
-	if (data->pagevec != data->page_array)
-		kfree(data->pagevec);
+	if (data->pages.pagevec != data->pages.page_array)
+		kfree(data->pages.pagevec);
 	nfs_readhdr_free(&rhdr->header);
 }
 
@@ -312,6 +312,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
 	do {
 		struct nfs_read_header *rhdr;
 		struct nfs_read_data *data;
+		struct nfs_page_array *pages;
 		size_t bytes;
 
 		pgbase = user_addr & ~PAGE_MASK;
@@ -322,24 +323,25 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
 		if (unlikely(!rhdr))
 			break;
 		data = &rhdr->rpc_data;
+		pages = &data->pages;
 
 		down_read(&current->mm->mmap_sem);
 		result = get_user_pages(current, current->mm, user_addr,
-					data->npages, 1, 0, data->pagevec, NULL);
+					pages->npages, 1, 0, pages->pagevec, NULL);
 		up_read(&current->mm->mmap_sem);
 		if (result < 0) {
 			nfs_direct_readhdr_release(rhdr);
 			break;
 		}
-		if ((unsigned)result < data->npages) {
+		if ((unsigned)result < pages->npages) {
 			bytes = result * PAGE_SIZE;
 			if (bytes <= pgbase) {
-				nfs_direct_release_pages(data->pagevec, result);
+				nfs_direct_release_pages(pages->pagevec, result);
 				nfs_direct_readhdr_release(rhdr);
 				break;
 			}
 			bytes -= pgbase;
-			data->npages = result;
+			pages->npages = result;
 		}
 
 		get_dreq(dreq);
@@ -352,7 +354,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
 		data->args.lock_context = dreq->l_ctx;
 		data->args.offset = pos;
 		data->args.pgbase = pgbase;
-		data->args.pages = data->pagevec;
+		data->args.pages = pages->pagevec;
 		data->args.count = bytes;
 		data->res.fattr = &data->fattr;
 		data->res.eof = 0;
@@ -462,8 +464,8 @@ static void nfs_direct_writehdr_release(struct nfs_write_header *whdr)
 {
 	struct nfs_write_data *data = &whdr->rpc_data;
 
-	if (data->pagevec != data->page_array)
-		kfree(data->pagevec);
+	if (data->pages.pagevec != data->pages.page_array)
+		kfree(data->pages.pagevec);
 	nfs_writehdr_free(&whdr->header);
 }
 
@@ -472,8 +474,10 @@ static void nfs_direct_free_writedata(struct nfs_direct_req *dreq)
 	while (!list_empty(&dreq->rewrite_list)) {
 		struct nfs_pgio_header *hdr = list_entry(dreq->rewrite_list.next, struct nfs_pgio_header, pages);
 		struct nfs_write_header *whdr = container_of(hdr, struct nfs_write_header, header);
+		struct nfs_page_array *p = &whdr->rpc_data.pages;
+
 		list_del(&hdr->pages);
-		nfs_direct_release_pages(whdr->rpc_data.pagevec, whdr->rpc_data.npages);
+		nfs_direct_release_pages(p->pagevec, p->npages);
 		nfs_direct_writehdr_release(whdr);
 	}
 }
@@ -751,6 +755,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
 	do {
 		struct nfs_write_header *whdr;
 		struct nfs_write_data *data;
+		struct nfs_page_array *pages;
 		size_t bytes;
 
 		pgbase = user_addr & ~PAGE_MASK;
@@ -762,24 +767,25 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
 			break;
 
 		data = &whdr->rpc_data;
+		pages = &data->pages;
 
 		down_read(&current->mm->mmap_sem);
 		result = get_user_pages(current, current->mm, user_addr,
-					data->npages, 0, 0, data->pagevec, NULL);
+					pages->npages, 0, 0, pages->pagevec, NULL);
 		up_read(&current->mm->mmap_sem);
 		if (result < 0) {
 			nfs_direct_writehdr_release(whdr);
 			break;
 		}
-		if ((unsigned)result < data->npages) {
+		if ((unsigned)result < pages->npages) {
 			bytes = result * PAGE_SIZE;
 			if (bytes <= pgbase) {
-				nfs_direct_release_pages(data->pagevec, result);
+				nfs_direct_release_pages(pages->pagevec, result);
 				nfs_direct_writehdr_release(whdr);
 				break;
 			}
 			bytes -= pgbase;
-			data->npages = result;
+			pages->npages = result;
 		}
 
 		get_dreq(dreq);
@@ -794,7 +800,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
 		data->args.lock_context = dreq->l_ctx;
 		data->args.offset = pos;
 		data->args.pgbase = pgbase;
-		data->args.pages = data->pagevec;
+		data->args.pages = pages->pagevec;
 		data->args.count = bytes;
 		data->args.stable = sync;
 		data->res.fattr = &data->fattr;
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 7dc9be1a6e1a..5c3d77fda560 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -210,6 +210,7 @@ extern void nfs_destroy_writepagecache(void);
 
 extern int __init nfs_init_directcache(void);
 extern void nfs_destroy_directcache(void);
+extern bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount);
 
 /* nfs2xdr.c */
 extern int nfs_stat_to_errno(enum nfs_stat);
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index d21fceaa9f62..d349bd4c48db 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -26,6 +26,19 @@
 
 static struct kmem_cache *nfs_page_cachep;
 
+bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount)
+{
+	p->npages = pagecount;
+	if (pagecount <= ARRAY_SIZE(p->page_array))
+		p->pagevec = p->page_array;
+	else {
+		p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL);
+		if (!p->pagevec)
+			p->npages = 0;
+	}
+	return p->pagevec != NULL;
+}
+
 static inline struct nfs_page *
 nfs_page_alloc(void)
 {
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index d6d46823d9e4..f6ab30b5a462 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -46,16 +46,10 @@ struct nfs_read_header *nfs_readhdr_alloc(unsigned int pagecount)
 
 		INIT_LIST_HEAD(&hdr->pages);
 		INIT_LIST_HEAD(&data->list);
-		data->npages = pagecount;
 		data->header = hdr;
-		if (pagecount <= ARRAY_SIZE(data->page_array))
-			data->pagevec = data->page_array;
-		else {
-			data->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL);
-			if (!data->pagevec) {
-				kmem_cache_free(nfs_rdata_cachep, p);
-				p = NULL;
-			}
+		if (!nfs_pgarray_set(&data->pages, pagecount)) {
+			kmem_cache_free(nfs_rdata_cachep, p);
+			p = NULL;
 		}
 	}
 	return p;
@@ -71,8 +65,8 @@ void nfs_readhdr_free(struct nfs_pgio_header *hdr)
 void nfs_readdata_release(struct nfs_read_data *rdata)
 {
 	put_nfs_open_context(rdata->args.context);
-	if (rdata->pagevec != rdata->page_array)
-		kfree(rdata->pagevec);
+	if (rdata->pages.pagevec != rdata->pages.page_array)
+		kfree(rdata->pages.pagevec);
 	nfs_readhdr_free(rdata->header);
 }
 
@@ -232,7 +226,7 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
 	data->args.fh     = NFS_FH(inode);
 	data->args.offset = req_offset(req) + offset;
 	data->args.pgbase = req->wb_pgbase + offset;
-	data->args.pages  = data->pagevec;
+	data->args.pages  = data->pages.pagevec;
 	data->args.count  = count;
 	data->args.context = get_nfs_open_context(req->wb_context);
 	data->args.lock_context = req->wb_lock_context;
@@ -318,7 +312,7 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, struct list_head
 		if (!rhdr)
 			goto out_bad;
 		data = &rhdr->rpc_data;
-		data->pagevec[0] = page;
+		data->pages.pagevec[0] = page;
 		nfs_read_rpcsetup(req, data, len, offset);
 		list_add(&data->list, res);
 		requests++;
@@ -356,7 +350,7 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, struct list_head *
 	}
 
 	data = &rhdr->rpc_data;
-	pages = data->pagevec;
+	pages = data->pages.pagevec;
 	while (!list_empty(head)) {
 		req = nfs_list_entry(head->next);
 		nfs_list_remove_request(req);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index dbb5c0a613b8..2efae049b4f0 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -80,16 +80,10 @@ struct nfs_write_header *nfs_writehdr_alloc(unsigned int pagecount)
 		memset(p, 0, sizeof(*p));
 		INIT_LIST_HEAD(&hdr->pages);
 		INIT_LIST_HEAD(&data->list);
-		data->npages = pagecount;
 		data->header = hdr;
-		if (pagecount <= ARRAY_SIZE(data->page_array))
-			data->pagevec = data->page_array;
-		else {
-			data->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_NOFS);
-			if (!data->pagevec) {
-				mempool_free(p, nfs_wdata_mempool);
-				p = NULL;
-			}
+		if (!nfs_pgarray_set(&data->pages, pagecount)) {
+			mempool_free(p, nfs_wdata_mempool);
+			p = NULL;
 		}
 	}
 	return p;
@@ -104,8 +98,8 @@ void nfs_writehdr_free(struct nfs_pgio_header *hdr)
 void nfs_writedata_release(struct nfs_write_data *wdata)
 {
 	put_nfs_open_context(wdata->args.context);
-	if (wdata->pagevec != wdata->page_array)
-		kfree(wdata->pagevec);
+	if (wdata->pages.pagevec != wdata->pages.page_array)
+		kfree(wdata->pages.pagevec);
 	nfs_writehdr_free(wdata->header);
 }
 
@@ -916,7 +910,7 @@ static void nfs_write_rpcsetup(struct nfs_page *req,
 	/* pnfs_set_layoutcommit needs this */
 	data->mds_offset = data->args.offset;
 	data->args.pgbase = req->wb_pgbase + offset;
-	data->args.pages  = data->pagevec;
+	data->args.pages  = data->pages.pagevec;
 	data->args.count  = count;
 	data->args.context = get_nfs_open_context(req->wb_context);
 	data->args.lock_context = req->wb_lock_context;
@@ -1011,7 +1005,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head
 		if (!whdr)
 			goto out_bad;
 		data = &whdr->rpc_data;
-		data->pagevec[0] = page;
+		data->pages.pagevec[0] = page;
 		nfs_write_rpcsetup(req, data, len, offset, desc->pg_ioflags);
 		list_add(&data->list, res);
 		requests++;
@@ -1061,7 +1055,7 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc, struct list_head *r
 		goto out;
 	}
 	data = &whdr->rpc_data;
-	pages = data->pagevec;
+	pages = data->pages.pagevec;
 	while (!list_empty(head)) {
 		req = nfs_list_entry(head->next);
 		nfs_list_remove_request(req);
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index fee324175391..e34beaf86e9c 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1167,19 +1167,23 @@ struct nfs_page;
 
 #define NFS_PAGEVEC_SIZE	(8U)
 
+struct nfs_page_array {
+	struct page		**pagevec;
+	unsigned int		npages;		/* Max length of pagevec */
+	struct page		*page_array[NFS_PAGEVEC_SIZE];
+};
+
 struct nfs_read_data {
 	struct nfs_pgio_header	*header;
 	struct list_head	list;
 	struct rpc_task		task;
 	struct nfs_fattr	fattr;	/* fattr storage */
-	struct page		**pagevec;
-	unsigned int		npages;	/* Max length of pagevec */
 	struct nfs_readargs args;
 	struct nfs_readres  res;
 	unsigned long		timestamp;	/* For lease renewal */
 	int (*read_done_cb) (struct rpc_task *task, struct nfs_read_data *data);
 	__u64			mds_offset;
-	struct page		*page_array[NFS_PAGEVEC_SIZE];
+	struct nfs_page_array	pages;
 	struct nfs_client	*ds_clp;	/* pNFS data server */
 };
 
@@ -1206,14 +1210,12 @@ struct nfs_write_data {
 	struct rpc_task		task;
 	struct nfs_fattr	fattr;
 	struct nfs_writeverf	verf;
-	struct page		**pagevec;
-	unsigned int		npages;		/* Max length of pagevec */
 	struct nfs_writeargs	args;		/* argument struct */
 	struct nfs_writeres	res;		/* result struct */
 	unsigned long		timestamp;	/* For lease renewal */
 	int (*write_done_cb) (struct rpc_task *task, struct nfs_write_data *data);
 	__u64			mds_offset;	/* Filelayout dense stripe */
-	struct page		*page_array[NFS_PAGEVEC_SIZE];
+	struct nfs_page_array	pages;
 	struct nfs_client	*ds_clp;	/* pNFS data server */
 };
 
-- 
cgit v1.3-7-g2ca7


From 4db6e0b74c0f6dfc2f9c0690e8df512e3b635983 Mon Sep 17 00:00:00 2001
From: Fred Isaman <iisaman@netapp.com>
Date: Fri, 20 Apr 2012 14:47:46 -0400
Subject: NFS: merge _full and _partial read rpc_ops

Decouple nfs_pgio_header and nfs_read_data, and have (possibly
multiple) nfs_read_datas each take a refcount on nfs_pgio_header.

For the moment keeps nfs_read_header as a way to preallocate a single
nfs_read_data with the nfs_pgio_header.  The code doesn't need this,
and would be prettier without, but given the amount of churn I am
already introducing I didn't want to play with tuning new mempools.

This also fixes bug in pnfs_ld_handle_read_error.  In the case of
desc->pg_bsize < PAGE_CACHE_SIZE, the pages list was empty, causing
replay attempt to do nothing.

Signed-off-by: Fred Isaman <iisaman@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/direct.c          |  10 +-
 fs/nfs/internal.h        |  15 ++-
 fs/nfs/nfs4filelayout.c  |   1 -
 fs/nfs/nfs4proc.c        |   2 -
 fs/nfs/pagelist.c        |  24 ++++
 fs/nfs/pnfs.c            |  55 +++++---
 fs/nfs/read.c            | 338 ++++++++++++++++++++++-------------------------
 include/linux/nfs_page.h |   1 -
 include/linux/nfs_xdr.h  |  16 +++
 9 files changed, 252 insertions(+), 210 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 0faba4cb531d..90b00ce42cbe 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -319,10 +319,16 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
 		bytes = min(rsize,count);
 
 		result = -ENOMEM;
-		rhdr = nfs_readhdr_alloc(nfs_page_array_len(pgbase, bytes));
+		rhdr = nfs_readhdr_alloc();
 		if (unlikely(!rhdr))
 			break;
-		data = &rhdr->rpc_data;
+		data = nfs_readdata_alloc(&rhdr->header, nfs_page_array_len(pgbase, bytes));
+		if (!data) {
+			nfs_readhdr_free(&rhdr->header);
+			break;
+		}
+		data->header = &rhdr->header;
+		atomic_inc(&data->header->refcnt);
 		pages = &data->pages;
 
 		down_read(&current->mm->mmap_sem);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 5c3d77fda560..33af5e51c0bb 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -200,6 +200,7 @@ struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *dentry)
 extern struct svc_version nfs4_callback_version1;
 extern struct svc_version nfs4_callback_version4;
 
+struct nfs_pageio_descriptor;
 /* pagelist.c */
 extern int __init nfs_init_nfspagecache(void);
 extern void nfs_destroy_nfspagecache(void);
@@ -211,6 +212,10 @@ extern void nfs_destroy_writepagecache(void);
 extern int __init nfs_init_directcache(void);
 extern void nfs_destroy_directcache(void);
 extern bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount);
+extern void nfs_pgheader_init(struct nfs_pageio_descriptor *desc,
+			      struct nfs_pgio_header *hdr,
+			      void (*release)(struct nfs_pgio_header *hdr));
+void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos);
 
 /* nfs2xdr.c */
 extern int nfs_stat_to_errno(enum nfs_stat);
@@ -295,17 +300,19 @@ extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *,
 extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh);
 #endif
 
-struct nfs_pageio_descriptor;
 /* read.c */
-extern struct nfs_read_header *nfs_readhdr_alloc(unsigned int npages);
+extern void nfs_async_read_error(struct list_head *head);
+extern struct nfs_read_header *nfs_readhdr_alloc(void);
 extern void nfs_readhdr_free(struct nfs_pgio_header *hdr);
+extern void nfs_read_completion(struct nfs_pgio_header *hdr);
+extern struct nfs_read_data *nfs_readdata_alloc(struct nfs_pgio_header *hdr,
+						unsigned int pagecount);
 extern int nfs_initiate_read(struct rpc_clnt *clnt,
 			     struct nfs_read_data *data,
 			     const struct rpc_call_ops *call_ops);
 extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
 extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc,
-		struct list_head *head);
-
+			      struct nfs_pgio_header *hdr);
 extern void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio,
 		struct inode *inode);
 extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio);
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index ad1d68013a5b..333e765f3ac2 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -227,7 +227,6 @@ static void filelayout_read_release(void *data)
 {
 	struct nfs_read_data *rdata = data;
 
-	put_lseg(rdata->header->lseg);
 	rdata->header->mds_ops->rpc_release(data);
 }
 
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 5375862075de..ce31ab22bc55 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3391,8 +3391,6 @@ void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data)
 	struct inode *inode = hdr->inode;
 
 	dprintk("%s Reset task for i/o through\n", __func__);
-	put_lseg(hdr->lseg);
-	hdr->lseg = NULL;
 	data->ds_clp = NULL;
 	/* offsets will differ in the dense stripe case */
 	data->args.offset = data->mds_offset;
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index d349bd4c48db..cd4c038135a7 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -39,6 +39,30 @@ bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount)
 	return p->pagevec != NULL;
 }
 
+void nfs_pgheader_init(struct nfs_pageio_descriptor *desc,
+		       struct nfs_pgio_header *hdr,
+		       void (*release)(struct nfs_pgio_header *hdr))
+{
+	hdr->req = nfs_list_entry(desc->pg_list.next);
+	hdr->inode = desc->pg_inode;
+	hdr->cred = hdr->req->wb_context->cred;
+	hdr->io_start = req_offset(hdr->req);
+	hdr->good_bytes = desc->pg_count;
+	hdr->release = release;
+}
+
+void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos)
+{
+	spin_lock(&hdr->lock);
+	if (pos < hdr->io_start + hdr->good_bytes) {
+		set_bit(NFS_IOHDR_ERROR, &hdr->flags);
+		clear_bit(NFS_IOHDR_EOF, &hdr->flags);
+		hdr->good_bytes = pos - hdr->io_start;
+		hdr->error = error;
+	}
+	spin_unlock(&hdr->lock);
+}
+
 static inline struct nfs_page *
 nfs_page_alloc(void)
 {
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index d705da427e6d..d1a91dbe7654 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1333,7 +1333,9 @@ static void pnfs_ld_handle_read_error(struct nfs_read_data *data)
 		clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags);
 		pnfs_return_layout(hdr->inode);
 	}
-	data->task.tk_status = pnfs_read_done_resend_to_mds(hdr->inode, &hdr->pages);
+	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
+		data->task.tk_status = pnfs_read_done_resend_to_mds(hdr->inode,
+								&hdr->pages);
 }
 
 /*
@@ -1348,7 +1350,6 @@ void pnfs_ld_read_done(struct nfs_read_data *data)
 		hdr->mds_ops->rpc_call_done(&data->task, data);
 	} else
 		pnfs_ld_handle_read_error(data);
-	put_lseg(hdr->lseg);
 	hdr->mds_ops->rpc_release(data);
 }
 EXPORT_SYMBOL_GPL(pnfs_ld_read_done);
@@ -1359,11 +1360,11 @@ pnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
 {
 	struct nfs_pgio_header *hdr = data->header;
 
-	list_splice_tail_init(&hdr->pages, &desc->pg_list);
-	if (hdr->req && list_empty(&hdr->req->wb_list))
-		nfs_list_add_request(hdr->req, &desc->pg_list);
-	nfs_pageio_reset_read_mds(desc);
-	desc->pg_recoalesce = 1;
+	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
+		list_splice_tail_init(&hdr->pages, &desc->pg_list);
+		nfs_pageio_reset_read_mds(desc);
+		desc->pg_recoalesce = 1;
+	}
 	nfs_readdata_release(data);
 }
 
@@ -1381,18 +1382,13 @@ pnfs_try_to_read_data(struct nfs_read_data *rdata,
 	enum pnfs_try_status trypnfs;
 
 	hdr->mds_ops = call_ops;
-	hdr->lseg = get_lseg(lseg);
 
 	dprintk("%s: Reading ino:%lu %u@%llu\n",
 		__func__, inode->i_ino, rdata->args.count, rdata->args.offset);
 
 	trypnfs = nfss->pnfs_curr_ld->read_pagelist(rdata);
-	if (trypnfs == PNFS_NOT_ATTEMPTED) {
-		put_lseg(hdr->lseg);
-		hdr->lseg = NULL;
-	} else {
+	if (trypnfs != PNFS_NOT_ATTEMPTED)
 		nfs_inc_stats(inode, NFSIOS_PNFS_READ);
-	}
 	dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
 	return trypnfs;
 }
@@ -1408,7 +1404,7 @@ pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *hea
 	while (!list_empty(head)) {
 		enum pnfs_try_status trypnfs;
 
-		data = list_entry(head->next, struct nfs_read_data, list);
+		data = list_first_entry(head, struct nfs_read_data, list);
 		list_del_init(&data->list);
 
 		trypnfs = pnfs_try_to_read_data(data, call_ops, lseg);
@@ -1418,20 +1414,41 @@ pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *hea
 	put_lseg(lseg);
 }
 
+static void pnfs_readhdr_free(struct nfs_pgio_header *hdr)
+{
+	put_lseg(hdr->lseg);
+	nfs_readhdr_free(hdr);
+}
+
 int
 pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
 {
-	LIST_HEAD(head);
+	struct nfs_read_header *rhdr;
+	struct nfs_pgio_header *hdr;
 	int ret;
 
-	ret = nfs_generic_pagein(desc, &head);
-	if (ret != 0) {
+	rhdr = nfs_readhdr_alloc();
+	if (!rhdr) {
+		nfs_async_read_error(&desc->pg_list);
+		ret = -ENOMEM;
 		put_lseg(desc->pg_lseg);
 		desc->pg_lseg = NULL;
 		return ret;
 	}
-	pnfs_do_multiple_reads(desc, &head);
-	return 0;
+	hdr = &rhdr->header;
+	nfs_pgheader_init(desc, hdr, pnfs_readhdr_free);
+	hdr->lseg = get_lseg(desc->pg_lseg);
+	atomic_inc(&hdr->refcnt);
+	ret = nfs_generic_pagein(desc, hdr);
+	if (ret != 0) {
+		put_lseg(desc->pg_lseg);
+		desc->pg_lseg = NULL;
+		set_bit(NFS_IOHDR_REDO, &hdr->flags);
+	} else
+		pnfs_do_multiple_reads(desc, &hdr->rpc_list);
+	if (atomic_dec_and_test(&hdr->refcnt))
+		nfs_read_completion(hdr);
+	return ret;
 }
 EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages);
 
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index f6ab30b5a462..c9633b2501bd 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -30,29 +30,49 @@
 #define NFSDBG_FACILITY		NFSDBG_PAGECACHE
 
 static const struct nfs_pageio_ops nfs_pageio_read_ops;
-static const struct rpc_call_ops nfs_read_partial_ops;
-static const struct rpc_call_ops nfs_read_full_ops;
+static const struct rpc_call_ops nfs_read_common_ops;
 
 static struct kmem_cache *nfs_rdata_cachep;
 
-struct nfs_read_header *nfs_readhdr_alloc(unsigned int pagecount)
+struct nfs_read_header *nfs_readhdr_alloc()
 {
-	struct nfs_read_header *p;
+	struct nfs_read_header *rhdr;
 
-	p = kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL);
-	if (p) {
-		struct nfs_pgio_header *hdr = &p->header;
-		struct nfs_read_data *data = &p->rpc_data;
+	rhdr = kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL);
+	if (rhdr) {
+		struct nfs_pgio_header *hdr = &rhdr->header;
 
 		INIT_LIST_HEAD(&hdr->pages);
-		INIT_LIST_HEAD(&data->list);
+		INIT_LIST_HEAD(&hdr->rpc_list);
+		spin_lock_init(&hdr->lock);
+		atomic_set(&hdr->refcnt, 0);
+	}
+	return rhdr;
+}
+
+struct nfs_read_data *nfs_readdata_alloc(struct nfs_pgio_header *hdr,
+					 unsigned int pagecount)
+{
+	struct nfs_read_data *data, *prealloc;
+
+	prealloc = &container_of(hdr, struct nfs_read_header, header)->rpc_data;
+	if (prealloc->header == NULL)
+		data = prealloc;
+	else
+		data = kzalloc(sizeof(*data), GFP_KERNEL);
+	if (!data)
+		goto out;
+
+	if (nfs_pgarray_set(&data->pages, pagecount)) {
 		data->header = hdr;
-		if (!nfs_pgarray_set(&data->pages, pagecount)) {
-			kmem_cache_free(nfs_rdata_cachep, p);
-			p = NULL;
-		}
+		atomic_inc(&hdr->refcnt);
+	} else {
+		if (data != prealloc)
+			kfree(data);
+		data = NULL;
 	}
-	return p;
+out:
+	return data;
 }
 
 void nfs_readhdr_free(struct nfs_pgio_header *hdr)
@@ -64,10 +84,18 @@ void nfs_readhdr_free(struct nfs_pgio_header *hdr)
 
 void nfs_readdata_release(struct nfs_read_data *rdata)
 {
+	struct nfs_pgio_header *hdr = rdata->header;
+	struct nfs_read_header *read_header = container_of(hdr, struct nfs_read_header, header);
+
 	put_nfs_open_context(rdata->args.context);
 	if (rdata->pages.pagevec != rdata->pages.page_array)
 		kfree(rdata->pages.pagevec);
-	nfs_readhdr_free(rdata->header);
+	if (rdata != &read_header->rpc_data)
+		kfree(rdata);
+	else
+		rdata->header = NULL;
+	if (atomic_dec_and_test(&hdr->refcnt))
+		nfs_read_completion(hdr);
 }
 
 static
@@ -79,35 +107,6 @@ int nfs_return_empty_page(struct page *page)
 	return 0;
 }
 
-static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data)
-{
-	unsigned int remainder = data->args.count - data->res.count;
-	unsigned int base = data->args.pgbase + data->res.count;
-	unsigned int pglen;
-	struct page **pages;
-
-	if (data->res.eof == 0 || remainder == 0)
-		return;
-	/*
-	 * Note: "remainder" can never be negative, since we check for
-	 * 	this in the XDR code.
-	 */
-	pages = &data->args.pages[base >> PAGE_CACHE_SHIFT];
-	base &= ~PAGE_CACHE_MASK;
-	pglen = PAGE_CACHE_SIZE - base;
-	for (;;) {
-		if (remainder <= pglen) {
-			zero_user(*pages, base, remainder);
-			break;
-		}
-		zero_user(*pages, base, pglen);
-		pages++;
-		remainder -= pglen;
-		pglen = PAGE_CACHE_SIZE;
-		base = 0;
-	}
-}
-
 void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio,
 		struct inode *inode)
 {
@@ -170,6 +169,46 @@ static void nfs_readpage_release(struct nfs_page *req)
 	nfs_release_request(req);
 }
 
+/* Note io was page aligned */
+void nfs_read_completion(struct nfs_pgio_header *hdr)
+{
+	unsigned long bytes = 0;
+
+	if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
+		goto out;
+	if (!test_bit(NFS_IOHDR_ERROR, &hdr->flags)) {
+		while (!list_empty(&hdr->pages)) {
+			struct nfs_page *req = nfs_list_entry(hdr->pages.next);
+			struct page *page = req->wb_page;
+
+			if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) {
+				if (bytes > hdr->good_bytes)
+					zero_user(page, 0, PAGE_SIZE);
+				else if (hdr->good_bytes - bytes < PAGE_SIZE)
+					zero_user_segment(page,
+						hdr->good_bytes & ~PAGE_MASK,
+						PAGE_SIZE);
+			}
+			SetPageUptodate(page);
+			nfs_list_remove_request(req);
+			nfs_readpage_release(req);
+			bytes += PAGE_SIZE;
+		}
+	} else {
+		while (!list_empty(&hdr->pages)) {
+			struct nfs_page *req = nfs_list_entry(hdr->pages.next);
+
+			bytes += req->wb_bytes;
+			if (bytes <= hdr->good_bytes)
+				SetPageUptodate(req->wb_page);
+			nfs_list_remove_request(req);
+			nfs_readpage_release(req);
+		}
+	}
+out:
+	hdr->release(hdr);
+}
+
 int nfs_initiate_read(struct rpc_clnt *clnt,
 		      struct nfs_read_data *data,
 		      const struct rpc_call_ops *call_ops)
@@ -214,16 +253,12 @@ EXPORT_SYMBOL_GPL(nfs_initiate_read);
 /*
  * Set up the NFS read request struct
  */
-static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
+static void nfs_read_rpcsetup(struct nfs_read_data *data,
 		unsigned int count, unsigned int offset)
 {
-	struct inode *inode = data->header->inode;
-
-	data->header->req	  = req;
-	data->header->inode	  = inode;
-	data->header->cred	  = req->wb_context->cred;
+	struct nfs_page *req = data->header->req;
 
-	data->args.fh     = NFS_FH(inode);
+	data->args.fh     = NFS_FH(data->header->inode);
 	data->args.offset = req_offset(req) + offset;
 	data->args.pgbase = req->wb_pgbase + offset;
 	data->args.pages  = data->pages.pagevec;
@@ -255,7 +290,7 @@ nfs_do_multiple_reads(struct list_head *head,
 	while (!list_empty(head)) {
 		int ret2;
 
-		data = list_entry(head->next, struct nfs_read_data, list);
+		data = list_first_entry(head, struct nfs_read_data, list);
 		list_del_init(&data->list);
 
 		ret2 = nfs_do_read(data, call_ops);
@@ -265,7 +300,7 @@ nfs_do_multiple_reads(struct list_head *head,
 	return ret;
 }
 
-static void
+void
 nfs_async_read_error(struct list_head *head)
 {
 	struct nfs_page	*req;
@@ -290,11 +325,11 @@ nfs_async_read_error(struct list_head *head)
  * won't see the new data until our attribute cache is updated.  This is more
  * or less conventional NFS client behavior.
  */
-static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, struct list_head *res)
+static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc,
+			    struct nfs_pgio_header *hdr)
 {
-	struct nfs_page *req = nfs_list_entry(desc->pg_list.next);
+	struct nfs_page *req = hdr->req;
 	struct page *page = req->wb_page;
-	struct nfs_read_header *rhdr;
 	struct nfs_read_data *data;
 	size_t rsize = desc->pg_bsize, nbytes;
 	unsigned int offset;
@@ -302,85 +337,97 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, struct list_head
 	int ret = 0;
 
 	nfs_list_remove_request(req);
+	nfs_list_add_request(req, &hdr->pages);
 
 	offset = 0;
 	nbytes = desc->pg_count;
 	do {
 		size_t len = min(nbytes,rsize);
 
-		rhdr = nfs_readhdr_alloc(1);
-		if (!rhdr)
+		data = nfs_readdata_alloc(hdr, 1);
+		if (!data)
 			goto out_bad;
-		data = &rhdr->rpc_data;
 		data->pages.pagevec[0] = page;
-		nfs_read_rpcsetup(req, data, len, offset);
-		list_add(&data->list, res);
+		nfs_read_rpcsetup(data, len, offset);
+		list_add(&data->list, &hdr->rpc_list);
 		requests++;
 		nbytes -= len;
 		offset += len;
 	} while(nbytes != 0);
-	atomic_set(&req->wb_complete, requests);
-	desc->pg_rpc_callops = &nfs_read_partial_ops;
+	desc->pg_rpc_callops = &nfs_read_common_ops;
 	return ret;
 out_bad:
-	while (!list_empty(res)) {
-		data = list_entry(res->next, struct nfs_read_data, list);
+	while (!list_empty(&hdr->rpc_list)) {
+		data = list_first_entry(&hdr->rpc_list, struct nfs_read_data, list);
 		list_del(&data->list);
 		nfs_readdata_release(data);
 	}
-	nfs_readpage_release(req);
+	nfs_async_read_error(&hdr->pages);
 	return -ENOMEM;
 }
 
-static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, struct list_head *res)
+static int nfs_pagein_one(struct nfs_pageio_descriptor *desc,
+			  struct nfs_pgio_header *hdr)
 {
 	struct nfs_page		*req;
 	struct page		**pages;
-	struct nfs_read_header	*rhdr;
-	struct nfs_read_data	*data;
+	struct nfs_read_data    *data;
 	struct list_head *head = &desc->pg_list;
 	int ret = 0;
 
-	rhdr = nfs_readhdr_alloc(nfs_page_array_len(desc->pg_base,
-						    desc->pg_count));
-	if (!rhdr) {
+	data = nfs_readdata_alloc(hdr, nfs_page_array_len(desc->pg_base,
+							  desc->pg_count));
+	if (!data) {
 		nfs_async_read_error(head);
 		ret = -ENOMEM;
 		goto out;
 	}
 
-	data = &rhdr->rpc_data;
 	pages = data->pages.pagevec;
 	while (!list_empty(head)) {
 		req = nfs_list_entry(head->next);
 		nfs_list_remove_request(req);
-		nfs_list_add_request(req, &rhdr->header.pages);
+		nfs_list_add_request(req, &hdr->pages);
 		*pages++ = req->wb_page;
 	}
-	req = nfs_list_entry(rhdr->header.pages.next);
 
-	nfs_read_rpcsetup(req, data, desc->pg_count, 0);
-	list_add(&data->list, res);
-	desc->pg_rpc_callops = &nfs_read_full_ops;
+	nfs_read_rpcsetup(data, desc->pg_count, 0);
+	list_add(&data->list, &hdr->rpc_list);
+	desc->pg_rpc_callops = &nfs_read_common_ops;
 out:
 	return ret;
 }
 
-int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, struct list_head *head)
+int nfs_generic_pagein(struct nfs_pageio_descriptor *desc,
+		       struct nfs_pgio_header *hdr)
 {
 	if (desc->pg_bsize < PAGE_CACHE_SIZE)
-		return nfs_pagein_multi(desc, head);
-	return nfs_pagein_one(desc, head);
+		return nfs_pagein_multi(desc, hdr);
+	return nfs_pagein_one(desc, hdr);
 }
 
 static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
 {
-	LIST_HEAD(head);
+	struct nfs_read_header *rhdr;
+	struct nfs_pgio_header *hdr;
 	int ret;
 
-	ret = nfs_generic_pagein(desc, &head);
+	rhdr = nfs_readhdr_alloc();
+	if (!rhdr) {
+		nfs_async_read_error(&desc->pg_list);
+		return -ENOMEM;
+	}
+	hdr = &rhdr->header;
+	nfs_pgheader_init(desc, hdr, nfs_readhdr_free);
+	atomic_inc(&hdr->refcnt);
+	ret = nfs_generic_pagein(desc, hdr);
 	if (ret == 0)
-		ret = nfs_do_multiple_reads(&head, desc->pg_rpc_callops);
+		ret = nfs_do_multiple_reads(&hdr->rpc_list,
+					    desc->pg_rpc_callops);
+	else
+		set_bit(NFS_IOHDR_REDO, &hdr->flags);
+	if (atomic_dec_and_test(&hdr->refcnt))
+		nfs_read_completion(hdr);
 	return ret;
 }
 
@@ -419,15 +466,13 @@ static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data
 	struct nfs_readargs *argp = &data->args;
 	struct nfs_readres *resp = &data->res;
 
-	if (resp->eof || resp->count == argp->count)
-		return;
-
 	/* This is a short read! */
 	nfs_inc_stats(data->header->inode, NFSIOS_SHORTREAD);
 	/* Has the server at least made some progress? */
-	if (resp->count == 0)
+	if (resp->count == 0) {
+		nfs_set_pgio_error(data->header, -EIO, argp->offset);
 		return;
-
+	}
 	/* Yes, so retry the read at the end of the data */
 	data->mds_offset += resp->count;
 	argp->offset += resp->count;
@@ -436,38 +481,34 @@ static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data
 	rpc_restart_call_prepare(task);
 }
 
-/*
- * Handle a read reply that fills part of a page.
- */
-static void nfs_readpage_result_partial(struct rpc_task *task, void *calldata)
+static void nfs_readpage_result_common(struct rpc_task *task, void *calldata)
 {
 	struct nfs_read_data *data = calldata;
- 
+	struct nfs_pgio_header *hdr = data->header;
+
+	/* Note the only returns of nfs_readpage_result are 0 and -EAGAIN */
 	if (nfs_readpage_result(task, data) != 0)
 		return;
 	if (task->tk_status < 0)
-		return;
-
-	nfs_readpage_truncate_uninitialised_page(data);
-	nfs_readpage_retry(task, data);
+		nfs_set_pgio_error(hdr, task->tk_status, data->args.offset);
+	else if (data->res.eof) {
+		loff_t bound;
+
+		bound = data->args.offset + data->res.count;
+		spin_lock(&hdr->lock);
+		if (bound < hdr->io_start + hdr->good_bytes) {
+			set_bit(NFS_IOHDR_EOF, &hdr->flags);
+			clear_bit(NFS_IOHDR_ERROR, &hdr->flags);
+			hdr->good_bytes = bound - hdr->io_start;
+		}
+		spin_unlock(&hdr->lock);
+	} else if (data->res.count != data->args.count)
+		nfs_readpage_retry(task, data);
 }
 
-static void nfs_readpage_release_partial(void *calldata)
+static void nfs_readpage_release_common(void *calldata)
 {
-	struct nfs_read_data *data = calldata;
-	struct nfs_page *req = data->header->req;
-	struct page *page = req->wb_page;
-	int status = data->task.tk_status;
-
-	if (status < 0)
-		set_bit(PG_PARTIAL_READ_FAILED, &req->wb_flags);
-
-	if (atomic_dec_and_test(&req->wb_complete)) {
-		if (!test_bit(PG_PARTIAL_READ_FAILED, &req->wb_flags))
-			SetPageUptodate(page);
-		nfs_readpage_release(req);
-	}
-	nfs_readdata_release(data);
+	nfs_readdata_release(calldata);
 }
 
 void nfs_read_prepare(struct rpc_task *task, void *calldata)
@@ -476,75 +517,10 @@ void nfs_read_prepare(struct rpc_task *task, void *calldata)
 	NFS_PROTO(data->header->inode)->read_rpc_prepare(task, data);
 }
 
-static const struct rpc_call_ops nfs_read_partial_ops = {
-	.rpc_call_prepare = nfs_read_prepare,
-	.rpc_call_done = nfs_readpage_result_partial,
-	.rpc_release = nfs_readpage_release_partial,
-};
-
-static void nfs_readpage_set_pages_uptodate(struct nfs_read_data *data)
-{
-	unsigned int count = data->res.count;
-	unsigned int base = data->args.pgbase;
-	struct page **pages;
-
-	if (data->res.eof)
-		count = data->args.count;
-	if (unlikely(count == 0))
-		return;
-	pages = &data->args.pages[base >> PAGE_CACHE_SHIFT];
-	base &= ~PAGE_CACHE_MASK;
-	count += base;
-	for (;count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++)
-		SetPageUptodate(*pages);
-	if (count == 0)
-		return;
-	/* Was this a short read? */
-	if (data->res.eof || data->res.count == data->args.count)
-		SetPageUptodate(*pages);
-}
-
-/*
- * This is the callback from RPC telling us whether a reply was
- * received or some error occurred (timeout or socket shutdown).
- */
-static void nfs_readpage_result_full(struct rpc_task *task, void *calldata)
-{
-	struct nfs_read_data *data = calldata;
-
-	if (nfs_readpage_result(task, data) != 0)
-		return;
-	if (task->tk_status < 0)
-		return;
-	/*
-	 * Note: nfs_readpage_retry may change the values of
-	 * data->args. In the multi-page case, we therefore need
-	 * to ensure that we call nfs_readpage_set_pages_uptodate()
-	 * first.
-	 */
-	nfs_readpage_truncate_uninitialised_page(data);
-	nfs_readpage_set_pages_uptodate(data);
-	nfs_readpage_retry(task, data);
-}
-
-static void nfs_readpage_release_full(void *calldata)
-{
-	struct nfs_read_data *data = calldata;
-	struct nfs_pgio_header *hdr = data->header;
-
-	while (!list_empty(&hdr->pages)) {
-		struct nfs_page *req = nfs_list_entry(hdr->pages.next);
-
-		nfs_list_remove_request(req);
-		nfs_readpage_release(req);
-	}
-	nfs_readdata_release(calldata);
-}
-
-static const struct rpc_call_ops nfs_read_full_ops = {
+static const struct rpc_call_ops nfs_read_common_ops = {
 	.rpc_call_prepare = nfs_read_prepare,
-	.rpc_call_done = nfs_readpage_result_full,
-	.rpc_release = nfs_readpage_release_full,
+	.rpc_call_done = nfs_readpage_result_common,
+	.rpc_release = nfs_readpage_release_common,
 };
 
 /*
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index eac30d6bec17..5c520344d8ad 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -27,7 +27,6 @@ enum {
 	PG_CLEAN,
 	PG_NEED_COMMIT,
 	PG_NEED_RESCHED,
-	PG_PARTIAL_READ_FAILED,
 	PG_COMMIT_TO_DS,
 };
 
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index e34beaf86e9c..164862148ba0 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1187,14 +1187,30 @@ struct nfs_read_data {
 	struct nfs_client	*ds_clp;	/* pNFS data server */
 };
 
+/* used as flag bits in nfs_pgio_header */
+enum {
+	NFS_IOHDR_ERROR = 0,
+	NFS_IOHDR_EOF,
+	NFS_IOHDR_REDO,
+};
+
 struct nfs_pgio_header {
 	struct inode		*inode;
 	struct rpc_cred		*cred;
 	struct list_head	pages;
+	struct list_head	rpc_list;
+	atomic_t		refcnt;
 	struct nfs_page		*req;
 	struct pnfs_layout_segment *lseg;
+	loff_t			io_start;
 	const struct rpc_call_ops *mds_ops;
+	void (*release) (struct nfs_pgio_header *hdr);
+	spinlock_t		lock;
+	/* fields protected by lock */
 	int			pnfs_error;
+	int			error;		/* merge with pnfs_error */
+	unsigned long		good_bytes;	/* boundary of good data */
+	unsigned long		flags;
 };
 
 struct nfs_read_header {
-- 
cgit v1.3-7-g2ca7


From 6c75dc0d498caa402fb17b1bf769835a9db875c8 Mon Sep 17 00:00:00 2001
From: Fred Isaman <iisaman@netapp.com>
Date: Fri, 20 Apr 2012 14:47:47 -0400
Subject: NFS: merge _full and _partial write rpc_ops

Decouple nfs_pgio_header and nfs_write_data, and have (possibly
multiple) nfs_write_datas each take a refcount on nfs_pgio_header.

For the moment keeps nfs_write_header as a way to preallocate a single
nfs_write_data with the nfs_pgio_header.  The code doesn't need this,
and would be prettier without, but given the amount of churn I am
already introducing I didn't want to play with tuning new mempools.

This also fixes bug in pnfs_ld_handle_write_error.  In the case of
desc->pg_bsize < PAGE_CACHE_SIZE, the pages list was empty, causing
replay attempt to do nothing.

Signed-off-by: Fred Isaman <iisaman@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/direct.c         |  10 +-
 fs/nfs/internal.h       |   8 +-
 fs/nfs/nfs4filelayout.c |   1 -
 fs/nfs/nfs4proc.c       |   4 +-
 fs/nfs/pnfs.c           |  58 +++++---
 fs/nfs/write.c          | 383 ++++++++++++++++++++++--------------------------
 include/linux/nfs_xdr.h |   2 +
 7 files changed, 227 insertions(+), 239 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 90b00ce42cbe..22a40c408449 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -768,11 +768,17 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
 		bytes = min(wsize,count);
 
 		result = -ENOMEM;
-		whdr = nfs_writehdr_alloc(nfs_page_array_len(pgbase, bytes));
+		whdr = nfs_writehdr_alloc();
 		if (unlikely(!whdr))
 			break;
 
-		data = &whdr->rpc_data;
+		data = nfs_writedata_alloc(&whdr->header, nfs_page_array_len(pgbase, bytes));
+		if (!data) {
+			nfs_writehdr_free(&whdr->header);
+			break;
+		}
+		data->header = &whdr->header;
+		atomic_inc(&data->header->refcnt);
 		pages = &data->pages;
 
 		down_read(&current->mm->mmap_sem);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 33af5e51c0bb..16bc9c47c83e 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -319,10 +319,14 @@ extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio);
 extern void nfs_readdata_release(struct nfs_read_data *rdata);
 
 /* write.c */
-extern struct nfs_write_header *nfs_writehdr_alloc(unsigned int npages);
+extern void nfs_async_write_error(struct list_head *head);
+extern struct nfs_write_header *nfs_writehdr_alloc(void);
 extern void nfs_writehdr_free(struct nfs_pgio_header *hdr);
+extern struct nfs_write_data *nfs_writedata_alloc(struct nfs_pgio_header *hdr,
+						  unsigned int pagecount);
+extern void nfs_write_completion(struct nfs_pgio_header *hdr);
 extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc,
-		struct list_head *head);
+			     struct nfs_pgio_header *hdr);
 extern void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio,
 				  struct inode *inode, int ioflags);
 extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio);
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 333e765f3ac2..02d8170ce0f3 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -314,7 +314,6 @@ static void filelayout_write_release(void *data)
 {
 	struct nfs_write_data *wdata = data;
 
-	put_lseg(wdata->header->lseg);
 	wdata->header->mds_ops->rpc_release(data);
 }
 
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index ce31ab22bc55..87af80d28a82 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3431,8 +3431,6 @@ void nfs4_reset_write(struct rpc_task *task, struct nfs_write_data *data)
 	struct inode *inode = hdr->inode;
 
 	dprintk("%s Reset task for i/o through\n", __func__);
-	put_lseg(hdr->lseg);
-	hdr->lseg        = NULL;
 	data->ds_clp     = NULL;
 	data->write_done_cb = nfs4_write_done_cb;
 	data->args.fh       = NFS_FH(inode);
@@ -3448,7 +3446,7 @@ static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_messag
 {
 	struct nfs_server *server = NFS_SERVER(data->header->inode);
 
-	if (data->header->lseg) {
+	if (data->ds_clp) {
 		data->args.bitmask = NULL;
 		data->res.fattr = NULL;
 	} else
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index d1a91dbe7654..d515f00614cd 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1199,7 +1199,9 @@ static void pnfs_ld_handle_write_error(struct nfs_write_data *data)
 		clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags);
 		pnfs_return_layout(hdr->inode);
 	}
-	data->task.tk_status = pnfs_write_done_resend_to_mds(hdr->inode, &hdr->pages);
+	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
+		data->task.tk_status = pnfs_write_done_resend_to_mds(hdr->inode,
+								&hdr->pages);
 }
 
 /*
@@ -1214,7 +1216,6 @@ void pnfs_ld_write_done(struct nfs_write_data *data)
 		hdr->mds_ops->rpc_call_done(&data->task, data);
 	} else
 		pnfs_ld_handle_write_error(data);
-	put_lseg(hdr->lseg);
 	hdr->mds_ops->rpc_release(data);
 }
 EXPORT_SYMBOL_GPL(pnfs_ld_write_done);
@@ -1225,12 +1226,11 @@ pnfs_write_through_mds(struct nfs_pageio_descriptor *desc,
 {
 	struct nfs_pgio_header *hdr = data->header;
 
-	list_splice_tail_init(&hdr->pages, &desc->pg_list);
-	if (hdr->req && list_empty(&hdr->req->wb_list))
-		nfs_list_add_request(hdr->req, &desc->pg_list);
-	nfs_pageio_reset_write_mds(desc);
-	desc->pg_recoalesce = 1;
-	put_lseg(hdr->lseg);
+	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
+		list_splice_tail_init(&hdr->pages, &desc->pg_list);
+		nfs_pageio_reset_write_mds(desc);
+		desc->pg_recoalesce = 1;
+	}
 	nfs_writedata_release(data);
 }
 
@@ -1246,18 +1246,12 @@ pnfs_try_to_write_data(struct nfs_write_data *wdata,
 	struct nfs_server *nfss = NFS_SERVER(inode);
 
 	hdr->mds_ops = call_ops;
-	hdr->lseg = get_lseg(lseg);
 
 	dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__,
 		inode->i_ino, wdata->args.count, wdata->args.offset, how);
-
 	trypnfs = nfss->pnfs_curr_ld->write_pagelist(wdata, how);
-	if (trypnfs == PNFS_NOT_ATTEMPTED) {
-		put_lseg(hdr->lseg);
-		hdr->lseg = NULL;
-	} else
+	if (trypnfs != PNFS_NOT_ATTEMPTED)
 		nfs_inc_stats(inode, NFSIOS_PNFS_WRITE);
-
 	dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
 	return trypnfs;
 }
@@ -1273,7 +1267,7 @@ pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *he
 	while (!list_empty(head)) {
 		enum pnfs_try_status trypnfs;
 
-		data = list_entry(head->next, struct nfs_write_data, list);
+		data = list_first_entry(head, struct nfs_write_data, list);
 		list_del_init(&data->list);
 
 		trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how);
@@ -1283,20 +1277,40 @@ pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *he
 	put_lseg(lseg);
 }
 
+static void pnfs_writehdr_free(struct nfs_pgio_header *hdr)
+{
+	put_lseg(hdr->lseg);
+	nfs_writehdr_free(hdr);
+}
+
 int
 pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
 {
-	LIST_HEAD(head);
+	struct nfs_write_header *whdr;
+	struct nfs_pgio_header *hdr;
 	int ret;
 
-	ret = nfs_generic_flush(desc, &head);
-	if (ret != 0) {
+	whdr = nfs_writehdr_alloc();
+	if (!whdr) {
+		nfs_async_write_error(&desc->pg_list);
 		put_lseg(desc->pg_lseg);
 		desc->pg_lseg = NULL;
-		return ret;
+		return -ENOMEM;
 	}
-	pnfs_do_multiple_writes(desc, &head, desc->pg_ioflags);
-	return 0;
+	hdr = &whdr->header;
+	nfs_pgheader_init(desc, hdr, pnfs_writehdr_free);
+	hdr->lseg = get_lseg(desc->pg_lseg);
+	atomic_inc(&hdr->refcnt);
+	ret = nfs_generic_flush(desc, hdr);
+	if (ret != 0) {
+		put_lseg(desc->pg_lseg);
+		desc->pg_lseg = NULL;
+		set_bit(NFS_IOHDR_REDO, &hdr->flags);
+	} else
+		pnfs_do_multiple_writes(desc, &hdr->rpc_list, desc->pg_ioflags);
+	if (atomic_dec_and_test(&hdr->refcnt))
+		nfs_write_completion(hdr);
+	return ret;
 }
 EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages);
 
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 2efae049b4f0..076075eb676c 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -42,8 +42,7 @@
 static void nfs_pageio_init_write(struct nfs_pageio_descriptor *desc,
 				  struct inode *inode, int ioflags);
 static void nfs_redirty_request(struct nfs_page *req);
-static const struct rpc_call_ops nfs_write_partial_ops;
-static const struct rpc_call_ops nfs_write_full_ops;
+static const struct rpc_call_ops nfs_write_common_ops;
 static const struct rpc_call_ops nfs_commit_ops;
 
 static struct kmem_cache *nfs_wdata_cachep;
@@ -69,26 +68,47 @@ void nfs_commit_free(struct nfs_commit_data *p)
 }
 EXPORT_SYMBOL_GPL(nfs_commit_free);
 
-struct nfs_write_header *nfs_writehdr_alloc(unsigned int pagecount)
+struct nfs_write_header *nfs_writehdr_alloc(void)
 {
 	struct nfs_write_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOFS);
 
 	if (p) {
 		struct nfs_pgio_header *hdr = &p->header;
-		struct nfs_write_data *data = &p->rpc_data;
 
 		memset(p, 0, sizeof(*p));
 		INIT_LIST_HEAD(&hdr->pages);
-		INIT_LIST_HEAD(&data->list);
-		data->header = hdr;
-		if (!nfs_pgarray_set(&data->pages, pagecount)) {
-			mempool_free(p, nfs_wdata_mempool);
-			p = NULL;
-		}
+		INIT_LIST_HEAD(&hdr->rpc_list);
+		spin_lock_init(&hdr->lock);
+		atomic_set(&hdr->refcnt, 0);
 	}
 	return p;
 }
 
+struct nfs_write_data *nfs_writedata_alloc(struct nfs_pgio_header *hdr,
+					   unsigned int pagecount)
+{
+	struct nfs_write_data *data, *prealloc;
+
+	prealloc = &container_of(hdr, struct nfs_write_header, header)->rpc_data;
+	if (prealloc->header == NULL)
+		data = prealloc;
+	else
+		data = kzalloc(sizeof(*data), GFP_KERNEL);
+	if (!data)
+		goto out;
+
+	if (nfs_pgarray_set(&data->pages, pagecount)) {
+		data->header = hdr;
+		atomic_inc(&hdr->refcnt);
+	} else {
+		if (data != prealloc)
+			kfree(data);
+		data = NULL;
+	}
+out:
+	return data;
+}
+
 void nfs_writehdr_free(struct nfs_pgio_header *hdr)
 {
 	struct nfs_write_header *whdr = container_of(hdr, struct nfs_write_header, header);
@@ -97,10 +117,18 @@ void nfs_writehdr_free(struct nfs_pgio_header *hdr)
 
 void nfs_writedata_release(struct nfs_write_data *wdata)
 {
+	struct nfs_pgio_header *hdr = wdata->header;
+	struct nfs_write_header *write_header = container_of(hdr, struct nfs_write_header, header);
+
 	put_nfs_open_context(wdata->args.context);
 	if (wdata->pages.pagevec != wdata->pages.page_array)
 		kfree(wdata->pages.pagevec);
-	nfs_writehdr_free(wdata->header);
+	if (wdata != &write_header->rpc_data)
+		kfree(wdata);
+	else
+		wdata->header = NULL;
+	if (atomic_dec_and_test(&hdr->refcnt))
+		nfs_write_completion(hdr);
 }
 
 static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
@@ -511,20 +539,6 @@ int nfs_write_need_commit(struct nfs_write_data *data)
 	return data->verf.committed != NFS_FILE_SYNC;
 }
 
-static inline
-int nfs_reschedule_unstable_write(struct nfs_page *req,
-				  struct nfs_write_data *data)
-{
-	if (test_and_clear_bit(PG_NEED_COMMIT, &req->wb_flags)) {
-		nfs_mark_request_commit(req, data->header->lseg);
-		return 1;
-	}
-	if (test_and_clear_bit(PG_NEED_RESCHED, &req->wb_flags)) {
-		nfs_mark_request_dirty(req);
-		return 1;
-	}
-	return 0;
-}
 #else
 static void
 nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
@@ -542,13 +556,43 @@ int nfs_write_need_commit(struct nfs_write_data *data)
 	return 0;
 }
 
-static inline
-int nfs_reschedule_unstable_write(struct nfs_page *req,
-				  struct nfs_write_data *data)
+#endif
+
+void nfs_write_completion(struct nfs_pgio_header *hdr)
 {
-	return 0;
+	unsigned long bytes = 0;
+
+	if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
+		goto out;
+	while (!list_empty(&hdr->pages)) {
+		struct nfs_page *req = nfs_list_entry(hdr->pages.next);
+		struct page *page = req->wb_page;
+
+		bytes += req->wb_bytes;
+		nfs_list_remove_request(req);
+		if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) &&
+		    (hdr->good_bytes < bytes)) {
+			nfs_set_pageerror(page);
+			nfs_context_set_write_error(req->wb_context, hdr->error);
+			goto remove_req;
+		}
+		if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) {
+			nfs_mark_request_dirty(req);
+			goto next;
+		}
+		if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) {
+			nfs_mark_request_commit(req, hdr->lseg);
+			goto next;
+		}
+remove_req:
+		nfs_inode_remove_request(req);
+next:
+		nfs_unlock_request(req);
+		nfs_end_page_writeback(page);
+	}
+out:
+	hdr->release(hdr);
 }
-#endif
 
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
 static int
@@ -813,17 +857,6 @@ int nfs_updatepage(struct file *file, struct page *page,
 	return status;
 }
 
-static void nfs_writepage_release(struct nfs_page *req,
-				  struct nfs_write_data *data)
-{
-	struct page *page = req->wb_page;
-
-	if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req, data))
-		nfs_inode_remove_request(req);
-	nfs_unlock_request(req);
-	nfs_end_page_writeback(page);
-}
-
 static int flush_task_priority(int how)
 {
 	switch (how & (FLUSH_HIGHPRI|FLUSH_LOWPRI)) {
@@ -890,22 +923,16 @@ EXPORT_SYMBOL_GPL(nfs_initiate_write);
 /*
  * Set up the argument/result storage required for the RPC call.
  */
-static void nfs_write_rpcsetup(struct nfs_page *req,
-		struct nfs_write_data *data,
+static void nfs_write_rpcsetup(struct nfs_write_data *data,
 		unsigned int count, unsigned int offset,
 		int how)
 {
-	struct nfs_pgio_header *hdr = data->header;
-	struct inode *inode = req->wb_context->dentry->d_inode;
+	struct nfs_page *req = data->header->req;
 
 	/* Set up the RPC argument and reply structs
 	 * NB: take care not to mess about with data->commit et al. */
 
-	hdr->req = req;
-	hdr->inode = inode = req->wb_context->dentry->d_inode;
-	hdr->cred = req->wb_context->cred;
-
-	data->args.fh     = NFS_FH(inode);
+	data->args.fh     = NFS_FH(data->header->inode);
 	data->args.offset = req_offset(req) + offset;
 	/* pnfs_set_layoutcommit needs this */
 	data->mds_offset = data->args.offset;
@@ -919,7 +946,7 @@ static void nfs_write_rpcsetup(struct nfs_page *req,
 	case 0:
 		break;
 	case FLUSH_COND_STABLE:
-		if (nfs_need_commit(NFS_I(inode)))
+		if (nfs_need_commit(NFS_I(data->header->inode)))
 			break;
 	default:
 		data->args.stable = NFS_FILE_SYNC;
@@ -950,7 +977,7 @@ static int nfs_do_multiple_writes(struct list_head *head,
 	while (!list_empty(head)) {
 		int ret2;
 
-		data = list_entry(head->next, struct nfs_write_data, list);
+		data = list_first_entry(head, struct nfs_write_data, list);
 		list_del_init(&data->list);
 		
 		ret2 = nfs_do_write(data, call_ops, how);
@@ -973,15 +1000,26 @@ static void nfs_redirty_request(struct nfs_page *req)
 	nfs_end_page_writeback(page);
 }
 
+void nfs_async_write_error(struct list_head *head)
+{
+	struct nfs_page	*req;
+
+	while (!list_empty(head)) {
+		req = nfs_list_entry(head->next);
+		nfs_list_remove_request(req);
+		nfs_redirty_request(req);
+	}
+}
+
 /*
  * Generate multiple small requests to write out a single
  * contiguous dirty area on one page.
  */
-static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head *res)
+static int nfs_flush_multi(struct nfs_pageio_descriptor *desc,
+			   struct nfs_pgio_header *hdr)
 {
-	struct nfs_page *req = nfs_list_entry(desc->pg_list.next);
+	struct nfs_page *req = hdr->req;
 	struct page *page = req->wb_page;
-	struct nfs_write_header *whdr;
 	struct nfs_write_data *data;
 	size_t wsize = desc->pg_bsize, nbytes;
 	unsigned int offset;
@@ -989,6 +1027,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head
 	int ret = 0;
 
 	nfs_list_remove_request(req);
+	nfs_list_add_request(req, &hdr->pages);
 
 	if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
 	    (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit ||
@@ -1001,28 +1040,27 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head
 	do {
 		size_t len = min(nbytes, wsize);
 
-		whdr = nfs_writehdr_alloc(1);
-		if (!whdr)
+		data = nfs_writedata_alloc(hdr, 1);
+		if (!data)
 			goto out_bad;
-		data = &whdr->rpc_data;
 		data->pages.pagevec[0] = page;
-		nfs_write_rpcsetup(req, data, len, offset, desc->pg_ioflags);
-		list_add(&data->list, res);
+		nfs_write_rpcsetup(data, len, offset, desc->pg_ioflags);
+		list_add(&data->list, &hdr->rpc_list);
 		requests++;
 		nbytes -= len;
 		offset += len;
 	} while (nbytes != 0);
 	atomic_set(&req->wb_complete, requests);
-	desc->pg_rpc_callops = &nfs_write_partial_ops;
+	desc->pg_rpc_callops = &nfs_write_common_ops;
 	return ret;
 
 out_bad:
-	while (!list_empty(res)) {
-		data = list_entry(res->next, struct nfs_write_data, list);
+	while (!list_empty(&hdr->rpc_list)) {
+		data = list_first_entry(&hdr->rpc_list, struct nfs_write_data, list);
 		list_del(&data->list);
 		nfs_writedata_release(data);
 	}
-	nfs_redirty_request(req);
+	nfs_async_write_error(&hdr->pages);
 	return -ENOMEM;
 }
 
@@ -1034,64 +1072,74 @@ out_bad:
  * This is the case if nfs_updatepage detects a conflicting request
  * that has been written but not committed.
  */
-static int nfs_flush_one(struct nfs_pageio_descriptor *desc, struct list_head *res)
+static int nfs_flush_one(struct nfs_pageio_descriptor *desc,
+			 struct nfs_pgio_header *hdr)
 {
 	struct nfs_page		*req;
 	struct page		**pages;
-	struct nfs_write_header	*whdr;
 	struct nfs_write_data	*data;
 	struct list_head *head = &desc->pg_list;
 	int ret = 0;
 
-	whdr = nfs_writehdr_alloc(nfs_page_array_len(desc->pg_base,
-						     desc->pg_count));
-	if (!whdr) {
-		while (!list_empty(head)) {
-			req = nfs_list_entry(head->next);
-			nfs_list_remove_request(req);
-			nfs_redirty_request(req);
-		}
+	data = nfs_writedata_alloc(hdr, nfs_page_array_len(desc->pg_base,
+							   desc->pg_count));
+	if (!data) {
+		nfs_async_write_error(head);
 		ret = -ENOMEM;
 		goto out;
 	}
-	data = &whdr->rpc_data;
+
 	pages = data->pages.pagevec;
 	while (!list_empty(head)) {
 		req = nfs_list_entry(head->next);
 		nfs_list_remove_request(req);
-		nfs_list_add_request(req, &whdr->header.pages);
+		nfs_list_add_request(req, &hdr->pages);
 		*pages++ = req->wb_page;
 	}
-	req = nfs_list_entry(whdr->header.pages.next);
 
 	if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
 	    (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit))
 		desc->pg_ioflags &= ~FLUSH_COND_STABLE;
 
 	/* Set up the argument struct */
-	nfs_write_rpcsetup(req, data, desc->pg_count, 0, desc->pg_ioflags);
-	list_add(&data->list, res);
-	desc->pg_rpc_callops = &nfs_write_full_ops;
+	nfs_write_rpcsetup(data, desc->pg_count, 0, desc->pg_ioflags);
+	list_add(&data->list, &hdr->rpc_list);
+	desc->pg_rpc_callops = &nfs_write_common_ops;
 out:
 	return ret;
 }
 
-int nfs_generic_flush(struct nfs_pageio_descriptor *desc, struct list_head *head)
+int nfs_generic_flush(struct nfs_pageio_descriptor *desc,
+		      struct nfs_pgio_header *hdr)
 {
 	if (desc->pg_bsize < PAGE_CACHE_SIZE)
-		return nfs_flush_multi(desc, head);
-	return nfs_flush_one(desc, head);
+		return nfs_flush_multi(desc, hdr);
+	return nfs_flush_one(desc, hdr);
 }
 
 static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
 {
-	LIST_HEAD(head);
+	struct nfs_write_header *whdr;
+	struct nfs_pgio_header *hdr;
 	int ret;
 
-	ret = nfs_generic_flush(desc, &head);
+	whdr = nfs_writehdr_alloc();
+	if (!whdr) {
+		nfs_async_write_error(&desc->pg_list);
+		return -ENOMEM;
+	}
+	hdr = &whdr->header;
+	nfs_pgheader_init(desc, hdr, nfs_writehdr_free);
+	atomic_inc(&hdr->refcnt);
+	ret = nfs_generic_flush(desc, hdr);
 	if (ret == 0)
-		ret = nfs_do_multiple_writes(&head, desc->pg_rpc_callops,
-				desc->pg_ioflags);
+		ret = nfs_do_multiple_writes(&hdr->rpc_list,
+					     desc->pg_rpc_callops,
+					     desc->pg_ioflags);
+	else
+		set_bit(NFS_IOHDR_REDO, &hdr->flags);
+	if (atomic_dec_and_test(&hdr->refcnt))
+		nfs_write_completion(hdr);
 	return ret;
 }
 
@@ -1121,62 +1169,6 @@ static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
 		nfs_pageio_init_write_mds(pgio, inode, ioflags);
 }
 
-/*
- * Handle a write reply that flushed part of a page.
- */
-static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata)
-{
-	struct nfs_write_data	*data = calldata;
-
-	dprintk("NFS: %5u write(%s/%lld %d@%lld)",
-		task->tk_pid,
-		data->header->inode->i_sb->s_id,
-		(long long)
-		  NFS_FILEID(data->header->inode),
-		data->header->req->wb_bytes,
-		(long long)req_offset(data->header->req));
-
-	nfs_writeback_done(task, data);
-}
-
-static void nfs_writeback_release_partial(void *calldata)
-{
-	struct nfs_write_data	*data = calldata;
-	struct nfs_page		*req = data->header->req;
-	struct page		*page = req->wb_page;
-	int status = data->task.tk_status;
-
-	if (status < 0) {
-		nfs_set_pageerror(page);
-		nfs_context_set_write_error(req->wb_context, status);
-		dprintk(", error = %d\n", status);
-		goto out;
-	}
-
-	if (nfs_write_need_commit(data)) {
-		struct inode *inode = page->mapping->host;
-
-		spin_lock(&inode->i_lock);
-		if (test_bit(PG_NEED_RESCHED, &req->wb_flags)) {
-			/* Do nothing we need to resend the writes */
-		} else if (!test_and_set_bit(PG_NEED_COMMIT, &req->wb_flags)) {
-			memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf));
-			dprintk(" defer commit\n");
-		} else if (memcmp(&req->wb_verf, &data->verf, sizeof(req->wb_verf))) {
-			set_bit(PG_NEED_RESCHED, &req->wb_flags);
-			clear_bit(PG_NEED_COMMIT, &req->wb_flags);
-			dprintk(" server reboot detected\n");
-		}
-		spin_unlock(&inode->i_lock);
-	} else
-		dprintk(" OK\n");
-
-out:
-	if (atomic_dec_and_test(&req->wb_complete))
-		nfs_writepage_release(req, data);
-	nfs_writedata_release(data);
-}
-
 void nfs_write_prepare(struct rpc_task *task, void *calldata)
 {
 	struct nfs_write_data *data = calldata;
@@ -1190,12 +1182,6 @@ void nfs_commit_prepare(struct rpc_task *task, void *calldata)
 	NFS_PROTO(data->inode)->commit_rpc_prepare(task, data);
 }
 
-static const struct rpc_call_ops nfs_write_partial_ops = {
-	.rpc_call_prepare = nfs_write_prepare,
-	.rpc_call_done = nfs_writeback_done_partial,
-	.rpc_release = nfs_writeback_release_partial,
-};
-
 /*
  * Handle a write reply that flushes a whole page.
  *
@@ -1203,60 +1189,37 @@ static const struct rpc_call_ops nfs_write_partial_ops = {
  *	  writebacks since the page->count is kept > 1 for as long
  *	  as the page has a write request pending.
  */
-static void nfs_writeback_done_full(struct rpc_task *task, void *calldata)
+static void nfs_writeback_done_common(struct rpc_task *task, void *calldata)
 {
 	struct nfs_write_data	*data = calldata;
 
 	nfs_writeback_done(task, data);
 }
 
-static void nfs_writeback_release_full(void *calldata)
+static void nfs_writeback_release_common(void *calldata)
 {
 	struct nfs_write_data	*data = calldata;
 	struct nfs_pgio_header *hdr = data->header;
 	int status = data->task.tk_status;
+	struct nfs_page *req = hdr->req;
 
-	/* Update attributes as result of writeback. */
-	while (!list_empty(&hdr->pages)) {
-		struct nfs_page *req = nfs_list_entry(hdr->pages.next);
-		struct page *page = req->wb_page;
-
-		nfs_list_remove_request(req);
-
-		dprintk("NFS: %5u write (%s/%lld %d@%lld)",
-			data->task.tk_pid,
-			req->wb_context->dentry->d_inode->i_sb->s_id,
-			(long long)NFS_FILEID(req->wb_context->dentry->d_inode),
-			req->wb_bytes,
-			(long long)req_offset(req));
-
-		if (status < 0) {
-			nfs_set_pageerror(page);
-			nfs_context_set_write_error(req->wb_context, status);
-			dprintk(", error = %d\n", status);
-			goto remove_request;
-		}
-
-		if (nfs_write_need_commit(data)) {
+	if ((status >= 0) && nfs_write_need_commit(data)) {
+		spin_lock(&hdr->lock);
+		if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags))
+			; /* Do nothing */
+		else if (!test_and_set_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags))
 			memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf));
-			nfs_mark_request_commit(req, hdr->lseg);
-			dprintk(" marked for commit\n");
-			goto next;
-		}
-		dprintk(" OK\n");
-remove_request:
-		nfs_inode_remove_request(req);
-	next:
-		nfs_unlock_request(req);
-		nfs_end_page_writeback(page);
+		else if (memcmp(&req->wb_verf, &data->verf, sizeof(req->wb_verf)))
+			set_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags);
+		spin_unlock(&hdr->lock);
 	}
 	nfs_writedata_release(data);
 }
 
-static const struct rpc_call_ops nfs_write_full_ops = {
+static const struct rpc_call_ops nfs_write_common_ops = {
 	.rpc_call_prepare = nfs_write_prepare,
-	.rpc_call_done = nfs_writeback_done_full,
-	.rpc_release = nfs_writeback_release_full,
+	.rpc_call_done = nfs_writeback_done_common,
+	.rpc_release = nfs_writeback_release_common,
 };
 
 
@@ -1307,38 +1270,40 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
 		}
 	}
 #endif
-	/* Is this a short write? */
-	if (task->tk_status >= 0 && resp->count < argp->count) {
+	if (task->tk_status < 0)
+		nfs_set_pgio_error(data->header, task->tk_status, argp->offset);
+	else if (resp->count < argp->count) {
 		static unsigned long    complain;
 
+		/* This a short write! */
 		nfs_inc_stats(inode, NFSIOS_SHORTWRITE);
 
 		/* Has the server at least made some progress? */
-		if (resp->count != 0) {
-			/* Was this an NFSv2 write or an NFSv3 stable write? */
-			if (resp->verf->committed != NFS_UNSTABLE) {
-				/* Resend from where the server left off */
-				data->mds_offset += resp->count;
-				argp->offset += resp->count;
-				argp->pgbase += resp->count;
-				argp->count -= resp->count;
-			} else {
-				/* Resend as a stable write in order to avoid
-				 * headaches in the case of a server crash.
-				 */
-				argp->stable = NFS_FILE_SYNC;
+		if (resp->count == 0) {
+			if (time_before(complain, jiffies)) {
+				printk(KERN_WARNING
+				       "NFS: Server wrote zero bytes, expected %u.\n",
+				       argp->count);
+				complain = jiffies + 300 * HZ;
 			}
-			rpc_restart_call_prepare(task);
+			nfs_set_pgio_error(data->header, -EIO, argp->offset);
+			task->tk_status = -EIO;
 			return;
 		}
-		if (time_before(complain, jiffies)) {
-			printk(KERN_WARNING
-			       "NFS: Server wrote zero bytes, expected %u.\n",
-					argp->count);
-			complain = jiffies + 300 * HZ;
+		/* Was this an NFSv2 write or an NFSv3 stable write? */
+		if (resp->verf->committed != NFS_UNSTABLE) {
+			/* Resend from where the server left off */
+			data->mds_offset += resp->count;
+			argp->offset += resp->count;
+			argp->pgbase += resp->count;
+			argp->count -= resp->count;
+		} else {
+			/* Resend as a stable write in order to avoid
+			 * headaches in the case of a server crash.
+			 */
+			argp->stable = NFS_FILE_SYNC;
 		}
-		/* Can't do anything about it except throw an error. */
-		task->tk_status = -EIO;
+		rpc_restart_call_prepare(task);
 	}
 }
 
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 164862148ba0..0d17db7973de 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1192,6 +1192,8 @@ enum {
 	NFS_IOHDR_ERROR = 0,
 	NFS_IOHDR_EOF,
 	NFS_IOHDR_REDO,
+	NFS_IOHDR_NEED_COMMIT,
+	NFS_IOHDR_NEED_RESCHED,
 };
 
 struct nfs_pgio_header {
-- 
cgit v1.3-7-g2ca7


From 061ae2edb7375ab6776468b075da71008a098b55 Mon Sep 17 00:00:00 2001
From: Fred Isaman <iisaman@netapp.com>
Date: Fri, 20 Apr 2012 14:47:48 -0400
Subject: NFS: create completion structure to pass into page_init functions

Factors out the code that will need to change when directio
starts using these code paths.  This will allow directio to use
the generic pagein and flush routines

Signed-off-by: Fred Isaman <iisaman@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/internal.h        | 11 +++++------
 fs/nfs/pagelist.c        |  3 +++
 fs/nfs/pnfs.c            | 39 +++++++++++++++++++++++++--------------
 fs/nfs/pnfs.h            |  6 ++++--
 fs/nfs/read.c            | 36 ++++++++++++++++++++++--------------
 fs/nfs/write.c           | 41 ++++++++++++++++++++++++++---------------
 include/linux/nfs_page.h |  2 ++
 include/linux/nfs_xdr.h  |  6 ++++++
 8 files changed, 93 insertions(+), 51 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 16bc9c47c83e..3ef8fcda1a5f 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -300,11 +300,10 @@ extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *,
 extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh);
 #endif
 
+struct nfs_pgio_completion_ops;
 /* read.c */
-extern void nfs_async_read_error(struct list_head *head);
 extern struct nfs_read_header *nfs_readhdr_alloc(void);
 extern void nfs_readhdr_free(struct nfs_pgio_header *hdr);
-extern void nfs_read_completion(struct nfs_pgio_header *hdr);
 extern struct nfs_read_data *nfs_readdata_alloc(struct nfs_pgio_header *hdr,
 						unsigned int pagecount);
 extern int nfs_initiate_read(struct rpc_clnt *clnt,
@@ -314,21 +313,21 @@ extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
 extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc,
 			      struct nfs_pgio_header *hdr);
 extern void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio,
-		struct inode *inode);
+			struct inode *inode,
+			const struct nfs_pgio_completion_ops *compl_ops);
 extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio);
 extern void nfs_readdata_release(struct nfs_read_data *rdata);
 
 /* write.c */
-extern void nfs_async_write_error(struct list_head *head);
 extern struct nfs_write_header *nfs_writehdr_alloc(void);
 extern void nfs_writehdr_free(struct nfs_pgio_header *hdr);
 extern struct nfs_write_data *nfs_writedata_alloc(struct nfs_pgio_header *hdr,
 						  unsigned int pagecount);
-extern void nfs_write_completion(struct nfs_pgio_header *hdr);
 extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc,
 			     struct nfs_pgio_header *hdr);
 extern void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio,
-				  struct inode *inode, int ioflags);
+			struct inode *inode, int ioflags,
+			const struct nfs_pgio_completion_ops *compl_ops);
 extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio);
 extern void nfs_writedata_release(struct nfs_write_data *wdata);
 extern void nfs_commit_free(struct nfs_commit_data *p);
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index cd4c038135a7..4cf2a68493e0 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -49,6 +49,7 @@ void nfs_pgheader_init(struct nfs_pageio_descriptor *desc,
 	hdr->io_start = req_offset(hdr->req);
 	hdr->good_bytes = desc->pg_count;
 	hdr->release = release;
+	hdr->completion_ops = desc->pg_completion_ops;
 }
 
 void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos)
@@ -240,6 +241,7 @@ EXPORT_SYMBOL_GPL(nfs_generic_pg_test);
 void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
 		     struct inode *inode,
 		     const struct nfs_pageio_ops *pg_ops,
+		     const struct nfs_pgio_completion_ops *compl_ops,
 		     size_t bsize,
 		     int io_flags)
 {
@@ -252,6 +254,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
 	desc->pg_recoalesce = 0;
 	desc->pg_inode = inode;
 	desc->pg_ops = pg_ops;
+	desc->pg_completion_ops = compl_ops;
 	desc->pg_ioflags = io_flags;
 	desc->pg_error = 0;
 	desc->pg_lseg = NULL;
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index d515f00614cd..b3a0c01718af 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1113,26 +1113,31 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *
 EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write);
 
 bool
-pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode)
+pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode,
+		      const struct nfs_pgio_completion_ops *compl_ops)
 {
 	struct nfs_server *server = NFS_SERVER(inode);
 	struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;
 
 	if (ld == NULL)
 		return false;
-	nfs_pageio_init(pgio, inode, ld->pg_read_ops, server->rsize, 0);
+	nfs_pageio_init(pgio, inode, ld->pg_read_ops, compl_ops,
+			server->rsize, 0);
 	return true;
 }
 
 bool
-pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags)
+pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode,
+		       int ioflags,
+		       const struct nfs_pgio_completion_ops *compl_ops)
 {
 	struct nfs_server *server = NFS_SERVER(inode);
 	struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;
 
 	if (ld == NULL)
 		return false;
-	nfs_pageio_init(pgio, inode, ld->pg_write_ops, server->wsize, ioflags);
+	nfs_pageio_init(pgio, inode, ld->pg_write_ops, compl_ops,
+			server->wsize, ioflags);
 	return true;
 }
 
@@ -1162,13 +1167,15 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
 }
 EXPORT_SYMBOL_GPL(pnfs_generic_pg_test);
 
-static int pnfs_write_done_resend_to_mds(struct inode *inode, struct list_head *head)
+static int pnfs_write_done_resend_to_mds(struct inode *inode,
+				struct list_head *head,
+				const struct nfs_pgio_completion_ops *compl_ops)
 {
 	struct nfs_pageio_descriptor pgio;
 	LIST_HEAD(failed);
 
 	/* Resend all requests through the MDS */
-	nfs_pageio_init_write_mds(&pgio, inode, FLUSH_STABLE);
+	nfs_pageio_init_write_mds(&pgio, inode, FLUSH_STABLE, compl_ops);
 	while (!list_empty(head)) {
 		struct nfs_page *req = nfs_list_entry(head->next);
 
@@ -1201,7 +1208,8 @@ static void pnfs_ld_handle_write_error(struct nfs_write_data *data)
 	}
 	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
 		data->task.tk_status = pnfs_write_done_resend_to_mds(hdr->inode,
-								&hdr->pages);
+							&hdr->pages,
+							hdr->completion_ops);
 }
 
 /*
@@ -1292,7 +1300,7 @@ pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
 
 	whdr = nfs_writehdr_alloc();
 	if (!whdr) {
-		nfs_async_write_error(&desc->pg_list);
+		desc->pg_completion_ops->error_cleanup(&hdr->pages);
 		put_lseg(desc->pg_lseg);
 		desc->pg_lseg = NULL;
 		return -ENOMEM;
@@ -1309,18 +1317,20 @@ pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
 	} else
 		pnfs_do_multiple_writes(desc, &hdr->rpc_list, desc->pg_ioflags);
 	if (atomic_dec_and_test(&hdr->refcnt))
-		nfs_write_completion(hdr);
+		hdr->completion_ops->completion(hdr);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages);
 
-static int pnfs_read_done_resend_to_mds(struct inode *inode, struct list_head *head)
+static int pnfs_read_done_resend_to_mds(struct inode *inode,
+				struct list_head *head,
+				const struct nfs_pgio_completion_ops *compl_ops)
 {
 	struct nfs_pageio_descriptor pgio;
 	LIST_HEAD(failed);
 
 	/* Resend all requests through the MDS */
-	nfs_pageio_init_read_mds(&pgio, inode);
+	nfs_pageio_init_read_mds(&pgio, inode, compl_ops);
 	while (!list_empty(head)) {
 		struct nfs_page *req = nfs_list_entry(head->next);
 
@@ -1349,7 +1359,8 @@ static void pnfs_ld_handle_read_error(struct nfs_read_data *data)
 	}
 	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
 		data->task.tk_status = pnfs_read_done_resend_to_mds(hdr->inode,
-								&hdr->pages);
+							&hdr->pages,
+							hdr->completion_ops);
 }
 
 /*
@@ -1443,7 +1454,7 @@ pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
 
 	rhdr = nfs_readhdr_alloc();
 	if (!rhdr) {
-		nfs_async_read_error(&desc->pg_list);
+		desc->pg_completion_ops->error_cleanup(&desc->pg_list);
 		ret = -ENOMEM;
 		put_lseg(desc->pg_lseg);
 		desc->pg_lseg = NULL;
@@ -1461,7 +1472,7 @@ pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
 	} else
 		pnfs_do_multiple_reads(desc, &hdr->rpc_list);
 	if (atomic_dec_and_test(&hdr->refcnt))
-		nfs_read_completion(hdr);
+		hdr->completion_ops->completion(hdr);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages);
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 442ebf68eeec..734e4eff7fb0 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -168,8 +168,10 @@ extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp);
 void get_layout_hdr(struct pnfs_layout_hdr *lo);
 void put_lseg(struct pnfs_layout_segment *lseg);
 
-bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *);
-bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *, int);
+bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *,
+			   const struct nfs_pgio_completion_ops *);
+bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *,
+			    int, const struct nfs_pgio_completion_ops *);
 
 void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32);
 void unset_pnfs_layoutdriver(struct nfs_server *);
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index c9633b2501bd..5e78af162039 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -31,6 +31,7 @@
 
 static const struct nfs_pageio_ops nfs_pageio_read_ops;
 static const struct rpc_call_ops nfs_read_common_ops;
+static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops;
 
 static struct kmem_cache *nfs_rdata_cachep;
 
@@ -95,7 +96,7 @@ void nfs_readdata_release(struct nfs_read_data *rdata)
 	else
 		rdata->header = NULL;
 	if (atomic_dec_and_test(&hdr->refcnt))
-		nfs_read_completion(hdr);
+		hdr->completion_ops->completion(hdr);
 }
 
 static
@@ -108,9 +109,10 @@ int nfs_return_empty_page(struct page *page)
 }
 
 void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio,
-		struct inode *inode)
+			      struct inode *inode,
+			      const struct nfs_pgio_completion_ops *compl_ops)
 {
-	nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops,
+	nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops, compl_ops,
 			NFS_SERVER(inode)->rsize, 0);
 }
 
@@ -122,10 +124,11 @@ void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio)
 EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds);
 
 static void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
-		struct inode *inode)
+				struct inode *inode,
+				const struct nfs_pgio_completion_ops *compl_ops)
 {
-	if (!pnfs_pageio_init_read(pgio, inode))
-		nfs_pageio_init_read_mds(pgio, inode);
+	if (!pnfs_pageio_init_read(pgio, inode, compl_ops))
+		nfs_pageio_init_read_mds(pgio, inode, compl_ops);
 }
 
 int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
@@ -146,7 +149,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
 	if (len < PAGE_CACHE_SIZE)
 		zero_user_segment(page, len, PAGE_CACHE_SIZE);
 
-	nfs_pageio_init_read(&pgio, inode);
+	nfs_pageio_init_read(&pgio, inode, &nfs_async_read_completion_ops);
 	nfs_pageio_add_request(&pgio, new);
 	nfs_pageio_complete(&pgio);
 	return 0;
@@ -170,7 +173,7 @@ static void nfs_readpage_release(struct nfs_page *req)
 }
 
 /* Note io was page aligned */
-void nfs_read_completion(struct nfs_pgio_header *hdr)
+static void nfs_read_completion(struct nfs_pgio_header *hdr)
 {
 	unsigned long bytes = 0;
 
@@ -300,7 +303,7 @@ nfs_do_multiple_reads(struct list_head *head,
 	return ret;
 }
 
-void
+static void
 nfs_async_read_error(struct list_head *head)
 {
 	struct nfs_page	*req;
@@ -312,6 +315,11 @@ nfs_async_read_error(struct list_head *head)
 	}
 }
 
+static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops = {
+	.error_cleanup = nfs_async_read_error,
+	.completion = nfs_read_completion,
+};
+
 /*
  * Generate multiple requests to fill a single page.
  *
@@ -362,7 +370,7 @@ out_bad:
 		list_del(&data->list);
 		nfs_readdata_release(data);
 	}
-	nfs_async_read_error(&hdr->pages);
+	desc->pg_completion_ops->error_cleanup(&hdr->pages);
 	return -ENOMEM;
 }
 
@@ -378,7 +386,7 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc,
 	data = nfs_readdata_alloc(hdr, nfs_page_array_len(desc->pg_base,
 							  desc->pg_count));
 	if (!data) {
-		nfs_async_read_error(head);
+		desc->pg_completion_ops->error_cleanup(head);
 		ret = -ENOMEM;
 		goto out;
 	}
@@ -414,7 +422,7 @@ static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
 
 	rhdr = nfs_readhdr_alloc();
 	if (!rhdr) {
-		nfs_async_read_error(&desc->pg_list);
+		desc->pg_completion_ops->error_cleanup(&desc->pg_list);
 		return -ENOMEM;
 	}
 	hdr = &rhdr->header;
@@ -427,7 +435,7 @@ static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
 	else
 		set_bit(NFS_IOHDR_REDO, &hdr->flags);
 	if (atomic_dec_and_test(&hdr->refcnt))
-		nfs_read_completion(hdr);
+		hdr->completion_ops->completion(hdr);
 	return ret;
 }
 
@@ -652,7 +660,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
 	if (ret == 0)
 		goto read_complete; /* all pages were read */
 
-	nfs_pageio_init_read(&pgio, inode);
+	nfs_pageio_init_read(&pgio, inode, &nfs_async_read_completion_ops);
 
 	ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);
 
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 076075eb676c..150397279b8d 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -40,10 +40,12 @@
  * Local function declarations
  */
 static void nfs_pageio_init_write(struct nfs_pageio_descriptor *desc,
-				  struct inode *inode, int ioflags);
+			struct inode *inode, int ioflags,
+			const struct nfs_pgio_completion_ops *compl_ops);
 static void nfs_redirty_request(struct nfs_page *req);
 static const struct rpc_call_ops nfs_write_common_ops;
 static const struct rpc_call_ops nfs_commit_ops;
+static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops;
 
 static struct kmem_cache *nfs_wdata_cachep;
 static mempool_t *nfs_wdata_mempool;
@@ -128,7 +130,7 @@ void nfs_writedata_release(struct nfs_write_data *wdata)
 	else
 		wdata->header = NULL;
 	if (atomic_dec_and_test(&hdr->refcnt))
-		nfs_write_completion(hdr);
+		hdr->completion_ops->completion(hdr);
 }
 
 static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
@@ -337,7 +339,8 @@ static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc
 	struct nfs_pageio_descriptor pgio;
 	int err;
 
-	nfs_pageio_init_write(&pgio, page->mapping->host, wb_priority(wbc));
+	nfs_pageio_init_write(&pgio, page->mapping->host, wb_priority(wbc),
+			      &nfs_async_write_completion_ops);
 	err = nfs_do_writepage(page, wbc, &pgio);
 	nfs_pageio_complete(&pgio);
 	if (err < 0)
@@ -380,7 +383,8 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
 
 	nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);
 
-	nfs_pageio_init_write(&pgio, inode, wb_priority(wbc));
+	nfs_pageio_init_write(&pgio, inode, wb_priority(wbc),
+			      &nfs_async_write_completion_ops);
 	err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio);
 	nfs_pageio_complete(&pgio);
 
@@ -558,7 +562,7 @@ int nfs_write_need_commit(struct nfs_write_data *data)
 
 #endif
 
-void nfs_write_completion(struct nfs_pgio_header *hdr)
+static void nfs_write_completion(struct nfs_pgio_header *hdr)
 {
 	unsigned long bytes = 0;
 
@@ -1000,7 +1004,7 @@ static void nfs_redirty_request(struct nfs_page *req)
 	nfs_end_page_writeback(page);
 }
 
-void nfs_async_write_error(struct list_head *head)
+static void nfs_async_write_error(struct list_head *head)
 {
 	struct nfs_page	*req;
 
@@ -1011,6 +1015,11 @@ void nfs_async_write_error(struct list_head *head)
 	}
 }
 
+static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops = {
+	.error_cleanup = nfs_async_write_error,
+	.completion = nfs_write_completion,
+};
+
 /*
  * Generate multiple small requests to write out a single
  * contiguous dirty area on one page.
@@ -1060,7 +1069,7 @@ out_bad:
 		list_del(&data->list);
 		nfs_writedata_release(data);
 	}
-	nfs_async_write_error(&hdr->pages);
+	desc->pg_completion_ops->error_cleanup(&hdr->pages);
 	return -ENOMEM;
 }
 
@@ -1084,7 +1093,7 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc,
 	data = nfs_writedata_alloc(hdr, nfs_page_array_len(desc->pg_base,
 							   desc->pg_count));
 	if (!data) {
-		nfs_async_write_error(head);
+		desc->pg_completion_ops->error_cleanup(head);
 		ret = -ENOMEM;
 		goto out;
 	}
@@ -1125,7 +1134,7 @@ static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
 
 	whdr = nfs_writehdr_alloc();
 	if (!whdr) {
-		nfs_async_write_error(&desc->pg_list);
+		desc->pg_completion_ops->error_cleanup(&hdr->pages);
 		return -ENOMEM;
 	}
 	hdr = &whdr->header;
@@ -1139,7 +1148,7 @@ static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
 	else
 		set_bit(NFS_IOHDR_REDO, &hdr->flags);
 	if (atomic_dec_and_test(&hdr->refcnt))
-		nfs_write_completion(hdr);
+		hdr->completion_ops->completion(hdr);
 	return ret;
 }
 
@@ -1149,9 +1158,10 @@ static const struct nfs_pageio_ops nfs_pageio_write_ops = {
 };
 
 void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio,
-				  struct inode *inode, int ioflags)
+			       struct inode *inode, int ioflags,
+			       const struct nfs_pgio_completion_ops *compl_ops)
 {
-	nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops,
+	nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops, compl_ops,
 				NFS_SERVER(inode)->wsize, ioflags);
 }
 
@@ -1163,10 +1173,11 @@ void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio)
 EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds);
 
 static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
-				  struct inode *inode, int ioflags)
+				struct inode *inode, int ioflags,
+				const struct nfs_pgio_completion_ops *compl_ops)
 {
-	if (!pnfs_pageio_init_write(pgio, inode, ioflags))
-		nfs_pageio_init_write_mds(pgio, inode, ioflags);
+	if (!pnfs_pageio_init_write(pgio, inode, ioflags, compl_ops))
+		nfs_pageio_init_write_mds(pgio, inode, ioflags, compl_ops);
 }
 
 void nfs_write_prepare(struct rpc_task *task, void *calldata)
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 5c520344d8ad..bc5b7a5e787e 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -67,6 +67,7 @@ struct nfs_pageio_descriptor {
 	int 			pg_ioflags;
 	int			pg_error;
 	const struct rpc_call_ops *pg_rpc_callops;
+	const struct nfs_pgio_completion_ops *pg_completion_ops;
 	struct pnfs_layout_segment *pg_lseg;
 };
 
@@ -83,6 +84,7 @@ extern	void nfs_release_request(struct nfs_page *req);
 extern	void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
 			     struct inode *inode,
 			     const struct nfs_pageio_ops *pg_ops,
+			     const struct nfs_pgio_completion_ops *compl_ops,
 			     size_t bsize,
 			     int how);
 extern	int nfs_pageio_add_request(struct nfs_pageio_descriptor *,
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 0d17db7973de..6fa1d2278c9d 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1207,6 +1207,7 @@ struct nfs_pgio_header {
 	loff_t			io_start;
 	const struct rpc_call_ops *mds_ops;
 	void (*release) (struct nfs_pgio_header *hdr);
+	const struct nfs_pgio_completion_ops *completion_ops;
 	spinlock_t		lock;
 	/* fields protected by lock */
 	int			pnfs_error;
@@ -1261,6 +1262,11 @@ struct nfs_commit_data {
 	int (*commit_done_cb) (struct rpc_task *task, struct nfs_commit_data *data);
 };
 
+struct nfs_pgio_completion_ops {
+	void	(*error_cleanup)(struct list_head *head);
+	void	(*completion)(struct nfs_pgio_header *hdr);
+};
+
 struct nfs_unlinkdata {
 	struct hlist_node list;
 	struct nfs_removeargs args;
-- 
cgit v1.3-7-g2ca7


From 9533da2979757258d3fd5429d830a297013d69ed Mon Sep 17 00:00:00 2001
From: Fred Isaman <iisaman@netapp.com>
Date: Fri, 20 Apr 2012 14:47:49 -0400
Subject: NFS: remove unused wb_complete field from struct nfs_page

Signed-off-by: Fred Isaman <iisaman@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/pagelist.c        | 1 -
 fs/nfs/write.c           | 1 -
 include/linux/nfs_page.h | 1 -
 3 files changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 4cf2a68493e0..5d01a1651084 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -114,7 +114,6 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
 	 * long write-back delay. This will be adjusted in
 	 * update_nfs_request below if the region is not locked. */
 	req->wb_page    = page;
-	atomic_set(&req->wb_complete, 0);
 	req->wb_index	= page->index;
 	page_cache_get(page);
 	BUG_ON(PagePrivate(page));
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 150397279b8d..705bf01cfbba 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1059,7 +1059,6 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc,
 		nbytes -= len;
 		offset += len;
 	} while (nbytes != 0);
-	atomic_set(&req->wb_complete, requests);
 	desc->pg_rpc_callops = &nfs_write_common_ops;
 	return ret;
 
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index bc5b7a5e787e..0a5b63f16116 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -36,7 +36,6 @@ struct nfs_page {
 	struct page		*wb_page;	/* page to read in/write out */
 	struct nfs_open_context	*wb_context;	/* File state context info */
 	struct nfs_lock_context	*wb_lock_context;	/* lock context info */
-	atomic_t		wb_complete;	/* i/os we're waiting for */
 	pgoff_t			wb_index;	/* Offset >> PAGE_CACHE_SHIFT */
 	unsigned int		wb_offset,	/* Offset & ~PAGE_CACHE_MASK */
 				wb_pgbase,	/* Start of page data */
-- 
cgit v1.3-7-g2ca7


From 584aa810b6240d88c28113a90c5029449814a3b5 Mon Sep 17 00:00:00 2001
From: Fred Isaman <iisaman@netapp.com>
Date: Fri, 20 Apr 2012 14:47:51 -0400
Subject: NFS: rewrite directio read to use async coalesce code

This also has the advantage that it allows directio to use pnfs.

Signed-off-by: Fred Isaman <iisaman@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/direct.c          | 255 +++++++++++++++++++++++------------------------
 fs/nfs/internal.h        |   5 +-
 fs/nfs/pagelist.c        |   7 +-
 fs/nfs/read.c            |  10 +-
 include/linux/nfs_page.h |   1 +
 include/linux/nfs_xdr.h  |   4 +-
 6 files changed, 138 insertions(+), 144 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 22a40c408449..4ba9a2c839bb 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -124,22 +124,6 @@ ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_
 	return -EINVAL;
 }
 
-static void nfs_direct_dirty_pages(struct page **pages, unsigned int pgbase, size_t count)
-{
-	unsigned int npages;
-	unsigned int i;
-
-	if (count == 0)
-		return;
-	pages += (pgbase >> PAGE_SHIFT);
-	npages = (count + (pgbase & ~PAGE_MASK) + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	for (i = 0; i < npages; i++) {
-		struct page *page = pages[i];
-		if (!PageCompound(page))
-			set_page_dirty(page);
-	}
-}
-
 static void nfs_direct_release_pages(struct page **pages, unsigned int npages)
 {
 	unsigned int i;
@@ -226,58 +210,92 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq)
 	nfs_direct_req_release(dreq);
 }
 
-/*
- * We must hold a reference to all the pages in this direct read request
- * until the RPCs complete.  This could be long *after* we are woken up in
- * nfs_direct_wait (for instance, if someone hits ^C on a slow server).
- */
-static void nfs_direct_read_result(struct rpc_task *task, void *calldata)
+void nfs_direct_readpage_release(struct nfs_page *req)
 {
-	struct nfs_read_data *data = calldata;
-
-	nfs_readpage_result(task, data);
+	dprintk("NFS: direct read done (%s/%lld %d@%lld)\n",
+		req->wb_context->dentry->d_inode->i_sb->s_id,
+		(long long)NFS_FILEID(req->wb_context->dentry->d_inode),
+		req->wb_bytes,
+		(long long)req_offset(req));
+	nfs_release_request(req);
 }
 
-static void nfs_direct_read_release(void *calldata)
+static void nfs_direct_read_completion(struct nfs_pgio_header *hdr)
 {
+	unsigned long bytes = 0;
+	struct nfs_direct_req *dreq = hdr->dreq;
 
-	struct nfs_read_data *data = calldata;
-	struct nfs_direct_req *dreq = (struct nfs_direct_req *)data->header->req;
-	int status = data->task.tk_status;
+	if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
+		goto out_put;
 
 	spin_lock(&dreq->lock);
-	if (unlikely(status < 0)) {
-		dreq->error = status;
-		spin_unlock(&dreq->lock);
+	if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) && (hdr->good_bytes == 0))
+		dreq->error = hdr->error;
+	else
+		dreq->count += hdr->good_bytes;
+	spin_unlock(&dreq->lock);
+
+	if (!test_bit(NFS_IOHDR_ERROR, &hdr->flags)) {
+		while (!list_empty(&hdr->pages)) {
+			struct nfs_page *req = nfs_list_entry(hdr->pages.next);
+			struct page *page = req->wb_page;
+
+			if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) {
+				if (bytes > hdr->good_bytes)
+					zero_user(page, 0, PAGE_SIZE);
+				else if (hdr->good_bytes - bytes < PAGE_SIZE)
+					zero_user_segment(page,
+						hdr->good_bytes & ~PAGE_MASK,
+						PAGE_SIZE);
+			}
+			bytes += req->wb_bytes;
+			nfs_list_remove_request(req);
+			nfs_direct_readpage_release(req);
+			if (!PageCompound(page))
+				set_page_dirty(page);
+			page_cache_release(page);
+		}
 	} else {
-		dreq->count += data->res.count;
-		spin_unlock(&dreq->lock);
-		nfs_direct_dirty_pages(data->pages.pagevec,
-				data->args.pgbase,
-				data->res.count);
+		while (!list_empty(&hdr->pages)) {
+			struct nfs_page *req = nfs_list_entry(hdr->pages.next);
+
+			if (bytes < hdr->good_bytes)
+				if (!PageCompound(req->wb_page))
+					set_page_dirty(req->wb_page);
+			bytes += req->wb_bytes;
+			page_cache_release(req->wb_page);
+			nfs_list_remove_request(req);
+			nfs_direct_readpage_release(req);
+		}
 	}
-	nfs_direct_release_pages(data->pages.pagevec, data->pages.npages);
-
+out_put:
 	if (put_dreq(dreq))
 		nfs_direct_complete(dreq);
-	nfs_readdata_release(data);
+	hdr->release(hdr);
 }
 
-static const struct rpc_call_ops nfs_read_direct_ops = {
-	.rpc_call_prepare = nfs_read_prepare,
-	.rpc_call_done = nfs_direct_read_result,
-	.rpc_release = nfs_direct_read_release,
-};
-
-static void nfs_direct_readhdr_release(struct nfs_read_header *rhdr)
+static void nfs_sync_pgio_error(struct list_head *head)
 {
-	struct nfs_read_data *data = &rhdr->rpc_data;
+	struct nfs_page *req;
 
-	if (data->pages.pagevec != data->pages.page_array)
-		kfree(data->pages.pagevec);
-	nfs_readhdr_free(&rhdr->header);
+	while (!list_empty(head)) {
+		req = nfs_list_entry(head->next);
+		nfs_list_remove_request(req);
+		nfs_release_request(req);
+	}
 }
 
+static void nfs_direct_pgio_init(struct nfs_pgio_header *hdr)
+{
+	get_dreq(hdr->dreq);
+}
+
+static const struct nfs_pgio_completion_ops nfs_direct_read_completion_ops = {
+	.error_cleanup = nfs_sync_pgio_error,
+	.init_hdr = nfs_direct_pgio_init,
+	.completion = nfs_direct_read_completion,
+};
+
 /*
  * For each rsize'd chunk of the user's buffer, dispatch an NFS READ
  * operation.  If nfs_readdata_alloc() or get_user_pages() fails,
@@ -285,118 +303,85 @@ static void nfs_direct_readhdr_release(struct nfs_read_header *rhdr)
  * handled automatically by nfs_direct_read_result().  Otherwise, if
  * no requests have been sent, just return an error.
  */
-static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
+static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *desc,
 						const struct iovec *iov,
 						loff_t pos)
 {
+	struct nfs_direct_req *dreq = desc->pg_dreq;
 	struct nfs_open_context *ctx = dreq->ctx;
 	struct inode *inode = ctx->dentry->d_inode;
 	unsigned long user_addr = (unsigned long)iov->iov_base;
 	size_t count = iov->iov_len;
 	size_t rsize = NFS_SERVER(inode)->rsize;
-	struct rpc_task *task;
-	struct rpc_message msg = {
-		.rpc_cred = ctx->cred,
-	};
-	struct rpc_task_setup task_setup_data = {
-		.rpc_client = NFS_CLIENT(inode),
-		.rpc_message = &msg,
-		.callback_ops = &nfs_read_direct_ops,
-		.workqueue = nfsiod_workqueue,
-		.flags = RPC_TASK_ASYNC,
-	};
 	unsigned int pgbase;
 	int result;
 	ssize_t started = 0;
+	struct page **pagevec = NULL;
+	unsigned int npages;
 
 	do {
-		struct nfs_read_header *rhdr;
-		struct nfs_read_data *data;
-		struct nfs_page_array *pages;
 		size_t bytes;
+		int i;
 
 		pgbase = user_addr & ~PAGE_MASK;
-		bytes = min(rsize,count);
+		bytes = min(max(rsize, PAGE_SIZE), count);
 
 		result = -ENOMEM;
-		rhdr = nfs_readhdr_alloc();
-		if (unlikely(!rhdr))
-			break;
-		data = nfs_readdata_alloc(&rhdr->header, nfs_page_array_len(pgbase, bytes));
-		if (!data) {
-			nfs_readhdr_free(&rhdr->header);
+		npages = nfs_page_array_len(pgbase, bytes);
+		if (!pagevec)
+			pagevec = kmalloc(npages * sizeof(struct page *),
+					  GFP_KERNEL);
+		if (!pagevec)
 			break;
-		}
-		data->header = &rhdr->header;
-		atomic_inc(&data->header->refcnt);
-		pages = &data->pages;
-
 		down_read(&current->mm->mmap_sem);
 		result = get_user_pages(current, current->mm, user_addr,
-					pages->npages, 1, 0, pages->pagevec, NULL);
+					npages, 1, 0, pagevec, NULL);
 		up_read(&current->mm->mmap_sem);
-		if (result < 0) {
-			nfs_direct_readhdr_release(rhdr);
+		if (result < 0)
 			break;
-		}
-		if ((unsigned)result < pages->npages) {
+		if ((unsigned)result < npages) {
 			bytes = result * PAGE_SIZE;
 			if (bytes <= pgbase) {
-				nfs_direct_release_pages(pages->pagevec, result);
-				nfs_direct_readhdr_release(rhdr);
+				nfs_direct_release_pages(pagevec, result);
 				break;
 			}
 			bytes -= pgbase;
-			pages->npages = result;
+			npages = result;
 		}
 
-		get_dreq(dreq);
-
-		rhdr->header.req = (struct nfs_page *) dreq;
-		rhdr->header.inode = inode;
-		rhdr->header.cred = msg.rpc_cred;
-		data->args.fh = NFS_FH(inode);
-		data->args.context = get_nfs_open_context(ctx);
-		data->args.lock_context = dreq->l_ctx;
-		data->args.offset = pos;
-		data->args.pgbase = pgbase;
-		data->args.pages = pages->pagevec;
-		data->args.count = bytes;
-		data->res.fattr = &data->fattr;
-		data->res.eof = 0;
-		data->res.count = bytes;
-		nfs_fattr_init(&data->fattr);
-		msg.rpc_argp = &data->args;
-		msg.rpc_resp = &data->res;
-
-		task_setup_data.task = &data->task;
-		task_setup_data.callback_data = data;
-		NFS_PROTO(inode)->read_setup(data, &msg);
-
-		task = rpc_run_task(&task_setup_data);
-		if (IS_ERR(task))
-			break;
-
-		dprintk("NFS: %5u initiated direct read call "
-			"(req %s/%Ld, %zu bytes @ offset %Lu)\n",
-				task->tk_pid,
-				inode->i_sb->s_id,
-				(long long)NFS_FILEID(inode),
-				bytes,
-				(unsigned long long)data->args.offset);
-		rpc_put_task(task);
-
-		started += bytes;
-		user_addr += bytes;
-		pos += bytes;
-		/* FIXME: Remove this unnecessary math from final patch */
-		pgbase += bytes;
-		pgbase &= ~PAGE_MASK;
-		BUG_ON(pgbase != (user_addr & ~PAGE_MASK));
-
-		count -= bytes;
+		for (i = 0; i < npages; i++) {
+			struct nfs_page *req;
+			unsigned int req_len = min(bytes, PAGE_SIZE - pgbase);
+			/* XXX do we need to do the eof zeroing found in async_filler? */
+			req = nfs_create_request(dreq->ctx, dreq->inode,
+						 pagevec[i],
+						 pgbase, req_len);
+			if (IS_ERR(req)) {
+				nfs_direct_release_pages(pagevec + i,
+							 npages - i);
+				result = PTR_ERR(req);
+				break;
+			}
+			req->wb_index = pos >> PAGE_SHIFT;
+			req->wb_offset = pos & ~PAGE_MASK;
+			if (!nfs_pageio_add_request(desc, req)) {
+				result = desc->pg_error;
+				nfs_release_request(req);
+				nfs_direct_release_pages(pagevec + i,
+							 npages - i);
+				break;
+			}
+			pgbase = 0;
+			bytes -= req_len;
+			started += req_len;
+			user_addr += req_len;
+			pos += req_len;
+			count -= req_len;
+		}
 	} while (count != 0);
 
+	kfree(pagevec);
+
 	if (started)
 		return started;
 	return result < 0 ? (ssize_t) result : -EFAULT;
@@ -407,15 +392,19 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
 					      unsigned long nr_segs,
 					      loff_t pos)
 {
+	struct nfs_pageio_descriptor desc;
 	ssize_t result = -EINVAL;
 	size_t requested_bytes = 0;
 	unsigned long seg;
 
+	nfs_pageio_init_read(&desc, dreq->inode,
+			     &nfs_direct_read_completion_ops);
 	get_dreq(dreq);
+	desc.pg_dreq = dreq;
 
 	for (seg = 0; seg < nr_segs; seg++) {
 		const struct iovec *vec = &iov[seg];
-		result = nfs_direct_read_schedule_segment(dreq, vec, pos);
+		result = nfs_direct_read_schedule_segment(&desc, vec, pos);
 		if (result < 0)
 			break;
 		requested_bytes += result;
@@ -424,6 +413,8 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
 		pos += vec->iov_len;
 	}
 
+	nfs_pageio_complete(&desc);
+
 	/*
 	 * If no bytes were started, return the error, and let the
 	 * generic layer handle the completion.
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 3ef8fcda1a5f..cd5d4a300bc9 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -304,8 +304,9 @@ struct nfs_pgio_completion_ops;
 /* read.c */
 extern struct nfs_read_header *nfs_readhdr_alloc(void);
 extern void nfs_readhdr_free(struct nfs_pgio_header *hdr);
-extern struct nfs_read_data *nfs_readdata_alloc(struct nfs_pgio_header *hdr,
-						unsigned int pagecount);
+extern void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
+			struct inode *inode,
+			const struct nfs_pgio_completion_ops *compl_ops);
 extern int nfs_initiate_read(struct rpc_clnt *clnt,
 			     struct nfs_read_data *data,
 			     const struct rpc_call_ops *call_ops);
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 638ca7f5a1e4..33a21ca9b84b 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -48,8 +48,11 @@ void nfs_pgheader_init(struct nfs_pageio_descriptor *desc,
 	hdr->cred = hdr->req->wb_context->cred;
 	hdr->io_start = req_offset(hdr->req);
 	hdr->good_bytes = desc->pg_count;
+	hdr->dreq = desc->pg_dreq;
 	hdr->release = release;
 	hdr->completion_ops = desc->pg_completion_ops;
+	if (hdr->completion_ops->init_hdr)
+		hdr->completion_ops->init_hdr(hdr);
 }
 
 void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos)
@@ -116,9 +119,6 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
 	req->wb_page    = page;
 	req->wb_index	= page->index;
 	page_cache_get(page);
-	BUG_ON(PagePrivate(page));
-	BUG_ON(!PageLocked(page));
-	BUG_ON(page->mapping->host != inode);
 	req->wb_offset  = offset;
 	req->wb_pgbase	= offset;
 	req->wb_bytes   = count;
@@ -257,6 +257,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
 	desc->pg_ioflags = io_flags;
 	desc->pg_error = 0;
 	desc->pg_lseg = NULL;
+	desc->pg_dreq = NULL;
 }
 
 /**
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 5e78af162039..35e2dcebffe6 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -51,8 +51,8 @@ struct nfs_read_header *nfs_readhdr_alloc()
 	return rhdr;
 }
 
-struct nfs_read_data *nfs_readdata_alloc(struct nfs_pgio_header *hdr,
-					 unsigned int pagecount)
+static struct nfs_read_data *nfs_readdata_alloc(struct nfs_pgio_header *hdr,
+						unsigned int pagecount)
 {
 	struct nfs_read_data *data, *prealloc;
 
@@ -123,9 +123,9 @@ void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio)
 }
 EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds);
 
-static void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
-				struct inode *inode,
-				const struct nfs_pgio_completion_ops *compl_ops)
+void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
+			  struct inode *inode,
+			  const struct nfs_pgio_completion_ops *compl_ops)
 {
 	if (!pnfs_pageio_init_read(pgio, inode, compl_ops))
 		nfs_pageio_init_read_mds(pgio, inode, compl_ops);
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 0a5b63f16116..f9ee9eba7f88 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -68,6 +68,7 @@ struct nfs_pageio_descriptor {
 	const struct rpc_call_ops *pg_rpc_callops;
 	const struct nfs_pgio_completion_ops *pg_completion_ops;
 	struct pnfs_layout_segment *pg_lseg;
+	struct nfs_direct_req	*pg_dreq;
 };
 
 #define NFS_WBACK_BUSY(req)	(test_bit(PG_BUSY,&(req)->wb_flags))
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 6fa1d2278c9d..38687b87ca9b 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1208,6 +1208,7 @@ struct nfs_pgio_header {
 	const struct rpc_call_ops *mds_ops;
 	void (*release) (struct nfs_pgio_header *hdr);
 	const struct nfs_pgio_completion_ops *completion_ops;
+	struct nfs_direct_req	*dreq;
 	spinlock_t		lock;
 	/* fields protected by lock */
 	int			pnfs_error;
@@ -1221,8 +1222,6 @@ struct nfs_read_header {
 	struct nfs_read_data	rpc_data;
 };
 
-struct nfs_direct_req;
-
 struct nfs_write_data {
 	struct nfs_pgio_header	*header;
 	struct list_head	list;
@@ -1264,6 +1263,7 @@ struct nfs_commit_data {
 
 struct nfs_pgio_completion_ops {
 	void	(*error_cleanup)(struct list_head *head);
+	void	(*init_hdr)(struct nfs_pgio_header *hdr);
 	void	(*completion)(struct nfs_pgio_header *hdr);
 };
 
-- 
cgit v1.3-7-g2ca7


From ea2cf2282b4278461266013e9c002ee1c66700ff Mon Sep 17 00:00:00 2001
From: Fred Isaman <iisaman@netapp.com>
Date: Fri, 20 Apr 2012 14:47:53 -0400
Subject: NFS: create struct nfs_commit_info

It is COMMIT that is handled the most differently between
the paged and direct paths.  Create a structure that encapsulates
everything either path needs to know about the commit state.

We could use void to hide some of the layout driver stuff, but
Trond suggests pulling it out to ensure type checking, given the
huge changes being made, and the fact that it doesn't interfere
with other drivers.

Signed-off-by: Fred Isaman <iisaman@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/inode.c          |   6 +-
 fs/nfs/internal.h       |  12 +++-
 fs/nfs/nfs4filelayout.c | 119 ++++++++++++++++++++----------------
 fs/nfs/nfs4filelayout.h |  14 +----
 fs/nfs/pnfs.h           |  72 ++++++++++++++--------
 fs/nfs/write.c          | 158 ++++++++++++++++++++++++++++--------------------
 include/linux/nfs_fs.h  |   5 +-
 include/linux/nfs_xdr.h |  27 +++++++++
 8 files changed, 248 insertions(+), 165 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index e8bbfa5b3500..59a12c6a8df6 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1547,7 +1547,7 @@ static inline void nfs4_init_once(struct nfs_inode *nfsi)
 	nfsi->delegation_state = 0;
 	init_rwsem(&nfsi->rwsem);
 	nfsi->layout = NULL;
-	atomic_set(&nfsi->commits_outstanding, 0);
+	atomic_set(&nfsi->commit_info.rpcs_out, 0);
 #endif
 }
 
@@ -1559,9 +1559,9 @@ static void init_once(void *foo)
 	INIT_LIST_HEAD(&nfsi->open_files);
 	INIT_LIST_HEAD(&nfsi->access_cache_entry_lru);
 	INIT_LIST_HEAD(&nfsi->access_cache_inode_lru);
-	INIT_LIST_HEAD(&nfsi->commit_list);
+	INIT_LIST_HEAD(&nfsi->commit_info.list);
 	nfsi->npages = 0;
-	nfsi->ncommit = 0;
+	nfsi->commit_info.ncommit = 0;
 	atomic_set(&nfsi->silly_count, 1);
 	INIT_HLIST_HEAD(&nfsi->silly_list);
 	init_waitqueue_head(&nfsi->waitqueue);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index cd5d4a300bc9..145e9e7dc8ce 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -346,12 +346,18 @@ extern void nfs_init_commit(struct nfs_commit_data *data,
 			    struct list_head *head,
 			    struct pnfs_layout_segment *lseg);
 void nfs_retry_commit(struct list_head *page_list,
-		      struct pnfs_layout_segment *lseg);
+		      struct pnfs_layout_segment *lseg,
+		      struct nfs_commit_info *cinfo);
 void nfs_commit_clear_lock(struct nfs_inode *nfsi);
 void nfs_commitdata_release(struct nfs_commit_data *data);
 void nfs_commit_release_pages(struct nfs_commit_data *data);
-void nfs_request_add_commit_list(struct nfs_page *req, struct list_head *head);
-void nfs_request_remove_commit_list(struct nfs_page *req);
+void nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst,
+				 struct nfs_commit_info *cinfo);
+void nfs_request_remove_commit_list(struct nfs_page *req,
+				    struct nfs_commit_info *cinfo);
+void nfs_init_cinfo(struct nfs_commit_info *cinfo,
+		    struct inode *inode,
+		    struct nfs_direct_req *dreq);
 
 #ifdef CONFIG_MIGRATION
 extern int nfs_migrate_page(struct address_space *,
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index e40523f2fe26..fe2cb55ca6b1 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -347,9 +347,11 @@ static void filelayout_commit_count_stats(struct rpc_task *task, void *data)
 static void filelayout_commit_release(void *calldata)
 {
 	struct nfs_commit_data *data = calldata;
+	struct nfs_commit_info cinfo;
 
 	nfs_commit_release_pages(data);
-	if (atomic_dec_and_test(&NFS_I(data->inode)->commits_outstanding))
+	nfs_init_cinfo(&cinfo, data->inode, data->dreq);
+	if (atomic_dec_and_test(&cinfo.mds->rpcs_out))
 		nfs_commit_clear_lock(NFS_I(data->inode));
 	put_lseg(data->lseg);
 	nfs_commitdata_release(data);
@@ -695,17 +697,16 @@ filelayout_free_lseg(struct pnfs_layout_segment *lseg)
 
 static int
 filelayout_alloc_commit_info(struct pnfs_layout_segment *lseg,
+			     struct nfs_commit_info *cinfo,
 			     gfp_t gfp_flags)
 {
 	struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
-	struct nfs4_filelayout *flo = FILELAYOUT_FROM_HDR(lseg->pls_layout);
-
-	struct nfs4_fl_commit_bucket *buckets;
+	struct pnfs_commit_bucket *buckets;
 	int size;
 
 	if (fl->commit_through_mds)
 		return 0;
-	if (flo->commit_info.nbuckets != 0) {
+	if (cinfo->ds->nbuckets != 0) {
 		/* This assumes there is only one IOMODE_RW lseg.  What
 		 * we really want to do is have a layout_hdr level
 		 * dictionary of <multipath_list4, fh> keys, each
@@ -718,25 +719,25 @@ filelayout_alloc_commit_info(struct pnfs_layout_segment *lseg,
 	size = (fl->stripe_type == STRIPE_SPARSE) ?
 		fl->dsaddr->ds_num : fl->dsaddr->stripe_count;
 
-	buckets = kcalloc(size, sizeof(struct nfs4_fl_commit_bucket),
+	buckets = kcalloc(size, sizeof(struct pnfs_commit_bucket),
 			  gfp_flags);
 	if (!buckets)
 		return -ENOMEM;
 	else {
 		int i;
 
-		spin_lock(&lseg->pls_layout->plh_inode->i_lock);
-		if (flo->commit_info.nbuckets != 0)
+		spin_lock(cinfo->lock);
+		if (cinfo->ds->nbuckets != 0)
 			kfree(buckets);
 		else {
-			flo->commit_info.buckets = buckets;
-			flo->commit_info.nbuckets = size;
+			cinfo->ds->buckets = buckets;
+			cinfo->ds->nbuckets = size;
 			for (i = 0; i < size; i++) {
 				INIT_LIST_HEAD(&buckets[i].written);
 				INIT_LIST_HEAD(&buckets[i].committing);
 			}
 		}
-		spin_unlock(&lseg->pls_layout->plh_inode->i_lock);
+		spin_unlock(cinfo->lock);
 		return 0;
 	}
 }
@@ -821,6 +822,7 @@ static void
 filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
 			 struct nfs_page *req)
 {
+	struct nfs_commit_info cinfo;
 	int status;
 
 	BUG_ON(pgio->pg_lseg != NULL);
@@ -836,7 +838,8 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
 	/* If no lseg, fall back to write through mds */
 	if (pgio->pg_lseg == NULL)
 		goto out_mds;
-	status = filelayout_alloc_commit_info(pgio->pg_lseg, GFP_NOFS);
+	nfs_init_cinfo(&cinfo, pgio->pg_inode, pgio->pg_dreq);
+	status = filelayout_alloc_commit_info(pgio->pg_lseg, &cinfo, GFP_NOFS);
 	if (status < 0) {
 		put_lseg(pgio->pg_lseg);
 		pgio->pg_lseg = NULL;
@@ -871,40 +874,42 @@ static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j)
  * If this will make the bucket empty, it will need to put the lseg reference.
  */
 static void
-filelayout_clear_request_commit(struct nfs_page *req)
+filelayout_clear_request_commit(struct nfs_page *req,
+				struct nfs_commit_info *cinfo)
 {
 	struct pnfs_layout_segment *freeme = NULL;
-	struct inode *inode = req->wb_context->dentry->d_inode;
 
-	spin_lock(&inode->i_lock);
+	spin_lock(cinfo->lock);
 	if (!test_and_clear_bit(PG_COMMIT_TO_DS, &req->wb_flags))
 		goto out;
+	cinfo->ds->nwritten--;
 	if (list_is_singular(&req->wb_list)) {
-		struct nfs4_fl_commit_bucket *bucket;
+		struct pnfs_commit_bucket *bucket;
 
 		bucket = list_first_entry(&req->wb_list,
-					  struct nfs4_fl_commit_bucket,
+					  struct pnfs_commit_bucket,
 					  written);
 		freeme = bucket->wlseg;
 		bucket->wlseg = NULL;
 	}
 out:
-	nfs_request_remove_commit_list(req);
-	spin_unlock(&inode->i_lock);
+	nfs_request_remove_commit_list(req, cinfo);
+	spin_unlock(cinfo->lock);
 	put_lseg(freeme);
 }
 
 static struct list_head *
 filelayout_choose_commit_list(struct nfs_page *req,
-			      struct pnfs_layout_segment *lseg)
+			      struct pnfs_layout_segment *lseg,
+			      struct nfs_commit_info *cinfo)
 {
 	struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
 	u32 i, j;
 	struct list_head *list;
-	struct nfs4_fl_commit_bucket *buckets;
+	struct pnfs_commit_bucket *buckets;
 
 	if (fl->commit_through_mds)
-		return &NFS_I(req->wb_context->dentry->d_inode)->commit_list;
+		return &cinfo->mds->list;
 
 	/* Note that we are calling nfs4_fl_calc_j_index on each page
 	 * that ends up being committed to a data server.  An attractive
@@ -914,7 +919,7 @@ filelayout_choose_commit_list(struct nfs_page *req,
 	 */
 	j = nfs4_fl_calc_j_index(lseg, req_offset(req));
 	i = select_bucket_index(fl, j);
-	buckets = FILELAYOUT_FROM_HDR(lseg->pls_layout)->commit_info.buckets;
+	buckets = cinfo->ds->buckets;
 	list = &buckets[i].written;
 	if (list_empty(list)) {
 		/* Non-empty buckets hold a reference on the lseg.  That ref
@@ -926,17 +931,19 @@ filelayout_choose_commit_list(struct nfs_page *req,
 		buckets[i].wlseg = get_lseg(lseg);
 	}
 	set_bit(PG_COMMIT_TO_DS, &req->wb_flags);
+	cinfo->ds->nwritten++;
 	return list;
 }
 
 static void
 filelayout_mark_request_commit(struct nfs_page *req,
-		struct pnfs_layout_segment *lseg)
+			       struct pnfs_layout_segment *lseg,
+			       struct nfs_commit_info *cinfo)
 {
 	struct list_head *list;
 
-	list = filelayout_choose_commit_list(req, lseg);
-	nfs_request_add_commit_list(req, list);
+	list = filelayout_choose_commit_list(req, lseg, cinfo);
+	nfs_request_add_commit_list(req, list, cinfo);
 }
 
 static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i)
@@ -993,8 +1000,9 @@ static int filelayout_initiate_commit(struct nfs_commit_data *data, int how)
 }
 
 static int
-filelayout_scan_ds_commit_list(struct nfs4_fl_commit_bucket *bucket, int max,
-		spinlock_t *lock)
+filelayout_scan_ds_commit_list(struct pnfs_commit_bucket *bucket,
+			       struct nfs_commit_info *cinfo,
+			       int max)
 {
 	struct list_head *src = &bucket->written;
 	struct list_head *dst = &bucket->committing;
@@ -1004,9 +1012,9 @@ filelayout_scan_ds_commit_list(struct nfs4_fl_commit_bucket *bucket, int max,
 	list_for_each_entry_safe(req, tmp, src, wb_list) {
 		if (!nfs_lock_request(req))
 			continue;
-		if (cond_resched_lock(lock))
+		if (cond_resched_lock(cinfo->lock))
 			list_safe_reset_next(req, tmp, wb_list);
-		nfs_request_remove_commit_list(req);
+		nfs_request_remove_commit_list(req, cinfo);
 		clear_bit(PG_COMMIT_TO_DS, &req->wb_flags);
 		nfs_list_add_request(req, dst);
 		ret++;
@@ -1014,6 +1022,8 @@ filelayout_scan_ds_commit_list(struct nfs4_fl_commit_bucket *bucket, int max,
 			break;
 	}
 	if (ret) {
+		cinfo->ds->nwritten -= ret;
+		cinfo->ds->ncommitting += ret;
 		bucket->clseg = bucket->wlseg;
 		if (list_empty(src))
 			bucket->wlseg = NULL;
@@ -1024,37 +1034,32 @@ filelayout_scan_ds_commit_list(struct nfs4_fl_commit_bucket *bucket, int max,
 }
 
 /* Move reqs from written to committing lists, returning count of number moved.
- * Note called with i_lock held.
+ * Note called with cinfo->lock held.
  */
-static int filelayout_scan_commit_lists(struct inode *inode, int max,
-		spinlock_t *lock)
+static int filelayout_scan_commit_lists(struct nfs_commit_info *cinfo,
+					int max)
 {
-	struct nfs4_fl_commit_info *fl_cinfo;
 	int i, rv = 0, cnt;
 
-	fl_cinfo = &FILELAYOUT_FROM_HDR(NFS_I(inode)->layout)->commit_info;
-	if (fl_cinfo->nbuckets == 0)
-		goto out_done;
-	for (i = 0; i < fl_cinfo->nbuckets && max != 0; i++) {
-		cnt = filelayout_scan_ds_commit_list(&fl_cinfo->buckets[i],
-				max, lock);
+	for (i = 0; i < cinfo->ds->nbuckets && max != 0; i++) {
+		cnt = filelayout_scan_ds_commit_list(&cinfo->ds->buckets[i],
+						     cinfo, max);
 		max -= cnt;
 		rv += cnt;
 	}
-out_done:
 	return rv;
 }
 
 static unsigned int
-alloc_ds_commits(struct inode *inode, struct list_head *list)
+alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list)
 {
-	struct nfs4_fl_commit_info *fl_cinfo;
-	struct nfs4_fl_commit_bucket *bucket;
+	struct pnfs_ds_commit_info *fl_cinfo;
+	struct pnfs_commit_bucket *bucket;
 	struct nfs_commit_data *data;
 	int i, j;
 	unsigned int nreq = 0;
 
-	fl_cinfo = &FILELAYOUT_FROM_HDR(NFS_I(inode)->layout)->commit_info;
+	fl_cinfo = cinfo->ds;
 	bucket = fl_cinfo->buckets;
 	for (i = 0; i < fl_cinfo->nbuckets; i++, bucket++) {
 		if (list_empty(&bucket->committing))
@@ -1073,7 +1078,7 @@ alloc_ds_commits(struct inode *inode, struct list_head *list)
 	for (j = i; j < fl_cinfo->nbuckets; j++, bucket++) {
 		if (list_empty(&bucket->committing))
 			continue;
-		nfs_retry_commit(&bucket->committing, bucket->clseg);
+		nfs_retry_commit(&bucket->committing, bucket->clseg, cinfo);
 		put_lseg(bucket->clseg);
 		bucket->clseg = NULL;
 	}
@@ -1084,7 +1089,7 @@ alloc_ds_commits(struct inode *inode, struct list_head *list)
 /* This follows nfs_commit_list pretty closely */
 static int
 filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
-			   int how)
+			   int how, struct nfs_commit_info *cinfo)
 {
 	struct nfs_commit_data *data, *tmp;
 	LIST_HEAD(list);
@@ -1097,17 +1102,17 @@ filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
 			list_add(&data->pages, &list);
 			nreq++;
 		} else
-			nfs_retry_commit(mds_pages, NULL);
+			nfs_retry_commit(mds_pages, NULL, cinfo);
 	}
 
-	nreq += alloc_ds_commits(inode, &list);
+	nreq += alloc_ds_commits(cinfo, &list);
 
 	if (nreq == 0) {
 		nfs_commit_clear_lock(NFS_I(inode));
 		goto out;
 	}
 
-	atomic_add(nreq, &NFS_I(inode)->commits_outstanding);
+	atomic_add(nreq, &cinfo->mds->rpcs_out);
 
 	list_for_each_entry_safe(data, tmp, &list, pages) {
 		list_del_init(&data->pages);
@@ -1116,14 +1121,15 @@ filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
 			nfs_initiate_commit(NFS_CLIENT(inode), data,
 					    data->mds_ops, how);
 		} else {
-			struct nfs4_fl_commit_info *fl_cinfo;
+			struct pnfs_commit_bucket *buckets;
 
-			fl_cinfo = &FILELAYOUT_FROM_HDR(data->lseg->pls_layout)->commit_info;
-			nfs_init_commit(data, &fl_cinfo->buckets[data->ds_commit_index].committing, data->lseg);
+			buckets = cinfo->ds->buckets;
+			nfs_init_commit(data, &buckets[data->ds_commit_index].committing, data->lseg);
 			filelayout_initiate_commit(data, how);
 		}
 	}
 out:
+	cinfo->ds->ncommitting = 0;
 	return PNFS_ATTEMPTED;
 }
 
@@ -1148,6 +1154,12 @@ filelayout_free_layout_hdr(struct pnfs_layout_hdr *lo)
 	kfree(FILELAYOUT_FROM_HDR(lo));
 }
 
+static struct pnfs_ds_commit_info *
+filelayout_get_ds_info(struct inode *inode)
+{
+	return &FILELAYOUT_FROM_HDR(NFS_I(inode)->layout)->commit_info;
+}
+
 static struct pnfs_layoutdriver_type filelayout_type = {
 	.id			= LAYOUT_NFSV4_1_FILES,
 	.name			= "LAYOUT_NFSV4_1_FILES",
@@ -1158,6 +1170,7 @@ static struct pnfs_layoutdriver_type filelayout_type = {
 	.free_lseg		= filelayout_free_lseg,
 	.pg_read_ops		= &filelayout_pg_read_ops,
 	.pg_write_ops		= &filelayout_pg_write_ops,
+	.get_ds_info		= &filelayout_get_ds_info,
 	.mark_request_commit	= filelayout_mark_request_commit,
 	.clear_request_commit	= filelayout_clear_request_commit,
 	.scan_commit_lists	= filelayout_scan_commit_lists,
diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h
index 333a3ac97606..96b89bbddf4f 100644
--- a/fs/nfs/nfs4filelayout.h
+++ b/fs/nfs/nfs4filelayout.h
@@ -74,18 +74,6 @@ struct nfs4_file_layout_dsaddr {
 	struct nfs4_pnfs_ds		*ds_list[1];
 };
 
-struct nfs4_fl_commit_bucket {
-	struct list_head written;
-	struct list_head committing;
-	struct pnfs_layout_segment *wlseg;
-	struct pnfs_layout_segment *clseg;
-};
-
-struct nfs4_fl_commit_info {
-	int nbuckets;
-	struct nfs4_fl_commit_bucket *buckets;
-};
-
 struct nfs4_filelayout_segment {
 	struct pnfs_layout_segment generic_hdr;
 	u32 stripe_type;
@@ -100,7 +88,7 @@ struct nfs4_filelayout_segment {
 
 struct nfs4_filelayout {
 	struct pnfs_layout_hdr generic_hdr;
-	struct nfs4_fl_commit_info commit_info;
+	struct pnfs_ds_commit_info commit_info;
 };
 
 static inline struct nfs4_filelayout *
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 734e4eff7fb0..4cd8760c2f89 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -94,11 +94,18 @@ struct pnfs_layoutdriver_type {
 	const struct nfs_pageio_ops *pg_read_ops;
 	const struct nfs_pageio_ops *pg_write_ops;
 
+	struct pnfs_ds_commit_info *(*get_ds_info) (struct inode *inode);
 	void (*mark_request_commit) (struct nfs_page *req,
-					struct pnfs_layout_segment *lseg);
-	void (*clear_request_commit) (struct nfs_page *req);
-	int (*scan_commit_lists) (struct inode *inode, int max, spinlock_t *lock);
-	int (*commit_pagelist)(struct inode *inode, struct list_head *mds_pages, int how);
+				     struct pnfs_layout_segment *lseg,
+				     struct nfs_commit_info *cinfo);
+	void (*clear_request_commit) (struct nfs_page *req,
+				      struct nfs_commit_info *cinfo);
+	int (*scan_commit_lists) (struct nfs_commit_info *cinfo,
+				  int max);
+	int (*commit_pagelist)(struct inode *inode,
+			       struct list_head *mds_pages,
+			       int how,
+			       struct nfs_commit_info *cinfo);
 
 	/*
 	 * Return PNFS_ATTEMPTED to indicate the layout code has attempted
@@ -263,49 +270,57 @@ static inline int pnfs_enabled_sb(struct nfs_server *nfss)
 }
 
 static inline int
-pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how)
+pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how,
+		 struct nfs_commit_info *cinfo)
 {
-	if (!test_and_clear_bit(NFS_INO_PNFS_COMMIT, &NFS_I(inode)->flags))
+	if (cinfo->ds == NULL || cinfo->ds->ncommitting == 0)
 		return PNFS_NOT_ATTEMPTED;
-	return NFS_SERVER(inode)->pnfs_curr_ld->commit_pagelist(inode, mds_pages, how);
+	return NFS_SERVER(inode)->pnfs_curr_ld->commit_pagelist(inode, mds_pages, how, cinfo);
+}
+
+static inline struct pnfs_ds_commit_info *
+pnfs_get_ds_info(struct inode *inode)
+{
+	struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
+
+	if (ld == NULL || ld->get_ds_info == NULL)
+		return NULL;
+	return ld->get_ds_info(inode);
 }
 
 static inline bool
-pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
+pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
+			 struct nfs_commit_info *cinfo)
 {
 	struct inode *inode = req->wb_context->dentry->d_inode;
 	struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
 
 	if (lseg == NULL || ld->mark_request_commit == NULL)
 		return false;
-	ld->mark_request_commit(req, lseg);
+	ld->mark_request_commit(req, lseg, cinfo);
 	return true;
 }
 
 static inline bool
-pnfs_clear_request_commit(struct nfs_page *req)
+pnfs_clear_request_commit(struct nfs_page *req, struct nfs_commit_info *cinfo)
 {
 	struct inode *inode = req->wb_context->dentry->d_inode;
 	struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
 
 	if (ld == NULL || ld->clear_request_commit == NULL)
 		return false;
-	ld->clear_request_commit(req);
+	ld->clear_request_commit(req, cinfo);
 	return true;
 }
 
 static inline int
-pnfs_scan_commit_lists(struct inode *inode, int max, spinlock_t *lock)
+pnfs_scan_commit_lists(struct inode *inode, struct nfs_commit_info *cinfo,
+		       int max)
 {
-	struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
-	int ret;
-
-	if (ld == NULL || ld->scan_commit_lists == NULL)
+	if (cinfo->ds == NULL || cinfo->ds->nwritten == 0)
 		return 0;
-	ret = ld->scan_commit_lists(inode, max, lock);
-	if (ret != 0)
-		set_bit(NFS_INO_PNFS_COMMIT, &NFS_I(inode)->flags);
-	return ret;
+	else
+		return NFS_SERVER(inode)->pnfs_curr_ld->scan_commit_lists(cinfo, max);
 }
 
 /* Should the pNFS client commit and return the layout upon a setattr */
@@ -409,25 +424,34 @@ static inline bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, st
 }
 
 static inline int
-pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how)
+pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how,
+		 struct nfs_commit_info *cinfo)
 {
 	return PNFS_NOT_ATTEMPTED;
 }
 
+static inline struct pnfs_ds_commit_info *
+pnfs_get_ds_info(struct inode *inode)
+{
+	return NULL;
+}
+
 static inline bool
-pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
+pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
+			 struct nfs_commit_info *cinfo)
 {
 	return false;
 }
 
 static inline bool
-pnfs_clear_request_commit(struct nfs_page *req)
+pnfs_clear_request_commit(struct nfs_page *req, struct nfs_commit_info *cinfo)
 {
 	return false;
 }
 
 static inline int
-pnfs_scan_commit_lists(struct inode *inode, int max, spinlock_t *lock)
+pnfs_scan_commit_lists(struct inode *inode, struct nfs_commit_info *cinfo,
+		       int max)
 {
 	return 0;
 }
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 2500f1cf1996..18bf70055272 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -452,65 +452,79 @@ nfs_mark_request_dirty(struct nfs_page *req)
 /**
  * nfs_request_add_commit_list - add request to a commit list
  * @req: pointer to a struct nfs_page
- * @head: commit list head
+ * @dst: commit list head
+ * @cinfo: holds list lock and accounting info
  *
- * This sets the PG_CLEAN bit, updates the inode global count of
+ * This sets the PG_CLEAN bit, updates the cinfo count of
  * number of outstanding requests requiring a commit as well as
  * the MM page stats.
  *
- * The caller must _not_ hold the inode->i_lock, but must be
+ * The caller must _not_ hold the cinfo->lock, but must be
  * holding the nfs_page lock.
  */
 void
-nfs_request_add_commit_list(struct nfs_page *req, struct list_head *head)
+nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst,
+			    struct nfs_commit_info *cinfo)
 {
-	struct inode *inode = req->wb_context->dentry->d_inode;
-
 	set_bit(PG_CLEAN, &(req)->wb_flags);
-	spin_lock(&inode->i_lock);
-	nfs_list_add_request(req, head);
-	NFS_I(inode)->ncommit++;
-	spin_unlock(&inode->i_lock);
+	spin_lock(cinfo->lock);
+	nfs_list_add_request(req, dst);
+	cinfo->mds->ncommit++;
+	spin_unlock(cinfo->lock);
 	inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
 	inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE);
-	__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
+	__mark_inode_dirty(req->wb_context->dentry->d_inode, I_DIRTY_DATASYNC);
 }
 EXPORT_SYMBOL_GPL(nfs_request_add_commit_list);
 
 /**
  * nfs_request_remove_commit_list - Remove request from a commit list
  * @req: pointer to a nfs_page
+ * @cinfo: holds list lock and accounting info
  *
- * This clears the PG_CLEAN bit, and updates the inode global count of
+ * This clears the PG_CLEAN bit, and updates the cinfo's count of
  * number of outstanding requests requiring a commit
  * It does not update the MM page stats.
  *
- * The caller _must_ hold the inode->i_lock and the nfs_page lock.
+ * The caller _must_ hold the cinfo->lock and the nfs_page lock.
  */
 void
-nfs_request_remove_commit_list(struct nfs_page *req)
+nfs_request_remove_commit_list(struct nfs_page *req,
+			       struct nfs_commit_info *cinfo)
 {
-	struct inode *inode = req->wb_context->dentry->d_inode;
-
 	if (!test_and_clear_bit(PG_CLEAN, &(req)->wb_flags))
 		return;
 	nfs_list_remove_request(req);
-	NFS_I(inode)->ncommit--;
+	cinfo->mds->ncommit--;
 }
 EXPORT_SYMBOL_GPL(nfs_request_remove_commit_list);
 
+static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo,
+				      struct inode *inode)
+{
+	cinfo->lock = &inode->i_lock;
+	cinfo->mds = &NFS_I(inode)->commit_info;
+	cinfo->ds = pnfs_get_ds_info(inode);
+}
+
+void nfs_init_cinfo(struct nfs_commit_info *cinfo,
+		    struct inode *inode,
+		    struct nfs_direct_req *dreq)
+{
+	nfs_init_cinfo_from_inode(cinfo, inode);
+}
+EXPORT_SYMBOL_GPL(nfs_init_cinfo);
 
 /*
  * Add a request to the inode's commit list.
  */
 static void
-nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
+nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
+			struct nfs_commit_info *cinfo)
 {
-	struct inode *inode = req->wb_context->dentry->d_inode;
-
-	if (pnfs_mark_request_commit(req, lseg))
+	if (pnfs_mark_request_commit(req, lseg, cinfo))
 		return;
-	nfs_request_add_commit_list(req, &NFS_I(inode)->commit_list);
+	nfs_request_add_commit_list(req, &cinfo->mds->list, cinfo);
 }
 
 static void
@@ -525,11 +539,13 @@ nfs_clear_request_commit(struct nfs_page *req)
 {
 	if (test_bit(PG_CLEAN, &req->wb_flags)) {
 		struct inode *inode = req->wb_context->dentry->d_inode;
+		struct nfs_commit_info cinfo;
 
-		if (!pnfs_clear_request_commit(req)) {
-			spin_lock(&inode->i_lock);
-			nfs_request_remove_commit_list(req);
-			spin_unlock(&inode->i_lock);
+		nfs_init_cinfo_from_inode(&cinfo, inode);
+		if (!pnfs_clear_request_commit(req, &cinfo)) {
+			spin_lock(cinfo.lock);
+			nfs_request_remove_commit_list(req, &cinfo);
+			spin_unlock(cinfo.lock);
 		}
 		nfs_clear_page_commit(req->wb_page);
 	}
@@ -545,7 +561,8 @@ int nfs_write_need_commit(struct nfs_write_data *data)
 
 #else
 static void
-nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
+nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
+			struct nfs_commit_info *cinfo)
 {
 }
 
@@ -564,10 +581,12 @@ int nfs_write_need_commit(struct nfs_write_data *data)
 
 static void nfs_write_completion(struct nfs_pgio_header *hdr)
 {
+	struct nfs_commit_info cinfo;
 	unsigned long bytes = 0;
 
 	if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
 		goto out;
+	nfs_init_cinfo_from_inode(&cinfo, hdr->inode);
 	while (!list_empty(&hdr->pages)) {
 		struct nfs_page *req = nfs_list_entry(hdr->pages.next);
 		struct page *page = req->wb_page;
@@ -585,7 +604,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr)
 			goto next;
 		}
 		if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) {
-			nfs_mark_request_commit(req, hdr->lseg);
+			nfs_mark_request_commit(req, hdr->lseg, &cinfo);
 			goto next;
 		}
 remove_req:
@@ -599,16 +618,16 @@ out:
 }
 
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
-static int
-nfs_need_commit(struct nfs_inode *nfsi)
+static unsigned long
+nfs_reqs_to_commit(struct nfs_commit_info *cinfo)
 {
-	return nfsi->ncommit > 0;
+	return cinfo->mds->ncommit;
 }
 
-/* i_lock held by caller */
+/* cinfo->lock held by caller */
 static int
-nfs_scan_commit_list(struct list_head *src, struct list_head *dst, int max,
-		spinlock_t *lock)
+nfs_scan_commit_list(struct list_head *src, struct list_head *dst,
+		     struct nfs_commit_info *cinfo, int max)
 {
 	struct nfs_page *req, *tmp;
 	int ret = 0;
@@ -616,9 +635,9 @@ nfs_scan_commit_list(struct list_head *src, struct list_head *dst, int max,
 	list_for_each_entry_safe(req, tmp, src, wb_list) {
 		if (!nfs_lock_request(req))
 			continue;
-		if (cond_resched_lock(lock))
+		if (cond_resched_lock(cinfo->lock))
 			list_safe_reset_next(req, tmp, wb_list);
-		nfs_request_remove_commit_list(req);
+		nfs_request_remove_commit_list(req, cinfo);
 		nfs_list_add_request(req, dst);
 		ret++;
 		if (ret == max)
@@ -630,37 +649,38 @@ nfs_scan_commit_list(struct list_head *src, struct list_head *dst, int max,
 /*
  * nfs_scan_commit - Scan an inode for commit requests
  * @inode: NFS inode to scan
- * @dst: destination list
+ * @dst: mds destination list
+ * @cinfo: mds and ds lists of reqs ready to commit
  *
  * Moves requests from the inode's 'commit' request list.
  * The requests are *not* checked to ensure that they form a contiguous set.
  */
 static int
-nfs_scan_commit(struct inode *inode, struct list_head *dst)
+nfs_scan_commit(struct inode *inode, struct list_head *dst,
+		struct nfs_commit_info *cinfo)
 {
-	struct nfs_inode *nfsi = NFS_I(inode);
 	int ret = 0;
 
-	spin_lock(&inode->i_lock);
-	if (nfsi->ncommit > 0) {
+	spin_lock(cinfo->lock);
+	if (cinfo->mds->ncommit > 0) {
 		const int max = INT_MAX;
 
-		ret = nfs_scan_commit_list(&nfsi->commit_list, dst, max,
-				&inode->i_lock);
-		ret += pnfs_scan_commit_lists(inode, max - ret,
-				&inode->i_lock);
+		ret = nfs_scan_commit_list(&cinfo->mds->list, dst,
+					   cinfo, max);
+		ret += pnfs_scan_commit_lists(inode, cinfo, max - ret);
 	}
-	spin_unlock(&inode->i_lock);
+	spin_unlock(cinfo->lock);
 	return ret;
 }
 
 #else
-static inline int nfs_need_commit(struct nfs_inode *nfsi)
+static unsigned long nfs_reqs_to_commit(struct nfs_commit_info *cinfo)
 {
 	return 0;
 }
 
-static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst)
+static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst,
+				  struct nfs_commit_info *cinfo)
 {
 	return 0;
 }
@@ -929,7 +949,7 @@ EXPORT_SYMBOL_GPL(nfs_initiate_write);
  */
 static void nfs_write_rpcsetup(struct nfs_write_data *data,
 		unsigned int count, unsigned int offset,
-		int how)
+		int how, struct nfs_commit_info *cinfo)
 {
 	struct nfs_page *req = data->header->req;
 
@@ -950,7 +970,7 @@ static void nfs_write_rpcsetup(struct nfs_write_data *data,
 	case 0:
 		break;
 	case FLUSH_COND_STABLE:
-		if (nfs_need_commit(NFS_I(data->header->inode)))
+		if (nfs_reqs_to_commit(cinfo))
 			break;
 	default:
 		data->args.stable = NFS_FILE_SYNC;
@@ -1034,12 +1054,14 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc,
 	unsigned int offset;
 	int requests = 0;
 	int ret = 0;
+	struct nfs_commit_info cinfo;
 
+	nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq);
 	nfs_list_remove_request(req);
 	nfs_list_add_request(req, &hdr->pages);
 
 	if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
-	    (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit ||
+	    (desc->pg_moreio || nfs_reqs_to_commit(&cinfo) ||
 	     desc->pg_count > wsize))
 		desc->pg_ioflags &= ~FLUSH_COND_STABLE;
 
@@ -1053,7 +1075,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc,
 		if (!data)
 			goto out_bad;
 		data->pages.pagevec[0] = page;
-		nfs_write_rpcsetup(data, len, offset, desc->pg_ioflags);
+		nfs_write_rpcsetup(data, len, offset, desc->pg_ioflags, &cinfo);
 		list_add(&data->list, &hdr->rpc_list);
 		requests++;
 		nbytes -= len;
@@ -1088,6 +1110,7 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc,
 	struct nfs_write_data	*data;
 	struct list_head *head = &desc->pg_list;
 	int ret = 0;
+	struct nfs_commit_info cinfo;
 
 	data = nfs_writedata_alloc(hdr, nfs_page_array_len(desc->pg_base,
 							   desc->pg_count));
@@ -1097,6 +1120,7 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc,
 		goto out;
 	}
 
+	nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq);
 	pages = data->pages.pagevec;
 	while (!list_empty(head)) {
 		req = nfs_list_entry(head->next);
@@ -1106,11 +1130,11 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc,
 	}
 
 	if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
-	    (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit))
+	    (desc->pg_moreio || nfs_reqs_to_commit(&cinfo)))
 		desc->pg_ioflags &= ~FLUSH_COND_STABLE;
 
 	/* Set up the argument struct */
-	nfs_write_rpcsetup(data, desc->pg_count, 0, desc->pg_ioflags);
+	nfs_write_rpcsetup(data, desc->pg_count, 0, desc->pg_ioflags, &cinfo);
 	list_add(&data->list, &hdr->rpc_list);
 	desc->pg_rpc_callops = &nfs_write_common_ops;
 out:
@@ -1417,14 +1441,15 @@ void nfs_init_commit(struct nfs_commit_data *data,
 EXPORT_SYMBOL_GPL(nfs_init_commit);
 
 void nfs_retry_commit(struct list_head *page_list,
-		      struct pnfs_layout_segment *lseg)
+		      struct pnfs_layout_segment *lseg,
+		      struct nfs_commit_info *cinfo)
 {
 	struct nfs_page *req;
 
 	while (!list_empty(page_list)) {
 		req = nfs_list_entry(page_list->next);
 		nfs_list_remove_request(req);
-		nfs_mark_request_commit(req, lseg);
+		nfs_mark_request_commit(req, lseg, cinfo);
 		dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
 		dec_bdi_stat(req->wb_page->mapping->backing_dev_info,
 			     BDI_RECLAIMABLE);
@@ -1437,7 +1462,8 @@ EXPORT_SYMBOL_GPL(nfs_retry_commit);
  * Commit dirty pages
  */
 static int
-nfs_commit_list(struct inode *inode, struct list_head *head, int how)
+nfs_commit_list(struct inode *inode, struct list_head *head, int how,
+		struct nfs_commit_info *cinfo)
 {
 	struct nfs_commit_data	*data;
 
@@ -1450,7 +1476,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
 	nfs_init_commit(data, head, NULL);
 	return nfs_initiate_commit(NFS_CLIENT(inode), data, data->mds_ops, how);
  out_bad:
-	nfs_retry_commit(head, NULL);
+	nfs_retry_commit(head, NULL, cinfo);
 	nfs_commit_clear_lock(NFS_I(inode));
 	return -ENOMEM;
 }
@@ -1524,30 +1550,32 @@ static const struct rpc_call_ops nfs_commit_ops = {
 };
 
 static int nfs_generic_commit_list(struct inode *inode, struct list_head *head,
-				   int how)
+				   int how, struct nfs_commit_info *cinfo)
 {
 	int status;
 
-	status = pnfs_commit_list(inode, head, how);
+	status = pnfs_commit_list(inode, head, how, cinfo);
 	if (status == PNFS_NOT_ATTEMPTED)
-		status = nfs_commit_list(inode, head, how);
+		status = nfs_commit_list(inode, head, how, cinfo);
 	return status;
 }
 
 int nfs_commit_inode(struct inode *inode, int how)
 {
 	LIST_HEAD(head);
+	struct nfs_commit_info cinfo;
 	int may_wait = how & FLUSH_SYNC;
 	int res;
 
 	res = nfs_commit_set_lock(NFS_I(inode), may_wait);
 	if (res <= 0)
 		goto out_mark_dirty;
-	res = nfs_scan_commit(inode, &head);
+	nfs_init_cinfo_from_inode(&cinfo, inode);
+	res = nfs_scan_commit(inode, &head, &cinfo);
 	if (res) {
 		int error;
 
-		error = nfs_generic_commit_list(inode, &head, how);
+		error = nfs_generic_commit_list(inode, &head, how, &cinfo);
 		if (error < 0)
 			return error;
 		if (!may_wait)
@@ -1578,14 +1606,14 @@ static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_contr
 	int ret = 0;
 
 	/* no commits means nothing needs to be done */
-	if (!nfsi->ncommit)
+	if (!nfsi->commit_info.ncommit)
 		return ret;
 
 	if (wbc->sync_mode == WB_SYNC_NONE) {
 		/* Don't commit yet if this is a non-blocking flush and there
 		 * are a lot of outstanding writes for this mapping.
 		 */
-		if (nfsi->ncommit <= (nfsi->npages >> 1))
+		if (nfsi->commit_info.ncommit <= (nfsi->npages >> 1))
 			goto out_mark_dirty;
 
 		/* don't wait for the COMMIT response */
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 8d3a2b804201..8a88c16662c5 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -179,8 +179,7 @@ struct nfs_inode {
 	__be32			cookieverf[2];
 
 	unsigned long		npages;
-	unsigned long		ncommit;
-	struct list_head	commit_list;
+	struct nfs_mds_commit_info commit_info;
 
 	/* Open contexts for shared mmap writes */
 	struct list_head	open_files;
@@ -201,7 +200,6 @@ struct nfs_inode {
 
 	/* pNFS layout information */
 	struct pnfs_layout_hdr *layout;
-	atomic_t		commits_outstanding;
 #endif /* CONFIG_NFS_V4*/
 #ifdef CONFIG_NFS_FSCACHE
 	struct fscache_cookie	*fscache;
@@ -230,7 +228,6 @@ struct nfs_inode {
 #define NFS_INO_FSCACHE		(5)		/* inode can be cached by FS-Cache */
 #define NFS_INO_FSCACHE_LOCK	(6)		/* FS-Cache cookie management lock */
 #define NFS_INO_COMMIT		(7)		/* inode is committing unstable writes */
-#define NFS_INO_PNFS_COMMIT	(8)		/* use pnfs code for commit */
 #define NFS_INO_LAYOUTCOMMIT	(9)		/* layoutcommit required */
 #define NFS_INO_LAYOUTCOMMITTING (10)		/* layoutcommit inflight */
 
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 38687b87ca9b..224e1e82670c 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1079,6 +1079,21 @@ struct nfstime4 {
 };
 
 #ifdef CONFIG_NFS_V4_1
+
+struct pnfs_commit_bucket {
+	struct list_head written;
+	struct list_head committing;
+	struct pnfs_layout_segment *wlseg;
+	struct pnfs_layout_segment *clseg;
+};
+
+struct pnfs_ds_commit_info {
+	int nwritten;
+	int ncommitting;
+	int nbuckets;
+	struct pnfs_commit_bucket *buckets;
+};
+
 #define NFS4_EXCHANGE_ID_LEN	(48)
 struct nfs41_exchange_id_args {
 	struct nfs_client		*client;
@@ -1242,6 +1257,18 @@ struct nfs_write_header {
 	struct nfs_write_data	rpc_data;
 };
 
+struct nfs_mds_commit_info {
+	atomic_t rpcs_out;
+	unsigned long		ncommit;
+	struct list_head	list;
+};
+
+struct nfs_commit_info {
+	spinlock_t			*lock;
+	struct nfs_mds_commit_info	*mds;
+	struct pnfs_ds_commit_info	*ds;
+};
+
 struct nfs_commit_data {
 	struct rpc_task		task;
 	struct inode		*inode;
-- 
cgit v1.3-7-g2ca7


From f453a54a01c7c0453ad9550906e3d2663dd486ac Mon Sep 17 00:00:00 2001
From: Fred Isaman <iisaman@netapp.com>
Date: Fri, 20 Apr 2012 14:47:54 -0400
Subject: NFS: create nfs_commit_completion_ops

Factors out the code that needs to change when directio
starts using these code paths.

Signed-off-by: Fred Isaman <iisaman@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/internal.h       |  5 ++---
 fs/nfs/nfs4filelayout.c | 12 ++++--------
 fs/nfs/write.c          | 31 +++++++++++++++++++++----------
 include/linux/nfs_xdr.h |  9 +++++++++
 4 files changed, 36 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 145e9e7dc8ce..137f5cd71433 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -344,13 +344,12 @@ extern int nfs_initiate_commit(struct rpc_clnt *clnt,
 			       int how);
 extern void nfs_init_commit(struct nfs_commit_data *data,
 			    struct list_head *head,
-			    struct pnfs_layout_segment *lseg);
+			    struct pnfs_layout_segment *lseg,
+			    struct nfs_commit_info *cinfo);
 void nfs_retry_commit(struct list_head *page_list,
 		      struct pnfs_layout_segment *lseg,
 		      struct nfs_commit_info *cinfo);
-void nfs_commit_clear_lock(struct nfs_inode *nfsi);
 void nfs_commitdata_release(struct nfs_commit_data *data);
-void nfs_commit_release_pages(struct nfs_commit_data *data);
 void nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst,
 				 struct nfs_commit_info *cinfo);
 void nfs_request_remove_commit_list(struct nfs_page *req,
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index fe2cb55ca6b1..26d1da486761 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -347,12 +347,8 @@ static void filelayout_commit_count_stats(struct rpc_task *task, void *data)
 static void filelayout_commit_release(void *calldata)
 {
 	struct nfs_commit_data *data = calldata;
-	struct nfs_commit_info cinfo;
 
-	nfs_commit_release_pages(data);
-	nfs_init_cinfo(&cinfo, data->inode, data->dreq);
-	if (atomic_dec_and_test(&cinfo.mds->rpcs_out))
-		nfs_commit_clear_lock(NFS_I(data->inode));
+	data->completion_ops->completion(data);
 	put_lseg(data->lseg);
 	nfs_commitdata_release(data);
 }
@@ -1108,7 +1104,7 @@ filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
 	nreq += alloc_ds_commits(cinfo, &list);
 
 	if (nreq == 0) {
-		nfs_commit_clear_lock(NFS_I(inode));
+		cinfo->completion_ops->error_cleanup(NFS_I(inode));
 		goto out;
 	}
 
@@ -1117,14 +1113,14 @@ filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
 	list_for_each_entry_safe(data, tmp, &list, pages) {
 		list_del_init(&data->pages);
 		if (!data->lseg) {
-			nfs_init_commit(data, mds_pages, NULL);
+			nfs_init_commit(data, mds_pages, NULL, cinfo);
 			nfs_initiate_commit(NFS_CLIENT(inode), data,
 					    data->mds_ops, how);
 		} else {
 			struct pnfs_commit_bucket *buckets;
 
 			buckets = cinfo->ds->buckets;
-			nfs_init_commit(data, &buckets[data->ds_commit_index].committing, data->lseg);
+			nfs_init_commit(data, &buckets[data->ds_commit_index].committing, data->lseg, cinfo);
 			filelayout_initiate_commit(data, how);
 		}
 	}
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 18bf70055272..333d01d26292 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -46,6 +46,7 @@ static void nfs_redirty_request(struct nfs_page *req);
 static const struct rpc_call_ops nfs_write_common_ops;
 static const struct rpc_call_ops nfs_commit_ops;
 static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops;
+static const struct nfs_commit_completion_ops nfs_commit_completion_ops;
 
 static struct kmem_cache *nfs_wdata_cachep;
 static mempool_t *nfs_wdata_mempool;
@@ -505,6 +506,7 @@ static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo,
 	cinfo->lock = &inode->i_lock;
 	cinfo->mds = &NFS_I(inode)->commit_info;
 	cinfo->ds = pnfs_get_ds_info(inode);
+	cinfo->completion_ops = &nfs_commit_completion_ops;
 }
 
 void nfs_init_cinfo(struct nfs_commit_info *cinfo,
@@ -1358,13 +1360,12 @@ static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait)
 	return (ret < 0) ? ret : 1;
 }
 
-void nfs_commit_clear_lock(struct nfs_inode *nfsi)
+static void nfs_commit_clear_lock(struct nfs_inode *nfsi)
 {
 	clear_bit(NFS_INO_COMMIT, &nfsi->flags);
 	smp_mb__after_clear_bit();
 	wake_up_bit(&nfsi->flags, NFS_INO_COMMIT);
 }
-EXPORT_SYMBOL_GPL(nfs_commit_clear_lock);
 
 void nfs_commitdata_release(struct nfs_commit_data *data)
 {
@@ -1413,8 +1414,9 @@ EXPORT_SYMBOL_GPL(nfs_initiate_commit);
  * Set up the argument/result storage required for the RPC call.
  */
 void nfs_init_commit(struct nfs_commit_data *data,
-			    struct list_head *head,
-			    struct pnfs_layout_segment *lseg)
+		     struct list_head *head,
+		     struct pnfs_layout_segment *lseg,
+		     struct nfs_commit_info *cinfo)
 {
 	struct nfs_page *first = nfs_list_entry(head->next);
 	struct inode *inode = first->wb_context->dentry->d_inode;
@@ -1428,6 +1430,7 @@ void nfs_init_commit(struct nfs_commit_data *data,
 	data->cred	  = first->wb_context->cred;
 	data->lseg	  = lseg; /* reference transferred */
 	data->mds_ops     = &nfs_commit_ops;
+	data->completion_ops = cinfo->completion_ops;
 
 	data->args.fh     = NFS_FH(data->inode);
 	/* Note: we always request a commit of the entire inode */
@@ -1473,11 +1476,12 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how,
 		goto out_bad;
 
 	/* Set up the argument struct */
-	nfs_init_commit(data, head, NULL);
+	nfs_init_commit(data, head, NULL, cinfo);
+	atomic_inc(&cinfo->mds->rpcs_out);
 	return nfs_initiate_commit(NFS_CLIENT(inode), data, data->mds_ops, how);
  out_bad:
 	nfs_retry_commit(head, NULL, cinfo);
-	nfs_commit_clear_lock(NFS_I(inode));
+	cinfo->completion_ops->error_cleanup(NFS_I(inode));
 	return -ENOMEM;
 }
 
@@ -1495,10 +1499,11 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata)
 	NFS_PROTO(data->inode)->commit_done(task, data);
 }
 
-void nfs_commit_release_pages(struct nfs_commit_data *data)
+static void nfs_commit_release_pages(struct nfs_commit_data *data)
 {
 	struct nfs_page	*req;
 	int status = data->task.tk_status;
+	struct nfs_commit_info cinfo;
 
 	while (!list_empty(&data->pages)) {
 		req = nfs_list_entry(data->pages.next);
@@ -1531,15 +1536,16 @@ void nfs_commit_release_pages(struct nfs_commit_data *data)
 	next:
 		nfs_unlock_request(req);
 	}
+	nfs_init_cinfo(&cinfo, data->inode, data->dreq);
+	if (atomic_dec_and_test(&cinfo.mds->rpcs_out))
+		nfs_commit_clear_lock(NFS_I(data->inode));
 }
-EXPORT_SYMBOL_GPL(nfs_commit_release_pages);
 
 static void nfs_commit_release(void *calldata)
 {
 	struct nfs_commit_data *data = calldata;
 
-	nfs_commit_release_pages(data);
-	nfs_commit_clear_lock(NFS_I(data->inode));
+	data->completion_ops->completion(data);
 	nfs_commitdata_release(calldata);
 }
 
@@ -1549,6 +1555,11 @@ static const struct rpc_call_ops nfs_commit_ops = {
 	.rpc_release = nfs_commit_release,
 };
 
+static const struct nfs_commit_completion_ops nfs_commit_completion_ops = {
+	.completion = nfs_commit_release_pages,
+	.error_cleanup = nfs_commit_clear_lock,
+};
+
 static int nfs_generic_commit_list(struct inode *inode, struct list_head *head,
 				   int how, struct nfs_commit_info *cinfo)
 {
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 224e1e82670c..0e8b88ad9ae2 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1263,10 +1263,18 @@ struct nfs_mds_commit_info {
 	struct list_head	list;
 };
 
+struct nfs_commit_data;
+struct nfs_inode;
+struct nfs_commit_completion_ops {
+	void (*error_cleanup) (struct nfs_inode *nfsi);
+	void (*completion) (struct nfs_commit_data *data);
+};
+
 struct nfs_commit_info {
 	spinlock_t			*lock;
 	struct nfs_mds_commit_info	*mds;
 	struct pnfs_ds_commit_info	*ds;
+	const struct nfs_commit_completion_ops *completion_ops;
 };
 
 struct nfs_commit_data {
@@ -1285,6 +1293,7 @@ struct nfs_commit_data {
 	struct nfs_client	*ds_clp;	/* pNFS data server */
 	int			ds_commit_index;
 	const struct rpc_call_ops *mds_ops;
+	const struct nfs_commit_completion_ops *completion_ops;
 	int (*commit_done_cb) (struct rpc_task *task, struct nfs_commit_data *data);
 };
 
-- 
cgit v1.3-7-g2ca7


From b359f9d09bcbaede09243cfe844172ba055d89fd Mon Sep 17 00:00:00 2001
From: Fred Isaman <iisaman@netapp.com>
Date: Fri, 20 Apr 2012 14:47:55 -0400
Subject: NFS: add dreq to nfs_commit_info

Need this to pass into nfs_commitdata_init, in order to keep data->dreq
accurate.

Signed-off-by: Fred Isaman <iisaman@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/write.c          | 2 ++
 include/linux/nfs_xdr.h | 1 +
 2 files changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 333d01d26292..44a93d8c7b52 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -506,6 +506,7 @@ static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo,
 	cinfo->lock = &inode->i_lock;
 	cinfo->mds = &NFS_I(inode)->commit_info;
 	cinfo->ds = pnfs_get_ds_info(inode);
+	cinfo->dreq = NULL;
 	cinfo->completion_ops = &nfs_commit_completion_ops;
 }
 
@@ -1431,6 +1432,7 @@ void nfs_init_commit(struct nfs_commit_data *data,
 	data->lseg	  = lseg; /* reference transferred */
 	data->mds_ops     = &nfs_commit_ops;
 	data->completion_ops = cinfo->completion_ops;
+	data->dreq	  = cinfo->dreq;
 
 	data->args.fh     = NFS_FH(data->inode);
 	/* Note: we always request a commit of the entire inode */
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 0e8b88ad9ae2..5f563bd113e8 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1274,6 +1274,7 @@ struct nfs_commit_info {
 	spinlock_t			*lock;
 	struct nfs_mds_commit_info	*mds;
 	struct pnfs_ds_commit_info	*ds;
+	struct nfs_direct_req		*dreq;	/* O_DIRECT request */
 	const struct nfs_commit_completion_ops *completion_ops;
 };
 
-- 
cgit v1.3-7-g2ca7


From 2671bfc3beb44e70636bd0208274426db57f73b5 Mon Sep 17 00:00:00 2001
From: Bryan Schumaker <bjschuma@netapp.com>
Date: Fri, 27 Apr 2012 13:27:44 -0400
Subject: NFS: Remove secinfo knowledge out of the generic client

And also remove the unneeded rpc_op.

Signed-off-by: Bryan Schumaker <bjschuma@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/internal.h       |  3 ---
 fs/nfs/namespace.c      | 29 -----------------------------
 fs/nfs/nfs4_fs.h        |  1 +
 fs/nfs/nfs4namespace.c  | 29 +++++++++++++++++++++++++++++
 fs/nfs/nfs4proc.c       |  1 -
 include/linux/nfs_xdr.h |  1 -
 6 files changed, 30 insertions(+), 34 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index d68810f61869..d6994443f285 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -286,9 +286,6 @@ extern void nfs_sb_deactive(struct super_block *sb);
 extern char *nfs_path(char **p, struct dentry *dentry,
 		      char *buffer, ssize_t buflen);
 extern struct vfsmount *nfs_d_automount(struct path *path);
-#ifdef CONFIG_NFS_V4
-rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *);
-#endif
 
 /* getroot.c */
 extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *,
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index d51868e5683c..2a9591b0b150 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -119,35 +119,6 @@ Elong:
 }
 
 #ifdef CONFIG_NFS_V4
-rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors)
-{
-	struct gss_api_mech *mech;
-	struct xdr_netobj oid;
-	int i;
-	rpc_authflavor_t pseudoflavor = RPC_AUTH_UNIX;
-
-	for (i = 0; i < flavors->num_flavors; i++) {
-		struct nfs4_secinfo_flavor *flavor;
-		flavor = &flavors->flavors[i];
-
-		if (flavor->flavor == RPC_AUTH_NULL || flavor->flavor == RPC_AUTH_UNIX) {
-			pseudoflavor = flavor->flavor;
-			break;
-		} else if (flavor->flavor == RPC_AUTH_GSS) {
-			oid.len  = flavor->gss.sec_oid4.len;
-			oid.data = flavor->gss.sec_oid4.data;
-			mech = gss_mech_get_by_OID(&oid);
-			if (!mech)
-				continue;
-			pseudoflavor = gss_svc_to_pseudoflavor(mech, flavor->gss.service);
-			gss_mech_put(mech);
-			break;
-		}
-	}
-
-	return pseudoflavor;
-}
-
 static struct rpc_clnt *nfs_lookup_mountpoint(struct inode *dir,
 					      struct qstr *name,
 					      struct nfs_fh *fh,
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 8d75021020b3..53a487ee9867 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -206,6 +206,7 @@ extern const struct dentry_operations nfs4_dentry_operations;
 extern const struct inode_operations nfs4_dir_inode_operations;
 
 /* nfs4namespace.c */
+rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *);
 struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *, struct inode *, struct qstr *);
 
 /* nfs4proc.c */
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index a7f3dedc4ec7..a69ee3952bbe 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -132,6 +132,35 @@ static size_t nfs_parse_server_name(char *string, size_t len,
 	return ret;
 }
 
+rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors)
+{
+	struct gss_api_mech *mech;
+	struct xdr_netobj oid;
+	int i;
+	rpc_authflavor_t pseudoflavor = RPC_AUTH_UNIX;
+
+	for (i = 0; i < flavors->num_flavors; i++) {
+		struct nfs4_secinfo_flavor *flavor;
+		flavor = &flavors->flavors[i];
+
+		if (flavor->flavor == RPC_AUTH_NULL || flavor->flavor == RPC_AUTH_UNIX) {
+			pseudoflavor = flavor->flavor;
+			break;
+		} else if (flavor->flavor == RPC_AUTH_GSS) {
+			oid.len  = flavor->gss.sec_oid4.len;
+			oid.data = flavor->gss.sec_oid4.data;
+			mech = gss_mech_get_by_OID(&oid);
+			if (!mech)
+				continue;
+			pseudoflavor = gss_svc_to_pseudoflavor(mech, flavor->gss.service);
+			gss_mech_put(mech);
+			break;
+		}
+	}
+
+	return pseudoflavor;
+}
+
 static rpc_authflavor_t nfs4_negotiate_security(struct inode *inode, struct qstr *name)
 {
 	struct page *page;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 87af80d28a82..fa661b91e57c 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -6610,7 +6610,6 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
 	.close_context  = nfs4_close_context,
 	.open_context	= nfs4_atomic_open,
 	.init_client	= nfs4_init_client,
-	.secinfo	= nfs4_proc_secinfo,
 };
 
 static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = {
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 5f563bd113e8..eb1f143042f4 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1397,7 +1397,6 @@ struct nfs_rpc_ops {
 				struct iattr *iattr);
 	int	(*init_client) (struct nfs_client *, const struct rpc_timeout *,
 				const char *, rpc_authflavor_t, int);
-	int	(*secinfo)(struct inode *, const struct qstr *, struct nfs4_secinfo_flavors *);
 };
 
 /*
-- 
cgit v1.3-7-g2ca7


From 281cad46b34db4dbb1d1e603f7b9cfe25d1ae7c9 Mon Sep 17 00:00:00 2001
From: Bryan Schumaker <bjschuma@netapp.com>
Date: Fri, 27 Apr 2012 13:27:45 -0400
Subject: NFS: Create a submount rpc_op

This simplifies the code for v2 and v3 and gives v4 a chance to decide
on referrals without needing to modify the generic client.

Signed-off-by: Bryan Schumaker <bjschuma@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/internal.h       | 15 +++-------
 fs/nfs/namespace.c      | 75 +++++++++++++------------------------------------
 fs/nfs/nfs3proc.c       |  1 +
 fs/nfs/nfs4_fs.h        |  2 ++
 fs/nfs/nfs4namespace.c  | 24 +++++++++++++++-
 fs/nfs/nfs4proc.c       |  1 +
 fs/nfs/proc.c           |  1 +
 include/linux/nfs_xdr.h |  2 ++
 8 files changed, 54 insertions(+), 67 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index d6994443f285..0fd1efaf1cff 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -185,17 +185,6 @@ static inline void nfs_fs_proc_exit(void)
 }
 #endif
 
-/* nfs4namespace.c */
-#ifdef CONFIG_NFS_V4
-extern struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *dentry);
-#else
-static inline
-struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *dentry)
-{
-	return ERR_PTR(-ENOENT);
-}
-#endif
-
 /* callback_xdr.c */
 extern struct svc_version nfs4_callback_version1;
 extern struct svc_version nfs4_callback_version4;
@@ -286,6 +275,10 @@ extern void nfs_sb_deactive(struct super_block *sb);
 extern char *nfs_path(char **p, struct dentry *dentry,
 		      char *buffer, ssize_t buflen);
 extern struct vfsmount *nfs_d_automount(struct path *path);
+struct vfsmount *nfs_submount(struct nfs_server *, struct dentry *,
+			      struct nfs_fh *, struct nfs_fattr *);
+struct vfsmount *nfs_do_submount(struct dentry *, struct nfs_fh *,
+				 struct nfs_fattr *, rpc_authflavor_t);
 
 /* getroot.c */
 extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *,
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 2a9591b0b150..e36fd8a51819 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -26,11 +26,6 @@ static LIST_HEAD(nfs_automount_list);
 static DECLARE_DELAYED_WORK(nfs_automount_task, nfs_expire_automounts);
 int nfs_mountpoint_expiry_timeout = 500 * HZ;
 
-static struct vfsmount *nfs_do_submount(struct dentry *dentry,
-					struct nfs_fh *fh,
-					struct nfs_fattr *fattr,
-					rpc_authflavor_t authflavor);
-
 /*
  * nfs_path - reconstruct the path given an arbitrary dentry
  * @base - used to return pointer to the end of devname part of path
@@ -118,35 +113,6 @@ Elong:
 	return ERR_PTR(-ENAMETOOLONG);
 }
 
-#ifdef CONFIG_NFS_V4
-static struct rpc_clnt *nfs_lookup_mountpoint(struct inode *dir,
-					      struct qstr *name,
-					      struct nfs_fh *fh,
-					      struct nfs_fattr *fattr)
-{
-	int err;
-
-	if (NFS_PROTO(dir)->version == 4)
-		return nfs4_proc_lookup_mountpoint(dir, name, fh, fattr);
-
-	err = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, name, fh, fattr);
-	if (err)
-		return ERR_PTR(err);
-	return rpc_clone_client(NFS_SERVER(dir)->client);
-}
-#else /* CONFIG_NFS_V4 */
-static inline struct rpc_clnt *nfs_lookup_mountpoint(struct inode *dir,
-						     struct qstr *name,
-						     struct nfs_fh *fh,
-						     struct nfs_fattr *fattr)
-{
-	int err = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, name, fh, fattr);
-	if (err)
-		return ERR_PTR(err);
-	return rpc_clone_client(NFS_SERVER(dir)->client);
-}
-#endif /* CONFIG_NFS_V4 */
-
 /*
  * nfs_d_automount - Handle crossing a mountpoint on the server
  * @path - The mountpoint
@@ -162,10 +128,9 @@ static inline struct rpc_clnt *nfs_lookup_mountpoint(struct inode *dir,
 struct vfsmount *nfs_d_automount(struct path *path)
 {
 	struct vfsmount *mnt;
-	struct dentry *parent;
+	struct nfs_server *server = NFS_SERVER(path->dentry->d_inode);
 	struct nfs_fh *fh = NULL;
 	struct nfs_fattr *fattr = NULL;
-	struct rpc_clnt *client;
 
 	dprintk("--> nfs_d_automount()\n");
 
@@ -181,21 +146,7 @@ struct vfsmount *nfs_d_automount(struct path *path)
 
 	dprintk("%s: enter\n", __func__);
 
-	/* Look it up again to get its attributes */
-	parent = dget_parent(path->dentry);
-	client = nfs_lookup_mountpoint(parent->d_inode, &path->dentry->d_name, fh, fattr);
-	dput(parent);
-	if (IS_ERR(client)) {
-		mnt = ERR_CAST(client);
-		goto out;
-	}
-
-	if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL)
-		mnt = nfs_do_refmount(client, path->dentry);
-	else
-		mnt = nfs_do_submount(path->dentry, fh, fattr, client->cl_auth->au_flavor);
-	rpc_shutdown_client(client);
-
+	mnt = server->nfs_client->rpc_ops->submount(server, path->dentry, fh, fattr);
 	if (IS_ERR(mnt))
 		goto out;
 
@@ -268,10 +219,8 @@ static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server,
  * @authflavor - security flavor to use when performing the mount
  *
  */
-static struct vfsmount *nfs_do_submount(struct dentry *dentry,
-					struct nfs_fh *fh,
-					struct nfs_fattr *fattr,
-					rpc_authflavor_t authflavor)
+struct vfsmount *nfs_do_submount(struct dentry *dentry, struct nfs_fh *fh,
+				 struct nfs_fattr *fattr, rpc_authflavor_t authflavor)
 {
 	struct nfs_clone_mount mountdata = {
 		.sb = dentry->d_sb,
@@ -304,3 +253,19 @@ out:
 	dprintk("<-- nfs_do_submount() = %p\n", mnt);
 	return mnt;
 }
+
+struct vfsmount *nfs_submount(struct nfs_server *server, struct dentry *dentry,
+			      struct nfs_fh *fh, struct nfs_fattr *fattr)
+{
+	int err;
+	struct dentry *parent = dget_parent(dentry);
+
+	/* Look it up again to get its attributes */
+	err = server->nfs_client->rpc_ops->lookup(server->client, parent->d_inode,
+						  &dentry->d_name, fh, fattr);
+	dput(parent);
+	if (err != 0)
+		return ERR_PTR(err);
+
+	return nfs_do_submount(dentry, fh, fattr, server->client->cl_auth->au_flavor);
+}
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 56dcefc2f3f7..c23214d55ecf 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -885,6 +885,7 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
 	.file_inode_ops	= &nfs3_file_inode_operations,
 	.file_ops	= &nfs_file_operations,
 	.getroot	= nfs3_proc_get_root,
+	.submount	= nfs_submount,
 	.getattr	= nfs3_proc_getattr,
 	.setattr	= nfs3_proc_setattr,
 	.lookup		= nfs3_proc_lookup,
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 53a487ee9867..97365b0f9d3f 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -208,6 +208,8 @@ extern const struct inode_operations nfs4_dir_inode_operations;
 /* nfs4namespace.c */
 rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *);
 struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *, struct inode *, struct qstr *);
+struct vfsmount *nfs4_submount(struct nfs_server *, struct dentry *,
+			       struct nfs_fh *, struct nfs_fattr *);
 
 /* nfs4proc.c */
 extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *);
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index a69ee3952bbe..80fc0fe7095e 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -329,7 +329,7 @@ out:
  * @dentry - dentry of referral
  *
  */
-struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *dentry)
+static struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *dentry)
 {
 	struct vfsmount *mnt = ERR_PTR(-ENOMEM);
 	struct dentry *parent;
@@ -370,3 +370,25 @@ out:
 	dprintk("%s: done\n", __func__);
 	return mnt;
 }
+
+struct vfsmount *nfs4_submount(struct nfs_server *server, struct dentry *dentry,
+			       struct nfs_fh *fh, struct nfs_fattr *fattr)
+{
+	struct dentry *parent = dget_parent(dentry);
+	struct rpc_clnt *client;
+	struct vfsmount *mnt;
+
+	/* Look it up again to get its attributes and sec flavor */
+	client = nfs4_proc_lookup_mountpoint(parent->d_inode, &dentry->d_name, fh, fattr);
+	dput(parent);
+	if (IS_ERR(client))
+		return ERR_CAST(client);
+
+	if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL)
+		mnt = nfs_do_refmount(client, dentry);
+	else
+		mnt = nfs_do_submount(dentry, fh, fattr, client->cl_auth->au_flavor);
+
+	rpc_shutdown_client(client);
+	return mnt;
+}
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index fa661b91e57c..2091af294c61 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -6571,6 +6571,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
 	.file_inode_ops	= &nfs4_file_inode_operations,
 	.file_ops	= &nfs4_file_operations,
 	.getroot	= nfs4_proc_get_root,
+	.submount	= nfs4_submount,
 	.getattr	= nfs4_proc_getattr,
 	.setattr	= nfs4_proc_setattr,
 	.lookup		= nfs4_proc_lookup,
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 22ee70586875..76b3229fc527 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -742,6 +742,7 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
 	.file_inode_ops	= &nfs_file_inode_operations,
 	.file_ops	= &nfs_file_operations,
 	.getroot	= nfs_proc_get_root,
+	.submount	= nfs_submount,
 	.getattr	= nfs_proc_getattr,
 	.setattr	= nfs_proc_setattr,
 	.lookup		= nfs_proc_lookup,
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index eb1f143042f4..4dada94eba7d 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1341,6 +1341,8 @@ struct nfs_rpc_ops {
 
 	int	(*getroot) (struct nfs_server *, struct nfs_fh *,
 			    struct nfs_fsinfo *);
+	struct vfsmount *(*submount) (struct nfs_server *, struct dentry *,
+				      struct nfs_fh *, struct nfs_fattr *);
 	int	(*getattr) (struct nfs_server *, struct nfs_fh *,
 			    struct nfs_fattr *);
 	int	(*setattr) (struct dentry *, struct nfs_fattr *,
-- 
cgit v1.3-7-g2ca7


From 80a16b21a81eb639f0b726549f4c46c0e9aff92e Mon Sep 17 00:00:00 2001
From: Bryan Schumaker <bjschuma@netapp.com>
Date: Fri, 27 Apr 2012 13:27:46 -0400
Subject: NFS: Remove extra rpc_clnt argument to proc_lookup

Now that I'm doing secinfo automatically in the v4 code this extra
argument isn't needed.

Signed-off-by: Bryan Schumaker <bjschuma@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/dir.c            | 6 +++---
 fs/nfs/namespace.c      | 3 +--
 fs/nfs/nfs3proc.c       | 2 +-
 fs/nfs/nfs4proc.c       | 2 +-
 fs/nfs/proc.c           | 2 +-
 include/linux/nfs_xdr.h | 2 +-
 6 files changed, 8 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 8789210c6905..82b42e2ea65c 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1143,7 +1143,7 @@ static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
 	if (fhandle == NULL || fattr == NULL)
 		goto out_error;
 
-	error = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, &dentry->d_name, fhandle, fattr);
+	error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr);
 	if (error)
 		goto out_bad;
 	if (nfs_compare_fh(NFS_FH(inode), fhandle))
@@ -1299,7 +1299,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
 	parent = dentry->d_parent;
 	/* Protect against concurrent sillydeletes */
 	nfs_block_sillyrename(parent);
-	error = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, &dentry->d_name, fhandle, fattr);
+	error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr);
 	if (error == -ENOENT)
 		goto no_entry;
 	if (error < 0) {
@@ -1646,7 +1646,7 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
 	if (dentry->d_inode)
 		goto out;
 	if (fhandle->size == 0) {
-		error = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, &dentry->d_name, fhandle, fattr);
+		error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr);
 		if (error)
 			goto out_error;
 	}
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index e36fd8a51819..08b9c93675da 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -261,8 +261,7 @@ struct vfsmount *nfs_submount(struct nfs_server *server, struct dentry *dentry,
 	struct dentry *parent = dget_parent(dentry);
 
 	/* Look it up again to get its attributes */
-	err = server->nfs_client->rpc_ops->lookup(server->client, parent->d_inode,
-						  &dentry->d_name, fh, fattr);
+	err = server->nfs_client->rpc_ops->lookup(parent->d_inode, &dentry->d_name, fh, fattr);
 	dput(parent);
 	if (err != 0)
 		return ERR_PTR(err);
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index c23214d55ecf..48bcad294161 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -142,7 +142,7 @@ nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
 }
 
 static int
-nfs3_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct qstr *name,
+nfs3_proc_lookup(struct inode *dir, struct qstr *name,
 		 struct nfs_fh *fhandle, struct nfs_fattr *fattr)
 {
 	struct nfs3_diropargs	arg = {
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 2091af294c61..1780391a2c4c 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2578,7 +2578,7 @@ out:
 	return err;
 }
 
-static int nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct qstr *name,
+static int nfs4_proc_lookup(struct inode *dir, struct qstr *name,
 			    struct nfs_fh *fhandle, struct nfs_fattr *fattr)
 {
 	int status;
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 76b3229fc527..fea9163d6f8e 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -178,7 +178,7 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
 }
 
 static int
-nfs_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct qstr *name,
+nfs_proc_lookup(struct inode *dir, struct qstr *name,
 		struct nfs_fh *fhandle, struct nfs_fattr *fattr)
 {
 	struct nfs_diropargs	arg = {
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 4dada94eba7d..c940d46eb423 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1347,7 +1347,7 @@ struct nfs_rpc_ops {
 			    struct nfs_fattr *);
 	int	(*setattr) (struct dentry *, struct nfs_fattr *,
 			    struct iattr *);
-	int	(*lookup)  (struct rpc_clnt *clnt, struct inode *, struct qstr *,
+	int	(*lookup)  (struct inode *, struct qstr *,
 			    struct nfs_fh *, struct nfs_fattr *);
 	int	(*access)  (struct inode *, struct nfs_access_entry *);
 	int	(*readlink)(struct inode *, struct page *, unsigned int,
-- 
cgit v1.3-7-g2ca7


From 4f97615d19c370d1d907ef37f8bcd9c3672851ca Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 30 Apr 2012 18:39:20 -0400
Subject: NFS: Fix a compile issue when CONFIG_NFS_V4_1 is undefined

struct nfs_direct_req can't compile when struct pnfs_ds_commit_info
is undefined.

Reported-by: Bryan Schumaker <bjschuma@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: Fred Isaman <iisaman@netapp.com>
---
 include/linux/nfs_xdr.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index c940d46eb423..6deb8f097c42 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1176,6 +1176,11 @@ struct nfs41_free_stateid_res {
 	struct nfs4_sequence_res	seq_res;
 };
 
+#else
+
+struct pnfs_ds_commit_info {
+};
+
 #endif /* CONFIG_NFS_V4_1 */
 
 struct nfs_page;
-- 
cgit v1.3-7-g2ca7


From 41628d334361670d825fb03c04568f5ef9f084dc Mon Sep 17 00:00:00 2001
From: Konstantin Weitz <WEITZKON@de.ibm.com>
Date: Wed, 25 Apr 2012 15:30:38 +0200
Subject: KVM: s390: Implement the directed yield (diag 9c) hypervisor call for
 KVM

This patch implements the directed yield hypercall found on other
System z hypervisors. It delegates execution time to the virtual cpu
specified in the instruction's parameter.

Useful to avoid long spinlock waits in the guest.

Christian Borntraeger: moved common code in virt/kvm/

Signed-off-by: Konstantin Weitz <WEITZKON@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/s390/include/asm/kvm_host.h |  1 +
 arch/s390/kvm/diag.c             | 25 ++++++++++++++++++++++++
 arch/s390/kvm/kvm-s390.c         |  1 +
 include/linux/kvm_host.h         |  1 +
 virt/kvm/kvm_main.c              | 42 +++++++++++++++++++++++++---------------
 5 files changed, 54 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 7343872890a2..dd17537b9a9d 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -148,6 +148,7 @@ struct kvm_vcpu_stat {
 	u32 instruction_sigp_restart;
 	u32 diagnose_10;
 	u32 diagnose_44;
+	u32 diagnose_9c;
 };
 
 struct kvm_s390_io_info {
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index a353f0ea45c2..2d2ae327b747 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -53,6 +53,29 @@ static int __diag_time_slice_end(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
+static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu)
+{
+	struct kvm *kvm = vcpu->kvm;
+	struct kvm_vcpu *tcpu;
+	int tid;
+	int i;
+
+	tid = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4];
+	vcpu->stat.diagnose_9c++;
+	VCPU_EVENT(vcpu, 5, "diag time slice end directed to %d", tid);
+
+	if (tid == vcpu->vcpu_id)
+		return 0;
+
+	kvm_for_each_vcpu(i, tcpu, kvm)
+		if (tcpu->vcpu_id == tid) {
+			kvm_vcpu_yield_to(tcpu);
+			break;
+		}
+
+	return 0;
+}
+
 static int __diag_ipl_functions(struct kvm_vcpu *vcpu)
 {
 	unsigned int reg = vcpu->arch.sie_block->ipa & 0xf;
@@ -89,6 +112,8 @@ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu)
 		return diag_release_pages(vcpu);
 	case 0x44:
 		return __diag_time_slice_end(vcpu);
+	case 0x9c:
+		return __diag_time_slice_end_directed(vcpu);
 	case 0x308:
 		return __diag_ipl_functions(vcpu);
 	default:
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index d30c8350b949..fd98914a36f1 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -74,6 +74,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
 	{ "diagnose_10", VCPU_STAT(diagnose_10) },
 	{ "diagnose_44", VCPU_STAT(diagnose_44) },
+	{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
 	{ NULL }
 };
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 6f343307d72b..cae342d29d1b 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -461,6 +461,7 @@ void mark_page_dirty_in_slot(struct kvm *kvm, struct kvm_memory_slot *memslot,
 
 void kvm_vcpu_block(struct kvm_vcpu *vcpu);
 void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
+bool kvm_vcpu_yield_to(struct kvm_vcpu *target);
 void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu);
 void kvm_resched(struct kvm_vcpu *vcpu);
 void kvm_load_guest_fpu(struct kvm_vcpu *vcpu);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 1847c762d8d9..7e140683ff14 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1543,6 +1543,31 @@ void kvm_resched(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_resched);
 
+bool kvm_vcpu_yield_to(struct kvm_vcpu *target)
+{
+	struct pid *pid;
+	struct task_struct *task = NULL;
+
+	rcu_read_lock();
+	pid = rcu_dereference(target->pid);
+	if (pid)
+		task = get_pid_task(target->pid, PIDTYPE_PID);
+	rcu_read_unlock();
+	if (!task)
+		return false;
+	if (task->flags & PF_VCPU) {
+		put_task_struct(task);
+		return false;
+	}
+	if (yield_to(task, 1)) {
+		put_task_struct(task);
+		return true;
+	}
+	put_task_struct(task);
+	return false;
+}
+EXPORT_SYMBOL_GPL(kvm_vcpu_yield_to);
+
 void kvm_vcpu_on_spin(struct kvm_vcpu *me)
 {
 	struct kvm *kvm = me->kvm;
@@ -1561,8 +1586,6 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
 	 */
 	for (pass = 0; pass < 2 && !yielded; pass++) {
 		kvm_for_each_vcpu(i, vcpu, kvm) {
-			struct task_struct *task = NULL;
-			struct pid *pid;
 			if (!pass && i < last_boosted_vcpu) {
 				i = last_boosted_vcpu;
 				continue;
@@ -1572,24 +1595,11 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
 				continue;
 			if (waitqueue_active(&vcpu->wq))
 				continue;
-			rcu_read_lock();
-			pid = rcu_dereference(vcpu->pid);
-			if (pid)
-				task = get_pid_task(vcpu->pid, PIDTYPE_PID);
-			rcu_read_unlock();
-			if (!task)
-				continue;
-			if (task->flags & PF_VCPU) {
-				put_task_struct(task);
-				continue;
-			}
-			if (yield_to(task, 1)) {
-				put_task_struct(task);
+			if (kvm_vcpu_yield_to(vcpu)) {
 				kvm->last_boosted_vcpu = i;
 				yielded = 1;
 				break;
 			}
-			put_task_struct(task);
 		}
 	}
 }
-- 
cgit v1.3-7-g2ca7


From 4630b130b30be6420394ba31121e111c8771ca08 Mon Sep 17 00:00:00 2001
From: Aaron Sierra <asierra@xes-inc.com>
Date: Wed, 28 Mar 2012 09:43:10 -0500
Subject: mfd: Add LPC driver for Intel ICH chipsets

This driver currently creates resources for use by a forthcoming ICH
chipset GPIO driver. It could be expanded to create the resources for
converting the esb2rom (mtd) and iTCO_wdt (wdt), and potentially more,
drivers to use the mfd model.

Signed-off-by: Aaron Sierra <asierra@xes-inc.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/Kconfig         |   9 +
 drivers/mfd/Makefile        |   1 +
 drivers/mfd/lpc_ich.c       | 719 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/mfd/lpc_ich.h |  41 +++
 4 files changed, 770 insertions(+)
 create mode 100644 drivers/mfd/lpc_ich.c
 create mode 100644 include/linux/mfd/lpc_ich.h

(limited to 'include/linux')

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 11e44386fa9b..c6edba69678a 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -748,6 +748,15 @@ config LPC_SCH
 	  LPC bridge function of the Intel SCH provides support for
 	  System Management Bus and General Purpose I/O.
 
+config LPC_ICH
+	tristate "Intel ICH LPC"
+	depends on PCI
+	select MFD_CORE
+	help
+	  The LPC bridge function of the Intel ICH provides support for
+	  many functional units. This driver provides needed support for
+	  other drivers to control these functions, currently GPIO.
+
 config MFD_RDC321X
 	tristate "Support for RDC-R321x southbridge"
 	select MFD_CORE
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index 05fa538c5efe..c4500c35d5ba 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -99,6 +99,7 @@ obj-$(CONFIG_MFD_DB5500_PRCMU)	+= db5500-prcmu.o
 obj-$(CONFIG_MFD_TIMBERDALE)    += timberdale.o
 obj-$(CONFIG_PMIC_ADP5520)	+= adp5520.o
 obj-$(CONFIG_LPC_SCH)		+= lpc_sch.o
+obj-$(CONFIG_LPC_ICH)		+= lpc_ich.o
 obj-$(CONFIG_MFD_RDC321X)	+= rdc321x-southbridge.o
 obj-$(CONFIG_MFD_JANZ_CMODIO)	+= janz-cmodio.o
 obj-$(CONFIG_MFD_JZ4740_ADC)	+= jz4740-adc.o
diff --git a/drivers/mfd/lpc_ich.c b/drivers/mfd/lpc_ich.c
new file mode 100644
index 000000000000..7e3a7b6ab022
--- /dev/null
+++ b/drivers/mfd/lpc_ich.c
@@ -0,0 +1,719 @@
+/*
+ *  lpc_ich.c - LPC interface for Intel ICH
+ *
+ *  LPC bridge function of the Intel ICH contains many other
+ *  functional units, such as Interrupt controllers, Timers,
+ *  Power Management, System Management, GPIO, RTC, and LPC
+ *  Configuration Registers.
+ *
+ *  This driver is derived from lpc_sch.
+
+ *  Copyright (c) 2011 Extreme Engineering Solution, Inc.
+ *  Author: Aaron Sierra <asierra@xes-inc.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ *  This driver supports the following I/O Controller hubs:
+ *	(See the intel documentation on http://developer.intel.com.)
+ *	document number 290655-003, 290677-014: 82801AA (ICH), 82801AB (ICHO)
+ *	document number 290687-002, 298242-027: 82801BA (ICH2)
+ *	document number 290733-003, 290739-013: 82801CA (ICH3-S)
+ *	document number 290716-001, 290718-007: 82801CAM (ICH3-M)
+ *	document number 290744-001, 290745-025: 82801DB (ICH4)
+ *	document number 252337-001, 252663-008: 82801DBM (ICH4-M)
+ *	document number 273599-001, 273645-002: 82801E (C-ICH)
+ *	document number 252516-001, 252517-028: 82801EB (ICH5), 82801ER (ICH5R)
+ *	document number 300641-004, 300884-013: 6300ESB
+ *	document number 301473-002, 301474-026: 82801F (ICH6)
+ *	document number 313082-001, 313075-006: 631xESB, 632xESB
+ *	document number 307013-003, 307014-024: 82801G (ICH7)
+ *	document number 322896-001, 322897-001: NM10
+ *	document number 313056-003, 313057-017: 82801H (ICH8)
+ *	document number 316972-004, 316973-012: 82801I (ICH9)
+ *	document number 319973-002, 319974-002: 82801J (ICH10)
+ *	document number 322169-001, 322170-003: 5 Series, 3400 Series (PCH)
+ *	document number 320066-003, 320257-008: EP80597 (IICH)
+ *	document number 324645-001, 324646-001: Cougar Point (CPT)
+ *	document number TBD : Patsburg (PBG)
+ *	document number TBD : DH89xxCC
+ *	document number TBD : Panther Point
+ *	document number TBD : Lynx Point
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/acpi.h>
+#include <linux/pci.h>
+#include <linux/mfd/core.h>
+#include <linux/mfd/lpc_ich.h>
+
+#define ACPIBASE		0x40
+#define ACPIBASE_GPE_OFF	0x28
+#define ACPIBASE_GPE_END	0x2f
+#define ACPICTRL		0x44
+
+#define GPIOBASE		0x48
+#define GPIOCTRL		0x4C
+
+static int lpc_ich_acpi_save = -1;
+static int lpc_ich_gpio_save = -1;
+
+static struct resource gpio_ich_res[] = {
+	/* GPIO */
+	{
+		.flags = IORESOURCE_IO,
+	},
+	/* ACPI - GPE0 */
+	{
+		.flags = IORESOURCE_IO,
+	},
+};
+
+enum lpc_cells {
+	LPC_GPIO = 0,
+};
+
+static struct mfd_cell lpc_ich_cells[] = {
+	[LPC_GPIO] = {
+		.name = "gpio_ich",
+		.num_resources = ARRAY_SIZE(gpio_ich_res),
+		.resources = gpio_ich_res,
+		.ignore_resource_conflicts = true,
+	},
+};
+
+/* chipset related info */
+enum lpc_chipsets {
+	LPC_ICH = 0,	/* ICH */
+	LPC_ICH0,	/* ICH0 */
+	LPC_ICH2,	/* ICH2 */
+	LPC_ICH2M,	/* ICH2-M */
+	LPC_ICH3,	/* ICH3-S */
+	LPC_ICH3M,	/* ICH3-M */
+	LPC_ICH4,	/* ICH4 */
+	LPC_ICH4M,	/* ICH4-M */
+	LPC_CICH,	/* C-ICH */
+	LPC_ICH5,	/* ICH5 & ICH5R */
+	LPC_6300ESB,	/* 6300ESB */
+	LPC_ICH6,	/* ICH6 & ICH6R */
+	LPC_ICH6M,	/* ICH6-M */
+	LPC_ICH6W,	/* ICH6W & ICH6RW */
+	LPC_631XESB,	/* 631xESB/632xESB */
+	LPC_ICH7,	/* ICH7 & ICH7R */
+	LPC_ICH7DH,	/* ICH7DH */
+	LPC_ICH7M,	/* ICH7-M & ICH7-U */
+	LPC_ICH7MDH,	/* ICH7-M DH */
+	LPC_NM10,	/* NM10 */
+	LPC_ICH8,	/* ICH8 & ICH8R */
+	LPC_ICH8DH,	/* ICH8DH */
+	LPC_ICH8DO,	/* ICH8DO */
+	LPC_ICH8M,	/* ICH8M */
+	LPC_ICH8ME,	/* ICH8M-E */
+	LPC_ICH9,	/* ICH9 */
+	LPC_ICH9R,	/* ICH9R */
+	LPC_ICH9DH,	/* ICH9DH */
+	LPC_ICH9DO,	/* ICH9DO */
+	LPC_ICH9M,	/* ICH9M */
+	LPC_ICH9ME,	/* ICH9M-E */
+	LPC_ICH10,	/* ICH10 */
+	LPC_ICH10R,	/* ICH10R */
+	LPC_ICH10D,	/* ICH10D */
+	LPC_ICH10DO,	/* ICH10DO */
+	LPC_PCH,	/* PCH Desktop Full Featured */
+	LPC_PCHM,	/* PCH Mobile Full Featured */
+	LPC_P55,	/* P55 */
+	LPC_PM55,	/* PM55 */
+	LPC_H55,	/* H55 */
+	LPC_QM57,	/* QM57 */
+	LPC_H57,	/* H57 */
+	LPC_HM55,	/* HM55 */
+	LPC_Q57,	/* Q57 */
+	LPC_HM57,	/* HM57 */
+	LPC_PCHMSFF,	/* PCH Mobile SFF Full Featured */
+	LPC_QS57,	/* QS57 */
+	LPC_3400,	/* 3400 */
+	LPC_3420,	/* 3420 */
+	LPC_3450,	/* 3450 */
+	LPC_EP80579,	/* EP80579 */
+	LPC_CPT,	/* Cougar Point */
+	LPC_CPTD,	/* Cougar Point Desktop */
+	LPC_CPTM,	/* Cougar Point Mobile */
+	LPC_PBG,	/* Patsburg */
+	LPC_DH89XXCC,	/* DH89xxCC */
+	LPC_PPT,	/* Panther Point */
+	LPC_LPT,	/* Lynx Point */
+};
+
+struct lpc_ich_info lpc_chipset_info[] __devinitdata = {
+	[LPC_ICH] = {
+		.name = "ICH",
+	},
+	[LPC_ICH0] = {
+		.name = "ICH0",
+	},
+	[LPC_ICH2] = {
+		.name = "ICH2",
+	},
+	[LPC_ICH2M] = {
+		.name = "ICH2-M",
+	},
+	[LPC_ICH3] = {
+		.name = "ICH3-S",
+	},
+	[LPC_ICH3M] = {
+		.name = "ICH3-M",
+	},
+	[LPC_ICH4] = {
+		.name = "ICH4",
+	},
+	[LPC_ICH4M] = {
+		.name = "ICH4-M",
+	},
+	[LPC_CICH] = {
+		.name = "C-ICH",
+	},
+	[LPC_ICH5] = {
+		.name = "ICH5 or ICH5R",
+	},
+	[LPC_6300ESB] = {
+		.name = "6300ESB",
+	},
+	[LPC_ICH6] = {
+		.name = "ICH6 or ICH6R",
+		.gpio_version = ICH_V6_GPIO,
+	},
+	[LPC_ICH6M] = {
+		.name = "ICH6-M",
+		.gpio_version = ICH_V6_GPIO,
+	},
+	[LPC_ICH6W] = {
+		.name = "ICH6W or ICH6RW",
+		.gpio_version = ICH_V6_GPIO,
+	},
+	[LPC_631XESB] = {
+		.name = "631xESB/632xESB",
+		.gpio_version = ICH_V6_GPIO,
+	},
+	[LPC_ICH7] = {
+		.name = "ICH7 or ICH7R",
+		.gpio_version = ICH_V7_GPIO,
+	},
+	[LPC_ICH7DH] = {
+		.name = "ICH7DH",
+		.gpio_version = ICH_V7_GPIO,
+	},
+	[LPC_ICH7M] = {
+		.name = "ICH7-M or ICH7-U",
+		.gpio_version = ICH_V7_GPIO,
+	},
+	[LPC_ICH7MDH] = {
+		.name = "ICH7-M DH",
+		.gpio_version = ICH_V7_GPIO,
+	},
+	[LPC_NM10] = {
+		.name = "NM10",
+	},
+	[LPC_ICH8] = {
+		.name = "ICH8 or ICH8R",
+		.gpio_version = ICH_V7_GPIO,
+	},
+	[LPC_ICH8DH] = {
+		.name = "ICH8DH",
+		.gpio_version = ICH_V7_GPIO,
+	},
+	[LPC_ICH8DO] = {
+		.name = "ICH8DO",
+		.gpio_version = ICH_V7_GPIO,
+	},
+	[LPC_ICH8M] = {
+		.name = "ICH8M",
+		.gpio_version = ICH_V7_GPIO,
+	},
+	[LPC_ICH8ME] = {
+		.name = "ICH8M-E",
+		.gpio_version = ICH_V7_GPIO,
+	},
+	[LPC_ICH9] = {
+		.name = "ICH9",
+		.gpio_version = ICH_V9_GPIO,
+	},
+	[LPC_ICH9R] = {
+		.name = "ICH9R",
+		.gpio_version = ICH_V9_GPIO,
+	},
+	[LPC_ICH9DH] = {
+		.name = "ICH9DH",
+		.gpio_version = ICH_V9_GPIO,
+	},
+	[LPC_ICH9DO] = {
+		.name = "ICH9DO",
+		.gpio_version = ICH_V9_GPIO,
+	},
+	[LPC_ICH9M] = {
+		.name = "ICH9M",
+		.gpio_version = ICH_V9_GPIO,
+	},
+	[LPC_ICH9ME] = {
+		.name = "ICH9M-E",
+		.gpio_version = ICH_V9_GPIO,
+	},
+	[LPC_ICH10] = {
+		.name = "ICH10",
+		.gpio_version = ICH_V10CONS_GPIO,
+	},
+	[LPC_ICH10R] = {
+		.name = "ICH10R",
+		.gpio_version = ICH_V10CONS_GPIO,
+	},
+	[LPC_ICH10D] = {
+		.name = "ICH10D",
+		.gpio_version = ICH_V10CORP_GPIO,
+	},
+	[LPC_ICH10DO] = {
+		.name = "ICH10DO",
+		.gpio_version = ICH_V10CORP_GPIO,
+	},
+	[LPC_PCH] = {
+		.name = "PCH Desktop Full Featured",
+		.gpio_version = ICH_V5_GPIO,
+	},
+	[LPC_PCHM] = {
+		.name = "PCH Mobile Full Featured",
+		.gpio_version = ICH_V5_GPIO,
+	},
+	[LPC_P55] = {
+		.name = "P55",
+		.gpio_version = ICH_V5_GPIO,
+	},
+	[LPC_PM55] = {
+		.name = "PM55",
+		.gpio_version = ICH_V5_GPIO,
+	},
+	[LPC_H55] = {
+		.name = "H55",
+		.gpio_version = ICH_V5_GPIO,
+	},
+	[LPC_QM57] = {
+		.name = "QM57",
+		.gpio_version = ICH_V5_GPIO,
+	},
+	[LPC_H57] = {
+		.name = "H57",
+		.gpio_version = ICH_V5_GPIO,
+	},
+	[LPC_HM55] = {
+		.name = "HM55",
+		.gpio_version = ICH_V5_GPIO,
+	},
+	[LPC_Q57] = {
+		.name = "Q57",
+		.gpio_version = ICH_V5_GPIO,
+	},
+	[LPC_HM57] = {
+		.name = "HM57",
+		.gpio_version = ICH_V5_GPIO,
+	},
+	[LPC_PCHMSFF] = {
+		.name = "PCH Mobile SFF Full Featured",
+		.gpio_version = ICH_V5_GPIO,
+	},
+	[LPC_QS57] = {
+		.name = "QS57",
+		.gpio_version = ICH_V5_GPIO,
+	},
+	[LPC_3400] = {
+		.name = "3400",
+		.gpio_version = ICH_V5_GPIO,
+	},
+	[LPC_3420] = {
+		.name = "3420",
+		.gpio_version = ICH_V5_GPIO,
+	},
+	[LPC_3450] = {
+		.name = "3450",
+		.gpio_version = ICH_V5_GPIO,
+	},
+	[LPC_EP80579] = {
+		.name = "EP80579",
+	},
+	[LPC_CPT] = {
+		.name = "Cougar Point",
+		.gpio_version = ICH_V5_GPIO,
+	},
+	[LPC_CPTD] = {
+		.name = "Cougar Point Desktop",
+		.gpio_version = ICH_V5_GPIO,
+	},
+	[LPC_CPTM] = {
+		.name = "Cougar Point Mobile",
+		.gpio_version = ICH_V5_GPIO,
+	},
+	[LPC_PBG] = {
+		.name = "Patsburg",
+	},
+	[LPC_DH89XXCC] = {
+		.name = "DH89xxCC",
+	},
+	[LPC_PPT] = {
+		.name = "Panther Point",
+	},
+	[LPC_LPT] = {
+		.name = "Lynx Point",
+	},
+};
+
+/*
+ * This data only exists for exporting the supported PCI ids
+ * via MODULE_DEVICE_TABLE.  We do not actually register a
+ * pci_driver, because the I/O Controller Hub has also other
+ * functions that probably will be registered by other drivers.
+ */
+static DEFINE_PCI_DEVICE_TABLE(lpc_ich_ids) = {
+	{ PCI_VDEVICE(INTEL, 0x2410), LPC_ICH},
+	{ PCI_VDEVICE(INTEL, 0x2420), LPC_ICH0},
+	{ PCI_VDEVICE(INTEL, 0x2440), LPC_ICH2},
+	{ PCI_VDEVICE(INTEL, 0x244c), LPC_ICH2M},
+	{ PCI_VDEVICE(INTEL, 0x2480), LPC_ICH3},
+	{ PCI_VDEVICE(INTEL, 0x248c), LPC_ICH3M},
+	{ PCI_VDEVICE(INTEL, 0x24c0), LPC_ICH4},
+	{ PCI_VDEVICE(INTEL, 0x24cc), LPC_ICH4M},
+	{ PCI_VDEVICE(INTEL, 0x2450), LPC_CICH},
+	{ PCI_VDEVICE(INTEL, 0x24d0), LPC_ICH5},
+	{ PCI_VDEVICE(INTEL, 0x25a1), LPC_6300ESB},
+	{ PCI_VDEVICE(INTEL, 0x2640), LPC_ICH6},
+	{ PCI_VDEVICE(INTEL, 0x2641), LPC_ICH6M},
+	{ PCI_VDEVICE(INTEL, 0x2642), LPC_ICH6W},
+	{ PCI_VDEVICE(INTEL, 0x2670), LPC_631XESB},
+	{ PCI_VDEVICE(INTEL, 0x2671), LPC_631XESB},
+	{ PCI_VDEVICE(INTEL, 0x2672), LPC_631XESB},
+	{ PCI_VDEVICE(INTEL, 0x2673), LPC_631XESB},
+	{ PCI_VDEVICE(INTEL, 0x2674), LPC_631XESB},
+	{ PCI_VDEVICE(INTEL, 0x2675), LPC_631XESB},
+	{ PCI_VDEVICE(INTEL, 0x2676), LPC_631XESB},
+	{ PCI_VDEVICE(INTEL, 0x2677), LPC_631XESB},
+	{ PCI_VDEVICE(INTEL, 0x2678), LPC_631XESB},
+	{ PCI_VDEVICE(INTEL, 0x2679), LPC_631XESB},
+	{ PCI_VDEVICE(INTEL, 0x267a), LPC_631XESB},
+	{ PCI_VDEVICE(INTEL, 0x267b), LPC_631XESB},
+	{ PCI_VDEVICE(INTEL, 0x267c), LPC_631XESB},
+	{ PCI_VDEVICE(INTEL, 0x267d), LPC_631XESB},
+	{ PCI_VDEVICE(INTEL, 0x267e), LPC_631XESB},
+	{ PCI_VDEVICE(INTEL, 0x267f), LPC_631XESB},
+	{ PCI_VDEVICE(INTEL, 0x27b8), LPC_ICH7},
+	{ PCI_VDEVICE(INTEL, 0x27b0), LPC_ICH7DH},
+	{ PCI_VDEVICE(INTEL, 0x27b9), LPC_ICH7M},
+	{ PCI_VDEVICE(INTEL, 0x27bd), LPC_ICH7MDH},
+	{ PCI_VDEVICE(INTEL, 0x27bc), LPC_NM10},
+	{ PCI_VDEVICE(INTEL, 0x2810), LPC_ICH8},
+	{ PCI_VDEVICE(INTEL, 0x2812), LPC_ICH8DH},
+	{ PCI_VDEVICE(INTEL, 0x2814), LPC_ICH8DO},
+	{ PCI_VDEVICE(INTEL, 0x2815), LPC_ICH8M},
+	{ PCI_VDEVICE(INTEL, 0x2811), LPC_ICH8ME},
+	{ PCI_VDEVICE(INTEL, 0x2918), LPC_ICH9},
+	{ PCI_VDEVICE(INTEL, 0x2916), LPC_ICH9R},
+	{ PCI_VDEVICE(INTEL, 0x2912), LPC_ICH9DH},
+	{ PCI_VDEVICE(INTEL, 0x2914), LPC_ICH9DO},
+	{ PCI_VDEVICE(INTEL, 0x2919), LPC_ICH9M},
+	{ PCI_VDEVICE(INTEL, 0x2917), LPC_ICH9ME},
+	{ PCI_VDEVICE(INTEL, 0x3a18), LPC_ICH10},
+	{ PCI_VDEVICE(INTEL, 0x3a16), LPC_ICH10R},
+	{ PCI_VDEVICE(INTEL, 0x3a1a), LPC_ICH10D},
+	{ PCI_VDEVICE(INTEL, 0x3a14), LPC_ICH10DO},
+	{ PCI_VDEVICE(INTEL, 0x3b00), LPC_PCH},
+	{ PCI_VDEVICE(INTEL, 0x3b01), LPC_PCHM},
+	{ PCI_VDEVICE(INTEL, 0x3b02), LPC_P55},
+	{ PCI_VDEVICE(INTEL, 0x3b03), LPC_PM55},
+	{ PCI_VDEVICE(INTEL, 0x3b06), LPC_H55},
+	{ PCI_VDEVICE(INTEL, 0x3b07), LPC_QM57},
+	{ PCI_VDEVICE(INTEL, 0x3b08), LPC_H57},
+	{ PCI_VDEVICE(INTEL, 0x3b09), LPC_HM55},
+	{ PCI_VDEVICE(INTEL, 0x3b0a), LPC_Q57},
+	{ PCI_VDEVICE(INTEL, 0x3b0b), LPC_HM57},
+	{ PCI_VDEVICE(INTEL, 0x3b0d), LPC_PCHMSFF},
+	{ PCI_VDEVICE(INTEL, 0x3b0f), LPC_QS57},
+	{ PCI_VDEVICE(INTEL, 0x3b12), LPC_3400},
+	{ PCI_VDEVICE(INTEL, 0x3b14), LPC_3420},
+	{ PCI_VDEVICE(INTEL, 0x3b16), LPC_3450},
+	{ PCI_VDEVICE(INTEL, 0x5031), LPC_EP80579},
+	{ PCI_VDEVICE(INTEL, 0x1c41), LPC_CPT},
+	{ PCI_VDEVICE(INTEL, 0x1c42), LPC_CPTD},
+	{ PCI_VDEVICE(INTEL, 0x1c43), LPC_CPTM},
+	{ PCI_VDEVICE(INTEL, 0x1c44), LPC_CPT},
+	{ PCI_VDEVICE(INTEL, 0x1c45), LPC_CPT},
+	{ PCI_VDEVICE(INTEL, 0x1c46), LPC_CPT},
+	{ PCI_VDEVICE(INTEL, 0x1c47), LPC_CPT},
+	{ PCI_VDEVICE(INTEL, 0x1c48), LPC_CPT},
+	{ PCI_VDEVICE(INTEL, 0x1c49), LPC_CPT},
+	{ PCI_VDEVICE(INTEL, 0x1c4a), LPC_CPT},
+	{ PCI_VDEVICE(INTEL, 0x1c4b), LPC_CPT},
+	{ PCI_VDEVICE(INTEL, 0x1c4c), LPC_CPT},
+	{ PCI_VDEVICE(INTEL, 0x1c4d), LPC_CPT},
+	{ PCI_VDEVICE(INTEL, 0x1c4e), LPC_CPT},
+	{ PCI_VDEVICE(INTEL, 0x1c4f), LPC_CPT},
+	{ PCI_VDEVICE(INTEL, 0x1c50), LPC_CPT},
+	{ PCI_VDEVICE(INTEL, 0x1c51), LPC_CPT},
+	{ PCI_VDEVICE(INTEL, 0x1c52), LPC_CPT},
+	{ PCI_VDEVICE(INTEL, 0x1c53), LPC_CPT},
+	{ PCI_VDEVICE(INTEL, 0x1c54), LPC_CPT},
+	{ PCI_VDEVICE(INTEL, 0x1c55), LPC_CPT},
+	{ PCI_VDEVICE(INTEL, 0x1c56), LPC_CPT},
+	{ PCI_VDEVICE(INTEL, 0x1c57), LPC_CPT},
+	{ PCI_VDEVICE(INTEL, 0x1c58), LPC_CPT},
+	{ PCI_VDEVICE(INTEL, 0x1c59), LPC_CPT},
+	{ PCI_VDEVICE(INTEL, 0x1c5a), LPC_CPT},
+	{ PCI_VDEVICE(INTEL, 0x1c5b), LPC_CPT},
+	{ PCI_VDEVICE(INTEL, 0x1c5c), LPC_CPT},
+	{ PCI_VDEVICE(INTEL, 0x1c5d), LPC_CPT},
+	{ PCI_VDEVICE(INTEL, 0x1c5e), LPC_CPT},
+	{ PCI_VDEVICE(INTEL, 0x1c5f), LPC_CPT},
+	{ PCI_VDEVICE(INTEL, 0x1d40), LPC_PBG},
+	{ PCI_VDEVICE(INTEL, 0x1d41), LPC_PBG},
+	{ PCI_VDEVICE(INTEL, 0x2310), LPC_DH89XXCC},
+	{ PCI_VDEVICE(INTEL, 0x1e40), LPC_PPT},
+	{ PCI_VDEVICE(INTEL, 0x1e41), LPC_PPT},
+	{ PCI_VDEVICE(INTEL, 0x1e42), LPC_PPT},
+	{ PCI_VDEVICE(INTEL, 0x1e43), LPC_PPT},
+	{ PCI_VDEVICE(INTEL, 0x1e44), LPC_PPT},
+	{ PCI_VDEVICE(INTEL, 0x1e45), LPC_PPT},
+	{ PCI_VDEVICE(INTEL, 0x1e46), LPC_PPT},
+	{ PCI_VDEVICE(INTEL, 0x1e47), LPC_PPT},
+	{ PCI_VDEVICE(INTEL, 0x1e48), LPC_PPT},
+	{ PCI_VDEVICE(INTEL, 0x1e49), LPC_PPT},
+	{ PCI_VDEVICE(INTEL, 0x1e4a), LPC_PPT},
+	{ PCI_VDEVICE(INTEL, 0x1e4b), LPC_PPT},
+	{ PCI_VDEVICE(INTEL, 0x1e4c), LPC_PPT},
+	{ PCI_VDEVICE(INTEL, 0x1e4d), LPC_PPT},
+	{ PCI_VDEVICE(INTEL, 0x1e4e), LPC_PPT},
+	{ PCI_VDEVICE(INTEL, 0x1e4f), LPC_PPT},
+	{ PCI_VDEVICE(INTEL, 0x1e50), LPC_PPT},
+	{ PCI_VDEVICE(INTEL, 0x1e51), LPC_PPT},
+	{ PCI_VDEVICE(INTEL, 0x1e52), LPC_PPT},
+	{ PCI_VDEVICE(INTEL, 0x1e53), LPC_PPT},
+	{ PCI_VDEVICE(INTEL, 0x1e54), LPC_PPT},
+	{ PCI_VDEVICE(INTEL, 0x1e55), LPC_PPT},
+	{ PCI_VDEVICE(INTEL, 0x1e56), LPC_PPT},
+	{ PCI_VDEVICE(INTEL, 0x1e57), LPC_PPT},
+	{ PCI_VDEVICE(INTEL, 0x1e58), LPC_PPT},
+	{ PCI_VDEVICE(INTEL, 0x1e59), LPC_PPT},
+	{ PCI_VDEVICE(INTEL, 0x1e5a), LPC_PPT},
+	{ PCI_VDEVICE(INTEL, 0x1e5b), LPC_PPT},
+	{ PCI_VDEVICE(INTEL, 0x1e5c), LPC_PPT},
+	{ PCI_VDEVICE(INTEL, 0x1e5d), LPC_PPT},
+	{ PCI_VDEVICE(INTEL, 0x1e5e), LPC_PPT},
+	{ PCI_VDEVICE(INTEL, 0x1e5f), LPC_PPT},
+	{ PCI_VDEVICE(INTEL, 0x8c40), LPC_LPT},
+	{ PCI_VDEVICE(INTEL, 0x8c41), LPC_LPT},
+	{ PCI_VDEVICE(INTEL, 0x8c42), LPC_LPT},
+	{ PCI_VDEVICE(INTEL, 0x8c43), LPC_LPT},
+	{ PCI_VDEVICE(INTEL, 0x8c44), LPC_LPT},
+	{ PCI_VDEVICE(INTEL, 0x8c45), LPC_LPT},
+	{ PCI_VDEVICE(INTEL, 0x8c46), LPC_LPT},
+	{ PCI_VDEVICE(INTEL, 0x8c47), LPC_LPT},
+	{ PCI_VDEVICE(INTEL, 0x8c48), LPC_LPT},
+	{ PCI_VDEVICE(INTEL, 0x8c49), LPC_LPT},
+	{ PCI_VDEVICE(INTEL, 0x8c4a), LPC_LPT},
+	{ PCI_VDEVICE(INTEL, 0x8c4b), LPC_LPT},
+	{ PCI_VDEVICE(INTEL, 0x8c4c), LPC_LPT},
+	{ PCI_VDEVICE(INTEL, 0x8c4d), LPC_LPT},
+	{ PCI_VDEVICE(INTEL, 0x8c4e), LPC_LPT},
+	{ PCI_VDEVICE(INTEL, 0x8c4f), LPC_LPT},
+	{ PCI_VDEVICE(INTEL, 0x8c50), LPC_LPT},
+	{ PCI_VDEVICE(INTEL, 0x8c51), LPC_LPT},
+	{ PCI_VDEVICE(INTEL, 0x8c52), LPC_LPT},
+	{ PCI_VDEVICE(INTEL, 0x8c53), LPC_LPT},
+	{ PCI_VDEVICE(INTEL, 0x8c54), LPC_LPT},
+	{ PCI_VDEVICE(INTEL, 0x8c55), LPC_LPT},
+	{ PCI_VDEVICE(INTEL, 0x8c56), LPC_LPT},
+	{ PCI_VDEVICE(INTEL, 0x8c57), LPC_LPT},
+	{ PCI_VDEVICE(INTEL, 0x8c58), LPC_LPT},
+	{ PCI_VDEVICE(INTEL, 0x8c59), LPC_LPT},
+	{ PCI_VDEVICE(INTEL, 0x8c5a), LPC_LPT},
+	{ PCI_VDEVICE(INTEL, 0x8c5b), LPC_LPT},
+	{ PCI_VDEVICE(INTEL, 0x8c5c), LPC_LPT},
+	{ PCI_VDEVICE(INTEL, 0x8c5d), LPC_LPT},
+	{ PCI_VDEVICE(INTEL, 0x8c5e), LPC_LPT},
+	{ PCI_VDEVICE(INTEL, 0x8c5f), LPC_LPT},
+	{ 0, },			/* End of list */
+};
+MODULE_DEVICE_TABLE(pci, lpc_ich_ids);
+
+static void lpc_ich_restore_config_space(struct pci_dev *dev)
+{
+	if (lpc_ich_acpi_save >= 0) {
+		pci_write_config_byte(dev, ACPICTRL, lpc_ich_acpi_save);
+		lpc_ich_acpi_save = -1;
+	}
+
+	if (lpc_ich_gpio_save >= 0) {
+		pci_write_config_byte(dev, GPIOCTRL, lpc_ich_gpio_save);
+		lpc_ich_gpio_save = -1;
+	}
+}
+
+static void __devinit lpc_ich_enable_acpi_space(struct pci_dev *dev)
+{
+	u8 reg_save;
+
+	pci_read_config_byte(dev, ACPICTRL, &reg_save);
+	pci_write_config_byte(dev, ACPICTRL, reg_save | 0x10);
+	lpc_ich_acpi_save = reg_save;
+}
+
+static void __devinit lpc_ich_enable_gpio_space(struct pci_dev *dev)
+{
+	u8 reg_save;
+
+	pci_read_config_byte(dev, GPIOCTRL, &reg_save);
+	pci_write_config_byte(dev, GPIOCTRL, reg_save | 0x10);
+	lpc_ich_gpio_save = reg_save;
+}
+
+static void __devinit lpc_ich_finalize_cell(struct mfd_cell *cell,
+					const struct pci_device_id *id)
+{
+	cell->platform_data = &lpc_chipset_info[id->driver_data];
+	cell->pdata_size = sizeof(struct lpc_ich_info);
+}
+
+static int __devinit lpc_ich_init_gpio(struct pci_dev *dev,
+				const struct pci_device_id *id)
+{
+	u32 base_addr_cfg;
+	u32 base_addr;
+	int ret;
+	bool acpi_conflict = false;
+	struct resource *res;
+
+	/* Setup power management base register */
+	pci_read_config_dword(dev, ACPIBASE, &base_addr_cfg);
+	base_addr = base_addr_cfg & 0x0000ff80;
+	if (!base_addr) {
+		dev_err(&dev->dev, "I/O space for ACPI uninitialized\n");
+		lpc_ich_cells[LPC_GPIO].num_resources--;
+		goto gpe0_done;
+	}
+
+	res = &gpio_ich_res[ICH_RES_GPE0];
+	res->start = base_addr + ACPIBASE_GPE_OFF;
+	res->end = base_addr + ACPIBASE_GPE_END;
+	ret = acpi_check_resource_conflict(res);
+	if (ret) {
+		/*
+		 * This isn't fatal for the GPIO, but we have to make sure that
+		 * the platform_device subsystem doesn't see this resource
+		 * or it will register an invalid region.
+		 */
+		lpc_ich_cells[LPC_GPIO].num_resources--;
+		acpi_conflict = true;
+	} else {
+		lpc_ich_enable_acpi_space(dev);
+	}
+
+gpe0_done:
+	/* Setup GPIO base register */
+	pci_read_config_dword(dev, GPIOBASE, &base_addr_cfg);
+	base_addr = base_addr_cfg & 0x0000ff80;
+	if (!base_addr) {
+		dev_err(&dev->dev, "I/O space for GPIO uninitialized\n");
+		ret = -ENODEV;
+		goto gpio_done;
+	}
+
+	/* Older devices provide fewer GPIO and have a smaller resource size. */
+	res = &gpio_ich_res[ICH_RES_GPIO];
+	res->start = base_addr;
+	switch (lpc_chipset_info[id->driver_data].gpio_version) {
+	case ICH_V5_GPIO:
+	case ICH_V10CORP_GPIO:
+		res->end = res->start + 128 - 1;
+		break;
+	default:
+		res->end = res->start + 64 - 1;
+		break;
+	}
+
+	ret = acpi_check_resource_conflict(res);
+	if (ret) {
+		/* this isn't necessarily fatal for the GPIO */
+		acpi_conflict = true;
+		goto gpio_done;
+	}
+	lpc_ich_enable_gpio_space(dev);
+
+	lpc_ich_finalize_cell(&lpc_ich_cells[LPC_GPIO], id);
+	ret = mfd_add_devices(&dev->dev, -1, &lpc_ich_cells[LPC_GPIO],
+				1, NULL, 0);
+
+gpio_done:
+	if (acpi_conflict)
+		pr_warn("Resource conflict(s) found affecting %s\n",
+				lpc_ich_cells[LPC_GPIO].name);
+	return ret;
+}
+
+static int __devinit lpc_ich_probe(struct pci_dev *dev,
+				const struct pci_device_id *id)
+{
+	int ret;
+	bool cell_added = false;
+
+	ret = lpc_ich_init_gpio(dev, id);
+	if (!ret)
+		cell_added = true;
+
+	/*
+	 * We only care if at least one or none of the cells registered
+	 * successfully.
+	 */
+	if (!cell_added) {
+		lpc_ich_restore_config_space(dev);
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+static void __devexit lpc_ich_remove(struct pci_dev *dev)
+{
+	mfd_remove_devices(&dev->dev);
+	lpc_ich_restore_config_space(dev);
+}
+
+static struct pci_driver lpc_ich_driver = {
+	.name		= "lpc_ich",
+	.id_table	= lpc_ich_ids,
+	.probe		= lpc_ich_probe,
+	.remove		= __devexit_p(lpc_ich_remove),
+};
+
+static int __init lpc_ich_init(void)
+{
+	return pci_register_driver(&lpc_ich_driver);
+}
+
+static void __exit lpc_ich_exit(void)
+{
+	pci_unregister_driver(&lpc_ich_driver);
+}
+
+module_init(lpc_ich_init);
+module_exit(lpc_ich_exit);
+
+MODULE_AUTHOR("Aaron Sierra <asierra@xes-inc.com>");
+MODULE_DESCRIPTION("LPC interface for Intel ICH");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/mfd/lpc_ich.h b/include/linux/mfd/lpc_ich.h
new file mode 100644
index 000000000000..91300b18219b
--- /dev/null
+++ b/include/linux/mfd/lpc_ich.h
@@ -0,0 +1,41 @@
+/*
+ *  linux/drivers/mfd/lpc_ich.h
+ *
+ *  Copyright (c) 2012 Extreme Engineering Solution, Inc.
+ *  Author: Aaron Sierra <asierra@xes-inc.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#ifndef LPC_ICH_H
+#define LPC_ICH_H
+
+/* GPIO resources */
+#define ICH_RES_GPIO	0
+#define ICH_RES_GPE0	1
+
+/* GPIO compatibility */
+#define ICH_I3100_GPIO		0x401
+#define ICH_V5_GPIO		0x501
+#define ICH_V6_GPIO		0x601
+#define ICH_V7_GPIO		0x701
+#define ICH_V9_GPIO		0x801
+#define ICH_V10CORP_GPIO	0xa01
+#define ICH_V10CONS_GPIO	0xa11
+
+struct lpc_ich_info {
+	char name[32];
+	unsigned int gpio_version;
+};
+
+#endif
-- 
cgit v1.3-7-g2ca7


From 4f304245bb6cfa665ff21b12c059499eafa8b725 Mon Sep 17 00:00:00 2001
From: Paul Parsons <lost.distance@yahoo.com>
Date: Mon, 9 Apr 2012 13:18:31 +0100
Subject: mfd: Set asic3 DS1WM clock_rate

The mfd/asic3 driver does not set the ds1wm_driver_data clock_rate field
before passing the structure to the DS1WM w1 busmaster driver.
This was not noticed before commit 26a6afb, because ds1wm_find_divisor()
unintentionally returned the correct divisor when a zero clock_rate was
passed in. However after that commit DS1WM fails a zero clock_rate:

ds1wm ds1wm: no suitable divisor for 0Hz clock

This patch sets the ds1wm_driver_data clock_rate field.

Signed-off-by: Paul Parsons <lost.distance@yahoo.com>
Acked-by: Philipp Zabel <philipp.zabel@gmail.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/asic3.c       | 9 ++++++---
 include/linux/mfd/asic3.h | 2 ++
 2 files changed, 8 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/asic3.c b/drivers/mfd/asic3.c
index f75dc6733f49..4c3ec8113e7e 100644
--- a/drivers/mfd/asic3.c
+++ b/drivers/mfd/asic3.c
@@ -894,10 +894,13 @@ static int __init asic3_mfd_probe(struct platform_device *pdev,
 	asic3_mmc_resources[0].start >>= asic->bus_shift;
 	asic3_mmc_resources[0].end   >>= asic->bus_shift;
 
-	ret = mfd_add_devices(&pdev->dev, pdev->id,
+	if (pdata->clock_rate) {
+		ds1wm_pdata.clock_rate = pdata->clock_rate;
+		ret = mfd_add_devices(&pdev->dev, pdev->id,
 			&asic3_cell_ds1wm, 1, mem, asic->irq_base);
-	if (ret < 0)
-		goto out;
+		if (ret < 0)
+			goto out;
+	}
 
 	if (mem_sdio && (irq >= 0)) {
 		ret = mfd_add_devices(&pdev->dev, pdev->id,
diff --git a/include/linux/mfd/asic3.h b/include/linux/mfd/asic3.h
index ed793b77a1c5..3fda7e589ccd 100644
--- a/include/linux/mfd/asic3.h
+++ b/include/linux/mfd/asic3.h
@@ -31,6 +31,8 @@ struct asic3_platform_data {
 
 	unsigned int gpio_base;
 
+	unsigned int clock_rate;
+
 	struct asic3_led *leds;
 };
 
-- 
cgit v1.3-7-g2ca7


From 201cf052810d20814a77ca0e0045a2c1a3508a1f Mon Sep 17 00:00:00 2001
From: Laxman Dewangan <ldewangan@nvidia.com>
Date: Wed, 18 Apr 2012 12:13:51 +0200
Subject: mfd: Add support for tps65910 device sleep

Adding support for device sleep through the external input control
signal "SLEEP".
Changing the SLEEP signal state can switch the device into SLEEP and
ACTIVE state.
Also adding sleep configuration for different resources so that they
should be keep on during sleep state of device.

Signed-off-by: Laxman Dewangan <ldewangan@nvidia.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/tps65910.c       | 62 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/mfd/tps65910.h | 14 ++++++++++
 2 files changed, 76 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mfd/tps65910.c b/drivers/mfd/tps65910.c
index bf2b25ebf2ca..ae7f47b6e71b 100644
--- a/drivers/mfd/tps65910.c
+++ b/drivers/mfd/tps65910.c
@@ -90,6 +90,66 @@ static const struct regmap_config tps65910_regmap_config = {
 	.cache_type = REGCACHE_RBTREE,
 };
 
+static int __init tps65910_sleepinit(struct tps65910 *tps65910,
+		struct tps65910_board *pmic_pdata)
+{
+	struct device *dev = NULL;
+	int ret = 0;
+
+	dev = tps65910->dev;
+
+	if (!pmic_pdata->en_dev_slp)
+		return 0;
+
+	/* enabling SLEEP device state */
+	ret = tps65910_set_bits(tps65910, TPS65910_DEVCTRL,
+				DEVCTRL_DEV_SLP_MASK);
+	if (ret < 0) {
+		dev_err(dev, "set dev_slp failed: %d\n", ret);
+		goto err_sleep_init;
+	}
+
+	/* Return if there is no sleep keepon data. */
+	if (!pmic_pdata->slp_keepon)
+		return 0;
+
+	if (pmic_pdata->slp_keepon->therm_keepon) {
+		ret = tps65910_set_bits(tps65910, TPS65910_SLEEP_KEEP_RES_ON,
+				SLEEP_KEEP_RES_ON_THERM_KEEPON_MASK);
+		if (ret < 0) {
+			dev_err(dev, "set therm_keepon failed: %d\n", ret);
+			goto disable_dev_slp;
+		}
+	}
+
+	if (pmic_pdata->slp_keepon->clkout32k_keepon) {
+		ret = tps65910_set_bits(tps65910, TPS65910_SLEEP_KEEP_RES_ON,
+				SLEEP_KEEP_RES_ON_CLKOUT32K_KEEPON_MASK);
+		if (ret < 0) {
+			dev_err(dev, "set clkout32k_keepon failed: %d\n", ret);
+			goto disable_dev_slp;
+		}
+	}
+
+	if (pmic_pdata->slp_keepon->i2chs_keepon) {
+		ret = tps65910_set_bits(tps65910, TPS65910_SLEEP_KEEP_RES_ON,
+				SLEEP_KEEP_RES_ON_I2CHS_KEEPON_MASK);
+		if (ret < 0) {
+			dev_err(dev, "set i2chs_keepon failed: %d\n", ret);
+			goto disable_dev_slp;
+		}
+	}
+
+	return 0;
+
+disable_dev_slp:
+	tps65910_clear_bits(tps65910, TPS65910_DEVCTRL, DEVCTRL_DEV_SLP_MASK);
+
+err_sleep_init:
+	return ret;
+}
+
+
 static int tps65910_i2c_probe(struct i2c_client *i2c,
 			    const struct i2c_device_id *id)
 {
@@ -140,6 +200,8 @@ static int tps65910_i2c_probe(struct i2c_client *i2c,
 
 	tps65910_irq_init(tps65910, init_data->irq, init_data);
 
+	tps65910_sleepinit(tps65910, pmic_plat_data);
+
 	kfree(init_data);
 	return ret;
 
diff --git a/include/linux/mfd/tps65910.h b/include/linux/mfd/tps65910.h
index 1c6c2860d1a6..56903ad04283 100644
--- a/include/linux/mfd/tps65910.h
+++ b/include/linux/mfd/tps65910.h
@@ -783,6 +783,18 @@
 #define TPS65910_SLEEP_CONTROL_EXT_INPUT_EN3		0x4
 #define TPS65911_SLEEP_CONTROL_EXT_INPUT_SLEEP		0x8
 
+/*
+ * Sleep keepon data: Maintains the state in sleep mode
+ * @therm_keepon: Keep on the thermal monitoring in sleep state.
+ * @clkout32k_keepon: Keep on the 32KHz clock output in sleep state.
+ * @i2chs_keepon: Keep on high speed internal clock in sleep state.
+ */
+struct tps65910_sleep_keepon_data {
+	unsigned therm_keepon:1;
+	unsigned clkout32k_keepon:1;
+	unsigned i2chs_keepon:1;
+};
+
 /**
  * struct tps65910_board
  * Board platform data may be used to initialize regulators.
@@ -794,6 +806,8 @@ struct tps65910_board {
 	int irq_base;
 	int vmbch_threshold;
 	int vmbch2_threshold;
+	bool en_dev_slp;
+	struct tps65910_sleep_keepon_data *slp_keepon;
 	bool en_gpio_sleep[TPS6591X_MAX_NUM_GPIO];
 	unsigned long regulator_ext_sleep_control[TPS65910_NUM_REGS];
 	struct regulator_init_data *tps65910_pmic_init_data[TPS65910_NUM_REGS];
-- 
cgit v1.3-7-g2ca7


From 44f72e53382c9c673fd54c3bab67a6b9a2d4526e Mon Sep 17 00:00:00 2001
From: Virupax Sadashivpetimath <virupax.sadashivpetimath@stericsson.com>
Date: Tue, 17 Apr 2012 09:30:14 +0200
Subject: mfd: Add new resources on ab8500 AB8505 and AB9540

The AB8505 and AB9540 has extended support for micro USB
resistance detection, used for detecting chargers. Let's
register resources for this resource. Let's also split off the
separate codec device for AB9540.

Signed-off-by: Virupax Sadashivpetimath <virupax.sadashivpetimath@stericsson.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/ab8500-core.c         | 60 +++++++++++++++++++++++++++++++++++----
 include/linux/mfd/abx500/ab8500.h | 12 ++++++--
 2 files changed, 65 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/ab8500-core.c b/drivers/mfd/ab8500-core.c
index 1f08704f7ae8..ae67612317a8 100644
--- a/drivers/mfd/ab8500-core.c
+++ b/drivers/mfd/ab8500-core.c
@@ -744,6 +744,39 @@ static struct resource __devinitdata ab8500_usb_resources[] = {
 	},
 };
 
+static struct resource __devinitdata ab8505_iddet_resources[] = {
+	{
+		.name  = "KeyDeglitch",
+		.start = AB8505_INT_KEYDEGLITCH,
+		.end   = AB8505_INT_KEYDEGLITCH,
+		.flags = IORESOURCE_IRQ,
+	},
+	{
+		.name  = "KP",
+		.start = AB8505_INT_KP,
+		.end   = AB8505_INT_KP,
+		.flags = IORESOURCE_IRQ,
+	},
+	{
+		.name  = "IKP",
+		.start = AB8505_INT_IKP,
+		.end   = AB8505_INT_IKP,
+		.flags = IORESOURCE_IRQ,
+	},
+	{
+		.name  = "IKR",
+		.start = AB8505_INT_IKR,
+		.end   = AB8505_INT_IKR,
+		.flags = IORESOURCE_IRQ,
+	},
+	{
+		.name  = "KeyStuck",
+		.start = AB8505_INT_KEYSTUCK,
+		.end   = AB8505_INT_KEYSTUCK,
+		.flags = IORESOURCE_IRQ,
+	},
+};
+
 static struct resource __devinitdata ab8500_temp_resources[] = {
 	{
 		.name  = "AB8500_TEMP_WARM",
@@ -802,10 +835,6 @@ static struct mfd_cell __devinitdata abx500_common_devs[] = {
 		.num_resources = ARRAY_SIZE(ab8500_av_acc_detect_resources),
 		.resources = ab8500_av_acc_detect_resources,
 	},
-	{
-		.name = "ab8500-codec",
-	},
-
 	{
 		.name = "ab8500-poweron-key",
 		.num_resources = ARRAY_SIZE(ab8500_poweronkey_db_resources),
@@ -845,6 +874,9 @@ static struct mfd_cell __devinitdata ab8500_devs[] = {
 		.num_resources = ARRAY_SIZE(ab8500_usb_resources),
 		.resources = ab8500_usb_resources,
 	},
+	{
+		.name = "ab8500-codec",
+	},
 };
 
 static struct mfd_cell __devinitdata ab9540_devs[] = {
@@ -858,6 +890,18 @@ static struct mfd_cell __devinitdata ab9540_devs[] = {
 		.num_resources = ARRAY_SIZE(ab8500_usb_resources),
 		.resources = ab8500_usb_resources,
 	},
+	{
+		.name = "ab9540-codec",
+	},
+};
+
+/* Device list common to ab9540 and ab8505 */
+static struct mfd_cell __devinitdata ab9540_ab8505_devs[] = {
+	{
+		.name = "ab-iddet",
+		.num_resources = ARRAY_SIZE(ab8505_iddet_resources),
+		.resources = ab8505_iddet_resources,
+	},
 };
 
 static ssize_t show_chip_id(struct device *dev,
@@ -1125,8 +1169,14 @@ int __devinit ab8500_init(struct ab8500 *ab8500, enum ab8500_version version)
 			      ab8500->irq_base);
 	else
 		ret = mfd_add_devices(ab8500->dev, 0, ab8500_devs,
-			      ARRAY_SIZE(ab9540_devs), NULL,
+			      ARRAY_SIZE(ab8500_devs), NULL,
+			      ab8500->irq_base);
+
+	if (is_ab9540(ab8500) || is_ab8505(ab8500))
+		ret = mfd_add_devices(ab8500->dev, 0, ab9540_ab8505_devs,
+			      ARRAY_SIZE(ab9540_ab8505_devs), NULL,
 			      ab8500->irq_base);
+
 	if (ret)
 		goto out_freeirq;
 
diff --git a/include/linux/mfd/abx500/ab8500.h b/include/linux/mfd/abx500/ab8500.h
index fccc3002f271..d798f5b6a55f 100644
--- a/include/linux/mfd/abx500/ab8500.h
+++ b/include/linux/mfd/abx500/ab8500.h
@@ -194,6 +194,14 @@ enum ab8500_version {
 #define AB9540_INT_GPIO52F		123
 #define AB9540_INT_GPIO53F		124
 #define AB9540_INT_GPIO54F		125 /* not 8505 */
+/* ab8500_irq_regoffset[16] -> IT[Source|Latch|Mask]25 */
+#define AB8505_INT_KEYSTUCK		128
+#define AB8505_INT_IKR			129
+#define AB8505_INT_IKP			130
+#define AB8505_INT_KP			131
+#define AB8505_INT_KEYDEGLITCH		132
+#define AB8505_INT_MODPWRSTATUSF	134
+#define AB8505_INT_MODPWRSTATUSR	135
 
 /*
  * AB8500_AB9540_NR_IRQS is used when configuring the IRQ numbers for the
@@ -203,8 +211,8 @@ enum ab8500_version {
  * which is larger.
  */
 #define AB8500_NR_IRQS			112
-#define AB8505_NR_IRQS			128
-#define AB9540_NR_IRQS			128
+#define AB8505_NR_IRQS			136
+#define AB9540_NR_IRQS			136
 /* This is set to the roof of any AB8500 chip variant IRQ counts */
 #define AB8500_MAX_NR_IRQS		AB9540_NR_IRQS
 
-- 
cgit v1.3-7-g2ca7


From 112a80d29b529d4057777ac2cb4ec15ff5b6d210 Mon Sep 17 00:00:00 2001
From: Jonas Aaberg <jonas.aberg@stericsson.com>
Date: Tue, 17 Apr 2012 09:30:33 +0200
Subject: mfd: Deny ab8500 suspend if i2c transfer is ongoing

If we are in the middle of an I2C transfer we need to deny suspend
of the AB8500 core. Implement an atomic reference counter for the
I2C operations to make sure we don't do this.

Signed-off-by: Jonas Aaberg <jonas.aberg@stericsson.com>
Reviewed-by: Mattias Wallin <mattias.wallin@stericsson.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/ab8500-core.c         | 38 +++++++++++++++++++++++++++++++-------
 include/linux/mfd/abx500/ab8500.h |  6 +++++-
 2 files changed, 36 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/ab8500-core.c b/drivers/mfd/ab8500-core.c
index ae67612317a8..eee95606bdbb 100644
--- a/drivers/mfd/ab8500-core.c
+++ b/drivers/mfd/ab8500-core.c
@@ -161,9 +161,13 @@ static int set_register_interruptible(struct ab8500 *ab8500, u8 bank,
 static int ab8500_set_register(struct device *dev, u8 bank,
 	u8 reg, u8 value)
 {
+	int ret;
 	struct ab8500 *ab8500 = dev_get_drvdata(dev->parent);
 
-	return set_register_interruptible(ab8500, bank, reg, value);
+	atomic_inc(&ab8500->transfer_ongoing);
+	ret = set_register_interruptible(ab8500, bank, reg, value);
+	atomic_dec(&ab8500->transfer_ongoing);
+	return ret;
 }
 
 static int get_register_interruptible(struct ab8500 *ab8500, u8 bank,
@@ -192,9 +196,13 @@ static int get_register_interruptible(struct ab8500 *ab8500, u8 bank,
 static int ab8500_get_register(struct device *dev, u8 bank,
 	u8 reg, u8 *value)
 {
+	int ret;
 	struct ab8500 *ab8500 = dev_get_drvdata(dev->parent);
 
-	return get_register_interruptible(ab8500, bank, reg, value);
+	atomic_inc(&ab8500->transfer_ongoing);
+	ret = get_register_interruptible(ab8500, bank, reg, value);
+	atomic_dec(&ab8500->transfer_ongoing);
+	return ret;
 }
 
 static int mask_and_set_register_interruptible(struct ab8500 *ab8500, u8 bank,
@@ -241,11 +249,14 @@ out:
 static int ab8500_mask_and_set_register(struct device *dev,
 	u8 bank, u8 reg, u8 bitmask, u8 bitvalues)
 {
+	int ret;
 	struct ab8500 *ab8500 = dev_get_drvdata(dev->parent);
 
-	return mask_and_set_register_interruptible(ab8500, bank, reg,
-		bitmask, bitvalues);
-
+	atomic_inc(&ab8500->transfer_ongoing);
+	ret= mask_and_set_register_interruptible(ab8500, bank, reg,
+						 bitmask, bitvalues);
+	atomic_dec(&ab8500->transfer_ongoing);
+	return ret;
 }
 
 static struct abx500_ops ab8500_ops = {
@@ -264,6 +275,7 @@ static void ab8500_irq_lock(struct irq_data *data)
 	struct ab8500 *ab8500 = irq_data_get_irq_chip_data(data);
 
 	mutex_lock(&ab8500->irq_lock);
+	atomic_inc(&ab8500->transfer_ongoing);
 }
 
 static void ab8500_irq_sync_unlock(struct irq_data *data)
@@ -292,7 +304,7 @@ static void ab8500_irq_sync_unlock(struct irq_data *data)
 		reg = AB8500_IT_MASK1_REG + ab8500->irq_reg_offset[i];
 		set_register_interruptible(ab8500, AB8500_INTERRUPT, reg, new);
 	}
-
+	atomic_dec(&ab8500->transfer_ongoing);
 	mutex_unlock(&ab8500->irq_lock);
 }
 
@@ -332,6 +344,8 @@ static irqreturn_t ab8500_irq(int irq, void *dev)
 
 	dev_vdbg(ab8500->dev, "interrupt\n");
 
+	atomic_inc(&ab8500->transfer_ongoing);
+
 	for (i = 0; i < ab8500->mask_size; i++) {
 		int regoffset = ab8500->irq_reg_offset[i];
 		int status;
@@ -355,9 +369,10 @@ static irqreturn_t ab8500_irq(int irq, void *dev)
 
 			handle_nested_irq(ab8500->irq_base + line);
 			value &= ~(1 << bit);
+
 		} while (value);
 	}
-
+	atomic_dec(&ab8500->transfer_ongoing);
 	return IRQ_HANDLED;
 }
 
@@ -411,6 +426,14 @@ static void ab8500_irq_remove(struct ab8500 *ab8500)
 	}
 }
 
+int ab8500_suspend(struct ab8500 *ab8500)
+{
+	if (atomic_read(&ab8500->transfer_ongoing))
+		return -EINVAL;
+	else
+		return 0;
+}
+
 /* AB8500 GPIO Resources */
 static struct resource __devinitdata ab8500_gpio_resources[] = {
 	{
@@ -1059,6 +1082,7 @@ int __devinit ab8500_init(struct ab8500 *ab8500, enum ab8500_version version)
 
 	mutex_init(&ab8500->lock);
 	mutex_init(&ab8500->irq_lock);
+	atomic_set(&ab8500->transfer_ongoing, 0);
 
 	if (version != AB8500_VERSION_UNDEFINED)
 		ab8500->version = version;
diff --git a/include/linux/mfd/abx500/ab8500.h b/include/linux/mfd/abx500/ab8500.h
index d798f5b6a55f..91dd3ef63e99 100644
--- a/include/linux/mfd/abx500/ab8500.h
+++ b/include/linux/mfd/abx500/ab8500.h
@@ -7,6 +7,7 @@
 #ifndef MFD_AB8500_H
 #define MFD_AB8500_H
 
+#include <linux/atomic.h>
 #include <linux/mutex.h>
 
 struct device;
@@ -224,6 +225,7 @@ enum ab8500_version {
  * @dev: parent device
  * @lock: read/write operations lock
  * @irq_lock: genirq bus lock
+ * @transfer_ongoing: 0 if no transfer ongoing
  * @irq: irq line
  * @version: chip version id (e.g. ab8500 or ab9540)
  * @chip_id: chip revision id
@@ -242,7 +244,7 @@ struct ab8500 {
 	struct device	*dev;
 	struct mutex	lock;
 	struct mutex	irq_lock;
-
+	atomic_t	transfer_ongoing;
 	int		irq_base;
 	int		irq;
 	enum ab8500_version version;
@@ -288,6 +290,8 @@ extern int __devinit ab8500_init(struct ab8500 *ab8500,
 				 enum ab8500_version version);
 extern int __devexit ab8500_exit(struct ab8500 *ab8500);
 
+extern int ab8500_suspend(struct ab8500 *ab8500);
+
 static inline int is_ab8500(struct ab8500 *ab)
 {
 	return ab->version == AB8500_VERSION_AB8500;
-- 
cgit v1.3-7-g2ca7


From 3a1556e8662cc425c433b463fcdae138908ca467 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Fri, 27 Apr 2012 13:48:18 -0400
Subject: NFSv2/v3: Simulate the change attribute

Use the ctime to simulate a change attribute for NFSv2 and NFSv3.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/client.c         |  2 +-
 fs/nfs/internal.h       | 12 ++++++++++++
 fs/nfs/nfs2xdr.c        |  2 ++
 fs/nfs/nfs3xdr.c        |  3 +++
 include/linux/nfs_xdr.h |  6 +++---
 5 files changed, 21 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 60f7e4ec842c..a8f8de618d73 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -880,7 +880,7 @@ static int nfs_init_server(struct nfs_server *server,
 	server->options = data->options;
 	server->caps |= NFS_CAP_HARDLINKS|NFS_CAP_SYMLINKS|NFS_CAP_FILEID|
 		NFS_CAP_MODE|NFS_CAP_NLINK|NFS_CAP_OWNER|NFS_CAP_OWNER_GROUP|
-		NFS_CAP_ATIME|NFS_CAP_CTIME|NFS_CAP_MTIME;
+		NFS_CAP_ATIME|NFS_CAP_CTIME|NFS_CAP_MTIME|NFS_CAP_CHANGE_ATTR;
 
 	if (data->rsize)
 		server->rsize = nfs_block_size(data->rsize, NULL);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 0fd1efaf1cff..1855e8fea423 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -493,3 +493,15 @@ unsigned int nfs_page_array_len(unsigned int base, size_t len)
 		PAGE_SIZE - 1) >> PAGE_SHIFT;
 }
 
+/*
+ * Convert a struct timespec into a 64-bit change attribute
+ *
+ * This does approximately the same thing as timespec_to_ns(),
+ * but for calculation efficiency, we multiply the seconds by
+ * 1024*1024*1024.
+ */
+static inline
+u64 nfs_timespec_to_change_attr(const struct timespec *ts)
+{
+	return ((u64)ts->tv_sec << 30) + ts->tv_nsec;
+}
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 1f56000fabbd..c99008e9d8a4 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -313,6 +313,8 @@ static int decode_fattr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
 	p = xdr_decode_time(p, &fattr->atime);
 	p = xdr_decode_time(p, &fattr->mtime);
 	xdr_decode_time(p, &fattr->ctime);
+	fattr->change_attr = nfs_timespec_to_change_attr(&fattr->ctime);
+
 	return 0;
 out_overflow:
 	print_overflow_msg(__func__, xdr);
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 01e53e94f53d..ee284c2b2757 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -675,6 +675,7 @@ static int decode_fattr3(struct xdr_stream *xdr, struct nfs_fattr *fattr)
 	p = xdr_decode_nfstime3(p, &fattr->atime);
 	p = xdr_decode_nfstime3(p, &fattr->mtime);
 	xdr_decode_nfstime3(p, &fattr->ctime);
+	fattr->change_attr = nfs_timespec_to_change_attr(&fattr->ctime);
 
 	fattr->valid |= NFS_ATTR_FATTR_V3;
 	return 0;
@@ -725,12 +726,14 @@ static int decode_wcc_attr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
 		goto out_overflow;
 
 	fattr->valid |= NFS_ATTR_FATTR_PRESIZE
+		| NFS_ATTR_FATTR_PRECHANGE
 		| NFS_ATTR_FATTR_PREMTIME
 		| NFS_ATTR_FATTR_PRECTIME;
 
 	p = xdr_decode_size3(p, &fattr->pre_size);
 	p = xdr_decode_nfstime3(p, &fattr->pre_mtime);
 	xdr_decode_nfstime3(p, &fattr->pre_ctime);
+	fattr->pre_change_attr = nfs_timespec_to_change_attr(&fattr->pre_ctime);
 
 	return 0;
 out_overflow:
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 6deb8f097c42..bc3680885428 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -106,14 +106,14 @@ struct nfs_fattr {
 		| NFS_ATTR_FATTR_FILEID \
 		| NFS_ATTR_FATTR_ATIME \
 		| NFS_ATTR_FATTR_MTIME \
-		| NFS_ATTR_FATTR_CTIME)
+		| NFS_ATTR_FATTR_CTIME \
+		| NFS_ATTR_FATTR_CHANGE)
 #define NFS_ATTR_FATTR_V2 (NFS_ATTR_FATTR \
 		| NFS_ATTR_FATTR_BLOCKS_USED)
 #define NFS_ATTR_FATTR_V3 (NFS_ATTR_FATTR \
 		| NFS_ATTR_FATTR_SPACE_USED)
 #define NFS_ATTR_FATTR_V4 (NFS_ATTR_FATTR \
-		| NFS_ATTR_FATTR_SPACE_USED \
-		| NFS_ATTR_FATTR_CHANGE)
+		| NFS_ATTR_FATTR_SPACE_USED)
 
 /*
  * Info on the file system
-- 
cgit v1.3-7-g2ca7


From 90ff0c548d1220d31f80e498b587393895705e6c Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Fri, 27 Apr 2012 13:48:18 -0400
Subject: NFSv4: Simplify the NFSv4 OPEN compound

Get rid of the post-op GETATTR on the directory in order to reduce
the amount of processing done on the server.

The cost is that if we later need to stat() the directory, then we
know that the ctime and mtime are likely to be invalid.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c       | 11 +----------
 fs/nfs/nfs4xdr.c        | 18 +-----------------
 include/linux/nfs_xdr.h |  2 --
 3 files changed, 2 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 2e0fbff37d1f..f01c3d1b54b7 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -788,7 +788,6 @@ struct nfs4_opendata {
 	struct nfs4_string owner_name;
 	struct nfs4_string group_name;
 	struct nfs_fattr f_attr;
-	struct nfs_fattr dir_attr;
 	struct dentry *dir;
 	struct dentry *dentry;
 	struct nfs4_state_owner *owner;
@@ -804,12 +803,10 @@ struct nfs4_opendata {
 static void nfs4_init_opendata_res(struct nfs4_opendata *p)
 {
 	p->o_res.f_attr = &p->f_attr;
-	p->o_res.dir_attr = &p->dir_attr;
 	p->o_res.seqid = p->o_arg.seqid;
 	p->c_res.seqid = p->c_arg.seqid;
 	p->o_res.server = p->o_arg.server;
 	nfs_fattr_init(&p->f_attr);
-	nfs_fattr_init(&p->dir_attr);
 	nfs_fattr_init_names(&p->f_attr, &p->owner_name, &p->group_name);
 }
 
@@ -843,7 +840,6 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
 	p->o_arg.name = &dentry->d_name;
 	p->o_arg.server = server;
 	p->o_arg.bitmask = server->attr_bitmask;
-	p->o_arg.dir_bitmask = server->cache_consistency_bitmask;
 	p->o_arg.claim = NFS4_OPEN_CLAIM_NULL;
 	if (attrs != NULL && attrs->ia_valid != 0) {
 		__be32 verf[2];
@@ -1611,8 +1607,6 @@ static int _nfs4_recover_proc_open(struct nfs4_opendata *data)
 
 	nfs_fattr_map_and_free_names(NFS_SERVER(dir), &data->f_attr);
 
-	nfs_refresh_inode(dir, o_res->dir_attr);
-
 	if (o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) {
 		status = _nfs4_proc_open_confirm(data);
 		if (status != 0)
@@ -1645,11 +1639,8 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
 
 	nfs_fattr_map_and_free_names(server, &data->f_attr);
 
-	if (o_arg->open_flags & O_CREAT) {
+	if (o_arg->open_flags & O_CREAT)
 		update_changeattr(dir, &o_res->cinfo);
-		nfs_post_op_update_inode(dir, o_res->dir_attr);
-	} else
-		nfs_refresh_inode(dir, o_res->dir_attr);
 	if ((o_res->rflags & NFS4_OPEN_RESULT_LOCKTYPE_POSIX) == 0)
 		server->caps &= ~NFS_CAP_POSIX_LOCK;
 	if(o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) {
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index ac7a3b014d99..6e878dcc0d2d 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -431,20 +431,14 @@ static int nfs4_stat_to_errno(int);
 #define NFS4_enc_open_sz        (compound_encode_hdr_maxsz + \
 				encode_sequence_maxsz + \
 				encode_putfh_maxsz + \
-				encode_savefh_maxsz + \
 				encode_open_maxsz + \
 				encode_getfh_maxsz + \
-				encode_getattr_maxsz + \
-				encode_restorefh_maxsz + \
 				encode_getattr_maxsz)
 #define NFS4_dec_open_sz        (compound_decode_hdr_maxsz + \
 				decode_sequence_maxsz + \
 				decode_putfh_maxsz + \
-				decode_savefh_maxsz + \
 				decode_open_maxsz + \
 				decode_getfh_maxsz + \
-				decode_getattr_maxsz + \
-				decode_restorefh_maxsz + \
 				decode_getattr_maxsz)
 #define NFS4_enc_open_confirm_sz \
 				(compound_encode_hdr_maxsz + \
@@ -2191,12 +2185,9 @@ static void nfs4_xdr_enc_open(struct rpc_rqst *req, struct xdr_stream *xdr,
 	encode_compound_hdr(xdr, req, &hdr);
 	encode_sequence(xdr, &args->seq_args, &hdr);
 	encode_putfh(xdr, args->fh, &hdr);
-	encode_savefh(xdr, &hdr);
 	encode_open(xdr, args, &hdr);
 	encode_getfh(xdr, &hdr);
 	encode_getfattr(xdr, args->bitmask, &hdr);
-	encode_restorefh(xdr, &hdr);
-	encode_getfattr(xdr, args->dir_bitmask, &hdr);
 	encode_nops(&hdr);
 }
 
@@ -6073,9 +6064,6 @@ static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
 	if (status)
 		goto out;
 	status = decode_putfh(xdr);
-	if (status)
-		goto out;
-	status = decode_savefh(xdr);
 	if (status)
 		goto out;
 	status = decode_open(xdr, res);
@@ -6083,11 +6071,7 @@ static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
 		goto out;
 	if (decode_getfh(xdr, &res->fh) != 0)
 		goto out;
-	if (decode_getfattr(xdr, res->f_attr, res->server) != 0)
-		goto out;
-	if (decode_restorefh(xdr) != 0)
-		goto out;
-	decode_getfattr(xdr, res->dir_attr, res->server);
+	decode_getfattr(xdr, res->f_attr, res->server);
 out:
 	return status;
 }
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index bc3680885428..92a929fc97c8 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -338,7 +338,6 @@ struct nfs_openargs {
 	const struct qstr *	name;
 	const struct nfs_server *server;	 /* Needed for ID mapping */
 	const u32 *		bitmask;
-	const u32 *		dir_bitmask;
 	__u32			claim;
 	struct nfs4_sequence_args	seq_args;
 };
@@ -349,7 +348,6 @@ struct nfs_openres {
 	struct nfs4_change_info	cinfo;
 	__u32                   rflags;
 	struct nfs_fattr *      f_attr;
-	struct nfs_fattr *      dir_attr;
 	struct nfs_seqid *	seqid;
 	const struct nfs_server *server;
 	fmode_t			delegation_type;
-- 
cgit v1.3-7-g2ca7


From 7c317fcfbae773e493ecee1c53738db774b1d0ca Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Fri, 27 Apr 2012 13:48:18 -0400
Subject: NFSv4: Simplify the NFSv4 CREATE compound

Get rid of the post-op GETATTR on the directory in order to reduce
the amount of processing done on the server.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c       |  4 ----
 fs/nfs/nfs4xdr.c        | 19 +------------------
 include/linux/nfs_xdr.h |  1 -
 3 files changed, 1 insertion(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index f01c3d1b54b7..619bc1eb157b 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2994,7 +2994,6 @@ struct nfs4_createdata {
 	struct nfs4_create_res res;
 	struct nfs_fh fh;
 	struct nfs_fattr fattr;
-	struct nfs_fattr dir_fattr;
 };
 
 static struct nfs4_createdata *nfs4_alloc_createdata(struct inode *dir,
@@ -3018,9 +3017,7 @@ static struct nfs4_createdata *nfs4_alloc_createdata(struct inode *dir,
 		data->res.server = server;
 		data->res.fh = &data->fh;
 		data->res.fattr = &data->fattr;
-		data->res.dir_fattr = &data->dir_fattr;
 		nfs_fattr_init(data->res.fattr);
-		nfs_fattr_init(data->res.dir_fattr);
 	}
 	return data;
 }
@@ -3031,7 +3028,6 @@ static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_
 				    &data->arg.seq_args, &data->res.seq_res, 1);
 	if (status == 0) {
 		update_changeattr(dir, &data->res.dir_cinfo);
-		nfs_post_op_update_inode(dir, data->res.dir_fattr);
 		status = nfs_instantiate(dentry, data->res.fh, data->res.fattr);
 	}
 	return status;
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 6e878dcc0d2d..1a70097a8dc7 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -647,20 +647,14 @@ static int nfs4_stat_to_errno(int);
 #define NFS4_enc_create_sz	(compound_encode_hdr_maxsz + \
 				encode_sequence_maxsz + \
 				encode_putfh_maxsz + \
-				encode_savefh_maxsz + \
 				encode_create_maxsz + \
 				encode_getfh_maxsz + \
-				encode_getattr_maxsz + \
-				encode_restorefh_maxsz + \
 				encode_getattr_maxsz)
 #define NFS4_dec_create_sz	(compound_decode_hdr_maxsz + \
 				decode_sequence_maxsz + \
 				decode_putfh_maxsz + \
-				decode_savefh_maxsz + \
 				decode_create_maxsz + \
 				decode_getfh_maxsz + \
-				decode_getattr_maxsz + \
-				decode_restorefh_maxsz + \
 				decode_getattr_maxsz)
 #define NFS4_enc_pathconf_sz	(compound_encode_hdr_maxsz + \
 				encode_sequence_maxsz + \
@@ -2119,12 +2113,9 @@ static void nfs4_xdr_enc_create(struct rpc_rqst *req, struct xdr_stream *xdr,
 	encode_compound_hdr(xdr, req, &hdr);
 	encode_sequence(xdr, &args->seq_args, &hdr);
 	encode_putfh(xdr, args->dir_fh, &hdr);
-	encode_savefh(xdr, &hdr);
 	encode_create(xdr, args, &hdr);
 	encode_getfh(xdr, &hdr);
 	encode_getfattr(xdr, args->bitmask, &hdr);
-	encode_restorefh(xdr, &hdr);
-	encode_getfattr(xdr, args->bitmask, &hdr);
 	encode_nops(&hdr);
 }
 
@@ -5893,9 +5884,6 @@ static int nfs4_xdr_dec_create(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
 	if (status)
 		goto out;
 	status = decode_putfh(xdr);
-	if (status)
-		goto out;
-	status = decode_savefh(xdr);
 	if (status)
 		goto out;
 	status = decode_create(xdr, &res->dir_cinfo);
@@ -5904,12 +5892,7 @@ static int nfs4_xdr_dec_create(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
 	status = decode_getfh(xdr, res->fh);
 	if (status)
 		goto out;
-	if (decode_getfattr(xdr, res->fattr, res->server))
-		goto out;
-	status = decode_restorefh(xdr);
-	if (status)
-		goto out;
-	decode_getfattr(xdr, res->dir_fattr, res->server);
+	decode_getfattr(xdr, res->fattr, res->server);
 out:
 	return status;
 }
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 92a929fc97c8..696a17e047be 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -855,7 +855,6 @@ struct nfs4_create_res {
 	struct nfs_fh *			fh;
 	struct nfs_fattr *		fattr;
 	struct nfs4_change_info		dir_cinfo;
-	struct nfs_fattr *		dir_fattr;
 	struct nfs4_sequence_res	seq_res;
 };
 
-- 
cgit v1.3-7-g2ca7


From 778d28172f710184855bcfeadcdd6b46997c4de2 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Fri, 27 Apr 2012 13:48:19 -0400
Subject: NFSv4: Simplify the NFSv4 REMOVE, LINK and RENAME compounds

Get rid of the post-op GETATTR on the directory in order to reduce
the amount of processing done on the server.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c       | 34 +++-------------------------------
 fs/nfs/nfs4xdr.c        | 37 ++++---------------------------------
 include/linux/nfs_xdr.h |  2 --
 3 files changed, 7 insertions(+), 66 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 619bc1eb157b..c746b0cab499 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2775,7 +2775,6 @@ static int _nfs4_proc_remove(struct inode *dir, struct qstr *name)
 		.fh = NFS_FH(dir),
 		.name.len = name->len,
 		.name.name = name->name,
-		.bitmask = server->attr_bitmask,
 	};
 	struct nfs_removeres res = {
 		.server = server,
@@ -2785,19 +2784,11 @@ static int _nfs4_proc_remove(struct inode *dir, struct qstr *name)
 		.rpc_argp = &args,
 		.rpc_resp = &res,
 	};
-	int status = -ENOMEM;
-
-	res.dir_attr = nfs_alloc_fattr();
-	if (res.dir_attr == NULL)
-		goto out;
+	int status;
 
 	status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 1);
-	if (status == 0) {
+	if (status == 0)
 		update_changeattr(dir, &res.cinfo);
-		nfs_post_op_update_inode(dir, res.dir_attr);
-	}
-	nfs_free_fattr(res.dir_attr);
-out:
 	return status;
 }
 
@@ -2819,7 +2810,6 @@ static void nfs4_proc_unlink_setup(struct rpc_message *msg, struct inode *dir)
 	struct nfs_removeargs *args = msg->rpc_argp;
 	struct nfs_removeres *res = msg->rpc_resp;
 
-	args->bitmask = server->cache_consistency_bitmask;
 	res->server = server;
 	msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE];
 	nfs41_init_sequence(&args->seq_args, &res->seq_res, 1);
@@ -2844,7 +2834,6 @@ static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir)
 	if (nfs4_async_handle_error(task, res->server, NULL) == -EAGAIN)
 		return 0;
 	update_changeattr(dir, &res->cinfo);
-	nfs_post_op_update_inode(dir, res->dir_attr);
 	return 1;
 }
 
@@ -2855,7 +2844,6 @@ static void nfs4_proc_rename_setup(struct rpc_message *msg, struct inode *dir)
 	struct nfs_renameres *res = msg->rpc_resp;
 
 	msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME];
-	arg->bitmask = server->attr_bitmask;
 	res->server = server;
 	nfs41_init_sequence(&arg->seq_args, &res->seq_res, 1);
 }
@@ -2881,9 +2869,7 @@ static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir,
 		return 0;
 
 	update_changeattr(old_dir, &res->old_cinfo);
-	nfs_post_op_update_inode(old_dir, res->old_fattr);
 	update_changeattr(new_dir, &res->new_cinfo);
-	nfs_post_op_update_inode(new_dir, res->new_fattr);
 	return 1;
 }
 
@@ -2896,7 +2882,6 @@ static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name,
 		.new_dir = NFS_FH(new_dir),
 		.old_name = old_name,
 		.new_name = new_name,
-		.bitmask = server->attr_bitmask,
 	};
 	struct nfs_renameres res = {
 		.server = server,
@@ -2908,21 +2893,11 @@ static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name,
 	};
 	int status = -ENOMEM;
 	
-	res.old_fattr = nfs_alloc_fattr();
-	res.new_fattr = nfs_alloc_fattr();
-	if (res.old_fattr == NULL || res.new_fattr == NULL)
-		goto out;
-
 	status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
 	if (!status) {
 		update_changeattr(old_dir, &res.old_cinfo);
-		nfs_post_op_update_inode(old_dir, res.old_fattr);
 		update_changeattr(new_dir, &res.new_cinfo);
-		nfs_post_op_update_inode(new_dir, res.new_fattr);
 	}
-out:
-	nfs_free_fattr(res.new_fattr);
-	nfs_free_fattr(res.old_fattr);
 	return status;
 }
 
@@ -2960,18 +2935,15 @@ static int _nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *
 	int status = -ENOMEM;
 
 	res.fattr = nfs_alloc_fattr();
-	res.dir_attr = nfs_alloc_fattr();
-	if (res.fattr == NULL || res.dir_attr == NULL)
+	if (res.fattr == NULL)
 		goto out;
 
 	status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
 	if (!status) {
 		update_changeattr(dir, &res.cinfo);
-		nfs_post_op_update_inode(dir, res.dir_attr);
 		nfs_post_op_update_inode(inode, res.fattr);
 	}
 out:
-	nfs_free_fattr(res.dir_attr);
 	nfs_free_fattr(res.fattr);
 	return status;
 }
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 1a70097a8dc7..49483f19c5d3 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -589,38 +589,29 @@ static int nfs4_stat_to_errno(int);
 #define NFS4_enc_remove_sz	(compound_encode_hdr_maxsz + \
 				encode_sequence_maxsz + \
 				encode_putfh_maxsz + \
-				encode_remove_maxsz + \
-				encode_getattr_maxsz)
+				encode_remove_maxsz)
 #define NFS4_dec_remove_sz	(compound_decode_hdr_maxsz + \
 				decode_sequence_maxsz + \
 				decode_putfh_maxsz + \
-				decode_remove_maxsz + \
-				decode_getattr_maxsz)
+				decode_remove_maxsz)
 #define NFS4_enc_rename_sz	(compound_encode_hdr_maxsz + \
 				encode_sequence_maxsz + \
 				encode_putfh_maxsz + \
 				encode_savefh_maxsz + \
 				encode_putfh_maxsz + \
-				encode_rename_maxsz + \
-				encode_getattr_maxsz + \
-				encode_restorefh_maxsz + \
-				encode_getattr_maxsz)
+				encode_rename_maxsz)
 #define NFS4_dec_rename_sz	(compound_decode_hdr_maxsz + \
 				decode_sequence_maxsz + \
 				decode_putfh_maxsz + \
 				decode_savefh_maxsz + \
 				decode_putfh_maxsz + \
-				decode_rename_maxsz + \
-				decode_getattr_maxsz + \
-				decode_restorefh_maxsz + \
-				decode_getattr_maxsz)
+				decode_rename_maxsz)
 #define NFS4_enc_link_sz	(compound_encode_hdr_maxsz + \
 				encode_sequence_maxsz + \
 				encode_putfh_maxsz + \
 				encode_savefh_maxsz + \
 				encode_putfh_maxsz + \
 				encode_link_maxsz + \
-				encode_getattr_maxsz + \
 				encode_restorefh_maxsz + \
 				encode_getattr_maxsz)
 #define NFS4_dec_link_sz	(compound_decode_hdr_maxsz + \
@@ -629,7 +620,6 @@ static int nfs4_stat_to_errno(int);
 				decode_savefh_maxsz + \
 				decode_putfh_maxsz + \
 				decode_link_maxsz + \
-				decode_getattr_maxsz + \
 				decode_restorefh_maxsz + \
 				decode_getattr_maxsz)
 #define NFS4_enc_symlink_sz	(compound_encode_hdr_maxsz + \
@@ -2052,7 +2042,6 @@ static void nfs4_xdr_enc_remove(struct rpc_rqst *req, struct xdr_stream *xdr,
 	encode_sequence(xdr, &args->seq_args, &hdr);
 	encode_putfh(xdr, args->fh, &hdr);
 	encode_remove(xdr, &args->name, &hdr);
-	encode_getfattr(xdr, args->bitmask, &hdr);
 	encode_nops(&hdr);
 }
 
@@ -2072,9 +2061,6 @@ static void nfs4_xdr_enc_rename(struct rpc_rqst *req, struct xdr_stream *xdr,
 	encode_savefh(xdr, &hdr);
 	encode_putfh(xdr, args->new_dir, &hdr);
 	encode_rename(xdr, args->old_name, args->new_name, &hdr);
-	encode_getfattr(xdr, args->bitmask, &hdr);
-	encode_restorefh(xdr, &hdr);
-	encode_getfattr(xdr, args->bitmask, &hdr);
 	encode_nops(&hdr);
 }
 
@@ -2094,7 +2080,6 @@ static void nfs4_xdr_enc_link(struct rpc_rqst *req, struct xdr_stream *xdr,
 	encode_savefh(xdr, &hdr);
 	encode_putfh(xdr, args->dir_fh, &hdr);
 	encode_link(xdr, args->name, &hdr);
-	encode_getfattr(xdr, args->bitmask, &hdr);
 	encode_restorefh(xdr, &hdr);
 	encode_getfattr(xdr, args->bitmask, &hdr);
 	encode_nops(&hdr);
@@ -5782,9 +5767,6 @@ static int nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
 	if (status)
 		goto out;
 	status = decode_remove(xdr, &res->cinfo);
-	if (status)
-		goto out;
-	decode_getfattr(xdr, res->dir_attr, res->server);
 out:
 	return status;
 }
@@ -5814,15 +5796,6 @@ static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
 	if (status)
 		goto out;
 	status = decode_rename(xdr, &res->old_cinfo, &res->new_cinfo);
-	if (status)
-		goto out;
-	/* Current FH is target directory */
-	if (decode_getfattr(xdr, res->new_fattr, res->server))
-		goto out;
-	status = decode_restorefh(xdr);
-	if (status)
-		goto out;
-	decode_getfattr(xdr, res->old_fattr, res->server);
 out:
 	return status;
 }
@@ -5858,8 +5831,6 @@ static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
 	 * Note order: OP_LINK leaves the directory as the current
 	 *             filehandle.
 	 */
-	if (decode_getfattr(xdr, res->dir_attr, res->server))
-		goto out;
 	status = decode_restorefh(xdr);
 	if (status)
 		goto out;
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 696a17e047be..2e53a3f1d2ff 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -540,7 +540,6 @@ struct nfs_commitres {
 struct nfs_removeargs {
 	const struct nfs_fh	*fh;
 	struct qstr		name;
-	const u32 *		bitmask;
 	struct nfs4_sequence_args	seq_args;
 };
 
@@ -559,7 +558,6 @@ struct nfs_renameargs {
 	const struct nfs_fh		*new_dir;
 	const struct qstr		*old_name;
 	const struct qstr		*new_name;
-	const u32			*bitmask;
 	struct nfs4_sequence_args	seq_args;
 };
 
-- 
cgit v1.3-7-g2ca7


From d69ee9b85541a69a1092f5da675bd23256dc62af Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 1 May 2012 17:37:59 -0400
Subject: NFS: Adapt readdirplus to application usage patterns

While the use of READDIRPLUS is significantly more efficient than
READDIR followed by many LOOKUP calls, it is still less efficient
than just READDIR if the attributes are not required.

This patch tracks when lookups are attempted on the directory,
and uses that information to selectively disable READDIRPLUS
on that directory.
The first 'readdir' call is always served using READDIRPLUS.
Subsequent calls only use READDIRPLUS if there was a successful
lookup or revalidation on a child in the mean time.

Credit for the original idea should go to Neil Brown. See:
      http://www.spinics.net/lists/linux-nfs/msg19996.html
However, the implementation in this patch differs from Neil's
in that it focuses on tracking lookups rather than calls to
stat().

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: Neil Brown <neilb@suse.de>
---
 fs/nfs/dir.c           | 33 +++++++++++++++++++++++++++++++--
 fs/nfs/inode.c         |  2 --
 include/linux/nfs_fs.h |  5 -----
 3 files changed, 31 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 82b42e2ea65c..d0884c0d9464 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -474,6 +474,29 @@ different:
 	return 0;
 }
 
+static
+bool nfs_use_readdirplus(struct inode *dir, struct file *filp)
+{
+	if (!nfs_server_capable(dir, NFS_CAP_READDIRPLUS))
+		return false;
+	if (test_and_clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(dir)->flags))
+		return true;
+	if (filp->f_pos == 0)
+		return true;
+	return false;
+}
+
+/*
+ * This function is called by the lookup code to request the use of
+ * readdirplus to accelerate any future lookups in the same
+ * directory.
+ */
+static
+void nfs_advise_use_readdirplus(struct inode *dir)
+{
+	set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(dir)->flags);
+}
+
 static
 void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry)
 {
@@ -874,7 +897,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 	desc->file = filp;
 	desc->dir_cookie = &dir_ctx->dir_cookie;
 	desc->decode = NFS_PROTO(inode)->decode_dirent;
-	desc->plus = NFS_USE_READDIRPLUS(inode);
+	desc->plus = nfs_use_readdirplus(inode, filp) ? 1 : 0;
 
 	nfs_block_sillyrename(dentry);
 	res = nfs_revalidate_mapping(inode, filp->f_mapping);
@@ -1114,7 +1137,7 @@ static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
 	if (!inode) {
 		if (nfs_neg_need_reval(dir, dentry, nd))
 			goto out_bad;
-		goto out_valid;
+		goto out_valid_noent;
 	}
 
 	if (is_bad_inode(inode)) {
@@ -1156,6 +1179,9 @@ static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
 out_set_verifier:
 	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
  out_valid:
+	/* Success: notify readdir to use READDIRPLUS */
+	nfs_advise_use_readdirplus(dir);
+ out_valid_noent:
 	dput(parent);
 	dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is valid\n",
 			__func__, dentry->d_parent->d_name.name,
@@ -1311,6 +1337,9 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
 	if (IS_ERR(res))
 		goto out_unblock_sillyrename;
 
+	/* Success: notify readdir to use READDIRPLUS */
+	nfs_advise_use_readdirplus(dir);
+
 no_entry:
 	res = d_materialise_unique(dentry, inode);
 	if (res != NULL) {
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 0d53113207e5..9f17cd19e710 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -298,8 +298,6 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
 			inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops;
 			inode->i_fop = &nfs_dir_operations;
 			inode->i_data.a_ops = &nfs_dir_aops;
-			if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS))
-				set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
 			/* Deal with crossing mountpoints */
 			if (fattr->valid & NFS_ATTR_FATTR_MOUNTPOINT ||
 					fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) {
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 8a88c16662c5..6cc7dbaf0695 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -314,11 +314,6 @@ static inline int nfs_server_capable(struct inode *inode, int cap)
 	return NFS_SERVER(inode)->caps & cap;
 }
 
-static inline int NFS_USE_READDIRPLUS(struct inode *inode)
-{
-	return test_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
-}
-
 static inline void nfs_set_verifier(struct dentry * dentry, unsigned long verf)
 {
 	dentry->d_time = verf;
-- 
cgit v1.3-7-g2ca7


From e447c50e3af5dcad3075c80bd1bdc4e2024b8186 Mon Sep 17 00:00:00 2001
From: Rajendra Nayak <rnayak@ti.com>
Date: Fri, 27 Apr 2012 17:58:13 +0530
Subject: clk: constify parent name arrays in macros

parent name array is now expected to be const char *, make
the relevent changes in the clk macros which define
default clock types.

Signed-off-by: Rajendra Nayak <rnayak@ti.com>
Signed-off-by: Mike Turquette <mturquette@linaro.org>
---
 include/linux/clk-private.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/clk-private.h b/include/linux/clk-private.h
index eeae7a3cfc45..6ebec83f1a77 100644
--- a/include/linux/clk-private.h
+++ b/include/linux/clk-private.h
@@ -70,7 +70,7 @@ struct clk {
 #define DEFINE_CLK_FIXED_RATE(_name, _flags, _rate,		\
 				_fixed_rate_flags)		\
 	static struct clk _name;				\
-	static char *_name##_parent_names[] = {};		\
+	static const char *_name##_parent_names[] = {};		\
 	static struct clk_fixed_rate _name##_hw = {		\
 		.hw = {						\
 			.clk = &_name,				\
@@ -85,7 +85,7 @@ struct clk {
 				_flags, _reg, _bit_idx,		\
 				_gate_flags, _lock)		\
 	static struct clk _name;				\
-	static char *_name##_parent_names[] = {			\
+	static const char *_name##_parent_names[] = {		\
 		_parent_name,					\
 	};							\
 	static struct clk *_name##_parents[] = {		\
@@ -107,7 +107,7 @@ struct clk {
 				_flags, _reg, _shift, _width,	\
 				_divider_flags, _lock)		\
 	static struct clk _name;				\
-	static char *_name##_parent_names[] = {			\
+	static const char *_name##_parent_names[] = {		\
 		_parent_name,					\
 	};							\
 	static struct clk *_name##_parents[] = {		\
-- 
cgit v1.3-7-g2ca7


From 0197b3ea0f66cd2a11417f58fe1812858ea77908 Mon Sep 17 00:00:00 2001
From: Saravana Kannan <skannan@codeaurora.org>
Date: Wed, 25 Apr 2012 22:58:56 -0700
Subject: clk: Use a separate struct for holding init data.

Create a struct clk_init_data to hold all data that needs to be passed from
the platfrom specific driver to the common clock framework during clock
registration. Add a pointer to this struct inside clk_hw.

This has several advantages:
* Completely hides struct clk from many clock platform drivers and static
  clock initialization code that don't care for static initialization of
  the struct clks.
* For platforms that want to do complete static initialization, it removed
  the need to directly mess with the struct clk's fields while still
  allowing to statically allocate struct clk. This keeps the code more
  future proof even if they include clk-private.h.
* Simplifies the generic clk_register() function and allows adding optional
  fields in the future without modifying the function signature.
* Simplifies the static initialization of clocks on all platforms by
  removing the need for forward delcarations or convoluted macros.

Signed-off-by: Saravana Kannan <skannan@codeaurora.org>
[mturquette@linaro.org: kept DEFINE_CLK_* macros and __clk_init]
Signed-off-by: Mike Turquette <mturquette@linaro.org>
Cc: Andrew Lunn <andrew@lunn.ch>
Cc: Rob Herring <rob.herring@calxeda.com>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Jeremy Kerr <jeremy.kerr@canonical.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Arnd Bergman <arnd.bergmann@linaro.org>
Cc: Paul Walmsley <paul@pwsan.com>
Cc: Shawn Guo <shawn.guo@freescale.com>
Cc: Sascha Hauer <s.hauer@pengutronix.de>
Cc: Jamie Iles <jamie@jamieiles.com>
Cc: Richard Zhao <richard.zhao@linaro.org>
Cc: Saravana Kannan <skannan@codeaurora.org>
Cc: Magnus Damm <magnus.damm@gmail.com>
Cc: Mark Brown <broonie@opensource.wolfsonmicro.com>
Cc: Linus Walleij <linus.walleij@stericsson.com>
Cc: Stephen Boyd <sboyd@codeaurora.org>
Cc: Amit Kucheria <amit.kucheria@linaro.org>
Cc: Deepak Saxena <dsaxena@linaro.org>
Cc: Grant Likely <grant.likely@secretlab.ca>
---
 drivers/clk/clk-divider.c    | 14 ++++---
 drivers/clk/clk-fixed-rate.c | 14 ++++---
 drivers/clk/clk-gate.c       | 15 +++++---
 drivers/clk/clk-mux.c        | 10 ++++-
 drivers/clk/clk.c            | 89 +++++++++++++++++++++++++++-----------------
 include/linux/clk-private.h  |  2 +
 include/linux/clk-provider.h | 59 ++++++++++++++++++-----------
 7 files changed, 129 insertions(+), 74 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/clk/clk-divider.c b/drivers/clk/clk-divider.c
index 90627e4069af..8ea11b444528 100644
--- a/drivers/clk/clk-divider.c
+++ b/drivers/clk/clk-divider.c
@@ -167,6 +167,7 @@ struct clk *clk_register_divider(struct device *dev, const char *name,
 {
 	struct clk_divider *div;
 	struct clk *clk;
+	struct clk_init_data init;
 
 	/* allocate the divider */
 	div = kzalloc(sizeof(struct clk_divider), GFP_KERNEL);
@@ -175,19 +176,22 @@ struct clk *clk_register_divider(struct device *dev, const char *name,
 		return ERR_PTR(-ENOMEM);
 	}
 
+	init.name = name;
+	init.ops = &clk_divider_ops;
+	init.flags = flags;
+	init.parent_names = (parent_name ? &parent_name: NULL);
+	init.num_parents = (parent_name ? 1 : 0);
+
 	/* struct clk_divider assignments */
 	div->reg = reg;
 	div->shift = shift;
 	div->width = width;
 	div->flags = clk_divider_flags;
 	div->lock = lock;
+	div->hw.init = &init;
 
 	/* register the clock */
-	clk = clk_register(dev, name,
-			&clk_divider_ops, &div->hw,
-			(parent_name ? &parent_name: NULL),
-			(parent_name ? 1 : 0),
-			flags);
+	clk = clk_register(dev, &div->hw);
 
 	if (IS_ERR(clk))
 		kfree(div);
diff --git a/drivers/clk/clk-fixed-rate.c b/drivers/clk/clk-fixed-rate.c
index b555a04c8df8..cbd246229786 100644
--- a/drivers/clk/clk-fixed-rate.c
+++ b/drivers/clk/clk-fixed-rate.c
@@ -52,6 +52,7 @@ struct clk *clk_register_fixed_rate(struct device *dev, const char *name,
 {
 	struct clk_fixed_rate *fixed;
 	struct clk *clk;
+	struct clk_init_data init;
 
 	/* allocate fixed-rate clock */
 	fixed = kzalloc(sizeof(struct clk_fixed_rate), GFP_KERNEL);
@@ -60,15 +61,18 @@ struct clk *clk_register_fixed_rate(struct device *dev, const char *name,
 		return ERR_PTR(-ENOMEM);
 	}
 
+	init.name = name;
+	init.ops = &clk_fixed_rate_ops;
+	init.flags = flags;
+	init.parent_names = (parent_name ? &parent_name: NULL);
+	init.num_parents = (parent_name ? 1 : 0);
+
 	/* struct clk_fixed_rate assignments */
 	fixed->fixed_rate = fixed_rate;
+	fixed->hw.init = &init;
 
 	/* register the clock */
-	clk = clk_register(dev, name,
-			&clk_fixed_rate_ops, &fixed->hw,
-			(parent_name ? &parent_name : NULL),
-			(parent_name ? 1 : 0),
-			flags);
+	clk = clk_register(dev, &fixed->hw);
 
 	if (IS_ERR(clk))
 		kfree(fixed);
diff --git a/drivers/clk/clk-gate.c b/drivers/clk/clk-gate.c
index 00216164fb9d..578465e04be6 100644
--- a/drivers/clk/clk-gate.c
+++ b/drivers/clk/clk-gate.c
@@ -119,6 +119,7 @@ struct clk *clk_register_gate(struct device *dev, const char *name,
 {
 	struct clk_gate *gate;
 	struct clk *clk;
+	struct clk_init_data init;
 
 	/* allocate the gate */
 	gate = kzalloc(sizeof(struct clk_gate), GFP_KERNEL);
@@ -127,18 +128,20 @@ struct clk *clk_register_gate(struct device *dev, const char *name,
 		return ERR_PTR(-ENOMEM);
 	}
 
+	init.name = name;
+	init.ops = &clk_gate_ops;
+	init.flags = flags;
+	init.parent_names = (parent_name ? &parent_name: NULL);
+	init.num_parents = (parent_name ? 1 : 0);
+
 	/* struct clk_gate assignments */
 	gate->reg = reg;
 	gate->bit_idx = bit_idx;
 	gate->flags = clk_gate_flags;
 	gate->lock = lock;
+	gate->hw.init = &init;
 
-	/* register the clock */
-	clk = clk_register(dev, name,
-			&clk_gate_ops, &gate->hw,
-			(parent_name ? &parent_name : NULL),
-			(parent_name ? 1 : 0),
-			flags);
+	clk = clk_register(dev, &gate->hw);
 
 	if (IS_ERR(clk))
 		kfree(gate);
diff --git a/drivers/clk/clk-mux.c b/drivers/clk/clk-mux.c
index 6e58f11ab81f..8e97491902e7 100644
--- a/drivers/clk/clk-mux.c
+++ b/drivers/clk/clk-mux.c
@@ -95,6 +95,7 @@ struct clk *clk_register_mux(struct device *dev, const char *name,
 {
 	struct clk_mux *mux;
 	struct clk *clk;
+	struct clk_init_data init;
 
 	/* allocate the mux */
 	mux = kzalloc(sizeof(struct clk_mux), GFP_KERNEL);
@@ -103,6 +104,12 @@ struct clk *clk_register_mux(struct device *dev, const char *name,
 		return ERR_PTR(-ENOMEM);
 	}
 
+	init.name = name;
+	init.ops = &clk_mux_ops;
+	init.flags = flags;
+	init.parent_names = parent_names;
+	init.num_parents = num_parents;
+
 	/* struct clk_mux assignments */
 	mux->reg = reg;
 	mux->shift = shift;
@@ -110,8 +117,7 @@ struct clk *clk_register_mux(struct device *dev, const char *name,
 	mux->flags = clk_mux_flags;
 	mux->lock = lock;
 
-	clk = clk_register(dev, name, &clk_mux_ops, &mux->hw,
-			parent_names, num_parents, flags);
+	clk = clk_register(dev, &mux->hw);
 
 	if (IS_ERR(clk))
 		kfree(mux);
diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
index 2dd20c01134d..c81803b9ba35 100644
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c
@@ -1169,26 +1169,6 @@ EXPORT_SYMBOL_GPL(clk_set_parent);
  *
  * Initializes the lists in struct clk, queries the hardware for the
  * parent and rate and sets them both.
- *
- * Any struct clk passed into __clk_init must have the following members
- * populated:
- * 	.name
- * 	.ops
- * 	.hw
- * 	.parent_names
- * 	.num_parents
- * 	.flags
- *
- * Essentially, everything that would normally be passed into clk_register is
- * assumed to be initialized already in __clk_init.  The other members may be
- * populated, but are optional.
- *
- * __clk_init is only exposed via clk-private.h and is intended for use with
- * very large numbers of clocks that need to be statically initialized.  It is
- * a layering violation to include clk-private.h from any code which implements
- * a clock's .ops; as such any statically initialized clock data MUST be in a
- * separate C file from the logic that implements it's operations.  Returns 0
- * on success, otherwise an error code.
  */
 int __clk_init(struct device *dev, struct clk *clk)
 {
@@ -1320,15 +1300,48 @@ out:
 	return ret;
 }
 
+/**
+ * __clk_register - register a clock and return a cookie.
+ *
+ * Same as clk_register, except that the .clk field inside hw shall point to a
+ * preallocated (generally statically allocated) struct clk. None of the fields
+ * of the struct clk need to be initialized.
+ *
+ * The data pointed to by .init and .clk field shall NOT be marked as init
+ * data.
+ *
+ * __clk_register is only exposed via clk-private.h and is intended for use with
+ * very large numbers of clocks that need to be statically initialized.  It is
+ * a layering violation to include clk-private.h from any code which implements
+ * a clock's .ops; as such any statically initialized clock data MUST be in a
+ * separate C file from the logic that implements it's operations.  Returns 0
+ * on success, otherwise an error code.
+ */
+struct clk *__clk_register(struct device *dev, struct clk_hw *hw)
+{
+	int ret;
+	struct clk *clk;
+
+	clk = hw->clk;
+	clk->name = hw->init->name;
+	clk->ops = hw->init->ops;
+	clk->hw = hw;
+	clk->flags = hw->init->flags;
+	clk->parent_names = hw->init->parent_names;
+	clk->num_parents = hw->init->num_parents;
+
+	ret = __clk_init(dev, clk);
+	if (ret)
+		return ERR_PTR(ret);
+
+	return clk;
+}
+EXPORT_SYMBOL_GPL(__clk_register);
+
 /**
  * clk_register - allocate a new clock, register it and return an opaque cookie
  * @dev: device that is registering this clock
- * @name: clock name
- * @ops: operations this clock supports
  * @hw: link to hardware-specific clock data
- * @parent_names: array of string names for all possible parents
- * @num_parents: number of possible parents
- * @flags: framework-level hints and quirks
  *
  * clk_register is the primary interface for populating the clock tree with new
  * clock nodes.  It returns a pointer to the newly allocated struct clk which
@@ -1336,9 +1349,7 @@ out:
  * rest of the clock API.  In the event of an error clk_register will return an
  * error code; drivers must test for an error code after calling clk_register.
  */
-struct clk *clk_register(struct device *dev, const char *name,
-		const struct clk_ops *ops, struct clk_hw *hw,
-		const char **parent_names, u8 num_parents, unsigned long flags)
+struct clk *clk_register(struct device *dev, struct clk_hw *hw)
 {
 	int i, ret;
 	struct clk *clk;
@@ -1350,15 +1361,20 @@ struct clk *clk_register(struct device *dev, const char *name,
 		goto fail_out;
 	}
 
-	clk->name = name;
-	clk->ops = ops;
+	clk->name = kstrdup(hw->init->name, GFP_KERNEL);
+	if (!clk->name) {
+		pr_err("%s: could not allocate clk->name\n", __func__);
+		ret = -ENOMEM;
+		goto fail_name;
+	}
+	clk->ops = hw->init->ops;
 	clk->hw = hw;
-	clk->flags = flags;
-	clk->num_parents = num_parents;
+	clk->flags = hw->init->flags;
+	clk->num_parents = hw->init->num_parents;
 	hw->clk = clk;
 
 	/* allocate local copy in case parent_names is __initdata */
-	clk->parent_names = kzalloc((sizeof(char*) * num_parents),
+	clk->parent_names = kzalloc((sizeof(char*) * clk->num_parents),
 			GFP_KERNEL);
 
 	if (!clk->parent_names) {
@@ -1369,8 +1385,9 @@ struct clk *clk_register(struct device *dev, const char *name,
 
 
 	/* copy each string name in case parent_names is __initdata */
-	for (i = 0; i < num_parents; i++) {
-		clk->parent_names[i] = kstrdup(parent_names[i], GFP_KERNEL);
+	for (i = 0; i < clk->num_parents; i++) {
+		clk->parent_names[i] = kstrdup(hw->init->parent_names[i],
+						GFP_KERNEL);
 		if (!clk->parent_names[i]) {
 			pr_err("%s: could not copy parent_names\n", __func__);
 			ret = -ENOMEM;
@@ -1387,6 +1404,8 @@ fail_parent_names_copy:
 		kfree(clk->parent_names[i]);
 	kfree(clk->parent_names);
 fail_parent_names:
+	kfree(clk->name);
+fail_name:
 	kfree(clk);
 fail_out:
 	return ERR_PTR(ret);
diff --git a/include/linux/clk-private.h b/include/linux/clk-private.h
index 6ebec83f1a77..b258532162b8 100644
--- a/include/linux/clk-private.h
+++ b/include/linux/clk-private.h
@@ -167,5 +167,7 @@ struct clk {
  */
 int __clk_init(struct device *dev, struct clk *clk);
 
+struct clk *__clk_register(struct device *dev, struct clk_hw *hw);
+
 #endif /* CONFIG_COMMON_CLK */
 #endif /* CLK_PRIVATE_H */
diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h
index 8f2148942b87..5db3412106b3 100644
--- a/include/linux/clk-provider.h
+++ b/include/linux/clk-provider.h
@@ -15,19 +15,6 @@
 
 #ifdef CONFIG_COMMON_CLK
 
-/**
- * struct clk_hw - handle for traversing from a struct clk to its corresponding
- * hardware-specific structure.  struct clk_hw should be declared within struct
- * clk_foo and then referenced by the struct clk instance that uses struct
- * clk_foo's clk_ops
- *
- * clk: pointer to the struct clk instance that points back to this struct
- * clk_hw instance
- */
-struct clk_hw {
-	struct clk *clk;
-};
-
 /*
  * flags used across common struct clk.  these flags should only affect the
  * top-level framework.  custom flags for dealing with hardware specifics
@@ -39,6 +26,8 @@ struct clk_hw {
 #define CLK_IGNORE_UNUSED	BIT(3) /* do not gate even if unused */
 #define CLK_IS_ROOT		BIT(4) /* root clk, has no parent */
 
+struct clk_hw;
+
 /**
  * struct clk_ops -  Callback operations for hardware clocks; these are to
  * be provided by the clock implementation, and will be called by drivers
@@ -122,6 +111,41 @@ struct clk_ops {
 	void		(*init)(struct clk_hw *hw);
 };
 
+/**
+ * struct clk_init_data - holds init data that's common to all clocks and is
+ * shared between the clock provider and the common clock framework.
+ *
+ * @name: clock name
+ * @ops: operations this clock supports
+ * @parent_names: array of string names for all possible parents
+ * @num_parents: number of possible parents
+ * @flags: framework-level hints and quirks
+ */
+struct clk_init_data {
+	const char		*name;
+	const struct clk_ops	*ops;
+	const char		**parent_names;
+	u8			num_parents;
+	unsigned long		flags;
+};
+
+/**
+ * struct clk_hw - handle for traversing from a struct clk to its corresponding
+ * hardware-specific structure.  struct clk_hw should be declared within struct
+ * clk_foo and then referenced by the struct clk instance that uses struct
+ * clk_foo's clk_ops
+ *
+ * @clk: pointer to the struct clk instance that points back to this struct
+ * clk_hw instance
+ *
+ * @init: pointer to struct clk_init_data that contains the init data shared
+ * with the common clock framework.
+ */
+struct clk_hw {
+	struct clk *clk;
+	struct clk_init_data *init;
+};
+
 /*
  * DOC: Basic clock implementations common to many platforms
  *
@@ -255,12 +279,7 @@ struct clk *clk_register_mux(struct device *dev, const char *name,
 /**
  * clk_register - allocate a new clock, register it and return an opaque cookie
  * @dev: device that is registering this clock
- * @name: clock name
- * @ops: operations this clock supports
  * @hw: link to hardware-specific clock data
- * @parent_names: array of string names for all possible parents
- * @num_parents: number of possible parents
- * @flags: framework-level hints and quirks
  *
  * clk_register is the primary interface for populating the clock tree with new
  * clock nodes.  It returns a pointer to the newly allocated struct clk which
@@ -268,9 +287,7 @@ struct clk *clk_register_mux(struct device *dev, const char *name,
  * rest of the clock API.  In the event of an error clk_register will return an
  * error code; drivers must test for an error code after calling clk_register.
  */
-struct clk *clk_register(struct device *dev, const char *name,
-		const struct clk_ops *ops, struct clk_hw *hw,
-		const char **parent_names, u8 num_parents, unsigned long flags);
+struct clk *clk_register(struct device *dev, struct clk_hw *hw);
 
 /* helper functions */
 const char *__clk_get_name(struct clk *clk);
-- 
cgit v1.3-7-g2ca7


From dbd5768f87ff6fb0a4fe09c4d7b6c4a24de99430 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 3 May 2012 14:48:02 +0200
Subject: vfs: Rename end_writeback() to clear_inode()

After we moved inode_sync_wait() from end_writeback() it doesn't make sense
to call the function end_writeback() anymore. Rename it to clear_inode()
which well says what the function really does - set I_CLEAR flag.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Fengguang Wu <fengguang.wu@intel.com>
---
 Documentation/filesystems/porting         | 16 +++++++---------
 arch/powerpc/platforms/cell/spufs/inode.c |  2 +-
 arch/s390/hypfs/inode.c                   |  2 +-
 fs/9p/vfs_inode.c                         |  2 +-
 fs/affs/inode.c                           |  2 +-
 fs/afs/inode.c                            |  2 +-
 fs/autofs4/inode.c                        |  2 +-
 fs/bfs/inode.c                            |  2 +-
 fs/binfmt_misc.c                          |  2 +-
 fs/block_dev.c                            |  2 +-
 fs/btrfs/inode.c                          |  2 +-
 fs/cifs/cifsfs.c                          |  2 +-
 fs/coda/inode.c                           |  2 +-
 fs/ecryptfs/super.c                       |  2 +-
 fs/exofs/inode.c                          |  4 ++--
 fs/ext2/inode.c                           |  2 +-
 fs/ext3/inode.c                           |  6 +++---
 fs/ext4/super.c                           |  2 +-
 fs/fat/inode.c                            |  2 +-
 fs/freevxfs/vxfs_inode.c                  |  2 +-
 fs/fuse/inode.c                           |  2 +-
 fs/gfs2/super.c                           |  2 +-
 fs/hfs/inode.c                            |  2 +-
 fs/hfsplus/super.c                        |  2 +-
 fs/hostfs/hostfs_kern.c                   |  2 +-
 fs/hpfs/inode.c                           |  2 +-
 fs/hppfs/hppfs.c                          |  2 +-
 fs/hugetlbfs/inode.c                      |  2 +-
 fs/inode.c                                |  6 +++---
 fs/jffs2/fs.c                             |  2 +-
 fs/jfs/inode.c                            |  2 +-
 fs/logfs/readwrite.c                      |  2 +-
 fs/minix/inode.c                          |  2 +-
 fs/ncpfs/inode.c                          |  2 +-
 fs/nfs/inode.c                            |  4 ++--
 fs/nilfs2/inode.c                         |  4 ++--
 fs/ntfs/inode.c                           |  2 +-
 fs/ocfs2/dlmfs/dlmfs.c                    |  2 +-
 fs/ocfs2/inode.c                          |  2 +-
 fs/omfs/inode.c                           |  2 +-
 fs/proc/inode.c                           |  2 +-
 fs/pstore/inode.c                         |  2 +-
 fs/reiserfs/inode.c                       |  4 ++--
 fs/sysfs/inode.c                          |  2 +-
 fs/sysv/inode.c                           |  2 +-
 fs/ubifs/super.c                          |  2 +-
 fs/udf/inode.c                            |  2 +-
 fs/ufs/inode.c                            |  2 +-
 fs/xfs/xfs_super.c                        |  2 +-
 include/linux/fs.h                        |  6 +++---
 ipc/mqueue.c                              |  2 +-
 mm/shmem.c                                |  2 +-
 52 files changed, 68 insertions(+), 70 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting
index 74acd9618819..8c91d1057d9a 100644
--- a/Documentation/filesystems/porting
+++ b/Documentation/filesystems/porting
@@ -297,7 +297,8 @@ in the beginning of ->setattr unconditionally.
 be used instead.  It gets called whenever the inode is evicted, whether it has
 remaining links or not.  Caller does *not* evict the pagecache or inode-associated
 metadata buffers; getting rid of those is responsibility of method, as it had
-been for ->delete_inode().
+been for ->delete_inode(). Caller makes sure async writeback cannot be running
+for the inode while (or after) ->evict_inode() is called.
 
 	->drop_inode() returns int now; it's called on final iput() with
 inode->i_lock held and it returns true if filesystems wants the inode to be
@@ -306,14 +307,11 @@ updated appropriately.  generic_delete_inode() is also alive and it consists
 simply of return 1.  Note that all actual eviction work is done by caller after
 ->drop_inode() returns.
 
-	clear_inode() is gone; use end_writeback() instead.  As before, it must
-be called exactly once on each call of ->evict_inode() (as it used to be for
-each call of ->delete_inode()).  Unlike before, if you are using inode-associated
-metadata buffers (i.e. mark_buffer_dirty_inode()), it's your responsibility to
-call invalidate_inode_buffers() before end_writeback().
-	No async writeback (and thus no calls of ->write_inode()) will happen
-after end_writeback() returns, so actions that should not overlap with ->write_inode()
-(e.g. freeing on-disk inode if i_nlink is 0) ought to be done after that call.
+	As before, clear_inode() must be called exactly once on each call of
+->evict_inode() (as it used to be for each call of ->delete_inode()).  Unlike
+before, if you are using inode-associated metadata buffers (i.e.
+mark_buffer_dirty_inode()), it's your responsibility to call
+invalidate_inode_buffers() before clear_inode().
 
 	NOTE: checking i_nlink in the beginning of ->write_inode() and bailing out
 if it's zero is not *and* *never* *had* *been* enough.  Final unlink() and iput()
diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
index 1d75c92ea8fb..66519d263da7 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -151,7 +151,7 @@ static void
 spufs_evict_inode(struct inode *inode)
 {
 	struct spufs_inode_info *ei = SPUFS_I(inode);
-	end_writeback(inode);
+	clear_inode(inode);
 	if (ei->i_ctx)
 		put_spu_context(ei->i_ctx);
 	if (ei->i_gang)
diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c
index 6a2cb560e968..73dae8b9b77a 100644
--- a/arch/s390/hypfs/inode.c
+++ b/arch/s390/hypfs/inode.c
@@ -115,7 +115,7 @@ static struct inode *hypfs_make_inode(struct super_block *sb, umode_t mode)
 
 static void hypfs_evict_inode(struct inode *inode)
 {
-	end_writeback(inode);
+	clear_inode(inode);
 	kfree(inode->i_private);
 }
 
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 014c8dd62962..57ccb7537dae 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -448,7 +448,7 @@ void v9fs_evict_inode(struct inode *inode)
 	struct v9fs_inode *v9inode = V9FS_I(inode);
 
 	truncate_inode_pages(inode->i_mapping, 0);
-	end_writeback(inode);
+	clear_inode(inode);
 	filemap_fdatawrite(inode->i_mapping);
 
 #ifdef CONFIG_9P_FSCACHE
diff --git a/fs/affs/inode.c b/fs/affs/inode.c
index 88a4b0b50058..8bc4a59f4e7e 100644
--- a/fs/affs/inode.c
+++ b/fs/affs/inode.c
@@ -264,7 +264,7 @@ affs_evict_inode(struct inode *inode)
 	}
 
 	invalidate_inode_buffers(inode);
-	end_writeback(inode);
+	clear_inode(inode);
 	affs_free_prealloc(inode);
 	cache_page = (unsigned long)AFFS_I(inode)->i_lc;
 	if (cache_page) {
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index d890ae3b2ce6..95cffd38239f 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -423,7 +423,7 @@ void afs_evict_inode(struct inode *inode)
 	ASSERTCMP(inode->i_ino, ==, vnode->fid.vnode);
 
 	truncate_inode_pages(&inode->i_data, 0);
-	end_writeback(inode);
+	clear_inode(inode);
 
 	afs_give_up_callback(vnode);
 
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index d8dc002e9cc3..df31ddb58228 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -101,7 +101,7 @@ static int autofs4_show_options(struct seq_file *m, struct dentry *root)
 
 static void autofs4_evict_inode(struct inode *inode)
 {
-	end_writeback(inode);
+	clear_inode(inode);
 	kfree(inode->i_private);
 }
 
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index e23dc7c8b884..9870417c26e7 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -174,7 +174,7 @@ static void bfs_evict_inode(struct inode *inode)
 
 	truncate_inode_pages(&inode->i_data, 0);
 	invalidate_inode_buffers(inode);
-	end_writeback(inode);
+	clear_inode(inode);
 
 	if (inode->i_nlink)
 		return;
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 613aa0618235..790b3cddca67 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -505,7 +505,7 @@ static struct inode *bm_get_inode(struct super_block *sb, int mode)
 
 static void bm_evict_inode(struct inode *inode)
 {
-	end_writeback(inode);
+	clear_inode(inode);
 	kfree(inode->i_private);
 }
 
diff --git a/fs/block_dev.c b/fs/block_dev.c
index e08f6a20a5bb..d8a7959a9654 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -487,7 +487,7 @@ static void bdev_evict_inode(struct inode *inode)
 	struct list_head *p;
 	truncate_inode_pages(&inode->i_data, 0);
 	invalidate_inode_buffers(inode); /* is it needed here? */
-	end_writeback(inode);
+	clear_inode(inode);
 	spin_lock(&bdev_lock);
 	while ( (p = bdev->bd_inodes.next) != &bdev->bd_inodes ) {
 		__bd_forget(list_entry(p, struct inode, i_devices));
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 115bc05e42b0..5c058c4d3283 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3756,7 +3756,7 @@ void btrfs_evict_inode(struct inode *inode)
 	btrfs_end_transaction(trans, root);
 	btrfs_btree_balance_dirty(root, nr);
 no_delete:
-	end_writeback(inode);
+	clear_inode(inode);
 	return;
 }
 
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index d34212822444..acb138f0eba0 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -272,7 +272,7 @@ static void
 cifs_evict_inode(struct inode *inode)
 {
 	truncate_inode_pages(&inode->i_data, 0);
-	end_writeback(inode);
+	clear_inode(inode);
 	cifs_fscache_release_inode_cookie(inode);
 }
 
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index 2870597b5c9d..f1813120d753 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -244,7 +244,7 @@ static void coda_put_super(struct super_block *sb)
 static void coda_evict_inode(struct inode *inode)
 {
 	truncate_inode_pages(&inode->i_data, 0);
-	end_writeback(inode);
+	clear_inode(inode);
 	coda_cache_clear_inode(inode);
 }
 
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index 2dd946b636d2..e879cf8ff0b1 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -133,7 +133,7 @@ static int ecryptfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 static void ecryptfs_evict_inode(struct inode *inode)
 {
 	truncate_inode_pages(&inode->i_data, 0);
-	end_writeback(inode);
+	clear_inode(inode);
 	iput(ecryptfs_inode_to_lower(inode));
 }
 
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index ea5e1f97806a..5badb0c039de 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -1473,7 +1473,7 @@ void exofs_evict_inode(struct inode *inode)
 		goto no_delete;
 
 	inode->i_size = 0;
-	end_writeback(inode);
+	clear_inode(inode);
 
 	/* if we are deleting an obj that hasn't been created yet, wait.
 	 * This also makes sure that create_done cannot be called with an
@@ -1503,5 +1503,5 @@ void exofs_evict_inode(struct inode *inode)
 	return;
 
 no_delete:
-	end_writeback(inode);
+	clear_inode(inode);
 }
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 740cad8dcd8d..37b8bf606f45 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -90,7 +90,7 @@ void ext2_evict_inode(struct inode * inode)
 	}
 
 	invalidate_inode_buffers(inode);
-	end_writeback(inode);
+	clear_inode(inode);
 
 	ext2_discard_reservation(inode);
 	rsv = EXT2_I(inode)->i_block_alloc_info;
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 10d7812f6021..ca5eb6189ee9 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -272,18 +272,18 @@ void ext3_evict_inode (struct inode *inode)
 	if (ext3_mark_inode_dirty(handle, inode)) {
 		/* If that failed, just dquot_drop() and be done with that */
 		dquot_drop(inode);
-		end_writeback(inode);
+		clear_inode(inode);
 	} else {
 		ext3_xattr_delete_inode(handle, inode);
 		dquot_free_inode(inode);
 		dquot_drop(inode);
-		end_writeback(inode);
+		clear_inode(inode);
 		ext3_free_inode(handle, inode);
 	}
 	ext3_journal_stop(handle);
 	return;
 no_delete:
-	end_writeback(inode);
+	clear_inode(inode);
 	dquot_drop(inode);
 }
 
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index ceebaf853beb..2484f560483a 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1007,7 +1007,7 @@ static void destroy_inodecache(void)
 void ext4_clear_inode(struct inode *inode)
 {
 	invalidate_inode_buffers(inode);
-	end_writeback(inode);
+	clear_inode(inode);
 	dquot_drop(inode);
 	ext4_discard_preallocations(inode);
 	if (EXT4_I(inode)->jinode) {
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 21687e31acc0..b3d290c1b513 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -454,7 +454,7 @@ static void fat_evict_inode(struct inode *inode)
 		fat_truncate_blocks(inode, 0);
 	}
 	invalidate_inode_buffers(inode);
-	end_writeback(inode);
+	clear_inode(inode);
 	fat_cache_inval_inode(inode);
 	fat_detach(inode);
 }
diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c
index cf9ef918a2a9..ef67c95f12d4 100644
--- a/fs/freevxfs/vxfs_inode.c
+++ b/fs/freevxfs/vxfs_inode.c
@@ -355,6 +355,6 @@ void
 vxfs_evict_inode(struct inode *ip)
 {
 	truncate_inode_pages(&ip->i_data, 0);
-	end_writeback(ip);
+	clear_inode(ip);
 	call_rcu(&ip->i_rcu, vxfs_i_callback);
 }
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 4aec5995867e..87e61152b34e 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -122,7 +122,7 @@ static void fuse_destroy_inode(struct inode *inode)
 static void fuse_evict_inode(struct inode *inode)
 {
 	truncate_inode_pages(&inode->i_data, 0);
-	end_writeback(inode);
+	clear_inode(inode);
 	if (inode->i_sb->s_flags & MS_ACTIVE) {
 		struct fuse_conn *fc = get_fuse_conn(inode);
 		struct fuse_inode *fi = get_fuse_inode(inode);
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 6172fa77ad59..713e621c240b 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1554,7 +1554,7 @@ out_unlock:
 out:
 	/* Case 3 starts here */
 	truncate_inode_pages(&inode->i_data, 0);
-	end_writeback(inode);
+	clear_inode(inode);
 	gfs2_dir_hash_inval(ip);
 	ip->i_gl->gl_object = NULL;
 	flush_delayed_work_sync(&ip->i_gl->gl_work);
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 737dbeb64320..761ec06354b4 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -532,7 +532,7 @@ out:
 void hfs_evict_inode(struct inode *inode)
 {
 	truncate_inode_pages(&inode->i_data, 0);
-	end_writeback(inode);
+	clear_inode(inode);
 	if (HFS_IS_RSRC(inode) && HFS_I(inode)->rsrc_inode) {
 		HFS_I(HFS_I(inode)->rsrc_inode)->rsrc_inode = NULL;
 		iput(HFS_I(inode)->rsrc_inode);
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index ceb1c281eefb..a9bca4b8768b 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -154,7 +154,7 @@ static void hfsplus_evict_inode(struct inode *inode)
 {
 	dprint(DBG_INODE, "hfsplus_evict_inode: %lu\n", inode->i_ino);
 	truncate_inode_pages(&inode->i_data, 0);
-	end_writeback(inode);
+	clear_inode(inode);
 	if (HFSPLUS_IS_RSRC(inode)) {
 		HFSPLUS_I(HFSPLUS_I(inode)->rsrc_inode)->rsrc_inode = NULL;
 		iput(HFSPLUS_I(inode)->rsrc_inode);
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 07c516bfea76..2afa5bbccf9b 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -240,7 +240,7 @@ static struct inode *hostfs_alloc_inode(struct super_block *sb)
 static void hostfs_evict_inode(struct inode *inode)
 {
 	truncate_inode_pages(&inode->i_data, 0);
-	end_writeback(inode);
+	clear_inode(inode);
 	if (HOSTFS_I(inode)->fd != -1) {
 		close_file(&HOSTFS_I(inode)->fd);
 		HOSTFS_I(inode)->fd = -1;
diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c
index 3b2cec29972b..b43066cbdc6a 100644
--- a/fs/hpfs/inode.c
+++ b/fs/hpfs/inode.c
@@ -299,7 +299,7 @@ void hpfs_write_if_changed(struct inode *inode)
 void hpfs_evict_inode(struct inode *inode)
 {
 	truncate_inode_pages(&inode->i_data, 0);
-	end_writeback(inode);
+	clear_inode(inode);
 	if (!inode->i_nlink) {
 		hpfs_lock(inode->i_sb);
 		hpfs_remove_fnode(inode->i_sb, inode->i_ino);
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index a80e45a690ac..d4f93b52cec5 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -614,7 +614,7 @@ static struct inode *hppfs_alloc_inode(struct super_block *sb)
 
 void hppfs_evict_inode(struct inode *ino)
 {
-	end_writeback(ino);
+	clear_inode(ino);
 	dput(HPPFS_I(ino)->proc_dentry);
 	mntput(ino->i_sb->s_fs_info);
 }
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 28cf06e4ec84..568193d5153c 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -393,7 +393,7 @@ static void truncate_hugepages(struct inode *inode, loff_t lstart)
 static void hugetlbfs_evict_inode(struct inode *inode)
 {
 	truncate_hugepages(inode, 0);
-	end_writeback(inode);
+	clear_inode(inode);
 }
 
 static inline void
diff --git a/fs/inode.c b/fs/inode.c
index 501fc5daf6f4..02c0fa5e16a4 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -486,7 +486,7 @@ void __remove_inode_hash(struct inode *inode)
 }
 EXPORT_SYMBOL(__remove_inode_hash);
 
-void end_writeback(struct inode *inode)
+void clear_inode(struct inode *inode)
 {
 	might_sleep();
 	/*
@@ -503,7 +503,7 @@ void end_writeback(struct inode *inode)
 	/* don't need i_lock here, no concurrent mods to i_state */
 	inode->i_state = I_FREEING | I_CLEAR;
 }
-EXPORT_SYMBOL(end_writeback);
+EXPORT_SYMBOL(clear_inode);
 
 /*
  * Free the inode passed in, removing it from the lists it is still connected
@@ -537,7 +537,7 @@ static void evict(struct inode *inode)
 	} else {
 		if (inode->i_data.nrpages)
 			truncate_inode_pages(&inode->i_data, 0);
-		end_writeback(inode);
+		clear_inode(inode);
 	}
 	if (S_ISBLK(inode->i_mode) && inode->i_bdev)
 		bd_forget(inode);
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index bb6f993ebca9..3d3092eda811 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -240,7 +240,7 @@ void jffs2_evict_inode (struct inode *inode)
 	jffs2_dbg(1, "%s(): ino #%lu mode %o\n",
 		  __func__, inode->i_ino, inode->i_mode);
 	truncate_inode_pages(&inode->i_data, 0);
-	end_writeback(inode);
+	clear_inode(inode);
 	jffs2_do_clear_inode(c, f);
 }
 
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 77b69b27f825..4692bf3ca8cb 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -169,7 +169,7 @@ void jfs_evict_inode(struct inode *inode)
 	} else {
 		truncate_inode_pages(&inode->i_data, 0);
 	}
-	end_writeback(inode);
+	clear_inode(inode);
 	dquot_drop(inode);
 }
 
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c
index e3ab5e5a904c..f1cb512c5019 100644
--- a/fs/logfs/readwrite.c
+++ b/fs/logfs/readwrite.c
@@ -2175,7 +2175,7 @@ void logfs_evict_inode(struct inode *inode)
 		}
 	}
 	truncate_inode_pages(&inode->i_data, 0);
-	end_writeback(inode);
+	clear_inode(inode);
 
 	/* Cheaper version of write_inode.  All changes are concealed in
 	 * aliases, which are moved back.  No write to the medium happens.
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index fcb05d2c6b5f..2a503ad020d5 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -32,7 +32,7 @@ static void minix_evict_inode(struct inode *inode)
 		minix_truncate(inode);
 	}
 	invalidate_inode_buffers(inode);
-	end_writeback(inode);
+	clear_inode(inode);
 	if (!inode->i_nlink)
 		minix_free_inode(inode);
 }
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 87484fb8d177..333df07ae3bd 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -292,7 +292,7 @@ static void
 ncp_evict_inode(struct inode *inode)
 {
 	truncate_inode_pages(&inode->i_data, 0);
-	end_writeback(inode);
+	clear_inode(inode);
 
 	if (S_ISDIR(inode->i_mode)) {
 		DDPRINTK("ncp_evict_inode: put directory %ld\n", inode->i_ino);
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index e8bbfa5b3500..c6073139b402 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -121,7 +121,7 @@ static void nfs_clear_inode(struct inode *inode)
 void nfs_evict_inode(struct inode *inode)
 {
 	truncate_inode_pages(&inode->i_data, 0);
-	end_writeback(inode);
+	clear_inode(inode);
 	nfs_clear_inode(inode);
 }
 
@@ -1500,7 +1500,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 void nfs4_evict_inode(struct inode *inode)
 {
 	truncate_inode_pages(&inode->i_data, 0);
-	end_writeback(inode);
+	clear_inode(inode);
 	pnfs_return_layout(inode);
 	pnfs_destroy_layout(NFS_I(inode));
 	/* If we are holding a delegation, return it! */
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 8f7b95ac1f7e..7cc64465ec26 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -734,7 +734,7 @@ void nilfs_evict_inode(struct inode *inode)
 	if (inode->i_nlink || !ii->i_root || unlikely(is_bad_inode(inode))) {
 		if (inode->i_data.nrpages)
 			truncate_inode_pages(&inode->i_data, 0);
-		end_writeback(inode);
+		clear_inode(inode);
 		nilfs_clear_inode(inode);
 		return;
 	}
@@ -746,7 +746,7 @@ void nilfs_evict_inode(struct inode *inode)
 	/* TODO: some of the following operations may fail.  */
 	nilfs_truncate_bmap(ii, 0);
 	nilfs_mark_inode_dirty(inode);
-	end_writeback(inode);
+	clear_inode(inode);
 
 	ret = nilfs_ifile_delete_inode(ii->i_root->ifile, inode->i_ino);
 	if (!ret)
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 2eaa66652944..c6dbd3db6ca8 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -2258,7 +2258,7 @@ void ntfs_evict_big_inode(struct inode *vi)
 	ntfs_inode *ni = NTFS_I(vi);
 
 	truncate_inode_pages(&vi->i_data, 0);
-	end_writeback(vi);
+	clear_inode(vi);
 
 #ifdef NTFS_RW
 	if (NInoDirty(ni)) {
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index 3b5825ef3193..e31d6ae013ab 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -367,7 +367,7 @@ static void dlmfs_evict_inode(struct inode *inode)
 	int status;
 	struct dlmfs_inode_private *ip;
 
-	end_writeback(inode);
+	clear_inode(inode);
 
 	mlog(0, "inode %lu\n", inode->i_ino);
 
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 17454a904d7b..735514ca400f 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -1069,7 +1069,7 @@ static void ocfs2_clear_inode(struct inode *inode)
 	int status;
 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
 
-	end_writeback(inode);
+	clear_inode(inode);
 	trace_ocfs2_clear_inode((unsigned long long)oi->ip_blkno,
 				inode->i_nlink);
 
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c
index dbc842222589..e6213b3725d1 100644
--- a/fs/omfs/inode.c
+++ b/fs/omfs/inode.c
@@ -184,7 +184,7 @@ int omfs_sync_inode(struct inode *inode)
 static void omfs_evict_inode(struct inode *inode)
 {
 	truncate_inode_pages(&inode->i_data, 0);
-	end_writeback(inode);
+	clear_inode(inode);
 
 	if (inode->i_nlink)
 		return;
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 205c92280838..29ab406b3704 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -33,7 +33,7 @@ static void proc_evict_inode(struct inode *inode)
 	const struct proc_ns_operations *ns_ops;
 
 	truncate_inode_pages(&inode->i_data, 0);
-	end_writeback(inode);
+	clear_inode(inode);
 
 	/* Stop tracking associated processes */
 	put_pid(PROC_I(inode)->pid);
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index 19507889bb7f..aeb19e68e086 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -85,7 +85,7 @@ static void pstore_evict_inode(struct inode *inode)
 	struct pstore_private	*p = inode->i_private;
 	unsigned long		flags;
 
-	end_writeback(inode);
+	clear_inode(inode);
 	if (p) {
 		spin_lock_irqsave(&allpstore_lock, flags);
 		list_del(&p->list);
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 494c315c7417..59d06871a850 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -76,14 +76,14 @@ void reiserfs_evict_inode(struct inode *inode)
 		;
 	}
       out:
-	end_writeback(inode);	/* note this must go after the journal_end to prevent deadlock */
+	clear_inode(inode);	/* note this must go after the journal_end to prevent deadlock */
 	dquot_drop(inode);
 	inode->i_blocks = 0;
 	reiserfs_write_unlock_once(inode->i_sb, depth);
 	return;
 
 no_delete:
-	end_writeback(inode);
+	clear_inode(inode);
 	dquot_drop(inode);
 }
 
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index feb2d69396cf..b8ce6a98933f 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -310,7 +310,7 @@ void sysfs_evict_inode(struct inode *inode)
 	struct sysfs_dirent *sd  = inode->i_private;
 
 	truncate_inode_pages(&inode->i_data, 0);
-	end_writeback(inode);
+	clear_inode(inode);
 	sysfs_put(sd);
 }
 
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index 3da5ce25faf0..08d0b2568cd3 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -316,7 +316,7 @@ static void sysv_evict_inode(struct inode *inode)
 		sysv_truncate(inode);
 	}
 	invalidate_inode_buffers(inode);
-	end_writeback(inode);
+	clear_inode(inode);
 	if (!inode->i_nlink)
 		sysv_free_inode(inode);
 }
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 76e4e0566ad6..7bf60ae58ed4 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -378,7 +378,7 @@ out:
 		smp_wmb();
 	}
 done:
-	end_writeback(inode);
+	clear_inode(inode);
 }
 
 static void ubifs_dirty_inode(struct inode *inode, int flags)
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 7d7528008359..873e1bab9c4c 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -80,7 +80,7 @@ void udf_evict_inode(struct inode *inode)
 	} else
 		truncate_inode_pages(&inode->i_data, 0);
 	invalidate_inode_buffers(inode);
-	end_writeback(inode);
+	clear_inode(inode);
 	if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB &&
 	    inode->i_size != iinfo->i_lenExtents) {
 		udf_warn(inode->i_sb, "Inode %lu (mode %o) has inode size %llu different from extent length %llu. Filesystem need not be standards compliant.\n",
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index 7cdd3953d67e..dd7c89d8a1c1 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -895,7 +895,7 @@ void ufs_evict_inode(struct inode * inode)
 	}
 
 	invalidate_inode_buffers(inode);
-	end_writeback(inode);
+	clear_inode(inode);
 
 	if (want_delete) {
 		lock_ufs(inode->i_sb);
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index dab9a5f6dfd6..5b806f23ad0a 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -926,7 +926,7 @@ xfs_fs_evict_inode(
 	trace_xfs_evict_inode(ip);
 
 	truncate_inode_pages(&inode->i_data, 0);
-	end_writeback(inode);
+	clear_inode(inode);
 	XFS_STATS_INC(vn_rele);
 	XFS_STATS_INC(vn_remove);
 	XFS_STATS_DEC(vn_active);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 8de675523e46..c79316c79ee3 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1744,8 +1744,8 @@ struct super_operations {
  * I_FREEING		Set when inode is about to be freed but still has dirty
  *			pages or buffers attached or the inode itself is still
  *			dirty.
- * I_CLEAR		Added by end_writeback().  In this state the inode is clean
- *			and can be destroyed.  Inode keeps I_FREEING.
+ * I_CLEAR		Added by clear_inode().  In this state the inode is
+ *			clean and can be destroyed.  Inode keeps I_FREEING.
  *
  *			Inodes that are I_WILL_FREE, I_FREEING or I_CLEAR are
  *			prohibited for many purposes.  iget() must wait for
@@ -2328,7 +2328,7 @@ extern unsigned int get_next_ino(void);
 
 extern void __iget(struct inode * inode);
 extern void iget_failed(struct inode *);
-extern void end_writeback(struct inode *);
+extern void clear_inode(struct inode *);
 extern void __destroy_inode(struct inode *);
 extern struct inode *new_inode_pseudo(struct super_block *sb);
 extern struct inode *new_inode(struct super_block *sb);
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 28bd64ddeda3..0032d9cccb7c 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -249,7 +249,7 @@ static void mqueue_evict_inode(struct inode *inode)
 	int i;
 	struct ipc_namespace *ipc_ns;
 
-	end_writeback(inode);
+	clear_inode(inode);
 
 	if (S_ISDIR(inode->i_mode))
 		return;
diff --git a/mm/shmem.c b/mm/shmem.c
index f99ff3e50bd6..68412fa90fd0 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -597,7 +597,7 @@ static void shmem_evict_inode(struct inode *inode)
 	}
 	BUG_ON(inode->i_blocks);
 	shmem_free_inode(inode->i_sb);
-	end_writeback(inode);
+	clear_inode(inode);
 }
 
 /*
-- 
cgit v1.3-7-g2ca7


From 169ebd90131b2ffca74bb2dbe7eeacd39fb83714 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 3 May 2012 14:48:03 +0200
Subject: writeback: Avoid iput() from flusher thread

Doing iput() from flusher thread (writeback_sb_inodes()) can create problems
because iput() can do a lot of work - for example truncate the inode if it's
the last iput on unlinked file. Some filesystems depend on flusher thread
progressing (e.g. because they need to flush delay allocated blocks to reduce
allocation uncertainty) and so flusher thread doing truncate creates
interesting dependencies and possibilities for deadlocks.

We get rid of iput() in flusher thread by using the fact that I_SYNC inode
flag effectively pins the inode in memory. So if we take care to either hold
i_lock or have I_SYNC set, we can get away without taking inode reference
in writeback_sb_inodes().

As a side effect of these changes, we also fix possible use-after-free in
wb_writeback() because inode_wait_for_writeback() call could try to reacquire
i_lock on the inode that was already free.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Fengguang Wu <fengguang.wu@intel.com>
---
 fs/fs-writeback.c         | 66 +++++++++++++++++++++++++++++++++++++----------
 fs/inode.c                |  8 +++++-
 include/linux/fs.h        |  7 ++---
 include/linux/writeback.h |  7 +----
 4 files changed, 65 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 5f2c68289610..8d2fb8c88cf3 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -326,9 +326,12 @@ static int write_inode(struct inode *inode, struct writeback_control *wbc)
 }
 
 /*
- * Wait for writeback on an inode to complete.
+ * Wait for writeback on an inode to complete. Called with i_lock held.
+ * Caller must make sure inode cannot go away when we drop i_lock.
  */
-static void inode_wait_for_writeback(struct inode *inode)
+static void __inode_wait_for_writeback(struct inode *inode)
+	__releases(inode->i_lock)
+	__acquires(inode->i_lock)
 {
 	DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC);
 	wait_queue_head_t *wqh;
@@ -341,6 +344,36 @@ static void inode_wait_for_writeback(struct inode *inode)
 	}
 }
 
+/*
+ * Wait for writeback on an inode to complete. Caller must have inode pinned.
+ */
+void inode_wait_for_writeback(struct inode *inode)
+{
+	spin_lock(&inode->i_lock);
+	__inode_wait_for_writeback(inode);
+	spin_unlock(&inode->i_lock);
+}
+
+/*
+ * Sleep until I_SYNC is cleared. This function must be called with i_lock
+ * held and drops it. It is aimed for callers not holding any inode reference
+ * so once i_lock is dropped, inode can go away.
+ */
+static void inode_sleep_on_writeback(struct inode *inode)
+	__releases(inode->i_lock)
+{
+	DEFINE_WAIT(wait);
+	wait_queue_head_t *wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
+	int sleep;
+
+	prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
+	sleep = inode->i_state & I_SYNC;
+	spin_unlock(&inode->i_lock);
+	if (sleep)
+		schedule();
+	finish_wait(wqh, &wait);
+}
+
 /*
  * Find proper writeback list for the inode depending on its current state and
  * possibly also change of its state while we were doing writeback.  Here we
@@ -479,9 +512,11 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
 		if (wbc->sync_mode != WB_SYNC_ALL)
 			goto out;
 		/*
-		 * It's a data-integrity sync.  We must wait.
+		 * It's a data-integrity sync. We must wait. Since callers hold
+		 * inode reference or inode has I_WILL_FREE set, it cannot go
+		 * away under us.
 		 */
-		inode_wait_for_writeback(inode);
+		__inode_wait_for_writeback(inode);
 	}
 	WARN_ON(inode->i_state & I_SYNC);
 	/*
@@ -620,20 +655,28 @@ static long writeback_sb_inodes(struct super_block *sb,
 		}
 		spin_unlock(&wb->list_lock);
 
-		__iget(inode);
 		/*
 		 * We already requeued the inode if it had I_SYNC set and we
 		 * are doing WB_SYNC_NONE writeback. So this catches only the
 		 * WB_SYNC_ALL case.
 		 */
-		if (inode->i_state & I_SYNC)
-			inode_wait_for_writeback(inode);
+		if (inode->i_state & I_SYNC) {
+			/* Wait for I_SYNC. This function drops i_lock... */
+			inode_sleep_on_writeback(inode);
+			/* Inode may be gone, start again */
+			continue;
+		}
 		inode->i_state |= I_SYNC;
 		spin_unlock(&inode->i_lock);
+
 		write_chunk = writeback_chunk_size(wb->bdi, work);
 		wbc.nr_to_write = write_chunk;
 		wbc.pages_skipped = 0;
 
+		/*
+		 * We use I_SYNC to pin the inode in memory. While it is set
+		 * evict_inode() will wait so the inode cannot be freed.
+		 */
 		__writeback_single_inode(inode, wb, &wbc);
 
 		work->nr_pages -= write_chunk - wbc.nr_to_write;
@@ -645,10 +688,7 @@ static long writeback_sb_inodes(struct super_block *sb,
 		requeue_inode(inode, wb, &wbc);
 		inode_sync_complete(inode);
 		spin_unlock(&inode->i_lock);
-		spin_unlock(&wb->list_lock);
-		iput(inode);
-		cond_resched();
-		spin_lock(&wb->list_lock);
+		cond_resched_lock(&wb->list_lock);
 		/*
 		 * bail out to wb_writeback() often enough to check
 		 * background threshold and other termination conditions.
@@ -843,8 +883,8 @@ static long wb_writeback(struct bdi_writeback *wb,
 			inode = wb_inode(wb->b_more_io.prev);
 			spin_lock(&inode->i_lock);
 			spin_unlock(&wb->list_lock);
-			inode_wait_for_writeback(inode);
-			spin_unlock(&inode->i_lock);
+			/* This function drops i_lock... */
+			inode_sleep_on_writeback(inode);
 			spin_lock(&wb->list_lock);
 		}
 	}
diff --git a/fs/inode.c b/fs/inode.c
index 02c0fa5e16a4..f4e145016611 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -530,7 +530,13 @@ static void evict(struct inode *inode)
 
 	inode_sb_list_del(inode);
 
-	inode_sync_wait(inode);
+	/*
+	 * Wait for flusher thread to be done with the inode so that filesystem
+	 * does not start destroying it while writeback is still running. Since
+	 * the inode has I_FREEING set, flusher thread won't start new work on
+	 * the inode.  We just have to wait for running writeback to finish.
+	 */
+	inode_wait_for_writeback(inode);
 
 	if (op->evict_inode) {
 		op->evict_inode(inode);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index c79316c79ee3..1c71e7f4d234 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1753,9 +1753,10 @@ struct super_operations {
  *			anew.  Other functions will just ignore such inodes,
  *			if appropriate.  I_NEW is used for waiting.
  *
- * I_SYNC		Synchonized write of dirty inode data.  The bits is
- *			set during data writeback, and cleared with a wakeup
- *			on the bit address once it is done.
+ * I_SYNC		Writeback of inode is running. The bit is set during
+ *			data writeback, and cleared with a wakeup on the bit
+ *			address once it is done. The bit is also used to pin
+ *			the inode in memory for flusher thread.
  *
  * I_REFERENCED		Marks the inode as recently references on the LRU list.
  *
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 3309736ff059..6d0a0fcd80e7 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -95,6 +95,7 @@ long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages,
 				enum wb_reason reason);
 long wb_do_writeback(struct bdi_writeback *wb, int force_wait);
 void wakeup_flusher_threads(long nr_pages, enum wb_reason reason);
+void inode_wait_for_writeback(struct inode *inode);
 
 /* writeback.h requires fs.h; it, too, is not included from here. */
 static inline void wait_on_inode(struct inode *inode)
@@ -102,12 +103,6 @@ static inline void wait_on_inode(struct inode *inode)
 	might_sleep();
 	wait_on_bit(&inode->i_state, __I_NEW, inode_wait, TASK_UNINTERRUPTIBLE);
 }
-static inline void inode_sync_wait(struct inode *inode)
-{
-	might_sleep();
-	wait_on_bit(&inode->i_state, __I_SYNC, inode_wait,
-							TASK_UNINTERRUPTIBLE);
-}
 
 
 /*
-- 
cgit v1.3-7-g2ca7


From 5b74716ebab10e7bce960d148fe6d8f6920451e5 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Thu, 26 Apr 2012 19:43:42 +0000
Subject: kvm/powerpc: Add new ioctl to retreive server MMU infos

This is necessary for qemu to be able to pass the right information
to the guest, such as the supported page sizes and corresponding
encodings in the SLB and hash table, which can vary depending
on the processor type, the type of KVM used (PR vs HV) and the
version of KVM

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
[agraf: fix compilation on hv, adjust for newer ioctl numbers]
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 Documentation/virtual/kvm/api.txt  | 70 ++++++++++++++++++++++++++++++++++++++
 arch/powerpc/include/asm/kvm_ppc.h |  2 ++
 arch/powerpc/kernel/ppc_ksyms.c    |  4 +++
 arch/powerpc/kvm/book3s_hv.c       | 32 +++++++++++++++++
 arch/powerpc/kvm/book3s_pr.c       | 25 ++++++++++++++
 arch/powerpc/kvm/powerpc.c         | 18 +++++++++-
 include/linux/kvm.h                | 27 +++++++++++++++
 7 files changed, 177 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index eb62761b7683..930126698a0f 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1860,6 +1860,76 @@ See KVM_GET_PIT2 for details on struct kvm_pit_state2.
 This IOCTL replaces the obsolete KVM_SET_PIT.
 
 
+4.74 KVM_PPC_GET_SMMU_INFO
+
+Capability: KVM_CAP_PPC_GET_SMMU_INFO
+Architectures: powerpc
+Type: vm ioctl
+Parameters: None
+Returns: 0 on success, -1 on error
+
+This populates and returns a structure describing the features of
+the "Server" class MMU emulation supported by KVM.
+This can in turn be used by userspace to generate the appropariate
+device-tree properties for the guest operating system.
+
+The structure contains some global informations, followed by an
+array of supported segment page sizes:
+
+      struct kvm_ppc_smmu_info {
+	     __u64 flags;
+	     __u32 slb_size;
+	     __u32 pad;
+	     struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ];
+      };
+
+The supported flags are:
+
+    - KVM_PPC_PAGE_SIZES_REAL:
+        When that flag is set, guest page sizes must "fit" the backing
+        store page sizes. When not set, any page size in the list can
+        be used regardless of how they are backed by userspace.
+
+    - KVM_PPC_1T_SEGMENTS
+        The emulated MMU supports 1T segments in addition to the
+        standard 256M ones.
+
+The "slb_size" field indicates how many SLB entries are supported
+
+The "sps" array contains 8 entries indicating the supported base
+page sizes for a segment in increasing order. Each entry is defined
+as follow:
+
+   struct kvm_ppc_one_seg_page_size {
+	__u32 page_shift;	/* Base page shift of segment (or 0) */
+	__u32 slb_enc;		/* SLB encoding for BookS */
+	struct kvm_ppc_one_page_size enc[KVM_PPC_PAGE_SIZES_MAX_SZ];
+   };
+
+An entry with a "page_shift" of 0 is unused. Because the array is
+organized in increasing order, a lookup can stop when encoutering
+such an entry.
+
+The "slb_enc" field provides the encoding to use in the SLB for the
+page size. The bits are in positions such as the value can directly
+be OR'ed into the "vsid" argument of the slbmte instruction.
+
+The "enc" array is a list which for each of those segment base page
+size provides the list of supported actual page sizes (which can be
+only larger or equal to the base page size), along with the
+corresponding encoding in the hash PTE. Similarily, the array is
+8 entries sorted by increasing sizes and an entry with a "0" shift
+is an empty entry and a terminator:
+
+   struct kvm_ppc_one_page_size {
+	__u32 page_shift;	/* Page shift (or 0) */
+	__u32 pte_enc;		/* Encoding in the HPTE (>>12) */
+   };
+
+The "pte_enc" field provides a value that can OR'ed into the hash
+PTE's RPN field (ie, it needs to be shifted left by 12 to OR it
+into the hash PTE second double word).
+
 5. The kvm_run structure
 ------------------------
 
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index c1069f63dcaf..c87e3b503fdc 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -140,6 +140,8 @@ extern int kvmppc_core_prepare_memory_region(struct kvm *kvm,
 				struct kvm_userspace_memory_region *mem);
 extern void kvmppc_core_commit_memory_region(struct kvm *kvm,
 				struct kvm_userspace_memory_region *mem);
+extern int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm,
+				      struct kvm_ppc_smmu_info *info);
 
 extern int kvmppc_bookehv_init(void);
 extern void kvmppc_bookehv_exit(void);
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index 786a2700ec2d..d1f2aafcbe8c 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -190,3 +190,7 @@ EXPORT_SYMBOL(__arch_hweight16);
 EXPORT_SYMBOL(__arch_hweight32);
 EXPORT_SYMBOL(__arch_hweight64);
 #endif
+
+#ifdef CONFIG_PPC_BOOK3S_64
+EXPORT_SYMBOL_GPL(mmu_psize_defs);
+#endif
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 59c296743595..bb5a0f4b4bbb 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1175,6 +1175,38 @@ long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret)
 	return fd;
 }
 
+static void kvmppc_add_seg_page_size(struct kvm_ppc_one_seg_page_size **sps,
+				     int linux_psize)
+{
+	struct mmu_psize_def *def = &mmu_psize_defs[linux_psize];
+
+	if (!def->shift)
+		return;
+	(*sps)->page_shift = def->shift;
+	(*sps)->slb_enc = def->sllp;
+	(*sps)->enc[0].page_shift = def->shift;
+	(*sps)->enc[0].pte_enc = def->penc;
+	(*sps)++;
+}
+
+int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info)
+{
+	struct kvm_ppc_one_seg_page_size *sps;
+
+	info->flags = KVM_PPC_PAGE_SIZES_REAL;
+	if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
+		info->flags |= KVM_PPC_1T_SEGMENTS;
+	info->slb_size = mmu_slb_size;
+
+	/* We only support these sizes for now, and no muti-size segments */
+	sps = &info->sps[0];
+	kvmppc_add_seg_page_size(&sps, MMU_PAGE_4K);
+	kvmppc_add_seg_page_size(&sps, MMU_PAGE_64K);
+	kvmppc_add_seg_page_size(&sps, MMU_PAGE_16M);
+
+	return 0;
+}
+
 /*
  * Get (and clear) the dirty memory log for a memory slot.
  */
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 815ac5938a9e..a1baec340f7e 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -1158,6 +1158,31 @@ out:
 	return r;
 }
 
+#ifdef CONFIG_PPC64
+int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info)
+{
+	/* No flags */
+	info->flags = 0;
+
+	/* SLB is always 64 entries */
+	info->slb_size = 64;
+
+	/* Standard 4k base page size segment */
+	info->sps[0].page_shift = 12;
+	info->sps[0].slb_enc = 0;
+	info->sps[0].enc[0].page_shift = 12;
+	info->sps[0].enc[0].pte_enc = 0;
+
+	/* Standard 16M large page size segment */
+	info->sps[1].page_shift = 24;
+	info->sps[1].slb_enc = SLB_VSID_L;
+	info->sps[1].enc[0].page_shift = 24;
+	info->sps[1].enc[0].pte_enc = 0;
+
+	return 0;
+}
+#endif /* CONFIG_PPC64 */
+
 int kvmppc_core_prepare_memory_region(struct kvm *kvm,
 				      struct kvm_userspace_memory_region *mem)
 {
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 6ac31154d170..1493c8de947b 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -279,6 +279,11 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_MAX_VCPUS:
 		r = KVM_MAX_VCPUS;
 		break;
+#ifdef CONFIG_PPC_BOOK3S_64
+	case KVM_CAP_PPC_GET_SMMU_INFO:
+		r = 1;
+		break;
+#endif
 	default:
 		r = 0;
 		break;
@@ -718,7 +723,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 		break;
 	}
 #endif
-
 	default:
 		r = -EINVAL;
 	}
@@ -800,6 +804,18 @@ long kvm_arch_vm_ioctl(struct file *filp,
 	}
 #endif /* CONFIG_KVM_BOOK3S_64_HV */
 
+#ifdef CONFIG_PPC_BOOK3S_64
+	case KVM_PPC_GET_SMMU_INFO: {
+		struct kvm *kvm = filp->private_data;
+		struct kvm_ppc_smmu_info info;
+
+		memset(&info, 0, sizeof(info));
+		r = kvm_vm_ioctl_get_smmu_info(kvm, &info);
+		if (r >= 0 && copy_to_user(argp, &info, sizeof(info)))
+			r = -EFAULT;
+		break;
+	}
+#endif /* CONFIG_PPC_BOOK3S_64 */
 	default:
 		r = -ENOTTY;
 	}
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 225b452e1d1d..8d696cf6edcc 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -449,6 +449,30 @@ struct kvm_ppc_pvinfo {
 	__u8  pad[108];
 };
 
+/* for KVM_PPC_GET_SMMU_INFO */
+#define KVM_PPC_PAGE_SIZES_MAX_SZ	8
+
+struct kvm_ppc_one_page_size {
+	__u32 page_shift;	/* Page shift (or 0) */
+	__u32 pte_enc;		/* Encoding in the HPTE (>>12) */
+};
+
+struct kvm_ppc_one_seg_page_size {
+	__u32 page_shift;	/* Base page shift of segment (or 0) */
+	__u32 slb_enc;		/* SLB encoding for BookS */
+	struct kvm_ppc_one_page_size enc[KVM_PPC_PAGE_SIZES_MAX_SZ];
+};
+
+#define KVM_PPC_PAGE_SIZES_REAL		0x00000001
+#define KVM_PPC_1T_SEGMENTS		0x00000002
+
+struct kvm_ppc_smmu_info {
+	__u64 flags;
+	__u32 slb_size;
+	__u32 pad;
+	struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ];
+};
+
 #define KVMIO 0xAE
 
 /* machine type bits, to be used as argument to KVM_CREATE_VM */
@@ -591,6 +615,7 @@ struct kvm_ppc_pvinfo {
 #define KVM_CAP_PCI_2_3 75
 #define KVM_CAP_KVMCLOCK_CTRL 76
 #define KVM_CAP_SIGNAL_MSI 77
+#define KVM_CAP_PPC_GET_SMMU_INFO 78
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -800,6 +825,8 @@ struct kvm_s390_ucas_mapping {
 				       struct kvm_assigned_pci_dev)
 /* Available with KVM_CAP_SIGNAL_MSI */
 #define KVM_SIGNAL_MSI            _IOW(KVMIO,  0xa5, struct kvm_msi)
+/* Available with KVM_CAP_PPC_GET_SMMU_INFO */
+#define KVM_PPC_GET_SMMU_INFO	  _IOR(KVMIO,  0xa6, struct kvm_ppc_smmu_info)
 
 /*
  * ioctls for vcpu fds
-- 
cgit v1.3-7-g2ca7


From b7b142d9fc056e98e6fdef82dca3e87067517340 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Mon, 7 May 2012 10:03:21 +0100
Subject: mfd: Convert wm8350 physical I/O to regmap API

The driver still uses a custom cache implementation but the underlying
physical I/O is now done using the regmap API, saving some code and
avoiding allocating enormous scratch arrays on the stack.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/wm8350-core.c       | 31 ++++++++----------------
 drivers/mfd/wm8350-i2c.c        | 53 ++++++++++++-----------------------------
 include/linux/mfd/wm8350/core.h |  9 ++-----
 3 files changed, 27 insertions(+), 66 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/wm8350-core.c b/drivers/mfd/wm8350-core.c
index dd1caaac55e4..8a9b11ca076a 100644
--- a/drivers/mfd/wm8350-core.c
+++ b/drivers/mfd/wm8350-core.c
@@ -20,6 +20,7 @@
 #include <linux/device.h>
 #include <linux/delay.h>
 #include <linux/interrupt.h>
+#include <linux/regmap.h>
 #include <linux/workqueue.h>
 
 #include <linux/mfd/wm8350/core.h>
@@ -74,7 +75,7 @@ static int wm8350_phys_read(struct wm8350 *wm8350, u8 reg, int num_regs,
 	int bytes = num_regs * 2;
 
 	dev_dbg(wm8350->dev, "volatile read\n");
-	ret = wm8350->read_dev(wm8350, reg, bytes, (char *)dest);
+	ret = regmap_raw_read(wm8350->regmap, reg, dest, bytes);
 
 	for (i = reg; i < reg + num_regs; i++) {
 		/* Cache is CPU endian */
@@ -96,9 +97,6 @@ static int wm8350_read(struct wm8350 *wm8350, u8 reg, int num_regs, u16 *dest)
 	int ret = 0;
 	int bytes = num_regs * 2;
 
-	if (wm8350->read_dev == NULL)
-		return -ENODEV;
-
 	if ((reg + num_regs - 1) > WM8350_MAX_REGISTER) {
 		dev_err(wm8350->dev, "invalid reg %x\n",
 			reg + num_regs - 1);
@@ -149,9 +147,6 @@ static int wm8350_write(struct wm8350 *wm8350, u8 reg, int num_regs, u16 *src)
 	int end = reg + num_regs;
 	int bytes = num_regs * 2;
 
-	if (wm8350->write_dev == NULL)
-		return -ENODEV;
-
 	if ((reg + num_regs - 1) > WM8350_MAX_REGISTER) {
 		dev_err(wm8350->dev, "invalid reg %x\n",
 			reg + num_regs - 1);
@@ -182,7 +177,7 @@ static int wm8350_write(struct wm8350 *wm8350, u8 reg, int num_regs, u16 *src)
 	}
 
 	/* Actually write it out */
-	return wm8350->write_dev(wm8350, reg, bytes, (char *)src);
+	return regmap_raw_write(wm8350->regmap, reg, src, bytes);
 }
 
 /*
@@ -515,9 +510,8 @@ static int wm8350_create_cache(struct wm8350 *wm8350, int type, int mode)
 	 * a PMIC so the device many not be in a virgin state and we
 	 * can't rely on the silicon values.
 	 */
-	ret = wm8350->read_dev(wm8350, 0,
-			       sizeof(u16) * (WM8350_MAX_REGISTER + 1),
-			       wm8350->reg_cache);
+	ret = regmap_raw_read(wm8350->regmap, 0, wm8350->reg_cache,
+			      sizeof(u16) * (WM8350_MAX_REGISTER + 1));
 	if (ret < 0) {
 		dev_err(wm8350->dev,
 			"failed to read initial cache values\n");
@@ -570,35 +564,30 @@ int wm8350_device_init(struct wm8350 *wm8350, int irq,
 		       struct wm8350_platform_data *pdata)
 {
 	int ret;
-	u16 id1, id2, mask_rev;
-	u16 cust_id, mode, chip_rev;
+	unsigned int id1, id2, mask_rev;
+	unsigned int cust_id, mode, chip_rev;
 
 	dev_set_drvdata(wm8350->dev, wm8350);
 
 	/* get WM8350 revision and config mode */
-	ret = wm8350->read_dev(wm8350, WM8350_RESET_ID, sizeof(id1), &id1);
+	ret = regmap_read(wm8350->regmap, WM8350_RESET_ID, &id1);
 	if (ret != 0) {
 		dev_err(wm8350->dev, "Failed to read ID: %d\n", ret);
 		goto err;
 	}
 
-	ret = wm8350->read_dev(wm8350, WM8350_ID, sizeof(id2), &id2);
+	ret = regmap_read(wm8350->regmap, WM8350_ID, &id2);
 	if (ret != 0) {
 		dev_err(wm8350->dev, "Failed to read ID: %d\n", ret);
 		goto err;
 	}
 
-	ret = wm8350->read_dev(wm8350, WM8350_REVISION, sizeof(mask_rev),
-			       &mask_rev);
+	ret = regmap_read(wm8350->regmap, WM8350_REVISION, &mask_rev);
 	if (ret != 0) {
 		dev_err(wm8350->dev, "Failed to read revision: %d\n", ret);
 		goto err;
 	}
 
-	id1 = be16_to_cpu(id1);
-	id2 = be16_to_cpu(id2);
-	mask_rev = be16_to_cpu(mask_rev);
-
 	if (id1 != 0x6143) {
 		dev_err(wm8350->dev,
 			"Device with ID %x is not a WM8350\n", id1);
diff --git a/drivers/mfd/wm8350-i2c.c b/drivers/mfd/wm8350-i2c.c
index d955faaf27c4..271589f8e8e3 100644
--- a/drivers/mfd/wm8350-i2c.c
+++ b/drivers/mfd/wm8350-i2c.c
@@ -15,47 +15,18 @@
 
 #include <linux/module.h>
 #include <linux/moduleparam.h>
+#include <linux/err.h>
 #include <linux/init.h>
 #include <linux/i2c.h>
 #include <linux/platform_device.h>
 #include <linux/mfd/wm8350/core.h>
+#include <linux/regmap.h>
 #include <linux/slab.h>
 
-static int wm8350_i2c_read_device(struct wm8350 *wm8350, char reg,
-				  int bytes, void *dest)
-{
-	int ret;
-
-	ret = i2c_master_send(wm8350->i2c_client, &reg, 1);
-	if (ret < 0)
-		return ret;
-	ret = i2c_master_recv(wm8350->i2c_client, dest, bytes);
-	if (ret < 0)
-		return ret;
-	if (ret != bytes)
-		return -EIO;
-	return 0;
-}
-
-static int wm8350_i2c_write_device(struct wm8350 *wm8350, char reg,
-				   int bytes, void *src)
-{
-	/* we add 1 byte for device register */
-	u8 msg[(WM8350_MAX_REGISTER << 1) + 1];
-	int ret;
-
-	if (bytes > ((WM8350_MAX_REGISTER << 1) + 1))
-		return -EINVAL;
-
-	msg[0] = reg;
-	memcpy(&msg[1], src, bytes);
-	ret = i2c_master_send(wm8350->i2c_client, msg, bytes + 1);
-	if (ret < 0)
-		return ret;
-	if (ret != bytes + 1)
-		return -EIO;
-	return 0;
-}
+static const struct regmap_config wm8350_regmap = {
+	.reg_bits = 8,
+	.val_bits = 16,
+};
 
 static int wm8350_i2c_probe(struct i2c_client *i2c,
 			    const struct i2c_device_id *id)
@@ -67,11 +38,16 @@ static int wm8350_i2c_probe(struct i2c_client *i2c,
 	if (wm8350 == NULL)
 		return -ENOMEM;
 
+	wm8350->regmap = devm_regmap_init_i2c(i2c, &wm8350_regmap);
+	if (IS_ERR(wm8350->regmap)) {
+		ret = PTR_ERR(wm8350->regmap);
+		dev_err(&i2c->dev, "Failed to allocate register map: %d\n",
+			ret);
+		return ret;
+	}
+
 	i2c_set_clientdata(i2c, wm8350);
 	wm8350->dev = &i2c->dev;
-	wm8350->i2c_client = i2c;
-	wm8350->read_dev = wm8350_i2c_read_device;
-	wm8350->write_dev = wm8350_i2c_write_device;
 
 	ret = wm8350_device_init(wm8350, i2c->irq, i2c->dev.platform_data);
 	if (ret < 0)
@@ -80,6 +56,7 @@ static int wm8350_i2c_probe(struct i2c_client *i2c,
 	return ret;
 
 err:
+	regmap_exit(wm8350->regmap);
 	return ret;
 }
 
diff --git a/include/linux/mfd/wm8350/core.h b/include/linux/mfd/wm8350/core.h
index 98fcc977e82b..9192b6404a73 100644
--- a/include/linux/mfd/wm8350/core.h
+++ b/include/linux/mfd/wm8350/core.h
@@ -602,6 +602,7 @@ extern const u16 wm8352_mode2_defaults[];
 extern const u16 wm8352_mode3_defaults[];
 
 struct wm8350;
+struct regmap;
 
 struct wm8350_hwmon {
 	struct platform_device *pdev;
@@ -612,13 +613,7 @@ struct wm8350 {
 	struct device *dev;
 
 	/* device IO */
-	union {
-		struct i2c_client *i2c_client;
-		struct spi_device *spi_device;
-	};
-	int (*read_dev)(struct wm8350 *wm8350, char reg, int size, void *dest);
-	int (*write_dev)(struct wm8350 *wm8350, char reg, int size,
-			 void *src);
+	struct regmap *regmap;
 	u16 *reg_cache;
 
 	struct mutex auxadc_mutex;
-- 
cgit v1.3-7-g2ca7


From cc7a727941193e3e59be2e9f6522eb78bc7ee909 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Mon, 7 May 2012 10:03:22 +0100
Subject: mfd: Read CUST_ID from the wm8994 device

Read CUST_ID from the device and log it for diagnostics.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/wm8994-core.c            | 7 ++++---
 include/linux/mfd/wm8994/core.h      | 1 +
 include/linux/mfd/wm8994/registers.h | 3 +++
 3 files changed, 8 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/wm8994-core.c b/drivers/mfd/wm8994-core.c
index 9d7ca1e978fa..60e617549edd 100644
--- a/drivers/mfd/wm8994-core.c
+++ b/drivers/mfd/wm8994-core.c
@@ -500,7 +500,8 @@ static __devinit int wm8994_device_init(struct wm8994 *wm8994, int irq)
 			ret);
 		goto err_enable;
 	}
-	wm8994->revision = ret;
+	wm8994->revision = ret & WM8994_CHIP_REV_MASK;
+	wm8994->cust_id = (ret & WM8994_CUST_ID_MASK) >> WM8994_CUST_ID_SHIFT;
 
 	switch (wm8994->type) {
 	case WM8994:
@@ -553,8 +554,8 @@ static __devinit int wm8994_device_init(struct wm8994 *wm8994, int irq)
 		break;
 	}
 
-	dev_info(wm8994->dev, "%s revision %c\n", devname,
-		 'A' + wm8994->revision);
+	dev_info(wm8994->dev, "%s revision %c CUST_ID %02x\n", devname,
+		 'A' + wm8994->revision, wm8994->cust_id);
 
 	switch (wm8994->type) {
 	case WM1811:
diff --git a/include/linux/mfd/wm8994/core.h b/include/linux/mfd/wm8994/core.h
index 9eff2a351ec5..d41bc7b8a86a 100644
--- a/include/linux/mfd/wm8994/core.h
+++ b/include/linux/mfd/wm8994/core.h
@@ -57,6 +57,7 @@ struct wm8994 {
 
 	enum wm8994_type type;
 	int revision;
+	int cust_id;
 
 	struct device *dev;
 	struct regmap *regmap;
diff --git a/include/linux/mfd/wm8994/registers.h b/include/linux/mfd/wm8994/registers.h
index 86e6a032a078..053548961c15 100644
--- a/include/linux/mfd/wm8994/registers.h
+++ b/include/linux/mfd/wm8994/registers.h
@@ -2212,6 +2212,9 @@
 /*
  * R256 (0x100) - Chip Revision
  */
+#define WM8994_CUST_ID_MASK                     0xFF00  /* CUST_ID - [15:8] */
+#define WM8994_CUST_ID_SHIFT                         8  /* CUST_ID - [15:8] */
+#define WM8994_CUST_ID_WIDTH                         8  /* CUST_ID - [15:8] */
 #define WM8994_CHIP_REV_MASK                    0x000F  /* CHIP_REV - [3:0] */
 #define WM8994_CHIP_REV_SHIFT                        0  /* CHIP_REV - [3:0] */
 #define WM8994_CHIP_REV_WIDTH                        4  /* CHIP_REV - [3:0] */
-- 
cgit v1.3-7-g2ca7


From f0948f59dbc8e725a96ba16da666e8f5cdd43ba8 Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Thu, 3 May 2012 15:36:14 +0530
Subject: clk: add a fixed factor clock

Having fixed factors/dividers in hardware is a common pattern, so
add a basic clock type doing this. It basically describes a fixed
factor clock using a nominator and a denominator.

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Reviewed-by: Viresh Kumar <viresh.kumar@st.com>
Tested-by: Shawn Guo <shawn.guo@linaro.org>
[mturquette@linaro.org: constify parent_names in static init macro]
[mturquette@linaro.org: copy/paste bug from mux in static init macro]
[mturquette@linaro.org: fix error handling in clk_register_fixed_factor]
[mturquette@linaro.org: improve division accuracy; thanks to Saravana]
Signed-off-by: Mike Turquette <mturquette@linaro.org>
---
 drivers/clk/Makefile           |  2 +-
 drivers/clk/clk-fixed-factor.c | 95 ++++++++++++++++++++++++++++++++++++++++++
 include/linux/clk-private.h    | 20 +++++++++
 include/linux/clk-provider.h   | 23 ++++++++++
 4 files changed, 139 insertions(+), 1 deletion(-)
 create mode 100644 drivers/clk/clk-fixed-factor.c

(limited to 'include/linux')

diff --git a/drivers/clk/Makefile b/drivers/clk/Makefile
index 1f736bc11c4b..24aa7144811b 100644
--- a/drivers/clk/Makefile
+++ b/drivers/clk/Makefile
@@ -1,4 +1,4 @@
 
 obj-$(CONFIG_CLKDEV_LOOKUP)	+= clkdev.o
 obj-$(CONFIG_COMMON_CLK)	+= clk.o clk-fixed-rate.o clk-gate.o \
-				   clk-mux.o clk-divider.o
+				   clk-mux.o clk-divider.o clk-fixed-factor.o
diff --git a/drivers/clk/clk-fixed-factor.c b/drivers/clk/clk-fixed-factor.c
new file mode 100644
index 000000000000..c8c003e217ad
--- /dev/null
+++ b/drivers/clk/clk-fixed-factor.c
@@ -0,0 +1,95 @@
+/*
+ * Copyright (C) 2011 Sascha Hauer, Pengutronix <s.hauer@pengutronix.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Standard functionality for the common clock API.
+ */
+#include <linux/module.h>
+#include <linux/clk-provider.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+
+/*
+ * DOC: basic fixed multiplier and divider clock that cannot gate
+ *
+ * Traits of this clock:
+ * prepare - clk_prepare only ensures that parents are prepared
+ * enable - clk_enable only ensures that parents are enabled
+ * rate - rate is fixed.  clk->rate = parent->rate / div * mult
+ * parent - fixed parent.  No clk_set_parent support
+ */
+
+#define to_clk_fixed_factor(_hw) container_of(_hw, struct clk_fixed_factor, hw)
+
+static unsigned long clk_factor_recalc_rate(struct clk_hw *hw,
+		unsigned long parent_rate)
+{
+	struct clk_fixed_factor *fix = to_clk_fixed_factor(hw);
+
+	return parent_rate * fix->mult / fix->div;
+}
+
+static long clk_factor_round_rate(struct clk_hw *hw, unsigned long rate,
+				unsigned long *prate)
+{
+	struct clk_fixed_factor *fix = to_clk_fixed_factor(hw);
+
+	if (__clk_get_flags(hw->clk) & CLK_SET_RATE_PARENT) {
+		unsigned long best_parent;
+
+		best_parent = (rate / fix->mult) * fix->div;
+		*prate = __clk_round_rate(__clk_get_parent(hw->clk),
+				best_parent);
+	}
+
+	return (*prate / fix->div) * fix->mult;
+}
+
+static int clk_factor_set_rate(struct clk_hw *hw, unsigned long rate,
+				unsigned long parent_rate)
+{
+	return 0;
+}
+
+struct clk_ops clk_fixed_factor_ops = {
+	.round_rate = clk_factor_round_rate,
+	.set_rate = clk_factor_set_rate,
+	.recalc_rate = clk_factor_recalc_rate,
+};
+EXPORT_SYMBOL_GPL(clk_fixed_factor_ops);
+
+struct clk *clk_register_fixed_factor(struct device *dev, const char *name,
+		const char *parent_name, unsigned long flags,
+		unsigned int mult, unsigned int div)
+{
+	struct clk_fixed_factor *fix;
+	struct clk_init_data init;
+	struct clk *clk;
+
+	fix = kmalloc(sizeof(*fix), GFP_KERNEL);
+	if (!fix) {
+		pr_err("%s: could not allocate fixed factor clk\n", __func__);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	/* struct clk_fixed_factor assignments */
+	fix->mult = mult;
+	fix->div = div;
+	fix->hw.init = &init;
+
+	init.name = name;
+	init.ops = &clk_fixed_factor_ops;
+	init.flags = flags;
+	init.parent_names = &parent_name;
+	init.num_parents = 1;
+
+	clk = clk_register(dev, &fix->hw);
+
+	if (IS_ERR(clk))
+		kfree(fix);
+
+	return clk;
+}
diff --git a/include/linux/clk-private.h b/include/linux/clk-private.h
index b258532162b8..eb3f84bc5325 100644
--- a/include/linux/clk-private.h
+++ b/include/linux/clk-private.h
@@ -143,6 +143,26 @@ struct clk {
 	DEFINE_CLK(_name, clk_mux_ops, _flags, _parent_names,	\
 			_parents);
 
+#define DEFINE_CLK_FIXED_FACTOR(_name, _parent_name,		\
+				_parent_ptr, _flags,		\
+				_mult, _div)			\
+	static struct clk _name;				\
+	static const char *_name##_parent_names[] = {		\
+		_parent_name,					\
+	};							\
+	static struct clk *_name##_parents[] = {		\
+		_parent_ptr,					\
+	};							\
+	static struct clk_fixed_factor _name##_hw = {		\
+		.hw = {						\
+			.clk = &_name,				\
+		},						\
+		.mult = _mult,					\
+		.div = _div,					\
+	};							\
+	DEFINE_CLK(_name, clk_fixed_factor_ops, _flags,		\
+			_name##_parent_names, _name##_parents);
+
 /**
  * __clk_init - initialize the data structures in a struct clk
  * @dev:	device initializing this clk, placeholder for now
diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h
index 5db3412106b3..c1c23b9ec368 100644
--- a/include/linux/clk-provider.h
+++ b/include/linux/clk-provider.h
@@ -276,6 +276,29 @@ struct clk *clk_register_mux(struct device *dev, const char *name,
 		void __iomem *reg, u8 shift, u8 width,
 		u8 clk_mux_flags, spinlock_t *lock);
 
+/**
+ * struct clk_fixed_factor - fixed multiplier and divider clock
+ *
+ * @hw:		handle between common and hardware-specific interfaces
+ * @mult:	multiplier
+ * @div:	divider
+ *
+ * Clock with a fixed multiplier and divider. The output frequency is the
+ * parent clock rate divided by div and multiplied by mult.
+ * Implements .recalc_rate, .set_rate and .round_rate
+ */
+
+struct clk_fixed_factor {
+	struct clk_hw	hw;
+	unsigned int	mult;
+	unsigned int	div;
+};
+
+extern struct clk_ops clk_fixed_factor_ops;
+struct clk *clk_register_fixed_factor(struct device *dev, const char *name,
+		const char *parent_name, unsigned long flags,
+		unsigned int mult, unsigned int div);
+
 /**
  * clk_register - allocate a new clock, register it and return an opaque cookie
  * @dev: device that is registering this clock
-- 
cgit v1.3-7-g2ca7


From 4574b886698dfad6209102fed6136622b5fe1c21 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Fri, 6 Apr 2012 17:17:26 +0200
Subject: ARM: Orion: SPI: Add clk/clkdev support.

Remove now redundant tclk from SPI platform data. This makes the platform
data empty, so remove it.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Tested-by: Jamie Lentin <jm@lentin.co.uk>
Signed-off-by: Mike Turquette <mturquette@linaro.org>
---
 arch/arm/mach-dove/common.c                    |  6 ++--
 arch/arm/mach-dove/dove-db-setup.c             |  1 -
 arch/arm/mach-kirkwood/board-dreamplug.c       |  1 -
 arch/arm/mach-kirkwood/common.c                | 10 +++++--
 arch/arm/mach-kirkwood/mv88f6281gtw_ge-setup.c |  1 -
 arch/arm/mach-kirkwood/rd88f6192-nas-setup.c   |  1 -
 arch/arm/mach-kirkwood/t5325-setup.c           |  1 -
 arch/arm/mach-kirkwood/tsx1x-common.c          |  1 -
 arch/arm/mach-mv78xx0/common.c                 |  2 ++
 arch/arm/mach-orion5x/common.c                 |  4 ++-
 arch/arm/mach-orion5x/rd88f6183ap-ge-setup.c   |  1 -
 arch/arm/plat-orion/common.c                   | 38 ++++++++++++++++----------
 arch/arm/plat-orion/include/plat/common.h      | 11 +++++---
 drivers/spi/spi-orion.c                        | 30 +++++++++++++++-----
 include/linux/spi/orion_spi.h                  | 17 ------------
 15 files changed, 70 insertions(+), 55 deletions(-)
 delete mode 100644 include/linux/spi/orion_spi.h

(limited to 'include/linux')

diff --git a/arch/arm/mach-dove/common.c b/arch/arm/mach-dove/common.c
index 63fe6e612e98..da5b4047464d 100644
--- a/arch/arm/mach-dove/common.c
+++ b/arch/arm/mach-dove/common.c
@@ -76,6 +76,8 @@ static void __init clk_init(void)
 {
 	tclk = clk_register_fixed_rate(NULL, "tclk", NULL, CLK_IS_ROOT,
 				       get_tclk());
+
+	orion_clkdev_init(tclk);
 }
 
 /*****************************************************************************
@@ -162,12 +164,12 @@ void __init dove_uart3_init(void)
  ****************************************************************************/
 void __init dove_spi0_init(void)
 {
-	orion_spi_init(DOVE_SPI0_PHYS_BASE, get_tclk());
+	orion_spi_init(DOVE_SPI0_PHYS_BASE);
 }
 
 void __init dove_spi1_init(void)
 {
-	orion_spi_1_init(DOVE_SPI1_PHYS_BASE, get_tclk());
+	orion_spi_1_init(DOVE_SPI1_PHYS_BASE);
 }
 
 /*****************************************************************************
diff --git a/arch/arm/mach-dove/dove-db-setup.c b/arch/arm/mach-dove/dove-db-setup.c
index ea77ae430b2d..bc2867f11346 100644
--- a/arch/arm/mach-dove/dove-db-setup.c
+++ b/arch/arm/mach-dove/dove-db-setup.c
@@ -20,7 +20,6 @@
 #include <linux/i2c.h>
 #include <linux/pci.h>
 #include <linux/spi/spi.h>
-#include <linux/spi/orion_spi.h>
 #include <linux/spi/flash.h>
 #include <linux/gpio.h>
 #include <asm/mach-types.h>
diff --git a/arch/arm/mach-kirkwood/board-dreamplug.c b/arch/arm/mach-kirkwood/board-dreamplug.c
index 985453994dd3..55e357ab2923 100644
--- a/arch/arm/mach-kirkwood/board-dreamplug.c
+++ b/arch/arm/mach-kirkwood/board-dreamplug.c
@@ -27,7 +27,6 @@
 #include <linux/mtd/physmap.h>
 #include <linux/spi/flash.h>
 #include <linux/spi/spi.h>
-#include <linux/spi/orion_spi.h>
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
diff --git a/arch/arm/mach-kirkwood/common.c b/arch/arm/mach-kirkwood/common.c
index 57b8d1ef3093..476e0b941db7 100644
--- a/arch/arm/mach-kirkwood/common.c
+++ b/arch/arm/mach-kirkwood/common.c
@@ -86,10 +86,12 @@ static struct clk __init *kirkwood_register_gate(const char *name, u8 bit_idx)
 
 void __init kirkwood_clk_init(void)
 {
+	struct clk *runit;
+
 	tclk = clk_register_fixed_rate(NULL, "tclk", NULL,
 				       CLK_IS_ROOT, kirkwood_tclk);
 
-	kirkwood_register_gate("runit",  CGC_BIT_RUNIT);
+	runit = kirkwood_register_gate("runit",  CGC_BIT_RUNIT);
 	kirkwood_register_gate("ge0",    CGC_BIT_GE0);
 	kirkwood_register_gate("ge1",    CGC_BIT_GE1);
 	kirkwood_register_gate("sata0",  CGC_BIT_SATA0);
@@ -104,6 +106,10 @@ void __init kirkwood_clk_init(void)
 	kirkwood_register_gate("audio",  CGC_BIT_AUDIO);
 	kirkwood_register_gate("tdm",    CGC_BIT_TDM);
 	kirkwood_register_gate("tsu",    CGC_BIT_TSU);
+
+	/* clkdev entries, mapping clks to devices */
+	orion_clkdev_add(NULL, "orion_spi.0", runit);
+	orion_clkdev_add(NULL, "orion_spi.1", runit);
 }
 
 /*****************************************************************************
@@ -270,7 +276,7 @@ void __init kirkwood_sdio_init(struct mvsdio_platform_data *mvsdio_data)
 void __init kirkwood_spi_init()
 {
 	kirkwood_clk_ctrl |= CGC_RUNIT;
-	orion_spi_init(SPI_PHYS_BASE, kirkwood_tclk);
+	orion_spi_init(SPI_PHYS_BASE);
 }
 
 
diff --git a/arch/arm/mach-kirkwood/mv88f6281gtw_ge-setup.c b/arch/arm/mach-kirkwood/mv88f6281gtw_ge-setup.c
index 85f6169c2484..6d8364a97810 100644
--- a/arch/arm/mach-kirkwood/mv88f6281gtw_ge-setup.c
+++ b/arch/arm/mach-kirkwood/mv88f6281gtw_ge-setup.c
@@ -23,7 +23,6 @@
 #include <linux/gpio_keys.h>
 #include <linux/spi/flash.h>
 #include <linux/spi/spi.h>
-#include <linux/spi/orion_spi.h>
 #include <net/dsa.h>
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
diff --git a/arch/arm/mach-kirkwood/rd88f6192-nas-setup.c b/arch/arm/mach-kirkwood/rd88f6192-nas-setup.c
index fd2c9c8b6831..f742a66a7045 100644
--- a/arch/arm/mach-kirkwood/rd88f6192-nas-setup.c
+++ b/arch/arm/mach-kirkwood/rd88f6192-nas-setup.c
@@ -16,7 +16,6 @@
 #include <linux/gpio.h>
 #include <linux/spi/flash.h>
 #include <linux/spi/spi.h>
-#include <linux/spi/orion_spi.h>
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
 #include <mach/kirkwood.h>
diff --git a/arch/arm/mach-kirkwood/t5325-setup.c b/arch/arm/mach-kirkwood/t5325-setup.c
index f9d2a11b7f96..bad738e44044 100644
--- a/arch/arm/mach-kirkwood/t5325-setup.c
+++ b/arch/arm/mach-kirkwood/t5325-setup.c
@@ -16,7 +16,6 @@
 #include <linux/mtd/physmap.h>
 #include <linux/spi/flash.h>
 #include <linux/spi/spi.h>
-#include <linux/spi/orion_spi.h>
 #include <linux/i2c.h>
 #include <linux/mv643xx_eth.h>
 #include <linux/ata_platform.h>
diff --git a/arch/arm/mach-kirkwood/tsx1x-common.c b/arch/arm/mach-kirkwood/tsx1x-common.c
index 24294b2bc469..8943ede29b44 100644
--- a/arch/arm/mach-kirkwood/tsx1x-common.c
+++ b/arch/arm/mach-kirkwood/tsx1x-common.c
@@ -4,7 +4,6 @@
 #include <linux/mtd/physmap.h>
 #include <linux/spi/flash.h>
 #include <linux/spi/spi.h>
-#include <linux/spi/orion_spi.h>
 #include <linux/serial_reg.h>
 #include <mach/kirkwood.h>
 #include "common.h"
diff --git a/arch/arm/mach-mv78xx0/common.c b/arch/arm/mach-mv78xx0/common.c
index 73733207f5a9..4c24b46520aa 100644
--- a/arch/arm/mach-mv78xx0/common.c
+++ b/arch/arm/mach-mv78xx0/common.c
@@ -175,6 +175,8 @@ static void __init clk_init(void)
 {
 	tclk = clk_register_fixed_rate(NULL, "tclk", NULL, CLK_IS_ROOT,
 				       get_tclk());
+
+	orion_clkdev_init(tclk);
 }
 
 /*****************************************************************************
diff --git a/arch/arm/mach-orion5x/common.c b/arch/arm/mach-orion5x/common.c
index 81660522c6b4..2ef82e2f511d 100644
--- a/arch/arm/mach-orion5x/common.c
+++ b/arch/arm/mach-orion5x/common.c
@@ -79,6 +79,8 @@ static void __init clk_init(void)
 {
 	tclk = clk_register_fixed_rate(NULL, "tclk", NULL, CLK_IS_ROOT,
 				       orion5x_tclk);
+
+	orion_clkdev_init(tclk);
 }
 
 /*****************************************************************************
@@ -144,7 +146,7 @@ void __init orion5x_sata_init(struct mv_sata_platform_data *sata_data)
  ****************************************************************************/
 void __init orion5x_spi_init()
 {
-	orion_spi_init(SPI_PHYS_BASE, orion5x_tclk);
+	orion_spi_init(SPI_PHYS_BASE);
 }
 
 
diff --git a/arch/arm/mach-orion5x/rd88f6183ap-ge-setup.c b/arch/arm/mach-orion5x/rd88f6183ap-ge-setup.c
index 2c5fab00d205..7b97a9a211ed 100644
--- a/arch/arm/mach-orion5x/rd88f6183ap-ge-setup.c
+++ b/arch/arm/mach-orion5x/rd88f6183ap-ge-setup.c
@@ -16,7 +16,6 @@
 #include <linux/mtd/physmap.h>
 #include <linux/mv643xx_eth.h>
 #include <linux/spi/spi.h>
-#include <linux/spi/orion_spi.h>
 #include <linux/spi/flash.h>
 #include <linux/ethtool.h>
 #include <net/dsa.h>
diff --git a/arch/arm/plat-orion/common.c b/arch/arm/plat-orion/common.c
index 4fdd2e7e74a1..bbe50a948710 100644
--- a/arch/arm/plat-orion/common.c
+++ b/arch/arm/plat-orion/common.c
@@ -19,12 +19,32 @@
 #include <linux/mv643xx_eth.h>
 #include <linux/mv643xx_i2c.h>
 #include <net/dsa.h>
-#include <linux/spi/orion_spi.h>
 #include <plat/orion_wdt.h>
 #include <plat/mv_xor.h>
 #include <plat/ehci-orion.h>
 #include <mach/bridge-regs.h>
 
+/* Create a clkdev entry for a given device/clk */
+void __init orion_clkdev_add(const char *con_id, const char *dev_id,
+			     struct clk *clk)
+{
+	struct clk_lookup *cl;
+
+	cl = clkdev_alloc(clk, con_id, dev_id);
+	if (cl)
+		clkdev_add(cl);
+}
+
+/* Create clkdev entries for all orion platforms except kirkwood.
+   Kirkwood has gated clocks for some of its peripherals, so creates
+   its own clkdev entries. For all the other orion devices, create
+   clkdev entries to the tclk. */
+void __init orion_clkdev_init(struct clk *tclk)
+{
+	orion_clkdev_add(NULL, "orion_spi.0", tclk);
+	orion_clkdev_add(NULL, "orion_spi.1", tclk);
+}
+
 /* Fill in the resources structure and link it into the platform
    device structure. There is always a memory region, and nearly
    always an interrupt.*/
@@ -523,44 +543,32 @@ void __init orion_i2c_1_init(unsigned long mapbase,
 /*****************************************************************************
  * SPI
  ****************************************************************************/
-static struct orion_spi_info orion_spi_plat_data;
 static struct resource orion_spi_resources;
 
 static struct platform_device orion_spi = {
 	.name		= "orion_spi",
 	.id		= 0,
-	.dev		= {
-		.platform_data	= &orion_spi_plat_data,
-	},
 };
 
-static struct orion_spi_info orion_spi_1_plat_data;
 static struct resource orion_spi_1_resources;
 
 static struct platform_device orion_spi_1 = {
 	.name		= "orion_spi",
 	.id		= 1,
-	.dev		= {
-		.platform_data	= &orion_spi_1_plat_data,
-	},
 };
 
 /* Note: The SPI silicon core does have interrupts. However the
  * current Linux software driver does not use interrupts. */
 
-void __init orion_spi_init(unsigned long mapbase,
-			   unsigned long tclk)
+void __init orion_spi_init(unsigned long mapbase)
 {
-	orion_spi_plat_data.tclk = tclk;
 	fill_resources(&orion_spi, &orion_spi_resources,
 		       mapbase, SZ_512 - 1, NO_IRQ);
 	platform_device_register(&orion_spi);
 }
 
-void __init orion_spi_1_init(unsigned long mapbase,
-			     unsigned long tclk)
+void __init orion_spi_1_init(unsigned long mapbase)
 {
-	orion_spi_1_plat_data.tclk = tclk;
 	fill_resources(&orion_spi_1, &orion_spi_1_resources,
 		       mapbase, SZ_512 - 1, NO_IRQ);
 	platform_device_register(&orion_spi_1);
diff --git a/arch/arm/plat-orion/include/plat/common.h b/arch/arm/plat-orion/include/plat/common.h
index a7fa005a5a0e..d188a1aa6f56 100644
--- a/arch/arm/plat-orion/include/plat/common.h
+++ b/arch/arm/plat-orion/include/plat/common.h
@@ -70,11 +70,9 @@ void __init orion_i2c_1_init(unsigned long mapbase,
 			     unsigned long irq,
 			     unsigned long freq_m);
 
-void __init orion_spi_init(unsigned long mapbase,
-			   unsigned long tclk);
+void __init orion_spi_init(unsigned long mapbase);
 
-void __init orion_spi_1_init(unsigned long mapbase,
-			     unsigned long tclk);
+void __init orion_spi_1_init(unsigned long mapbase);
 
 void __init orion_wdt_init(unsigned long tclk);
 
@@ -106,4 +104,9 @@ void __init orion_crypto_init(unsigned long mapbase,
 			      unsigned long srambase,
 			      unsigned long sram_size,
 			      unsigned long irq);
+
+void __init orion_clkdev_add(const char *con_id, const char *dev_id,
+			     struct clk *clk);
+
+void __init orion_clkdev_init(struct clk *tclk);
 #endif
diff --git a/drivers/spi/spi-orion.c b/drivers/spi/spi-orion.c
index e496f799b7a9..dfd04e91fa6d 100644
--- a/drivers/spi/spi-orion.c
+++ b/drivers/spi/spi-orion.c
@@ -16,8 +16,8 @@
 #include <linux/err.h>
 #include <linux/io.h>
 #include <linux/spi/spi.h>
-#include <linux/spi/orion_spi.h>
 #include <linux/module.h>
+#include <linux/clk.h>
 #include <asm/unaligned.h>
 
 #define DRIVER_NAME			"orion_spi"
@@ -46,6 +46,7 @@ struct orion_spi {
 	unsigned int		max_speed;
 	unsigned int		min_speed;
 	struct orion_spi_info	*spi_info;
+	struct clk              *clk;
 };
 
 static struct workqueue_struct *orion_spi_wq;
@@ -104,7 +105,7 @@ static int orion_spi_baudrate_set(struct spi_device *spi, unsigned int speed)
 
 	orion_spi = spi_master_get_devdata(spi->master);
 
-	tclk_hz = orion_spi->spi_info->tclk;
+	tclk_hz = clk_get_rate(orion_spi->clk);
 
 	/*
 	 * the supported rates are: 4,6,8...30
@@ -450,6 +451,7 @@ static int __init orion_spi_probe(struct platform_device *pdev)
 	struct orion_spi *spi;
 	struct resource *r;
 	struct orion_spi_info *spi_info;
+	unsigned long tclk_hz;
 	int status = 0;
 
 	spi_info = pdev->dev.platform_data;
@@ -476,19 +478,28 @@ static int __init orion_spi_probe(struct platform_device *pdev)
 	spi->master = master;
 	spi->spi_info = spi_info;
 
-	spi->max_speed = DIV_ROUND_UP(spi_info->tclk, 4);
-	spi->min_speed = DIV_ROUND_UP(spi_info->tclk, 30);
+	spi->clk = clk_get(&pdev->dev, NULL);
+	if (IS_ERR(spi->clk)) {
+		status = PTR_ERR(spi->clk);
+		goto out;
+	}
+
+	clk_prepare(spi->clk);
+	clk_enable(spi->clk);
+	tclk_hz = clk_get_rate(spi->clk);
+	spi->max_speed = DIV_ROUND_UP(tclk_hz, 4);
+	spi->min_speed = DIV_ROUND_UP(tclk_hz, 30);
 
 	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (r == NULL) {
 		status = -ENODEV;
-		goto out;
+		goto out_rel_clk;
 	}
 
 	if (!request_mem_region(r->start, resource_size(r),
 				dev_name(&pdev->dev))) {
 		status = -EBUSY;
-		goto out;
+		goto out_rel_clk;
 	}
 	spi->base = ioremap(r->start, SZ_1K);
 
@@ -508,7 +519,9 @@ static int __init orion_spi_probe(struct platform_device *pdev)
 
 out_rel_mem:
 	release_mem_region(r->start, resource_size(r));
-
+out_rel_clk:
+	clk_disable_unprepare(spi->clk);
+	clk_put(spi->clk);
 out:
 	spi_master_put(master);
 	return status;
@@ -526,6 +539,9 @@ static int __exit orion_spi_remove(struct platform_device *pdev)
 
 	cancel_work_sync(&spi->work);
 
+	clk_disable_unprepare(spi->clk);
+	clk_put(spi->clk);
+
 	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	release_mem_region(r->start, resource_size(r));
 
diff --git a/include/linux/spi/orion_spi.h b/include/linux/spi/orion_spi.h
deleted file mode 100644
index b4d9fa6f797c..000000000000
--- a/include/linux/spi/orion_spi.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/*
- * orion_spi.h
- *
- * This file is licensed under the terms of the GNU General Public
- * License version 2. This program is licensed "as is" without any
- * warranty of any kind, whether express or implied.
- */
-
-#ifndef __LINUX_SPI_ORION_SPI_H
-#define __LINUX_SPI_ORION_SPI_H
-
-struct orion_spi_info {
-	u32	tclk;		/* no <linux/clk.h> support yet */
-};
-
-
-#endif
-- 
cgit v1.3-7-g2ca7


From 452503ebc7cc4cce5b9e52cf2f03255365a53234 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Sat, 24 Dec 2011 01:24:24 +0100
Subject: ARM: Orion: Eth: Add clk/clkdev support.

The t_clk is moved from the shared part of the ethernet driver into
the per port section. Each port can have its own gated clock, which it
needs to enable/disable, as oppossed to there being one clock shared
by all ports. In practice, only kirkwood supports this at the moment.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Tested-by: Jamie Lentin <jm@lentin.co.uk>
Signed-off-by: Mike Turquette <mturquette@linaro.org>
---
 arch/arm/mach-dove/common.c                |  3 +--
 arch/arm/mach-kirkwood/common.c            | 12 +++++----
 arch/arm/mach-mv78xx0/common.c             |  8 +++---
 arch/arm/mach-orion5x/common.c             |  2 +-
 arch/arm/plat-orion/common.c               | 26 +++++++++---------
 arch/arm/plat-orion/include/plat/common.h  | 13 ++++-----
 drivers/net/ethernet/marvell/mv643xx_eth.c | 42 ++++++++++++++++++++++--------
 include/linux/mv643xx_eth.h                |  1 -
 8 files changed, 61 insertions(+), 46 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-dove/common.c b/arch/arm/mach-dove/common.c
index da5b4047464d..02766960480d 100644
--- a/arch/arm/mach-dove/common.c
+++ b/arch/arm/mach-dove/common.c
@@ -102,8 +102,7 @@ void __init dove_ehci1_init(void)
 void __init dove_ge00_init(struct mv643xx_eth_platform_data *eth_data)
 {
 	orion_ge00_init(eth_data,
-			DOVE_GE00_PHYS_BASE, IRQ_DOVE_GE00_SUM,
-			0, get_tclk());
+			DOVE_GE00_PHYS_BASE, IRQ_DOVE_GE00_SUM, 0);
 }
 
 /*****************************************************************************
diff --git a/arch/arm/mach-kirkwood/common.c b/arch/arm/mach-kirkwood/common.c
index 476e0b941db7..c22354405297 100644
--- a/arch/arm/mach-kirkwood/common.c
+++ b/arch/arm/mach-kirkwood/common.c
@@ -86,14 +86,14 @@ static struct clk __init *kirkwood_register_gate(const char *name, u8 bit_idx)
 
 void __init kirkwood_clk_init(void)
 {
-	struct clk *runit;
+	struct clk *runit, *ge0, *ge1;
 
 	tclk = clk_register_fixed_rate(NULL, "tclk", NULL,
 				       CLK_IS_ROOT, kirkwood_tclk);
 
 	runit = kirkwood_register_gate("runit",  CGC_BIT_RUNIT);
-	kirkwood_register_gate("ge0",    CGC_BIT_GE0);
-	kirkwood_register_gate("ge1",    CGC_BIT_GE1);
+	ge0 = kirkwood_register_gate("ge0",    CGC_BIT_GE0);
+	ge1 = kirkwood_register_gate("ge1",    CGC_BIT_GE1);
 	kirkwood_register_gate("sata0",  CGC_BIT_SATA0);
 	kirkwood_register_gate("sata1",  CGC_BIT_SATA1);
 	kirkwood_register_gate("usb0",   CGC_BIT_USB0);
@@ -110,6 +110,8 @@ void __init kirkwood_clk_init(void)
 	/* clkdev entries, mapping clks to devices */
 	orion_clkdev_add(NULL, "orion_spi.0", runit);
 	orion_clkdev_add(NULL, "orion_spi.1", runit);
+	orion_clkdev_add(NULL, MV643XX_ETH_NAME ".0", ge0);
+	orion_clkdev_add(NULL, MV643XX_ETH_NAME ".1", ge1);
 }
 
 /*****************************************************************************
@@ -131,7 +133,7 @@ void __init kirkwood_ge00_init(struct mv643xx_eth_platform_data *eth_data)
 
 	orion_ge00_init(eth_data,
 			GE00_PHYS_BASE, IRQ_KIRKWOOD_GE00_SUM,
-			IRQ_KIRKWOOD_GE00_ERR, kirkwood_tclk);
+			IRQ_KIRKWOOD_GE00_ERR);
 }
 
 
@@ -145,7 +147,7 @@ void __init kirkwood_ge01_init(struct mv643xx_eth_platform_data *eth_data)
 
 	orion_ge01_init(eth_data,
 			GE01_PHYS_BASE, IRQ_KIRKWOOD_GE01_SUM,
-			IRQ_KIRKWOOD_GE01_ERR, kirkwood_tclk);
+			IRQ_KIRKWOOD_GE01_ERR);
 }
 
 
diff --git a/arch/arm/mach-mv78xx0/common.c b/arch/arm/mach-mv78xx0/common.c
index 4c24b46520aa..ad4d037bbcd3 100644
--- a/arch/arm/mach-mv78xx0/common.c
+++ b/arch/arm/mach-mv78xx0/common.c
@@ -213,7 +213,7 @@ void __init mv78xx0_ge00_init(struct mv643xx_eth_platform_data *eth_data)
 {
 	orion_ge00_init(eth_data,
 			GE00_PHYS_BASE, IRQ_MV78XX0_GE00_SUM,
-			IRQ_MV78XX0_GE_ERR, get_tclk());
+			IRQ_MV78XX0_GE_ERR);
 }
 
 
@@ -224,7 +224,7 @@ void __init mv78xx0_ge01_init(struct mv643xx_eth_platform_data *eth_data)
 {
 	orion_ge01_init(eth_data,
 			GE01_PHYS_BASE, IRQ_MV78XX0_GE01_SUM,
-			NO_IRQ, get_tclk());
+			NO_IRQ);
 }
 
 
@@ -248,7 +248,7 @@ void __init mv78xx0_ge10_init(struct mv643xx_eth_platform_data *eth_data)
 
 	orion_ge10_init(eth_data,
 			GE10_PHYS_BASE, IRQ_MV78XX0_GE10_SUM,
-			NO_IRQ, get_tclk());
+			NO_IRQ);
 }
 
 
@@ -272,7 +272,7 @@ void __init mv78xx0_ge11_init(struct mv643xx_eth_platform_data *eth_data)
 
 	orion_ge11_init(eth_data,
 			GE11_PHYS_BASE, IRQ_MV78XX0_GE11_SUM,
-			NO_IRQ, get_tclk());
+			NO_IRQ);
 }
 
 /*****************************************************************************
diff --git a/arch/arm/mach-orion5x/common.c b/arch/arm/mach-orion5x/common.c
index 2ef82e2f511d..3fc731824e9c 100644
--- a/arch/arm/mach-orion5x/common.c
+++ b/arch/arm/mach-orion5x/common.c
@@ -109,7 +109,7 @@ void __init orion5x_eth_init(struct mv643xx_eth_platform_data *eth_data)
 {
 	orion_ge00_init(eth_data,
 			ORION5X_ETH_PHYS_BASE, IRQ_ORION5X_ETH_SUM,
-			IRQ_ORION5X_ETH_ERR, orion5x_tclk);
+			IRQ_ORION5X_ETH_ERR);
 }
 
 
diff --git a/arch/arm/plat-orion/common.c b/arch/arm/plat-orion/common.c
index bbe50a948710..a33733bb380d 100644
--- a/arch/arm/plat-orion/common.c
+++ b/arch/arm/plat-orion/common.c
@@ -43,6 +43,10 @@ void __init orion_clkdev_init(struct clk *tclk)
 {
 	orion_clkdev_add(NULL, "orion_spi.0", tclk);
 	orion_clkdev_add(NULL, "orion_spi.1", tclk);
+	orion_clkdev_add(NULL, MV643XX_ETH_NAME ".0", tclk);
+	orion_clkdev_add(NULL, MV643XX_ETH_NAME ".1", tclk);
+	orion_clkdev_add(NULL, MV643XX_ETH_NAME ".2", tclk);
+	orion_clkdev_add(NULL, MV643XX_ETH_NAME ".3", tclk);
 }
 
 /* Fill in the resources structure and link it into the platform
@@ -225,13 +229,11 @@ void __init orion_rtc_init(unsigned long mapbase,
  ****************************************************************************/
 static __init void ge_complete(
 	struct mv643xx_eth_shared_platform_data *orion_ge_shared_data,
-	int tclk,
 	struct resource *orion_ge_resource, unsigned long irq,
 	struct platform_device *orion_ge_shared,
 	struct mv643xx_eth_platform_data *eth_data,
 	struct platform_device *orion_ge)
 {
-	orion_ge_shared_data->t_clk = tclk;
 	orion_ge_resource->start = irq;
 	orion_ge_resource->end = irq;
 	eth_data->shared = orion_ge_shared;
@@ -282,12 +284,11 @@ static struct platform_device orion_ge00 = {
 void __init orion_ge00_init(struct mv643xx_eth_platform_data *eth_data,
 			    unsigned long mapbase,
 			    unsigned long irq,
-			    unsigned long irq_err,
-			    int tclk)
+			    unsigned long irq_err)
 {
 	fill_resources(&orion_ge00_shared, orion_ge00_shared_resources,
 		       mapbase + 0x2000, SZ_16K - 1, irq_err);
-	ge_complete(&orion_ge00_shared_data, tclk,
+	ge_complete(&orion_ge00_shared_data,
 		    orion_ge00_resources, irq, &orion_ge00_shared,
 		    eth_data, &orion_ge00);
 }
@@ -335,12 +336,11 @@ static struct platform_device orion_ge01 = {
 void __init orion_ge01_init(struct mv643xx_eth_platform_data *eth_data,
 			    unsigned long mapbase,
 			    unsigned long irq,
-			    unsigned long irq_err,
-			    int tclk)
+			    unsigned long irq_err)
 {
 	fill_resources(&orion_ge01_shared, orion_ge01_shared_resources,
 		       mapbase + 0x2000, SZ_16K - 1, irq_err);
-	ge_complete(&orion_ge01_shared_data, tclk,
+	ge_complete(&orion_ge01_shared_data,
 		    orion_ge01_resources, irq, &orion_ge01_shared,
 		    eth_data, &orion_ge01);
 }
@@ -388,12 +388,11 @@ static struct platform_device orion_ge10 = {
 void __init orion_ge10_init(struct mv643xx_eth_platform_data *eth_data,
 			    unsigned long mapbase,
 			    unsigned long irq,
-			    unsigned long irq_err,
-			    int tclk)
+			    unsigned long irq_err)
 {
 	fill_resources(&orion_ge10_shared, orion_ge10_shared_resources,
 		       mapbase + 0x2000, SZ_16K - 1, irq_err);
-	ge_complete(&orion_ge10_shared_data, tclk,
+	ge_complete(&orion_ge10_shared_data,
 		    orion_ge10_resources, irq, &orion_ge10_shared,
 		    eth_data, &orion_ge10);
 }
@@ -441,12 +440,11 @@ static struct platform_device orion_ge11 = {
 void __init orion_ge11_init(struct mv643xx_eth_platform_data *eth_data,
 			    unsigned long mapbase,
 			    unsigned long irq,
-			    unsigned long irq_err,
-			    int tclk)
+			    unsigned long irq_err)
 {
 	fill_resources(&orion_ge11_shared, orion_ge11_shared_resources,
 		       mapbase + 0x2000, SZ_16K - 1, irq_err);
-	ge_complete(&orion_ge11_shared_data, tclk,
+	ge_complete(&orion_ge11_shared_data,
 		    orion_ge11_resources, irq, &orion_ge11_shared,
 		    eth_data, &orion_ge11);
 }
diff --git a/arch/arm/plat-orion/include/plat/common.h b/arch/arm/plat-orion/include/plat/common.h
index d188a1aa6f56..00d8761c7d28 100644
--- a/arch/arm/plat-orion/include/plat/common.h
+++ b/arch/arm/plat-orion/include/plat/common.h
@@ -39,29 +39,26 @@ void __init orion_rtc_init(unsigned long mapbase,
 void __init orion_ge00_init(struct mv643xx_eth_platform_data *eth_data,
 			    unsigned long mapbase,
 			    unsigned long irq,
-			    unsigned long irq_err,
-			    int tclk);
+			    unsigned long irq_err);
 
 void __init orion_ge01_init(struct mv643xx_eth_platform_data *eth_data,
 			    unsigned long mapbase,
 			    unsigned long irq,
-			    unsigned long irq_err,
-			    int tclk);
+			    unsigned long irq_err);
 
 void __init orion_ge10_init(struct mv643xx_eth_platform_data *eth_data,
 			    unsigned long mapbase,
 			    unsigned long irq,
-			    unsigned long irq_err,
-			    int tclk);
+			    unsigned long irq_err);
 
 void __init orion_ge11_init(struct mv643xx_eth_platform_data *eth_data,
 			    unsigned long mapbase,
 			    unsigned long irq,
-			    unsigned long irq_err,
-			    int tclk);
+			    unsigned long irq_err);
 
 void __init orion_ge00_switch_init(struct dsa_platform_data *d,
 				   int irq);
+
 void __init orion_i2c_init(unsigned long mapbase,
 			   unsigned long irq,
 			   unsigned long freq_m);
diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c
index 5e1ca0f05090..99cd233266ac 100644
--- a/drivers/net/ethernet/marvell/mv643xx_eth.c
+++ b/drivers/net/ethernet/marvell/mv643xx_eth.c
@@ -57,6 +57,7 @@
 #include <linux/types.h>
 #include <linux/inet_lro.h>
 #include <linux/slab.h>
+#include <linux/clk.h>
 
 static char mv643xx_eth_driver_name[] = "mv643xx_eth";
 static char mv643xx_eth_driver_version[] = "1.4";
@@ -289,10 +290,10 @@ struct mv643xx_eth_shared_private {
 	/*
 	 * Hardware-specific parameters.
 	 */
-	unsigned int t_clk;
 	int extended_rx_coal_limit;
 	int tx_bw_control;
 	int tx_csum_limit;
+
 };
 
 #define TX_BW_CONTROL_ABSENT		0
@@ -431,6 +432,12 @@ struct mv643xx_eth_private {
 	int tx_desc_sram_size;
 	int txq_count;
 	struct tx_queue txq[8];
+
+	/*
+	 * Hardware-specific parameters.
+	 */
+	struct clk *clk;
+	unsigned int t_clk;
 };
 
 
@@ -1010,7 +1017,7 @@ static void tx_set_rate(struct mv643xx_eth_private *mp, int rate, int burst)
 	int mtu;
 	int bucket_size;
 
-	token_rate = ((rate / 1000) * 64) / (mp->shared->t_clk / 1000);
+	token_rate = ((rate / 1000) * 64) / (mp->t_clk / 1000);
 	if (token_rate > 1023)
 		token_rate = 1023;
 
@@ -1042,7 +1049,7 @@ static void txq_set_rate(struct tx_queue *txq, int rate, int burst)
 	int token_rate;
 	int bucket_size;
 
-	token_rate = ((rate / 1000) * 64) / (mp->shared->t_clk / 1000);
+	token_rate = ((rate / 1000) * 64) / (mp->t_clk / 1000);
 	if (token_rate > 1023)
 		token_rate = 1023;
 
@@ -1309,7 +1316,7 @@ static unsigned int get_rx_coal(struct mv643xx_eth_private *mp)
 		temp = (val & 0x003fff00) >> 8;
 
 	temp *= 64000000;
-	do_div(temp, mp->shared->t_clk);
+	do_div(temp, mp->t_clk);
 
 	return (unsigned int)temp;
 }
@@ -1319,7 +1326,7 @@ static void set_rx_coal(struct mv643xx_eth_private *mp, unsigned int usec)
 	u64 temp;
 	u32 val;
 
-	temp = (u64)usec * mp->shared->t_clk;
+	temp = (u64)usec * mp->t_clk;
 	temp += 31999999;
 	do_div(temp, 64000000);
 
@@ -1345,7 +1352,7 @@ static unsigned int get_tx_coal(struct mv643xx_eth_private *mp)
 
 	temp = (rdlp(mp, TX_FIFO_URGENT_THRESHOLD) & 0x3fff0) >> 4;
 	temp *= 64000000;
-	do_div(temp, mp->shared->t_clk);
+	do_div(temp, mp->t_clk);
 
 	return (unsigned int)temp;
 }
@@ -1354,7 +1361,7 @@ static void set_tx_coal(struct mv643xx_eth_private *mp, unsigned int usec)
 {
 	u64 temp;
 
-	temp = (u64)usec * mp->shared->t_clk;
+	temp = (u64)usec * mp->t_clk;
 	temp += 31999999;
 	do_div(temp, 64000000);
 
@@ -2662,10 +2669,6 @@ static int mv643xx_eth_shared_probe(struct platform_device *pdev)
 	if (dram)
 		mv643xx_eth_conf_mbus_windows(msp, dram);
 
-	/*
-	 * Detect hardware parameters.
-	 */
-	msp->t_clk = (pd != NULL && pd->t_clk != 0) ? pd->t_clk : 133000000;
 	msp->tx_csum_limit = (pd != NULL && pd->tx_csum_limit) ?
 					pd->tx_csum_limit : 9 * 1024;
 	infer_hw_params(msp);
@@ -2890,6 +2893,18 @@ static int mv643xx_eth_probe(struct platform_device *pdev)
 
 	mp->dev = dev;
 
+	/*
+	 * Get the clk rate, if there is one, otherwise use the default.
+	 */
+	mp->clk = clk_get(&pdev->dev, (pdev->id ? "1" : "0"));
+	if (!IS_ERR(mp->clk)) {
+		clk_prepare_enable(mp->clk);
+		mp->t_clk = clk_get_rate(mp->clk);
+	} else {
+		mp->t_clk = 133000000;
+		printk(KERN_WARNING "Unable to get clock");
+	}
+
 	set_params(mp, pd);
 	netif_set_real_num_tx_queues(dev, mp->txq_count);
 	netif_set_real_num_rx_queues(dev, mp->rxq_count);
@@ -2978,6 +2993,11 @@ static int mv643xx_eth_remove(struct platform_device *pdev)
 	if (mp->phy != NULL)
 		phy_detach(mp->phy);
 	cancel_work_sync(&mp->tx_timeout_task);
+
+	if (!IS_ERR(mp->clk)) {
+		clk_disable_unprepare(mp->clk);
+		clk_put(mp->clk);
+	}
 	free_netdev(mp->dev);
 
 	platform_set_drvdata(pdev, NULL);
diff --git a/include/linux/mv643xx_eth.h b/include/linux/mv643xx_eth.h
index 30b0c4e78f91..51bf8ada6dc0 100644
--- a/include/linux/mv643xx_eth.h
+++ b/include/linux/mv643xx_eth.h
@@ -18,7 +18,6 @@
 struct mv643xx_eth_shared_platform_data {
 	struct mbus_dram_target_info	*dram;
 	struct platform_device	*shared_smi;
-	unsigned int		t_clk;
 	/*
 	 * Max packet size for Tx IP/Layer 4 checksum, when set to 0, default
 	 * limit of 9KiB will be used.
-- 
cgit v1.3-7-g2ca7


From 35bdd29095ad614c5fb4a934bfd4f57a94dfd395 Mon Sep 17 00:00:00 2001
From: Alessandro Rubini <rubini@gnudd.com>
Date: Thu, 12 Apr 2012 10:48:44 +0200
Subject: mfd: Add driver for STA2X11 MFD block

This also introduces <asm/sta2x11.h> to export a function that is in
the base sta2x11 support patches. The header will increase with other
prototypes and constants over time.

Signed-off-by: Alessandro Rubini <rubini@gnudd.com>
Acked-by: Giancarlo Asnaghi <giancarlo.asnaghi@st.com>
Cc: Alan Cox <alan@linux.intel.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 arch/x86/include/asm/sta2x11.h  |  12 ++
 drivers/mfd/Kconfig             |   5 +
 drivers/mfd/Makefile            |   1 +
 drivers/mfd/sta2x11-mfd.c       | 467 ++++++++++++++++++++++++++++++++++++++++
 include/linux/mfd/sta2x11-mfd.h | 324 ++++++++++++++++++++++++++++
 5 files changed, 809 insertions(+)
 create mode 100644 arch/x86/include/asm/sta2x11.h
 create mode 100644 drivers/mfd/sta2x11-mfd.c
 create mode 100644 include/linux/mfd/sta2x11-mfd.h

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/sta2x11.h b/arch/x86/include/asm/sta2x11.h
new file mode 100644
index 000000000000..e9d32df89ccc
--- /dev/null
+++ b/arch/x86/include/asm/sta2x11.h
@@ -0,0 +1,12 @@
+/*
+ * Header file for STMicroelectronics ConneXt (STA2X11) IOHub
+ */
+#ifndef __ASM_STA2X11_H
+#define __ASM_STA2X11_H
+
+#include <linux/pci.h>
+
+/* This needs to be called from the MFD to configure its sub-devices */
+struct sta2x11_instance *sta2x11_get_instance(struct pci_dev *pdev);
+
+#endif /* __ASM_STA2X11_H */
diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index ef86a741b7e2..48eed22c65a5 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -906,6 +906,11 @@ config MFD_RC5T583
 	  Additional drivers must be enabled in order to use the
 	  different functionality of the device.
 
+config MFD_STA2X11
+	bool "STA2X11 multi function device support"
+	depends on STA2X11
+	select MFD_CORE
+
 config MFD_ANATOP
 	bool "Support for Freescale i.MX on-chip ANATOP controller"
 	depends on SOC_IMX6Q
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index 5dd6be7aa350..0dc55cbefa09 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -15,6 +15,7 @@ obj-$(CONFIG_MFD_DAVINCI_VOICECODEC)	+= davinci_voicecodec.o
 obj-$(CONFIG_MFD_DM355EVM_MSP)	+= dm355evm_msp.o
 obj-$(CONFIG_MFD_TI_SSP)	+= ti-ssp.o
 
+obj-$(CONFIG_MFD_STA2X11)	+= sta2x11-mfd.o
 obj-$(CONFIG_MFD_STMPE)		+= stmpe.o
 obj-$(CONFIG_STMPE_I2C)		+= stmpe-i2c.o
 obj-$(CONFIG_STMPE_SPI)		+= stmpe-spi.o
diff --git a/drivers/mfd/sta2x11-mfd.c b/drivers/mfd/sta2x11-mfd.c
new file mode 100644
index 000000000000..d31fed07aefb
--- /dev/null
+++ b/drivers/mfd/sta2x11-mfd.c
@@ -0,0 +1,467 @@
+/*
+ * Copyright (c) 2009-2011 Wind River Systems, Inc.
+ * Copyright (c) 2011 ST Microelectronics (Alessandro Rubini)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/errno.h>
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/io.h>
+#include <linux/ioport.h>
+#include <linux/pci.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/platform_device.h>
+#include <linux/mfd/core.h>
+#include <linux/mfd/sta2x11-mfd.h>
+
+#include <asm/sta2x11.h>
+
+/* This describes STA2X11 MFD chip for us, we may have several */
+struct sta2x11_mfd {
+	struct sta2x11_instance *instance;
+	spinlock_t lock;
+	struct list_head list;
+	void __iomem *sctl_regs;
+	void __iomem *apbreg_regs;
+};
+
+static LIST_HEAD(sta2x11_mfd_list);
+
+/* Three functions to act on the list */
+static struct sta2x11_mfd *sta2x11_mfd_find(struct pci_dev *pdev)
+{
+	struct sta2x11_instance *instance;
+	struct sta2x11_mfd *mfd;
+
+	if (!pdev && !list_empty(&sta2x11_mfd_list)) {
+		pr_warning("%s: Unspecified device, "
+			    "using first instance\n", __func__);
+		return list_entry(sta2x11_mfd_list.next,
+				  struct sta2x11_mfd, list);
+	}
+
+	instance = sta2x11_get_instance(pdev);
+	if (!instance)
+		return NULL;
+	list_for_each_entry(mfd, &sta2x11_mfd_list, list) {
+		if (mfd->instance == instance)
+			return mfd;
+	}
+	return NULL;
+}
+
+static int __devinit sta2x11_mfd_add(struct pci_dev *pdev, gfp_t flags)
+{
+	struct sta2x11_mfd *mfd = sta2x11_mfd_find(pdev);
+	struct sta2x11_instance *instance;
+
+	if (mfd)
+		return -EBUSY;
+	instance = sta2x11_get_instance(pdev);
+	if (!instance)
+		return -EINVAL;
+	mfd = kzalloc(sizeof(*mfd), flags);
+	if (!mfd)
+		return -ENOMEM;
+	INIT_LIST_HEAD(&mfd->list);
+	spin_lock_init(&mfd->lock);
+	mfd->instance = instance;
+	list_add(&mfd->list, &sta2x11_mfd_list);
+	return 0;
+}
+
+static int __devexit mfd_remove(struct pci_dev *pdev)
+{
+	struct sta2x11_mfd *mfd = sta2x11_mfd_find(pdev);
+
+	if (!mfd)
+		return -ENODEV;
+	list_del(&mfd->list);
+	kfree(mfd);
+	return 0;
+}
+
+/* These two functions are exported and are not expected to fail */
+u32 sta2x11_sctl_mask(struct pci_dev *pdev, u32 reg, u32 mask, u32 val)
+{
+	struct sta2x11_mfd *mfd = sta2x11_mfd_find(pdev);
+	u32 r;
+	unsigned long flags;
+
+	if (!mfd) {
+		dev_warn(&pdev->dev, ": can't access sctl regs\n");
+		return 0;
+	}
+	if (!mfd->sctl_regs) {
+		dev_warn(&pdev->dev, ": system ctl not initialized\n");
+		return 0;
+	}
+	spin_lock_irqsave(&mfd->lock, flags);
+	r = readl(mfd->sctl_regs + reg);
+	r &= ~mask;
+	r |= val;
+	if (mask)
+		writel(r, mfd->sctl_regs + reg);
+	spin_unlock_irqrestore(&mfd->lock, flags);
+	return r;
+}
+EXPORT_SYMBOL(sta2x11_sctl_mask);
+
+u32 sta2x11_apbreg_mask(struct pci_dev *pdev, u32 reg, u32 mask, u32 val)
+{
+	struct sta2x11_mfd *mfd = sta2x11_mfd_find(pdev);
+	u32 r;
+	unsigned long flags;
+
+	if (!mfd) {
+		dev_warn(&pdev->dev, ": can't access apb regs\n");
+		return 0;
+	}
+	if (!mfd->apbreg_regs) {
+		dev_warn(&pdev->dev, ": apb bridge not initialized\n");
+		return 0;
+	}
+	spin_lock_irqsave(&mfd->lock, flags);
+	r = readl(mfd->apbreg_regs + reg);
+	r &= ~mask;
+	r |= val;
+	if (mask)
+		writel(r, mfd->apbreg_regs + reg);
+	spin_unlock_irqrestore(&mfd->lock, flags);
+	return r;
+}
+EXPORT_SYMBOL(sta2x11_apbreg_mask);
+
+/* Two debugfs files, for our registers (FIXME: one instance only) */
+#define REG(regname) {.name = #regname, .offset = SCTL_ ## regname}
+static struct debugfs_reg32 sta2x11_sctl_regs[] = {
+	REG(SCCTL), REG(ARMCFG), REG(SCPLLCTL), REG(SCPLLFCTRL),
+	REG(SCRESFRACT), REG(SCRESCTRL1), REG(SCRESXTRL2), REG(SCPEREN0),
+	REG(SCPEREN1), REG(SCPEREN2), REG(SCGRST), REG(SCPCIPMCR1),
+	REG(SCPCIPMCR2), REG(SCPCIPMSR1), REG(SCPCIPMSR2), REG(SCPCIPMSR3),
+	REG(SCINTREN), REG(SCRISR), REG(SCCLKSTAT0), REG(SCCLKSTAT1),
+	REG(SCCLKSTAT2), REG(SCRSTSTA),
+};
+#undef REG
+
+static struct debugfs_regset32 sctl_regset = {
+	.regs = sta2x11_sctl_regs,
+	.nregs = ARRAY_SIZE(sta2x11_sctl_regs),
+};
+
+#define REG(regname) {.name = #regname, .offset = regname}
+static struct debugfs_reg32 sta2x11_apbreg_regs[] = {
+	REG(APBREG_BSR), REG(APBREG_PAER), REG(APBREG_PWAC), REG(APBREG_PRAC),
+	REG(APBREG_PCG), REG(APBREG_PUR), REG(APBREG_EMU_PCG),
+};
+#undef REG
+
+static struct debugfs_regset32 apbreg_regset = {
+	.regs = sta2x11_apbreg_regs,
+	.nregs = ARRAY_SIZE(sta2x11_apbreg_regs),
+};
+
+static struct dentry *sta2x11_sctl_debugfs;
+static struct dentry *sta2x11_apbreg_debugfs;
+
+/* Probe for the two platform devices */
+static int sta2x11_sctl_probe(struct platform_device *dev)
+{
+	struct pci_dev **pdev;
+	struct sta2x11_mfd *mfd;
+	struct resource *res;
+
+	pdev = dev->dev.platform_data;
+	mfd = sta2x11_mfd_find(*pdev);
+	if (!mfd)
+		return -ENODEV;
+
+	res = platform_get_resource(dev, IORESOURCE_MEM, 0);
+	if (!res)
+		return -ENOMEM;
+
+	if (!request_mem_region(res->start, resource_size(res),
+				"sta2x11-sctl"))
+		return -EBUSY;
+
+	mfd->sctl_regs = ioremap(res->start, resource_size(res));
+	if (!mfd->sctl_regs) {
+		release_mem_region(res->start, resource_size(res));
+		return -ENOMEM;
+	}
+	sctl_regset.base = mfd->sctl_regs;
+	sta2x11_sctl_debugfs = debugfs_create_regset32("sta2x11-sctl",
+						  S_IFREG | S_IRUGO,
+						  NULL, &sctl_regset);
+	return 0;
+}
+
+static int sta2x11_apbreg_probe(struct platform_device *dev)
+{
+	struct pci_dev **pdev;
+	struct sta2x11_mfd *mfd;
+	struct resource *res;
+
+	pdev = dev->dev.platform_data;
+	dev_dbg(&dev->dev, "%s: pdata is %p\n", __func__, pdev);
+	dev_dbg(&dev->dev, "%s: *pdata is %p\n", __func__, *pdev);
+
+	mfd = sta2x11_mfd_find(*pdev);
+	if (!mfd)
+		return -ENODEV;
+
+	res = platform_get_resource(dev, IORESOURCE_MEM, 0);
+	if (!res)
+		return -ENOMEM;
+
+	if (!request_mem_region(res->start, resource_size(res),
+				"sta2x11-apbreg"))
+		return -EBUSY;
+
+	mfd->apbreg_regs = ioremap(res->start, resource_size(res));
+	if (!mfd->apbreg_regs) {
+		release_mem_region(res->start, resource_size(res));
+		return -ENOMEM;
+	}
+	dev_dbg(&dev->dev, "%s: regbase %p\n", __func__, mfd->apbreg_regs);
+
+	apbreg_regset.base = mfd->apbreg_regs;
+	sta2x11_apbreg_debugfs = debugfs_create_regset32("sta2x11-apbreg",
+						  S_IFREG | S_IRUGO,
+						  NULL, &apbreg_regset);
+	return 0;
+}
+
+/* The two platform drivers */
+static struct platform_driver sta2x11_sctl_platform_driver = {
+	.driver = {
+		.name	= "sta2x11-sctl",
+		.owner	= THIS_MODULE,
+	},
+	.probe		= sta2x11_sctl_probe,
+};
+
+static int __init sta2x11_sctl_init(void)
+{
+	pr_info("%s\n", __func__);
+	return platform_driver_register(&sta2x11_sctl_platform_driver);
+}
+
+static struct platform_driver sta2x11_platform_driver = {
+	.driver = {
+		.name	= "sta2x11-apbreg",
+		.owner	= THIS_MODULE,
+	},
+	.probe		= sta2x11_apbreg_probe,
+};
+
+static int __init sta2x11_apbreg_init(void)
+{
+	pr_info("%s\n", __func__);
+	return platform_driver_register(&sta2x11_platform_driver);
+}
+
+/*
+ * What follows is the PCI device that hosts the above two pdevs.
+ * Each logic block is 4kB and they are all consecutive: we use this info.
+ */
+
+/* Bar 0 */
+enum bar0_cells {
+	STA2X11_GPIO_0 = 0,
+	STA2X11_GPIO_1,
+	STA2X11_GPIO_2,
+	STA2X11_GPIO_3,
+	STA2X11_SCTL,
+	STA2X11_SCR,
+	STA2X11_TIME,
+};
+/* Bar 1 */
+enum bar1_cells {
+	STA2X11_APBREG = 0,
+};
+#define CELL_4K(_name, _cell) { \
+		.name = _name, \
+		.start = _cell * 4096, .end = _cell * 4096 + 4095, \
+		.flags = IORESOURCE_MEM, \
+		}
+
+static const __devinitconst struct resource gpio_resources[] = {
+	{
+		.name = "sta2x11_gpio", /* 4 consecutive cells, 1 driver */
+		.start = 0,
+		.end = (4 * 4096) - 1,
+		.flags = IORESOURCE_MEM,
+	}
+};
+static const __devinitconst struct resource sctl_resources[] = {
+	CELL_4K("sta2x11-sctl", STA2X11_SCTL),
+};
+static const __devinitconst struct resource scr_resources[] = {
+	CELL_4K("sta2x11-scr", STA2X11_SCR),
+};
+static const __devinitconst struct resource time_resources[] = {
+	CELL_4K("sta2x11-time", STA2X11_TIME),
+};
+
+static const __devinitconst struct resource apbreg_resources[] = {
+	CELL_4K("sta2x11-apbreg", STA2X11_APBREG),
+};
+
+#define DEV(_name, _r) \
+	{ .name = _name, .num_resources = ARRAY_SIZE(_r), .resources = _r, }
+
+static __devinitdata struct mfd_cell sta2x11_mfd_bar0[] = {
+	DEV("sta2x11-gpio", gpio_resources), /* offset 0: we add pdata later */
+	DEV("sta2x11-sctl", sctl_resources),
+	DEV("sta2x11-scr", scr_resources),
+	DEV("sta2x11-time", time_resources),
+};
+
+static __devinitdata struct mfd_cell sta2x11_mfd_bar1[] = {
+	DEV("sta2x11-apbreg", apbreg_resources),
+};
+
+static int sta2x11_mfd_suspend(struct pci_dev *pdev, pm_message_t state)
+{
+	pci_save_state(pdev);
+	pci_disable_device(pdev);
+	pci_set_power_state(pdev, pci_choose_state(pdev, state));
+
+	return 0;
+}
+
+static int sta2x11_mfd_resume(struct pci_dev *pdev)
+{
+	int err;
+
+	pci_set_power_state(pdev, 0);
+	err = pci_enable_device(pdev);
+	if (err)
+		return err;
+	pci_restore_state(pdev);
+
+	return 0;
+}
+
+static int __devinit sta2x11_mfd_probe(struct pci_dev *pdev,
+				       const struct pci_device_id *pci_id)
+{
+	int err, i;
+	struct sta2x11_gpio_pdata *gpio_data;
+
+	dev_info(&pdev->dev, "%s\n", __func__);
+
+	err = pci_enable_device(pdev);
+	if (err) {
+		dev_err(&pdev->dev, "Can't enable device.\n");
+		return err;
+	}
+
+	err = pci_enable_msi(pdev);
+	if (err)
+		dev_info(&pdev->dev, "Enable msi failed\n");
+
+	/* Read gpio config data as pci device's platform data */
+	gpio_data = dev_get_platdata(&pdev->dev);
+	if (!gpio_data)
+		dev_warn(&pdev->dev, "no gpio configuration\n");
+
+	dev_dbg(&pdev->dev, "%s, gpio_data = %p (%p)\n", __func__,
+		gpio_data, &gpio_data);
+	dev_dbg(&pdev->dev, "%s, pdev = %p (%p)\n", __func__,
+		pdev, &pdev);
+
+	/* platform data is the pci device for all of them */
+	for (i = 0; i < ARRAY_SIZE(sta2x11_mfd_bar0); i++) {
+		sta2x11_mfd_bar0[i].pdata_size = sizeof(pdev);
+		sta2x11_mfd_bar0[i].platform_data = &pdev;
+	}
+	sta2x11_mfd_bar1[0].pdata_size = sizeof(pdev);
+	sta2x11_mfd_bar1[0].platform_data = &pdev;
+
+	/* Record this pdev before mfd_add_devices: their probe looks for it */
+	sta2x11_mfd_add(pdev, GFP_ATOMIC);
+
+
+	err = mfd_add_devices(&pdev->dev, -1,
+			      sta2x11_mfd_bar0,
+			      ARRAY_SIZE(sta2x11_mfd_bar0),
+			      &pdev->resource[0],
+			      0);
+	if (err) {
+		dev_err(&pdev->dev, "mfd_add_devices[0] failed: %d\n", err);
+		goto err_disable;
+	}
+
+	err = mfd_add_devices(&pdev->dev, -1,
+			      sta2x11_mfd_bar1,
+			      ARRAY_SIZE(sta2x11_mfd_bar1),
+			      &pdev->resource[1],
+			      0);
+	if (err) {
+		dev_err(&pdev->dev, "mfd_add_devices[1] failed: %d\n", err);
+		goto err_disable;
+	}
+
+	return 0;
+
+err_disable:
+	mfd_remove_devices(&pdev->dev);
+	pci_disable_device(pdev);
+	pci_disable_msi(pdev);
+	return err;
+}
+
+static DEFINE_PCI_DEVICE_TABLE(sta2x11_mfd_tbl) = {
+	{PCI_DEVICE(PCI_VENDOR_ID_STMICRO, PCI_DEVICE_ID_STMICRO_GPIO)},
+	{0,},
+};
+
+static struct pci_driver sta2x11_mfd_driver = {
+	.name =		"sta2x11-mfd",
+	.id_table =	sta2x11_mfd_tbl,
+	.probe =	sta2x11_mfd_probe,
+	.suspend =	sta2x11_mfd_suspend,
+	.resume =	sta2x11_mfd_resume,
+};
+
+static int __init sta2x11_mfd_init(void)
+{
+	pr_info("%s\n", __func__);
+	return pci_register_driver(&sta2x11_mfd_driver);
+}
+
+/*
+ * All of this must be ready before "normal" devices like MMCI appear.
+ * But MFD (the pci device) can't be too early. The following choice
+ * prepares platform drivers very early and probe the PCI device later,
+ * but before other PCI devices.
+ */
+subsys_initcall(sta2x11_apbreg_init);
+subsys_initcall(sta2x11_sctl_init);
+rootfs_initcall(sta2x11_mfd_init);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Wind River");
+MODULE_DESCRIPTION("STA2x11 mfd for GPIO, SCTL and APBREG");
+MODULE_DEVICE_TABLE(pci, sta2x11_mfd_tbl);
diff --git a/include/linux/mfd/sta2x11-mfd.h b/include/linux/mfd/sta2x11-mfd.h
new file mode 100644
index 000000000000..d179227e866f
--- /dev/null
+++ b/include/linux/mfd/sta2x11-mfd.h
@@ -0,0 +1,324 @@
+/*
+ * Copyright (c) 2009-2011 Wind River Systems, Inc.
+ * Copyright (c) 2011 ST Microelectronics (Alessandro Rubini)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * The STMicroelectronics ConneXt (STA2X11) chip has several unrelated
+ * functions in one PCI endpoint functions. This driver simply
+ * registers the platform devices in this iomemregion and exports a few
+ * functions to access common registers
+ */
+
+#ifndef __STA2X11_MFD_H
+#define __STA2X11_MFD_H
+#include <linux/types.h>
+#include <linux/pci.h>
+
+/*
+ * The MFD PCI block includes the GPIO peripherals and other register blocks.
+ * For GPIO, we have 32*4 bits (I use "gsta" for "gpio sta2x11".)
+ */
+#define GSTA_GPIO_PER_BLOCK	32
+#define GSTA_NR_BLOCKS		4
+#define GSTA_NR_GPIO		(GSTA_GPIO_PER_BLOCK * GSTA_NR_BLOCKS)
+
+/* Pinconfig is set by the board definition: altfunc, pull-up, pull-down */
+struct sta2x11_gpio_pdata {
+	unsigned pinconfig[GSTA_NR_GPIO];
+};
+
+/* Macros below lifted from sh_pfc.h, with minor differences */
+#define PINMUX_TYPE_NONE		0
+#define PINMUX_TYPE_FUNCTION		1
+#define PINMUX_TYPE_OUTPUT_LOW		2
+#define PINMUX_TYPE_OUTPUT_HIGH		3
+#define PINMUX_TYPE_INPUT		4
+#define PINMUX_TYPE_INPUT_PULLUP	5
+#define PINMUX_TYPE_INPUT_PULLDOWN	6
+
+/* Give names to GPIO pins, like PXA does, taken from the manual */
+#define STA2X11_GPIO0			0
+#define STA2X11_GPIO1			1
+#define STA2X11_GPIO2			2
+#define STA2X11_GPIO3			3
+#define STA2X11_GPIO4			4
+#define STA2X11_GPIO5			5
+#define STA2X11_GPIO6			6
+#define STA2X11_GPIO7			7
+#define STA2X11_GPIO8_RGBOUT_RED7	8
+#define STA2X11_GPIO9_RGBOUT_RED6	9
+#define STA2X11_GPIO10_RGBOUT_RED5	10
+#define STA2X11_GPIO11_RGBOUT_RED4	11
+#define STA2X11_GPIO12_RGBOUT_RED3	12
+#define STA2X11_GPIO13_RGBOUT_RED2	13
+#define STA2X11_GPIO14_RGBOUT_RED1	14
+#define STA2X11_GPIO15_RGBOUT_RED0	15
+#define STA2X11_GPIO16_RGBOUT_GREEN7	16
+#define STA2X11_GPIO17_RGBOUT_GREEN6	17
+#define STA2X11_GPIO18_RGBOUT_GREEN5	18
+#define STA2X11_GPIO19_RGBOUT_GREEN4	19
+#define STA2X11_GPIO20_RGBOUT_GREEN3	20
+#define STA2X11_GPIO21_RGBOUT_GREEN2	21
+#define STA2X11_GPIO22_RGBOUT_GREEN1	22
+#define STA2X11_GPIO23_RGBOUT_GREEN0	23
+#define STA2X11_GPIO24_RGBOUT_BLUE7	24
+#define STA2X11_GPIO25_RGBOUT_BLUE6	25
+#define STA2X11_GPIO26_RGBOUT_BLUE5	26
+#define STA2X11_GPIO27_RGBOUT_BLUE4	27
+#define STA2X11_GPIO28_RGBOUT_BLUE3	28
+#define STA2X11_GPIO29_RGBOUT_BLUE2	29
+#define STA2X11_GPIO30_RGBOUT_BLUE1	30
+#define STA2X11_GPIO31_RGBOUT_BLUE0	31
+#define STA2X11_GPIO32_RGBOUT_VSYNCH	32
+#define STA2X11_GPIO33_RGBOUT_HSYNCH	33
+#define STA2X11_GPIO34_RGBOUT_DEN	34
+#define STA2X11_GPIO35_ETH_CRS_DV	35
+#define STA2X11_GPIO36_ETH_TXD1		36
+#define STA2X11_GPIO37_ETH_TXD0		37
+#define STA2X11_GPIO38_ETH_TX_EN	38
+#define STA2X11_GPIO39_MDIO		39
+#define STA2X11_GPIO40_ETH_REF_CLK	40
+#define STA2X11_GPIO41_ETH_RXD1		41
+#define STA2X11_GPIO42_ETH_RXD0		42
+#define STA2X11_GPIO43_MDC		43
+#define STA2X11_GPIO44_CAN_TX		44
+#define STA2X11_GPIO45_CAN_RX		45
+#define STA2X11_GPIO46_MLB_DAT		46
+#define STA2X11_GPIO47_MLB_SIG		47
+#define STA2X11_GPIO48_SPI0_CLK		48
+#define STA2X11_GPIO49_SPI0_TXD		49
+#define STA2X11_GPIO50_SPI0_RXD		50
+#define STA2X11_GPIO51_SPI0_FRM		51
+#define STA2X11_GPIO52_SPI1_CLK		52
+#define STA2X11_GPIO53_SPI1_TXD		53
+#define STA2X11_GPIO54_SPI1_RXD		54
+#define STA2X11_GPIO55_SPI1_FRM		55
+#define STA2X11_GPIO56_SPI2_CLK		56
+#define STA2X11_GPIO57_SPI2_TXD		57
+#define STA2X11_GPIO58_SPI2_RXD		58
+#define STA2X11_GPIO59_SPI2_FRM		59
+#define STA2X11_GPIO60_I2C0_SCL		60
+#define STA2X11_GPIO61_I2C0_SDA		61
+#define STA2X11_GPIO62_I2C1_SCL		62
+#define STA2X11_GPIO63_I2C1_SDA		63
+#define STA2X11_GPIO64_I2C2_SCL		64
+#define STA2X11_GPIO65_I2C2_SDA		65
+#define STA2X11_GPIO66_I2C3_SCL		66
+#define STA2X11_GPIO67_I2C3_SDA		67
+#define STA2X11_GPIO68_MSP0_RCK		68
+#define STA2X11_GPIO69_MSP0_RXD		69
+#define STA2X11_GPIO70_MSP0_RFS		70
+#define STA2X11_GPIO71_MSP0_TCK		71
+#define STA2X11_GPIO72_MSP0_TXD		72
+#define STA2X11_GPIO73_MSP0_TFS		73
+#define STA2X11_GPIO74_MSP0_SCK		74
+#define STA2X11_GPIO75_MSP1_CK		75
+#define STA2X11_GPIO76_MSP1_RXD		76
+#define STA2X11_GPIO77_MSP1_FS		77
+#define STA2X11_GPIO78_MSP1_TXD		78
+#define STA2X11_GPIO79_MSP2_CK		79
+#define STA2X11_GPIO80_MSP2_RXD		80
+#define STA2X11_GPIO81_MSP2_FS		81
+#define STA2X11_GPIO82_MSP2_TXD		82
+#define STA2X11_GPIO83_MSP3_CK		83
+#define STA2X11_GPIO84_MSP3_RXD		84
+#define STA2X11_GPIO85_MSP3_FS		85
+#define STA2X11_GPIO86_MSP3_TXD		86
+#define STA2X11_GPIO87_MSP4_CK		87
+#define STA2X11_GPIO88_MSP4_RXD		88
+#define STA2X11_GPIO89_MSP4_FS		89
+#define STA2X11_GPIO90_MSP4_TXD		90
+#define STA2X11_GPIO91_MSP5_CK		91
+#define STA2X11_GPIO92_MSP5_RXD		92
+#define STA2X11_GPIO93_MSP5_FS		93
+#define STA2X11_GPIO94_MSP5_TXD		94
+#define STA2X11_GPIO95_SDIO3_DAT3	95
+#define STA2X11_GPIO96_SDIO3_DAT2	96
+#define STA2X11_GPIO97_SDIO3_DAT1	97
+#define STA2X11_GPIO98_SDIO3_DAT0	98
+#define STA2X11_GPIO99_SDIO3_CLK	99
+#define STA2X11_GPIO100_SDIO3_CMD	100
+#define STA2X11_GPIO101			101
+#define STA2X11_GPIO102			102
+#define STA2X11_GPIO103			103
+#define STA2X11_GPIO104			104
+#define STA2X11_GPIO105_SDIO2_DAT3	105
+#define STA2X11_GPIO106_SDIO2_DAT2	106
+#define STA2X11_GPIO107_SDIO2_DAT1	107
+#define STA2X11_GPIO108_SDIO2_DAT0	108
+#define STA2X11_GPIO109_SDIO2_CLK	109
+#define STA2X11_GPIO110_SDIO2_CMD	110
+#define STA2X11_GPIO111			111
+#define STA2X11_GPIO112			112
+#define STA2X11_GPIO113			113
+#define STA2X11_GPIO114			114
+#define STA2X11_GPIO115_SDIO1_DAT3	115
+#define STA2X11_GPIO116_SDIO1_DAT2	116
+#define STA2X11_GPIO117_SDIO1_DAT1	117
+#define STA2X11_GPIO118_SDIO1_DAT0	118
+#define STA2X11_GPIO119_SDIO1_CLK	119
+#define STA2X11_GPIO120_SDIO1_CMD	120
+#define STA2X11_GPIO121			121
+#define STA2X11_GPIO122			122
+#define STA2X11_GPIO123			123
+#define STA2X11_GPIO124			124
+#define STA2X11_GPIO125_UART2_TXD	125
+#define STA2X11_GPIO126_UART2_RXD	126
+#define STA2X11_GPIO127_UART3_TXD	127
+
+/*
+ * The APB bridge has its own registers, needed by our users as well.
+ * They are accessed with the following read/mask/write function.
+ */
+u32 sta2x11_apbreg_mask(struct pci_dev *pdev, u32 reg, u32 mask, u32 val);
+
+/* CAN and MLB */
+#define APBREG_BSR	0x00	/* Bridge Status Reg */
+#define APBREG_PAER	0x08	/* Peripherals Address Error Reg */
+#define APBREG_PWAC	0x20	/* Peripheral Write Access Control reg */
+#define APBREG_PRAC	0x40	/* Peripheral Read Access Control reg */
+#define APBREG_PCG	0x60	/* Peripheral Clock Gating Reg */
+#define APBREG_PUR	0x80	/* Peripheral Under Reset Reg */
+#define APBREG_EMU_PCG	0xA0	/* Emulator Peripheral Clock Gating Reg */
+
+#define APBREG_CAN	(1 << 1)
+#define APBREG_MLB	(1 << 3)
+
+/* SARAC */
+#define APBREG_BSR_SARAC     0x100 /* Bridge Status Reg */
+#define APBREG_PAER_SARAC    0x108 /* Peripherals Address Error Reg */
+#define APBREG_PWAC_SARAC    0x120 /* Peripheral Write Access Control reg */
+#define APBREG_PRAC_SARAC    0x140 /* Peripheral Read Access Control reg */
+#define APBREG_PCG_SARAC     0x160 /* Peripheral Clock Gating Reg */
+#define APBREG_PUR_SARAC     0x180 /* Peripheral Under Reset Reg */
+#define APBREG_EMU_PCG_SARAC 0x1A0 /* Emulator Peripheral Clock Gating Reg */
+
+#define APBREG_SARAC	(1 << 2)
+
+/*
+ * The system controller has its own registers. Some of these are accessed
+ * by out users as well, using the following read/mask/write/function
+ */
+u32 sta2x11_sctl_mask(struct pci_dev *pdev, u32 reg, u32 mask, u32 val);
+
+#define SCTL_SCCTL		0x00	/* System controller control register */
+#define SCTL_ARMCFG		0x04	/* ARM configuration register */
+#define SCTL_SCPLLCTL		0x08	/* PLL control status register */
+#define SCTL_SCPLLFCTRL		0x0c	/* PLL frequency control register */
+#define SCTL_SCRESFRACT		0x10	/* PLL fractional input register */
+#define SCTL_SCRESCTRL1		0x14	/* Peripheral reset control 1 */
+#define SCTL_SCRESXTRL2		0x18	/* Peripheral reset control 2 */
+#define SCTL_SCPEREN0		0x1c	/* Peripheral clock enable register 0 */
+#define SCTL_SCPEREN1		0x20	/* Peripheral clock enable register 1 */
+#define SCTL_SCPEREN2		0x24	/* Peripheral clock enable register 2 */
+#define SCTL_SCGRST		0x28	/* Peripheral global reset */
+#define SCTL_SCPCIPMCR1		0x30	/* PCI power management control 1 */
+#define SCTL_SCPCIPMCR2		0x34	/* PCI power management control 2 */
+#define SCTL_SCPCIPMSR1		0x38	/* PCI power management status 1 */
+#define SCTL_SCPCIPMSR2		0x3c	/* PCI power management status 2 */
+#define SCTL_SCPCIPMSR3		0x40	/* PCI power management status 3 */
+#define SCTL_SCINTREN		0x44	/* Interrupt enable */
+#define SCTL_SCRISR		0x48	/* RAW interrupt status */
+#define SCTL_SCCLKSTAT0		0x4c	/* Peripheral clocks status 0 */
+#define SCTL_SCCLKSTAT1		0x50	/* Peripheral clocks status 1 */
+#define SCTL_SCCLKSTAT2		0x54	/* Peripheral clocks status 2 */
+#define SCTL_SCRSTSTA		0x58	/* Reset status register */
+
+#define SCTL_SCRESCTRL1_USB_PHY_POR	(1 << 0)
+#define SCTL_SCRESCTRL1_USB_OTG	(1 << 1)
+#define SCTL_SCRESCTRL1_USB_HRST	(1 << 2)
+#define SCTL_SCRESCTRL1_USB_PHY_HOST	(1 << 3)
+#define SCTL_SCRESCTRL1_SATAII	(1 << 4)
+#define SCTL_SCRESCTRL1_VIP		(1 << 5)
+#define SCTL_SCRESCTRL1_PER_MMC0	(1 << 6)
+#define SCTL_SCRESCTRL1_PER_MMC1	(1 << 7)
+#define SCTL_SCRESCTRL1_PER_GPIO0	(1 << 8)
+#define SCTL_SCRESCTRL1_PER_GPIO1	(1 << 9)
+#define SCTL_SCRESCTRL1_PER_GPIO2	(1 << 10)
+#define SCTL_SCRESCTRL1_PER_GPIO3	(1 << 11)
+#define SCTL_SCRESCTRL1_PER_MTU0	(1 << 12)
+#define SCTL_SCRESCTRL1_KER_SPI0	(1 << 13)
+#define SCTL_SCRESCTRL1_KER_SPI1	(1 << 14)
+#define SCTL_SCRESCTRL1_KER_SPI2	(1 << 15)
+#define SCTL_SCRESCTRL1_KER_MCI0	(1 << 16)
+#define SCTL_SCRESCTRL1_KER_MCI1	(1 << 17)
+#define SCTL_SCRESCTRL1_PRE_HSI2C0	(1 << 18)
+#define SCTL_SCRESCTRL1_PER_HSI2C1	(1 << 19)
+#define SCTL_SCRESCTRL1_PER_HSI2C2	(1 << 20)
+#define SCTL_SCRESCTRL1_PER_HSI2C3	(1 << 21)
+#define SCTL_SCRESCTRL1_PER_MSP0	(1 << 22)
+#define SCTL_SCRESCTRL1_PER_MSP1	(1 << 23)
+#define SCTL_SCRESCTRL1_PER_MSP2	(1 << 24)
+#define SCTL_SCRESCTRL1_PER_MSP3	(1 << 25)
+#define SCTL_SCRESCTRL1_PER_MSP4	(1 << 26)
+#define SCTL_SCRESCTRL1_PER_MSP5	(1 << 27)
+#define SCTL_SCRESCTRL1_PER_MMC	(1 << 28)
+#define SCTL_SCRESCTRL1_KER_MSP0	(1 << 29)
+#define SCTL_SCRESCTRL1_KER_MSP1	(1 << 30)
+#define SCTL_SCRESCTRL1_KER_MSP2	(1 << 31)
+
+#define SCTL_SCPEREN0_UART0		(1 << 0)
+#define SCTL_SCPEREN0_UART1		(1 << 1)
+#define SCTL_SCPEREN0_UART2		(1 << 2)
+#define SCTL_SCPEREN0_UART3		(1 << 3)
+#define SCTL_SCPEREN0_MSP0		(1 << 4)
+#define SCTL_SCPEREN0_MSP1		(1 << 5)
+#define SCTL_SCPEREN0_MSP2		(1 << 6)
+#define SCTL_SCPEREN0_MSP3		(1 << 7)
+#define SCTL_SCPEREN0_MSP4		(1 << 8)
+#define SCTL_SCPEREN0_MSP5		(1 << 9)
+#define SCTL_SCPEREN0_SPI0		(1 << 10)
+#define SCTL_SCPEREN0_SPI1		(1 << 11)
+#define SCTL_SCPEREN0_SPI2		(1 << 12)
+#define SCTL_SCPEREN0_I2C0		(1 << 13)
+#define SCTL_SCPEREN0_I2C1		(1 << 14)
+#define SCTL_SCPEREN0_I2C2		(1 << 15)
+#define SCTL_SCPEREN0_I2C3		(1 << 16)
+#define SCTL_SCPEREN0_SVDO_LVDS		(1 << 17)
+#define SCTL_SCPEREN0_USB_HOST		(1 << 18)
+#define SCTL_SCPEREN0_USB_OTG		(1 << 19)
+#define SCTL_SCPEREN0_MCI0		(1 << 20)
+#define SCTL_SCPEREN0_MCI1		(1 << 21)
+#define SCTL_SCPEREN0_MCI2		(1 << 22)
+#define SCTL_SCPEREN0_MCI3		(1 << 23)
+#define SCTL_SCPEREN0_SATA		(1 << 24)
+#define SCTL_SCPEREN0_ETHERNET		(1 << 25)
+#define SCTL_SCPEREN0_VIC		(1 << 26)
+#define SCTL_SCPEREN0_DMA_AUDIO		(1 << 27)
+#define SCTL_SCPEREN0_DMA_SOC		(1 << 28)
+#define SCTL_SCPEREN0_RAM		(1 << 29)
+#define SCTL_SCPEREN0_VIP		(1 << 30)
+#define SCTL_SCPEREN0_ARM		(1 << 31)
+
+#define SCTL_SCPEREN1_UART0		(1 << 0)
+#define SCTL_SCPEREN1_UART1		(1 << 1)
+#define SCTL_SCPEREN1_UART2		(1 << 2)
+#define SCTL_SCPEREN1_UART3		(1 << 3)
+#define SCTL_SCPEREN1_MSP0		(1 << 4)
+#define SCTL_SCPEREN1_MSP1		(1 << 5)
+#define SCTL_SCPEREN1_MSP2		(1 << 6)
+#define SCTL_SCPEREN1_MSP3		(1 << 7)
+#define SCTL_SCPEREN1_MSP4		(1 << 8)
+#define SCTL_SCPEREN1_MSP5		(1 << 9)
+#define SCTL_SCPEREN1_SPI0		(1 << 10)
+#define SCTL_SCPEREN1_SPI1		(1 << 11)
+#define SCTL_SCPEREN1_SPI2		(1 << 12)
+#define SCTL_SCPEREN1_I2C0		(1 << 13)
+#define SCTL_SCPEREN1_I2C1		(1 << 14)
+#define SCTL_SCPEREN1_I2C2		(1 << 15)
+#define SCTL_SCPEREN1_I2C3		(1 << 16)
+#define SCTL_SCPEREN1_USB_PHY		(1 << 17)
+
+#endif /* __STA2X11_MFD_H */
-- 
cgit v1.3-7-g2ca7


From 1fc9b1eade80b323f02a9cf7a29e1641eddf1052 Mon Sep 17 00:00:00 2001
From: Seth Heasley <seth.heasley@intel.com>
Date: Mon, 23 Apr 2012 09:23:56 -0700
Subject: pci_ids: Add Intel Centerton Legacy Block DeviceID

This patch adds the Integrated Legacy Block DeviceID for the Centerton CPU.  It will be used in the GPIO and Multifunction Devices driver.

Signed-off-by: Seth Heasley <seth.heasley@intel.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/linux/pci_ids.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 3329965ed63f..ab741b0d0074 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2506,6 +2506,7 @@
 #define PCI_DEVICE_ID_INTEL_MRST_SD2	0x084F
 #define PCI_DEVICE_ID_INTEL_I960	0x0960
 #define PCI_DEVICE_ID_INTEL_I960RM	0x0962
+#define PCI_DEVICE_ID_INTEL_CENTERTON_ILB	0x0c60
 #define PCI_DEVICE_ID_INTEL_8257X_SOL	0x1062
 #define PCI_DEVICE_ID_INTEL_82573E_SOL	0x1085
 #define PCI_DEVICE_ID_INTEL_82573L_SOL	0x108F
-- 
cgit v1.3-7-g2ca7


From 96cf5f02aee8bbeff38824b18b9ec583d687f846 Mon Sep 17 00:00:00 2001
From: Seungwon Jeon <tgih.jun@samsung.com>
Date: Wed, 25 Apr 2012 16:17:37 +0900
Subject: mmc: core: fix the decision of HS200/DDR card-type

Current implementation decides the card type exclusively. Even though
eMMC device can support both HS200 and DDR mode, card type will be
set only for HS200. If the host doesn't support HS200 but has DDR
capability, then DDR mode can't be selected.

Signed-off-by: Seungwon Jeon <tgih.jun@samsung.com>
Reviewed-by: Subhash Jadavani <subhashj@codeaurora.org>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/core/mmc.c   | 81 +++++++++++++++++++-----------------------------
 include/linux/mmc/card.h |  4 +++
 include/linux/mmc/mmc.h  | 60 -----------------------------------
 3 files changed, 36 insertions(+), 109 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index 54df5adc0413..0477769a8841 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -235,6 +235,36 @@ static int mmc_get_ext_csd(struct mmc_card *card, u8 **new_ext_csd)
 	return err;
 }
 
+static void mmc_select_card_type(struct mmc_card *card)
+{
+	struct mmc_host *host = card->host;
+	u8 card_type = card->ext_csd.raw_card_type & EXT_CSD_CARD_TYPE_MASK;
+	unsigned int caps = host->caps, caps2 = host->caps2;
+	unsigned int hs_max_dtr = 0;
+
+	if (card_type & EXT_CSD_CARD_TYPE_26)
+		hs_max_dtr = MMC_HIGH_26_MAX_DTR;
+
+	if (caps & MMC_CAP_MMC_HIGHSPEED &&
+			card_type & EXT_CSD_CARD_TYPE_52)
+		hs_max_dtr = MMC_HIGH_52_MAX_DTR;
+
+	if ((caps & MMC_CAP_1_8V_DDR &&
+			card_type & EXT_CSD_CARD_TYPE_DDR_1_8V) ||
+	    (caps & MMC_CAP_1_2V_DDR &&
+			card_type & EXT_CSD_CARD_TYPE_DDR_1_2V))
+		hs_max_dtr = MMC_HIGH_DDR_MAX_DTR;
+
+	if ((caps2 & MMC_CAP2_HS200_1_8V_SDR &&
+			card_type & EXT_CSD_CARD_TYPE_SDR_1_8V) ||
+	    (caps2 & MMC_CAP2_HS200_1_2V_SDR &&
+			card_type & EXT_CSD_CARD_TYPE_SDR_1_2V))
+		hs_max_dtr = MMC_HS200_MAX_DTR;
+
+	card->ext_csd.hs_max_dtr = hs_max_dtr;
+	card->ext_csd.card_type = card_type;
+}
+
 /*
  * Decode extended CSD.
  */
@@ -284,56 +314,9 @@ static int mmc_read_ext_csd(struct mmc_card *card, u8 *ext_csd)
 		if (card->ext_csd.sectors > (2u * 1024 * 1024 * 1024) / 512)
 			mmc_card_set_blockaddr(card);
 	}
+
 	card->ext_csd.raw_card_type = ext_csd[EXT_CSD_CARD_TYPE];
-	switch (ext_csd[EXT_CSD_CARD_TYPE] & EXT_CSD_CARD_TYPE_MASK) {
-	case EXT_CSD_CARD_TYPE_SDR_ALL:
-	case EXT_CSD_CARD_TYPE_SDR_ALL_DDR_1_8V:
-	case EXT_CSD_CARD_TYPE_SDR_ALL_DDR_1_2V:
-	case EXT_CSD_CARD_TYPE_SDR_ALL_DDR_52:
-		card->ext_csd.hs_max_dtr = 200000000;
-		card->ext_csd.card_type = EXT_CSD_CARD_TYPE_SDR_200;
-		break;
-	case EXT_CSD_CARD_TYPE_SDR_1_2V_ALL:
-	case EXT_CSD_CARD_TYPE_SDR_1_2V_DDR_1_8V:
-	case EXT_CSD_CARD_TYPE_SDR_1_2V_DDR_1_2V:
-	case EXT_CSD_CARD_TYPE_SDR_1_2V_DDR_52:
-		card->ext_csd.hs_max_dtr = 200000000;
-		card->ext_csd.card_type = EXT_CSD_CARD_TYPE_SDR_1_2V;
-		break;
-	case EXT_CSD_CARD_TYPE_SDR_1_8V_ALL:
-	case EXT_CSD_CARD_TYPE_SDR_1_8V_DDR_1_8V:
-	case EXT_CSD_CARD_TYPE_SDR_1_8V_DDR_1_2V:
-	case EXT_CSD_CARD_TYPE_SDR_1_8V_DDR_52:
-		card->ext_csd.hs_max_dtr = 200000000;
-		card->ext_csd.card_type = EXT_CSD_CARD_TYPE_SDR_1_8V;
-		break;
-	case EXT_CSD_CARD_TYPE_DDR_52 | EXT_CSD_CARD_TYPE_52 |
-	     EXT_CSD_CARD_TYPE_26:
-		card->ext_csd.hs_max_dtr = 52000000;
-		card->ext_csd.card_type = EXT_CSD_CARD_TYPE_DDR_52;
-		break;
-	case EXT_CSD_CARD_TYPE_DDR_1_2V | EXT_CSD_CARD_TYPE_52 |
-	     EXT_CSD_CARD_TYPE_26:
-		card->ext_csd.hs_max_dtr = 52000000;
-		card->ext_csd.card_type = EXT_CSD_CARD_TYPE_DDR_1_2V;
-		break;
-	case EXT_CSD_CARD_TYPE_DDR_1_8V | EXT_CSD_CARD_TYPE_52 |
-	     EXT_CSD_CARD_TYPE_26:
-		card->ext_csd.hs_max_dtr = 52000000;
-		card->ext_csd.card_type = EXT_CSD_CARD_TYPE_DDR_1_8V;
-		break;
-	case EXT_CSD_CARD_TYPE_52 | EXT_CSD_CARD_TYPE_26:
-		card->ext_csd.hs_max_dtr = 52000000;
-		break;
-	case EXT_CSD_CARD_TYPE_26:
-		card->ext_csd.hs_max_dtr = 26000000;
-		break;
-	default:
-		/* MMC v4 spec says this cannot happen */
-		pr_warning("%s: card is mmc v4 but doesn't "
-			"support any high-speed modes.\n",
-			mmc_hostname(card->host));
-	}
+	mmc_select_card_type(card);
 
 	card->ext_csd.raw_s_a_timeout = ext_csd[EXT_CSD_S_A_TIMEOUT];
 	card->ext_csd.raw_erase_timeout_mult =
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index 629b823f8836..d76513b5b263 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -58,6 +58,10 @@ struct mmc_ext_csd {
 	unsigned int		generic_cmd6_time;	/* Units: 10ms */
 	unsigned int            power_off_longtime;     /* Units: ms */
 	unsigned int		hs_max_dtr;
+#define MMC_HIGH_26_MAX_DTR	26000000
+#define MMC_HIGH_52_MAX_DTR	52000000
+#define MMC_HIGH_DDR_MAX_DTR	52000000
+#define MMC_HS200_MAX_DTR	200000000
 	unsigned int		sectors;
 	unsigned int		card_type;
 	unsigned int		hc_erase_size;		/* In sectors */
diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h
index b822a2cb6008..d425cab144d9 100644
--- a/include/linux/mmc/mmc.h
+++ b/include/linux/mmc/mmc.h
@@ -354,66 +354,6 @@ struct _mmc_csd {
 #define EXT_CSD_CARD_TYPE_SDR_1_2V	(1<<5)	/* Card can run at 200MHz */
 						/* SDR mode @1.2V I/O */
 
-#define EXT_CSD_CARD_TYPE_SDR_200	(EXT_CSD_CARD_TYPE_SDR_1_8V | \
-					 EXT_CSD_CARD_TYPE_SDR_1_2V)
-
-#define EXT_CSD_CARD_TYPE_SDR_ALL	(EXT_CSD_CARD_TYPE_SDR_200 | \
-					 EXT_CSD_CARD_TYPE_52 | \
-					 EXT_CSD_CARD_TYPE_26)
-
-#define	EXT_CSD_CARD_TYPE_SDR_1_2V_ALL	(EXT_CSD_CARD_TYPE_SDR_1_2V | \
-					 EXT_CSD_CARD_TYPE_52 | \
-					 EXT_CSD_CARD_TYPE_26)
-
-#define	EXT_CSD_CARD_TYPE_SDR_1_8V_ALL	(EXT_CSD_CARD_TYPE_SDR_1_8V | \
-					 EXT_CSD_CARD_TYPE_52 | \
-					 EXT_CSD_CARD_TYPE_26)
-
-#define EXT_CSD_CARD_TYPE_SDR_1_2V_DDR_1_8V	(EXT_CSD_CARD_TYPE_SDR_1_2V | \
-						 EXT_CSD_CARD_TYPE_DDR_1_8V | \
-						 EXT_CSD_CARD_TYPE_52 | \
-						 EXT_CSD_CARD_TYPE_26)
-
-#define EXT_CSD_CARD_TYPE_SDR_1_8V_DDR_1_8V	(EXT_CSD_CARD_TYPE_SDR_1_8V | \
-						 EXT_CSD_CARD_TYPE_DDR_1_8V | \
-						 EXT_CSD_CARD_TYPE_52 | \
-						 EXT_CSD_CARD_TYPE_26)
-
-#define EXT_CSD_CARD_TYPE_SDR_1_2V_DDR_1_2V	(EXT_CSD_CARD_TYPE_SDR_1_2V | \
-						 EXT_CSD_CARD_TYPE_DDR_1_2V | \
-						 EXT_CSD_CARD_TYPE_52 | \
-						 EXT_CSD_CARD_TYPE_26)
-
-#define EXT_CSD_CARD_TYPE_SDR_1_8V_DDR_1_2V	(EXT_CSD_CARD_TYPE_SDR_1_8V | \
-						 EXT_CSD_CARD_TYPE_DDR_1_2V | \
-						 EXT_CSD_CARD_TYPE_52 | \
-						 EXT_CSD_CARD_TYPE_26)
-
-#define EXT_CSD_CARD_TYPE_SDR_1_2V_DDR_52	(EXT_CSD_CARD_TYPE_SDR_1_2V | \
-						 EXT_CSD_CARD_TYPE_DDR_52 | \
-						 EXT_CSD_CARD_TYPE_52 | \
-						 EXT_CSD_CARD_TYPE_26)
-
-#define EXT_CSD_CARD_TYPE_SDR_1_8V_DDR_52	(EXT_CSD_CARD_TYPE_SDR_1_8V | \
-						 EXT_CSD_CARD_TYPE_DDR_52 | \
-						 EXT_CSD_CARD_TYPE_52 | \
-						 EXT_CSD_CARD_TYPE_26)
-
-#define EXT_CSD_CARD_TYPE_SDR_ALL_DDR_1_8V	(EXT_CSD_CARD_TYPE_SDR_200 | \
-						 EXT_CSD_CARD_TYPE_DDR_1_8V | \
-						 EXT_CSD_CARD_TYPE_52 | \
-						 EXT_CSD_CARD_TYPE_26)
-
-#define EXT_CSD_CARD_TYPE_SDR_ALL_DDR_1_2V	(EXT_CSD_CARD_TYPE_SDR_200 | \
-						 EXT_CSD_CARD_TYPE_DDR_1_2V | \
-						 EXT_CSD_CARD_TYPE_52 | \
-						 EXT_CSD_CARD_TYPE_26)
-
-#define EXT_CSD_CARD_TYPE_SDR_ALL_DDR_52	(EXT_CSD_CARD_TYPE_SDR_200 | \
-						 EXT_CSD_CARD_TYPE_DDR_52 | \
-						 EXT_CSD_CARD_TYPE_52 | \
-						 EXT_CSD_CARD_TYPE_26)
-
 #define EXT_CSD_BUS_WIDTH_1	0	/* Card is in 1 bit mode */
 #define EXT_CSD_BUS_WIDTH_4	1	/* Card is in 4 bit mode */
 #define EXT_CSD_BUS_WIDTH_8	2	/* Card is in 8 bit mode */
-- 
cgit v1.3-7-g2ca7


From 95dcc2cb6c9c84555c29187f8b7cf39e83991a29 Mon Sep 17 00:00:00 2001
From: Thomas Abraham <thomas.abraham@linaro.org>
Date: Tue, 1 May 2012 14:57:36 -0700
Subject: mmc: dw_mmc: make multiple instances of dw_mci_card_workqueue

The variable 'dw_mci_card_workqueue' is a global variable shared between
multiple instances of the dw_mmc host controller. Due to this, data
corruption has been noticed when multiple instances of dw_mmc controllers
are actively reading/writing the media. Fix this by adding a instance
of 'struct workqueue_struct' for each host instance and removing the
global 'dw_mci_card_workqueue' instance.

Signed-off-by: Thomas Abraham <thomas.abraham@linaro.org>
Acked-by: Jaehoon Chung <jh80.chung@samsung.com>
Acked-by: Will Newton <will.newton@imgtec.com>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/host/dw_mmc.c  | 14 ++++++--------
 include/linux/mmc/dw_mmc.h |  1 +
 2 files changed, 7 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c
index ab3fc4617107..1532357787cb 100644
--- a/drivers/mmc/host/dw_mmc.c
+++ b/drivers/mmc/host/dw_mmc.c
@@ -100,8 +100,6 @@ struct dw_mci_slot {
 	int			last_detect_state;
 };
 
-static struct workqueue_struct *dw_mci_card_workqueue;
-
 #if defined(CONFIG_DEBUG_FS)
 static int dw_mci_req_show(struct seq_file *s, void *v)
 {
@@ -1605,7 +1603,7 @@ static irqreturn_t dw_mci_interrupt(int irq, void *dev_id)
 
 		if (pending & SDMMC_INT_CD) {
 			mci_writel(host, RINTSTS, SDMMC_INT_CD);
-			queue_work(dw_mci_card_workqueue, &host->card_work);
+			queue_work(host->card_workqueue, &host->card_work);
 		}
 
 		/* Handle SDIO Interrupts */
@@ -1844,7 +1842,7 @@ static int __init dw_mci_init_slot(struct dw_mci *host, unsigned int id)
 	 * Card may have been plugged in prior to boot so we
 	 * need to run the detect tasklet
 	 */
-	queue_work(dw_mci_card_workqueue, &host->card_work);
+	queue_work(host->card_workqueue, &host->card_work);
 
 	return 0;
 }
@@ -2021,9 +2019,9 @@ int dw_mci_probe(struct dw_mci *host)
 	mci_writel(host, CLKSRC, 0);
 
 	tasklet_init(&host->tasklet, dw_mci_tasklet_func, (unsigned long)host);
-	dw_mci_card_workqueue = alloc_workqueue("dw-mci-card",
+	host->card_workqueue = alloc_workqueue("dw-mci-card",
 			WQ_MEM_RECLAIM | WQ_NON_REENTRANT, 1);
-	if (!dw_mci_card_workqueue)
+	if (!host->card_workqueue)
 		goto err_dmaunmap;
 	INIT_WORK(&host->card_work, dw_mci_work_routine_card);
 	ret = request_irq(host->irq, dw_mci_interrupt, host->irq_flags, "dw-mci", host);
@@ -2085,7 +2083,7 @@ err_init_slot:
 	free_irq(host->irq, host);
 
 err_workqueue:
-	destroy_workqueue(dw_mci_card_workqueue);
+	destroy_workqueue(host->card_workqueue);
 
 err_dmaunmap:
 	if (host->use_dma && host->dma_ops->exit)
@@ -2119,7 +2117,7 @@ void dw_mci_remove(struct dw_mci *host)
 	mci_writel(host, CLKSRC, 0);
 
 	free_irq(host->irq, host);
-	destroy_workqueue(dw_mci_card_workqueue);
+	destroy_workqueue(host->card_workqueue);
 	dma_free_coherent(&host->dev, PAGE_SIZE, host->sg_cpu, host->sg_dma);
 
 	if (host->use_dma && host->dma_ops->exit)
diff --git a/include/linux/mmc/dw_mmc.h b/include/linux/mmc/dw_mmc.h
index 8f66e28f5a0f..7a7ebd367cfd 100644
--- a/include/linux/mmc/dw_mmc.h
+++ b/include/linux/mmc/dw_mmc.h
@@ -125,6 +125,7 @@ struct dw_mci {
 	struct mmc_request	*mrq;
 	struct mmc_command	*cmd;
 	struct mmc_data		*data;
+	struct workqueue_struct	*card_workqueue;
 
 	/* DMA interface members*/
 	int			use_dma;
-- 
cgit v1.3-7-g2ca7


From 16c5c023aac86228e3e94c4bf6d19708ea861a05 Mon Sep 17 00:00:00 2001
From: Johan Hovold <jhovold@gmail.com>
Date: Thu, 3 May 2012 12:26:36 +0200
Subject: mfd: Add LM3533 lighting-power core driver

Add support for National Semiconductor / TI LM3533 lighting power chips.

This is the core driver which provides register access over I2C and
registers the ambient-light-sensor, LED and backlight sub-drivers.

Signed-off-by: Johan Hovold <jhovold@gmail.com>
Reviewed-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 .../ABI/testing/sysfs-bus-i2c-devices-lm3533       |  38 ++
 drivers/mfd/Kconfig                                |  13 +
 drivers/mfd/Makefile                               |   1 +
 drivers/mfd/lm3533-core.c                          | 717 +++++++++++++++++++++
 drivers/mfd/lm3533-ctrlbank.c                      | 134 ++++
 include/linux/mfd/lm3533.h                         |  89 +++
 6 files changed, 992 insertions(+)
 create mode 100644 Documentation/ABI/testing/sysfs-bus-i2c-devices-lm3533
 create mode 100644 drivers/mfd/lm3533-core.c
 create mode 100644 drivers/mfd/lm3533-ctrlbank.c
 create mode 100644 include/linux/mfd/lm3533.h

(limited to 'include/linux')

diff --git a/Documentation/ABI/testing/sysfs-bus-i2c-devices-lm3533 b/Documentation/ABI/testing/sysfs-bus-i2c-devices-lm3533
new file mode 100644
index 000000000000..570072180b8d
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-i2c-devices-lm3533
@@ -0,0 +1,38 @@
+What:		/sys/bus/i2c/devices/.../boost_freq
+Date:		April 2012
+KernelVersion:	3.5
+Contact:	Johan Hovold <jhovold@gmail.com>
+Description:
+		Set the boost converter switching frequency (0, 1), where
+
+		0 -  500Hz
+		1 - 1000Hz
+
+What:		/sys/bus/i2c/devices/.../boost_ovp
+Date:		April 2012
+KernelVersion:	3.5
+Contact:	Johan Hovold <jhovold@gmail.com>
+Description:
+		Set the boost converter over-voltage protection threshold
+		(0..3), where
+
+		0 - 16V
+		1 - 24V
+		2 - 32V
+		3 - 40V
+
+What:		/sys/bus/i2c/devices/.../output_hvled[n]
+Date:		April 2012
+KernelVersion:	3.5
+Contact:	Johan Hovold <jhovold@gmail.com>
+Description:
+		Set the controlling backlight device for high-voltage current
+		sink HVLED[n] (n = 1, 2) (0, 1).
+
+What:		/sys/bus/i2c/devices/.../output_lvled[n]
+Date:		April 2012
+KernelVersion:	3.5
+Contact:	Johan Hovold <jhovold@gmail.com>
+Description:
+		Set the controlling led device for low-voltage current sink
+		LVLED[n] (n = 1..5) (0..3).
diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 48eed22c65a5..211f5dee9b68 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -106,6 +106,19 @@ config UCB1400_CORE
 	  To compile this driver as a module, choose M here: the
 	  module will be called ucb1400_core.
 
+config MFD_LM3533
+	tristate "LM3533 Lighting Power chip"
+	depends on I2C
+	select MFD_CORE
+	select REGMAP_I2C
+	help
+	  Say yes here to enable support for National Semiconductor / TI
+	  LM3533 Lighting Power chips.
+
+	  This driver provides common support for accessing the device;
+	  additional drivers must be enabled in order to use the LED,
+	  backlight or ambient-light-sensor functionality of the device.
+
 config TPS6105X
 	tristate "TPS61050/61052 Boost Converters"
 	depends on I2C
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index 0dc55cbefa09..d3dae9567800 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -120,3 +120,4 @@ obj-$(CONFIG_MFD_INTEL_MSIC)	+= intel_msic.o
 obj-$(CONFIG_MFD_RC5T583)	+= rc5t583.o rc5t583-irq.o
 obj-$(CONFIG_MFD_S5M_CORE)	+= s5m-core.o s5m-irq.o
 obj-$(CONFIG_MFD_ANATOP)	+= anatop-mfd.o
+obj-$(CONFIG_MFD_LM3533)	+= lm3533-core.o lm3533-ctrlbank.o
diff --git a/drivers/mfd/lm3533-core.c b/drivers/mfd/lm3533-core.c
new file mode 100644
index 000000000000..75f4b7f5a4fd
--- /dev/null
+++ b/drivers/mfd/lm3533-core.c
@@ -0,0 +1,717 @@
+/*
+ * lm3533-core.c -- LM3533 Core
+ *
+ * Copyright (C) 2011-2012 Texas Instruments
+ *
+ * Author: Johan Hovold <jhovold@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under  the terms of the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/err.h>
+#include <linux/gpio.h>
+#include <linux/i2c.h>
+#include <linux/mfd/core.h>
+#include <linux/regmap.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+
+#include <linux/mfd/lm3533.h>
+
+
+#define LM3533_BOOST_OVP_MAX		0x03
+#define LM3533_BOOST_OVP_MASK		0x06
+#define LM3533_BOOST_OVP_SHIFT		1
+
+#define LM3533_BOOST_FREQ_MAX		0x01
+#define LM3533_BOOST_FREQ_MASK		0x01
+#define LM3533_BOOST_FREQ_SHIFT		0
+
+#define LM3533_BL_ID_MASK		1
+#define LM3533_LED_ID_MASK		3
+#define LM3533_BL_ID_MAX		1
+#define LM3533_LED_ID_MAX		3
+
+#define LM3533_HVLED_ID_MAX		2
+#define LM3533_LVLED_ID_MAX		5
+
+#define LM3533_REG_OUTPUT_CONF1		0x10
+#define LM3533_REG_OUTPUT_CONF2		0x11
+#define LM3533_REG_BOOST_PWM		0x2c
+
+#define LM3533_REG_MAX			0xb2
+
+
+static struct mfd_cell lm3533_als_devs[] = {
+	{
+		.name	= "lm3533-als",
+		.id	= -1,
+	},
+};
+
+static struct mfd_cell lm3533_bl_devs[] = {
+	{
+		.name	= "lm3533-backlight",
+		.id	= 0,
+	},
+	{
+		.name	= "lm3533-backlight",
+		.id	= 1,
+	},
+};
+
+static struct mfd_cell lm3533_led_devs[] = {
+	{
+		.name	= "lm3533-leds",
+		.id	= 0,
+	},
+	{
+		.name	= "lm3533-leds",
+		.id	= 1,
+	},
+	{
+		.name	= "lm3533-leds",
+		.id	= 2,
+	},
+	{
+		.name	= "lm3533-leds",
+		.id	= 3,
+	},
+};
+
+int lm3533_read(struct lm3533 *lm3533, u8 reg, u8 *val)
+{
+	int tmp;
+	int ret;
+
+	ret = regmap_read(lm3533->regmap, reg, &tmp);
+	if (ret < 0) {
+		dev_err(lm3533->dev, "failed to read register %02x: %d\n",
+								reg, ret);
+		return ret;
+	}
+
+	*val = tmp;
+
+	dev_dbg(lm3533->dev, "read [%02x]: %02x\n", reg, *val);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(lm3533_read);
+
+int lm3533_write(struct lm3533 *lm3533, u8 reg, u8 val)
+{
+	int ret;
+
+	dev_dbg(lm3533->dev, "write [%02x]: %02x\n", reg, val);
+
+	ret = regmap_write(lm3533->regmap, reg, val);
+	if (ret < 0) {
+		dev_err(lm3533->dev, "failed to write register %02x: %d\n",
+								reg, ret);
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(lm3533_write);
+
+int lm3533_update(struct lm3533 *lm3533, u8 reg, u8 val, u8 mask)
+{
+	int ret;
+
+	dev_dbg(lm3533->dev, "update [%02x]: %02x/%02x\n", reg, val, mask);
+
+	ret = regmap_update_bits(lm3533->regmap, reg, val, mask);
+	if (ret < 0) {
+		dev_err(lm3533->dev, "failed to update register %02x: %d\n",
+								reg, ret);
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(lm3533_update);
+
+/*
+ * HVLED output config -- output hvled controlled by backlight bl
+ */
+static int lm3533_set_hvled_config(struct lm3533 *lm3533, u8 hvled, u8 bl)
+{
+	u8 val;
+	u8 mask;
+	int shift;
+	int ret;
+
+	if (hvled == 0 || hvled > LM3533_HVLED_ID_MAX)
+		return -EINVAL;
+
+	if (bl > LM3533_BL_ID_MAX)
+		return -EINVAL;
+
+	shift = hvled - 1;
+	mask = LM3533_BL_ID_MASK << shift;
+	val = bl << shift;
+
+	ret = lm3533_update(lm3533, LM3533_REG_OUTPUT_CONF1, val, mask);
+	if (ret)
+		dev_err(lm3533->dev, "failed to set hvled config\n");
+
+	return ret;
+}
+
+/*
+ * LVLED output config -- output lvled controlled by LED led
+ */
+static int lm3533_set_lvled_config(struct lm3533 *lm3533, u8 lvled, u8 led)
+{
+	u8 reg;
+	u8 val;
+	u8 mask;
+	int shift;
+	int ret;
+
+	if (lvled == 0 || lvled > LM3533_LVLED_ID_MAX)
+		return -EINVAL;
+
+	if (led > LM3533_LED_ID_MAX)
+		return -EINVAL;
+
+	if (lvled < 4) {
+		reg = LM3533_REG_OUTPUT_CONF1;
+		shift = 2 * lvled;
+	} else {
+		reg = LM3533_REG_OUTPUT_CONF2;
+		shift = 2 * (lvled - 4);
+	}
+
+	mask = LM3533_LED_ID_MASK << shift;
+	val = led << shift;
+
+	ret = lm3533_update(lm3533, reg, val, mask);
+	if (ret)
+		dev_err(lm3533->dev, "failed to set lvled config\n");
+
+	return ret;
+}
+
+static void lm3533_enable(struct lm3533 *lm3533)
+{
+	if (gpio_is_valid(lm3533->gpio_hwen))
+		gpio_set_value(lm3533->gpio_hwen, 1);
+}
+
+static void lm3533_disable(struct lm3533 *lm3533)
+{
+	if (gpio_is_valid(lm3533->gpio_hwen))
+		gpio_set_value(lm3533->gpio_hwen, 0);
+}
+
+enum lm3533_attribute_type {
+	LM3533_ATTR_TYPE_BACKLIGHT,
+	LM3533_ATTR_TYPE_LED,
+};
+
+struct lm3533_device_attribute {
+	struct device_attribute dev_attr;
+	enum lm3533_attribute_type type;
+	union {
+		struct {
+			u8 id;
+		} output;
+		struct {
+			u8 reg;
+			u8 shift;
+			u8 mask;
+			u8 max;
+		} generic;
+	} u;
+};
+
+#define to_lm3533_dev_attr(_attr) \
+	container_of(_attr, struct lm3533_device_attribute, dev_attr)
+
+static ssize_t show_lm3533_reg(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct lm3533 *lm3533 = dev_get_drvdata(dev);
+	struct lm3533_device_attribute *lattr = to_lm3533_dev_attr(attr);
+	u8 val;
+	int ret;
+
+	ret = lm3533_read(lm3533, lattr->u.generic.reg, &val);
+	if (ret)
+		return ret;
+
+	val = (val & lattr->u.generic.mask) >> lattr->u.generic.shift;
+
+	return scnprintf(buf, PAGE_SIZE, "%u\n", val);
+}
+
+static ssize_t store_lm3533_reg(struct device *dev,
+						struct device_attribute *attr,
+						const char *buf, size_t len)
+{
+	struct lm3533 *lm3533 = dev_get_drvdata(dev);
+	struct lm3533_device_attribute *lattr = to_lm3533_dev_attr(attr);
+	u8 val;
+	int ret;
+
+	if (kstrtou8(buf, 0, &val) || val > lattr->u.generic.max)
+		return -EINVAL;
+
+	val = val << lattr->u.generic.shift;
+	ret = lm3533_update(lm3533, lattr->u.generic.reg, val,
+							lattr->u.generic.mask);
+	if (ret)
+		return ret;
+
+	return len;
+}
+
+#define GENERIC_ATTR(_reg, _max, _mask, _shift) \
+	{ .reg		= _reg, \
+	  .max		= _max, \
+	  .mask		= _mask, \
+	  .shift	= _shift }
+
+#define LM3533_GENERIC_ATTR(_name, _mode, _show, _store, _type,	\
+						_reg, _max, _mask, _shift) \
+	struct lm3533_device_attribute lm3533_dev_attr_##_name = { \
+		.dev_attr	= __ATTR(_name, _mode, _show, _store), \
+		.type		= _type, \
+		.u.generic	= GENERIC_ATTR(_reg, _max, _mask, _shift) }
+
+#define LM3533_GENERIC_ATTR_RW(_name, _type, _reg, _max, _mask, _shift) \
+	LM3533_GENERIC_ATTR(_name, S_IRUGO | S_IWUSR, \
+					show_lm3533_reg, store_lm3533_reg, \
+					_type, _reg, _max, _mask, _shift)
+
+#define LM3533_BOOST_ATTR_RW(_name, _NAME) \
+	LM3533_GENERIC_ATTR_RW(_name, LM3533_ATTR_TYPE_BACKLIGHT, \
+				LM3533_REG_BOOST_PWM, LM3533_##_NAME##_MAX, \
+				LM3533_##_NAME##_MASK, LM3533_##_NAME##_SHIFT)
+/*
+ * Boost Over Voltage Protection Select
+ *
+ *   0 - 16 V (default)
+ *   1 - 24 V
+ *   2 - 32 V
+ *   3 - 40 V
+ */
+static LM3533_BOOST_ATTR_RW(boost_ovp, BOOST_OVP);
+
+/*
+ * Boost Frequency Select
+ *
+ *   0 - 500 kHz (default)
+ *   1 - 1 MHz
+ */
+static LM3533_BOOST_ATTR_RW(boost_freq, BOOST_FREQ);
+
+static ssize_t show_output(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct lm3533 *lm3533 = dev_get_drvdata(dev);
+	struct lm3533_device_attribute *lattr = to_lm3533_dev_attr(attr);
+	int id = lattr->u.output.id;
+	u8 reg;
+	u8 val;
+	u8 mask;
+	int shift;
+	int ret;
+
+	if (lattr->type == LM3533_ATTR_TYPE_BACKLIGHT) {
+		reg = LM3533_REG_OUTPUT_CONF1;
+		shift = id - 1;
+		mask = LM3533_BL_ID_MASK << shift;
+	} else {
+		if (id < 4) {
+			reg = LM3533_REG_OUTPUT_CONF1;
+			shift = 2 * id;
+		} else {
+			reg = LM3533_REG_OUTPUT_CONF2;
+			shift = 2 * (id - 4);
+		}
+		mask = LM3533_LED_ID_MASK << shift;
+	}
+
+	ret = lm3533_read(lm3533, reg, &val);
+	if (ret)
+		return ret;
+
+	val = (val & mask) >> shift;
+
+	return scnprintf(buf, PAGE_SIZE, "%u\n", val);
+}
+
+static ssize_t store_output(struct device *dev,
+					struct device_attribute *attr,
+					const char *buf, size_t len)
+{
+	struct lm3533 *lm3533 = dev_get_drvdata(dev);
+	struct lm3533_device_attribute *lattr = to_lm3533_dev_attr(attr);
+	int id = lattr->u.output.id;
+	u8 val;
+	int ret;
+
+	if (kstrtou8(buf, 0, &val))
+		return -EINVAL;
+
+	if (lattr->type == LM3533_ATTR_TYPE_BACKLIGHT)
+		ret = lm3533_set_hvled_config(lm3533, id, val);
+	else
+		ret = lm3533_set_lvled_config(lm3533, id, val);
+
+	if (ret)
+		return ret;
+
+	return len;
+}
+
+#define LM3533_OUTPUT_ATTR(_name, _mode, _show, _store, _type, _id) \
+	struct lm3533_device_attribute lm3533_dev_attr_##_name = \
+		{ .dev_attr	= __ATTR(_name, _mode, _show, _store), \
+		  .type		= _type, \
+		  .u.output	= { .id = _id }, }
+
+#define LM3533_OUTPUT_ATTR_RW(_name, _type, _id) \
+	LM3533_OUTPUT_ATTR(output_##_name, S_IRUGO | S_IWUSR, \
+					show_output, store_output, _type, _id)
+
+#define LM3533_OUTPUT_HVLED_ATTR_RW(_nr) \
+	LM3533_OUTPUT_ATTR_RW(hvled##_nr, LM3533_ATTR_TYPE_BACKLIGHT, _nr)
+#define LM3533_OUTPUT_LVLED_ATTR_RW(_nr) \
+	LM3533_OUTPUT_ATTR_RW(lvled##_nr, LM3533_ATTR_TYPE_LED, _nr)
+/*
+ * Output config:
+ *
+ * output_hvled<nr>	0-1
+ * output_lvled<nr>	0-3
+ */
+static LM3533_OUTPUT_HVLED_ATTR_RW(1);
+static LM3533_OUTPUT_HVLED_ATTR_RW(2);
+static LM3533_OUTPUT_LVLED_ATTR_RW(1);
+static LM3533_OUTPUT_LVLED_ATTR_RW(2);
+static LM3533_OUTPUT_LVLED_ATTR_RW(3);
+static LM3533_OUTPUT_LVLED_ATTR_RW(4);
+static LM3533_OUTPUT_LVLED_ATTR_RW(5);
+
+static struct attribute *lm3533_attributes[] = {
+	&lm3533_dev_attr_boost_freq.dev_attr.attr,
+	&lm3533_dev_attr_boost_ovp.dev_attr.attr,
+	&lm3533_dev_attr_output_hvled1.dev_attr.attr,
+	&lm3533_dev_attr_output_hvled2.dev_attr.attr,
+	&lm3533_dev_attr_output_lvled1.dev_attr.attr,
+	&lm3533_dev_attr_output_lvled2.dev_attr.attr,
+	&lm3533_dev_attr_output_lvled3.dev_attr.attr,
+	&lm3533_dev_attr_output_lvled4.dev_attr.attr,
+	&lm3533_dev_attr_output_lvled5.dev_attr.attr,
+	NULL,
+};
+
+#define to_dev_attr(_attr) \
+	container_of(_attr, struct device_attribute, attr)
+
+static mode_t lm3533_attr_is_visible(struct kobject *kobj,
+					     struct attribute *attr, int n)
+{
+	struct device *dev = container_of(kobj, struct device, kobj);
+	struct lm3533 *lm3533 = dev_get_drvdata(dev);
+	struct device_attribute *dattr = to_dev_attr(attr);
+	struct lm3533_device_attribute *lattr = to_lm3533_dev_attr(dattr);
+	enum lm3533_attribute_type type = lattr->type;
+	mode_t mode = attr->mode;
+
+	if (!lm3533->have_backlights && type == LM3533_ATTR_TYPE_BACKLIGHT)
+		mode = 0;
+	else if (!lm3533->have_leds && type == LM3533_ATTR_TYPE_LED)
+		mode = 0;
+
+	return mode;
+};
+
+static struct attribute_group lm3533_attribute_group = {
+	.is_visible	= lm3533_attr_is_visible,
+	.attrs		= lm3533_attributes
+};
+
+static int __devinit lm3533_device_als_init(struct lm3533 *lm3533)
+{
+	struct lm3533_platform_data *pdata = lm3533->dev->platform_data;
+	int ret;
+
+	if (!pdata->als)
+		return 0;
+
+	lm3533_als_devs[0].platform_data = pdata->als;
+	lm3533_als_devs[0].pdata_size = sizeof(*pdata->als);
+
+	ret = mfd_add_devices(lm3533->dev, 0, lm3533_als_devs, 1, NULL, 0);
+	if (ret) {
+		dev_err(lm3533->dev, "failed to add ALS device\n");
+		return ret;
+	}
+
+	lm3533->have_als = 1;
+
+	return 0;
+}
+
+static int __devinit lm3533_device_bl_init(struct lm3533 *lm3533)
+{
+	struct lm3533_platform_data *pdata = lm3533->dev->platform_data;
+	int i;
+	int ret;
+
+	if (!pdata->backlights || pdata->num_backlights == 0)
+		return 0;
+
+	if (pdata->num_backlights > ARRAY_SIZE(lm3533_bl_devs))
+		pdata->num_backlights = ARRAY_SIZE(lm3533_bl_devs);
+
+	for (i = 0; i < pdata->num_backlights; ++i) {
+		lm3533_bl_devs[i].platform_data = &pdata->backlights[i];
+		lm3533_bl_devs[i].pdata_size = sizeof(pdata->backlights[i]);
+	}
+
+	ret = mfd_add_devices(lm3533->dev, 0, lm3533_bl_devs,
+					pdata->num_backlights, NULL, 0);
+	if (ret) {
+		dev_err(lm3533->dev, "failed to add backlight devices\n");
+		return ret;
+	}
+
+	lm3533->have_backlights = 1;
+
+	return 0;
+}
+
+static int __devinit lm3533_device_led_init(struct lm3533 *lm3533)
+{
+	struct lm3533_platform_data *pdata = lm3533->dev->platform_data;
+	int i;
+	int ret;
+
+	if (!pdata->leds || pdata->num_leds == 0)
+		return 0;
+
+	if (pdata->num_leds > ARRAY_SIZE(lm3533_led_devs))
+		pdata->num_leds = ARRAY_SIZE(lm3533_led_devs);
+
+	for (i = 0; i < pdata->num_leds; ++i) {
+		lm3533_led_devs[i].platform_data = &pdata->leds[i];
+		lm3533_led_devs[i].pdata_size = sizeof(pdata->leds[i]);
+	}
+
+	ret = mfd_add_devices(lm3533->dev, 0, lm3533_led_devs,
+						pdata->num_leds, NULL, 0);
+	if (ret) {
+		dev_err(lm3533->dev, "failed to add LED devices\n");
+		return ret;
+	}
+
+	lm3533->have_leds = 1;
+
+	return 0;
+}
+
+static int __devinit lm3533_device_init(struct lm3533 *lm3533)
+{
+	struct lm3533_platform_data *pdata = lm3533->dev->platform_data;
+	int ret;
+
+	dev_dbg(lm3533->dev, "%s\n", __func__);
+
+	if (!pdata) {
+		dev_err(lm3533->dev, "no platform data\n");
+		return -EINVAL;
+	}
+
+	lm3533->gpio_hwen = pdata->gpio_hwen;
+
+	dev_set_drvdata(lm3533->dev, lm3533);
+
+	if (gpio_is_valid(lm3533->gpio_hwen)) {
+		ret = gpio_request_one(lm3533->gpio_hwen, GPIOF_OUT_INIT_LOW,
+								"lm3533-hwen");
+		if (ret < 0) {
+			dev_err(lm3533->dev,
+				"failed to request HWEN GPIO %d\n",
+				lm3533->gpio_hwen);
+			return ret;
+		}
+	}
+
+	lm3533_enable(lm3533);
+
+	lm3533_device_als_init(lm3533);
+	lm3533_device_bl_init(lm3533);
+	lm3533_device_led_init(lm3533);
+
+	ret = sysfs_create_group(&lm3533->dev->kobj, &lm3533_attribute_group);
+	if (ret < 0) {
+		dev_err(lm3533->dev, "failed to create sysfs attributes\n");
+		goto err_unregister;
+	}
+
+	return 0;
+
+err_unregister:
+	mfd_remove_devices(lm3533->dev);
+	lm3533_disable(lm3533);
+	if (gpio_is_valid(lm3533->gpio_hwen))
+		gpio_free(lm3533->gpio_hwen);
+
+	return ret;
+}
+
+static void __devexit lm3533_device_exit(struct lm3533 *lm3533)
+{
+	dev_dbg(lm3533->dev, "%s\n", __func__);
+
+	sysfs_remove_group(&lm3533->dev->kobj, &lm3533_attribute_group);
+
+	mfd_remove_devices(lm3533->dev);
+	lm3533_disable(lm3533);
+	if (gpio_is_valid(lm3533->gpio_hwen))
+		gpio_free(lm3533->gpio_hwen);
+}
+
+static bool lm3533_readable_register(struct device *dev, unsigned int reg)
+{
+	switch (reg) {
+	case 0x10 ... 0x2c:
+	case 0x30 ... 0x38:
+	case 0x40 ... 0x45:
+	case 0x50 ... 0x57:
+	case 0x60 ... 0x6e:
+	case 0x70 ... 0x75:
+	case 0x80 ... 0x85:
+	case 0x90 ... 0x95:
+	case 0xa0 ... 0xa5:
+	case 0xb0 ... 0xb2:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static bool lm3533_volatile_register(struct device *dev, unsigned int reg)
+{
+	switch (reg) {
+	case 0x34:		/* zone */
+	case 0x37 ... 0x38:	/* adc */
+	case 0xb0 ... 0xb1:	/* fault */
+		return true;
+	default:
+		return false;
+	}
+}
+
+static bool lm3533_precious_register(struct device *dev, unsigned int reg)
+{
+	switch (reg) {
+	case 0x34:		/* zone */
+		return true;
+	default:
+		return false;
+	}
+}
+
+static struct regmap_config regmap_config = {
+	.reg_bits	= 8,
+	.val_bits	= 8,
+	.max_register	= LM3533_REG_MAX,
+	.readable_reg	= lm3533_readable_register,
+	.volatile_reg	= lm3533_volatile_register,
+	.precious_reg	= lm3533_precious_register,
+};
+
+static int __devinit lm3533_i2c_probe(struct i2c_client *i2c,
+					const struct i2c_device_id *id)
+{
+	struct lm3533 *lm3533;
+	int ret;
+
+	dev_dbg(&i2c->dev, "%s\n", __func__);
+
+	lm3533 = kzalloc(sizeof(*lm3533), GFP_KERNEL);
+	if (!lm3533)
+		return -ENOMEM;
+
+	i2c_set_clientdata(i2c, lm3533);
+
+	lm3533->regmap = regmap_init_i2c(i2c, &regmap_config);
+	if (IS_ERR(lm3533->regmap)) {
+		ret = PTR_ERR(lm3533->regmap);
+		goto err_regmap;
+	}
+
+	lm3533->dev = &i2c->dev;
+	lm3533->irq = i2c->irq;
+
+	ret = lm3533_device_init(lm3533);
+	if (ret)
+		goto err_dev;
+
+	return 0;
+
+err_dev:
+	regmap_exit(lm3533->regmap);
+err_regmap:
+	kfree(lm3533);
+
+	return ret;
+}
+
+static int __devexit lm3533_i2c_remove(struct i2c_client *i2c)
+{
+	struct lm3533 *lm3533 = i2c_get_clientdata(i2c);
+
+	dev_dbg(&i2c->dev, "%s\n", __func__);
+
+	lm3533_device_exit(lm3533);
+	regmap_exit(lm3533->regmap);
+
+	kfree(lm3533);
+
+	return 0;
+}
+
+static const struct i2c_device_id lm3533_i2c_ids[] = {
+	{ "lm3533", 0 },
+	{ },
+};
+MODULE_DEVICE_TABLE(i2c, lm3533_i2c_ids);
+
+static struct i2c_driver lm3533_i2c_driver = {
+	.driver = {
+		   .name = "lm3533",
+		   .owner = THIS_MODULE,
+	},
+	.id_table	= lm3533_i2c_ids,
+	.probe		= lm3533_i2c_probe,
+	.remove		= __devexit_p(lm3533_i2c_remove),
+};
+
+static int __init lm3533_i2c_init(void)
+{
+	return i2c_add_driver(&lm3533_i2c_driver);
+}
+subsys_initcall(lm3533_i2c_init);
+
+static void __exit lm3533_i2c_exit(void)
+{
+	i2c_del_driver(&lm3533_i2c_driver);
+}
+module_exit(lm3533_i2c_exit);
+
+MODULE_AUTHOR("Johan Hovold <jhovold@gmail.com>");
+MODULE_DESCRIPTION("LM3533 Core");
+MODULE_LICENSE("GPL");
diff --git a/drivers/mfd/lm3533-ctrlbank.c b/drivers/mfd/lm3533-ctrlbank.c
new file mode 100644
index 000000000000..c2732a37c65a
--- /dev/null
+++ b/drivers/mfd/lm3533-ctrlbank.c
@@ -0,0 +1,134 @@
+/*
+ * lm3533-ctrlbank.c -- LM3533 Generic Control Bank interface
+ *
+ * Copyright (C) 2011-2012 Texas Instruments
+ *
+ * Author: Johan Hovold <jhovold@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under  the terms of the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/device.h>
+#include <linux/module.h>
+
+#include <linux/mfd/lm3533.h>
+
+
+#define LM3533_BRIGHTNESS_MAX		255
+#define LM3533_MAX_CURRENT_MAX		31
+#define LM3533_PWM_MAX			0x3f
+
+#define LM3533_REG_PWM_BASE		0x14
+#define LM3533_REG_MAX_CURRENT_BASE	0x1f
+#define LM3533_REG_CTRLBANK_ENABLE	0x27
+#define LM3533_REG_BRIGHTNESS_BASE	0x40
+
+
+static inline u8 lm3533_ctrlbank_get_reg(struct lm3533_ctrlbank *cb, u8 base)
+{
+	return base + cb->id;
+}
+
+int lm3533_ctrlbank_enable(struct lm3533_ctrlbank *cb)
+{
+	u8 mask;
+	int ret;
+
+	dev_dbg(cb->dev, "%s - %d\n", __func__, cb->id);
+
+	mask = 1 << cb->id;
+	ret = lm3533_update(cb->lm3533, LM3533_REG_CTRLBANK_ENABLE,
+								mask, mask);
+	if (ret)
+		dev_err(cb->dev, "failed to enable ctrlbank %d\n", cb->id);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(lm3533_ctrlbank_enable);
+
+int lm3533_ctrlbank_disable(struct lm3533_ctrlbank *cb)
+{
+	u8 mask;
+	int ret;
+
+	dev_dbg(cb->dev, "%s - %d\n", __func__, cb->id);
+
+	mask = 1 << cb->id;
+	ret = lm3533_update(cb->lm3533, LM3533_REG_CTRLBANK_ENABLE, 0, mask);
+	if (ret)
+		dev_err(cb->dev, "failed to disable ctrlbank %d\n", cb->id);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(lm3533_ctrlbank_disable);
+
+#define lm3533_ctrlbank_set(_name, _NAME)				\
+int lm3533_ctrlbank_set_##_name(struct lm3533_ctrlbank *cb, u8 val)	\
+{									\
+	u8 reg;								\
+	int ret;							\
+									\
+	if (val > LM3533_##_NAME##_MAX)					\
+		return -EINVAL;						\
+									\
+	reg = lm3533_ctrlbank_get_reg(cb, LM3533_REG_##_NAME##_BASE);	\
+	ret = lm3533_write(cb->lm3533, reg, val);			\
+	if (ret)							\
+		dev_err(cb->dev, "failed to set " #_name "\n");		\
+									\
+	return ret;							\
+}									\
+EXPORT_SYMBOL_GPL(lm3533_ctrlbank_set_##_name);
+
+#define lm3533_ctrlbank_get(_name, _NAME)				\
+int lm3533_ctrlbank_get_##_name(struct lm3533_ctrlbank *cb, u8 *val)	\
+{									\
+	u8 reg;								\
+	int ret;							\
+									\
+	reg = lm3533_ctrlbank_get_reg(cb, LM3533_REG_##_NAME##_BASE);	\
+	ret = lm3533_read(cb->lm3533, reg, val);			\
+	if (ret)							\
+		dev_err(cb->dev, "failed to get " #_name "\n");		\
+									\
+	return ret;							\
+}									\
+EXPORT_SYMBOL_GPL(lm3533_ctrlbank_get_##_name);
+
+lm3533_ctrlbank_set(brightness, BRIGHTNESS);
+lm3533_ctrlbank_get(brightness, BRIGHTNESS);
+
+/*
+ * Full scale current.
+ *
+ * Imax = 5 + val * 0.8 mA, e.g.:
+ *
+ *    0 - 5 mA
+ *     ...
+ *   19 - 20.2 mA (default)
+ *     ...
+ *   31 - 29.8 mA
+ */
+lm3533_ctrlbank_set(max_current, MAX_CURRENT);
+lm3533_ctrlbank_get(max_current, MAX_CURRENT);
+
+/*
+ * PWM-input control mask:
+ *
+ *   bit 5 - PWM-input enabled in Zone 4
+ *   bit 4 - PWM-input enabled in Zone 3
+ *   bit 3 - PWM-input enabled in Zone 2
+ *   bit 2 - PWM-input enabled in Zone 1
+ *   bit 1 - PWM-input enabled in Zone 0
+ *   bit 0 - PWM-input enabled
+ */
+lm3533_ctrlbank_set(pwm, PWM);
+lm3533_ctrlbank_get(pwm, PWM);
+
+
+MODULE_AUTHOR("Johan Hovold <jhovold@gmail.com>");
+MODULE_DESCRIPTION("LM3533 Control Bank interface");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/mfd/lm3533.h b/include/linux/mfd/lm3533.h
new file mode 100644
index 000000000000..75f85f3fbd90
--- /dev/null
+++ b/include/linux/mfd/lm3533.h
@@ -0,0 +1,89 @@
+/*
+ * lm3533.h -- LM3533 interface
+ *
+ * Copyright (C) 2011-2012 Texas Instruments
+ *
+ * Author: Johan Hovold <jhovold@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under  the terms of the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the License, or (at your
+ * option) any later version.
+ */
+
+#ifndef __LINUX_MFD_LM3533_H
+#define __LINUX_MFD_LM3533_H
+
+#define LM3533_ATTR_RO(_name) \
+	DEVICE_ATTR(_name, S_IRUGO, show_##_name, NULL)
+#define LM3533_ATTR_RW(_name) \
+	DEVICE_ATTR(_name, S_IRUGO | S_IWUSR , show_##_name, store_##_name)
+
+struct device;
+struct regmap;
+
+struct lm3533 {
+	struct device *dev;
+
+	struct regmap *regmap;
+
+	int gpio_hwen;
+	int irq;
+
+	unsigned have_als:1;
+	unsigned have_backlights:1;
+	unsigned have_leds:1;
+};
+
+struct lm3533_ctrlbank {
+	struct lm3533 *lm3533;
+	struct device *dev;
+	int id;
+};
+
+struct lm3533_als_platform_data {
+	unsigned pwm_mode:1;		/* PWM input mode (default analog) */
+};
+
+struct lm3533_bl_platform_data {
+	char *name;
+	u8 default_brightness;		/* 0 - 255 */
+	u8 max_current;			/* 0 - 31 */
+	u8 pwm;				/* 0 - 0x3f */
+};
+
+struct lm3533_led_platform_data {
+	char *name;
+	const char *default_trigger;
+	u8 max_current;			/* 0 - 31 */
+	u8 pwm;				/* 0 - 0x3f */
+};
+
+struct lm3533_platform_data {
+	int gpio_hwen;
+
+	struct lm3533_als_platform_data *als;
+
+	struct lm3533_bl_platform_data *backlights;
+	int num_backlights;
+
+	struct lm3533_led_platform_data *leds;
+	int num_leds;
+};
+
+extern int lm3533_ctrlbank_enable(struct lm3533_ctrlbank *cb);
+extern int lm3533_ctrlbank_disable(struct lm3533_ctrlbank *cb);
+
+extern int lm3533_ctrlbank_set_brightness(struct lm3533_ctrlbank *cb, u8 val);
+extern int lm3533_ctrlbank_get_brightness(struct lm3533_ctrlbank *cb, u8 *val);
+extern int lm3533_ctrlbank_set_max_current(struct lm3533_ctrlbank *cb, u8 val);
+extern int lm3533_ctrlbank_get_max_current(struct lm3533_ctrlbank *cb,
+								u8 *val);
+extern int lm3533_ctrlbank_set_pwm(struct lm3533_ctrlbank *cb, u8 val);
+extern int lm3533_ctrlbank_get_pwm(struct lm3533_ctrlbank *cb, u8 *val);
+
+extern int lm3533_read(struct lm3533 *lm3533, u8 reg, u8 *val);
+extern int lm3533_write(struct lm3533 *lm3533, u8 reg, u8 val);
+extern int lm3533_update(struct lm3533 *lm3533, u8 reg, u8 val, u8 mask);
+
+#endif	/* __LINUX_MFD_LM3533_H */
-- 
cgit v1.3-7-g2ca7


From 887c8ec7219fc8eba78bb8f44a74c660934e9b98 Mon Sep 17 00:00:00 2001
From: Aaron Sierra <asierra@xes-inc.com>
Date: Fri, 20 Apr 2012 14:14:11 -0500
Subject: watchdog: Convert iTCO_wdt driver to mfd model

This patch converts the iTCO_wdt driver to use the multi-function device
driver model. It uses resources discovered by the lpc_ich driver, so that
it no longer does its own PCI scanning.

Signed-off-by: Aaron Sierra <asierra@xes-inc.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/Kconfig                    |   3 +-
 drivers/mfd/lpc_ich.c                  | 171 ++++++++++-
 drivers/watchdog/Kconfig               |   1 +
 drivers/watchdog/iTCO_vendor.h         |   6 +-
 drivers/watchdog/iTCO_vendor_support.c |  43 ++-
 drivers/watchdog/iTCO_wdt.c            | 529 +++++++--------------------------
 include/linux/mfd/lpc_ich.h            |   7 +
 7 files changed, 313 insertions(+), 447 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 211f5dee9b68..1e9a7d5ec919 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -785,7 +785,8 @@ config LPC_ICH
 	help
 	  The LPC bridge function of the Intel ICH provides support for
 	  many functional units. This driver provides needed support for
-	  other drivers to control these functions, currently GPIO.
+	  other drivers to control these functions, currently GPIO and
+	  watchdog.
 
 config MFD_RDC321X
 	tristate "Support for RDC-R321x southbridge"
diff --git a/drivers/mfd/lpc_ich.c b/drivers/mfd/lpc_ich.c
index 7e3a7b6ab022..027cc8f86132 100644
--- a/drivers/mfd/lpc_ich.c
+++ b/drivers/mfd/lpc_ich.c
@@ -65,14 +65,42 @@
 #define ACPIBASE		0x40
 #define ACPIBASE_GPE_OFF	0x28
 #define ACPIBASE_GPE_END	0x2f
+#define ACPIBASE_SMI_OFF	0x30
+#define ACPIBASE_SMI_END	0x33
+#define ACPIBASE_TCO_OFF	0x60
+#define ACPIBASE_TCO_END	0x7f
 #define ACPICTRL		0x44
 
+#define ACPIBASE_GCS_OFF	0x3410
+#define ACPIBASE_GCS_END	0x3414
+
 #define GPIOBASE		0x48
 #define GPIOCTRL		0x4C
 
+#define RCBABASE		0xf0
+
+#define wdt_io_res(i) wdt_res(0, i)
+#define wdt_mem_res(i) wdt_res(ICH_RES_MEM_OFF, i)
+#define wdt_res(b, i) (&wdt_ich_res[(b) + (i)])
+
 static int lpc_ich_acpi_save = -1;
 static int lpc_ich_gpio_save = -1;
 
+static struct resource wdt_ich_res[] = {
+	/* ACPI - TCO */
+	{
+		.flags = IORESOURCE_IO,
+	},
+	/* ACPI - SMI */
+	{
+		.flags = IORESOURCE_IO,
+	},
+	/* GCS */
+	{
+		.flags = IORESOURCE_MEM,
+	},
+};
+
 static struct resource gpio_ich_res[] = {
 	/* GPIO */
 	{
@@ -85,10 +113,17 @@ static struct resource gpio_ich_res[] = {
 };
 
 enum lpc_cells {
-	LPC_GPIO = 0,
+	LPC_WDT = 0,
+	LPC_GPIO,
 };
 
 static struct mfd_cell lpc_ich_cells[] = {
+	[LPC_WDT] = {
+		.name = "iTCO_wdt",
+		.num_resources = ARRAY_SIZE(wdt_ich_res),
+		.resources = wdt_ich_res,
+		.ignore_resource_conflicts = true,
+	},
 	[LPC_GPIO] = {
 		.name = "gpio_ich",
 		.num_resources = ARRAY_SIZE(gpio_ich_res),
@@ -162,218 +197,276 @@ enum lpc_chipsets {
 struct lpc_ich_info lpc_chipset_info[] __devinitdata = {
 	[LPC_ICH] = {
 		.name = "ICH",
+		.iTCO_version = 1,
 	},
 	[LPC_ICH0] = {
 		.name = "ICH0",
+		.iTCO_version = 1,
 	},
 	[LPC_ICH2] = {
 		.name = "ICH2",
+		.iTCO_version = 1,
 	},
 	[LPC_ICH2M] = {
 		.name = "ICH2-M",
+		.iTCO_version = 1,
 	},
 	[LPC_ICH3] = {
 		.name = "ICH3-S",
+		.iTCO_version = 1,
 	},
 	[LPC_ICH3M] = {
 		.name = "ICH3-M",
+		.iTCO_version = 1,
 	},
 	[LPC_ICH4] = {
 		.name = "ICH4",
+		.iTCO_version = 1,
 	},
 	[LPC_ICH4M] = {
 		.name = "ICH4-M",
+		.iTCO_version = 1,
 	},
 	[LPC_CICH] = {
 		.name = "C-ICH",
+		.iTCO_version = 1,
 	},
 	[LPC_ICH5] = {
 		.name = "ICH5 or ICH5R",
+		.iTCO_version = 1,
 	},
 	[LPC_6300ESB] = {
 		.name = "6300ESB",
+		.iTCO_version = 1,
 	},
 	[LPC_ICH6] = {
 		.name = "ICH6 or ICH6R",
+		.iTCO_version = 2,
 		.gpio_version = ICH_V6_GPIO,
 	},
 	[LPC_ICH6M] = {
 		.name = "ICH6-M",
+		.iTCO_version = 2,
 		.gpio_version = ICH_V6_GPIO,
 	},
 	[LPC_ICH6W] = {
 		.name = "ICH6W or ICH6RW",
+		.iTCO_version = 2,
 		.gpio_version = ICH_V6_GPIO,
 	},
 	[LPC_631XESB] = {
 		.name = "631xESB/632xESB",
+		.iTCO_version = 2,
 		.gpio_version = ICH_V6_GPIO,
 	},
 	[LPC_ICH7] = {
 		.name = "ICH7 or ICH7R",
+		.iTCO_version = 2,
 		.gpio_version = ICH_V7_GPIO,
 	},
 	[LPC_ICH7DH] = {
 		.name = "ICH7DH",
+		.iTCO_version = 2,
 		.gpio_version = ICH_V7_GPIO,
 	},
 	[LPC_ICH7M] = {
 		.name = "ICH7-M or ICH7-U",
+		.iTCO_version = 2,
 		.gpio_version = ICH_V7_GPIO,
 	},
 	[LPC_ICH7MDH] = {
 		.name = "ICH7-M DH",
+		.iTCO_version = 2,
 		.gpio_version = ICH_V7_GPIO,
 	},
 	[LPC_NM10] = {
 		.name = "NM10",
+		.iTCO_version = 2,
 	},
 	[LPC_ICH8] = {
 		.name = "ICH8 or ICH8R",
+		.iTCO_version = 2,
 		.gpio_version = ICH_V7_GPIO,
 	},
 	[LPC_ICH8DH] = {
 		.name = "ICH8DH",
+		.iTCO_version = 2,
 		.gpio_version = ICH_V7_GPIO,
 	},
 	[LPC_ICH8DO] = {
 		.name = "ICH8DO",
+		.iTCO_version = 2,
 		.gpio_version = ICH_V7_GPIO,
 	},
 	[LPC_ICH8M] = {
 		.name = "ICH8M",
+		.iTCO_version = 2,
 		.gpio_version = ICH_V7_GPIO,
 	},
 	[LPC_ICH8ME] = {
 		.name = "ICH8M-E",
+		.iTCO_version = 2,
 		.gpio_version = ICH_V7_GPIO,
 	},
 	[LPC_ICH9] = {
 		.name = "ICH9",
+		.iTCO_version = 2,
 		.gpio_version = ICH_V9_GPIO,
 	},
 	[LPC_ICH9R] = {
 		.name = "ICH9R",
+		.iTCO_version = 2,
 		.gpio_version = ICH_V9_GPIO,
 	},
 	[LPC_ICH9DH] = {
 		.name = "ICH9DH",
+		.iTCO_version = 2,
 		.gpio_version = ICH_V9_GPIO,
 	},
 	[LPC_ICH9DO] = {
 		.name = "ICH9DO",
+		.iTCO_version = 2,
 		.gpio_version = ICH_V9_GPIO,
 	},
 	[LPC_ICH9M] = {
 		.name = "ICH9M",
+		.iTCO_version = 2,
 		.gpio_version = ICH_V9_GPIO,
 	},
 	[LPC_ICH9ME] = {
 		.name = "ICH9M-E",
+		.iTCO_version = 2,
 		.gpio_version = ICH_V9_GPIO,
 	},
 	[LPC_ICH10] = {
 		.name = "ICH10",
+		.iTCO_version = 2,
 		.gpio_version = ICH_V10CONS_GPIO,
 	},
 	[LPC_ICH10R] = {
 		.name = "ICH10R",
+		.iTCO_version = 2,
 		.gpio_version = ICH_V10CONS_GPIO,
 	},
 	[LPC_ICH10D] = {
 		.name = "ICH10D",
+		.iTCO_version = 2,
 		.gpio_version = ICH_V10CORP_GPIO,
 	},
 	[LPC_ICH10DO] = {
 		.name = "ICH10DO",
+		.iTCO_version = 2,
 		.gpio_version = ICH_V10CORP_GPIO,
 	},
 	[LPC_PCH] = {
 		.name = "PCH Desktop Full Featured",
+		.iTCO_version = 2,
 		.gpio_version = ICH_V5_GPIO,
 	},
 	[LPC_PCHM] = {
 		.name = "PCH Mobile Full Featured",
+		.iTCO_version = 2,
 		.gpio_version = ICH_V5_GPIO,
 	},
 	[LPC_P55] = {
 		.name = "P55",
+		.iTCO_version = 2,
 		.gpio_version = ICH_V5_GPIO,
 	},
 	[LPC_PM55] = {
 		.name = "PM55",
+		.iTCO_version = 2,
 		.gpio_version = ICH_V5_GPIO,
 	},
 	[LPC_H55] = {
 		.name = "H55",
+		.iTCO_version = 2,
 		.gpio_version = ICH_V5_GPIO,
 	},
 	[LPC_QM57] = {
 		.name = "QM57",
+		.iTCO_version = 2,
 		.gpio_version = ICH_V5_GPIO,
 	},
 	[LPC_H57] = {
 		.name = "H57",
+		.iTCO_version = 2,
 		.gpio_version = ICH_V5_GPIO,
 	},
 	[LPC_HM55] = {
 		.name = "HM55",
+		.iTCO_version = 2,
 		.gpio_version = ICH_V5_GPIO,
 	},
 	[LPC_Q57] = {
 		.name = "Q57",
+		.iTCO_version = 2,
 		.gpio_version = ICH_V5_GPIO,
 	},
 	[LPC_HM57] = {
 		.name = "HM57",
+		.iTCO_version = 2,
 		.gpio_version = ICH_V5_GPIO,
 	},
 	[LPC_PCHMSFF] = {
 		.name = "PCH Mobile SFF Full Featured",
+		.iTCO_version = 2,
 		.gpio_version = ICH_V5_GPIO,
 	},
 	[LPC_QS57] = {
 		.name = "QS57",
+		.iTCO_version = 2,
 		.gpio_version = ICH_V5_GPIO,
 	},
 	[LPC_3400] = {
 		.name = "3400",
+		.iTCO_version = 2,
 		.gpio_version = ICH_V5_GPIO,
 	},
 	[LPC_3420] = {
 		.name = "3420",
+		.iTCO_version = 2,
 		.gpio_version = ICH_V5_GPIO,
 	},
 	[LPC_3450] = {
 		.name = "3450",
+		.iTCO_version = 2,
 		.gpio_version = ICH_V5_GPIO,
 	},
 	[LPC_EP80579] = {
 		.name = "EP80579",
+		.iTCO_version = 2,
 	},
 	[LPC_CPT] = {
 		.name = "Cougar Point",
+		.iTCO_version = 2,
 		.gpio_version = ICH_V5_GPIO,
 	},
 	[LPC_CPTD] = {
 		.name = "Cougar Point Desktop",
+		.iTCO_version = 2,
 		.gpio_version = ICH_V5_GPIO,
 	},
 	[LPC_CPTM] = {
 		.name = "Cougar Point Mobile",
+		.iTCO_version = 2,
 		.gpio_version = ICH_V5_GPIO,
 	},
 	[LPC_PBG] = {
 		.name = "Patsburg",
+		.iTCO_version = 2,
 	},
 	[LPC_DH89XXCC] = {
 		.name = "DH89xxCC",
+		.iTCO_version = 2,
 	},
 	[LPC_PPT] = {
 		.name = "Panther Point",
+		.iTCO_version = 2,
 	},
 	[LPC_LPT] = {
 		.name = "Lynx Point",
+		.iTCO_version = 2,
 	},
 };
 
@@ -666,12 +759,88 @@ gpio_done:
 	return ret;
 }
 
+static int __devinit lpc_ich_init_wdt(struct pci_dev *dev,
+				const struct pci_device_id *id)
+{
+	u32 base_addr_cfg;
+	u32 base_addr;
+	int ret;
+	bool acpi_conflict = false;
+	struct resource *res;
+
+	/* Setup power management base register */
+	pci_read_config_dword(dev, ACPIBASE, &base_addr_cfg);
+	base_addr = base_addr_cfg & 0x0000ff80;
+	if (!base_addr) {
+		dev_err(&dev->dev, "I/O space for ACPI uninitialized\n");
+		ret = -ENODEV;
+		goto wdt_done;
+	}
+
+	res = wdt_io_res(ICH_RES_IO_TCO);
+	res->start = base_addr + ACPIBASE_TCO_OFF;
+	res->end = base_addr + ACPIBASE_TCO_END;
+	ret = acpi_check_resource_conflict(res);
+	if (ret) {
+		acpi_conflict = true;
+		goto wdt_done;
+	}
+
+	res = wdt_io_res(ICH_RES_IO_SMI);
+	res->start = base_addr + ACPIBASE_SMI_OFF;
+	res->end = base_addr + ACPIBASE_SMI_END;
+	ret = acpi_check_resource_conflict(res);
+	if (ret) {
+		acpi_conflict = true;
+		goto wdt_done;
+	}
+	lpc_ich_enable_acpi_space(dev);
+
+	/*
+	 * Get the Memory-Mapped GCS register. To get access to it
+	 * we have to read RCBA from PCI Config space 0xf0 and use
+	 * it as base. GCS = RCBA + ICH6_GCS(0x3410).
+	 */
+	if (lpc_chipset_info[id->driver_data].iTCO_version == 2) {
+		pci_read_config_dword(dev, RCBABASE, &base_addr_cfg);
+		base_addr = base_addr_cfg & 0xffffc000;
+		if (!(base_addr_cfg & 1)) {
+			pr_err("RCBA is disabled by hardware/BIOS, "
+					"device disabled\n");
+			ret = -ENODEV;
+			goto wdt_done;
+		}
+		res = wdt_mem_res(ICH_RES_MEM_GCS);
+		res->start = base_addr + ACPIBASE_GCS_OFF;
+		res->end = base_addr + ACPIBASE_GCS_END;
+		ret = acpi_check_resource_conflict(res);
+		if (ret) {
+			acpi_conflict = true;
+			goto wdt_done;
+		}
+	}
+
+	lpc_ich_finalize_cell(&lpc_ich_cells[LPC_WDT], id);
+	ret = mfd_add_devices(&dev->dev, -1, &lpc_ich_cells[LPC_WDT],
+				1, NULL, 0);
+
+wdt_done:
+	if (acpi_conflict)
+		pr_warn("Resource conflict(s) found affecting %s\n",
+				lpc_ich_cells[LPC_WDT].name);
+	return ret;
+}
+
 static int __devinit lpc_ich_probe(struct pci_dev *dev,
 				const struct pci_device_id *id)
 {
 	int ret;
 	bool cell_added = false;
 
+	ret = lpc_ich_init_wdt(dev, id);
+	if (!ret)
+		cell_added = true;
+
 	ret = lpc_ich_init_gpio(dev, id);
 	if (!ret)
 		cell_added = true;
diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
index 37096246c937..a9ed0878abfc 100644
--- a/drivers/watchdog/Kconfig
+++ b/drivers/watchdog/Kconfig
@@ -563,6 +563,7 @@ config INTEL_SCU_WATCHDOG
 config ITCO_WDT
 	tristate "Intel TCO Timer/Watchdog"
 	depends on (X86 || IA64) && PCI
+	select LPC_ICH
 	---help---
 	  Hardware driver for the intel TCO timer based watchdog devices.
 	  These drivers are included in the Intel 82801 I/O Controller
diff --git a/drivers/watchdog/iTCO_vendor.h b/drivers/watchdog/iTCO_vendor.h
index 9e27e6422f66..3c57b45537a2 100644
--- a/drivers/watchdog/iTCO_vendor.h
+++ b/drivers/watchdog/iTCO_vendor.h
@@ -1,8 +1,8 @@
 /* iTCO Vendor Specific Support hooks */
 #ifdef CONFIG_ITCO_VENDOR_SUPPORT
-extern void iTCO_vendor_pre_start(unsigned long, unsigned int);
-extern void iTCO_vendor_pre_stop(unsigned long);
-extern void iTCO_vendor_pre_keepalive(unsigned long, unsigned int);
+extern void iTCO_vendor_pre_start(struct resource *, unsigned int);
+extern void iTCO_vendor_pre_stop(struct resource *);
+extern void iTCO_vendor_pre_keepalive(struct resource *, unsigned int);
 extern void iTCO_vendor_pre_set_heartbeat(unsigned int);
 extern int iTCO_vendor_check_noreboot_on(void);
 #else
diff --git a/drivers/watchdog/iTCO_vendor_support.c b/drivers/watchdog/iTCO_vendor_support.c
index 2721d29ce243..b6b2f90b5d44 100644
--- a/drivers/watchdog/iTCO_vendor_support.c
+++ b/drivers/watchdog/iTCO_vendor_support.c
@@ -35,11 +35,6 @@
 
 #include "iTCO_vendor.h"
 
-/* iTCO defines */
-#define	SMI_EN		(acpibase + 0x30) /* SMI Control and Enable Register */
-#define	TCOBASE		(acpibase + 0x60) /* TCO base address */
-#define	TCO1_STS	(TCOBASE + 0x04)  /* TCO1 Status Register */
-
 /* List of vendor support modes */
 /* SuperMicro Pentium 3 Era 370SSE+-OEM1/P3TSSE */
 #define SUPERMICRO_OLD_BOARD	1
@@ -82,24 +77,24 @@ MODULE_PARM_DESC(vendorsupport, "iTCO vendor specific support mode, default="
  *	    20.6 seconds.
  */
 
-static void supermicro_old_pre_start(unsigned long acpibase)
+static void supermicro_old_pre_start(struct resource *smires)
 {
 	unsigned long val32;
 
 	/* Bit 13: TCO_EN -> 0 = Disables TCO logic generating an SMI# */
-	val32 = inl(SMI_EN);
+	val32 = inl(smires->start);
 	val32 &= 0xffffdfff;	/* Turn off SMI clearing watchdog */
-	outl(val32, SMI_EN);	/* Needed to activate watchdog */
+	outl(val32, smires->start);	/* Needed to activate watchdog */
 }
 
-static void supermicro_old_pre_stop(unsigned long acpibase)
+static void supermicro_old_pre_stop(struct resource *smires)
 {
 	unsigned long val32;
 
 	/* Bit 13: TCO_EN -> 1 = Enables the TCO logic to generate SMI# */
-	val32 = inl(SMI_EN);
+	val32 = inl(smires->start);
 	val32 |= 0x00002000;	/* Turn on SMI clearing watchdog */
-	outl(val32, SMI_EN);	/* Needed to deactivate watchdog */
+	outl(val32, smires->start);	/* Needed to deactivate watchdog */
 }
 
 /*
@@ -270,66 +265,66 @@ static void supermicro_new_pre_set_heartbeat(unsigned int heartbeat)
  *	Don't use this fix if you don't need to!!!
  */
 
-static void broken_bios_start(unsigned long acpibase)
+static void broken_bios_start(struct resource *smires)
 {
 	unsigned long val32;
 
-	val32 = inl(SMI_EN);
+	val32 = inl(smires->start);
 	/* Bit 13: TCO_EN     -> 0 = Disables TCO logic generating an SMI#
 	   Bit  0: GBL_SMI_EN -> 0 = No SMI# will be generated by ICH. */
 	val32 &= 0xffffdffe;
-	outl(val32, SMI_EN);
+	outl(val32, smires->start);
 }
 
-static void broken_bios_stop(unsigned long acpibase)
+static void broken_bios_stop(struct resource *smires)
 {
 	unsigned long val32;
 
-	val32 = inl(SMI_EN);
+	val32 = inl(smires->start);
 	/* Bit 13: TCO_EN     -> 1 = Enables TCO logic generating an SMI#
 	   Bit  0: GBL_SMI_EN -> 1 = Turn global SMI on again. */
 	val32 |= 0x00002001;
-	outl(val32, SMI_EN);
+	outl(val32, smires->start);
 }
 
 /*
  *	Generic Support Functions
  */
 
-void iTCO_vendor_pre_start(unsigned long acpibase,
+void iTCO_vendor_pre_start(struct resource *smires,
 			   unsigned int heartbeat)
 {
 	switch (vendorsupport) {
 	case SUPERMICRO_OLD_BOARD:
-		supermicro_old_pre_start(acpibase);
+		supermicro_old_pre_start(smires);
 		break;
 	case SUPERMICRO_NEW_BOARD:
 		supermicro_new_pre_start(heartbeat);
 		break;
 	case BROKEN_BIOS:
-		broken_bios_start(acpibase);
+		broken_bios_start(smires);
 		break;
 	}
 }
 EXPORT_SYMBOL(iTCO_vendor_pre_start);
 
-void iTCO_vendor_pre_stop(unsigned long acpibase)
+void iTCO_vendor_pre_stop(struct resource *smires)
 {
 	switch (vendorsupport) {
 	case SUPERMICRO_OLD_BOARD:
-		supermicro_old_pre_stop(acpibase);
+		supermicro_old_pre_stop(smires);
 		break;
 	case SUPERMICRO_NEW_BOARD:
 		supermicro_new_pre_stop();
 		break;
 	case BROKEN_BIOS:
-		broken_bios_stop(acpibase);
+		broken_bios_stop(smires);
 		break;
 	}
 }
 EXPORT_SYMBOL(iTCO_vendor_pre_stop);
 
-void iTCO_vendor_pre_keepalive(unsigned long acpibase, unsigned int heartbeat)
+void iTCO_vendor_pre_keepalive(struct resource *smires, unsigned int heartbeat)
 {
 	if (vendorsupport == SUPERMICRO_NEW_BOARD)
 		supermicro_new_pre_set_heartbeat(heartbeat);
diff --git a/drivers/watchdog/iTCO_wdt.c b/drivers/watchdog/iTCO_wdt.c
index 9fecb95645a3..741528b032e2 100644
--- a/drivers/watchdog/iTCO_wdt.c
+++ b/drivers/watchdog/iTCO_wdt.c
@@ -66,316 +66,16 @@
 #include <linux/spinlock.h>		/* For spin_lock/spin_unlock/... */
 #include <linux/uaccess.h>		/* For copy_to_user/put_user/... */
 #include <linux/io.h>			/* For inb/outb/... */
+#include <linux/mfd/core.h>
+#include <linux/mfd/lpc_ich.h>
 
 #include "iTCO_vendor.h"
 
-/* TCO related info */
-enum iTCO_chipsets {
-	TCO_ICH = 0,	/* ICH */
-	TCO_ICH0,	/* ICH0 */
-	TCO_ICH2,	/* ICH2 */
-	TCO_ICH2M,	/* ICH2-M */
-	TCO_ICH3,	/* ICH3-S */
-	TCO_ICH3M,	/* ICH3-M */
-	TCO_ICH4,	/* ICH4 */
-	TCO_ICH4M,	/* ICH4-M */
-	TCO_CICH,	/* C-ICH */
-	TCO_ICH5,	/* ICH5 & ICH5R */
-	TCO_6300ESB,	/* 6300ESB */
-	TCO_ICH6,	/* ICH6 & ICH6R */
-	TCO_ICH6M,	/* ICH6-M */
-	TCO_ICH6W,	/* ICH6W & ICH6RW */
-	TCO_631XESB,	/* 631xESB/632xESB */
-	TCO_ICH7,	/* ICH7 & ICH7R */
-	TCO_ICH7DH,	/* ICH7DH */
-	TCO_ICH7M,	/* ICH7-M & ICH7-U */
-	TCO_ICH7MDH,	/* ICH7-M DH */
-	TCO_NM10,	/* NM10 */
-	TCO_ICH8,	/* ICH8 & ICH8R */
-	TCO_ICH8DH,	/* ICH8DH */
-	TCO_ICH8DO,	/* ICH8DO */
-	TCO_ICH8M,	/* ICH8M */
-	TCO_ICH8ME,	/* ICH8M-E */
-	TCO_ICH9,	/* ICH9 */
-	TCO_ICH9R,	/* ICH9R */
-	TCO_ICH9DH,	/* ICH9DH */
-	TCO_ICH9DO,	/* ICH9DO */
-	TCO_ICH9M,	/* ICH9M */
-	TCO_ICH9ME,	/* ICH9M-E */
-	TCO_ICH10,	/* ICH10 */
-	TCO_ICH10R,	/* ICH10R */
-	TCO_ICH10D,	/* ICH10D */
-	TCO_ICH10DO,	/* ICH10DO */
-	TCO_PCH,	/* PCH Desktop Full Featured */
-	TCO_PCHM,	/* PCH Mobile Full Featured */
-	TCO_P55,	/* P55 */
-	TCO_PM55,	/* PM55 */
-	TCO_H55,	/* H55 */
-	TCO_QM57,	/* QM57 */
-	TCO_H57,	/* H57 */
-	TCO_HM55,	/* HM55 */
-	TCO_Q57,	/* Q57 */
-	TCO_HM57,	/* HM57 */
-	TCO_PCHMSFF,	/* PCH Mobile SFF Full Featured */
-	TCO_QS57,	/* QS57 */
-	TCO_3400,	/* 3400 */
-	TCO_3420,	/* 3420 */
-	TCO_3450,	/* 3450 */
-	TCO_EP80579,	/* EP80579 */
-	TCO_CPT,	/* Cougar Point */
-	TCO_CPTD,	/* Cougar Point Desktop */
-	TCO_CPTM,	/* Cougar Point Mobile */
-	TCO_PBG,	/* Patsburg */
-	TCO_DH89XXCC,	/* DH89xxCC */
-	TCO_PPT,	/* Panther Point */
-	TCO_LPT,	/* Lynx Point */
-};
-
-static struct {
-	char *name;
-	unsigned int iTCO_version;
-} iTCO_chipset_info[] __devinitdata = {
-	{"ICH", 1},
-	{"ICH0", 1},
-	{"ICH2", 1},
-	{"ICH2-M", 1},
-	{"ICH3-S", 1},
-	{"ICH3-M", 1},
-	{"ICH4", 1},
-	{"ICH4-M", 1},
-	{"C-ICH", 1},
-	{"ICH5 or ICH5R", 1},
-	{"6300ESB", 1},
-	{"ICH6 or ICH6R", 2},
-	{"ICH6-M", 2},
-	{"ICH6W or ICH6RW", 2},
-	{"631xESB/632xESB", 2},
-	{"ICH7 or ICH7R", 2},
-	{"ICH7DH", 2},
-	{"ICH7-M or ICH7-U", 2},
-	{"ICH7-M DH", 2},
-	{"NM10", 2},
-	{"ICH8 or ICH8R", 2},
-	{"ICH8DH", 2},
-	{"ICH8DO", 2},
-	{"ICH8M", 2},
-	{"ICH8M-E", 2},
-	{"ICH9", 2},
-	{"ICH9R", 2},
-	{"ICH9DH", 2},
-	{"ICH9DO", 2},
-	{"ICH9M", 2},
-	{"ICH9M-E", 2},
-	{"ICH10", 2},
-	{"ICH10R", 2},
-	{"ICH10D", 2},
-	{"ICH10DO", 2},
-	{"PCH Desktop Full Featured", 2},
-	{"PCH Mobile Full Featured", 2},
-	{"P55", 2},
-	{"PM55", 2},
-	{"H55", 2},
-	{"QM57", 2},
-	{"H57", 2},
-	{"HM55", 2},
-	{"Q57", 2},
-	{"HM57", 2},
-	{"PCH Mobile SFF Full Featured", 2},
-	{"QS57", 2},
-	{"3400", 2},
-	{"3420", 2},
-	{"3450", 2},
-	{"EP80579", 2},
-	{"Cougar Point", 2},
-	{"Cougar Point Desktop", 2},
-	{"Cougar Point Mobile", 2},
-	{"Patsburg", 2},
-	{"DH89xxCC", 2},
-	{"Panther Point", 2},
-	{"Lynx Point", 2},
-	{NULL, 0}
-};
-
-/*
- * This data only exists for exporting the supported PCI ids
- * via MODULE_DEVICE_TABLE.  We do not actually register a
- * pci_driver, because the I/O Controller Hub has also other
- * functions that probably will be registered by other drivers.
- */
-static DEFINE_PCI_DEVICE_TABLE(iTCO_wdt_pci_tbl) = {
-	{ PCI_VDEVICE(INTEL, 0x2410), TCO_ICH},
-	{ PCI_VDEVICE(INTEL, 0x2420), TCO_ICH0},
-	{ PCI_VDEVICE(INTEL, 0x2440), TCO_ICH2},
-	{ PCI_VDEVICE(INTEL, 0x244c), TCO_ICH2M},
-	{ PCI_VDEVICE(INTEL, 0x2480), TCO_ICH3},
-	{ PCI_VDEVICE(INTEL, 0x248c), TCO_ICH3M},
-	{ PCI_VDEVICE(INTEL, 0x24c0), TCO_ICH4},
-	{ PCI_VDEVICE(INTEL, 0x24cc), TCO_ICH4M},
-	{ PCI_VDEVICE(INTEL, 0x2450), TCO_CICH},
-	{ PCI_VDEVICE(INTEL, 0x24d0), TCO_ICH5},
-	{ PCI_VDEVICE(INTEL, 0x25a1), TCO_6300ESB},
-	{ PCI_VDEVICE(INTEL, 0x2640), TCO_ICH6},
-	{ PCI_VDEVICE(INTEL, 0x2641), TCO_ICH6M},
-	{ PCI_VDEVICE(INTEL, 0x2642), TCO_ICH6W},
-	{ PCI_VDEVICE(INTEL, 0x2670), TCO_631XESB},
-	{ PCI_VDEVICE(INTEL, 0x2671), TCO_631XESB},
-	{ PCI_VDEVICE(INTEL, 0x2672), TCO_631XESB},
-	{ PCI_VDEVICE(INTEL, 0x2673), TCO_631XESB},
-	{ PCI_VDEVICE(INTEL, 0x2674), TCO_631XESB},
-	{ PCI_VDEVICE(INTEL, 0x2675), TCO_631XESB},
-	{ PCI_VDEVICE(INTEL, 0x2676), TCO_631XESB},
-	{ PCI_VDEVICE(INTEL, 0x2677), TCO_631XESB},
-	{ PCI_VDEVICE(INTEL, 0x2678), TCO_631XESB},
-	{ PCI_VDEVICE(INTEL, 0x2679), TCO_631XESB},
-	{ PCI_VDEVICE(INTEL, 0x267a), TCO_631XESB},
-	{ PCI_VDEVICE(INTEL, 0x267b), TCO_631XESB},
-	{ PCI_VDEVICE(INTEL, 0x267c), TCO_631XESB},
-	{ PCI_VDEVICE(INTEL, 0x267d), TCO_631XESB},
-	{ PCI_VDEVICE(INTEL, 0x267e), TCO_631XESB},
-	{ PCI_VDEVICE(INTEL, 0x267f), TCO_631XESB},
-	{ PCI_VDEVICE(INTEL, 0x27b8), TCO_ICH7},
-	{ PCI_VDEVICE(INTEL, 0x27b0), TCO_ICH7DH},
-	{ PCI_VDEVICE(INTEL, 0x27b9), TCO_ICH7M},
-	{ PCI_VDEVICE(INTEL, 0x27bd), TCO_ICH7MDH},
-	{ PCI_VDEVICE(INTEL, 0x27bc), TCO_NM10},
-	{ PCI_VDEVICE(INTEL, 0x2810), TCO_ICH8},
-	{ PCI_VDEVICE(INTEL, 0x2812), TCO_ICH8DH},
-	{ PCI_VDEVICE(INTEL, 0x2814), TCO_ICH8DO},
-	{ PCI_VDEVICE(INTEL, 0x2815), TCO_ICH8M},
-	{ PCI_VDEVICE(INTEL, 0x2811), TCO_ICH8ME},
-	{ PCI_VDEVICE(INTEL, 0x2918), TCO_ICH9},
-	{ PCI_VDEVICE(INTEL, 0x2916), TCO_ICH9R},
-	{ PCI_VDEVICE(INTEL, 0x2912), TCO_ICH9DH},
-	{ PCI_VDEVICE(INTEL, 0x2914), TCO_ICH9DO},
-	{ PCI_VDEVICE(INTEL, 0x2919), TCO_ICH9M},
-	{ PCI_VDEVICE(INTEL, 0x2917), TCO_ICH9ME},
-	{ PCI_VDEVICE(INTEL, 0x3a18), TCO_ICH10},
-	{ PCI_VDEVICE(INTEL, 0x3a16), TCO_ICH10R},
-	{ PCI_VDEVICE(INTEL, 0x3a1a), TCO_ICH10D},
-	{ PCI_VDEVICE(INTEL, 0x3a14), TCO_ICH10DO},
-	{ PCI_VDEVICE(INTEL, 0x3b00), TCO_PCH},
-	{ PCI_VDEVICE(INTEL, 0x3b01), TCO_PCHM},
-	{ PCI_VDEVICE(INTEL, 0x3b02), TCO_P55},
-	{ PCI_VDEVICE(INTEL, 0x3b03), TCO_PM55},
-	{ PCI_VDEVICE(INTEL, 0x3b06), TCO_H55},
-	{ PCI_VDEVICE(INTEL, 0x3b07), TCO_QM57},
-	{ PCI_VDEVICE(INTEL, 0x3b08), TCO_H57},
-	{ PCI_VDEVICE(INTEL, 0x3b09), TCO_HM55},
-	{ PCI_VDEVICE(INTEL, 0x3b0a), TCO_Q57},
-	{ PCI_VDEVICE(INTEL, 0x3b0b), TCO_HM57},
-	{ PCI_VDEVICE(INTEL, 0x3b0d), TCO_PCHMSFF},
-	{ PCI_VDEVICE(INTEL, 0x3b0f), TCO_QS57},
-	{ PCI_VDEVICE(INTEL, 0x3b12), TCO_3400},
-	{ PCI_VDEVICE(INTEL, 0x3b14), TCO_3420},
-	{ PCI_VDEVICE(INTEL, 0x3b16), TCO_3450},
-	{ PCI_VDEVICE(INTEL, 0x5031), TCO_EP80579},
-	{ PCI_VDEVICE(INTEL, 0x1c41), TCO_CPT},
-	{ PCI_VDEVICE(INTEL, 0x1c42), TCO_CPTD},
-	{ PCI_VDEVICE(INTEL, 0x1c43), TCO_CPTM},
-	{ PCI_VDEVICE(INTEL, 0x1c44), TCO_CPT},
-	{ PCI_VDEVICE(INTEL, 0x1c45), TCO_CPT},
-	{ PCI_VDEVICE(INTEL, 0x1c46), TCO_CPT},
-	{ PCI_VDEVICE(INTEL, 0x1c47), TCO_CPT},
-	{ PCI_VDEVICE(INTEL, 0x1c48), TCO_CPT},
-	{ PCI_VDEVICE(INTEL, 0x1c49), TCO_CPT},
-	{ PCI_VDEVICE(INTEL, 0x1c4a), TCO_CPT},
-	{ PCI_VDEVICE(INTEL, 0x1c4b), TCO_CPT},
-	{ PCI_VDEVICE(INTEL, 0x1c4c), TCO_CPT},
-	{ PCI_VDEVICE(INTEL, 0x1c4d), TCO_CPT},
-	{ PCI_VDEVICE(INTEL, 0x1c4e), TCO_CPT},
-	{ PCI_VDEVICE(INTEL, 0x1c4f), TCO_CPT},
-	{ PCI_VDEVICE(INTEL, 0x1c50), TCO_CPT},
-	{ PCI_VDEVICE(INTEL, 0x1c51), TCO_CPT},
-	{ PCI_VDEVICE(INTEL, 0x1c52), TCO_CPT},
-	{ PCI_VDEVICE(INTEL, 0x1c53), TCO_CPT},
-	{ PCI_VDEVICE(INTEL, 0x1c54), TCO_CPT},
-	{ PCI_VDEVICE(INTEL, 0x1c55), TCO_CPT},
-	{ PCI_VDEVICE(INTEL, 0x1c56), TCO_CPT},
-	{ PCI_VDEVICE(INTEL, 0x1c57), TCO_CPT},
-	{ PCI_VDEVICE(INTEL, 0x1c58), TCO_CPT},
-	{ PCI_VDEVICE(INTEL, 0x1c59), TCO_CPT},
-	{ PCI_VDEVICE(INTEL, 0x1c5a), TCO_CPT},
-	{ PCI_VDEVICE(INTEL, 0x1c5b), TCO_CPT},
-	{ PCI_VDEVICE(INTEL, 0x1c5c), TCO_CPT},
-	{ PCI_VDEVICE(INTEL, 0x1c5d), TCO_CPT},
-	{ PCI_VDEVICE(INTEL, 0x1c5e), TCO_CPT},
-	{ PCI_VDEVICE(INTEL, 0x1c5f), TCO_CPT},
-	{ PCI_VDEVICE(INTEL, 0x1d40), TCO_PBG},
-	{ PCI_VDEVICE(INTEL, 0x1d41), TCO_PBG},
-	{ PCI_VDEVICE(INTEL, 0x2310), TCO_DH89XXCC},
-	{ PCI_VDEVICE(INTEL, 0x1e40), TCO_PPT},
-	{ PCI_VDEVICE(INTEL, 0x1e41), TCO_PPT},
-	{ PCI_VDEVICE(INTEL, 0x1e42), TCO_PPT},
-	{ PCI_VDEVICE(INTEL, 0x1e43), TCO_PPT},
-	{ PCI_VDEVICE(INTEL, 0x1e44), TCO_PPT},
-	{ PCI_VDEVICE(INTEL, 0x1e45), TCO_PPT},
-	{ PCI_VDEVICE(INTEL, 0x1e46), TCO_PPT},
-	{ PCI_VDEVICE(INTEL, 0x1e47), TCO_PPT},
-	{ PCI_VDEVICE(INTEL, 0x1e48), TCO_PPT},
-	{ PCI_VDEVICE(INTEL, 0x1e49), TCO_PPT},
-	{ PCI_VDEVICE(INTEL, 0x1e4a), TCO_PPT},
-	{ PCI_VDEVICE(INTEL, 0x1e4b), TCO_PPT},
-	{ PCI_VDEVICE(INTEL, 0x1e4c), TCO_PPT},
-	{ PCI_VDEVICE(INTEL, 0x1e4d), TCO_PPT},
-	{ PCI_VDEVICE(INTEL, 0x1e4e), TCO_PPT},
-	{ PCI_VDEVICE(INTEL, 0x1e4f), TCO_PPT},
-	{ PCI_VDEVICE(INTEL, 0x1e50), TCO_PPT},
-	{ PCI_VDEVICE(INTEL, 0x1e51), TCO_PPT},
-	{ PCI_VDEVICE(INTEL, 0x1e52), TCO_PPT},
-	{ PCI_VDEVICE(INTEL, 0x1e53), TCO_PPT},
-	{ PCI_VDEVICE(INTEL, 0x1e54), TCO_PPT},
-	{ PCI_VDEVICE(INTEL, 0x1e55), TCO_PPT},
-	{ PCI_VDEVICE(INTEL, 0x1e56), TCO_PPT},
-	{ PCI_VDEVICE(INTEL, 0x1e57), TCO_PPT},
-	{ PCI_VDEVICE(INTEL, 0x1e58), TCO_PPT},
-	{ PCI_VDEVICE(INTEL, 0x1e59), TCO_PPT},
-	{ PCI_VDEVICE(INTEL, 0x1e5a), TCO_PPT},
-	{ PCI_VDEVICE(INTEL, 0x1e5b), TCO_PPT},
-	{ PCI_VDEVICE(INTEL, 0x1e5c), TCO_PPT},
-	{ PCI_VDEVICE(INTEL, 0x1e5d), TCO_PPT},
-	{ PCI_VDEVICE(INTEL, 0x1e5e), TCO_PPT},
-	{ PCI_VDEVICE(INTEL, 0x1e5f), TCO_PPT},
-	{ PCI_VDEVICE(INTEL, 0x8c40), TCO_LPT},
-	{ PCI_VDEVICE(INTEL, 0x8c41), TCO_LPT},
-	{ PCI_VDEVICE(INTEL, 0x8c42), TCO_LPT},
-	{ PCI_VDEVICE(INTEL, 0x8c43), TCO_LPT},
-	{ PCI_VDEVICE(INTEL, 0x8c44), TCO_LPT},
-	{ PCI_VDEVICE(INTEL, 0x8c45), TCO_LPT},
-	{ PCI_VDEVICE(INTEL, 0x8c46), TCO_LPT},
-	{ PCI_VDEVICE(INTEL, 0x8c47), TCO_LPT},
-	{ PCI_VDEVICE(INTEL, 0x8c48), TCO_LPT},
-	{ PCI_VDEVICE(INTEL, 0x8c49), TCO_LPT},
-	{ PCI_VDEVICE(INTEL, 0x8c4a), TCO_LPT},
-	{ PCI_VDEVICE(INTEL, 0x8c4b), TCO_LPT},
-	{ PCI_VDEVICE(INTEL, 0x8c4c), TCO_LPT},
-	{ PCI_VDEVICE(INTEL, 0x8c4d), TCO_LPT},
-	{ PCI_VDEVICE(INTEL, 0x8c4e), TCO_LPT},
-	{ PCI_VDEVICE(INTEL, 0x8c4f), TCO_LPT},
-	{ PCI_VDEVICE(INTEL, 0x8c50), TCO_LPT},
-	{ PCI_VDEVICE(INTEL, 0x8c51), TCO_LPT},
-	{ PCI_VDEVICE(INTEL, 0x8c52), TCO_LPT},
-	{ PCI_VDEVICE(INTEL, 0x8c53), TCO_LPT},
-	{ PCI_VDEVICE(INTEL, 0x8c54), TCO_LPT},
-	{ PCI_VDEVICE(INTEL, 0x8c55), TCO_LPT},
-	{ PCI_VDEVICE(INTEL, 0x8c56), TCO_LPT},
-	{ PCI_VDEVICE(INTEL, 0x8c57), TCO_LPT},
-	{ PCI_VDEVICE(INTEL, 0x8c58), TCO_LPT},
-	{ PCI_VDEVICE(INTEL, 0x8c59), TCO_LPT},
-	{ PCI_VDEVICE(INTEL, 0x8c5a), TCO_LPT},
-	{ PCI_VDEVICE(INTEL, 0x8c5b), TCO_LPT},
-	{ PCI_VDEVICE(INTEL, 0x8c5c), TCO_LPT},
-	{ PCI_VDEVICE(INTEL, 0x8c5d), TCO_LPT},
-	{ PCI_VDEVICE(INTEL, 0x8c5e), TCO_LPT},
-	{ PCI_VDEVICE(INTEL, 0x8c5f), TCO_LPT},
-	{ 0, },			/* End of list */
-};
-MODULE_DEVICE_TABLE(pci, iTCO_wdt_pci_tbl);
-
 /* Address definitions for the TCO */
 /* TCO base address */
-#define TCOBASE		(iTCO_wdt_private.ACPIBASE + 0x60)
+#define TCOBASE		(iTCO_wdt_private.tco_res->start)
 /* SMI Control and Enable Register */
-#define SMI_EN		(iTCO_wdt_private.ACPIBASE + 0x30)
+#define SMI_EN		(iTCO_wdt_private.smi_res->start)
 
 #define TCO_RLD		(TCOBASE + 0x00) /* TCO Timer Reload and Curr. Value */
 #define TCOv1_TMR	(TCOBASE + 0x01) /* TCOv1 Timer Initial Value	*/
@@ -393,19 +93,18 @@ static char expect_release;
 static struct {		/* this is private data for the iTCO_wdt device */
 	/* TCO version/generation */
 	unsigned int iTCO_version;
-	/* The device's ACPIBASE address (TCOBASE = ACPIBASE+0x60) */
-	unsigned long ACPIBASE;
+	struct resource *tco_res;
+	struct resource *smi_res;
+	struct resource *gcs_res;
 	/* NO_REBOOT flag is Memory-Mapped GCS register bit 5 (TCO version 2)*/
 	unsigned long __iomem *gcs;
 	/* the lock for io operations */
 	spinlock_t io_lock;
+	struct platform_device *dev;
 	/* the PCI-device */
 	struct pci_dev *pdev;
 } iTCO_wdt_private;
 
-/* the watchdog platform device */
-static struct platform_device *iTCO_wdt_platform_device;
-
 /* module parameters */
 #define WATCHDOG_HEARTBEAT 30	/* 30 sec default heartbeat */
 static int heartbeat = WATCHDOG_HEARTBEAT;  /* in seconds */
@@ -485,7 +184,7 @@ static int iTCO_wdt_start(void)
 
 	spin_lock(&iTCO_wdt_private.io_lock);
 
-	iTCO_vendor_pre_start(iTCO_wdt_private.ACPIBASE, heartbeat);
+	iTCO_vendor_pre_start(iTCO_wdt_private.smi_res, heartbeat);
 
 	/* disable chipset's NO_REBOOT bit */
 	if (iTCO_wdt_unset_NO_REBOOT_bit()) {
@@ -519,7 +218,7 @@ static int iTCO_wdt_stop(void)
 
 	spin_lock(&iTCO_wdt_private.io_lock);
 
-	iTCO_vendor_pre_stop(iTCO_wdt_private.ACPIBASE);
+	iTCO_vendor_pre_stop(iTCO_wdt_private.smi_res);
 
 	/* Bit 11: TCO Timer Halt -> 1 = The TCO timer is disabled */
 	val = inw(TCO1_CNT);
@@ -541,7 +240,7 @@ static int iTCO_wdt_keepalive(void)
 {
 	spin_lock(&iTCO_wdt_private.io_lock);
 
-	iTCO_vendor_pre_keepalive(iTCO_wdt_private.ACPIBASE, heartbeat);
+	iTCO_vendor_pre_keepalive(iTCO_wdt_private.smi_res, heartbeat);
 
 	/* Reload the timer by writing to the TCO Timer Counter register */
 	if (iTCO_wdt_private.iTCO_version == 2)
@@ -786,83 +485,120 @@ static struct miscdevice iTCO_wdt_miscdev = {
  *	Init & exit routines
  */
 
-static int __devinit iTCO_wdt_init(struct pci_dev *pdev,
-		const struct pci_device_id *ent, struct platform_device *dev)
+static void __devexit iTCO_wdt_cleanup(void)
+{
+	/* Stop the timer before we leave */
+	if (!nowayout)
+		iTCO_wdt_stop();
+
+	/* Deregister */
+	misc_deregister(&iTCO_wdt_miscdev);
+
+	/* release resources */
+	release_region(iTCO_wdt_private.tco_res->start,
+			resource_size(iTCO_wdt_private.tco_res));
+	release_region(iTCO_wdt_private.smi_res->start,
+			resource_size(iTCO_wdt_private.smi_res));
+	if (iTCO_wdt_private.iTCO_version == 2) {
+		iounmap(iTCO_wdt_private.gcs);
+		release_mem_region(iTCO_wdt_private.gcs_res->start,
+				resource_size(iTCO_wdt_private.gcs_res));
+	}
+
+	iTCO_wdt_private.tco_res = NULL;
+	iTCO_wdt_private.smi_res = NULL;
+	iTCO_wdt_private.gcs_res = NULL;
+	iTCO_wdt_private.gcs = NULL;
+}
+
+static int __devinit iTCO_wdt_probe(struct platform_device *dev)
 {
-	int ret;
-	u32 base_address;
-	unsigned long RCBA;
+	int ret = -ENODEV;
 	unsigned long val32;
+	struct lpc_ich_info *ich_info = dev->dev.platform_data;
+
+	if (!ich_info)
+		goto out;
+
+	spin_lock_init(&iTCO_wdt_private.io_lock);
+
+	iTCO_wdt_private.tco_res =
+		platform_get_resource(dev, IORESOURCE_IO, ICH_RES_IO_TCO);
+	if (!iTCO_wdt_private.tco_res)
+		goto out;
+
+	iTCO_wdt_private.smi_res =
+		platform_get_resource(dev, IORESOURCE_IO, ICH_RES_IO_SMI);
+	if (!iTCO_wdt_private.smi_res)
+		goto out;
+
+	iTCO_wdt_private.iTCO_version = ich_info->iTCO_version;
+	iTCO_wdt_private.dev = dev;
+	iTCO_wdt_private.pdev = to_pci_dev(dev->dev.parent);
 
 	/*
-	 *      Find the ACPI/PM base I/O address which is the base
-	 *      for the TCO registers (TCOBASE=ACPIBASE + 0x60)
-	 *      ACPIBASE is bits [15:7] from 0x40-0x43
+	 * Get the Memory-Mapped GCS register, we need it for the
+	 * NO_REBOOT flag (TCO v2).
 	 */
-	pci_read_config_dword(pdev, 0x40, &base_address);
-	base_address &= 0x0000ff80;
-	if (base_address == 0x00000000) {
-		/* Something's wrong here, ACPIBASE has to be set */
-		pr_err("failed to get TCOBASE address, device disabled by hardware/BIOS\n");
-		return -ENODEV;
-	}
-	iTCO_wdt_private.iTCO_version =
-			iTCO_chipset_info[ent->driver_data].iTCO_version;
-	iTCO_wdt_private.ACPIBASE = base_address;
-	iTCO_wdt_private.pdev = pdev;
-
-	/* Get the Memory-Mapped GCS register, we need it for the
-	   NO_REBOOT flag (TCO v2). To get access to it you have to
-	   read RCBA from PCI Config space 0xf0 and use it as base.
-	   GCS = RCBA + ICH6_GCS(0x3410). */
 	if (iTCO_wdt_private.iTCO_version == 2) {
-		pci_read_config_dword(pdev, 0xf0, &base_address);
-		if ((base_address & 1) == 0) {
-			pr_err("RCBA is disabled by hardware/BIOS, device disabled\n");
-			ret = -ENODEV;
+		iTCO_wdt_private.gcs_res = platform_get_resource(dev,
+							IORESOURCE_MEM,
+							ICH_RES_MEM_GCS);
+
+		if (!iTCO_wdt_private.gcs_res)
+			goto out;
+
+		if (!request_mem_region(iTCO_wdt_private.gcs_res->start,
+			resource_size(iTCO_wdt_private.gcs_res), dev->name)) {
+			ret = -EBUSY;
 			goto out;
 		}
-		RCBA = base_address & 0xffffc000;
-		iTCO_wdt_private.gcs = ioremap((RCBA + 0x3410), 4);
+		iTCO_wdt_private.gcs = ioremap(iTCO_wdt_private.gcs_res->start,
+			resource_size(iTCO_wdt_private.gcs_res));
+		if (!iTCO_wdt_private.gcs) {
+			ret = -EIO;
+			goto unreg_gcs;
+		}
 	}
 
 	/* Check chipset's NO_REBOOT bit */
 	if (iTCO_wdt_unset_NO_REBOOT_bit() && iTCO_vendor_check_noreboot_on()) {
 		pr_info("unable to reset NO_REBOOT flag, device disabled by hardware/BIOS\n");
 		ret = -ENODEV;	/* Cannot reset NO_REBOOT bit */
-		goto out_unmap;
+		goto unmap_gcs;
 	}
 
 	/* Set the NO_REBOOT bit to prevent later reboots, just for sure */
 	iTCO_wdt_set_NO_REBOOT_bit();
 
 	/* The TCO logic uses the TCO_EN bit in the SMI_EN register */
-	if (!request_region(SMI_EN, 4, "iTCO_wdt")) {
-		pr_err("I/O address 0x%04lx already in use, device disabled\n",
+	if (!request_region(iTCO_wdt_private.smi_res->start,
+			resource_size(iTCO_wdt_private.smi_res), dev->name)) {
+		pr_err("I/O address 0x%04llx already in use, device disabled\n",
 		       SMI_EN);
-		ret = -EIO;
-		goto out_unmap;
+		ret = -EBUSY;
+		goto unmap_gcs;
 	}
 	if (turn_SMI_watchdog_clear_off >= iTCO_wdt_private.iTCO_version) {
-		/* Bit 13: TCO_EN -> 0 = Disables TCO logic generating an SMI# */
+		/*
+		 * Bit 13: TCO_EN -> 0
+		 * Disables TCO logic generating an SMI#
+		 */
 		val32 = inl(SMI_EN);
 		val32 &= 0xffffdfff;	/* Turn off SMI clearing watchdog */
 		outl(val32, SMI_EN);
 	}
 
-	/* The TCO I/O registers reside in a 32-byte range pointed to
-	   by the TCOBASE value */
-	if (!request_region(TCOBASE, 0x20, "iTCO_wdt")) {
-		pr_err("I/O address 0x%04lx already in use, device disabled\n",
+	if (!request_region(iTCO_wdt_private.tco_res->start,
+			resource_size(iTCO_wdt_private.tco_res), dev->name)) {
+		pr_err("I/O address 0x%04llx already in use, device disabled\n",
 		       TCOBASE);
-		ret = -EIO;
-		goto unreg_smi_en;
+		ret = -EBUSY;
+		goto unreg_smi;
 	}
 
-	pr_info("Found a %s TCO device (Version=%d, TCOBASE=0x%04lx)\n",
-		iTCO_chipset_info[ent->driver_data].name,
-		iTCO_chipset_info[ent->driver_data].iTCO_version,
-		TCOBASE);
+	pr_info("Found a %s TCO device (Version=%d, TCOBASE=0x%04llx)\n",
+		ich_info->name, ich_info->iTCO_version, TCOBASE);
 
 	/* Clear out the (probably old) status */
 	outw(0x0008, TCO1_STS);	/* Clear the Time Out Status bit */
@@ -883,7 +619,7 @@ static int __devinit iTCO_wdt_init(struct pci_dev *pdev,
 	if (ret != 0) {
 		pr_err("cannot register miscdev on minor=%d (err=%d)\n",
 		       WATCHDOG_MINOR, ret);
-		goto unreg_region;
+		goto unreg_tco;
 	}
 
 	pr_info("initialized. heartbeat=%d sec (nowayout=%d)\n",
@@ -891,62 +627,31 @@ static int __devinit iTCO_wdt_init(struct pci_dev *pdev,
 
 	return 0;
 
-unreg_region:
-	release_region(TCOBASE, 0x20);
-unreg_smi_en:
-	release_region(SMI_EN, 4);
-out_unmap:
+unreg_tco:
+	release_region(iTCO_wdt_private.tco_res->start,
+			resource_size(iTCO_wdt_private.tco_res));
+unreg_smi:
+	release_region(iTCO_wdt_private.smi_res->start,
+			resource_size(iTCO_wdt_private.smi_res));
+unmap_gcs:
 	if (iTCO_wdt_private.iTCO_version == 2)
 		iounmap(iTCO_wdt_private.gcs);
-out:
-	iTCO_wdt_private.ACPIBASE = 0;
-	return ret;
-}
-
-static void __devexit iTCO_wdt_cleanup(void)
-{
-	/* Stop the timer before we leave */
-	if (!nowayout)
-		iTCO_wdt_stop();
-
-	/* Deregister */
-	misc_deregister(&iTCO_wdt_miscdev);
-	release_region(TCOBASE, 0x20);
-	release_region(SMI_EN, 4);
+unreg_gcs:
 	if (iTCO_wdt_private.iTCO_version == 2)
-		iounmap(iTCO_wdt_private.gcs);
-	pci_dev_put(iTCO_wdt_private.pdev);
-	iTCO_wdt_private.ACPIBASE = 0;
-}
-
-static int __devinit iTCO_wdt_probe(struct platform_device *dev)
-{
-	int ret = -ENODEV;
-	int found = 0;
-	struct pci_dev *pdev = NULL;
-	const struct pci_device_id *ent;
-
-	spin_lock_init(&iTCO_wdt_private.io_lock);
-
-	for_each_pci_dev(pdev) {
-		ent = pci_match_id(iTCO_wdt_pci_tbl, pdev);
-		if (ent) {
-			found++;
-			ret = iTCO_wdt_init(pdev, ent, dev);
-			if (!ret)
-				break;
-		}
-	}
-
-	if (!found)
-		pr_info("No device detected\n");
+		release_mem_region(iTCO_wdt_private.gcs_res->start,
+				resource_size(iTCO_wdt_private.gcs_res));
+out:
+	iTCO_wdt_private.tco_res = NULL;
+	iTCO_wdt_private.smi_res = NULL;
+	iTCO_wdt_private.gcs_res = NULL;
+	iTCO_wdt_private.gcs = NULL;
 
 	return ret;
 }
 
 static int __devexit iTCO_wdt_remove(struct platform_device *dev)
 {
-	if (iTCO_wdt_private.ACPIBASE)
+	if (iTCO_wdt_private.tco_res || iTCO_wdt_private.smi_res)
 		iTCO_wdt_cleanup();
 
 	return 0;
@@ -977,23 +682,11 @@ static int __init iTCO_wdt_init_module(void)
 	if (err)
 		return err;
 
-	iTCO_wdt_platform_device = platform_device_register_simple(DRV_NAME,
-								-1, NULL, 0);
-	if (IS_ERR(iTCO_wdt_platform_device)) {
-		err = PTR_ERR(iTCO_wdt_platform_device);
-		goto unreg_platform_driver;
-	}
-
 	return 0;
-
-unreg_platform_driver:
-	platform_driver_unregister(&iTCO_wdt_driver);
-	return err;
 }
 
 static void __exit iTCO_wdt_cleanup_module(void)
 {
-	platform_device_unregister(iTCO_wdt_platform_device);
 	platform_driver_unregister(&iTCO_wdt_driver);
 	pr_info("Watchdog Module Unloaded\n");
 }
diff --git a/include/linux/mfd/lpc_ich.h b/include/linux/mfd/lpc_ich.h
index 91300b18219b..fec5256c3f5d 100644
--- a/include/linux/mfd/lpc_ich.h
+++ b/include/linux/mfd/lpc_ich.h
@@ -20,6 +20,12 @@
 #ifndef LPC_ICH_H
 #define LPC_ICH_H
 
+/* Watchdog resources */
+#define ICH_RES_IO_TCO	0
+#define ICH_RES_IO_SMI	1
+#define ICH_RES_MEM_OFF	2
+#define ICH_RES_MEM_GCS	0
+
 /* GPIO resources */
 #define ICH_RES_GPIO	0
 #define ICH_RES_GPE0	1
@@ -35,6 +41,7 @@
 
 struct lpc_ich_info {
 	char name[32];
+	unsigned int iTCO_version;
 	unsigned int gpio_version;
 };
 
-- 
cgit v1.3-7-g2ca7


From 1379f49ea91a28f5c023d041aab785c3de60c65d Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Mon, 23 Apr 2012 14:28:44 +0200
Subject: mfd: Allow for const stmpe keyboard data

Since it's not like we will re-arrange the keys at run-time, it
seems proper to allow the keymap data to be const. This solves
a compilation warning in ux500.

Cc: Dmitry Torokhov <dtor@mail.ru>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/linux/mfd/stmpe.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/stmpe.h b/include/linux/mfd/stmpe.h
index 8516fd1eaabc..f8d5b4d5843f 100644
--- a/include/linux/mfd/stmpe.h
+++ b/include/linux/mfd/stmpe.h
@@ -117,7 +117,7 @@ struct matrix_keymap_data;
  * @no_autorepeat: disable key autorepeat
  */
 struct stmpe_keypad_platform_data {
-	struct matrix_keymap_data *keymap_data;
+	const struct matrix_keymap_data *keymap_data;
 	unsigned int debounce_ms;
 	unsigned int scan_count;
 	bool no_autorepeat;
-- 
cgit v1.3-7-g2ca7


From 3aff4ebb95b20ad8db2c1447e8c52097d89af5a7 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 9 May 2012 14:30:35 -0400
Subject: NFS: Prevent a deadlock in the new writeback code

We have to unlock the nfs_page before we call nfs_end_page_writeback
to avoid races with functions that expect the page to be unlocked
when PG_locked and PG_writeback are not set.
The problem is that nfs_unlock_request also releases the nfs_page,
causing a deadlock if the release of the nfs_open_context
triggers an iput() while the PG_writeback flag is still set...

The solution is to separate the unlocking and release of the nfs_page,
so that we can do the former before nfs_end_page_writeback and the
latter after.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: Fred Isaman <iisaman@netapp.com>
---
 fs/nfs/pagelist.c        | 12 ++++++++++--
 fs/nfs/write.c           |  6 ++++--
 include/linux/nfs_page.h |  1 +
 3 files changed, 15 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 33a21ca9b84b..69146f386989 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -128,10 +128,10 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
 }
 
 /**
- * nfs_unlock_request - Unlock request and wake up sleepers.
+ * nfs_unlock_request_dont_release - Unlock request and wake up sleepers.
  * @req:
  */
-void nfs_unlock_request(struct nfs_page *req)
+void nfs_unlock_request_dont_release(struct nfs_page *req)
 {
 	if (!NFS_WBACK_BUSY(req)) {
 		printk(KERN_ERR "NFS: Invalid unlock attempted\n");
@@ -141,6 +141,14 @@ void nfs_unlock_request(struct nfs_page *req)
 	clear_bit(PG_BUSY, &req->wb_flags);
 	smp_mb__after_clear_bit();
 	wake_up_bit(&req->wb_flags, PG_BUSY);
+}
+
+/**
+ * nfs_unlock_request - Unlock request and release the nfs_page
+ */
+void nfs_unlock_request(struct nfs_page *req)
+{
+	nfs_unlock_request_dont_release(req);
 	nfs_release_request(req);
 }
 
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 6f263daac748..fd36b31ee72e 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -628,8 +628,9 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr)
 remove_req:
 		nfs_inode_remove_request(req);
 next:
-		nfs_unlock_request(req);
+		nfs_unlock_request_dont_release(req);
 		nfs_end_page_writeback(page);
+		nfs_release_request(req);
 	}
 out:
 	hdr->release(hdr);
@@ -1042,8 +1043,9 @@ static void nfs_redirty_request(struct nfs_page *req)
 	struct page *page = req->wb_page;
 
 	nfs_mark_request_dirty(req);
-	nfs_unlock_request(req);
+	nfs_unlock_request_dont_release(req);
 	nfs_end_page_writeback(page);
+	nfs_release_request(req);
 }
 
 static void nfs_async_write_error(struct list_head *head)
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index f9ee9eba7f88..ef7504215446 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -96,6 +96,7 @@ extern bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc,
 				struct nfs_page *req);
 extern  int nfs_wait_on_request(struct nfs_page *);
 extern	void nfs_unlock_request(struct nfs_page *req);
+extern void nfs_unlock_request_dont_release(struct nfs_page *req);
 
 /*
  * Lock the page of an asynchronous request without getting a new reference
-- 
cgit v1.3-7-g2ca7


From 7ad84aa9448571678c243f0c5ef383fbe5b50f4f Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 9 May 2012 13:19:15 -0400
Subject: NFS: Clean up - simplify nfs_lock_request()

We only have two places where we need to grab a reference when trying
to lock the nfs_page. We're better off making that explicit.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: Fred Isaman <iisaman@netapp.com>
---
 fs/nfs/direct.c          |  1 +
 fs/nfs/write.c           | 11 ++++++-----
 include/linux/nfs_page.h | 14 ++------------
 3 files changed, 9 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 257d0091148b..465ea84a2874 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -657,6 +657,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *d
 				break;
 			}
 			nfs_lock_request(req);
+			kref_get(&req->wb_kref);
 			req->wb_index = pos >> PAGE_SHIFT;
 			req->wb_offset = pos & ~PAGE_MASK;
 			if (!nfs_pageio_add_request(desc, req)) {
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 83823294ffd2..553f7ef1079b 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -260,10 +260,10 @@ static struct nfs_page *nfs_find_and_lock_request(struct page *page, bool nonblo
 		req = nfs_page_find_request_locked(page);
 		if (req == NULL)
 			break;
-		if (nfs_lock_request_dontget(req))
+		if (nfs_lock_request(req))
 			break;
 		/* Note: If we hold the page lock, as is the case in nfs_writepage,
-		 *	 then the call to nfs_lock_request_dontget() will always
+		 *	 then the call to nfs_lock_request() will always
 		 *	 succeed provided that someone hasn't already marked the
 		 *	 request as dirty (in which case we don't care).
 		 */
@@ -406,7 +406,7 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
 	struct nfs_inode *nfsi = NFS_I(inode);
 
 	/* Lock the request! */
-	nfs_lock_request_dontget(req);
+	nfs_lock_request(req);
 
 	spin_lock(&inode->i_lock);
 	if (!nfsi->npages && nfs_have_delegation(inode, FMODE_WRITE))
@@ -651,6 +651,7 @@ nfs_scan_commit_list(struct list_head *src, struct list_head *dst,
 	list_for_each_entry_safe(req, tmp, src, wb_list) {
 		if (!nfs_lock_request(req))
 			continue;
+		kref_get(&req->wb_kref);
 		if (cond_resched_lock(cinfo->lock))
 			list_safe_reset_next(req, tmp, wb_list);
 		nfs_request_remove_commit_list(req, cinfo);
@@ -741,7 +742,7 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
 		    || end < req->wb_offset)
 			goto out_flushme;
 
-		if (nfs_lock_request_dontget(req))
+		if (nfs_lock_request(req))
 			break;
 
 		/* The request is locked, so wait and then retry */
@@ -1717,7 +1718,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
 		req = nfs_page_find_request(page);
 		if (req == NULL)
 			break;
-		if (nfs_lock_request_dontget(req)) {
+		if (nfs_lock_request(req)) {
 			nfs_clear_request_commit(req);
 			nfs_inode_remove_request(req);
 			/*
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index ef7504215446..263f30a5e10d 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -99,24 +99,14 @@ extern	void nfs_unlock_request(struct nfs_page *req);
 extern void nfs_unlock_request_dont_release(struct nfs_page *req);
 
 /*
- * Lock the page of an asynchronous request without getting a new reference
+ * Lock the page of an asynchronous request
  */
-static inline int
-nfs_lock_request_dontget(struct nfs_page *req)
-{
-	return !test_and_set_bit(PG_BUSY, &req->wb_flags);
-}
-
 static inline int
 nfs_lock_request(struct nfs_page *req)
 {
-	if (test_and_set_bit(PG_BUSY, &req->wb_flags))
-		return 0;
-	kref_get(&req->wb_kref);
-	return 1;
+	return !test_and_set_bit(PG_BUSY, &req->wb_flags);
 }
 
-
 /**
  * nfs_list_add_request - Insert a request into a list
  * @req: request
-- 
cgit v1.3-7-g2ca7


From 1d1afcbc294cc7c788eb5c7b6b98e8d63caf002c Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 9 May 2012 14:04:55 -0400
Subject: NFS: Clean up - Rename nfs_unlock_request and
 nfs_unlock_request_dont_release

Function rename to ensure that the functionality of nfs_unlock_request()
mirrors that of nfs_lock_request(). Then let nfs_unlock_and_release_request()
do the work of what used to be called nfs_unlock_request()...

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: Fred Isaman <iisaman@netapp.com>
---
 fs/nfs/direct.c          | 10 +++++-----
 fs/nfs/pagelist.c        | 11 ++++++-----
 fs/nfs/write.c           | 12 ++++++------
 include/linux/nfs_page.h |  2 +-
 4 files changed, 18 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 465ea84a2874..845e20196803 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -488,7 +488,7 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
 
 	while (!list_empty(&failed)) {
 		nfs_release_request(req);
-		nfs_unlock_request(req);
+		nfs_unlock_and_release_request(req);
 	}
 
 	if (put_dreq(dreq))
@@ -521,7 +521,7 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data)
 			nfs_mark_request_commit(req, NULL, &cinfo);
 		} else
 			nfs_release_request(req);
-		nfs_unlock_request(req);
+		nfs_unlock_and_release_request(req);
 	}
 
 	if (atomic_dec_and_test(&cinfo.mds->rpcs_out))
@@ -662,7 +662,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *d
 			req->wb_offset = pos & ~PAGE_MASK;
 			if (!nfs_pageio_add_request(desc, req)) {
 				result = desc->pg_error;
-				nfs_unlock_request(req);
+				nfs_unlock_and_release_request(req);
 				nfs_release_request(req);
 				break;
 			}
@@ -739,7 +739,7 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
 		default:
 			nfs_release_request(req);
 		}
-		nfs_unlock_request(req);
+		nfs_unlock_and_release_request(req);
 	}
 
 out_put:
@@ -756,7 +756,7 @@ static void nfs_write_sync_pgio_error(struct list_head *head)
 		req = nfs_list_entry(head->next);
 		nfs_list_remove_request(req);
 		nfs_release_request(req);
-		nfs_unlock_request(req);
+		nfs_unlock_and_release_request(req);
 	}
 }
 
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 69146f386989..aed913c833f4 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -128,10 +128,10 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
 }
 
 /**
- * nfs_unlock_request_dont_release - Unlock request and wake up sleepers.
+ * nfs_unlock_request - Unlock request and wake up sleepers.
  * @req:
  */
-void nfs_unlock_request_dont_release(struct nfs_page *req)
+void nfs_unlock_request(struct nfs_page *req)
 {
 	if (!NFS_WBACK_BUSY(req)) {
 		printk(KERN_ERR "NFS: Invalid unlock attempted\n");
@@ -144,11 +144,12 @@ void nfs_unlock_request_dont_release(struct nfs_page *req)
 }
 
 /**
- * nfs_unlock_request - Unlock request and release the nfs_page
+ * nfs_unlock_and_release_request - Unlock request and release the nfs_page
+ * @req:
  */
-void nfs_unlock_request(struct nfs_page *req)
+void nfs_unlock_and_release_request(struct nfs_page *req)
 {
-	nfs_unlock_request_dont_release(req);
+	nfs_unlock_request(req);
 	nfs_release_request(req);
 }
 
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 553f7ef1079b..8ffd7d5ed58b 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -625,7 +625,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr)
 remove_req:
 		nfs_inode_remove_request(req);
 next:
-		nfs_unlock_request_dont_release(req);
+		nfs_unlock_request(req);
 		nfs_end_page_writeback(req->wb_page);
 		nfs_release_request(req);
 	}
@@ -812,7 +812,7 @@ static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
 	nfs_grow_file(page, offset, count);
 	nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes);
 	nfs_mark_request_dirty(req);
-	nfs_unlock_request(req);
+	nfs_unlock_and_release_request(req);
 	return 0;
 }
 
@@ -1039,7 +1039,7 @@ static int nfs_do_multiple_writes(struct list_head *head,
 static void nfs_redirty_request(struct nfs_page *req)
 {
 	nfs_mark_request_dirty(req);
-	nfs_unlock_request_dont_release(req);
+	nfs_unlock_request(req);
 	nfs_end_page_writeback(req->wb_page);
 	nfs_release_request(req);
 }
@@ -1477,7 +1477,7 @@ void nfs_retry_commit(struct list_head *page_list,
 			dec_bdi_stat(req->wb_page->mapping->backing_dev_info,
 				     BDI_RECLAIMABLE);
 		}
-		nfs_unlock_request(req);
+		nfs_unlock_and_release_request(req);
 	}
 }
 EXPORT_SYMBOL_GPL(nfs_retry_commit);
@@ -1555,7 +1555,7 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
 		dprintk(" mismatch\n");
 		nfs_mark_request_dirty(req);
 	next:
-		nfs_unlock_request(req);
+		nfs_unlock_and_release_request(req);
 	}
 	nfs_init_cinfo(&cinfo, data->inode, data->dreq);
 	if (atomic_dec_and_test(&cinfo.mds->rpcs_out))
@@ -1726,7 +1726,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
 			 * page as being dirty
 			 */
 			cancel_dirty_page(page, PAGE_CACHE_SIZE);
-			nfs_unlock_request(req);
+			nfs_unlock_and_release_request(req);
 			break;
 		}
 		ret = nfs_wait_on_request(req);
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 263f30a5e10d..88d166b555e8 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -96,7 +96,7 @@ extern bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc,
 				struct nfs_page *req);
 extern  int nfs_wait_on_request(struct nfs_page *);
 extern	void nfs_unlock_request(struct nfs_page *req);
-extern void nfs_unlock_request_dont_release(struct nfs_page *req);
+extern	void nfs_unlock_and_release_request(struct nfs_page *req);
 
 /*
  * Lock the page of an asynchronous request
-- 
cgit v1.3-7-g2ca7


From 5af7df6b831ef9fd5fbde9d4bbd596f742cb2ad8 Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@ti.com>
Date: Wed, 2 May 2012 16:54:42 +0300
Subject: mfd: Add regulator support for twl6040 VIO, V2V1 supplies

twl6040 has three power supply source:
VBAT needs to be connected to VBAT, VIO, and V2V1.
Add regulator support for the VIO, V2V1 supplies.
Initially handle the two supply together with bulk commands.

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Reviewed-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Tero Kristo <t-kristo@ti.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/twl6040-core.c  | 33 +++++++++++++++++++++++++++++----
 include/linux/mfd/twl6040.h |  2 ++
 2 files changed, 31 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/twl6040-core.c b/drivers/mfd/twl6040-core.c
index 493f4a692747..7a92d95bfb60 100644
--- a/drivers/mfd/twl6040-core.c
+++ b/drivers/mfd/twl6040-core.c
@@ -27,6 +27,7 @@
 #include <linux/types.h>
 #include <linux/slab.h>
 #include <linux/kernel.h>
+#include <linux/err.h>
 #include <linux/platform_device.h>
 #include <linux/gpio.h>
 #include <linux/delay.h>
@@ -35,8 +36,10 @@
 #include <linux/err.h>
 #include <linux/mfd/core.h>
 #include <linux/mfd/twl6040.h>
+#include <linux/regulator/consumer.h>
 
 #define VIBRACTRL_MEMBER(reg) ((reg == TWL6040_REG_VIBCTLL) ? 0 : 1)
+#define TWL6040_NUM_SUPPLIES	(2)
 
 int twl6040_reg_read(struct twl6040 *twl6040, unsigned int reg)
 {
@@ -532,6 +535,21 @@ static int __devinit twl6040_probe(struct i2c_client *client,
 
 	i2c_set_clientdata(client, twl6040);
 
+	twl6040->supplies[0].supply = "vio";
+	twl6040->supplies[1].supply = "v2v1";
+	ret = regulator_bulk_get(&client->dev, TWL6040_NUM_SUPPLIES,
+				 twl6040->supplies);
+	if (ret != 0) {
+		dev_err(&client->dev, "Failed to get supplies: %d\n", ret);
+		goto regulator_get_err;
+	}
+
+	ret = regulator_bulk_enable(TWL6040_NUM_SUPPLIES, twl6040->supplies);
+	if (ret != 0) {
+		dev_err(&client->dev, "Failed to enable supplies: %d\n", ret);
+		goto power_err;
+	}
+
 	twl6040->dev = &client->dev;
 	twl6040->irq = client->irq;
 	twl6040->irq_base = pdata->irq_base;
@@ -552,13 +570,13 @@ static int __devinit twl6040_probe(struct i2c_client *client,
 		ret = gpio_request_one(twl6040->audpwron, GPIOF_OUT_INIT_LOW,
 				       "audpwron");
 		if (ret)
-			goto gpio1_err;
+			goto gpio_err;
 	}
 
 	/* codec interrupt */
 	ret = twl6040_irq_init(twl6040);
 	if (ret)
-		goto gpio2_err;
+		goto irq_init_err;
 
 	ret = request_threaded_irq(twl6040->irq_base + TWL6040_IRQ_READY,
 				   NULL, twl6040_naudint_handler, 0,
@@ -618,10 +636,14 @@ mfd_err:
 	free_irq(twl6040->irq_base + TWL6040_IRQ_READY, twl6040);
 irq_err:
 	twl6040_irq_exit(twl6040);
-gpio2_err:
+irq_init_err:
 	if (gpio_is_valid(twl6040->audpwron))
 		gpio_free(twl6040->audpwron);
-gpio1_err:
+gpio_err:
+	regulator_bulk_disable(TWL6040_NUM_SUPPLIES, twl6040->supplies);
+power_err:
+	regulator_bulk_free(TWL6040_NUM_SUPPLIES, twl6040->supplies);
+regulator_get_err:
 	i2c_set_clientdata(client, NULL);
 err:
 	return ret;
@@ -643,6 +665,9 @@ static int __devexit twl6040_remove(struct i2c_client *client)
 	mfd_remove_devices(&client->dev);
 	i2c_set_clientdata(client, NULL);
 
+	regulator_bulk_disable(TWL6040_NUM_SUPPLIES, twl6040->supplies);
+	regulator_bulk_free(TWL6040_NUM_SUPPLIES, twl6040->supplies);
+
 	return 0;
 }
 
diff --git a/include/linux/mfd/twl6040.h b/include/linux/mfd/twl6040.h
index b15b5f03f5c4..6659487c31e7 100644
--- a/include/linux/mfd/twl6040.h
+++ b/include/linux/mfd/twl6040.h
@@ -27,6 +27,7 @@
 
 #include <linux/interrupt.h>
 #include <linux/mfd/core.h>
+#include <linux/regulator/consumer.h>
 
 #define TWL6040_REG_ASICID		0x01
 #define TWL6040_REG_ASICREV		0x02
@@ -203,6 +204,7 @@ struct regmap;
 struct twl6040 {
 	struct device *dev;
 	struct regmap *regmap;
+	struct regulator_bulk_data supplies[2]; /* supplies for vio, v2v1 */
 	struct mutex mutex;
 	struct mutex io_mutex;
 	struct mutex irq_mutex;
-- 
cgit v1.3-7-g2ca7


From 922ee08baad2052d0759f100e026d49798c51fef Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Wed, 25 Apr 2012 20:50:53 +0200
Subject: dmaengine: Fixup dmaengine_prep_slave_single() to be actually useful

dmaengine_prep_slave_single() is a helper function which is supposed to be used
to prepare a transfer of a single contingous buffer. Currently the function
takes a pointer to such a buffer from which it builds a scatterlist and passes
it on to device_prep_slave_sg. The dmaengine framework requires that any
scatterlist that is passed to device_prep_slave_sg is mapped and it may not be
unmapped until the DMA operation has completed. This is not the here and any use
of dmaengine_prep_slave_single() will lead to undefined behaviour (Most likely a
system crash).

This patch changes dmaengine_prep_slave_single() to take a dma_addr_t instead of
a pointer to a buffer and moves the responsibility of mapping and unmapping the
buffer up to the caller.

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Vinod Koul <vinod.koul@linux.intel.com>
---
 include/linux/dmaengine.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 676f967390ae..0e6b595e95c8 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -615,11 +615,13 @@ static inline int dmaengine_slave_config(struct dma_chan *chan,
 }
 
 static inline struct dma_async_tx_descriptor *dmaengine_prep_slave_single(
-	struct dma_chan *chan, void *buf, size_t len,
+	struct dma_chan *chan, dma_addr_t buf, size_t len,
 	enum dma_transfer_direction dir, unsigned long flags)
 {
 	struct scatterlist sg;
-	sg_init_one(&sg, buf, len);
+	sg_init_table(&sg, 1);
+	sg_dma_address(&sg) = buf;
+	sg_dma_len(&sg) = len;
 
 	return chan->device->device_prep_slave_sg(chan, &sg, 1,
 						  dir, flags, NULL);
-- 
cgit v1.3-7-g2ca7


From 8c92013643f5c40633d61ae331cef49c1069af10 Mon Sep 17 00:00:00 2001
From: Shawn Guo <shawn.guo@linaro.org>
Date: Thu, 10 May 2012 06:23:26 +0800
Subject: dma: mxs-dma: make platform_device_id more generic

Rewrite mxs_dma_is_apbh and mxs_dma_is_apbx in order to support
other SoCs like imx6q and reform the platform_device_id for the
better further dt support.

Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Sascha Hauer <s.hauer@pengutronix.de>
Cc: Huang Shijie <b32955@freescale.com>
Signed-off-by: Dong Aisheng <dong.aisheng@linaro.org>
Signed-off-by: Shawn Guo <shawn.guo@linaro.org>
Acked-by: Marek Vasut <marex@denx.de>
Acked-by: Vinod Koul <vinod.koul@intel.com>
---
 arch/arm/mach-mxs/devices/platform-dma.c |  14 ++--
 drivers/clk/mxs/clk-imx23.c              |   4 +-
 drivers/clk/mxs/clk-imx28.c              |   4 +-
 drivers/dma/mxs-dma.c                    | 115 +++++++++++++++++++++----------
 include/linux/fsl/mxs-dma.h              |  12 +---
 5 files changed, 93 insertions(+), 56 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-mxs/devices/platform-dma.c b/arch/arm/mach-mxs/devices/platform-dma.c
index 6a0202b1016c..aff481360212 100644
--- a/arch/arm/mach-mxs/devices/platform-dma.c
+++ b/arch/arm/mach-mxs/devices/platform-dma.c
@@ -32,17 +32,19 @@ static struct platform_device *__init mxs_add_dma(const char *devid,
 
 static int __init mxs_add_mxs_dma(void)
 {
-	char *apbh = "mxs-dma-apbh";
-	char *apbx = "mxs-dma-apbx";
+	char *mx23_apbh = "imx23-dma-apbh";
+	char *mx23_apbx = "imx23-dma-apbx";
+	char *mx28_apbh = "imx28-dma-apbh";
+	char *mx28_apbx = "imx28-dma-apbx";
 
 	if (cpu_is_mx23()) {
-		mxs_add_dma(apbh, MX23_APBH_DMA_BASE_ADDR);
-		mxs_add_dma(apbx, MX23_APBX_DMA_BASE_ADDR);
+		mxs_add_dma(mx23_apbh, MX23_APBH_DMA_BASE_ADDR);
+		mxs_add_dma(mx23_apbx, MX23_APBX_DMA_BASE_ADDR);
 	}
 
 	if (cpu_is_mx28()) {
-		mxs_add_dma(apbh, MX28_APBH_DMA_BASE_ADDR);
-		mxs_add_dma(apbx, MX28_APBX_DMA_BASE_ADDR);
+		mxs_add_dma(mx28_apbh, MX28_APBH_DMA_BASE_ADDR);
+		mxs_add_dma(mx28_apbx, MX28_APBX_DMA_BASE_ADDR);
 	}
 
 	return 0;
diff --git a/drivers/clk/mxs/clk-imx23.c b/drivers/clk/mxs/clk-imx23.c
index 07fe1f1b4b70..96562f5f92f7 100644
--- a/drivers/clk/mxs/clk-imx23.c
+++ b/drivers/clk/mxs/clk-imx23.c
@@ -81,14 +81,14 @@ static struct clk_lookup uart_lookups[] __initdata = {
 };
 
 static struct clk_lookup hbus_lookups[] __initdata = {
-	{ .dev_id = "mxs-dma-apbh", },
+	{ .dev_id = "imx23-dma-apbh", },
 	{ .dev_id = "80004000.dma-apbh", },
 };
 
 static struct clk_lookup xbus_lookups[] __initdata = {
 	{ .dev_id = "duart", .con_id = "apb_pclk"},
 	{ .dev_id = "80070000.serial", .con_id = "apb_pclk"},
-	{ .dev_id = "mxs-dma-apbx", },
+	{ .dev_id = "imx23-dma-apbx", },
 	{ .dev_id = "80024000.dma-apbx", },
 };
 
diff --git a/drivers/clk/mxs/clk-imx28.c b/drivers/clk/mxs/clk-imx28.c
index 5be4636c45a6..a7ff4f106ce2 100644
--- a/drivers/clk/mxs/clk-imx28.c
+++ b/drivers/clk/mxs/clk-imx28.c
@@ -136,14 +136,14 @@ static struct clk_lookup uart_lookups[] __initdata = {
 };
 
 static struct clk_lookup hbus_lookups[] __initdata = {
-	{ .dev_id = "mxs-dma-apbh", },
+	{ .dev_id = "imx28-dma-apbh", },
 	{ .dev_id = "80004000.dma-apbh", },
 };
 
 static struct clk_lookup xbus_lookups[] __initdata = {
 	{ .dev_id = "duart", .con_id = "apb_pclk"},
 	{ .dev_id = "80074000.serial", .con_id = "apb_pclk"},
-	{ .dev_id = "mxs-dma-apbx", },
+	{ .dev_id = "imx28-dma-apbx", },
 	{ .dev_id = "80024000.dma-apbx", },
 };
 
diff --git a/drivers/dma/mxs-dma.c b/drivers/dma/mxs-dma.c
index c93f9fa08caf..b1cab087cd04 100644
--- a/drivers/dma/mxs-dma.c
+++ b/drivers/dma/mxs-dma.c
@@ -36,12 +36,8 @@
  * dma can program the controller registers of peripheral devices.
  */
 
-#define MXS_DMA_APBH		0
-#define MXS_DMA_APBX		1
-#define dma_is_apbh(mxs_dma)	((mxs_dma)->dev_id == MXS_DMA_APBH)
-
-#define APBH_VERSION_LATEST	3
-#define apbh_is_old(mxs_dma)	((mxs_dma)->version < APBH_VERSION_LATEST)
+#define dma_is_apbh(mxs_dma)	((mxs_dma)->type == MXS_DMA_APBH)
+#define apbh_is_old(mxs_dma)	((mxs_dma)->dev_id == IMX23_DMA)
 
 #define HW_APBHX_CTRL0				0x000
 #define BM_APBH_CTRL0_APB_BURST8_EN		(1 << 29)
@@ -51,9 +47,6 @@
 #define HW_APBHX_CTRL2				0x020
 #define HW_APBHX_CHANNEL_CTRL			0x030
 #define BP_APBHX_CHANNEL_CTRL_RESET_CHANNEL	16
-#define HW_APBH_VERSION				(cpu_is_mx23() ? 0x3f0 : 0x800)
-#define HW_APBX_VERSION				0x800
-#define BP_APBHX_VERSION_MAJOR			24
 /*
  * The offset of NXTCMDAR register is different per both dma type and version,
  * while stride for each channel is all the same 0x70.
@@ -125,9 +118,19 @@ struct mxs_dma_chan {
 #define MXS_DMA_CHANNELS		16
 #define MXS_DMA_CHANNELS_MASK		0xffff
 
+enum mxs_dma_devtype {
+	MXS_DMA_APBH,
+	MXS_DMA_APBX,
+};
+
+enum mxs_dma_id {
+	IMX23_DMA,
+	IMX28_DMA,
+};
+
 struct mxs_dma_engine {
-	int				dev_id;
-	unsigned int			version;
+	enum mxs_dma_id			dev_id;
+	enum mxs_dma_devtype		type;
 	void __iomem			*base;
 	struct clk			*clk;
 	struct dma_device		dma_device;
@@ -135,6 +138,66 @@ struct mxs_dma_engine {
 	struct mxs_dma_chan		mxs_chans[MXS_DMA_CHANNELS];
 };
 
+struct mxs_dma_type {
+	enum mxs_dma_id id;
+	enum mxs_dma_devtype type;
+};
+
+static struct mxs_dma_type mxs_dma_types[] = {
+	{
+		.id = IMX23_DMA,
+		.type = MXS_DMA_APBH,
+	}, {
+		.id = IMX23_DMA,
+		.type = MXS_DMA_APBX,
+	}, {
+		.id = IMX28_DMA,
+		.type = MXS_DMA_APBH,
+	}, {
+		.id = IMX28_DMA,
+		.type = MXS_DMA_APBX,
+	}
+};
+
+static struct platform_device_id mxs_dma_ids[] = {
+	{
+		.name = "imx23-dma-apbh",
+		.driver_data = (kernel_ulong_t) &mxs_dma_types[0],
+	}, {
+		.name = "imx23-dma-apbx",
+		.driver_data = (kernel_ulong_t) &mxs_dma_types[1],
+	}, {
+		.name = "imx28-dma-apbh",
+		.driver_data = (kernel_ulong_t) &mxs_dma_types[2],
+	}, {
+		.name = "imx28-dma-apbx",
+		.driver_data = (kernel_ulong_t) &mxs_dma_types[3],
+	}, {
+		/* end of list */
+	}
+};
+
+static struct mxs_dma_chan *to_mxs_dma_chan(struct dma_chan *chan)
+{
+	return container_of(chan, struct mxs_dma_chan, chan);
+}
+
+int mxs_dma_is_apbh(struct dma_chan *chan)
+{
+	struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan);
+	struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma;
+
+	return dma_is_apbh(mxs_dma);
+}
+
+int mxs_dma_is_apbx(struct dma_chan *chan)
+{
+	struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan);
+	struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma;
+
+	return !dma_is_apbh(mxs_dma);
+}
+
 static void mxs_dma_reset_chan(struct mxs_dma_chan *mxs_chan)
 {
 	struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma;
@@ -198,11 +261,6 @@ static void mxs_dma_resume_chan(struct mxs_dma_chan *mxs_chan)
 	mxs_chan->status = DMA_IN_PROGRESS;
 }
 
-static struct mxs_dma_chan *to_mxs_dma_chan(struct dma_chan *chan)
-{
-	return container_of(chan, struct mxs_dma_chan, chan);
-}
-
 static dma_cookie_t mxs_dma_tx_submit(struct dma_async_tx_descriptor *tx)
 {
 	return dma_cookie_assign(tx);
@@ -575,12 +633,6 @@ static int __init mxs_dma_init(struct mxs_dma_engine *mxs_dma)
 	if (ret)
 		goto err_out;
 
-	/* only major version matters */
-	mxs_dma->version = readl(mxs_dma->base +
-				((mxs_dma->dev_id == MXS_DMA_APBX) ?
-				HW_APBX_VERSION : HW_APBH_VERSION)) >>
-				BP_APBHX_VERSION_MAJOR;
-
 	/* enable apbh burst */
 	if (dma_is_apbh(mxs_dma)) {
 		writel(BM_APBH_CTRL0_APB_BURST_EN,
@@ -602,6 +654,8 @@ static int __init mxs_dma_probe(struct platform_device *pdev)
 {
 	const struct platform_device_id *id_entry =
 				platform_get_device_id(pdev);
+	const struct mxs_dma_type *dma_type =
+			(struct mxs_dma_type *)id_entry->driver_data;
 	struct mxs_dma_engine *mxs_dma;
 	struct resource *iores;
 	int ret, i;
@@ -610,7 +664,8 @@ static int __init mxs_dma_probe(struct platform_device *pdev)
 	if (!mxs_dma)
 		return -ENOMEM;
 
-	mxs_dma->dev_id = id_entry->driver_data;
+	mxs_dma->dev_id = dma_type->id;
+	mxs_dma->type = dma_type->type;
 
 	iores = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 
@@ -693,23 +748,11 @@ err_request_region:
 	return ret;
 }
 
-static struct platform_device_id mxs_dma_type[] = {
-	{
-		.name = "mxs-dma-apbh",
-		.driver_data = MXS_DMA_APBH,
-	}, {
-		.name = "mxs-dma-apbx",
-		.driver_data = MXS_DMA_APBX,
-	}, {
-		/* end of list */
-	}
-};
-
 static struct platform_driver mxs_dma_driver = {
 	.driver		= {
 		.name	= "mxs-dma",
 	},
-	.id_table	= mxs_dma_type,
+	.id_table	= mxs_dma_ids,
 };
 
 static int __init mxs_dma_module_init(void)
diff --git a/include/linux/fsl/mxs-dma.h b/include/linux/fsl/mxs-dma.h
index 203d7c4a3e11..55d870238399 100644
--- a/include/linux/fsl/mxs-dma.h
+++ b/include/linux/fsl/mxs-dma.h
@@ -15,14 +15,6 @@ struct mxs_dma_data {
 	int chan_irq;
 };
 
-static inline int mxs_dma_is_apbh(struct dma_chan *chan)
-{
-	return !strcmp(dev_name(chan->device->dev), "mxs-dma-apbh");
-}
-
-static inline int mxs_dma_is_apbx(struct dma_chan *chan)
-{
-	return !strcmp(dev_name(chan->device->dev), "mxs-dma-apbx");
-}
-
+extern int mxs_dma_is_apbh(struct dma_chan *chan);
+extern int mxs_dma_is_apbx(struct dma_chan *chan);
 #endif /* __MACH_MXS_DMA_H__ */
-- 
cgit v1.3-7-g2ca7


From 77701a8bab89fb0f87271674e57b2ecf636fbc5f Mon Sep 17 00:00:00 2001
From: Roland Stigge <stigge@antcom.de>
Date: Wed, 4 Apr 2012 10:34:38 +0200
Subject: i2c-pnx.c: Use resources in platforms

As a precondition for device tree conversion, the platforms using i2c-pnx.c are
converted to using mem and irq resources instead of platform data.

Signed-off-by: Roland Stigge <stigge@antcom.de>
Reviewed-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Wolfram Sang <w.sang@pengutronix.de>
---
 arch/arm/mach-lpc32xx/common.c | 67 ++++++++++++++++++++++++++----------------
 arch/arm/mach-pnx4008/i2c.c    | 64 ++++++++++++++++++++++++----------------
 drivers/i2c/busses/i2c-pnx.c   | 47 +++++++++++++++--------------
 include/linux/i2c-pnx.h        |  9 ++----
 4 files changed, 108 insertions(+), 79 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-lpc32xx/common.c b/arch/arm/mach-lpc32xx/common.c
index bbbf063a74c2..6f255600fc97 100644
--- a/arch/arm/mach-lpc32xx/common.c
+++ b/arch/arm/mach-lpc32xx/common.c
@@ -27,7 +27,6 @@
 
 #include <asm/mach/map.h>
 
-#include <mach/i2c.h>
 #include <mach/hardware.h>
 #include <mach/platform.h>
 #include "common.h"
@@ -53,46 +52,64 @@ struct platform_device lpc32xx_watchdog_device = {
 /*
  * I2C busses
  */
-static struct i2c_pnx_data i2c0_data = {
-	.name = I2C_CHIP_NAME "1",
-	.base = LPC32XX_I2C1_BASE,
-	.irq = IRQ_LPC32XX_I2C_1,
+static struct resource i2c0_resources[] = {
+	[0] = {
+		.start = LPC32XX_I2C1_BASE,
+		.end = LPC32XX_I2C1_BASE + 0x100 - 1,
+		.flags = IORESOURCE_MEM,
+	},
+	[1] = {
+		.start = IRQ_LPC32XX_I2C_1,
+		.end = IRQ_LPC32XX_I2C_1,
+		.flags = IORESOURCE_IRQ,
+	},
 };
 
-static struct i2c_pnx_data i2c1_data = {
-	.name = I2C_CHIP_NAME "2",
-	.base = LPC32XX_I2C2_BASE,
-	.irq = IRQ_LPC32XX_I2C_2,
+static struct resource i2c1_resources[] = {
+	[0] = {
+		.start = LPC32XX_I2C2_BASE,
+		.end = LPC32XX_I2C2_BASE + 0x100 - 1,
+		.flags = IORESOURCE_MEM,
+	},
+	[1] = {
+		.start = IRQ_LPC32XX_I2C_2,
+		.end = IRQ_LPC32XX_I2C_2,
+		.flags = IORESOURCE_IRQ,
+	},
 };
 
-static struct i2c_pnx_data i2c2_data = {
-	.name = "USB-I2C",
-	.base = LPC32XX_OTG_I2C_BASE,
-	.irq = IRQ_LPC32XX_USB_I2C,
+static struct resource i2c2_resources[] = {
+	[0] = {
+		.start = LPC32XX_OTG_I2C_BASE,
+		.end = LPC32XX_OTG_I2C_BASE + 0x100 - 1,
+		.flags = IORESOURCE_MEM,
+	},
+	[1] = {
+		.start = IRQ_LPC32XX_USB_I2C,
+		.end = IRQ_LPC32XX_USB_I2C,
+		.flags = IORESOURCE_IRQ,
+	},
 };
 
 struct platform_device lpc32xx_i2c0_device = {
-	.name = "pnx-i2c",
+	.name = "pnx-i2c.0",
 	.id = 0,
-	.dev = {
-		.platform_data = &i2c0_data,
-	},
+	.num_resources = ARRAY_SIZE(i2c0_resources),
+	.resource = i2c0_resources,
 };
 
 struct platform_device lpc32xx_i2c1_device = {
-	.name = "pnx-i2c",
+	.name = "pnx-i2c.1",
 	.id = 1,
-	.dev = {
-		.platform_data = &i2c1_data,
-	},
+	.num_resources = ARRAY_SIZE(i2c1_resources),
+	.resource = i2c1_resources,
 };
 
 struct platform_device lpc32xx_i2c2_device = {
-	.name = "pnx-i2c",
+	.name = "pnx-i2c.2",
 	.id = 2,
-	.dev = {
-		.platform_data = &i2c2_data,
-	},
+	.num_resources = ARRAY_SIZE(i2c2_resources),
+	.resource = i2c2_resources,
 };
 
 /* TSC (Touch Screen Controller) */
diff --git a/arch/arm/mach-pnx4008/i2c.c b/arch/arm/mach-pnx4008/i2c.c
index 8103f9644e2d..550cfc2a1f2e 100644
--- a/arch/arm/mach-pnx4008/i2c.c
+++ b/arch/arm/mach-pnx4008/i2c.c
@@ -16,48 +16,62 @@
 #include <linux/err.h>
 #include <mach/platform.h>
 #include <mach/irqs.h>
-#include <mach/i2c.h>
 
-static struct i2c_pnx_data i2c0_data = {
-	.name = I2C_CHIP_NAME "0",
-	.base = PNX4008_I2C1_BASE,
-	.irq = I2C_1_INT,
+static struct resource i2c0_resources[] = {
+	{
+		.start = PNX4008_I2C1_BASE,
+		.end = PNX4008_I2C1_BASE + SZ_4K - 1,
+		.flags = IORESOURCE_MEM,
+	}, {
+		.start = I2C_1_INT,
+		.end = I2C_1_INT,
+		.flags = IORESOURCE_IRQ,
+	},
 };
 
-static struct i2c_pnx_data i2c1_data = {
-	.name = I2C_CHIP_NAME "1",
-	.base = PNX4008_I2C2_BASE,
-	.irq = I2C_2_INT,
+static struct resource i2c1_resources[] = {
+	{
+		.start = PNX4008_I2C2_BASE,
+		.end = PNX4008_I2C2_BASE + SZ_4K - 1,
+		.flags = IORESOURCE_MEM,
+	}, {
+		.start = I2C_2_INT,
+		.end = I2C_2_INT,
+		.flags = IORESOURCE_IRQ,
+	},
 };
 
-static struct i2c_pnx_data i2c2_data = {
-	.name = "USB-I2C",
-	.base = (PNX4008_USB_CONFIG_BASE + 0x300),
-	.irq = USB_I2C_INT,
+static struct resource i2c2_resources[] = {
+	{
+		.start = PNX4008_USB_CONFIG_BASE + 0x300,
+		.end = PNX4008_USB_CONFIG_BASE + 0x300 + SZ_4K - 1,
+		.flags = IORESOURCE_MEM,
+	}, {
+		.start = USB_I2C_INT,
+		.end = USB_I2C_INT,
+		.flags = IORESOURCE_IRQ,
+	},
 };
 
 static struct platform_device i2c0_device = {
-	.name = "pnx-i2c",
+	.name = "pnx-i2c.0",
 	.id = 0,
-	.dev = {
-		.platform_data = &i2c0_data,
-	},
+	.resource = i2c0_resources,
+	.num_resources = ARRAY_SIZE(i2c0_resources),
 };
 
 static struct platform_device i2c1_device = {
-	.name = "pnx-i2c",
+	.name = "pnx-i2c.1",
 	.id = 1,
-	.dev = {
-		.platform_data = &i2c1_data,
-	},
+	.resource = i2c1_resources,
+	.num_resources = ARRAY_SIZE(i2c1_resources),
 };
 
 static struct platform_device i2c2_device = {
-	.name = "pnx-i2c",
+	.name = "pnx-i2c.2",
 	.id = 2,
-	.dev = {
-		.platform_data = &i2c2_data,
-	},
+	.resource = i2c2_resources,
+	.num_resources = ARRAY_SIZE(i2c2_resources),
 };
 
 static struct platform_device *devices[] __initdata = {
diff --git a/drivers/i2c/busses/i2c-pnx.c b/drivers/i2c/busses/i2c-pnx.c
index eb8ad538c79f..6fb97aef0465 100644
--- a/drivers/i2c/busses/i2c-pnx.c
+++ b/drivers/i2c/busses/i2c-pnx.c
@@ -568,14 +568,7 @@ static int __devinit i2c_pnx_probe(struct platform_device *pdev)
 	int ret = 0;
 	struct i2c_pnx_algo_data *alg_data;
 	unsigned long freq;
-	struct i2c_pnx_data *i2c_pnx = pdev->dev.platform_data;
-
-	if (!i2c_pnx || !i2c_pnx->name) {
-		dev_err(&pdev->dev, "%s: no platform data supplied\n",
-		       __func__);
-		ret = -EINVAL;
-		goto out;
-	}
+	struct resource *res;
 
 	alg_data = kzalloc(sizeof(*alg_data), GFP_KERNEL);
 	if (!alg_data) {
@@ -585,13 +578,10 @@ static int __devinit i2c_pnx_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, alg_data);
 
-	strlcpy(alg_data->adapter.name, i2c_pnx->name,
-		sizeof(alg_data->adapter.name));
 	alg_data->adapter.dev.parent = &pdev->dev;
 	alg_data->adapter.algo = &pnx_algorithm;
 	alg_data->adapter.algo_data = alg_data;
 	alg_data->adapter.nr = pdev->id;
-	alg_data->i2c_pnx = i2c_pnx;
 
 	alg_data->clk = clk_get(&pdev->dev, NULL);
 	if (IS_ERR(alg_data->clk)) {
@@ -603,17 +593,27 @@ static int __devinit i2c_pnx_probe(struct platform_device *pdev)
 	alg_data->mif.timer.function = i2c_pnx_timeout;
 	alg_data->mif.timer.data = (unsigned long)alg_data;
 
+	snprintf(alg_data->adapter.name, sizeof(alg_data->adapter.name),
+		 "%s", pdev->name);
+
 	/* Register I/O resource */
-	if (!request_mem_region(i2c_pnx->base, I2C_PNX_REGION_SIZE,
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		dev_err(&pdev->dev, "Unable to get mem resource.\n");
+		ret = -EBUSY;
+		goto out_clkget;
+	}
+	if (!request_mem_region(res->start, I2C_PNX_REGION_SIZE,
 				pdev->name)) {
 		dev_err(&pdev->dev,
 		       "I/O region 0x%08x for I2C already in use.\n",
-		       i2c_pnx->base);
+		       res->start);
 		ret = -ENODEV;
 		goto out_clkget;
 	}
 
-	alg_data->ioaddr = ioremap(i2c_pnx->base, I2C_PNX_REGION_SIZE);
+	alg_data->base = res->start;
+	alg_data->ioaddr = ioremap(res->start, I2C_PNX_REGION_SIZE);
 	if (!alg_data->ioaddr) {
 		dev_err(&pdev->dev, "Couldn't ioremap I2C I/O region\n");
 		ret = -ENOMEM;
@@ -650,7 +650,12 @@ static int __devinit i2c_pnx_probe(struct platform_device *pdev)
 	}
 	init_completion(&alg_data->mif.complete);
 
-	ret = request_irq(i2c_pnx->irq, i2c_pnx_interrupt,
+	alg_data->irq = platform_get_irq(pdev, 0);
+	if (alg_data->irq < 0) {
+		dev_err(&pdev->dev, "Failed to get IRQ from platform resource\n");
+		goto out_irq;
+	}
+	ret = request_irq(alg_data->irq, i2c_pnx_interrupt,
 			0, pdev->name, alg_data);
 	if (ret)
 		goto out_clock;
@@ -663,38 +668,36 @@ static int __devinit i2c_pnx_probe(struct platform_device *pdev)
 	}
 
 	dev_dbg(&pdev->dev, "%s: Master at %#8x, irq %d.\n",
-	       alg_data->adapter.name, i2c_pnx->base, i2c_pnx->irq);
+		alg_data->adapter.name, res->start, alg_data->irq);
 
 	return 0;
 
 out_irq:
-	free_irq(i2c_pnx->irq, alg_data);
+	free_irq(alg_data->irq, alg_data);
 out_clock:
 	clk_disable(alg_data->clk);
 out_unmap:
 	iounmap(alg_data->ioaddr);
 out_release:
-	release_mem_region(i2c_pnx->base, I2C_PNX_REGION_SIZE);
+	release_mem_region(res->start, I2C_PNX_REGION_SIZE);
 out_clkget:
 	clk_put(alg_data->clk);
 out_drvdata:
 	kfree(alg_data);
 err_kzalloc:
 	platform_set_drvdata(pdev, NULL);
-out:
 	return ret;
 }
 
 static int __devexit i2c_pnx_remove(struct platform_device *pdev)
 {
 	struct i2c_pnx_algo_data *alg_data = platform_get_drvdata(pdev);
-	struct i2c_pnx_data *i2c_pnx = alg_data->i2c_pnx;
 
-	free_irq(i2c_pnx->irq, alg_data);
+	free_irq(alg_data->irq, alg_data);
 	i2c_del_adapter(&alg_data->adapter);
 	clk_disable(alg_data->clk);
 	iounmap(alg_data->ioaddr);
-	release_mem_region(i2c_pnx->base, I2C_PNX_REGION_SIZE);
+	release_mem_region(alg_data->base, I2C_PNX_REGION_SIZE);
 	clk_put(alg_data->clk);
 	kfree(alg_data);
 	platform_set_drvdata(pdev, NULL);
diff --git a/include/linux/i2c-pnx.h b/include/linux/i2c-pnx.h
index a87124d4d533..6e8efb7afd7c 100644
--- a/include/linux/i2c-pnx.h
+++ b/include/linux/i2c-pnx.h
@@ -29,14 +29,9 @@ struct i2c_pnx_algo_data {
 	struct i2c_pnx_mif	mif;
 	int			last;
 	struct clk		*clk;
-	struct i2c_pnx_data	*i2c_pnx;
 	struct i2c_adapter	adapter;
-};
-
-struct i2c_pnx_data {
-	const char *name;
-	u32 base;
-	int irq;
+	phys_addr_t		base;
+	int			irq;
 };
 
 #endif /* __I2C_PNX_H__ */
-- 
cgit v1.3-7-g2ca7


From a092de11bb4a96ac43ede0352e09bdf7e06280e8 Mon Sep 17 00:00:00 2001
From: Roland Stigge <stigge@antcom.de>
Date: Fri, 20 Apr 2012 15:34:09 +0200
Subject: i2c: pnx: add device tree support

This patch adds device tree support to the pnx-i2c driver by using platform
resources for memory region and irq and removing dependency on mach includes.

The following platforms are affected:

* PNX
* LPC31xx (WIP)
* LPC32xx

The patch is based on a patch by Jon Smirl, working on lpc31xx integration

Signed-off-by: Roland Stigge <stigge@antcom.de>
Signed-off-by: Wolfram Sang <w.sang@pengutronix.de>
---
 Documentation/devicetree/bindings/i2c/pnx.txt | 36 +++++++++++++++
 drivers/i2c/busses/i2c-pnx.c                  | 63 ++++++++++++++++++++-------
 include/linux/i2c-pnx.h                       |  1 +
 3 files changed, 84 insertions(+), 16 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/i2c/pnx.txt

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/i2c/pnx.txt b/Documentation/devicetree/bindings/i2c/pnx.txt
new file mode 100644
index 000000000000..fe98ada33ee4
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/pnx.txt
@@ -0,0 +1,36 @@
+* NXP PNX I2C Controller
+
+Required properties:
+
+ - reg: Offset and length of the register set for the device
+ - compatible: should be "nxp,pnx-i2c"
+ - interrupts: configure one interrupt line
+ - #address-cells: always 1 (for i2c addresses)
+ - #size-cells: always 0
+ - interrupt-parent: the phandle for the interrupt controller that
+   services interrupts for this device.
+
+Optional properties:
+
+ - clock-frequency: desired I2C bus clock frequency in Hz, Default: 100000 Hz
+
+Examples:
+
+	i2c1: i2c@400a0000 {
+		compatible = "nxp,pnx-i2c";
+		reg = <0x400a0000 0x100>;
+		interrupt-parent = <&mic>;
+		interrupts = <51 0>;
+		#address-cells = <1>;
+		#size-cells = <0>;
+	};
+
+	i2c2: i2c@400a8000 {
+		compatible = "nxp,pnx-i2c";
+		reg = <0x400a8000 0x100>;
+		interrupt-parent = <&mic>;
+		interrupts = <50 0>;
+		#address-cells = <1>;
+		#size-cells = <0>;
+		clock-frequency = <100000>;
+	};
diff --git a/drivers/i2c/busses/i2c-pnx.c b/drivers/i2c/busses/i2c-pnx.c
index f69d80b8d736..99389d2eae51 100644
--- a/drivers/i2c/busses/i2c-pnx.c
+++ b/drivers/i2c/busses/i2c-pnx.c
@@ -23,10 +23,11 @@
 #include <linux/err.h>
 #include <linux/clk.h>
 #include <linux/slab.h>
+#include <linux/of_i2c.h>
 
-#define I2C_PNX_TIMEOUT		10 /* msec */
-#define I2C_PNX_SPEED_KHZ	100
-#define I2C_PNX_REGION_SIZE	0x100
+#define I2C_PNX_TIMEOUT_DEFAULT		10 /* msec */
+#define I2C_PNX_SPEED_KHZ_DEFAULT	100
+#define I2C_PNX_REGION_SIZE		0x100
 
 enum {
 	mstatus_tdi = 0x00000001,
@@ -74,8 +75,9 @@ enum {
 #define I2C_REG_TXS(a)	((a)->ioaddr + 0x28)	/* Tx slave FIFO (RO) */
 #define I2C_REG_STFL(a)	((a)->ioaddr + 0x2c)	/* Tx slave FIFO level (RO) */
 
-static inline int wait_timeout(long timeout, struct i2c_pnx_algo_data *data)
+static inline int wait_timeout(struct i2c_pnx_algo_data *data)
 {
+	long timeout = data->timeout;
 	while (timeout > 0 &&
 			(ioread32(I2C_REG_STS(data)) & mstatus_active)) {
 		mdelay(1);
@@ -84,8 +86,9 @@ static inline int wait_timeout(long timeout, struct i2c_pnx_algo_data *data)
 	return (timeout <= 0);
 }
 
-static inline int wait_reset(long timeout, struct i2c_pnx_algo_data *data)
+static inline int wait_reset(struct i2c_pnx_algo_data *data)
 {
+	long timeout = data->timeout;
 	while (timeout > 0 &&
 			(ioread32(I2C_REG_CTL(data)) & mcntrl_reset)) {
 		mdelay(1);
@@ -97,7 +100,7 @@ static inline int wait_reset(long timeout, struct i2c_pnx_algo_data *data)
 static inline void i2c_pnx_arm_timer(struct i2c_pnx_algo_data *alg_data)
 {
 	struct timer_list *timer = &alg_data->mif.timer;
-	unsigned long expires = msecs_to_jiffies(I2C_PNX_TIMEOUT);
+	unsigned long expires = msecs_to_jiffies(alg_data->timeout);
 
 	if (expires <= 1)
 		expires = 2;
@@ -135,7 +138,7 @@ static int i2c_pnx_start(unsigned char slave_addr,
 	}
 
 	/* First, make sure bus is idle */
-	if (wait_timeout(I2C_PNX_TIMEOUT, alg_data)) {
+	if (wait_timeout(alg_data)) {
 		/* Somebody else is monopolizing the bus */
 		dev_err(&alg_data->adapter.dev,
 			"%s: Bus busy. Slave addr = %02x, cntrl = %x, stat = %x\n",
@@ -228,7 +231,7 @@ static int i2c_pnx_master_xmit(struct i2c_pnx_algo_data *alg_data)
 		if (alg_data->mif.len == 0) {
 			if (alg_data->last) {
 				/* Wait until the STOP is seen. */
-				if (wait_timeout(I2C_PNX_TIMEOUT, alg_data))
+				if (wait_timeout(alg_data))
 					dev_err(&alg_data->adapter.dev,
 						"The bus is still active after timeout\n");
 			}
@@ -326,7 +329,7 @@ static int i2c_pnx_master_rcv(struct i2c_pnx_algo_data *alg_data)
 		if (alg_data->mif.len == 0) {
 			if (alg_data->last)
 				/* Wait until the STOP is seen. */
-				if (wait_timeout(I2C_PNX_TIMEOUT, alg_data))
+				if (wait_timeout(alg_data))
 					dev_err(&alg_data->adapter.dev,
 						"The bus is still active after timeout\n");
 
@@ -442,7 +445,7 @@ static void i2c_pnx_timeout(unsigned long data)
 
 	ctl |= mcntrl_reset;
 	iowrite32(ctl, I2C_REG_CTL(alg_data));
-	wait_reset(I2C_PNX_TIMEOUT, alg_data);
+	wait_reset(alg_data);
 	alg_data->mif.ret = -EIO;
 	complete(&alg_data->mif.complete);
 }
@@ -457,18 +460,18 @@ static inline void bus_reset_if_active(struct i2c_pnx_algo_data *alg_data)
 			alg_data->adapter.name);
 		iowrite32(ioread32(I2C_REG_CTL(alg_data)) | mcntrl_reset,
 			  I2C_REG_CTL(alg_data));
-		wait_reset(I2C_PNX_TIMEOUT, alg_data);
+		wait_reset(alg_data);
 	} else if (!(stat & mstatus_rfe) || !(stat & mstatus_tfe)) {
 		/* If there is data in the fifo's after transfer,
 		 * flush fifo's by reset.
 		 */
 		iowrite32(ioread32(I2C_REG_CTL(alg_data)) | mcntrl_reset,
 			  I2C_REG_CTL(alg_data));
-		wait_reset(I2C_PNX_TIMEOUT, alg_data);
+		wait_reset(alg_data);
 	} else if (stat & mstatus_nai) {
 		iowrite32(ioread32(I2C_REG_CTL(alg_data)) | mcntrl_reset,
 			  I2C_REG_CTL(alg_data));
-		wait_reset(I2C_PNX_TIMEOUT, alg_data);
+		wait_reset(alg_data);
 	}
 }
 
@@ -612,6 +615,7 @@ static int __devinit i2c_pnx_probe(struct platform_device *pdev)
 	struct i2c_pnx_algo_data *alg_data;
 	unsigned long freq;
 	struct resource *res;
+	u32 speed = I2C_PNX_SPEED_KHZ_DEFAULT * 1000;
 
 	alg_data = kzalloc(sizeof(*alg_data), GFP_KERNEL);
 	if (!alg_data) {
@@ -626,6 +630,22 @@ static int __devinit i2c_pnx_probe(struct platform_device *pdev)
 	alg_data->adapter.algo_data = alg_data;
 	alg_data->adapter.nr = pdev->id;
 
+	alg_data->timeout = I2C_PNX_TIMEOUT_DEFAULT;
+#ifdef CONFIG_OF
+	alg_data->adapter.dev.of_node = of_node_get(pdev->dev.of_node);
+	if (pdev->dev.of_node) {
+		of_property_read_u32(pdev->dev.of_node, "clock-frequency",
+				     &speed);
+		/*
+		 * At this point, it is planned to add an OF timeout property.
+		 * As soon as there is a consensus about how to call and handle
+		 * this, sth. like the following can be put here:
+		 *
+		 * of_property_read_u32(pdev->dev.of_node, "timeout",
+		 *                      &alg_data->timeout);
+		 */
+	}
+#endif
 	alg_data->clk = clk_get(&pdev->dev, NULL);
 	if (IS_ERR(alg_data->clk)) {
 		ret = PTR_ERR(alg_data->clk);
@@ -651,7 +671,7 @@ static int __devinit i2c_pnx_probe(struct platform_device *pdev)
 		dev_err(&pdev->dev,
 		       "I/O region 0x%08x for I2C already in use.\n",
 		       res->start);
-		ret = -ENODEV;
+		ret = -ENOMEM;
 		goto out_clkget;
 	}
 
@@ -680,14 +700,14 @@ static int __devinit i2c_pnx_probe(struct platform_device *pdev)
 	 * the deglitching filter length.
 	 */
 
-	tmp = ((freq / 1000) / I2C_PNX_SPEED_KHZ) / 2 - 2;
+	tmp = (freq / speed) / 2 - 2;
 	if (tmp > 0x3FF)
 		tmp = 0x3FF;
 	iowrite32(tmp, I2C_REG_CKH(alg_data));
 	iowrite32(tmp, I2C_REG_CKL(alg_data));
 
 	iowrite32(mcntrl_reset, I2C_REG_CTL(alg_data));
-	if (wait_reset(I2C_PNX_TIMEOUT, alg_data)) {
+	if (wait_reset(alg_data)) {
 		ret = -ENODEV;
 		goto out_clock;
 	}
@@ -710,6 +730,8 @@ static int __devinit i2c_pnx_probe(struct platform_device *pdev)
 		goto out_irq;
 	}
 
+	of_i2c_register_devices(&alg_data->adapter);
+
 	dev_dbg(&pdev->dev, "%s: Master at %#8x, irq %d.\n",
 		alg_data->adapter.name, res->start, alg_data->irq);
 
@@ -748,10 +770,19 @@ static int __devexit i2c_pnx_remove(struct platform_device *pdev)
 	return 0;
 }
 
+#ifdef CONFIG_OF
+static const struct of_device_id i2c_pnx_of_match[] = {
+	{ .compatible = "nxp,pnx-i2c" },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, i2c_pnx_of_match);
+#endif
+
 static struct platform_driver i2c_pnx_driver = {
 	.driver = {
 		.name = "pnx-i2c",
 		.owner = THIS_MODULE,
+		.of_match_table = of_match_ptr(i2c_pnx_of_match),
 	},
 	.probe = i2c_pnx_probe,
 	.remove = __devexit_p(i2c_pnx_remove),
diff --git a/include/linux/i2c-pnx.h b/include/linux/i2c-pnx.h
index 6e8efb7afd7c..1bc74afe7a35 100644
--- a/include/linux/i2c-pnx.h
+++ b/include/linux/i2c-pnx.h
@@ -32,6 +32,7 @@ struct i2c_pnx_algo_data {
 	struct i2c_adapter	adapter;
 	phys_addr_t		base;
 	int			irq;
+	u32			timeout;
 };
 
 #endif /* __I2C_PNX_H__ */
-- 
cgit v1.3-7-g2ca7


From 5a3ecd5f9877b963a581ca5d4495a1a24dafc88c Mon Sep 17 00:00:00 2001
From: David Daney <david.daney@cavium.com>
Date: Thu, 12 Apr 2012 14:14:22 -0700
Subject: i2c: Add a struct device * parameter to i2c_add_mux_adapter()

And adjust all callers.

The new device parameter is used in the next patch to initialize the
mux's of_node so that its children may be automatically populated.

Signed-off-by: David Daney <david.daney@cavium.com>
Tested-by: Lars-Peter Clausen <lars@metafoo.de>
Signed-off-by: Wolfram Sang <w.sang@pengutronix.de>
---
 drivers/i2c/i2c-mux.c           | 19 ++++++++++---------
 drivers/i2c/muxes/gpio-i2cmux.c |  3 ++-
 drivers/i2c/muxes/pca9541.c     |  3 ++-
 drivers/i2c/muxes/pca954x.c     |  2 +-
 include/linux/i2c-mux.h         |  3 ++-
 5 files changed, 17 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/i2c/i2c-mux.c b/drivers/i2c/i2c-mux.c
index d7a4833be416..26ab31dd742b 100644
--- a/drivers/i2c/i2c-mux.c
+++ b/drivers/i2c/i2c-mux.c
@@ -31,11 +31,11 @@ struct i2c_mux_priv {
 	struct i2c_algorithm algo;
 
 	struct i2c_adapter *parent;
-	void *mux_dev;	/* the mux chip/device */
+	void *mux_priv;	/* the mux chip/device */
 	u32  chan_id;	/* the channel id */
 
-	int (*select)(struct i2c_adapter *, void *mux_dev, u32 chan_id);
-	int (*deselect)(struct i2c_adapter *, void *mux_dev, u32 chan_id);
+	int (*select)(struct i2c_adapter *, void *mux_priv, u32 chan_id);
+	int (*deselect)(struct i2c_adapter *, void *mux_priv, u32 chan_id);
 };
 
 static int i2c_mux_master_xfer(struct i2c_adapter *adap,
@@ -47,11 +47,11 @@ static int i2c_mux_master_xfer(struct i2c_adapter *adap,
 
 	/* Switch to the right mux port and perform the transfer. */
 
-	ret = priv->select(parent, priv->mux_dev, priv->chan_id);
+	ret = priv->select(parent, priv->mux_priv, priv->chan_id);
 	if (ret >= 0)
 		ret = parent->algo->master_xfer(parent, msgs, num);
 	if (priv->deselect)
-		priv->deselect(parent, priv->mux_dev, priv->chan_id);
+		priv->deselect(parent, priv->mux_priv, priv->chan_id);
 
 	return ret;
 }
@@ -67,12 +67,12 @@ static int i2c_mux_smbus_xfer(struct i2c_adapter *adap,
 
 	/* Select the right mux port and perform the transfer. */
 
-	ret = priv->select(parent, priv->mux_dev, priv->chan_id);
+	ret = priv->select(parent, priv->mux_priv, priv->chan_id);
 	if (ret >= 0)
 		ret = parent->algo->smbus_xfer(parent, addr, flags,
 					read_write, command, size, data);
 	if (priv->deselect)
-		priv->deselect(parent, priv->mux_dev, priv->chan_id);
+		priv->deselect(parent, priv->mux_priv, priv->chan_id);
 
 	return ret;
 }
@@ -87,7 +87,8 @@ static u32 i2c_mux_functionality(struct i2c_adapter *adap)
 }
 
 struct i2c_adapter *i2c_add_mux_adapter(struct i2c_adapter *parent,
-				void *mux_dev, u32 force_nr, u32 chan_id,
+				struct device *mux_dev,
+				void *mux_priv, u32 force_nr, u32 chan_id,
 				int (*select) (struct i2c_adapter *,
 					       void *, u32),
 				int (*deselect) (struct i2c_adapter *,
@@ -102,7 +103,7 @@ struct i2c_adapter *i2c_add_mux_adapter(struct i2c_adapter *parent,
 
 	/* Set up private adapter data */
 	priv->parent = parent;
-	priv->mux_dev = mux_dev;
+	priv->mux_priv = mux_priv;
 	priv->chan_id = chan_id;
 	priv->select = select;
 	priv->deselect = deselect;
diff --git a/drivers/i2c/muxes/gpio-i2cmux.c b/drivers/i2c/muxes/gpio-i2cmux.c
index e5fa695eb0fa..fc5c1ef9b6ec 100644
--- a/drivers/i2c/muxes/gpio-i2cmux.c
+++ b/drivers/i2c/muxes/gpio-i2cmux.c
@@ -105,7 +105,8 @@ static int __devinit gpiomux_probe(struct platform_device *pdev)
 	for (i = 0; i < pdata->n_values; i++) {
 		u32 nr = pdata->base_nr ? (pdata->base_nr + i) : 0;
 
-		mux->adap[i] = i2c_add_mux_adapter(parent, mux, nr, i,
+		mux->adap[i] = i2c_add_mux_adapter(parent, &pdev->dev, mux,
+						   nr, i,
 						   gpiomux_select, deselect);
 		if (!mux->adap[i]) {
 			ret = -ENODEV;
diff --git a/drivers/i2c/muxes/pca9541.c b/drivers/i2c/muxes/pca9541.c
index e0df9b6c66b3..8aacde1516ac 100644
--- a/drivers/i2c/muxes/pca9541.c
+++ b/drivers/i2c/muxes/pca9541.c
@@ -353,7 +353,8 @@ static int pca9541_probe(struct i2c_client *client,
 	force = 0;
 	if (pdata)
 		force = pdata->modes[0].adap_id;
-	data->mux_adap = i2c_add_mux_adapter(adap, client, force, 0,
+	data->mux_adap = i2c_add_mux_adapter(adap, &client->dev, client,
+					     force, 0,
 					     pca9541_select_chan,
 					     pca9541_release_chan);
 
diff --git a/drivers/i2c/muxes/pca954x.c b/drivers/i2c/muxes/pca954x.c
index 0e37ef27aa12..f2dfe0d8fcce 100644
--- a/drivers/i2c/muxes/pca954x.c
+++ b/drivers/i2c/muxes/pca954x.c
@@ -226,7 +226,7 @@ static int pca954x_probe(struct i2c_client *client,
 		}
 
 		data->virt_adaps[num] =
-			i2c_add_mux_adapter(adap, client,
+			i2c_add_mux_adapter(adap, &client->dev, client,
 				force, num, pca954x_select_chan,
 				(pdata && pdata->modes[num].deselect_on_exit)
 					? pca954x_deselect_mux : NULL);
diff --git a/include/linux/i2c-mux.h b/include/linux/i2c-mux.h
index 747f0cde4164..c79083830014 100644
--- a/include/linux/i2c-mux.h
+++ b/include/linux/i2c-mux.h
@@ -34,7 +34,8 @@
  * mux control.
  */
 struct i2c_adapter *i2c_add_mux_adapter(struct i2c_adapter *parent,
-				void *mux_dev, u32 force_nr, u32 chan_id,
+				struct device *mux_dev,
+				void *mux_priv, u32 force_nr, u32 chan_id,
 				int (*select) (struct i2c_adapter *,
 					       void *mux_dev, u32 chan_id),
 				int (*deselect) (struct i2c_adapter *,
-- 
cgit v1.3-7-g2ca7


From 643dd09eb27b40ced671564edbe2640935fe37c2 Mon Sep 17 00:00:00 2001
From: Stephen Warren <swarren@nvidia.com>
Date: Tue, 17 Apr 2012 12:43:33 -0600
Subject: i2c: implement i2c_verify_adapter

This converts a struct device * to a struct i2c_adapter * while verifying
that the device really is an I2C adapter. Just like i2c_verify_client.

Signed-off-by: Stephen Warren <swarren@nvidia.com>
Acked-by: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Wolfram Sang <w.sang@pengutronix.de>
---
 drivers/i2c/i2c-core.c | 17 +++++++++++++++++
 include/linux/i2c.h    |  1 +
 2 files changed, 18 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c
index feb7dc359186..a6ad32bc0a96 100644
--- a/drivers/i2c/i2c-core.c
+++ b/drivers/i2c/i2c-core.c
@@ -772,6 +772,23 @@ struct device_type i2c_adapter_type = {
 };
 EXPORT_SYMBOL_GPL(i2c_adapter_type);
 
+/**
+ * i2c_verify_adapter - return parameter as i2c_adapter or NULL
+ * @dev: device, probably from some driver model iterator
+ *
+ * When traversing the driver model tree, perhaps using driver model
+ * iterators like @device_for_each_child(), you can't assume very much
+ * about the nodes you find.  Use this function to avoid oopses caused
+ * by wrongly treating some non-I2C device as an i2c_adapter.
+ */
+struct i2c_adapter *i2c_verify_adapter(struct device *dev)
+{
+	return (dev->type == &i2c_adapter_type)
+			? to_i2c_adapter(dev)
+			: NULL;
+}
+EXPORT_SYMBOL(i2c_verify_adapter);
+
 #ifdef CONFIG_I2C_COMPAT
 static struct class_compat *i2c_adapter_compat_class;
 #endif
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 195d8b3d9cfb..b66cb601435f 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -232,6 +232,7 @@ struct i2c_client {
 #define to_i2c_client(d) container_of(d, struct i2c_client, dev)
 
 extern struct i2c_client *i2c_verify_client(struct device *dev);
+extern struct i2c_adapter *i2c_verify_adapter(struct device *dev);
 
 static inline struct i2c_client *kobj_to_i2c_client(struct kobject *kobj)
 {
-- 
cgit v1.3-7-g2ca7


From 0938643e533cc80ef0cdfdd2e260c4910d0f8bc7 Mon Sep 17 00:00:00 2001
From: Stephen Warren <swarren@nvidia.com>
Date: Tue, 17 Apr 2012 12:43:34 -0600
Subject: of/i2c: implement of_find_i2c_adapter_by_node

This finds the struct i2c_adapter * for a given device tree node. Just
like of_find_i2c_device_by_node.

Signed-off-by: Stephen Warren <swarren@nvidia.com>
Signed-off-by: Wolfram Sang <w.sang@pengutronix.de>
---
 drivers/of/of_i2c.c    | 14 ++++++++++++++
 include/linux/of_i2c.h |  4 ++++
 2 files changed, 18 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/of/of_i2c.c b/drivers/of/of_i2c.c
index 068488717f69..1e173f357674 100644
--- a/drivers/of/of_i2c.c
+++ b/drivers/of/of_i2c.c
@@ -94,4 +94,18 @@ struct i2c_client *of_find_i2c_device_by_node(struct device_node *node)
 }
 EXPORT_SYMBOL(of_find_i2c_device_by_node);
 
+/* must call put_device() when done with returned i2c_adapter device */
+struct i2c_adapter *of_find_i2c_adapter_by_node(struct device_node *node)
+{
+	struct device *dev;
+
+	dev = bus_find_device(&i2c_bus_type, NULL, node,
+					 of_dev_node_match);
+	if (!dev)
+		return NULL;
+
+	return i2c_verify_adapter(dev);
+}
+EXPORT_SYMBOL(of_find_i2c_adapter_by_node);
+
 MODULE_LICENSE("GPL");
diff --git a/include/linux/of_i2c.h b/include/linux/of_i2c.h
index 0efe8d465f55..1cb775f8e663 100644
--- a/include/linux/of_i2c.h
+++ b/include/linux/of_i2c.h
@@ -20,6 +20,10 @@ extern void of_i2c_register_devices(struct i2c_adapter *adap);
 /* must call put_device() when done with returned i2c_client device */
 extern struct i2c_client *of_find_i2c_device_by_node(struct device_node *node);
 
+/* must call put_device() when done with returned i2c_adapter device */
+extern struct i2c_adapter *of_find_i2c_adapter_by_node(
+						struct device_node *node);
+
 #else
 static inline void of_i2c_register_devices(struct i2c_adapter *adap)
 {
-- 
cgit v1.3-7-g2ca7


From e7065e20d9a6a8ee4a8b31ebe71d6c00a0f45354 Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Sat, 28 Apr 2012 15:32:06 +0200
Subject: i2c: Rename last mux driver to standard pattern

Update the MAINTAINERS entry and all other references accordingly.

Based on an original patch by Wolfram Sang.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Acked-by: Peter Korsgaard <peter.korsgaard@barco.com>

[wsa: fixed merge conflict due to rework in i2c_add_mux_adapter()]

Signed-off-by: Wolfram Sang <w.sang@pengutronix.de>
---
 Documentation/i2c/muxes/gpio-i2cmux  |  65 -------------
 Documentation/i2c/muxes/i2c-mux-gpio |  65 +++++++++++++
 MAINTAINERS                          |   6 +-
 drivers/i2c/muxes/Kconfig            |   2 +-
 drivers/i2c/muxes/Makefile           |   2 +-
 drivers/i2c/muxes/gpio-i2cmux.c      | 174 -----------------------------------
 drivers/i2c/muxes/i2c-mux-gpio.c     | 173 ++++++++++++++++++++++++++++++++++
 include/linux/gpio-i2cmux.h          |  38 --------
 include/linux/i2c-mux-gpio.h         |  38 ++++++++
 9 files changed, 281 insertions(+), 282 deletions(-)
 delete mode 100644 Documentation/i2c/muxes/gpio-i2cmux
 create mode 100644 Documentation/i2c/muxes/i2c-mux-gpio
 delete mode 100644 drivers/i2c/muxes/gpio-i2cmux.c
 create mode 100644 drivers/i2c/muxes/i2c-mux-gpio.c
 delete mode 100644 include/linux/gpio-i2cmux.h
 create mode 100644 include/linux/i2c-mux-gpio.h

(limited to 'include/linux')

diff --git a/Documentation/i2c/muxes/gpio-i2cmux b/Documentation/i2c/muxes/gpio-i2cmux
deleted file mode 100644
index 811cd78d4cdc..000000000000
--- a/Documentation/i2c/muxes/gpio-i2cmux
+++ /dev/null
@@ -1,65 +0,0 @@
-Kernel driver gpio-i2cmux
-
-Author: Peter Korsgaard <peter.korsgaard@barco.com>
-
-Description
------------
-
-gpio-i2cmux is an i2c mux driver providing access to I2C bus segments
-from a master I2C bus and a hardware MUX controlled through GPIO pins.
-
-E.G.:
-
-  ----------              ----------  Bus segment 1   - - - - -
- |          | SCL/SDA    |          |-------------- |           |
- |          |------------|          |
- |          |            |          | Bus segment 2 |           |
- |  Linux   | GPIO 1..N  |   MUX    |---------------   Devices
- |          |------------|          |               |           |
- |          |            |          | Bus segment M
- |          |            |          |---------------|           |
-  ----------              ----------                  - - - - -
-
-SCL/SDA of the master I2C bus is multiplexed to bus segment 1..M
-according to the settings of the GPIO pins 1..N.
-
-Usage
------
-
-gpio-i2cmux uses the platform bus, so you need to provide a struct
-platform_device with the platform_data pointing to a struct
-gpio_i2cmux_platform_data with the I2C adapter number of the master
-bus, the number of bus segments to create and the GPIO pins used
-to control it. See include/linux/gpio-i2cmux.h for details.
-
-E.G. something like this for a MUX providing 4 bus segments
-controlled through 3 GPIO pins:
-
-#include <linux/gpio-i2cmux.h>
-#include <linux/platform_device.h>
-
-static const unsigned myboard_gpiomux_gpios[] = {
-	AT91_PIN_PC26, AT91_PIN_PC25, AT91_PIN_PC24
-};
-
-static const unsigned myboard_gpiomux_values[] = {
-	0, 1, 2, 3
-};
-
-static struct gpio_i2cmux_platform_data myboard_i2cmux_data = {
-	.parent		= 1,
-	.base_nr	= 2, /* optional */
-	.values		= myboard_gpiomux_values,
-	.n_values	= ARRAY_SIZE(myboard_gpiomux_values),
-	.gpios		= myboard_gpiomux_gpios,
-	.n_gpios	= ARRAY_SIZE(myboard_gpiomux_gpios),
-	.idle		= 4, /* optional */
-};
-
-static struct platform_device myboard_i2cmux = {
-	.name		= "gpio-i2cmux",
-	.id		= 0,
-	.dev		= {
-		.platform_data	= &myboard_i2cmux_data,
-	},
-};
diff --git a/Documentation/i2c/muxes/i2c-mux-gpio b/Documentation/i2c/muxes/i2c-mux-gpio
new file mode 100644
index 000000000000..bd9b2299b739
--- /dev/null
+++ b/Documentation/i2c/muxes/i2c-mux-gpio
@@ -0,0 +1,65 @@
+Kernel driver i2c-gpio-mux
+
+Author: Peter Korsgaard <peter.korsgaard@barco.com>
+
+Description
+-----------
+
+i2c-gpio-mux is an i2c mux driver providing access to I2C bus segments
+from a master I2C bus and a hardware MUX controlled through GPIO pins.
+
+E.G.:
+
+  ----------              ----------  Bus segment 1   - - - - -
+ |          | SCL/SDA    |          |-------------- |           |
+ |          |------------|          |
+ |          |            |          | Bus segment 2 |           |
+ |  Linux   | GPIO 1..N  |   MUX    |---------------   Devices
+ |          |------------|          |               |           |
+ |          |            |          | Bus segment M
+ |          |            |          |---------------|           |
+  ----------              ----------                  - - - - -
+
+SCL/SDA of the master I2C bus is multiplexed to bus segment 1..M
+according to the settings of the GPIO pins 1..N.
+
+Usage
+-----
+
+i2c-gpio-mux uses the platform bus, so you need to provide a struct
+platform_device with the platform_data pointing to a struct
+gpio_i2cmux_platform_data with the I2C adapter number of the master
+bus, the number of bus segments to create and the GPIO pins used
+to control it. See include/linux/i2c-gpio-mux.h for details.
+
+E.G. something like this for a MUX providing 4 bus segments
+controlled through 3 GPIO pins:
+
+#include <linux/i2c-gpio-mux.h>
+#include <linux/platform_device.h>
+
+static const unsigned myboard_gpiomux_gpios[] = {
+	AT91_PIN_PC26, AT91_PIN_PC25, AT91_PIN_PC24
+};
+
+static const unsigned myboard_gpiomux_values[] = {
+	0, 1, 2, 3
+};
+
+static struct gpio_i2cmux_platform_data myboard_i2cmux_data = {
+	.parent		= 1,
+	.base_nr	= 2, /* optional */
+	.values		= myboard_gpiomux_values,
+	.n_values	= ARRAY_SIZE(myboard_gpiomux_values),
+	.gpios		= myboard_gpiomux_gpios,
+	.n_gpios	= ARRAY_SIZE(myboard_gpiomux_gpios),
+	.idle		= 4, /* optional */
+};
+
+static struct platform_device myboard_i2cmux = {
+	.name		= "i2c-gpio-mux",
+	.id		= 0,
+	.dev		= {
+		.platform_data	= &myboard_i2cmux_data,
+	},
+};
diff --git a/MAINTAINERS b/MAINTAINERS
index f0c3b4558381..4241699adea3 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2937,9 +2937,9 @@ GENERIC GPIO I2C MULTIPLEXER DRIVER
 M:	Peter Korsgaard <peter.korsgaard@barco.com>
 L:	linux-i2c@vger.kernel.org
 S:	Supported
-F:	drivers/i2c/muxes/gpio-i2cmux.c
-F:	include/linux/gpio-i2cmux.h
-F:	Documentation/i2c/muxes/gpio-i2cmux
+F:	drivers/i2c/muxes/i2c-mux-gpio.c
+F:	include/linux/i2c-mux-gpio.h
+F:	Documentation/i2c/muxes/i2c-mux-gpio
 
 GENERIC HDLC (WAN) DRIVERS
 M:	Krzysztof Halasa <khc@pm.waw.pl>
diff --git a/drivers/i2c/muxes/Kconfig b/drivers/i2c/muxes/Kconfig
index e14a4205e5c6..beb2491db274 100644
--- a/drivers/i2c/muxes/Kconfig
+++ b/drivers/i2c/muxes/Kconfig
@@ -15,7 +15,7 @@ config I2C_MUX_GPIO
 	  through GPIO pins.
 
 	  This driver can also be built as a module.  If so, the module
-	  will be called gpio-i2cmux.
+	  will be called i2c-mux-gpio.
 
 config I2C_MUX_PCA9541
 	tristate "NXP PCA9541 I2C Master Selector"
diff --git a/drivers/i2c/muxes/Makefile b/drivers/i2c/muxes/Makefile
index 0868335cff11..5826249b29ca 100644
--- a/drivers/i2c/muxes/Makefile
+++ b/drivers/i2c/muxes/Makefile
@@ -1,7 +1,7 @@
 #
 # Makefile for multiplexer I2C chip drivers.
 
-obj-$(CONFIG_I2C_MUX_GPIO)	+= gpio-i2cmux.o
+obj-$(CONFIG_I2C_MUX_GPIO)	+= i2c-mux-gpio.o
 obj-$(CONFIG_I2C_MUX_PCA9541)	+= i2c-mux-pca9541.o
 obj-$(CONFIG_I2C_MUX_PCA954x)	+= i2c-mux-pca954x.o
 
diff --git a/drivers/i2c/muxes/gpio-i2cmux.c b/drivers/i2c/muxes/gpio-i2cmux.c
deleted file mode 100644
index fc5c1ef9b6ec..000000000000
--- a/drivers/i2c/muxes/gpio-i2cmux.c
+++ /dev/null
@@ -1,174 +0,0 @@
-/*
- * I2C multiplexer using GPIO API
- *
- * Peter Korsgaard <peter.korsgaard@barco.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/i2c.h>
-#include <linux/i2c-mux.h>
-#include <linux/gpio-i2cmux.h>
-#include <linux/platform_device.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/gpio.h>
-
-struct gpiomux {
-	struct i2c_adapter *parent;
-	struct i2c_adapter **adap; /* child busses */
-	struct gpio_i2cmux_platform_data data;
-};
-
-static void gpiomux_set(const struct gpiomux *mux, unsigned val)
-{
-	int i;
-
-	for (i = 0; i < mux->data.n_gpios; i++)
-		gpio_set_value(mux->data.gpios[i], val & (1 << i));
-}
-
-static int gpiomux_select(struct i2c_adapter *adap, void *data, u32 chan)
-{
-	struct gpiomux *mux = data;
-
-	gpiomux_set(mux, mux->data.values[chan]);
-
-	return 0;
-}
-
-static int gpiomux_deselect(struct i2c_adapter *adap, void *data, u32 chan)
-{
-	struct gpiomux *mux = data;
-
-	gpiomux_set(mux, mux->data.idle);
-
-	return 0;
-}
-
-static int __devinit gpiomux_probe(struct platform_device *pdev)
-{
-	struct gpiomux *mux;
-	struct gpio_i2cmux_platform_data *pdata;
-	struct i2c_adapter *parent;
-	int (*deselect) (struct i2c_adapter *, void *, u32);
-	unsigned initial_state;
-	int i, ret;
-
-	pdata = pdev->dev.platform_data;
-	if (!pdata) {
-		dev_err(&pdev->dev, "Missing platform data\n");
-		return -ENODEV;
-	}
-
-	parent = i2c_get_adapter(pdata->parent);
-	if (!parent) {
-		dev_err(&pdev->dev, "Parent adapter (%d) not found\n",
-			pdata->parent);
-		return -ENODEV;
-	}
-
-	mux = kzalloc(sizeof(*mux), GFP_KERNEL);
-	if (!mux) {
-		ret = -ENOMEM;
-		goto alloc_failed;
-	}
-
-	mux->parent = parent;
-	mux->data = *pdata;
-	mux->adap = kzalloc(sizeof(struct i2c_adapter *) * pdata->n_values,
-			    GFP_KERNEL);
-	if (!mux->adap) {
-		ret = -ENOMEM;
-		goto alloc_failed2;
-	}
-
-	if (pdata->idle != GPIO_I2CMUX_NO_IDLE) {
-		initial_state = pdata->idle;
-		deselect = gpiomux_deselect;
-	} else {
-		initial_state = pdata->values[0];
-		deselect = NULL;
-	}
-
-	for (i = 0; i < pdata->n_gpios; i++) {
-		ret = gpio_request(pdata->gpios[i], "gpio-i2cmux");
-		if (ret)
-			goto err_request_gpio;
-		gpio_direction_output(pdata->gpios[i],
-				      initial_state & (1 << i));
-	}
-
-	for (i = 0; i < pdata->n_values; i++) {
-		u32 nr = pdata->base_nr ? (pdata->base_nr + i) : 0;
-
-		mux->adap[i] = i2c_add_mux_adapter(parent, &pdev->dev, mux,
-						   nr, i,
-						   gpiomux_select, deselect);
-		if (!mux->adap[i]) {
-			ret = -ENODEV;
-			dev_err(&pdev->dev, "Failed to add adapter %d\n", i);
-			goto add_adapter_failed;
-		}
-	}
-
-	dev_info(&pdev->dev, "%d port mux on %s adapter\n",
-		 pdata->n_values, parent->name);
-
-	platform_set_drvdata(pdev, mux);
-
-	return 0;
-
-add_adapter_failed:
-	for (; i > 0; i--)
-		i2c_del_mux_adapter(mux->adap[i - 1]);
-	i = pdata->n_gpios;
-err_request_gpio:
-	for (; i > 0; i--)
-		gpio_free(pdata->gpios[i - 1]);
-	kfree(mux->adap);
-alloc_failed2:
-	kfree(mux);
-alloc_failed:
-	i2c_put_adapter(parent);
-
-	return ret;
-}
-
-static int __devexit gpiomux_remove(struct platform_device *pdev)
-{
-	struct gpiomux *mux = platform_get_drvdata(pdev);
-	int i;
-
-	for (i = 0; i < mux->data.n_values; i++)
-		i2c_del_mux_adapter(mux->adap[i]);
-
-	for (i = 0; i < mux->data.n_gpios; i++)
-		gpio_free(mux->data.gpios[i]);
-
-	platform_set_drvdata(pdev, NULL);
-	i2c_put_adapter(mux->parent);
-	kfree(mux->adap);
-	kfree(mux);
-
-	return 0;
-}
-
-static struct platform_driver gpiomux_driver = {
-	.probe	= gpiomux_probe,
-	.remove	= __devexit_p(gpiomux_remove),
-	.driver	= {
-		.owner	= THIS_MODULE,
-		.name	= "gpio-i2cmux",
-	},
-};
-
-module_platform_driver(gpiomux_driver);
-
-MODULE_DESCRIPTION("GPIO-based I2C multiplexer driver");
-MODULE_AUTHOR("Peter Korsgaard <peter.korsgaard@barco.com>");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("platform:gpio-i2cmux");
diff --git a/drivers/i2c/muxes/i2c-mux-gpio.c b/drivers/i2c/muxes/i2c-mux-gpio.c
new file mode 100644
index 000000000000..68b1f8ec3436
--- /dev/null
+++ b/drivers/i2c/muxes/i2c-mux-gpio.c
@@ -0,0 +1,173 @@
+/*
+ * I2C multiplexer using GPIO API
+ *
+ * Peter Korsgaard <peter.korsgaard@barco.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/i2c.h>
+#include <linux/i2c-mux.h>
+#include <linux/i2c-mux-gpio.h>
+#include <linux/platform_device.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/gpio.h>
+
+struct gpiomux {
+	struct i2c_adapter *parent;
+	struct i2c_adapter **adap; /* child busses */
+	struct i2c_mux_gpio_platform_data data;
+};
+
+static void i2c_mux_gpio_set(const struct gpiomux *mux, unsigned val)
+{
+	int i;
+
+	for (i = 0; i < mux->data.n_gpios; i++)
+		gpio_set_value(mux->data.gpios[i], val & (1 << i));
+}
+
+static int i2c_mux_gpio_select(struct i2c_adapter *adap, void *data, u32 chan)
+{
+	struct gpiomux *mux = data;
+
+	i2c_mux_gpio_set(mux, mux->data.values[chan]);
+
+	return 0;
+}
+
+static int i2c_mux_gpio_deselect(struct i2c_adapter *adap, void *data, u32 chan)
+{
+	struct gpiomux *mux = data;
+
+	i2c_mux_gpio_set(mux, mux->data.idle);
+
+	return 0;
+}
+
+static int __devinit i2c_mux_gpio_probe(struct platform_device *pdev)
+{
+	struct gpiomux *mux;
+	struct i2c_mux_gpio_platform_data *pdata;
+	struct i2c_adapter *parent;
+	int (*deselect) (struct i2c_adapter *, void *, u32);
+	unsigned initial_state;
+	int i, ret;
+
+	pdata = pdev->dev.platform_data;
+	if (!pdata) {
+		dev_err(&pdev->dev, "Missing platform data\n");
+		return -ENODEV;
+	}
+
+	parent = i2c_get_adapter(pdata->parent);
+	if (!parent) {
+		dev_err(&pdev->dev, "Parent adapter (%d) not found\n",
+			pdata->parent);
+		return -ENODEV;
+	}
+
+	mux = kzalloc(sizeof(*mux), GFP_KERNEL);
+	if (!mux) {
+		ret = -ENOMEM;
+		goto alloc_failed;
+	}
+
+	mux->parent = parent;
+	mux->data = *pdata;
+	mux->adap = kzalloc(sizeof(struct i2c_adapter *) * pdata->n_values,
+			    GFP_KERNEL);
+	if (!mux->adap) {
+		ret = -ENOMEM;
+		goto alloc_failed2;
+	}
+
+	if (pdata->idle != I2C_MUX_GPIO_NO_IDLE) {
+		initial_state = pdata->idle;
+		deselect = i2c_mux_gpio_deselect;
+	} else {
+		initial_state = pdata->values[0];
+		deselect = NULL;
+	}
+
+	for (i = 0; i < pdata->n_gpios; i++) {
+		ret = gpio_request(pdata->gpios[i], "i2c-mux-gpio");
+		if (ret)
+			goto err_request_gpio;
+		gpio_direction_output(pdata->gpios[i],
+				      initial_state & (1 << i));
+	}
+
+	for (i = 0; i < pdata->n_values; i++) {
+		u32 nr = pdata->base_nr ? (pdata->base_nr + i) : 0;
+
+		mux->adap[i] = i2c_add_mux_adapter(parent, &pdev->dev, mux, nr, i,
+						   i2c_mux_gpio_select, deselect);
+		if (!mux->adap[i]) {
+			ret = -ENODEV;
+			dev_err(&pdev->dev, "Failed to add adapter %d\n", i);
+			goto add_adapter_failed;
+		}
+	}
+
+	dev_info(&pdev->dev, "%d port mux on %s adapter\n",
+		 pdata->n_values, parent->name);
+
+	platform_set_drvdata(pdev, mux);
+
+	return 0;
+
+add_adapter_failed:
+	for (; i > 0; i--)
+		i2c_del_mux_adapter(mux->adap[i - 1]);
+	i = pdata->n_gpios;
+err_request_gpio:
+	for (; i > 0; i--)
+		gpio_free(pdata->gpios[i - 1]);
+	kfree(mux->adap);
+alloc_failed2:
+	kfree(mux);
+alloc_failed:
+	i2c_put_adapter(parent);
+
+	return ret;
+}
+
+static int __devexit i2c_mux_gpio_remove(struct platform_device *pdev)
+{
+	struct gpiomux *mux = platform_get_drvdata(pdev);
+	int i;
+
+	for (i = 0; i < mux->data.n_values; i++)
+		i2c_del_mux_adapter(mux->adap[i]);
+
+	for (i = 0; i < mux->data.n_gpios; i++)
+		gpio_free(mux->data.gpios[i]);
+
+	platform_set_drvdata(pdev, NULL);
+	i2c_put_adapter(mux->parent);
+	kfree(mux->adap);
+	kfree(mux);
+
+	return 0;
+}
+
+static struct platform_driver i2c_mux_gpio_driver = {
+	.probe	= i2c_mux_gpio_probe,
+	.remove	= __devexit_p(i2c_mux_gpio_remove),
+	.driver	= {
+		.owner	= THIS_MODULE,
+		.name	= "i2c-mux-gpio",
+	},
+};
+
+module_platform_driver(i2c_mux_gpio_driver);
+
+MODULE_DESCRIPTION("GPIO-based I2C multiplexer driver");
+MODULE_AUTHOR("Peter Korsgaard <peter.korsgaard@barco.com>");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:i2c-mux-gpio");
diff --git a/include/linux/gpio-i2cmux.h b/include/linux/gpio-i2cmux.h
deleted file mode 100644
index 4a333bb0bd0d..000000000000
--- a/include/linux/gpio-i2cmux.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * gpio-i2cmux interface to platform code
- *
- * Peter Korsgaard <peter.korsgaard@barco.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef _LINUX_GPIO_I2CMUX_H
-#define _LINUX_GPIO_I2CMUX_H
-
-/* MUX has no specific idle mode */
-#define GPIO_I2CMUX_NO_IDLE	((unsigned)-1)
-
-/**
- * struct gpio_i2cmux_platform_data - Platform-dependent data for gpio-i2cmux
- * @parent: Parent I2C bus adapter number
- * @base_nr: Base I2C bus number to number adapters from or zero for dynamic
- * @values: Array of bitmasks of GPIO settings (low/high) for each
- *	position
- * @n_values: Number of multiplexer positions (busses to instantiate)
- * @gpios: Array of GPIO numbers used to control MUX
- * @n_gpios: Number of GPIOs used to control MUX
- * @idle: Bitmask to write to MUX when idle or GPIO_I2CMUX_NO_IDLE if not used
- */
-struct gpio_i2cmux_platform_data {
-	int parent;
-	int base_nr;
-	const unsigned *values;
-	int n_values;
-	const unsigned *gpios;
-	int n_gpios;
-	unsigned idle;
-};
-
-#endif /* _LINUX_GPIO_I2CMUX_H */
diff --git a/include/linux/i2c-mux-gpio.h b/include/linux/i2c-mux-gpio.h
new file mode 100644
index 000000000000..a36343a37ebc
--- /dev/null
+++ b/include/linux/i2c-mux-gpio.h
@@ -0,0 +1,38 @@
+/*
+ * i2c-mux-gpio interface to platform code
+ *
+ * Peter Korsgaard <peter.korsgaard@barco.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _LINUX_I2C_MUX_GPIO_H
+#define _LINUX_I2C_MUX_GPIO_H
+
+/* MUX has no specific idle mode */
+#define I2C_MUX_GPIO_NO_IDLE	((unsigned)-1)
+
+/**
+ * struct i2c_mux_gpio_platform_data - Platform-dependent data for i2c-mux-gpio
+ * @parent: Parent I2C bus adapter number
+ * @base_nr: Base I2C bus number to number adapters from or zero for dynamic
+ * @values: Array of bitmasks of GPIO settings (low/high) for each
+ *	position
+ * @n_values: Number of multiplexer positions (busses to instantiate)
+ * @gpios: Array of GPIO numbers used to control MUX
+ * @n_gpios: Number of GPIOs used to control MUX
+ * @idle: Bitmask to write to MUX when idle or GPIO_I2CMUX_NO_IDLE if not used
+ */
+struct i2c_mux_gpio_platform_data {
+	int parent;
+	int base_nr;
+	const unsigned *values;
+	int n_values;
+	const unsigned *gpios;
+	int n_gpios;
+	unsigned idle;
+};
+
+#endif /* _LINUX_I2C_MUX_GPIO_H */
-- 
cgit v1.3-7-g2ca7


From 81f38ee8e6a9472193337da248c30963a9741a30 Mon Sep 17 00:00:00 2001
From: Shawn Guo <shawn.guo@linaro.org>
Date: Sun, 6 May 2012 10:04:23 +0800
Subject: mmc: mxs-mmc: move header from mach into linux folder

Rename arch/arm/mach-mxs/include/mach/mmc.h to
include/linux/mmc/mxs-mmc.h, so that mxs-mmc driver becomes
<mach/*> inclusion free.

Signed-off-by: Shawn Guo <shawn.guo@linaro.org>
Acked-by: Marek Vasut <marex@denx.de>
Acked-by: Chris Ball <cjb@laptop.org>
---
 arch/arm/mach-mxs/include/mach/devices-common.h |  2 +-
 arch/arm/mach-mxs/include/mach/mmc.h            | 18 ------------------
 drivers/mmc/host/mxs-mmc.c                      |  3 +--
 include/linux/mmc/mxs-mmc.h                     | 19 +++++++++++++++++++
 4 files changed, 21 insertions(+), 21 deletions(-)
 delete mode 100644 arch/arm/mach-mxs/include/mach/mmc.h
 create mode 100644 include/linux/mmc/mxs-mmc.h

(limited to 'include/linux')

diff --git a/arch/arm/mach-mxs/include/mach/devices-common.h b/arch/arm/mach-mxs/include/mach/devices-common.h
index 2b37689c74ee..6fc060151979 100644
--- a/arch/arm/mach-mxs/include/mach/devices-common.h
+++ b/arch/arm/mach-mxs/include/mach/devices-common.h
@@ -87,7 +87,7 @@ struct platform_device * __init mxs_add_mxs_i2c(
 		const struct mxs_mxs_i2c_data *data);
 
 /* mmc */
-#include <mach/mmc.h>
+#include <linux/mmc/mxs-mmc.h>
 struct mxs_mxs_mmc_data {
 	const char *devid;
 	int id;
diff --git a/arch/arm/mach-mxs/include/mach/mmc.h b/arch/arm/mach-mxs/include/mach/mmc.h
deleted file mode 100644
index 211547a05564..000000000000
--- a/arch/arm/mach-mxs/include/mach/mmc.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/*
- * Copyright 2011 Freescale Semiconductor, Inc. All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef __MACH_MXS_MMC_H__
-#define __MACH_MXS_MMC_H__
-
-struct mxs_mmc_platform_data {
-	int wp_gpio;	/* write protect pin */
-	unsigned int flags;
-#define SLOTF_4_BIT_CAPABLE	(1 << 0)
-#define SLOTF_8_BIT_CAPABLE	(1 << 1)
-};
-#endif /* __MACH_MXS_MMC_H__ */
diff --git a/drivers/mmc/host/mxs-mmc.c b/drivers/mmc/host/mxs-mmc.c
index 92395563cd82..5343190bb9d9 100644
--- a/drivers/mmc/host/mxs-mmc.c
+++ b/drivers/mmc/host/mxs-mmc.c
@@ -41,8 +41,7 @@
 #include <linux/fsl/mxs-dma.h>
 #include <linux/pinctrl/consumer.h>
 #include <linux/stmp_device.h>
-
-#include <mach/mmc.h>
+#include <linux/mmc/mxs-mmc.h>
 
 #define DRIVER_NAME	"mxs-mmc"
 
diff --git a/include/linux/mmc/mxs-mmc.h b/include/linux/mmc/mxs-mmc.h
new file mode 100644
index 000000000000..7c2ad3a7f2f3
--- /dev/null
+++ b/include/linux/mmc/mxs-mmc.h
@@ -0,0 +1,19 @@
+/*
+ * Copyright 2011 Freescale Semiconductor, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __LINUX_MMC_MXS_MMC_H__
+#define __LINUX_MMC_MXS_MMC_H__
+
+struct mxs_mmc_platform_data {
+	int wp_gpio;	/* write protect pin */
+	unsigned int flags;
+#define SLOTF_4_BIT_CAPABLE	(1 << 0)
+#define SLOTF_8_BIT_CAPABLE	(1 << 1)
+};
+
+#endif /* __LINUX_MMC_MXS_MMC_H__ */
-- 
cgit v1.3-7-g2ca7


From 1df5c939f6d9dff7dfbe108d93133b9636baa607 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Wed, 18 Apr 2012 09:07:12 +0100
Subject: clk: Provide dummy clk_unregister()

While there's no actual implementation behind it having the call to use
in drivers makes them feel neater from a driver author point of view. An
actual implementation can wait for someone who needs to use the function
in a real system.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
[mturquette@linaro.org: void return type instead of int -EINVAL]
Signed-off-by: Mike Turquette <mturquette@linaro.org>
---
 drivers/clk/clk.c            | 9 +++++++++
 include/linux/clk-provider.h | 2 ++
 2 files changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
index e5d5dc13bcfd..a7e5dd59e19d 100644
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c
@@ -1420,6 +1420,15 @@ fail_out:
 }
 EXPORT_SYMBOL_GPL(clk_register);
 
+/**
+ * clk_unregister - unregister a currently registered clock
+ * @clk: clock to unregister
+ *
+ * Currently unimplemented.
+ */
+void clk_unregister(struct clk *clk) {}
+EXPORT_SYMBOL_GPL(clk_unregister);
+
 /***        clk rate change notifiers        ***/
 
 /**
diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h
index c1c23b9ec368..4a0b483986c3 100644
--- a/include/linux/clk-provider.h
+++ b/include/linux/clk-provider.h
@@ -312,6 +312,8 @@ struct clk *clk_register_fixed_factor(struct device *dev, const char *name,
  */
 struct clk *clk_register(struct device *dev, struct clk_hw *hw);
 
+void clk_unregister(struct clk *clk);
+
 /* helper functions */
 const char *__clk_get_name(struct clk *clk);
 struct clk_hw *__clk_get_hw(struct clk *clk);
-- 
cgit v1.3-7-g2ca7


From 9754e39c7bc51328f145e933bfb0df47cd67b6e9 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Sat, 7 Apr 2012 12:33:03 +0200
Subject: jbd: Split updating of journal superblock and marking journal empty

There are three case of updating journal superblock. In the first case, we want
to mark journal as empty (setting s_sequence to 0), in the second case we want
to update log tail, in the third case we want to update s_errno. Split these
cases into separate functions. It makes the code slightly more straightforward
and later patches will make the distinction even more important.

Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/jbd/checkpoint.c        |   2 +-
 fs/jbd/commit.c            |   2 +-
 fs/jbd/journal.c           | 164 +++++++++++++++++++++++++++------------------
 include/linux/jbd.h        |   2 +-
 include/trace/events/jbd.h |  12 ++--
 5 files changed, 104 insertions(+), 78 deletions(-)

(limited to 'include/linux')

diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
index 05f0754f2b46..80c85f3e087f 100644
--- a/fs/jbd/checkpoint.c
+++ b/fs/jbd/checkpoint.c
@@ -540,7 +540,7 @@ int cleanup_journal_tail(journal_t *journal)
 	journal->j_tail = blocknr;
 	spin_unlock(&journal->j_state_lock);
 	if (!(journal->j_flags & JFS_ABORT))
-		journal_update_superblock(journal, 1);
+		journal_update_sb_log_tail(journal);
 	return 0;
 }
 
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 9d31e6a39205..dba9cfd75f1a 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -308,7 +308,7 @@ void journal_commit_transaction(journal_t *journal)
 	/* Do we need to erase the effects of a prior journal_flush? */
 	if (journal->j_flags & JFS_FLUSHED) {
 		jbd_debug(3, "super block updated\n");
-		journal_update_superblock(journal, 1);
+		journal_update_sb_log_tail(journal);
 	} else {
 		jbd_debug(3, "superblock not updated\n");
 	}
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 2047fd77bf38..44c104abfb36 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -923,8 +923,22 @@ static int journal_reset(journal_t *journal)
 
 	journal->j_max_transaction_buffers = journal->j_maxlen / 4;
 
-	/* Add the dynamic fields and write it to disk. */
-	journal_update_superblock(journal, 1);
+	/*
+	 * As a special case, if the on-disk copy is already marked as needing
+	 * no recovery (s_start == 0), then we can safely defer the superblock
+	 * update until the next commit by setting JFS_FLUSHED.  This avoids
+	 * attempting a write to a potential-readonly device.
+	 */
+	if (sb->s_start == 0) {
+		jbd_debug(1,"JBD: Skipping superblock update on recovered sb "
+			"(start %u, seq %d, errno %d)\n",
+			journal->j_tail, journal->j_tail_sequence,
+			journal->j_errno);
+		journal->j_flags |= JFS_FLUSHED;
+	} else {
+		/* Add the dynamic fields and write it to disk. */
+		journal_update_sb_log_tail(journal);
+	}
 	return journal_start_thread(journal);
 }
 
@@ -1001,35 +1015,11 @@ int journal_create(journal_t *journal)
 	return journal_reset(journal);
 }
 
-/**
- * void journal_update_superblock() - Update journal sb on disk.
- * @journal: The journal to update.
- * @wait: Set to '0' if you don't want to wait for IO completion.
- *
- * Update a journal's dynamic superblock fields and write it to disk,
- * optionally waiting for the IO to complete.
- */
-void journal_update_superblock(journal_t *journal, int wait)
+static void journal_write_superblock(journal_t *journal)
 {
-	journal_superblock_t *sb = journal->j_superblock;
 	struct buffer_head *bh = journal->j_sb_buffer;
 
-	/*
-	 * As a special case, if the on-disk copy is already marked as needing
-	 * no recovery (s_start == 0) and there are no outstanding transactions
-	 * in the filesystem, then we can safely defer the superblock update
-	 * until the next commit by setting JFS_FLUSHED.  This avoids
-	 * attempting a write to a potential-readonly device.
-	 */
-	if (sb->s_start == 0 && journal->j_tail_sequence ==
-				journal->j_transaction_sequence) {
-		jbd_debug(1,"JBD: Skipping superblock update on recovered sb "
-			"(start %u, seq %d, errno %d)\n",
-			journal->j_tail, journal->j_tail_sequence,
-			journal->j_errno);
-		goto out;
-	}
-
+	trace_journal_write_superblock(journal);
 	if (buffer_write_io_error(bh)) {
 		char b[BDEVNAME_SIZE];
 		/*
@@ -1047,44 +1037,94 @@ void journal_update_superblock(journal_t *journal, int wait)
 		set_buffer_uptodate(bh);
 	}
 
+	BUFFER_TRACE(bh, "marking dirty");
+	mark_buffer_dirty(bh);
+	sync_dirty_buffer(bh);
+	if (buffer_write_io_error(bh)) {
+		char b[BDEVNAME_SIZE];
+		printk(KERN_ERR "JBD: I/O error detected "
+		       "when updating journal superblock for %s.\n",
+		       journal_dev_name(journal, b));
+		clear_buffer_write_io_error(bh);
+		set_buffer_uptodate(bh);
+	}
+}
+
+/**
+ * journal_update_sb_log_tail() - Update log tail in journal sb on disk.
+ * @journal: The journal to update.
+ *
+ * Update a journal's superblock information about log tail and write it to
+ * disk, waiting for the IO to complete.
+ */
+void journal_update_sb_log_tail(journal_t *journal)
+{
+	journal_superblock_t *sb = journal->j_superblock;
+
 	spin_lock(&journal->j_state_lock);
 	jbd_debug(1,"JBD: updating superblock (start %u, seq %d, errno %d)\n",
 		  journal->j_tail, journal->j_tail_sequence, journal->j_errno);
 
 	sb->s_sequence = cpu_to_be32(journal->j_tail_sequence);
 	sb->s_start    = cpu_to_be32(journal->j_tail);
-	sb->s_errno    = cpu_to_be32(journal->j_errno);
 	spin_unlock(&journal->j_state_lock);
 
-	BUFFER_TRACE(bh, "marking dirty");
-	mark_buffer_dirty(bh);
-	if (wait) {
-		sync_dirty_buffer(bh);
-		if (buffer_write_io_error(bh)) {
-			char b[BDEVNAME_SIZE];
-			printk(KERN_ERR "JBD: I/O error detected "
-			       "when updating journal superblock for %s.\n",
-			       journal_dev_name(journal, b));
-			clear_buffer_write_io_error(bh);
-			set_buffer_uptodate(bh);
-		}
-	} else
-		write_dirty_buffer(bh, WRITE);
+	journal_write_superblock(journal);
 
-	trace_jbd_update_superblock_end(journal, wait);
-out:
-	/* If we have just flushed the log (by marking s_start==0), then
-	 * any future commit will have to be careful to update the
-	 * superblock again to re-record the true start of the log. */
+	/* Log is no longer empty */
+	spin_lock(&journal->j_state_lock);
+	WARN_ON(!sb->s_sequence);
+	journal->j_flags &= ~JFS_FLUSHED;
+	spin_unlock(&journal->j_state_lock);
+}
+
+/**
+ * mark_journal_empty() - Mark on disk journal as empty.
+ * @journal: The journal to update.
+ *
+ * Update a journal's dynamic superblock fields to show that journal is empty.
+ * Write updated superblock to disk waiting for IO to complete.
+ */
+static void mark_journal_empty(journal_t *journal)
+{
+	journal_superblock_t *sb = journal->j_superblock;
 
 	spin_lock(&journal->j_state_lock);
-	if (sb->s_start)
-		journal->j_flags &= ~JFS_FLUSHED;
-	else
-		journal->j_flags |= JFS_FLUSHED;
+	jbd_debug(1, "JBD: Marking journal as empty (seq %d)\n",
+        	  journal->j_tail_sequence);
+
+	sb->s_sequence = cpu_to_be32(journal->j_tail_sequence);
+	sb->s_start    = cpu_to_be32(0);
+	spin_unlock(&journal->j_state_lock);
+
+	journal_write_superblock(journal);
+
+	spin_lock(&journal->j_state_lock);
+	/* Log is empty */
+	journal->j_flags |= JFS_FLUSHED;
 	spin_unlock(&journal->j_state_lock);
 }
 
+/**
+ * journal_update_sb_errno() - Update error in the journal.
+ * @journal: The journal to update.
+ *
+ * Update a journal's errno.  Write updated superblock to disk waiting for IO
+ * to complete.
+ */
+static void journal_update_sb_errno(journal_t *journal)
+{
+	journal_superblock_t *sb = journal->j_superblock;
+
+	spin_lock(&journal->j_state_lock);
+	jbd_debug(1, "JBD: updating superblock error (errno %d)\n",
+        	  journal->j_errno);
+	sb->s_errno = cpu_to_be32(journal->j_errno);
+	spin_unlock(&journal->j_state_lock);
+
+	journal_write_superblock(journal);
+}
+
 /*
  * Read the superblock for a given journal, performing initial
  * validation of the format.
@@ -1268,14 +1308,11 @@ int journal_destroy(journal_t *journal)
 
 	if (journal->j_sb_buffer) {
 		if (!is_journal_aborted(journal)) {
-			/* We can now mark the journal as empty. */
-			journal->j_tail = 0;
 			journal->j_tail_sequence =
 				++journal->j_transaction_sequence;
-			journal_update_superblock(journal, 1);
-		} else {
+			mark_journal_empty(journal);
+		} else
 			err = -EIO;
-		}
 		brelse(journal->j_sb_buffer);
 	}
 
@@ -1457,7 +1494,6 @@ int journal_flush(journal_t *journal)
 {
 	int err = 0;
 	transaction_t *transaction = NULL;
-	unsigned int old_tail;
 
 	spin_lock(&journal->j_state_lock);
 
@@ -1499,14 +1535,8 @@ int journal_flush(journal_t *journal)
 	 * the magic code for a fully-recovered superblock.  Any future
 	 * commits of data to the journal will restore the current
 	 * s_start value. */
+	mark_journal_empty(journal);
 	spin_lock(&journal->j_state_lock);
-	old_tail = journal->j_tail;
-	journal->j_tail = 0;
-	spin_unlock(&journal->j_state_lock);
-	journal_update_superblock(journal, 1);
-	spin_lock(&journal->j_state_lock);
-	journal->j_tail = old_tail;
-
 	J_ASSERT(!journal->j_running_transaction);
 	J_ASSERT(!journal->j_committing_transaction);
 	J_ASSERT(!journal->j_checkpoint_transactions);
@@ -1547,7 +1577,7 @@ int journal_wipe(journal_t *journal, int write)
 
 	err = journal_skip_recovery(journal);
 	if (write)
-		journal_update_superblock(journal, 1);
+		mark_journal_empty(journal);
 
  no_recovery:
 	return err;
@@ -1615,7 +1645,7 @@ static void __journal_abort_soft (journal_t *journal, int errno)
 	__journal_abort_hard(journal);
 
 	if (errno)
-		journal_update_superblock(journal, 1);
+		journal_update_sb_errno(journal);
 }
 
 /**
diff --git a/include/linux/jbd.h b/include/linux/jbd.h
index f265682ae134..9716d370c501 100644
--- a/include/linux/jbd.h
+++ b/include/linux/jbd.h
@@ -864,7 +864,7 @@ extern int	   journal_destroy    (journal_t *);
 extern int	   journal_recover    (journal_t *journal);
 extern int	   journal_wipe       (journal_t *, int);
 extern int	   journal_skip_recovery	(journal_t *);
-extern void	   journal_update_superblock	(journal_t *, int);
+extern void	   journal_update_sb_log_tail	(journal_t *);
 extern void	   journal_abort      (journal_t *, int);
 extern int	   journal_errno      (journal_t *);
 extern void	   journal_ack_err    (journal_t *);
diff --git a/include/trace/events/jbd.h b/include/trace/events/jbd.h
index 9305e1b5edc3..d9658a940a39 100644
--- a/include/trace/events/jbd.h
+++ b/include/trace/events/jbd.h
@@ -169,24 +169,20 @@ TRACE_EVENT(jbd_cleanup_journal_tail,
 		  __entry->block_nr, __entry->freed)
 );
 
-TRACE_EVENT(jbd_update_superblock_end,
-	TP_PROTO(journal_t *journal, int wait),
+TRACE_EVENT(journal_write_superblock,
+	TP_PROTO(journal_t *journal),
 
-	TP_ARGS(journal, wait),
+	TP_ARGS(journal),
 
 	TP_STRUCT__entry(
 		__field(	dev_t,	dev			)
-		__field(	int,	wait			)
 	),
 
 	TP_fast_assign(
 		__entry->dev		= journal->j_fs_dev->bd_dev;
-		__entry->wait		= wait;
 	),
 
-	TP_printk("dev %d,%d wait %d",
-		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		   __entry->wait)
+	TP_printk("dev %d,%d", MAJOR(__entry->dev), MINOR(__entry->dev))
 );
 
 #endif /* _TRACE_JBD_H */
-- 
cgit v1.3-7-g2ca7


From fd2cbd4dfa3db477dd6226d387d3f1911d36a6a9 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Sat, 7 Apr 2012 11:05:19 +0200
Subject: jbd: Write journal superblock with WRITE_FUA after checkpointing

If journal superblock is written only in disk's caches and other transaction
starts reusing space of the transaction cleaned from the log, it can happen
blocks of a new transaction reach the disk before journal superblock. When
power failure happens in such case, subsequent journal replay would still try
to replay the old transaction but some of it's blocks may be already
overwritten by the new transaction. For this reason we must use WRITE_FUA when
updating log tail and we must first write new log tail to disk and update
in-memory information only after that.

Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/jbd/checkpoint.c        | 23 ++++++++----------
 fs/jbd/commit.c            |  9 ++++++-
 fs/jbd/journal.c           | 60 ++++++++++++++++++++++++++++++----------------
 include/linux/jbd.h        |  3 ++-
 include/trace/events/jbd.h |  9 ++++---
 5 files changed, 65 insertions(+), 39 deletions(-)

(limited to 'include/linux')

diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
index 80c85f3e087f..08c03044abdd 100644
--- a/fs/jbd/checkpoint.c
+++ b/fs/jbd/checkpoint.c
@@ -508,20 +508,19 @@ int cleanup_journal_tail(journal_t *journal)
 	/*
 	 * We need to make sure that any blocks that were recently written out
 	 * --- perhaps by log_do_checkpoint() --- are flushed out before we
-	 * drop the transactions from the journal. It's unlikely this will be
-	 * necessary, especially with an appropriately sized journal, but we
-	 * need this to guarantee correctness.  Fortunately
-	 * cleanup_journal_tail() doesn't get called all that often.
+	 * drop the transactions from the journal. Similarly we need to be sure
+	 * superblock makes it to disk before next transaction starts reusing
+	 * freed space (otherwise we could replay some blocks of the new
+	 * transaction thinking they belong to the old one). So we use
+	 * WRITE_FLUSH_FUA. It's unlikely this will be necessary, especially
+	 * with an appropriately sized journal, but we need this to guarantee
+	 * correctness.  Fortunately cleanup_journal_tail() doesn't get called
+	 * all that often.
 	 */
-	if (journal->j_flags & JFS_BARRIER)
-		blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL);
+	journal_update_sb_log_tail(journal, first_tid, blocknr,
+				   WRITE_FLUSH_FUA);
 
 	spin_lock(&journal->j_state_lock);
-	if (!tid_gt(first_tid, journal->j_tail_sequence)) {
-		spin_unlock(&journal->j_state_lock);
-		/* Someone else cleaned up journal so return 0 */
-		return 0;
-	}
 	/* OK, update the superblock to recover the freed space.
 	 * Physical blocks come first: have we wrapped beyond the end of
 	 * the log?  */
@@ -539,8 +538,6 @@ int cleanup_journal_tail(journal_t *journal)
 	journal->j_tail_sequence = first_tid;
 	journal->j_tail = blocknr;
 	spin_unlock(&journal->j_state_lock);
-	if (!(journal->j_flags & JFS_ABORT))
-		journal_update_sb_log_tail(journal);
 	return 0;
 }
 
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 1b27f46e6108..52c15c776029 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -309,7 +309,14 @@ void journal_commit_transaction(journal_t *journal)
 	if (journal->j_flags & JFS_FLUSHED) {
 		jbd_debug(3, "super block updated\n");
 		mutex_lock(&journal->j_checkpoint_mutex);
-		journal_update_sb_log_tail(journal);
+		/*
+		 * We hold j_checkpoint_mutex so tail cannot change under us.
+		 * We don't need any special data guarantees for writing sb
+		 * since journal is empty and it is ok for write to be
+		 * flushed only with transaction commit.
+		 */
+		journal_update_sb_log_tail(journal, journal->j_tail_sequence,
+					   journal->j_tail, WRITE_SYNC);
 		mutex_unlock(&journal->j_checkpoint_mutex);
 	} else {
 		jbd_debug(3, "superblock not updated\n");
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index b29c7678525d..425c2f2cf170 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -938,8 +938,16 @@ static int journal_reset(journal_t *journal)
 	} else {
 		/* Lock here to make assertions happy... */
 		mutex_lock(&journal->j_checkpoint_mutex);
-		/* Add the dynamic fields and write it to disk. */
-		journal_update_sb_log_tail(journal);
+		/*
+		 * Update log tail information. We use WRITE_FUA since new
+		 * transaction will start reusing journal space and so we
+		 * must make sure information about current log tail is on
+		 * disk before that.
+		 */
+		journal_update_sb_log_tail(journal,
+					   journal->j_tail_sequence,
+					   journal->j_tail,
+					   WRITE_FUA);
 		mutex_unlock(&journal->j_checkpoint_mutex);
 	}
 	return journal_start_thread(journal);
@@ -1018,11 +1026,15 @@ int journal_create(journal_t *journal)
 	return journal_reset(journal);
 }
 
-static void journal_write_superblock(journal_t *journal)
+static void journal_write_superblock(journal_t *journal, int write_op)
 {
 	struct buffer_head *bh = journal->j_sb_buffer;
+	int ret;
 
-	trace_journal_write_superblock(journal);
+	trace_journal_write_superblock(journal, write_op);
+	if (!(journal->j_flags & JFS_BARRIER))
+		write_op &= ~(REQ_FUA | REQ_FLUSH);
+	lock_buffer(bh);
 	if (buffer_write_io_error(bh)) {
 		char b[BDEVNAME_SIZE];
 		/*
@@ -1040,40 +1052,46 @@ static void journal_write_superblock(journal_t *journal)
 		set_buffer_uptodate(bh);
 	}
 
-	BUFFER_TRACE(bh, "marking dirty");
-	mark_buffer_dirty(bh);
-	sync_dirty_buffer(bh);
+	get_bh(bh);
+	bh->b_end_io = end_buffer_write_sync;
+	ret = submit_bh(write_op, bh);
+	wait_on_buffer(bh);
 	if (buffer_write_io_error(bh)) {
-		char b[BDEVNAME_SIZE];
-		printk(KERN_ERR "JBD: I/O error detected "
-		       "when updating journal superblock for %s.\n",
-		       journal_dev_name(journal, b));
 		clear_buffer_write_io_error(bh);
 		set_buffer_uptodate(bh);
+		ret = -EIO;
+	}
+	if (ret) {
+		char b[BDEVNAME_SIZE];
+		printk(KERN_ERR "JBD: Error %d detected "
+		       "when updating journal superblock for %s.\n",
+		       ret, journal_dev_name(journal, b));
 	}
 }
 
 /**
  * journal_update_sb_log_tail() - Update log tail in journal sb on disk.
  * @journal: The journal to update.
+ * @tail_tid: TID of the new transaction at the tail of the log
+ * @tail_block: The first block of the transaction at the tail of the log
+ * @write_op: With which operation should we write the journal sb
  *
  * Update a journal's superblock information about log tail and write it to
  * disk, waiting for the IO to complete.
  */
-void journal_update_sb_log_tail(journal_t *journal)
+void journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid,
+				unsigned int tail_block, int write_op)
 {
 	journal_superblock_t *sb = journal->j_superblock;
 
 	BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
-	spin_lock(&journal->j_state_lock);
-	jbd_debug(1,"JBD: updating superblock (start %u, seq %d, errno %d)\n",
-		  journal->j_tail, journal->j_tail_sequence, journal->j_errno);
+	jbd_debug(1,"JBD: updating superblock (start %u, seq %u)\n",
+		  tail_block, tail_tid);
 
-	sb->s_sequence = cpu_to_be32(journal->j_tail_sequence);
-	sb->s_start    = cpu_to_be32(journal->j_tail);
-	spin_unlock(&journal->j_state_lock);
+	sb->s_sequence = cpu_to_be32(tail_tid);
+	sb->s_start    = cpu_to_be32(tail_block);
 
-	journal_write_superblock(journal);
+	journal_write_superblock(journal, write_op);
 
 	/* Log is no longer empty */
 	spin_lock(&journal->j_state_lock);
@@ -1102,7 +1120,7 @@ static void mark_journal_empty(journal_t *journal)
 	sb->s_start    = cpu_to_be32(0);
 	spin_unlock(&journal->j_state_lock);
 
-	journal_write_superblock(journal);
+	journal_write_superblock(journal, WRITE_FUA);
 
 	spin_lock(&journal->j_state_lock);
 	/* Log is empty */
@@ -1127,7 +1145,7 @@ static void journal_update_sb_errno(journal_t *journal)
 	sb->s_errno = cpu_to_be32(journal->j_errno);
 	spin_unlock(&journal->j_state_lock);
 
-	journal_write_superblock(journal);
+	journal_write_superblock(journal, WRITE_SYNC);
 }
 
 /*
diff --git a/include/linux/jbd.h b/include/linux/jbd.h
index 9716d370c501..c8f32975f0e4 100644
--- a/include/linux/jbd.h
+++ b/include/linux/jbd.h
@@ -864,7 +864,8 @@ extern int	   journal_destroy    (journal_t *);
 extern int	   journal_recover    (journal_t *journal);
 extern int	   journal_wipe       (journal_t *, int);
 extern int	   journal_skip_recovery	(journal_t *);
-extern void	   journal_update_sb_log_tail	(journal_t *);
+extern void	   journal_update_sb_log_tail	(journal_t *, tid_t, unsigned int,
+						 int);
 extern void	   journal_abort      (journal_t *, int);
 extern int	   journal_errno      (journal_t *);
 extern void	   journal_ack_err    (journal_t *);
diff --git a/include/trace/events/jbd.h b/include/trace/events/jbd.h
index d9658a940a39..da6f2591c25e 100644
--- a/include/trace/events/jbd.h
+++ b/include/trace/events/jbd.h
@@ -170,19 +170,22 @@ TRACE_EVENT(jbd_cleanup_journal_tail,
 );
 
 TRACE_EVENT(journal_write_superblock,
-	TP_PROTO(journal_t *journal),
+	TP_PROTO(journal_t *journal, int write_op),
 
-	TP_ARGS(journal),
+	TP_ARGS(journal, write_op),
 
 	TP_STRUCT__entry(
 		__field(	dev_t,	dev			)
+		__field(	int,	write_op		)
 	),
 
 	TP_fast_assign(
 		__entry->dev		= journal->j_fs_dev->bd_dev;
+		__entry->write_op	= write_op;
 	),
 
-	TP_printk("dev %d,%d", MAJOR(__entry->dev), MINOR(__entry->dev))
+	TP_printk("dev %d,%d write_op %x", MAJOR(__entry->dev),
+		  MINOR(__entry->dev), __entry->write_op)
 );
 
 #endif /* _TRACE_JBD_H */
-- 
cgit v1.3-7-g2ca7


From c142786c6291189b5c85f53d91743e1eefbd8fe0 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Mon, 14 May 2012 15:44:06 +0300
Subject: KVM: MMU: Don't use RCU for lockless shadow walking

Using RCU for lockless shadow walking can increase the amount of memory
in use by the system, since RCU grace periods are unpredictable.  We also
have an unconditional write to a shared variable (reader_counter), which
isn't good for scaling.

Replace that with a scheme similar to x86's get_user_pages_fast(): disable
interrupts during lockless shadow walk to force the freer
(kvm_mmu_commit_zap_page()) to wait for the TLB flush IPI to find the
processor with interrupts enabled.

We also add a new vcpu->mode, READING_SHADOW_PAGE_TABLES, to prevent
kvm_flush_remote_tlbs() from avoiding the IPI.

Signed-off-by: Avi Kivity <avi@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  4 ---
 arch/x86/kvm/mmu.c              | 73 ++++++++++++++++-------------------------
 include/linux/kvm_host.h        |  3 +-
 3 files changed, 31 insertions(+), 49 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 69e39bc7e36f..64c8989263f6 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -240,8 +240,6 @@ struct kvm_mmu_page {
 #endif
 
 	int write_flooding_count;
-
-	struct rcu_head rcu;
 };
 
 struct kvm_pio_request {
@@ -540,8 +538,6 @@ struct kvm_arch {
 	u64 hv_guest_os_id;
 	u64 hv_hypercall;
 
-	atomic_t reader_counter;
-
 	#ifdef CONFIG_KVM_MMU_AUDIT
 	int audit_point;
 	#endif
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 07424cf60434..72102e0ab7cb 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -551,19 +551,29 @@ static u64 mmu_spte_get_lockless(u64 *sptep)
 
 static void walk_shadow_page_lockless_begin(struct kvm_vcpu *vcpu)
 {
-	rcu_read_lock();
-	atomic_inc(&vcpu->kvm->arch.reader_counter);
-
-	/* Increase the counter before walking shadow page table */
-	smp_mb__after_atomic_inc();
+	/*
+	 * Prevent page table teardown by making any free-er wait during
+	 * kvm_flush_remote_tlbs() IPI to all active vcpus.
+	 */
+	local_irq_disable();
+	vcpu->mode = READING_SHADOW_PAGE_TABLES;
+	/*
+	 * Make sure a following spte read is not reordered ahead of the write
+	 * to vcpu->mode.
+	 */
+	smp_mb();
 }
 
 static void walk_shadow_page_lockless_end(struct kvm_vcpu *vcpu)
 {
-	/* Decrease the counter after walking shadow page table finished */
-	smp_mb__before_atomic_dec();
-	atomic_dec(&vcpu->kvm->arch.reader_counter);
-	rcu_read_unlock();
+	/*
+	 * Make sure the write to vcpu->mode is not reordered in front of
+	 * reads to sptes.  If it does, kvm_commit_zap_page() can see us
+	 * OUTSIDE_GUEST_MODE and proceed to free the shadow page table.
+	 */
+	smp_mb();
+	vcpu->mode = OUTSIDE_GUEST_MODE;
+	local_irq_enable();
 }
 
 static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
@@ -1989,30 +1999,6 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
 	return ret;
 }
 
-static void kvm_mmu_isolate_pages(struct list_head *invalid_list)
-{
-	struct kvm_mmu_page *sp;
-
-	list_for_each_entry(sp, invalid_list, link)
-		kvm_mmu_isolate_page(sp);
-}
-
-static void free_pages_rcu(struct rcu_head *head)
-{
-	struct kvm_mmu_page *next, *sp;
-
-	sp = container_of(head, struct kvm_mmu_page, rcu);
-	while (sp) {
-		if (!list_empty(&sp->link))
-			next = list_first_entry(&sp->link,
-				      struct kvm_mmu_page, link);
-		else
-			next = NULL;
-		kvm_mmu_free_page(sp);
-		sp = next;
-	}
-}
-
 static void kvm_mmu_commit_zap_page(struct kvm *kvm,
 				    struct list_head *invalid_list)
 {
@@ -2021,17 +2007,17 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,
 	if (list_empty(invalid_list))
 		return;
 
-	kvm_flush_remote_tlbs(kvm);
-
-	if (atomic_read(&kvm->arch.reader_counter)) {
-		kvm_mmu_isolate_pages(invalid_list);
-		sp = list_first_entry(invalid_list, struct kvm_mmu_page, link);
-		list_del_init(invalid_list);
+	/*
+	 * wmb: make sure everyone sees our modifications to the page tables
+	 * rmb: make sure we see changes to vcpu->mode
+	 */
+	smp_mb();
 
-		trace_kvm_mmu_delay_free_pages(sp);
-		call_rcu(&sp->rcu, free_pages_rcu);
-		return;
-	}
+	/*
+	 * Wait for all vcpus to exit guest mode and/or lockless shadow
+	 * page table walks.
+	 */
+	kvm_flush_remote_tlbs(kvm);
 
 	do {
 		sp = list_first_entry(invalid_list, struct kvm_mmu_page, link);
@@ -2039,7 +2025,6 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,
 		kvm_mmu_isolate_page(sp);
 		kvm_mmu_free_page(sp);
 	} while (!list_empty(invalid_list));
-
 }
 
 /*
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index cae342d29d1b..c4464356b35b 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -128,7 +128,8 @@ int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu);
 enum {
 	OUTSIDE_GUEST_MODE,
 	IN_GUEST_MODE,
-	EXITING_GUEST_MODE
+	EXITING_GUEST_MODE,
+	READING_SHADOW_PAGE_TABLES,
 };
 
 /*
-- 
cgit v1.3-7-g2ca7


From 1526bf9ccf310f1d35c1275b8b477a249d25aaf2 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Tue, 15 May 2012 14:15:25 +0200
Subject: KVM: s390: add capability indicating COW support

Currently qemu/kvm on s390 uses a guest mapping that does not
allow the guest backing page table to be write-protected to
support older systems. On those older systems a host write
protection fault will be delivered to the guest.

Newer systems allow to write-protect the guest backing memory
and let the fault be delivered to the host, thus allowing COW.

Use a capability bit to tell qemu if that is possible.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Acked-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/s390/include/asm/sclp.h |  1 +
 arch/s390/kvm/kvm-s390.c     |  4 ++++
 drivers/s390/char/sclp_cmd.c | 12 +++++++++++-
 include/linux/kvm.h          |  1 +
 4 files changed, 17 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h
index fed7bee650a0..bf238c55740b 100644
--- a/arch/s390/include/asm/sclp.h
+++ b/arch/s390/include/asm/sclp.h
@@ -48,6 +48,7 @@ int sclp_cpu_deconfigure(u8 cpu);
 void sclp_facilities_detect(void);
 unsigned long long sclp_get_rnmax(void);
 unsigned long long sclp_get_rzm(void);
+u8 sclp_get_fac85(void);
 int sclp_sdias_blk_count(void);
 int sclp_sdias_copy(void *dest, int blk_num, int nr_blks);
 int sclp_chp_configure(struct chp_id chpid);
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index e5e3800b0125..5c761bffa02d 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -28,6 +28,7 @@
 #include <asm/pgtable.h>
 #include <asm/nmi.h>
 #include <asm/switch_to.h>
+#include <asm/sclp.h>
 #include "kvm-s390.h"
 #include "gaccess.h"
 
@@ -140,6 +141,9 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_MAX_VCPUS:
 		r = KVM_MAX_VCPUS;
 		break;
+	case KVM_CAP_S390_COW:
+		r = sclp_get_fac85() & 0x2;
+		break;
 	default:
 		r = 0;
 	}
diff --git a/drivers/s390/char/sclp_cmd.c b/drivers/s390/char/sclp_cmd.c
index 231a1d85127b..032171e335e9 100644
--- a/drivers/s390/char/sclp_cmd.c
+++ b/drivers/s390/char/sclp_cmd.c
@@ -17,6 +17,7 @@
 #include <linux/mm.h>
 #include <linux/mmzone.h>
 #include <linux/memory.h>
+#include <linux/module.h>
 #include <linux/platform_device.h>
 #include <asm/chpid.h>
 #include <asm/sclp.h>
@@ -38,7 +39,8 @@ struct read_info_sccb {
 	u64	facilities;		/* 48-55 */
 	u8	_reserved2[84 - 56];	/* 56-83 */
 	u8	fac84;			/* 84 */
-	u8	_reserved3[91 - 85];	/* 85-90 */
+	u8	fac85;			/* 85 */
+	u8	_reserved3[91 - 86];	/* 86-90 */
 	u8	flags;			/* 91 */
 	u8	_reserved4[100 - 92];	/* 92-99 */
 	u32	rnsize2;		/* 100-103 */
@@ -51,6 +53,7 @@ static int __initdata early_read_info_sccb_valid;
 
 u64 sclp_facilities;
 static u8 sclp_fac84;
+static u8 sclp_fac85;
 static unsigned long long rzm;
 static unsigned long long rnmax;
 
@@ -112,6 +115,7 @@ void __init sclp_facilities_detect(void)
 	sccb = &early_read_info_sccb;
 	sclp_facilities = sccb->facilities;
 	sclp_fac84 = sccb->fac84;
+	sclp_fac85 = sccb->fac85;
 	rnmax = sccb->rnmax ? sccb->rnmax : sccb->rnmax2;
 	rzm = sccb->rnsize ? sccb->rnsize : sccb->rnsize2;
 	rzm <<= 20;
@@ -127,6 +131,12 @@ unsigned long long sclp_get_rzm(void)
 	return rzm;
 }
 
+u8 sclp_get_fac85(void)
+{
+	return sclp_fac85;
+}
+EXPORT_SYMBOL_GPL(sclp_get_fac85);
+
 /*
  * This function will be called after sclp_facilities_detect(), which gets
  * called from early.c code. Therefore the sccb should have valid contents.
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 8d696cf6edcc..09f2b3aa2da7 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -616,6 +616,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_KVMCLOCK_CTRL 76
 #define KVM_CAP_SIGNAL_MSI 77
 #define KVM_CAP_PPC_GET_SMMU_INFO 78
+#define KVM_CAP_S390_COW 79
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
-- 
cgit v1.3-7-g2ca7


From 3f7e82759c692df473675ed06fb90b20f1f225c3 Mon Sep 17 00:00:00 2001
From: Rhyland Klein <rklein@nvidia.com>
Date: Tue, 8 May 2012 11:42:38 -0700
Subject: mfd: Commonize tps65910 regmap access through header

This change removes the read/write callback functions in favor of common
regmap accessors inside the header file. This change also makes use of
regmap_read/write for single register access which maps better onto what this
driver actually needs.

Signed-off-by: Rhyland Klein <rklein@nvidia.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/gpio/gpio-tps65910.c           | 14 +++---
 drivers/mfd/tps65910-irq.c             | 34 ++++++-------
 drivers/mfd/tps65910.c                 | 40 ++++------------
 drivers/regulator/tps65910-regulator.c | 88 ++++++++++++++++------------------
 include/linux/mfd/tps65910.h           | 29 +++++++++--
 5 files changed, 100 insertions(+), 105 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpio/gpio-tps65910.c b/drivers/gpio/gpio-tps65910.c
index 7eef648a3351..bc155f2509ba 100644
--- a/drivers/gpio/gpio-tps65910.c
+++ b/drivers/gpio/gpio-tps65910.c
@@ -23,9 +23,9 @@
 static int tps65910_gpio_get(struct gpio_chip *gc, unsigned offset)
 {
 	struct tps65910 *tps65910 = container_of(gc, struct tps65910, gpio);
-	uint8_t val;
+	unsigned int val;
 
-	tps65910->read(tps65910, TPS65910_GPIO0 + offset, 1, &val);
+	tps65910_reg_read(tps65910, TPS65910_GPIO0 + offset, &val);
 
 	if (val & GPIO_STS_MASK)
 		return 1;
@@ -39,10 +39,10 @@ static void tps65910_gpio_set(struct gpio_chip *gc, unsigned offset,
 	struct tps65910 *tps65910 = container_of(gc, struct tps65910, gpio);
 
 	if (value)
-		tps65910_set_bits(tps65910, TPS65910_GPIO0 + offset,
+		tps65910_reg_set_bits(tps65910, TPS65910_GPIO0 + offset,
 						GPIO_SET_MASK);
 	else
-		tps65910_clear_bits(tps65910, TPS65910_GPIO0 + offset,
+		tps65910_reg_clear_bits(tps65910, TPS65910_GPIO0 + offset,
 						GPIO_SET_MASK);
 }
 
@@ -54,7 +54,7 @@ static int tps65910_gpio_output(struct gpio_chip *gc, unsigned offset,
 	/* Set the initial value */
 	tps65910_gpio_set(gc, offset, value);
 
-	return tps65910_set_bits(tps65910, TPS65910_GPIO0 + offset,
+	return tps65910_reg_set_bits(tps65910, TPS65910_GPIO0 + offset,
 						GPIO_CFG_MASK);
 }
 
@@ -62,7 +62,7 @@ static int tps65910_gpio_input(struct gpio_chip *gc, unsigned offset)
 {
 	struct tps65910 *tps65910 = container_of(gc, struct tps65910, gpio);
 
-	return tps65910_clear_bits(tps65910, TPS65910_GPIO0 + offset,
+	return tps65910_reg_clear_bits(tps65910, TPS65910_GPIO0 + offset,
 						GPIO_CFG_MASK);
 }
 
@@ -102,7 +102,7 @@ void tps65910_gpio_init(struct tps65910 *tps65910, int gpio_base)
 		int i;
 		for (i = 0; i < tps65910->gpio.ngpio; ++i) {
 			if (board_data->en_gpio_sleep[i]) {
-				ret = tps65910_set_bits(tps65910,
+				ret = tps65910_reg_set_bits(tps65910,
 					TPS65910_GPIO0 + i, GPIO_SLEEP_MASK);
 				if (ret < 0)
 					dev_warn(tps65910->dev,
diff --git a/drivers/mfd/tps65910-irq.c b/drivers/mfd/tps65910-irq.c
index c9ed5c00a621..0f1ff7fbdc74 100644
--- a/drivers/mfd/tps65910-irq.c
+++ b/drivers/mfd/tps65910-irq.c
@@ -41,28 +41,28 @@ static inline int irq_to_tps65910_irq(struct tps65910 *tps65910,
 static irqreturn_t tps65910_irq(int irq, void *irq_data)
 {
 	struct tps65910 *tps65910 = irq_data;
+	unsigned int reg;
 	u32 irq_sts;
 	u32 irq_mask;
-	u8 reg;
 	int i;
 
-	tps65910->read(tps65910, TPS65910_INT_STS, 1, &reg);
+	tps65910_reg_read(tps65910, TPS65910_INT_STS, &reg);
 	irq_sts = reg;
-	tps65910->read(tps65910, TPS65910_INT_STS2, 1, &reg);
+	tps65910_reg_read(tps65910, TPS65910_INT_STS2, &reg);
 	irq_sts |= reg << 8;
 	switch (tps65910_chip_id(tps65910)) {
 	case TPS65911:
-		tps65910->read(tps65910, TPS65910_INT_STS3, 1, &reg);
+		tps65910_reg_read(tps65910, TPS65910_INT_STS3, &reg);
 		irq_sts |= reg << 16;
 	}
 
-	tps65910->read(tps65910, TPS65910_INT_MSK, 1, &reg);
+	tps65910_reg_read(tps65910, TPS65910_INT_MSK, &reg);
 	irq_mask = reg;
-	tps65910->read(tps65910, TPS65910_INT_MSK2, 1, &reg);
+	tps65910_reg_read(tps65910, TPS65910_INT_MSK2, &reg);
 	irq_mask |= reg << 8;
 	switch (tps65910_chip_id(tps65910)) {
 	case TPS65911:
-		tps65910->read(tps65910, TPS65910_INT_MSK3, 1, &reg);
+		tps65910_reg_read(tps65910, TPS65910_INT_MSK3, &reg);
 		irq_mask |= reg << 16;
 	}
 
@@ -82,13 +82,13 @@ static irqreturn_t tps65910_irq(int irq, void *irq_data)
 	/* Write the STS register back to clear IRQs we handled */
 	reg = irq_sts & 0xFF;
 	irq_sts >>= 8;
-	tps65910->write(tps65910, TPS65910_INT_STS, 1, &reg);
+	tps65910_reg_write(tps65910, TPS65910_INT_STS, reg);
 	reg = irq_sts & 0xFF;
-	tps65910->write(tps65910, TPS65910_INT_STS2, 1, &reg);
+	tps65910_reg_write(tps65910, TPS65910_INT_STS2, reg);
 	switch (tps65910_chip_id(tps65910)) {
 	case TPS65911:
 		reg = irq_sts >> 8;
-		tps65910->write(tps65910, TPS65910_INT_STS3, 1, &reg);
+		tps65910_reg_write(tps65910, TPS65910_INT_STS3, reg);
 	}
 
 	return IRQ_HANDLED;
@@ -105,27 +105,27 @@ static void tps65910_irq_sync_unlock(struct irq_data *data)
 {
 	struct tps65910 *tps65910 = irq_data_get_irq_chip_data(data);
 	u32 reg_mask;
-	u8 reg;
+	unsigned int reg;
 
-	tps65910->read(tps65910, TPS65910_INT_MSK, 1, &reg);
+	tps65910_reg_read(tps65910, TPS65910_INT_MSK, &reg);
 	reg_mask = reg;
-	tps65910->read(tps65910, TPS65910_INT_MSK2, 1, &reg);
+	tps65910_reg_read(tps65910, TPS65910_INT_MSK2, &reg);
 	reg_mask |= reg << 8;
 	switch (tps65910_chip_id(tps65910)) {
 	case TPS65911:
-		tps65910->read(tps65910, TPS65910_INT_MSK3, 1, &reg);
+		tps65910_reg_read(tps65910, TPS65910_INT_MSK3, &reg);
 		reg_mask |= reg << 16;
 	}
 
 	if (tps65910->irq_mask != reg_mask) {
 		reg = tps65910->irq_mask & 0xFF;
-		tps65910->write(tps65910, TPS65910_INT_MSK, 1, &reg);
+		tps65910_reg_write(tps65910, TPS65910_INT_MSK, reg);
 		reg = tps65910->irq_mask >> 8 & 0xFF;
-		tps65910->write(tps65910, TPS65910_INT_MSK2, 1, &reg);
+		tps65910_reg_write(tps65910, TPS65910_INT_MSK2, reg);
 		switch (tps65910_chip_id(tps65910)) {
 		case TPS65911:
 			reg = tps65910->irq_mask >> 16;
-			tps65910->write(tps65910, TPS65910_INT_MSK3, 1, &reg);
+			tps65910_reg_write(tps65910, TPS65910_INT_MSK3, reg);
 		}
 	}
 	mutex_unlock(&tps65910->irq_lock);
diff --git a/drivers/mfd/tps65910.c b/drivers/mfd/tps65910.c
index 7a55af921e25..7dffbe1a50c6 100644
--- a/drivers/mfd/tps65910.c
+++ b/drivers/mfd/tps65910.c
@@ -37,30 +37,6 @@ static struct mfd_cell tps65910s[] = {
 };
 
 
-static int tps65910_i2c_read(struct tps65910 *tps65910, u8 reg,
-				  int bytes, void *dest)
-{
-	return regmap_bulk_read(tps65910->regmap, reg, dest, bytes);
-}
-
-static int tps65910_i2c_write(struct tps65910 *tps65910, u8 reg,
-				  int bytes, void *src)
-{
-	return regmap_bulk_write(tps65910->regmap, reg, src, bytes);
-}
-
-int tps65910_set_bits(struct tps65910 *tps65910, u8 reg, u8 mask)
-{
-	return regmap_update_bits(tps65910->regmap, reg, mask, mask);
-}
-EXPORT_SYMBOL_GPL(tps65910_set_bits);
-
-int tps65910_clear_bits(struct tps65910 *tps65910, u8 reg, u8 mask)
-{
-	return regmap_update_bits(tps65910->regmap, reg, mask, 0);
-}
-EXPORT_SYMBOL_GPL(tps65910_clear_bits);
-
 static bool is_volatile_reg(struct device *dev, unsigned int reg)
 {
 	struct tps65910 *tps65910 = dev_get_drvdata(dev);
@@ -102,7 +78,7 @@ static int __devinit tps65910_sleepinit(struct tps65910 *tps65910,
 		return 0;
 
 	/* enabling SLEEP device state */
-	ret = tps65910_set_bits(tps65910, TPS65910_DEVCTRL,
+	ret = tps65910_reg_set_bits(tps65910, TPS65910_DEVCTRL,
 				DEVCTRL_DEV_SLP_MASK);
 	if (ret < 0) {
 		dev_err(dev, "set dev_slp failed: %d\n", ret);
@@ -114,7 +90,8 @@ static int __devinit tps65910_sleepinit(struct tps65910 *tps65910,
 		return 0;
 
 	if (pmic_pdata->slp_keepon->therm_keepon) {
-		ret = tps65910_set_bits(tps65910, TPS65910_SLEEP_KEEP_RES_ON,
+		ret = tps65910_reg_set_bits(tps65910,
+				TPS65910_SLEEP_KEEP_RES_ON,
 				SLEEP_KEEP_RES_ON_THERM_KEEPON_MASK);
 		if (ret < 0) {
 			dev_err(dev, "set therm_keepon failed: %d\n", ret);
@@ -123,7 +100,8 @@ static int __devinit tps65910_sleepinit(struct tps65910 *tps65910,
 	}
 
 	if (pmic_pdata->slp_keepon->clkout32k_keepon) {
-		ret = tps65910_set_bits(tps65910, TPS65910_SLEEP_KEEP_RES_ON,
+		ret = tps65910_reg_set_bits(tps65910,
+				TPS65910_SLEEP_KEEP_RES_ON,
 				SLEEP_KEEP_RES_ON_CLKOUT32K_KEEPON_MASK);
 		if (ret < 0) {
 			dev_err(dev, "set clkout32k_keepon failed: %d\n", ret);
@@ -132,7 +110,8 @@ static int __devinit tps65910_sleepinit(struct tps65910 *tps65910,
 	}
 
 	if (pmic_pdata->slp_keepon->i2chs_keepon) {
-		ret = tps65910_set_bits(tps65910, TPS65910_SLEEP_KEEP_RES_ON,
+		ret = tps65910_reg_set_bits(tps65910,
+				TPS65910_SLEEP_KEEP_RES_ON,
 				SLEEP_KEEP_RES_ON_I2CHS_KEEPON_MASK);
 		if (ret < 0) {
 			dev_err(dev, "set i2chs_keepon failed: %d\n", ret);
@@ -143,7 +122,8 @@ static int __devinit tps65910_sleepinit(struct tps65910 *tps65910,
 	return 0;
 
 disable_dev_slp:
-	tps65910_clear_bits(tps65910, TPS65910_DEVCTRL, DEVCTRL_DEV_SLP_MASK);
+	tps65910_reg_clear_bits(tps65910, TPS65910_DEVCTRL,
+				DEVCTRL_DEV_SLP_MASK);
 
 err_sleep_init:
 	return ret;
@@ -176,8 +156,6 @@ static __devinit int tps65910_i2c_probe(struct i2c_client *i2c,
 	tps65910->dev = &i2c->dev;
 	tps65910->i2c_client = i2c;
 	tps65910->id = id->driver_data;
-	tps65910->read = tps65910_i2c_read;
-	tps65910->write = tps65910_i2c_write;
 	mutex_init(&tps65910->io_mutex);
 
 	tps65910->regmap = regmap_init_i2c(i2c, &tps65910_regmap_config);
diff --git a/drivers/regulator/tps65910-regulator.c b/drivers/regulator/tps65910-regulator.c
index 4a37c2b6367f..852b05b20de9 100644
--- a/drivers/regulator/tps65910-regulator.c
+++ b/drivers/regulator/tps65910-regulator.c
@@ -331,21 +331,16 @@ struct tps65910_reg {
 
 static inline int tps65910_read(struct tps65910_reg *pmic, u8 reg)
 {
-	u8 val;
+	unsigned int val;
 	int err;
 
-	err = pmic->mfd->read(pmic->mfd, reg, 1, &val);
+	err = tps65910_reg_read(pmic->mfd, reg, &val);
 	if (err)
 		return err;
 
 	return val;
 }
 
-static inline int tps65910_write(struct tps65910_reg *pmic, u8 reg, u8 val)
-{
-	return pmic->mfd->write(pmic->mfd, reg, 1, &val);
-}
-
 static int tps65910_modify_bits(struct tps65910_reg *pmic, u8 reg,
 					u8 set_mask, u8 clear_mask)
 {
@@ -362,7 +357,7 @@ static int tps65910_modify_bits(struct tps65910_reg *pmic, u8 reg,
 
 	data &= ~clear_mask;
 	data |= set_mask;
-	err = tps65910_write(pmic, reg, data);
+	err = tps65910_reg_write(pmic->mfd, reg, data);
 	if (err)
 		dev_err(pmic->mfd->dev, "Write for reg 0x%x failed\n", reg);
 
@@ -371,7 +366,7 @@ out:
 	return err;
 }
 
-static int tps65910_reg_read(struct tps65910_reg *pmic, u8 reg)
+static int tps65910_reg_read_locked(struct tps65910_reg *pmic, u8 reg)
 {
 	int data;
 
@@ -385,13 +380,13 @@ static int tps65910_reg_read(struct tps65910_reg *pmic, u8 reg)
 	return data;
 }
 
-static int tps65910_reg_write(struct tps65910_reg *pmic, u8 reg, u8 val)
+static int tps65910_reg_write_locked(struct tps65910_reg *pmic, u8 reg, u8 val)
 {
 	int err;
 
 	mutex_lock(&pmic->mutex);
 
-	err = tps65910_write(pmic, reg, val);
+	err = tps65910_reg_write(pmic->mfd, reg, val);
 	if (err < 0)
 		dev_err(pmic->mfd->dev, "Write for reg 0x%x failed\n", reg);
 
@@ -476,7 +471,7 @@ static int tps65910_is_enabled(struct regulator_dev *dev)
 	if (reg < 0)
 		return reg;
 
-	value = tps65910_reg_read(pmic, reg);
+	value = tps65910_reg_read_locked(pmic, reg);
 	if (value < 0)
 		return value;
 
@@ -493,7 +488,7 @@ static int tps65910_enable(struct regulator_dev *dev)
 	if (reg < 0)
 		return reg;
 
-	return tps65910_set_bits(mfd, reg, TPS65910_SUPPLY_STATE_ENABLED);
+	return tps65910_reg_set_bits(mfd, reg, TPS65910_SUPPLY_STATE_ENABLED);
 }
 
 static int tps65910_disable(struct regulator_dev *dev)
@@ -506,7 +501,7 @@ static int tps65910_disable(struct regulator_dev *dev)
 	if (reg < 0)
 		return reg;
 
-	return tps65910_clear_bits(mfd, reg, TPS65910_SUPPLY_STATE_ENABLED);
+	return tps65910_reg_clear_bits(mfd, reg, TPS65910_SUPPLY_STATE_ENABLED);
 }
 
 static int tps65910_enable_time(struct regulator_dev *dev)
@@ -532,9 +527,9 @@ static int tps65910_set_mode(struct regulator_dev *dev, unsigned int mode)
 							LDO_ST_MODE_BIT);
 	case REGULATOR_MODE_IDLE:
 		value = LDO_ST_ON_BIT | LDO_ST_MODE_BIT;
-		return tps65910_set_bits(mfd, reg, value);
+		return tps65910_reg_set_bits(mfd, reg, value);
 	case REGULATOR_MODE_STANDBY:
-		return tps65910_clear_bits(mfd, reg, LDO_ST_ON_BIT);
+		return tps65910_reg_clear_bits(mfd, reg, LDO_ST_ON_BIT);
 	}
 
 	return -EINVAL;
@@ -549,7 +544,7 @@ static unsigned int tps65910_get_mode(struct regulator_dev *dev)
 	if (reg < 0)
 		return reg;
 
-	value = tps65910_reg_read(pmic, reg);
+	value = tps65910_reg_read_locked(pmic, reg);
 	if (value < 0)
 		return value;
 
@@ -569,28 +564,28 @@ static int tps65910_get_voltage_dcdc_sel(struct regulator_dev *dev)
 
 	switch (id) {
 	case TPS65910_REG_VDD1:
-		opvsel = tps65910_reg_read(pmic, TPS65910_VDD1_OP);
-		mult = tps65910_reg_read(pmic, TPS65910_VDD1);
+		opvsel = tps65910_reg_read_locked(pmic, TPS65910_VDD1_OP);
+		mult = tps65910_reg_read_locked(pmic, TPS65910_VDD1);
 		mult = (mult & VDD1_VGAIN_SEL_MASK) >> VDD1_VGAIN_SEL_SHIFT;
-		srvsel = tps65910_reg_read(pmic, TPS65910_VDD1_SR);
+		srvsel = tps65910_reg_read_locked(pmic, TPS65910_VDD1_SR);
 		sr = opvsel & VDD1_OP_CMD_MASK;
 		opvsel &= VDD1_OP_SEL_MASK;
 		srvsel &= VDD1_SR_SEL_MASK;
 		vselmax = 75;
 		break;
 	case TPS65910_REG_VDD2:
-		opvsel = tps65910_reg_read(pmic, TPS65910_VDD2_OP);
-		mult = tps65910_reg_read(pmic, TPS65910_VDD2);
+		opvsel = tps65910_reg_read_locked(pmic, TPS65910_VDD2_OP);
+		mult = tps65910_reg_read_locked(pmic, TPS65910_VDD2);
 		mult = (mult & VDD2_VGAIN_SEL_MASK) >> VDD2_VGAIN_SEL_SHIFT;
-		srvsel = tps65910_reg_read(pmic, TPS65910_VDD2_SR);
+		srvsel = tps65910_reg_read_locked(pmic, TPS65910_VDD2_SR);
 		sr = opvsel & VDD2_OP_CMD_MASK;
 		opvsel &= VDD2_OP_SEL_MASK;
 		srvsel &= VDD2_SR_SEL_MASK;
 		vselmax = 75;
 		break;
 	case TPS65911_REG_VDDCTRL:
-		opvsel = tps65910_reg_read(pmic, TPS65911_VDDCTRL_OP);
-		srvsel = tps65910_reg_read(pmic, TPS65911_VDDCTRL_SR);
+		opvsel = tps65910_reg_read_locked(pmic, TPS65911_VDDCTRL_OP);
+		srvsel = tps65910_reg_read_locked(pmic, TPS65911_VDDCTRL_SR);
 		sr = opvsel & VDDCTRL_OP_CMD_MASK;
 		opvsel &= VDDCTRL_OP_SEL_MASK;
 		srvsel &= VDDCTRL_SR_SEL_MASK;
@@ -630,7 +625,7 @@ static int tps65910_get_voltage(struct regulator_dev *dev)
 	if (reg < 0)
 		return reg;
 
-	value = tps65910_reg_read(pmic, reg);
+	value = tps65910_reg_read_locked(pmic, reg);
 	if (value < 0)
 		return value;
 
@@ -669,7 +664,7 @@ static int tps65911_get_voltage(struct regulator_dev *dev)
 
 	reg = pmic->get_ctrl_reg(id);
 
-	value = tps65910_reg_read(pmic, reg);
+	value = tps65910_reg_read_locked(pmic, reg);
 
 	switch (id) {
 	case TPS65911_REG_LDO1:
@@ -728,7 +723,7 @@ static int tps65910_set_voltage_dcdc_sel(struct regulator_dev *dev,
 		tps65910_modify_bits(pmic, TPS65910_VDD1,
 				(dcdc_mult << VDD1_VGAIN_SEL_SHIFT),
 						VDD1_VGAIN_SEL_MASK);
-		tps65910_reg_write(pmic, TPS65910_VDD1_OP, vsel);
+		tps65910_reg_write_locked(pmic, TPS65910_VDD1_OP, vsel);
 		break;
 	case TPS65910_REG_VDD2:
 		dcdc_mult = (selector / VDD1_2_NUM_VOLT_FINE) + 1;
@@ -739,11 +734,11 @@ static int tps65910_set_voltage_dcdc_sel(struct regulator_dev *dev,
 		tps65910_modify_bits(pmic, TPS65910_VDD2,
 				(dcdc_mult << VDD2_VGAIN_SEL_SHIFT),
 						VDD1_VGAIN_SEL_MASK);
-		tps65910_reg_write(pmic, TPS65910_VDD2_OP, vsel);
+		tps65910_reg_write_locked(pmic, TPS65910_VDD2_OP, vsel);
 		break;
 	case TPS65911_REG_VDDCTRL:
 		vsel = selector + 3;
-		tps65910_reg_write(pmic, TPS65911_VDDCTRL_OP, vsel);
+		tps65910_reg_write_locked(pmic, TPS65911_VDDCTRL_OP, vsel);
 	}
 
 	return 0;
@@ -994,10 +989,10 @@ static int tps65910_set_ext_sleep_config(struct tps65910_reg *pmic,
 
 	/* External EN1 control */
 	if (ext_sleep_config & TPS65910_SLEEP_CONTROL_EXT_INPUT_EN1)
-		ret = tps65910_set_bits(mfd,
+		ret = tps65910_reg_set_bits(mfd,
 				TPS65910_EN1_LDO_ASS + regoffs, bit_pos);
 	else
-		ret = tps65910_clear_bits(mfd,
+		ret = tps65910_reg_clear_bits(mfd,
 				TPS65910_EN1_LDO_ASS + regoffs, bit_pos);
 	if (ret < 0) {
 		dev_err(mfd->dev,
@@ -1007,10 +1002,10 @@ static int tps65910_set_ext_sleep_config(struct tps65910_reg *pmic,
 
 	/* External EN2 control */
 	if (ext_sleep_config & TPS65910_SLEEP_CONTROL_EXT_INPUT_EN2)
-		ret = tps65910_set_bits(mfd,
+		ret = tps65910_reg_set_bits(mfd,
 				TPS65910_EN2_LDO_ASS + regoffs, bit_pos);
 	else
-		ret = tps65910_clear_bits(mfd,
+		ret = tps65910_reg_clear_bits(mfd,
 				TPS65910_EN2_LDO_ASS + regoffs, bit_pos);
 	if (ret < 0) {
 		dev_err(mfd->dev,
@@ -1022,10 +1017,10 @@ static int tps65910_set_ext_sleep_config(struct tps65910_reg *pmic,
 	if ((tps65910_chip_id(mfd) == TPS65910) &&
 			(id >= TPS65910_REG_VDIG1)) {
 		if (ext_sleep_config & TPS65910_SLEEP_CONTROL_EXT_INPUT_EN3)
-			ret = tps65910_set_bits(mfd,
+			ret = tps65910_reg_set_bits(mfd,
 				TPS65910_EN3_LDO_ASS + regoffs, bit_pos);
 		else
-			ret = tps65910_clear_bits(mfd,
+			ret = tps65910_reg_clear_bits(mfd,
 				TPS65910_EN3_LDO_ASS + regoffs, bit_pos);
 		if (ret < 0) {
 			dev_err(mfd->dev,
@@ -1037,10 +1032,10 @@ static int tps65910_set_ext_sleep_config(struct tps65910_reg *pmic,
 	/* Return if no external control is selected */
 	if (!(ext_sleep_config & EXT_SLEEP_CONTROL)) {
 		/* Clear all sleep controls */
-		ret = tps65910_clear_bits(mfd,
+		ret = tps65910_reg_clear_bits(mfd,
 			TPS65910_SLEEP_KEEP_LDO_ON + regoffs, bit_pos);
 		if (!ret)
-			ret = tps65910_clear_bits(mfd,
+			ret = tps65910_reg_clear_bits(mfd,
 				TPS65910_SLEEP_SET_LDO_OFF + regoffs, bit_pos);
 		if (ret < 0)
 			dev_err(mfd->dev,
@@ -1059,32 +1054,33 @@ static int tps65910_set_ext_sleep_config(struct tps65910_reg *pmic,
 				(tps65910_chip_id(mfd) == TPS65911))) {
 		int op_reg_add = pmic->get_ctrl_reg(id) + 1;
 		int sr_reg_add = pmic->get_ctrl_reg(id) + 2;
-		int opvsel = tps65910_reg_read(pmic, op_reg_add);
-		int srvsel = tps65910_reg_read(pmic, sr_reg_add);
+		int opvsel = tps65910_reg_read_locked(pmic, op_reg_add);
+		int srvsel = tps65910_reg_read_locked(pmic, sr_reg_add);
 		if (opvsel & VDD1_OP_CMD_MASK) {
 			u8 reg_val = srvsel & VDD1_OP_SEL_MASK;
-			ret = tps65910_reg_write(pmic, op_reg_add, reg_val);
+			ret = tps65910_reg_write_locked(pmic, op_reg_add,
+							reg_val);
 			if (ret < 0) {
 				dev_err(mfd->dev,
 					"Error in configuring op register\n");
 				return ret;
 			}
 		}
-		ret = tps65910_reg_write(pmic, sr_reg_add, 0);
+		ret = tps65910_reg_write_locked(pmic, sr_reg_add, 0);
 		if (ret < 0) {
 			dev_err(mfd->dev, "Error in settting sr register\n");
 			return ret;
 		}
 	}
 
-	ret = tps65910_clear_bits(mfd,
+	ret = tps65910_reg_clear_bits(mfd,
 			TPS65910_SLEEP_KEEP_LDO_ON + regoffs, bit_pos);
 	if (!ret) {
 		if (ext_sleep_config & TPS65911_SLEEP_CONTROL_EXT_INPUT_SLEEP)
-			ret = tps65910_set_bits(mfd,
+			ret = tps65910_reg_set_bits(mfd,
 				TPS65910_SLEEP_SET_LDO_OFF + regoffs, bit_pos);
 		else
-			ret = tps65910_clear_bits(mfd,
+			ret = tps65910_reg_clear_bits(mfd,
 				TPS65910_SLEEP_SET_LDO_OFF + regoffs, bit_pos);
 	}
 	if (ret < 0)
@@ -1117,7 +1113,7 @@ static __devinit int tps65910_probe(struct platform_device *pdev)
 	platform_set_drvdata(pdev, pmic);
 
 	/* Give control of all register to control port */
-	tps65910_set_bits(pmic->mfd, TPS65910_DEVCTRL,
+	tps65910_reg_set_bits(pmic->mfd, TPS65910_DEVCTRL,
 				DEVCTRL_SR_CTL_I2C_SEL_MASK);
 
 	switch(tps65910_chip_id(tps65910)) {
diff --git a/include/linux/mfd/tps65910.h b/include/linux/mfd/tps65910.h
index 56903ad04283..949f1da661d2 100644
--- a/include/linux/mfd/tps65910.h
+++ b/include/linux/mfd/tps65910.h
@@ -18,6 +18,7 @@
 #define __LINUX_MFD_TPS65910_H
 
 #include <linux/gpio.h>
+#include <linux/regmap.h>
 
 /* TPS chip id list */
 #define TPS65910			0
@@ -823,8 +824,6 @@ struct tps65910 {
 	struct regmap *regmap;
 	struct mutex io_mutex;
 	unsigned int id;
-	int (*read)(struct tps65910 *tps65910, u8 reg, int size, void *dest);
-	int (*write)(struct tps65910 *tps65910, u8 reg, int size, void *src);
 
 	/* Client devices */
 	struct tps65910_pmic *pmic;
@@ -847,8 +846,6 @@ struct tps65910_platform_data {
 	int irq_base;
 };
 
-int tps65910_set_bits(struct tps65910 *tps65910, u8 reg, u8 mask);
-int tps65910_clear_bits(struct tps65910 *tps65910, u8 reg, u8 mask);
 void tps65910_gpio_init(struct tps65910 *tps65910, int gpio_base);
 int tps65910_irq_init(struct tps65910 *tps65910, int irq,
 		struct tps65910_platform_data *pdata);
@@ -859,4 +856,28 @@ static inline int tps65910_chip_id(struct tps65910 *tps65910)
 	return tps65910->id;
 }
 
+static inline int tps65910_reg_read(struct tps65910 *tps65910, u8 reg,
+		unsigned int *val)
+{
+	return regmap_read(tps65910->regmap, reg, val);
+}
+
+static inline int tps65910_reg_write(struct tps65910 *tps65910, u8 reg,
+		unsigned int val)
+{
+	return regmap_write(tps65910->regmap, reg, val);
+}
+
+static inline int tps65910_reg_set_bits(struct tps65910 *tps65910, u8 reg,
+		u8 mask)
+{
+	return regmap_update_bits(tps65910->regmap, reg, mask, mask);
+}
+
+static inline int tps65910_reg_clear_bits(struct tps65910 *tps65910, u8 reg,
+		u8 mask)
+{
+	return regmap_update_bits(tps65910->regmap, reg, mask, 0);
+}
+
 #endif /*  __LINUX_MFD_TPS65910_H */
-- 
cgit v1.3-7-g2ca7


From 9577e8c3fbc145b5d2a12d2fbc6a50031573c77d Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Wed, 9 May 2012 05:43:59 +1000
Subject: mfd: Define all twl-regulator feature flags in one place

twl-regulator has a collection of feature flags, some defined
in twl-core.c and  one defined in i2c/twl.h.
This is confusing for anyone adding a new feature flag.

So collect them together and place them in twl.h immediately
after the structure in which they are initially set.

Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/twl-core.c  | 7 -------
 include/linux/i2c/twl.h | 8 ++++++--
 2 files changed, 6 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/twl-core.c b/drivers/mfd/twl-core.c
index 7c2267e71f8b..6fc90befa79e 100644
--- a/drivers/mfd/twl-core.c
+++ b/drivers/mfd/twl-core.c
@@ -224,13 +224,6 @@
 #define HIGH_PERF_SQ			(1 << 3)
 #define CK32K_LOWPWR_EN			(1 << 7)
 
-
-/* chip-specific feature flags, for i2c_device_id.driver_data */
-#define TWL4030_VAUX2		BIT(0)	/* pre-5030 voltage ranges */
-#define TPS_SUBSET		BIT(1)	/* tps659[23]0 have fewer LDOs */
-#define TWL5031			BIT(2)  /* twl5031 has different registers */
-#define TWL6030_CLASS		BIT(3)	/* TWL6030 class */
-
 /*----------------------------------------------------------------------*/
 
 /* is driver active, bound to a chip? */
diff --git a/include/linux/i2c/twl.h b/include/linux/i2c/twl.h
index 1f90de0cfdbe..d1afedc00898 100644
--- a/include/linux/i2c/twl.h
+++ b/include/linux/i2c/twl.h
@@ -171,8 +171,6 @@ static inline int twl_class_is_ ##class(void)	\
 TWL_CLASS_IS(4030, TWL4030_CLASS_ID)
 TWL_CLASS_IS(6030, TWL6030_CLASS_ID)
 
-#define TWL6025_SUBCLASS	BIT(4)  /* TWL6025 has changed registers */
-
 /*
  * Read and write single 8-bit registers
  */
@@ -746,6 +744,12 @@ struct twl_regulator_driver_data {
 	void		*data;
 	unsigned long	features;
 };
+/* chip-specific feature flags, for twl_regulator_driver_data.features */
+#define TWL4030_VAUX2		BIT(0)	/* pre-5030 voltage ranges */
+#define TPS_SUBSET		BIT(1)	/* tps659[23]0 have fewer LDOs */
+#define TWL5031			BIT(2)  /* twl5031 has different registers */
+#define TWL6030_CLASS		BIT(3)	/* TWL6030 class */
+#define TWL6025_SUBCLASS	BIT(4)  /* TWL6025 has changed registers */
 
 /*----------------------------------------------------------------------*/
 
-- 
cgit v1.3-7-g2ca7


From 32df986e985921386b75b4bd1117102bf65fe095 Mon Sep 17 00:00:00 2001
From: Laxman Dewangan <ldewangan@nvidia.com>
Date: Fri, 11 May 2012 15:07:44 +0200
Subject: mfd: Register tps65910 gpios as an mfd device

As gpio support for tps65910 is on gpio driver, registering
gpio support as the mfd sub devices instead of calling gpio_init()
from the core probe.

Signed-off-by: Laxman Dewangan <ldewangan@nvidia.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/Kconfig          | 1 -
 drivers/mfd/tps65910.c       | 6 +++---
 include/linux/mfd/tps65910.h | 4 ----
 3 files changed, 3 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 1e9a7d5ec919..b914483cd630 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -189,7 +189,6 @@ config MFD_TPS65910
 	bool "TPS65910 Power Management chip"
 	depends on I2C=y && GPIOLIB
 	select MFD_CORE
-	select GPIO_TPS65910
 	select REGMAP_I2C
 	help
 	  if you say yes here you get support for the TPS65910 series of
diff --git a/drivers/mfd/tps65910.c b/drivers/mfd/tps65910.c
index 22fa43070659..553574da3611 100644
--- a/drivers/mfd/tps65910.c
+++ b/drivers/mfd/tps65910.c
@@ -19,13 +19,15 @@
 #include <linux/err.h>
 #include <linux/slab.h>
 #include <linux/i2c.h>
-#include <linux/gpio.h>
 #include <linux/mfd/core.h>
 #include <linux/regmap.h>
 #include <linux/mfd/tps65910.h>
 #include <linux/of_device.h>
 
 static struct mfd_cell tps65910s[] = {
+	{
+		.name = "tps65910-gpio",
+	},
 	{
 		.name = "tps65910-pmic",
 	},
@@ -250,8 +252,6 @@ static __devinit int tps65910_i2c_probe(struct i2c_client *i2c,
 	init_data->irq = pmic_plat_data->irq;
 	init_data->irq_base = pmic_plat_data->irq_base;
 
-	tps65910_gpio_init(tps65910, pmic_plat_data->gpio_base);
-
 	tps65910_irq_init(tps65910, init_data->irq, init_data);
 
 	tps65910_sleepinit(tps65910, pmic_plat_data);
diff --git a/include/linux/mfd/tps65910.h b/include/linux/mfd/tps65910.h
index 949f1da661d2..c2673ee5e70f 100644
--- a/include/linux/mfd/tps65910.h
+++ b/include/linux/mfd/tps65910.h
@@ -830,9 +830,6 @@ struct tps65910 {
 	struct tps65910_rtc *rtc;
 	struct tps65910_power *power;
 
-	/* GPIO Handling */
-	struct gpio_chip gpio;
-
 	/* IRQ Handling */
 	struct mutex irq_lock;
 	int chip_irq;
@@ -846,7 +843,6 @@ struct tps65910_platform_data {
 	int irq_base;
 };
 
-void tps65910_gpio_init(struct tps65910 *tps65910, int gpio_base);
 int tps65910_irq_init(struct tps65910 *tps65910, int irq,
 		struct tps65910_platform_data *pdata);
 int tps65910_irq_exit(struct tps65910 *tps65910);
-- 
cgit v1.3-7-g2ca7


From 879eed68265c8dcb2f2856ec96820fc93b7038c9 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Wed, 9 May 2012 22:53:48 +0100
Subject: mfd: Remove wm8400 custom cache implementation

Save a useful amount of code by removing the custom cache implementation
for wm8400 and using the regmap cache. Also simplify things by not
separately reseting the CODEC registers, this is a sufficiently infrequent
operation that we can simply invalidate the entire cache when this happens.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/wm8400-core.c          | 251 ++++++-------------------------------
 include/linux/mfd/wm8400-private.h |  14 +--
 2 files changed, 42 insertions(+), 223 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/wm8400-core.c b/drivers/mfd/wm8400-core.c
index 1189a17f0f25..9083b775e2b6 100644
--- a/drivers/mfd/wm8400-core.c
+++ b/drivers/mfd/wm8400-core.c
@@ -23,136 +23,16 @@
 #include <linux/regmap.h>
 #include <linux/slab.h>
 
-static struct {
-	u16  readable;    /* Mask of readable bits */
-	u16  writable;    /* Mask of writable bits */
-	u16  vol;         /* Mask of volatile bits */
-	int  is_codec;    /* Register controlled by codec reset */
-	u16  default_val; /* Value on reset */
-} reg_data[] = {
-	{ 0xFFFF, 0xFFFF, 0x0000, 0, 0x6172 }, /* R0 */
-	{ 0x7000, 0x0000, 0x8000, 0, 0x0000 }, /* R1 */
-	{ 0xFF17, 0xFF17, 0x0000, 0, 0x0000 }, /* R2 */
-	{ 0xEBF3, 0xEBF3, 0x0000, 1, 0x6000 }, /* R3 */
-	{ 0x3CF3, 0x3CF3, 0x0000, 1, 0x0000 }, /* R4  */
-	{ 0xF1F8, 0xF1F8, 0x0000, 1, 0x4050 }, /* R5  */
-	{ 0xFC1F, 0xFC1F, 0x0000, 1, 0x4000 }, /* R6  */
-	{ 0xDFDE, 0xDFDE, 0x0000, 1, 0x01C8 }, /* R7  */
-	{ 0xFCFC, 0xFCFC, 0x0000, 1, 0x0000 }, /* R8  */
-	{ 0xEFFF, 0xEFFF, 0x0000, 1, 0x0040 }, /* R9  */
-	{ 0xEFFF, 0xEFFF, 0x0000, 1, 0x0040 }, /* R10 */
-	{ 0x27F7, 0x27F7, 0x0000, 1, 0x0004 }, /* R11 */
-	{ 0x01FF, 0x01FF, 0x0000, 1, 0x00C0 }, /* R12 */
-	{ 0x01FF, 0x01FF, 0x0000, 1, 0x00C0 }, /* R13 */
-	{ 0x1FEF, 0x1FEF, 0x0000, 1, 0x0000 }, /* R14 */
-	{ 0x0163, 0x0163, 0x0000, 1, 0x0100 }, /* R15 */
-	{ 0x01FF, 0x01FF, 0x0000, 1, 0x00C0 }, /* R16 */
-	{ 0x01FF, 0x01FF, 0x0000, 1, 0x00C0 }, /* R17 */
-	{ 0x1FFF, 0x0FFF, 0x0000, 1, 0x0000 }, /* R18 */
-	{ 0xFFFF, 0xFFFF, 0x0000, 1, 0x1000 }, /* R19 */
-	{ 0xFFFF, 0xFFFF, 0x0000, 1, 0x1010 }, /* R20 */
-	{ 0xFFFF, 0xFFFF, 0x0000, 1, 0x1010 }, /* R21 */
-	{ 0x0FDD, 0x0FDD, 0x0000, 1, 0x8000 }, /* R22 */
-	{ 0x1FFF, 0x1FFF, 0x0000, 1, 0x0800 }, /* R23 */
-	{ 0x0000, 0x01DF, 0x0000, 1, 0x008B }, /* R24 */
-	{ 0x0000, 0x01DF, 0x0000, 1, 0x008B }, /* R25 */
-	{ 0x0000, 0x01DF, 0x0000, 1, 0x008B }, /* R26 */
-	{ 0x0000, 0x01DF, 0x0000, 1, 0x008B }, /* R27 */
-	{ 0x0000, 0x01FF, 0x0000, 1, 0x0000 }, /* R28 */
-	{ 0x0000, 0x01FF, 0x0000, 1, 0x0000 }, /* R29 */
-	{ 0x0000, 0x0077, 0x0000, 1, 0x0066 }, /* R30 */
-	{ 0x0000, 0x0033, 0x0000, 1, 0x0022 }, /* R31 */
-	{ 0x0000, 0x01FF, 0x0000, 1, 0x0079 }, /* R32 */
-	{ 0x0000, 0x01FF, 0x0000, 1, 0x0079 }, /* R33 */
-	{ 0x0000, 0x0003, 0x0000, 1, 0x0003 }, /* R34 */
-	{ 0x0000, 0x01FF, 0x0000, 1, 0x0003 }, /* R35 */
-	{ 0x0000, 0x0000, 0x0000, 0, 0x0000 }, /* R36 */
-	{ 0x0000, 0x003F, 0x0000, 1, 0x0100 }, /* R37 */
-	{ 0x0000, 0x0000, 0x0000, 0, 0x0000 }, /* R38 */
-	{ 0x0000, 0x000F, 0x0000, 0, 0x0000 }, /* R39 */
-	{ 0x0000, 0x00FF, 0x0000, 1, 0x0000 }, /* R40 */
-	{ 0x0000, 0x01B7, 0x0000, 1, 0x0000 }, /* R41 */
-	{ 0x0000, 0x01B7, 0x0000, 1, 0x0000 }, /* R42 */
-	{ 0x0000, 0x01FF, 0x0000, 1, 0x0000 }, /* R43 */
-	{ 0x0000, 0x01FF, 0x0000, 1, 0x0000 }, /* R44 */
-	{ 0x0000, 0x00FD, 0x0000, 1, 0x0000 }, /* R45 */
-	{ 0x0000, 0x00FD, 0x0000, 1, 0x0000 }, /* R46 */
-	{ 0x0000, 0x01FF, 0x0000, 1, 0x0000 }, /* R47 */
-	{ 0x0000, 0x01FF, 0x0000, 1, 0x0000 }, /* R48 */
-	{ 0x0000, 0x01FF, 0x0000, 1, 0x0000 }, /* R49 */
-	{ 0x0000, 0x01FF, 0x0000, 1, 0x0000 }, /* R50 */
-	{ 0x0000, 0x01B3, 0x0000, 1, 0x0180 }, /* R51 */
-	{ 0x0000, 0x0077, 0x0000, 1, 0x0000 }, /* R52 */
-	{ 0x0000, 0x0077, 0x0000, 1, 0x0000 }, /* R53 */
-	{ 0x0000, 0x00FF, 0x0000, 1, 0x0000 }, /* R54 */
-	{ 0x0000, 0x0001, 0x0000, 1, 0x0000 }, /* R55 */
-	{ 0x0000, 0x003F, 0x0000, 1, 0x0000 }, /* R56 */
-	{ 0x0000, 0x004F, 0x0000, 1, 0x0000 }, /* R57 */
-	{ 0x0000, 0x00FD, 0x0000, 1, 0x0000 }, /* R58 */
-	{ 0x0000, 0x0000, 0x0000, 0, 0x0000 }, /* R59 */
-	{ 0x1FFF, 0x1FFF, 0x0000, 1, 0x0000 }, /* R60 */
-	{ 0xFFFF, 0xFFFF, 0x0000, 1, 0x0000 }, /* R61 */
-	{ 0x03FF, 0x03FF, 0x0000, 1, 0x0000 }, /* R62 */
-	{ 0x007F, 0x007F, 0x0000, 1, 0x0000 }, /* R63 */
-	{ 0x0000, 0x0000, 0x0000, 0, 0x0000 }, /* R64 */
-	{ 0xDFFF, 0xDFFF, 0x0000, 0, 0x0000 }, /* R65 */
-	{ 0xDFFF, 0xDFFF, 0x0000, 0, 0x0000 }, /* R66 */
-	{ 0xDFFF, 0xDFFF, 0x0000, 0, 0x0000 }, /* R67 */
-	{ 0xDFFF, 0xDFFF, 0x0000, 0, 0x0000 }, /* R68 */
-	{ 0x0000, 0x0000, 0x0000, 0, 0x0000 }, /* R69 */
-	{ 0xFFFF, 0xFFFF, 0x0000, 0, 0x4400 }, /* R70 */
-	{ 0x23FF, 0x23FF, 0x0000, 0, 0x0000 }, /* R71 */
-	{ 0xFFFF, 0xFFFF, 0x0000, 0, 0x4400 }, /* R72 */
-	{ 0x23FF, 0x23FF, 0x0000, 0, 0x0000 }, /* R73 */
-	{ 0x0000, 0x0000, 0x0000, 0, 0x0000 }, /* R74 */
-	{ 0x000E, 0x000E, 0x0000, 0, 0x0008 }, /* R75 */
-	{ 0xE00F, 0xE00F, 0x0000, 0, 0x0000 }, /* R76 */
-	{ 0x0000, 0x0000, 0x0000, 0, 0x0000 }, /* R77 */
-	{ 0x03C0, 0x03C0, 0x0000, 0, 0x02C0 }, /* R78 */
-	{ 0xFFFF, 0x0000, 0xffff, 0, 0x0000 }, /* R79 */
-	{ 0xFFFF, 0xFFFF, 0x0000, 0, 0x0000 }, /* R80 */
-	{ 0xFFFF, 0x0000, 0xffff, 0, 0x0000 }, /* R81 */
-	{ 0x2BFF, 0x0000, 0xffff, 0, 0x0000 }, /* R82 */
-	{ 0x0000, 0x0000, 0x0000, 0, 0x0000 }, /* R83 */
-	{ 0x80FF, 0x80FF, 0x0000, 0, 0x00ff }, /* R84 */
-};
-
-static int wm8400_read(struct wm8400 *wm8400, u8 reg, int num_regs, u16 *dest)
+static bool wm8400_volatile(struct device *dev, unsigned int reg)
 {
-	int i, ret = 0;
-
-	BUG_ON(reg + num_regs > ARRAY_SIZE(wm8400->reg_cache));
-
-	/* If there are any volatile reads then read back the entire block */
-	for (i = reg; i < reg + num_regs; i++)
-		if (reg_data[i].vol) {
-			ret = regmap_bulk_read(wm8400->regmap, reg, dest,
-					       num_regs);
-			return ret;
-		}
-
-	/* Otherwise use the cache */
-	memcpy(dest, &wm8400->reg_cache[reg], num_regs * sizeof(u16));
-
-	return 0;
-}
-
-static int wm8400_write(struct wm8400 *wm8400, u8 reg, int num_regs,
-			u16 *src)
-{
-	int ret, i;
-
-	BUG_ON(reg + num_regs > ARRAY_SIZE(wm8400->reg_cache));
-
-	for (i = 0; i < num_regs; i++) {
-		BUG_ON(!reg_data[reg + i].writable);
-		wm8400->reg_cache[reg + i] = src[i];
-		ret = regmap_write(wm8400->regmap, reg, src[i]);
-		if (ret != 0)
-			return ret;
+	switch (reg) {
+	case WM8400_INTERRUPT_STATUS_1:
+	case WM8400_INTERRUPT_LEVELS:
+	case WM8400_SHUTDOWN_REASON:
+		return true;
+	default:
+		return false;
 	}
-
-	return 0;
 }
 
 /**
@@ -165,13 +45,12 @@ static int wm8400_write(struct wm8400 *wm8400, u8 reg, int num_regs,
  */
 u16 wm8400_reg_read(struct wm8400 *wm8400, u8 reg)
 {
-	u16 val;
-
-	mutex_lock(&wm8400->io_lock);
-
-	wm8400_read(wm8400, reg, 1, &val);
+	unsigned int val;
+	int ret;
 
-	mutex_unlock(&wm8400->io_lock);
+	ret = regmap_read(wm8400->regmap, reg, &val);
+	if (ret < 0)
+		return ret;
 
 	return val;
 }
@@ -179,62 +58,8 @@ EXPORT_SYMBOL_GPL(wm8400_reg_read);
 
 int wm8400_block_read(struct wm8400 *wm8400, u8 reg, int count, u16 *data)
 {
-	int ret;
-
-	mutex_lock(&wm8400->io_lock);
-
-	ret = wm8400_read(wm8400, reg, count, data);
-
-	mutex_unlock(&wm8400->io_lock);
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(wm8400_block_read);
-
-/**
- * wm8400_set_bits - Bitmask write
- *
- * @wm8400: Pointer to wm8400 control structure
- * @reg:    Register to access
- * @mask:   Mask of bits to change
- * @val:    Value to set for masked bits
- */
-int wm8400_set_bits(struct wm8400 *wm8400, u8 reg, u16 mask, u16 val)
-{
-	u16 tmp;
-	int ret;
-
-	mutex_lock(&wm8400->io_lock);
-
-	ret = wm8400_read(wm8400, reg, 1, &tmp);
-	tmp = (tmp & ~mask) | val;
-	if (ret == 0)
-		ret = wm8400_write(wm8400, reg, 1, &tmp);
-
-	mutex_unlock(&wm8400->io_lock);
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(wm8400_set_bits);
-
-/**
- * wm8400_reset_codec_reg_cache - Reset cached codec registers to
- * their default values.
- */
-void wm8400_reset_codec_reg_cache(struct wm8400 *wm8400)
-{
-	int i;
-
-	mutex_lock(&wm8400->io_lock);
-
-	/* Reset all codec registers to their initial value */
-	for (i = 0; i < ARRAY_SIZE(wm8400->reg_cache); i++)
-		if (reg_data[i].is_codec)
-			wm8400->reg_cache[i] = reg_data[i].default_val;
-
-	mutex_unlock(&wm8400->io_lock);
+	return regmap_bulk_read(wm8400->regmap, reg, data, count);
 }
-EXPORT_SYMBOL_GPL(wm8400_reset_codec_reg_cache);
 
 static int wm8400_register_codec(struct wm8400 *wm8400)
 {
@@ -257,44 +82,24 @@ static int wm8400_register_codec(struct wm8400 *wm8400)
 static int wm8400_init(struct wm8400 *wm8400,
 		       struct wm8400_platform_data *pdata)
 {
-	u16 reg;
-	int ret, i;
-
-	mutex_init(&wm8400->io_lock);
+	unsigned int reg;
+	int ret;
 
 	dev_set_drvdata(wm8400->dev, wm8400);
 
 	/* Check that this is actually a WM8400 */
-	ret = regmap_read(wm8400->regmap, WM8400_RESET_ID, &i);
+	ret = regmap_read(wm8400->regmap, WM8400_RESET_ID, &reg);
 	if (ret != 0) {
 		dev_err(wm8400->dev, "Chip ID register read failed\n");
 		return -EIO;
 	}
-	if (i != reg_data[WM8400_RESET_ID].default_val) {
-		dev_err(wm8400->dev, "Device is not a WM8400, ID is %x\n", i);
+	if (reg != 0x6172) {
+		dev_err(wm8400->dev, "Device is not a WM8400, ID is %x\n",
+			reg);
 		return -ENODEV;
 	}
 
-	/* We don't know what state the hardware is in and since this
-	 * is a PMIC we can't reset it safely so initialise the register
-	 * cache from the hardware.
-	 */
-	ret = regmap_raw_read(wm8400->regmap, 0, wm8400->reg_cache,
-			      ARRAY_SIZE(wm8400->reg_cache));
-	if (ret != 0) {
-		dev_err(wm8400->dev, "Register cache read failed\n");
-		return -EIO;
-	}
-	for (i = 0; i < ARRAY_SIZE(wm8400->reg_cache); i++)
-		wm8400->reg_cache[i] = be16_to_cpu(wm8400->reg_cache[i]);
-
-	/* If the codec is in reset use hard coded values */
-	if (!(wm8400->reg_cache[WM8400_POWER_MANAGEMENT_1] & WM8400_CODEC_ENA))
-		for (i = 0; i < ARRAY_SIZE(wm8400->reg_cache); i++)
-			if (reg_data[i].is_codec)
-				wm8400->reg_cache[i] = reg_data[i].default_val;
-
-	ret = wm8400_read(wm8400, WM8400_ID, 1, &reg);
+	ret = regmap_read(wm8400->regmap, WM8400_ID, &reg);
 	if (ret != 0) {
 		dev_err(wm8400->dev, "ID register read failed: %d\n", ret);
 		return ret;
@@ -334,8 +139,22 @@ static const struct regmap_config wm8400_regmap_config = {
 	.reg_bits = 8,
 	.val_bits = 16,
 	.max_register = WM8400_REGISTER_COUNT - 1,
+
+	.volatile_reg = wm8400_volatile,
+
+	.cache_type = REGCACHE_RBTREE,
 };
 
+/**
+ * wm8400_reset_codec_reg_cache - Reset cached codec registers to
+ * their default values.
+ */
+void wm8400_reset_codec_reg_cache(struct wm8400 *wm8400)
+{
+	regmap_reinit_cache(wm8400->regmap, &wm8400_regmap_config);
+}
+EXPORT_SYMBOL_GPL(wm8400_reset_codec_reg_cache);
+
 #if defined(CONFIG_I2C) || defined(CONFIG_I2C_MODULE)
 static int wm8400_i2c_probe(struct i2c_client *i2c,
 			    const struct i2c_device_id *id)
diff --git a/include/linux/mfd/wm8400-private.h b/include/linux/mfd/wm8400-private.h
index 0147b6968510..2de565b94d0c 100644
--- a/include/linux/mfd/wm8400-private.h
+++ b/include/linux/mfd/wm8400-private.h
@@ -24,19 +24,14 @@
 #include <linux/mfd/wm8400.h>
 #include <linux/mutex.h>
 #include <linux/platform_device.h>
-
-struct regmap;
+#include <linux/regmap.h>
 
 #define WM8400_REGISTER_COUNT 0x55
 
 struct wm8400 {
 	struct device *dev;
-
-	struct mutex io_lock;
 	struct regmap *regmap;
 
-	u16 reg_cache[WM8400_REGISTER_COUNT];
-
 	struct platform_device regulators[6];
 };
 
@@ -930,6 +925,11 @@ struct wm8400 {
 
 u16 wm8400_reg_read(struct wm8400 *wm8400, u8 reg);
 int wm8400_block_read(struct wm8400 *wm8400, u8 reg, int count, u16 *data);
-int wm8400_set_bits(struct wm8400 *wm8400, u8 reg, u16 mask, u16 val);
+
+static inline int wm8400_set_bits(struct wm8400 *wm8400, u8 reg,
+				  u16 mask, u16 val)
+{
+	return regmap_update_bits(wm8400->regmap, reg, mask, val);
+}
 
 #endif
-- 
cgit v1.3-7-g2ca7


From d9055dc501da6734e3cfea1ef236173bd8b645b1 Mon Sep 17 00:00:00 2001
From: Johan Hovold <jhovold@gmail.com>
Date: Thu, 10 May 2012 14:11:28 +0200
Subject: mfd: Add boost frequency and ovp to lm3533 platform data

Add boost-frequency and over-voltage-protection settings to platform
data.

Signed-off-by: Johan Hovold <jhovold@gmail.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/lm3533-core.c  | 50 ++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/mfd/lm3533.h | 15 ++++++++++++++
 2 files changed, 65 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mfd/lm3533-core.c b/drivers/mfd/lm3533-core.c
index 75f4b7f5a4fd..053438cff10a 100644
--- a/drivers/mfd/lm3533-core.c
+++ b/drivers/mfd/lm3533-core.c
@@ -138,6 +138,35 @@ int lm3533_update(struct lm3533 *lm3533, u8 reg, u8 val, u8 mask)
 }
 EXPORT_SYMBOL_GPL(lm3533_update);
 
+static int lm3533_set_boost_freq(struct lm3533 *lm3533,
+						enum lm3533_boost_freq freq)
+{
+	int ret;
+
+	ret = lm3533_update(lm3533, LM3533_REG_BOOST_PWM,
+					freq << LM3533_BOOST_FREQ_SHIFT,
+					LM3533_BOOST_FREQ_MASK);
+	if (ret)
+		dev_err(lm3533->dev, "failed to set boost frequency\n");
+
+	return ret;
+}
+
+
+static int lm3533_set_boost_ovp(struct lm3533 *lm3533,
+						enum lm3533_boost_ovp ovp)
+{
+	int ret;
+
+	ret = lm3533_update(lm3533, LM3533_REG_BOOST_PWM,
+					ovp << LM3533_BOOST_OVP_SHIFT,
+					LM3533_BOOST_OVP_MASK);
+	if (ret)
+		dev_err(lm3533->dev, "failed to set boost ovp\n");
+
+	return ret;
+}
+
 /*
  * HVLED output config -- output hvled controlled by backlight bl
  */
@@ -521,6 +550,22 @@ static int __devinit lm3533_device_led_init(struct lm3533 *lm3533)
 	return 0;
 }
 
+static int __devinit lm3533_device_setup(struct lm3533 *lm3533,
+					struct lm3533_platform_data *pdata)
+{
+	int ret;
+
+	ret = lm3533_set_boost_freq(lm3533, pdata->boost_freq);
+	if (ret)
+		return ret;
+
+	ret = lm3533_set_boost_ovp(lm3533, pdata->boost_ovp);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
 static int __devinit lm3533_device_init(struct lm3533 *lm3533)
 {
 	struct lm3533_platform_data *pdata = lm3533->dev->platform_data;
@@ -550,6 +595,10 @@ static int __devinit lm3533_device_init(struct lm3533 *lm3533)
 
 	lm3533_enable(lm3533);
 
+	ret = lm3533_device_setup(lm3533, pdata);
+	if (ret)
+		goto err_disable;
+
 	lm3533_device_als_init(lm3533);
 	lm3533_device_bl_init(lm3533);
 	lm3533_device_led_init(lm3533);
@@ -564,6 +613,7 @@ static int __devinit lm3533_device_init(struct lm3533 *lm3533)
 
 err_unregister:
 	mfd_remove_devices(lm3533->dev);
+err_disable:
 	lm3533_disable(lm3533);
 	if (gpio_is_valid(lm3533->gpio_hwen))
 		gpio_free(lm3533->gpio_hwen);
diff --git a/include/linux/mfd/lm3533.h b/include/linux/mfd/lm3533.h
index 75f85f3fbd90..336113759fd1 100644
--- a/include/linux/mfd/lm3533.h
+++ b/include/linux/mfd/lm3533.h
@@ -59,9 +59,24 @@ struct lm3533_led_platform_data {
 	u8 pwm;				/* 0 - 0x3f */
 };
 
+enum lm3533_boost_freq {
+	LM3533_BOOST_FREQ_500KHZ,
+	LM3533_BOOST_FREQ_1000KHZ,
+};
+
+enum lm3533_boost_ovp {
+	LM3533_BOOST_OVP_16V,
+	LM3533_BOOST_OVP_24V,
+	LM3533_BOOST_OVP_32V,
+	LM3533_BOOST_OVP_40V,
+};
+
 struct lm3533_platform_data {
 	int gpio_hwen;
 
+	enum lm3533_boost_ovp boost_ovp;
+	enum lm3533_boost_freq boost_freq;
+
 	struct lm3533_als_platform_data *als;
 
 	struct lm3533_bl_platform_data *backlights;
-- 
cgit v1.3-7-g2ca7


From 7af5e87dc5e6b6f413ba95b06e06ebf810687858 Mon Sep 17 00:00:00 2001
From: Johan Hovold <jhovold@gmail.com>
Date: Thu, 10 May 2012 19:18:28 +0200
Subject: mfd: Remove unused max-current lm3533 function

The max-current attributes of the subdrivers have been dropped so
remove the no longer used lm3533_ctrlbank_get_max_current function.

Signed-off-by: Johan Hovold <jhovold@gmail.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/lm3533-ctrlbank.c | 1 -
 include/linux/mfd/lm3533.h    | 2 --
 2 files changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/lm3533-ctrlbank.c b/drivers/mfd/lm3533-ctrlbank.c
index c2732a37c65a..adf4c1a542a9 100644
--- a/drivers/mfd/lm3533-ctrlbank.c
+++ b/drivers/mfd/lm3533-ctrlbank.c
@@ -113,7 +113,6 @@ lm3533_ctrlbank_get(brightness, BRIGHTNESS);
  *   31 - 29.8 mA
  */
 lm3533_ctrlbank_set(max_current, MAX_CURRENT);
-lm3533_ctrlbank_get(max_current, MAX_CURRENT);
 
 /*
  * PWM-input control mask:
diff --git a/include/linux/mfd/lm3533.h b/include/linux/mfd/lm3533.h
index 336113759fd1..7cfef9e4f41b 100644
--- a/include/linux/mfd/lm3533.h
+++ b/include/linux/mfd/lm3533.h
@@ -92,8 +92,6 @@ extern int lm3533_ctrlbank_disable(struct lm3533_ctrlbank *cb);
 extern int lm3533_ctrlbank_set_brightness(struct lm3533_ctrlbank *cb, u8 val);
 extern int lm3533_ctrlbank_get_brightness(struct lm3533_ctrlbank *cb, u8 *val);
 extern int lm3533_ctrlbank_set_max_current(struct lm3533_ctrlbank *cb, u8 val);
-extern int lm3533_ctrlbank_get_max_current(struct lm3533_ctrlbank *cb,
-								u8 *val);
 extern int lm3533_ctrlbank_set_pwm(struct lm3533_ctrlbank *cb, u8 val);
 extern int lm3533_ctrlbank_get_pwm(struct lm3533_ctrlbank *cb, u8 *val);
 
-- 
cgit v1.3-7-g2ca7


From 6fa4b9d802610116adf4b89c2f9bd155829aafd3 Mon Sep 17 00:00:00 2001
From: Johan Hovold <jhovold@gmail.com>
Date: Thu, 10 May 2012 19:18:29 +0200
Subject: mfd: Use SI-units for the lm3533 max-current interface

Use SI-units (uA) for max-current interface (5000 - 29800 uA).

Signed-off-by: Johan Hovold <jhovold@gmail.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/lm3533-ctrlbank.c | 43 +++++++++++++++++++++++++++++--------------
 include/linux/mfd/lm3533.h    |  7 ++++---
 2 files changed, 33 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/lm3533-ctrlbank.c b/drivers/mfd/lm3533-ctrlbank.c
index adf4c1a542a9..a4cb7a5220a7 100644
--- a/drivers/mfd/lm3533-ctrlbank.c
+++ b/drivers/mfd/lm3533-ctrlbank.c
@@ -17,8 +17,11 @@
 #include <linux/mfd/lm3533.h>
 
 
+#define LM3533_MAX_CURRENT_MIN		5000
+#define LM3533_MAX_CURRENT_MAX		29800
+#define LM3533_MAX_CURRENT_STEP		800
+
 #define LM3533_BRIGHTNESS_MAX		255
-#define LM3533_MAX_CURRENT_MAX		31
 #define LM3533_PWM_MAX			0x3f
 
 #define LM3533_REG_PWM_BASE		0x14
@@ -65,6 +68,31 @@ int lm3533_ctrlbank_disable(struct lm3533_ctrlbank *cb)
 }
 EXPORT_SYMBOL_GPL(lm3533_ctrlbank_disable);
 
+/*
+ * Full-scale current.
+ *
+ * imax		5000 - 29800 uA (800 uA step)
+ */
+int lm3533_ctrlbank_set_max_current(struct lm3533_ctrlbank *cb, u16 imax)
+{
+	u8 reg;
+	u8 val;
+	int ret;
+
+	if (imax < LM3533_MAX_CURRENT_MIN || imax > LM3533_MAX_CURRENT_MAX)
+		return -EINVAL;
+
+	val = (imax - LM3533_MAX_CURRENT_MIN) / LM3533_MAX_CURRENT_STEP;
+
+	reg = lm3533_ctrlbank_get_reg(cb, LM3533_REG_MAX_CURRENT_BASE);
+	ret = lm3533_write(cb->lm3533, reg, val);
+	if (ret)
+		dev_err(cb->dev, "failed to set max current\n");
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(lm3533_ctrlbank_set_max_current);
+
 #define lm3533_ctrlbank_set(_name, _NAME)				\
 int lm3533_ctrlbank_set_##_name(struct lm3533_ctrlbank *cb, u8 val)	\
 {									\
@@ -101,19 +129,6 @@ EXPORT_SYMBOL_GPL(lm3533_ctrlbank_get_##_name);
 lm3533_ctrlbank_set(brightness, BRIGHTNESS);
 lm3533_ctrlbank_get(brightness, BRIGHTNESS);
 
-/*
- * Full scale current.
- *
- * Imax = 5 + val * 0.8 mA, e.g.:
- *
- *    0 - 5 mA
- *     ...
- *   19 - 20.2 mA (default)
- *     ...
- *   31 - 29.8 mA
- */
-lm3533_ctrlbank_set(max_current, MAX_CURRENT);
-
 /*
  * PWM-input control mask:
  *
diff --git a/include/linux/mfd/lm3533.h b/include/linux/mfd/lm3533.h
index 7cfef9e4f41b..9660febe93c2 100644
--- a/include/linux/mfd/lm3533.h
+++ b/include/linux/mfd/lm3533.h
@@ -47,15 +47,15 @@ struct lm3533_als_platform_data {
 
 struct lm3533_bl_platform_data {
 	char *name;
+	u16 max_current;		/* 5000 - 29800 uA (800 uA step) */
 	u8 default_brightness;		/* 0 - 255 */
-	u8 max_current;			/* 0 - 31 */
 	u8 pwm;				/* 0 - 0x3f */
 };
 
 struct lm3533_led_platform_data {
 	char *name;
 	const char *default_trigger;
-	u8 max_current;			/* 0 - 31 */
+	u16 max_current;		/* 5000 - 29800 uA (800 uA step) */
 	u8 pwm;				/* 0 - 0x3f */
 };
 
@@ -91,7 +91,8 @@ extern int lm3533_ctrlbank_disable(struct lm3533_ctrlbank *cb);
 
 extern int lm3533_ctrlbank_set_brightness(struct lm3533_ctrlbank *cb, u8 val);
 extern int lm3533_ctrlbank_get_brightness(struct lm3533_ctrlbank *cb, u8 *val);
-extern int lm3533_ctrlbank_set_max_current(struct lm3533_ctrlbank *cb, u8 val);
+extern int lm3533_ctrlbank_set_max_current(struct lm3533_ctrlbank *cb,
+								u16 imax);
 extern int lm3533_ctrlbank_set_pwm(struct lm3533_ctrlbank *cb, u8 val);
 extern int lm3533_ctrlbank_get_pwm(struct lm3533_ctrlbank *cb, u8 *val);
 
-- 
cgit v1.3-7-g2ca7


From 83871c00bb43f41d85dd15aba56a83bbb191eabc Mon Sep 17 00:00:00 2001
From: Chanwoo Choi <cw00.choi@samsung.com>
Date: Mon, 14 May 2012 22:50:39 +0200
Subject: mfd: Add MAX77693 driver

This patch adds MFD driver for MAX77693 to enable its sub devices.

The MAX77693 is a multi-function devices. It includes PMIC,
MUIC(Micro USB Interface Controller), flash LED control and
haptic motor control.

Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
Signed-off-by: Myungjoo Ham <myungjoo.ham@samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
Reviewed-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/Kconfig                  |  12 ++
 drivers/mfd/Makefile                 |   1 +
 drivers/mfd/max77693.c               | 217 +++++++++++++++++++++++++++++++++++
 include/linux/mfd/max77693-private.h | 217 +++++++++++++++++++++++++++++++++++
 include/linux/mfd/max77693.h         |  37 ++++++
 5 files changed, 484 insertions(+)
 create mode 100644 drivers/mfd/max77693.c
 create mode 100644 include/linux/mfd/max77693-private.h
 create mode 100644 include/linux/mfd/max77693.h

(limited to 'include/linux')

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index af46ce019fc7..a0e1b834af61 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -420,6 +420,18 @@ config PMIC_ADP5520
 	  individual components like LCD backlight, LEDs, GPIOs and Kepad
 	  under the corresponding menus.
 
+config MFD_MAX77693
+	bool "Maxim Semiconductor MAX77693 PMIC Support"
+	depends on I2C=y && GENERIC_HARDIRQS
+	select MFD_CORE
+	help
+	  Say yes here to support for Maxim Semiconductor MAX77693.
+	  This is a companion Power Management IC with Flash, Haptic, Charger,
+	  and MUIC(Micro USB Interface Controller) controls on chip.
+	  This driver provides common support for accessing the device;
+	  additional drivers must be enabled in order to use the functionality
+	  of the device.
+
 config MFD_MAX8925
 	bool "Maxim Semiconductor MAX8925 PMIC Support"
 	depends on I2C=y && GENERIC_HARDIRQS
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index d3dae9567800..db0262b34af6 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -78,6 +78,7 @@ obj-$(CONFIG_PMIC_DA9052)	+= da9052-core.o
 obj-$(CONFIG_MFD_DA9052_SPI)	+= da9052-spi.o
 obj-$(CONFIG_MFD_DA9052_I2C)	+= da9052-i2c.o
 
+obj-$(CONFIG_MFD_MAX77693)	+= max77693.o
 max8925-objs			:= max8925-core.o max8925-i2c.o
 obj-$(CONFIG_MFD_MAX8925)	+= max8925.o
 obj-$(CONFIG_MFD_MAX8997)	+= max8997.o max8997-irq.o
diff --git a/drivers/mfd/max77693.c b/drivers/mfd/max77693.c
new file mode 100644
index 000000000000..c852515e68c8
--- /dev/null
+++ b/drivers/mfd/max77693.c
@@ -0,0 +1,217 @@
+/*
+ * max77693.c - mfd core driver for the MAX 77693
+ *
+ * Copyright (C) 2012 Samsung Electronics
+ * SangYoung Son <hello.son@smasung.com>
+ *
+ * This program is not provided / owned by Maxim Integrated Products.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ * This driver is based on max8997.c
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/i2c.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/pm_runtime.h>
+#include <linux/mutex.h>
+#include <linux/mfd/core.h>
+#include <linux/mfd/max77693.h>
+#include <linux/mfd/max77693-private.h>
+#include <linux/regulator/machine.h>
+#include <linux/regmap.h>
+
+#define I2C_ADDR_PMIC	(0xCC >> 1)	/* Charger, Flash LED */
+#define I2C_ADDR_MUIC	(0x4A >> 1)
+#define I2C_ADDR_HAPTIC	(0x90 >> 1)
+
+static struct mfd_cell max77693_devs[] = {
+	{ .name = "max77693-pmic", },
+	{ .name = "max77693-charger", },
+	{ .name = "max77693-flash", },
+	{ .name = "max77693-muic", },
+	{ .name = "max77693-haptic", },
+};
+
+int max77693_read_reg(struct regmap *map, u8 reg, u8 *dest)
+{
+	unsigned int val;
+	int ret;
+
+	ret = regmap_read(map, reg, &val);
+	*dest = val;
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(max77693_read_reg);
+
+int max77693_bulk_read(struct regmap *map, u8 reg, int count, u8 *buf)
+{
+	int ret;
+
+	ret = regmap_bulk_read(map, reg, buf, count);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(max77693_bulk_read);
+
+int max77693_write_reg(struct regmap *map, u8 reg, u8 value)
+{
+	int ret;
+
+	ret = regmap_write(map, reg, value);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(max77693_write_reg);
+
+int max77693_bulk_write(struct regmap *map, u8 reg, int count, u8 *buf)
+{
+	int ret;
+
+	ret = regmap_bulk_write(map, reg, buf, count);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(max77693_bulk_write);
+
+int max77693_update_reg(struct regmap *map, u8 reg, u8 val, u8 mask)
+{
+	int ret;
+
+	ret = regmap_update_bits(map, reg, mask, val);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(max77693_update_reg);
+
+static const struct regmap_config max77693_regmap_config = {
+	.reg_bits = 8,
+	.val_bits = 8,
+	.max_register = MAX77693_PMIC_REG_END,
+};
+
+static int max77693_i2c_probe(struct i2c_client *i2c,
+			      const struct i2c_device_id *id)
+{
+	struct max77693_dev *max77693;
+	struct max77693_platform_data *pdata = i2c->dev.platform_data;
+	u8 reg_data;
+	int ret = 0;
+
+	max77693 = devm_kzalloc(&i2c->dev,
+			sizeof(struct max77693_dev), GFP_KERNEL);
+	if (max77693 == NULL)
+		return -ENOMEM;
+
+	max77693->regmap = devm_regmap_init_i2c(i2c, &max77693_regmap_config);
+	if (IS_ERR(max77693->regmap)) {
+		ret = PTR_ERR(max77693->regmap);
+		dev_err(max77693->dev,"failed to allocate register map: %d\n",
+				ret);
+		goto err_regmap;
+	}
+
+	i2c_set_clientdata(i2c, max77693);
+	max77693->dev = &i2c->dev;
+	max77693->i2c = i2c;
+	max77693->irq = i2c->irq;
+	max77693->type = id->driver_data;
+
+	if (!pdata)
+		goto err_regmap;
+
+	max77693->wakeup = pdata->wakeup;
+
+	mutex_init(&max77693->iolock);
+
+	if (max77693_read_reg(max77693->regmap,
+				MAX77693_PMIC_REG_PMIC_ID2, &reg_data) < 0) {
+		dev_err(max77693->dev, "device not found on this channel\n");
+		ret = -ENODEV;
+		goto err_regmap;
+	} else
+		dev_info(max77693->dev, "device ID: 0x%x\n", reg_data);
+
+	max77693->muic = i2c_new_dummy(i2c->adapter, I2C_ADDR_MUIC);
+	i2c_set_clientdata(max77693->muic, max77693);
+
+	max77693->haptic = i2c_new_dummy(i2c->adapter, I2C_ADDR_HAPTIC);
+	i2c_set_clientdata(max77693->haptic, max77693);
+
+	pm_runtime_set_active(max77693->dev);
+
+	ret = mfd_add_devices(max77693->dev, -1, max77693_devs,
+			ARRAY_SIZE(max77693_devs), NULL, 0);
+	if (ret < 0)
+		goto err_mfd;
+
+	return ret;
+
+err_mfd:
+	i2c_unregister_device(max77693->muic);
+	i2c_unregister_device(max77693->haptic);
+err_regmap:
+	kfree(max77693);
+
+	return ret;
+}
+
+static int max77693_i2c_remove(struct i2c_client *i2c)
+{
+	struct max77693_dev *max77693 = i2c_get_clientdata(i2c);
+
+	mfd_remove_devices(max77693->dev);
+	i2c_unregister_device(max77693->muic);
+	i2c_unregister_device(max77693->haptic);
+
+	return 0;
+}
+
+static const struct i2c_device_id max77693_i2c_id[] = {
+	{ "max77693", TYPE_MAX77693 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, max77693_i2c_id);
+
+static struct i2c_driver max77693_i2c_driver = {
+	.driver = {
+		   .name = "max77693",
+		   .owner = THIS_MODULE,
+	},
+	.probe = max77693_i2c_probe,
+	.remove = max77693_i2c_remove,
+	.id_table = max77693_i2c_id,
+};
+
+static int __init max77693_i2c_init(void)
+{
+	return i2c_add_driver(&max77693_i2c_driver);
+}
+/* init early so consumer devices can complete system boot */
+subsys_initcall(max77693_i2c_init);
+
+static void __exit max77693_i2c_exit(void)
+{
+	i2c_del_driver(&max77693_i2c_driver);
+}
+module_exit(max77693_i2c_exit);
+
+MODULE_DESCRIPTION("MAXIM 77693 multi-function core driver");
+MODULE_AUTHOR("SangYoung, Son <hello.son@samsung.com>");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/mfd/max77693-private.h b/include/linux/mfd/max77693-private.h
new file mode 100644
index 000000000000..bf6077d3c43c
--- /dev/null
+++ b/include/linux/mfd/max77693-private.h
@@ -0,0 +1,217 @@
+/*
+ * max77693-private.h - Voltage regulator driver for the Maxim 77693
+ *
+ *  Copyright (C) 2012 Samsung Electrnoics
+ *  SangYoung Son <hello.son@samsung.com>
+ *
+ * This program is not provided / owned by Maxim Integrated Products.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef __LINUX_MFD_MAX77693_PRIV_H
+#define __LINUX_MFD_MAX77693_PRIV_H
+
+#include <linux/i2c.h>
+
+#define MAX77693_NUM_IRQ_MUIC_REGS	3
+#define MAX77693_REG_INVALID		(0xff)
+
+/* Slave addr = 0xCC: PMIC, Charger, Flash LED */
+enum max77693_pmic_reg {
+	MAX77693_LED_REG_IFLASH1			= 0x00,
+	MAX77693_LED_REG_IFLASH2			= 0x01,
+	MAX77693_LED_REG_ITORCH				= 0x02,
+	MAX77693_LED_REG_ITORCHTIMER			= 0x03,
+	MAX77693_LED_REG_FLASH_TIMER			= 0x04,
+	MAX77693_LED_REG_FLASH_EN			= 0x05,
+	MAX77693_LED_REG_MAX_FLASH1			= 0x06,
+	MAX77693_LED_REG_MAX_FLASH2			= 0x07,
+	MAX77693_LED_REG_MAX_FLASH3			= 0x08,
+	MAX77693_LED_REG_MAX_FLASH4			= 0x09,
+	MAX77693_LED_REG_VOUT_CNTL			= 0x0A,
+	MAX77693_LED_REG_VOUT_FLASH1			= 0x0B,
+	MAX77693_LED_REG_VOUT_FLASH2			= 0x0C,
+	MAX77693_LED_REG_FLASH_INT			= 0x0E,
+	MAX77693_LED_REG_FLASH_INT_MASK			= 0x0F,
+	MAX77693_LED_REG_FLASH_INT_STATUS		= 0x10,
+
+	MAX77693_PMIC_REG_PMIC_ID1			= 0x20,
+	MAX77693_PMIC_REG_PMIC_ID2			= 0x21,
+	MAX77693_PMIC_REG_INTSRC			= 0x22,
+	MAX77693_PMIC_REG_INTSRC_MASK			= 0x23,
+	MAX77693_PMIC_REG_TOPSYS_INT			= 0x24,
+	MAX77693_PMIC_REG_TOPSYS_INT_MASK		= 0x26,
+	MAX77693_PMIC_REG_TOPSYS_STAT			= 0x28,
+	MAX77693_PMIC_REG_MAINCTRL1			= 0x2A,
+	MAX77693_PMIC_REG_LSCNFG			= 0x2B,
+
+	MAX77693_CHG_REG_CHG_INT			= 0xB0,
+	MAX77693_CHG_REG_CHG_INT_MASK			= 0xB1,
+	MAX77693_CHG_REG_CHG_INT_OK			= 0xB2,
+	MAX77693_CHG_REG_CHG_DETAILS_00			= 0xB3,
+	MAX77693_CHG_REG_CHG_DETAILS_01			= 0xB4,
+	MAX77693_CHG_REG_CHG_DETAILS_02			= 0xB5,
+	MAX77693_CHG_REG_CHG_DETAILS_03			= 0xB6,
+	MAX77693_CHG_REG_CHG_CNFG_00			= 0xB7,
+	MAX77693_CHG_REG_CHG_CNFG_01			= 0xB8,
+	MAX77693_CHG_REG_CHG_CNFG_02			= 0xB9,
+	MAX77693_CHG_REG_CHG_CNFG_03			= 0xBA,
+	MAX77693_CHG_REG_CHG_CNFG_04			= 0xBB,
+	MAX77693_CHG_REG_CHG_CNFG_05			= 0xBC,
+	MAX77693_CHG_REG_CHG_CNFG_06			= 0xBD,
+	MAX77693_CHG_REG_CHG_CNFG_07			= 0xBE,
+	MAX77693_CHG_REG_CHG_CNFG_08			= 0xBF,
+	MAX77693_CHG_REG_CHG_CNFG_09			= 0xC0,
+	MAX77693_CHG_REG_CHG_CNFG_10			= 0xC1,
+	MAX77693_CHG_REG_CHG_CNFG_11			= 0xC2,
+	MAX77693_CHG_REG_CHG_CNFG_12			= 0xC3,
+	MAX77693_CHG_REG_CHG_CNFG_13			= 0xC4,
+	MAX77693_CHG_REG_CHG_CNFG_14			= 0xC5,
+	MAX77693_CHG_REG_SAFEOUT_CTRL			= 0xC6,
+
+	MAX77693_PMIC_REG_END,
+};
+
+/* Slave addr = 0x4A: MUIC */
+enum max77693_muic_reg {
+	MAX77693_MUIC_REG_ID		= 0x00,
+	MAX77693_MUIC_REG_INT1		= 0x01,
+	MAX77693_MUIC_REG_INT2		= 0x02,
+	MAX77693_MUIC_REG_INT3		= 0x03,
+	MAX77693_MUIC_REG_STATUS1	= 0x04,
+	MAX77693_MUIC_REG_STATUS2	= 0x05,
+	MAX77693_MUIC_REG_STATUS3	= 0x06,
+	MAX77693_MUIC_REG_INTMASK1	= 0x07,
+	MAX77693_MUIC_REG_INTMASK2	= 0x08,
+	MAX77693_MUIC_REG_INTMASK3	= 0x09,
+	MAX77693_MUIC_REG_CDETCTRL1	= 0x0A,
+	MAX77693_MUIC_REG_CDETCTRL2	= 0x0B,
+	MAX77693_MUIC_REG_CTRL1		= 0x0C,
+	MAX77693_MUIC_REG_CTRL2		= 0x0D,
+	MAX77693_MUIC_REG_CTRL3		= 0x0E,
+
+	MAX77693_MUIC_REG_END,
+};
+
+/* Slave addr = 0x90: Haptic */
+enum max77693_haptic_reg {
+	MAX77693_HAPTIC_REG_STATUS		= 0x00,
+	MAX77693_HAPTIC_REG_CONFIG1		= 0x01,
+	MAX77693_HAPTIC_REG_CONFIG2		= 0x02,
+	MAX77693_HAPTIC_REG_CONFIG_CHNL		= 0x03,
+	MAX77693_HAPTIC_REG_CONFG_CYC1		= 0x04,
+	MAX77693_HAPTIC_REG_CONFG_CYC2		= 0x05,
+	MAX77693_HAPTIC_REG_CONFIG_PER1		= 0x06,
+	MAX77693_HAPTIC_REG_CONFIG_PER2		= 0x07,
+	MAX77693_HAPTIC_REG_CONFIG_PER3		= 0x08,
+	MAX77693_HAPTIC_REG_CONFIG_PER4		= 0x09,
+	MAX77693_HAPTIC_REG_CONFIG_DUTY1	= 0x0A,
+	MAX77693_HAPTIC_REG_CONFIG_DUTY2	= 0x0B,
+	MAX77693_HAPTIC_REG_CONFIG_PWM1		= 0x0C,
+	MAX77693_HAPTIC_REG_CONFIG_PWM2		= 0x0D,
+	MAX77693_HAPTIC_REG_CONFIG_PWM3		= 0x0E,
+	MAX77693_HAPTIC_REG_CONFIG_PWM4		= 0x0F,
+	MAX77693_HAPTIC_REG_REV			= 0x10,
+
+	MAX77693_HAPTIC_REG_END,
+};
+
+enum max77693_irq_source {
+	LED_INT = 0,
+	TOPSYS_INT,
+	CHG_INT,
+	MUIC_INT1,
+	MUIC_INT2,
+	MUIC_INT3,
+
+	MAX77693_IRQ_GROUP_NR,
+};
+
+enum max77693_irq {
+	/* PMIC - FLASH */
+	MAX77693_LED_IRQ_FLED2_OPEN,
+	MAX77693_LED_IRQ_FLED2_SHORT,
+	MAX77693_LED_IRQ_FLED1_OPEN,
+	MAX77693_LED_IRQ_FLED1_SHORT,
+	MAX77693_LED_IRQ_MAX_FLASH,
+
+	/* PMIC - TOPSYS */
+	MAX77693_TOPSYS_IRQ_T120C_INT,
+	MAX77693_TOPSYS_IRQ_T140C_INT,
+	MAX77693_TOPSYS_IRQ_LOWSYS_INT,
+
+	/* PMIC - Charger */
+	MAX77693_CHG_IRQ_BYP_I,
+	MAX77693_CHG_IRQ_THM_I,
+	MAX77693_CHG_IRQ_BAT_I,
+	MAX77693_CHG_IRQ_CHG_I,
+	MAX77693_CHG_IRQ_CHGIN_I,
+
+	/* MUIC INT1 */
+	MAX77693_MUIC_IRQ_INT1_ADC,
+	MAX77693_MUIC_IRQ_INT1_ADC_LOW,
+	MAX77693_MUIC_IRQ_INT1_ADC_ERR,
+	MAX77693_MUIC_IRQ_INT1_ADC1K,
+
+	/* MUIC INT2 */
+	MAX77693_MUIC_IRQ_INT2_CHGTYP,
+	MAX77693_MUIC_IRQ_INT2_CHGDETREUN,
+	MAX77693_MUIC_IRQ_INT2_DCDTMR,
+	MAX77693_MUIC_IRQ_INT2_DXOVP,
+	MAX77693_MUIC_IRQ_INT2_VBVOLT,
+	MAX77693_MUIC_IRQ_INT2_VIDRM,
+
+	/* MUIC INT3 */
+	MAX77693_MUIC_IRQ_INT3_EOC,
+	MAX77693_MUIC_IRQ_INT3_CGMBC,
+	MAX77693_MUIC_IRQ_INT3_OVP,
+	MAX77693_MUIC_IRQ_INT3_MBCCHG_ERR,
+	MAX77693_MUIC_IRQ_INT3_CHG_ENABLED,
+	MAX77693_MUIC_IRQ_INT3_BAT_DET,
+
+	MAX77693_IRQ_NR,
+};
+
+struct max77693_dev {
+	struct device *dev;
+	struct i2c_client *i2c;		/* 0xCC , PMIC, Charger, Flash LED */
+	struct i2c_client *muic;	/* 0x4A , MUIC */
+	struct i2c_client *haptic;	/* 0x90 , Haptic */
+	struct mutex iolock;
+
+	int type;
+
+	struct regmap *regmap;
+	struct regmap *regmap_muic;
+	struct regmap *regmap_haptic;
+
+	int irq;
+	bool wakeup;
+};
+
+enum max77693_types {
+	TYPE_MAX77693,
+};
+
+extern int max77693_read_reg(struct regmap *map, u8 reg, u8 *dest);
+extern int max77693_bulk_read(struct regmap *map, u8 reg, int count,
+				u8 *buf);
+extern int max77693_write_reg(struct regmap *map, u8 reg, u8 value);
+extern int max77693_bulk_write(struct regmap *map, u8 reg, int count,
+				u8 *buf);
+extern int max77693_update_reg(struct regmap *map, u8 reg, u8 val, u8 mask);
+
+#endif /*  __LINUX_MFD_MAX77693_PRIV_H */
diff --git a/include/linux/mfd/max77693.h b/include/linux/mfd/max77693.h
new file mode 100644
index 000000000000..5020b8616daa
--- /dev/null
+++ b/include/linux/mfd/max77693.h
@@ -0,0 +1,37 @@
+/*
+ * max77693.h - Driver for the Maxim 77693
+ *
+ *  Copyright (C) 2012 Samsung Electrnoics
+ *  SangYoung Son <hello.son@samsung.com>
+ *
+ * This program is not provided / owned by Maxim Integrated Products.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ * This driver is based on max8997.h
+ *
+ * MAX77693 has PMIC, Charger, Flash LED, Haptic, MUIC devices.
+ * The devices share the same I2C bus and included in
+ * this mfd driver.
+ */
+
+#ifndef __LINUX_MFD_MAX77693_H
+#define __LINUX_MFD_MAX77693_H
+
+struct max77693_platform_data {
+	/* IRQ */
+	int wakeup;
+};
+#endif	/* __LINUX_MFD_MAX77693_H */
-- 
cgit v1.3-7-g2ca7


From 6592ebb3979c1ec0e37eb06553ef5ce9d6f5f025 Mon Sep 17 00:00:00 2001
From: Chanwoo Choi <cw00.choi@samsung.com>
Date: Mon, 14 May 2012 22:54:20 +0200
Subject: mfd: Add MAX77693 irq handler

This patch supports IRQ handling for MAX77693.

Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
Signed-off-by: Myungjoo Ham <myungjoo.ham@samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/Makefile         |   2 +-
 drivers/mfd/max77693-irq.c   | 309 +++++++++++++++++++++++++++++++++++++++++++
 drivers/mfd/max77693.c       |  32 +++++
 include/linux/mfd/max77693.h |   1 -
 4 files changed, 342 insertions(+), 2 deletions(-)
 create mode 100644 drivers/mfd/max77693-irq.c

(limited to 'include/linux')

diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index db0262b34af6..d7138510c880 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -78,7 +78,7 @@ obj-$(CONFIG_PMIC_DA9052)	+= da9052-core.o
 obj-$(CONFIG_MFD_DA9052_SPI)	+= da9052-spi.o
 obj-$(CONFIG_MFD_DA9052_I2C)	+= da9052-i2c.o
 
-obj-$(CONFIG_MFD_MAX77693)	+= max77693.o
+obj-$(CONFIG_MFD_MAX77693)	+= max77693.o max77693-irq.o
 max8925-objs			:= max8925-core.o max8925-i2c.o
 obj-$(CONFIG_MFD_MAX8925)	+= max8925.o
 obj-$(CONFIG_MFD_MAX8997)	+= max8997.o max8997-irq.o
diff --git a/drivers/mfd/max77693-irq.c b/drivers/mfd/max77693-irq.c
new file mode 100644
index 000000000000..2b403569e0a6
--- /dev/null
+++ b/drivers/mfd/max77693-irq.c
@@ -0,0 +1,309 @@
+/*
+ * max77693-irq.c - Interrupt controller support for MAX77693
+ *
+ * Copyright (C) 2012 Samsung Electronics Co.Ltd
+ * SangYoung Son <hello.son@samsung.com>
+ *
+ * This program is not provided / owned by Maxim Integrated Products.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ * This driver is based on max8997-irq.c
+ */
+
+#include <linux/err.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/irqdomain.h>
+#include <linux/mfd/max77693.h>
+#include <linux/mfd/max77693-private.h>
+
+static const u8 max77693_mask_reg[] = {
+	[LED_INT] = MAX77693_LED_REG_FLASH_INT_MASK,
+	[TOPSYS_INT] = MAX77693_PMIC_REG_TOPSYS_INT_MASK,
+	[CHG_INT] = MAX77693_CHG_REG_CHG_INT_MASK,
+	[MUIC_INT1] = MAX77693_MUIC_REG_INTMASK1,
+	[MUIC_INT2] = MAX77693_MUIC_REG_INTMASK2,
+	[MUIC_INT3] = MAX77693_MUIC_REG_INTMASK3,
+};
+
+static struct regmap *max77693_get_regmap(struct max77693_dev *max77693,
+				enum max77693_irq_source src)
+{
+	switch (src) {
+	case LED_INT ... CHG_INT:
+		return max77693->regmap;
+	case MUIC_INT1 ... MUIC_INT3:
+		return max77693->regmap_muic;
+	default:
+		return ERR_PTR(-EINVAL);
+	}
+}
+
+struct max77693_irq_data {
+	int mask;
+	enum max77693_irq_source group;
+};
+
+#define DECLARE_IRQ(idx, _group, _mask)		\
+	[(idx)] = { .group = (_group), .mask = (_mask) }
+static const struct max77693_irq_data max77693_irqs[] = {
+	DECLARE_IRQ(MAX77693_LED_IRQ_FLED2_OPEN,	LED_INT, 1 << 0),
+	DECLARE_IRQ(MAX77693_LED_IRQ_FLED2_SHORT,	LED_INT, 1 << 1),
+	DECLARE_IRQ(MAX77693_LED_IRQ_FLED1_OPEN,	LED_INT, 1 << 2),
+	DECLARE_IRQ(MAX77693_LED_IRQ_FLED1_SHORT,	LED_INT, 1 << 3),
+	DECLARE_IRQ(MAX77693_LED_IRQ_MAX_FLASH,		LED_INT, 1 << 4),
+
+	DECLARE_IRQ(MAX77693_TOPSYS_IRQ_T120C_INT,	TOPSYS_INT, 1 << 0),
+	DECLARE_IRQ(MAX77693_TOPSYS_IRQ_T140C_INT,	TOPSYS_INT, 1 << 1),
+	DECLARE_IRQ(MAX77693_TOPSYS_IRQ_LOWSYS_INT,	TOPSYS_INT, 1 << 3),
+
+	DECLARE_IRQ(MAX77693_CHG_IRQ_BYP_I,		CHG_INT, 1 << 0),
+	DECLARE_IRQ(MAX77693_CHG_IRQ_THM_I,		CHG_INT, 1 << 2),
+	DECLARE_IRQ(MAX77693_CHG_IRQ_BAT_I,		CHG_INT, 1 << 3),
+	DECLARE_IRQ(MAX77693_CHG_IRQ_CHG_I,		CHG_INT, 1 << 4),
+	DECLARE_IRQ(MAX77693_CHG_IRQ_CHGIN_I,		CHG_INT, 1 << 6),
+
+	DECLARE_IRQ(MAX77693_MUIC_IRQ_INT1_ADC,		MUIC_INT1, 1 << 0),
+	DECLARE_IRQ(MAX77693_MUIC_IRQ_INT1_ADC_LOW,	MUIC_INT1, 1 << 1),
+	DECLARE_IRQ(MAX77693_MUIC_IRQ_INT1_ADC_ERR,	MUIC_INT1, 1 << 2),
+	DECLARE_IRQ(MAX77693_MUIC_IRQ_INT1_ADC1K,	MUIC_INT1, 1 << 3),
+
+	DECLARE_IRQ(MAX77693_MUIC_IRQ_INT2_CHGTYP,	MUIC_INT2, 1 << 0),
+	DECLARE_IRQ(MAX77693_MUIC_IRQ_INT2_CHGDETREUN,	MUIC_INT2, 1 << 1),
+	DECLARE_IRQ(MAX77693_MUIC_IRQ_INT2_DCDTMR,	MUIC_INT2, 1 << 2),
+	DECLARE_IRQ(MAX77693_MUIC_IRQ_INT2_DXOVP,	MUIC_INT2, 1 << 3),
+	DECLARE_IRQ(MAX77693_MUIC_IRQ_INT2_VBVOLT,	MUIC_INT2, 1 << 4),
+	DECLARE_IRQ(MAX77693_MUIC_IRQ_INT2_VIDRM,	MUIC_INT2, 1 << 5),
+
+	DECLARE_IRQ(MAX77693_MUIC_IRQ_INT3_EOC,		MUIC_INT3, 1 << 0),
+	DECLARE_IRQ(MAX77693_MUIC_IRQ_INT3_CGMBC,	MUIC_INT3, 1 << 1),
+	DECLARE_IRQ(MAX77693_MUIC_IRQ_INT3_OVP,		MUIC_INT3, 1 << 2),
+	DECLARE_IRQ(MAX77693_MUIC_IRQ_INT3_MBCCHG_ERR,	MUIC_INT3, 1 << 3),
+	DECLARE_IRQ(MAX77693_MUIC_IRQ_INT3_CHG_ENABLED,	MUIC_INT3, 1 << 4),
+	DECLARE_IRQ(MAX77693_MUIC_IRQ_INT3_BAT_DET,	MUIC_INT3, 1 << 5),
+};
+
+static void max77693_irq_lock(struct irq_data *data)
+{
+	struct max77693_dev *max77693 = irq_get_chip_data(data->irq);
+
+	mutex_lock(&max77693->irqlock);
+}
+
+static void max77693_irq_sync_unlock(struct irq_data *data)
+{
+	struct max77693_dev *max77693 = irq_get_chip_data(data->irq);
+	int i;
+
+	for (i = 0; i < MAX77693_IRQ_GROUP_NR; i++) {
+		u8 mask_reg = max77693_mask_reg[i];
+		struct regmap *map = max77693_get_regmap(max77693, i);
+
+		if (mask_reg == MAX77693_REG_INVALID ||
+				IS_ERR_OR_NULL(map))
+			continue;
+		max77693->irq_masks_cache[i] = max77693->irq_masks_cur[i];
+
+		max77693_write_reg(map, max77693_mask_reg[i],
+				max77693->irq_masks_cur[i]);
+	}
+
+	mutex_unlock(&max77693->irqlock);
+}
+
+static const inline struct max77693_irq_data *
+irq_to_max77693_irq(struct max77693_dev *max77693, int irq)
+{
+	return &max77693_irqs[irq];
+}
+
+static void max77693_irq_mask(struct irq_data *data)
+{
+	struct max77693_dev *max77693 = irq_get_chip_data(data->irq);
+	const struct max77693_irq_data *irq_data =
+				irq_to_max77693_irq(max77693, data->irq);
+
+	if (irq_data->group >= MUIC_INT1 && irq_data->group <= MUIC_INT3)
+		max77693->irq_masks_cur[irq_data->group] &= ~irq_data->mask;
+	else
+		max77693->irq_masks_cur[irq_data->group] |= irq_data->mask;
+}
+
+static void max77693_irq_unmask(struct irq_data *data)
+{
+	struct max77693_dev *max77693 = irq_get_chip_data(data->irq);
+	const struct max77693_irq_data *irq_data =
+	    irq_to_max77693_irq(max77693, data->irq);
+
+	if (irq_data->group >= MUIC_INT1 && irq_data->group <= MUIC_INT3)
+		max77693->irq_masks_cur[irq_data->group] |= irq_data->mask;
+	else
+		max77693->irq_masks_cur[irq_data->group] &= ~irq_data->mask;
+}
+
+static struct irq_chip max77693_irq_chip = {
+	.name			= "max77693",
+	.irq_bus_lock		= max77693_irq_lock,
+	.irq_bus_sync_unlock	= max77693_irq_sync_unlock,
+	.irq_mask		= max77693_irq_mask,
+	.irq_unmask		= max77693_irq_unmask,
+};
+
+#define MAX77693_IRQSRC_CHG		(1 << 0)
+#define MAX77693_IRQSRC_TOP		(1 << 1)
+#define MAX77693_IRQSRC_FLASH		(1 << 2)
+#define MAX77693_IRQSRC_MUIC		(1 << 3)
+static irqreturn_t max77693_irq_thread(int irq, void *data)
+{
+	struct max77693_dev *max77693 = data;
+	u8 irq_reg[MAX77693_IRQ_GROUP_NR] = {};
+	u8 irq_src;
+	int ret;
+	int i, cur_irq;
+
+	ret = max77693_read_reg(max77693->regmap, MAX77693_PMIC_REG_INTSRC,
+				&irq_src);
+	if (ret < 0) {
+		dev_err(max77693->dev, "Failed to read interrupt source: %d\n",
+				ret);
+		return IRQ_NONE;
+	}
+
+	if (irq_src & MAX77693_IRQSRC_CHG)
+		/* CHG_INT */
+		ret = max77693_read_reg(max77693->regmap, MAX77693_CHG_REG_CHG_INT,
+				&irq_reg[CHG_INT]);
+
+	if (irq_src & MAX77693_IRQSRC_TOP)
+		/* TOPSYS_INT */
+		ret = max77693_read_reg(max77693->regmap,
+			MAX77693_PMIC_REG_TOPSYS_INT, &irq_reg[TOPSYS_INT]);
+
+	if (irq_src & MAX77693_IRQSRC_FLASH)
+		/* LED_INT */
+		ret = max77693_read_reg(max77693->regmap,
+			MAX77693_LED_REG_FLASH_INT, &irq_reg[LED_INT]);
+
+	if (irq_src & MAX77693_IRQSRC_MUIC)
+		/* MUIC INT1 ~ INT3 */
+		max77693_bulk_read(max77693->regmap, MAX77693_MUIC_REG_INT1,
+			MAX77693_NUM_IRQ_MUIC_REGS, &irq_reg[MUIC_INT1]);
+
+	/* Apply masking */
+	for (i = 0; i < MAX77693_IRQ_GROUP_NR; i++) {
+		if (i >= MUIC_INT1 && i <= MUIC_INT3)
+			irq_reg[i] &= max77693->irq_masks_cur[i];
+		else
+			irq_reg[i] &= ~max77693->irq_masks_cur[i];
+	}
+
+	/* Report */
+	for (i = 0; i < MAX77693_IRQ_NR; i++) {
+		if (irq_reg[max77693_irqs[i].group] & max77693_irqs[i].mask) {
+			cur_irq = irq_find_mapping(max77693->irq_domain, i);
+			if (cur_irq)
+				handle_nested_irq(cur_irq);
+		}
+	}
+
+	return IRQ_HANDLED;
+}
+
+int max77693_irq_resume(struct max77693_dev *max77693)
+{
+	if (max77693->irq)
+		max77693_irq_thread(0, max77693);
+
+	return 0;
+}
+
+static int max77693_irq_domain_map(struct irq_domain *d, unsigned int irq,
+				irq_hw_number_t hw)
+{
+	struct max77693_dev *max77693 = d->host_data;
+
+	irq_set_chip_data(irq, max77693);
+	irq_set_chip_and_handler(irq, &max77693_irq_chip, handle_edge_irq);
+	irq_set_nested_thread(irq, 1);
+#ifdef CONFIG_ARM
+	set_irq_flags(irq, IRQF_VALID);
+#else
+	irq_set_noprobe(irq);
+#endif
+	return 0;
+}
+
+static struct irq_domain_ops max77693_irq_domain_ops = {
+	.map = max77693_irq_domain_map,
+};
+
+int max77693_irq_init(struct max77693_dev *max77693)
+{
+	struct irq_domain *domain;
+	int i;
+	int ret;
+
+	mutex_init(&max77693->irqlock);
+
+	/* Mask individual interrupt sources */
+	for (i = 0; i < MAX77693_IRQ_GROUP_NR; i++) {
+		struct regmap *map;
+		/* MUIC IRQ  0:MASK 1:NOT MASK */
+		/* Other IRQ 1:MASK 0:NOT MASK */
+		if (i >= MUIC_INT1 && i <= MUIC_INT3) {
+			max77693->irq_masks_cur[i] = 0x00;
+			max77693->irq_masks_cache[i] = 0x00;
+		} else {
+			max77693->irq_masks_cur[i] = 0xff;
+			max77693->irq_masks_cache[i] = 0xff;
+		}
+		map = max77693_get_regmap(max77693, i);
+
+		if (IS_ERR_OR_NULL(map))
+			continue;
+		if (max77693_mask_reg[i] == MAX77693_REG_INVALID)
+			continue;
+		if (i >= MUIC_INT1 && i <= MUIC_INT3)
+			max77693_write_reg(map, max77693_mask_reg[i], 0x00);
+		else
+			max77693_write_reg(map, max77693_mask_reg[i], 0xff);
+	}
+
+	domain = irq_domain_add_linear(NULL, MAX77693_IRQ_NR,
+					&max77693_irq_domain_ops, max77693);
+	if (!domain) {
+		dev_err(max77693->dev, "could not create irq domain\n");
+		return -ENODEV;
+	}
+	max77693->irq_domain = domain;
+
+	ret = request_threaded_irq(max77693->irq, NULL, max77693_irq_thread,
+				   IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
+				   "max77693-irq", max77693);
+
+	if (ret)
+		dev_err(max77693->dev, "Failed to request IRQ %d: %d\n",
+			max77693->irq, ret);
+
+	return 0;
+}
+
+void max77693_irq_exit(struct max77693_dev *max77693)
+{
+	if (max77693->irq)
+		free_irq(max77693->irq, max77693);
+}
diff --git a/drivers/mfd/max77693.c b/drivers/mfd/max77693.c
index c852515e68c8..e9e4278722f3 100644
--- a/drivers/mfd/max77693.c
+++ b/drivers/mfd/max77693.c
@@ -154,6 +154,10 @@ static int max77693_i2c_probe(struct i2c_client *i2c,
 	max77693->haptic = i2c_new_dummy(i2c->adapter, I2C_ADDR_HAPTIC);
 	i2c_set_clientdata(max77693->haptic, max77693);
 
+	ret = max77693_irq_init(max77693);
+	if (ret < 0)
+		goto err_mfd;
+
 	pm_runtime_set_active(max77693->dev);
 
 	ret = mfd_add_devices(max77693->dev, -1, max77693_devs,
@@ -161,6 +165,8 @@ static int max77693_i2c_probe(struct i2c_client *i2c,
 	if (ret < 0)
 		goto err_mfd;
 
+	device_init_wakeup(max77693->dev, pdata->wakeup);
+
 	return ret;
 
 err_mfd:
@@ -189,10 +195,36 @@ static const struct i2c_device_id max77693_i2c_id[] = {
 };
 MODULE_DEVICE_TABLE(i2c, max77693_i2c_id);
 
+static int max77693_suspend(struct device *dev)
+{
+	struct i2c_client *i2c = container_of(dev, struct i2c_client, dev);
+	struct max77693_dev *max77693 = i2c_get_clientdata(i2c);
+
+	if (device_may_wakeup(dev))
+		irq_set_irq_wake(max77693->irq, 1);
+	return 0;
+}
+
+static int max77693_resume(struct device *dev)
+{
+	struct i2c_client *i2c = container_of(dev, struct i2c_client, dev);
+	struct max77693_dev *max77693 = i2c_get_clientdata(i2c);
+
+	if (device_may_wakeup(dev))
+		irq_set_irq_wake(max77693->irq, 0);
+	return max77693_irq_resume(max77693);
+}
+
+const struct dev_pm_ops max77693_pm = {
+	.suspend = max77693_suspend,
+	.resume = max77693_resume,
+};
+
 static struct i2c_driver max77693_i2c_driver = {
 	.driver = {
 		   .name = "max77693",
 		   .owner = THIS_MODULE,
+		   .pm = &max77693_pm,
 	},
 	.probe = max77693_i2c_probe,
 	.remove = max77693_i2c_remove,
diff --git a/include/linux/mfd/max77693.h b/include/linux/mfd/max77693.h
index 5020b8616daa..1d28ae90384e 100644
--- a/include/linux/mfd/max77693.h
+++ b/include/linux/mfd/max77693.h
@@ -31,7 +31,6 @@
 #define __LINUX_MFD_MAX77693_H
 
 struct max77693_platform_data {
-	/* IRQ */
 	int wakeup;
 };
 #endif	/* __LINUX_MFD_MAX77693_H */
-- 
cgit v1.3-7-g2ca7


From cd99758ba3bde64347a8ece381cbae2fb5c745b2 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Mon, 14 May 2012 23:14:24 +0200
Subject: mfd: Convert wm831x to irq_domain

The modern idiom is to use irq_domain to allocate interrupts. This is
useful partly to allow further infrastructure to be based on the domains
and partly because it makes it much easier to allocate virtual interrupts
to devices as we don't need to allocate a contiguous range of interrupt
numbers.

Convert the wm831x driver over to this infrastructure, using a legacy
IRQ mapping if an irq_base is specified in platform data and otherwise
using a linear mapping, always registering the interrupts even if they
won't ever be used. Only boards which need to use the GPIOs as
interrupts should need to use an irq_base.

This means that we can't use the MFD irq_base management since the
unless we're using an explicit irq_base from platform data we can't rely
on a linear mapping of interrupts.  Instead we need to map things via
the irq_domain - provide a conveniencem function wm831x_irq() to save a
small amount of typing when doing so. Looking at this I couldn't clearly
see anything the MFD core could do to make this nicer.

Since we're not supporting device tree yet there's no meaningful
advantage if we don't do this conversion in one, the fact that the
interrupt resources are used for repeated IP blocks makes accessor
functions for the irq_domain more trouble to do than they're worth.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/gpio/gpio-wm831x.c            |   6 +-
 drivers/input/misc/wm831x-on.c        |   2 +-
 drivers/input/touchscreen/wm831x-ts.c |   9 +--
 drivers/mfd/Kconfig                   |   2 +
 drivers/mfd/wm831x-auxadc.c           |   6 +-
 drivers/mfd/wm831x-core.c             |  19 +++----
 drivers/mfd/wm831x-irq.c              | 101 +++++++++++++++++++++-------------
 drivers/power/wm831x_power.c          |  21 ++++---
 drivers/regulator/wm831x-dcdc.c       |  24 +++++---
 drivers/regulator/wm831x-isink.c      |   4 +-
 drivers/regulator/wm831x-ldo.c        |  10 ++--
 drivers/rtc/rtc-wm831x.c              |   2 +-
 include/linux/mfd/wm831x/core.h       |   9 ++-
 13 files changed, 131 insertions(+), 84 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpio/gpio-wm831x.c b/drivers/gpio/gpio-wm831x.c
index deb949e75ec1..e56a2165641c 100644
--- a/drivers/gpio/gpio-wm831x.c
+++ b/drivers/gpio/gpio-wm831x.c
@@ -102,10 +102,8 @@ static int wm831x_gpio_to_irq(struct gpio_chip *chip, unsigned offset)
 	struct wm831x_gpio *wm831x_gpio = to_wm831x_gpio(chip);
 	struct wm831x *wm831x = wm831x_gpio->wm831x;
 
-	if (!wm831x->irq_base)
-		return -EINVAL;
-
-	return wm831x->irq_base + WM831X_IRQ_GPIO_1 + offset;
+	return irq_create_mapping(wm831x->irq_domain,
+				  WM831X_IRQ_GPIO_1 + offset);
 }
 
 static int wm831x_gpio_set_debounce(struct gpio_chip *chip, unsigned offset,
diff --git a/drivers/input/misc/wm831x-on.c b/drivers/input/misc/wm831x-on.c
index 47f18d6bce46..6790a812a1db 100644
--- a/drivers/input/misc/wm831x-on.c
+++ b/drivers/input/misc/wm831x-on.c
@@ -73,7 +73,7 @@ static int __devinit wm831x_on_probe(struct platform_device *pdev)
 {
 	struct wm831x *wm831x = dev_get_drvdata(pdev->dev.parent);
 	struct wm831x_on *wm831x_on;
-	int irq = platform_get_irq(pdev, 0);
+	int irq = wm831x_irq(wm831x, platform_get_irq(pdev, 0));
 	int ret;
 
 	wm831x_on = kzalloc(sizeof(struct wm831x_on), GFP_KERNEL);
diff --git a/drivers/input/touchscreen/wm831x-ts.c b/drivers/input/touchscreen/wm831x-ts.c
index 4bc851a9dc3d..e83410721e38 100644
--- a/drivers/input/touchscreen/wm831x-ts.c
+++ b/drivers/input/touchscreen/wm831x-ts.c
@@ -260,15 +260,16 @@ static __devinit int wm831x_ts_probe(struct platform_device *pdev)
 	 * If we have a direct IRQ use it, otherwise use the interrupt
 	 * from the WM831x IRQ controller.
 	 */
+	wm831x_ts->data_irq = wm831x_irq(wm831x,
+					 platform_get_irq_byname(pdev,
+								 "TCHDATA"));
 	if (pdata && pdata->data_irq)
 		wm831x_ts->data_irq = pdata->data_irq;
-	else
-		wm831x_ts->data_irq = platform_get_irq_byname(pdev, "TCHDATA");
 
+	wm831x_ts->pd_irq = wm831x_irq(wm831x,
+				       platform_get_irq_byname(pdev, "TCHPD"));
 	if (pdata && pdata->pd_irq)
 		wm831x_ts->pd_irq = pdata->pd_irq;
-	else
-		wm831x_ts->pd_irq = platform_get_irq_byname(pdev, "TCHPD");
 
 	if (pdata)
 		wm831x_ts->pressure = pdata->pressure;
diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index a0e1b834af61..8325c44c04c6 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -496,6 +496,7 @@ config MFD_WM831X_I2C
 	select MFD_CORE
 	select MFD_WM831X
 	select REGMAP_I2C
+	select IRQ_DOMAIN
 	depends on I2C=y && GENERIC_HARDIRQS
 	help
 	  Support for the Wolfson Microelecronics WM831x and WM832x PMICs
@@ -508,6 +509,7 @@ config MFD_WM831X_SPI
 	select MFD_CORE
 	select MFD_WM831X
 	select REGMAP_SPI
+	select IRQ_DOMAIN
 	depends on SPI_MASTER && GENERIC_HARDIRQS
 	help
 	  Support for the Wolfson Microelecronics WM831x and WM832x PMICs
diff --git a/drivers/mfd/wm831x-auxadc.c b/drivers/mfd/wm831x-auxadc.c
index 87210954a066..6ee3018d8653 100644
--- a/drivers/mfd/wm831x-auxadc.c
+++ b/drivers/mfd/wm831x-auxadc.c
@@ -280,11 +280,11 @@ void wm831x_auxadc_init(struct wm831x *wm831x)
 	mutex_init(&wm831x->auxadc_lock);
 	INIT_LIST_HEAD(&wm831x->auxadc_pending);
 
-	if (wm831x->irq && wm831x->irq_base) {
+	if (wm831x->irq) {
 		wm831x->auxadc_read = wm831x_auxadc_read_irq;
 
-		ret = request_threaded_irq(wm831x->irq_base +
-					   WM831X_IRQ_AUXADC_DATA,
+		ret = request_threaded_irq(wm831x_irq(wm831x,
+						      WM831X_IRQ_AUXADC_DATA),
 					   NULL, wm831x_auxadc_irq, 0,
 					   "auxadc", wm831x);
 		if (ret < 0) {
diff --git a/drivers/mfd/wm831x-core.c b/drivers/mfd/wm831x-core.c
index 476e4d31a823..946698fd2dc6 100644
--- a/drivers/mfd/wm831x-core.c
+++ b/drivers/mfd/wm831x-core.c
@@ -1813,27 +1813,27 @@ int wm831x_device_init(struct wm831x *wm831x, unsigned long id, int irq)
 	case WM8310:
 		ret = mfd_add_devices(wm831x->dev, wm831x_num,
 				      wm8310_devs, ARRAY_SIZE(wm8310_devs),
-				      NULL, wm831x->irq_base);
+				      NULL, 0);
 		break;
 
 	case WM8311:
 		ret = mfd_add_devices(wm831x->dev, wm831x_num,
 				      wm8311_devs, ARRAY_SIZE(wm8311_devs),
-				      NULL, wm831x->irq_base);
+				      NULL, 0);
 		if (!pdata || !pdata->disable_touch)
 			mfd_add_devices(wm831x->dev, wm831x_num,
 					touch_devs, ARRAY_SIZE(touch_devs),
-					NULL, wm831x->irq_base);
+					NULL, 0);
 		break;
 
 	case WM8312:
 		ret = mfd_add_devices(wm831x->dev, wm831x_num,
 				      wm8312_devs, ARRAY_SIZE(wm8312_devs),
-				      NULL, wm831x->irq_base);
+				      NULL, 0);
 		if (!pdata || !pdata->disable_touch)
 			mfd_add_devices(wm831x->dev, wm831x_num,
 					touch_devs, ARRAY_SIZE(touch_devs),
-					NULL, wm831x->irq_base);
+					NULL, 0);
 		break;
 
 	case WM8320:
@@ -1842,7 +1842,7 @@ int wm831x_device_init(struct wm831x *wm831x, unsigned long id, int irq)
 	case WM8326:
 		ret = mfd_add_devices(wm831x->dev, wm831x_num,
 				      wm8320_devs, ARRAY_SIZE(wm8320_devs),
-				      NULL, wm831x->irq_base);
+				      NULL, 0);
 		break;
 
 	default:
@@ -1867,7 +1867,7 @@ int wm831x_device_init(struct wm831x *wm831x, unsigned long id, int irq)
 	if (ret & WM831X_XTAL_ENA) {
 		ret = mfd_add_devices(wm831x->dev, wm831x_num,
 				      rtc_devs, ARRAY_SIZE(rtc_devs),
-				      NULL, wm831x->irq_base);
+				      NULL, 0);
 		if (ret != 0) {
 			dev_err(wm831x->dev, "Failed to add RTC: %d\n", ret);
 			goto err_irq;
@@ -1880,7 +1880,7 @@ int wm831x_device_init(struct wm831x *wm831x, unsigned long id, int irq)
 		/* Treat errors as non-critical */
 		ret = mfd_add_devices(wm831x->dev, wm831x_num, backlight_devs,
 				      ARRAY_SIZE(backlight_devs), NULL,
-				      wm831x->irq_base);
+				      0);
 		if (ret < 0)
 			dev_err(wm831x->dev, "Failed to add backlight: %d\n",
 				ret);
@@ -1909,8 +1909,7 @@ void wm831x_device_exit(struct wm831x *wm831x)
 {
 	wm831x_otp_exit(wm831x);
 	mfd_remove_devices(wm831x->dev);
-	if (wm831x->irq_base)
-		free_irq(wm831x->irq_base + WM831X_IRQ_AUXADC_DATA, wm831x);
+	free_irq(wm831x_irq(wm831x, WM831X_IRQ_AUXADC_DATA), wm831x);
 	wm831x_irq_exit(wm831x);
 }
 
diff --git a/drivers/mfd/wm831x-irq.c b/drivers/mfd/wm831x-irq.c
index 2be9628074bd..ecc9d6d62fad 100644
--- a/drivers/mfd/wm831x-irq.c
+++ b/drivers/mfd/wm831x-irq.c
@@ -18,6 +18,7 @@
 #include <linux/irq.h>
 #include <linux/mfd/core.h>
 #include <linux/interrupt.h>
+#include <linux/irqdomain.h>
 
 #include <linux/mfd/wm831x/core.h>
 #include <linux/mfd/wm831x/pdata.h>
@@ -328,7 +329,7 @@ static inline int irq_data_to_status_reg(struct wm831x_irq_data *irq_data)
 static inline struct wm831x_irq_data *irq_to_wm831x_irq(struct wm831x *wm831x,
 							int irq)
 {
-	return &wm831x_irqs[irq - wm831x->irq_base];
+	return &wm831x_irqs[irq];
 }
 
 static void wm831x_irq_lock(struct irq_data *data)
@@ -374,7 +375,7 @@ static void wm831x_irq_enable(struct irq_data *data)
 {
 	struct wm831x *wm831x = irq_data_get_irq_chip_data(data);
 	struct wm831x_irq_data *irq_data = irq_to_wm831x_irq(wm831x,
-							     data->irq);
+							     data->hwirq);
 
 	wm831x->irq_masks_cur[irq_data->reg - 1] &= ~irq_data->mask;
 }
@@ -383,7 +384,7 @@ static void wm831x_irq_disable(struct irq_data *data)
 {
 	struct wm831x *wm831x = irq_data_get_irq_chip_data(data);
 	struct wm831x_irq_data *irq_data = irq_to_wm831x_irq(wm831x,
-							     data->irq);
+							     data->hwirq);
 
 	wm831x->irq_masks_cur[irq_data->reg - 1] |= irq_data->mask;
 }
@@ -393,7 +394,7 @@ static int wm831x_irq_set_type(struct irq_data *data, unsigned int type)
 	struct wm831x *wm831x = irq_data_get_irq_chip_data(data);
 	int irq;
 
-	irq = data->irq - wm831x->irq_base;
+	irq = data->hwirq;
 
 	if (irq < WM831X_IRQ_GPIO_1 || irq > WM831X_IRQ_GPIO_11) {
 		/* Ignore internal-only IRQs */
@@ -469,9 +470,11 @@ static irqreturn_t wm831x_irq_thread(int irq, void *data)
 	 * descriptors.
 	 */
 	if (primary & WM831X_TCHPD_INT)
-		handle_nested_irq(wm831x->irq_base + WM831X_IRQ_TCHPD);
+		handle_nested_irq(irq_find_mapping(wm831x->irq_domain,
+						   WM831X_IRQ_TCHPD));
 	if (primary & WM831X_TCHDATA_INT)
-		handle_nested_irq(wm831x->irq_base + WM831X_IRQ_TCHDATA);
+		handle_nested_irq(irq_find_mapping(wm831x->irq_domain,
+						   WM831X_IRQ_TCHDATA));
 	primary &= ~(WM831X_TCHDATA_EINT | WM831X_TCHPD_EINT);
 
 	for (i = 0; i < ARRAY_SIZE(wm831x_irqs); i++) {
@@ -507,7 +510,8 @@ static irqreturn_t wm831x_irq_thread(int irq, void *data)
 		}
 
 		if (*status & wm831x_irqs[i].mask)
-			handle_nested_irq(wm831x->irq_base + i);
+			handle_nested_irq(irq_find_mapping(wm831x->irq_domain,
+							   i));
 
 		/* Simulate an edge triggered IRQ by polling the input
 		 * status.  This is sucky but improves interoperability.
@@ -516,7 +520,8 @@ static irqreturn_t wm831x_irq_thread(int irq, void *data)
 		    wm831x->gpio_level[i - WM831X_IRQ_GPIO_1]) {
 			ret = wm831x_reg_read(wm831x, WM831X_GPIO_LEVEL);
 			while (ret & 1 << (i - WM831X_IRQ_GPIO_1)) {
-				handle_nested_irq(wm831x->irq_base + i);
+				handle_nested_irq(irq_find_mapping(wm831x->irq_domain,
+								   i));
 				ret = wm831x_reg_read(wm831x,
 						      WM831X_GPIO_LEVEL);
 			}
@@ -527,10 +532,34 @@ out:
 	return IRQ_HANDLED;
 }
 
+static int wm831x_irq_map(struct irq_domain *h, unsigned int virq,
+			  irq_hw_number_t hw)
+{
+	irq_set_chip_data(virq, h->host_data);
+	irq_set_chip_and_handler(virq, &wm831x_irq_chip, handle_edge_irq);
+	irq_set_nested_thread(virq, 1);
+
+	/* ARM needs us to explicitly flag the IRQ as valid
+	 * and will set them noprobe when we do so. */
+#ifdef CONFIG_ARM
+	set_irq_flags(virq, IRQF_VALID);
+#else
+	irq_set_noprobe(virq);
+#endif
+
+	return 0;
+}
+
+static struct irq_domain_ops wm831x_irq_domain_ops = {
+	.map	= wm831x_irq_map,
+	.xlate	= irq_domain_xlate_twocell,
+};
+
 int wm831x_irq_init(struct wm831x *wm831x, int irq)
 {
 	struct wm831x_pdata *pdata = wm831x->dev->platform_data;
-	int i, cur_irq, ret;
+	struct irq_domain *domain;
+	int i, ret, irq_base;
 
 	mutex_init(&wm831x->irq_lock);
 
@@ -543,18 +572,33 @@ int wm831x_irq_init(struct wm831x *wm831x, int irq)
 	}
 
 	/* Try to dynamically allocate IRQs if no base is specified */
-	if (!pdata || !pdata->irq_base)
-		wm831x->irq_base = -1;
+	if (pdata && pdata->irq_base) {
+		irq_base = irq_alloc_descs(pdata->irq_base, 0,
+					   WM831X_NUM_IRQS, 0);
+		if (irq_base < 0) {
+			dev_warn(wm831x->dev, "Failed to allocate IRQs: %d\n",
+				 irq_base);
+			irq_base = 0;
+		}
+	} else {
+		irq_base = 0;
+	}
+
+	if (irq_base)
+		domain = irq_domain_add_legacy(wm831x->dev->of_node,
+					       ARRAY_SIZE(wm831x_irqs),
+					       irq_base, 0,
+					       &wm831x_irq_domain_ops,
+					       wm831x);
 	else
-		wm831x->irq_base = pdata->irq_base;
+		domain = irq_domain_add_linear(wm831x->dev->of_node,
+					       ARRAY_SIZE(wm831x_irqs),
+					       &wm831x_irq_domain_ops,
+					       wm831x);
 
-	wm831x->irq_base = irq_alloc_descs(wm831x->irq_base, 0,
-					   WM831X_NUM_IRQS, 0);
-	if (wm831x->irq_base < 0) {
-		dev_warn(wm831x->dev, "Failed to allocate IRQs: %d\n",
-			 wm831x->irq_base);
-		wm831x->irq_base = 0;
-		return 0;
+	if (!domain) {
+		dev_warn(wm831x->dev, "Failed to allocate IRQ domain\n");
+		return -EINVAL;
 	}
 
 	if (pdata && pdata->irq_cmos)
@@ -566,24 +610,7 @@ int wm831x_irq_init(struct wm831x *wm831x, int irq)
 			WM831X_IRQ_OD, i);
 
 	wm831x->irq = irq;
-
-	/* Register them with genirq */
-	for (cur_irq = wm831x->irq_base;
-	     cur_irq < ARRAY_SIZE(wm831x_irqs) + wm831x->irq_base;
-	     cur_irq++) {
-		irq_set_chip_data(cur_irq, wm831x);
-		irq_set_chip_and_handler(cur_irq, &wm831x_irq_chip,
-					 handle_edge_irq);
-		irq_set_nested_thread(cur_irq, 1);
-
-		/* ARM needs us to explicitly flag the IRQ as valid
-		 * and will set them noprobe when we do so. */
-#ifdef CONFIG_ARM
-		set_irq_flags(cur_irq, IRQF_VALID);
-#else
-		irq_set_noprobe(cur_irq);
-#endif
-	}
+	wm831x->irq_domain = domain;
 
 	if (irq) {
 		/* Try to flag /IRQ as a wake source; there are a number of
diff --git a/drivers/power/wm831x_power.c b/drivers/power/wm831x_power.c
index 987332b71d8d..fc1ad9551182 100644
--- a/drivers/power/wm831x_power.c
+++ b/drivers/power/wm831x_power.c
@@ -565,7 +565,7 @@ static __devinit int wm831x_power_probe(struct platform_device *pdev)
 			    goto err_usb;
 	}
 
-	irq = platform_get_irq_byname(pdev, "SYSLO");
+	irq = wm831x_irq(wm831x, platform_get_irq_byname(pdev, "SYSLO"));
 	ret = request_threaded_irq(irq, NULL, wm831x_syslo_irq,
 				   IRQF_TRIGGER_RISING, "System power low",
 				   power);
@@ -575,7 +575,7 @@ static __devinit int wm831x_power_probe(struct platform_device *pdev)
 		goto err_battery;
 	}
 
-	irq = platform_get_irq_byname(pdev, "PWR SRC");
+	irq = wm831x_irq(wm831x, platform_get_irq_byname(pdev, "PWR SRC"));
 	ret = request_threaded_irq(irq, NULL, wm831x_pwr_src_irq,
 				   IRQF_TRIGGER_RISING, "Power source",
 				   power);
@@ -586,7 +586,9 @@ static __devinit int wm831x_power_probe(struct platform_device *pdev)
 	}
 
 	for (i = 0; i < ARRAY_SIZE(wm831x_bat_irqs); i++) {
-		irq = platform_get_irq_byname(pdev, wm831x_bat_irqs[i]);
+		irq = wm831x_irq(wm831x,
+				 platform_get_irq_byname(pdev,
+							 wm831x_bat_irqs[i]));
 		ret = request_threaded_irq(irq, NULL, wm831x_bat_irq,
 					   IRQF_TRIGGER_RISING,
 					   wm831x_bat_irqs[i],
@@ -606,10 +608,10 @@ err_bat_irq:
 		irq = platform_get_irq_byname(pdev, wm831x_bat_irqs[i]);
 		free_irq(irq, power);
 	}
-	irq = platform_get_irq_byname(pdev, "PWR SRC");
+	irq = wm831x_irq(wm831x, platform_get_irq_byname(pdev, "PWR SRC"));
 	free_irq(irq, power);
 err_syslo:
-	irq = platform_get_irq_byname(pdev, "SYSLO");
+	irq = wm831x_irq(wm831x, platform_get_irq_byname(pdev, "SYSLO"));
 	free_irq(irq, power);
 err_battery:
 	if (power->have_battery)
@@ -626,17 +628,20 @@ err_kmalloc:
 static __devexit int wm831x_power_remove(struct platform_device *pdev)
 {
 	struct wm831x_power *wm831x_power = platform_get_drvdata(pdev);
+	struct wm831x *wm831x = wm831x_power->wm831x;
 	int irq, i;
 
 	for (i = 0; i < ARRAY_SIZE(wm831x_bat_irqs); i++) {
-		irq = platform_get_irq_byname(pdev, wm831x_bat_irqs[i]);
+		irq = wm831x_irq(wm831x, 
+				 platform_get_irq_byname(pdev,
+							 wm831x_bat_irqs[i]));
 		free_irq(irq, wm831x_power);
 	}
 
-	irq = platform_get_irq_byname(pdev, "PWR SRC");
+	irq = wm831x_irq(wm831x, platform_get_irq_byname(pdev, "PWR SRC"));
 	free_irq(irq, wm831x_power);
 
-	irq = platform_get_irq_byname(pdev, "SYSLO");
+	irq = wm831x_irq(wm831x, platform_get_irq_byname(pdev, "SYSLO"));
 	free_irq(irq, wm831x_power);
 
 	if (wm831x_power->have_battery)
diff --git a/drivers/regulator/wm831x-dcdc.c b/drivers/regulator/wm831x-dcdc.c
index ff810e787eac..33b2f20a2932 100644
--- a/drivers/regulator/wm831x-dcdc.c
+++ b/drivers/regulator/wm831x-dcdc.c
@@ -565,7 +565,7 @@ static __devinit int wm831x_buckv_probe(struct platform_device *pdev)
 		goto err;
 	}
 
-	irq = platform_get_irq_byname(pdev, "UV");
+	irq = wm831x_irq(wm831x, platform_get_irq_byname(pdev, "UV"));
 	ret = request_threaded_irq(irq, NULL, wm831x_dcdc_uv_irq,
 				   IRQF_TRIGGER_RISING, dcdc->name, dcdc);
 	if (ret != 0) {
@@ -574,7 +574,7 @@ static __devinit int wm831x_buckv_probe(struct platform_device *pdev)
 		goto err_regulator;
 	}
 
-	irq = platform_get_irq_byname(pdev, "HC");
+	irq = wm831x_irq(wm831x, platform_get_irq_byname(pdev, "HC"));
 	ret = request_threaded_irq(irq, NULL, wm831x_dcdc_oc_irq,
 				   IRQF_TRIGGER_RISING, dcdc->name, dcdc);
 	if (ret != 0) {
@@ -588,7 +588,8 @@ static __devinit int wm831x_buckv_probe(struct platform_device *pdev)
 	return 0;
 
 err_uv:
-	free_irq(platform_get_irq_byname(pdev, "UV"), dcdc);
+	free_irq(wm831x_irq(wm831x, platform_get_irq_byname(pdev, "UV")),
+		 dcdc);
 err_regulator:
 	regulator_unregister(dcdc->regulator);
 err:
@@ -600,11 +601,14 @@ err:
 static __devexit int wm831x_buckv_remove(struct platform_device *pdev)
 {
 	struct wm831x_dcdc *dcdc = platform_get_drvdata(pdev);
+	struct wm831x *wm831x = dcdc->wm831x;
 
 	platform_set_drvdata(pdev, NULL);
 
-	free_irq(platform_get_irq_byname(pdev, "HC"), dcdc);
-	free_irq(platform_get_irq_byname(pdev, "UV"), dcdc);
+	free_irq(wm831x_irq(wm831x, platform_get_irq_byname(pdev, "HC")),
+			    dcdc);
+	free_irq(wm831x_irq(wm831x, platform_get_irq_byname(pdev, "UV")),
+			    dcdc);
 	regulator_unregister(dcdc->regulator);
 	if (dcdc->dvs_gpio)
 		gpio_free(dcdc->dvs_gpio);
@@ -758,7 +762,7 @@ static __devinit int wm831x_buckp_probe(struct platform_device *pdev)
 		goto err;
 	}
 
-	irq = platform_get_irq_byname(pdev, "UV");
+	irq = wm831x_irq(wm831x, platform_get_irq_byname(pdev, "UV"));
 	ret = request_threaded_irq(irq, NULL, wm831x_dcdc_uv_irq,
 				   IRQF_TRIGGER_RISING,	dcdc->name, dcdc);
 	if (ret != 0) {
@@ -783,7 +787,8 @@ static __devexit int wm831x_buckp_remove(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, NULL);
 
-	free_irq(platform_get_irq_byname(pdev, "UV"), dcdc);
+	free_irq(wm831x_irq(dcdc->wm831x, platform_get_irq_byname(pdev, "UV")),
+			    dcdc);
 	regulator_unregister(dcdc->regulator);
 
 	return 0;
@@ -883,7 +888,7 @@ static __devinit int wm831x_boostp_probe(struct platform_device *pdev)
 		goto err;
 	}
 
-	irq = platform_get_irq_byname(pdev, "UV");
+	irq = wm831x_irq(wm831x, platform_get_irq_byname(pdev, "UV"));
 	ret = request_threaded_irq(irq, NULL, wm831x_dcdc_uv_irq,
 				   IRQF_TRIGGER_RISING, dcdc->name,
 				   dcdc);
@@ -910,7 +915,8 @@ static __devexit int wm831x_boostp_remove(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, NULL);
 
-	free_irq(platform_get_irq_byname(pdev, "UV"), dcdc);
+	free_irq(wm831x_irq(dcdc->wm831x, platform_get_irq_byname(pdev, "UV")),
+		 dcdc);
 	regulator_unregister(dcdc->regulator);
 	kfree(dcdc);
 
diff --git a/drivers/regulator/wm831x-isink.c b/drivers/regulator/wm831x-isink.c
index b414e09c5620..1596947f603f 100644
--- a/drivers/regulator/wm831x-isink.c
+++ b/drivers/regulator/wm831x-isink.c
@@ -198,7 +198,7 @@ static __devinit int wm831x_isink_probe(struct platform_device *pdev)
 		goto err;
 	}
 
-	irq = platform_get_irq(pdev, 0);
+	irq = wm831x_irq(wm831x, platform_get_irq(pdev, 0));
 	ret = request_threaded_irq(irq, NULL, wm831x_isink_irq,
 				   IRQF_TRIGGER_RISING, isink->name, isink);
 	if (ret != 0) {
@@ -223,7 +223,7 @@ static __devexit int wm831x_isink_remove(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, NULL);
 
-	free_irq(platform_get_irq(pdev, 0), isink);
+	free_irq(wm831x_irq(isink->wm831x, platform_get_irq(pdev, 0)), isink);
 
 	regulator_unregister(isink->regulator);
 
diff --git a/drivers/regulator/wm831x-ldo.c b/drivers/regulator/wm831x-ldo.c
index 641e9f6499d1..b09ba05ada6d 100644
--- a/drivers/regulator/wm831x-ldo.c
+++ b/drivers/regulator/wm831x-ldo.c
@@ -359,7 +359,7 @@ static __devinit int wm831x_gp_ldo_probe(struct platform_device *pdev)
 		goto err;
 	}
 
-	irq = platform_get_irq_byname(pdev, "UV");
+	irq = wm831x_irq(wm831x, platform_get_irq_byname(pdev, "UV"));
 	ret = request_threaded_irq(irq, NULL, wm831x_ldo_uv_irq,
 				   IRQF_TRIGGER_RISING, ldo->name,
 				   ldo);
@@ -385,7 +385,8 @@ static __devexit int wm831x_gp_ldo_remove(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, NULL);
 
-	free_irq(platform_get_irq_byname(pdev, "UV"), ldo);
+	free_irq(wm831x_irq(ldo->wm831x,
+			    platform_get_irq_byname(pdev, "UV")), ldo);
 	regulator_unregister(ldo->regulator);
 
 	return 0;
@@ -624,7 +625,7 @@ static __devinit int wm831x_aldo_probe(struct platform_device *pdev)
 		goto err;
 	}
 
-	irq = platform_get_irq_byname(pdev, "UV");
+	irq = wm831x_irq(wm831x, platform_get_irq_byname(pdev, "UV"));
 	ret = request_threaded_irq(irq, NULL, wm831x_ldo_uv_irq,
 				   IRQF_TRIGGER_RISING, ldo->name, ldo);
 	if (ret != 0) {
@@ -647,7 +648,8 @@ static __devexit int wm831x_aldo_remove(struct platform_device *pdev)
 {
 	struct wm831x_ldo *ldo = platform_get_drvdata(pdev);
 
-	free_irq(platform_get_irq_byname(pdev, "UV"), ldo);
+	free_irq(wm831x_irq(ldo->wm831x, platform_get_irq_byname(pdev, "UV")),
+		 ldo);
 	regulator_unregister(ldo->regulator);
 
 	return 0;
diff --git a/drivers/rtc/rtc-wm831x.c b/drivers/rtc/rtc-wm831x.c
index 3b6e6a67e765..59c6245e0421 100644
--- a/drivers/rtc/rtc-wm831x.c
+++ b/drivers/rtc/rtc-wm831x.c
@@ -396,7 +396,7 @@ static int wm831x_rtc_probe(struct platform_device *pdev)
 {
 	struct wm831x *wm831x = dev_get_drvdata(pdev->dev.parent);
 	struct wm831x_rtc *wm831x_rtc;
-	int alm_irq = platform_get_irq_byname(pdev, "ALM");
+	int alm_irq = wm831x_irq(wm831x, platform_get_irq_byname(pdev, "ALM"));
 	int ret = 0;
 
 	wm831x_rtc = devm_kzalloc(&pdev->dev, sizeof(*wm831x_rtc), GFP_KERNEL);
diff --git a/include/linux/mfd/wm831x/core.h b/include/linux/mfd/wm831x/core.h
index 4b1211859f74..736191cc7e00 100644
--- a/include/linux/mfd/wm831x/core.h
+++ b/include/linux/mfd/wm831x/core.h
@@ -17,6 +17,7 @@
 
 #include <linux/completion.h>
 #include <linux/interrupt.h>
+#include <linux/irqdomain.h>
 #include <linux/list.h>
 #include <linux/regmap.h>
 
@@ -338,6 +339,7 @@
 #define WM831X_FLL_CLK_SRC_WIDTH                     2  /* FLL_CLK_SRC - [1:0] */
 
 struct regulator_dev;
+struct irq_domain;
 
 #define WM831X_NUM_IRQ_REGS 5
 #define WM831X_NUM_GPIO_REGS 16
@@ -367,7 +369,7 @@ struct wm831x {
 
 	int irq;  /* Our chip IRQ */
 	struct mutex irq_lock;
-	int irq_base;
+	struct irq_domain *irq_domain;
 	int irq_masks_cur[WM831X_NUM_IRQ_REGS];   /* Currently active value */
 	int irq_masks_cache[WM831X_NUM_IRQ_REGS]; /* Cached hardware value */
 
@@ -417,6 +419,11 @@ int wm831x_irq_init(struct wm831x *wm831x, int irq);
 void wm831x_irq_exit(struct wm831x *wm831x);
 void wm831x_auxadc_init(struct wm831x *wm831x);
 
+static inline int wm831x_irq(struct wm831x *wm831x, int irq)
+{
+	return irq_create_mapping(wm831x->irq_domain, irq);
+}
+
 extern struct regmap_config wm831x_regmap_config;
 
 #endif
-- 
cgit v1.3-7-g2ca7


From b09530ef844f0bf29ed3677080c02b179be84818 Mon Sep 17 00:00:00 2001
From: Richard Zhao <richard.zhao@linaro.org>
Date: Sun, 13 May 2012 09:18:02 +0800
Subject: mfd: Make anatop register accessor more flexible and rename
 meaningfully

 - rename to anatop_read_reg and anatop_write_reg
 - anatop_read_reg directly return reg value
 - anatop_write_reg write reg with mask

Signed-off-by: Richard Zhao <richard.zhao@freescale.com>
Reviewed-by: Ying-Chun Liu (PaulLiu) <paul.liu@linaro.org>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/anatop-mfd.c             | 35 +++++++++++------------------------
 drivers/regulator/anatop-regulator.c | 18 ++++++++----------
 include/linux/mfd/anatop.h           |  4 ++--
 3 files changed, 21 insertions(+), 36 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/anatop-mfd.c b/drivers/mfd/anatop-mfd.c
index 2af42480635e..6da06341f6c9 100644
--- a/drivers/mfd/anatop-mfd.c
+++ b/drivers/mfd/anatop-mfd.c
@@ -41,39 +41,26 @@
 #include <linux/of_address.h>
 #include <linux/mfd/anatop.h>
 
-u32 anatop_get_bits(struct anatop *adata, u32 addr, int bit_shift,
-		    int bit_width)
+u32 anatop_read_reg(struct anatop *adata, u32 addr)
 {
-	u32 val, mask;
-
-	if (bit_width == 32)
-		mask = ~0;
-	else
-		mask = (1 << bit_width) - 1;
-
-	val = readl(adata->ioreg + addr);
-	val = (val >> bit_shift) & mask;
-
-	return val;
+	return readl(adata->ioreg + addr);
 }
-EXPORT_SYMBOL_GPL(anatop_get_bits);
+EXPORT_SYMBOL_GPL(anatop_read_reg);
 
-void anatop_set_bits(struct anatop *adata, u32 addr, int bit_shift,
-		     int bit_width, u32 data)
+void anatop_write_reg(struct anatop *adata, u32 addr, u32 data, u32 mask)
 {
-	u32 val, mask;
+	u32 val;
 
-	if (bit_width == 32)
-		mask = ~0;
-	else
-		mask = (1 << bit_width) - 1;
+	data &= mask;
 
 	spin_lock(&adata->reglock);
-	val = readl(adata->ioreg + addr) & ~(mask << bit_shift);
-	writel((data << bit_shift) | val, adata->ioreg + addr);
+	val = readl(adata->ioreg + addr);
+	val &= ~mask;
+	val |= data;
+	writel(val, adata->ioreg + addr);
 	spin_unlock(&adata->reglock);
 }
-EXPORT_SYMBOL_GPL(anatop_set_bits);
+EXPORT_SYMBOL_GPL(anatop_write_reg);
 
 static const struct of_device_id of_anatop_match[] = {
 	{ .compatible = "fsl,imx6q-anatop", },
diff --git a/drivers/regulator/anatop-regulator.c b/drivers/regulator/anatop-regulator.c
index 81fd606e47bc..0a3408570d0a 100644
--- a/drivers/regulator/anatop-regulator.c
+++ b/drivers/regulator/anatop-regulator.c
@@ -47,7 +47,7 @@ static int anatop_set_voltage(struct regulator_dev *reg, int min_uV,
 				  int max_uV, unsigned *selector)
 {
 	struct anatop_regulator *anatop_reg = rdev_get_drvdata(reg);
-	u32 val, sel;
+	u32 val, sel, mask;
 	int uv;
 
 	uv = min_uV;
@@ -71,11 +71,10 @@ static int anatop_set_voltage(struct regulator_dev *reg, int min_uV,
 	val = anatop_reg->min_bit_val + sel;
 	*selector = sel;
 	dev_dbg(&reg->dev, "%s: calculated val %d\n", __func__, val);
-	anatop_set_bits(anatop_reg->mfd,
-			anatop_reg->control_reg,
-			anatop_reg->vol_bit_shift,
-			anatop_reg->vol_bit_width,
-			val);
+	mask = ((1 << anatop_reg->vol_bit_width) - 1) <<
+		anatop_reg->vol_bit_shift;
+	val <<= anatop_reg->vol_bit_shift;
+	anatop_write_reg(anatop_reg->mfd, anatop_reg->control_reg, val, mask);
 
 	return 0;
 }
@@ -88,10 +87,9 @@ static int anatop_get_voltage_sel(struct regulator_dev *reg)
 	if (!anatop_reg->control_reg)
 		return -ENOTSUPP;
 
-	val = anatop_get_bits(anatop_reg->mfd,
-			      anatop_reg->control_reg,
-			      anatop_reg->vol_bit_shift,
-			      anatop_reg->vol_bit_width);
+	val = anatop_read_reg(anatop_reg->mfd, anatop_reg->control_reg);
+	val = (val & ((1 << anatop_reg->vol_bit_width) - 1)) >>
+		anatop_reg->vol_bit_shift;
 
 	return val - anatop_reg->min_bit_val;
 }
diff --git a/include/linux/mfd/anatop.h b/include/linux/mfd/anatop.h
index 22c1007d3ec5..7f92acf03d9e 100644
--- a/include/linux/mfd/anatop.h
+++ b/include/linux/mfd/anatop.h
@@ -34,7 +34,7 @@ struct anatop {
 	spinlock_t reglock;
 };
 
-extern u32 anatop_get_bits(struct anatop *, u32, int, int);
-extern void anatop_set_bits(struct anatop *, u32, int, int, u32);
+extern u32 anatop_read_reg(struct anatop *, u32);
+extern void anatop_write_reg(struct anatop *, u32, u32, u32);
 
 #endif /*  __LINUX_MFD_ANATOP_H */
-- 
cgit v1.3-7-g2ca7


From 21f7541d8861fdcdff663c68903e961ca1b06dc6 Mon Sep 17 00:00:00 2001
From: Rhyland Klein <rklein@nvidia.com>
Date: Fri, 18 May 2012 11:52:19 +0200
Subject: mfd: Add tps65910-irq devicetree init and irqdomain support

This change changes the tps65910-irq code to use irqdomain, and support
initialization from devicetree. This assumes that the irq_base in the
platform data is -1 if devicetree is used.

Signed-off-by: Rhyland Klein <rklein@nvidia.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/Kconfig          |  1 +
 drivers/mfd/tps65910-irq.c   | 96 ++++++++++++++++++++++++++++----------------
 include/linux/mfd/tps65910.h |  1 +
 3 files changed, 64 insertions(+), 34 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 6da82ded3371..b819eea1775a 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -190,6 +190,7 @@ config MFD_TPS65910
 	depends on I2C=y && GPIOLIB
 	select MFD_CORE
 	select REGMAP_I2C
+	select IRQ_DOMAIN
 	help
 	  if you say yes here you get support for the TPS65910 series of
 	  Power Management chips.
diff --git a/drivers/mfd/tps65910-irq.c b/drivers/mfd/tps65910-irq.c
index 0f1ff7fbdc74..09aab3e4776d 100644
--- a/drivers/mfd/tps65910-irq.c
+++ b/drivers/mfd/tps65910-irq.c
@@ -20,15 +20,10 @@
 #include <linux/device.h>
 #include <linux/interrupt.h>
 #include <linux/irq.h>
+#include <linux/irqdomain.h>
 #include <linux/gpio.h>
 #include <linux/mfd/tps65910.h>
 
-static inline int irq_to_tps65910_irq(struct tps65910 *tps65910,
-							int irq)
-{
-	return (irq - tps65910->irq_base);
-}
-
 /*
  * This is a threaded IRQ handler so can access I2C/SPI.  Since all
  * interrupts are clear on read the IRQ line will be reasserted and
@@ -76,7 +71,7 @@ static irqreturn_t tps65910_irq(int irq, void *irq_data)
 		if (!(irq_sts & (1 << i)))
 			continue;
 
-		handle_nested_irq(tps65910->irq_base + i);
+		handle_nested_irq(irq_find_mapping(tps65910->domain, i));
 	}
 
 	/* Write the STS register back to clear IRQs we handled */
@@ -135,14 +130,14 @@ static void tps65910_irq_enable(struct irq_data *data)
 {
 	struct tps65910 *tps65910 = irq_data_get_irq_chip_data(data);
 
-	tps65910->irq_mask &= ~( 1 << irq_to_tps65910_irq(tps65910, data->irq));
+	tps65910->irq_mask &= ~(1 << data->hwirq);
 }
 
 static void tps65910_irq_disable(struct irq_data *data)
 {
 	struct tps65910 *tps65910 = irq_data_get_irq_chip_data(data);
 
-	tps65910->irq_mask |= ( 1 << irq_to_tps65910_irq(tps65910, data->irq));
+	tps65910->irq_mask |= (1 << data->hwirq);
 }
 
 #ifdef CONFIG_PM_SLEEP
@@ -164,10 +159,35 @@ static struct irq_chip tps65910_irq_chip = {
 	.irq_set_wake = tps65910_irq_set_wake,
 };
 
+static int tps65910_irq_map(struct irq_domain *h, unsigned int virq,
+				irq_hw_number_t hw)
+{
+	struct tps65910 *tps65910 = h->host_data;
+
+	irq_set_chip_data(virq, tps65910);
+	irq_set_chip_and_handler(virq, &tps65910_irq_chip, handle_edge_irq);
+	irq_set_nested_thread(virq, 1);
+
+	/* ARM needs us to explicitly flag the IRQ as valid
+	 * and will set them noprobe when we do so. */
+#ifdef CONFIG_ARM
+	set_irq_flags(virq, IRQF_VALID);
+#else
+	irq_set_noprobe(virq);
+#endif
+
+	return 0;
+}
+
+static struct irq_domain_ops tps65910_domain_ops = {
+	.map	= tps65910_irq_map,
+	.xlate	= irq_domain_xlate_twocell,
+};
+
 int tps65910_irq_init(struct tps65910 *tps65910, int irq,
 		    struct tps65910_platform_data *pdata)
 {
-	int ret, cur_irq;
+	int ret;
 	int flags = IRQF_ONESHOT;
 
 	if (!irq) {
@@ -175,17 +195,11 @@ int tps65910_irq_init(struct tps65910 *tps65910, int irq,
 		return -EINVAL;
 	}
 
-	if (!pdata || !pdata->irq_base) {
-		dev_warn(tps65910->dev, "No interrupt support, no IRQ base\n");
+	if (!pdata) {
+		dev_warn(tps65910->dev, "No interrupt support, no pdata\n");
 		return -EINVAL;
 	}
 
-	tps65910->irq_mask = 0xFFFFFF;
-
-	mutex_init(&tps65910->irq_lock);
-	tps65910->chip_irq = irq;
-	tps65910->irq_base = pdata->irq_base;
-
 	switch (tps65910_chip_id(tps65910)) {
 	case TPS65910:
 		tps65910->irq_num = TPS65910_NUM_IRQ;
@@ -195,22 +209,36 @@ int tps65910_irq_init(struct tps65910 *tps65910, int irq,
 		break;
 	}
 
-	/* Register with genirq */
-	for (cur_irq = tps65910->irq_base;
-	     cur_irq < tps65910->irq_num + tps65910->irq_base;
-	     cur_irq++) {
-		irq_set_chip_data(cur_irq, tps65910);
-		irq_set_chip_and_handler(cur_irq, &tps65910_irq_chip,
-					 handle_edge_irq);
-		irq_set_nested_thread(cur_irq, 1);
-
-		/* ARM needs us to explicitly flag the IRQ as valid
-		 * and will set them noprobe when we do so. */
-#ifdef CONFIG_ARM
-		set_irq_flags(cur_irq, IRQF_VALID);
-#else
-		irq_set_noprobe(cur_irq);
-#endif
+	if (pdata->irq_base > 0) {
+		pdata->irq_base = irq_alloc_descs(pdata->irq_base, 0,
+					tps65910->irq_num, -1);
+		if (pdata->irq_base < 0) {
+			dev_warn(tps65910->dev, "Failed to alloc IRQs: %d\n",
+					pdata->irq_base);
+			return pdata->irq_base;
+		}
+	}
+
+	tps65910->irq_mask = 0xFFFFFF;
+
+	mutex_init(&tps65910->irq_lock);
+	tps65910->chip_irq = irq;
+	tps65910->irq_base = pdata->irq_base;
+
+	if (pdata->irq_base > 0)
+		tps65910->domain = irq_domain_add_legacy(tps65910->dev->of_node,
+					tps65910->irq_num,
+					pdata->irq_base,
+					0,
+					&tps65910_domain_ops, tps65910);
+	else
+		tps65910->domain = irq_domain_add_linear(tps65910->dev->of_node,
+					tps65910->irq_num,
+					&tps65910_domain_ops, tps65910);
+
+	if (!tps65910->domain) {
+		dev_err(tps65910->dev, "Failed to create IRQ domain\n");
+		return -ENOMEM;
 	}
 
 	ret = request_threaded_irq(irq, NULL, tps65910_irq, flags,
diff --git a/include/linux/mfd/tps65910.h b/include/linux/mfd/tps65910.h
index c2673ee5e70f..ab04e901e57e 100644
--- a/include/linux/mfd/tps65910.h
+++ b/include/linux/mfd/tps65910.h
@@ -836,6 +836,7 @@ struct tps65910 {
 	int irq_base;
 	int irq_num;
 	u32 irq_mask;
+	struct irq_domain *domain;
 };
 
 struct tps65910_platform_data {
-- 
cgit v1.3-7-g2ca7


From 16e5e204c92800aad4e7db52d289565cc82240ce Mon Sep 17 00:00:00 2001
From: Ashish Jangam <ashish.jangam@kpitcummins.com>
Date: Fri, 18 May 2012 12:19:18 +0200
Subject: mfd: Add ADC support to the DA9052/53 core

This patch adds ADC support to the DA9052/53 core.

Tested on smdkv6410 and i.mx53 QS boards.

Signed-off-by: Ashish Jangam <ashish.jangam@kpitcummins.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/da9052-core.c         | 140 ++++++++++++++++++++++++++++++++++++++
 include/linux/mfd/da9052/da9052.h |  19 ++++++
 2 files changed, 159 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mfd/da9052-core.c b/drivers/mfd/da9052-core.c
index 7ff313fe9fb1..5036cf5fc077 100644
--- a/drivers/mfd/da9052-core.c
+++ b/drivers/mfd/da9052-core.c
@@ -318,6 +318,135 @@ static bool da9052_reg_volatile(struct device *dev, unsigned int reg)
 	}
 }
 
+/*
+ * TBAT look-up table is computed from the R90 reg (8 bit register)
+ * reading as below. The battery temperature is in milliCentigrade
+ * TBAT = (1/(t1+1/298) - 273) * 1000 mC
+ * where t1 = (1/B)* ln(( ADCval * 2.5)/(R25*ITBAT*255))
+ * Default values are R25 = 10e3, B = 3380, ITBAT = 50e-6
+ * Example:
+ * R25=10E3, B=3380, ITBAT=50e-6, ADCVAL=62d calculates
+ * TBAT = 20015 mili degrees Centrigrade
+ *
+*/
+static const int32_t tbat_lookup[255] = {
+	183258, 144221, 124334, 111336, 101826, 94397, 88343, 83257,
+	78889, 75071, 71688, 68656, 65914, 63414, 61120, 59001,
+	570366, 55204, 53490, 51881, 50364, 48931, 47574, 46285,
+	45059, 43889, 42772, 41703, 40678, 39694, 38748, 37838,
+	36961, 36115, 35297, 34507, 33743, 33002, 32284, 31588,
+	30911, 30254, 29615, 28994, 28389, 27799, 27225, 26664,
+	26117, 25584, 25062, 24553, 24054, 23567, 23091, 22624,
+	22167, 21719, 21281, 20851, 20429, 20015, 19610, 19211,
+	18820, 18436, 18058, 17688, 17323, 16965, 16612, 16266,
+	15925, 15589, 15259, 14933, 14613, 14298, 13987, 13681,
+	13379, 13082, 12788, 12499, 12214, 11933, 11655, 11382,
+	11112, 10845, 10582, 10322, 10066, 9812, 9562, 9315,
+	9071, 8830, 8591, 8356, 8123, 7893, 7665, 7440,
+	7218, 6998, 6780, 6565, 6352, 6141, 5933, 5726,
+	5522, 5320, 5120, 4922, 4726, 4532, 4340, 4149,
+	3961, 3774, 3589, 3406, 3225, 3045, 2867, 2690,
+	2516, 2342, 2170, 2000, 1831, 1664, 1498, 1334,
+	1171, 1009, 849, 690, 532, 376, 221, 67,
+	-84, -236, -386, -535, -683, -830, -975, -1119,
+	-1263, -1405, -1546, -1686, -1825, -1964, -2101, -2237,
+	-2372, -2506, -2639, -2771, -2902, -3033, -3162, -3291,
+	-3418, -3545, -3671, -3796, -3920, -4044, -4166, -4288,
+	-4409, -4529, -4649, -4767, -4885, -5002, -5119, -5235,
+	-5349, -5464, -5577, -5690, -5802, -5913, -6024, -6134,
+	-6244, -6352, -6461, -6568, -6675, -6781, -6887, -6992,
+	-7096, -7200, -7303, -7406, -7508, -7609, -7710, -7810,
+	-7910, -8009, -8108, -8206, -8304, -8401, -8497, -8593,
+	-8689, -8784, -8878, -8972, -9066, -9159, -9251, -9343,
+	-9435, -9526, -9617, -9707, -9796, -9886, -9975, -10063,
+	-10151, -10238, -10325, -10412, -10839, -10923, -11007, -11090,
+	-11173, -11256, -11338, -11420, -11501, -11583, -11663, -11744,
+	-11823, -11903, -11982
+};
+
+static const u8 chan_mux[DA9052_ADC_VBBAT + 1] = {
+	[DA9052_ADC_VDDOUT]	= DA9052_ADC_MAN_MUXSEL_VDDOUT,
+	[DA9052_ADC_ICH]	= DA9052_ADC_MAN_MUXSEL_ICH,
+	[DA9052_ADC_TBAT]	= DA9052_ADC_MAN_MUXSEL_TBAT,
+	[DA9052_ADC_VBAT]	= DA9052_ADC_MAN_MUXSEL_VBAT,
+	[DA9052_ADC_IN4]	= DA9052_ADC_MAN_MUXSEL_AD4,
+	[DA9052_ADC_IN5]	= DA9052_ADC_MAN_MUXSEL_AD5,
+	[DA9052_ADC_IN6]	= DA9052_ADC_MAN_MUXSEL_AD6,
+	[DA9052_ADC_VBBAT]	= DA9052_ADC_MAN_MUXSEL_VBBAT
+};
+
+int da9052_adc_manual_read(struct da9052 *da9052, unsigned char channel)
+{
+	int ret;
+	unsigned short calc_data;
+	unsigned short data;
+	unsigned char mux_sel;
+
+	if (channel > DA9052_ADC_VBBAT)
+		return -EINVAL;
+
+	mutex_lock(&da9052->auxadc_lock);
+
+	/* Channel gets activated on enabling the Conversion bit */
+	mux_sel = chan_mux[channel] | DA9052_ADC_MAN_MAN_CONV;
+
+	ret = da9052_reg_write(da9052, DA9052_ADC_MAN_REG, mux_sel);
+	if (ret < 0)
+		goto err;
+
+	/* Wait for an interrupt */
+	if (!wait_for_completion_timeout(&da9052->done,
+					 msecs_to_jiffies(500))) {
+		dev_err(da9052->dev,
+			"timeout waiting for ADC conversion interrupt\n");
+		ret = -ETIMEDOUT;
+		goto err;
+	}
+
+	ret = da9052_reg_read(da9052, DA9052_ADC_RES_H_REG);
+	if (ret < 0)
+		goto err;
+
+	calc_data = (unsigned short)ret;
+	data = calc_data << 2;
+
+	ret = da9052_reg_read(da9052, DA9052_ADC_RES_L_REG);
+	if (ret < 0)
+		goto err;
+
+	calc_data = (unsigned short)(ret & DA9052_ADC_RES_LSB);
+	data |= calc_data;
+
+	ret = data;
+
+err:
+	mutex_unlock(&da9052->auxadc_lock);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(da9052_adc_manual_read);
+
+static irqreturn_t da9052_auxadc_irq(int irq, void *irq_data)
+{
+	struct da9052 *da9052 = irq_data;
+
+	complete(&da9052->done);
+
+	return IRQ_HANDLED;
+}
+
+int da9052_adc_read_temp(struct da9052 *da9052)
+{
+	int tbat;
+
+	tbat = da9052_reg_read(da9052, DA9052_TBAT_RES_REG);
+	if (tbat <= 0)
+		return tbat;
+
+	/* ARRAY_SIZE check is not needed since TBAT is a 8-bit register */
+	return tbat_lookup[tbat - 1];
+}
+EXPORT_SYMBOL_GPL(da9052_adc_read_temp);
+
 static struct resource da9052_rtc_resource = {
 	.name = "ALM",
 	.start = DA9052_IRQ_ALARM,
@@ -646,6 +775,9 @@ int __devinit da9052_device_init(struct da9052 *da9052, u8 chip_id)
 	struct irq_desc *desc;
 	int ret;
 
+	mutex_init(&da9052->auxadc_lock);
+	init_completion(&da9052->done);
+
 	if (pdata && pdata->init != NULL)
 		pdata->init(da9052);
 
@@ -666,6 +798,12 @@ int __devinit da9052_device_init(struct da9052 *da9052, u8 chip_id)
 	desc = irq_to_desc(da9052->chip_irq);
 	da9052->irq_base = regmap_irq_chip_get_base(desc->action->dev_id);
 
+	ret = request_threaded_irq(DA9052_IRQ_ADC_EOM, NULL, da9052_auxadc_irq,
+				   IRQF_TRIGGER_LOW | IRQF_ONESHOT,
+				   "adc irq", da9052);
+	if (ret != 0)
+		dev_err(da9052->dev, "DA9052 ADC IRQ failed ret=%d\n", ret);
+
 	ret = mfd_add_devices(da9052->dev, -1, da9052_subdev_info,
 			      ARRAY_SIZE(da9052_subdev_info), NULL, 0);
 	if (ret)
@@ -674,6 +812,7 @@ int __devinit da9052_device_init(struct da9052 *da9052, u8 chip_id)
 	return 0;
 
 err:
+	free_irq(DA9052_IRQ_ADC_EOM, da9052);
 	mfd_remove_devices(da9052->dev);
 regmap_err:
 	return ret;
@@ -681,6 +820,7 @@ regmap_err:
 
 void da9052_device_exit(struct da9052 *da9052)
 {
+	free_irq(DA9052_IRQ_ADC_EOM, da9052);
 	regmap_del_irq_chip(da9052->chip_irq,
 			    irq_get_irq_data(da9052->irq_base)->chip_data);
 	mfd_remove_devices(da9052->dev);
diff --git a/include/linux/mfd/da9052/da9052.h b/include/linux/mfd/da9052/da9052.h
index 7ffbd6e9e7fc..b990cca1d9ee 100644
--- a/include/linux/mfd/da9052/da9052.h
+++ b/include/linux/mfd/da9052/da9052.h
@@ -33,6 +33,18 @@
 
 #include <linux/mfd/da9052/reg.h>
 
+/* Common - HWMON Channel Definations */
+#define DA9052_ADC_VDDOUT	0
+#define DA9052_ADC_ICH		1
+#define DA9052_ADC_TBAT	2
+#define DA9052_ADC_VBAT	3
+#define DA9052_ADC_IN4		4
+#define DA9052_ADC_IN5		5
+#define DA9052_ADC_IN6		6
+#define DA9052_ADC_TSI		7
+#define DA9052_ADC_TJUNC	8
+#define DA9052_ADC_VBBAT	9
+
 #define DA9052_IRQ_DCIN	0
 #define DA9052_IRQ_VBUS	1
 #define DA9052_IRQ_DCINREM	2
@@ -79,12 +91,19 @@ struct da9052 {
 	struct device *dev;
 	struct regmap *regmap;
 
+	struct mutex auxadc_lock;
+	struct completion done;
+
 	int irq_base;
 	u8 chip_id;
 
 	int chip_irq;
 };
 
+/* ADC API */
+int da9052_adc_manual_read(struct da9052 *da9052, unsigned char channel);
+int da9052_adc_read_temp(struct da9052 *da9052);
+
 /* Device I/O API */
 static inline int da9052_reg_read(struct da9052 *da9052, unsigned char reg)
 {
-- 
cgit v1.3-7-g2ca7


From 1fe17a24e2fe0a9554d19a4249eb2d80050ecb8c Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Fri, 18 May 2012 17:02:02 +0100
Subject: mfd: Emulate active low IRQs as well as active high IRQs for wm831x

As with the existing emulation this should not be used in production
systems but is useful for test purposes.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/wm831x-irq.c        | 24 +++++++++++++++++++-----
 include/linux/mfd/wm831x/core.h |  3 ++-
 2 files changed, 21 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/wm831x-irq.c b/drivers/mfd/wm831x-irq.c
index ecc9d6d62fad..804e56ec99eb 100644
--- a/drivers/mfd/wm831x-irq.c
+++ b/drivers/mfd/wm831x-irq.c
@@ -413,22 +413,25 @@ static int wm831x_irq_set_type(struct irq_data *data, unsigned int type)
 	 * do the update here as we can be called with the bus lock
 	 * held.
 	 */
+	wm831x->gpio_level_low[irq] = false;
+	wm831x->gpio_level_high[irq] = false;
 	switch (type) {
 	case IRQ_TYPE_EDGE_BOTH:
 		wm831x->gpio_update[irq] = 0x10000 | WM831X_GPN_INT_MODE;
-		wm831x->gpio_level[irq] = false;
 		break;
 	case IRQ_TYPE_EDGE_RISING:
 		wm831x->gpio_update[irq] = 0x10000 | WM831X_GPN_POL;
-		wm831x->gpio_level[irq] = false;
 		break;
 	case IRQ_TYPE_EDGE_FALLING:
 		wm831x->gpio_update[irq] = 0x10000;
-		wm831x->gpio_level[irq] = false;
 		break;
 	case IRQ_TYPE_LEVEL_HIGH:
 		wm831x->gpio_update[irq] = 0x10000 | WM831X_GPN_POL;
-		wm831x->gpio_level[irq] = true;
+		wm831x->gpio_level_high[irq] = true;
+		break;
+	case IRQ_TYPE_LEVEL_LOW:
+		wm831x->gpio_update[irq] = 0x10000;
+		wm831x->gpio_level_low[irq] = true;
 		break;
 	default:
 		return -EINVAL;
@@ -517,7 +520,7 @@ static irqreturn_t wm831x_irq_thread(int irq, void *data)
 		 * status.  This is sucky but improves interoperability.
 		 */
 		if (primary == WM831X_GP_INT &&
-		    wm831x->gpio_level[i - WM831X_IRQ_GPIO_1]) {
+		    wm831x->gpio_level_high[i - WM831X_IRQ_GPIO_1]) {
 			ret = wm831x_reg_read(wm831x, WM831X_GPIO_LEVEL);
 			while (ret & 1 << (i - WM831X_IRQ_GPIO_1)) {
 				handle_nested_irq(irq_find_mapping(wm831x->irq_domain,
@@ -526,6 +529,17 @@ static irqreturn_t wm831x_irq_thread(int irq, void *data)
 						      WM831X_GPIO_LEVEL);
 			}
 		}
+
+		if (primary == WM831X_GP_INT &&
+		    wm831x->gpio_level_low[i - WM831X_IRQ_GPIO_1]) {
+			ret = wm831x_reg_read(wm831x, WM831X_GPIO_LEVEL);
+			while (!(ret & 1 << (i - WM831X_IRQ_GPIO_1))) {
+				handle_nested_irq(irq_find_mapping(wm831x->irq_domain,
+								   i));
+				ret = wm831x_reg_read(wm831x,
+						      WM831X_GPIO_LEVEL);
+			}
+		}
 	}
 
 out:
diff --git a/include/linux/mfd/wm831x/core.h b/include/linux/mfd/wm831x/core.h
index 736191cc7e00..4a3b83a77614 100644
--- a/include/linux/mfd/wm831x/core.h
+++ b/include/linux/mfd/wm831x/core.h
@@ -384,7 +384,8 @@ struct wm831x {
 
 	/* Used by the interrupt controller code to post writes */
 	int gpio_update[WM831X_NUM_GPIO_REGS];
-	bool gpio_level[WM831X_NUM_GPIO_REGS];
+	bool gpio_level_high[WM831X_NUM_GPIO_REGS];
+	bool gpio_level_low[WM831X_NUM_GPIO_REGS];
 
 	struct mutex auxadc_lock;
 	struct list_head auxadc_pending;
-- 
cgit v1.3-7-g2ca7


From 041d3a8cdc18dc375a128d90bbb753949a81b1fb Mon Sep 17 00:00:00 2001
From: Michal Nazarewicz <mina86@mina86.com>
Date: Thu, 29 Dec 2011 13:09:50 +0100
Subject: mm: page_alloc: introduce alloc_contig_range()

This commit adds the alloc_contig_range() function which tries
to allocate given range of pages.  It tries to migrate all
already allocated pages that fall in the range thus freeing them.
Once all pages in the range are freed they are removed from the
buddy system thus allocated for the caller to use.

Signed-off-by: Michal Nazarewicz <mina86@mina86.com>
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Tested-by: Rob Clark <rob.clark@linaro.org>
Tested-by: Ohad Ben-Cohen <ohad@wizery.com>
Tested-by: Benjamin Gaignard <benjamin.gaignard@linaro.org>
Tested-by: Robert Nelson <robertcnelson@gmail.com>
Tested-by: Barry Song <Baohua.Song@csr.com>
---
 include/linux/gfp.h |   8 +++
 mm/page_alloc.c     | 188 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 196 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 581e74b7df95..052a5b6cc4d0 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -391,4 +391,12 @@ static inline bool pm_suspended_storage(void)
 }
 #endif /* CONFIG_PM_SLEEP */
 
+#ifdef CONFIG_CMA
+
+/* The below functions must be run on a range from a single zone. */
+extern int alloc_contig_range(unsigned long start, unsigned long end);
+extern void free_contig_range(unsigned long pfn, unsigned nr_pages);
+
+#endif
+
 #endif /* __LINUX_GFP_H */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 6fb46c1589b9..2c38a30d064e 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -57,6 +57,7 @@
 #include <linux/ftrace_event.h>
 #include <linux/memcontrol.h>
 #include <linux/prefetch.h>
+#include <linux/migrate.h>
 #include <linux/page-debug-flags.h>
 
 #include <asm/tlbflush.h>
@@ -5550,6 +5551,193 @@ out:
 	spin_unlock_irqrestore(&zone->lock, flags);
 }
 
+#ifdef CONFIG_CMA
+
+static unsigned long pfn_max_align_down(unsigned long pfn)
+{
+	return pfn & ~(max_t(unsigned long, MAX_ORDER_NR_PAGES,
+			     pageblock_nr_pages) - 1);
+}
+
+static unsigned long pfn_max_align_up(unsigned long pfn)
+{
+	return ALIGN(pfn, max_t(unsigned long, MAX_ORDER_NR_PAGES,
+				pageblock_nr_pages));
+}
+
+static struct page *
+__alloc_contig_migrate_alloc(struct page *page, unsigned long private,
+			     int **resultp)
+{
+	return alloc_page(GFP_HIGHUSER_MOVABLE);
+}
+
+/* [start, end) must belong to a single zone. */
+static int __alloc_contig_migrate_range(unsigned long start, unsigned long end)
+{
+	/* This function is based on compact_zone() from compaction.c. */
+
+	unsigned long pfn = start;
+	unsigned int tries = 0;
+	int ret = 0;
+
+	struct compact_control cc = {
+		.nr_migratepages = 0,
+		.order = -1,
+		.zone = page_zone(pfn_to_page(start)),
+		.sync = true,
+	};
+	INIT_LIST_HEAD(&cc.migratepages);
+
+	migrate_prep_local();
+
+	while (pfn < end || !list_empty(&cc.migratepages)) {
+		if (fatal_signal_pending(current)) {
+			ret = -EINTR;
+			break;
+		}
+
+		if (list_empty(&cc.migratepages)) {
+			cc.nr_migratepages = 0;
+			pfn = isolate_migratepages_range(cc.zone, &cc,
+							 pfn, end);
+			if (!pfn) {
+				ret = -EINTR;
+				break;
+			}
+			tries = 0;
+		} else if (++tries == 5) {
+			ret = ret < 0 ? ret : -EBUSY;
+			break;
+		}
+
+		ret = migrate_pages(&cc.migratepages,
+				    __alloc_contig_migrate_alloc,
+				    0, false, true);
+	}
+
+	putback_lru_pages(&cc.migratepages);
+	return ret > 0 ? 0 : ret;
+}
+
+/**
+ * alloc_contig_range() -- tries to allocate given range of pages
+ * @start:	start PFN to allocate
+ * @end:	one-past-the-last PFN to allocate
+ *
+ * The PFN range does not have to be pageblock or MAX_ORDER_NR_PAGES
+ * aligned, however it's the caller's responsibility to guarantee that
+ * we are the only thread that changes migrate type of pageblocks the
+ * pages fall in.
+ *
+ * The PFN range must belong to a single zone.
+ *
+ * Returns zero on success or negative error code.  On success all
+ * pages which PFN is in [start, end) are allocated for the caller and
+ * need to be freed with free_contig_range().
+ */
+int alloc_contig_range(unsigned long start, unsigned long end)
+{
+	struct zone *zone = page_zone(pfn_to_page(start));
+	unsigned long outer_start, outer_end;
+	int ret = 0, order;
+
+	/*
+	 * What we do here is we mark all pageblocks in range as
+	 * MIGRATE_ISOLATE.  Because pageblock and max order pages may
+	 * have different sizes, and due to the way page allocator
+	 * work, we align the range to biggest of the two pages so
+	 * that page allocator won't try to merge buddies from
+	 * different pageblocks and change MIGRATE_ISOLATE to some
+	 * other migration type.
+	 *
+	 * Once the pageblocks are marked as MIGRATE_ISOLATE, we
+	 * migrate the pages from an unaligned range (ie. pages that
+	 * we are interested in).  This will put all the pages in
+	 * range back to page allocator as MIGRATE_ISOLATE.
+	 *
+	 * When this is done, we take the pages in range from page
+	 * allocator removing them from the buddy system.  This way
+	 * page allocator will never consider using them.
+	 *
+	 * This lets us mark the pageblocks back as
+	 * MIGRATE_CMA/MIGRATE_MOVABLE so that free pages in the
+	 * aligned range but not in the unaligned, original range are
+	 * put back to page allocator so that buddy can use them.
+	 */
+
+	ret = start_isolate_page_range(pfn_max_align_down(start),
+				       pfn_max_align_up(end));
+	if (ret)
+		goto done;
+
+	ret = __alloc_contig_migrate_range(start, end);
+	if (ret)
+		goto done;
+
+	/*
+	 * Pages from [start, end) are within a MAX_ORDER_NR_PAGES
+	 * aligned blocks that are marked as MIGRATE_ISOLATE.  What's
+	 * more, all pages in [start, end) are free in page allocator.
+	 * What we are going to do is to allocate all pages from
+	 * [start, end) (that is remove them from page allocator).
+	 *
+	 * The only problem is that pages at the beginning and at the
+	 * end of interesting range may be not aligned with pages that
+	 * page allocator holds, ie. they can be part of higher order
+	 * pages.  Because of this, we reserve the bigger range and
+	 * once this is done free the pages we are not interested in.
+	 *
+	 * We don't have to hold zone->lock here because the pages are
+	 * isolated thus they won't get removed from buddy.
+	 */
+
+	lru_add_drain_all();
+	drain_all_pages();
+
+	order = 0;
+	outer_start = start;
+	while (!PageBuddy(pfn_to_page(outer_start))) {
+		if (++order >= MAX_ORDER) {
+			ret = -EBUSY;
+			goto done;
+		}
+		outer_start &= ~0UL << order;
+	}
+
+	/* Make sure the range is really isolated. */
+	if (test_pages_isolated(outer_start, end)) {
+		pr_warn("alloc_contig_range test_pages_isolated(%lx, %lx) failed\n",
+		       outer_start, end);
+		ret = -EBUSY;
+		goto done;
+	}
+
+	outer_end = isolate_freepages_range(outer_start, end);
+	if (!outer_end) {
+		ret = -EBUSY;
+		goto done;
+	}
+
+	/* Free head and tail (if any) */
+	if (start != outer_start)
+		free_contig_range(outer_start, start - outer_start);
+	if (end != outer_end)
+		free_contig_range(end, outer_end - end);
+
+done:
+	undo_isolate_page_range(pfn_max_align_down(start),
+				pfn_max_align_up(end));
+	return ret;
+}
+
+void free_contig_range(unsigned long pfn, unsigned nr_pages)
+{
+	for (; nr_pages--; ++pfn)
+		__free_page(pfn_to_page(pfn));
+}
+#endif
+
 #ifdef CONFIG_MEMORY_HOTREMOVE
 /*
  * All pages in the range must be isolated before calling this.
-- 
cgit v1.3-7-g2ca7


From 47118af076f64844b4f423bc2f545b2da9dab50d Mon Sep 17 00:00:00 2001
From: Michal Nazarewicz <mina86@mina86.com>
Date: Thu, 29 Dec 2011 13:09:50 +0100
Subject: mm: mmzone: MIGRATE_CMA migration type added

The MIGRATE_CMA migration type has two main characteristics:
(i) only movable pages can be allocated from MIGRATE_CMA
pageblocks and (ii) page allocator will never change migration
type of MIGRATE_CMA pageblocks.

This guarantees (to some degree) that page in a MIGRATE_CMA page
block can always be migrated somewhere else (unless there's no
memory left in the system).

It is designed to be used for allocating big chunks (eg. 10MiB)
of physically contiguous memory.  Once driver requests
contiguous memory, pages from MIGRATE_CMA pageblocks may be
migrated away to create a contiguous block.

To minimise number of migrations, MIGRATE_CMA migration type
is the last type tried when page allocator falls back to other
migration types when requested.

Signed-off-by: Michal Nazarewicz <mina86@mina86.com>
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Tested-by: Rob Clark <rob.clark@linaro.org>
Tested-by: Ohad Ben-Cohen <ohad@wizery.com>
Tested-by: Benjamin Gaignard <benjamin.gaignard@linaro.org>
Tested-by: Robert Nelson <robertcnelson@gmail.com>
Tested-by: Barry Song <Baohua.Song@csr.com>
---
 include/linux/gfp.h    |  3 ++
 include/linux/mmzone.h | 38 ++++++++++++++++++++-----
 mm/Kconfig             |  2 +-
 mm/compaction.c        | 11 ++++++--
 mm/page_alloc.c        | 76 +++++++++++++++++++++++++++++++++++++++-----------
 mm/vmstat.c            |  3 ++
 6 files changed, 106 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 052a5b6cc4d0..78d32a7be257 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -397,6 +397,9 @@ static inline bool pm_suspended_storage(void)
 extern int alloc_contig_range(unsigned long start, unsigned long end);
 extern void free_contig_range(unsigned long pfn, unsigned nr_pages);
 
+/* CMA stuff */
+extern void init_cma_reserved_pageblock(struct page *page);
+
 #endif
 
 #endif /* __LINUX_GFP_H */
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index dff711509661..8c1335f3c3a3 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -35,13 +35,37 @@
  */
 #define PAGE_ALLOC_COSTLY_ORDER 3
 
-#define MIGRATE_UNMOVABLE     0
-#define MIGRATE_RECLAIMABLE   1
-#define MIGRATE_MOVABLE       2
-#define MIGRATE_PCPTYPES      3 /* the number of types on the pcp lists */
-#define MIGRATE_RESERVE       3
-#define MIGRATE_ISOLATE       4 /* can't allocate from here */
-#define MIGRATE_TYPES         5
+enum {
+	MIGRATE_UNMOVABLE,
+	MIGRATE_RECLAIMABLE,
+	MIGRATE_MOVABLE,
+	MIGRATE_PCPTYPES,	/* the number of types on the pcp lists */
+	MIGRATE_RESERVE = MIGRATE_PCPTYPES,
+#ifdef CONFIG_CMA
+	/*
+	 * MIGRATE_CMA migration type is designed to mimic the way
+	 * ZONE_MOVABLE works.  Only movable pages can be allocated
+	 * from MIGRATE_CMA pageblocks and page allocator never
+	 * implicitly change migration type of MIGRATE_CMA pageblock.
+	 *
+	 * The way to use it is to change migratetype of a range of
+	 * pageblocks to MIGRATE_CMA which can be done by
+	 * __free_pageblock_cma() function.  What is important though
+	 * is that a range of pageblocks must be aligned to
+	 * MAX_ORDER_NR_PAGES should biggest page be bigger then
+	 * a single pageblock.
+	 */
+	MIGRATE_CMA,
+#endif
+	MIGRATE_ISOLATE,	/* can't allocate from here */
+	MIGRATE_TYPES
+};
+
+#ifdef CONFIG_CMA
+#  define is_migrate_cma(migratetype) unlikely((migratetype) == MIGRATE_CMA)
+#else
+#  define is_migrate_cma(migratetype) false
+#endif
 
 #define for_each_migratetype_order(order, type) \
 	for (order = 0; order < MAX_ORDER; order++) \
diff --git a/mm/Kconfig b/mm/Kconfig
index e338407f1225..39220026c797 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -198,7 +198,7 @@ config COMPACTION
 config MIGRATION
 	bool "Page migration"
 	def_bool y
-	depends on NUMA || ARCH_ENABLE_MEMORY_HOTREMOVE || COMPACTION
+	depends on NUMA || ARCH_ENABLE_MEMORY_HOTREMOVE || COMPACTION || CMA
 	help
 	  Allows the migration of the physical location of pages of processes
 	  while the virtual addresses are not changed. This is useful in
diff --git a/mm/compaction.c b/mm/compaction.c
index 7a92e418a187..da7d35ea5103 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -45,6 +45,11 @@ static void map_pages(struct list_head *list)
 	}
 }
 
+static inline bool migrate_async_suitable(int migratetype)
+{
+	return is_migrate_cma(migratetype) || migratetype == MIGRATE_MOVABLE;
+}
+
 /*
  * Isolate free pages onto a private freelist. Caller must hold zone->lock.
  * If @strict is true, will abort returning 0 on any invalid PFNs or non-free
@@ -299,7 +304,7 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
 		 */
 		pageblock_nr = low_pfn >> pageblock_order;
 		if (!cc->sync && last_pageblock_nr != pageblock_nr &&
-				get_pageblock_migratetype(page) != MIGRATE_MOVABLE) {
+		    !migrate_async_suitable(get_pageblock_migratetype(page))) {
 			low_pfn += pageblock_nr_pages;
 			low_pfn = ALIGN(low_pfn, pageblock_nr_pages) - 1;
 			last_pageblock_nr = pageblock_nr;
@@ -367,8 +372,8 @@ static bool suitable_migration_target(struct page *page)
 	if (PageBuddy(page) && page_order(page) >= pageblock_order)
 		return true;
 
-	/* If the block is MIGRATE_MOVABLE, allow migration */
-	if (migratetype == MIGRATE_MOVABLE)
+	/* If the block is MIGRATE_MOVABLE or MIGRATE_CMA, allow migration */
+	if (migrate_async_suitable(migratetype))
 		return true;
 
 	/* Otherwise skip the block */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d6b580c660f5..0869eb1e9461 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -750,6 +750,24 @@ void __meminit __free_pages_bootmem(struct page *page, unsigned int order)
 	__free_pages(page, order);
 }
 
+#ifdef CONFIG_CMA
+/* Free whole pageblock and set it's migration type to MIGRATE_CMA. */
+void __init init_cma_reserved_pageblock(struct page *page)
+{
+	unsigned i = pageblock_nr_pages;
+	struct page *p = page;
+
+	do {
+		__ClearPageReserved(p);
+		set_page_count(p, 0);
+	} while (++p, --i);
+
+	set_page_refcounted(page);
+	set_pageblock_migratetype(page, MIGRATE_CMA);
+	__free_pages(page, pageblock_order);
+	totalram_pages += pageblock_nr_pages;
+}
+#endif
 
 /*
  * The order of subdivision here is critical for the IO subsystem.
@@ -875,10 +893,15 @@ struct page *__rmqueue_smallest(struct zone *zone, unsigned int order,
  * This array describes the order lists are fallen back to when
  * the free lists for the desirable migrate type are depleted
  */
-static int fallbacks[MIGRATE_TYPES][3] = {
-	[MIGRATE_UNMOVABLE]   = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE,   MIGRATE_RESERVE },
-	[MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE,   MIGRATE_MOVABLE,   MIGRATE_RESERVE },
-	[MIGRATE_MOVABLE]     = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_RESERVE },
+static int fallbacks[MIGRATE_TYPES][4] = {
+	[MIGRATE_UNMOVABLE]   = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE,     MIGRATE_RESERVE },
+	[MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE,   MIGRATE_MOVABLE,     MIGRATE_RESERVE },
+#ifdef CONFIG_CMA
+	[MIGRATE_MOVABLE]     = { MIGRATE_CMA,         MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_RESERVE },
+	[MIGRATE_CMA]         = { MIGRATE_RESERVE }, /* Never used */
+#else
+	[MIGRATE_MOVABLE]     = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE,   MIGRATE_RESERVE },
+#endif
 	[MIGRATE_RESERVE]     = { MIGRATE_RESERVE }, /* Never used */
 	[MIGRATE_ISOLATE]     = { MIGRATE_RESERVE }, /* Never used */
 };
@@ -995,11 +1018,18 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype)
 			 * pages to the preferred allocation list. If falling
 			 * back for a reclaimable kernel allocation, be more
 			 * aggressive about taking ownership of free pages
+			 *
+			 * On the other hand, never change migration
+			 * type of MIGRATE_CMA pageblocks nor move CMA
+			 * pages on different free lists. We don't
+			 * want unmovable pages to be allocated from
+			 * MIGRATE_CMA areas.
 			 */
-			if (unlikely(current_order >= (pageblock_order >> 1)) ||
-					start_migratetype == MIGRATE_RECLAIMABLE ||
-					page_group_by_mobility_disabled) {
-				unsigned long pages;
+			if (!is_migrate_cma(migratetype) &&
+			    (unlikely(current_order >= pageblock_order / 2) ||
+			     start_migratetype == MIGRATE_RECLAIMABLE ||
+			     page_group_by_mobility_disabled)) {
+				int pages;
 				pages = move_freepages_block(zone, page,
 								start_migratetype);
 
@@ -1017,11 +1047,14 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype)
 			rmv_page_order(page);
 
 			/* Take ownership for orders >= pageblock_order */
-			if (current_order >= pageblock_order)
+			if (current_order >= pageblock_order &&
+			    !is_migrate_cma(migratetype))
 				change_pageblock_range(page, current_order,
 							start_migratetype);
 
-			expand(zone, page, order, current_order, area, migratetype);
+			expand(zone, page, order, current_order, area,
+			       is_migrate_cma(migratetype)
+			     ? migratetype : start_migratetype);
 
 			trace_mm_page_alloc_extfrag(page, order, current_order,
 				start_migratetype, migratetype);
@@ -1072,7 +1105,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
 			unsigned long count, struct list_head *list,
 			int migratetype, int cold)
 {
-	int i;
+	int mt = migratetype, i;
 
 	spin_lock(&zone->lock);
 	for (i = 0; i < count; ++i) {
@@ -1093,7 +1126,12 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
 			list_add(&page->lru, list);
 		else
 			list_add_tail(&page->lru, list);
-		set_page_private(page, migratetype);
+		if (IS_ENABLED(CONFIG_CMA)) {
+			mt = get_pageblock_migratetype(page);
+			if (!is_migrate_cma(mt) && mt != MIGRATE_ISOLATE)
+				mt = migratetype;
+		}
+		set_page_private(page, mt);
 		list = &page->lru;
 	}
 	__mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order));
@@ -1373,8 +1411,12 @@ int split_free_page(struct page *page)
 
 	if (order >= pageblock_order - 1) {
 		struct page *endpage = page + (1 << order) - 1;
-		for (; page < endpage; page += pageblock_nr_pages)
-			set_pageblock_migratetype(page, MIGRATE_MOVABLE);
+		for (; page < endpage; page += pageblock_nr_pages) {
+			int mt = get_pageblock_migratetype(page);
+			if (mt != MIGRATE_ISOLATE && !is_migrate_cma(mt))
+				set_pageblock_migratetype(page,
+							  MIGRATE_MOVABLE);
+		}
 	}
 
 	return 1 << order;
@@ -5414,14 +5456,16 @@ static int
 __count_immobile_pages(struct zone *zone, struct page *page, int count)
 {
 	unsigned long pfn, iter, found;
+	int mt;
+
 	/*
 	 * For avoiding noise data, lru_add_drain_all() should be called
 	 * If ZONE_MOVABLE, the zone never contains immobile pages
 	 */
 	if (zone_idx(zone) == ZONE_MOVABLE)
 		return true;
-
-	if (get_pageblock_migratetype(page) == MIGRATE_MOVABLE)
+	mt = get_pageblock_migratetype(page);
+	if (mt == MIGRATE_MOVABLE || is_migrate_cma(mt))
 		return true;
 
 	pfn = page_to_pfn(page);
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 7db1b9bab492..0dad31dc1618 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -613,6 +613,9 @@ static char * const migratetype_names[MIGRATE_TYPES] = {
 	"Reclaimable",
 	"Movable",
 	"Reserve",
+#ifdef CONFIG_CMA
+	"CMA",
+#endif
 	"Isolate",
 };
 
-- 
cgit v1.3-7-g2ca7


From 0815f3d81d76dfbf2abcfd93a85ff0a6008fe4c0 Mon Sep 17 00:00:00 2001
From: Michal Nazarewicz <mina86@mina86.com>
Date: Tue, 3 Apr 2012 15:06:15 +0200
Subject: mm: page_isolation: MIGRATE_CMA isolation functions added

This commit changes various functions that change pages and
pageblocks migrate type between MIGRATE_ISOLATE and
MIGRATE_MOVABLE in such a way as to allow to work with
MIGRATE_CMA migrate type.

Signed-off-by: Michal Nazarewicz <mina86@mina86.com>
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Tested-by: Rob Clark <rob.clark@linaro.org>
Tested-by: Ohad Ben-Cohen <ohad@wizery.com>
Tested-by: Benjamin Gaignard <benjamin.gaignard@linaro.org>
Tested-by: Robert Nelson <robertcnelson@gmail.com>
Tested-by: Barry Song <Baohua.Song@csr.com>
---
 include/linux/gfp.h            |  3 ++-
 include/linux/page-isolation.h | 18 +++++++++---------
 mm/memory-failure.c            |  2 +-
 mm/memory_hotplug.c            |  6 +++---
 mm/page_alloc.c                | 17 +++++++++++------
 mm/page_isolation.c            | 15 ++++++++-------
 6 files changed, 34 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 78d32a7be257..1e49be49d324 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -394,7 +394,8 @@ static inline bool pm_suspended_storage(void)
 #ifdef CONFIG_CMA
 
 /* The below functions must be run on a range from a single zone. */
-extern int alloc_contig_range(unsigned long start, unsigned long end);
+extern int alloc_contig_range(unsigned long start, unsigned long end,
+			      unsigned migratetype);
 extern void free_contig_range(unsigned long pfn, unsigned nr_pages);
 
 /* CMA stuff */
diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h
index 051c1b1ede4e..3bdcab30ca41 100644
--- a/include/linux/page-isolation.h
+++ b/include/linux/page-isolation.h
@@ -3,7 +3,7 @@
 
 /*
  * Changes migrate type in [start_pfn, end_pfn) to be MIGRATE_ISOLATE.
- * If specified range includes migrate types other than MOVABLE,
+ * If specified range includes migrate types other than MOVABLE or CMA,
  * this will fail with -EBUSY.
  *
  * For isolating all pages in the range finally, the caller have to
@@ -11,27 +11,27 @@
  * test it.
  */
 extern int
-start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn);
+start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
+			 unsigned migratetype);
 
 /*
  * Changes MIGRATE_ISOLATE to MIGRATE_MOVABLE.
  * target range is [start_pfn, end_pfn)
  */
 extern int
-undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn);
+undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
+			unsigned migratetype);
 
 /*
- * test all pages in [start_pfn, end_pfn)are isolated or not.
+ * Test all pages in [start_pfn, end_pfn) are isolated or not.
  */
-extern int
-test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn);
+int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn);
 
 /*
- * Internal funcs.Changes pageblock's migrate type.
- * Please use make_pagetype_isolated()/make_pagetype_movable().
+ * Internal functions. Changes pageblock's migrate type.
  */
 extern int set_migratetype_isolate(struct page *page);
-extern void unset_migratetype_isolate(struct page *page);
+extern void unset_migratetype_isolate(struct page *page, unsigned migratetype);
 
 
 #endif
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 97cc2733551a..c99ad4e6b88c 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1404,7 +1404,7 @@ static int get_any_page(struct page *p, unsigned long pfn, int flags)
 		/* Not a free page */
 		ret = 1;
 	}
-	unset_migratetype_isolate(p);
+	unset_migratetype_isolate(p, MIGRATE_MOVABLE);
 	unlock_memory_hotplug();
 	return ret;
 }
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 6629fafd6ce4..fc898cb4fe8f 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -891,7 +891,7 @@ static int __ref offline_pages(unsigned long start_pfn,
 	nr_pages = end_pfn - start_pfn;
 
 	/* set above range as isolated */
-	ret = start_isolate_page_range(start_pfn, end_pfn);
+	ret = start_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
 	if (ret)
 		goto out;
 
@@ -956,7 +956,7 @@ repeat:
 	   We cannot do rollback at this point. */
 	offline_isolated_pages(start_pfn, end_pfn);
 	/* reset pagetype flags and makes migrate type to be MOVABLE */
-	undo_isolate_page_range(start_pfn, end_pfn);
+	undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
 	/* removal success */
 	zone->present_pages -= offlined_pages;
 	zone->zone_pgdat->node_present_pages -= offlined_pages;
@@ -981,7 +981,7 @@ failed_removal:
 		start_pfn, end_pfn);
 	memory_notify(MEM_CANCEL_OFFLINE, &arg);
 	/* pushback to free area */
-	undo_isolate_page_range(start_pfn, end_pfn);
+	undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
 
 out:
 	unlock_memory_hotplug();
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 0869eb1e9461..116c087f76bb 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5582,7 +5582,7 @@ out:
 	return ret;
 }
 
-void unset_migratetype_isolate(struct page *page)
+void unset_migratetype_isolate(struct page *page, unsigned migratetype)
 {
 	struct zone *zone;
 	unsigned long flags;
@@ -5590,8 +5590,8 @@ void unset_migratetype_isolate(struct page *page)
 	spin_lock_irqsave(&zone->lock, flags);
 	if (get_pageblock_migratetype(page) != MIGRATE_ISOLATE)
 		goto out;
-	set_pageblock_migratetype(page, MIGRATE_MOVABLE);
-	move_freepages_block(zone, page, MIGRATE_MOVABLE);
+	set_pageblock_migratetype(page, migratetype);
+	move_freepages_block(zone, page, migratetype);
 out:
 	spin_unlock_irqrestore(&zone->lock, flags);
 }
@@ -5669,6 +5669,10 @@ static int __alloc_contig_migrate_range(unsigned long start, unsigned long end)
  * alloc_contig_range() -- tries to allocate given range of pages
  * @start:	start PFN to allocate
  * @end:	one-past-the-last PFN to allocate
+ * @migratetype:	migratetype of the underlaying pageblocks (either
+ *			#MIGRATE_MOVABLE or #MIGRATE_CMA).  All pageblocks
+ *			in range must have the same migratetype and it must
+ *			be either of the two.
  *
  * The PFN range does not have to be pageblock or MAX_ORDER_NR_PAGES
  * aligned, however it's the caller's responsibility to guarantee that
@@ -5681,7 +5685,8 @@ static int __alloc_contig_migrate_range(unsigned long start, unsigned long end)
  * pages which PFN is in [start, end) are allocated for the caller and
  * need to be freed with free_contig_range().
  */
-int alloc_contig_range(unsigned long start, unsigned long end)
+int alloc_contig_range(unsigned long start, unsigned long end,
+		       unsigned migratetype)
 {
 	struct zone *zone = page_zone(pfn_to_page(start));
 	unsigned long outer_start, outer_end;
@@ -5712,7 +5717,7 @@ int alloc_contig_range(unsigned long start, unsigned long end)
 	 */
 
 	ret = start_isolate_page_range(pfn_max_align_down(start),
-				       pfn_max_align_up(end));
+				       pfn_max_align_up(end), migratetype);
 	if (ret)
 		goto done;
 
@@ -5772,7 +5777,7 @@ int alloc_contig_range(unsigned long start, unsigned long end)
 
 done:
 	undo_isolate_page_range(pfn_max_align_down(start),
-				pfn_max_align_up(end));
+				pfn_max_align_up(end), migratetype);
 	return ret;
 }
 
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index 4ae42bb40892..c9f04774f2b8 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -24,6 +24,7 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages)
  * to be MIGRATE_ISOLATE.
  * @start_pfn: The lower PFN of the range to be isolated.
  * @end_pfn: The upper PFN of the range to be isolated.
+ * @migratetype: migrate type to set in error recovery.
  *
  * Making page-allocation-type to be MIGRATE_ISOLATE means free pages in
  * the range will never be allocated. Any free pages and pages freed in the
@@ -32,8 +33,8 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages)
  * start_pfn/end_pfn must be aligned to pageblock_order.
  * Returns 0 on success and -EBUSY if any part of range cannot be isolated.
  */
-int
-start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn)
+int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
+			     unsigned migratetype)
 {
 	unsigned long pfn;
 	unsigned long undo_pfn;
@@ -56,7 +57,7 @@ undo:
 	for (pfn = start_pfn;
 	     pfn < undo_pfn;
 	     pfn += pageblock_nr_pages)
-		unset_migratetype_isolate(pfn_to_page(pfn));
+		unset_migratetype_isolate(pfn_to_page(pfn), migratetype);
 
 	return -EBUSY;
 }
@@ -64,8 +65,8 @@ undo:
 /*
  * Make isolated pages available again.
  */
-int
-undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn)
+int undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
+			    unsigned migratetype)
 {
 	unsigned long pfn;
 	struct page *page;
@@ -77,7 +78,7 @@ undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn)
 		page = __first_valid_page(pfn, pageblock_nr_pages);
 		if (!page || get_pageblock_migratetype(page) != MIGRATE_ISOLATE)
 			continue;
-		unset_migratetype_isolate(page);
+		unset_migratetype_isolate(page, migratetype);
 	}
 	return 0;
 }
@@ -86,7 +87,7 @@ undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn)
  * all pages in [start_pfn...end_pfn) must be in the same zone.
  * zone->lock must be held before call this.
  *
- * Returns 1 if all pages in the range is isolated.
+ * Returns 1 if all pages in the range are isolated.
  */
 static int
 __test_page_isolated_in_pageblock(unsigned long pfn, unsigned long end_pfn)
-- 
cgit v1.3-7-g2ca7


From 49f223a9cd96c7293d7258ff88c2bdf83065f69c Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Wed, 25 Jan 2012 12:49:24 +0100
Subject: mm: trigger page reclaim in alloc_contig_range() to stabilise
 watermarks

alloc_contig_range() performs memory allocation so it also should keep
track on keeping the correct level of memory watermarks. This commit adds
a call to *_slowpath style reclaim to grab enough pages to make sure that
the final collection of contiguous pages from freelists will not starve
the system.

Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
CC: Michal Nazarewicz <mina86@mina86.com>
Tested-by: Rob Clark <rob.clark@linaro.org>
Tested-by: Ohad Ben-Cohen <ohad@wizery.com>
Tested-by: Benjamin Gaignard <benjamin.gaignard@linaro.org>
Tested-by: Robert Nelson <robertcnelson@gmail.com>
Tested-by: Barry Song <Baohua.Song@csr.com>
---
 include/linux/mmzone.h |  9 ++++++++
 mm/page_alloc.c        | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 69 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 8c1335f3c3a3..26f2040b8b04 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -63,8 +63,10 @@ enum {
 
 #ifdef CONFIG_CMA
 #  define is_migrate_cma(migratetype) unlikely((migratetype) == MIGRATE_CMA)
+#  define cma_wmark_pages(zone)	zone->min_cma_pages
 #else
 #  define is_migrate_cma(migratetype) false
+#  define cma_wmark_pages(zone) 0
 #endif
 
 #define for_each_migratetype_order(order, type) \
@@ -370,6 +372,13 @@ struct zone {
 #ifdef CONFIG_MEMORY_HOTPLUG
 	/* see spanned/present_pages for more description */
 	seqlock_t		span_seqlock;
+#endif
+#ifdef CONFIG_CMA
+	/*
+	 * CMA needs to increase watermark levels during the allocation
+	 * process to make sure that the system is not starved.
+	 */
+	unsigned long		min_cma_pages;
 #endif
 	struct free_area	free_area[MAX_ORDER];
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 4615531dcf66..22348ae1005d 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5079,6 +5079,11 @@ static void __setup_per_zone_wmarks(void)
 
 		zone->watermark[WMARK_LOW]  = min_wmark_pages(zone) + (tmp >> 2);
 		zone->watermark[WMARK_HIGH] = min_wmark_pages(zone) + (tmp >> 1);
+
+		zone->watermark[WMARK_MIN] += cma_wmark_pages(zone);
+		zone->watermark[WMARK_LOW] += cma_wmark_pages(zone);
+		zone->watermark[WMARK_HIGH] += cma_wmark_pages(zone);
+
 		setup_zone_migrate_reserve(zone);
 		spin_unlock_irqrestore(&zone->lock, flags);
 	}
@@ -5684,6 +5689,54 @@ static int __alloc_contig_migrate_range(unsigned long start, unsigned long end)
 	return ret > 0 ? 0 : ret;
 }
 
+/*
+ * Update zone's cma pages counter used for watermark level calculation.
+ */
+static inline void __update_cma_watermarks(struct zone *zone, int count)
+{
+	unsigned long flags;
+	spin_lock_irqsave(&zone->lock, flags);
+	zone->min_cma_pages += count;
+	spin_unlock_irqrestore(&zone->lock, flags);
+	setup_per_zone_wmarks();
+}
+
+/*
+ * Trigger memory pressure bump to reclaim some pages in order to be able to
+ * allocate 'count' pages in single page units. Does similar work as
+ *__alloc_pages_slowpath() function.
+ */
+static int __reclaim_pages(struct zone *zone, gfp_t gfp_mask, int count)
+{
+	enum zone_type high_zoneidx = gfp_zone(gfp_mask);
+	struct zonelist *zonelist = node_zonelist(0, gfp_mask);
+	int did_some_progress = 0;
+	int order = 1;
+
+	/*
+	 * Increase level of watermarks to force kswapd do his job
+	 * to stabilise at new watermark level.
+	 */
+	__update_cma_watermarks(zone, count);
+
+	/* Obey watermarks as if the page was being allocated */
+	while (!zone_watermark_ok(zone, 0, low_wmark_pages(zone), 0, 0)) {
+		wake_all_kswapd(order, zonelist, high_zoneidx, zone_idx(zone));
+
+		did_some_progress = __perform_reclaim(gfp_mask, order, zonelist,
+						      NULL);
+		if (!did_some_progress) {
+			/* Exhausted what can be done so it's blamo time */
+			out_of_memory(zonelist, gfp_mask, order, NULL, false);
+		}
+	}
+
+	/* Restore original watermark levels. */
+	__update_cma_watermarks(zone, -count);
+
+	return count;
+}
+
 /**
  * alloc_contig_range() -- tries to allocate given range of pages
  * @start:	start PFN to allocate
@@ -5782,6 +5835,13 @@ int alloc_contig_range(unsigned long start, unsigned long end,
 		goto done;
 	}
 
+	/*
+	 * Reclaim enough pages to make sure that contiguous allocation
+	 * will not starve the system.
+	 */
+	__reclaim_pages(zone, GFP_HIGHUSER_MOVABLE, end-start);
+
+	/* Grab isolated pages from freelists. */
 	outer_end = isolate_freepages_range(outer_start, end);
 	if (!outer_end) {
 		ret = -EBUSY;
-- 
cgit v1.3-7-g2ca7


From c64be2bb1c6eb43c838b2c6d57b074078be208dd Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Thu, 29 Dec 2011 13:09:51 +0100
Subject: drivers: add Contiguous Memory Allocator

The Contiguous Memory Allocator is a set of helper functions for DMA
mapping framework that improves allocations of contiguous memory chunks.

CMA grabs memory on system boot, marks it with MIGRATE_CMA migrate type
and gives back to the system. Kernel is allowed to allocate only movable
pages within CMA's managed memory so that it can be used for example for
page cache when DMA mapping do not use it. On
dma_alloc_from_contiguous() request such pages are migrated out of CMA
area to free required contiguous block and fulfill the request. This
allows to allocate large contiguous chunks of memory at any time
assuming that there is enough free memory available in the system.

This code is heavily based on earlier works by Michal Nazarewicz.

Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
Signed-off-by: Michal Nazarewicz <mina86@mina86.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Tested-by: Rob Clark <rob.clark@linaro.org>
Tested-by: Ohad Ben-Cohen <ohad@wizery.com>
Tested-by: Benjamin Gaignard <benjamin.gaignard@linaro.org>
Tested-by: Robert Nelson <robertcnelson@gmail.com>
Tested-by: Barry Song <Baohua.Song@csr.com>
---
 Documentation/kernel-parameters.txt  |   5 +
 arch/Kconfig                         |   3 +
 drivers/base/Kconfig                 |  89 ++++++++
 drivers/base/Makefile                |   1 +
 drivers/base/dma-contiguous.c        | 401 +++++++++++++++++++++++++++++++++++
 include/asm-generic/dma-contiguous.h |  28 +++
 include/linux/device.h               |   4 +
 include/linux/dma-contiguous.h       | 110 ++++++++++
 8 files changed, 641 insertions(+)
 create mode 100644 drivers/base/dma-contiguous.c
 create mode 100644 include/asm-generic/dma-contiguous.h
 create mode 100644 include/linux/dma-contiguous.h

(limited to 'include/linux')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index c1601e5a8b71..669e8bb52b94 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -508,6 +508,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			Also note the kernel might malfunction if you disable
 			some critical bits.
 
+	cma=nn[MG]	[ARM,KNL]
+			Sets the size of kernel global memory area for contiguous
+			memory allocations. For more information, see
+			include/linux/dma-contiguous.h
+
 	cmo_free_hint=	[PPC] Format: { yes | no }
 			Specify whether pages are marked as being inactive
 			when they are freed.  This is used in CMO environments
diff --git a/arch/Kconfig b/arch/Kconfig
index 684eb5af439d..0a3ffe46e567 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -142,6 +142,9 @@ config HAVE_ARCH_TRACEHOOK
 config HAVE_DMA_ATTRS
 	bool
 
+config HAVE_DMA_CONTIGUOUS
+	bool
+
 config USE_GENERIC_SMP_HELPERS
 	bool
 
diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig
index 9aa618acfe97..9b21469482ae 100644
--- a/drivers/base/Kconfig
+++ b/drivers/base/Kconfig
@@ -192,4 +192,93 @@ config DMA_SHARED_BUFFER
 	  APIs extension; the file's descriptor can then be passed on to other
 	  driver.
 
+config CMA
+	bool "Contiguous Memory Allocator (EXPERIMENTAL)"
+	depends on HAVE_DMA_CONTIGUOUS && HAVE_MEMBLOCK && EXPERIMENTAL
+	select MIGRATION
+	help
+	  This enables the Contiguous Memory Allocator which allows drivers
+	  to allocate big physically-contiguous blocks of memory for use with
+	  hardware components that do not support I/O map nor scatter-gather.
+
+	  For more information see <include/linux/dma-contiguous.h>.
+	  If unsure, say "n".
+
+if CMA
+
+config CMA_DEBUG
+	bool "CMA debug messages (DEVELOPMENT)"
+	depends on DEBUG_KERNEL
+	help
+	  Turns on debug messages in CMA.  This produces KERN_DEBUG
+	  messages for every CMA call as well as various messages while
+	  processing calls such as dma_alloc_from_contiguous().
+	  This option does not affect warning and error messages.
+
+comment "Default contiguous memory area size:"
+
+config CMA_SIZE_MBYTES
+	int "Size in Mega Bytes"
+	depends on !CMA_SIZE_SEL_PERCENTAGE
+	default 16
+	help
+	  Defines the size (in MiB) of the default memory area for Contiguous
+	  Memory Allocator.
+
+config CMA_SIZE_PERCENTAGE
+	int "Percentage of total memory"
+	depends on !CMA_SIZE_SEL_MBYTES
+	default 10
+	help
+	  Defines the size of the default memory area for Contiguous Memory
+	  Allocator as a percentage of the total memory in the system.
+
+choice
+	prompt "Selected region size"
+	default CMA_SIZE_SEL_ABSOLUTE
+
+config CMA_SIZE_SEL_MBYTES
+	bool "Use mega bytes value only"
+
+config CMA_SIZE_SEL_PERCENTAGE
+	bool "Use percentage value only"
+
+config CMA_SIZE_SEL_MIN
+	bool "Use lower value (minimum)"
+
+config CMA_SIZE_SEL_MAX
+	bool "Use higher value (maximum)"
+
+endchoice
+
+config CMA_ALIGNMENT
+	int "Maximum PAGE_SIZE order of alignment for contiguous buffers"
+	range 4 9
+	default 8
+	help
+	  DMA mapping framework by default aligns all buffers to the smallest
+	  PAGE_SIZE order which is greater than or equal to the requested buffer
+	  size. This works well for buffers up to a few hundreds kilobytes, but
+	  for larger buffers it just a memory waste. With this parameter you can
+	  specify the maximum PAGE_SIZE order for contiguous buffers. Larger
+	  buffers will be aligned only to this specified order. The order is
+	  expressed as a power of two multiplied by the PAGE_SIZE.
+
+	  For example, if your system defaults to 4KiB pages, the order value
+	  of 8 means that the buffers will be aligned up to 1MiB only.
+
+	  If unsure, leave the default value "8".
+
+config CMA_AREAS
+	int "Maximum count of the CMA device-private areas"
+	default 7
+	help
+	  CMA allows to create CMA areas for particular devices. This parameter
+	  sets the maximum number of such device private CMA areas in the
+	  system.
+
+	  If unsure, leave the default value "7".
+
+endif
+
 endmenu
diff --git a/drivers/base/Makefile b/drivers/base/Makefile
index b6d1b9c4200c..5aa2d703d19f 100644
--- a/drivers/base/Makefile
+++ b/drivers/base/Makefile
@@ -6,6 +6,7 @@ obj-y			:= core.o bus.o dd.o syscore.o \
 			   attribute_container.o transport_class.o \
 			   topology.o
 obj-$(CONFIG_DEVTMPFS)	+= devtmpfs.o
+obj-$(CONFIG_CMA) += dma-contiguous.o
 obj-y			+= power/
 obj-$(CONFIG_HAS_DMA)	+= dma-mapping.o
 obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o
diff --git a/drivers/base/dma-contiguous.c b/drivers/base/dma-contiguous.c
new file mode 100644
index 000000000000..78efb0306a44
--- /dev/null
+++ b/drivers/base/dma-contiguous.c
@@ -0,0 +1,401 @@
+/*
+ * Contiguous Memory Allocator for DMA mapping framework
+ * Copyright (c) 2010-2011 by Samsung Electronics.
+ * Written by:
+ *	Marek Szyprowski <m.szyprowski@samsung.com>
+ *	Michal Nazarewicz <mina86@mina86.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License or (at your optional) any later version of the license.
+ */
+
+#define pr_fmt(fmt) "cma: " fmt
+
+#ifdef CONFIG_CMA_DEBUG
+#ifndef DEBUG
+#  define DEBUG
+#endif
+#endif
+
+#include <asm/page.h>
+#include <asm/dma-contiguous.h>
+
+#include <linux/memblock.h>
+#include <linux/err.h>
+#include <linux/mm.h>
+#include <linux/mutex.h>
+#include <linux/page-isolation.h>
+#include <linux/slab.h>
+#include <linux/swap.h>
+#include <linux/mm_types.h>
+#include <linux/dma-contiguous.h>
+
+#ifndef SZ_1M
+#define SZ_1M (1 << 20)
+#endif
+
+struct cma {
+	unsigned long	base_pfn;
+	unsigned long	count;
+	unsigned long	*bitmap;
+};
+
+struct cma *dma_contiguous_default_area;
+
+#ifdef CONFIG_CMA_SIZE_MBYTES
+#define CMA_SIZE_MBYTES CONFIG_CMA_SIZE_MBYTES
+#else
+#define CMA_SIZE_MBYTES 0
+#endif
+
+/*
+ * Default global CMA area size can be defined in kernel's .config.
+ * This is usefull mainly for distro maintainers to create a kernel
+ * that works correctly for most supported systems.
+ * The size can be set in bytes or as a percentage of the total memory
+ * in the system.
+ *
+ * Users, who want to set the size of global CMA area for their system
+ * should use cma= kernel parameter.
+ */
+static const unsigned long size_bytes = CMA_SIZE_MBYTES * SZ_1M;
+static long size_cmdline = -1;
+
+static int __init early_cma(char *p)
+{
+	pr_debug("%s(%s)\n", __func__, p);
+	size_cmdline = memparse(p, &p);
+	return 0;
+}
+early_param("cma", early_cma);
+
+#ifdef CONFIG_CMA_SIZE_PERCENTAGE
+
+static unsigned long __init __maybe_unused cma_early_percent_memory(void)
+{
+	struct memblock_region *reg;
+	unsigned long total_pages = 0;
+
+	/*
+	 * We cannot use memblock_phys_mem_size() here, because
+	 * memblock_analyze() has not been called yet.
+	 */
+	for_each_memblock(memory, reg)
+		total_pages += memblock_region_memory_end_pfn(reg) -
+			       memblock_region_memory_base_pfn(reg);
+
+	return (total_pages * CONFIG_CMA_SIZE_PERCENTAGE / 100) << PAGE_SHIFT;
+}
+
+#else
+
+static inline __maybe_unused unsigned long cma_early_percent_memory(void)
+{
+	return 0;
+}
+
+#endif
+
+/**
+ * dma_contiguous_reserve() - reserve area for contiguous memory handling
+ * @limit: End address of the reserved memory (optional, 0 for any).
+ *
+ * This function reserves memory from early allocator. It should be
+ * called by arch specific code once the early allocator (memblock or bootmem)
+ * has been activated and all other subsystems have already allocated/reserved
+ * memory.
+ */
+void __init dma_contiguous_reserve(phys_addr_t limit)
+{
+	unsigned long selected_size = 0;
+
+	pr_debug("%s(limit %08lx)\n", __func__, (unsigned long)limit);
+
+	if (size_cmdline != -1) {
+		selected_size = size_cmdline;
+	} else {
+#ifdef CONFIG_CMA_SIZE_SEL_MBYTES
+		selected_size = size_bytes;
+#elif defined(CONFIG_CMA_SIZE_SEL_PERCENTAGE)
+		selected_size = cma_early_percent_memory();
+#elif defined(CONFIG_CMA_SIZE_SEL_MIN)
+		selected_size = min(size_bytes, cma_early_percent_memory());
+#elif defined(CONFIG_CMA_SIZE_SEL_MAX)
+		selected_size = max(size_bytes, cma_early_percent_memory());
+#endif
+	}
+
+	if (selected_size) {
+		pr_debug("%s: reserving %ld MiB for global area\n", __func__,
+			 selected_size / SZ_1M);
+
+		dma_declare_contiguous(NULL, selected_size, 0, limit);
+	}
+};
+
+static DEFINE_MUTEX(cma_mutex);
+
+static __init int cma_activate_area(unsigned long base_pfn, unsigned long count)
+{
+	unsigned long pfn = base_pfn;
+	unsigned i = count >> pageblock_order;
+	struct zone *zone;
+
+	WARN_ON_ONCE(!pfn_valid(pfn));
+	zone = page_zone(pfn_to_page(pfn));
+
+	do {
+		unsigned j;
+		base_pfn = pfn;
+		for (j = pageblock_nr_pages; j; --j, pfn++) {
+			WARN_ON_ONCE(!pfn_valid(pfn));
+			if (page_zone(pfn_to_page(pfn)) != zone)
+				return -EINVAL;
+		}
+		init_cma_reserved_pageblock(pfn_to_page(base_pfn));
+	} while (--i);
+	return 0;
+}
+
+static __init struct cma *cma_create_area(unsigned long base_pfn,
+				     unsigned long count)
+{
+	int bitmap_size = BITS_TO_LONGS(count) * sizeof(long);
+	struct cma *cma;
+	int ret = -ENOMEM;
+
+	pr_debug("%s(base %08lx, count %lx)\n", __func__, base_pfn, count);
+
+	cma = kmalloc(sizeof *cma, GFP_KERNEL);
+	if (!cma)
+		return ERR_PTR(-ENOMEM);
+
+	cma->base_pfn = base_pfn;
+	cma->count = count;
+	cma->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
+
+	if (!cma->bitmap)
+		goto no_mem;
+
+	ret = cma_activate_area(base_pfn, count);
+	if (ret)
+		goto error;
+
+	pr_debug("%s: returned %p\n", __func__, (void *)cma);
+	return cma;
+
+error:
+	kfree(cma->bitmap);
+no_mem:
+	kfree(cma);
+	return ERR_PTR(ret);
+}
+
+static struct cma_reserved {
+	phys_addr_t start;
+	unsigned long size;
+	struct device *dev;
+} cma_reserved[MAX_CMA_AREAS] __initdata;
+static unsigned cma_reserved_count __initdata;
+
+static int __init cma_init_reserved_areas(void)
+{
+	struct cma_reserved *r = cma_reserved;
+	unsigned i = cma_reserved_count;
+
+	pr_debug("%s()\n", __func__);
+
+	for (; i; --i, ++r) {
+		struct cma *cma;
+		cma = cma_create_area(PFN_DOWN(r->start),
+				      r->size >> PAGE_SHIFT);
+		if (!IS_ERR(cma))
+			dev_set_cma_area(r->dev, cma);
+	}
+	return 0;
+}
+core_initcall(cma_init_reserved_areas);
+
+/**
+ * dma_declare_contiguous() - reserve area for contiguous memory handling
+ *			      for particular device
+ * @dev:   Pointer to device structure.
+ * @size:  Size of the reserved memory.
+ * @base:  Start address of the reserved memory (optional, 0 for any).
+ * @limit: End address of the reserved memory (optional, 0 for any).
+ *
+ * This function reserves memory for specified device. It should be
+ * called by board specific code when early allocator (memblock or bootmem)
+ * is still activate.
+ */
+int __init dma_declare_contiguous(struct device *dev, unsigned long size,
+				  phys_addr_t base, phys_addr_t limit)
+{
+	struct cma_reserved *r = &cma_reserved[cma_reserved_count];
+	unsigned long alignment;
+
+	pr_debug("%s(size %lx, base %08lx, limit %08lx)\n", __func__,
+		 (unsigned long)size, (unsigned long)base,
+		 (unsigned long)limit);
+
+	/* Sanity checks */
+	if (cma_reserved_count == ARRAY_SIZE(cma_reserved)) {
+		pr_err("Not enough slots for CMA reserved regions!\n");
+		return -ENOSPC;
+	}
+
+	if (!size)
+		return -EINVAL;
+
+	/* Sanitise input arguments */
+	alignment = PAGE_SIZE << max(MAX_ORDER, pageblock_order);
+	base = ALIGN(base, alignment);
+	size = ALIGN(size, alignment);
+	limit &= ~(alignment - 1);
+
+	/* Reserve memory */
+	if (base) {
+		if (memblock_is_region_reserved(base, size) ||
+		    memblock_reserve(base, size) < 0) {
+			base = -EBUSY;
+			goto err;
+		}
+	} else {
+		/*
+		 * Use __memblock_alloc_base() since
+		 * memblock_alloc_base() panic()s.
+		 */
+		phys_addr_t addr = __memblock_alloc_base(size, alignment, limit);
+		if (!addr) {
+			base = -ENOMEM;
+			goto err;
+		} else if (addr + size > ~(unsigned long)0) {
+			memblock_free(addr, size);
+			base = -EINVAL;
+			goto err;
+		} else {
+			base = addr;
+		}
+	}
+
+	/*
+	 * Each reserved area must be initialised later, when more kernel
+	 * subsystems (like slab allocator) are available.
+	 */
+	r->start = base;
+	r->size = size;
+	r->dev = dev;
+	cma_reserved_count++;
+	pr_info("CMA: reserved %ld MiB at %08lx\n", size / SZ_1M,
+		(unsigned long)base);
+
+	/* Architecture specific contiguous memory fixup. */
+	dma_contiguous_early_fixup(base, size);
+	return 0;
+err:
+	pr_err("CMA: failed to reserve %ld MiB\n", size / SZ_1M);
+	return base;
+}
+
+/**
+ * dma_alloc_from_contiguous() - allocate pages from contiguous area
+ * @dev:   Pointer to device for which the allocation is performed.
+ * @count: Requested number of pages.
+ * @align: Requested alignment of pages (in PAGE_SIZE order).
+ *
+ * This function allocates memory buffer for specified device. It uses
+ * device specific contiguous memory area if available or the default
+ * global one. Requires architecture specific get_dev_cma_area() helper
+ * function.
+ */
+struct page *dma_alloc_from_contiguous(struct device *dev, int count,
+				       unsigned int align)
+{
+	unsigned long mask, pfn, pageno, start = 0;
+	struct cma *cma = dev_get_cma_area(dev);
+	int ret;
+
+	if (!cma || !cma->count)
+		return NULL;
+
+	if (align > CONFIG_CMA_ALIGNMENT)
+		align = CONFIG_CMA_ALIGNMENT;
+
+	pr_debug("%s(cma %p, count %d, align %d)\n", __func__, (void *)cma,
+		 count, align);
+
+	if (!count)
+		return NULL;
+
+	mask = (1 << align) - 1;
+
+	mutex_lock(&cma_mutex);
+
+	for (;;) {
+		pageno = bitmap_find_next_zero_area(cma->bitmap, cma->count,
+						    start, count, mask);
+		if (pageno >= cma->count) {
+			ret = -ENOMEM;
+			goto error;
+		}
+
+		pfn = cma->base_pfn + pageno;
+		ret = alloc_contig_range(pfn, pfn + count, MIGRATE_CMA);
+		if (ret == 0) {
+			bitmap_set(cma->bitmap, pageno, count);
+			break;
+		} else if (ret != -EBUSY) {
+			goto error;
+		}
+		pr_debug("%s(): memory range at %p is busy, retrying\n",
+			 __func__, pfn_to_page(pfn));
+		/* try again with a bit different memory target */
+		start = pageno + mask + 1;
+	}
+
+	mutex_unlock(&cma_mutex);
+
+	pr_debug("%s(): returned %p\n", __func__, pfn_to_page(pfn));
+	return pfn_to_page(pfn);
+error:
+	mutex_unlock(&cma_mutex);
+	return NULL;
+}
+
+/**
+ * dma_release_from_contiguous() - release allocated pages
+ * @dev:   Pointer to device for which the pages were allocated.
+ * @pages: Allocated pages.
+ * @count: Number of allocated pages.
+ *
+ * This function releases memory allocated by dma_alloc_from_contiguous().
+ * It returns false when provided pages do not belong to contiguous area and
+ * true otherwise.
+ */
+bool dma_release_from_contiguous(struct device *dev, struct page *pages,
+				 int count)
+{
+	struct cma *cma = dev_get_cma_area(dev);
+	unsigned long pfn;
+
+	if (!cma || !pages)
+		return false;
+
+	pr_debug("%s(page %p)\n", __func__, (void *)pages);
+
+	pfn = page_to_pfn(pages);
+
+	if (pfn < cma->base_pfn || pfn >= cma->base_pfn + cma->count)
+		return false;
+
+	VM_BUG_ON(pfn + count > cma->base_pfn + cma->count);
+
+	mutex_lock(&cma_mutex);
+	bitmap_clear(cma->bitmap, pfn - cma->base_pfn, count);
+	free_contig_range(pfn, count);
+	mutex_unlock(&cma_mutex);
+
+	return true;
+}
diff --git a/include/asm-generic/dma-contiguous.h b/include/asm-generic/dma-contiguous.h
new file mode 100644
index 000000000000..c544356b374b
--- /dev/null
+++ b/include/asm-generic/dma-contiguous.h
@@ -0,0 +1,28 @@
+#ifndef ASM_DMA_CONTIGUOUS_H
+#define ASM_DMA_CONTIGUOUS_H
+
+#ifdef __KERNEL__
+#ifdef CONFIG_CMA
+
+#include <linux/device.h>
+#include <linux/dma-contiguous.h>
+
+static inline struct cma *dev_get_cma_area(struct device *dev)
+{
+	if (dev && dev->cma_area)
+		return dev->cma_area;
+	return dma_contiguous_default_area;
+}
+
+static inline void dev_set_cma_area(struct device *dev, struct cma *cma)
+{
+	if (dev)
+		dev->cma_area = cma;
+	if (!dev || !dma_contiguous_default_area)
+		dma_contiguous_default_area = cma;
+}
+
+#endif
+#endif
+
+#endif
diff --git a/include/linux/device.h b/include/linux/device.h
index 5ad17cccdd71..e3399290436e 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -661,6 +661,10 @@ struct device {
 
 	struct dma_coherent_mem	*dma_mem; /* internal for coherent mem
 					     override */
+#ifdef CONFIG_CMA
+	struct cma *cma_area;		/* contiguous memory area for dma
+					   allocations */
+#endif
 	/* arch specific additions */
 	struct dev_archdata	archdata;
 
diff --git a/include/linux/dma-contiguous.h b/include/linux/dma-contiguous.h
new file mode 100644
index 000000000000..2f303e4b7ed3
--- /dev/null
+++ b/include/linux/dma-contiguous.h
@@ -0,0 +1,110 @@
+#ifndef __LINUX_CMA_H
+#define __LINUX_CMA_H
+
+/*
+ * Contiguous Memory Allocator for DMA mapping framework
+ * Copyright (c) 2010-2011 by Samsung Electronics.
+ * Written by:
+ *	Marek Szyprowski <m.szyprowski@samsung.com>
+ *	Michal Nazarewicz <mina86@mina86.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License or (at your optional) any later version of the license.
+ */
+
+/*
+ * Contiguous Memory Allocator
+ *
+ *   The Contiguous Memory Allocator (CMA) makes it possible to
+ *   allocate big contiguous chunks of memory after the system has
+ *   booted.
+ *
+ * Why is it needed?
+ *
+ *   Various devices on embedded systems have no scatter-getter and/or
+ *   IO map support and require contiguous blocks of memory to
+ *   operate.  They include devices such as cameras, hardware video
+ *   coders, etc.
+ *
+ *   Such devices often require big memory buffers (a full HD frame
+ *   is, for instance, more then 2 mega pixels large, i.e. more than 6
+ *   MB of memory), which makes mechanisms such as kmalloc() or
+ *   alloc_page() ineffective.
+ *
+ *   At the same time, a solution where a big memory region is
+ *   reserved for a device is suboptimal since often more memory is
+ *   reserved then strictly required and, moreover, the memory is
+ *   inaccessible to page system even if device drivers don't use it.
+ *
+ *   CMA tries to solve this issue by operating on memory regions
+ *   where only movable pages can be allocated from.  This way, kernel
+ *   can use the memory for pagecache and when device driver requests
+ *   it, allocated pages can be migrated.
+ *
+ * Driver usage
+ *
+ *   CMA should not be used by the device drivers directly. It is
+ *   only a helper framework for dma-mapping subsystem.
+ *
+ *   For more information, see kernel-docs in drivers/base/dma-contiguous.c
+ */
+
+#ifdef __KERNEL__
+
+struct cma;
+struct page;
+struct device;
+
+#ifdef CONFIG_CMA
+
+/*
+ * There is always at least global CMA area and a few optional device
+ * private areas configured in kernel .config.
+ */
+#define MAX_CMA_AREAS	(1 + CONFIG_CMA_AREAS)
+
+extern struct cma *dma_contiguous_default_area;
+
+void dma_contiguous_reserve(phys_addr_t addr_limit);
+int dma_declare_contiguous(struct device *dev, unsigned long size,
+			   phys_addr_t base, phys_addr_t limit);
+
+struct page *dma_alloc_from_contiguous(struct device *dev, int count,
+				       unsigned int order);
+bool dma_release_from_contiguous(struct device *dev, struct page *pages,
+				 int count);
+
+#else
+
+#define MAX_CMA_AREAS	(0)
+
+static inline void dma_contiguous_reserve(phys_addr_t limit) { }
+
+static inline
+int dma_declare_contiguous(struct device *dev, unsigned long size,
+			   phys_addr_t base, phys_addr_t limit)
+{
+	return -ENOSYS;
+}
+
+static inline
+struct page *dma_alloc_from_contiguous(struct device *dev, int count,
+				       unsigned int order)
+{
+	return NULL;
+}
+
+static inline
+bool dma_release_from_contiguous(struct device *dev, struct page *pages,
+				 int count)
+{
+	return false;
+}
+
+#endif
+
+#endif
+
+#endif
-- 
cgit v1.3-7-g2ca7


From 3df425f316fb5c5e90236ff22b6e6616b3516af0 Mon Sep 17 00:00:00 2001
From: John Crispin <blogic@openwrt.org>
Date: Thu, 12 Apr 2012 17:33:07 +0200
Subject: OF: PCI: const usage needed by MIPS

On MIPS we want to call of_irq_map_pci from inside

arch/mips/include/asm/pci.h:extern int pcibios_map_irq(
				const struct pci_dev *dev, u8 slot, u8 pin);
For this to work we need to change several functions to const usage.

Signed-off-by: John Crispin <blogic@openwrt.org>
Cc: linux-pci@vger.kernel.org
Cc: devicetree-discuss@lists.ozlabs.org
Cc: linux-mips@linux-mips.org
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Grant Likely <grant.likely@secretlab.ca>
Patchwork: https://patchwork.linux-mips.org/patch/3710/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 drivers/of/of_pci_irq.c | 2 +-
 drivers/pci/pci.c       | 2 +-
 include/linux/of_pci.h  | 2 +-
 include/linux/pci.h     | 5 +++--
 4 files changed, 6 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/of/of_pci_irq.c b/drivers/of/of_pci_irq.c
index 93125163dea2..677053813211 100644
--- a/drivers/of/of_pci_irq.c
+++ b/drivers/of/of_pci_irq.c
@@ -15,7 +15,7 @@
  * PCI tree until an device-node is found, at which point it will finish
  * resolving using the OF tree walking.
  */
-int of_irq_map_pci(struct pci_dev *pdev, struct of_irq *out_irq)
+int of_irq_map_pci(const struct pci_dev *pdev, struct of_irq *out_irq)
 {
 	struct device_node *dn, *ppnode;
 	struct pci_dev *ppdev;
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 111569ccab43..8b91fe741f6a 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -2369,7 +2369,7 @@ void pci_enable_acs(struct pci_dev *dev)
  * number is always 0 (see the Implementation Note in section 2.2.8.1 of
  * the PCI Express Base Specification, Revision 2.1)
  */
-u8 pci_swizzle_interrupt_pin(struct pci_dev *dev, u8 pin)
+u8 pci_swizzle_interrupt_pin(const struct pci_dev *dev, u8 pin)
 {
 	int slot;
 
diff --git a/include/linux/of_pci.h b/include/linux/of_pci.h
index f93e21700d3e..bb115deb7612 100644
--- a/include/linux/of_pci.h
+++ b/include/linux/of_pci.h
@@ -5,7 +5,7 @@
 
 struct pci_dev;
 struct of_irq;
-int of_irq_map_pci(struct pci_dev *pdev, struct of_irq *out_irq);
+int of_irq_map_pci(const struct pci_dev *pdev, struct of_irq *out_irq);
 
 struct device_node;
 struct device_node *of_pci_find_child_device(struct device_node *parent,
diff --git a/include/linux/pci.h b/include/linux/pci.h
index e444f5b49118..3bbc77e20a61 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -680,7 +680,7 @@ int __must_check pci_bus_add_device(struct pci_dev *dev);
 void pci_read_bridge_bases(struct pci_bus *child);
 struct resource *pci_find_parent_resource(const struct pci_dev *dev,
 					  struct resource *res);
-u8 pci_swizzle_interrupt_pin(struct pci_dev *dev, u8 pin);
+u8 pci_swizzle_interrupt_pin(const struct pci_dev *dev, u8 pin);
 int pci_get_interrupt_pin(struct pci_dev *dev, struct pci_dev **bridge);
 u8 pci_common_swizzle(struct pci_dev *dev, u8 *pinp);
 extern struct pci_dev *pci_dev_get(struct pci_dev *dev);
@@ -1685,7 +1685,8 @@ extern void pci_release_bus_of_node(struct pci_bus *bus);
 /* Arch may override this (weak) */
 extern struct device_node * __weak pcibios_get_phb_of_node(struct pci_bus *bus);
 
-static inline struct device_node *pci_device_to_OF_node(struct pci_dev *pdev)
+static inline struct device_node *
+pci_device_to_OF_node(const struct pci_dev *pdev)
 {
 	return pdev ? pdev->dev.of_node : NULL;
 }
-- 
cgit v1.3-7-g2ca7


From e3c0fb7ef515852619932b0da993baa2d107684d Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 21 May 2012 22:45:24 -0400
Subject: NFS: Add NFSDBG_STATE

fs/nfs/nfs4state.c does not yet have any dprintk() call sites, and I'm
about to introduce some.  We will need a new flag for enabling them.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4renewd.c    | 2 +-
 fs/nfs/nfs4state.c     | 2 ++
 include/linux/nfs_fs.h | 1 +
 3 files changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c
index dc484c0eae7f..6930bec91bca 100644
--- a/fs/nfs/nfs4renewd.c
+++ b/fs/nfs/nfs4renewd.c
@@ -49,7 +49,7 @@
 #include "nfs4_fs.h"
 #include "delegation.h"
 
-#define NFSDBG_FACILITY	NFSDBG_PROC
+#define NFSDBG_FACILITY		NFSDBG_STATE
 
 void
 nfs4_renew_state(struct work_struct *work)
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 7f0fcfc1fe9d..f8c06dec6563 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -57,6 +57,8 @@
 #include "internal.h"
 #include "pnfs.h"
 
+#define NFSDBG_FACILITY		NFSDBG_STATE
+
 #define OPENOWNER_POOL_SIZE	8
 
 const nfs4_stateid zero_stateid;
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 6cc7dbaf0695..80a9385b88ab 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -634,6 +634,7 @@ nfs_fileid_to_ino_t(u64 fileid)
 #define NFSDBG_FSCACHE		0x0800
 #define NFSDBG_PNFS		0x1000
 #define NFSDBG_PNFS_LD		0x2000
+#define NFSDBG_STATE		0x4000
 #define NFSDBG_ALL		0xFFFF
 
 #ifdef __KERNEL__
-- 
cgit v1.3-7-g2ca7


From 722baafc9e638714a69aa66e9ed24ef961ff350c Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 21 May 2012 22:44:22 -0400
Subject: NFS: Fix comment misspelling in struct nfs_client definition

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_fs_sb.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 7073fc74481c..5498e9d9ba84 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -79,7 +79,7 @@ struct nfs_client {
 	u32			cl_seqid;
 	/* The flags used for obtaining the clientid during EXCHANGE_ID */
 	u32			cl_exchange_flags;
-	struct nfs4_session	*cl_session; 	/* sharred session */
+	struct nfs4_session	*cl_session;	/* shared session */
 #endif /* CONFIG_NFS_V4 */
 
 #ifdef CONFIG_NFS_FSCACHE
-- 
cgit v1.3-7-g2ca7


From 79d4e1f0d8910f0214a57832ca6d589640d572c0 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 21 May 2012 22:44:31 -0400
Subject: NFS: Use proper naming conventions for NFSv4.1 server scope fields

Clean up:  When naming fields and data types, follow established
conventions to facilitate accurate grep/cscope searches.

Additionally, for consistency, move the scope field into the NFSv4-
specific part of the nfs_client, and free that memory in the logic
that shuts down NFSv4 nfs_clients.

Introduced by commit 99fe60d0 "nfs41: exchange_id operation", April
1 2009.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/client.c           |  2 +-
 fs/nfs/nfs4_fs.h          |  2 +-
 fs/nfs/nfs4proc.c         | 18 ++++++++++--------
 include/linux/nfs_fs_sb.h |  4 ++--
 include/linux/nfs_xdr.h   |  4 ++--
 5 files changed, 16 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index b4e2199c32b3..471fc9b927a9 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -237,6 +237,7 @@ static void nfs4_shutdown_client(struct nfs_client *clp)
 		nfs_idmap_delete(clp);
 
 	rpc_destroy_wait_queue(&clp->cl_rpcwaitq);
+	kfree(clp->cl_serverscope);
 }
 
 /* idr_remove_all is not needed as all id's are removed by nfs_put_client */
@@ -305,7 +306,6 @@ static void nfs_free_client(struct nfs_client *clp)
 
 	put_net(clp->net);
 	kfree(clp->cl_hostname);
-	kfree(clp->server_scope);
 	kfree(clp->impl_id);
 	kfree(clp);
 
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index edeef71f957a..b14bcc3c132d 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -338,7 +338,7 @@ extern void nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs
 extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags);
 extern void nfs41_handle_recall_slot(struct nfs_client *clp);
 extern void nfs41_handle_server_scope(struct nfs_client *,
-				      struct server_scope **);
+				      struct nfs41_server_scope **);
 extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
 extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
 extern void nfs4_select_rw_stateid(nfs4_stateid *, struct nfs4_state *,
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 0f4e54033abf..94494f24bb12 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -5052,7 +5052,8 @@ out_inval:
 }
 
 static bool
-nfs41_same_server_scope(struct server_scope *a, struct server_scope *b)
+nfs41_same_server_scope(struct nfs41_server_scope *a,
+			struct nfs41_server_scope *b)
 {
 	if (a->server_scope_sz == b->server_scope_sz &&
 	    memcmp(a->server_scope, b->server_scope, a->server_scope_sz) == 0)
@@ -5099,7 +5100,8 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
 				clp->cl_rpcclient->cl_nodename,
 				clp->cl_rpcclient->cl_auth->au_flavor);
 
-	res.server_scope = kzalloc(sizeof(struct server_scope), GFP_KERNEL);
+	res.server_scope = kzalloc(sizeof(struct nfs41_server_scope),
+					GFP_KERNEL);
 	if (unlikely(!res.server_scope)) {
 		status = -ENOMEM;
 		goto out;
@@ -5123,18 +5125,18 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
 		kfree(res.impl_id);
 
 	if (!status) {
-		if (clp->server_scope &&
-		    !nfs41_same_server_scope(clp->server_scope,
+		if (clp->cl_serverscope &&
+		    !nfs41_same_server_scope(clp->cl_serverscope,
 					     res.server_scope)) {
 			dprintk("%s: server_scope mismatch detected\n",
 				__func__);
 			set_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH, &clp->cl_state);
-			kfree(clp->server_scope);
-			clp->server_scope = NULL;
+			kfree(clp->cl_serverscope);
+			clp->cl_serverscope = NULL;
 		}
 
-		if (!clp->server_scope) {
-			clp->server_scope = res.server_scope;
+		if (!clp->cl_serverscope) {
+			clp->cl_serverscope = res.server_scope;
 			goto out;
 		}
 	}
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 5498e9d9ba84..900d733668eb 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -17,7 +17,7 @@ struct nfs4_sequence_args;
 struct nfs4_sequence_res;
 struct nfs_server;
 struct nfs4_minor_version_ops;
-struct server_scope;
+struct nfs41_server_scope;
 struct nfs41_impl_id;
 
 /*
@@ -80,13 +80,13 @@ struct nfs_client {
 	/* The flags used for obtaining the clientid during EXCHANGE_ID */
 	u32			cl_exchange_flags;
 	struct nfs4_session	*cl_session;	/* shared session */
+	struct nfs41_server_scope *cl_serverscope;
 #endif /* CONFIG_NFS_V4 */
 
 #ifdef CONFIG_NFS_FSCACHE
 	struct fscache_cookie	*fscache;	/* client index cache cookie */
 #endif
 
-	struct server_scope	*server_scope;	/* from exchange_id */
 	struct nfs41_impl_id	*impl_id;	/* from exchange_id */
 	struct net		*net;
 };
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 2e53a3f1d2ff..c420b8d60a55 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1104,7 +1104,7 @@ struct server_owner {
 	char				major_id[NFS4_OPAQUE_LIMIT];
 };
 
-struct server_scope {
+struct nfs41_server_scope {
 	uint32_t			server_scope_sz;
 	char 				server_scope[NFS4_OPAQUE_LIMIT];
 };
@@ -1118,7 +1118,7 @@ struct nfs41_impl_id {
 struct nfs41_exchange_id_res {
 	struct nfs_client		*client;
 	u32				flags;
-	struct server_scope		*server_scope;
+	struct nfs41_server_scope	*server_scope;
 	struct nfs41_impl_id		*impl_id;
 };
 
-- 
cgit v1.3-7-g2ca7


From 591555465ec513c42416392d392fd56866cb220c Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 21 May 2012 22:44:41 -0400
Subject: NFS: Use proper naming conventions for nfs_client.impl_id field

Clean up:  When naming fields and data types, follow established
conventions to facilitate accurate grep/cscope searches.

Additionally, for consistency, move the impl_id field into the NFSv4-
specific part of the nfs_client, and free that memory in the logic
that shuts down NFSv4 nfs_clients.

Introduced by commit 7d2ed9ac "NFSv4: parse and display server
implementation ids," Fri Feb 17, 2012.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/client.c           |  2 +-
 fs/nfs/nfs4proc.c         | 12 ++++++------
 fs/nfs/super.c            |  4 ++--
 include/linux/nfs_fs_sb.h |  2 +-
 4 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 471fc9b927a9..39db1beb92f8 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -238,6 +238,7 @@ static void nfs4_shutdown_client(struct nfs_client *clp)
 
 	rpc_destroy_wait_queue(&clp->cl_rpcwaitq);
 	kfree(clp->cl_serverscope);
+	kfree(clp->cl_implid);
 }
 
 /* idr_remove_all is not needed as all id's are removed by nfs_put_client */
@@ -306,7 +307,6 @@ static void nfs_free_client(struct nfs_client *clp)
 
 	put_net(clp->net);
 	kfree(clp->cl_hostname);
-	kfree(clp->impl_id);
 	kfree(clp);
 
 	dprintk("<-- nfs_free_client()\n");
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 94494f24bb12..daa4e1b17313 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -5119,8 +5119,8 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
 
 	if (!status) {
 		/* use the most recent implementation id */
-		kfree(clp->impl_id);
-		clp->impl_id = res.impl_id;
+		kfree(clp->cl_implid);
+		clp->cl_implid = res.impl_id;
 	} else
 		kfree(res.impl_id);
 
@@ -5144,12 +5144,12 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
 out_server_scope:
 	kfree(res.server_scope);
 out:
-	if (clp->impl_id)
+	if (clp->cl_implid)
 		dprintk("%s: Server Implementation ID: "
 			"domain: %s, name: %s, date: %llu,%u\n",
-			__func__, clp->impl_id->domain, clp->impl_id->name,
-			clp->impl_id->date.seconds,
-			clp->impl_id->date.nseconds);
+			__func__, clp->cl_implid->domain, clp->cl_implid->name,
+			clp->cl_implid->date.seconds,
+			clp->cl_implid->date.nseconds);
 	dprintk("<-- %s status= %d\n", __func__, status);
 	return status;
 }
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index a973eb101a92..ff656c022684 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -796,8 +796,8 @@ static void show_pnfs(struct seq_file *m, struct nfs_server *server)
 
 static void show_implementation_id(struct seq_file *m, struct nfs_server *nfss)
 {
-	if (nfss->nfs_client && nfss->nfs_client->impl_id) {
-		struct nfs41_impl_id *impl_id = nfss->nfs_client->impl_id;
+	if (nfss->nfs_client && nfss->nfs_client->cl_implid) {
+		struct nfs41_impl_id *impl_id = nfss->nfs_client->cl_implid;
 		seq_printf(m, "\n\timpl_id:\tname='%s',domain='%s',"
 			   "date='%llu,%u'",
 			   impl_id->name, impl_id->domain,
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 900d733668eb..773e02135903 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -81,13 +81,13 @@ struct nfs_client {
 	u32			cl_exchange_flags;
 	struct nfs4_session	*cl_session;	/* shared session */
 	struct nfs41_server_scope *cl_serverscope;
+	struct nfs41_impl_id	*cl_implid;
 #endif /* CONFIG_NFS_V4 */
 
 #ifdef CONFIG_NFS_FSCACHE
 	struct fscache_cookie	*fscache;	/* client index cache cookie */
 #endif
 
-	struct nfs41_impl_id	*impl_id;	/* from exchange_id */
 	struct net		*net;
 };
 
-- 
cgit v1.3-7-g2ca7


From 73ea666c2bb536f2862cefdb3e014ed62b262ba5 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 21 May 2012 22:44:50 -0400
Subject: NFS: Use proper naming conventions for the nfs_client.net field

Clean up:  When naming fields and data types, follow established
conventions to facilitate accurate grep/cscope searches.

Introduced by commit e50a7a1a "NFS: make NFS client allocated per
network namespace context," Tue Jan 10, 2012.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/blocklayout/blocklayoutdev.c |  2 +-
 fs/nfs/client.c                     | 22 +++++++++++-----------
 fs/nfs/idmap.c                      |  4 ++--
 fs/nfs/nfs4filelayoutdev.c          |  2 +-
 include/linux/nfs_fs_sb.h           |  2 +-
 5 files changed, 16 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/blocklayout/blocklayoutdev.c b/fs/nfs/blocklayout/blocklayoutdev.c
index a5c88a554d92..c96554245ccf 100644
--- a/fs/nfs/blocklayout/blocklayoutdev.c
+++ b/fs/nfs/blocklayout/blocklayoutdev.c
@@ -123,7 +123,7 @@ nfs4_blk_decode_device(struct nfs_server *server,
 	uint8_t *dataptr;
 	DECLARE_WAITQUEUE(wq, current);
 	int offset, len, i, rc;
-	struct net *net = server->nfs_client->net;
+	struct net *net = server->nfs_client->cl_net;
 	struct nfs_net *nn = net_generic(net, nfs_net_id);
 	struct bl_dev_msg *reply = &nn->bl_mount_reply;
 
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 39db1beb92f8..9b9df71df09a 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -65,7 +65,7 @@ static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq);
 static int nfs_get_cb_ident_idr(struct nfs_client *clp, int minorversion)
 {
 	int ret = 0;
-	struct nfs_net *nn = net_generic(clp->net, nfs_net_id);
+	struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id);
 
 	if (clp->rpc_ops->version != 4 || minorversion != 0)
 		return ret;
@@ -174,7 +174,7 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
 	clp->cl_rpcclient = ERR_PTR(-EINVAL);
 
 	clp->cl_proto = cl_init->proto;
-	clp->net = get_net(cl_init->net);
+	clp->cl_net = get_net(cl_init->net);
 
 #ifdef CONFIG_NFS_V4
 	err = nfs_get_cb_ident_idr(clp, cl_init->minorversion);
@@ -252,7 +252,7 @@ void nfs_cleanup_cb_ident_idr(struct net *net)
 /* nfs_client_lock held */
 static void nfs_cb_idr_remove_locked(struct nfs_client *clp)
 {
-	struct nfs_net *nn = net_generic(clp->net, nfs_net_id);
+	struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id);
 
 	if (clp->cl_cb_ident)
 		idr_remove(&nn->cb_ident_idr, clp->cl_cb_ident);
@@ -305,7 +305,7 @@ static void nfs_free_client(struct nfs_client *clp)
 	if (clp->cl_machine_cred != NULL)
 		put_rpccred(clp->cl_machine_cred);
 
-	put_net(clp->net);
+	put_net(clp->cl_net);
 	kfree(clp->cl_hostname);
 	kfree(clp);
 
@@ -323,7 +323,7 @@ void nfs_put_client(struct nfs_client *clp)
 		return;
 
 	dprintk("--> nfs_put_client({%d})\n", atomic_read(&clp->cl_count));
-	nn = net_generic(clp->net, nfs_net_id);
+	nn = net_generic(clp->cl_net, nfs_net_id);
 
 	if (atomic_dec_and_lock(&clp->cl_count, &nn->nfs_client_lock)) {
 		list_del(&clp->cl_share_link);
@@ -661,7 +661,7 @@ static int nfs_create_rpc_client(struct nfs_client *clp,
 {
 	struct rpc_clnt		*clnt = NULL;
 	struct rpc_create_args args = {
-		.net		= clp->net,
+		.net		= clp->cl_net,
 		.protocol	= clp->cl_proto,
 		.address	= (struct sockaddr *)&clp->cl_addr,
 		.addrsize	= clp->cl_addrlen,
@@ -715,7 +715,7 @@ static int nfs_start_lockd(struct nfs_server *server)
 		.nfs_version	= clp->rpc_ops->version,
 		.noresvport	= server->flags & NFS_MOUNT_NORESVPORT ?
 					1 : 0,
-		.net		= clp->net,
+		.net		= clp->cl_net,
 	};
 
 	if (nlm_init.nfs_version > 3)
@@ -1060,7 +1060,7 @@ static void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_serve
 static void nfs_server_insert_lists(struct nfs_server *server)
 {
 	struct nfs_client *clp = server->nfs_client;
-	struct nfs_net *nn = net_generic(clp->net, nfs_net_id);
+	struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id);
 
 	spin_lock(&nn->nfs_client_lock);
 	list_add_tail_rcu(&server->client_link, &clp->cl_superblocks);
@@ -1077,7 +1077,7 @@ static void nfs_server_remove_lists(struct nfs_server *server)
 
 	if (clp == NULL)
 		return;
-	nn = net_generic(clp->net, nfs_net_id);
+	nn = net_generic(clp->cl_net, nfs_net_id);
 	spin_lock(&nn->nfs_client_lock);
 	list_del_rcu(&server->client_link);
 	if (list_empty(&clp->cl_superblocks))
@@ -1486,7 +1486,7 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp,
 		.rpc_ops = &nfs_v4_clientops,
 		.proto = ds_proto,
 		.minorversion = mds_clp->cl_minorversion,
-		.net = mds_clp->net,
+		.net = mds_clp->cl_net,
 	};
 	struct rpc_timeout ds_timeout;
 	struct nfs_client *clp;
@@ -1709,7 +1709,7 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
 				rpc_protocol(parent_server->client),
 				parent_server->client->cl_timeout,
 				parent_client->cl_mvops->minor_version,
-				parent_client->net);
+				parent_client->cl_net);
 	if (error < 0)
 		goto error;
 
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index 3e8edbe71ec6..2eaecf9d8db7 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -415,7 +415,7 @@ static int __nfs_idmap_register(struct dentry *dir,
 static void nfs_idmap_unregister(struct nfs_client *clp,
 				      struct rpc_pipe *pipe)
 {
-	struct net *net = clp->net;
+	struct net *net = clp->cl_net;
 	struct super_block *pipefs_sb;
 
 	pipefs_sb = rpc_get_sb_net(net);
@@ -429,7 +429,7 @@ static int nfs_idmap_register(struct nfs_client *clp,
 				   struct idmap *idmap,
 				   struct rpc_pipe *pipe)
 {
-	struct net *net = clp->net;
+	struct net *net = clp->cl_net;
 	struct super_block *pipefs_sb;
 	int err = 0;
 
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
index bf49b78db1b3..c610f84ff030 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -629,7 +629,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags)
 
 		mp_count = be32_to_cpup(p); /* multipath count */
 		for (j = 0; j < mp_count; j++) {
-			da = decode_ds_addr(NFS_SERVER(ino)->nfs_client->net,
+			da = decode_ds_addr(NFS_SERVER(ino)->nfs_client->cl_net,
 					    &stream, gfp_flags);
 			if (da)
 				list_add_tail(&da->da_node, &dsaddrs);
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 773e02135903..59410b365ba4 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -88,7 +88,7 @@ struct nfs_client {
 	struct fscache_cookie	*fscache;	/* client index cache cookie */
 #endif
 
-	struct net		*net;
+	struct net		*cl_net;
 };
 
 /*
-- 
cgit v1.3-7-g2ca7


From f092075dd33ea04000590e8ffea65c2e7d03d764 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 21 May 2012 22:45:41 -0400
Subject: NFS: Always use the same SETCLIENTID boot verifier

Currently our NFS client assigns a unique SETCLIENTID boot verifier
for each server IP address it knows about.  It's set to CURRENT_TIME
when the struct nfs_client for that server IP is created.

During the SETCLIENTID operation, our client also presents an
nfs_client_id4 string to servers, as an identifier on which the server
can hang all of this client's NFSv4 state.  Our client's
nfs_client_id4 string is unique for each server IP address.

An NFSv4 server is obligated to wipe all NFSv4 state associated with
an nfs_client_id4 string when the client presents the same
nfs_client_id4 string along with a changed SETCLIENTID boot verifier.

When our client unmounts the last of a server's shares, it destroys
that server's struct nfs_client.  The next time the client mounts that
NFS server, it creates a fresh struct nfs_client with a fresh boot
verifier.  On seeing the fresh verifer, the server wipes any previous
NFSv4 state associated with that nfs_client_id4.

However, NFSv4.1 clients are supposed to present the same
nfs_client_id4 string to all servers.  And, to support Transparent
State Migration, the same nfs_client_id4 string should be presented
to all NFSv4.0 servers so they recognize that migrated state for this
client belongs with state a server may already have for this client.
(This is known as the Uniform Client String model).

If the nfs_client_id4 string is the same but the boot verifier changes
for each server IP address, SETCLIENTID and EXCHANGE_ID operations
from such a client could unintentionally result in a server wiping a
client's previously obtained lease.

Thus, if our NFS client is going to use a fixed nfs_client_id4 string,
either for NFSv4.0 or NFSv4.1 mounts, our NFS client should use a
boot verifier that does not change depending on server IP address.
Replace our current per-nfs_client boot verifier with a per-nfs_net
boot verifier.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/client.c           |  2 +-
 fs/nfs/netns.h            |  5 +++++
 fs/nfs/nfs4proc.c         | 14 ++++++++------
 fs/nfs/nfs4xdr.c          |  5 ++++-
 include/linux/nfs_fs_sb.h |  3 ---
 5 files changed, 18 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 9b9df71df09a..af9b7e4b9df2 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -184,7 +184,6 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
 	spin_lock_init(&clp->cl_lock);
 	INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state);
 	rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client");
-	clp->cl_boot_time = CURRENT_TIME;
 	clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED;
 	clp->cl_minorversion = cl_init->minorversion;
 	clp->cl_mvops = nfs_v4_minor_ops[cl_init->minorversion];
@@ -1813,6 +1812,7 @@ void nfs_clients_init(struct net *net)
 	idr_init(&nn->cb_ident_idr);
 #endif
 	spin_lock_init(&nn->nfs_client_lock);
+	nn->boot_time = CURRENT_TIME;
 }
 
 #ifdef CONFIG_PROC_FS
diff --git a/fs/nfs/netns.h b/fs/nfs/netns.h
index aa14ec303e94..8a6394edb8b0 100644
--- a/fs/nfs/netns.h
+++ b/fs/nfs/netns.h
@@ -1,3 +1,7 @@
+/*
+ * NFS-private data for each "struct net".  Accessed with net_generic().
+ */
+
 #ifndef __NFS_NETNS_H__
 #define __NFS_NETNS_H__
 
@@ -20,6 +24,7 @@ struct nfs_net {
 	struct idr cb_ident_idr; /* Protected by nfs_client_lock */
 #endif
 	spinlock_t nfs_client_lock;
+	struct timespec boot_time;
 };
 
 extern int nfs_net_id;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 81ccdbbb43e8..9e9334a172cf 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -64,6 +64,7 @@
 #include "iostat.h"
 #include "callback.h"
 #include "pnfs.h"
+#include "netns.h"
 
 #define NFSDBG_FACILITY		NFSDBG_PROC
 
@@ -3903,8 +3904,8 @@ wait_on_recovery:
 	return -EAGAIN;
 }
 
-static void nfs4_construct_boot_verifier(struct nfs_client *clp,
-					 nfs4_verifier *bootverf)
+static void nfs4_init_boot_verifier(const struct nfs_client *clp,
+				    nfs4_verifier *bootverf)
 {
 	__be32 verf[2];
 
@@ -3914,8 +3915,9 @@ static void nfs4_construct_boot_verifier(struct nfs_client *clp,
 		verf[0] = 0;
 		verf[1] = (__be32)(NSEC_PER_SEC + 1);
 	} else {
-		verf[0] = (__be32)clp->cl_boot_time.tv_sec;
-		verf[1] = (__be32)clp->cl_boot_time.tv_nsec;
+		struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id);
+		verf[0] = (__be32)nn->boot_time.tv_sec;
+		verf[1] = (__be32)nn->boot_time.tv_nsec;
 	}
 	memcpy(bootverf->data, verf, sizeof(bootverf->data));
 }
@@ -3939,7 +3941,7 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
 	int loop = 0;
 	int status;
 
-	nfs4_construct_boot_verifier(clp, &sc_verifier);
+	nfs4_init_boot_verifier(clp, &sc_verifier);
 
 	for(;;) {
 		rcu_read_lock();
@@ -5099,7 +5101,7 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
 	dprintk("--> %s\n", __func__);
 	BUG_ON(clp == NULL);
 
-	nfs4_construct_boot_verifier(clp, &verifier);
+	nfs4_init_boot_verifier(clp, &verifier);
 
 	args.id_len = scnprintf(args.id, sizeof(args.id),
 				"%s/%s/%u",
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index db040e971932..12b99825a1c1 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -53,9 +53,11 @@
 #include <linux/nfs4.h>
 #include <linux/nfs_fs.h>
 #include <linux/nfs_idmap.h>
+
 #include "nfs4_fs.h"
 #include "internal.h"
 #include "pnfs.h"
+#include "netns.h"
 
 #define NFSDBG_FACILITY		NFSDBG_XDR
 
@@ -1702,6 +1704,7 @@ static void encode_create_session(struct xdr_stream *xdr,
 	char machine_name[NFS4_MAX_MACHINE_NAME_LEN];
 	uint32_t len;
 	struct nfs_client *clp = args->client;
+	struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id);
 	u32 max_resp_sz_cached;
 
 	/*
@@ -1743,7 +1746,7 @@ static void encode_create_session(struct xdr_stream *xdr,
 	*p++ = cpu_to_be32(RPC_AUTH_UNIX);			/* auth_sys */
 
 	/* authsys_parms rfc1831 */
-	*p++ = cpu_to_be32((u32)clp->cl_boot_time.tv_nsec);	/* stamp */
+	*p++ = (__be32)nn->boot_time.tv_nsec;		/* stamp */
 	p = xdr_encode_opaque(p, machine_name, len);
 	*p++ = cpu_to_be32(0);				/* UID */
 	*p++ = cpu_to_be32(0);				/* GID */
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 59410b365ba4..fbec57d6dc0a 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -61,9 +61,6 @@ struct nfs_client {
 
 	struct rpc_wait_queue	cl_rpcwaitq;
 
-	/* used for the setclientid verifier */
-	struct timespec		cl_boot_time;
-
 	/* idmapper */
 	struct idmap *		cl_idmap;
 
-- 
cgit v1.3-7-g2ca7


From 8cab4c390b43fe34c07bd33799c1bc24be648122 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 21 May 2012 22:45:59 -0400
Subject: NFS: Refactor nfs_get_client(): initialize nfs_client

Clean up: Continue to rationalize the locking in nfs_get_client() by
moving the logic that handles the case where a matching server IP
address is not found.

When we support server trunking detection, client initialization may
return a different nfs_client struct than was passed to it.  Change
the synopsis of the init_client methods to return an nfs_client.

The client initialization logic in nfs_get_client() is not much more
than a wrapper around ->init_client.  It's simpler to keep the little
bits of error handling in the version-specific init_client methods.

No behavior change is expected.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/client.c         | 76 +++++++++++++++++++++++++++----------------------
 fs/nfs/internal.h       |  4 +--
 include/linux/nfs_xdr.h |  3 +-
 3 files changed, 46 insertions(+), 37 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 5f19f9577730..8a4b3c2c5a2b 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -546,7 +546,6 @@ nfs_get_client(const struct nfs_client_initdata *cl_init,
 	       int noresvport)
 {
 	struct nfs_client *clp, *new = NULL;
-	int error;
 	struct nfs_net *nn = net_generic(cl_init->net, nfs_net_id);
 
 	dprintk("--> nfs_get_client(%s,v%u)\n",
@@ -563,8 +562,13 @@ nfs_get_client(const struct nfs_client_initdata *cl_init,
 				nfs_free_client(new);
 			return nfs_found_client(cl_init, clp);
 		}
-		if (new)
-			goto install_client;
+		if (new) {
+			list_add(&new->cl_share_link, &nn->nfs_client_list);
+			spin_unlock(&nn->nfs_client_lock);
+			return cl_init->rpc_ops->init_client(new,
+						timeparms, ip_addr,
+						authflavour, noresvport);
+		}
 
 		spin_unlock(&nn->nfs_client_lock);
 
@@ -574,21 +578,6 @@ nfs_get_client(const struct nfs_client_initdata *cl_init,
 	dprintk("<-- nfs_get_client() Failed to find %s (%ld)\n",
 		cl_init->hostname ?: "", PTR_ERR(new));
 	return new;
-
-	/* install a new client and return with it unready */
-install_client:
-	clp = new;
-	list_add(&clp->cl_share_link, &nn->nfs_client_list);
-	spin_unlock(&nn->nfs_client_lock);
-
-	error = cl_init->rpc_ops->init_client(clp, timeparms, ip_addr,
-					      authflavour, noresvport);
-	if (error < 0) {
-		nfs_put_client(clp);
-		return ERR_PTR(error);
-	}
-	dprintk("--> nfs_get_client() = %p [new]\n", clp);
-	return clp;
 }
 
 /*
@@ -813,10 +802,19 @@ static int nfs_init_server_rpcclient(struct nfs_server *server,
 	return 0;
 }
 
-/*
- * Initialise an NFS2 or NFS3 client
+/**
+ * nfs_init_client - Initialise an NFS2 or NFS3 client
+ *
+ * @clp: nfs_client to initialise
+ * @timeparms: timeout parameters for underlying RPC transport
+ * @ip_addr: IP presentation address (not used)
+ * @authflavor: authentication flavor for underlying RPC transport
+ * @noresvport: set if RPC transport can use an ephemeral source port
+ *
+ * Returns pointer to an NFS client, or an ERR_PTR value.
  */
-int nfs_init_client(struct nfs_client *clp, const struct rpc_timeout *timeparms,
+struct nfs_client *nfs_init_client(struct nfs_client *clp,
+		    const struct rpc_timeout *timeparms,
 		    const char *ip_addr, rpc_authflavor_t authflavour,
 		    int noresvport)
 {
@@ -825,7 +823,7 @@ int nfs_init_client(struct nfs_client *clp, const struct rpc_timeout *timeparms,
 	if (clp->cl_cons_state == NFS_CS_READY) {
 		/* the client is already initialised */
 		dprintk("<-- nfs_init_client() = 0 [already %p]\n", clp);
-		return 0;
+		return clp;
 	}
 
 	/*
@@ -837,12 +835,13 @@ int nfs_init_client(struct nfs_client *clp, const struct rpc_timeout *timeparms,
 	if (error < 0)
 		goto error;
 	nfs_mark_client_ready(clp, NFS_CS_READY);
-	return 0;
+	return clp;
 
 error:
 	nfs_mark_client_ready(clp, error);
+	nfs_put_client(clp);
 	dprintk("<-- nfs_init_client() = xerror %d\n", error);
-	return error;
+	return ERR_PTR(error);
 }
 
 /*
@@ -1358,14 +1357,22 @@ static int nfs4_init_client_minor_version(struct nfs_client *clp)
 	return nfs4_init_callback(clp);
 }
 
-/*
- * Initialise an NFS4 client record
+/**
+ * nfs4_init_client - Initialise an NFS4 client record
+ *
+ * @clp: nfs_client to initialise
+ * @timeparms: timeout parameters for underlying RPC transport
+ * @ip_addr: callback IP address in presentation format
+ * @authflavor: authentication flavor for underlying RPC transport
+ * @noresvport: set if RPC transport can use an ephemeral source port
+ *
+ * Returns pointer to an NFS client, or an ERR_PTR value.
  */
-int nfs4_init_client(struct nfs_client *clp,
-		     const struct rpc_timeout *timeparms,
-		     const char *ip_addr,
-		     rpc_authflavor_t authflavour,
-		     int noresvport)
+struct nfs_client *nfs4_init_client(struct nfs_client *clp,
+				    const struct rpc_timeout *timeparms,
+				    const char *ip_addr,
+				    rpc_authflavor_t authflavour,
+				    int noresvport)
 {
 	char buf[INET6_ADDRSTRLEN + 1];
 	int error;
@@ -1373,7 +1380,7 @@ int nfs4_init_client(struct nfs_client *clp,
 	if (clp->cl_cons_state == NFS_CS_READY) {
 		/* the client is initialised already */
 		dprintk("<-- nfs4_init_client() = 0 [already %p]\n", clp);
-		return 0;
+		return clp;
 	}
 
 	/* Check NFS protocol revision and initialize RPC op vector */
@@ -1413,12 +1420,13 @@ int nfs4_init_client(struct nfs_client *clp,
 
 	if (!nfs4_has_session(clp))
 		nfs_mark_client_ready(clp, NFS_CS_READY);
-	return 0;
+	return clp;
 
 error:
 	nfs_mark_client_ready(clp, error);
+	nfs_put_client(clp);
 	dprintk("<-- nfs4_init_client() = xerror %d\n", error);
-	return error;
+	return ERR_PTR(error);
 }
 
 /*
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 989959a59f07..3a9e80c9524b 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -238,7 +238,7 @@ extern int nfs4_init_ds_session(struct nfs_client *clp);
 
 /* proc.c */
 void nfs_close_context(struct nfs_open_context *ctx, int is_sync);
-extern int nfs_init_client(struct nfs_client *clp,
+extern struct nfs_client *nfs_init_client(struct nfs_client *clp,
 			   const struct rpc_timeout *timeparms,
 			   const char *ip_addr, rpc_authflavor_t authflavour,
 			   int noresvport);
@@ -373,7 +373,7 @@ void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo,
 
 /* nfs4proc.c */
 extern void __nfs4_read_done_cb(struct nfs_read_data *);
-extern int nfs4_init_client(struct nfs_client *clp,
+extern struct nfs_client *nfs4_init_client(struct nfs_client *clp,
 			    const struct rpc_timeout *timeparms,
 			    const char *ip_addr,
 			    rpc_authflavor_t authflavour,
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index c420b8d60a55..0c521cd496a7 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1397,7 +1397,8 @@ struct nfs_rpc_ops {
 				struct nfs_open_context *ctx,
 				int open_flags,
 				struct iattr *iattr);
-	int	(*init_client) (struct nfs_client *, const struct rpc_timeout *,
+	struct nfs_client *
+		(*init_client) (struct nfs_client *, const struct rpc_timeout *,
 				const char *, rpc_authflavor_t, int);
 };
 
-- 
cgit v1.3-7-g2ca7


From 4bf590e08f6db3395c181618a4c14f1c39b7c4af Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 21 May 2012 22:46:07 -0400
Subject: NFS: Add nfs_client behavior flags

"noresvport" and "discrtry" can be passed to nfs_create_rpc_client()
by setting flags in the passed-in nfs_client.  This change makes it
easy to add new flags.

Note that these settings are now "sticky" over the lifetime of a
struct nfs_client, and may even be copied when an nfs_client is
cloned.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/client.c           | 42 ++++++++++++++++++++----------------------
 fs/nfs/internal.h         |  6 ++----
 include/linux/nfs_fs_sb.h |  3 +++
 include/linux/nfs_xdr.h   |  2 +-
 4 files changed, 26 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 8a4b3c2c5a2b..34b2e68c5249 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -131,6 +131,7 @@ const struct rpc_program nfsacl_program = {
 #endif  /* CONFIG_NFS_V3_ACL */
 
 struct nfs_client_initdata {
+	unsigned long init_flags;
 	const char *hostname;
 	const struct sockaddr *addr;
 	size_t addrlen;
@@ -542,8 +543,7 @@ static struct nfs_client *
 nfs_get_client(const struct nfs_client_initdata *cl_init,
 	       const struct rpc_timeout *timeparms,
 	       const char *ip_addr,
-	       rpc_authflavor_t authflavour,
-	       int noresvport)
+	       rpc_authflavor_t authflavour)
 {
 	struct nfs_client *clp, *new = NULL;
 	struct nfs_net *nn = net_generic(cl_init->net, nfs_net_id);
@@ -565,9 +565,10 @@ nfs_get_client(const struct nfs_client_initdata *cl_init,
 		if (new) {
 			list_add(&new->cl_share_link, &nn->nfs_client_list);
 			spin_unlock(&nn->nfs_client_lock);
+			new->cl_flags = cl_init->init_flags;
 			return cl_init->rpc_ops->init_client(new,
 						timeparms, ip_addr,
-						authflavour, noresvport);
+						authflavour);
 		}
 
 		spin_unlock(&nn->nfs_client_lock);
@@ -651,8 +652,7 @@ static void nfs_init_timeout_values(struct rpc_timeout *to, int proto,
  */
 static int nfs_create_rpc_client(struct nfs_client *clp,
 				 const struct rpc_timeout *timeparms,
-				 rpc_authflavor_t flavor,
-				 int discrtry, int noresvport)
+				 rpc_authflavor_t flavor)
 {
 	struct rpc_clnt		*clnt = NULL;
 	struct rpc_create_args args = {
@@ -667,9 +667,9 @@ static int nfs_create_rpc_client(struct nfs_client *clp,
 		.authflavor	= flavor,
 	};
 
-	if (discrtry)
+	if (test_bit(NFS_CS_DISCRTRY, &clp->cl_flags))
 		args.flags |= RPC_CLNT_CREATE_DISCRTRY;
-	if (noresvport)
+	if (test_bit(NFS_CS_NORESVPORT, &clp->cl_flags))
 		args.flags |= RPC_CLNT_CREATE_NONPRIVPORT;
 
 	if (!IS_ERR(clp->cl_rpcclient))
@@ -809,14 +809,12 @@ static int nfs_init_server_rpcclient(struct nfs_server *server,
  * @timeparms: timeout parameters for underlying RPC transport
  * @ip_addr: IP presentation address (not used)
  * @authflavor: authentication flavor for underlying RPC transport
- * @noresvport: set if RPC transport can use an ephemeral source port
  *
  * Returns pointer to an NFS client, or an ERR_PTR value.
  */
 struct nfs_client *nfs_init_client(struct nfs_client *clp,
 		    const struct rpc_timeout *timeparms,
-		    const char *ip_addr, rpc_authflavor_t authflavour,
-		    int noresvport)
+		    const char *ip_addr, rpc_authflavor_t authflavour)
 {
 	int error;
 
@@ -830,8 +828,7 @@ struct nfs_client *nfs_init_client(struct nfs_client *clp,
 	 * Create a client RPC handle for doing FSSTAT with UNIX auth only
 	 * - RFC 2623, sec 2.3.2
 	 */
-	error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_UNIX,
-				      0, noresvport);
+	error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_UNIX);
 	if (error < 0)
 		goto error;
 	nfs_mark_client_ready(clp, NFS_CS_READY);
@@ -881,10 +878,11 @@ static int nfs_init_server(struct nfs_server *server,
 
 	nfs_init_timeout_values(&timeparms, data->nfs_server.protocol,
 			data->timeo, data->retrans);
+	if (data->flags & NFS_MOUNT_NORESVPORT)
+		set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
 
 	/* Allocate or find a client reference we can use */
-	clp = nfs_get_client(&cl_init, &timeparms, NULL, RPC_AUTH_UNIX,
-			     data->flags & NFS_MOUNT_NORESVPORT);
+	clp = nfs_get_client(&cl_init, &timeparms, NULL, RPC_AUTH_UNIX);
 	if (IS_ERR(clp)) {
 		dprintk("<-- nfs_init_server() = error %ld\n", PTR_ERR(clp));
 		return PTR_ERR(clp);
@@ -1364,15 +1362,13 @@ static int nfs4_init_client_minor_version(struct nfs_client *clp)
  * @timeparms: timeout parameters for underlying RPC transport
  * @ip_addr: callback IP address in presentation format
  * @authflavor: authentication flavor for underlying RPC transport
- * @noresvport: set if RPC transport can use an ephemeral source port
  *
  * Returns pointer to an NFS client, or an ERR_PTR value.
  */
 struct nfs_client *nfs4_init_client(struct nfs_client *clp,
 				    const struct rpc_timeout *timeparms,
 				    const char *ip_addr,
-				    rpc_authflavor_t authflavour,
-				    int noresvport)
+				    rpc_authflavor_t authflavour)
 {
 	char buf[INET6_ADDRSTRLEN + 1];
 	int error;
@@ -1386,8 +1382,8 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp,
 	/* Check NFS protocol revision and initialize RPC op vector */
 	clp->rpc_ops = &nfs_v4_clientops;
 
-	error = nfs_create_rpc_client(clp, timeparms, authflavour,
-				      1, noresvport);
+	__set_bit(NFS_CS_DISCRTRY, &clp->cl_flags);
+	error = nfs_create_rpc_client(clp, timeparms, authflavour);
 	if (error < 0)
 		goto error;
 
@@ -1455,9 +1451,11 @@ static int nfs4_set_client(struct nfs_server *server,
 
 	dprintk("--> nfs4_set_client()\n");
 
+	if (server->flags & NFS_MOUNT_NORESVPORT)
+		set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
+
 	/* Allocate or find a client reference we can use */
-	clp = nfs_get_client(&cl_init, timeparms, ip_addr, authflavour,
-			     server->flags & NFS_MOUNT_NORESVPORT);
+	clp = nfs_get_client(&cl_init, timeparms, ip_addr, authflavour);
 	if (IS_ERR(clp)) {
 		error = PTR_ERR(clp);
 		goto error;
@@ -1512,7 +1510,7 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp,
 	 */
 	nfs_init_timeout_values(&ds_timeout, ds_proto, ds_timeo, ds_retrans);
 	clp = nfs_get_client(&cl_init, &ds_timeout, mds_clp->cl_ipaddr,
-			     mds_clp->cl_rpcclient->cl_auth->au_flavor, 0);
+			     mds_clp->cl_rpcclient->cl_auth->au_flavor);
 
 	dprintk("<-- %s %p\n", __func__, clp);
 	return clp;
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 3a9e80c9524b..547f24f17d16 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -240,8 +240,7 @@ extern int nfs4_init_ds_session(struct nfs_client *clp);
 void nfs_close_context(struct nfs_open_context *ctx, int is_sync);
 extern struct nfs_client *nfs_init_client(struct nfs_client *clp,
 			   const struct rpc_timeout *timeparms,
-			   const char *ip_addr, rpc_authflavor_t authflavour,
-			   int noresvport);
+			   const char *ip_addr, rpc_authflavor_t authflavour);
 
 /* dir.c */
 extern int nfs_access_cache_shrinker(struct shrinker *shrink,
@@ -376,8 +375,7 @@ extern void __nfs4_read_done_cb(struct nfs_read_data *);
 extern struct nfs_client *nfs4_init_client(struct nfs_client *clp,
 			    const struct rpc_timeout *timeparms,
 			    const char *ip_addr,
-			    rpc_authflavor_t authflavour,
-			    int noresvport);
+			    rpc_authflavor_t authflavour);
 extern int _nfs4_call_sync(struct rpc_clnt *clnt,
 			   struct nfs_server *server,
 			   struct rpc_message *msg,
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index fbec57d6dc0a..3a99f5252340 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -35,6 +35,9 @@ struct nfs_client {
 #define NFS_CS_RENEWD		3		/* - renewd started */
 #define NFS_CS_STOP_RENEW	4		/* no more state to renew */
 #define NFS_CS_CHECK_LEASE_TIME	5		/* need to check lease time */
+	unsigned long		cl_flags;	/* behavior switches */
+#define NFS_CS_NORESVPORT	0		/* - use ephemeral src port */
+#define NFS_CS_DISCRTRY		1		/* - disconnect on RPC retry */
 	struct sockaddr_storage	cl_addr;	/* server identifier */
 	size_t			cl_addrlen;
 	char *			cl_hostname;	/* hostname of server */
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 0c521cd496a7..07048c012dec 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1399,7 +1399,7 @@ struct nfs_rpc_ops {
 				struct iattr *iattr);
 	struct nfs_client *
 		(*init_client) (struct nfs_client *, const struct rpc_timeout *,
-				const char *, rpc_authflavor_t, int);
+				const char *, rpc_authflavor_t);
 };
 
 /*
-- 
cgit v1.3-7-g2ca7


From acdeb69d9c5934a678a732b4e24770326bf9471e Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 21 May 2012 22:46:16 -0400
Subject: NFS: EXCHANGE_ID should save the server major and minor ID

Save the server major and minor ID results from EXCHANGE_ID, as they
are needed for detecting server trunking.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/client.c           |  1 +
 fs/nfs/nfs4proc.c         | 17 ++++++++++++++++-
 fs/nfs/nfs4xdr.c          | 13 ++++++++-----
 include/linux/nfs_fs_sb.h |  1 +
 include/linux/nfs_xdr.h   |  3 ++-
 5 files changed, 28 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 34b2e68c5249..3c144689f9e4 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -237,6 +237,7 @@ static void nfs4_shutdown_client(struct nfs_client *clp)
 		nfs_idmap_delete(clp);
 
 	rpc_destroy_wait_queue(&clp->cl_rpcwaitq);
+	kfree(clp->cl_serverowner);
 	kfree(clp->cl_serverscope);
 	kfree(clp->cl_implid);
 }
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 9e9334a172cf..0d46fe449f0b 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -5109,11 +5109,18 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
 				clp->cl_rpcclient->cl_nodename,
 				clp->cl_rpcclient->cl_auth->au_flavor);
 
+	res.server_owner = kzalloc(sizeof(struct nfs41_server_owner),
+					GFP_KERNEL);
+	if (unlikely(res.server_owner == NULL)) {
+		status = -ENOMEM;
+		goto out;
+	}
+
 	res.server_scope = kzalloc(sizeof(struct nfs41_server_scope),
 					GFP_KERNEL);
 	if (unlikely(res.server_scope == NULL)) {
 		status = -ENOMEM;
-		goto out;
+		goto out_server_owner;
 	}
 
 	res.impl_id = kzalloc(sizeof(struct nfs41_impl_id), GFP_KERNEL);
@@ -5126,6 +5133,12 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
 	if (status == 0)
 		status = nfs4_check_cl_exchange_flags(clp->cl_exchange_flags);
 
+	if (status == 0) {
+		kfree(clp->cl_serverowner);
+		clp->cl_serverowner = res.server_owner;
+		res.server_owner = NULL;
+	}
+
 	if (status == 0) {
 		/* use the most recent implementation id */
 		kfree(clp->cl_implid);
@@ -5150,6 +5163,8 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
 		}
 	}
 
+out_server_owner:
+	kfree(res.server_owner);
 out_server_scope:
 	kfree(res.server_scope);
 out:
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 12b99825a1c1..5ad2b2c2aecb 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -5144,24 +5144,27 @@ static int decode_exchange_id(struct xdr_stream *xdr,
 	if (dummy != SP4_NONE)
 		return -EIO;
 
-	/* Throw away minor_id */
+	/* server_owner4.so_minor_id */
 	p = xdr_inline_decode(xdr, 8);
 	if (unlikely(!p))
 		goto out_overflow;
+	p = xdr_decode_hyper(p, &res->server_owner->minor_id);
 
-	/* Throw away Major id */
+	/* server_owner4.so_major_id */
 	status = decode_opaque_inline(xdr, &dummy, &dummy_str);
 	if (unlikely(status))
 		return status;
+	if (unlikely(dummy > NFS4_OPAQUE_LIMIT))
+		return -EIO;
+	memcpy(res->server_owner->major_id, dummy_str, dummy);
+	res->server_owner->major_id_sz = dummy;
 
-	/* Save server_scope */
+	/* server_scope4 */
 	status = decode_opaque_inline(xdr, &dummy, &dummy_str);
 	if (unlikely(status))
 		return status;
-
 	if (unlikely(dummy > NFS4_OPAQUE_LIMIT))
 		return -EIO;
-
 	memcpy(res->server_scope->server_scope, dummy_str, dummy);
 	res->server_scope->server_scope_sz = dummy;
 
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 3a99f5252340..fbb78fb09bd2 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -80,6 +80,7 @@ struct nfs_client {
 	/* The flags used for obtaining the clientid during EXCHANGE_ID */
 	u32			cl_exchange_flags;
 	struct nfs4_session	*cl_session;	/* shared session */
+	struct nfs41_server_owner *cl_serverowner;
 	struct nfs41_server_scope *cl_serverscope;
 	struct nfs41_impl_id	*cl_implid;
 #endif /* CONFIG_NFS_V4 */
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 07048c012dec..0872f32c8eef 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1098,7 +1098,7 @@ struct nfs41_exchange_id_args {
 	u32				flags;
 };
 
-struct server_owner {
+struct nfs41_server_owner {
 	uint64_t			minor_id;
 	uint32_t			major_id_sz;
 	char				major_id[NFS4_OPAQUE_LIMIT];
@@ -1118,6 +1118,7 @@ struct nfs41_impl_id {
 struct nfs41_exchange_id_res {
 	struct nfs_client		*client;
 	u32				flags;
+	struct nfs41_server_owner	*server_owner;
 	struct nfs41_server_scope	*server_scope;
 	struct nfs41_impl_id		*impl_id;
 };
-- 
cgit v1.3-7-g2ca7


From 730a3d01b1e1e3ba102a5a4d3d5dcfecd55326b6 Mon Sep 17 00:00:00 2001
From: Johan Hovold <jhovold@gmail.com>
Date: Fri, 18 May 2012 20:22:45 +0200
Subject: mfd: Add r_select to lm3533 platform data

Add resistor-select parameter to the platform data.

Signed-off-by: Johan Hovold <jhovold@gmail.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/linux/mfd/lm3533.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/mfd/lm3533.h b/include/linux/mfd/lm3533.h
index 9660febe93c2..594bc591f256 100644
--- a/include/linux/mfd/lm3533.h
+++ b/include/linux/mfd/lm3533.h
@@ -43,6 +43,7 @@ struct lm3533_ctrlbank {
 
 struct lm3533_als_platform_data {
 	unsigned pwm_mode:1;		/* PWM input mode (default analog) */
+	u8 r_select;			/* 1 - 127 (ignored in PWM-mode) */
 };
 
 struct lm3533_bl_platform_data {
-- 
cgit v1.3-7-g2ca7


From cb8d8654570c257d2ec5f7fa089e18b338314317 Mon Sep 17 00:00:00 2001
From: Laxman Dewangan <ldewangan@nvidia.com>
Date: Sat, 19 May 2012 02:01:41 +0530
Subject: mfd: Save device node parsed platform data for tps65910 sub devices

Save the allocated memory to store the parsed device node information
to the global device structure so that sub devices can directly use this
pointer.
In this way, the sub devices does not require to re-allocate the
memory for storing the sub-devices specific device node information.

Signed-off-by: Laxman Dewangan <ldewangan@nvidia.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/tps65910.c       | 6 +++++-
 include/linux/mfd/tps65910.h | 3 +++
 2 files changed, 8 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/tps65910.c b/drivers/mfd/tps65910.c
index 18b30cf45e5b..05d449b33693 100644
--- a/drivers/mfd/tps65910.c
+++ b/drivers/mfd/tps65910.c
@@ -209,14 +209,17 @@ static __devinit int tps65910_i2c_probe(struct i2c_client *i2c,
 {
 	struct tps65910 *tps65910;
 	struct tps65910_board *pmic_plat_data;
+	struct tps65910_board *of_pmic_plat_data = NULL;
 	struct tps65910_platform_data *init_data;
 	int ret = 0;
 	int chip_id = id->driver_data;
 
 	pmic_plat_data = dev_get_platdata(&i2c->dev);
 
-	if (!pmic_plat_data && i2c->dev.of_node)
+	if (!pmic_plat_data && i2c->dev.of_node) {
 		pmic_plat_data = tps65910_parse_dt(i2c, &chip_id);
+		of_pmic_plat_data = pmic_plat_data;
+	}
 
 	if (!pmic_plat_data)
 		return -EINVAL;
@@ -229,6 +232,7 @@ static __devinit int tps65910_i2c_probe(struct i2c_client *i2c,
 	if (tps65910 == NULL)
 		return -ENOMEM;
 
+	tps65910->of_plat_data = of_pmic_plat_data;
 	i2c_set_clientdata(i2c, tps65910);
 	tps65910->dev = &i2c->dev;
 	tps65910->i2c_client = i2c;
diff --git a/include/linux/mfd/tps65910.h b/include/linux/mfd/tps65910.h
index ab04e901e57e..dd8dc0a6c462 100644
--- a/include/linux/mfd/tps65910.h
+++ b/include/linux/mfd/tps65910.h
@@ -830,6 +830,9 @@ struct tps65910 {
 	struct tps65910_rtc *rtc;
 	struct tps65910_power *power;
 
+	/* Device node parsed board data */
+	struct tps65910_board *of_plat_data;
+
 	/* IRQ Handling */
 	struct mutex irq_lock;
 	int chip_irq;
-- 
cgit v1.3-7-g2ca7


From 78302a194c0ddf4438e50e3f9b327a6dce6bc8fc Mon Sep 17 00:00:00 2001
From: Samuel Ortiz <sameo@linux.intel.com>
Date: Wed, 23 May 2012 13:28:33 +0200
Subject: mfd: Fix max77693 build failure
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Without it we get:

drivers/mfd/max77693.c: In function ‘max77693_i2c_probe’:
drivers/mfd/max77693.c:157:2: error: implicit declaration of function
‘max77693_irq_init’ [-Werror=implicit-function-declaration]
drivers/mfd/max77693.c: In function ‘max77693_resume’:
drivers/mfd/max77693.c:215:2: error: implicit declaration of function
‘max77693_irq_resume’ [-Werror=implicit-function-declaration]
drivers/mfd/max77693-irq.c: In function ‘max77693_irq_lock’:
drivers/mfd/max77693-irq.c:104:2: error: ‘struct max77693_dev’ has no member
named ‘irqlock’
drivers/mfd/max77693-irq.c: In function ‘max77693_irq_sync_unlock’:
drivers/mfd/max77693-irq.c:119:11: error: ‘struct max77693_dev’ has no member
named ‘irq_masks_cache’
drivers/mfd/max77693-irq.c:119:42: error: ‘struct max77693_dev’ has no member
named ‘irq_masks_cur’
drivers/mfd/max77693-irq.c:122:13: error: ‘struct max77693_dev’ has no member
named ‘irq_masks_cur’
drivers/mfd/max77693-irq.c:125:24: error: ‘struct max77693_dev’ has no member
named ‘irqlock’
drivers/mfd/max77693-irq.c: In function ‘max77693_irq_mask’:
drivers/mfd/max77693-irq.c:141:11: error: ‘struct max77693_dev’ has no member
named ‘irq_masks_cur’
drivers/mfd/max77693-irq.c:143:11: error: ‘struct max77693_dev’ has no member
named ‘irq_masks_cur’
drivers/mfd/max77693-irq.c: In function ‘max77693_irq_unmask’:
drivers/mfd/max77693-irq.c:153:11: error: ‘struct max77693_dev’ has no member
named ‘irq_masks_cur’
drivers/mfd/max77693-irq.c:155:11: error: ‘struct max77693_dev’ has no member
named ‘irq_masks_cur’
drivers/mfd/max77693-irq.c: In function ‘max77693_irq_thread’:
drivers/mfd/max77693-irq.c:209:26: error: ‘struct max77693_dev’ has no member
named ‘irq_masks_cur’
drivers/mfd/max77693-irq.c:211:27: error: ‘struct max77693_dev’ has no member
named ‘irq_masks_cur’
drivers/mfd/max77693-irq.c:217:39: error: ‘struct max77693_dev’ has no member
named ‘irq_domain’
drivers/mfd/max77693-irq.c: In function ‘max77693_irq_init’:
drivers/mfd/max77693-irq.c:260:2: error: ‘struct max77693_dev’ has no member
named ‘irqlock’
drivers/mfd/max77693-irq.c:268:12: error: ‘struct max77693_dev’ has no member
named ‘irq_masks_cur’
drivers/mfd/max77693-irq.c:269:12: error: ‘struct max77693_dev’ has no member
named ‘irq_masks_cache’
drivers/mfd/max77693-irq.c:271:12: error: ‘struct max77693_dev’ has no member
named ‘irq_masks_cur’
drivers/mfd/max77693-irq.c:272:12: error: ‘struct max77693_dev’ has no member
named ‘irq_masks_cache’
drivers/mfd/max77693-irq.c:292:10: error: ‘struct max77693_dev’ has no member
named ‘irq_domain’

Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/linux/mfd/max77693-private.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mfd/max77693-private.h b/include/linux/mfd/max77693-private.h
index bf6077d3c43c..68263c5fa53c 100644
--- a/include/linux/mfd/max77693-private.h
+++ b/include/linux/mfd/max77693-private.h
@@ -198,8 +198,14 @@ struct max77693_dev {
 	struct regmap *regmap_muic;
 	struct regmap *regmap_haptic;
 
+	struct irq_domain *irq_domain;
+
 	int irq;
+	int irq_gpio;
 	bool wakeup;
+	struct mutex irqlock;
+	int irq_masks_cur[MAX77693_IRQ_GROUP_NR];
+	int irq_masks_cache[MAX77693_IRQ_GROUP_NR];
 };
 
 enum max77693_types {
@@ -214,4 +220,8 @@ extern int max77693_bulk_write(struct regmap *map, u8 reg, int count,
 				u8 *buf);
 extern int max77693_update_reg(struct regmap *map, u8 reg, u8 val, u8 mask);
 
+extern int max77693_irq_init(struct max77693_dev *max77686);
+extern void max77693_irq_exit(struct max77693_dev *max77686);
+extern int max77693_irq_resume(struct max77693_dev *max77686);
+
 #endif /*  __LINUX_MFD_MAX77693_PRIV_H */
-- 
cgit v1.3-7-g2ca7


From 88034c3d88c2c48b215f2cc5eb22e564aa817f9c Mon Sep 17 00:00:00 2001
From: Andy Adamson <andros@netapp.com>
Date: Wed, 23 May 2012 05:02:34 -0400
Subject: NFSv4.1 mdsthreshold attribute xdr

We only support one layout type per file system, so one threshold_item4 per
mdsthreshold4.

Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4xdr.c        | 125 +++++++++++++++++++++++++++++++++++++++++++++++-
 include/linux/nfs4.h    |   7 +++
 include/linux/nfs_xdr.h |  10 ++++
 3 files changed, 140 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 5ad2b2c2aecb..edb8ac7fce0e 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -101,9 +101,12 @@ static int nfs4_stat_to_errno(int);
 #define nfs4_path_maxsz		(1 + ((3 + NFS4_MAXPATHLEN) >> 2))
 #define nfs4_owner_maxsz	(1 + XDR_QUADLEN(IDMAP_NAMESZ))
 #define nfs4_group_maxsz	(1 + XDR_QUADLEN(IDMAP_NAMESZ))
+/* We support only one layout type per file system */
+#define decode_mdsthreshold_maxsz (1 + 1 + nfs4_fattr_bitmap_maxsz + 1 + 8)
 /* This is based on getfattr, which uses the most attributes: */
 #define nfs4_fattr_value_maxsz	(1 + (1 + 2 + 2 + 4 + 2 + 1 + 1 + 2 + 2 + \
-				3 + 3 + 3 + nfs4_owner_maxsz + nfs4_group_maxsz))
+				3 + 3 + 3 + nfs4_owner_maxsz + \
+				nfs4_group_maxsz + decode_mdsthreshold_maxsz))
 #define nfs4_fattr_maxsz	(nfs4_fattr_bitmap_maxsz + \
 				nfs4_fattr_value_maxsz)
 #define decode_getattr_maxsz    (op_decode_hdr_maxsz + nfs4_fattr_maxsz)
@@ -1172,6 +1175,16 @@ static void encode_getfattr(struct xdr_stream *xdr, const u32* bitmask, struct c
 			   bitmask[1] & nfs4_fattr_bitmap[1], hdr);
 }
 
+static void encode_getfattr_open(struct xdr_stream *xdr, const u32 *bitmask,
+				 struct compound_hdr *hdr)
+{
+	encode_getattr_three(xdr,
+			     bitmask[0] & nfs4_fattr_bitmap[0],
+			     bitmask[1] & nfs4_fattr_bitmap[1],
+			     bitmask[2] & FATTR4_WORD2_MDSTHRESHOLD,
+			     hdr);
+}
+
 static void encode_fsinfo(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr)
 {
 	encode_getattr_three(xdr,
@@ -2164,7 +2177,7 @@ static void nfs4_xdr_enc_open(struct rpc_rqst *req, struct xdr_stream *xdr,
 	encode_putfh(xdr, args->fh, &hdr);
 	encode_open(xdr, args, &hdr);
 	encode_getfh(xdr, &hdr);
-	encode_getfattr(xdr, args->bitmask, &hdr);
+	encode_getfattr_open(xdr, args->bitmask, &hdr);
 	encode_nops(&hdr);
 }
 
@@ -4186,6 +4199,110 @@ xdr_error:
 	return status;
 }
 
+static int decode_threshold_hint(struct xdr_stream *xdr,
+				  uint32_t *bitmap,
+				  uint64_t *res,
+				  uint32_t hint_bit)
+{
+	__be32 *p;
+
+	*res = 0;
+	if (likely(bitmap[0] & hint_bit)) {
+		p = xdr_inline_decode(xdr, 8);
+		if (unlikely(!p))
+			goto out_overflow;
+		xdr_decode_hyper(p, res);
+	}
+	return 0;
+out_overflow:
+	print_overflow_msg(__func__, xdr);
+	return -EIO;
+}
+
+static int decode_first_threshold_item4(struct xdr_stream *xdr,
+					struct nfs4_threshold *res)
+{
+	__be32 *p, *savep;
+	uint32_t bitmap[3] = {0,}, attrlen;
+	int status;
+
+	/* layout type */
+	p = xdr_inline_decode(xdr, 4);
+	if (unlikely(!p)) {
+		print_overflow_msg(__func__, xdr);
+		return -EIO;
+	}
+	res->l_type = be32_to_cpup(p);
+
+	/* thi_hintset bitmap */
+	status = decode_attr_bitmap(xdr, bitmap);
+	if (status < 0)
+		goto xdr_error;
+
+	/* thi_hintlist length */
+	status = decode_attr_length(xdr, &attrlen, &savep);
+	if (status < 0)
+		goto xdr_error;
+	/* thi_hintlist */
+	status = decode_threshold_hint(xdr, bitmap, &res->rd_sz, THRESHOLD_RD);
+	if (status < 0)
+		goto xdr_error;
+	status = decode_threshold_hint(xdr, bitmap, &res->wr_sz, THRESHOLD_WR);
+	if (status < 0)
+		goto xdr_error;
+	status = decode_threshold_hint(xdr, bitmap, &res->rd_io_sz,
+				       THRESHOLD_RD_IO);
+	if (status < 0)
+		goto xdr_error;
+	status = decode_threshold_hint(xdr, bitmap, &res->wr_io_sz,
+				       THRESHOLD_WR_IO);
+	if (status < 0)
+		goto xdr_error;
+
+	status = verify_attr_len(xdr, savep, attrlen);
+	res->bm = bitmap[0];
+
+	dprintk("%s bm=0x%x rd_sz=%llu wr_sz=%llu rd_io=%llu wr_io=%llu\n",
+		 __func__, res->bm, res->rd_sz, res->wr_sz, res->rd_io_sz,
+		res->wr_io_sz);
+xdr_error:
+	dprintk("%s ret=%d!\n", __func__, status);
+	return status;
+}
+
+/*
+ * Thresholds on pNFS direct I/O vrs MDS I/O
+ */
+static int decode_attr_mdsthreshold(struct xdr_stream *xdr,
+				    uint32_t *bitmap,
+				    struct nfs4_threshold *res)
+{
+	__be32 *p;
+	int status = 0;
+	uint32_t num;
+
+	if (unlikely(bitmap[2] & (FATTR4_WORD2_MDSTHRESHOLD - 1U)))
+		return -EIO;
+	if (likely(bitmap[2] & FATTR4_WORD2_MDSTHRESHOLD)) {
+		p = xdr_inline_decode(xdr, 4);
+		if (unlikely(!p))
+			goto out_overflow;
+		num = be32_to_cpup(p);
+		if (num == 0)
+			return 0;
+		if (num > 1)
+			printk(KERN_INFO "%s: Warning: Multiple pNFS layout "
+				"drivers per filesystem not supported\n",
+				__func__);
+
+		status = decode_first_threshold_item4(xdr, res);
+	}
+	return status;
+out_overflow:
+	print_overflow_msg(__func__, xdr);
+	return -EIO;
+}
+
 static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap,
 		struct nfs_fattr *fattr, struct nfs_fh *fh,
 		struct nfs4_fs_locations *fs_loc,
@@ -4292,6 +4409,10 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap,
 		goto xdr_error;
 	fattr->valid |= status;
 
+	status = decode_attr_mdsthreshold(xdr, bitmap, fattr->mdsthreshold);
+	if (status < 0)
+		goto xdr_error;
+
 xdr_error:
 	dprintk("%s: xdr returned %d\n", __func__, -status);
 	return status;
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 0987146b0637..72b6bada0d79 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -526,6 +526,13 @@ enum lock_type4 {
 #define FATTR4_WORD1_MOUNTED_ON_FILEID  (1UL << 23)
 #define FATTR4_WORD1_FS_LAYOUT_TYPES    (1UL << 30)
 #define FATTR4_WORD2_LAYOUT_BLKSIZE     (1UL << 1)
+#define FATTR4_WORD2_MDSTHRESHOLD       (1UL << 4)
+
+/* MDS threshold bitmap bits */
+#define THRESHOLD_RD                    (1UL << 0)
+#define THRESHOLD_WR                    (1UL << 1)
+#define THRESHOLD_RD_IO                 (1UL << 2)
+#define THRESHOLD_WR_IO                 (1UL << 3)
 
 #define NFSPROC4_NULL 0
 #define NFSPROC4_COMPOUND 1
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 0872f32c8eef..201c312152fb 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -35,6 +35,15 @@ static inline int nfs_fsid_equal(const struct nfs_fsid *a, const struct nfs_fsid
 	return a->major == b->major && a->minor == b->minor;
 }
 
+struct nfs4_threshold {
+	__u32	bm;
+	__u32	l_type;
+	__u64	rd_sz;
+	__u64	wr_sz;
+	__u64	rd_io_sz;
+	__u64	wr_io_sz;
+};
+
 struct nfs_fattr {
 	unsigned int		valid;		/* which fields are valid */
 	umode_t			mode;
@@ -67,6 +76,7 @@ struct nfs_fattr {
 	unsigned long		gencount;
 	struct nfs4_string	*owner_name;
 	struct nfs4_string	*group_name;
+	struct nfs4_threshold	*mdsthreshold;	/* pNFS threshold hints */
 };
 
 #define NFS_ATTR_FATTR_TYPE		(1U << 0)
-- 
cgit v1.3-7-g2ca7


From 82be417aa37c05116e310b0f2171187ea389f89b Mon Sep 17 00:00:00 2001
From: Andy Adamson <andros@netapp.com>
Date: Wed, 23 May 2012 05:02:35 -0400
Subject: NFSv4.1 cache mdsthreshold values on OPEN

Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/inode.c         |  2 ++
 fs/nfs/nfs4proc.c      | 38 +++++++++++++++++++++++++++++++++-----
 fs/nfs/pnfs.c          | 12 ++++++++++++
 fs/nfs/pnfs.h          | 21 +++++++++++++++++++++
 include/linux/nfs_fs.h |  1 +
 5 files changed, 69 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 9ad81ce0c40f..889f7e5e92e1 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -641,6 +641,7 @@ struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, fmode_t f
 	nfs_init_lock_context(&ctx->lock_context);
 	ctx->lock_context.open_context = ctx;
 	INIT_LIST_HEAD(&ctx->list);
+	ctx->mdsthreshold = NULL;
 	return ctx;
 }
 
@@ -669,6 +670,7 @@ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync)
 		put_rpccred(ctx->cred);
 	dput(ctx->dentry);
 	nfs_sb_deactive(sb);
+	kfree(ctx->mdsthreshold);
 	kfree(ctx);
 }
 
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 8f39bb3ca1b3..e725736ff288 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1782,7 +1782,14 @@ static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata, struct
 /*
  * Returns a referenced nfs4_state
  */
-static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, fmode_t fmode, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res)
+static int _nfs4_do_open(struct inode *dir,
+			struct dentry *dentry,
+			fmode_t fmode,
+			int flags,
+			struct iattr *sattr,
+			struct rpc_cred *cred,
+			struct nfs4_state **res,
+			struct nfs4_threshold **ctx_th)
 {
 	struct nfs4_state_owner  *sp;
 	struct nfs4_state     *state = NULL;
@@ -1807,6 +1814,11 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, fmode_t fmode
 	if (opendata == NULL)
 		goto err_put_state_owner;
 
+	if (ctx_th && server->attr_bitmask[2] & FATTR4_WORD2_MDSTHRESHOLD) {
+		opendata->f_attr.mdsthreshold = pnfs_mdsthreshold_alloc();
+		if (!opendata->f_attr.mdsthreshold)
+			goto err_opendata_put;
+	}
 	if (dentry->d_inode != NULL)
 		opendata->state = nfs4_get_open_state(dentry->d_inode, sp);
 
@@ -1832,11 +1844,19 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, fmode_t fmode
 			nfs_setattr_update_inode(state->inode, sattr);
 		nfs_post_op_update_inode(state->inode, opendata->o_res.f_attr);
 	}
+
+	if (pnfs_use_threshold(ctx_th, opendata->f_attr.mdsthreshold, server))
+		*ctx_th = opendata->f_attr.mdsthreshold;
+	else
+		kfree(opendata->f_attr.mdsthreshold);
+	opendata->f_attr.mdsthreshold = NULL;
+
 	nfs4_opendata_put(opendata);
 	nfs4_put_state_owner(sp);
 	*res = state;
 	return 0;
 err_opendata_put:
+	kfree(opendata->f_attr.mdsthreshold);
 	nfs4_opendata_put(opendata);
 err_put_state_owner:
 	nfs4_put_state_owner(sp);
@@ -1846,14 +1866,21 @@ out_err:
 }
 
 
-static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry, fmode_t fmode, int flags, struct iattr *sattr, struct rpc_cred *cred)
+static struct nfs4_state *nfs4_do_open(struct inode *dir,
+					struct dentry *dentry,
+					fmode_t fmode,
+					int flags,
+					struct iattr *sattr,
+					struct rpc_cred *cred,
+					struct nfs4_threshold **ctx_th)
 {
 	struct nfs4_exception exception = { };
 	struct nfs4_state *res;
 	int status;
 
 	do {
-		status = _nfs4_do_open(dir, dentry, fmode, flags, sattr, cred, &res);
+		status = _nfs4_do_open(dir, dentry, fmode, flags, sattr, cred,
+				       &res, ctx_th);
 		if (status == 0)
 			break;
 		/* NOTE: BAD_SEQID means the server and client disagree about the
@@ -2177,7 +2204,8 @@ nfs4_atomic_open(struct inode *dir, struct nfs_open_context *ctx, int open_flags
 	struct nfs4_state *state;
 
 	/* Protect against concurrent sillydeletes */
-	state = nfs4_do_open(dir, ctx->dentry, ctx->mode, open_flags, attr, ctx->cred);
+	state = nfs4_do_open(dir, ctx->dentry, ctx->mode, open_flags, attr,
+			     ctx->cred, &ctx->mdsthreshold);
 	if (IS_ERR(state))
 		return ERR_CAST(state);
 	ctx->state = state;
@@ -2779,7 +2807,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 		fmode = ctx->mode;
 	}
 	sattr->ia_mode &= ~current_umask();
-	state = nfs4_do_open(dir, de, fmode, flags, sattr, cred);
+	state = nfs4_do_open(dir, de, fmode, flags, sattr, cred, NULL);
 	d_drop(dentry);
 	if (IS_ERR(state)) {
 		status = PTR_ERR(state);
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 5d09a36b2cd8..cbcb6aea58a3 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1630,3 +1630,15 @@ out_free:
 	kfree(data);
 	goto out;
 }
+
+struct nfs4_threshold *pnfs_mdsthreshold_alloc(void)
+{
+	struct nfs4_threshold *thp;
+
+	thp = kzalloc(sizeof(*thp), GFP_NOFS);
+	if (!thp) {
+		dprintk("%s mdsthreshold allocation failed\n", __func__);
+		return NULL;
+	}
+	return thp;
+}
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 7980756b2f57..29fd23c0efdc 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -227,6 +227,7 @@ int pnfs_read_done_resend_to_mds(struct inode *inode, struct list_head *head,
 			const struct nfs_pgio_completion_ops *compl_ops);
 int pnfs_write_done_resend_to_mds(struct inode *inode, struct list_head *head,
 			const struct nfs_pgio_completion_ops *compl_ops);
+struct nfs4_threshold *pnfs_mdsthreshold_alloc(void);
 
 /* nfs4_deviceid_flags */
 enum {
@@ -360,6 +361,14 @@ static inline int pnfs_return_layout(struct inode *ino)
 	return 0;
 }
 
+static inline bool
+pnfs_use_threshold(struct nfs4_threshold **dst, struct nfs4_threshold *src,
+		   struct nfs_server *nfss)
+{
+	return (dst && src && src->bm != 0 &&
+					nfss->pnfs_curr_ld->id == src->l_type);
+}
+
 #ifdef NFS_DEBUG
 void nfs4_print_deviceid(const struct nfs4_deviceid *dev_id);
 #else
@@ -485,6 +494,18 @@ static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync)
 	return 0;
 }
 
+static inline bool
+pnfs_use_threshold(struct nfs4_threshold **dst, struct nfs4_threshold *src,
+		   struct nfs_server *nfss)
+{
+	return false;
+}
+
+static inline struct nfs4_threshold *pnfs_mdsthreshold_alloc(void)
+{
+	return NULL;
+}
+
 #endif /* CONFIG_NFS_V4_1 */
 
 #endif /* FS_NFS_PNFS_H */
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 80a9385b88ab..ce910cb7d761 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -102,6 +102,7 @@ struct nfs_open_context {
 	int error;
 
 	struct list_head list;
+	struct nfs4_threshold	*mdsthreshold;
 };
 
 struct nfs_open_dir_context {
-- 
cgit v1.3-7-g2ca7


From 2701d086dbfca03b2d28b25c6dc11dd78d0e26ad Mon Sep 17 00:00:00 2001
From: Andy Adamson <andros@netapp.com>
Date: Thu, 24 May 2012 13:13:24 -0400
Subject: NFSv4.1 add nfs_inode book keeping for mdsthreshold

Keep track of the number of bytes read or written via buffered, direct, and
mem-mapped i/o for use by mdsthreshold size_io hints.

Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/direct.c        | 2 ++
 fs/nfs/file.c          | 1 +
 fs/nfs/inode.c         | 2 ++
 fs/nfs/pnfs.c          | 3 +++
 fs/nfs/read.c          | 2 ++
 include/linux/nfs_fs.h | 3 +++
 6 files changed, 13 insertions(+)

(limited to 'include/linux')

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index c47a46eaf905..23d170bc44f4 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -447,6 +447,7 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov,
 	result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos);
 	if (!result)
 		result = nfs_direct_wait(dreq);
+	NFS_I(inode)->read_io += result;
 out_release:
 	nfs_direct_req_release(dreq);
 out:
@@ -785,6 +786,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
 		pos += vec->iov_len;
 	}
 	nfs_pageio_complete(&desc);
+	NFS_I(dreq->inode)->write_io += desc.pg_bytes_written;
 
 	/*
 	 * If no bytes were started, return the error, and let the
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 8eda8a6644c3..56311ca5f9f8 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -424,6 +424,7 @@ static int nfs_write_end(struct file *file, struct address_space *mapping,
 
 	if (status < 0)
 		return status;
+	NFS_I(mapping->host)->write_io += copied;
 	return copied;
 }
 
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 889f7e5e92e1..a6f5fbbe9b9c 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -323,6 +323,8 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
 		inode->i_gid = -2;
 		inode->i_blocks = 0;
 		memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
+		nfsi->write_io = 0;
+		nfsi->read_io = 0;
 
 		nfsi->read_cache_jiffies = fattr->time_start;
 		nfsi->attr_gencount = fattr->gencount;
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index cbcb6aea58a3..6620606f2687 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -395,6 +395,9 @@ mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
 	dprintk("%s:Begin lo %p\n", __func__, lo);
 
 	if (list_empty(&lo->plh_segs)) {
+		/* Reset MDS Threshold I/O counters */
+		NFS_I(lo->plh_inode)->write_io = 0;
+		NFS_I(lo->plh_inode)->read_io = 0;
 		if (!test_and_set_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags))
 			put_layout_hdr_locked(lo);
 		return 0;
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 2cfdd7785411..86ced7836214 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -152,6 +152,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
 	nfs_pageio_init_read(&pgio, inode, &nfs_async_read_completion_ops);
 	nfs_pageio_add_request(&pgio, new);
 	nfs_pageio_complete(&pgio);
+	NFS_I(inode)->read_io += pgio.pg_bytes_written;
 	return 0;
 }
 
@@ -656,6 +657,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
 	ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);
 
 	nfs_pageio_complete(&pgio);
+	NFS_I(inode)->read_io += pgio.pg_bytes_written;
 	npages = (pgio.pg_bytes_written + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
 	nfs_add_stats(inode, NFSIOS_READPAGES, npages);
 read_complete:
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index ce910cb7d761..b23cfc120edb 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -202,6 +202,9 @@ struct nfs_inode {
 	/* pNFS layout information */
 	struct pnfs_layout_hdr *layout;
 #endif /* CONFIG_NFS_V4*/
+	/* how many bytes have been written/read and how many bytes queued up */
+	__u64 write_io;
+	__u64 read_io;
 #ifdef CONFIG_NFS_FSCACHE
 	struct fscache_cookie	*fscache;
 #endif
-- 
cgit v1.3-7-g2ca7


From 7c44f1ae4a21458a1ea3d6482ffb3136f1df6d2b Mon Sep 17 00:00:00 2001
From: Weston Andros Adamson <dros@netapp.com>
Date: Thu, 24 May 2012 13:22:50 -0400
Subject: nfs4.1: add BIND_CONN_TO_SESSION operation

This patch adds the BIND_CONN_TO_SESSION operation which is needed for
upcoming SP4_MACH_CRED work and useful for recovering from broken connections
without destroying the session.

Signed-off-by: Weston Andros Adamson <dros@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4_fs.h        |  1 +
 fs/nfs/nfs4proc.c       | 54 ++++++++++++++++++++++++++++
 fs/nfs/nfs4xdr.c        | 95 +++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/nfs4.h    |  5 +++
 include/linux/nfs_xdr.h |  6 ++++
 5 files changed, 161 insertions(+)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index e6da02124c4e..2c7f1cf85b8f 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -211,6 +211,7 @@ struct vfsmount *nfs4_submount(struct nfs_server *, struct dentry *,
 extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *);
 extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *);
 extern int nfs4_proc_get_rootfh(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *);
+extern int nfs4_proc_bind_conn_to_session(struct nfs_client *);
 extern int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred);
 extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *);
 extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index e725736ff288..e8988c000e7f 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -5099,6 +5099,60 @@ nfs41_same_server_scope(struct nfs41_server_scope *a,
 	return false;
 }
 
+/*
+ * nfs4_proc_bind_conn_to_session()
+ *
+ * The 4.1 client currently uses the same TCP connection for the
+ * fore and backchannel.
+ */
+int nfs4_proc_bind_conn_to_session(struct nfs_client *clp)
+{
+	int status;
+	struct nfs41_bind_conn_to_session_res res;
+	struct rpc_message msg = {
+		.rpc_proc =
+			&nfs4_procedures[NFSPROC4_CLNT_BIND_CONN_TO_SESSION],
+		.rpc_argp = clp,
+		.rpc_resp = &res,
+	};
+
+	dprintk("--> %s\n", __func__);
+	BUG_ON(clp == NULL);
+
+	res.session = kzalloc(sizeof(struct nfs4_session), GFP_NOFS);
+	if (unlikely(res.session == NULL)) {
+		status = -ENOMEM;
+		goto out;
+	}
+
+	status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
+	if (status == 0) {
+		if (memcmp(res.session->sess_id.data,
+		    clp->cl_session->sess_id.data, NFS4_MAX_SESSIONID_LEN)) {
+			dprintk("NFS: %s: Session ID mismatch\n", __func__);
+			status = -EIO;
+			goto out_session;
+		}
+		if (res.dir != NFS4_CDFS4_BOTH) {
+			dprintk("NFS: %s: Unexpected direction from server\n",
+				__func__);
+			status = -EIO;
+			goto out_session;
+		}
+		if (res.use_conn_in_rdma_mode) {
+			dprintk("NFS: %s: Server returned RDMA mode = true\n",
+				__func__);
+			status = -EIO;
+			goto out_session;
+		}
+	}
+out_session:
+	kfree(res.session);
+out:
+	dprintk("<-- %s status= %d\n", __func__, status);
+	return status;
+}
+
 /*
  * nfs4_proc_exchange_id()
  *
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index edb8ac7fce0e..a6b95b766220 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -326,6 +326,16 @@ static int nfs4_stat_to_errno(int);
 				     1 /* csr_flags */ + \
 				     decode_channel_attrs_maxsz + \
 				     decode_channel_attrs_maxsz)
+#define encode_bind_conn_to_session_maxsz  (op_encode_hdr_maxsz + \
+				     /* bctsa_sessid */ \
+				     XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + \
+				     1 /* bctsa_dir */ + \
+				     1 /* bctsa_use_conn_in_rdma_mode */)
+#define decode_bind_conn_to_session_maxsz  (op_decode_hdr_maxsz +	\
+				     /* bctsr_sessid */ \
+				     XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + \
+				     1 /* bctsr_dir */ + \
+				     1 /* bctsr_use_conn_in_rdma_mode */)
 #define encode_destroy_session_maxsz    (op_encode_hdr_maxsz + 4)
 #define decode_destroy_session_maxsz    (op_decode_hdr_maxsz)
 #define encode_sequence_maxsz	(op_encode_hdr_maxsz + \
@@ -719,6 +729,12 @@ static int nfs4_stat_to_errno(int);
 				decode_putfh_maxsz + \
 				decode_secinfo_maxsz)
 #if defined(CONFIG_NFS_V4_1)
+#define NFS4_enc_bind_conn_to_session_sz \
+				(compound_encode_hdr_maxsz + \
+				 encode_bind_conn_to_session_maxsz)
+#define NFS4_dec_bind_conn_to_session_sz \
+				(compound_decode_hdr_maxsz + \
+				 decode_bind_conn_to_session_maxsz)
 #define NFS4_enc_exchange_id_sz \
 				(compound_encode_hdr_maxsz + \
 				 encode_exchange_id_maxsz)
@@ -1669,6 +1685,20 @@ static void encode_secinfo(struct xdr_stream *xdr, const struct qstr *name, stru
 
 #if defined(CONFIG_NFS_V4_1)
 /* NFSv4.1 operations */
+static void encode_bind_conn_to_session(struct xdr_stream *xdr,
+				   struct nfs4_session *session,
+				   struct compound_hdr *hdr)
+{
+	__be32 *p;
+
+	encode_op_hdr(xdr, OP_BIND_CONN_TO_SESSION,
+		decode_bind_conn_to_session_maxsz, hdr);
+	encode_opaque_fixed(xdr, session->sess_id.data, NFS4_MAX_SESSIONID_LEN);
+	p = xdr_reserve_space(xdr, 8);
+	*p++ = cpu_to_be32(NFS4_CDFC4_BACK_OR_BOTH);
+	*p = 0;	/* use_conn_in_rdma_mode = False */
+}
+
 static void encode_exchange_id(struct xdr_stream *xdr,
 			       struct nfs41_exchange_id_args *args,
 			       struct compound_hdr *hdr)
@@ -2629,6 +2659,22 @@ static void nfs4_xdr_enc_secinfo(struct rpc_rqst *req,
 }
 
 #if defined(CONFIG_NFS_V4_1)
+/*
+ * BIND_CONN_TO_SESSION request
+ */
+static void nfs4_xdr_enc_bind_conn_to_session(struct rpc_rqst *req,
+				struct xdr_stream *xdr,
+				struct nfs_client *clp)
+{
+	struct compound_hdr hdr = {
+		.minorversion = clp->cl_mvops->minor_version,
+	};
+
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_bind_conn_to_session(xdr, clp->cl_session, &hdr);
+	encode_nops(&hdr);
+}
+
 /*
  * EXCHANGE_ID request
  */
@@ -5366,6 +5412,37 @@ static int decode_sessionid(struct xdr_stream *xdr, struct nfs4_sessionid *sid)
 	return decode_opaque_fixed(xdr, sid->data, NFS4_MAX_SESSIONID_LEN);
 }
 
+static int decode_bind_conn_to_session(struct xdr_stream *xdr,
+				struct nfs41_bind_conn_to_session_res *res)
+{
+	__be32 *p;
+	int status;
+
+	status = decode_op_hdr(xdr, OP_BIND_CONN_TO_SESSION);
+	if (!status)
+		status = decode_sessionid(xdr, &res->session->sess_id);
+	if (unlikely(status))
+		return status;
+
+	/* dir flags, rdma mode bool */
+	p = xdr_inline_decode(xdr, 8);
+	if (unlikely(!p))
+		goto out_overflow;
+
+	res->dir = be32_to_cpup(p++);
+	if (res->dir == 0 || res->dir > NFS4_CDFS4_BOTH)
+		return -EIO;
+	if (be32_to_cpup(p) == 0)
+		res->use_conn_in_rdma_mode = false;
+	else
+		res->use_conn_in_rdma_mode = true;
+
+	return 0;
+out_overflow:
+	print_overflow_msg(__func__, xdr);
+	return -EIO;
+}
+
 static int decode_create_session(struct xdr_stream *xdr,
 				 struct nfs41_create_session_res *res)
 {
@@ -6647,6 +6724,22 @@ out:
 }
 
 #if defined(CONFIG_NFS_V4_1)
+/*
+ * Decode BIND_CONN_TO_SESSION response
+ */
+static int nfs4_xdr_dec_bind_conn_to_session(struct rpc_rqst *rqstp,
+					struct xdr_stream *xdr,
+					void *res)
+{
+	struct compound_hdr hdr;
+	int status;
+
+	status = decode_compound_hdr(xdr, &hdr);
+	if (!status)
+		status = decode_bind_conn_to_session(xdr, res);
+	return status;
+}
+
 /*
  * Decode EXCHANGE_ID response
  */
@@ -7128,6 +7221,8 @@ struct rpc_procinfo	nfs4_procedures[] = {
 	PROC(RELEASE_LOCKOWNER,	enc_release_lockowner,	dec_release_lockowner),
 	PROC(SECINFO,		enc_secinfo,		dec_secinfo),
 #if defined(CONFIG_NFS_V4_1)
+	PROC(BIND_CONN_TO_SESSION,
+			enc_bind_conn_to_session, dec_bind_conn_to_session),
 	PROC(EXCHANGE_ID,	enc_exchange_id,	dec_exchange_id),
 	PROC(CREATE_SESSION,	enc_create_session,	dec_create_session),
 	PROC(DESTROY_SESSION,	enc_destroy_session,	dec_destroy_session),
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 72b6bada0d79..a2b71cbfc44a 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -69,6 +69,10 @@
 #define NFS4_CDFC4_FORE_OR_BOTH 0x3
 #define NFS4_CDFC4_BACK_OR_BOTH 0x7
 
+#define NFS4_CDFS4_FORE 0x1
+#define NFS4_CDFS4_BACK 0x2
+#define NFS4_CDFS4_BOTH 0x3
+
 #define NFS4_SET_TO_SERVER_TIME	0
 #define NFS4_SET_TO_CLIENT_TIME	1
 
@@ -589,6 +593,7 @@ enum {
 	NFSPROC4_CLNT_SECINFO,
 
 	/* nfs41 */
+	NFSPROC4_CLNT_BIND_CONN_TO_SESSION,
 	NFSPROC4_CLNT_EXCHANGE_ID,
 	NFSPROC4_CLNT_CREATE_SESSION,
 	NFSPROC4_CLNT_DESTROY_SESSION,
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 201c312152fb..6387fc0097fe 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1125,6 +1125,12 @@ struct nfs41_impl_id {
 	struct nfstime4			date;
 };
 
+struct nfs41_bind_conn_to_session_res {
+	struct nfs4_session		*session;
+	u32				dir;
+	bool				use_conn_in_rdma_mode;
+};
+
 struct nfs41_exchange_id_res {
 	struct nfs_client		*client;
 	u32				flags;
-- 
cgit v1.3-7-g2ca7


From 4c78513e457f72d5554a0f6e2eabfad7b98e4f19 Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Tue, 24 Apr 2012 14:38:52 +0530
Subject: dma-buf: mmap support

Compared to Rob Clark's RFC I've ditched the prepare/finish hooks
and corresponding ioctls on the dma_buf file. The major reason for
that is that many people seem to be under the impression that this is
also for synchronization with outstanding asynchronous processsing.
I'm pretty massively opposed to this because:

- It boils down reinventing a new rather general-purpose userspace
  synchronization interface. If we look at things like futexes, this
  is hard to get right.
- Furthermore a lot of kernel code has to interact with this
  synchronization primitive. This smells a look like the dri1 hw_lock,
  a horror show I prefer not to reinvent.
- Even more fun is that multiple different subsystems would interact
  here, so we have plenty of opportunities to create funny deadlock
  scenarios.

I think synchronization is a wholesale different problem from data
sharing and should be tackled as an orthogonal problem.

Now we could demand that prepare/finish may only ensure cache
coherency (as Rob intended), but that runs up into the next problem:
We not only need mmap support to facilitate sw-only processing nodes
in a pipeline (without jumping through hoops by importing the dma_buf
into some sw-access only importer), which allows for a nicer
ION->dma-buf upgrade path for existing Android userspace. We also need
mmap support for existing importing subsystems to support existing
userspace libraries. And a loot of these subsystems are expected to
export coherent userspace mappings.

So prepare/finish can only ever be optional and the exporter /needs/
to support coherent mappings. Given that mmap access is always
somewhat fallback-y in nature I've decided to drop this optimization,
instead of just making it optional. If we demonstrate a clear need for
this, supported by benchmark results, we can always add it in again
later as an optional extension.

Other differences compared to Rob's RFC is the above mentioned support
for mapping a dma-buf through facilities provided by the importer.
Which results in mmap support no longer being optional.

Note that this dma-buf mmap patch does _not_ support every possible
insanity an existing subsystem could pull of with mmap: Because it
does not allow to intercept pagefaults and shoot down ptes importing
subsystems can't add some magic of their own at these points (e.g. to
automatically synchronize with outstanding rendering or set up some
special resources). I've done a cursory read through a few mmap
implementions of various subsytems and I'm hopeful that we can avoid
this (and the complexity it'd bring with it).

Additonally I've extended the documentation a bit to explain the hows
and whys of this mmap extension.

In case we ever want to add support for explicitly cache maneged
userspace mmap with a prepare/finish ioctl pair, we could specify that
userspace needs to mmap a different part of the dma_buf, e.g. the
range starting at dma_buf->size up to dma_buf->size*2. This works
because the size of a dma_buf is invariant over it's lifetime. The
exporter would obviously need to fall back to coherent mappings for
both ranges if a legacy clients maps the coherent range and the
architecture cannot suppor conflicting caching policies. Also, this
would obviously be optional and userspace needs to be able to fall
back to coherent mappings.

v2:
- Spelling fixes from Rob Clark.
- Compile fix for !DMA_BUF from Rob Clark.
- Extend commit message to explain how explicitly cache managed mmap
  support could be added later.
- Extend the documentation with implementations notes for exporters
  that need to manually fake coherency.

v3:
- dma_buf pointer initialization goof-up noticed by Rebecca Schultz
  Zavin.

Cc: Rob Clark <rob.clark@linaro.org>
Cc: Rebecca Schultz Zavin <rebecca@android.com>
Acked-by: Rob Clark <rob.clark@linaro.org>
Signed-Off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Sumit Semwal <sumit.semwal@linaro.org>
---
 Documentation/dma-buf-sharing.txt | 98 ++++++++++++++++++++++++++++++++++++---
 drivers/base/dma-buf.c            | 64 ++++++++++++++++++++++++-
 include/linux/dma-buf.h           | 16 +++++++
 3 files changed, 170 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/dma-buf-sharing.txt b/Documentation/dma-buf-sharing.txt
index 3bbd5c51605a..5ff4d2b84f72 100644
--- a/Documentation/dma-buf-sharing.txt
+++ b/Documentation/dma-buf-sharing.txt
@@ -29,13 +29,6 @@ The buffer-user
    in memory, mapped into its own address space, so it can access the same area
    of memory.
 
-*IMPORTANT*: [see https://lkml.org/lkml/2011/12/20/211 for more details]
-For this first version, A buffer shared using the dma_buf sharing API:
-- *may* be exported to user space using "mmap" *ONLY* by exporter, outside of
-  this framework.
-- with this new iteration of the dma-buf api cpu access from the kernel has been
-  enable, see below for the details.
-
 dma-buf operations for device dma only
 --------------------------------------
 
@@ -313,6 +306,83 @@ Access to a dma_buf from the kernel context involves three steps:
 				  enum dma_data_direction dir);
 
 
+Direct Userspace Access/mmap Support
+------------------------------------
+
+Being able to mmap an export dma-buf buffer object has 2 main use-cases:
+- CPU fallback processing in a pipeline and
+- supporting existing mmap interfaces in importers.
+
+1. CPU fallback processing in a pipeline
+
+   In many processing pipelines it is sometimes required that the cpu can access
+   the data in a dma-buf (e.g. for thumbnail creation, snapshots, ...). To avoid
+   the need to handle this specially in userspace frameworks for buffer sharing
+   it's ideal if the dma_buf fd itself can be used to access the backing storage
+   from userspace using mmap.
+
+   Furthermore Android's ION framework already supports this (and is otherwise
+   rather similar to dma-buf from a userspace consumer side with using fds as
+   handles, too). So it's beneficial to support this in a similar fashion on
+   dma-buf to have a good transition path for existing Android userspace.
+
+   No special interfaces, userspace simply calls mmap on the dma-buf fd.
+
+2. Supporting existing mmap interfaces in exporters
+
+   Similar to the motivation for kernel cpu access it is again important that
+   the userspace code of a given importing subsystem can use the same interfaces
+   with a imported dma-buf buffer object as with a native buffer object. This is
+   especially important for drm where the userspace part of contemporary OpenGL,
+   X, and other drivers is huge, and reworking them to use a different way to
+   mmap a buffer rather invasive.
+
+   The assumption in the current dma-buf interfaces is that redirecting the
+   initial mmap is all that's needed. A survey of some of the existing
+   subsystems shows that no driver seems to do any nefarious thing like syncing
+   up with outstanding asynchronous processing on the device or allocating
+   special resources at fault time. So hopefully this is good enough, since
+   adding interfaces to intercept pagefaults and allow pte shootdowns would
+   increase the complexity quite a bit.
+
+   Interface:
+      int dma_buf_mmap(struct dma_buf *, struct vm_area_struct *,
+		       unsigned long);
+
+   If the importing subsystem simply provides a special-purpose mmap call to set
+   up a mapping in userspace, calling do_mmap with dma_buf->file will equally
+   achieve that for a dma-buf object.
+
+3. Implementation notes for exporters
+
+   Because dma-buf buffers have invariant size over their lifetime, the dma-buf
+   core checks whether a vma is too large and rejects such mappings. The
+   exporter hence does not need to duplicate this check.
+
+   Because existing importing subsystems might presume coherent mappings for
+   userspace, the exporter needs to set up a coherent mapping. If that's not
+   possible, it needs to fake coherency by manually shooting down ptes when
+   leaving the cpu domain and flushing caches at fault time. Note that all the
+   dma_buf files share the same anon inode, hence the exporter needs to replace
+   the dma_buf file stored in vma->vm_file with it's own if pte shootdown is
+   requred. This is because the kernel uses the underlying inode's address_space
+   for vma tracking (and hence pte tracking at shootdown time with
+   unmap_mapping_range).
+
+   If the above shootdown dance turns out to be too expensive in certain
+   scenarios, we can extend dma-buf with a more explicit cache tracking scheme
+   for userspace mappings. But the current assumption is that using mmap is
+   always a slower path, so some inefficiencies should be acceptable.
+
+   Exporters that shoot down mappings (for any reasons) shall not do any
+   synchronization at fault time with outstanding device operations.
+   Synchronization is an orthogonal issue to sharing the backing storage of a
+   buffer and hence should not be handled by dma-buf itself. This is explictly
+   mentioned here because many people seem to want something like this, but if
+   different exporters handle this differently, buffer sharing can fail in
+   interesting ways depending upong the exporter (if userspace starts depending
+   upon this implicit synchronization).
+
 Miscellaneous notes
 -------------------
 
@@ -336,6 +406,20 @@ Miscellaneous notes
   the exporting driver to create a dmabuf fd must provide a way to let
   userspace control setting of O_CLOEXEC flag passed in to dma_buf_fd().
 
+- If an exporter needs to manually flush caches and hence needs to fake
+  coherency for mmap support, it needs to be able to zap all the ptes pointing
+  at the backing storage. Now linux mm needs a struct address_space associated
+  with the struct file stored in vma->vm_file to do that with the function
+  unmap_mapping_range. But the dma_buf framework only backs every dma_buf fd
+  with the anon_file struct file, i.e. all dma_bufs share the same file.
+
+  Hence exporters need to setup their own file (and address_space) association
+  by setting vma->vm_file and adjusting vma->vm_pgoff in the dma_buf mmap
+  callback. In the specific case of a gem driver the exporter could use the
+  shmem file already provided by gem (and set vm_pgoff = 0). Exporters can then
+  zap ptes by unmapping the corresponding range of the struct address_space
+  associated with their own file.
+
 References:
 [1] struct dma_buf_ops in include/linux/dma-buf.h
 [2] All interfaces mentioned above defined in include/linux/dma-buf.h
diff --git a/drivers/base/dma-buf.c b/drivers/base/dma-buf.c
index 07cbbc6fddb4..7cfb405b1ad5 100644
--- a/drivers/base/dma-buf.c
+++ b/drivers/base/dma-buf.c
@@ -44,8 +44,26 @@ static int dma_buf_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
+static int dma_buf_mmap_internal(struct file *file, struct vm_area_struct *vma)
+{
+	struct dma_buf *dmabuf;
+
+	if (!is_dma_buf_file(file))
+		return -EINVAL;
+
+	dmabuf = file->private_data;
+
+	/* check for overflowing the buffer's size */
+	if (vma->vm_pgoff + ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT) >
+	    dmabuf->size >> PAGE_SHIFT)
+		return -EINVAL;
+
+	return dmabuf->ops->mmap(dmabuf, vma);
+}
+
 static const struct file_operations dma_buf_fops = {
 	.release	= dma_buf_release,
+	.mmap		= dma_buf_mmap_internal,
 };
 
 /*
@@ -82,7 +100,8 @@ struct dma_buf *dma_buf_export(void *priv, const struct dma_buf_ops *ops,
 			  || !ops->unmap_dma_buf
 			  || !ops->release
 			  || !ops->kmap_atomic
-			  || !ops->kmap)) {
+			  || !ops->kmap
+			  || !ops->mmap)) {
 		return ERR_PTR(-EINVAL);
 	}
 
@@ -406,3 +425,46 @@ void dma_buf_kunmap(struct dma_buf *dmabuf, unsigned long page_num,
 		dmabuf->ops->kunmap(dmabuf, page_num, vaddr);
 }
 EXPORT_SYMBOL_GPL(dma_buf_kunmap);
+
+
+/**
+ * dma_buf_mmap - Setup up a userspace mmap with the given vma
+ * @dma_buf:	[in]	buffer that should back the vma
+ * @vma:	[in]	vma for the mmap
+ * @pgoff:	[in]	offset in pages where this mmap should start within the
+ * 			dma-buf buffer.
+ *
+ * This function adjusts the passed in vma so that it points at the file of the
+ * dma_buf operation. It alsog adjusts the starting pgoff and does bounds
+ * checking on the size of the vma. Then it calls the exporters mmap function to
+ * set up the mapping.
+ *
+ * Can return negative error values, returns 0 on success.
+ */
+int dma_buf_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma,
+		 unsigned long pgoff)
+{
+	if (WARN_ON(!dmabuf || !vma))
+		return -EINVAL;
+
+	/* check for offset overflow */
+	if (pgoff + ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT) < pgoff)
+		return -EOVERFLOW;
+
+	/* check for overflowing the buffer's size */
+	if (pgoff + ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT) >
+	    dmabuf->size >> PAGE_SHIFT)
+		return -EINVAL;
+
+	/* readjust the vma */
+	if (vma->vm_file)
+		fput(vma->vm_file);
+
+	vma->vm_file = dmabuf->file;
+	get_file(vma->vm_file);
+
+	vma->vm_pgoff = pgoff;
+
+	return dmabuf->ops->mmap(dmabuf, vma);
+}
+EXPORT_SYMBOL_GPL(dma_buf_mmap);
diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index 3efbfc2145c3..1f78d1594cc7 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -61,6 +61,10 @@ struct dma_buf_attachment;
  * 		   This Callback must not sleep.
  * @kmap: maps a page from the buffer into kernel address space.
  * @kunmap: [optional] unmaps a page from the buffer.
+ * @mmap: used to expose the backing storage to userspace. Note that the
+ * 	  mapping needs to be coherent - if the exporter doesn't directly
+ * 	  support this, it needs to fake coherency by shooting down any ptes
+ * 	  when transitioning away from the cpu domain.
  */
 struct dma_buf_ops {
 	int (*attach)(struct dma_buf *, struct device *,
@@ -92,6 +96,8 @@ struct dma_buf_ops {
 	void (*kunmap_atomic)(struct dma_buf *, unsigned long, void *);
 	void *(*kmap)(struct dma_buf *, unsigned long);
 	void (*kunmap)(struct dma_buf *, unsigned long, void *);
+
+	int (*mmap)(struct dma_buf *, struct vm_area_struct *vma);
 };
 
 /**
@@ -167,6 +173,9 @@ void *dma_buf_kmap_atomic(struct dma_buf *, unsigned long);
 void dma_buf_kunmap_atomic(struct dma_buf *, unsigned long, void *);
 void *dma_buf_kmap(struct dma_buf *, unsigned long);
 void dma_buf_kunmap(struct dma_buf *, unsigned long, void *);
+
+int dma_buf_mmap(struct dma_buf *, struct vm_area_struct *,
+		 unsigned long);
 #else
 
 static inline struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
@@ -248,6 +257,13 @@ static inline void dma_buf_kunmap(struct dma_buf *dmabuf,
 				  unsigned long pnum, void *vaddr)
 {
 }
+
+static inline int dma_buf_mmap(struct dma_buf *dmabuf,
+			       struct vm_area_struct *vma,
+			       unsigned long pgoff)
+{
+	return -ENODEV;
+}
 #endif /* CONFIG_DMA_SHARED_BUFFER */
 
 #endif /* __DMA_BUF_H__ */
-- 
cgit v1.3-7-g2ca7


From 98f86c9e4ae3205e4c85c535691a5d36426360ee Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Sun, 20 May 2012 12:33:56 +0530
Subject: dma-buf: add vmap interface

The main requirement I have for this interface is for scanning out
using the USB gpu devices. Since these devices have to read the
framebuffer on updates and linearly compress it, using kmaps
is a major overhead for every update.

v2: fix warn issues pointed out by Sylwester Nawrocki.

v3: fix compile !CONFIG_DMA_SHARED_BUFFER and add _GPL for now

Signed-off-by: Dave Airlie <airlied@redhat.com>
Reviewed-by: Rob Clark <rob.clark@linaro.org>
Signed-off-by: Sumit Semwal <sumit.semwal@linaro.org>
---
 drivers/base/dma-buf.c  | 34 ++++++++++++++++++++++++++++++++++
 include/linux/dma-buf.h | 14 ++++++++++++++
 2 files changed, 48 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/base/dma-buf.c b/drivers/base/dma-buf.c
index 7cfb405b1ad5..d43d80256fda 100644
--- a/drivers/base/dma-buf.c
+++ b/drivers/base/dma-buf.c
@@ -468,3 +468,37 @@ int dma_buf_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma,
 	return dmabuf->ops->mmap(dmabuf, vma);
 }
 EXPORT_SYMBOL_GPL(dma_buf_mmap);
+
+/**
+ * dma_buf_vmap - Create virtual mapping for the buffer object into kernel address space. Same restrictions as for vmap and friends apply.
+ * @dma_buf:	[in]	buffer to vmap
+ *
+ * This call may fail due to lack of virtual mapping address space.
+ * These calls are optional in drivers. The intended use for them
+ * is for mapping objects linear in kernel space for high use objects.
+ * Please attempt to use kmap/kunmap before thinking about these interfaces.
+ */
+void *dma_buf_vmap(struct dma_buf *dmabuf)
+{
+	if (WARN_ON(!dmabuf))
+		return NULL;
+
+	if (dmabuf->ops->vmap)
+		return dmabuf->ops->vmap(dmabuf);
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(dma_buf_vmap);
+
+/**
+ * dma_buf_vunmap - Unmap a vmap obtained by dma_buf_vmap.
+ * @dma_buf:	[in]	buffer to vmap
+ */
+void dma_buf_vunmap(struct dma_buf *dmabuf, void *vaddr)
+{
+	if (WARN_ON(!dmabuf))
+		return;
+
+	if (dmabuf->ops->vunmap)
+		dmabuf->ops->vunmap(dmabuf, vaddr);
+}
+EXPORT_SYMBOL_GPL(dma_buf_vunmap);
diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index 1f78d1594cc7..a02b1ff6488e 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -98,6 +98,9 @@ struct dma_buf_ops {
 	void (*kunmap)(struct dma_buf *, unsigned long, void *);
 
 	int (*mmap)(struct dma_buf *, struct vm_area_struct *vma);
+
+	void *(*vmap)(struct dma_buf *);
+	void (*vunmap)(struct dma_buf *, void *vaddr);
 };
 
 /**
@@ -176,6 +179,8 @@ void dma_buf_kunmap(struct dma_buf *, unsigned long, void *);
 
 int dma_buf_mmap(struct dma_buf *, struct vm_area_struct *,
 		 unsigned long);
+void *dma_buf_vmap(struct dma_buf *);
+void dma_buf_vunmap(struct dma_buf *, void *vaddr);
 #else
 
 static inline struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
@@ -264,6 +269,15 @@ static inline int dma_buf_mmap(struct dma_buf *dmabuf,
 {
 	return -ENODEV;
 }
+
+static inline void *dma_buf_vmap(struct dma_buf *dmabuf)
+{
+	return NULL;
+}
+
+static inline void dma_buf_vunmap(struct dma_buf *dmabuf, void *vaddr)
+{
+}
 #endif /* CONFIG_DMA_SHARED_BUFFER */
 
 #endif /* __DMA_BUF_H__ */
-- 
cgit v1.3-7-g2ca7


From 12c4727e1d5370270a7df781d2ba0a76e05c1137 Mon Sep 17 00:00:00 2001
From: Sumit Semwal <sumit.semwal@ti.com>
Date: Wed, 23 May 2012 15:27:40 +0530
Subject: dma-buf: minor documentation fixes.

Some minor inline documentation fixes for gaps resulting from new patches.

Signed-off-by: Sumit Semwal <sumit.semwal@ti.com>
Signed-off-by: Sumit Semwal <sumit.semwal@linaro.org>
---
 drivers/base/dma-buf.c  | 9 +++++----
 include/linux/dma-buf.h | 3 +++
 2 files changed, 8 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/dma-buf.c b/drivers/base/dma-buf.c
index d43d80256fda..20258e179211 100644
--- a/drivers/base/dma-buf.c
+++ b/drivers/base/dma-buf.c
@@ -429,7 +429,7 @@ EXPORT_SYMBOL_GPL(dma_buf_kunmap);
 
 /**
  * dma_buf_mmap - Setup up a userspace mmap with the given vma
- * @dma_buf:	[in]	buffer that should back the vma
+ * @dmabuf:	[in]	buffer that should back the vma
  * @vma:	[in]	vma for the mmap
  * @pgoff:	[in]	offset in pages where this mmap should start within the
  * 			dma-buf buffer.
@@ -470,8 +470,9 @@ int dma_buf_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma,
 EXPORT_SYMBOL_GPL(dma_buf_mmap);
 
 /**
- * dma_buf_vmap - Create virtual mapping for the buffer object into kernel address space. Same restrictions as for vmap and friends apply.
- * @dma_buf:	[in]	buffer to vmap
+ * dma_buf_vmap - Create virtual mapping for the buffer object into kernel
+ * address space. Same restrictions as for vmap and friends apply.
+ * @dmabuf:	[in]	buffer to vmap
  *
  * This call may fail due to lack of virtual mapping address space.
  * These calls are optional in drivers. The intended use for them
@@ -491,7 +492,7 @@ EXPORT_SYMBOL_GPL(dma_buf_vmap);
 
 /**
  * dma_buf_vunmap - Unmap a vmap obtained by dma_buf_vmap.
- * @dma_buf:	[in]	buffer to vmap
+ * @dmabuf:	[in]	buffer to vunmap
  */
 void dma_buf_vunmap(struct dma_buf *dmabuf, void *vaddr)
 {
diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index a02b1ff6488e..eb48f3816df9 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -65,6 +65,9 @@ struct dma_buf_attachment;
  * 	  mapping needs to be coherent - if the exporter doesn't directly
  * 	  support this, it needs to fake coherency by shooting down any ptes
  * 	  when transitioning away from the cpu domain.
+ * @vmap: [optional] creates a virtual mapping for the buffer into kernel
+ *	  address space. Same restrictions as for vmap and friends apply.
+ * @vunmap: [optional] unmaps a vmap from the buffer
  */
 struct dma_buf_ops {
 	int (*attach)(struct dma_buf *, struct device *,
-- 
cgit v1.3-7-g2ca7


From d9ed9faac283a3be73f0e11a2ef49ee55aece4db Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Sun, 1 Apr 2012 14:01:34 -0400
Subject: mm: add new arch_make_huge_pte() method for tile support

The tile support for multiple-size huge pages requires tagging
the hugetlb PTE with a "super" bit for PTEs that are multiples of
the basic size of a pagetable span.  To set that bit properly
we need to tweak the PTe in make_huge_pte() based on the vma.

This change provides the API for a subsequent tile-specific
change to use.

Reviewed-by: Hillf Danton <dhillf@gmail.com>
Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
---
 include/linux/hugetlb.h | 8 ++++++++
 mm/hugetlb.c            | 1 +
 2 files changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 000837e126e6..d5d6bbe2259e 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -284,6 +284,14 @@ static inline unsigned int blocks_per_huge_page(struct hstate *h)
 
 #include <asm/hugetlb.h>
 
+#ifndef arch_make_huge_pte
+static inline pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
+				       struct page *page, int writable)
+{
+	return entry;
+}
+#endif
+
 static inline struct hstate *page_hstate(struct page *page)
 {
 	return size_to_hstate(PAGE_SIZE << compound_order(page));
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index ae8f708e3d75..4e28416c47fb 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2213,6 +2213,7 @@ static pte_t make_huge_pte(struct vm_area_struct *vma, struct page *page,
 	}
 	entry = pte_mkyoung(entry);
 	entry = pte_mkhuge(entry);
+	entry = arch_make_huge_pte(entry, vma, page, writable);
 
 	return entry;
 }
-- 
cgit v1.3-7-g2ca7


From ad24ecfbcddfa88541bccc980e753aeda8bf4031 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Fri, 25 May 2012 17:11:42 -0400
Subject: NFSv4.1: Move NFSPROC4_CLNT_BIND_CONN_TO_SESSION to the end of the
 operations

For backward compatibility with nfs-utils.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: Weston Andros Adamson <dros@netapp.com>
---
 fs/nfs/nfs4xdr.c     | 4 ++--
 include/linux/nfs4.h | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index a6b95b766220..1d4d259c5b3c 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -7221,8 +7221,6 @@ struct rpc_procinfo	nfs4_procedures[] = {
 	PROC(RELEASE_LOCKOWNER,	enc_release_lockowner,	dec_release_lockowner),
 	PROC(SECINFO,		enc_secinfo,		dec_secinfo),
 #if defined(CONFIG_NFS_V4_1)
-	PROC(BIND_CONN_TO_SESSION,
-			enc_bind_conn_to_session, dec_bind_conn_to_session),
 	PROC(EXCHANGE_ID,	enc_exchange_id,	dec_exchange_id),
 	PROC(CREATE_SESSION,	enc_create_session,	dec_create_session),
 	PROC(DESTROY_SESSION,	enc_destroy_session,	dec_destroy_session),
@@ -7237,6 +7235,8 @@ struct rpc_procinfo	nfs4_procedures[] = {
 	PROC(TEST_STATEID,	enc_test_stateid,	dec_test_stateid),
 	PROC(FREE_STATEID,	enc_free_stateid,	dec_free_stateid),
 	PROC(GETDEVICELIST,	enc_getdevicelist,	dec_getdevicelist),
+	PROC(BIND_CONN_TO_SESSION,
+			enc_bind_conn_to_session, dec_bind_conn_to_session),
 #endif /* CONFIG_NFS_V4_1 */
 };
 
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index a2b71cbfc44a..54006a997dd0 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -593,7 +593,6 @@ enum {
 	NFSPROC4_CLNT_SECINFO,
 
 	/* nfs41 */
-	NFSPROC4_CLNT_BIND_CONN_TO_SESSION,
 	NFSPROC4_CLNT_EXCHANGE_ID,
 	NFSPROC4_CLNT_CREATE_SESSION,
 	NFSPROC4_CLNT_DESTROY_SESSION,
@@ -608,6 +607,7 @@ enum {
 	NFSPROC4_CLNT_TEST_STATEID,
 	NFSPROC4_CLNT_FREE_STATEID,
 	NFSPROC4_CLNT_GETDEVICELIST,
+	NFSPROC4_CLNT_BIND_CONN_TO_SESSION,
 };
 
 /* nfs41 types */
-- 
cgit v1.3-7-g2ca7


From 662455391040a783b89d0232e743c27c23617dbd Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Fri, 25 May 2012 17:18:09 -0400
Subject: NFSv4.1: Add DESTROY_CLIENTID

Ensure that we destroy our lease on last unmount

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/client.c      |  1 +
 fs/nfs/nfs4_fs.h     |  1 +
 fs/nfs/nfs4proc.c    | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/nfs/nfs4xdr.c     | 52 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/nfs4.h |  1 +
 5 files changed, 114 insertions(+)

(limited to 'include/linux')

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index a50bdfbbc429..7d108753af81 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -209,6 +209,7 @@ static void nfs4_shutdown_session(struct nfs_client *clp)
 	if (nfs4_has_session(clp)) {
 		nfs4_deviceid_purge_client(clp);
 		nfs4_destroy_session(clp->cl_session);
+		nfs4_destroy_clientid(clp);
 	}
 
 }
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index f7307304320a..b20b5164f70a 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -214,6 +214,7 @@ extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setcli
 extern int nfs4_proc_get_rootfh(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *);
 extern int nfs4_proc_bind_conn_to_session(struct nfs_client *, struct rpc_cred *cred);
 extern int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred);
+extern int nfs4_destroy_clientid(struct nfs_client *clp);
 extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *);
 extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *);
 extern int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 3fdff0cd558d..485a6c0cdc40 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -5261,6 +5261,65 @@ out:
 	return status;
 }
 
+static int _nfs4_proc_destroy_clientid(struct nfs_client *clp,
+		struct rpc_cred *cred)
+{
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_DESTROY_CLIENTID],
+		.rpc_argp = clp,
+		.rpc_cred = cred,
+	};
+	int status;
+
+	status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
+	if (status)
+		pr_warn("NFS: Got error %d from the server %s on "
+			"DESTROY_CLIENTID.", status, clp->cl_hostname);
+	return status;
+}
+
+static int nfs4_proc_destroy_clientid(struct nfs_client *clp,
+		struct rpc_cred *cred)
+{
+	unsigned int loop;
+	int ret;
+
+	for (loop = NFS4_MAX_LOOP_ON_RECOVER; loop != 0; loop--) {
+		ret = _nfs4_proc_destroy_clientid(clp, cred);
+		switch (ret) {
+		case -NFS4ERR_DELAY:
+		case -NFS4ERR_CLIENTID_BUSY:
+			ssleep(1);
+			break;
+		default:
+			return ret;
+		}
+	}
+	return 0;
+}
+
+int nfs4_destroy_clientid(struct nfs_client *clp)
+{
+	struct rpc_cred *cred;
+	int ret = 0;
+
+	if (clp->cl_mvops->minor_version < 1)
+		goto out;
+	if (clp->cl_exchange_flags == 0)
+		goto out;
+	cred = nfs4_get_exchange_id_cred(clp);
+	ret = nfs4_proc_destroy_clientid(clp, cred);
+	if (cred)
+		put_rpccred(cred);
+	switch (ret) {
+	case 0:
+	case -NFS4ERR_STALE_CLIENTID:
+		clp->cl_exchange_flags = 0;
+	}
+out:
+	return ret;
+}
+
 struct nfs4_get_lease_time_data {
 	struct nfs4_get_lease_time_args *args;
 	struct nfs4_get_lease_time_res *res;
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 1d4d259c5b3c..b9ce3fdb862a 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -338,6 +338,8 @@ static int nfs4_stat_to_errno(int);
 				     1 /* bctsr_use_conn_in_rdma_mode */)
 #define encode_destroy_session_maxsz    (op_encode_hdr_maxsz + 4)
 #define decode_destroy_session_maxsz    (op_decode_hdr_maxsz)
+#define encode_destroy_clientid_maxsz   (op_encode_hdr_maxsz + 2)
+#define decode_destroy_clientid_maxsz   (op_decode_hdr_maxsz)
 #define encode_sequence_maxsz	(op_encode_hdr_maxsz + \
 				XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 4)
 #define decode_sequence_maxsz	(op_decode_hdr_maxsz + \
@@ -751,6 +753,10 @@ static int nfs4_stat_to_errno(int);
 					 encode_destroy_session_maxsz)
 #define NFS4_dec_destroy_session_sz	(compound_decode_hdr_maxsz + \
 					 decode_destroy_session_maxsz)
+#define NFS4_enc_destroy_clientid_sz	(compound_encode_hdr_maxsz + \
+					 encode_destroy_clientid_maxsz)
+#define NFS4_dec_destroy_clientid_sz	(compound_decode_hdr_maxsz + \
+					 decode_destroy_clientid_maxsz)
 #define NFS4_enc_sequence_sz \
 				(compound_decode_hdr_maxsz + \
 				 encode_sequence_maxsz)
@@ -1804,6 +1810,14 @@ static void encode_destroy_session(struct xdr_stream *xdr,
 	encode_opaque_fixed(xdr, session->sess_id.data, NFS4_MAX_SESSIONID_LEN);
 }
 
+static void encode_destroy_clientid(struct xdr_stream *xdr,
+				   uint64_t clientid,
+				   struct compound_hdr *hdr)
+{
+	encode_op_hdr(xdr, OP_DESTROY_CLIENTID, decode_destroy_clientid_maxsz, hdr);
+	encode_uint64(xdr, clientid);
+}
+
 static void encode_reclaim_complete(struct xdr_stream *xdr,
 				    struct nfs41_reclaim_complete_args *args,
 				    struct compound_hdr *hdr)
@@ -2723,6 +2737,22 @@ static void nfs4_xdr_enc_destroy_session(struct rpc_rqst *req,
 	encode_nops(&hdr);
 }
 
+/*
+ * a DESTROY_CLIENTID request
+ */
+static void nfs4_xdr_enc_destroy_clientid(struct rpc_rqst *req,
+					 struct xdr_stream *xdr,
+					 struct nfs_client *clp)
+{
+	struct compound_hdr hdr = {
+		.minorversion = clp->cl_mvops->minor_version,
+	};
+
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_destroy_clientid(xdr, clp->cl_clientid, &hdr);
+	encode_nops(&hdr);
+}
+
 /*
  * a SEQUENCE request
  */
@@ -5479,6 +5509,11 @@ static int decode_destroy_session(struct xdr_stream *xdr, void *dummy)
 	return decode_op_hdr(xdr, OP_DESTROY_SESSION);
 }
 
+static int decode_destroy_clientid(struct xdr_stream *xdr, void *dummy)
+{
+	return decode_op_hdr(xdr, OP_DESTROY_CLIENTID);
+}
+
 static int decode_reclaim_complete(struct xdr_stream *xdr, void *dummy)
 {
 	return decode_op_hdr(xdr, OP_RECLAIM_COMPLETE);
@@ -6788,6 +6823,22 @@ static int nfs4_xdr_dec_destroy_session(struct rpc_rqst *rqstp,
 	return status;
 }
 
+/*
+ * Decode DESTROY_CLIENTID response
+ */
+static int nfs4_xdr_dec_destroy_clientid(struct rpc_rqst *rqstp,
+					struct xdr_stream *xdr,
+					void *res)
+{
+	struct compound_hdr hdr;
+	int status;
+
+	status = decode_compound_hdr(xdr, &hdr);
+	if (!status)
+		status = decode_destroy_clientid(xdr, res);
+	return status;
+}
+
 /*
  * Decode SEQUENCE response
  */
@@ -7237,6 +7288,7 @@ struct rpc_procinfo	nfs4_procedures[] = {
 	PROC(GETDEVICELIST,	enc_getdevicelist,	dec_getdevicelist),
 	PROC(BIND_CONN_TO_SESSION,
 			enc_bind_conn_to_session, dec_bind_conn_to_session),
+	PROC(DESTROY_CLIENTID,	enc_destroy_clientid,	dec_destroy_clientid),
 #endif /* CONFIG_NFS_V4_1 */
 };
 
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 54006a997dd0..af2d2fa30eee 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -608,6 +608,7 @@ enum {
 	NFSPROC4_CLNT_FREE_STATEID,
 	NFSPROC4_CLNT_GETDEVICELIST,
 	NFSPROC4_CLNT_BIND_CONN_TO_SESSION,
+	NFSPROC4_CLNT_DESTROY_CLIENTID,
 };
 
 /* nfs41 types */
-- 
cgit v1.3-7-g2ca7


From 32b0131069c5bebf52368a9fe170f8d58b78fa8d Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sat, 26 May 2012 13:41:04 -0400
Subject: NFSv4.1: Don't clobber the seqid if exchange_id returns a confirmed
 clientid

If the EXCHGID4_FLAG_CONFIRMED_R flag is set, the client is in theory
supposed to already know the correct value of the seqid, in which case
RFC5661 states that it should ignore the value returned.

Also ensure that if the sanity check in nfs4_check_cl_exchange_flags
fails, then we must not change the nfs_client fields.

Finally, clean up the code: we don't need to retest the value of
'status' unless it can change.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c       | 17 +++++++++--------
 fs/nfs/nfs4xdr.c        |  7 +++----
 include/linux/nfs_xdr.h |  3 ++-
 3 files changed, 14 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 485a6c0cdc40..9f0a96fe6212 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -5171,7 +5171,7 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
 		.flags = EXCHGID4_FLAG_SUPP_MOVED_REFER,
 	};
 	struct nfs41_exchange_id_res res = {
-		.client = clp,
+		0
 	};
 	int status;
 	struct rpc_message msg = {
@@ -5214,22 +5214,22 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
 
 	status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
 	if (status == 0)
-		status = nfs4_check_cl_exchange_flags(clp->cl_exchange_flags);
+		status = nfs4_check_cl_exchange_flags(res.flags);
 
 	if (status == 0) {
+		clp->cl_clientid = res.clientid;
+		clp->cl_exchange_flags = (res.flags & ~EXCHGID4_FLAG_CONFIRMED_R);
+		if (!(res.flags & EXCHGID4_FLAG_CONFIRMED_R))
+			clp->cl_seqid = res.seqid;
+
 		kfree(clp->cl_serverowner);
 		clp->cl_serverowner = res.server_owner;
 		res.server_owner = NULL;
-	}
 
-	if (status == 0) {
 		/* use the most recent implementation id */
 		kfree(clp->cl_implid);
 		clp->cl_implid = res.impl_id;
-	} else
-		kfree(res.impl_id);
 
-	if (status == 0) {
 		if (clp->cl_serverscope != NULL &&
 		    !nfs41_same_server_scope(clp->cl_serverscope,
 					     res.server_scope)) {
@@ -5244,7 +5244,8 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
 			clp->cl_serverscope = res.server_scope;
 			goto out;
 		}
-	}
+	} else
+		kfree(res.impl_id);
 
 out_server_owner:
 	kfree(res.server_owner);
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index b9ce3fdb862a..ee4a74db95d0 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -5319,7 +5319,6 @@ static int decode_exchange_id(struct xdr_stream *xdr,
 	uint32_t dummy;
 	char *dummy_str;
 	int status;
-	struct nfs_client *clp = res->client;
 	uint32_t impl_id_count;
 
 	status = decode_op_hdr(xdr, OP_EXCHANGE_ID);
@@ -5329,12 +5328,12 @@ static int decode_exchange_id(struct xdr_stream *xdr,
 	p = xdr_inline_decode(xdr, 8);
 	if (unlikely(!p))
 		goto out_overflow;
-	xdr_decode_hyper(p, &clp->cl_clientid);
+	xdr_decode_hyper(p, &res->clientid);
 	p = xdr_inline_decode(xdr, 12);
 	if (unlikely(!p))
 		goto out_overflow;
-	clp->cl_seqid = be32_to_cpup(p++);
-	clp->cl_exchange_flags = be32_to_cpup(p++);
+	res->seqid = be32_to_cpup(p++);
+	res->flags = be32_to_cpup(p++);
 
 	/* We ask for SP4_NONE */
 	dummy = be32_to_cpup(p);
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 6387fc0097fe..d1a7bf51c326 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1132,7 +1132,8 @@ struct nfs41_bind_conn_to_session_res {
 };
 
 struct nfs41_exchange_id_res {
-	struct nfs_client		*client;
+	u64				clientid;
+	u32				seqid;
 	u32				flags;
 	struct nfs41_server_owner	*server_owner;
 	struct nfs41_server_scope	*server_scope;
-- 
cgit v1.3-7-g2ca7


From a7d7d2e1a07e3811dc49af2962c940fd8bbb6c8f Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@redhat.com>
Date: Fri, 27 Jan 2012 14:12:32 -0300
Subject: edac: Create a dimm struct and move the labels into it
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The way a DIMM is currently represented implies that they're
linked into a per-csrow struct. However, some drivers don't see
csrows, as they're ridden behind some chip like the AMB's
on FBDIMM's, for example.

This forced drivers to fake^Wvirtualize a csrow struct, and to create
a mess under csrow/channel original's concept.

Move the DIMM labels into a per-DIMM struct, and add there
the real location of the socket, in terms of csrow/channel.
Latter patches will modify the location to properly represent the
memory architecture.

All other drivers will use a per-csrow type of location.
Some of those drivers will require a latter conversion, as
they also fake the csrows internally.

TODO: While this patch doesn't change the existing behavior, on
csrows-based memory controllers, a csrow/channel pair points to a memory
rank. There's a known bug at the EDAC core that allows having different
labels for the same DIMM, if it has more than one rank. A latter patch
is need to merge the several ranks for a DIMM into the same dimm_info
struct, in order to avoid having different labels for the same DIMM.

The edac_mc_alloc() will now contain a per-dimm initialization loop that
will be changed by latter patches in order to match other types of
memory architectures.

Reviewed-by: Aristeu Rozanski <arozansk@redhat.com>
Reviewed-by: Borislav Petkov <borislav.petkov@amd.com>
Cc: Doug Thompson <norsk5@yahoo.com>
Cc: Ranganathan Desikan <ravi@jetztechnologies.com>
Cc: "Arvind R." <arvino55@gmail.com>
Cc: "Niklas Söderlund" <niklas.soderlund@ericsson.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/edac/edac_mc.c       | 47 +++++++++++++++++++++++++++++++++-----------
 drivers/edac/edac_mc_sysfs.c | 11 +++++------
 drivers/edac/i5100_edac.c    |  8 ++++----
 drivers/edac/i7core_edac.c   |  4 ++--
 drivers/edac/i82975x_edac.c  |  2 +-
 drivers/edac/sb_edac.c       |  4 ++--
 include/linux/edac.h         | 28 +++++++++++++++++++++-----
 7 files changed, 72 insertions(+), 32 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index feef7733fae7..c1aae7233022 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -44,7 +44,7 @@ static void edac_mc_dump_channel(struct rank_info *chan)
 	debugf4("\tchannel = %p\n", chan);
 	debugf4("\tchannel->chan_idx = %d\n", chan->chan_idx);
 	debugf4("\tchannel->ce_count = %d\n", chan->ce_count);
-	debugf4("\tchannel->label = '%s'\n", chan->label);
+	debugf4("\tchannel->label = '%s'\n", chan->dimm->label);
 	debugf4("\tchannel->csrow = %p\n\n", chan->csrow);
 }
 
@@ -157,6 +157,7 @@ struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows,
 	struct mem_ctl_info *mci;
 	struct csrow_info *csi, *csrow;
 	struct rank_info *chi, *chp, *chan;
+	struct dimm_info *dimm;
 	void *pvt;
 	unsigned size;
 	int row, chn;
@@ -170,7 +171,8 @@ struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows,
 	mci = (struct mem_ctl_info *)0;
 	csi = edac_align_ptr(&mci[1], sizeof(*csi));
 	chi = edac_align_ptr(&csi[nr_csrows], sizeof(*chi));
-	pvt = edac_align_ptr(&chi[nr_chans * nr_csrows], sz_pvt);
+	dimm = edac_align_ptr(&chi[nr_chans * nr_csrows], sizeof(*dimm));
+	pvt = edac_align_ptr(&dimm[nr_chans * nr_csrows], sz_pvt);
 	size = ((unsigned long)pvt) + sz_pvt;
 
 	mci = kzalloc(size, GFP_KERNEL);
@@ -182,14 +184,22 @@ struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows,
 	 */
 	csi = (struct csrow_info *)(((char *)mci) + ((unsigned long)csi));
 	chi = (struct rank_info *)(((char *)mci) + ((unsigned long)chi));
+	dimm = (struct dimm_info *)(((char *)mci) + ((unsigned long)dimm));
 	pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;
 
 	/* setup index and various internal pointers */
 	mci->mc_idx = edac_index;
 	mci->csrows = csi;
+	mci->dimms  = dimm;
 	mci->pvt_info = pvt;
 	mci->nr_csrows = nr_csrows;
 
+	/*
+	 * For now, assumes that a per-csrow arrangement for dimms.
+	 * This will be latter changed.
+	 */
+	dimm = mci->dimms;
+
 	for (row = 0; row < nr_csrows; row++) {
 		csrow = &csi[row];
 		csrow->csrow_idx = row;
@@ -202,6 +212,12 @@ struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows,
 			chan = &chp[chn];
 			chan->chan_idx = chn;
 			chan->csrow = csrow;
+
+			mci->csrows[row].channels[chn].dimm = dimm;
+			dimm->csrow = row;
+			dimm->csrow_channel = chn;
+			dimm++;
+			mci->nr_dimms++;
 		}
 	}
 
@@ -678,6 +694,7 @@ void edac_mc_handle_ce(struct mem_ctl_info *mci,
 		int row, int channel, const char *msg)
 {
 	unsigned long remapped_page;
+	char *label = NULL;
 
 	debugf3("MC%d: %s()\n", mci->mc_idx, __func__);
 
@@ -701,6 +718,8 @@ void edac_mc_handle_ce(struct mem_ctl_info *mci,
 		return;
 	}
 
+	label = mci->csrows[row].channels[channel].dimm->label;
+
 	if (edac_mc_get_log_ce())
 		/* FIXME - put in DIMM location */
 		edac_mc_printk(mci, KERN_WARNING,
@@ -708,7 +727,7 @@ void edac_mc_handle_ce(struct mem_ctl_info *mci,
 			"0x%lx, row %d, channel %d, label \"%s\": %s\n",
 			page_frame_number, offset_in_page,
 			mci->csrows[row].grain, syndrome, row, channel,
-			mci->csrows[row].channels[channel].label, msg);
+			label, msg);
 
 	mci->ce_count++;
 	mci->csrows[row].ce_count++;
@@ -754,6 +773,7 @@ void edac_mc_handle_ue(struct mem_ctl_info *mci,
 	char *pos = labels;
 	int chan;
 	int chars;
+	char *label = NULL;
 
 	debugf3("MC%d: %s()\n", mci->mc_idx, __func__);
 
@@ -767,15 +787,15 @@ void edac_mc_handle_ue(struct mem_ctl_info *mci,
 		return;
 	}
 
-	chars = snprintf(pos, len + 1, "%s",
-			 mci->csrows[row].channels[0].label);
+	label = mci->csrows[row].channels[0].dimm->label;
+	chars = snprintf(pos, len + 1, "%s", label);
 	len -= chars;
 	pos += chars;
 
 	for (chan = 1; (chan < mci->csrows[row].nr_channels) && (len > 0);
 		chan++) {
-		chars = snprintf(pos, len + 1, ":%s",
-				 mci->csrows[row].channels[chan].label);
+		label = mci->csrows[row].channels[chan].dimm->label;
+		chars = snprintf(pos, len + 1, ":%s", label);
 		len -= chars;
 		pos += chars;
 	}
@@ -824,6 +844,7 @@ void edac_mc_handle_fbd_ue(struct mem_ctl_info *mci,
 	char labels[len + 1];
 	char *pos = labels;
 	int chars;
+	char *label;
 
 	if (csrow >= mci->nr_csrows) {
 		/* something is wrong */
@@ -858,12 +879,12 @@ void edac_mc_handle_fbd_ue(struct mem_ctl_info *mci,
 	mci->csrows[csrow].ue_count++;
 
 	/* Generate the DIMM labels from the specified channels */
-	chars = snprintf(pos, len + 1, "%s",
-			 mci->csrows[csrow].channels[channela].label);
+	label = mci->csrows[csrow].channels[channela].dimm->label;
+	chars = snprintf(pos, len + 1, "%s", label);
 	len -= chars;
 	pos += chars;
 	chars = snprintf(pos, len + 1, "-%s",
-			 mci->csrows[csrow].channels[channelb].label);
+			mci->csrows[csrow].channels[channelb].dimm->label);
 
 	if (edac_mc_get_log_ue())
 		edac_mc_printk(mci, KERN_EMERG,
@@ -885,6 +906,7 @@ EXPORT_SYMBOL(edac_mc_handle_fbd_ue);
 void edac_mc_handle_fbd_ce(struct mem_ctl_info *mci,
 			unsigned int csrow, unsigned int channel, char *msg)
 {
+	char *label = NULL;
 
 	/* Ensure boundary values */
 	if (csrow >= mci->nr_csrows) {
@@ -904,12 +926,13 @@ void edac_mc_handle_fbd_ce(struct mem_ctl_info *mci,
 		return;
 	}
 
+	label = mci->csrows[csrow].channels[channel].dimm->label;
+
 	if (edac_mc_get_log_ce())
 		/* FIXME - put in DIMM location */
 		edac_mc_printk(mci, KERN_WARNING,
 			"CE row %d, channel %d, label \"%s\": %s\n",
-			csrow, channel,
-			mci->csrows[csrow].channels[channel].label, msg);
+			csrow, channel, label, msg);
 
 	mci->ce_count++;
 	mci->csrows[csrow].ce_count++;
diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c
index e9a28f576d14..af66b2256640 100644
--- a/drivers/edac/edac_mc_sysfs.c
+++ b/drivers/edac/edac_mc_sysfs.c
@@ -170,11 +170,11 @@ static ssize_t channel_dimm_label_show(struct csrow_info *csrow,
 				char *data, int channel)
 {
 	/* if field has not been initialized, there is nothing to send */
-	if (!csrow->channels[channel].label[0])
+	if (!csrow->channels[channel].dimm->label[0])
 		return 0;
 
 	return snprintf(data, EDAC_MC_LABEL_LEN, "%s\n",
-			csrow->channels[channel].label);
+			csrow->channels[channel].dimm->label);
 }
 
 static ssize_t channel_dimm_label_store(struct csrow_info *csrow,
@@ -184,8 +184,8 @@ static ssize_t channel_dimm_label_store(struct csrow_info *csrow,
 	ssize_t max_size = 0;
 
 	max_size = min((ssize_t) count, (ssize_t) EDAC_MC_LABEL_LEN - 1);
-	strncpy(csrow->channels[channel].label, data, max_size);
-	csrow->channels[channel].label[max_size] = '\0';
+	strncpy(csrow->channels[channel].dimm->label, data, max_size);
+	csrow->channels[channel].dimm->label[max_size] = '\0';
 
 	return max_size;
 }
@@ -952,9 +952,8 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci)
 	/* CSROW error: backout what has already been registered,  */
 fail1:
 	for (i--; i >= 0; i--) {
-		if (csrow->nr_pages > 0) {
+		if (mci->csrows[i].nr_pages > 0)
 			kobject_put(&mci->csrows[i].kobj);
-		}
 	}
 
 	/* remove the mci instance's attributes, if any */
diff --git a/drivers/edac/i5100_edac.c b/drivers/edac/i5100_edac.c
index d500749464ea..d55e5529734c 100644
--- a/drivers/edac/i5100_edac.c
+++ b/drivers/edac/i5100_edac.c
@@ -433,7 +433,7 @@ static void i5100_handle_ce(struct mem_ctl_info *mci,
 		"CE chan %d, bank %u, rank %u, syndrome 0x%lx, "
 		"cas %u, ras %u, csrow %u, label \"%s\": %s\n",
 		chan, bank, rank, syndrome, cas, ras,
-		csrow, mci->csrows[csrow].channels[0].label, msg);
+		csrow, mci->csrows[csrow].channels[0].dimm->label, msg);
 
 	mci->ce_count++;
 	mci->csrows[csrow].ce_count++;
@@ -455,7 +455,7 @@ static void i5100_handle_ue(struct mem_ctl_info *mci,
 		"UE chan %d, bank %u, rank %u, syndrome 0x%lx, "
 		"cas %u, ras %u, csrow %u, label \"%s\": %s\n",
 		chan, bank, rank, syndrome, cas, ras,
-		csrow, mci->csrows[csrow].channels[0].label, msg);
+		csrow, mci->csrows[csrow].channels[0].dimm->label, msg);
 
 	mci->ue_count++;
 	mci->csrows[csrow].ue_count++;
@@ -868,8 +868,8 @@ static void __devinit i5100_init_csrows(struct mem_ctl_info *mci)
 		mci->csrows[i].channels[0].chan_idx = 0;
 		mci->csrows[i].channels[0].ce_count = 0;
 		mci->csrows[i].channels[0].csrow = mci->csrows + i;
-		snprintf(mci->csrows[i].channels[0].label,
-			 sizeof(mci->csrows[i].channels[0].label),
+		snprintf(mci->csrows[i].channels[0].dimm->label,
+			 sizeof(mci->csrows[i].channels[0].dimm->label),
 			 "DIMM%u", i5100_rank_to_slot(mci, chan, rank));
 
 		total_pages += npages;
diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c
index 85226ccf5290..df0acf02667a 100644
--- a/drivers/edac/i7core_edac.c
+++ b/drivers/edac/i7core_edac.c
@@ -746,8 +746,8 @@ static int get_dimm_config(const struct mem_ctl_info *mci)
 
 			csr->edac_mode = mode;
 			csr->mtype = mtype;
-			snprintf(csr->channels[0].label,
-					sizeof(csr->channels[0].label),
+			snprintf(csr->channels[0].dimm->label,
+					sizeof(csr->channels[0].dimm->label),
 					"CPU#%uChannel#%u_DIMM#%u",
 					pvt->i7core_dev->socket, i, j);
 
diff --git a/drivers/edac/i82975x_edac.c b/drivers/edac/i82975x_edac.c
index 0cd8368f88f8..b7aca58bf9eb 100644
--- a/drivers/edac/i82975x_edac.c
+++ b/drivers/edac/i82975x_edac.c
@@ -407,7 +407,7 @@ static void i82975x_init_csrows(struct mem_ctl_info *mci,
 		 *   [0-3] for dual-channel; i.e. csrow->nr_channels = 2
 		 */
 		for (chan = 0; chan < csrow->nr_channels; chan++)
-			strncpy(csrow->channels[chan].label,
+			strncpy(csrow->channels[chan].dimm->label,
 					labels[(index >> 1) + (chan * 2)],
 					EDAC_MC_LABEL_LEN);
 
diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c
index a203536d90dd..95901c21d5dc 100644
--- a/drivers/edac/sb_edac.c
+++ b/drivers/edac/sb_edac.c
@@ -651,8 +651,8 @@ static int get_dimm_config(const struct mem_ctl_info *mci)
 				csr->channels[0].chan_idx = i;
 				csr->channels[0].ce_count = 0;
 				pvt->csrow_map[i][j] = csrow;
-				snprintf(csr->channels[0].label,
-					 sizeof(csr->channels[0].label),
+				snprintf(csr->channels[0].dimm->label,
+					 sizeof(csr->channels[0].dimm->label),
 					 "CPU_SrcID#%u_Channel#%u_DIMM#%u",
 					 pvt->sbridge_dev->source_id, i, j);
 				last_page += npages;
diff --git a/include/linux/edac.h b/include/linux/edac.h
index c621d762bb2c..52bceca85e63 100644
--- a/include/linux/edac.h
+++ b/include/linux/edac.h
@@ -312,23 +312,34 @@ enum scrub_type {
  * PS - I enjoyed writing all that about as much as you enjoyed reading it.
  */
 
+/* FIXME: add a per-dimm ce error count */
+struct dimm_info {
+	char label[EDAC_MC_LABEL_LEN + 1];	/* DIMM label on motherboard */
+	unsigned memory_controller;
+	unsigned csrow;
+	unsigned csrow_channel;
+};
+
 /**
  * struct rank_info - contains the information for one DIMM rank
  *
  * @chan_idx:	channel number where the rank is (typically, 0 or 1)
  * @ce_count:	number of correctable errors for this rank
- * @label:	DIMM label. Different ranks for the same DIMM should be
- *		filled, on userspace, with the same label.
- *		FIXME: The core currently won't enforce it.
  * @csrow:	A pointer to the chip select row structure (the parent
  *		structure). The location of the rank is given by
  *		the (csrow->csrow_idx, chan_idx) vector.
+ * @dimm:	A pointer to the DIMM structure, where the DIMM label
+ *		information is stored.
+ *
+ * FIXME: Currently, the EDAC core model will assume one DIMM per rank.
+ *	  This is a bad assumption, but it makes this patch easier. Later
+ *	  patches in this series will fix this issue.
  */
 struct rank_info {
 	int chan_idx;
 	u32 ce_count;
-	char label[EDAC_MC_LABEL_LEN + 1];
-	struct csrow_info *csrow;	/* the parent */
+	struct csrow_info *csrow;
+	struct dimm_info *dimm;
 };
 
 struct csrow_info {
@@ -428,6 +439,13 @@ struct mem_ctl_info {
 	int mc_idx;
 	int nr_csrows;
 	struct csrow_info *csrows;
+
+	/*
+	 * DIMM info. Will eventually remove the entire csrows_info some day
+	 */
+	unsigned nr_dimms;
+	struct dimm_info *dimms;
+
 	/*
 	 * FIXME - what about controllers on other busses? - IDs must be
 	 * unique.  dev pointer should be sufficiently unique, but
-- 
cgit v1.3-7-g2ca7


From 084a4fccef39ac7abb039511f32380f28d0b67e6 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@redhat.com>
Date: Fri, 27 Jan 2012 18:38:08 -0300
Subject: edac: move dimm properties to struct dimm_info
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On systems based on chip select rows, all channels need to use memories
with the same properties, otherwise the memories on channels A and B
won't be recognized.

However, such assumption is not true for all types of memory
controllers.

Controllers for FB-DIMM's don't have such requirements.

Also, modern Intel controllers seem to be capable of handling such
differences.

So, we need to get rid of storing the DIMM information into a per-csrow
data, storing it, instead at the right place.

The first step is to move grain, mtype, dtype and edac_mode to the
per-dimm struct.

Reviewed-by: Aristeu Rozanski <arozansk@redhat.com>
Reviewed-by: Borislav Petkov <borislav.petkov@amd.com>
Acked-by: Chris Metcalf <cmetcalf@tilera.com>
Cc: Doug Thompson <norsk5@yahoo.com>
Cc: Borislav Petkov <borislav.petkov@amd.com>
Cc: Mark Gross <mark.gross@intel.com>
Cc: Jason Uhlenkott <juhlenko@akamai.com>
Cc: Tim Small <tim@buttersideup.com>
Cc: Ranganathan Desikan <ravi@jetztechnologies.com>
Cc: "Arvind R." <arvino55@gmail.com>
Cc: Olof Johansson <olof@lixom.net>
Cc: Egor Martovetsky <egor@pasemi.com>
Cc: Michal Marek <mmarek@suse.cz>
Cc: Jiri Kosina <jkosina@suse.cz>
Cc: Joe Perches <joe@perches.com>
Cc: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Hitoshi Mitake <h.mitake@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: James Bottomley <James.Bottomley@parallels.com>
Cc: "Niklas Söderlund" <niklas.soderlund@ericsson.com>
Cc: Shaohui Xie <Shaohui.Xie@freescale.com>
Cc: Josh Boyer <jwboyer@gmail.com>
Cc: Mike Williams <mike@mikebwilliams.com>
Cc: linuxppc-dev@lists.ozlabs.org
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/edac/amd64_edac.c      | 18 ++++++++----
 drivers/edac/amd76x_edac.c     | 10 ++++---
 drivers/edac/cell_edac.c       | 10 +++++--
 drivers/edac/cpc925_edac.c     | 62 ++++++++++++++++++++++--------------------
 drivers/edac/e752x_edac.c      | 44 ++++++++++++++++--------------
 drivers/edac/e7xxx_edac.c      | 44 +++++++++++++++++-------------
 drivers/edac/edac_mc.c         | 19 ++++++++-----
 drivers/edac/edac_mc_sysfs.c   |  6 ++--
 drivers/edac/i3000_edac.c      | 18 ++++++------
 drivers/edac/i3200_edac.c      | 18 ++++++------
 drivers/edac/i5000_edac.c      | 24 ++++++++--------
 drivers/edac/i5100_edac.c      | 38 ++++++++++++++------------
 drivers/edac/i5400_edac.c      | 24 ++++++----------
 drivers/edac/i7300_edac.c      | 25 ++++++++++-------
 drivers/edac/i7core_edac.c     | 27 +++++++++---------
 drivers/edac/i82443bxgx_edac.c | 13 +++++----
 drivers/edac/i82860_edac.c     | 11 +++++---
 drivers/edac/i82875p_edac.c    | 17 ++++++++----
 drivers/edac/i82975x_edac.c    | 17 ++++++++----
 drivers/edac/mpc85xx_edac.c    | 13 +++++----
 drivers/edac/mv64x60_edac.c    | 18 ++++++------
 drivers/edac/pasemi_edac.c     | 10 ++++---
 drivers/edac/ppc4xx_edac.c     | 13 +++++----
 drivers/edac/r82600_edac.c     | 10 ++++---
 drivers/edac/sb_edac.c         | 31 +++++++++++----------
 drivers/edac/tile_edac.c       | 13 +++++----
 drivers/edac/x38_edac.c        | 17 ++++++------
 include/linux/edac.h           | 21 ++++++++------
 28 files changed, 334 insertions(+), 257 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 7ef73c919c5d..8126db0c8987 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -2187,7 +2187,9 @@ static int init_csrows(struct mem_ctl_info *mci)
 	struct amd64_pvt *pvt = mci->pvt_info;
 	u64 input_addr_min, input_addr_max, sys_addr, base, mask;
 	u32 val;
-	int i, empty = 1;
+	int i, j, empty = 1;
+	enum mem_type mtype;
+	enum edac_type edac_mode;
 
 	amd64_read_pci_cfg(pvt->F3, NBCFG, &val);
 
@@ -2224,7 +2226,7 @@ static int init_csrows(struct mem_ctl_info *mci)
 		csrow->page_mask = ~mask;
 		/* 8 bytes of resolution */
 
-		csrow->mtype = amd64_determine_memory_type(pvt, i);
+		mtype = amd64_determine_memory_type(pvt, i);
 
 		debugf1("  for MC node %d csrow %d:\n", pvt->mc_node_id, i);
 		debugf1("    input_addr_min: 0x%lx input_addr_max: 0x%lx\n",
@@ -2241,11 +2243,15 @@ static int init_csrows(struct mem_ctl_info *mci)
 		 * determine whether CHIPKILL or JUST ECC or NO ECC is operating
 		 */
 		if (pvt->nbcfg & NBCFG_ECC_ENABLE)
-			csrow->edac_mode =
-			    (pvt->nbcfg & NBCFG_CHIPKILL) ?
-			    EDAC_S4ECD4ED : EDAC_SECDED;
+			edac_mode = (pvt->nbcfg & NBCFG_CHIPKILL) ?
+				    EDAC_S4ECD4ED : EDAC_SECDED;
 		else
-			csrow->edac_mode = EDAC_NONE;
+			edac_mode = EDAC_NONE;
+
+		for (j = 0; j < pvt->channel_count; j++) {
+			csrow->channels[j].dimm->mtype = mtype;
+			csrow->channels[j].dimm->edac_mode = edac_mode;
+		}
 	}
 
 	return empty;
diff --git a/drivers/edac/amd76x_edac.c b/drivers/edac/amd76x_edac.c
index f8fd3c807bde..fcfe359f7be5 100644
--- a/drivers/edac/amd76x_edac.c
+++ b/drivers/edac/amd76x_edac.c
@@ -186,11 +186,13 @@ static void amd76x_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev,
 			enum edac_type edac_mode)
 {
 	struct csrow_info *csrow;
+	struct dimm_info *dimm;
 	u32 mba, mba_base, mba_mask, dms;
 	int index;
 
 	for (index = 0; index < mci->nr_csrows; index++) {
 		csrow = &mci->csrows[index];
+		dimm = csrow->channels[0].dimm;
 
 		/* find the DRAM Chip Select Base address and mask */
 		pci_read_config_dword(pdev,
@@ -206,10 +208,10 @@ static void amd76x_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev,
 		csrow->nr_pages = (mba_mask + 1) >> PAGE_SHIFT;
 		csrow->last_page = csrow->first_page + csrow->nr_pages - 1;
 		csrow->page_mask = mba_mask >> PAGE_SHIFT;
-		csrow->grain = csrow->nr_pages << PAGE_SHIFT;
-		csrow->mtype = MEM_RDDR;
-		csrow->dtype = ((dms >> index) & 0x1) ? DEV_X4 : DEV_UNKNOWN;
-		csrow->edac_mode = edac_mode;
+		dimm->grain = csrow->nr_pages << PAGE_SHIFT;
+		dimm->mtype = MEM_RDDR;
+		dimm->dtype = ((dms >> index) & 0x1) ? DEV_X4 : DEV_UNKNOWN;
+		dimm->edac_mode = edac_mode;
 	}
 }
 
diff --git a/drivers/edac/cell_edac.c b/drivers/edac/cell_edac.c
index 9a6a274e6925..94fbb127215a 100644
--- a/drivers/edac/cell_edac.c
+++ b/drivers/edac/cell_edac.c
@@ -124,8 +124,10 @@ static void cell_edac_check(struct mem_ctl_info *mci)
 static void __devinit cell_edac_init_csrows(struct mem_ctl_info *mci)
 {
 	struct csrow_info		*csrow = &mci->csrows[0];
+	struct dimm_info		*dimm;
 	struct cell_edac_priv		*priv = mci->pvt_info;
 	struct device_node		*np;
+	int				j;
 
 	for (np = NULL;
 	     (np = of_find_node_by_name(np, "memory")) != NULL;) {
@@ -142,8 +144,12 @@ static void __devinit cell_edac_init_csrows(struct mem_ctl_info *mci)
 		csrow->first_page = r.start >> PAGE_SHIFT;
 		csrow->nr_pages = resource_size(&r) >> PAGE_SHIFT;
 		csrow->last_page = csrow->first_page + csrow->nr_pages - 1;
-		csrow->mtype = MEM_XDR;
-		csrow->edac_mode = EDAC_SECDED;
+
+		for (j = 0; j < csrow->nr_channels; j++) {
+			dimm = csrow->channels[j].dimm;
+			dimm->mtype = MEM_XDR;
+			dimm->edac_mode = EDAC_SECDED;
+		}
 		dev_dbg(mci->dev,
 			"Initialized on node %d, chanmask=0x%x,"
 			" first_page=0x%lx, nr_pages=0x%x\n",
diff --git a/drivers/edac/cpc925_edac.c b/drivers/edac/cpc925_edac.c
index a774c0ddaf5b..ee90f3da8f3a 100644
--- a/drivers/edac/cpc925_edac.c
+++ b/drivers/edac/cpc925_edac.c
@@ -329,7 +329,8 @@ static void cpc925_init_csrows(struct mem_ctl_info *mci)
 {
 	struct cpc925_mc_pdata *pdata = mci->pvt_info;
 	struct csrow_info *csrow;
-	int index;
+	struct dimm_info *dimm;
+	int index, j;
 	u32 mbmr, mbbar, bba;
 	unsigned long row_size, last_nr_pages = 0;
 
@@ -354,32 +355,35 @@ static void cpc925_init_csrows(struct mem_ctl_info *mci)
 		csrow->last_page = csrow->first_page + csrow->nr_pages - 1;
 		last_nr_pages = csrow->last_page + 1;
 
-		csrow->mtype = MEM_RDDR;
-		csrow->edac_mode = EDAC_SECDED;
-
-		switch (csrow->nr_channels) {
-		case 1: /* Single channel */
-			csrow->grain = 32; /* four-beat burst of 32 bytes */
-			break;
-		case 2: /* Dual channel */
-		default:
-			csrow->grain = 64; /* four-beat burst of 64 bytes */
-			break;
-		}
-
-		switch ((mbmr & MBMR_MODE_MASK) >> MBMR_MODE_SHIFT) {
-		case 6: /* 0110, no way to differentiate X8 VS X16 */
-		case 5:	/* 0101 */
-		case 8: /* 1000 */
-			csrow->dtype = DEV_X16;
-			break;
-		case 7: /* 0111 */
-		case 9: /* 1001 */
-			csrow->dtype = DEV_X8;
-			break;
-		default:
-			csrow->dtype = DEV_UNKNOWN;
-			break;
+		for (j = 0; j < csrow->nr_channels; j++) {
+			dimm = csrow->channels[j].dimm;
+			dimm->mtype = MEM_RDDR;
+			dimm->edac_mode = EDAC_SECDED;
+
+			switch (csrow->nr_channels) {
+			case 1: /* Single channel */
+				dimm->grain = 32; /* four-beat burst of 32 bytes */
+				break;
+			case 2: /* Dual channel */
+			default:
+				dimm->grain = 64; /* four-beat burst of 64 bytes */
+				break;
+			}
+
+			switch ((mbmr & MBMR_MODE_MASK) >> MBMR_MODE_SHIFT) {
+			case 6: /* 0110, no way to differentiate X8 VS X16 */
+			case 5:	/* 0101 */
+			case 8: /* 1000 */
+				dimm->dtype = DEV_X16;
+				break;
+			case 7: /* 0111 */
+			case 9: /* 1001 */
+				dimm->dtype = DEV_X8;
+				break;
+			default:
+				dimm->dtype = DEV_UNKNOWN;
+				break;
+			}
 		}
 	}
 }
@@ -962,9 +966,9 @@ static int __devinit cpc925_probe(struct platform_device *pdev)
 		goto err2;
 	}
 
-	nr_channels = cpc925_mc_get_channels(vbase);
+	nr_channels = cpc925_mc_get_channels(vbase) + 1;
 	mci = edac_mc_alloc(sizeof(struct cpc925_mc_pdata),
-			CPC925_NR_CSROWS, nr_channels + 1, edac_mc_idx);
+			CPC925_NR_CSROWS, nr_channels, edac_mc_idx);
 	if (!mci) {
 		cpc925_printk(KERN_ERR, "No memory for mem_ctl_info\n");
 		res = -ENOMEM;
diff --git a/drivers/edac/e752x_edac.c b/drivers/edac/e752x_edac.c
index 41223261ede9..6cf6ec6bc71e 100644
--- a/drivers/edac/e752x_edac.c
+++ b/drivers/edac/e752x_edac.c
@@ -1044,7 +1044,7 @@ static void e752x_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev,
 	int drc_drbg;		/* DRB granularity 0=64mb, 1=128mb */
 	int drc_ddim;		/* DRAM Data Integrity Mode 0=none, 2=edac */
 	u8 value;
-	u32 dra, drc, cumul_size;
+	u32 dra, drc, cumul_size, i;
 
 	dra = 0;
 	for (index = 0; index < 4; index++) {
@@ -1053,7 +1053,7 @@ static void e752x_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev,
 		dra |= dra_reg << (index * 8);
 	}
 	pci_read_config_dword(pdev, E752X_DRC, &drc);
-	drc_chan = dual_channel_active(ddrcsr);
+	drc_chan = dual_channel_active(ddrcsr) ? 1 : 0;
 	drc_drbg = drc_chan + 1;	/* 128 in dual mode, 64 in single */
 	drc_ddim = (drc >> 20) & 0x3;
 
@@ -1080,24 +1080,28 @@ static void e752x_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev,
 		csrow->last_page = cumul_size - 1;
 		csrow->nr_pages = cumul_size - last_cumul_size;
 		last_cumul_size = cumul_size;
-		csrow->grain = 1 << 12;	/* 4KiB - resolution of CELOG */
-		csrow->mtype = MEM_RDDR;	/* only one type supported */
-		csrow->dtype = mem_dev ? DEV_X4 : DEV_X8;
-
-		/*
-		 * if single channel or x8 devices then SECDED
-		 * if dual channel and x4 then S4ECD4ED
-		 */
-		if (drc_ddim) {
-			if (drc_chan && mem_dev) {
-				csrow->edac_mode = EDAC_S4ECD4ED;
-				mci->edac_cap |= EDAC_FLAG_S4ECD4ED;
-			} else {
-				csrow->edac_mode = EDAC_SECDED;
-				mci->edac_cap |= EDAC_FLAG_SECDED;
-			}
-		} else
-			csrow->edac_mode = EDAC_NONE;
+
+		for (i = 0; i < drc_chan + 1; i++) {
+			struct dimm_info *dimm = csrow->channels[i].dimm;
+			dimm->grain = 1 << 12;	/* 4KiB - resolution of CELOG */
+			dimm->mtype = MEM_RDDR;	/* only one type supported */
+			dimm->dtype = mem_dev ? DEV_X4 : DEV_X8;
+
+			/*
+			* if single channel or x8 devices then SECDED
+			* if dual channel and x4 then S4ECD4ED
+			*/
+			if (drc_ddim) {
+				if (drc_chan && mem_dev) {
+					dimm->edac_mode = EDAC_S4ECD4ED;
+					mci->edac_cap |= EDAC_FLAG_S4ECD4ED;
+				} else {
+					dimm->edac_mode = EDAC_SECDED;
+					mci->edac_cap |= EDAC_FLAG_SECDED;
+				}
+			} else
+				dimm->edac_mode = EDAC_NONE;
+		}
 	}
 }
 
diff --git a/drivers/edac/e7xxx_edac.c b/drivers/edac/e7xxx_edac.c
index 68dea87b72e6..5ed97f6eb346 100644
--- a/drivers/edac/e7xxx_edac.c
+++ b/drivers/edac/e7xxx_edac.c
@@ -347,11 +347,12 @@ static void e7xxx_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev,
 			int dev_idx, u32 drc)
 {
 	unsigned long last_cumul_size;
-	int index;
+	int index, j;
 	u8 value;
 	u32 dra, cumul_size;
 	int drc_chan, drc_drbg, drc_ddim, mem_dev;
 	struct csrow_info *csrow;
+	struct dimm_info *dimm;
 
 	pci_read_config_dword(pdev, E7XXX_DRA, &dra);
 	drc_chan = dual_channel_active(drc, dev_idx);
@@ -381,24 +382,29 @@ static void e7xxx_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev,
 		csrow->last_page = cumul_size - 1;
 		csrow->nr_pages = cumul_size - last_cumul_size;
 		last_cumul_size = cumul_size;
-		csrow->grain = 1 << 12;	/* 4KiB - resolution of CELOG */
-		csrow->mtype = MEM_RDDR;	/* only one type supported */
-		csrow->dtype = mem_dev ? DEV_X4 : DEV_X8;
-
-		/*
-		 * if single channel or x8 devices then SECDED
-		 * if dual channel and x4 then S4ECD4ED
-		 */
-		if (drc_ddim) {
-			if (drc_chan && mem_dev) {
-				csrow->edac_mode = EDAC_S4ECD4ED;
-				mci->edac_cap |= EDAC_FLAG_S4ECD4ED;
-			} else {
-				csrow->edac_mode = EDAC_SECDED;
-				mci->edac_cap |= EDAC_FLAG_SECDED;
-			}
-		} else
-			csrow->edac_mode = EDAC_NONE;
+
+		for (j = 0; j < drc_chan + 1; j++) {
+			dimm = csrow->channels[j].dimm;
+
+			dimm->grain = 1 << 12;	/* 4KiB - resolution of CELOG */
+			dimm->mtype = MEM_RDDR;	/* only one type supported */
+			dimm->dtype = mem_dev ? DEV_X4 : DEV_X8;
+
+			/*
+			* if single channel or x8 devices then SECDED
+			* if dual channel and x4 then S4ECD4ED
+			*/
+			if (drc_ddim) {
+				if (drc_chan && mem_dev) {
+					dimm->edac_mode = EDAC_S4ECD4ED;
+					mci->edac_cap |= EDAC_FLAG_S4ECD4ED;
+				} else {
+					dimm->edac_mode = EDAC_SECDED;
+					mci->edac_cap |= EDAC_FLAG_SECDED;
+				}
+			} else
+				dimm->edac_mode = EDAC_NONE;
+		}
 	}
 }
 
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index c1aae7233022..0942efad55c1 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -43,7 +43,7 @@ static void edac_mc_dump_channel(struct rank_info *chan)
 {
 	debugf4("\tchannel = %p\n", chan);
 	debugf4("\tchannel->chan_idx = %d\n", chan->chan_idx);
-	debugf4("\tchannel->ce_count = %d\n", chan->ce_count);
+	debugf4("\tchannel->ce_count = %d\n", chan->dimm->ce_count);
 	debugf4("\tchannel->label = '%s'\n", chan->dimm->label);
 	debugf4("\tchannel->csrow = %p\n\n", chan->csrow);
 }
@@ -695,6 +695,7 @@ void edac_mc_handle_ce(struct mem_ctl_info *mci,
 {
 	unsigned long remapped_page;
 	char *label = NULL;
+	u32 grain;
 
 	debugf3("MC%d: %s()\n", mci->mc_idx, __func__);
 
@@ -719,6 +720,7 @@ void edac_mc_handle_ce(struct mem_ctl_info *mci,
 	}
 
 	label = mci->csrows[row].channels[channel].dimm->label;
+	grain = mci->csrows[row].channels[channel].dimm->grain;
 
 	if (edac_mc_get_log_ce())
 		/* FIXME - put in DIMM location */
@@ -726,11 +728,12 @@ void edac_mc_handle_ce(struct mem_ctl_info *mci,
 			"CE page 0x%lx, offset 0x%lx, grain %d, syndrome "
 			"0x%lx, row %d, channel %d, label \"%s\": %s\n",
 			page_frame_number, offset_in_page,
-			mci->csrows[row].grain, syndrome, row, channel,
+			grain, syndrome, row, channel,
 			label, msg);
 
 	mci->ce_count++;
 	mci->csrows[row].ce_count++;
+	mci->csrows[row].channels[channel].dimm->ce_count++;
 	mci->csrows[row].channels[channel].ce_count++;
 
 	if (mci->scrub_mode & SCRUB_SW_SRC) {
@@ -747,8 +750,7 @@ void edac_mc_handle_ce(struct mem_ctl_info *mci,
 			mci->ctl_page_to_phys(mci, page_frame_number) :
 			page_frame_number;
 
-		edac_mc_scrub_block(remapped_page, offset_in_page,
-				mci->csrows[row].grain);
+		edac_mc_scrub_block(remapped_page, offset_in_page, grain);
 	}
 }
 EXPORT_SYMBOL_GPL(edac_mc_handle_ce);
@@ -774,6 +776,7 @@ void edac_mc_handle_ue(struct mem_ctl_info *mci,
 	int chan;
 	int chars;
 	char *label = NULL;
+	u32 grain;
 
 	debugf3("MC%d: %s()\n", mci->mc_idx, __func__);
 
@@ -787,6 +790,7 @@ void edac_mc_handle_ue(struct mem_ctl_info *mci,
 		return;
 	}
 
+	grain = mci->csrows[row].channels[0].dimm->grain;
 	label = mci->csrows[row].channels[0].dimm->label;
 	chars = snprintf(pos, len + 1, "%s", label);
 	len -= chars;
@@ -804,14 +808,13 @@ void edac_mc_handle_ue(struct mem_ctl_info *mci,
 		edac_mc_printk(mci, KERN_EMERG,
 			"UE page 0x%lx, offset 0x%lx, grain %d, row %d, "
 			"labels \"%s\": %s\n", page_frame_number,
-			offset_in_page, mci->csrows[row].grain, row,
-			labels, msg);
+			offset_in_page, grain, row, labels, msg);
 
 	if (edac_mc_get_panic_on_ue())
 		panic("EDAC MC%d: UE page 0x%lx, offset 0x%lx, grain %d, "
 			"row %d, labels \"%s\": %s\n", mci->mc_idx,
 			page_frame_number, offset_in_page,
-			mci->csrows[row].grain, row, labels, msg);
+			grain, row, labels, msg);
 
 	mci->ue_count++;
 	mci->csrows[row].ue_count++;
@@ -883,6 +886,7 @@ void edac_mc_handle_fbd_ue(struct mem_ctl_info *mci,
 	chars = snprintf(pos, len + 1, "%s", label);
 	len -= chars;
 	pos += chars;
+
 	chars = snprintf(pos, len + 1, "-%s",
 			mci->csrows[csrow].channels[channelb].dimm->label);
 
@@ -936,6 +940,7 @@ void edac_mc_handle_fbd_ce(struct mem_ctl_info *mci,
 
 	mci->ce_count++;
 	mci->csrows[csrow].ce_count++;
+	mci->csrows[csrow].channels[channel].dimm->ce_count++;
 	mci->csrows[csrow].channels[channel].ce_count++;
 }
 EXPORT_SYMBOL(edac_mc_handle_fbd_ce);
diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c
index af66b2256640..487e03eeed26 100644
--- a/drivers/edac/edac_mc_sysfs.c
+++ b/drivers/edac/edac_mc_sysfs.c
@@ -150,19 +150,19 @@ static ssize_t csrow_size_show(struct csrow_info *csrow, char *data,
 static ssize_t csrow_mem_type_show(struct csrow_info *csrow, char *data,
 				int private)
 {
-	return sprintf(data, "%s\n", mem_types[csrow->mtype]);
+	return sprintf(data, "%s\n", mem_types[csrow->channels[0].dimm->mtype]);
 }
 
 static ssize_t csrow_dev_type_show(struct csrow_info *csrow, char *data,
 				int private)
 {
-	return sprintf(data, "%s\n", dev_types[csrow->dtype]);
+	return sprintf(data, "%s\n", dev_types[csrow->channels[0].dimm->dtype]);
 }
 
 static ssize_t csrow_edac_mode_show(struct csrow_info *csrow, char *data,
 				int private)
 {
-	return sprintf(data, "%s\n", edac_caps[csrow->edac_mode]);
+	return sprintf(data, "%s\n", edac_caps[csrow->channels[0].dimm->edac_mode]);
 }
 
 /* show/store functions for DIMM Label attributes */
diff --git a/drivers/edac/i3000_edac.c b/drivers/edac/i3000_edac.c
index 277689a68841..8fe60ee37826 100644
--- a/drivers/edac/i3000_edac.c
+++ b/drivers/edac/i3000_edac.c
@@ -304,7 +304,7 @@ static int i3000_is_interleaved(const unsigned char *c0dra,
 static int i3000_probe1(struct pci_dev *pdev, int dev_idx)
 {
 	int rc;
-	int i;
+	int i, j;
 	struct mem_ctl_info *mci = NULL;
 	unsigned long last_cumul_size;
 	int interleaved, nr_channels;
@@ -386,19 +386,21 @@ static int i3000_probe1(struct pci_dev *pdev, int dev_idx)
 			cumul_size <<= 1;
 		debugf3("MC: %s(): (%d) cumul_size 0x%x\n",
 			__func__, i, cumul_size);
-		if (cumul_size == last_cumul_size) {
-			csrow->mtype = MEM_EMPTY;
+		if (cumul_size == last_cumul_size)
 			continue;
-		}
 
 		csrow->first_page = last_cumul_size;
 		csrow->last_page = cumul_size - 1;
 		csrow->nr_pages = cumul_size - last_cumul_size;
 		last_cumul_size = cumul_size;
-		csrow->grain = I3000_DEAP_GRAIN;
-		csrow->mtype = MEM_DDR2;
-		csrow->dtype = DEV_UNKNOWN;
-		csrow->edac_mode = EDAC_UNKNOWN;
+
+		for (j = 0; j < nr_channels; j++) {
+			struct dimm_info *dimm = csrow->channels[j].dimm;
+			dimm->grain = I3000_DEAP_GRAIN;
+			dimm->mtype = MEM_DDR2;
+			dimm->dtype = DEV_UNKNOWN;
+			dimm->edac_mode = EDAC_UNKNOWN;
+		}
 	}
 
 	/*
diff --git a/drivers/edac/i3200_edac.c b/drivers/edac/i3200_edac.c
index 046808c6357d..6ae30176aef5 100644
--- a/drivers/edac/i3200_edac.c
+++ b/drivers/edac/i3200_edac.c
@@ -319,7 +319,7 @@ static unsigned long drb_to_nr_pages(
 static int i3200_probe1(struct pci_dev *pdev, int dev_idx)
 {
 	int rc;
-	int i;
+	int i, j;
 	struct mem_ctl_info *mci = NULL;
 	unsigned long last_page;
 	u16 drbs[I3200_CHANNELS][I3200_RANKS_PER_CHANNEL];
@@ -375,20 +375,22 @@ static int i3200_probe1(struct pci_dev *pdev, int dev_idx)
 			i / I3200_RANKS_PER_CHANNEL,
 			i % I3200_RANKS_PER_CHANNEL);
 
-		if (nr_pages == 0) {
-			csrow->mtype = MEM_EMPTY;
+		if (nr_pages == 0)
 			continue;
-		}
 
 		csrow->first_page = last_page + 1;
 		last_page += nr_pages;
 		csrow->last_page = last_page;
 		csrow->nr_pages = nr_pages;
 
-		csrow->grain = nr_pages << PAGE_SHIFT;
-		csrow->mtype = MEM_DDR2;
-		csrow->dtype = DEV_UNKNOWN;
-		csrow->edac_mode = EDAC_UNKNOWN;
+		for (j = 0; j < nr_channels; j++) {
+			struct dimm_info *dimm = csrow->channels[j].dimm;
+
+			dimm->grain = nr_pages << PAGE_SHIFT;
+			dimm->mtype = MEM_DDR2;
+			dimm->dtype = DEV_UNKNOWN;
+			dimm->edac_mode = EDAC_UNKNOWN;
+		}
 	}
 
 	i3200_clear_error_info(mci);
diff --git a/drivers/edac/i5000_edac.c b/drivers/edac/i5000_edac.c
index a2680d8e744b..95966ba9c5ca 100644
--- a/drivers/edac/i5000_edac.c
+++ b/drivers/edac/i5000_edac.c
@@ -1268,25 +1268,23 @@ static int i5000_init_csrows(struct mem_ctl_info *mci)
 		p_csrow->last_page = 9 + csrow * 20;
 		p_csrow->page_mask = 0xFFF;
 
-		p_csrow->grain = 8;
-
 		csrow_megs = 0;
 		for (channel = 0; channel < pvt->maxch; channel++) {
 			csrow_megs += pvt->dimm_info[csrow][channel].megabytes;
-		}
+			p_csrow->channels[channel].dimm->grain = 8;
 
-		p_csrow->nr_pages = csrow_megs << 8;
+			/* Assume DDR2 for now */
+			p_csrow->channels[channel].dimm->mtype = MEM_FB_DDR2;
 
-		/* Assume DDR2 for now */
-		p_csrow->mtype = MEM_FB_DDR2;
+			/* ask what device type on this row */
+			if (MTR_DRAM_WIDTH(mtr))
+				p_csrow->channels[channel].dimm->dtype = DEV_X8;
+			else
+				p_csrow->channels[channel].dimm->dtype = DEV_X4;
 
-		/* ask what device type on this row */
-		if (MTR_DRAM_WIDTH(mtr))
-			p_csrow->dtype = DEV_X8;
-		else
-			p_csrow->dtype = DEV_X4;
-
-		p_csrow->edac_mode = EDAC_S8ECD8ED;
+			p_csrow->channels[channel].dimm->edac_mode = EDAC_S8ECD8ED;
+		}
+		p_csrow->nr_pages = csrow_megs << 8;
 
 		empty = 0;
 	}
diff --git a/drivers/edac/i5100_edac.c b/drivers/edac/i5100_edac.c
index d55e5529734c..6c0dafa3f67b 100644
--- a/drivers/edac/i5100_edac.c
+++ b/drivers/edac/i5100_edac.c
@@ -428,12 +428,16 @@ static void i5100_handle_ce(struct mem_ctl_info *mci,
 			    const char *msg)
 {
 	const int csrow = i5100_rank_to_csrow(mci, chan, rank);
+	char *label = NULL;
+
+	if (mci->csrows[csrow].channels[0].dimm)
+		label = mci->csrows[csrow].channels[0].dimm->label;
 
 	printk(KERN_ERR
 		"CE chan %d, bank %u, rank %u, syndrome 0x%lx, "
 		"cas %u, ras %u, csrow %u, label \"%s\": %s\n",
 		chan, bank, rank, syndrome, cas, ras,
-		csrow, mci->csrows[csrow].channels[0].dimm->label, msg);
+		csrow, label, msg);
 
 	mci->ce_count++;
 	mci->csrows[csrow].ce_count++;
@@ -450,12 +454,16 @@ static void i5100_handle_ue(struct mem_ctl_info *mci,
 			    const char *msg)
 {
 	const int csrow = i5100_rank_to_csrow(mci, chan, rank);
+	char *label = NULL;
+
+	if (mci->csrows[csrow].channels[0].dimm)
+		label = mci->csrows[csrow].channels[0].dimm->label;
 
 	printk(KERN_ERR
 		"UE chan %d, bank %u, rank %u, syndrome 0x%lx, "
 		"cas %u, ras %u, csrow %u, label \"%s\": %s\n",
 		chan, bank, rank, syndrome, cas, ras,
-		csrow, mci->csrows[csrow].channels[0].dimm->label, msg);
+		csrow, label, msg);
 
 	mci->ue_count++;
 	mci->csrows[csrow].ue_count++;
@@ -837,6 +845,7 @@ static void __devinit i5100_init_csrows(struct mem_ctl_info *mci)
 	int i;
 	unsigned long total_pages = 0UL;
 	struct i5100_priv *priv = mci->pvt_info;
+	struct dimm_info *dimm;
 
 	for (i = 0; i < mci->nr_csrows; i++) {
 		const unsigned long npages = i5100_npages(mci, i);
@@ -852,27 +861,22 @@ static void __devinit i5100_init_csrows(struct mem_ctl_info *mci)
 		 */
 		mci->csrows[i].first_page = total_pages;
 		mci->csrows[i].last_page = total_pages + npages - 1;
-		mci->csrows[i].page_mask = 0UL;
-
 		mci->csrows[i].nr_pages = npages;
-		mci->csrows[i].grain = 32;
 		mci->csrows[i].csrow_idx = i;
-		mci->csrows[i].dtype =
-			(priv->mtr[chan][rank].width == 4) ? DEV_X4 : DEV_X8;
-		mci->csrows[i].ue_count = 0;
-		mci->csrows[i].ce_count = 0;
-		mci->csrows[i].mtype = MEM_RDDR2;
-		mci->csrows[i].edac_mode = EDAC_SECDED;
 		mci->csrows[i].mci = mci;
 		mci->csrows[i].nr_channels = 1;
-		mci->csrows[i].channels[0].chan_idx = 0;
-		mci->csrows[i].channels[0].ce_count = 0;
 		mci->csrows[i].channels[0].csrow = mci->csrows + i;
-		snprintf(mci->csrows[i].channels[0].dimm->label,
-			 sizeof(mci->csrows[i].channels[0].dimm->label),
-			 "DIMM%u", i5100_rank_to_slot(mci, chan, rank));
-
 		total_pages += npages;
+
+		dimm = mci->csrows[i].channels[0].dimm;
+		dimm->grain = 32;
+		dimm->dtype = (priv->mtr[chan][rank].width == 4) ?
+			      DEV_X4 : DEV_X8;
+		dimm->mtype = MEM_RDDR2;
+		dimm->edac_mode = EDAC_SECDED;
+		snprintf(dimm->label, sizeof(dimm->label),
+			 "DIMM%u",
+			 i5100_rank_to_slot(mci, chan, rank));
 	}
 }
 
diff --git a/drivers/edac/i5400_edac.c b/drivers/edac/i5400_edac.c
index 1869a1018fb5..c2379ba95c5a 100644
--- a/drivers/edac/i5400_edac.c
+++ b/drivers/edac/i5400_edac.c
@@ -1159,6 +1159,7 @@ static int i5400_init_csrows(struct mem_ctl_info *mci)
 	int csrow_megs;
 	int channel;
 	int csrow;
+	struct dimm_info *dimm;
 
 	pvt = mci->pvt_info;
 
@@ -1184,24 +1185,17 @@ static int i5400_init_csrows(struct mem_ctl_info *mci)
 		p_csrow->last_page = 9 + csrow * 20;
 		p_csrow->page_mask = 0xFFF;
 
-		p_csrow->grain = 8;
-
 		csrow_megs = 0;
-		for (channel = 0; channel < pvt->maxch; channel++)
+		for (channel = 0; channel < pvt->maxch; channel++) {
 			csrow_megs += pvt->dimm_info[csrow][channel].megabytes;
 
-		p_csrow->nr_pages = csrow_megs << 8;
-
-		/* Assume DDR2 for now */
-		p_csrow->mtype = MEM_FB_DDR2;
-
-		/* ask what device type on this row */
-		if (MTR_DRAM_WIDTH(mtr))
-			p_csrow->dtype = DEV_X8;
-		else
-			p_csrow->dtype = DEV_X4;
-
-		p_csrow->edac_mode = EDAC_S8ECD8ED;
+			p_csrow->nr_pages = csrow_megs << 8;
+			dimm = p_csrow->channels[channel].dimm;
+			dimm->grain = 8;
+			dimm->dtype = MTR_DRAM_WIDTH(mtr) ? DEV_X8 : DEV_X4;
+			dimm->mtype = MEM_RDDR2;
+			dimm->edac_mode = EDAC_SECDED;
+		}
 
 		empty = 0;
 	}
diff --git a/drivers/edac/i7300_edac.c b/drivers/edac/i7300_edac.c
index 3bafa3bca148..4bfcb3da3f01 100644
--- a/drivers/edac/i7300_edac.c
+++ b/drivers/edac/i7300_edac.c
@@ -618,6 +618,7 @@ static int decode_mtr(struct i7300_pvt *pvt,
 		      int slot, int ch, int branch,
 		      struct i7300_dimm_info *dinfo,
 		      struct csrow_info *p_csrow,
+		      struct dimm_info *dimm,
 		      u32 *nr_pages)
 {
 	int mtr, ans, addrBits, channel;
@@ -663,10 +664,7 @@ static int decode_mtr(struct i7300_pvt *pvt,
 	debugf2("\t\tNUMCOL: %s\n", numcol_toString[MTR_DIMM_COLS(mtr)]);
 	debugf2("\t\tSIZE: %d MB\n", dinfo->megabytes);
 
-	p_csrow->grain = 8;
-	p_csrow->mtype = MEM_FB_DDR2;
 	p_csrow->csrow_idx = slot;
-	p_csrow->page_mask = 0;
 
 	/*
 	 * The type of error detection actually depends of the
@@ -677,15 +675,17 @@ static int decode_mtr(struct i7300_pvt *pvt,
 	 * See datasheet Sections 7.3.6 to 7.3.8
 	 */
 
+	dimm->grain = 8;
+	dimm->mtype = MEM_FB_DDR2;
 	if (IS_SINGLE_MODE(pvt->mc_settings_a)) {
-		p_csrow->edac_mode = EDAC_SECDED;
+		dimm->edac_mode = EDAC_SECDED;
 		debugf2("\t\tECC code is 8-byte-over-32-byte SECDED+ code\n");
 	} else {
 		debugf2("\t\tECC code is on Lockstep mode\n");
 		if (MTR_DRAM_WIDTH(mtr) == 8)
-			p_csrow->edac_mode = EDAC_S8ECD8ED;
+			dimm->edac_mode = EDAC_S8ECD8ED;
 		else
-			p_csrow->edac_mode = EDAC_S4ECD4ED;
+			dimm->edac_mode = EDAC_S4ECD4ED;
 	}
 
 	/* ask what device type on this row */
@@ -694,9 +694,9 @@ static int decode_mtr(struct i7300_pvt *pvt,
 			IS_SCRBALGO_ENHANCED(pvt->mc_settings) ?
 					    "enhanced" : "normal");
 
-		p_csrow->dtype = DEV_X8;
+		dimm->dtype = DEV_X8;
 	} else
-		p_csrow->dtype = DEV_X4;
+		dimm->dtype = DEV_X4;
 
 	return mtr;
 }
@@ -779,6 +779,7 @@ static int i7300_init_csrows(struct mem_ctl_info *mci)
 	int mtr;
 	int ch, branch, slot, channel;
 	u32 last_page = 0, nr_pages;
+	struct dimm_info *dimm;
 
 	pvt = mci->pvt_info;
 
@@ -803,20 +804,24 @@ static int i7300_init_csrows(struct mem_ctl_info *mci)
 	}
 
 	/* Get the set of MTR[0-7] regs by each branch */
+	nr_pages = 0;
 	for (slot = 0; slot < MAX_SLOTS; slot++) {
 		int where = mtr_regs[slot];
 		for (branch = 0; branch < MAX_BRANCHES; branch++) {
 			pci_read_config_word(pvt->pci_dev_2x_0_fbd_branch[branch],
 					where,
 					&pvt->mtr[slot][branch]);
-			for (ch = 0; ch < MAX_BRANCHES; ch++) {
+			for (ch = 0; ch < MAX_CH_PER_BRANCH; ch++) {
 				int channel = to_channel(ch, branch);
 
 				dinfo = &pvt->dimm_info[slot][channel];
 				p_csrow = &mci->csrows[slot];
 
+				dimm = p_csrow->channels[branch * MAX_CH_PER_BRANCH + ch].dimm;
+
 				mtr = decode_mtr(pvt, slot, ch, branch,
-						 dinfo, p_csrow, &nr_pages);
+						 dinfo, p_csrow, dimm,
+						 &nr_pages);
 				/* if no DIMMS on this row, continue */
 				if (!MTR_DIMMS_PRESENT(mtr))
 					continue;
diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c
index df0acf02667a..5449bd40a739 100644
--- a/drivers/edac/i7core_edac.c
+++ b/drivers/edac/i7core_edac.c
@@ -592,7 +592,7 @@ static int i7core_get_active_channels(const u8 socket, unsigned *channels,
 	return 0;
 }
 
-static int get_dimm_config(const struct mem_ctl_info *mci)
+static int get_dimm_config(struct mem_ctl_info *mci)
 {
 	struct i7core_pvt *pvt = mci->pvt_info;
 	struct csrow_info *csr;
@@ -602,6 +602,7 @@ static int get_dimm_config(const struct mem_ctl_info *mci)
 	unsigned long last_page = 0;
 	enum edac_type mode;
 	enum mem_type mtype;
+	struct dimm_info *dimm;
 
 	/* Get data from the MC register, function 0 */
 	pdev = pvt->pci_mcr[0];
@@ -721,7 +722,6 @@ static int get_dimm_config(const struct mem_ctl_info *mci)
 			csr->nr_pages = npages;
 
 			csr->page_mask = 0;
-			csr->grain = 8;
 			csr->csrow_idx = csrow;
 			csr->nr_channels = 1;
 
@@ -730,28 +730,27 @@ static int get_dimm_config(const struct mem_ctl_info *mci)
 
 			pvt->csrow_map[i][j] = csrow;
 
+			dimm = csr->channels[0].dimm;
 			switch (banks) {
 			case 4:
-				csr->dtype = DEV_X4;
+				dimm->dtype = DEV_X4;
 				break;
 			case 8:
-				csr->dtype = DEV_X8;
+				dimm->dtype = DEV_X8;
 				break;
 			case 16:
-				csr->dtype = DEV_X16;
+				dimm->dtype = DEV_X16;
 				break;
 			default:
-				csr->dtype = DEV_UNKNOWN;
+				dimm->dtype = DEV_UNKNOWN;
 			}
 
-			csr->edac_mode = mode;
-			csr->mtype = mtype;
-			snprintf(csr->channels[0].dimm->label,
-					sizeof(csr->channels[0].dimm->label),
-					"CPU#%uChannel#%u_DIMM#%u",
-					pvt->i7core_dev->socket, i, j);
-
-			csrow++;
+			snprintf(dimm->label, sizeof(dimm->label),
+				 "CPU#%uChannel#%u_DIMM#%u",
+				 pvt->i7core_dev->socket, i, j);
+			dimm->grain = 8;
+			dimm->edac_mode = mode;
+			dimm->mtype = mtype;
 		}
 
 		pci_read_config_dword(pdev, MC_SAG_CH_0, &value[0]);
diff --git a/drivers/edac/i82443bxgx_edac.c b/drivers/edac/i82443bxgx_edac.c
index 3bf2b2f490e7..0b98dd3408b9 100644
--- a/drivers/edac/i82443bxgx_edac.c
+++ b/drivers/edac/i82443bxgx_edac.c
@@ -12,7 +12,7 @@
  * 440GX fix by Jason Uhlenkott <juhlenko@akamai.com>.
  *
  * Written with reference to 82443BX Host Bridge Datasheet:
- * http://download.intel.com/design/chipsets/datashts/29063301.pdf 
+ * http://download.intel.com/design/chipsets/datashts/29063301.pdf
  * references to this document given in [].
  *
  * This module doesn't support the 440LX, but it may be possible to
@@ -189,6 +189,7 @@ static void i82443bxgx_init_csrows(struct mem_ctl_info *mci,
 				enum mem_type mtype)
 {
 	struct csrow_info *csrow;
+	struct dimm_info *dimm;
 	int index;
 	u8 drbar, dramc;
 	u32 row_base, row_high_limit, row_high_limit_last;
@@ -197,6 +198,8 @@ static void i82443bxgx_init_csrows(struct mem_ctl_info *mci,
 	row_high_limit_last = 0;
 	for (index = 0; index < mci->nr_csrows; index++) {
 		csrow = &mci->csrows[index];
+		dimm = csrow->channels[0].dimm;
+
 		pci_read_config_byte(pdev, I82443BXGX_DRB + index, &drbar);
 		debugf1("MC%d: %s: %s() Row=%d DRB = %#0x\n",
 			mci->mc_idx, __FILE__, __func__, index, drbar);
@@ -219,12 +222,12 @@ static void i82443bxgx_init_csrows(struct mem_ctl_info *mci,
 		csrow->last_page = (row_high_limit >> PAGE_SHIFT) - 1;
 		csrow->nr_pages = csrow->last_page - csrow->first_page + 1;
 		/* EAP reports in 4kilobyte granularity [61] */
-		csrow->grain = 1 << 12;
-		csrow->mtype = mtype;
+		dimm->grain = 1 << 12;
+		dimm->mtype = mtype;
 		/* I don't think 440BX can tell you device type? FIXME? */
-		csrow->dtype = DEV_UNKNOWN;
+		dimm->dtype = DEV_UNKNOWN;
 		/* Mode is global to all rows on 440BX */
-		csrow->edac_mode = edac_mode;
+		dimm->edac_mode = edac_mode;
 		row_high_limit_last = row_high_limit;
 	}
 }
diff --git a/drivers/edac/i82860_edac.c b/drivers/edac/i82860_edac.c
index c779092d18d1..3eb77845cfca 100644
--- a/drivers/edac/i82860_edac.c
+++ b/drivers/edac/i82860_edac.c
@@ -140,6 +140,7 @@ static void i82860_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev)
 	u16 value;
 	u32 cumul_size;
 	struct csrow_info *csrow;
+	struct dimm_info *dimm;
 	int index;
 
 	pci_read_config_word(pdev, I82860_MCHCFG, &mchcfg_ddim);
@@ -153,6 +154,8 @@ static void i82860_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev)
 	 */
 	for (index = 0; index < mci->nr_csrows; index++) {
 		csrow = &mci->csrows[index];
+		dimm = csrow->channels[0].dimm;
+
 		pci_read_config_word(pdev, I82860_GBA + index * 2, &value);
 		cumul_size = (value & I82860_GBA_MASK) <<
 			(I82860_GBA_SHIFT - PAGE_SHIFT);
@@ -166,10 +169,10 @@ static void i82860_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev)
 		csrow->last_page = cumul_size - 1;
 		csrow->nr_pages = cumul_size - last_cumul_size;
 		last_cumul_size = cumul_size;
-		csrow->grain = 1 << 12;	/* I82860_EAP has 4KiB reolution */
-		csrow->mtype = MEM_RMBS;
-		csrow->dtype = DEV_UNKNOWN;
-		csrow->edac_mode = mchcfg_ddim ? EDAC_SECDED : EDAC_NONE;
+		dimm->grain = 1 << 12;	/* I82860_EAP has 4KiB reolution */
+		dimm->mtype = MEM_RMBS;
+		dimm->dtype = DEV_UNKNOWN;
+		dimm->edac_mode = mchcfg_ddim ? EDAC_SECDED : EDAC_NONE;
 	}
 }
 
diff --git a/drivers/edac/i82875p_edac.c b/drivers/edac/i82875p_edac.c
index 10f15d85fb5e..eac574285da8 100644
--- a/drivers/edac/i82875p_edac.c
+++ b/drivers/edac/i82875p_edac.c
@@ -342,11 +342,13 @@ static void i82875p_init_csrows(struct mem_ctl_info *mci,
 				void __iomem * ovrfl_window, u32 drc)
 {
 	struct csrow_info *csrow;
+	struct dimm_info *dimm;
+	unsigned nr_chans = dual_channel_active(drc) + 1;
 	unsigned long last_cumul_size;
 	u8 value;
 	u32 drc_ddim;		/* DRAM Data Integrity Mode 0=none,2=edac */
 	u32 cumul_size;
-	int index;
+	int index, j;
 
 	drc_ddim = (drc >> 18) & 0x1;
 	last_cumul_size = 0;
@@ -371,10 +373,15 @@ static void i82875p_init_csrows(struct mem_ctl_info *mci,
 		csrow->last_page = cumul_size - 1;
 		csrow->nr_pages = cumul_size - last_cumul_size;
 		last_cumul_size = cumul_size;
-		csrow->grain = 1 << 12;	/* I82875P_EAP has 4KiB reolution */
-		csrow->mtype = MEM_DDR;
-		csrow->dtype = DEV_UNKNOWN;
-		csrow->edac_mode = drc_ddim ? EDAC_SECDED : EDAC_NONE;
+
+		for (j = 0; j < nr_chans; j++) {
+			dimm = csrow->channels[j].dimm;
+
+			dimm->grain = 1 << 12;	/* I82875P_EAP has 4KiB reolution */
+			dimm->mtype = MEM_DDR;
+			dimm->dtype = DEV_UNKNOWN;
+			dimm->edac_mode = drc_ddim ? EDAC_SECDED : EDAC_NONE;
+		}
 	}
 }
 
diff --git a/drivers/edac/i82975x_edac.c b/drivers/edac/i82975x_edac.c
index b7aca58bf9eb..b8ec8719e2f5 100644
--- a/drivers/edac/i82975x_edac.c
+++ b/drivers/edac/i82975x_edac.c
@@ -309,7 +309,7 @@ static int i82975x_process_error_info(struct mem_ctl_info *mci,
 	chan = (mci->csrows[row].nr_channels == 1) ? 0 : info->eap & 1;
 	offst = info->eap
 			& ((1 << PAGE_SHIFT) -
-				(1 << mci->csrows[row].grain));
+			   (1 << mci->csrows[row].channels[chan].dimm->grain));
 
 	if (info->errsts & 0x0002)
 		edac_mc_handle_ue(mci, page, offst , row, "i82975x UE");
@@ -372,6 +372,8 @@ static void i82975x_init_csrows(struct mem_ctl_info *mci,
 	u8 value;
 	u32 cumul_size;
 	int index, chan;
+	struct dimm_info *dimm;
+	enum dev_type dtype;
 
 	last_cumul_size = 0;
 
@@ -406,10 +408,17 @@ static void i82975x_init_csrows(struct mem_ctl_info *mci,
 		 *   [0-7] for single-channel; i.e. csrow->nr_channels = 1
 		 *   [0-3] for dual-channel; i.e. csrow->nr_channels = 2
 		 */
-		for (chan = 0; chan < csrow->nr_channels; chan++)
+		dtype = i82975x_dram_type(mch_window, index);
+		for (chan = 0; chan < csrow->nr_channels; chan++) {
+			dimm = mci->csrows[index].channels[chan].dimm;
 			strncpy(csrow->channels[chan].dimm->label,
 					labels[(index >> 1) + (chan * 2)],
 					EDAC_MC_LABEL_LEN);
+			dimm->grain = 1 << 7;	/* 128Byte cache-line resolution */
+			dimm->dtype = i82975x_dram_type(mch_window, index);
+			dimm->mtype = MEM_DDR2; /* I82975x supports only DDR2 */
+			dimm->edac_mode = EDAC_SECDED; /* only supported */
+		}
 
 		if (cumul_size == last_cumul_size)
 			continue;	/* not populated */
@@ -418,10 +427,6 @@ static void i82975x_init_csrows(struct mem_ctl_info *mci,
 		csrow->last_page = cumul_size - 1;
 		csrow->nr_pages = cumul_size - last_cumul_size;
 		last_cumul_size = cumul_size;
-		csrow->grain = 1 << 7;	/* 128Byte cache-line resolution */
-		csrow->mtype = MEM_DDR2; /* I82975x supports only DDR2 */
-		csrow->dtype = i82975x_dram_type(mch_window, index);
-		csrow->edac_mode = EDAC_SECDED; /* only supported */
 	}
 }
 
diff --git a/drivers/edac/mpc85xx_edac.c b/drivers/edac/mpc85xx_edac.c
index 73464a62adf7..fb92916d0872 100644
--- a/drivers/edac/mpc85xx_edac.c
+++ b/drivers/edac/mpc85xx_edac.c
@@ -883,6 +883,7 @@ static void __devinit mpc85xx_init_csrows(struct mem_ctl_info *mci)
 {
 	struct mpc85xx_mc_pdata *pdata = mci->pvt_info;
 	struct csrow_info *csrow;
+	struct dimm_info *dimm;
 	u32 sdram_ctl;
 	u32 sdtype;
 	enum mem_type mtype;
@@ -929,6 +930,8 @@ static void __devinit mpc85xx_init_csrows(struct mem_ctl_info *mci)
 		u32 end;
 
 		csrow = &mci->csrows[index];
+		dimm = csrow->channels[0].dimm;
+
 		cs_bnds = in_be32(pdata->mc_vbase + MPC85XX_MC_CS_BNDS_0 +
 				  (index * MPC85XX_MC_CS_BNDS_OFS));
 
@@ -945,12 +948,12 @@ static void __devinit mpc85xx_init_csrows(struct mem_ctl_info *mci)
 		csrow->first_page = start;
 		csrow->last_page = end;
 		csrow->nr_pages = end + 1 - start;
-		csrow->grain = 8;
-		csrow->mtype = mtype;
-		csrow->dtype = DEV_UNKNOWN;
+		dimm->grain = 8;
+		dimm->mtype = mtype;
+		dimm->dtype = DEV_UNKNOWN;
 		if (sdram_ctl & DSC_X32_EN)
-			csrow->dtype = DEV_X32;
-		csrow->edac_mode = EDAC_SECDED;
+			dimm->dtype = DEV_X32;
+		dimm->edac_mode = EDAC_SECDED;
 	}
 }
 
diff --git a/drivers/edac/mv64x60_edac.c b/drivers/edac/mv64x60_edac.c
index 7e5ff367705c..12d7fe04454c 100644
--- a/drivers/edac/mv64x60_edac.c
+++ b/drivers/edac/mv64x60_edac.c
@@ -656,6 +656,8 @@ static void mv64x60_init_csrows(struct mem_ctl_info *mci,
 				struct mv64x60_mc_pdata *pdata)
 {
 	struct csrow_info *csrow;
+	struct dimm_info *dimm;
+
 	u32 devtype;
 	u32 ctl;
 
@@ -664,30 +666,30 @@ static void mv64x60_init_csrows(struct mem_ctl_info *mci,
 	ctl = in_le32(pdata->mc_vbase + MV64X60_SDRAM_CONFIG);
 
 	csrow = &mci->csrows[0];
-	csrow->first_page = 0;
+	dimm = csrow->channels[0].dimm;
 	csrow->nr_pages = pdata->total_mem >> PAGE_SHIFT;
 	csrow->last_page = csrow->first_page + csrow->nr_pages - 1;
-	csrow->grain = 8;
+	dimm->grain = 8;
 
-	csrow->mtype = (ctl & MV64X60_SDRAM_REGISTERED) ? MEM_RDDR : MEM_DDR;
+	dimm->mtype = (ctl & MV64X60_SDRAM_REGISTERED) ? MEM_RDDR : MEM_DDR;
 
 	devtype = (ctl >> 20) & 0x3;
 	switch (devtype) {
 	case 0x0:
-		csrow->dtype = DEV_X32;
+		dimm->dtype = DEV_X32;
 		break;
 	case 0x2:		/* could be X8 too, but no way to tell */
-		csrow->dtype = DEV_X16;
+		dimm->dtype = DEV_X16;
 		break;
 	case 0x3:
-		csrow->dtype = DEV_X4;
+		dimm->dtype = DEV_X4;
 		break;
 	default:
-		csrow->dtype = DEV_UNKNOWN;
+		dimm->dtype = DEV_UNKNOWN;
 		break;
 	}
 
-	csrow->edac_mode = EDAC_SECDED;
+	dimm->edac_mode = EDAC_SECDED;
 }
 
 static int __devinit mv64x60_mc_err_probe(struct platform_device *pdev)
diff --git a/drivers/edac/pasemi_edac.c b/drivers/edac/pasemi_edac.c
index 7f71ee436744..4e53270bc336 100644
--- a/drivers/edac/pasemi_edac.c
+++ b/drivers/edac/pasemi_edac.c
@@ -135,11 +135,13 @@ static int pasemi_edac_init_csrows(struct mem_ctl_info *mci,
 				   enum edac_type edac_mode)
 {
 	struct csrow_info *csrow;
+	struct dimm_info *dimm;
 	u32 rankcfg;
 	int index;
 
 	for (index = 0; index < mci->nr_csrows; index++) {
 		csrow = &mci->csrows[index];
+		dimm = csrow->channels[0].dimm;
 
 		pci_read_config_dword(pdev,
 				      MCDRAM_RANKCFG + (index * 12),
@@ -177,10 +179,10 @@ static int pasemi_edac_init_csrows(struct mem_ctl_info *mci,
 		csrow->last_page = csrow->first_page + csrow->nr_pages - 1;
 		last_page_in_mmc += csrow->nr_pages;
 		csrow->page_mask = 0;
-		csrow->grain = PASEMI_EDAC_ERROR_GRAIN;
-		csrow->mtype = MEM_DDR;
-		csrow->dtype = DEV_UNKNOWN;
-		csrow->edac_mode = edac_mode;
+		dimm->grain = PASEMI_EDAC_ERROR_GRAIN;
+		dimm->mtype = MEM_DDR;
+		dimm->dtype = DEV_UNKNOWN;
+		dimm->edac_mode = edac_mode;
 	}
 	return 0;
 }
diff --git a/drivers/edac/ppc4xx_edac.c b/drivers/edac/ppc4xx_edac.c
index d427c69bb8b1..a75e56788b29 100644
--- a/drivers/edac/ppc4xx_edac.c
+++ b/drivers/edac/ppc4xx_edac.c
@@ -895,7 +895,7 @@ ppc4xx_edac_init_csrows(struct mem_ctl_info *mci, u32 mcopt1)
 	enum mem_type mtype;
 	enum dev_type dtype;
 	enum edac_type edac_mode;
-	int row;
+	int row, j;
 	u32 mbxcf, size;
 	static u32 ppc4xx_last_page;
 
@@ -975,15 +975,18 @@ ppc4xx_edac_init_csrows(struct mem_ctl_info *mci, u32 mcopt1)
 		 * possible values would be the PLB width (16), the
 		 * page size (PAGE_SIZE) or the memory width (2 or 4).
 		 */
+		for (j = 0; j < csi->nr_channels; j++) {
+			struct dimm_info *dimm = csi->channels[j].dimm;
 
-		csi->grain	= 1;
+			dimm->grain	= 1;
 
-		csi->mtype	= mtype;
-		csi->dtype	= dtype;
+			dimm->mtype	= mtype;
+			dimm->dtype	= dtype;
 
-		csi->edac_mode	= edac_mode;
+			dimm->edac_mode	= edac_mode;
 
 		ppc4xx_last_page += csi->nr_pages;
+		}
 	}
 
  done:
diff --git a/drivers/edac/r82600_edac.c b/drivers/edac/r82600_edac.c
index 6d908ad72d64..70b0dfa81db4 100644
--- a/drivers/edac/r82600_edac.c
+++ b/drivers/edac/r82600_edac.c
@@ -216,6 +216,7 @@ static void r82600_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev,
 			u8 dramcr)
 {
 	struct csrow_info *csrow;
+	struct dimm_info *dimm;
 	int index;
 	u8 drbar;		/* SDRAM Row Boundary Address Register */
 	u32 row_high_limit, row_high_limit_last;
@@ -227,6 +228,7 @@ static void r82600_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev,
 
 	for (index = 0; index < mci->nr_csrows; index++) {
 		csrow = &mci->csrows[index];
+		dimm = csrow->channels[0].dimm;
 
 		/* find the DRAM Chip Select Base address and mask */
 		pci_read_config_byte(pdev, R82600_DRBA + index, &drbar);
@@ -250,13 +252,13 @@ static void r82600_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev,
 		csrow->nr_pages = csrow->last_page - csrow->first_page + 1;
 		/* Error address is top 19 bits - so granularity is      *
 		 * 14 bits                                               */
-		csrow->grain = 1 << 14;
-		csrow->mtype = reg_sdram ? MEM_RDDR : MEM_DDR;
+		dimm->grain = 1 << 14;
+		dimm->mtype = reg_sdram ? MEM_RDDR : MEM_DDR;
 		/* FIXME - check that this is unknowable with this chipset */
-		csrow->dtype = DEV_UNKNOWN;
+		dimm->dtype = DEV_UNKNOWN;
 
 		/* Mode is global on 82600 */
-		csrow->edac_mode = ecc_on ? EDAC_SECDED : EDAC_NONE;
+		dimm->edac_mode = ecc_on ? EDAC_SECDED : EDAC_NONE;
 		row_high_limit_last = row_high_limit;
 	}
 }
diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c
index 95901c21d5dc..21147ac38c4d 100644
--- a/drivers/edac/sb_edac.c
+++ b/drivers/edac/sb_edac.c
@@ -551,7 +551,7 @@ static int sbridge_get_active_channels(const u8 bus, unsigned *channels,
 	return 0;
 }
 
-static int get_dimm_config(const struct mem_ctl_info *mci)
+static int get_dimm_config(struct mem_ctl_info *mci)
 {
 	struct sbridge_pvt *pvt = mci->pvt_info;
 	struct csrow_info *csr;
@@ -561,6 +561,7 @@ static int get_dimm_config(const struct mem_ctl_info *mci)
 	u32 reg;
 	enum edac_type mode;
 	enum mem_type mtype;
+	struct dimm_info *dimm;
 
 	pci_read_config_dword(pvt->pci_br, SAD_TARGET, &reg);
 	pvt->sbridge_dev->source_id = SOURCE_ID(reg);
@@ -612,6 +613,7 @@ static int get_dimm_config(const struct mem_ctl_info *mci)
 	/* On all supported DDR3 DIMM types, there are 8 banks available */
 	banks = 8;
 
+	dimm = mci->dimms;
 	for (i = 0; i < NUM_CHANNELS; i++) {
 		u32 mtr;
 
@@ -634,29 +636,30 @@ static int get_dimm_config(const struct mem_ctl_info *mci)
 					pvt->sbridge_dev->mc, i, j,
 					size, npages,
 					banks, ranks, rows, cols);
-				csr = &mci->csrows[csrow];
 
+				/*
+				 * Fake stuff. This controller doesn't see
+				 * csrows.
+				 */
+				csr = &mci->csrows[csrow];
 				csr->first_page = last_page;
 				csr->last_page = last_page + npages - 1;
-				csr->page_mask = 0UL;	/* Unused */
 				csr->nr_pages = npages;
-				csr->grain = 32;
 				csr->csrow_idx = csrow;
-				csr->dtype = (banks == 8) ? DEV_X8 : DEV_X4;
-				csr->ce_count = 0;
-				csr->ue_count = 0;
-				csr->mtype = mtype;
-				csr->edac_mode = mode;
 				csr->nr_channels = 1;
 				csr->channels[0].chan_idx = i;
-				csr->channels[0].ce_count = 0;
 				pvt->csrow_map[i][j] = csrow;
-				snprintf(csr->channels[0].dimm->label,
-					 sizeof(csr->channels[0].dimm->label),
-					 "CPU_SrcID#%u_Channel#%u_DIMM#%u",
-					 pvt->sbridge_dev->source_id, i, j);
 				last_page += npages;
 				csrow++;
+
+				csr->channels[0].dimm = dimm;
+				dimm->grain = 32;
+				dimm->dtype = (banks == 8) ? DEV_X8 : DEV_X4;
+				dimm->mtype = mtype;
+				dimm->edac_mode = mode;
+				snprintf(dimm->label, sizeof(dimm->label),
+					 "CPU_SrcID#%u_Channel#%u_DIMM#%u",
+					 pvt->sbridge_dev->source_id, i, j);
 			}
 		}
 	}
diff --git a/drivers/edac/tile_edac.c b/drivers/edac/tile_edac.c
index e99d00976189..c870f68ae8f1 100644
--- a/drivers/edac/tile_edac.c
+++ b/drivers/edac/tile_edac.c
@@ -84,6 +84,7 @@ static int __devinit tile_edac_init_csrows(struct mem_ctl_info *mci)
 	struct csrow_info	*csrow = &mci->csrows[0];
 	struct tile_edac_priv	*priv = mci->pvt_info;
 	struct mshim_mem_info	mem_info;
+	struct dimm_info *dimm = csrow->channels[0].dimm;
 
 	if (hv_dev_pread(priv->hv_devhdl, 0, (HV_VirtAddr)&mem_info,
 		sizeof(struct mshim_mem_info), MSHIM_MEM_INFO_OFF) !=
@@ -93,16 +94,16 @@ static int __devinit tile_edac_init_csrows(struct mem_ctl_info *mci)
 	}
 
 	if (mem_info.mem_ecc)
-		csrow->edac_mode = EDAC_SECDED;
+		dimm->edac_mode = EDAC_SECDED;
 	else
-		csrow->edac_mode = EDAC_NONE;
+		dimm->edac_mode = EDAC_NONE;
 	switch (mem_info.mem_type) {
 	case DDR2:
-		csrow->mtype = MEM_DDR2;
+		dimm->mtype = MEM_DDR2;
 		break;
 
 	case DDR3:
-		csrow->mtype = MEM_DDR3;
+		dimm->mtype = MEM_DDR3;
 		break;
 
 	default:
@@ -112,8 +113,8 @@ static int __devinit tile_edac_init_csrows(struct mem_ctl_info *mci)
 	csrow->first_page = 0;
 	csrow->nr_pages = mem_info.mem_size >> PAGE_SHIFT;
 	csrow->last_page = csrow->first_page + csrow->nr_pages - 1;
-	csrow->grain = TILE_EDAC_ERROR_GRAIN;
-	csrow->dtype = DEV_UNKNOWN;
+	dimm->grain = TILE_EDAC_ERROR_GRAIN;
+	dimm->dtype = DEV_UNKNOWN;
 
 	return 0;
 }
diff --git a/drivers/edac/x38_edac.c b/drivers/edac/x38_edac.c
index a438297389e5..f7cc4d214949 100644
--- a/drivers/edac/x38_edac.c
+++ b/drivers/edac/x38_edac.c
@@ -317,7 +317,7 @@ static unsigned long drb_to_nr_pages(
 static int x38_probe1(struct pci_dev *pdev, int dev_idx)
 {
 	int rc;
-	int i;
+	int i, j;
 	struct mem_ctl_info *mci = NULL;
 	unsigned long last_page;
 	u16 drbs[X38_CHANNELS][X38_RANKS_PER_CHANNEL];
@@ -372,20 +372,21 @@ static int x38_probe1(struct pci_dev *pdev, int dev_idx)
 			i / X38_RANKS_PER_CHANNEL,
 			i % X38_RANKS_PER_CHANNEL);
 
-		if (nr_pages == 0) {
-			csrow->mtype = MEM_EMPTY;
+		if (nr_pages == 0)
 			continue;
-		}
 
 		csrow->first_page = last_page + 1;
 		last_page += nr_pages;
 		csrow->last_page = last_page;
 		csrow->nr_pages = nr_pages;
 
-		csrow->grain = nr_pages << PAGE_SHIFT;
-		csrow->mtype = MEM_DDR2;
-		csrow->dtype = DEV_UNKNOWN;
-		csrow->edac_mode = EDAC_UNKNOWN;
+		for (j = 0; j < x38_channel_num; j++) {
+			struct dimm_info *dimm = csrow->channels[j].dimm;
+			dimm->grain = nr_pages << PAGE_SHIFT;
+			dimm->mtype = MEM_DDR2;
+			dimm->dtype = DEV_UNKNOWN;
+			dimm->edac_mode = EDAC_UNKNOWN;
+		}
 	}
 
 	x38_clear_error_info(mci);
diff --git a/include/linux/edac.h b/include/linux/edac.h
index 52bceca85e63..87aa07d2ee28 100644
--- a/include/linux/edac.h
+++ b/include/linux/edac.h
@@ -318,6 +318,13 @@ struct dimm_info {
 	unsigned memory_controller;
 	unsigned csrow;
 	unsigned csrow_channel;
+
+	u32 grain;		/* granularity of reported error in bytes */
+	enum dev_type dtype;	/* memory device type */
+	enum mem_type mtype;	/* memory dimm type */
+	enum edac_type edac_mode;	/* EDAC mode for this dimm */
+
+	u32 ce_count;		/* Correctable Errors for this dimm */
 };
 
 /**
@@ -343,19 +350,17 @@ struct rank_info {
 };
 
 struct csrow_info {
-	unsigned long first_page;	/* first page number in dimm */
-	unsigned long last_page;	/* last page number in dimm */
+	unsigned long first_page;	/* first page number in csrow */
+	unsigned long last_page;	/* last page number in csrow */
+	u32 nr_pages;			/* number of pages in csrow */
 	unsigned long page_mask;	/* used for interleaving -
 					 * 0UL for non intlv
 					 */
-	u32 nr_pages;		/* number of pages in csrow */
-	u32 grain;		/* granularity of reported error in bytes */
-	int csrow_idx;		/* the chip-select row */
-	enum dev_type dtype;	/* memory device type */
+	int csrow_idx;			/* the chip-select row */
+
 	u32 ue_count;		/* Uncorrectable Errors for this csrow */
 	u32 ce_count;		/* Correctable Errors for this csrow */
-	enum mem_type mtype;	/* memory csrow type */
-	enum edac_type edac_mode;	/* EDAC mode for this csrow */
+
 	struct mem_ctl_info *mci;	/* the parent */
 
 	struct kobject kobj;	/* sysfs kobject for this csrow */
-- 
cgit v1.3-7-g2ca7


From a895bf8b1e1ea4c032a8fa8a09475a2ce09fe77a Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@redhat.com>
Date: Sat, 28 Jan 2012 09:09:38 -0300
Subject: edac: move nr_pages to dimm struct
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The number of pages is a dimm property. Move it to the dimm struct.

After this change, it is possible to add sysfs nodes for the DIMM's that
will properly represent the DIMM stick properties, including its size.

A TODO fix here is to properly represent dual-rank/quad-rank DIMMs when
the memory controller represents the memory via chip select rows.

Reviewed-by: Aristeu Rozanski <arozansk@redhat.com>
Acked-by: Borislav Petkov <borislav.petkov@amd.com>
Acked-by: Chris Metcalf <cmetcalf@tilera.com>
Cc: Doug Thompson <norsk5@yahoo.com>
Cc: Mark Gross <mark.gross@intel.com>
Cc: Jason Uhlenkott <juhlenko@akamai.com>
Cc: Tim Small <tim@buttersideup.com>
Cc: Ranganathan Desikan <ravi@jetztechnologies.com>
Cc: "Arvind R." <arvino55@gmail.com>
Cc: Olof Johansson <olof@lixom.net>
Cc: Egor Martovetsky <egor@pasemi.com>
Cc: Michal Marek <mmarek@suse.cz>
Cc: Jiri Kosina <jkosina@suse.cz>
Cc: Joe Perches <joe@perches.com>
Cc: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Hitoshi Mitake <h.mitake@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: "Niklas Söderlund" <niklas.soderlund@ericsson.com>
Cc: Shaohui Xie <Shaohui.Xie@freescale.com>
Cc: Josh Boyer <jwboyer@gmail.com>
Cc: linuxppc-dev@lists.ozlabs.org
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/edac/amd64_edac.c      | 10 +++++----
 drivers/edac/amd76x_edac.c     |  6 +++---
 drivers/edac/cell_edac.c       |  8 ++++---
 drivers/edac/cpc925_edac.c     |  8 ++++---
 drivers/edac/e752x_edac.c      |  6 ++++--
 drivers/edac/e7xxx_edac.c      |  5 +++--
 drivers/edac/edac_mc.c         | 16 ++++++++------
 drivers/edac/edac_mc_sysfs.c   | 47 +++++++++++++++++++++++++++++-------------
 drivers/edac/i3000_edac.c      |  6 ++++--
 drivers/edac/i3200_edac.c      |  3 +--
 drivers/edac/i5000_edac.c      | 14 +++++++------
 drivers/edac/i5100_edac.c      | 22 ++++++++++++--------
 drivers/edac/i5400_edac.c      |  9 +++-----
 drivers/edac/i7300_edac.c      | 22 ++++++--------------
 drivers/edac/i7core_edac.c     | 10 +++------
 drivers/edac/i82443bxgx_edac.c |  2 +-
 drivers/edac/i82860_edac.c     |  2 +-
 drivers/edac/i82875p_edac.c    |  5 +++--
 drivers/edac/i82975x_edac.c    | 11 +++++++---
 drivers/edac/mpc85xx_edac.c    |  3 ++-
 drivers/edac/mv64x60_edac.c    |  3 ++-
 drivers/edac/pasemi_edac.c     | 14 ++++++-------
 drivers/edac/ppc4xx_edac.c     |  5 +++--
 drivers/edac/r82600_edac.c     |  3 ++-
 drivers/edac/sb_edac.c         |  8 ++-----
 drivers/edac/tile_edac.c       |  2 +-
 drivers/edac/x38_edac.c        |  4 ++--
 include/linux/edac.h           |  8 ++++---
 28 files changed, 146 insertions(+), 116 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index e2c5a94f683c..1ceb8e276376 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -2152,7 +2152,7 @@ static u32 amd64_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr)
 	nr_pages = pvt->ops->dbam_to_cs(pvt, dct, cs_mode) << (20 - PAGE_SHIFT);
 
 	debugf0("  (csrow=%d) DBAM map index= %d\n", csrow_nr, cs_mode);
-	debugf0("    nr_pages= %u  channel-count = %d\n",
+	debugf0("    nr_pages/channel= %u  channel-count = %d\n",
 		nr_pages, pvt->channel_count);
 
 	return nr_pages;
@@ -2171,6 +2171,7 @@ static int init_csrows(struct mem_ctl_info *mci)
 	int i, j, empty = 1;
 	enum mem_type mtype;
 	enum edac_type edac_mode;
+	int nr_pages = 0;
 
 	amd64_read_pci_cfg(pvt->F3, NBCFG, &val);
 
@@ -2194,9 +2195,9 @@ static int init_csrows(struct mem_ctl_info *mci)
 
 		empty = 0;
 		if (csrow_enabled(i, 0, pvt))
-			csrow->nr_pages = amd64_csrow_nr_pages(pvt, 0, i);
+			nr_pages = amd64_csrow_nr_pages(pvt, 0, i);
 		if (csrow_enabled(i, 1, pvt))
-			csrow->nr_pages += amd64_csrow_nr_pages(pvt, 1, i);
+			nr_pages += amd64_csrow_nr_pages(pvt, 1, i);
 
 		get_cs_base_and_mask(pvt, i, 0, &base, &mask);
 		/* 8 bytes of resolution */
@@ -2204,7 +2205,7 @@ static int init_csrows(struct mem_ctl_info *mci)
 		mtype = amd64_determine_memory_type(pvt, i);
 
 		debugf1("  for MC node %d csrow %d:\n", pvt->mc_node_id, i);
-		debugf1("    nr_pages: %u\n", csrow->nr_pages);
+		debugf1("    nr_pages: %u\n", nr_pages * pvt->channel_count);
 
 		/*
 		 * determine whether CHIPKILL or JUST ECC or NO ECC is operating
@@ -2218,6 +2219,7 @@ static int init_csrows(struct mem_ctl_info *mci)
 		for (j = 0; j < pvt->channel_count; j++) {
 			csrow->channels[j].dimm->mtype = mtype;
 			csrow->channels[j].dimm->edac_mode = edac_mode;
+			csrow->channels[j].dimm->nr_pages = nr_pages;
 		}
 	}
 
diff --git a/drivers/edac/amd76x_edac.c b/drivers/edac/amd76x_edac.c
index fcfe359f7be5..a2dde205f651 100644
--- a/drivers/edac/amd76x_edac.c
+++ b/drivers/edac/amd76x_edac.c
@@ -205,10 +205,10 @@ static void amd76x_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev,
 		mba_mask = ((mba & 0xff80) << 16) | 0x7fffffUL;
 		pci_read_config_dword(pdev, AMD76X_DRAM_MODE_STATUS, &dms);
 		csrow->first_page = mba_base >> PAGE_SHIFT;
-		csrow->nr_pages = (mba_mask + 1) >> PAGE_SHIFT;
-		csrow->last_page = csrow->first_page + csrow->nr_pages - 1;
+		dimm->nr_pages = (mba_mask + 1) >> PAGE_SHIFT;
+		csrow->last_page = csrow->first_page + dimm->nr_pages - 1;
 		csrow->page_mask = mba_mask >> PAGE_SHIFT;
-		dimm->grain = csrow->nr_pages << PAGE_SHIFT;
+		dimm->grain = dimm->nr_pages << PAGE_SHIFT;
 		dimm->mtype = MEM_RDDR;
 		dimm->dtype = ((dms >> index) & 0x1) ? DEV_X4 : DEV_UNKNOWN;
 		dimm->edac_mode = edac_mode;
diff --git a/drivers/edac/cell_edac.c b/drivers/edac/cell_edac.c
index 94fbb127215a..09e1b5d3df70 100644
--- a/drivers/edac/cell_edac.c
+++ b/drivers/edac/cell_edac.c
@@ -128,6 +128,7 @@ static void __devinit cell_edac_init_csrows(struct mem_ctl_info *mci)
 	struct cell_edac_priv		*priv = mci->pvt_info;
 	struct device_node		*np;
 	int				j;
+	u32				nr_pages;
 
 	for (np = NULL;
 	     (np = of_find_node_by_name(np, "memory")) != NULL;) {
@@ -142,19 +143,20 @@ static void __devinit cell_edac_init_csrows(struct mem_ctl_info *mci)
 		if (of_node_to_nid(np) != priv->node)
 			continue;
 		csrow->first_page = r.start >> PAGE_SHIFT;
-		csrow->nr_pages = resource_size(&r) >> PAGE_SHIFT;
-		csrow->last_page = csrow->first_page + csrow->nr_pages - 1;
+		nr_pages = resource_size(&r) >> PAGE_SHIFT;
+		csrow->last_page = csrow->first_page + nr_pages - 1;
 
 		for (j = 0; j < csrow->nr_channels; j++) {
 			dimm = csrow->channels[j].dimm;
 			dimm->mtype = MEM_XDR;
 			dimm->edac_mode = EDAC_SECDED;
+			dimm->nr_pages = nr_pages / csrow->nr_channels;
 		}
 		dev_dbg(mci->dev,
 			"Initialized on node %d, chanmask=0x%x,"
 			" first_page=0x%lx, nr_pages=0x%x\n",
 			priv->node, priv->chanmask,
-			csrow->first_page, csrow->nr_pages);
+			csrow->first_page, dimm->nr_pages);
 		break;
 	}
 }
diff --git a/drivers/edac/cpc925_edac.c b/drivers/edac/cpc925_edac.c
index ee90f3da8f3a..7b764a882dae 100644
--- a/drivers/edac/cpc925_edac.c
+++ b/drivers/edac/cpc925_edac.c
@@ -332,7 +332,7 @@ static void cpc925_init_csrows(struct mem_ctl_info *mci)
 	struct dimm_info *dimm;
 	int index, j;
 	u32 mbmr, mbbar, bba;
-	unsigned long row_size, last_nr_pages = 0;
+	unsigned long row_size, nr_pages, last_nr_pages = 0;
 
 	get_total_mem(pdata);
 
@@ -351,12 +351,14 @@ static void cpc925_init_csrows(struct mem_ctl_info *mci)
 
 		row_size = bba * (1UL << 28);	/* 256M */
 		csrow->first_page = last_nr_pages;
-		csrow->nr_pages = row_size >> PAGE_SHIFT;
-		csrow->last_page = csrow->first_page + csrow->nr_pages - 1;
+		nr_pages = row_size >> PAGE_SHIFT;
+		csrow->last_page = csrow->first_page + nr_pages - 1;
 		last_nr_pages = csrow->last_page + 1;
 
 		for (j = 0; j < csrow->nr_channels; j++) {
 			dimm = csrow->channels[j].dimm;
+
+			dimm->nr_pages = nr_pages / csrow->nr_channels;
 			dimm->mtype = MEM_RDDR;
 			dimm->edac_mode = EDAC_SECDED;
 
diff --git a/drivers/edac/e752x_edac.c b/drivers/edac/e752x_edac.c
index 6cf6ec6bc71e..cf17579ebc6d 100644
--- a/drivers/edac/e752x_edac.c
+++ b/drivers/edac/e752x_edac.c
@@ -1044,7 +1044,7 @@ static void e752x_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev,
 	int drc_drbg;		/* DRB granularity 0=64mb, 1=128mb */
 	int drc_ddim;		/* DRAM Data Integrity Mode 0=none, 2=edac */
 	u8 value;
-	u32 dra, drc, cumul_size, i;
+	u32 dra, drc, cumul_size, i, nr_pages;
 
 	dra = 0;
 	for (index = 0; index < 4; index++) {
@@ -1078,11 +1078,13 @@ static void e752x_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev,
 
 		csrow->first_page = last_cumul_size;
 		csrow->last_page = cumul_size - 1;
-		csrow->nr_pages = cumul_size - last_cumul_size;
+		nr_pages = cumul_size - last_cumul_size;
 		last_cumul_size = cumul_size;
 
 		for (i = 0; i < drc_chan + 1; i++) {
 			struct dimm_info *dimm = csrow->channels[i].dimm;
+
+			dimm->nr_pages = nr_pages / (drc_chan + 1);
 			dimm->grain = 1 << 12;	/* 4KiB - resolution of CELOG */
 			dimm->mtype = MEM_RDDR;	/* only one type supported */
 			dimm->dtype = mem_dev ? DEV_X4 : DEV_X8;
diff --git a/drivers/edac/e7xxx_edac.c b/drivers/edac/e7xxx_edac.c
index 5ed97f6eb346..709aca216639 100644
--- a/drivers/edac/e7xxx_edac.c
+++ b/drivers/edac/e7xxx_edac.c
@@ -349,7 +349,7 @@ static void e7xxx_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev,
 	unsigned long last_cumul_size;
 	int index, j;
 	u8 value;
-	u32 dra, cumul_size;
+	u32 dra, cumul_size, nr_pages;
 	int drc_chan, drc_drbg, drc_ddim, mem_dev;
 	struct csrow_info *csrow;
 	struct dimm_info *dimm;
@@ -380,12 +380,13 @@ static void e7xxx_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev,
 
 		csrow->first_page = last_cumul_size;
 		csrow->last_page = cumul_size - 1;
-		csrow->nr_pages = cumul_size - last_cumul_size;
+		nr_pages = cumul_size - last_cumul_size;
 		last_cumul_size = cumul_size;
 
 		for (j = 0; j < drc_chan + 1; j++) {
 			dimm = csrow->channels[j].dimm;
 
+			dimm->nr_pages = nr_pages / (drc_chan + 1);
 			dimm->grain = 1 << 12;	/* 4KiB - resolution of CELOG */
 			dimm->mtype = MEM_RDDR;	/* only one type supported */
 			dimm->dtype = mem_dev ? DEV_X4 : DEV_X8;
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index 0942efad55c1..072aa81b4a70 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -43,9 +43,10 @@ static void edac_mc_dump_channel(struct rank_info *chan)
 {
 	debugf4("\tchannel = %p\n", chan);
 	debugf4("\tchannel->chan_idx = %d\n", chan->chan_idx);
-	debugf4("\tchannel->ce_count = %d\n", chan->dimm->ce_count);
-	debugf4("\tchannel->label = '%s'\n", chan->dimm->label);
 	debugf4("\tchannel->csrow = %p\n\n", chan->csrow);
+	debugf4("\tdimm->ce_count = %d\n", chan->dimm->ce_count);
+	debugf4("\tdimm->label = '%s'\n", chan->dimm->label);
+	debugf4("\tdimm->nr_pages = 0x%x\n", chan->dimm->nr_pages);
 }
 
 static void edac_mc_dump_csrow(struct csrow_info *csrow)
@@ -55,7 +56,6 @@ static void edac_mc_dump_csrow(struct csrow_info *csrow)
 	debugf4("\tcsrow->first_page = 0x%lx\n", csrow->first_page);
 	debugf4("\tcsrow->last_page = 0x%lx\n", csrow->last_page);
 	debugf4("\tcsrow->page_mask = 0x%lx\n", csrow->page_mask);
-	debugf4("\tcsrow->nr_pages = 0x%x\n", csrow->nr_pages);
 	debugf4("\tcsrow->nr_channels = %d\n", csrow->nr_channels);
 	debugf4("\tcsrow->channels = %p\n", csrow->channels);
 	debugf4("\tcsrow->mci = %p\n\n", csrow->mci);
@@ -652,15 +652,19 @@ static void edac_mc_scrub_block(unsigned long page, unsigned long offset,
 int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page)
 {
 	struct csrow_info *csrows = mci->csrows;
-	int row, i;
+	int row, i, j, n;
 
 	debugf1("MC%d: %s(): 0x%lx\n", mci->mc_idx, __func__, page);
 	row = -1;
 
 	for (i = 0; i < mci->nr_csrows; i++) {
 		struct csrow_info *csrow = &csrows[i];
-
-		if (csrow->nr_pages == 0)
+		n = 0;
+		for (j = 0; j < csrow->nr_channels; j++) {
+			struct dimm_info *dimm = csrow->channels[j].dimm;
+			n += dimm->nr_pages;
+		}
+		if (n == 0)
 			continue;
 
 		debugf3("MC%d: %s(): first(0x%lx) page(0x%lx) last(0x%lx) "
diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c
index 487e03eeed26..1dc1c6ca4308 100644
--- a/drivers/edac/edac_mc_sysfs.c
+++ b/drivers/edac/edac_mc_sysfs.c
@@ -144,7 +144,13 @@ static ssize_t csrow_ce_count_show(struct csrow_info *csrow, char *data,
 static ssize_t csrow_size_show(struct csrow_info *csrow, char *data,
 				int private)
 {
-	return sprintf(data, "%u\n", PAGES_TO_MiB(csrow->nr_pages));
+	int i;
+	u32 nr_pages = 0;
+
+	for (i = 0; i < csrow->nr_channels; i++)
+		nr_pages += csrow->channels[i].dimm->nr_pages;
+
+	return sprintf(data, "%u\n", PAGES_TO_MiB(nr_pages));
 }
 
 static ssize_t csrow_mem_type_show(struct csrow_info *csrow, char *data,
@@ -519,16 +525,16 @@ static ssize_t mci_ctl_name_show(struct mem_ctl_info *mci, char *data)
 
 static ssize_t mci_size_mb_show(struct mem_ctl_info *mci, char *data)
 {
-	int total_pages, csrow_idx;
+	int total_pages = 0, csrow_idx, j;
 
-	for (total_pages = csrow_idx = 0; csrow_idx < mci->nr_csrows;
-		csrow_idx++) {
+	for (csrow_idx = 0; csrow_idx < mci->nr_csrows; csrow_idx++) {
 		struct csrow_info *csrow = &mci->csrows[csrow_idx];
 
-		if (!csrow->nr_pages)
-			continue;
+		for (j = 0; j < csrow->nr_channels; j++) {
+			struct dimm_info *dimm = csrow->channels[j].dimm;
 
-		total_pages += csrow->nr_pages;
+			total_pages += dimm->nr_pages;
+		}
 	}
 
 	return sprintf(data, "%u\n", PAGES_TO_MiB(total_pages));
@@ -900,7 +906,7 @@ static void edac_remove_mci_instance_attributes(struct mem_ctl_info *mci,
  */
 int edac_create_sysfs_mci_device(struct mem_ctl_info *mci)
 {
-	int i;
+	int i, j;
 	int err;
 	struct csrow_info *csrow;
 	struct kobject *kobj_mci = &mci->edac_mci_kobj;
@@ -934,10 +940,13 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci)
 	/* Make directories for each CSROW object under the mc<id> kobject
 	 */
 	for (i = 0; i < mci->nr_csrows; i++) {
+		int nr_pages = 0;
+
 		csrow = &mci->csrows[i];
+		for (j = 0; j < csrow->nr_channels; j++)
+			nr_pages += csrow->channels[j].dimm->nr_pages;
 
-		/* Only expose populated CSROWs */
-		if (csrow->nr_pages > 0) {
+		if (nr_pages > 0) {
 			err = edac_create_csrow_object(mci, csrow, i);
 			if (err) {
 				debugf1("%s() failure: create csrow %d obj\n",
@@ -949,10 +958,14 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci)
 
 	return 0;
 
-	/* CSROW error: backout what has already been registered,  */
 fail1:
 	for (i--; i >= 0; i--) {
-		if (mci->csrows[i].nr_pages > 0)
+		int nr_pages = 0;
+
+		csrow = &mci->csrows[i];
+		for (j = 0; j < csrow->nr_channels; j++)
+			nr_pages += csrow->channels[j].dimm->nr_pages;
+		if (nr_pages > 0)
 			kobject_put(&mci->csrows[i].kobj);
 	}
 
@@ -972,14 +985,20 @@ fail0:
  */
 void edac_remove_sysfs_mci_device(struct mem_ctl_info *mci)
 {
-	int i;
+	struct csrow_info *csrow;
+	int i, j;
 
 	debugf0("%s()\n", __func__);
 
 	/* remove all csrow kobjects */
 	debugf4("%s()  unregister this mci kobj\n", __func__);
 	for (i = 0; i < mci->nr_csrows; i++) {
-		if (mci->csrows[i].nr_pages > 0) {
+		int nr_pages = 0;
+
+		csrow = &mci->csrows[i];
+		for (j = 0; j < csrow->nr_channels; j++)
+			nr_pages += csrow->channels[j].dimm->nr_pages;
+		if (nr_pages > 0) {
 			debugf0("%s()  unreg csrow-%d\n", __func__, i);
 			kobject_put(&mci->csrows[i].kobj);
 		}
diff --git a/drivers/edac/i3000_edac.c b/drivers/edac/i3000_edac.c
index 8fe60ee37826..719ccbed7435 100644
--- a/drivers/edac/i3000_edac.c
+++ b/drivers/edac/i3000_edac.c
@@ -306,7 +306,7 @@ static int i3000_probe1(struct pci_dev *pdev, int dev_idx)
 	int rc;
 	int i, j;
 	struct mem_ctl_info *mci = NULL;
-	unsigned long last_cumul_size;
+	unsigned long last_cumul_size, nr_pages;
 	int interleaved, nr_channels;
 	unsigned char dra[I3000_RANKS / 2], drb[I3000_RANKS];
 	unsigned char *c0dra = dra, *c1dra = &dra[I3000_RANKS_PER_CHANNEL / 2];
@@ -391,11 +391,13 @@ static int i3000_probe1(struct pci_dev *pdev, int dev_idx)
 
 		csrow->first_page = last_cumul_size;
 		csrow->last_page = cumul_size - 1;
-		csrow->nr_pages = cumul_size - last_cumul_size;
+		nr_pages = cumul_size - last_cumul_size;
 		last_cumul_size = cumul_size;
 
 		for (j = 0; j < nr_channels; j++) {
 			struct dimm_info *dimm = csrow->channels[j].dimm;
+
+			dimm->nr_pages = nr_pages / nr_channels;
 			dimm->grain = I3000_DEAP_GRAIN;
 			dimm->mtype = MEM_DDR2;
 			dimm->dtype = DEV_UNKNOWN;
diff --git a/drivers/edac/i3200_edac.c b/drivers/edac/i3200_edac.c
index 93c4d5a6a623..3b3622209f3e 100644
--- a/drivers/edac/i3200_edac.c
+++ b/drivers/edac/i3200_edac.c
@@ -376,11 +376,10 @@ static int i3200_probe1(struct pci_dev *pdev, int dev_idx)
 		if (nr_pages == 0)
 			continue;
 
-		csrow->nr_pages = nr_pages;
-
 		for (j = 0; j < nr_channels; j++) {
 			struct dimm_info *dimm = csrow->channels[j].dimm;
 
+			dimm->nr_pages = nr_pages / nr_channels;
 			dimm->grain = nr_pages << PAGE_SHIFT;
 			dimm->mtype = MEM_DDR2;
 			dimm->dtype = DEV_UNKNOWN;
diff --git a/drivers/edac/i5000_edac.c b/drivers/edac/i5000_edac.c
index 26b40556958e..f3a1a3e1e4e1 100644
--- a/drivers/edac/i5000_edac.c
+++ b/drivers/edac/i5000_edac.c
@@ -1236,6 +1236,7 @@ static int i5000_init_csrows(struct mem_ctl_info *mci)
 {
 	struct i5000_pvt *pvt;
 	struct csrow_info *p_csrow;
+	struct dimm_info *dimm;
 	int empty, channel_count;
 	int max_csrows;
 	int mtr, mtr1;
@@ -1265,21 +1266,22 @@ static int i5000_init_csrows(struct mem_ctl_info *mci)
 
 		csrow_megs = 0;
 		for (channel = 0; channel < pvt->maxch; channel++) {
+			dimm = p_csrow->channels[channel].dimm;
 			csrow_megs += pvt->dimm_info[csrow][channel].megabytes;
-			p_csrow->channels[channel].dimm->grain = 8;
+			dimm->grain = 8;
 
 			/* Assume DDR2 for now */
-			p_csrow->channels[channel].dimm->mtype = MEM_FB_DDR2;
+			dimm->mtype = MEM_FB_DDR2;
 
 			/* ask what device type on this row */
 			if (MTR_DRAM_WIDTH(mtr))
-				p_csrow->channels[channel].dimm->dtype = DEV_X8;
+				dimm->dtype = DEV_X8;
 			else
-				p_csrow->channels[channel].dimm->dtype = DEV_X4;
+				dimm->dtype = DEV_X4;
 
-			p_csrow->channels[channel].dimm->edac_mode = EDAC_S8ECD8ED;
+			dimm->edac_mode = EDAC_S8ECD8ED;
+			dimm->nr_pages = (csrow_megs << 8) / pvt->maxch;
 		}
-		p_csrow->nr_pages = csrow_megs << 8;
 
 		empty = 0;
 	}
diff --git a/drivers/edac/i5100_edac.c b/drivers/edac/i5100_edac.c
index 5338c7968f78..c08e94064ef6 100644
--- a/drivers/edac/i5100_edac.c
+++ b/drivers/edac/i5100_edac.c
@@ -859,7 +859,6 @@ static void __devinit i5100_init_csrows(struct mem_ctl_info *mci)
 		 * FIXME: these two are totally bogus -- I don't see how to
 		 * map them correctly to this structure...
 		 */
-		mci->csrows[i].nr_pages = npages;
 		mci->csrows[i].csrow_idx = i;
 		mci->csrows[i].mci = mci;
 		mci->csrows[i].nr_channels = 1;
@@ -867,14 +866,19 @@ static void __devinit i5100_init_csrows(struct mem_ctl_info *mci)
 		total_pages += npages;
 
 		dimm = mci->csrows[i].channels[0].dimm;
-		dimm->grain = 32;
-		dimm->dtype = (priv->mtr[chan][rank].width == 4) ?
-			      DEV_X4 : DEV_X8;
-		dimm->mtype = MEM_RDDR2;
-		dimm->edac_mode = EDAC_SECDED;
-		snprintf(dimm->label, sizeof(dimm->label),
-			 "DIMM%u",
-			 i5100_rank_to_slot(mci, chan, rank));
+		dimm->nr_pages = npages;
+		if (npages) {
+			total_pages += npages;
+
+			dimm->grain = 32;
+			dimm->dtype = (priv->mtr[chan][rank].width == 4) ?
+				DEV_X4 : DEV_X8;
+			dimm->mtype = MEM_RDDR2;
+			dimm->edac_mode = EDAC_SECDED;
+			snprintf(dimm->label, sizeof(dimm->label),
+				"DIMM%u",
+				i5100_rank_to_slot(mci, chan, rank));
+		}
 	}
 }
 
diff --git a/drivers/edac/i5400_edac.c b/drivers/edac/i5400_edac.c
index 6f85dcb34019..6543f4a8367b 100644
--- a/drivers/edac/i5400_edac.c
+++ b/drivers/edac/i5400_edac.c
@@ -1156,7 +1156,7 @@ static int i5400_init_csrows(struct mem_ctl_info *mci)
 	int empty, channel_count;
 	int max_csrows;
 	int mtr;
-	int csrow_megs;
+	int size_mb;
 	int channel;
 	int csrow;
 	struct dimm_info *dimm;
@@ -1171,8 +1171,6 @@ static int i5400_init_csrows(struct mem_ctl_info *mci)
 	for (csrow = 0; csrow < max_csrows; csrow++) {
 		p_csrow = &mci->csrows[csrow];
 
-		p_csrow->csrow_idx = csrow;
-
 		/* use branch 0 for the basis */
 		mtr = determine_mtr(pvt, csrow, 0);
 
@@ -1180,12 +1178,11 @@ static int i5400_init_csrows(struct mem_ctl_info *mci)
 		if (!MTR_DIMMS_PRESENT(mtr))
 			continue;
 
-		csrow_megs = 0;
 		for (channel = 0; channel < pvt->maxch; channel++) {
-			csrow_megs += pvt->dimm_info[csrow][channel].megabytes;
+			size_mb = pvt->dimm_info[csrow][channel].megabytes;
 
-			p_csrow->nr_pages = csrow_megs << 8;
 			dimm = p_csrow->channels[channel].dimm;
+			dimm->nr_pages = size_mb << 8;
 			dimm->grain = 8;
 			dimm->dtype = MTR_DRAM_WIDTH(mtr) ? DEV_X8 : DEV_X4;
 			dimm->mtype = MEM_RDDR2;
diff --git a/drivers/edac/i7300_edac.c b/drivers/edac/i7300_edac.c
index d4153d6cfe30..d6f3a2d0f70a 100644
--- a/drivers/edac/i7300_edac.c
+++ b/drivers/edac/i7300_edac.c
@@ -617,9 +617,7 @@ static void i7300_enable_error_reporting(struct mem_ctl_info *mci)
 static int decode_mtr(struct i7300_pvt *pvt,
 		      int slot, int ch, int branch,
 		      struct i7300_dimm_info *dinfo,
-		      struct csrow_info *p_csrow,
-		      struct dimm_info *dimm,
-		      u32 *nr_pages)
+		      struct dimm_info *dimm)
 {
 	int mtr, ans, addrBits, channel;
 
@@ -651,7 +649,6 @@ static int decode_mtr(struct i7300_pvt *pvt,
 	addrBits -= 3;	/* 8 bits per bytes */
 
 	dinfo->megabytes = 1 << addrBits;
-	*nr_pages = dinfo->megabytes << 8;
 
 	debugf2("\t\tWIDTH: x%d\n", MTR_DRAM_WIDTH(mtr));
 
@@ -664,8 +661,6 @@ static int decode_mtr(struct i7300_pvt *pvt,
 	debugf2("\t\tNUMCOL: %s\n", numcol_toString[MTR_DIMM_COLS(mtr)]);
 	debugf2("\t\tSIZE: %d MB\n", dinfo->megabytes);
 
-	p_csrow->csrow_idx = slot;
-
 	/*
 	 * The type of error detection actually depends of the
 	 * mode of operation. When it is just one single memory chip, at
@@ -675,6 +670,7 @@ static int decode_mtr(struct i7300_pvt *pvt,
 	 * See datasheet Sections 7.3.6 to 7.3.8
 	 */
 
+	dimm->nr_pages = MiB_TO_PAGES(dinfo->megabytes);
 	dimm->grain = 8;
 	dimm->mtype = MEM_FB_DDR2;
 	if (IS_SINGLE_MODE(pvt->mc_settings_a)) {
@@ -774,11 +770,9 @@ static int i7300_init_csrows(struct mem_ctl_info *mci)
 {
 	struct i7300_pvt *pvt;
 	struct i7300_dimm_info *dinfo;
-	struct csrow_info *p_csrow;
 	int rc = -ENODEV;
 	int mtr;
 	int ch, branch, slot, channel;
-	u32 nr_pages;
 	struct dimm_info *dimm;
 
 	pvt = mci->pvt_info;
@@ -804,7 +798,6 @@ static int i7300_init_csrows(struct mem_ctl_info *mci)
 	}
 
 	/* Get the set of MTR[0-7] regs by each branch */
-	nr_pages = 0;
 	for (slot = 0; slot < MAX_SLOTS; slot++) {
 		int where = mtr_regs[slot];
 		for (branch = 0; branch < MAX_BRANCHES; branch++) {
@@ -815,21 +808,18 @@ static int i7300_init_csrows(struct mem_ctl_info *mci)
 				int channel = to_channel(ch, branch);
 
 				dinfo = &pvt->dimm_info[slot][channel];
-				p_csrow = &mci->csrows[slot];
 
-				dimm = p_csrow->channels[branch * MAX_CH_PER_BRANCH + ch].dimm;
+				dimm = mci->csrows[slot].channels[branch * MAX_CH_PER_BRANCH + ch].dimm;
 
 				mtr = decode_mtr(pvt, slot, ch, branch,
-						 dinfo, p_csrow, dimm,
-						 &nr_pages);
+						 dinfo, dimm);
+
 				/* if no DIMMS on this row, continue */
 				if (!MTR_DIMMS_PRESENT(mtr))
 					continue;
 
-				/* Update per_csrow memory count */
-				p_csrow->nr_pages += nr_pages;
-
 				rc = 0;
+
 			}
 		}
 	}
diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c
index 76c957c525fb..0e3cc34bcc22 100644
--- a/drivers/edac/i7core_edac.c
+++ b/drivers/edac/i7core_edac.c
@@ -715,17 +715,12 @@ static int get_dimm_config(struct mem_ctl_info *mci)
 			npages = MiB_TO_PAGES(size);
 
 			csr = &mci->csrows[csrow];
-			csr->nr_pages = npages;
-
-			csr->csrow_idx = csrow;
-			csr->nr_channels = 1;
-
-			csr->channels[0].chan_idx = i;
-			csr->channels[0].ce_count = 0;
 
 			pvt->csrow_map[i][j] = csrow;
 
 			dimm = csr->channels[0].dimm;
+			dimm->nr_pages = npages;
+
 			switch (banks) {
 			case 4:
 				dimm->dtype = DEV_X4;
@@ -746,6 +741,7 @@ static int get_dimm_config(struct mem_ctl_info *mci)
 			dimm->grain = 8;
 			dimm->edac_mode = mode;
 			dimm->mtype = mtype;
+			csrow++;
 		}
 
 		pci_read_config_dword(pdev, MC_SAG_CH_0, &value[0]);
diff --git a/drivers/edac/i82443bxgx_edac.c b/drivers/edac/i82443bxgx_edac.c
index 0b98dd3408b9..02b252acd999 100644
--- a/drivers/edac/i82443bxgx_edac.c
+++ b/drivers/edac/i82443bxgx_edac.c
@@ -220,7 +220,7 @@ static void i82443bxgx_init_csrows(struct mem_ctl_info *mci,
 		row_base = row_high_limit_last;
 		csrow->first_page = row_base >> PAGE_SHIFT;
 		csrow->last_page = (row_high_limit >> PAGE_SHIFT) - 1;
-		csrow->nr_pages = csrow->last_page - csrow->first_page + 1;
+		dimm->nr_pages = csrow->last_page - csrow->first_page + 1;
 		/* EAP reports in 4kilobyte granularity [61] */
 		dimm->grain = 1 << 12;
 		dimm->mtype = mtype;
diff --git a/drivers/edac/i82860_edac.c b/drivers/edac/i82860_edac.c
index 3eb77845cfca..8485bbf4379f 100644
--- a/drivers/edac/i82860_edac.c
+++ b/drivers/edac/i82860_edac.c
@@ -167,7 +167,7 @@ static void i82860_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev)
 
 		csrow->first_page = last_cumul_size;
 		csrow->last_page = cumul_size - 1;
-		csrow->nr_pages = cumul_size - last_cumul_size;
+		dimm->nr_pages = cumul_size - last_cumul_size;
 		last_cumul_size = cumul_size;
 		dimm->grain = 1 << 12;	/* I82860_EAP has 4KiB reolution */
 		dimm->mtype = MEM_RMBS;
diff --git a/drivers/edac/i82875p_edac.c b/drivers/edac/i82875p_edac.c
index eac574285da8..e16281b41f3b 100644
--- a/drivers/edac/i82875p_edac.c
+++ b/drivers/edac/i82875p_edac.c
@@ -347,7 +347,7 @@ static void i82875p_init_csrows(struct mem_ctl_info *mci,
 	unsigned long last_cumul_size;
 	u8 value;
 	u32 drc_ddim;		/* DRAM Data Integrity Mode 0=none,2=edac */
-	u32 cumul_size;
+	u32 cumul_size, nr_pages;
 	int index, j;
 
 	drc_ddim = (drc >> 18) & 0x1;
@@ -371,12 +371,13 @@ static void i82875p_init_csrows(struct mem_ctl_info *mci,
 
 		csrow->first_page = last_cumul_size;
 		csrow->last_page = cumul_size - 1;
-		csrow->nr_pages = cumul_size - last_cumul_size;
+		nr_pages = cumul_size - last_cumul_size;
 		last_cumul_size = cumul_size;
 
 		for (j = 0; j < nr_chans; j++) {
 			dimm = csrow->channels[j].dimm;
 
+			dimm->nr_pages = nr_pages / nr_chans;
 			dimm->grain = 1 << 12;	/* I82875P_EAP has 4KiB reolution */
 			dimm->mtype = MEM_DDR;
 			dimm->dtype = DEV_UNKNOWN;
diff --git a/drivers/edac/i82975x_edac.c b/drivers/edac/i82975x_edac.c
index b8ec8719e2f5..014a9483fccc 100644
--- a/drivers/edac/i82975x_edac.c
+++ b/drivers/edac/i82975x_edac.c
@@ -370,7 +370,7 @@ static void i82975x_init_csrows(struct mem_ctl_info *mci,
 	struct csrow_info *csrow;
 	unsigned long last_cumul_size;
 	u8 value;
-	u32 cumul_size;
+	u32 cumul_size, nr_pages;
 	int index, chan;
 	struct dimm_info *dimm;
 	enum dev_type dtype;
@@ -402,6 +402,7 @@ static void i82975x_init_csrows(struct mem_ctl_info *mci,
 		debugf3("%s(): (%d) cumul_size 0x%x\n", __func__, index,
 			cumul_size);
 
+		nr_pages = cumul_size - last_cumul_size;
 		/*
 		 * Initialise dram labels
 		 * index values:
@@ -411,6 +412,11 @@ static void i82975x_init_csrows(struct mem_ctl_info *mci,
 		dtype = i82975x_dram_type(mch_window, index);
 		for (chan = 0; chan < csrow->nr_channels; chan++) {
 			dimm = mci->csrows[index].channels[chan].dimm;
+
+			if (!nr_pages)
+				continue;
+
+			dimm->nr_pages = nr_pages / csrow->nr_channels;
 			strncpy(csrow->channels[chan].dimm->label,
 					labels[(index >> 1) + (chan * 2)],
 					EDAC_MC_LABEL_LEN);
@@ -420,12 +426,11 @@ static void i82975x_init_csrows(struct mem_ctl_info *mci,
 			dimm->edac_mode = EDAC_SECDED; /* only supported */
 		}
 
-		if (cumul_size == last_cumul_size)
+		if (!nr_pages)
 			continue;	/* not populated */
 
 		csrow->first_page = last_cumul_size;
 		csrow->last_page = cumul_size - 1;
-		csrow->nr_pages = cumul_size - last_cumul_size;
 		last_cumul_size = cumul_size;
 	}
 }
diff --git a/drivers/edac/mpc85xx_edac.c b/drivers/edac/mpc85xx_edac.c
index fb92916d0872..c1d9e158972c 100644
--- a/drivers/edac/mpc85xx_edac.c
+++ b/drivers/edac/mpc85xx_edac.c
@@ -947,7 +947,8 @@ static void __devinit mpc85xx_init_csrows(struct mem_ctl_info *mci)
 
 		csrow->first_page = start;
 		csrow->last_page = end;
-		csrow->nr_pages = end + 1 - start;
+
+		dimm->nr_pages = end + 1 - start;
 		dimm->grain = 8;
 		dimm->mtype = mtype;
 		dimm->dtype = DEV_UNKNOWN;
diff --git a/drivers/edac/mv64x60_edac.c b/drivers/edac/mv64x60_edac.c
index d2e3c39ede9f..281e24528599 100644
--- a/drivers/edac/mv64x60_edac.c
+++ b/drivers/edac/mv64x60_edac.c
@@ -667,7 +667,8 @@ static void mv64x60_init_csrows(struct mem_ctl_info *mci,
 
 	csrow = &mci->csrows[0];
 	dimm = csrow->channels[0].dimm;
-	csrow->nr_pages = pdata->total_mem >> PAGE_SHIFT;
+
+	dimm->nr_pages = pdata->total_mem >> PAGE_SHIFT;
 	dimm->grain = 8;
 
 	dimm->mtype = (ctl & MV64X60_SDRAM_REGISTERED) ? MEM_RDDR : MEM_DDR;
diff --git a/drivers/edac/pasemi_edac.c b/drivers/edac/pasemi_edac.c
index 4e53270bc336..3fcefda653fd 100644
--- a/drivers/edac/pasemi_edac.c
+++ b/drivers/edac/pasemi_edac.c
@@ -153,20 +153,20 @@ static int pasemi_edac_init_csrows(struct mem_ctl_info *mci,
 		switch ((rankcfg & MCDRAM_RANKCFG_TYPE_SIZE_M) >>
 			MCDRAM_RANKCFG_TYPE_SIZE_S) {
 		case 0:
-			csrow->nr_pages = 128 << (20 - PAGE_SHIFT);
+			dimm->nr_pages = 128 << (20 - PAGE_SHIFT);
 			break;
 		case 1:
-			csrow->nr_pages = 256 << (20 - PAGE_SHIFT);
+			dimm->nr_pages = 256 << (20 - PAGE_SHIFT);
 			break;
 		case 2:
 		case 3:
-			csrow->nr_pages = 512 << (20 - PAGE_SHIFT);
+			dimm->nr_pages = 512 << (20 - PAGE_SHIFT);
 			break;
 		case 4:
-			csrow->nr_pages = 1024 << (20 - PAGE_SHIFT);
+			dimm->nr_pages = 1024 << (20 - PAGE_SHIFT);
 			break;
 		case 5:
-			csrow->nr_pages = 2048 << (20 - PAGE_SHIFT);
+			dimm->nr_pages = 2048 << (20 - PAGE_SHIFT);
 			break;
 		default:
 			edac_mc_printk(mci, KERN_ERR,
@@ -176,8 +176,8 @@ static int pasemi_edac_init_csrows(struct mem_ctl_info *mci,
 		}
 
 		csrow->first_page = last_page_in_mmc;
-		csrow->last_page = csrow->first_page + csrow->nr_pages - 1;
-		last_page_in_mmc += csrow->nr_pages;
+		csrow->last_page = csrow->first_page + dimm->nr_pages - 1;
+		last_page_in_mmc += dimm->nr_pages;
 		csrow->page_mask = 0;
 		dimm->grain = PASEMI_EDAC_ERROR_GRAIN;
 		dimm->mtype = MEM_DDR;
diff --git a/drivers/edac/ppc4xx_edac.c b/drivers/edac/ppc4xx_edac.c
index ec5e529e33f6..95cfc0f8d46d 100644
--- a/drivers/edac/ppc4xx_edac.c
+++ b/drivers/edac/ppc4xx_edac.c
@@ -896,7 +896,7 @@ ppc4xx_edac_init_csrows(struct mem_ctl_info *mci, u32 mcopt1)
 	enum dev_type dtype;
 	enum edac_type edac_mode;
 	int row, j;
-	u32 mbxcf, size;
+	u32 mbxcf, size, nr_pages;
 
 	/* Establish the memory type and width */
 
@@ -947,7 +947,7 @@ ppc4xx_edac_init_csrows(struct mem_ctl_info *mci, u32 mcopt1)
 		case SDRAM_MBCF_SZ_2GB:
 		case SDRAM_MBCF_SZ_4GB:
 		case SDRAM_MBCF_SZ_8GB:
-			csi->nr_pages = SDRAM_MBCF_SZ_TO_PAGES(size);
+			nr_pages = SDRAM_MBCF_SZ_TO_PAGES(size);
 			break;
 		default:
 			ppc4xx_edac_mc_printk(KERN_ERR, mci,
@@ -973,6 +973,7 @@ ppc4xx_edac_init_csrows(struct mem_ctl_info *mci, u32 mcopt1)
 		for (j = 0; j < csi->nr_channels; j++) {
 			struct dimm_info *dimm = csi->channels[j].dimm;
 
+			dimm->nr_pages  = nr_pages / csi->nr_channels;
 			dimm->grain	= 1;
 
 			dimm->mtype	= mtype;
diff --git a/drivers/edac/r82600_edac.c b/drivers/edac/r82600_edac.c
index 70b0dfa81db4..c41b375e1f38 100644
--- a/drivers/edac/r82600_edac.c
+++ b/drivers/edac/r82600_edac.c
@@ -249,7 +249,8 @@ static void r82600_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev,
 
 		csrow->first_page = row_base >> PAGE_SHIFT;
 		csrow->last_page = (row_high_limit >> PAGE_SHIFT) - 1;
-		csrow->nr_pages = csrow->last_page - csrow->first_page + 1;
+
+		dimm->nr_pages = csrow->last_page - csrow->first_page + 1;
 		/* Error address is top 19 bits - so granularity is      *
 		 * 14 bits                                               */
 		dimm->grain = 1 << 14;
diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c
index d5892c052bf4..2ce9bf5e354b 100644
--- a/drivers/edac/sb_edac.c
+++ b/drivers/edac/sb_edac.c
@@ -561,7 +561,6 @@ static int get_dimm_config(struct mem_ctl_info *mci)
 	u32 reg;
 	enum edac_type mode;
 	enum mem_type mtype;
-	struct dimm_info *dimm;
 
 	pci_read_config_dword(pvt->pci_br, SAD_TARGET, &reg);
 	pvt->sbridge_dev->source_id = SOURCE_ID(reg);
@@ -613,11 +612,11 @@ static int get_dimm_config(struct mem_ctl_info *mci)
 	/* On all supported DDR3 DIMM types, there are 8 banks available */
 	banks = 8;
 
-	dimm = mci->dimms;
 	for (i = 0; i < NUM_CHANNELS; i++) {
 		u32 mtr;
 
 		for (j = 0; j < ARRAY_SIZE(mtr_regs); j++) {
+			struct dimm_info *dimm = &mci->dimms[j];
 			pci_read_config_dword(pvt->pci_tad[i],
 					      mtr_regs[j], &mtr);
 			debugf4("Channel #%d  MTR%d = %x\n", i, j, mtr);
@@ -642,15 +641,12 @@ static int get_dimm_config(struct mem_ctl_info *mci)
 				 * csrows.
 				 */
 				csr = &mci->csrows[csrow];
-				csr->nr_pages = npages;
-				csr->csrow_idx = csrow;
-				csr->nr_channels = 1;
-				csr->channels[0].chan_idx = i;
 				pvt->csrow_map[i][j] = csrow;
 				last_page += npages;
 				csrow++;
 
 				csr->channels[0].dimm = dimm;
+				dimm->nr_pages = npages;
 				dimm->grain = 32;
 				dimm->dtype = (banks == 8) ? DEV_X8 : DEV_X4;
 				dimm->mtype = mtype;
diff --git a/drivers/edac/tile_edac.c b/drivers/edac/tile_edac.c
index 54067c4b0cc1..054c9bb3a5dc 100644
--- a/drivers/edac/tile_edac.c
+++ b/drivers/edac/tile_edac.c
@@ -110,7 +110,7 @@ static int __devinit tile_edac_init_csrows(struct mem_ctl_info *mci)
 		return -1;
 	}
 
-	csrow->nr_pages = mem_info.mem_size >> PAGE_SHIFT;
+	dimm->nr_pages = mem_info.mem_size >> PAGE_SHIFT;
 	dimm->grain = TILE_EDAC_ERROR_GRAIN;
 	dimm->dtype = DEV_UNKNOWN;
 
diff --git a/drivers/edac/x38_edac.c b/drivers/edac/x38_edac.c
index bc7f880a4eed..e3247997aa00 100644
--- a/drivers/edac/x38_edac.c
+++ b/drivers/edac/x38_edac.c
@@ -373,10 +373,10 @@ static int x38_probe1(struct pci_dev *pdev, int dev_idx)
 		if (nr_pages == 0)
 			continue;
 
-		csrow->nr_pages = nr_pages;
-
 		for (j = 0; j < x38_channel_num; j++) {
 			struct dimm_info *dimm = csrow->channels[j].dimm;
+
+			dimm->nr_pages = nr_pages / x38_channel_num;
 			dimm->grain = nr_pages << PAGE_SHIFT;
 			dimm->mtype = MEM_DDR2;
 			dimm->dtype = DEV_UNKNOWN;
diff --git a/include/linux/edac.h b/include/linux/edac.h
index 87aa07d2ee28..67717cab1313 100644
--- a/include/linux/edac.h
+++ b/include/linux/edac.h
@@ -324,6 +324,8 @@ struct dimm_info {
 	enum mem_type mtype;	/* memory dimm type */
 	enum edac_type edac_mode;	/* EDAC mode for this dimm */
 
+	u32 nr_pages;			/* number of pages in csrow */
+
 	u32 ce_count;		/* Correctable Errors for this dimm */
 };
 
@@ -350,12 +352,12 @@ struct rank_info {
 };
 
 struct csrow_info {
+	/* Used only by edac_mc_find_csrow_by_page() */
 	unsigned long first_page;	/* first page number in csrow */
 	unsigned long last_page;	/* last page number in csrow */
-	u32 nr_pages;			/* number of pages in csrow */
 	unsigned long page_mask;	/* used for interleaving -
-					 * 0UL for non intlv
-					 */
+					 * 0UL for non intlv */
+
 	int csrow_idx;			/* the chip-select row */
 
 	u32 ue_count;		/* Uncorrectable Errors for this csrow */
-- 
cgit v1.3-7-g2ca7


From 982216a4290543fe73ae4f0a156f3d7906bd9b73 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@redhat.com>
Date: Mon, 16 Apr 2012 13:04:46 -0300
Subject: edac.h: Add generic layers for describing a memory location

The edac core were written with the idea that memory controllers
are able to directly access csrows, and that the channels are
used inside a csrows select.

This is not true for FB-DIMM and RAMBUS memory controllers.

Also, some recent advanced memory controllers don't present a per-csrows
view. Instead, they view memories as DIMMs, instead of ranks, accessed
via csrow/channel.

So, changes are needed in order to allow the EDAC core to
work with all types of architectures.

In preparation for handling non-csrows based memory controllers,
add some memory structs and a macro:

enum hw_event_mc_err_type: describes the type of error
			   (corrected, uncorrected, fatal)

To be used by the new edac_mc_handle_error function;

enum edac_mc_layer: describes the type of a given memory
architecture layer (branch, channel, slot, csrow).

struct edac_mc_layer: describes the properties of a memory
		      layer (type, size, and if the layer
		      will be used on a virtual csrow.

EDAC_DIMM_PTR() - as the number of layers can vary from 1 to 3,
this macro converts from an address with up to 3 layers into
a linear address.

Reviewed-by: Borislav Petkov <bp@amd64.org>
Cc: Doug Thompson <norsk5@yahoo.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 include/linux/edac.h | 103 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 102 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/edac.h b/include/linux/edac.h
index 67717cab1313..9e628434e164 100644
--- a/include/linux/edac.h
+++ b/include/linux/edac.h
@@ -70,6 +70,25 @@ enum dev_type {
 #define DEV_FLAG_X32		BIT(DEV_X32)
 #define DEV_FLAG_X64		BIT(DEV_X64)
 
+/**
+ * enum hw_event_mc_err_type - type of the detected error
+ *
+ * @HW_EVENT_ERR_CORRECTED:	Corrected Error - Indicates that an ECC
+ *				corrected error was detected
+ * @HW_EVENT_ERR_UNCORRECTED:	Uncorrected Error - Indicates an error that
+ *				can't be corrected by ECC, but it is not
+ *				fatal (maybe it is on an unused memory area,
+ *				or the memory controller could recover from
+ *				it for example, by re-trying the operation).
+ * @HW_EVENT_ERR_FATAL:		Fatal Error - Uncorrected error that could not
+ *				be recovered.
+ */
+enum hw_event_mc_err_type {
+	HW_EVENT_ERR_CORRECTED,
+	HW_EVENT_ERR_UNCORRECTED,
+	HW_EVENT_ERR_FATAL,
+};
+
 /**
  * enum mem_type - memory types. For a more detailed reference, please see
  *			http://en.wikipedia.org/wiki/DRAM
@@ -312,7 +331,89 @@ enum scrub_type {
  * PS - I enjoyed writing all that about as much as you enjoyed reading it.
  */
 
-/* FIXME: add a per-dimm ce error count */
+/**
+ * enum edac_mc_layer - memory controller hierarchy layer
+ *
+ * @EDAC_MC_LAYER_BRANCH:	memory layer is named "branch"
+ * @EDAC_MC_LAYER_CHANNEL:	memory layer is named "channel"
+ * @EDAC_MC_LAYER_SLOT:		memory layer is named "slot"
+ * @EDAC_MC_LAYER_CHIP_SELECT:	memory layer is named "chip select"
+ *
+ * This enum is used by the drivers to tell edac_mc_sysfs what name should
+ * be used when describing a memory stick location.
+ */
+enum edac_mc_layer_type {
+	EDAC_MC_LAYER_BRANCH,
+	EDAC_MC_LAYER_CHANNEL,
+	EDAC_MC_LAYER_SLOT,
+	EDAC_MC_LAYER_CHIP_SELECT,
+};
+
+/**
+ * struct edac_mc_layer - describes the memory controller hierarchy
+ * @layer:		layer type
+ * @size:		number of components per layer. For example,
+ *			if the channel layer has two channels, size = 2
+ * @is_virt_csrow:	This layer is part of the "csrow" when old API
+ *			compatibility mode is enabled. Otherwise, it is
+ *			a channel
+ */
+struct edac_mc_layer {
+	enum edac_mc_layer_type	type;
+	unsigned		size;
+	bool			is_virt_csrow;
+};
+
+/*
+ * Maximum number of layers used by the memory controller to uniquely
+ * identify a single memory stick.
+ * NOTE: Changing this constant requires not only to change the constant
+ * below, but also to change the existing code at the core, as there are
+ * some code there that are optimized for 3 layers.
+ */
+#define EDAC_MAX_LAYERS		3
+
+/**
+ * EDAC_DIMM_PTR - Macro responsible to find a pointer inside a pointer array
+ *		   for the element given by [layer0,layer1,layer2] position
+ *
+ * @layers:	a struct edac_mc_layer array, describing how many elements
+ *		were allocated for each layer
+ * @var:	name of the var where we want to get the pointer
+ *		(like mci->dimms)
+ * @n_layers:	Number of layers at the @layers array
+ * @layer0:	layer0 position
+ * @layer1:	layer1 position. Unused if n_layers < 2
+ * @layer2:	layer2 position. Unused if n_layers < 3
+ *
+ * For 1 layer, this macro returns &var[layer0]
+ * For 2 layers, this macro is similar to allocate a bi-dimensional array
+ *		and to return "&var[layer0][layer1]"
+ * For 3 layers, this macro is similar to allocate a tri-dimensional array
+ *		and to return "&var[layer0][layer1][layer2]"
+ *
+ * A loop could be used here to make it more generic, but, as we only have
+ * 3 layers, this is a little faster.
+ * By design, layers can never be 0 or more than 3. If that ever happens,
+ * a NULL is returned, causing an OOPS during the memory allocation routine,
+ * with would point to the developer that he's doing something wrong.
+ */
+#define EDAC_DIMM_PTR(layers, var, nlayers, layer0, layer1, layer2) ({	\
+	typeof(var) __p;						\
+	if ((nlayers) == 1)						\
+		__p = &var[layer0];					\
+	else if ((nlayers) == 2)					\
+		__p = &var[(layer1) + ((layers[1]).size * (layer0))];	\
+	else if ((nlayers) == 3)					\
+		__p = &var[(layer2) + ((layers[2]).size * ((layer1) +	\
+			    ((layers[1]).size * (layer0))))];		\
+	else								\
+		__p = NULL;						\
+	__p;								\
+})
+
+
+/* FIXME: add the proper per-location error counts */
 struct dimm_info {
 	char label[EDAC_MC_LABEL_LEN + 1];	/* DIMM label on motherboard */
 	unsigned memory_controller;
-- 
cgit v1.3-7-g2ca7


From 4275be63559719c3149b19751029f1b0f1b26775 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@redhat.com>
Date: Wed, 18 Apr 2012 15:20:50 -0300
Subject: edac: Change internal representation to work with layers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Change the EDAC internal representation to work with non-csrow
based memory controllers.

There are lots of those memory controllers nowadays, and more
are coming. So, the EDAC internal representation needs to be
changed, in order to work with those memory controllers, while
preserving backward compatibility with the old ones.

The edac core was written with the idea that memory controllers
are able to directly access csrows.

This is not true for FB-DIMM and RAMBUS memory controllers.

Also, some recent advanced memory controllers don't present a per-csrows
view. Instead, they view memories as DIMMs, instead of ranks.

So, change the allocation and error report routines to allow
them to work with all types of architectures.

This will allow the removal of several hacks with FB-DIMM and RAMBUS
memory controllers.

Also, several tests were done on different platforms using different
x86 drivers.

TODO: a multi-rank DIMMs are currently represented by multiple DIMM
entries in struct dimm_info. That means that changing a label for one
rank won't change the same label for the other ranks at the same DIMM.
This bug is present since the beginning of the EDAC, so it is not a big
deal. However, on several drivers, it is possible to fix this issue, but
it should be a per-driver fix, as the csrow => DIMM arrangement may not
be equal for all. So, don't try to fix it here yet.

I tried to make this patch as short as possible, preceding it with
several other patches that simplified the logic here. Yet, as the
internal API changes, all drivers need changes. The changes are
generally bigger in the drivers for FB-DIMMs.

Cc: Aristeu Rozanski <arozansk@redhat.com>
Cc: Doug Thompson <norsk5@yahoo.com>
Cc: Borislav Petkov <borislav.petkov@amd.com>
Cc: Mark Gross <mark.gross@intel.com>
Cc: Jason Uhlenkott <juhlenko@akamai.com>
Cc: Tim Small <tim@buttersideup.com>
Cc: Ranganathan Desikan <ravi@jetztechnologies.com>
Cc: "Arvind R." <arvino55@gmail.com>
Cc: Olof Johansson <olof@lixom.net>
Cc: Egor Martovetsky <egor@pasemi.com>
Cc: Chris Metcalf <cmetcalf@tilera.com>
Cc: Michal Marek <mmarek@suse.cz>
Cc: Jiri Kosina <jkosina@suse.cz>
Cc: Joe Perches <joe@perches.com>
Cc: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Hitoshi Mitake <h.mitake@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: "Niklas Söderlund" <niklas.soderlund@ericsson.com>
Cc: Shaohui Xie <Shaohui.Xie@freescale.com>
Cc: Josh Boyer <jwboyer@gmail.com>
Cc: linuxppc-dev@lists.ozlabs.org
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/edac/edac_core.h |  99 +++++--
 drivers/edac/edac_mc.c   | 702 ++++++++++++++++++++++++++++++-----------------
 include/linux/edac.h     |  38 ++-
 3 files changed, 552 insertions(+), 287 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/edac/edac_core.h b/drivers/edac/edac_core.h
index e48ab3108ad8..1286c5e1bdc0 100644
--- a/drivers/edac/edac_core.h
+++ b/drivers/edac/edac_core.h
@@ -447,8 +447,12 @@ static inline void pci_write_bits32(struct pci_dev *pdev, int offset,
 
 #endif				/* CONFIG_PCI */
 
-extern struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows,
-					  unsigned nr_chans, int edac_index);
+struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows,
+				   unsigned nr_chans, int edac_index);
+struct mem_ctl_info *new_edac_mc_alloc(unsigned edac_index,
+				   unsigned n_layers,
+				   struct edac_mc_layer *layers,
+				   unsigned sz_pvt);
 extern int edac_mc_add_mc(struct mem_ctl_info *mci);
 extern void edac_mc_free(struct mem_ctl_info *mci);
 extern struct mem_ctl_info *edac_mc_find(int idx);
@@ -467,24 +471,78 @@ extern int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci,
  * reporting logic and function interface - reduces conditional
  * statement clutter and extra function arguments.
  */
-extern void edac_mc_handle_ce(struct mem_ctl_info *mci,
-			      unsigned long page_frame_number,
-			      unsigned long offset_in_page,
-			      unsigned long syndrome, int row, int channel,
-			      const char *msg);
-extern void edac_mc_handle_ce_no_info(struct mem_ctl_info *mci,
-				      const char *msg);
-extern void edac_mc_handle_ue(struct mem_ctl_info *mci,
-			      unsigned long page_frame_number,
-			      unsigned long offset_in_page, int row,
-			      const char *msg);
-extern void edac_mc_handle_ue_no_info(struct mem_ctl_info *mci,
-				      const char *msg);
-extern void edac_mc_handle_fbd_ue(struct mem_ctl_info *mci, unsigned int csrow,
-				  unsigned int channel0, unsigned int channel1,
-				  char *msg);
-extern void edac_mc_handle_fbd_ce(struct mem_ctl_info *mci, unsigned int csrow,
-				  unsigned int channel, char *msg);
+
+void edac_mc_handle_error(const enum hw_event_mc_err_type type,
+			  struct mem_ctl_info *mci,
+			  const unsigned long page_frame_number,
+			  const unsigned long offset_in_page,
+			  const unsigned long syndrome,
+			  const int layer0,
+			  const int layer1,
+			  const int layer2,
+			  const char *msg,
+			  const char *other_detail,
+			  const void *mcelog);
+
+static inline void edac_mc_handle_ce(struct mem_ctl_info *mci,
+				     unsigned long page_frame_number,
+				     unsigned long offset_in_page,
+				     unsigned long syndrome, int row, int channel,
+				     const char *msg)
+{
+	 edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
+			      page_frame_number, offset_in_page, syndrome,
+			      row, channel, -1, msg, NULL, NULL);
+}
+
+static inline void edac_mc_handle_ce_no_info(struct mem_ctl_info *mci,
+					     const char *msg)
+{
+	 edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
+			      0, 0, 0, -1, -1, -1, msg, NULL, NULL);
+}
+
+static inline void edac_mc_handle_ue(struct mem_ctl_info *mci,
+				     unsigned long page_frame_number,
+				     unsigned long offset_in_page, int row,
+				     const char *msg)
+{
+	 edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
+			      page_frame_number, offset_in_page, 0,
+			      row, -1, -1, msg, NULL, NULL);
+}
+
+static inline void edac_mc_handle_ue_no_info(struct mem_ctl_info *mci,
+					     const char *msg)
+{
+	 edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
+			      0, 0, 0, -1, -1, -1, msg, NULL, NULL);
+}
+
+static inline void edac_mc_handle_fbd_ue(struct mem_ctl_info *mci,
+					 unsigned int csrow,
+					 unsigned int channel0,
+					 unsigned int channel1,
+					 char *msg)
+{
+	/*
+	 *FIXME: The error can also be at channel1 (e. g. at the second
+	 *	  channel of the same branch). The fix is to push
+	 *	  edac_mc_handle_error() call into each driver
+	 */
+	 edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
+			      0, 0, 0,
+			      csrow, channel0, -1, msg, NULL, NULL);
+}
+
+static inline void edac_mc_handle_fbd_ce(struct mem_ctl_info *mci,
+					 unsigned int csrow,
+					 unsigned int channel, char *msg)
+{
+	 edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
+			      0, 0, 0,
+			      csrow, channel, -1, msg, NULL, NULL);
+}
 
 /*
  * edac_device APIs
@@ -496,6 +554,7 @@ extern void edac_device_handle_ue(struct edac_device_ctl_info *edac_dev,
 extern void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev,
 				int inst_nr, int block_nr, const char *msg);
 extern int edac_device_alloc_index(void);
+extern const char *edac_layer_name[];
 
 /*
  * edac_pci APIs
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index ff8c0020649c..1bd237ee4ca7 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -44,9 +44,25 @@ static void edac_mc_dump_channel(struct rank_info *chan)
 	debugf4("\tchannel = %p\n", chan);
 	debugf4("\tchannel->chan_idx = %d\n", chan->chan_idx);
 	debugf4("\tchannel->csrow = %p\n\n", chan->csrow);
-	debugf4("\tdimm->ce_count = %d\n", chan->dimm->ce_count);
-	debugf4("\tdimm->label = '%s'\n", chan->dimm->label);
-	debugf4("\tdimm->nr_pages = 0x%x\n", chan->dimm->nr_pages);
+	debugf4("\tchannel->dimm = %p\n", chan->dimm);
+}
+
+static void edac_mc_dump_dimm(struct dimm_info *dimm)
+{
+	int i;
+
+	debugf4("\tdimm = %p\n", dimm);
+	debugf4("\tdimm->label = '%s'\n", dimm->label);
+	debugf4("\tdimm->nr_pages = 0x%x\n", dimm->nr_pages);
+	debugf4("\tdimm location ");
+	for (i = 0; i < dimm->mci->n_layers; i++) {
+		printk(KERN_CONT "%d", dimm->location[i]);
+		if (i < dimm->mci->n_layers - 1)
+			printk(KERN_CONT ".");
+	}
+	printk(KERN_CONT "\n");
+	debugf4("\tdimm->grain = %d\n", dimm->grain);
+	debugf4("\tdimm->nr_pages = 0x%x\n", dimm->nr_pages);
 }
 
 static void edac_mc_dump_csrow(struct csrow_info *csrow)
@@ -70,6 +86,8 @@ static void edac_mc_dump_mci(struct mem_ctl_info *mci)
 	debugf4("\tmci->edac_check = %p\n", mci->edac_check);
 	debugf3("\tmci->nr_csrows = %d, csrows = %p\n",
 		mci->nr_csrows, mci->csrows);
+	debugf3("\tmci->nr_dimms = %d, dimms = %p\n",
+		mci->tot_dimms, mci->dimms);
 	debugf3("\tdev = %p\n", mci->dev);
 	debugf3("\tmod_name:ctl_name = %s:%s\n", mci->mod_name, mci->ctl_name);
 	debugf3("\tpvt_info = %p\n\n", mci->pvt_info);
@@ -157,10 +175,12 @@ void *edac_align_ptr(void **p, unsigned size, int n_elems)
 }
 
 /**
- * edac_mc_alloc: Allocate a struct mem_ctl_info structure
- * @size_pvt:	size of private storage needed
- * @nr_csrows:	Number of CWROWS needed for this MC
- * @nr_chans:	Number of channels for the MC
+ * edac_mc_alloc: Allocate and partially fill a struct mem_ctl_info structure
+ * @mc_num:		Memory controller number
+ * @n_layers:		Number of MC hierarchy layers
+ * layers:		Describes each layer as seen by the Memory Controller
+ * @size_pvt:		size of private storage needed
+ *
  *
  * Everything is kmalloc'ed as one big chunk - more efficient.
  * Only can be used if all structures have the same lifetime - otherwise
@@ -168,22 +188,49 @@ void *edac_align_ptr(void **p, unsigned size, int n_elems)
  *
  * Use edac_mc_free() to free mc structures allocated by this function.
  *
+ * NOTE: drivers handle multi-rank memories in different ways: in some
+ * drivers, one multi-rank memory stick is mapped as one entry, while, in
+ * others, a single multi-rank memory stick would be mapped into several
+ * entries. Currently, this function will allocate multiple struct dimm_info
+ * on such scenarios, as grouping the multiple ranks require drivers change.
+ *
  * Returns:
  *	NULL allocation failed
  *	struct mem_ctl_info pointer
  */
-struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows,
-				unsigned nr_chans, int edac_index)
+struct mem_ctl_info *new_edac_mc_alloc(unsigned mc_num,
+				       unsigned n_layers,
+				       struct edac_mc_layer *layers,
+				       unsigned sz_pvt)
 {
-	void *ptr = NULL;
 	struct mem_ctl_info *mci;
-	struct csrow_info *csi, *csrow;
+	struct edac_mc_layer *layer;
+	struct csrow_info *csi, *csr;
 	struct rank_info *chi, *chp, *chan;
 	struct dimm_info *dimm;
-	void *pvt;
-	unsigned size;
-	int row, chn;
-	int err;
+	u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS];
+	unsigned pos[EDAC_MAX_LAYERS];
+	void *pvt, *ptr = NULL;
+	unsigned size, tot_dimms = 1, count = 1;
+	unsigned tot_csrows = 1, tot_channels = 1, tot_errcount = 0;
+	int i, j, err, row, chn;
+	bool per_rank = false;
+
+	BUG_ON(n_layers > EDAC_MAX_LAYERS || n_layers == 0);
+	/*
+	 * Calculate the total amount of dimms and csrows/cschannels while
+	 * in the old API emulation mode
+	 */
+	for (i = 0; i < n_layers; i++) {
+		tot_dimms *= layers[i].size;
+		if (layers[i].is_virt_csrow)
+			tot_csrows *= layers[i].size;
+		else
+			tot_channels *= layers[i].size;
+
+		if (layers[i].type == EDAC_MC_LAYER_CHIP_SELECT)
+			per_rank = true;
+	}
 
 	/* Figure out the offsets of the various items from the start of an mc
 	 * structure.  We want the alignment of each item to be at least as
@@ -191,12 +238,27 @@ struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows,
 	 * hardcode everything into a single struct.
 	 */
 	mci = edac_align_ptr(&ptr, sizeof(*mci), 1);
-	csi = edac_align_ptr(&ptr, sizeof(*csi), nr_csrows);
-	chi = edac_align_ptr(&ptr, sizeof(*chi), nr_csrows * nr_chans);
-	dimm = edac_align_ptr(&ptr, sizeof(*dimm), nr_csrows * nr_chans);
+	layer = edac_align_ptr(&ptr, sizeof(*layer), n_layers);
+	csi = edac_align_ptr(&ptr, sizeof(*csi), tot_csrows);
+	chi = edac_align_ptr(&ptr, sizeof(*chi), tot_csrows * tot_channels);
+	dimm = edac_align_ptr(&ptr, sizeof(*dimm), tot_dimms);
+	for (i = 0; i < n_layers; i++) {
+		count *= layers[i].size;
+		debugf4("%s: errcount layer %d size %d\n", __func__, i, count);
+		ce_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
+		ue_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
+		tot_errcount += 2 * count;
+	}
+
+	debugf4("%s: allocating %d error counters\n", __func__, tot_errcount);
 	pvt = edac_align_ptr(&ptr, sz_pvt, 1);
 	size = ((unsigned long)pvt) + sz_pvt;
 
+	debugf1("%s(): allocating %u bytes for mci data (%d %s, %d csrows/channels)\n",
+		__func__, size,
+		tot_dimms,
+		per_rank ? "ranks" : "dimms",
+		tot_csrows * tot_channels);
 	mci = kzalloc(size, GFP_KERNEL);
 	if (mci == NULL)
 		return NULL;
@@ -204,42 +266,87 @@ struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows,
 	/* Adjust pointers so they point within the memory we just allocated
 	 * rather than an imaginary chunk of memory located at address 0.
 	 */
+	layer = (struct edac_mc_layer *)(((char *)mci) + ((unsigned long)layer));
 	csi = (struct csrow_info *)(((char *)mci) + ((unsigned long)csi));
 	chi = (struct rank_info *)(((char *)mci) + ((unsigned long)chi));
 	dimm = (struct dimm_info *)(((char *)mci) + ((unsigned long)dimm));
+	for (i = 0; i < n_layers; i++) {
+		mci->ce_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ce_per_layer[i]));
+		mci->ue_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ue_per_layer[i]));
+	}
 	pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;
 
 	/* setup index and various internal pointers */
-	mci->mc_idx = edac_index;
+	mci->mc_idx = mc_num;
 	mci->csrows = csi;
 	mci->dimms  = dimm;
+	mci->tot_dimms = tot_dimms;
 	mci->pvt_info = pvt;
-	mci->nr_csrows = nr_csrows;
+	mci->n_layers = n_layers;
+	mci->layers = layer;
+	memcpy(mci->layers, layers, sizeof(*layer) * n_layers);
+	mci->nr_csrows = tot_csrows;
+	mci->num_cschannel = tot_channels;
+	mci->mem_is_per_rank = per_rank;
 
 	/*
-	 * For now, assumes that a per-csrow arrangement for dimms.
-	 * This will be latter changed.
+	 * Fill the csrow struct
 	 */
-	dimm = mci->dimms;
-
-	for (row = 0; row < nr_csrows; row++) {
-		csrow = &csi[row];
-		csrow->csrow_idx = row;
-		csrow->mci = mci;
-		csrow->nr_channels = nr_chans;
-		chp = &chi[row * nr_chans];
-		csrow->channels = chp;
-
-		for (chn = 0; chn < nr_chans; chn++) {
+	for (row = 0; row < tot_csrows; row++) {
+		csr = &csi[row];
+		csr->csrow_idx = row;
+		csr->mci = mci;
+		csr->nr_channels = tot_channels;
+		chp = &chi[row * tot_channels];
+		csr->channels = chp;
+
+		for (chn = 0; chn < tot_channels; chn++) {
 			chan = &chp[chn];
 			chan->chan_idx = chn;
-			chan->csrow = csrow;
+			chan->csrow = csr;
+		}
+	}
+
+	/*
+	 * Fill the dimm struct
+	 */
+	memset(&pos, 0, sizeof(pos));
+	row = 0;
+	chn = 0;
+	debugf4("%s: initializing %d %s\n", __func__, tot_dimms,
+		per_rank ? "ranks" : "dimms");
+	for (i = 0; i < tot_dimms; i++) {
+		chan = &csi[row].channels[chn];
+		dimm = EDAC_DIMM_PTR(layer, mci->dimms, n_layers,
+			       pos[0], pos[1], pos[2]);
+		dimm->mci = mci;
+
+		debugf2("%s: %d: %s%zd (%d:%d:%d): row %d, chan %d\n", __func__,
+			i, per_rank ? "rank" : "dimm", (dimm - mci->dimms),
+			pos[0], pos[1], pos[2], row, chn);
+
+		/* Copy DIMM location */
+		for (j = 0; j < n_layers; j++)
+			dimm->location[j] = pos[j];
+
+		/* Link it to the csrows old API data */
+		chan->dimm = dimm;
+		dimm->csrow = row;
+		dimm->cschannel = chn;
+
+		/* Increment csrow location */
+		row++;
+		if (row == tot_csrows) {
+			row = 0;
+			chn++;
+		}
 
-			mci->csrows[row].channels[chn].dimm = dimm;
-			dimm->csrow = row;
-			dimm->csrow_channel = chn;
-			dimm++;
-			mci->nr_dimms++;
+		/* Increment dimm location */
+		for (j = n_layers - 1; j >= 0; j--) {
+			pos[j]++;
+			if (pos[j] < layers[j].size)
+				break;
+			pos[j] = 0;
 		}
 	}
 
@@ -263,6 +370,46 @@ struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows,
 	 */
 	return mci;
 }
+EXPORT_SYMBOL_GPL(new_edac_mc_alloc);
+
+/**
+ * edac_mc_alloc: Allocate and partially fill a struct mem_ctl_info structure
+ * @mc_num:		Memory controller number
+ * @n_layers:		Number of layers at the MC hierarchy
+ * layers:		Describes each layer as seen by the Memory Controller
+ * @size_pvt:		Size of private storage needed
+ *
+ *
+ * FIXME: drivers handle multi-rank memories in different ways: some
+ * drivers map multi-ranked DIMMs as one DIMM while others
+ * as several DIMMs.
+ *
+ * Everything is kmalloc'ed as one big chunk - more efficient.
+ * It can only be used if all structures have the same lifetime - otherwise
+ * you have to allocate and initialize your own structures.
+ *
+ * Use edac_mc_free() to free mc structures allocated by this function.
+ *
+ * Returns:
+ *	On failure: NULL
+ *	On success: struct mem_ctl_info pointer
+ */
+
+struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows,
+				   unsigned nr_chans, int mc_num)
+{
+	unsigned n_layers = 2;
+	struct edac_mc_layer layers[n_layers];
+
+	layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
+	layers[0].size = nr_csrows;
+	layers[0].is_virt_csrow = true;
+	layers[1].type = EDAC_MC_LAYER_CHANNEL;
+	layers[1].size = nr_chans;
+	layers[1].is_virt_csrow = false;
+
+	return new_edac_mc_alloc(mc_num, ARRAY_SIZE(layers), layers, sz_pvt);
+}
 EXPORT_SYMBOL_GPL(edac_mc_alloc);
 
 /**
@@ -528,7 +675,6 @@ EXPORT_SYMBOL(edac_mc_find);
  * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and
  *                 create sysfs entries associated with mci structure
  * @mci: pointer to the mci structure to be added to the list
- * @mc_idx: A unique numeric identifier to be assigned to the 'mci' structure.
  *
  * Return:
  *	0	Success
@@ -555,6 +701,8 @@ int edac_mc_add_mc(struct mem_ctl_info *mci)
 				edac_mc_dump_channel(&mci->csrows[i].
 						channels[j]);
 		}
+		for (i = 0; i < mci->tot_dimms; i++)
+			edac_mc_dump_dimm(&mci->dimms[i]);
 	}
 #endif
 	mutex_lock(&mem_ctls_mutex);
@@ -712,261 +860,307 @@ int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page)
 }
 EXPORT_SYMBOL_GPL(edac_mc_find_csrow_by_page);
 
-/* FIXME - setable log (warning/emerg) levels */
-/* FIXME - integrate with evlog: http://evlog.sourceforge.net/ */
-void edac_mc_handle_ce(struct mem_ctl_info *mci,
-		unsigned long page_frame_number,
-		unsigned long offset_in_page, unsigned long syndrome,
-		int row, int channel, const char *msg)
+const char *edac_layer_name[] = {
+	[EDAC_MC_LAYER_BRANCH] = "branch",
+	[EDAC_MC_LAYER_CHANNEL] = "channel",
+	[EDAC_MC_LAYER_SLOT] = "slot",
+	[EDAC_MC_LAYER_CHIP_SELECT] = "csrow",
+};
+EXPORT_SYMBOL_GPL(edac_layer_name);
+
+static void edac_inc_ce_error(struct mem_ctl_info *mci,
+				    bool enable_per_layer_report,
+				    const int pos[EDAC_MAX_LAYERS])
 {
-	unsigned long remapped_page;
-	char *label = NULL;
-	u32 grain;
+	int i, index = 0;
 
-	debugf3("MC%d: %s()\n", mci->mc_idx, __func__);
+	mci->ce_count++;
 
-	/* FIXME - maybe make panic on INTERNAL ERROR an option */
-	if (row >= mci->nr_csrows || row < 0) {
-		/* something is wrong */
-		edac_mc_printk(mci, KERN_ERR,
-			"INTERNAL ERROR: row out of range "
-			"(%d >= %d)\n", row, mci->nr_csrows);
-		edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
+	if (!enable_per_layer_report) {
+		mci->ce_noinfo_count++;
 		return;
 	}
 
-	if (channel >= mci->csrows[row].nr_channels || channel < 0) {
-		/* something is wrong */
-		edac_mc_printk(mci, KERN_ERR,
-			"INTERNAL ERROR: channel out of range "
-			"(%d >= %d)\n", channel,
-			mci->csrows[row].nr_channels);
-		edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
+	for (i = 0; i < mci->n_layers; i++) {
+		if (pos[i] < 0)
+			break;
+		index += pos[i];
+		mci->ce_per_layer[i][index]++;
+
+		if (i < mci->n_layers - 1)
+			index *= mci->layers[i + 1].size;
+	}
+}
+
+static void edac_inc_ue_error(struct mem_ctl_info *mci,
+				    bool enable_per_layer_report,
+				    const int pos[EDAC_MAX_LAYERS])
+{
+	int i, index = 0;
+
+	mci->ue_count++;
+
+	if (!enable_per_layer_report) {
+		mci->ce_noinfo_count++;
 		return;
 	}
 
-	label = mci->csrows[row].channels[channel].dimm->label;
-	grain = mci->csrows[row].channels[channel].dimm->grain;
+	for (i = 0; i < mci->n_layers; i++) {
+		if (pos[i] < 0)
+			break;
+		index += pos[i];
+		mci->ue_per_layer[i][index]++;
 
-	if (edac_mc_get_log_ce())
-		/* FIXME - put in DIMM location */
-		edac_mc_printk(mci, KERN_WARNING,
-			"CE page 0x%lx, offset 0x%lx, grain %d, syndrome "
-			"0x%lx, row %d, channel %d, label \"%s\": %s\n",
-			page_frame_number, offset_in_page,
-			grain, syndrome, row, channel,
-			label, msg);
+		if (i < mci->n_layers - 1)
+			index *= mci->layers[i + 1].size;
+	}
+}
 
-	mci->ce_count++;
-	mci->csrows[row].ce_count++;
-	mci->csrows[row].channels[channel].dimm->ce_count++;
-	mci->csrows[row].channels[channel].ce_count++;
+static void edac_ce_error(struct mem_ctl_info *mci,
+			  const int pos[EDAC_MAX_LAYERS],
+			  const char *msg,
+			  const char *location,
+			  const char *label,
+			  const char *detail,
+			  const char *other_detail,
+			  const bool enable_per_layer_report,
+			  const unsigned long page_frame_number,
+			  const unsigned long offset_in_page,
+			  u32 grain)
+{
+	unsigned long remapped_page;
+
+	if (edac_mc_get_log_ce()) {
+		if (other_detail && *other_detail)
+			edac_mc_printk(mci, KERN_WARNING,
+				       "CE %s on %s (%s%s - %s)\n",
+				       msg, label, location,
+				       detail, other_detail);
+		else
+			edac_mc_printk(mci, KERN_WARNING,
+				       "CE %s on %s (%s%s)\n",
+				       msg, label, location,
+				       detail);
+	}
+	edac_inc_ce_error(mci, enable_per_layer_report, pos);
 
 	if (mci->scrub_mode & SCRUB_SW_SRC) {
 		/*
-		 * Some MC's can remap memory so that it is still available
-		 * at a different address when PCI devices map into memory.
-		 * MC's that can't do this lose the memory where PCI devices
-		 * are mapped.  This mapping is MC dependent and so we call
-		 * back into the MC driver for it to map the MC page to
-		 * a physical (CPU) page which can then be mapped to a virtual
-		 * page - which can then be scrubbed.
-		 */
+			* Some memory controllers (called MCs below) can remap
+			* memory so that it is still available at a different
+			* address when PCI devices map into memory.
+			* MC's that can't do this, lose the memory where PCI
+			* devices are mapped. This mapping is MC-dependent
+			* and so we call back into the MC driver for it to
+			* map the MC page to a physical (CPU) page which can
+			* then be mapped to a virtual page - which can then
+			* be scrubbed.
+			*/
 		remapped_page = mci->ctl_page_to_phys ?
 			mci->ctl_page_to_phys(mci, page_frame_number) :
 			page_frame_number;
 
-		edac_mc_scrub_block(remapped_page, offset_in_page, grain);
+		edac_mc_scrub_block(remapped_page,
+					offset_in_page, grain);
 	}
 }
-EXPORT_SYMBOL_GPL(edac_mc_handle_ce);
 
-void edac_mc_handle_ce_no_info(struct mem_ctl_info *mci, const char *msg)
+static void edac_ue_error(struct mem_ctl_info *mci,
+			  const int pos[EDAC_MAX_LAYERS],
+			  const char *msg,
+			  const char *location,
+			  const char *label,
+			  const char *detail,
+			  const char *other_detail,
+			  const bool enable_per_layer_report)
 {
-	if (edac_mc_get_log_ce())
-		edac_mc_printk(mci, KERN_WARNING,
-			"CE - no information available: %s\n", msg);
+	if (edac_mc_get_log_ue()) {
+		if (other_detail && *other_detail)
+			edac_mc_printk(mci, KERN_WARNING,
+				       "UE %s on %s (%s%s - %s)\n",
+			               msg, label, location, detail,
+				       other_detail);
+		else
+			edac_mc_printk(mci, KERN_WARNING,
+				       "UE %s on %s (%s%s)\n",
+			               msg, label, location, detail);
+	}
 
-	mci->ce_noinfo_count++;
-	mci->ce_count++;
+	if (edac_mc_get_panic_on_ue()) {
+		if (other_detail && *other_detail)
+			panic("UE %s on %s (%s%s - %s)\n",
+			      msg, label, location, detail, other_detail);
+		else
+			panic("UE %s on %s (%s%s)\n",
+			      msg, label, location, detail);
+	}
+
+	edac_inc_ue_error(mci, enable_per_layer_report, pos);
 }
-EXPORT_SYMBOL_GPL(edac_mc_handle_ce_no_info);
 
-void edac_mc_handle_ue(struct mem_ctl_info *mci,
-		unsigned long page_frame_number,
-		unsigned long offset_in_page, int row, const char *msg)
+#define OTHER_LABEL " or "
+void edac_mc_handle_error(const enum hw_event_mc_err_type type,
+			  struct mem_ctl_info *mci,
+			  const unsigned long page_frame_number,
+			  const unsigned long offset_in_page,
+			  const unsigned long syndrome,
+			  const int layer0,
+			  const int layer1,
+			  const int layer2,
+			  const char *msg,
+			  const char *other_detail,
+			  const void *mcelog)
 {
-	int len = EDAC_MC_LABEL_LEN * 4;
-	char labels[len + 1];
-	char *pos = labels;
-	int chan;
-	int chars;
-	char *label = NULL;
+	/* FIXME: too much for stack: move it to some pre-alocated area */
+	char detail[80], location[80];
+	char label[(EDAC_MC_LABEL_LEN + 1 + sizeof(OTHER_LABEL)) * mci->tot_dimms];
+	char *p;
+	int row = -1, chan = -1;
+	int pos[EDAC_MAX_LAYERS] = { layer0, layer1, layer2 };
+	int i;
 	u32 grain;
+	bool enable_per_layer_report = false;
 
 	debugf3("MC%d: %s()\n", mci->mc_idx, __func__);
 
-	/* FIXME - maybe make panic on INTERNAL ERROR an option */
-	if (row >= mci->nr_csrows || row < 0) {
-		/* something is wrong */
-		edac_mc_printk(mci, KERN_ERR,
-			"INTERNAL ERROR: row out of range "
-			"(%d >= %d)\n", row, mci->nr_csrows);
-		edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
-		return;
-	}
-
-	grain = mci->csrows[row].channels[0].dimm->grain;
-	label = mci->csrows[row].channels[0].dimm->label;
-	chars = snprintf(pos, len + 1, "%s", label);
-	len -= chars;
-	pos += chars;
-
-	for (chan = 1; (chan < mci->csrows[row].nr_channels) && (len > 0);
-		chan++) {
-		label = mci->csrows[row].channels[chan].dimm->label;
-		chars = snprintf(pos, len + 1, ":%s", label);
-		len -= chars;
-		pos += chars;
+	/*
+	 * Check if the event report is consistent and if the memory
+	 * location is known. If it is known, enable_per_layer_report will be
+	 * true, the DIMM(s) label info will be filled and the per-layer
+	 * error counters will be incremented.
+	 */
+	for (i = 0; i < mci->n_layers; i++) {
+		if (pos[i] >= (int)mci->layers[i].size) {
+			if (type == HW_EVENT_ERR_CORRECTED)
+				p = "CE";
+			else
+				p = "UE";
+
+			edac_mc_printk(mci, KERN_ERR,
+				       "INTERNAL ERROR: %s value is out of range (%d >= %d)\n",
+				       edac_layer_name[mci->layers[i].type],
+				       pos[i], mci->layers[i].size);
+			/*
+			 * Instead of just returning it, let's use what's
+			 * known about the error. The increment routines and
+			 * the DIMM filter logic will do the right thing by
+			 * pointing the likely damaged DIMMs.
+			 */
+			pos[i] = -1;
+		}
+		if (pos[i] >= 0)
+			enable_per_layer_report = true;
 	}
 
-	if (edac_mc_get_log_ue())
-		edac_mc_printk(mci, KERN_EMERG,
-			"UE page 0x%lx, offset 0x%lx, grain %d, row %d, "
-			"labels \"%s\": %s\n", page_frame_number,
-			offset_in_page, grain, row, labels, msg);
-
-	if (edac_mc_get_panic_on_ue())
-		panic("EDAC MC%d: UE page 0x%lx, offset 0x%lx, grain %d, "
-			"row %d, labels \"%s\": %s\n", mci->mc_idx,
-			page_frame_number, offset_in_page,
-			grain, row, labels, msg);
-
-	mci->ue_count++;
-	mci->csrows[row].ue_count++;
-}
-EXPORT_SYMBOL_GPL(edac_mc_handle_ue);
-
-void edac_mc_handle_ue_no_info(struct mem_ctl_info *mci, const char *msg)
-{
-	if (edac_mc_get_panic_on_ue())
-		panic("EDAC MC%d: Uncorrected Error", mci->mc_idx);
+	/*
+	 * Get the dimm label/grain that applies to the match criteria.
+	 * As the error algorithm may not be able to point to just one memory
+	 * stick, the logic here will get all possible labels that could
+	 * pottentially be affected by the error.
+	 * On FB-DIMM memory controllers, for uncorrected errors, it is common
+	 * to have only the MC channel and the MC dimm (also called "branch")
+	 * but the channel is not known, as the memory is arranged in pairs,
+	 * where each memory belongs to a separate channel within the same
+	 * branch.
+	 */
+	grain = 0;
+	p = label;
+	*p = '\0';
+	for (i = 0; i < mci->tot_dimms; i++) {
+		struct dimm_info *dimm = &mci->dimms[i];
 
-	if (edac_mc_get_log_ue())
-		edac_mc_printk(mci, KERN_WARNING,
-			"UE - no information available: %s\n", msg);
-	mci->ue_noinfo_count++;
-	mci->ue_count++;
-}
-EXPORT_SYMBOL_GPL(edac_mc_handle_ue_no_info);
+		if (layer0 >= 0 && layer0 != dimm->location[0])
+			continue;
+		if (layer1 >= 0 && layer1 != dimm->location[1])
+			continue;
+		if (layer2 >= 0 && layer2 != dimm->location[2])
+			continue;
 
-/*************************************************************
- * On Fully Buffered DIMM modules, this help function is
- * called to process UE events
- */
-void edac_mc_handle_fbd_ue(struct mem_ctl_info *mci,
-			unsigned int csrow,
-			unsigned int channela,
-			unsigned int channelb, char *msg)
-{
-	int len = EDAC_MC_LABEL_LEN * 4;
-	char labels[len + 1];
-	char *pos = labels;
-	int chars;
-	char *label;
-
-	if (csrow >= mci->nr_csrows) {
-		/* something is wrong */
-		edac_mc_printk(mci, KERN_ERR,
-			"INTERNAL ERROR: row out of range (%d >= %d)\n",
-			csrow, mci->nr_csrows);
-		edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
-		return;
-	}
+		/* get the max grain, over the error match range */
+		if (dimm->grain > grain)
+			grain = dimm->grain;
 
-	if (channela >= mci->csrows[csrow].nr_channels) {
-		/* something is wrong */
-		edac_mc_printk(mci, KERN_ERR,
-			"INTERNAL ERROR: channel-a out of range "
-			"(%d >= %d)\n",
-			channela, mci->csrows[csrow].nr_channels);
-		edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
-		return;
+		/*
+		 * If the error is memory-controller wide, there's no need to
+		 * seek for the affected DIMMs because the whole
+		 * channel/memory controller/...  may be affected.
+		 * Also, don't show errors for empty DIMM slots.
+		 */
+		if (enable_per_layer_report && dimm->nr_pages) {
+			if (p != label) {
+				strcpy(p, OTHER_LABEL);
+				p += strlen(OTHER_LABEL);
+			}
+			strcpy(p, dimm->label);
+			p += strlen(p);
+			*p = '\0';
+
+			/*
+			 * get csrow/channel of the DIMM, in order to allow
+			 * incrementing the compat API counters
+			 */
+			debugf4("%s: %s csrows map: (%d,%d)\n",
+				__func__,
+				mci->mem_is_per_rank ? "rank" : "dimm",
+				dimm->csrow, dimm->cschannel);
+
+			if (row == -1)
+				row = dimm->csrow;
+			else if (row >= 0 && row != dimm->csrow)
+				row = -2;
+
+			if (chan == -1)
+				chan = dimm->cschannel;
+			else if (chan >= 0 && chan != dimm->cschannel)
+				chan = -2;
+		}
 	}
 
-	if (channelb >= mci->csrows[csrow].nr_channels) {
-		/* something is wrong */
-		edac_mc_printk(mci, KERN_ERR,
-			"INTERNAL ERROR: channel-b out of range "
-			"(%d >= %d)\n",
-			channelb, mci->csrows[csrow].nr_channels);
-		edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
-		return;
+	if (!enable_per_layer_report) {
+		strcpy(label, "any memory");
+	} else {
+		debugf4("%s: csrow/channel to increment: (%d,%d)\n",
+			__func__, row, chan);
+		if (p == label)
+			strcpy(label, "unknown memory");
+		if (type == HW_EVENT_ERR_CORRECTED) {
+			if (row >= 0) {
+				mci->csrows[row].ce_count++;
+				if (chan >= 0)
+					mci->csrows[row].channels[chan].ce_count++;
+			}
+		} else
+			if (row >= 0)
+				mci->csrows[row].ue_count++;
 	}
 
-	mci->ue_count++;
-	mci->csrows[csrow].ue_count++;
-
-	/* Generate the DIMM labels from the specified channels */
-	label = mci->csrows[csrow].channels[channela].dimm->label;
-	chars = snprintf(pos, len + 1, "%s", label);
-	len -= chars;
-	pos += chars;
-
-	chars = snprintf(pos, len + 1, "-%s",
-			mci->csrows[csrow].channels[channelb].dimm->label);
-
-	if (edac_mc_get_log_ue())
-		edac_mc_printk(mci, KERN_EMERG,
-			"UE row %d, channel-a= %d channel-b= %d "
-			"labels \"%s\": %s\n", csrow, channela, channelb,
-			labels, msg);
-
-	if (edac_mc_get_panic_on_ue())
-		panic("UE row %d, channel-a= %d channel-b= %d "
-			"labels \"%s\": %s\n", csrow, channela,
-			channelb, labels, msg);
-}
-EXPORT_SYMBOL(edac_mc_handle_fbd_ue);
-
-/*************************************************************
- * On Fully Buffered DIMM modules, this help function is
- * called to process CE events
- */
-void edac_mc_handle_fbd_ce(struct mem_ctl_info *mci,
-			unsigned int csrow, unsigned int channel, char *msg)
-{
-	char *label = NULL;
+	/* Fill the RAM location data */
+	p = location;
+	for (i = 0; i < mci->n_layers; i++) {
+		if (pos[i] < 0)
+			continue;
 
-	/* Ensure boundary values */
-	if (csrow >= mci->nr_csrows) {
-		/* something is wrong */
-		edac_mc_printk(mci, KERN_ERR,
-			"INTERNAL ERROR: row out of range (%d >= %d)\n",
-			csrow, mci->nr_csrows);
-		edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
-		return;
+		p += sprintf(p, "%s:%d ",
+			     edac_layer_name[mci->layers[i].type],
+			     pos[i]);
 	}
-	if (channel >= mci->csrows[csrow].nr_channels) {
-		/* something is wrong */
-		edac_mc_printk(mci, KERN_ERR,
-			"INTERNAL ERROR: channel out of range (%d >= %d)\n",
-			channel, mci->csrows[csrow].nr_channels);
-		edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
-		return;
-	}
-
-	label = mci->csrows[csrow].channels[channel].dimm->label;
 
-	if (edac_mc_get_log_ce())
-		/* FIXME - put in DIMM location */
-		edac_mc_printk(mci, KERN_WARNING,
-			"CE row %d, channel %d, label \"%s\": %s\n",
-			csrow, channel, label, msg);
+	/* Memory type dependent details about the error */
+	if (type == HW_EVENT_ERR_CORRECTED) {
+		snprintf(detail, sizeof(detail),
+			"page:0x%lx offset:0x%lx grain:%d syndrome:0x%lx",
+			page_frame_number, offset_in_page,
+			grain, syndrome);
+		edac_ce_error(mci, pos, msg, location, label, detail,
+			      other_detail, enable_per_layer_report,
+			      page_frame_number, offset_in_page, grain);
+	} else {
+		snprintf(detail, sizeof(detail),
+			"page:0x%lx offset:0x%lx grain:%d",
+			page_frame_number, offset_in_page, grain);
 
-	mci->ce_count++;
-	mci->csrows[csrow].ce_count++;
-	mci->csrows[csrow].channels[channel].dimm->ce_count++;
-	mci->csrows[csrow].channels[channel].ce_count++;
+		edac_ue_error(mci, pos, msg, location, label, detail,
+			      other_detail, enable_per_layer_report);
+	}
 }
-EXPORT_SYMBOL(edac_mc_handle_fbd_ce);
+EXPORT_SYMBOL_GPL(edac_mc_handle_error);
diff --git a/include/linux/edac.h b/include/linux/edac.h
index 9e628434e164..d68b01cad068 100644
--- a/include/linux/edac.h
+++ b/include/linux/edac.h
@@ -416,18 +416,20 @@ struct edac_mc_layer {
 /* FIXME: add the proper per-location error counts */
 struct dimm_info {
 	char label[EDAC_MC_LABEL_LEN + 1];	/* DIMM label on motherboard */
-	unsigned memory_controller;
-	unsigned csrow;
-	unsigned csrow_channel;
+
+	/* Memory location data */
+	unsigned location[EDAC_MAX_LAYERS];
+
+	struct mem_ctl_info *mci;	/* the parent */
 
 	u32 grain;		/* granularity of reported error in bytes */
 	enum dev_type dtype;	/* memory device type */
 	enum mem_type mtype;	/* memory dimm type */
 	enum edac_type edac_mode;	/* EDAC mode for this dimm */
 
-	u32 nr_pages;			/* number of pages in csrow */
+	u32 nr_pages;			/* number of pages on this dimm */
 
-	u32 ce_count;		/* Correctable Errors for this dimm */
+	unsigned csrow, cschannel;	/* Points to the old API data */
 };
 
 /**
@@ -447,9 +449,10 @@ struct dimm_info {
  */
 struct rank_info {
 	int chan_idx;
-	u32 ce_count;
 	struct csrow_info *csrow;
 	struct dimm_info *dimm;
+
+	u32 ce_count;		/* Correctable Errors for this csrow */
 };
 
 struct csrow_info {
@@ -545,13 +548,18 @@ struct mem_ctl_info {
 	unsigned long (*ctl_page_to_phys) (struct mem_ctl_info * mci,
 					   unsigned long page);
 	int mc_idx;
-	int nr_csrows;
 	struct csrow_info *csrows;
+	unsigned nr_csrows, num_cschannel;
+
+	/* Memory Controller hierarchy */
+	unsigned n_layers;
+	struct edac_mc_layer *layers;
+	bool mem_is_per_rank;
 
 	/*
 	 * DIMM info. Will eventually remove the entire csrows_info some day
 	 */
-	unsigned nr_dimms;
+	unsigned tot_dimms;
 	struct dimm_info *dimms;
 
 	/*
@@ -566,12 +574,16 @@ struct mem_ctl_info {
 	const char *dev_name;
 	char proc_name[MC_PROC_NAME_MAX_LEN + 1];
 	void *pvt_info;
-	u32 ue_noinfo_count;	/* Uncorrectable Errors w/o info */
-	u32 ce_noinfo_count;	/* Correctable Errors w/o info */
-	u32 ue_count;		/* Total Uncorrectable Errors for this MC */
-	u32 ce_count;		/* Total Correctable Errors for this MC */
 	unsigned long start_time;	/* mci load start time (in jiffies) */
 
+	/*
+	 * drivers shouldn't access those fields directly, as the core
+	 * already handles that.
+	 */
+	u32 ce_noinfo_count, ue_noinfo_count;
+	u32 ue_count, ce_count;
+	u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS];
+
 	struct completion complete;
 
 	/* edac sysfs device control */
@@ -584,7 +596,7 @@ struct mem_ctl_info {
 	 * by the low level driver.
 	 *
 	 * Set by the low level driver to provide attributes at the
-	 * controller level, same level as 'ue_count' and 'ce_count' above.
+	 * controller level.
 	 * An array of structures, NULL terminated
 	 *
 	 * If attributes are desired, then set to array of attributes
-- 
cgit v1.3-7-g2ca7


From 5926ff502f6b93ca0c1654f8a5c5317ea236dbdb Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@redhat.com>
Date: Thu, 9 Feb 2012 11:05:20 -0300
Subject: edac: Initialize the dimm label with the known information

While userspace doesn't fill the dimm labels, add there the dimm location,
as described by the used memory model. This could eventually match what
is described at the dmidecode, making easier for people to identify the
memory.

For example, on an Intel motherboard where the DMI table is reliable,
the first memory stick is described as:

Memory Device
	Array Handle: 0x0029
	Error Information Handle: Not Provided
	Total Width: 64 bits
	Data Width: 64 bits
	Size: 2048 MB
	Form Factor: DIMM
	Set: 1
	Locator: A1_DIMM0
	Bank Locator: A1_Node0_Channel0_Dimm0
	Type: <OUT OF SPEC>
	Type Detail: Synchronous
	Speed: 800 MHz
	Manufacturer: A1_Manufacturer0
	Serial Number: A1_SerNum0
	Asset Tag: A1_AssetTagNum0
	Part Number: A1_PartNum0

The memory named as "A1_DIMM0" is physically located at the first
memory controller (node 0), at channel 0, dimm slot 0.

After this patch, the memory label will be filled with:
	/sys/devices/system/edac/mc/csrow0/ch0_dimm_label:mc#0channel#0slot#0

And (after the new EDAC API patches) as:
	/sys/devices/system/edac/mc/mc0/dimm0/dimm_label:mc#0channel#0slot#0

So, even if the memory label is not initialized on userspace, an useful
information with the error location is filled there, expecially since
several systems/motherboards are provided with enough info to map from
channel/slot (or branch/channel/slot) into the DIMM label. So, letting the
EDAC core fill it by default is a good thing.

It should noticed that, as the label filling happens at the
edac_mc_alloc(), drivers can override it to better describe the memories
(and some actually do it).

Cc: Aristeu Rozanski <arozansk@redhat.com>
Cc: Doug Thompson <norsk5@yahoo.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/edac/edac_mc.c       | 28 ++++++++++++++++++++++------
 drivers/edac/edac_mc_sysfs.c |  8 ++++----
 include/linux/edac.h         |  2 +-
 3 files changed, 27 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index 06028de5fe1b..10f375032e96 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -210,10 +210,10 @@ struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
 	struct dimm_info *dimm;
 	u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS];
 	unsigned pos[EDAC_MAX_LAYERS];
-	void *pvt, *ptr = NULL;
 	unsigned size, tot_dimms = 1, count = 1;
 	unsigned tot_csrows = 1, tot_channels = 1, tot_errcount = 0;
-	int i, j, err, row, chn;
+	void *pvt, *p, *ptr = NULL;
+	int i, j, err, row, chn, n, len;
 	bool per_rank = false;
 
 	BUG_ON(n_layers > EDAC_MAX_LAYERS || n_layers == 0);
@@ -325,10 +325,26 @@ struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
 			i, per_rank ? "rank" : "dimm", (dimm - mci->dimms),
 			pos[0], pos[1], pos[2], row, chn);
 
-		/* Copy DIMM location */
-		for (j = 0; j < n_layers; j++)
+		/*
+		 * Copy DIMM location and initialize it.
+		 */
+		len = sizeof(dimm->label);
+		p = dimm->label;
+		n = snprintf(p, len, "mc#%u", mc_num);
+		p += n;
+		len -= n;
+		for (j = 0; j < n_layers; j++) {
+			n = snprintf(p, len, "%s#%u",
+				     edac_layer_name[layers[j].type],
+				     pos[j]);
+			p += n;
+			len -= n;
 			dimm->location[j] = pos[j];
 
+			if (len <= 0)
+				break;
+		}
+
 		/* Link it to the csrows old API data */
 		chan->dimm = dimm;
 		dimm->csrow = row;
@@ -834,7 +850,7 @@ static void edac_inc_ce_error(struct mem_ctl_info *mci,
 {
 	int i, index = 0;
 
-	mci->ce_count++;
+	mci->ce_mc++;
 
 	if (!enable_per_layer_report) {
 		mci->ce_noinfo_count++;
@@ -858,7 +874,7 @@ static void edac_inc_ue_error(struct mem_ctl_info *mci,
 {
 	int i, index = 0;
 
-	mci->ue_count++;
+	mci->ue_mc++;
 
 	if (!enable_per_layer_report) {
 		mci->ce_noinfo_count++;
diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c
index 1dc1c6ca4308..f6a29b0eedc8 100644
--- a/drivers/edac/edac_mc_sysfs.c
+++ b/drivers/edac/edac_mc_sysfs.c
@@ -425,8 +425,8 @@ static ssize_t mci_reset_counters_store(struct mem_ctl_info *mci,
 
 	mci->ue_noinfo_count = 0;
 	mci->ce_noinfo_count = 0;
-	mci->ue_count = 0;
-	mci->ce_count = 0;
+	mci->ue_mc = 0;
+	mci->ce_mc = 0;
 
 	for (row = 0; row < mci->nr_csrows; row++) {
 		struct csrow_info *ri = &mci->csrows[row];
@@ -495,12 +495,12 @@ static ssize_t mci_sdram_scrub_rate_show(struct mem_ctl_info *mci, char *data)
 /* default attribute files for the MCI object */
 static ssize_t mci_ue_count_show(struct mem_ctl_info *mci, char *data)
 {
-	return sprintf(data, "%d\n", mci->ue_count);
+	return sprintf(data, "%d\n", mci->ue_mc);
 }
 
 static ssize_t mci_ce_count_show(struct mem_ctl_info *mci, char *data)
 {
-	return sprintf(data, "%d\n", mci->ce_count);
+	return sprintf(data, "%d\n", mci->ce_mc);
 }
 
 static ssize_t mci_ce_noinfo_show(struct mem_ctl_info *mci, char *data)
diff --git a/include/linux/edac.h b/include/linux/edac.h
index d68b01cad068..91ba3bae42ee 100644
--- a/include/linux/edac.h
+++ b/include/linux/edac.h
@@ -581,7 +581,7 @@ struct mem_ctl_info {
 	 * already handles that.
 	 */
 	u32 ce_noinfo_count, ue_noinfo_count;
-	u32 ue_count, ce_count;
+	u32 ue_mc, ce_mc;
 	u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS];
 
 	struct completion complete;
-- 
cgit v1.3-7-g2ca7


From af2e840971dee21ba9b87e9ecee7d5cc6109baaa Mon Sep 17 00:00:00 2001
From: Paul Gortmaker <paul.gortmaker@windriver.com>
Date: Tue, 29 May 2012 15:06:14 -0700
Subject: pagemap.h: fix warning about possibly used before init var

Commit f56f821feb7b ("mm: extend prefault helpers to fault in more than
PAGE_SIZE") added in the new functions: fault_in_multipages_writeable()
and fault_in_multipages_readable().

However, we currently see:

  include/linux/pagemap.h:492: warning: 'ret' may be used uninitialized in this function
  include/linux/pagemap.h:492: note: 'ret' was declared here

Unlike a lot of gcc nags, this one appears somewhat legit.  i.e.  passing
in an invalid negative value of "size" does make it look like all the
conditionals in there would be bypassed and the uninitialized value would
be returned.

Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pagemap.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index efa26b4da8d2..7cfad3bbb0cc 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -460,11 +460,11 @@ static inline int fault_in_pages_readable(const char __user *uaddr, int size)
  */
 static inline int fault_in_multipages_writeable(char __user *uaddr, int size)
 {
-	int ret;
+	int ret = 0;
 	char __user *end = uaddr + size - 1;
 
 	if (unlikely(size == 0))
-		return 0;
+		return ret;
 
 	/*
 	 * Writing zeroes into userspace here is OK, because we know that if
@@ -489,11 +489,11 @@ static inline int fault_in_multipages_readable(const char __user *uaddr,
 					       int size)
 {
 	volatile char c;
-	int ret;
+	int ret = 0;
 	const char __user *end = uaddr + size - 1;
 
 	if (unlikely(size == 0))
-		return 0;
+		return ret;
 
 	while (uaddr <= end) {
 		ret = __get_user(c, uaddr);
-- 
cgit v1.3-7-g2ca7


From e709ffd6169ccd259eb5874e853303e91e94e829 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@redhat.com>
Date: Tue, 29 May 2012 15:06:18 -0700
Subject: mm: remove swap token code

The swap token code no longer fits in with the current VM model.  It
does not play well with cgroups or the better NUMA placement code in
development, since we have only one swap token globally.

It also has the potential to mess with scalability of the system, by
increasing the number of non-reclaimable pages on the active and
inactive anon LRU lists.

Last but not least, the swap token code has been broken for a year
without complaints, as reported by Konstantin Khlebnikov.  This suggests
we no longer have much use for it.

The days of sub-1G memory systems with heavy use of swap are over.  If
we ever need thrashing reducing code in the future, we will have to
implement something that does scale.

Signed-off-by: Rik van Riel <riel@redhat.com>
Cc: Konstantin Khlebnikov <khlebnikov@openvz.org>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Hugh Dickins <hughd@google.com>
Acked-by: Bob Picco <bpicco@meloft.net>
Acked-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm_types.h      |  11 ---
 include/linux/swap.h          |  35 ----------
 include/trace/events/vmscan.h |  82 ----------------------
 kernel/fork.c                 |   9 ---
 mm/Makefile                   |   2 +-
 mm/memcontrol.c               |   1 -
 mm/memory.c                   |   2 +-
 mm/rmap.c                     |   6 --
 mm/thrash.c                   | 155 ------------------------------------------
 mm/vmscan.c                   |   6 --
 10 files changed, 2 insertions(+), 307 deletions(-)
 delete mode 100644 mm/thrash.c

(limited to 'include/linux')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 26574c726121..dad95bdd06d7 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -345,17 +345,6 @@ struct mm_struct {
 	/* Architecture-specific MM context */
 	mm_context_t context;
 
-	/* Swap token stuff */
-	/*
-	 * Last value of global fault stamp as seen by this process.
-	 * In other words, this value gives an indication of how long
-	 * it has been since this task got the token.
-	 * Look at mm/thrash.c
-	 */
-	unsigned int faultstamp;
-	unsigned int token_priority;
-	unsigned int last_interval;
-
 	unsigned long flags; /* Must use atomic bitops to access the bits */
 
 	struct core_state *core_state; /* coredumping support */
diff --git a/include/linux/swap.h b/include/linux/swap.h
index b1fd5c7925fe..bc3073ce95cc 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -355,23 +355,6 @@ extern int reuse_swap_page(struct page *);
 extern int try_to_free_swap(struct page *);
 struct backing_dev_info;
 
-/* linux/mm/thrash.c */
-extern struct mm_struct *swap_token_mm;
-extern void grab_swap_token(struct mm_struct *);
-extern void __put_swap_token(struct mm_struct *);
-extern void disable_swap_token(struct mem_cgroup *memcg);
-
-static inline int has_swap_token(struct mm_struct *mm)
-{
-	return (mm == swap_token_mm);
-}
-
-static inline void put_swap_token(struct mm_struct *mm)
-{
-	if (has_swap_token(mm))
-		__put_swap_token(mm);
-}
-
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR
 extern void
 mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout);
@@ -476,24 +459,6 @@ static inline swp_entry_t get_swap_page(void)
 	return entry;
 }
 
-/* linux/mm/thrash.c */
-static inline void put_swap_token(struct mm_struct *mm)
-{
-}
-
-static inline void grab_swap_token(struct mm_struct *mm)
-{
-}
-
-static inline int has_swap_token(struct mm_struct *mm)
-{
-	return 0;
-}
-
-static inline void disable_swap_token(struct mem_cgroup *memcg)
-{
-}
-
 static inline void
 mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent)
 {
diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
index f64560e204bc..572195459d58 100644
--- a/include/trace/events/vmscan.h
+++ b/include/trace/events/vmscan.h
@@ -395,88 +395,6 @@ TRACE_EVENT(mm_vmscan_lru_shrink_inactive,
 		show_reclaim_flags(__entry->reclaim_flags))
 );
 
-TRACE_EVENT(replace_swap_token,
-	TP_PROTO(struct mm_struct *old_mm,
-		 struct mm_struct *new_mm),
-
-	TP_ARGS(old_mm, new_mm),
-
-	TP_STRUCT__entry(
-		__field(struct mm_struct*,	old_mm)
-		__field(unsigned int,		old_prio)
-		__field(struct mm_struct*,	new_mm)
-		__field(unsigned int,		new_prio)
-	),
-
-	TP_fast_assign(
-		__entry->old_mm   = old_mm;
-		__entry->old_prio = old_mm ? old_mm->token_priority : 0;
-		__entry->new_mm   = new_mm;
-		__entry->new_prio = new_mm->token_priority;
-	),
-
-	TP_printk("old_token_mm=%p old_prio=%u new_token_mm=%p new_prio=%u",
-		  __entry->old_mm, __entry->old_prio,
-		  __entry->new_mm, __entry->new_prio)
-);
-
-DECLARE_EVENT_CLASS(put_swap_token_template,
-	TP_PROTO(struct mm_struct *swap_token_mm),
-
-	TP_ARGS(swap_token_mm),
-
-	TP_STRUCT__entry(
-		__field(struct mm_struct*, swap_token_mm)
-	),
-
-	TP_fast_assign(
-		__entry->swap_token_mm = swap_token_mm;
-	),
-
-	TP_printk("token_mm=%p", __entry->swap_token_mm)
-);
-
-DEFINE_EVENT(put_swap_token_template, put_swap_token,
-	TP_PROTO(struct mm_struct *swap_token_mm),
-	TP_ARGS(swap_token_mm)
-);
-
-DEFINE_EVENT_CONDITION(put_swap_token_template, disable_swap_token,
-	TP_PROTO(struct mm_struct *swap_token_mm),
-	TP_ARGS(swap_token_mm),
-	TP_CONDITION(swap_token_mm != NULL)
-);
-
-TRACE_EVENT_CONDITION(update_swap_token_priority,
-	TP_PROTO(struct mm_struct *mm,
-		 unsigned int old_prio,
-		 struct mm_struct *swap_token_mm),
-
-	TP_ARGS(mm, old_prio, swap_token_mm),
-
-	TP_CONDITION(mm->token_priority != old_prio),
-
-	TP_STRUCT__entry(
-		__field(struct mm_struct*, mm)
-		__field(unsigned int, old_prio)
-		__field(unsigned int, new_prio)
-		__field(struct mm_struct*, swap_token_mm)
-		__field(unsigned int, swap_token_prio)
-	),
-
-	TP_fast_assign(
-		__entry->mm		= mm;
-		__entry->old_prio	= old_prio;
-		__entry->new_prio	= mm->token_priority;
-		__entry->swap_token_mm	= swap_token_mm;
-		__entry->swap_token_prio = swap_token_mm ? swap_token_mm->token_priority : 0;
-	),
-
-	TP_printk("mm=%p old_prio=%u new_prio=%u swap_token_mm=%p token_prio=%u",
-		  __entry->mm, __entry->old_prio, __entry->new_prio,
-		  __entry->swap_token_mm, __entry->swap_token_prio)
-);
-
 #endif /* _TRACE_VMSCAN_H */
 
 /* This part must be outside protection */
diff --git a/kernel/fork.c b/kernel/fork.c
index 47b4e4f379f9..5b13eea2e757 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -614,7 +614,6 @@ void mmput(struct mm_struct *mm)
 			list_del(&mm->mmlist);
 			spin_unlock(&mmlist_lock);
 		}
-		put_swap_token(mm);
 		if (mm->binfmt)
 			module_put(mm->binfmt->module);
 		mmdrop(mm);
@@ -831,10 +830,6 @@ struct mm_struct *dup_mm(struct task_struct *tsk)
 	memcpy(mm, oldmm, sizeof(*mm));
 	mm_init_cpumask(mm);
 
-	/* Initializing for Swap token stuff */
-	mm->token_priority = 0;
-	mm->last_interval = 0;
-
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 	mm->pmd_huge_pte = NULL;
 #endif
@@ -913,10 +908,6 @@ static int copy_mm(unsigned long clone_flags, struct task_struct *tsk)
 		goto fail_nomem;
 
 good_mm:
-	/* Initializing for Swap token stuff */
-	mm->token_priority = 0;
-	mm->last_interval = 0;
-
 	tsk->mm = mm;
 	tsk->active_mm = mm;
 	return 0;
diff --git a/mm/Makefile b/mm/Makefile
index 8aada89efbbb..ccecbf9818f5 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -25,7 +25,7 @@ endif
 obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o
 
 obj-$(CONFIG_BOUNCE)	+= bounce.o
-obj-$(CONFIG_SWAP)	+= page_io.o swap_state.o swapfile.o thrash.o
+obj-$(CONFIG_SWAP)	+= page_io.o swap_state.o swapfile.o
 obj-$(CONFIG_HAS_DMA)	+= dmapool.o
 obj-$(CONFIG_HUGETLBFS)	+= hugetlb.o
 obj-$(CONFIG_NUMA) 	+= mempolicy.o
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index f342778a0c0a..92675fe8a2ef 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5598,7 +5598,6 @@ static void mem_cgroup_move_task(struct cgroup *cont,
 	if (mm) {
 		if (mc.to)
 			mem_cgroup_move_charge(mm);
-		put_swap_token(mm);
 		mmput(mm);
 	}
 	if (mc.to)
diff --git a/mm/memory.c b/mm/memory.c
index e40f6759ba98..2bf9e110437c 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2908,7 +2908,6 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	delayacct_set_flag(DELAYACCT_PF_SWAPIN);
 	page = lookup_swap_cache(entry);
 	if (!page) {
-		grab_swap_token(mm); /* Contend for token _before_ read-in */
 		page = swapin_readahead(entry,
 					GFP_HIGHUSER_MOVABLE, vma, address);
 		if (!page) {
@@ -2938,6 +2937,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	}
 
 	locked = lock_page_or_retry(page, mm, flags);
+
 	delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
 	if (!locked) {
 		ret |= VM_FAULT_RETRY;
diff --git a/mm/rmap.c b/mm/rmap.c
index 5b5ad584ffb7..0f3b7cda2a24 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -755,12 +755,6 @@ int page_referenced_one(struct page *page, struct vm_area_struct *vma,
 		pte_unmap_unlock(pte, ptl);
 	}
 
-	/* Pretend the page is referenced if the task has the
-	   swap token and is in the middle of a page fault. */
-	if (mm != current->mm && has_swap_token(mm) &&
-			rwsem_is_locked(&mm->mmap_sem))
-		referenced++;
-
 	(*mapcount)--;
 
 	if (referenced)
diff --git a/mm/thrash.c b/mm/thrash.c
deleted file mode 100644
index 57ad495dbd54..000000000000
--- a/mm/thrash.c
+++ /dev/null
@@ -1,155 +0,0 @@
-/*
- * mm/thrash.c
- *
- * Copyright (C) 2004, Red Hat, Inc.
- * Copyright (C) 2004, Rik van Riel <riel@redhat.com>
- * Released under the GPL, see the file COPYING for details.
- *
- * Simple token based thrashing protection, using the algorithm
- * described in: http://www.cse.ohio-state.edu/hpcs/WWW/HTML/publications/abs05-1.html
- *
- * Sep 2006, Ashwin Chaugule <ashwin.chaugule@celunite.com>
- * Improved algorithm to pass token:
- * Each task has a priority which is incremented if it contended
- * for the token in an interval less than its previous attempt.
- * If the token is acquired, that task's priority is boosted to prevent
- * the token from bouncing around too often and to let the task make
- * some progress in its execution.
- */
-
-#include <linux/jiffies.h>
-#include <linux/mm.h>
-#include <linux/sched.h>
-#include <linux/swap.h>
-#include <linux/memcontrol.h>
-
-#include <trace/events/vmscan.h>
-
-#define TOKEN_AGING_INTERVAL	(0xFF)
-
-static DEFINE_SPINLOCK(swap_token_lock);
-struct mm_struct *swap_token_mm;
-static struct mem_cgroup *swap_token_memcg;
-
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR
-static struct mem_cgroup *swap_token_memcg_from_mm(struct mm_struct *mm)
-{
-	struct mem_cgroup *memcg;
-
-	memcg = try_get_mem_cgroup_from_mm(mm);
-	if (memcg)
-		css_put(mem_cgroup_css(memcg));
-
-	return memcg;
-}
-#else
-static struct mem_cgroup *swap_token_memcg_from_mm(struct mm_struct *mm)
-{
-	return NULL;
-}
-#endif
-
-void grab_swap_token(struct mm_struct *mm)
-{
-	int current_interval;
-	unsigned int old_prio = mm->token_priority;
-	static unsigned int global_faults;
-	static unsigned int last_aging;
-
-	global_faults++;
-
-	current_interval = global_faults - mm->faultstamp;
-
-	if (!spin_trylock(&swap_token_lock))
-		return;
-
-	/* First come first served */
-	if (!swap_token_mm)
-		goto replace_token;
-
-	/*
-	 * Usually, we don't need priority aging because long interval faults
-	 * makes priority decrease quickly. But there is one exception. If the
-	 * token owner task is sleeping, it never make long interval faults.
-	 * Thus, we need a priority aging mechanism instead. The requirements
-	 * of priority aging are
-	 *  1) An aging interval is reasonable enough long. Too short aging
-	 *     interval makes quick swap token lost and decrease performance.
-	 *  2) The swap token owner task have to get priority aging even if
-	 *     it's under sleep.
-	 */
-	if ((global_faults - last_aging) > TOKEN_AGING_INTERVAL) {
-		swap_token_mm->token_priority /= 2;
-		last_aging = global_faults;
-	}
-
-	if (mm == swap_token_mm) {
-		mm->token_priority += 2;
-		goto update_priority;
-	}
-
-	if (current_interval < mm->last_interval)
-		mm->token_priority++;
-	else {
-		if (likely(mm->token_priority > 0))
-			mm->token_priority--;
-	}
-
-	/* Check if we deserve the token */
-	if (mm->token_priority > swap_token_mm->token_priority)
-		goto replace_token;
-
-update_priority:
-	trace_update_swap_token_priority(mm, old_prio, swap_token_mm);
-
-out:
-	mm->faultstamp = global_faults;
-	mm->last_interval = current_interval;
-	spin_unlock(&swap_token_lock);
-	return;
-
-replace_token:
-	mm->token_priority += 2;
-	trace_replace_swap_token(swap_token_mm, mm);
-	swap_token_mm = mm;
-	swap_token_memcg = swap_token_memcg_from_mm(mm);
-	last_aging = global_faults;
-	goto out;
-}
-
-/* Called on process exit. */
-void __put_swap_token(struct mm_struct *mm)
-{
-	spin_lock(&swap_token_lock);
-	if (likely(mm == swap_token_mm)) {
-		trace_put_swap_token(swap_token_mm);
-		swap_token_mm = NULL;
-		swap_token_memcg = NULL;
-	}
-	spin_unlock(&swap_token_lock);
-}
-
-static bool match_memcg(struct mem_cgroup *a, struct mem_cgroup *b)
-{
-	if (!a)
-		return true;
-	if (!b)
-		return true;
-	if (a == b)
-		return true;
-	return false;
-}
-
-void disable_swap_token(struct mem_cgroup *memcg)
-{
-	/* memcg reclaim don't disable unrelated mm token. */
-	if (match_memcg(memcg, swap_token_memcg)) {
-		spin_lock(&swap_token_lock);
-		if (match_memcg(memcg, swap_token_memcg)) {
-			trace_disable_swap_token(swap_token_mm);
-			swap_token_mm = NULL;
-			swap_token_memcg = NULL;
-		}
-		spin_unlock(&swap_token_lock);
-	}
-}
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 33dc256033b5..ca46080bb074 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2352,8 +2352,6 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 
 	for (priority = DEF_PRIORITY; priority >= 0; priority--) {
 		sc->nr_scanned = 0;
-		if (!priority)
-			disable_swap_token(sc->target_mem_cgroup);
 		aborted_reclaim = shrink_zones(priority, zonelist, sc);
 
 		/*
@@ -2704,10 +2702,6 @@ loop_again:
 		unsigned long lru_pages = 0;
 		int has_under_min_watermark_zone = 0;
 
-		/* The swap token gets in the way of swapout... */
-		if (!priority)
-			disable_swap_token(NULL);
-
 		all_zones_ok = 1;
 		balanced = 0;
 
-- 
cgit v1.3-7-g2ca7


From 0ce72d4f7333248efbef1f3309770c7edb1b2625 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Tue, 29 May 2012 15:06:24 -0700
Subject: mm: do_migrate_pages(): rename arguments

s/from_nodes/from and s/to_nodes/to/.  The "_nodes" is redundant - it
duplicates the argument's type.

Done in a fit of irritation over 80-col issues :(

Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: KOSAKI Motohiro <mkosaki@redhat.com>
Cc: Larry Woodman <lwoodman@redhat.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mempolicy.h |  9 ++++-----
 mm/mempolicy.c            | 18 +++++++++---------
 2 files changed, 13 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 7c727a90d70d..4aa42732e47f 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -225,8 +225,8 @@ static inline void check_highest_zone(enum zone_type k)
 		policy_zone = k;
 }
 
-int do_migrate_pages(struct mm_struct *mm,
-	const nodemask_t *from_nodes, const nodemask_t *to_nodes, int flags);
+int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from,
+		     const nodemask_t *to, int flags);
 
 
 #ifdef CONFIG_TMPFS
@@ -354,9 +354,8 @@ static inline bool mempolicy_nodemask_intersects(struct task_struct *tsk,
 	return false;
 }
 
-static inline int do_migrate_pages(struct mm_struct *mm,
-			const nodemask_t *from_nodes,
-			const nodemask_t *to_nodes, int flags)
+static inline int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from,
+				   const nodemask_t *to, int flags)
 {
 	return 0;
 }
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index d3c5de47ff6d..f15c1b24ca18 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -950,8 +950,8 @@ static int migrate_to_node(struct mm_struct *mm, int source, int dest,
  *
  * Returns the number of page that could not be moved.
  */
-int do_migrate_pages(struct mm_struct *mm,
-	const nodemask_t *from_nodes, const nodemask_t *to_nodes, int flags)
+int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from,
+		     const nodemask_t *to, int flags)
 {
 	int busy = 0;
 	int err;
@@ -963,7 +963,7 @@ int do_migrate_pages(struct mm_struct *mm,
 
 	down_read(&mm->mmap_sem);
 
-	err = migrate_vmas(mm, from_nodes, to_nodes, flags);
+	err = migrate_vmas(mm, from, to, flags);
 	if (err)
 		goto out;
 
@@ -998,7 +998,7 @@ int do_migrate_pages(struct mm_struct *mm,
 	 * moved to an empty node, then there is nothing left worth migrating.
 	 */
 
-	tmp = *from_nodes;
+	tmp = *from;
 	while (!nodes_empty(tmp)) {
 		int s,d;
 		int source = -1;
@@ -1021,11 +1021,11 @@ int do_migrate_pages(struct mm_struct *mm,
 			 *          [0-7] - > [3,4,5] moves only 0,1,2,6,7.
 			 */
 
-			if ((nodes_weight(*from_nodes) != nodes_weight(*to_nodes)) &&
-						(node_isset(s, *to_nodes)))
+			if ((nodes_weight(*from) != nodes_weight(*to)) &&
+						(node_isset(s, *to)))
 				continue;
 
-			d = node_remap(s, *from_nodes, *to_nodes);
+			d = node_remap(s, *from, *to);
 			if (s == d)
 				continue;
 
@@ -1085,8 +1085,8 @@ static void migrate_page_add(struct page *page, struct list_head *pagelist,
 {
 }
 
-int do_migrate_pages(struct mm_struct *mm,
-	const nodemask_t *from_nodes, const nodemask_t *to_nodes, int flags)
+int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from,
+		     const nodemask_t *to, int flags)
 {
 	return -ENOSYS;
 }
-- 
cgit v1.3-7-g2ca7


From c3ac9a8ade65ccbfd145fbff895ae8d8d62d09b0 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Tue, 29 May 2012 15:06:25 -0700
Subject: mm: memcg: count pte references from every member of the reclaimed
 hierarchy

The rmap walker checking page table references has historically ignored
references from VMAs that were not part of the memcg that was being
reclaimed during memcg hard limit reclaim.

When transitioning global reclaim to memcg hierarchy reclaim, I missed
that bit and now references from outside a memcg are ignored even during
global reclaim.

Reverting back to traditional behaviour - count all references during
global reclaim and only mind references of the memcg being reclaimed
during limit reclaim would be one option.

However, the more generic idea is to ignore references exactly then when
they are outside the hierarchy that is currently under reclaim; because
only then will their reclamation be of any use to help the pressure
situation.  It makes no sense to ignore references from a sibling memcg
and then evict a page that will be immediately refaulted by that sibling
which contributes to the same usage of the common ancestor under
reclaim.

The solution: make the rmap walker ignore references from VMAs that are
not part of the hierarchy that is being reclaimed.

Flat limit reclaim will stay the same, hierarchical limit reclaim will
mind the references only to pages that the hierarchy owns.  Global
reclaim, since it reclaims from all memcgs, will be fixed to regard all
references.

[akpm@linux-foundation.org: name the args in the declaration]
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reported-by: Konstantin Khlebnikov <khlebnikov@openvz.org>
Acked-by: Konstantin Khlebnikov<khlebnikov@openvz.org>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h |  7 ++++++-
 mm/memcontrol.c            | 16 +++++++++++-----
 mm/vmscan.c                |  6 ++++--
 3 files changed, 21 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index f94efd2f6c27..18ea0b7baf32 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -79,6 +79,8 @@ extern void mem_cgroup_uncharge_cache_page(struct page *page);
 
 extern void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
 				     int order);
+bool __mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg,
+				  struct mem_cgroup *memcg);
 int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *memcg);
 
 extern struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page);
@@ -92,10 +94,13 @@ static inline
 int mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *cgroup)
 {
 	struct mem_cgroup *memcg;
+	int match;
+
 	rcu_read_lock();
 	memcg = mem_cgroup_from_task(rcu_dereference((mm)->owner));
+	match = __mem_cgroup_same_or_subtree(cgroup, memcg);
 	rcu_read_unlock();
-	return cgroup == memcg;
+	return match;
 }
 
 extern struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *memcg);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index faad98e6d17d..4f71219cc53e 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1149,17 +1149,23 @@ struct lruvec *mem_cgroup_lru_move_lists(struct zone *zone,
  * Checks whether given mem is same or in the root_mem_cgroup's
  * hierarchy subtree
  */
-static bool mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg,
-		struct mem_cgroup *memcg)
+bool __mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg,
+				  struct mem_cgroup *memcg)
 {
-	bool ret;
-
 	if (root_memcg == memcg)
 		return true;
 	if (!root_memcg->use_hierarchy)
 		return false;
+	return css_is_ancestor(&memcg->css, &root_memcg->css);
+}
+
+static bool mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg,
+				       struct mem_cgroup *memcg)
+{
+	bool ret;
+
 	rcu_read_lock();
-	ret = css_is_ancestor(&memcg->css, &root_memcg->css);
+	ret = __mem_cgroup_same_or_subtree(root_memcg, memcg);
 	rcu_read_unlock();
 	return ret;
 }
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 68e5819d0f1b..8fffc65a84de 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -645,7 +645,8 @@ static enum page_references page_check_references(struct page *page,
 	int referenced_ptes, referenced_page;
 	unsigned long vm_flags;
 
-	referenced_ptes = page_referenced(page, 1, mz->mem_cgroup, &vm_flags);
+	referenced_ptes = page_referenced(page, 1, sc->target_mem_cgroup,
+					  &vm_flags);
 	referenced_page = TestClearPageReferenced(page);
 
 	/*
@@ -1513,7 +1514,8 @@ static void shrink_active_list(unsigned long nr_to_scan,
 			}
 		}
 
-		if (page_referenced(page, 0, mz->mem_cgroup, &vm_flags)) {
+		if (page_referenced(page, 0, sc->target_mem_cgroup,
+				    &vm_flags)) {
 			nr_rotated += hpage_nr_pages(page);
 			/*
 			 * Identify referenced, file-backed active pages and
-- 
cgit v1.3-7-g2ca7


From baf05aa9271bdbc07d3160035a231abc5fbd429a Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <khlebnikov@openvz.org>
Date: Tue, 29 May 2012 15:06:27 -0700
Subject: bug: introduce BUILD_BUG_ON_INVALID() macro

Sometimes we want to check some expressions correctness at compile time.
"(void)(e);" or "if (e);" can be dangerous if the expression has
side-effects, and gcc sometimes generates a lot of code, even if the
expression has no effect.

This patch introduces macro BUILD_BUG_ON_INVALID() for such checks, it
forces a compilation error if expression is invalid without any extra
code.

[Cast to "long" required because sizeof does not work for bit-fields.]

Signed-off-by: Konstantin Khlebnikov <khlebnikov@openvz.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Cong Wang <xiyou.wangcong@gmail.com>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bug.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/bug.h b/include/linux/bug.h
index 72961c39576a..aaac4bba6f5c 100644
--- a/include/linux/bug.h
+++ b/include/linux/bug.h
@@ -30,6 +30,13 @@ struct pt_regs;
 #define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); }))
 #define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:-!!(e); }))
 
+/*
+ * BUILD_BUG_ON_INVALID() permits the compiler to check the validity of the
+ * expression but avoids the generation of any code, even if that expression
+ * has side-effects.
+ */
+#define BUILD_BUG_ON_INVALID(e) ((void)(sizeof((__force long)(e))))
+
 /**
  * BUILD_BUG_ON - break compile if a condition is true.
  * @condition: the condition which the compiler should know is false.
-- 
cgit v1.3-7-g2ca7


From 02602a18c32d76f0e0f50eefa91b2d53c8a3a751 Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <khlebnikov@openvz.org>
Date: Tue, 29 May 2012 15:06:28 -0700
Subject: bug: completely remove code generated by disabled VM_BUG_ON()

Even if CONFIG_DEBUG_VM=n gcc genereates code for some VM_BUG_ON()

for example VM_BUG_ON(!PageCompound(page) || !PageHead(page)); in
do_huge_pmd_wp_page() generates 114 bytes of code.

But they mostly disappears when I split this VM_BUG_ON into two:

  -VM_BUG_ON(!PageCompound(page) || !PageHead(page));
  +VM_BUG_ON(!PageCompound(page));
  +VM_BUG_ON(!PageHead(page));

weird... but anyway after this patch code disappears completely.

  add/remove: 0/0 grow/shrink: 7/97 up/down: 135/-1784 (-1649)

Signed-off-by: Konstantin Khlebnikov <khlebnikov@openvz.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Cong Wang <xiyou.wangcong@gmail.com>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmdebug.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h
index c04ecfe03f7f..580bd587d916 100644
--- a/include/linux/mmdebug.h
+++ b/include/linux/mmdebug.h
@@ -4,7 +4,7 @@
 #ifdef CONFIG_DEBUG_VM
 #define VM_BUG_ON(cond) BUG_ON(cond)
 #else
-#define VM_BUG_ON(cond) do { (void)(cond); } while (0)
+#define VM_BUG_ON(cond) BUILD_BUG_ON_INVALID(cond)
 #endif
 
 #ifdef CONFIG_DEBUG_VIRTUAL
-- 
cgit v1.3-7-g2ca7


From 9295b7a07c859a42346221b5839be0ae612333b0 Mon Sep 17 00:00:00 2001
From: Ulrich Drepper <drepper@akkadia.org>
Date: Tue, 29 May 2012 15:06:30 -0700
Subject: kbuild: install kernel-page-flags.h

Programs using /proc/kpageflags need to know about the various flags.  The
<linux/kernel-page-flags.h> provides them and the comments in the file
indicate that it is supposed to be used by user-level code.  But the file
is not installed.

Install the headers and mark the unstable flags as out-of-bounds.  The
page-type tool is also adjusted to not duplicate the definitions

Signed-off-by: Ulrich Drepper <drepper@gmail.com>
Acked-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Acked-by: Fengguang Wu <fengguang.wu@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/Kbuild              |  1 +
 include/linux/kernel-page-flags.h |  4 ++++
 tools/vm/page-types.c             | 28 +---------------------------
 3 files changed, 6 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index 4cd59b95858f..7185b8f15ced 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -225,6 +225,7 @@ header-y += kd.h
 header-y += kdev_t.h
 header-y += kernel.h
 header-y += kernelcapi.h
+header-y += kernel-page-flags.h
 header-y += keyboard.h
 header-y += keyctl.h
 header-y += l2tp.h
diff --git a/include/linux/kernel-page-flags.h b/include/linux/kernel-page-flags.h
index 26a65711676f..a1bdf6966357 100644
--- a/include/linux/kernel-page-flags.h
+++ b/include/linux/kernel-page-flags.h
@@ -32,6 +32,8 @@
 #define KPF_KSM			21
 #define KPF_THP			22
 
+#ifdef __KERNEL__
+
 /* kernel hacking assistances
  * WARNING: subject to change, never rely on them!
  */
@@ -44,4 +46,6 @@
 #define KPF_ARCH		38
 #define KPF_UNCACHED		39
 
+#endif /* __KERNEL__ */
+
 #endif /* LINUX_KERNEL_PAGE_FLAGS_H */
diff --git a/tools/vm/page-types.c b/tools/vm/page-types.c
index 7dab7b25b5c6..f77c96bec7eb 100644
--- a/tools/vm/page-types.c
+++ b/tools/vm/page-types.c
@@ -35,6 +35,7 @@
 #include <sys/mount.h>
 #include <sys/statfs.h>
 #include "../../include/linux/magic.h"
+#include "../../include/linux/kernel-page-flags.h"
 
 
 #ifndef MAX_PATH
@@ -73,33 +74,6 @@
 #define KPF_BYTES		8
 #define PROC_KPAGEFLAGS		"/proc/kpageflags"
 
-/* copied from kpageflags_read() */
-#define KPF_LOCKED		0
-#define KPF_ERROR		1
-#define KPF_REFERENCED		2
-#define KPF_UPTODATE		3
-#define KPF_DIRTY		4
-#define KPF_LRU			5
-#define KPF_ACTIVE		6
-#define KPF_SLAB		7
-#define KPF_WRITEBACK		8
-#define KPF_RECLAIM		9
-#define KPF_BUDDY		10
-
-/* [11-20] new additions in 2.6.31 */
-#define KPF_MMAP		11
-#define KPF_ANON		12
-#define KPF_SWAPCACHE		13
-#define KPF_SWAPBACKED		14
-#define KPF_COMPOUND_HEAD	15
-#define KPF_COMPOUND_TAIL	16
-#define KPF_HUGE		17
-#define KPF_UNEVICTABLE		18
-#define KPF_HWPOISON		19
-#define KPF_NOPAGE		20
-#define KPF_KSM			21
-#define KPF_THP			22
-
 /* [32-] kernel hacking assistances */
 #define KPF_RESERVED		32
 #define KPF_MLOCKED		33
-- 
cgit v1.3-7-g2ca7


From 2099597401c7710c00b0d7c32b24a44a193836e1 Mon Sep 17 00:00:00 2001
From: Alex Shi <alex.shi@intel.com>
Date: Tue, 29 May 2012 15:06:31 -0700
Subject: mm: move is_vma_temporary_stack() declaration to huge_mm.h

When transparent_hugepage_enabled() is used outside mm/, such as in
arch/x86/xx/tlb.c:

+       if (!cpu_has_invlpg || vma->vm_flags & VM_HUGETLB
+                       || transparent_hugepage_enabled(vma)) {
+               flush_tlb_mm(vma->vm_mm);

is_vma_temporary_stack() isn't referenced in huge_mm.h, so it has compile
errors:

  arch/x86/mm/tlb.c: In function `flush_tlb_range':
  arch/x86/mm/tlb.c:324:4: error: implicit declaration of function `is_vma_temporary_stack' [-Werror=implicit-function-declaration]

Since is_vma_temporay_stack() is just used in rmap.c and huge_memory.c, it
is better to move it to huge_mm.h from rmap.h to avoid such errors.

Signed-off-by: Alex Shi <alex.shi@intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/huge_mm.h | 2 ++
 include/linux/rmap.h    | 2 --
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index c8af7a2efb52..4c59b1131187 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -59,6 +59,8 @@ extern pmd_t *page_check_address_pmd(struct page *page,
 #define HPAGE_PMD_MASK HPAGE_MASK
 #define HPAGE_PMD_SIZE HPAGE_SIZE
 
+extern bool is_vma_temporary_stack(struct vm_area_struct *vma);
+
 #define transparent_hugepage_enabled(__vma)				\
 	((transparent_hugepage_flags &					\
 	  (1<<TRANSPARENT_HUGEPAGE_FLAG) ||				\
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index fd07c4542cee..3fce545df394 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -173,8 +173,6 @@ enum ttu_flags {
 };
 #define TTU_ACTION(x) ((x) & TTU_ACTION_MASK)
 
-bool is_vma_temporary_stack(struct vm_area_struct *vma);
-
 int try_to_unmap(struct page *, enum ttu_flags flags);
 int try_to_unmap_one(struct page *, struct vm_area_struct *,
 			unsigned long address, enum ttu_flags flags);
-- 
cgit v1.3-7-g2ca7


From 238305bb4d418c95977162ba13c11880685fc731 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Tue, 29 May 2012 15:06:36 -0700
Subject: mm: remove sparsemem allocation details from the bootmem allocator

alloc_bootmem_section() derives allocation area constraints from the
specified sparsemem section.  This is a bit specific for a generic memory
allocator like bootmem, though, so move it over to sparsemem.

As __alloc_bootmem_node_nopanic() already retries failed allocations with
relaxed area constraints, the fallback code in sparsemem.c can be removed
and the code becomes a bit more compact overall.

[akpm@linux-foundation.org: fix build]
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Tejun Heo <tj@kernel.org>
Acked-by: David S. Miller <davem@davemloft.net>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Gavin Shan <shangw@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bootmem.h |  3 ---
 mm/bootmem.c            | 22 ----------------------
 mm/nobootmem.c          | 22 ----------------------
 mm/sparse.c             | 25 ++++++++++++-------------
 4 files changed, 12 insertions(+), 60 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index 1a0cd270bb7a..324fe08ea3b1 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -135,9 +135,6 @@ extern void *__alloc_bootmem_low_node(pg_data_t *pgdat,
 extern int reserve_bootmem_generic(unsigned long addr, unsigned long size,
 				   int flags);
 
-extern void *alloc_bootmem_section(unsigned long size,
-				   unsigned long section_nr);
-
 #ifdef CONFIG_HAVE_ARCH_ALLOC_REMAP
 extern void *alloc_remap(int nid, unsigned long size);
 #else
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 9d0f26664b3b..d1c7a79d6f3a 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -803,28 +803,6 @@ void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size,
 
 }
 
-#ifdef CONFIG_SPARSEMEM
-/**
- * alloc_bootmem_section - allocate boot memory from a specific section
- * @size: size of the request in bytes
- * @section_nr: sparse map section to allocate from
- *
- * Return NULL on failure.
- */
-void * __init alloc_bootmem_section(unsigned long size,
-				    unsigned long section_nr)
-{
-	bootmem_data_t *bdata;
-	unsigned long pfn, goal;
-
-	pfn = section_nr_to_pfn(section_nr);
-	goal = pfn << PAGE_SHIFT;
-	bdata = &bootmem_node_data[early_pfn_to_nid(pfn)];
-
-	return alloc_bootmem_bdata(bdata, size, SMP_CACHE_BYTES, goal, 0);
-}
-#endif
-
 #ifndef ARCH_LOW_ADDRESS_LIMIT
 #define ARCH_LOW_ADDRESS_LIMIT	0xffffffffUL
 #endif
diff --git a/mm/nobootmem.c b/mm/nobootmem.c
index 9f4048149f64..d23415c001bc 100644
--- a/mm/nobootmem.c
+++ b/mm/nobootmem.c
@@ -355,28 +355,6 @@ void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size,
 	return __alloc_bootmem_node(pgdat, size, align, goal);
 }
 
-#ifdef CONFIG_SPARSEMEM
-/**
- * alloc_bootmem_section - allocate boot memory from a specific section
- * @size: size of the request in bytes
- * @section_nr: sparse map section to allocate from
- *
- * Return NULL on failure.
- */
-void * __init alloc_bootmem_section(unsigned long size,
-				    unsigned long section_nr)
-{
-	unsigned long pfn, goal, limit;
-
-	pfn = section_nr_to_pfn(section_nr);
-	goal = pfn << PAGE_SHIFT;
-	limit = section_nr_to_pfn(section_nr + 1) << PAGE_SHIFT;
-
-	return __alloc_memory_core_early(early_pfn_to_nid(pfn), size,
-					 SMP_CACHE_BYTES, goal, limit);
-}
-#endif
-
 #ifndef ARCH_LOW_ADDRESS_LIMIT
 #define ARCH_LOW_ADDRESS_LIMIT	0xffffffffUL
 #endif
diff --git a/mm/sparse.c b/mm/sparse.c
index a8bc7d364deb..6a4bf9160e85 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -273,10 +273,10 @@ static unsigned long *__kmalloc_section_usemap(void)
 #ifdef CONFIG_MEMORY_HOTREMOVE
 static unsigned long * __init
 sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
-					 unsigned long count)
+					 unsigned long size)
 {
-	unsigned long section_nr;
-
+	pg_data_t *host_pgdat;
+	unsigned long goal;
 	/*
 	 * A page may contain usemaps for other sections preventing the
 	 * page being freed and making a section unremovable while
@@ -287,8 +287,10 @@ sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
 	 * from the same section as the pgdat where possible to avoid
 	 * this problem.
 	 */
-	section_nr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT);
-	return alloc_bootmem_section(usemap_size() * count, section_nr);
+	goal = __pa(pgdat) & PAGE_SECTION_MASK;
+	host_pgdat = NODE_DATA(early_pfn_to_nid(goal >> PAGE_SHIFT));
+	return __alloc_bootmem_node_nopanic(host_pgdat, size,
+					    SMP_CACHE_BYTES, goal);
 }
 
 static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
@@ -332,9 +334,9 @@ static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
 #else
 static unsigned long * __init
 sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
-					 unsigned long count)
+					 unsigned long size)
 {
-	return NULL;
+	return alloc_bootmem_node_nopanic(pgdat, size);
 }
 
 static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
@@ -352,13 +354,10 @@ static void __init sparse_early_usemaps_alloc_node(unsigned long**usemap_map,
 	int size = usemap_size();
 
 	usemap = sparse_early_usemaps_alloc_pgdat_section(NODE_DATA(nodeid),
-								 usemap_count);
+							  size * usemap_count);
 	if (!usemap) {
-		usemap = alloc_bootmem_node(NODE_DATA(nodeid), size * usemap_count);
-		if (!usemap) {
-			printk(KERN_WARNING "%s: allocation failed\n", __func__);
-			return;
-		}
+		printk(KERN_WARNING "%s: allocation failed\n", __func__);
+		return;
 	}
 
 	for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
-- 
cgit v1.3-7-g2ca7


From 5ceb9ce6fe9462a298bb2cd5c9f1ca6cb80a0199 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Date: Tue, 29 May 2012 15:06:37 -0700
Subject: mm: compaction: handle incorrect MIGRATE_UNMOVABLE type pageblocks

When MIGRATE_UNMOVABLE pages are freed from MIGRATE_UNMOVABLE type
pageblock (and some MIGRATE_MOVABLE pages are left in it) waiting until an
allocation takes ownership of the block may take too long.  The type of
the pageblock remains unchanged so the pageblock cannot be used as a
migration target during compaction.

Fix it by:

* Adding enum compact_mode (COMPACT_ASYNC_[MOVABLE,UNMOVABLE], and
  COMPACT_SYNC) and then converting sync field in struct compact_control
  to use it.

* Adding nr_pageblocks_skipped field to struct compact_control and
  tracking how many destination pageblocks were of MIGRATE_UNMOVABLE type.
   If COMPACT_ASYNC_MOVABLE mode compaction ran fully in
  try_to_compact_pages() (COMPACT_COMPLETE) it implies that there is not a
  suitable page for allocation.  In this case then check how if there were
  enough MIGRATE_UNMOVABLE pageblocks to try a second pass in
  COMPACT_ASYNC_UNMOVABLE mode.

* Scanning the MIGRATE_UNMOVABLE pageblocks (during COMPACT_SYNC and
  COMPACT_ASYNC_UNMOVABLE compaction modes) and building a count based on
  finding PageBuddy pages, page_count(page) == 0 or PageLRU pages.  If all
  pages within the MIGRATE_UNMOVABLE pageblock are in one of those three
  sets change the whole pageblock type to MIGRATE_MOVABLE.

My particular test case (on a ARM EXYNOS4 device with 512 MiB, which means
131072 standard 4KiB pages in 'Normal' zone) is to:

- allocate 120000 pages for kernel's usage
- free every second page (60000 pages) of memory just allocated
- allocate and use 60000 pages from user space
- free remaining 60000 pages of kernel memory
  (now we have fragmented memory occupied mostly by user space pages)
- try to allocate 100 order-9 (2048 KiB) pages for kernel's usage

The results:
- with compaction disabled I get 11 successful allocations
- with compaction enabled - 14 successful allocations
- with this patch I'm able to get all 100 successful allocations

NOTE: If we can make kswapd aware of order-0 request during compaction, we
can enhance kswapd with changing mode to COMPACT_ASYNC_FULL
(COMPACT_ASYNC_MOVABLE + COMPACT_ASYNC_UNMOVABLE).  Please see the
following thread:

	http://marc.info/?l=linux-mm&m=133552069417068&w=2

[minchan@kernel.org: minor cleanups]
Cc: Mel Gorman <mgorman@suse.de>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compaction.h |  19 ++++++
 mm/compaction.c            | 142 +++++++++++++++++++++++++++++++++++++--------
 mm/internal.h              |   9 ++-
 mm/page_alloc.c            |   8 +--
 4 files changed, 150 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index 51a90b7f2d60..e988037abd2a 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -1,6 +1,8 @@
 #ifndef _LINUX_COMPACTION_H
 #define _LINUX_COMPACTION_H
 
+#include <linux/node.h>
+
 /* Return values for compact_zone() and try_to_compact_pages() */
 /* compaction didn't start as it was not possible or direct reclaim was more suitable */
 #define COMPACT_SKIPPED		0
@@ -11,6 +13,23 @@
 /* The full zone was compacted */
 #define COMPACT_COMPLETE	3
 
+/*
+ * compaction supports three modes
+ *
+ * COMPACT_ASYNC_MOVABLE uses asynchronous migration and only scans
+ *    MIGRATE_MOVABLE pageblocks as migration sources and targets.
+ * COMPACT_ASYNC_UNMOVABLE uses asynchronous migration and only scans
+ *    MIGRATE_MOVABLE pageblocks as migration sources.
+ *    MIGRATE_UNMOVABLE pageblocks are scanned as potential migration
+ *    targets and convers them to MIGRATE_MOVABLE if possible
+ * COMPACT_SYNC uses synchronous migration and scans all pageblocks
+ */
+enum compact_mode {
+	COMPACT_ASYNC_MOVABLE,
+	COMPACT_ASYNC_UNMOVABLE,
+	COMPACT_SYNC,
+};
+
 #ifdef CONFIG_COMPACTION
 extern int sysctl_compact_memory;
 extern int sysctl_compaction_handler(struct ctl_table *table, int write,
diff --git a/mm/compaction.c b/mm/compaction.c
index da7d35ea5103..840ee288e296 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -235,7 +235,7 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
 	 */
 	while (unlikely(too_many_isolated(zone))) {
 		/* async migration should just abort */
-		if (!cc->sync)
+		if (cc->mode != COMPACT_SYNC)
 			return 0;
 
 		congestion_wait(BLK_RW_ASYNC, HZ/10);
@@ -303,7 +303,8 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
 		 * satisfies the allocation
 		 */
 		pageblock_nr = low_pfn >> pageblock_order;
-		if (!cc->sync && last_pageblock_nr != pageblock_nr &&
+		if (cc->mode != COMPACT_SYNC &&
+		    last_pageblock_nr != pageblock_nr &&
 		    !migrate_async_suitable(get_pageblock_migratetype(page))) {
 			low_pfn += pageblock_nr_pages;
 			low_pfn = ALIGN(low_pfn, pageblock_nr_pages) - 1;
@@ -324,7 +325,7 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
 			continue;
 		}
 
-		if (!cc->sync)
+		if (cc->mode != COMPACT_SYNC)
 			mode |= ISOLATE_ASYNC_MIGRATE;
 
 		/* Try isolate the page */
@@ -357,27 +358,90 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
 
 #endif /* CONFIG_COMPACTION || CONFIG_CMA */
 #ifdef CONFIG_COMPACTION
+/*
+ * Returns true if MIGRATE_UNMOVABLE pageblock was successfully
+ * converted to MIGRATE_MOVABLE type, false otherwise.
+ */
+static bool rescue_unmovable_pageblock(struct page *page)
+{
+	unsigned long pfn, start_pfn, end_pfn;
+	struct page *start_page, *end_page;
+
+	pfn = page_to_pfn(page);
+	start_pfn = pfn & ~(pageblock_nr_pages - 1);
+	end_pfn = start_pfn + pageblock_nr_pages;
+
+	start_page = pfn_to_page(start_pfn);
+	end_page = pfn_to_page(end_pfn);
+
+	/* Do not deal with pageblocks that overlap zones */
+	if (page_zone(start_page) != page_zone(end_page))
+		return false;
+
+	for (page = start_page, pfn = start_pfn; page < end_page; pfn++,
+								  page++) {
+		if (!pfn_valid_within(pfn))
+			continue;
+
+		if (PageBuddy(page)) {
+			int order = page_order(page);
+
+			pfn += (1 << order) - 1;
+			page += (1 << order) - 1;
+
+			continue;
+		} else if (page_count(page) == 0 || PageLRU(page))
+			continue;
+
+		return false;
+	}
+
+	set_pageblock_migratetype(page, MIGRATE_MOVABLE);
+	move_freepages_block(page_zone(page), page, MIGRATE_MOVABLE);
+	return true;
+}
 
-/* Returns true if the page is within a block suitable for migration to */
-static bool suitable_migration_target(struct page *page)
+enum smt_result {
+	GOOD_AS_MIGRATION_TARGET,
+	FAIL_UNMOVABLE_TARGET,
+	FAIL_BAD_TARGET,
+};
+
+/*
+ * Returns GOOD_AS_MIGRATION_TARGET if the page is within a block
+ * suitable for migration to, FAIL_UNMOVABLE_TARGET if the page
+ * is within a MIGRATE_UNMOVABLE block, FAIL_BAD_TARGET otherwise.
+ */
+static enum smt_result suitable_migration_target(struct page *page,
+				      struct compact_control *cc)
 {
 
 	int migratetype = get_pageblock_migratetype(page);
 
 	/* Don't interfere with memory hot-remove or the min_free_kbytes blocks */
 	if (migratetype == MIGRATE_ISOLATE || migratetype == MIGRATE_RESERVE)
-		return false;
+		return FAIL_BAD_TARGET;
 
 	/* If the page is a large free page, then allow migration */
 	if (PageBuddy(page) && page_order(page) >= pageblock_order)
-		return true;
+		return GOOD_AS_MIGRATION_TARGET;
 
 	/* If the block is MIGRATE_MOVABLE or MIGRATE_CMA, allow migration */
-	if (migrate_async_suitable(migratetype))
-		return true;
+	if (cc->mode != COMPACT_ASYNC_UNMOVABLE &&
+	    migrate_async_suitable(migratetype))
+		return GOOD_AS_MIGRATION_TARGET;
+
+	if (cc->mode == COMPACT_ASYNC_MOVABLE &&
+	    migratetype == MIGRATE_UNMOVABLE)
+		return FAIL_UNMOVABLE_TARGET;
+
+	if (cc->mode != COMPACT_ASYNC_MOVABLE &&
+	    migratetype == MIGRATE_UNMOVABLE &&
+	    rescue_unmovable_pageblock(page))
+		return GOOD_AS_MIGRATION_TARGET;
 
 	/* Otherwise skip the block */
-	return false;
+	return FAIL_BAD_TARGET;
 }
 
 /*
@@ -410,6 +474,13 @@ static void isolate_freepages(struct zone *zone,
 
 	zone_end_pfn = zone->zone_start_pfn + zone->spanned_pages;
 
+	/*
+	 * isolate_freepages() may be called more than once during
+	 * compact_zone_order() run and we want only the most recent
+	 * count.
+	 */
+	cc->nr_pageblocks_skipped = 0;
+
 	/*
 	 * Isolate free pages until enough are available to migrate the
 	 * pages on cc->migratepages. We stop searching if the migrate
@@ -418,6 +489,7 @@ static void isolate_freepages(struct zone *zone,
 	for (; pfn > low_pfn && cc->nr_migratepages > nr_freepages;
 					pfn -= pageblock_nr_pages) {
 		unsigned long isolated;
+		enum smt_result ret;
 
 		if (!pfn_valid(pfn))
 			continue;
@@ -434,9 +506,12 @@ static void isolate_freepages(struct zone *zone,
 			continue;
 
 		/* Check the block is suitable for migration */
-		if (!suitable_migration_target(page))
+		ret = suitable_migration_target(page, cc);
+		if (ret != GOOD_AS_MIGRATION_TARGET) {
+			if (ret == FAIL_UNMOVABLE_TARGET)
+				cc->nr_pageblocks_skipped++;
 			continue;
-
+		}
 		/*
 		 * Found a block suitable for isolating free pages from. Now
 		 * we disabled interrupts, double check things are ok and
@@ -445,12 +520,14 @@ static void isolate_freepages(struct zone *zone,
 		 */
 		isolated = 0;
 		spin_lock_irqsave(&zone->lock, flags);
-		if (suitable_migration_target(page)) {
+		ret = suitable_migration_target(page, cc);
+		if (ret == GOOD_AS_MIGRATION_TARGET) {
 			end_pfn = min(pfn + pageblock_nr_pages, zone_end_pfn);
 			isolated = isolate_freepages_block(pfn, end_pfn,
 							   freelist, false);
 			nr_freepages += isolated;
-		}
+		} else if (ret == FAIL_UNMOVABLE_TARGET)
+			cc->nr_pageblocks_skipped++;
 		spin_unlock_irqrestore(&zone->lock, flags);
 
 		/*
@@ -682,8 +759,9 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
 
 		nr_migrate = cc->nr_migratepages;
 		err = migrate_pages(&cc->migratepages, compaction_alloc,
-				(unsigned long)cc, false,
-				cc->sync ? MIGRATE_SYNC_LIGHT : MIGRATE_ASYNC);
+			(unsigned long)&cc->freepages, false,
+			(cc->mode == COMPACT_SYNC) ? MIGRATE_SYNC_LIGHT
+						      : MIGRATE_ASYNC);
 		update_nr_listpages(cc);
 		nr_remaining = cc->nr_migratepages;
 
@@ -712,7 +790,8 @@ out:
 
 static unsigned long compact_zone_order(struct zone *zone,
 				 int order, gfp_t gfp_mask,
-				 bool sync)
+				 enum compact_mode mode,
+				 unsigned long *nr_pageblocks_skipped)
 {
 	struct compact_control cc = {
 		.nr_freepages = 0,
@@ -720,12 +799,17 @@ static unsigned long compact_zone_order(struct zone *zone,
 		.order = order,
 		.migratetype = allocflags_to_migratetype(gfp_mask),
 		.zone = zone,
-		.sync = sync,
+		.mode = mode,
 	};
+	unsigned long rc;
+
 	INIT_LIST_HEAD(&cc.freepages);
 	INIT_LIST_HEAD(&cc.migratepages);
 
-	return compact_zone(zone, &cc);
+	rc = compact_zone(zone, &cc);
+	*nr_pageblocks_skipped = cc.nr_pageblocks_skipped;
+
+	return rc;
 }
 
 int sysctl_extfrag_threshold = 500;
@@ -750,6 +834,8 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
 	struct zoneref *z;
 	struct zone *zone;
 	int rc = COMPACT_SKIPPED;
+	unsigned long nr_pageblocks_skipped;
+	enum compact_mode mode;
 
 	/*
 	 * Check whether it is worth even starting compaction. The order check is
@@ -766,12 +852,22 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
 								nodemask) {
 		int status;
 
-		status = compact_zone_order(zone, order, gfp_mask, sync);
+		mode = sync ? COMPACT_SYNC : COMPACT_ASYNC_MOVABLE;
+retry:
+		status = compact_zone_order(zone, order, gfp_mask, mode,
+						&nr_pageblocks_skipped);
 		rc = max(status, rc);
 
 		/* If a normal allocation would succeed, stop compacting */
 		if (zone_watermark_ok(zone, order, low_wmark_pages(zone), 0, 0))
 			break;
+
+		if (rc == COMPACT_COMPLETE && mode == COMPACT_ASYNC_MOVABLE) {
+			if (nr_pageblocks_skipped) {
+				mode = COMPACT_ASYNC_UNMOVABLE;
+				goto retry;
+			}
+		}
 	}
 
 	return rc;
@@ -805,7 +901,7 @@ static int __compact_pgdat(pg_data_t *pgdat, struct compact_control *cc)
 			if (ok && cc->order > zone->compact_order_failed)
 				zone->compact_order_failed = cc->order + 1;
 			/* Currently async compaction is never deferred. */
-			else if (!ok && cc->sync)
+			else if (!ok && cc->mode == COMPACT_SYNC)
 				defer_compaction(zone, cc->order);
 		}
 
@@ -820,7 +916,7 @@ int compact_pgdat(pg_data_t *pgdat, int order)
 {
 	struct compact_control cc = {
 		.order = order,
-		.sync = false,
+		.mode = COMPACT_ASYNC_MOVABLE,
 	};
 
 	return __compact_pgdat(pgdat, &cc);
@@ -830,7 +926,7 @@ static int compact_node(int nid)
 {
 	struct compact_control cc = {
 		.order = -1,
-		.sync = true,
+		.mode = COMPACT_SYNC,
 	};
 
 	return __compact_pgdat(NODE_DATA(nid), &cc);
diff --git a/mm/internal.h b/mm/internal.h
index 8b0fc8da8028..4194ab9dc19b 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -94,6 +94,9 @@ extern void putback_lru_page(struct page *page);
 /*
  * in mm/page_alloc.c
  */
+extern void set_pageblock_migratetype(struct page *page, int migratetype);
+extern int move_freepages_block(struct zone *zone, struct page *page,
+				int migratetype);
 extern void __free_pages_bootmem(struct page *page, unsigned int order);
 extern void prep_compound_page(struct page *page, unsigned long order);
 #ifdef CONFIG_MEMORY_FAILURE
@@ -101,6 +104,7 @@ extern bool is_free_buddy_page(struct page *page);
 #endif
 
 #if defined CONFIG_COMPACTION || defined CONFIG_CMA
+#include <linux/compaction.h>
 
 /*
  * in mm/compaction.c
@@ -119,11 +123,14 @@ struct compact_control {
 	unsigned long nr_migratepages;	/* Number of pages to migrate */
 	unsigned long free_pfn;		/* isolate_freepages search base */
 	unsigned long migrate_pfn;	/* isolate_migratepages search base */
-	bool sync;			/* Synchronous migration */
+	enum compact_mode mode;		/* Compaction mode */
 
 	int order;			/* order a direct compactor needs */
 	int migratetype;		/* MOVABLE, RECLAIMABLE etc */
 	struct zone *zone;
+
+	/* Number of UNMOVABLE destination pageblocks skipped during scan */
+	unsigned long nr_pageblocks_skipped;
 };
 
 unsigned long
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 84f2c599d5d4..457b4de122f4 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -219,7 +219,7 @@ EXPORT_SYMBOL(nr_online_nodes);
 
 int page_group_by_mobility_disabled __read_mostly;
 
-static void set_pageblock_migratetype(struct page *page, int migratetype)
+void set_pageblock_migratetype(struct page *page, int migratetype)
 {
 
 	if (unlikely(page_group_by_mobility_disabled))
@@ -954,8 +954,8 @@ static int move_freepages(struct zone *zone,
 	return pages_moved;
 }
 
-static int move_freepages_block(struct zone *zone, struct page *page,
-				int migratetype)
+int move_freepages_block(struct zone *zone, struct page *page,
+			 int migratetype)
 {
 	unsigned long start_pfn, end_pfn;
 	struct page *start_page, *end_page;
@@ -5657,7 +5657,7 @@ static int __alloc_contig_migrate_range(unsigned long start, unsigned long end)
 		.nr_migratepages = 0,
 		.order = -1,
 		.zone = page_zone(pfn_to_page(start)),
-		.sync = true,
+		.mode = COMPACT_SYNC,
 	};
 	INIT_LIST_HEAD(&cc.migratepages);
 
-- 
cgit v1.3-7-g2ca7


From bde05d1ccd512696b09db9dd2e5f33ad19152605 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Tue, 29 May 2012 15:06:38 -0700
Subject: shmem: replace page if mapping excludes its zone

The GMA500 GPU driver uses GEM shmem objects, but with a new twist: the
backing RAM has to be below 4GB.  Not a problem while the boards
supported only 4GB: but now Intel's D2700MUD boards support 8GB, and
their GMA3600 is managed by the GMA500 driver.

shmem/tmpfs has never pretended to support hardware restrictions on the
backing memory, but it might have appeared to do so before v3.1, and
even now it works fine until a page is swapped out then back in.  When
read_cache_page_gfp() supplied a freshly allocated page for copy, that
compensated for whatever choice might have been made by earlier swapin
readahead; but swapoff was likely to destroy the illusion.

We'd like to continue to support GMA500, so now add a new
shmem_should_replace_page() check on the zone when about to move a page
from swapcache to filecache (in swapin and swapoff cases), with
shmem_replace_page() to allocate and substitute a suitable page (given
gma500/gem.c's mapping_set_gfp_mask GFP_KERNEL | __GFP_DMA32).

This does involve a minor extension to mem_cgroup_replace_page_cache()
(the page may or may not have already been charged); and I've removed a
comment and call to mem_cgroup_uncharge_cache_page(), which in fact is
always a no-op while PageSwapCache.

Also removed optimization of an unlikely path in shmem_getpage_gfp(),
now that we need to check PageSwapCache more carefully (a racing caller
might already have made the copy).  And at one point shmem_unuse_inode()
needs to use the hitherto private page_swapcount(), to guard against
racing with inode eviction.

It would make sense to extend shmem_should_replace_page(), to cover
cpuset and NUMA mempolicy restrictions too, but set that aside for now:
needs a cleanup of shmem mempolicy handling, and more testing, and ought
to handle swap faults in do_swap_page() as well as shmem.

Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: Christoph Hellwig <hch@infradead.org>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: Stephane Marchesin <marcheu@chromium.org>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Dave Airlie <airlied@gmail.com>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: Rob Clark <rob.clark@linaro.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swap.h |   6 +++
 mm/memcontrol.c      |  17 +++++--
 mm/shmem.c           | 141 ++++++++++++++++++++++++++++++++++++++++++++-------
 mm/swapfile.c        |   2 +-
 4 files changed, 142 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index bc3073ce95cc..d965c4bfab3a 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -351,6 +351,7 @@ extern int swap_type_of(dev_t, sector_t, struct block_device **);
 extern unsigned int count_swap_pages(int, int);
 extern sector_t map_swap_page(struct page *, struct block_device **);
 extern sector_t swapdev_block(int, pgoff_t);
+extern int page_swapcount(struct page *);
 extern int reuse_swap_page(struct page *);
 extern int try_to_free_swap(struct page *);
 struct backing_dev_info;
@@ -445,6 +446,11 @@ static inline void delete_from_swap_cache(struct page *page)
 {
 }
 
+static inline int page_swapcount(struct page *page)
+{
+	return 0;
+}
+
 #define reuse_swap_page(page)	(page_mapcount(page) == 1)
 
 static inline int try_to_free_swap(struct page *page)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 4f71219cc53e..d7ce417cae7c 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3373,7 +3373,7 @@ void mem_cgroup_end_migration(struct mem_cgroup *memcg,
 void mem_cgroup_replace_page_cache(struct page *oldpage,
 				  struct page *newpage)
 {
-	struct mem_cgroup *memcg;
+	struct mem_cgroup *memcg = NULL;
 	struct page_cgroup *pc;
 	enum charge_type type = MEM_CGROUP_CHARGE_TYPE_CACHE;
 
@@ -3383,11 +3383,20 @@ void mem_cgroup_replace_page_cache(struct page *oldpage,
 	pc = lookup_page_cgroup(oldpage);
 	/* fix accounting on old pages */
 	lock_page_cgroup(pc);
-	memcg = pc->mem_cgroup;
-	mem_cgroup_charge_statistics(memcg, false, -1);
-	ClearPageCgroupUsed(pc);
+	if (PageCgroupUsed(pc)) {
+		memcg = pc->mem_cgroup;
+		mem_cgroup_charge_statistics(memcg, false, -1);
+		ClearPageCgroupUsed(pc);
+	}
 	unlock_page_cgroup(pc);
 
+	/*
+	 * When called from shmem_replace_page(), in some cases the
+	 * oldpage has already been charged, and in some cases not.
+	 */
+	if (!memcg)
+		return;
+
 	if (PageSwapBacked(oldpage))
 		type = MEM_CGROUP_CHARGE_TYPE_SHMEM;
 
diff --git a/mm/shmem.c b/mm/shmem.c
index be5af34a070d..db72d8e44ec6 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -103,6 +103,9 @@ static unsigned long shmem_default_max_inodes(void)
 }
 #endif
 
+static bool shmem_should_replace_page(struct page *page, gfp_t gfp);
+static int shmem_replace_page(struct page **pagep, gfp_t gfp,
+				struct shmem_inode_info *info, pgoff_t index);
 static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
 	struct page **pagep, enum sgp_type sgp, gfp_t gfp, int *fault_type);
 
@@ -604,12 +607,13 @@ static void shmem_evict_inode(struct inode *inode)
  * If swap found in inode, free it and move page from swapcache to filecache.
  */
 static int shmem_unuse_inode(struct shmem_inode_info *info,
-			     swp_entry_t swap, struct page *page)
+			     swp_entry_t swap, struct page **pagep)
 {
 	struct address_space *mapping = info->vfs_inode.i_mapping;
 	void *radswap;
 	pgoff_t index;
-	int error;
+	gfp_t gfp;
+	int error = 0;
 
 	radswap = swp_to_radix_entry(swap);
 	index = radix_tree_locate_item(&mapping->page_tree, radswap);
@@ -625,22 +629,37 @@ static int shmem_unuse_inode(struct shmem_inode_info *info,
 	if (shmem_swaplist.next != &info->swaplist)
 		list_move_tail(&shmem_swaplist, &info->swaplist);
 
+	gfp = mapping_gfp_mask(mapping);
+	if (shmem_should_replace_page(*pagep, gfp)) {
+		mutex_unlock(&shmem_swaplist_mutex);
+		error = shmem_replace_page(pagep, gfp, info, index);
+		mutex_lock(&shmem_swaplist_mutex);
+		/*
+		 * We needed to drop mutex to make that restrictive page
+		 * allocation; but the inode might already be freed by now,
+		 * and we cannot refer to inode or mapping or info to check.
+		 * However, we do hold page lock on the PageSwapCache page,
+		 * so can check if that still has our reference remaining.
+		 */
+		if (!page_swapcount(*pagep))
+			error = -ENOENT;
+	}
+
 	/*
 	 * We rely on shmem_swaplist_mutex, not only to protect the swaplist,
 	 * but also to hold up shmem_evict_inode(): so inode cannot be freed
 	 * beneath us (pagelock doesn't help until the page is in pagecache).
 	 */
-	error = shmem_add_to_page_cache(page, mapping, index,
+	if (!error)
+		error = shmem_add_to_page_cache(*pagep, mapping, index,
 						GFP_NOWAIT, radswap);
-	/* which does mem_cgroup_uncharge_cache_page on error */
-
 	if (error != -ENOMEM) {
 		/*
 		 * Truncation and eviction use free_swap_and_cache(), which
 		 * only does trylock page: if we raced, best clean up here.
 		 */
-		delete_from_swap_cache(page);
-		set_page_dirty(page);
+		delete_from_swap_cache(*pagep);
+		set_page_dirty(*pagep);
 		if (!error) {
 			spin_lock(&info->lock);
 			info->swapped--;
@@ -660,7 +679,14 @@ int shmem_unuse(swp_entry_t swap, struct page *page)
 	struct list_head *this, *next;
 	struct shmem_inode_info *info;
 	int found = 0;
-	int error;
+	int error = 0;
+
+	/*
+	 * There's a faint possibility that swap page was replaced before
+	 * caller locked it: it will come back later with the right page.
+	 */
+	if (unlikely(!PageSwapCache(page)))
+		goto out;
 
 	/*
 	 * Charge page using GFP_KERNEL while we can wait, before taking
@@ -676,7 +702,7 @@ int shmem_unuse(swp_entry_t swap, struct page *page)
 	list_for_each_safe(this, next, &shmem_swaplist) {
 		info = list_entry(this, struct shmem_inode_info, swaplist);
 		if (info->swapped)
-			found = shmem_unuse_inode(info, swap, page);
+			found = shmem_unuse_inode(info, swap, &page);
 		else
 			list_del_init(&info->swaplist);
 		cond_resched();
@@ -685,8 +711,6 @@ int shmem_unuse(swp_entry_t swap, struct page *page)
 	}
 	mutex_unlock(&shmem_swaplist_mutex);
 
-	if (!found)
-		mem_cgroup_uncharge_cache_page(page);
 	if (found < 0)
 		error = found;
 out:
@@ -855,6 +879,84 @@ static inline struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
 }
 #endif
 
+/*
+ * When a page is moved from swapcache to shmem filecache (either by the
+ * usual swapin of shmem_getpage_gfp(), or by the less common swapoff of
+ * shmem_unuse_inode()), it may have been read in earlier from swap, in
+ * ignorance of the mapping it belongs to.  If that mapping has special
+ * constraints (like the gma500 GEM driver, which requires RAM below 4GB),
+ * we may need to copy to a suitable page before moving to filecache.
+ *
+ * In a future release, this may well be extended to respect cpuset and
+ * NUMA mempolicy, and applied also to anonymous pages in do_swap_page();
+ * but for now it is a simple matter of zone.
+ */
+static bool shmem_should_replace_page(struct page *page, gfp_t gfp)
+{
+	return page_zonenum(page) > gfp_zone(gfp);
+}
+
+static int shmem_replace_page(struct page **pagep, gfp_t gfp,
+				struct shmem_inode_info *info, pgoff_t index)
+{
+	struct page *oldpage, *newpage;
+	struct address_space *swap_mapping;
+	pgoff_t swap_index;
+	int error;
+
+	oldpage = *pagep;
+	swap_index = page_private(oldpage);
+	swap_mapping = page_mapping(oldpage);
+
+	/*
+	 * We have arrived here because our zones are constrained, so don't
+	 * limit chance of success by further cpuset and node constraints.
+	 */
+	gfp &= ~GFP_CONSTRAINT_MASK;
+	newpage = shmem_alloc_page(gfp, info, index);
+	if (!newpage)
+		return -ENOMEM;
+	VM_BUG_ON(shmem_should_replace_page(newpage, gfp));
+
+	*pagep = newpage;
+	page_cache_get(newpage);
+	copy_highpage(newpage, oldpage);
+
+	VM_BUG_ON(!PageLocked(oldpage));
+	__set_page_locked(newpage);
+	VM_BUG_ON(!PageUptodate(oldpage));
+	SetPageUptodate(newpage);
+	VM_BUG_ON(!PageSwapBacked(oldpage));
+	SetPageSwapBacked(newpage);
+	VM_BUG_ON(!swap_index);
+	set_page_private(newpage, swap_index);
+	VM_BUG_ON(!PageSwapCache(oldpage));
+	SetPageSwapCache(newpage);
+
+	/*
+	 * Our caller will very soon move newpage out of swapcache, but it's
+	 * a nice clean interface for us to replace oldpage by newpage there.
+	 */
+	spin_lock_irq(&swap_mapping->tree_lock);
+	error = shmem_radix_tree_replace(swap_mapping, swap_index, oldpage,
+								   newpage);
+	__inc_zone_page_state(newpage, NR_FILE_PAGES);
+	__dec_zone_page_state(oldpage, NR_FILE_PAGES);
+	spin_unlock_irq(&swap_mapping->tree_lock);
+	BUG_ON(error);
+
+	mem_cgroup_replace_page_cache(oldpage, newpage);
+	lru_cache_add_anon(newpage);
+
+	ClearPageSwapCache(oldpage);
+	set_page_private(oldpage, 0);
+
+	unlock_page(oldpage);
+	page_cache_release(oldpage);
+	page_cache_release(oldpage);
+	return 0;
+}
+
 /*
  * shmem_getpage_gfp - find page in cache, or get from swap, or allocate
  *
@@ -923,19 +1025,20 @@ repeat:
 
 		/* We have to do this with page locked to prevent races */
 		lock_page(page);
+		if (!PageSwapCache(page) || page->mapping) {
+			error = -EEXIST;	/* try again */
+			goto failed;
+		}
 		if (!PageUptodate(page)) {
 			error = -EIO;
 			goto failed;
 		}
 		wait_on_page_writeback(page);
 
-		/* Someone may have already done it for us */
-		if (page->mapping) {
-			if (page->mapping == mapping &&
-			    page->index == index)
-				goto done;
-			error = -EEXIST;
-			goto failed;
+		if (shmem_should_replace_page(page, gfp)) {
+			error = shmem_replace_page(&page, gfp, info, index);
+			if (error)
+				goto failed;
 		}
 
 		error = mem_cgroup_cache_charge(page, current->mm,
@@ -998,7 +1101,7 @@ repeat:
 		if (sgp == SGP_DIRTY)
 			set_page_dirty(page);
 	}
-done:
+
 	/* Perhaps the file has been truncated since we checked */
 	if (sgp != SGP_WRITE &&
 	    ((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
diff --git a/mm/swapfile.c b/mm/swapfile.c
index fafc26d1b1dc..b0c86e92f42c 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -601,7 +601,7 @@ void swapcache_free(swp_entry_t entry, struct page *page)
  * This does not give an exact answer when swap count is continued,
  * but does include the high COUNT_CONTINUED flag to allow for that.
  */
-static inline int page_swapcount(struct page *page)
+int page_swapcount(struct page *page)
 {
 	int count = 0;
 	struct swap_info_struct *p;
-- 
cgit v1.3-7-g2ca7


From 17cf28afea2a1112f240a3a2da8af883be024811 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Tue, 29 May 2012 15:06:41 -0700
Subject: mm/fs: remove truncate_range

Remove vmtruncate_range(), and remove the truncate_range method from
struct inode_operations: only tmpfs ever supported it, and tmpfs has now
converted over to using the fallocate method of file_operations.

Update Documentation accordingly, adding (setlease and) fallocate lines.
And while we're in mm.h, remove duplicate declarations of shmem_lock() and
shmem_file_setup(): everyone is now using the ones in shmem_fs.h.

Based-on-patch-by: Cong Wang <amwang@redhat.com>
Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Cong Wang <amwang@redhat.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/filesystems/Locking |  2 --
 Documentation/filesystems/vfs.txt | 13 ++++++++-----
 fs/bad_inode.c                    |  1 -
 include/linux/fs.h                |  1 -
 include/linux/mm.h                |  4 ----
 mm/shmem.c                        |  1 -
 mm/truncate.c                     | 25 -------------------------
 7 files changed, 8 insertions(+), 39 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 4fca82e5276e..d449e632e6a0 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -60,7 +60,6 @@ ata *);
 	ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t);
 	ssize_t (*listxattr) (struct dentry *, char *, size_t);
 	int (*removexattr) (struct dentry *, const char *);
-	void (*truncate_range)(struct inode *, loff_t, loff_t);
 	int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len);
 
 locking rules:
@@ -87,7 +86,6 @@ setxattr:	yes
 getxattr:	no
 listxattr:	no
 removexattr:	yes
-truncate_range:	yes
 fiemap:		no
 	Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on
 victim.
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 0d0492028082..ef19f91a0f12 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -363,7 +363,6 @@ struct inode_operations {
 	ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t);
 	ssize_t (*listxattr) (struct dentry *, char *, size_t);
 	int (*removexattr) (struct dentry *, const char *);
-	void (*truncate_range)(struct inode *, loff_t, loff_t);
 };
 
 Again, all methods are called without any locks being held, unless
@@ -472,9 +471,6 @@ otherwise noted.
   removexattr: called by the VFS to remove an extended attribute from
   	a file. This method is called by removexattr(2) system call.
 
-  truncate_range: a method provided by the underlying filesystem to truncate a
-  	range of blocks , i.e. punch a hole somewhere in a file.
-
 
 The Address Space Object
 ========================
@@ -760,7 +756,7 @@ struct file_operations
 ----------------------
 
 This describes how the VFS can manipulate an open file. As of kernel
-2.6.22, the following members are defined:
+3.5, the following members are defined:
 
 struct file_operations {
 	struct module *owner;
@@ -790,6 +786,8 @@ struct file_operations {
 	int (*flock) (struct file *, int, struct file_lock *);
 	ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, size_t, unsigned int);
 	ssize_t (*splice_read)(struct file *, struct pipe_inode_info *, size_t, unsigned int);
+	int (*setlease)(struct file *, long arg, struct file_lock **);
+	long (*fallocate)(struct file *, int mode, loff_t offset, loff_t len);
 };
 
 Again, all methods are called without any locks being held, unless
@@ -858,6 +856,11 @@ otherwise noted.
   splice_read: called by the VFS to splice data from file to a pipe. This
 	       method is used by the splice(2) system call
 
+  setlease: called by the VFS to set or release a file lock lease.
+	    setlease has the file_lock_lock held and must not sleep.
+
+  fallocate: called by the VFS to preallocate blocks or punch a hole.
+
 Note that the file operations are implemented by the specific
 filesystem in which the inode resides. When opening a device node
 (character or block special) most filesystems will call special
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index 37268c5bb98b..1b35d6bd06b0 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -292,7 +292,6 @@ static const struct inode_operations bad_inode_ops =
 	.getxattr	= bad_inode_getxattr,
 	.listxattr	= bad_inode_listxattr,
 	.removexattr	= bad_inode_removexattr,
-	/* truncate_range returns void */
 };
 
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index cdc1a9630948..038076b27ea4 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1681,7 +1681,6 @@ struct inode_operations {
 	ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t);
 	ssize_t (*listxattr) (struct dentry *, char *, size_t);
 	int (*removexattr) (struct dentry *, const char *);
-	void (*truncate_range)(struct inode *, loff_t, loff_t);
 	int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start,
 		      u64 len);
 } ____cacheline_aligned;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7d5c37f24c63..aa20bafa40f6 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -871,8 +871,6 @@ extern void pagefault_out_of_memory(void);
 extern void show_free_areas(unsigned int flags);
 extern bool skip_free_areas_node(unsigned int flags, int nid);
 
-int shmem_lock(struct file *file, int lock, struct user_struct *user);
-struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags);
 int shmem_zero_setup(struct vm_area_struct *);
 
 extern int can_do_mlock(void);
@@ -951,11 +949,9 @@ static inline void unmap_shared_mapping_range(struct address_space *mapping,
 extern void truncate_pagecache(struct inode *inode, loff_t old, loff_t new);
 extern void truncate_setsize(struct inode *inode, loff_t newsize);
 extern int vmtruncate(struct inode *inode, loff_t offset);
-extern int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end);
 void truncate_pagecache_range(struct inode *inode, loff_t offset, loff_t end);
 int truncate_inode_page(struct address_space *mapping, struct page *page);
 int generic_error_remove_page(struct address_space *mapping, struct page *page);
-
 int invalidate_inode_page(struct page *page);
 
 #ifdef CONFIG_MMU
diff --git a/mm/shmem.c b/mm/shmem.c
index 7e54ff1c63e1..f368d0acb52c 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2541,7 +2541,6 @@ static const struct file_operations shmem_file_operations = {
 
 static const struct inode_operations shmem_inode_operations = {
 	.setattr	= shmem_setattr,
-	.truncate_range	= shmem_truncate_range,
 #ifdef CONFIG_TMPFS_XATTR
 	.setxattr	= shmem_setxattr,
 	.getxattr	= shmem_getxattr,
diff --git a/mm/truncate.c b/mm/truncate.c
index 61a183b89df6..75801acdaac7 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -602,31 +602,6 @@ int vmtruncate(struct inode *inode, loff_t newsize)
 }
 EXPORT_SYMBOL(vmtruncate);
 
-int vmtruncate_range(struct inode *inode, loff_t lstart, loff_t lend)
-{
-	struct address_space *mapping = inode->i_mapping;
-	loff_t holebegin = round_up(lstart, PAGE_SIZE);
-	loff_t holelen = 1 + lend - holebegin;
-
-	/*
-	 * If the underlying filesystem is not going to provide
-	 * a way to truncate a range of blocks (punch a hole) -
-	 * we should return failure right now.
-	 */
-	if (!inode->i_op->truncate_range)
-		return -ENOSYS;
-
-	mutex_lock(&inode->i_mutex);
-	inode_dio_wait(inode);
-	unmap_mapping_range(mapping, holebegin, holelen, 1);
-	inode->i_op->truncate_range(inode, lstart, lend);
-	/* unmap again to remove racily COWed private pages */
-	unmap_mapping_range(mapping, holebegin, holelen, 1);
-	mutex_unlock(&inode->i_mutex);
-
-	return 0;
-}
-
 /**
  * truncate_pagecache_range - unmap and remove pagecache that is hole-punched
  * @inode: inode
-- 
cgit v1.3-7-g2ca7


From a7f638f999ff42310e9582273b1fe25ea6e469ba Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Tue, 29 May 2012 15:06:47 -0700
Subject: mm, oom: normalize oom scores to oom_score_adj scale only for
 userspace

The oom_score_adj scale ranges from -1000 to 1000 and represents the
proportion of memory available to the process at allocation time.  This
means an oom_score_adj value of 300, for example, will bias a process as
though it was using an extra 30.0% of available memory and a value of
-350 will discount 35.0% of available memory from its usage.

The oom killer badness heuristic also uses this scale to report the oom
score for each eligible process in determining the "best" process to
kill.  Thus, it can only differentiate each process's memory usage by
0.1% of system RAM.

On large systems, this can end up being a large amount of memory: 256MB
on 256GB systems, for example.

This can be fixed by having the badness heuristic to use the actual
memory usage in scoring threads and then normalizing it to the
oom_score_adj scale for userspace.  This results in better comparison
between eligible threads for kill and no change from the userspace
perspective.

Suggested-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Tested-by: Dave Jones <davej@redhat.com>
Signed-off-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/base.c      |  5 +++--
 include/linux/oom.h |  5 +++--
 mm/oom_kill.c       | 44 ++++++++++++++++----------------------------
 3 files changed, 22 insertions(+), 32 deletions(-)

(limited to 'include/linux')

diff --git a/fs/proc/base.c b/fs/proc/base.c
index d2d3108a611c..d7d711876b6a 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -411,12 +411,13 @@ static const struct file_operations proc_lstats_operations = {
 
 static int proc_oom_score(struct task_struct *task, char *buffer)
 {
+	unsigned long totalpages = totalram_pages + total_swap_pages;
 	unsigned long points = 0;
 
 	read_lock(&tasklist_lock);
 	if (pid_alive(task))
-		points = oom_badness(task, NULL, NULL,
-					totalram_pages + total_swap_pages);
+		points = oom_badness(task, NULL, NULL, totalpages) *
+						1000 / totalpages;
 	read_unlock(&tasklist_lock);
 	return sprintf(buffer, "%lu\n", points);
 }
diff --git a/include/linux/oom.h b/include/linux/oom.h
index 3d7647536b03..e4c29bc72e70 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -43,8 +43,9 @@ enum oom_constraint {
 extern void compare_swap_oom_score_adj(int old_val, int new_val);
 extern int test_set_oom_score_adj(int new_val);
 
-extern unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
-			const nodemask_t *nodemask, unsigned long totalpages);
+extern unsigned long oom_badness(struct task_struct *p,
+		struct mem_cgroup *memcg, const nodemask_t *nodemask,
+		unsigned long totalpages);
 extern int try_set_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_flags);
 extern void clear_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_flags);
 
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 9f09a1fde9f9..ed0e19677360 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -180,10 +180,10 @@ static bool oom_unkillable_task(struct task_struct *p,
  * predictable as possible.  The goal is to return the highest value for the
  * task consuming the most memory to avoid subsequent oom failures.
  */
-unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
-		      const nodemask_t *nodemask, unsigned long totalpages)
+unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
+			  const nodemask_t *nodemask, unsigned long totalpages)
 {
-	long points;
+	unsigned long points;
 
 	if (oom_unkillable_task(p, memcg, nodemask))
 		return 0;
@@ -197,22 +197,12 @@ unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
 		return 0;
 	}
 
-	/*
-	 * The memory controller may have a limit of 0 bytes, so avoid a divide
-	 * by zero, if necessary.
-	 */
-	if (!totalpages)
-		totalpages = 1;
-
 	/*
 	 * The baseline for the badness score is the proportion of RAM that each
 	 * task's rss, pagetable and swap space use.
 	 */
-	points = get_mm_rss(p->mm) + p->mm->nr_ptes;
-	points += get_mm_counter(p->mm, MM_SWAPENTS);
-
-	points *= 1000;
-	points /= totalpages;
+	points = get_mm_rss(p->mm) + p->mm->nr_ptes +
+		 get_mm_counter(p->mm, MM_SWAPENTS);
 	task_unlock(p);
 
 	/*
@@ -220,23 +210,20 @@ unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
 	 * implementation used by LSMs.
 	 */
 	if (has_capability_noaudit(p, CAP_SYS_ADMIN))
-		points -= 30;
+		points -= 30 * totalpages / 1000;
 
 	/*
 	 * /proc/pid/oom_score_adj ranges from -1000 to +1000 such that it may
 	 * either completely disable oom killing or always prefer a certain
 	 * task.
 	 */
-	points += p->signal->oom_score_adj;
+	points += p->signal->oom_score_adj * totalpages / 1000;
 
 	/*
-	 * Never return 0 for an eligible task that may be killed since it's
-	 * possible that no single user task uses more than 0.1% of memory and
-	 * no single admin tasks uses more than 3.0%.
+	 * Never return 0 for an eligible task regardless of the root bonus and
+	 * oom_score_adj (oom_score_adj can't be OOM_SCORE_ADJ_MIN here).
 	 */
-	if (points <= 0)
-		return 1;
-	return (points < 1000) ? points : 1000;
+	return points ? points : 1;
 }
 
 /*
@@ -314,7 +301,7 @@ static struct task_struct *select_bad_process(unsigned int *ppoints,
 {
 	struct task_struct *g, *p;
 	struct task_struct *chosen = NULL;
-	*ppoints = 0;
+	unsigned long chosen_points = 0;
 
 	do_each_thread(g, p) {
 		unsigned int points;
@@ -354,7 +341,7 @@ static struct task_struct *select_bad_process(unsigned int *ppoints,
 			 */
 			if (p == current) {
 				chosen = p;
-				*ppoints = 1000;
+				chosen_points = ULONG_MAX;
 			} else if (!force_kill) {
 				/*
 				 * If this task is not being ptraced on exit,
@@ -367,12 +354,13 @@ static struct task_struct *select_bad_process(unsigned int *ppoints,
 		}
 
 		points = oom_badness(p, memcg, nodemask, totalpages);
-		if (points > *ppoints) {
+		if (points > chosen_points) {
 			chosen = p;
-			*ppoints = points;
+			chosen_points = points;
 		}
 	} while_each_thread(g, p);
 
+	*ppoints = chosen_points * 1000 / totalpages;
 	return chosen;
 }
 
@@ -572,7 +560,7 @@ void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
 	}
 
 	check_panic_on_oom(CONSTRAINT_MEMCG, gfp_mask, order, NULL);
-	limit = mem_cgroup_get_limit(memcg) >> PAGE_SHIFT;
+	limit = mem_cgroup_get_limit(memcg) >> PAGE_SHIFT ? : 1;
 	read_lock(&tasklist_lock);
 	p = select_bad_process(&points, limit, memcg, NULL, false);
 	if (p && PTR_ERR(p) != -1UL)
-- 
cgit v1.3-7-g2ca7


From 5bf5f03c271907978489868a4c72aeb42b5127d2 Mon Sep 17 00:00:00 2001
From: Pravin B Shelar <pshelar@nicira.com>
Date: Tue, 29 May 2012 15:06:49 -0700
Subject: mm: fix slab->page flags corruption

Transparent huge pages can change page->flags (PG_compound_lock) without
taking Slab lock.  Since THP can not break slab pages we can safely access
compound page without taking compound lock.

Specifically this patch fixes a race between compound_unlock() and slab
functions which perform page-flags updates.  This can occur when
get_page()/put_page() is called on a page from slab.

[akpm@linux-foundation.org: tweak comment text, fix comment layout, fix label indenting]
Reported-by: Amey Bhide <abhide@nicira.com>
Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Reviewed-by: Christoph Lameter <cl@linux.com>
Acked-by: Andrea Arcangeli <aarcange@redhat.com>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h |  2 ++
 mm/swap.c          | 37 +++++++++++++++++++++++++++++++++++--
 2 files changed, 37 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index aa20bafa40f6..ce26716238c3 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -321,6 +321,7 @@ static inline int is_vmalloc_or_module_addr(const void *x)
 static inline void compound_lock(struct page *page)
 {
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	VM_BUG_ON(PageSlab(page));
 	bit_spin_lock(PG_compound_lock, &page->flags);
 #endif
 }
@@ -328,6 +329,7 @@ static inline void compound_lock(struct page *page)
 static inline void compound_unlock(struct page *page)
 {
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	VM_BUG_ON(PageSlab(page));
 	bit_spin_unlock(PG_compound_lock, &page->flags);
 #endif
 }
diff --git a/mm/swap.c b/mm/swap.c
index 5c13f1338972..6fdd72ec15b0 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -82,6 +82,25 @@ static void put_compound_page(struct page *page)
 		if (likely(page != page_head &&
 			   get_page_unless_zero(page_head))) {
 			unsigned long flags;
+
+			/*
+			 * THP can not break up slab pages so avoid taking
+			 * compound_lock().  Slab performs non-atomic bit ops
+			 * on page->flags for better performance.  In particular
+			 * slab_unlock() in slub used to be a hot path.  It is
+			 * still hot on arches that do not support
+			 * this_cpu_cmpxchg_double().
+			 */
+			if (PageSlab(page_head)) {
+				if (PageTail(page)) {
+					if (put_page_testzero(page_head))
+						VM_BUG_ON(1);
+
+					atomic_dec(&page->_mapcount);
+					goto skip_lock_tail;
+				} else
+					goto skip_lock;
+			}
 			/*
 			 * page_head wasn't a dangling pointer but it
 			 * may not be a head page anymore by the time
@@ -92,10 +111,10 @@ static void put_compound_page(struct page *page)
 			if (unlikely(!PageTail(page))) {
 				/* __split_huge_page_refcount run before us */
 				compound_unlock_irqrestore(page_head, flags);
-				VM_BUG_ON(PageHead(page_head));
+skip_lock:
 				if (put_page_testzero(page_head))
 					__put_single_page(page_head);
-			out_put_single:
+out_put_single:
 				if (put_page_testzero(page))
 					__put_single_page(page);
 				return;
@@ -115,6 +134,8 @@ static void put_compound_page(struct page *page)
 			VM_BUG_ON(atomic_read(&page_head->_count) <= 0);
 			VM_BUG_ON(atomic_read(&page->_count) != 0);
 			compound_unlock_irqrestore(page_head, flags);
+
+skip_lock_tail:
 			if (put_page_testzero(page_head)) {
 				if (PageHead(page_head))
 					__put_compound_page(page_head);
@@ -162,6 +183,18 @@ bool __get_page_tail(struct page *page)
 	struct page *page_head = compound_trans_head(page);
 
 	if (likely(page != page_head && get_page_unless_zero(page_head))) {
+
+		/* Ref to put_compound_page() comment. */
+		if (PageSlab(page_head)) {
+			if (likely(PageTail(page))) {
+				__get_page_tail_foll(page, false);
+				return true;
+			} else {
+				put_page(page_head);
+				return false;
+			}
+		}
+
 		/*
 		 * page_head wasn't a dangling pointer but it
 		 * may not be a head page anymore by the time
-- 
cgit v1.3-7-g2ca7


From 4b91355e9dc9ac1eb3d69e56de093899ff2677ef Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Tue, 29 May 2012 15:06:51 -0700
Subject: memcg: fix/change behavior of shared anon at moving task

This patch changes memcg's behavior at task_move().

At task_move(), the kernel scans a task's page table and move the changes
for mapped pages from source cgroup to target cgroup.  There has been a
bug at handling shared anonymous pages for a long time.

Before patch:
  - The spec says 'shared anonymous pages are not moved.'
  - The implementation was 'shared anonymoys pages may be moved'.
    If page_mapcount <=2, shared anonymous pages's charge were moved.

After patch:
  - The spec says 'all anonymous pages are moved'.
  - The implementation is 'all anonymous pages are moved'.

Considering usage of memcg, this will not affect user's experience.
'shared anonymous' pages only exists between a tree of processes which
don't do exec().  Moving one of process without exec() seems not sane.
For example, libcgroup will not be affected by this change.  (Anyway, no
one noticed the implementation for a long time...)

Below is a discussion log:

 - current spec/implementation are complex
 - Now, shared file caches are moved
 - It adds unclear check as page_mapcount(). To do correct check,
   we should check swap users, etc.
 - No one notice this implementation behavior. So, no one get benefit
   from the design.
 - In general, once task is moved to a cgroup for running, it will not
   be moved....
 - Finally, we have control knob as memory.move_charge_at_immigrate.

Here is a patch to allow moving shared pages, completely. This makes
memcg simpler and fix current broken code.

Suggested-by: Hugh Dickins <hughd@google.com>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Glauber Costa <glommer@parallels.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/cgroups/memory.txt |  9 ++++-----
 include/linux/swap.h             |  9 ---------
 mm/memcontrol.c                  | 22 ++++++++++++++--------
 mm/swapfile.c                    | 31 -------------------------------
 4 files changed, 18 insertions(+), 53 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt
index e479007f1a75..6a066a270fc5 100644
--- a/Documentation/cgroups/memory.txt
+++ b/Documentation/cgroups/memory.txt
@@ -184,12 +184,14 @@ behind this approach is that a cgroup that aggressively uses a shared
 page will eventually get charged for it (once it is uncharged from
 the cgroup that brought it in -- this will happen on memory pressure).
 
+But see section 8.2: when moving a task to another cgroup, its pages may
+be recharged to the new cgroup, if move_charge_at_immigrate has been chosen.
+
 Exception: If CONFIG_CGROUP_CGROUP_MEM_RES_CTLR_SWAP is not used.
 When you do swapoff and make swapped-out pages of shmem(tmpfs) to
 be backed into memory in force, charges for pages are accounted against the
 caller of swapoff rather than the users of shmem.
 
-
 2.4 Swap Extension (CONFIG_CGROUP_MEM_RES_CTLR_SWAP)
 
 Swap Extension allows you to record charge for swap. A swapped-in page is
@@ -615,8 +617,7 @@ memory cgroup.
   bit | what type of charges would be moved ?
  -----+------------------------------------------------------------------------
    0  | A charge of an anonymous page(or swap of it) used by the target task.
-      | Those pages and swaps must be used only by the target task. You must
-      | enable Swap Extension(see 2.4) to enable move of swap charges.
+      | You must enable Swap Extension(see 2.4) to enable move of swap charges.
  -----+------------------------------------------------------------------------
    1  | A charge of file pages(normal file, tmpfs file(e.g. ipc shared memory)
       | and swaps of tmpfs file) mmapped by the target task. Unlike the case of
@@ -629,8 +630,6 @@ memory cgroup.
 
 8.3 TODO
 
-- Implement madvise(2) to let users decide the vma to be moved or not to be
-  moved.
 - All of moving charge operations are done under cgroup_mutex. It's not good
   behavior to hold the mutex too long, so we may need some trick.
 
diff --git a/include/linux/swap.h b/include/linux/swap.h
index d965c4bfab3a..49c0fa9ef5cf 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -359,7 +359,6 @@ struct backing_dev_info;
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR
 extern void
 mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout);
-extern int mem_cgroup_count_swap_user(swp_entry_t ent, struct page **pagep);
 #else
 static inline void
 mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout)
@@ -470,14 +469,6 @@ mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent)
 {
 }
 
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR
-static inline int
-mem_cgroup_count_swap_user(swp_entry_t ent, struct page **pagep)
-{
-	return 0;
-}
-#endif
-
 #endif /* CONFIG_SWAP */
 #endif /* __KERNEL__*/
 #endif /* _LINUX_SWAP_H */
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index d7ce417cae7c..e7db70f3d2d6 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5154,7 +5154,7 @@ static struct page *mc_handle_present_pte(struct vm_area_struct *vma,
 		return NULL;
 	if (PageAnon(page)) {
 		/* we don't move shared anon */
-		if (!move_anon() || page_mapcount(page) > 2)
+		if (!move_anon())
 			return NULL;
 	} else if (!move_file())
 		/* we ignore mapcount for file pages */
@@ -5165,26 +5165,32 @@ static struct page *mc_handle_present_pte(struct vm_area_struct *vma,
 	return page;
 }
 
+#ifdef CONFIG_SWAP
 static struct page *mc_handle_swap_pte(struct vm_area_struct *vma,
 			unsigned long addr, pte_t ptent, swp_entry_t *entry)
 {
-	int usage_count;
 	struct page *page = NULL;
 	swp_entry_t ent = pte_to_swp_entry(ptent);
 
 	if (!move_anon() || non_swap_entry(ent))
 		return NULL;
-	usage_count = mem_cgroup_count_swap_user(ent, &page);
-	if (usage_count > 1) { /* we don't move shared anon */
-		if (page)
-			put_page(page);
-		return NULL;
-	}
+	/*
+	 * Because lookup_swap_cache() updates some statistics counter,
+	 * we call find_get_page() with swapper_space directly.
+	 */
+	page = find_get_page(&swapper_space, ent.val);
 	if (do_swap_account)
 		entry->val = ent.val;
 
 	return page;
 }
+#else
+static struct page *mc_handle_swap_pte(struct vm_area_struct *vma,
+			unsigned long addr, pte_t ptent, swp_entry_t *entry)
+{
+	return NULL;
+}
+#endif
 
 static struct page *mc_handle_file_pte(struct vm_area_struct *vma,
 			unsigned long addr, pte_t ptent, swp_entry_t *entry)
diff --git a/mm/swapfile.c b/mm/swapfile.c
index b0c86e92f42c..457b10baef59 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -717,37 +717,6 @@ int free_swap_and_cache(swp_entry_t entry)
 	return p != NULL;
 }
 
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR
-/**
- * mem_cgroup_count_swap_user - count the user of a swap entry
- * @ent: the swap entry to be checked
- * @pagep: the pointer for the swap cache page of the entry to be stored
- *
- * Returns the number of the user of the swap entry. The number is valid only
- * for swaps of anonymous pages.
- * If the entry is found on swap cache, the page is stored to pagep with
- * refcount of it being incremented.
- */
-int mem_cgroup_count_swap_user(swp_entry_t ent, struct page **pagep)
-{
-	struct page *page;
-	struct swap_info_struct *p;
-	int count = 0;
-
-	page = find_get_page(&swapper_space, ent.val);
-	if (page)
-		count += page_mapcount(page);
-	p = swap_info_get(ent);
-	if (p) {
-		count += swap_count(p->swap_map[swp_offset(ent)]);
-		spin_unlock(&swap_lock);
-	}
-
-	*pagep = page;
-	return count;
-}
-#endif
-
 #ifdef CONFIG_HIBERNATION
 /*
  * Find the swap type that corresponds to given device (if any).
-- 
cgit v1.3-7-g2ca7


From 89abfab133ef1f5902abafb744df72793213ac19 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Tue, 29 May 2012 15:06:53 -0700
Subject: mm/memcg: move reclaim_stat into lruvec

With mem_cgroup_disabled() now explicit, it becomes clear that the
zone_reclaim_stat structure actually belongs in lruvec, per-zone when
memcg is disabled but per-memcg per-zone when it's enabled.

We can delete mem_cgroup_get_reclaim_stat(), and change
update_page_reclaim_stat() to update just the one set of stats, the one
which get_scan_count() will actually use.

Signed-off-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Konstantin Khlebnikov <khlebnikov@openvz.org>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Reviewed-by: Minchan Kim <minchan@kernel.org>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Cc: Glauber Costa <glommer@parallels.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h |  9 ---------
 include/linux/mmzone.h     | 29 ++++++++++++++---------------
 mm/memcontrol.c            | 27 +++++++--------------------
 mm/page_alloc.c            |  8 ++++----
 mm/swap.c                  | 14 ++++----------
 mm/vmscan.c                |  5 +----
 6 files changed, 30 insertions(+), 62 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 18ea0b7baf32..cfe9050ad8da 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -126,8 +126,6 @@ int mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg,
 int mem_cgroup_select_victim_node(struct mem_cgroup *memcg);
 unsigned long mem_cgroup_zone_nr_lru_pages(struct mem_cgroup *memcg,
 					int nid, int zid, unsigned int lrumask);
-struct zone_reclaim_stat *mem_cgroup_get_reclaim_stat(struct mem_cgroup *memcg,
-						      struct zone *zone);
 struct zone_reclaim_stat*
 mem_cgroup_get_reclaim_stat_from_page(struct page *page);
 extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg,
@@ -356,13 +354,6 @@ mem_cgroup_zone_nr_lru_pages(struct mem_cgroup *memcg, int nid, int zid,
 	return 0;
 }
 
-
-static inline struct zone_reclaim_stat*
-mem_cgroup_get_reclaim_stat(struct mem_cgroup *memcg, struct zone *zone)
-{
-	return NULL;
-}
-
 static inline struct zone_reclaim_stat*
 mem_cgroup_get_reclaim_stat_from_page(struct page *page)
 {
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 4871e31ae277..1b89861eedc0 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -185,8 +185,22 @@ static inline int is_unevictable_lru(enum lru_list lru)
 	return (lru == LRU_UNEVICTABLE);
 }
 
+struct zone_reclaim_stat {
+	/*
+	 * The pageout code in vmscan.c keeps track of how many of the
+	 * mem/swap backed and file backed pages are refeferenced.
+	 * The higher the rotated/scanned ratio, the more valuable
+	 * that cache is.
+	 *
+	 * The anon LRU stats live in [0], file LRU stats in [1]
+	 */
+	unsigned long		recent_rotated[2];
+	unsigned long		recent_scanned[2];
+};
+
 struct lruvec {
 	struct list_head lists[NR_LRU_LISTS];
+	struct zone_reclaim_stat reclaim_stat;
 };
 
 /* Mask used at gathering information at once (see memcontrol.c) */
@@ -313,19 +327,6 @@ enum zone_type {
 #error ZONES_SHIFT -- too many zones configured adjust calculation
 #endif
 
-struct zone_reclaim_stat {
-	/*
-	 * The pageout code in vmscan.c keeps track of how many of the
-	 * mem/swap backed and file backed pages are refeferenced.
-	 * The higher the rotated/scanned ratio, the more valuable
-	 * that cache is.
-	 *
-	 * The anon LRU stats live in [0], file LRU stats in [1]
-	 */
-	unsigned long		recent_rotated[2];
-	unsigned long		recent_scanned[2];
-};
-
 struct zone {
 	/* Fields commonly accessed by the page allocator */
 
@@ -407,8 +408,6 @@ struct zone {
 	spinlock_t		lru_lock;
 	struct lruvec		lruvec;
 
-	struct zone_reclaim_stat reclaim_stat;
-
 	unsigned long		pages_scanned;	   /* since last reclaim */
 	unsigned long		flags;		   /* zone flags, see below */
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 30f938c86453..00c8898dbb81 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -138,7 +138,6 @@ struct mem_cgroup_per_zone {
 
 	struct mem_cgroup_reclaim_iter reclaim_iter[DEF_PRIORITY + 1];
 
-	struct zone_reclaim_stat reclaim_stat;
 	struct rb_node		tree_node;	/* RB tree node */
 	unsigned long long	usage_in_excess;/* Set to the value by which */
 						/* the soft limit is exceeded*/
@@ -1243,16 +1242,6 @@ int mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg, struct zone *zone)
 	return (active > inactive);
 }
 
-struct zone_reclaim_stat *mem_cgroup_get_reclaim_stat(struct mem_cgroup *memcg,
-						      struct zone *zone)
-{
-	int nid = zone_to_nid(zone);
-	int zid = zone_idx(zone);
-	struct mem_cgroup_per_zone *mz = mem_cgroup_zoneinfo(memcg, nid, zid);
-
-	return &mz->reclaim_stat;
-}
-
 struct zone_reclaim_stat *
 mem_cgroup_get_reclaim_stat_from_page(struct page *page)
 {
@@ -1268,7 +1257,7 @@ mem_cgroup_get_reclaim_stat_from_page(struct page *page)
 	/* Ensure pc->mem_cgroup is visible after reading PCG_USED. */
 	smp_rmb();
 	mz = page_cgroup_zoneinfo(pc->mem_cgroup, page);
-	return &mz->reclaim_stat;
+	return &mz->lruvec.reclaim_stat;
 }
 
 #define mem_cgroup_from_res_counter(counter, member)	\
@@ -4216,21 +4205,19 @@ static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,
 	{
 		int nid, zid;
 		struct mem_cgroup_per_zone *mz;
+		struct zone_reclaim_stat *rstat;
 		unsigned long recent_rotated[2] = {0, 0};
 		unsigned long recent_scanned[2] = {0, 0};
 
 		for_each_online_node(nid)
 			for (zid = 0; zid < MAX_NR_ZONES; zid++) {
 				mz = mem_cgroup_zoneinfo(memcg, nid, zid);
+				rstat = &mz->lruvec.reclaim_stat;
 
-				recent_rotated[0] +=
-					mz->reclaim_stat.recent_rotated[0];
-				recent_rotated[1] +=
-					mz->reclaim_stat.recent_rotated[1];
-				recent_scanned[0] +=
-					mz->reclaim_stat.recent_scanned[0];
-				recent_scanned[1] +=
-					mz->reclaim_stat.recent_scanned[1];
+				recent_rotated[0] += rstat->recent_rotated[0];
+				recent_rotated[1] += rstat->recent_rotated[1];
+				recent_scanned[0] += rstat->recent_scanned[0];
+				recent_scanned[1] += rstat->recent_scanned[1];
 			}
 		cb->fill(cb, "recent_rotated_anon", recent_rotated[0]);
 		cb->fill(cb, "recent_rotated_file", recent_rotated[1]);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 4fc462b5fcf1..8cbfc38e68ac 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4410,10 +4410,10 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
 		zone_pcp_init(zone);
 		for_each_lru(lru)
 			INIT_LIST_HEAD(&zone->lruvec.lists[lru]);
-		zone->reclaim_stat.recent_rotated[0] = 0;
-		zone->reclaim_stat.recent_rotated[1] = 0;
-		zone->reclaim_stat.recent_scanned[0] = 0;
-		zone->reclaim_stat.recent_scanned[1] = 0;
+		zone->lruvec.reclaim_stat.recent_rotated[0] = 0;
+		zone->lruvec.reclaim_stat.recent_rotated[1] = 0;
+		zone->lruvec.reclaim_stat.recent_scanned[0] = 0;
+		zone->lruvec.reclaim_stat.recent_scanned[1] = 0;
 		zap_zone_vm_stats(zone);
 		zone->flags = 0;
 		if (!size)
diff --git a/mm/swap.c b/mm/swap.c
index 6fdd72ec15b0..0503ad705e7c 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -312,21 +312,15 @@ void rotate_reclaimable_page(struct page *page)
 static void update_page_reclaim_stat(struct zone *zone, struct page *page,
 				     int file, int rotated)
 {
-	struct zone_reclaim_stat *reclaim_stat = &zone->reclaim_stat;
-	struct zone_reclaim_stat *memcg_reclaim_stat;
+	struct zone_reclaim_stat *reclaim_stat;
 
-	memcg_reclaim_stat = mem_cgroup_get_reclaim_stat_from_page(page);
+	reclaim_stat = mem_cgroup_get_reclaim_stat_from_page(page);
+	if (!reclaim_stat)
+		reclaim_stat = &zone->lruvec.reclaim_stat;
 
 	reclaim_stat->recent_scanned[file]++;
 	if (rotated)
 		reclaim_stat->recent_rotated[file]++;
-
-	if (!memcg_reclaim_stat)
-		return;
-
-	memcg_reclaim_stat->recent_scanned[file]++;
-	if (rotated)
-		memcg_reclaim_stat->recent_rotated[file]++;
 }
 
 static void __activate_page(struct page *page, void *arg)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 52fac58b4461..e234ada18747 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -149,10 +149,7 @@ static bool global_reclaim(struct scan_control *sc)
 
 static struct zone_reclaim_stat *get_reclaim_stat(struct mem_cgroup_zone *mz)
 {
-	if (!mem_cgroup_disabled())
-		return mem_cgroup_get_reclaim_stat(mz->mem_cgroup, mz->zone);
-
-	return &mz->zone->reclaim_stat;
+	return &mem_cgroup_zone_lruvec(mz->zone, mz->mem_cgroup)->reclaim_stat;
 }
 
 static unsigned long zone_nr_lru_pages(struct mem_cgroup_zone *mz,
-- 
cgit v1.3-7-g2ca7


From 014483bcccc5edbf861d89dc1a6f7cdc02f9f4c0 Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <khlebnikov@openvz.org>
Date: Tue, 29 May 2012 15:06:53 -0700
Subject: mm: mark mm-inline functions as __always_inline

GCC sometimes ignores "inline" directives even for small and simple functions.
This supposed to be fixed in gcc 4.7, but it was released only yesterday.

Signed-off-by: Konstantin Khlebnikov <khlebnikov@openvz.org>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Glauber Costa <glommer@parallels.com>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Minchan Kim <minchan@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm_inline.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 227fd3e9a9c9..16d45d9c31a4 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -21,7 +21,7 @@ static inline int page_is_file_cache(struct page *page)
 	return !PageSwapBacked(page);
 }
 
-static inline void
+static __always_inline void
 add_page_to_lru_list(struct zone *zone, struct page *page, enum lru_list lru)
 {
 	struct lruvec *lruvec;
@@ -31,7 +31,7 @@ add_page_to_lru_list(struct zone *zone, struct page *page, enum lru_list lru)
 	__mod_zone_page_state(zone, NR_LRU_BASE + lru, hpage_nr_pages(page));
 }
 
-static inline void
+static __always_inline void
 del_page_from_lru_list(struct zone *zone, struct page *page, enum lru_list lru)
 {
 	mem_cgroup_lru_del_list(page, lru);
@@ -61,7 +61,7 @@ static inline enum lru_list page_lru_base_type(struct page *page)
  * Returns the LRU list a page was on, as an index into the array of LRU
  * lists; and clears its Unevictable or Active flags, ready for freeing.
  */
-static inline enum lru_list page_off_lru(struct page *page)
+static __always_inline enum lru_list page_off_lru(struct page *page)
 {
 	enum lru_list lru;
 
@@ -85,7 +85,7 @@ static inline enum lru_list page_off_lru(struct page *page)
  * Returns the LRU list a page should be on, as an index
  * into the array of LRU lists.
  */
-static inline enum lru_list page_lru(struct page *page)
+static __always_inline enum lru_list page_lru(struct page *page)
 {
 	enum lru_list lru;
 
-- 
cgit v1.3-7-g2ca7


From f3fd4a61928a5edf5b033a417e761b488b43e203 Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <khlebnikov@openvz.org>
Date: Tue, 29 May 2012 15:06:54 -0700
Subject: mm: remove lru type checks from __isolate_lru_page()

After patch "mm: forbid lumpy-reclaim in shrink_active_list()" we can
completely remove anon/file and active/inactive lru type filters from
__isolate_lru_page(), because isolation for 0-order reclaim always
isolates pages from right lru list.  And pages-isolation for lumpy
shrink_inactive_list() or memory-compaction anyway allowed to isolate
pages from all evictable lru lists.

Signed-off-by: Konstantin Khlebnikov <khlebnikov@openvz.org>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Hugh Dickins <hughd@google.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: Glauber Costa <glommer@parallels.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Minchan Kim <minchan@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h | 10 +++-------
 include/linux/swap.h   |  2 +-
 mm/compaction.c        |  4 ++--
 mm/vmscan.c            | 23 ++++-------------------
 4 files changed, 10 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 1b89861eedc0..5c4880bc027a 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -209,16 +209,12 @@ struct lruvec {
 #define LRU_ALL_EVICTABLE (LRU_ALL_FILE | LRU_ALL_ANON)
 #define LRU_ALL	     ((1 << NR_LRU_LISTS) - 1)
 
-/* Isolate inactive pages */
-#define ISOLATE_INACTIVE	((__force isolate_mode_t)0x1)
-/* Isolate active pages */
-#define ISOLATE_ACTIVE		((__force isolate_mode_t)0x2)
 /* Isolate clean file */
-#define ISOLATE_CLEAN		((__force isolate_mode_t)0x4)
+#define ISOLATE_CLEAN		((__force isolate_mode_t)0x1)
 /* Isolate unmapped file */
-#define ISOLATE_UNMAPPED	((__force isolate_mode_t)0x8)
+#define ISOLATE_UNMAPPED	((__force isolate_mode_t)0x2)
 /* Isolate for asynchronous migration */
-#define ISOLATE_ASYNC_MIGRATE	((__force isolate_mode_t)0x10)
+#define ISOLATE_ASYNC_MIGRATE	((__force isolate_mode_t)0x4)
 
 /* LRU Isolation modes. */
 typedef unsigned __bitwise__ isolate_mode_t;
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 49c0fa9ef5cf..ff38eb7c0ec4 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -251,7 +251,7 @@ static inline void lru_cache_add_file(struct page *page)
 /* linux/mm/vmscan.c */
 extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 					gfp_t gfp_mask, nodemask_t *mask);
-extern int __isolate_lru_page(struct page *page, isolate_mode_t mode, int file);
+extern int __isolate_lru_page(struct page *page, isolate_mode_t mode);
 extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem,
 						  gfp_t gfp_mask, bool noswap);
 extern unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
diff --git a/mm/compaction.c b/mm/compaction.c
index 840ee288e296..74e1b3803839 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -226,7 +226,7 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
 	unsigned long last_pageblock_nr = 0, pageblock_nr;
 	unsigned long nr_scanned = 0, nr_isolated = 0;
 	struct list_head *migratelist = &cc->migratepages;
-	isolate_mode_t mode = ISOLATE_ACTIVE|ISOLATE_INACTIVE;
+	isolate_mode_t mode = 0;
 
 	/*
 	 * Ensure that there are not too many pages isolated from the LRU
@@ -329,7 +329,7 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
 			mode |= ISOLATE_ASYNC_MIGRATE;
 
 		/* Try isolate the page */
-		if (__isolate_lru_page(page, mode, 0) != 0)
+		if (__isolate_lru_page(page, mode) != 0)
 			continue;
 
 		VM_BUG_ON(PageTransCompound(page));
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 987be819fad6..27ef5769b9e4 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -949,29 +949,14 @@ keep:
  *
  * returns 0 on success, -ve errno on failure.
  */
-int __isolate_lru_page(struct page *page, isolate_mode_t mode, int file)
+int __isolate_lru_page(struct page *page, isolate_mode_t mode)
 {
-	bool all_lru_mode;
 	int ret = -EINVAL;
 
 	/* Only take pages on the LRU. */
 	if (!PageLRU(page))
 		return ret;
 
-	all_lru_mode = (mode & (ISOLATE_ACTIVE|ISOLATE_INACTIVE)) ==
-		(ISOLATE_ACTIVE|ISOLATE_INACTIVE);
-
-	/*
-	 * When checking the active state, we need to be sure we are
-	 * dealing with comparible boolean values.  Take the logical not
-	 * of each.
-	 */
-	if (!all_lru_mode && !PageActive(page) != !(mode & ISOLATE_ACTIVE))
-		return ret;
-
-	if (!all_lru_mode && !!page_is_file_cache(page) != file)
-		return ret;
-
 	/* Do not give back unevictable pages for compaction */
 	if (PageUnevictable(page))
 		return ret;
@@ -1070,7 +1055,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 
 		VM_BUG_ON(!PageLRU(page));
 
-		switch (__isolate_lru_page(page, mode, file)) {
+		switch (__isolate_lru_page(page, mode)) {
 		case 0:
 			mem_cgroup_lru_del(page);
 			list_move(&page->lru, dst);
@@ -1282,7 +1267,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz,
 	unsigned long nr_file;
 	unsigned long nr_dirty = 0;
 	unsigned long nr_writeback = 0;
-	isolate_mode_t isolate_mode = ISOLATE_INACTIVE;
+	isolate_mode_t isolate_mode = 0;
 	int file = is_file_lru(lru);
 	struct zone *zone = mz->zone;
 	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz);
@@ -1452,7 +1437,7 @@ static void shrink_active_list(unsigned long nr_to_scan,
 	struct page *page;
 	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz);
 	unsigned long nr_rotated = 0;
-	isolate_mode_t isolate_mode = ISOLATE_ACTIVE;
+	isolate_mode_t isolate_mode = 0;
 	int file = is_file_lru(lru);
 	struct zone *zone = mz->zone;
 
-- 
cgit v1.3-7-g2ca7


From bbf808ed7de68fdf626fd4f9718d88cf03ce13a9 Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <khlebnikov@openvz.org>
Date: Tue, 29 May 2012 15:06:54 -0700
Subject: mm/memcg: kill mem_cgroup_lru_del()

This patch kills mem_cgroup_lru_del(), we can use
mem_cgroup_lru_del_list() instead.  On 0-order isolation we already have
right lru list id.

Signed-off-by: Konstantin Khlebnikov <khlebnikov@openvz.org>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Glauber Costa <glommer@parallels.com>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Minchan Kim <minchan@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 5 -----
 mm/memcontrol.c            | 5 -----
 mm/vmscan.c                | 2 +-
 3 files changed, 1 insertion(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index cfe9050ad8da..e3fc200cd68e 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -66,7 +66,6 @@ struct lruvec *mem_cgroup_zone_lruvec(struct zone *, struct mem_cgroup *);
 struct lruvec *mem_cgroup_lru_add_list(struct zone *, struct page *,
 				       enum lru_list);
 void mem_cgroup_lru_del_list(struct page *, enum lru_list);
-void mem_cgroup_lru_del(struct page *);
 struct lruvec *mem_cgroup_lru_move_lists(struct zone *, struct page *,
 					 enum lru_list, enum lru_list);
 
@@ -265,10 +264,6 @@ static inline void mem_cgroup_lru_del_list(struct page *page, enum lru_list lru)
 {
 }
 
-static inline void mem_cgroup_lru_del(struct page *page)
-{
-}
-
 static inline struct lruvec *mem_cgroup_lru_move_lists(struct zone *zone,
 						       struct page *page,
 						       enum lru_list from,
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 00c8898dbb81..75f23e98db43 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1115,11 +1115,6 @@ void mem_cgroup_lru_del_list(struct page *page, enum lru_list lru)
 	mz->lru_size[lru] -= 1 << compound_order(page);
 }
 
-void mem_cgroup_lru_del(struct page *page)
-{
-	mem_cgroup_lru_del_list(page, page_lru(page));
-}
-
 /**
  * mem_cgroup_lru_move_lists - account for moving a page between lrus
  * @zone: zone of the page
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 27ef5769b9e4..c94d17d75d73 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1057,7 +1057,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 
 		switch (__isolate_lru_page(page, mode)) {
 		case 0:
-			mem_cgroup_lru_del(page);
+			mem_cgroup_lru_del_list(page, lru);
 			list_move(&page->lru, dst);
 			nr_taken += hpage_nr_pages(page);
 			break;
-- 
cgit v1.3-7-g2ca7


From 7f5e86c2ccc1480946d2c869d7f7d5278e828092 Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <khlebnikov@openvz.org>
Date: Tue, 29 May 2012 15:06:58 -0700
Subject: mm: add link from struct lruvec to struct zone

This is the first stage of struct mem_cgroup_zone removal.  Further
patches replace struct mem_cgroup_zone with a pointer to struct lruvec.

If CONFIG_CGROUP_MEM_RES_CTLR=n lruvec_zone() is just container_of().

Signed-off-by: Konstantin Khlebnikov <khlebnikov@openvz.org>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h | 14 ++++++++++++++
 mm/memcontrol.c        |  4 +---
 mm/mmzone.c            | 14 ++++++++++++++
 mm/page_alloc.c        |  8 +-------
 4 files changed, 30 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 5c4880bc027a..2427706f78b4 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -201,6 +201,9 @@ struct zone_reclaim_stat {
 struct lruvec {
 	struct list_head lists[NR_LRU_LISTS];
 	struct zone_reclaim_stat reclaim_stat;
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+	struct zone *zone;
+#endif
 };
 
 /* Mask used at gathering information at once (see memcontrol.c) */
@@ -729,6 +732,17 @@ extern int init_currently_empty_zone(struct zone *zone, unsigned long start_pfn,
 				     unsigned long size,
 				     enum memmap_context context);
 
+extern void lruvec_init(struct lruvec *lruvec, struct zone *zone);
+
+static inline struct zone *lruvec_zone(struct lruvec *lruvec)
+{
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+	return lruvec->zone;
+#else
+	return container_of(lruvec, struct zone, lruvec);
+#endif
+}
+
 #ifdef CONFIG_HAVE_MEMORY_PRESENT
 void memory_present(int nid, unsigned long start, unsigned long end);
 #else
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index a8abf919c70a..39b2c14f1509 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4738,7 +4738,6 @@ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node)
 {
 	struct mem_cgroup_per_node *pn;
 	struct mem_cgroup_per_zone *mz;
-	enum lru_list lru;
 	int zone, tmp = node;
 	/*
 	 * This routine is called against possible nodes.
@@ -4756,8 +4755,7 @@ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node)
 
 	for (zone = 0; zone < MAX_NR_ZONES; zone++) {
 		mz = &pn->zoneinfo[zone];
-		for_each_lru(lru)
-			INIT_LIST_HEAD(&mz->lruvec.lists[lru]);
+		lruvec_init(&mz->lruvec, &NODE_DATA(node)->node_zones[zone]);
 		mz->usage_in_excess = 0;
 		mz->on_tree = false;
 		mz->memcg = memcg;
diff --git a/mm/mmzone.c b/mm/mmzone.c
index 7cf7b7ddc7c5..6830eab5bf09 100644
--- a/mm/mmzone.c
+++ b/mm/mmzone.c
@@ -86,3 +86,17 @@ int memmap_valid_within(unsigned long pfn,
 	return 1;
 }
 #endif /* CONFIG_ARCH_HAS_HOLES_MEMORYMODEL */
+
+void lruvec_init(struct lruvec *lruvec, struct zone *zone)
+{
+	enum lru_list lru;
+
+	memset(lruvec, 0, sizeof(struct lruvec));
+
+	for_each_lru(lru)
+		INIT_LIST_HEAD(&lruvec->lists[lru]);
+
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+	lruvec->zone = zone;
+#endif
+}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 8cbfc38e68ac..6092f331b32e 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4358,7 +4358,6 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
 	for (j = 0; j < MAX_NR_ZONES; j++) {
 		struct zone *zone = pgdat->node_zones + j;
 		unsigned long size, realsize, memmap_pages;
-		enum lru_list lru;
 
 		size = zone_spanned_pages_in_node(nid, j, zones_size);
 		realsize = size - zone_absent_pages_in_node(nid, j,
@@ -4408,12 +4407,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
 		zone->zone_pgdat = pgdat;
 
 		zone_pcp_init(zone);
-		for_each_lru(lru)
-			INIT_LIST_HEAD(&zone->lruvec.lists[lru]);
-		zone->lruvec.reclaim_stat.recent_rotated[0] = 0;
-		zone->lruvec.reclaim_stat.recent_rotated[1] = 0;
-		zone->lruvec.reclaim_stat.recent_scanned[0] = 0;
-		zone->lruvec.reclaim_stat.recent_scanned[1] = 0;
+		lruvec_init(&zone->lruvec, zone);
 		zap_zone_vm_stats(zone);
 		zone->flags = 0;
 		if (!size)
-- 
cgit v1.3-7-g2ca7


From 074291fea8bcedeabf295360e2ddd9bbb5830b4a Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <khlebnikov@openvz.org>
Date: Tue, 29 May 2012 15:07:00 -0700
Subject: mm/vmscan: replace zone_nr_lru_pages() with get_lruvec_size()

If memory cgroup is enabled we always use lruvecs which are embedded into
struct mem_cgroup_per_zone, so we can reach lru_size counters via
container_of().

Signed-off-by: Konstantin Khlebnikov <khlebnikov@openvz.org>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h |  6 ++----
 mm/memcontrol.c            |  9 +++++++++
 mm/vmscan.c                | 31 ++++++++++++++++---------------
 3 files changed, 27 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index e3fc200cd68e..ccb3e3c65dd2 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -123,8 +123,7 @@ int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg,
 int mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg,
 				    struct zone *zone);
 int mem_cgroup_select_victim_node(struct mem_cgroup *memcg);
-unsigned long mem_cgroup_zone_nr_lru_pages(struct mem_cgroup *memcg,
-					int nid, int zid, unsigned int lrumask);
+unsigned long mem_cgroup_get_lruvec_size(struct lruvec *lruvec, enum lru_list);
 struct zone_reclaim_stat*
 mem_cgroup_get_reclaim_stat_from_page(struct page *page);
 extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg,
@@ -343,8 +342,7 @@ mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg, struct zone *zone)
 }
 
 static inline unsigned long
-mem_cgroup_zone_nr_lru_pages(struct mem_cgroup *memcg, int nid, int zid,
-				unsigned int lru_mask)
+mem_cgroup_get_lruvec_size(struct lruvec *lruvec, enum lru_list lru)
 {
 	return 0;
 }
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 39b2c14f1509..a68db296ada6 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -723,6 +723,15 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg,
 }
 
 unsigned long
+mem_cgroup_get_lruvec_size(struct lruvec *lruvec, enum lru_list lru)
+{
+	struct mem_cgroup_per_zone *mz;
+
+	mz = container_of(lruvec, struct mem_cgroup_per_zone, lruvec);
+	return mz->lru_size[lru];
+}
+
+static unsigned long
 mem_cgroup_zone_nr_lru_pages(struct mem_cgroup *memcg, int nid, int zid,
 			unsigned int lru_mask)
 {
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 76d786eb84a8..5318faa6a251 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -155,19 +155,14 @@ static struct zone_reclaim_stat *get_reclaim_stat(struct mem_cgroup_zone *mz)
 	return &mem_cgroup_zone_lruvec(mz->zone, mz->mem_cgroup)->reclaim_stat;
 }
 
-static unsigned long zone_nr_lru_pages(struct mem_cgroup_zone *mz,
-				       enum lru_list lru)
+static unsigned long get_lruvec_size(struct lruvec *lruvec, enum lru_list lru)
 {
 	if (!mem_cgroup_disabled())
-		return mem_cgroup_zone_nr_lru_pages(mz->mem_cgroup,
-						    zone_to_nid(mz->zone),
-						    zone_idx(mz->zone),
-						    BIT(lru));
+		return mem_cgroup_get_lruvec_size(lruvec, lru);
 
-	return zone_page_state(mz->zone, NR_LRU_BASE + lru);
+	return zone_page_state(lruvec_zone(lruvec), NR_LRU_BASE + lru);
 }
 
-
 /*
  * Add a shrinker callback to be called from the vm
  */
@@ -1603,6 +1598,9 @@ static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc,
 	enum lru_list lru;
 	int noswap = 0;
 	bool force_scan = false;
+	struct lruvec *lruvec;
+
+	lruvec = mem_cgroup_zone_lruvec(mz->zone, mz->mem_cgroup);
 
 	/*
 	 * If the zone or memcg is small, nr[l] can be 0.  This
@@ -1628,10 +1626,10 @@ static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc,
 		goto out;
 	}
 
-	anon  = zone_nr_lru_pages(mz, LRU_ACTIVE_ANON) +
-		zone_nr_lru_pages(mz, LRU_INACTIVE_ANON);
-	file  = zone_nr_lru_pages(mz, LRU_ACTIVE_FILE) +
-		zone_nr_lru_pages(mz, LRU_INACTIVE_FILE);
+	anon  = get_lruvec_size(lruvec, LRU_ACTIVE_ANON) +
+		get_lruvec_size(lruvec, LRU_INACTIVE_ANON);
+	file  = get_lruvec_size(lruvec, LRU_ACTIVE_FILE) +
+		get_lruvec_size(lruvec, LRU_INACTIVE_FILE);
 
 	if (global_reclaim(sc)) {
 		free  = zone_page_state(mz->zone, NR_FREE_PAGES);
@@ -1694,7 +1692,7 @@ out:
 		int file = is_file_lru(lru);
 		unsigned long scan;
 
-		scan = zone_nr_lru_pages(mz, lru);
+		scan = get_lruvec_size(lruvec, lru);
 		if (sc->priority || noswap || !vmscan_swappiness(sc)) {
 			scan >>= sc->priority;
 			if (!scan && force_scan)
@@ -1730,6 +1728,7 @@ static inline bool should_continue_reclaim(struct mem_cgroup_zone *mz,
 {
 	unsigned long pages_for_compaction;
 	unsigned long inactive_lru_pages;
+	struct lruvec *lruvec;
 
 	/* If not in reclaim/compaction mode, stop */
 	if (!in_reclaim_compaction(sc))
@@ -1762,10 +1761,12 @@ static inline bool should_continue_reclaim(struct mem_cgroup_zone *mz,
 	 * If we have not reclaimed enough pages for compaction and the
 	 * inactive lists are large enough, continue reclaiming
 	 */
+	lruvec = mem_cgroup_zone_lruvec(mz->zone, mz->mem_cgroup);
 	pages_for_compaction = (2UL << sc->order);
-	inactive_lru_pages = zone_nr_lru_pages(mz, LRU_INACTIVE_FILE);
+	inactive_lru_pages = get_lruvec_size(lruvec, LRU_INACTIVE_FILE);
 	if (nr_swap_pages > 0)
-		inactive_lru_pages += zone_nr_lru_pages(mz, LRU_INACTIVE_ANON);
+		inactive_lru_pages += get_lruvec_size(lruvec,
+						      LRU_INACTIVE_ANON);
 	if (sc->nr_reclaimed < pages_for_compaction &&
 			inactive_lru_pages > pages_for_compaction)
 		return true;
-- 
cgit v1.3-7-g2ca7


From c56d5c7dfeb5cc754e17fa3d423086a3c551c219 Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <khlebnikov@openvz.org>
Date: Tue, 29 May 2012 15:07:00 -0700
Subject: mm/vmscan: push lruvec pointer into inactive_list_is_low()

Switch mem_cgroup_inactive_anon_is_low() to lruvec pointers,
mem_cgroup_get_lruvec_size() is more effective than
mem_cgroup_zone_nr_lru_pages()

Signed-off-by: Konstantin Khlebnikov <khlebnikov@openvz.org>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 10 ++++------
 mm/memcontrol.c            | 20 ++++++--------------
 mm/vmscan.c                | 40 ++++++++++++++++++++++------------------
 3 files changed, 32 insertions(+), 38 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index ccb3e3c65dd2..fc81dc244309 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -118,10 +118,8 @@ void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *);
 /*
  * For memory reclaim.
  */
-int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg,
-				    struct zone *zone);
-int mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg,
-				    struct zone *zone);
+int mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec);
+int mem_cgroup_inactive_file_is_low(struct lruvec *lruvec);
 int mem_cgroup_select_victim_node(struct mem_cgroup *memcg);
 unsigned long mem_cgroup_get_lruvec_size(struct lruvec *lruvec, enum lru_list);
 struct zone_reclaim_stat*
@@ -330,13 +328,13 @@ static inline bool mem_cgroup_disabled(void)
 }
 
 static inline int
-mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, struct zone *zone)
+mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec)
 {
 	return 1;
 }
 
 static inline int
-mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg, struct zone *zone)
+mem_cgroup_inactive_file_is_low(struct lruvec *lruvec)
 {
 	return 1;
 }
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index a68db296ada6..9d1764630dff 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1208,19 +1208,15 @@ int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *memcg)
 	return ret;
 }
 
-int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, struct zone *zone)
+int mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec)
 {
 	unsigned long inactive_ratio;
-	int nid = zone_to_nid(zone);
-	int zid = zone_idx(zone);
 	unsigned long inactive;
 	unsigned long active;
 	unsigned long gb;
 
-	inactive = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid,
-						BIT(LRU_INACTIVE_ANON));
-	active = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid,
-					      BIT(LRU_ACTIVE_ANON));
+	inactive = mem_cgroup_get_lruvec_size(lruvec, LRU_INACTIVE_ANON);
+	active = mem_cgroup_get_lruvec_size(lruvec, LRU_ACTIVE_ANON);
 
 	gb = (inactive + active) >> (30 - PAGE_SHIFT);
 	if (gb)
@@ -1231,17 +1227,13 @@ int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, struct zone *zone)
 	return inactive * inactive_ratio < active;
 }
 
-int mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg, struct zone *zone)
+int mem_cgroup_inactive_file_is_low(struct lruvec *lruvec)
 {
 	unsigned long active;
 	unsigned long inactive;
-	int zid = zone_idx(zone);
-	int nid = zone_to_nid(zone);
 
-	inactive = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid,
-						BIT(LRU_INACTIVE_FILE));
-	active = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid,
-					      BIT(LRU_ACTIVE_FILE));
+	inactive = mem_cgroup_get_lruvec_size(lruvec, LRU_INACTIVE_FILE);
+	active = mem_cgroup_get_lruvec_size(lruvec, LRU_ACTIVE_FILE);
 
 	return (active > inactive);
 }
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 5318faa6a251..79e2ead21c57 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1488,13 +1488,12 @@ static int inactive_anon_is_low_global(struct zone *zone)
 
 /**
  * inactive_anon_is_low - check if anonymous pages need to be deactivated
- * @zone: zone to check
- * @sc:   scan control of this context
+ * @lruvec: LRU vector to check
  *
  * Returns true if the zone does not have enough inactive anon pages,
  * meaning some active anon pages need to be deactivated.
  */
-static int inactive_anon_is_low(struct mem_cgroup_zone *mz)
+static int inactive_anon_is_low(struct lruvec *lruvec)
 {
 	/*
 	 * If we don't have swap space, anonymous page deactivation
@@ -1504,13 +1503,12 @@ static int inactive_anon_is_low(struct mem_cgroup_zone *mz)
 		return 0;
 
 	if (!mem_cgroup_disabled())
-		return mem_cgroup_inactive_anon_is_low(mz->mem_cgroup,
-						       mz->zone);
+		return mem_cgroup_inactive_anon_is_low(lruvec);
 
-	return inactive_anon_is_low_global(mz->zone);
+	return inactive_anon_is_low_global(lruvec_zone(lruvec));
 }
 #else
-static inline int inactive_anon_is_low(struct mem_cgroup_zone *mz)
+static inline int inactive_anon_is_low(struct lruvec *lruvec)
 {
 	return 0;
 }
@@ -1528,7 +1526,7 @@ static int inactive_file_is_low_global(struct zone *zone)
 
 /**
  * inactive_file_is_low - check if file pages need to be deactivated
- * @mz: memory cgroup and zone to check
+ * @lruvec: LRU vector to check
  *
  * When the system is doing streaming IO, memory pressure here
  * ensures that active file pages get deactivated, until more
@@ -1540,21 +1538,20 @@ static int inactive_file_is_low_global(struct zone *zone)
  * This uses a different ratio than the anonymous pages, because
  * the page cache uses a use-once replacement algorithm.
  */
-static int inactive_file_is_low(struct mem_cgroup_zone *mz)
+static int inactive_file_is_low(struct lruvec *lruvec)
 {
 	if (!mem_cgroup_disabled())
-		return mem_cgroup_inactive_file_is_low(mz->mem_cgroup,
-						       mz->zone);
+		return mem_cgroup_inactive_file_is_low(lruvec);
 
-	return inactive_file_is_low_global(mz->zone);
+	return inactive_file_is_low_global(lruvec_zone(lruvec));
 }
 
-static int inactive_list_is_low(struct mem_cgroup_zone *mz, int file)
+static int inactive_list_is_low(struct lruvec *lruvec, int file)
 {
 	if (file)
-		return inactive_file_is_low(mz);
+		return inactive_file_is_low(lruvec);
 	else
-		return inactive_anon_is_low(mz);
+		return inactive_anon_is_low(lruvec);
 }
 
 static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
@@ -1564,7 +1561,10 @@ static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
 	int file = is_file_lru(lru);
 
 	if (is_active_lru(lru)) {
-		if (inactive_list_is_low(mz, file))
+		struct lruvec *lruvec = mem_cgroup_zone_lruvec(mz->zone,
+							       mz->mem_cgroup);
+
+		if (inactive_list_is_low(lruvec, file))
 			shrink_active_list(nr_to_scan, mz, sc, lru);
 		return 0;
 	}
@@ -1793,6 +1793,9 @@ static void shrink_mem_cgroup_zone(struct mem_cgroup_zone *mz,
 	unsigned long nr_reclaimed, nr_scanned;
 	unsigned long nr_to_reclaim = sc->nr_to_reclaim;
 	struct blk_plug plug;
+	struct lruvec *lruvec;
+
+	lruvec = mem_cgroup_zone_lruvec(mz->zone, mz->mem_cgroup);
 
 restart:
 	nr_reclaimed = 0;
@@ -1831,7 +1834,7 @@ restart:
 	 * Even if we did not try to evict anon pages at all, we want to
 	 * rebalance the anon lru active/inactive ratio.
 	 */
-	if (inactive_anon_is_low(mz))
+	if (inactive_anon_is_low(lruvec))
 		shrink_active_list(SWAP_CLUSTER_MAX, mz,
 				   sc, LRU_ACTIVE_ANON);
 
@@ -2264,12 +2267,13 @@ static void age_active_anon(struct zone *zone, struct scan_control *sc)
 
 	memcg = mem_cgroup_iter(NULL, NULL, NULL);
 	do {
+		struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg);
 		struct mem_cgroup_zone mz = {
 			.mem_cgroup = memcg,
 			.zone = zone,
 		};
 
-		if (inactive_anon_is_low(&mz))
+		if (inactive_anon_is_low(lruvec))
 			shrink_active_list(SWAP_CLUSTER_MAX, &mz,
 					   sc, LRU_ACTIVE_ANON);
 
-- 
cgit v1.3-7-g2ca7


From 2bb2ba9d51a8044a71a29608d2c4ef8f5b2d57a2 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Tue, 29 May 2012 15:07:03 -0700
Subject: rescounters: add res_counter_uncharge_until()

When killing a res_counter which is a child of other counter, we need to
do

	res_counter_uncharge(child, xxx)
	res_counter_charge(parent, xxx)

This is not atomic and wastes CPU.  This patch adds
res_counter_uncharge_until().  This function's uncharge propagates to
ancestors until specified res_counter.

	res_counter_uncharge_until(child, parent, xxx)

Now the operation is atomic and efficient.

Signed-off-by: Frederic Weisbecker <fweisbec@redhat.com>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Ying Han <yinghan@google.com>
Cc: Glauber Costa <glommer@parallels.com>
Reviewed-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/cgroups/resource_counter.txt |  8 ++++++++
 include/linux/res_counter.h                |  3 +++
 kernel/res_counter.c                       | 10 ++++++++--
 3 files changed, 19 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/cgroups/resource_counter.txt b/Documentation/cgroups/resource_counter.txt
index f3c4ec3626a2..0c4a344e78fa 100644
--- a/Documentation/cgroups/resource_counter.txt
+++ b/Documentation/cgroups/resource_counter.txt
@@ -92,6 +92,14 @@ to work with it.
 
 	The _locked routines imply that the res_counter->lock is taken.
 
+ f. void res_counter_uncharge_until
+		(struct res_counter *rc, struct res_counter *top,
+		 unsinged long val)
+
+	Almost same as res_cunter_uncharge() but propagation of uncharge
+	stops when rc == top. This is useful when kill a res_coutner in
+	child cgroup.
+
  2.1 Other accounting routines
 
     There are more routines that may help you with common needs, like
diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h
index fb201896a8b0..5de7a146ead9 100644
--- a/include/linux/res_counter.h
+++ b/include/linux/res_counter.h
@@ -135,6 +135,9 @@ int __must_check res_counter_charge_nofail(struct res_counter *counter,
 void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val);
 void res_counter_uncharge(struct res_counter *counter, unsigned long val);
 
+void res_counter_uncharge_until(struct res_counter *counter,
+				struct res_counter *top,
+				unsigned long val);
 /**
  * res_counter_margin - calculate chargeable space of a counter
  * @cnt: the counter
diff --git a/kernel/res_counter.c b/kernel/res_counter.c
index bebe2b170d49..ad581aa2369a 100644
--- a/kernel/res_counter.c
+++ b/kernel/res_counter.c
@@ -94,13 +94,15 @@ void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val)
 	counter->usage -= val;
 }
 
-void res_counter_uncharge(struct res_counter *counter, unsigned long val)
+void res_counter_uncharge_until(struct res_counter *counter,
+				struct res_counter *top,
+				unsigned long val)
 {
 	unsigned long flags;
 	struct res_counter *c;
 
 	local_irq_save(flags);
-	for (c = counter; c != NULL; c = c->parent) {
+	for (c = counter; c != top; c = c->parent) {
 		spin_lock(&c->lock);
 		res_counter_uncharge_locked(c, val);
 		spin_unlock(&c->lock);
@@ -108,6 +110,10 @@ void res_counter_uncharge(struct res_counter *counter, unsigned long val)
 	local_irq_restore(flags);
 }
 
+void res_counter_uncharge(struct res_counter *counter, unsigned long val)
+{
+	res_counter_uncharge_until(counter, NULL, val);
+}
 
 static inline unsigned long long *
 res_counter_member(struct res_counter *counter, int member)
-- 
cgit v1.3-7-g2ca7


From 04eac7ffdea1090f81bc33bd8f4bf072b1fe5bdb Mon Sep 17 00:00:00 2001
From: Glauber Costa <glommer@parallels.com>
Date: Tue, 29 May 2012 15:07:05 -0700
Subject: rescounter: remove __must_check from res_counter_charge_nofail()

Since we will succeed with the allocation no matter what, there isn't a
need to use __must_check with it.  It can very well be optional.

Signed-off-by: Glauber Costa <glommer@parallels.com>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ying Han <yinghan@google.com>
Reviewed-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/res_counter.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h
index 5de7a146ead9..7d7fbe2ef782 100644
--- a/include/linux/res_counter.h
+++ b/include/linux/res_counter.h
@@ -119,7 +119,7 @@ int __must_check res_counter_charge_locked(struct res_counter *counter,
 					   unsigned long val, bool force);
 int __must_check res_counter_charge(struct res_counter *counter,
 		unsigned long val, struct res_counter **limit_fail_at);
-int __must_check res_counter_charge_nofail(struct res_counter *counter,
+int res_counter_charge_nofail(struct res_counter *counter,
 		unsigned long val, struct res_counter **limit_fail_at);
 
 /*
-- 
cgit v1.3-7-g2ca7


From 4d7dcca213921fbaf08ee05359d28e4aaf2245f1 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Tue, 29 May 2012 15:07:08 -0700
Subject: mm/memcg: get_lru_size not get_lruvec_size

Konstantin just introduced mem_cgroup_get_lruvec_size() and
get_lruvec_size(), I'm about to add mem_cgroup_update_lru_size(): but
we're dealing with the same thing, lru_size[lru].  We ought to agree on
the naming, and I do think lru_size is the more correct: so rename his
ones to get_lru_size().

Signed-off-by: Hugh Dickins <hughd@google.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Konstantin Khlebnikov <khlebnikov@openvz.org>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h |  4 ++--
 mm/memcontrol.c            | 10 +++++-----
 mm/vmscan.c                | 19 +++++++++----------
 3 files changed, 16 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index fc81dc244309..609ef7c28c6c 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -121,7 +121,7 @@ void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *);
 int mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec);
 int mem_cgroup_inactive_file_is_low(struct lruvec *lruvec);
 int mem_cgroup_select_victim_node(struct mem_cgroup *memcg);
-unsigned long mem_cgroup_get_lruvec_size(struct lruvec *lruvec, enum lru_list);
+unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list);
 struct zone_reclaim_stat*
 mem_cgroup_get_reclaim_stat_from_page(struct page *page);
 extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg,
@@ -340,7 +340,7 @@ mem_cgroup_inactive_file_is_low(struct lruvec *lruvec)
 }
 
 static inline unsigned long
-mem_cgroup_get_lruvec_size(struct lruvec *lruvec, enum lru_list lru)
+mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru)
 {
 	return 0;
 }
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 35e008e25422..75198dac3fe8 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -737,7 +737,7 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg,
 }
 
 unsigned long
-mem_cgroup_get_lruvec_size(struct lruvec *lruvec, enum lru_list lru)
+mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru)
 {
 	struct mem_cgroup_per_zone *mz;
 
@@ -1229,8 +1229,8 @@ int mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec)
 	unsigned long active;
 	unsigned long gb;
 
-	inactive = mem_cgroup_get_lruvec_size(lruvec, LRU_INACTIVE_ANON);
-	active = mem_cgroup_get_lruvec_size(lruvec, LRU_ACTIVE_ANON);
+	inactive = mem_cgroup_get_lru_size(lruvec, LRU_INACTIVE_ANON);
+	active = mem_cgroup_get_lru_size(lruvec, LRU_ACTIVE_ANON);
 
 	gb = (inactive + active) >> (30 - PAGE_SHIFT);
 	if (gb)
@@ -1246,8 +1246,8 @@ int mem_cgroup_inactive_file_is_low(struct lruvec *lruvec)
 	unsigned long active;
 	unsigned long inactive;
 
-	inactive = mem_cgroup_get_lruvec_size(lruvec, LRU_INACTIVE_FILE);
-	active = mem_cgroup_get_lruvec_size(lruvec, LRU_ACTIVE_FILE);
+	inactive = mem_cgroup_get_lru_size(lruvec, LRU_INACTIVE_FILE);
+	active = mem_cgroup_get_lru_size(lruvec, LRU_ACTIVE_FILE);
 
 	return (active > inactive);
 }
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 4c5453f8427d..8b941f303cea 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -145,10 +145,10 @@ static bool global_reclaim(struct scan_control *sc)
 }
 #endif
 
-static unsigned long get_lruvec_size(struct lruvec *lruvec, enum lru_list lru)
+static unsigned long get_lru_size(struct lruvec *lruvec, enum lru_list lru)
 {
 	if (!mem_cgroup_disabled())
-		return mem_cgroup_get_lruvec_size(lruvec, lru);
+		return mem_cgroup_get_lru_size(lruvec, lru);
 
 	return zone_page_state(lruvec_zone(lruvec), NR_LRU_BASE + lru);
 }
@@ -1608,10 +1608,10 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
 		goto out;
 	}
 
-	anon  = get_lruvec_size(lruvec, LRU_ACTIVE_ANON) +
-		get_lruvec_size(lruvec, LRU_INACTIVE_ANON);
-	file  = get_lruvec_size(lruvec, LRU_ACTIVE_FILE) +
-		get_lruvec_size(lruvec, LRU_INACTIVE_FILE);
+	anon  = get_lru_size(lruvec, LRU_ACTIVE_ANON) +
+		get_lru_size(lruvec, LRU_INACTIVE_ANON);
+	file  = get_lru_size(lruvec, LRU_ACTIVE_FILE) +
+		get_lru_size(lruvec, LRU_INACTIVE_FILE);
 
 	if (global_reclaim(sc)) {
 		free  = zone_page_state(zone, NR_FREE_PAGES);
@@ -1674,7 +1674,7 @@ out:
 		int file = is_file_lru(lru);
 		unsigned long scan;
 
-		scan = get_lruvec_size(lruvec, lru);
+		scan = get_lru_size(lruvec, lru);
 		if (sc->priority || noswap || !vmscan_swappiness(sc)) {
 			scan >>= sc->priority;
 			if (!scan && force_scan)
@@ -1743,10 +1743,9 @@ static inline bool should_continue_reclaim(struct lruvec *lruvec,
 	 * inactive lists are large enough, continue reclaiming
 	 */
 	pages_for_compaction = (2UL << sc->order);
-	inactive_lru_pages = get_lruvec_size(lruvec, LRU_INACTIVE_FILE);
+	inactive_lru_pages = get_lru_size(lruvec, LRU_INACTIVE_FILE);
 	if (nr_swap_pages > 0)
-		inactive_lru_pages += get_lruvec_size(lruvec,
-						      LRU_INACTIVE_ANON);
+		inactive_lru_pages += get_lru_size(lruvec, LRU_INACTIVE_ANON);
 	if (sc->nr_reclaimed < pages_for_compaction &&
 			inactive_lru_pages > pages_for_compaction)
 		return true;
-- 
cgit v1.3-7-g2ca7


From fa9add641b1b1c564db916accac1db346e7a2759 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Tue, 29 May 2012 15:07:09 -0700
Subject: mm/memcg: apply add/del_page to lruvec

Take lruvec further: pass it instead of zone to add_page_to_lru_list() and
del_page_from_lru_list(); and pagevec_lru_move_fn() pass lruvec down to
its target functions.

This cleanup eliminates a swathe of cruft in memcontrol.c, including
mem_cgroup_lru_add_list(), mem_cgroup_lru_del_list() and
mem_cgroup_lru_move_lists() - which never actually touched the lists.

In their place, mem_cgroup_page_lruvec() to decide the lruvec, previously
a side-effect of add, and mem_cgroup_update_lru_size() to maintain the
lru_size stats.

Whilst these are simplifications in their own right, the goal is to bring
the evaluation of lruvec next to the spin_locking of the lrus, in
preparation for a future patch.

Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Acked-by: Konstantin Khlebnikov <khlebnikov@openvz.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h |  32 ++++----------
 include/linux/mm_inline.h  |  20 ++++-----
 include/linux/swap.h       |   4 +-
 mm/compaction.c            |   5 ++-
 mm/huge_memory.c           |   8 ++--
 mm/memcontrol.c            | 101 +++++++++++----------------------------------
 mm/swap.c                  |  86 +++++++++++++++++++-------------------
 mm/vmscan.c                |  47 ++++++++++++---------
 8 files changed, 122 insertions(+), 181 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 609ef7c28c6c..83e7ba90d6e5 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -63,11 +63,7 @@ extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
 					gfp_t gfp_mask);
 
 struct lruvec *mem_cgroup_zone_lruvec(struct zone *, struct mem_cgroup *);
-struct lruvec *mem_cgroup_lru_add_list(struct zone *, struct page *,
-				       enum lru_list);
-void mem_cgroup_lru_del_list(struct page *, enum lru_list);
-struct lruvec *mem_cgroup_lru_move_lists(struct zone *, struct page *,
-					 enum lru_list, enum lru_list);
+struct lruvec *mem_cgroup_page_lruvec(struct page *, struct zone *);
 
 /* For coalescing uncharge for reducing memcg' overhead*/
 extern void mem_cgroup_uncharge_start(void);
@@ -122,8 +118,7 @@ int mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec);
 int mem_cgroup_inactive_file_is_low(struct lruvec *lruvec);
 int mem_cgroup_select_victim_node(struct mem_cgroup *memcg);
 unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list);
-struct zone_reclaim_stat*
-mem_cgroup_get_reclaim_stat_from_page(struct page *page);
+void mem_cgroup_update_lru_size(struct lruvec *, enum lru_list, int);
 extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg,
 					struct task_struct *p);
 extern void mem_cgroup_replace_page_cache(struct page *oldpage,
@@ -250,21 +245,8 @@ static inline struct lruvec *mem_cgroup_zone_lruvec(struct zone *zone,
 	return &zone->lruvec;
 }
 
-static inline struct lruvec *mem_cgroup_lru_add_list(struct zone *zone,
-						     struct page *page,
-						     enum lru_list lru)
-{
-	return &zone->lruvec;
-}
-
-static inline void mem_cgroup_lru_del_list(struct page *page, enum lru_list lru)
-{
-}
-
-static inline struct lruvec *mem_cgroup_lru_move_lists(struct zone *zone,
-						       struct page *page,
-						       enum lru_list from,
-						       enum lru_list to)
+static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page,
+						    struct zone *zone)
 {
 	return &zone->lruvec;
 }
@@ -345,10 +327,10 @@ mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru)
 	return 0;
 }
 
-static inline struct zone_reclaim_stat*
-mem_cgroup_get_reclaim_stat_from_page(struct page *page)
+static inline void
+mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru,
+			      int increment)
 {
-	return NULL;
 }
 
 static inline void
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 16d45d9c31a4..1397ccf81e91 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -21,22 +21,22 @@ static inline int page_is_file_cache(struct page *page)
 	return !PageSwapBacked(page);
 }
 
-static __always_inline void
-add_page_to_lru_list(struct zone *zone, struct page *page, enum lru_list lru)
+static __always_inline void add_page_to_lru_list(struct page *page,
+				struct lruvec *lruvec, enum lru_list lru)
 {
-	struct lruvec *lruvec;
-
-	lruvec = mem_cgroup_lru_add_list(zone, page, lru);
+	int nr_pages = hpage_nr_pages(page);
+	mem_cgroup_update_lru_size(lruvec, lru, nr_pages);
 	list_add(&page->lru, &lruvec->lists[lru]);
-	__mod_zone_page_state(zone, NR_LRU_BASE + lru, hpage_nr_pages(page));
+	__mod_zone_page_state(lruvec_zone(lruvec), NR_LRU_BASE + lru, nr_pages);
 }
 
-static __always_inline void
-del_page_from_lru_list(struct zone *zone, struct page *page, enum lru_list lru)
+static __always_inline void del_page_from_lru_list(struct page *page,
+				struct lruvec *lruvec, enum lru_list lru)
 {
-	mem_cgroup_lru_del_list(page, lru);
+	int nr_pages = hpage_nr_pages(page);
+	mem_cgroup_update_lru_size(lruvec, lru, -nr_pages);
 	list_del(&page->lru);
-	__mod_zone_page_state(zone, NR_LRU_BASE + lru, -hpage_nr_pages(page));
+	__mod_zone_page_state(lruvec_zone(lruvec), NR_LRU_BASE + lru, -nr_pages);
 }
 
 /**
diff --git a/include/linux/swap.h b/include/linux/swap.h
index ff38eb7c0ec4..b6661933e252 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -221,8 +221,8 @@ extern unsigned int nr_free_pagecache_pages(void);
 /* linux/mm/swap.c */
 extern void __lru_cache_add(struct page *, enum lru_list lru);
 extern void lru_cache_add_lru(struct page *, enum lru_list lru);
-extern void lru_add_page_tail(struct zone* zone,
-			      struct page *page, struct page *page_tail);
+extern void lru_add_page_tail(struct page *page, struct page *page_tail,
+			      struct lruvec *lruvec);
 extern void activate_page(struct page *);
 extern void mark_page_accessed(struct page *);
 extern void lru_add_drain(void);
diff --git a/mm/compaction.c b/mm/compaction.c
index 74e1b3803839..4ac338af5120 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -227,6 +227,7 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
 	unsigned long nr_scanned = 0, nr_isolated = 0;
 	struct list_head *migratelist = &cc->migratepages;
 	isolate_mode_t mode = 0;
+	struct lruvec *lruvec;
 
 	/*
 	 * Ensure that there are not too many pages isolated from the LRU
@@ -328,6 +329,8 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
 		if (cc->mode != COMPACT_SYNC)
 			mode |= ISOLATE_ASYNC_MIGRATE;
 
+		lruvec = mem_cgroup_page_lruvec(page, zone);
+
 		/* Try isolate the page */
 		if (__isolate_lru_page(page, mode) != 0)
 			continue;
@@ -335,7 +338,7 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
 		VM_BUG_ON(PageTransCompound(page));
 
 		/* Successfully isolated */
-		del_page_from_lru_list(zone, page, page_lru(page));
+		del_page_from_lru_list(page, lruvec, page_lru(page));
 		list_add(&page->lru, migratelist);
 		cc->nr_migratepages++;
 		nr_isolated++;
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index d0def42c121b..57c4b9309015 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1231,10 +1231,13 @@ static void __split_huge_page_refcount(struct page *page)
 {
 	int i;
 	struct zone *zone = page_zone(page);
+	struct lruvec *lruvec;
 	int tail_count = 0;
 
 	/* prevent PageLRU to go away from under us, and freeze lru stats */
 	spin_lock_irq(&zone->lru_lock);
+	lruvec = mem_cgroup_page_lruvec(page, zone);
+
 	compound_lock(page);
 	/* complete memcg works before add pages to LRU */
 	mem_cgroup_split_huge_fixup(page);
@@ -1309,13 +1312,12 @@ static void __split_huge_page_refcount(struct page *page)
 		BUG_ON(!PageDirty(page_tail));
 		BUG_ON(!PageSwapBacked(page_tail));
 
-
-		lru_add_page_tail(zone, page, page_tail);
+		lru_add_page_tail(page, page_tail, lruvec);
 	}
 	atomic_sub(tail_count, &page->_count);
 	BUG_ON(atomic_read(&page->_count) <= 0);
 
-	__dec_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES);
+	__mod_zone_page_state(zone, NR_ANON_TRANSPARENT_HUGEPAGES, -1);
 	__mod_zone_page_state(zone, NR_ANON_PAGES, HPAGE_PMD_NR);
 
 	ClearPageCompound(page);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 75198dac3fe8..bb8d7d3cf302 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1035,7 +1035,7 @@ EXPORT_SYMBOL(mem_cgroup_count_vm_event);
 /**
  * mem_cgroup_zone_lruvec - get the lru list vector for a zone and memcg
  * @zone: zone of the wanted lruvec
- * @mem: memcg of the wanted lruvec
+ * @memcg: memcg of the wanted lruvec
  *
  * Returns the lru list vector holding pages for the given @zone and
  * @mem.  This can be the global zone lruvec, if the memory controller
@@ -1068,19 +1068,11 @@ struct lruvec *mem_cgroup_zone_lruvec(struct zone *zone,
  */
 
 /**
- * mem_cgroup_lru_add_list - account for adding an lru page and return lruvec
- * @zone: zone of the page
+ * mem_cgroup_page_lruvec - return lruvec for adding an lru page
  * @page: the page
- * @lru: current lru
- *
- * This function accounts for @page being added to @lru, and returns
- * the lruvec for the given @zone and the memcg @page is charged to.
- *
- * The callsite is then responsible for physically linking the page to
- * the returned lruvec->lists[@lru].
+ * @zone: zone of the page
  */
-struct lruvec *mem_cgroup_lru_add_list(struct zone *zone, struct page *page,
-				       enum lru_list lru)
+struct lruvec *mem_cgroup_page_lruvec(struct page *page, struct zone *zone)
 {
 	struct mem_cgroup_per_zone *mz;
 	struct mem_cgroup *memcg;
@@ -1093,7 +1085,7 @@ struct lruvec *mem_cgroup_lru_add_list(struct zone *zone, struct page *page,
 	memcg = pc->mem_cgroup;
 
 	/*
-	 * Surreptitiously switch any uncharged page to root:
+	 * Surreptitiously switch any uncharged offlist page to root:
 	 * an uncharged page off lru does nothing to secure
 	 * its former mem_cgroup from sudden removal.
 	 *
@@ -1101,65 +1093,35 @@ struct lruvec *mem_cgroup_lru_add_list(struct zone *zone, struct page *page,
 	 * under page_cgroup lock: between them, they make all uses
 	 * of pc->mem_cgroup safe.
 	 */
-	if (!PageCgroupUsed(pc) && memcg != root_mem_cgroup)
+	if (!PageLRU(page) && !PageCgroupUsed(pc) && memcg != root_mem_cgroup)
 		pc->mem_cgroup = memcg = root_mem_cgroup;
 
 	mz = page_cgroup_zoneinfo(memcg, page);
-	/* compound_order() is stabilized through lru_lock */
-	mz->lru_size[lru] += 1 << compound_order(page);
 	return &mz->lruvec;
 }
 
 /**
- * mem_cgroup_lru_del_list - account for removing an lru page
- * @page: the page
- * @lru: target lru
+ * mem_cgroup_update_lru_size - account for adding or removing an lru page
+ * @lruvec: mem_cgroup per zone lru vector
+ * @lru: index of lru list the page is sitting on
+ * @nr_pages: positive when adding or negative when removing
  *
- * This function accounts for @page being removed from @lru.
- *
- * The callsite is then responsible for physically unlinking
- * @page->lru.
+ * This function must be called when a page is added to or removed from an
+ * lru list.
  */
-void mem_cgroup_lru_del_list(struct page *page, enum lru_list lru)
+void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru,
+				int nr_pages)
 {
 	struct mem_cgroup_per_zone *mz;
-	struct mem_cgroup *memcg;
-	struct page_cgroup *pc;
+	unsigned long *lru_size;
 
 	if (mem_cgroup_disabled())
 		return;
 
-	pc = lookup_page_cgroup(page);
-	memcg = pc->mem_cgroup;
-	VM_BUG_ON(!memcg);
-	mz = page_cgroup_zoneinfo(memcg, page);
-	/* huge page split is done under lru_lock. so, we have no races. */
-	VM_BUG_ON(mz->lru_size[lru] < (1 << compound_order(page)));
-	mz->lru_size[lru] -= 1 << compound_order(page);
-}
-
-/**
- * mem_cgroup_lru_move_lists - account for moving a page between lrus
- * @zone: zone of the page
- * @page: the page
- * @from: current lru
- * @to: target lru
- *
- * This function accounts for @page being moved between the lrus @from
- * and @to, and returns the lruvec for the given @zone and the memcg
- * @page is charged to.
- *
- * The callsite is then responsible for physically relinking
- * @page->lru to the returned lruvec->lists[@to].
- */
-struct lruvec *mem_cgroup_lru_move_lists(struct zone *zone,
-					 struct page *page,
-					 enum lru_list from,
-					 enum lru_list to)
-{
-	/* XXX: Optimize this, especially for @from == @to */
-	mem_cgroup_lru_del_list(page, from);
-	return mem_cgroup_lru_add_list(zone, page, to);
+	mz = container_of(lruvec, struct mem_cgroup_per_zone, lruvec);
+	lru_size = mz->lru_size + lru;
+	*lru_size += nr_pages;
+	VM_BUG_ON((long)(*lru_size) < 0);
 }
 
 /*
@@ -1252,24 +1214,6 @@ int mem_cgroup_inactive_file_is_low(struct lruvec *lruvec)
 	return (active > inactive);
 }
 
-struct zone_reclaim_stat *
-mem_cgroup_get_reclaim_stat_from_page(struct page *page)
-{
-	struct page_cgroup *pc;
-	struct mem_cgroup_per_zone *mz;
-
-	if (mem_cgroup_disabled())
-		return NULL;
-
-	pc = lookup_page_cgroup(page);
-	if (!PageCgroupUsed(pc))
-		return NULL;
-	/* Ensure pc->mem_cgroup is visible after reading PCG_USED. */
-	smp_rmb();
-	mz = page_cgroup_zoneinfo(pc->mem_cgroup, page);
-	return &mz->lruvec.reclaim_stat;
-}
-
 #define mem_cgroup_from_res_counter(counter, member)	\
 	container_of(counter, struct mem_cgroup, member)
 
@@ -2509,6 +2453,7 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
 {
 	struct page_cgroup *pc = lookup_page_cgroup(page);
 	struct zone *uninitialized_var(zone);
+	struct lruvec *lruvec;
 	bool was_on_lru = false;
 	bool anon;
 
@@ -2531,8 +2476,9 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
 		zone = page_zone(page);
 		spin_lock_irq(&zone->lru_lock);
 		if (PageLRU(page)) {
+			lruvec = mem_cgroup_zone_lruvec(zone, pc->mem_cgroup);
 			ClearPageLRU(page);
-			del_page_from_lru_list(zone, page, page_lru(page));
+			del_page_from_lru_list(page, lruvec, page_lru(page));
 			was_on_lru = true;
 		}
 	}
@@ -2550,9 +2496,10 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
 
 	if (lrucare) {
 		if (was_on_lru) {
+			lruvec = mem_cgroup_zone_lruvec(zone, pc->mem_cgroup);
 			VM_BUG_ON(PageLRU(page));
 			SetPageLRU(page);
-			add_page_to_lru_list(zone, page, page_lru(page));
+			add_page_to_lru_list(page, lruvec, page_lru(page));
 		}
 		spin_unlock_irq(&zone->lru_lock);
 	}
diff --git a/mm/swap.c b/mm/swap.c
index 0503ad705e7c..4e7e2ec67078 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -47,13 +47,15 @@ static DEFINE_PER_CPU(struct pagevec, lru_deactivate_pvecs);
 static void __page_cache_release(struct page *page)
 {
 	if (PageLRU(page)) {
-		unsigned long flags;
 		struct zone *zone = page_zone(page);
+		struct lruvec *lruvec;
+		unsigned long flags;
 
 		spin_lock_irqsave(&zone->lru_lock, flags);
+		lruvec = mem_cgroup_page_lruvec(page, zone);
 		VM_BUG_ON(!PageLRU(page));
 		__ClearPageLRU(page);
-		del_page_from_lru_list(zone, page, page_off_lru(page));
+		del_page_from_lru_list(page, lruvec, page_off_lru(page));
 		spin_unlock_irqrestore(&zone->lru_lock, flags);
 	}
 }
@@ -235,11 +237,12 @@ void put_pages_list(struct list_head *pages)
 EXPORT_SYMBOL(put_pages_list);
 
 static void pagevec_lru_move_fn(struct pagevec *pvec,
-				void (*move_fn)(struct page *page, void *arg),
-				void *arg)
+	void (*move_fn)(struct page *page, struct lruvec *lruvec, void *arg),
+	void *arg)
 {
 	int i;
 	struct zone *zone = NULL;
+	struct lruvec *lruvec;
 	unsigned long flags = 0;
 
 	for (i = 0; i < pagevec_count(pvec); i++) {
@@ -253,7 +256,8 @@ static void pagevec_lru_move_fn(struct pagevec *pvec,
 			spin_lock_irqsave(&zone->lru_lock, flags);
 		}
 
-		(*move_fn)(page, arg);
+		lruvec = mem_cgroup_page_lruvec(page, zone);
+		(*move_fn)(page, lruvec, arg);
 	}
 	if (zone)
 		spin_unlock_irqrestore(&zone->lru_lock, flags);
@@ -261,16 +265,13 @@ static void pagevec_lru_move_fn(struct pagevec *pvec,
 	pagevec_reinit(pvec);
 }
 
-static void pagevec_move_tail_fn(struct page *page, void *arg)
+static void pagevec_move_tail_fn(struct page *page, struct lruvec *lruvec,
+				 void *arg)
 {
 	int *pgmoved = arg;
 
 	if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
 		enum lru_list lru = page_lru_base_type(page);
-		struct lruvec *lruvec;
-
-		lruvec = mem_cgroup_lru_move_lists(page_zone(page),
-						   page, lru, lru);
 		list_move_tail(&page->lru, &lruvec->lists[lru]);
 		(*pgmoved)++;
 	}
@@ -309,35 +310,30 @@ void rotate_reclaimable_page(struct page *page)
 	}
 }
 
-static void update_page_reclaim_stat(struct zone *zone, struct page *page,
+static void update_page_reclaim_stat(struct lruvec *lruvec,
 				     int file, int rotated)
 {
-	struct zone_reclaim_stat *reclaim_stat;
-
-	reclaim_stat = mem_cgroup_get_reclaim_stat_from_page(page);
-	if (!reclaim_stat)
-		reclaim_stat = &zone->lruvec.reclaim_stat;
+	struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
 
 	reclaim_stat->recent_scanned[file]++;
 	if (rotated)
 		reclaim_stat->recent_rotated[file]++;
 }
 
-static void __activate_page(struct page *page, void *arg)
+static void __activate_page(struct page *page, struct lruvec *lruvec,
+			    void *arg)
 {
-	struct zone *zone = page_zone(page);
-
 	if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
 		int file = page_is_file_cache(page);
 		int lru = page_lru_base_type(page);
-		del_page_from_lru_list(zone, page, lru);
 
+		del_page_from_lru_list(page, lruvec, lru);
 		SetPageActive(page);
 		lru += LRU_ACTIVE;
-		add_page_to_lru_list(zone, page, lru);
-		__count_vm_event(PGACTIVATE);
+		add_page_to_lru_list(page, lruvec, lru);
 
-		update_page_reclaim_stat(zone, page, file, 1);
+		__count_vm_event(PGACTIVATE);
+		update_page_reclaim_stat(lruvec, file, 1);
 	}
 }
 
@@ -374,7 +370,7 @@ void activate_page(struct page *page)
 	struct zone *zone = page_zone(page);
 
 	spin_lock_irq(&zone->lru_lock);
-	__activate_page(page, NULL);
+	__activate_page(page, mem_cgroup_page_lruvec(page, zone), NULL);
 	spin_unlock_irq(&zone->lru_lock);
 }
 #endif
@@ -441,11 +437,13 @@ void lru_cache_add_lru(struct page *page, enum lru_list lru)
 void add_page_to_unevictable_list(struct page *page)
 {
 	struct zone *zone = page_zone(page);
+	struct lruvec *lruvec;
 
 	spin_lock_irq(&zone->lru_lock);
+	lruvec = mem_cgroup_page_lruvec(page, zone);
 	SetPageUnevictable(page);
 	SetPageLRU(page);
-	add_page_to_lru_list(zone, page, LRU_UNEVICTABLE);
+	add_page_to_lru_list(page, lruvec, LRU_UNEVICTABLE);
 	spin_unlock_irq(&zone->lru_lock);
 }
 
@@ -470,11 +468,11 @@ void add_page_to_unevictable_list(struct page *page)
  * be write it out by flusher threads as this is much more effective
  * than the single-page writeout from reclaim.
  */
-static void lru_deactivate_fn(struct page *page, void *arg)
+static void lru_deactivate_fn(struct page *page, struct lruvec *lruvec,
+			      void *arg)
 {
 	int lru, file;
 	bool active;
-	struct zone *zone = page_zone(page);
 
 	if (!PageLRU(page))
 		return;
@@ -487,13 +485,13 @@ static void lru_deactivate_fn(struct page *page, void *arg)
 		return;
 
 	active = PageActive(page);
-
 	file = page_is_file_cache(page);
 	lru = page_lru_base_type(page);
-	del_page_from_lru_list(zone, page, lru + active);
+
+	del_page_from_lru_list(page, lruvec, lru + active);
 	ClearPageActive(page);
 	ClearPageReferenced(page);
-	add_page_to_lru_list(zone, page, lru);
+	add_page_to_lru_list(page, lruvec, lru);
 
 	if (PageWriteback(page) || PageDirty(page)) {
 		/*
@@ -503,19 +501,17 @@ static void lru_deactivate_fn(struct page *page, void *arg)
 		 */
 		SetPageReclaim(page);
 	} else {
-		struct lruvec *lruvec;
 		/*
 		 * The page's writeback ends up during pagevec
 		 * We moves tha page into tail of inactive.
 		 */
-		lruvec = mem_cgroup_lru_move_lists(zone, page, lru, lru);
 		list_move_tail(&page->lru, &lruvec->lists[lru]);
 		__count_vm_event(PGROTATED);
 	}
 
 	if (active)
 		__count_vm_event(PGDEACTIVATE);
-	update_page_reclaim_stat(zone, page, file, 0);
+	update_page_reclaim_stat(lruvec, file, 0);
 }
 
 /*
@@ -615,6 +611,7 @@ void release_pages(struct page **pages, int nr, int cold)
 	int i;
 	LIST_HEAD(pages_to_free);
 	struct zone *zone = NULL;
+	struct lruvec *lruvec;
 	unsigned long uninitialized_var(flags);
 
 	for (i = 0; i < nr; i++) {
@@ -642,9 +639,11 @@ void release_pages(struct page **pages, int nr, int cold)
 				zone = pagezone;
 				spin_lock_irqsave(&zone->lru_lock, flags);
 			}
+
+			lruvec = mem_cgroup_page_lruvec(page, zone);
 			VM_BUG_ON(!PageLRU(page));
 			__ClearPageLRU(page);
-			del_page_from_lru_list(zone, page, page_off_lru(page));
+			del_page_from_lru_list(page, lruvec, page_off_lru(page));
 		}
 
 		list_add(&page->lru, &pages_to_free);
@@ -676,8 +675,8 @@ EXPORT_SYMBOL(__pagevec_release);
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 /* used by __split_huge_page_refcount() */
-void lru_add_page_tail(struct zone* zone,
-		       struct page *page, struct page *page_tail)
+void lru_add_page_tail(struct page *page, struct page *page_tail,
+		       struct lruvec *lruvec)
 {
 	int uninitialized_var(active);
 	enum lru_list lru;
@@ -686,7 +685,8 @@ void lru_add_page_tail(struct zone* zone,
 	VM_BUG_ON(!PageHead(page));
 	VM_BUG_ON(PageCompound(page_tail));
 	VM_BUG_ON(PageLRU(page_tail));
-	VM_BUG_ON(NR_CPUS != 1 && !spin_is_locked(&zone->lru_lock));
+	VM_BUG_ON(NR_CPUS != 1 &&
+		  !spin_is_locked(&lruvec_zone(lruvec)->lru_lock));
 
 	SetPageLRU(page_tail);
 
@@ -715,20 +715,20 @@ void lru_add_page_tail(struct zone* zone,
 		 * Use the standard add function to put page_tail on the list,
 		 * but then correct its position so they all end up in order.
 		 */
-		add_page_to_lru_list(zone, page_tail, lru);
+		add_page_to_lru_list(page_tail, lruvec, lru);
 		list_head = page_tail->lru.prev;
 		list_move_tail(&page_tail->lru, list_head);
 	}
 
 	if (!PageUnevictable(page))
-		update_page_reclaim_stat(zone, page_tail, file, active);
+		update_page_reclaim_stat(lruvec, file, active);
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
-static void __pagevec_lru_add_fn(struct page *page, void *arg)
+static void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec,
+				 void *arg)
 {
 	enum lru_list lru = (enum lru_list)arg;
-	struct zone *zone = page_zone(page);
 	int file = is_file_lru(lru);
 	int active = is_active_lru(lru);
 
@@ -739,8 +739,8 @@ static void __pagevec_lru_add_fn(struct page *page, void *arg)
 	SetPageLRU(page);
 	if (active)
 		SetPageActive(page);
-	add_page_to_lru_list(zone, page, lru);
-	update_page_reclaim_stat(zone, page, file, active);
+	add_page_to_lru_list(page, lruvec, lru);
+	update_page_reclaim_stat(lruvec, file, active);
 }
 
 /*
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 05d439dc1af9..eeb3bc9d1d36 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1031,6 +1031,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 
 	for (scan = 0; scan < nr_to_scan && !list_empty(src); scan++) {
 		struct page *page;
+		int nr_pages;
 
 		page = lru_to_page(src);
 		prefetchw_prev_lru_page(page, src, flags);
@@ -1039,9 +1040,10 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 
 		switch (__isolate_lru_page(page, mode)) {
 		case 0:
-			mem_cgroup_lru_del_list(page, lru);
+			nr_pages = hpage_nr_pages(page);
+			mem_cgroup_update_lru_size(lruvec, lru, -nr_pages);
 			list_move(&page->lru, dst);
-			nr_taken += hpage_nr_pages(page);
+			nr_taken += nr_pages;
 			break;
 
 		case -EBUSY:
@@ -1093,15 +1095,16 @@ int isolate_lru_page(struct page *page)
 
 	if (PageLRU(page)) {
 		struct zone *zone = page_zone(page);
+		struct lruvec *lruvec;
 
 		spin_lock_irq(&zone->lru_lock);
+		lruvec = mem_cgroup_page_lruvec(page, zone);
 		if (PageLRU(page)) {
 			int lru = page_lru(page);
-			ret = 0;
 			get_page(page);
 			ClearPageLRU(page);
-
-			del_page_from_lru_list(zone, page, lru);
+			del_page_from_lru_list(page, lruvec, lru);
+			ret = 0;
 		}
 		spin_unlock_irq(&zone->lru_lock);
 	}
@@ -1155,9 +1158,13 @@ putback_inactive_pages(struct lruvec *lruvec, struct list_head *page_list)
 			spin_lock_irq(&zone->lru_lock);
 			continue;
 		}
+
+		lruvec = mem_cgroup_page_lruvec(page, zone);
+
 		SetPageLRU(page);
 		lru = page_lru(page);
-		add_page_to_lru_list(zone, page, lru);
+		add_page_to_lru_list(page, lruvec, lru);
+
 		if (is_active_lru(lru)) {
 			int file = is_file_lru(lru);
 			int numpages = hpage_nr_pages(page);
@@ -1166,7 +1173,7 @@ putback_inactive_pages(struct lruvec *lruvec, struct list_head *page_list)
 		if (put_page_testzero(page)) {
 			__ClearPageLRU(page);
 			__ClearPageActive(page);
-			del_page_from_lru_list(zone, page, lru);
+			del_page_from_lru_list(page, lruvec, lru);
 
 			if (unlikely(PageCompound(page))) {
 				spin_unlock_irq(&zone->lru_lock);
@@ -1314,30 +1321,32 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
  * But we had to alter page->flags anyway.
  */
 
-static void move_active_pages_to_lru(struct zone *zone,
+static void move_active_pages_to_lru(struct lruvec *lruvec,
 				     struct list_head *list,
 				     struct list_head *pages_to_free,
 				     enum lru_list lru)
 {
+	struct zone *zone = lruvec_zone(lruvec);
 	unsigned long pgmoved = 0;
 	struct page *page;
+	int nr_pages;
 
 	while (!list_empty(list)) {
-		struct lruvec *lruvec;
-
 		page = lru_to_page(list);
+		lruvec = mem_cgroup_page_lruvec(page, zone);
 
 		VM_BUG_ON(PageLRU(page));
 		SetPageLRU(page);
 
-		lruvec = mem_cgroup_lru_add_list(zone, page, lru);
+		nr_pages = hpage_nr_pages(page);
+		mem_cgroup_update_lru_size(lruvec, lru, nr_pages);
 		list_move(&page->lru, &lruvec->lists[lru]);
-		pgmoved += hpage_nr_pages(page);
+		pgmoved += nr_pages;
 
 		if (put_page_testzero(page)) {
 			__ClearPageLRU(page);
 			__ClearPageActive(page);
-			del_page_from_lru_list(zone, page, lru);
+			del_page_from_lru_list(page, lruvec, lru);
 
 			if (unlikely(PageCompound(page))) {
 				spin_unlock_irq(&zone->lru_lock);
@@ -1443,8 +1452,8 @@ static void shrink_active_list(unsigned long nr_to_scan,
 	 */
 	reclaim_stat->recent_rotated[file] += nr_rotated;
 
-	move_active_pages_to_lru(zone, &l_active, &l_hold, lru);
-	move_active_pages_to_lru(zone, &l_inactive, &l_hold, lru - LRU_ACTIVE);
+	move_active_pages_to_lru(lruvec, &l_active, &l_hold, lru);
+	move_active_pages_to_lru(lruvec, &l_inactive, &l_hold, lru - LRU_ACTIVE);
 	__mod_zone_page_state(zone, NR_ISOLATED_ANON + file, -nr_taken);
 	spin_unlock_irq(&zone->lru_lock);
 
@@ -3237,6 +3246,7 @@ void check_move_unevictable_pages(struct page **pages, int nr_pages)
 			zone = pagezone;
 			spin_lock_irq(&zone->lru_lock);
 		}
+		lruvec = mem_cgroup_page_lruvec(page, zone);
 
 		if (!PageLRU(page) || !PageUnevictable(page))
 			continue;
@@ -3246,11 +3256,8 @@ void check_move_unevictable_pages(struct page **pages, int nr_pages)
 
 			VM_BUG_ON(PageActive(page));
 			ClearPageUnevictable(page);
-			__dec_zone_state(zone, NR_UNEVICTABLE);
-			lruvec = mem_cgroup_lru_move_lists(zone, page,
-						LRU_UNEVICTABLE, lru);
-			list_move(&page->lru, &lruvec->lists[lru]);
-			__inc_zone_state(zone, NR_INACTIVE_ANON + lru);
+			del_page_from_lru_list(page, lruvec, LRU_UNEVICTABLE);
+			add_page_to_lru_list(page, lruvec, lru);
 			pgrescued++;
 		}
 	}
-- 
cgit v1.3-7-g2ca7


From bf05929f41d6c3c79ec1961d90d808a634f09dd9 Mon Sep 17 00:00:00 2001
From: Inki Dae <inki.dae@samsung.com>
Date: Tue, 29 May 2012 15:07:12 -0700
Subject: fbdev: add events for early fb event support

Add FB_EARLY_EVENT_BLANK and FB_R_EARLY_EVENT_BLANK event mode supports.
first, fb_notifier_call_chain() is called with FB_EARLY_EVENT_BLANK and
fb_blank() of specific fb driver is called and then
fb_notifier_call_chain() is called with FB_EVENT_BLANK again at
fb_blank().  and if fb_blank() was failed then fb_nitifier_call_chain()
would be called with FB_R_EARLY_EVENT_BLANK to revert the previous
effects.

Signed-off-by: Inki Dae <inki.dae@samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
Cc: Lars-Peter Clausen <lars@metafoo.de>
Acked-by: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
Cc: Richard Purdie <rpurdie@rpsys.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/fbmem.c | 21 +++++++++++++++------
 include/linux/fb.h    |  4 ++++
 2 files changed, 19 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c
index c6ce416ab587..0dff12a1daef 100644
--- a/drivers/video/fbmem.c
+++ b/drivers/video/fbmem.c
@@ -1046,20 +1046,29 @@ fb_set_var(struct fb_info *info, struct fb_var_screeninfo *var)
 int
 fb_blank(struct fb_info *info, int blank)
 {	
- 	int ret = -EINVAL;
+	struct fb_event event;
+	int ret = -EINVAL, early_ret;
 
  	if (blank > FB_BLANK_POWERDOWN)
  		blank = FB_BLANK_POWERDOWN;
 
+	event.info = info;
+	event.data = &blank;
+
+	early_ret = fb_notifier_call_chain(FB_EARLY_EVENT_BLANK, &event);
+
 	if (info->fbops->fb_blank)
  		ret = info->fbops->fb_blank(blank, info);
 
- 	if (!ret) {
-		struct fb_event event;
-
-		event.info = info;
-		event.data = &blank;
+	if (!ret)
 		fb_notifier_call_chain(FB_EVENT_BLANK, &event);
+	else {
+		/*
+		 * if fb_blank is failed then revert effects of
+		 * the early blank event.
+		 */
+		if (!early_ret)
+			fb_notifier_call_chain(FB_R_EARLY_EVENT_BLANK, &event);
 	}
 
  	return ret;
diff --git a/include/linux/fb.h b/include/linux/fb.h
index d31cb682e173..a3229d7ab9f2 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -554,6 +554,10 @@ struct fb_cursor_user {
 #define FB_EVENT_FB_UNBIND              0x0E
 /*      CONSOLE-SPECIFIC: remap all consoles to new fb - for vga switcheroo */
 #define FB_EVENT_REMAP_ALL_CONSOLE      0x0F
+/*      A hardware display blank early change occured */
+#define FB_EARLY_EVENT_BLANK		0x10
+/*      A hardware display blank revert early change occured */
+#define FB_R_EARLY_EVENT_BLANK		0x11
 
 struct fb_event {
 	struct fb_info *info;
-- 
cgit v1.3-7-g2ca7


From d54ad83f3d56228a42e1021b97fc52bfbad7d560 Mon Sep 17 00:00:00 2001
From: Inki Dae <inki.dae@samsung.com>
Date: Tue, 29 May 2012 15:07:13 -0700
Subject: lcd: add callbacks for early fb event blank support

This patchset adds early fb blank feature that a callback of lcd panel
driver is called prior to specific fb driver's one.  In the case of
MIPI-DSI based video mode LCD Panel, for lcd power off, the power off
commands should be transferred to lcd panel with display and mipi-dsi
controller enabled because the commands is set to lcd panel at vsync porch
period.  and in opposite case, the callback of fb driver should be called
prior to lcd panel driver's one because of same issue.  Also if fb_blank
mode is changed to FB_BLANK_POWERDOWN then display controller would be
off(clock disable) but lcd panel would be still on.  at this time, you
could see some issue like sparkling on lcd panel because video clock to be
delivered to ldi module of lcd panel was disabled.  this issue could
occurs for all lcd panels.

The callback order is as the following:

at fb_blank function of fbmem.c
-> fb_notifier_call_chain(FB_EARLY_EVENT_BLANK)
       -> lcd panel driver's early_set_power()
-> info->fbops->fb_blank()
       -> spcefic fb driver's fb_blank()
-> fb_notifier_call_chain(FB_EVENT_BLANK)
       -> lcd panel driver's set_power()
   -> fb_notifier_call_chain(FB_R_EARLY_EVENT_BLANK) if
info->fops->fb_blank() was failed.

fb_notifier_call_chain(FB_R_EARLY_EVENT_BLANK) would be called to revert
the effects of previous FB_EARLY_EVENT_BLANK call.  and note that if
early_set_power() of lcd_ops is NULL then early fb blank callback would be
ignored.

This patch:

Add early_set_power and r_early_set_power callbacks.  early_set_power
callback is called prior to fb_blank() of fbmem.c and r_early_set_power
callback is called if fb_blank() was failed to revert the effects of the
early_set_power call of lcd panel driver.

Signed-off-by: Inki Dae <inki.dae@samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
Cc: Lars-Peter Clausen <lars@metafoo.de>
Cc: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
Cc: Richard Purdie <rpurdie@rpsys.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/backlight/lcd.c | 10 ++++++++++
 include/linux/lcd.h           | 10 ++++++++++
 2 files changed, 20 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/video/backlight/lcd.c b/drivers/video/backlight/lcd.c
index 79c1b0d609a8..1c298d5bf3af 100644
--- a/drivers/video/backlight/lcd.c
+++ b/drivers/video/backlight/lcd.c
@@ -32,6 +32,8 @@ static int fb_notifier_callback(struct notifier_block *self,
 	case FB_EVENT_BLANK:
 	case FB_EVENT_MODE_CHANGE:
 	case FB_EVENT_MODE_CHANGE_ALL:
+	case FB_EARLY_EVENT_BLANK:
+	case FB_R_EARLY_EVENT_BLANK:
 		break;
 	default:
 		return 0;
@@ -46,6 +48,14 @@ static int fb_notifier_callback(struct notifier_block *self,
 		if (event == FB_EVENT_BLANK) {
 			if (ld->ops->set_power)
 				ld->ops->set_power(ld, *(int *)evdata->data);
+		} else if (event == FB_EARLY_EVENT_BLANK) {
+			if (ld->ops->early_set_power)
+				ld->ops->early_set_power(ld,
+						*(int *)evdata->data);
+		} else if (event == FB_R_EARLY_EVENT_BLANK) {
+			if (ld->ops->r_early_set_power)
+				ld->ops->r_early_set_power(ld,
+						*(int *)evdata->data);
 		} else {
 			if (ld->ops->set_mode)
 				ld->ops->set_mode(ld, evdata->data);
diff --git a/include/linux/lcd.h b/include/linux/lcd.h
index 8877123f2d6e..e00c3b0ebc6b 100644
--- a/include/linux/lcd.h
+++ b/include/linux/lcd.h
@@ -40,6 +40,16 @@ struct lcd_ops {
 	/* Get the LCD panel power status (0: full on, 1..3: controller
 	   power on, flat panel power off, 4: full off), see FB_BLANK_XXX */
 	int (*get_power)(struct lcd_device *);
+	/*
+	 * Enable or disable power to the LCD(0: on; 4: off, see FB_BLANK_XXX)
+	 * and this callback would be called proir to fb driver's callback.
+	 *
+	 * P.S. note that if early_set_power is not NULL then early fb notifier
+	 *	would be registered.
+	 */
+	int (*early_set_power)(struct lcd_device *, int power);
+	/* revert the effects of the early blank event. */
+	int (*r_early_set_power)(struct lcd_device *, int power);
 	/* Enable or disable power to the LCD (0: on; 4: off, see FB_BLANK_XXX) */
 	int (*set_power)(struct lcd_device *, int power);
 	/* Get the current contrast setting (0-max_contrast) */
-- 
cgit v1.3-7-g2ca7


From 1615d210dbc9c67c38b66bcff53233452dbaae22 Mon Sep 17 00:00:00 2001
From: H Hartley Sweeten <hartleys@visionengravers.com>
Date: Tue, 29 May 2012 15:07:14 -0700
Subject: drivers/video/backlight/apple_bl.c: include header for exported
 symbol prototypes

Include the header to pickup the exported symbol prototype.

Quiets the sparse warning:

  warning: symbol 'apple_bl_register' was not declared. Should it be static?
  warning: symbol 'apple_bl_unregister' was not declared. Should it be static?

[akpm@linux-foundation.org: fix resulting build error]
Signed-off-by: H Hartley Sweeten <hsweeten@visionengravers.com>
Cc: Richard Purdie <rpurdie@rpsys.net>
Signed-off-by: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/backlight/apple_bl.c | 1 +
 include/linux/apple_bl.h           | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/video/backlight/apple_bl.c b/drivers/video/backlight/apple_bl.c
index a523b255e124..ef5ca0d6cd20 100644
--- a/drivers/video/backlight/apple_bl.c
+++ b/drivers/video/backlight/apple_bl.c
@@ -25,6 +25,7 @@
 #include <linux/pci.h>
 #include <linux/acpi.h>
 #include <linux/atomic.h>
+#include <linux/apple_bl.h>
 
 static struct backlight_device *apple_backlight_device;
 
diff --git a/include/linux/apple_bl.h b/include/linux/apple_bl.h
index 47bedc0eee69..0a95e730fcea 100644
--- a/include/linux/apple_bl.h
+++ b/include/linux/apple_bl.h
@@ -5,7 +5,7 @@
 #ifndef _LINUX_APPLE_BL_H
 #define _LINUX_APPLE_BL_H
 
-#ifdef CONFIG_BACKLIGHT_APPLE
+#if defined(CONFIG_BACKLIGHT_APPLE) || defined(CONFIG_BACKLIGHT_APPLE_MODULE)
 
 extern int apple_bl_register(void);
 extern void apple_bl_unregister(void);
-- 
cgit v1.3-7-g2ca7


From b00961824a33aadec4a825eaeccfbe3db8ec7032 Mon Sep 17 00:00:00 2001
From: Shuah Khan <shuahkhan@gmail.com>
Date: Tue, 29 May 2012 15:07:27 -0700
Subject: leds: add new field to led_classdev struct to save activation state

Add a new field to led_classdev to save activattion state after activate
routine is successful.  This saved state is used in deactivate routine to
do cleanup such as removing device files, and free memory allocated during
activation.  Currently trigger_data not being null is used for this
purpose.

Existing triggers will need changes to use this new field.

Signed-off-by: Shuah Khan <shuahkhan@gmail.com>
Cc: Richard Purdie <rpurdie@rpsys.net>
Cc: Bryan Wu <bryan.wu@canonical.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/leds.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/leds.h b/include/linux/leds.h
index 5884def15a24..39eee41d8c6f 100644
--- a/include/linux/leds.h
+++ b/include/linux/leds.h
@@ -73,6 +73,8 @@ struct led_classdev {
 	struct led_trigger	*trigger;
 	struct list_head	 trig_list;
 	void			*trigger_data;
+	/* true if activated - deactivate routine uses it to do cleanup */
+	bool			activated;
 #endif
 };
 
-- 
cgit v1.3-7-g2ca7


From 8035a50224302f9eb129d210daf263405d5a91fd Mon Sep 17 00:00:00 2001
From: "Kim, Milo" <Milo.Kim@ti.com>
Date: Tue, 29 May 2012 15:07:28 -0700
Subject: include/linux/led-lm3530.h: comment correction about the range of
 brightness

max brightness is 127, so the range of brt_val should be from 0 to 127

Signed-off-by: Milo(Woogyom) Kim <milo.kim@ti.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Cc: Shreshtha Kumar SAHU <shreshthakumar.sahu@stericsson.com>
Cc: Richard Purdie <rpurdie@rpsys.net>
Cc: Bryan Wu <bryan.wu@canonical.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/led-lm3530.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/led-lm3530.h b/include/linux/led-lm3530.h
index eeae6e742471..4b133479d6ea 100644
--- a/include/linux/led-lm3530.h
+++ b/include/linux/led-lm3530.h
@@ -92,7 +92,7 @@ struct lm3530_pwm_data {
  * @als2_resistor_sel: internal resistance from ALS2 input to ground
  * @als_vmin: als input voltage calibrated for max brightness in mV
  * @als_vmax: als input voltage calibrated for min brightness in mV
- * @brt_val: brightness value (0-255)
+ * @brt_val: brightness value (0-127)
  * @pwm_data: PWM control functions (only valid when the mode is PWM)
  */
 struct lm3530_platform_data {
-- 
cgit v1.3-7-g2ca7


From 4796dd200db943e36f876e7029552212e5bbdf33 Mon Sep 17 00:00:00 2001
From: Stephen Boyd <sboyd@codeaurora.org>
Date: Tue, 29 May 2012 15:07:33 -0700
Subject: vsprintf: fix %ps on non symbols when using kallsyms

Using %ps in a printk format will sometimes fail silently and print the
empty string if the address passed in does not match a symbol that
kallsyms knows about.  But using %pS will fall back to printing the full
address if kallsyms can't find the symbol.  Make %ps act the same as %pS
by falling back to printing the address.

While we're here also make %ps print the module that a symbol comes from
so that it matches what %pS already does.  Take this simple function for
example (in a module):

	static void test_printk(void)
	{
		int test;
		pr_info("with pS: %pS\n", &test);
		pr_info("with ps: %ps\n", &test);
	}

Before this patch:

 with pS: 0xdff7df44
 with ps:

After this patch:

 with pS: 0xdff7df44
 with ps: 0xdff7df44

Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kallsyms.h |  7 +++++++
 kernel/kallsyms.c        | 32 ++++++++++++++++++++++++--------
 lib/vsprintf.c           |  2 +-
 3 files changed, 32 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h
index 387571959dd9..6883e197acb9 100644
--- a/include/linux/kallsyms.h
+++ b/include/linux/kallsyms.h
@@ -36,6 +36,7 @@ const char *kallsyms_lookup(unsigned long addr,
 
 /* Look up a kernel symbol and return it in a text buffer. */
 extern int sprint_symbol(char *buffer, unsigned long address);
+extern int sprint_symbol_no_offset(char *buffer, unsigned long address);
 extern int sprint_backtrace(char *buffer, unsigned long address);
 
 /* Look up a kernel symbol and print it to the kernel messages. */
@@ -80,6 +81,12 @@ static inline int sprint_symbol(char *buffer, unsigned long addr)
 	return 0;
 }
 
+static inline int sprint_symbol_no_offset(char *buffer, unsigned long addr)
+{
+	*buffer = '\0';
+	return 0;
+}
+
 static inline int sprint_backtrace(char *buffer, unsigned long addr)
 {
 	*buffer = '\0';
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 079f1d39a8b8..2169feeba529 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -343,7 +343,7 @@ int lookup_symbol_attrs(unsigned long addr, unsigned long *size,
 
 /* Look up a kernel symbol and return it in a text buffer. */
 static int __sprint_symbol(char *buffer, unsigned long address,
-			   int symbol_offset)
+			   int symbol_offset, int add_offset)
 {
 	char *modname;
 	const char *name;
@@ -358,13 +358,13 @@ static int __sprint_symbol(char *buffer, unsigned long address,
 	if (name != buffer)
 		strcpy(buffer, name);
 	len = strlen(buffer);
-	buffer += len;
 	offset -= symbol_offset;
 
+	if (add_offset)
+		len += sprintf(buffer + len, "+%#lx/%#lx", offset, size);
+
 	if (modname)
-		len += sprintf(buffer, "+%#lx/%#lx [%s]", offset, size, modname);
-	else
-		len += sprintf(buffer, "+%#lx/%#lx", offset, size);
+		len += sprintf(buffer + len, " [%s]", modname);
 
 	return len;
 }
@@ -382,11 +382,27 @@ static int __sprint_symbol(char *buffer, unsigned long address,
  */
 int sprint_symbol(char *buffer, unsigned long address)
 {
-	return __sprint_symbol(buffer, address, 0);
+	return __sprint_symbol(buffer, address, 0, 1);
 }
-
 EXPORT_SYMBOL_GPL(sprint_symbol);
 
+/**
+ * sprint_symbol_no_offset - Look up a kernel symbol and return it in a text buffer
+ * @buffer: buffer to be stored
+ * @address: address to lookup
+ *
+ * This function looks up a kernel symbol with @address and stores its name
+ * and module name to @buffer if possible. If no symbol was found, just saves
+ * its @address as is.
+ *
+ * This function returns the number of bytes stored in @buffer.
+ */
+int sprint_symbol_no_offset(char *buffer, unsigned long address)
+{
+	return __sprint_symbol(buffer, address, 0, 0);
+}
+EXPORT_SYMBOL_GPL(sprint_symbol_no_offset);
+
 /**
  * sprint_backtrace - Look up a backtrace symbol and return it in a text buffer
  * @buffer: buffer to be stored
@@ -403,7 +419,7 @@ EXPORT_SYMBOL_GPL(sprint_symbol);
  */
 int sprint_backtrace(char *buffer, unsigned long address)
 {
-	return __sprint_symbol(buffer, address, -1);
+	return __sprint_symbol(buffer, address, -1, 1);
 }
 
 /* Look up a kernel symbol and print it to the kernel messages. */
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index abbabec9720a..f5dfe0ca34f6 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -436,7 +436,7 @@ char *symbol_string(char *buf, char *end, void *ptr,
 	else if (ext != 'f' && ext != 's')
 		sprint_symbol(sym, value);
 	else
-		kallsyms_lookup(value, NULL, NULL, NULL, sym);
+		sprint_symbol_no_offset(sym, value);
 
 	return string(buf, end, sym, spec);
 #else
-- 
cgit v1.3-7-g2ca7


From e311c9295912209dcf8e54de5401f8518112b7f8 Mon Sep 17 00:00:00 2001
From: Alexander Stein <alexander.stein@systec-electronic.com>
Date: Tue, 29 May 2012 15:07:36 -0700
Subject: rtc: add ioctl to get/clear battery low voltage status

Currently there is no generic way to get the RTC battery status within an
application.  So add an ioctl to read the status bit.  The idea is that
the bit is set once a low voltage is detected.  It stays there until it is
reset using the RTC_VL_CLR ioctl.

Signed-off-by: Alexander Stein <alexander.stein@systec-electronic.com>
Cc: Alessandro Zummo <a.zummo@towertech.it>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/rtc.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/rtc.h b/include/linux/rtc.h
index fcabfb4873c8..f071b3922c67 100644
--- a/include/linux/rtc.h
+++ b/include/linux/rtc.h
@@ -91,6 +91,9 @@ struct rtc_pll_info {
 #define RTC_PLL_GET	_IOR('p', 0x11, struct rtc_pll_info)  /* Get PLL correction */
 #define RTC_PLL_SET	_IOW('p', 0x12, struct rtc_pll_info)  /* Set PLL correction */
 
+#define RTC_VL_READ	_IOR('p', 0x13, int)	/* Voltage low detector */
+#define RTC_VL_CLR	_IO('p', 0x14)		/* Clear voltage low information */
+
 /* interrupt flags */
 #define RTC_IRQF 0x80	/* Any of the following is active */
 #define RTC_PF 0x40	/* Periodic interrupt */
-- 
cgit v1.3-7-g2ca7


From eb86c3064b3c53837fdfea17df1483d825919894 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <w.sang@pengutronix.de>
Date: Tue, 29 May 2012 15:07:38 -0700
Subject: rtc: ds1307: add trickle charger support

Some DS13XX devices have "trickle chargers".  Its configuration register
is at different locations, the setup is the same, though.  Since the
configuration is board specific, introduce a platform_data to this driver.
Tested with a DS1339 on a custom board.

Signed-off-by: Wolfram Sang <w.sang@pengutronix.de>
Cc: Alessandro Zummo <alessandro.zummo@towertech.it>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/rtc/rtc-ds1307.c   | 19 ++++++++++++++++---
 include/linux/rtc/ds1307.h | 22 ++++++++++++++++++++++
 2 files changed, 38 insertions(+), 3 deletions(-)
 create mode 100644 include/linux/rtc/ds1307.h

(limited to 'include/linux')

diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c
index 5dc1c7941199..836710ce750e 100644
--- a/drivers/rtc/rtc-ds1307.c
+++ b/drivers/rtc/rtc-ds1307.c
@@ -17,8 +17,7 @@
 #include <linux/string.h>
 #include <linux/rtc.h>
 #include <linux/bcd.h>
-
-
+#include <linux/rtc/ds1307.h>
 
 /*
  * We can't determine type by probing, but if we expect pre-Linux code
@@ -92,7 +91,8 @@ enum ds_type {
 #	define DS1337_BIT_A2I		0x02
 #	define DS1337_BIT_A1I		0x01
 #define DS1339_REG_ALARM1_SECS	0x07
-#define DS1339_REG_TRICKLE	0x10
+
+#define DS13XX_TRICKLE_CHARGER_MAGIC	0xa0
 
 #define RX8025_REG_CTRL1	0x0e
 #	define RX8025_BIT_2412		0x20
@@ -124,6 +124,7 @@ struct chip_desc {
 	unsigned		alarm:1;
 	u16			nvram_offset;
 	u16			nvram_size;
+	u16			trickle_charger_reg;
 };
 
 static const struct chip_desc chips[last_ds_type] = {
@@ -140,6 +141,13 @@ static const struct chip_desc chips[last_ds_type] = {
 	},
 	[ds_1339] = {
 		.alarm		= 1,
+		.trickle_charger_reg = 0x10,
+	},
+	[ds_1340] = {
+		.trickle_charger_reg = 0x08,
+	},
+	[ds_1388] = {
+		.trickle_charger_reg = 0x0a,
 	},
 	[ds_3231] = {
 		.alarm		= 1,
@@ -619,6 +627,7 @@ static int __devinit ds1307_probe(struct i2c_client *client,
 	struct i2c_adapter	*adapter = to_i2c_adapter(client->dev.parent);
 	int			want_irq = false;
 	unsigned char		*buf;
+	struct ds1307_platform_data *pdata = client->dev.platform_data;
 	static const int	bbsqi_bitpos[] = {
 		[ds_1337] = 0,
 		[ds_1339] = DS1339_BIT_BBSQI,
@@ -638,6 +647,10 @@ static int __devinit ds1307_probe(struct i2c_client *client,
 	ds1307->client	= client;
 	ds1307->type	= id->driver_data;
 
+	if (pdata && pdata->trickle_charger_setup && chip->trickle_charger_reg)
+		i2c_smbus_write_byte_data(client, chip->trickle_charger_reg,
+			DS13XX_TRICKLE_CHARGER_MAGIC | pdata->trickle_charger_setup);
+
 	buf = ds1307->regs;
 	if (i2c_check_functionality(adapter, I2C_FUNC_SMBUS_I2C_BLOCK)) {
 		ds1307->read_block_data = i2c_smbus_read_i2c_block_data;
diff --git a/include/linux/rtc/ds1307.h b/include/linux/rtc/ds1307.h
new file mode 100644
index 000000000000..291b1c490367
--- /dev/null
+++ b/include/linux/rtc/ds1307.h
@@ -0,0 +1,22 @@
+/*
+ * ds1307.h - platform_data for the ds1307 (and variants) rtc driver
+ * (C) Copyright 2012 by Wolfram Sang, Pengutronix e.K.
+ * same license as the driver
+ */
+
+#ifndef _LINUX_DS1307_H
+#define _LINUX_DS1307_H
+
+#include <linux/types.h>
+
+#define DS1307_TRICKLE_CHARGER_250_OHM	0x01
+#define DS1307_TRICKLE_CHARGER_2K_OHM	0x02
+#define DS1307_TRICKLE_CHARGER_4K_OHM	0x03
+#define DS1307_TRICKLE_CHARGER_NO_DIODE	0x04
+#define DS1307_TRICKLE_CHARGER_DIODE	0x08
+
+struct ds1307_platform_data {
+	u8 trickle_charger_setup;
+};
+
+#endif /* _LINUX_DS1307_H */
-- 
cgit v1.3-7-g2ca7