aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--drivers/pci/setup-bus.c5
-rw-r--r--include/linux/sched.h6
-rw-r--r--kernel/relay.c7
-rw-r--r--kernel/sched.c43
-rw-r--r--kernel/time/clocksource.c2
-rw-r--r--kernel/timer.c10
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_recvfrom.c21
7 files changed, 76 insertions, 18 deletions
diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
index 125e7b7f34ff..f7cb8e0758b4 100644
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c
@@ -486,12 +486,7 @@ void __ref pci_bus_size_bridges(struct pci_bus *bus)
break;
case PCI_CLASS_BRIDGE_PCI:
- /* don't size subtractive decoding (transparent)
- * PCI-to-PCI bridges */
- if (bus->self->transparent)
- break;
pci_bridge_check_ranges(bus);
- /* fall through */
default:
pbus_size_io(bus);
/* If the bridge supports prefetchable range, size it
diff --git a/include/linux/sched.h b/include/linux/sched.h
index fed07d03364e..6a1e7afb099b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1541,6 +1541,12 @@ static inline void idle_task_exit(void) {}
extern void sched_idle_next(void);
+#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
+extern void wake_up_idle_cpu(int cpu);
+#else
+static inline void wake_up_idle_cpu(int cpu) { }
+#endif
+
#ifdef CONFIG_SCHED_DEBUG
extern unsigned int sysctl_sched_latency;
extern unsigned int sysctl_sched_min_granularity;
diff --git a/kernel/relay.c b/kernel/relay.c
index 4c035a8a248c..d6204a485818 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -736,7 +736,7 @@ static int relay_file_open(struct inode *inode, struct file *filp)
kref_get(&buf->kref);
filp->private_data = buf;
- return 0;
+ return nonseekable_open(inode, filp);
}
/**
@@ -1056,6 +1056,10 @@ static struct pipe_buf_operations relay_pipe_buf_ops = {
.get = generic_pipe_buf_get,
};
+static void relay_page_release(struct splice_pipe_desc *spd, unsigned int i)
+{
+}
+
/*
* subbuf_splice_actor - splice up to one subbuf's worth of data
*/
@@ -1083,6 +1087,7 @@ static int subbuf_splice_actor(struct file *in,
.partial = partial,
.flags = flags,
.ops = &relay_pipe_buf_ops,
+ .spd_release = relay_page_release,
};
if (rbuf->subbufs_produced == rbuf->subbufs_consumed)
diff --git a/kernel/sched.c b/kernel/sched.c
index 28c73f07efb2..8dcdec6fe0fe 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1052,6 +1052,49 @@ static void resched_cpu(int cpu)
resched_task(cpu_curr(cpu));
spin_unlock_irqrestore(&rq->lock, flags);
}
+
+#ifdef CONFIG_NO_HZ
+/*
+ * When add_timer_on() enqueues a timer into the timer wheel of an
+ * idle CPU then this timer might expire before the next timer event
+ * which is scheduled to wake up that CPU. In case of a completely
+ * idle system the next event might even be infinite time into the
+ * future. wake_up_idle_cpu() ensures that the CPU is woken up and
+ * leaves the inner idle loop so the newly added timer is taken into
+ * account when the CPU goes back to idle and evaluates the timer
+ * wheel for the next timer event.
+ */
+void wake_up_idle_cpu(int cpu)
+{
+ struct rq *rq = cpu_rq(cpu);
+
+ if (cpu == smp_processor_id())
+ return;
+
+ /*
+ * This is safe, as this function is called with the timer
+ * wheel base lock of (cpu) held. When the CPU is on the way
+ * to idle and has not yet set rq->curr to idle then it will
+ * be serialized on the timer wheel base lock and take the new
+ * timer into account automatically.
+ */
+ if (rq->curr != rq->idle)
+ return;
+
+ /*
+ * We can set TIF_RESCHED on the idle task of the other CPU
+ * lockless. The worst case is that the other CPU runs the
+ * idle task through an additional NOOP schedule()
+ */
+ set_tsk_thread_flag(rq->idle, TIF_NEED_RESCHED);
+
+ /* NEED_RESCHED must be visible before we test polling */
+ smp_mb();
+ if (!tsk_is_polling(rq->idle))
+ smp_send_reschedule(cpu);
+}
+#endif
+
#else
static void __resched_task(struct task_struct *p, int tif_bit)
{
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 278534bbca95..7f60097d443a 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -174,7 +174,7 @@ static void clocksource_check_watchdog(struct clocksource *cs)
if (watchdog)
del_timer(&watchdog_timer);
watchdog = cs;
- init_timer_deferrable(&watchdog_timer);
+ init_timer(&watchdog_timer);
watchdog_timer.function = clocksource_watchdog;
/* Reset watchdog cycles */
diff --git a/kernel/timer.c b/kernel/timer.c
index 99b00a25f88b..b024106daa70 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -451,10 +451,18 @@ void add_timer_on(struct timer_list *timer, int cpu)
spin_lock_irqsave(&base->lock, flags);
timer_set_base(timer, base);
internal_add_timer(base, timer);
+ /*
+ * Check whether the other CPU is idle and needs to be
+ * triggered to reevaluate the timer wheel when nohz is
+ * active. We are protected against the other CPU fiddling
+ * with the timer by holding the timer base lock. This also
+ * makes sure that a CPU on the way to idle can not evaluate
+ * the timer wheel.
+ */
+ wake_up_idle_cpu(cpu);
spin_unlock_irqrestore(&base->lock, flags);
}
-
/**
* mod_timer - modify a timer's timeout
* @timer: the timer to be modified
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 971271602dd0..c22d6b6f2db4 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -322,15 +322,6 @@ next_sge:
ctxt->direction = DMA_FROM_DEVICE;
clear_bit(RDMACTXT_F_READ_DONE, &ctxt->flags);
clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
- if ((ch+1)->rc_discrim == 0) {
- /*
- * Checked in sq_cq_reap to see if we need to
- * be enqueued
- */
- set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
- ctxt->next = hdr_ctxt;
- hdr_ctxt->next = head;
- }
/* Prepare READ WR */
memset(&read_wr, 0, sizeof read_wr);
@@ -348,7 +339,17 @@ next_sge:
rdma_set_ctxt_sge(ctxt, &sge[ch_sge_ary[ch_no].start],
&sgl_offset,
read_wr.num_sge);
-
+ if (((ch+1)->rc_discrim == 0) &&
+ (read_wr.num_sge == ch_sge_ary[ch_no].count)) {
+ /*
+ * Mark the last RDMA_READ with a bit to
+ * indicate all RPC data has been fetched from
+ * the client and the RPC needs to be enqueued.
+ */
+ set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
+ ctxt->next = hdr_ctxt;
+ hdr_ctxt->next = head;
+ }
/* Post the read */
err = svc_rdma_send(xprt, &read_wr);
if (err) {