aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/time
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/time')
-rw-r--r--kernel/time/namespace.c104
1 files changed, 103 insertions, 1 deletions
diff --git a/kernel/time/namespace.c b/kernel/time/namespace.c
index 1a0fbaa5d2d4..d705c15d0273 100644
--- a/kernel/time/namespace.c
+++ b/kernel/time/namespace.c
@@ -16,6 +16,8 @@
#include <linux/err.h>
#include <linux/mm.h>
+#include <vdso/datapage.h>
+
ktime_t do_timens_ktime_to_host(clockid_t clockid, ktime_t tim,
struct timens_offsets *ns_offsets)
{
@@ -90,16 +92,23 @@ static struct time_namespace *clone_time_ns(struct user_namespace *user_ns,
kref_init(&ns->kref);
+ ns->vvar_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+ if (!ns->vvar_page)
+ goto fail_free;
+
err = ns_alloc_inum(&ns->ns);
if (err)
- goto fail_free;
+ goto fail_free_page;
ns->ucounts = ucounts;
ns->ns.ops = &timens_operations;
ns->user_ns = get_user_ns(user_ns);
ns->offsets = old_ns->offsets;
+ ns->frozen_offsets = false;
return ns;
+fail_free_page:
+ __free_page(ns->vvar_page);
fail_free:
kfree(ns);
fail_dec:
@@ -128,6 +137,93 @@ struct time_namespace *copy_time_ns(unsigned long flags,
return clone_time_ns(user_ns, old_ns);
}
+static struct timens_offset offset_from_ts(struct timespec64 off)
+{
+ struct timens_offset ret;
+
+ ret.sec = off.tv_sec;
+ ret.nsec = off.tv_nsec;
+
+ return ret;
+}
+
+/*
+ * A time namespace VVAR page has the same layout as the VVAR page which
+ * contains the system wide VDSO data.
+ *
+ * For a normal task the VVAR pages are installed in the normal ordering:
+ * VVAR
+ * PVCLOCK
+ * HVCLOCK
+ * TIMENS <- Not really required
+ *
+ * Now for a timens task the pages are installed in the following order:
+ * TIMENS
+ * PVCLOCK
+ * HVCLOCK
+ * VVAR
+ *
+ * The check for vdso_data->clock_mode is in the unlikely path of
+ * the seq begin magic. So for the non-timens case most of the time
+ * 'seq' is even, so the branch is not taken.
+ *
+ * If 'seq' is odd, i.e. a concurrent update is in progress, the extra check
+ * for vdso_data->clock_mode is a non-issue. The task is spin waiting for the
+ * update to finish and for 'seq' to become even anyway.
+ *
+ * Timens page has vdso_data->clock_mode set to VCLOCK_TIMENS which enforces
+ * the time namespace handling path.
+ */
+static void timens_setup_vdso_data(struct vdso_data *vdata,
+ struct time_namespace *ns)
+{
+ struct timens_offset *offset = vdata->offset;
+ struct timens_offset monotonic = offset_from_ts(ns->offsets.monotonic);
+ struct timens_offset boottime = offset_from_ts(ns->offsets.boottime);
+
+ vdata->seq = 1;
+ vdata->clock_mode = VCLOCK_TIMENS;
+ offset[CLOCK_MONOTONIC] = monotonic;
+ offset[CLOCK_MONOTONIC_RAW] = monotonic;
+ offset[CLOCK_MONOTONIC_COARSE] = monotonic;
+ offset[CLOCK_BOOTTIME] = boottime;
+ offset[CLOCK_BOOTTIME_ALARM] = boottime;
+}
+
+/*
+ * Protects possibly multiple offsets writers racing each other
+ * and tasks entering the namespace.
+ */
+static DEFINE_MUTEX(offset_lock);
+
+static void timens_set_vvar_page(struct task_struct *task,
+ struct time_namespace *ns)
+{
+ struct vdso_data *vdata;
+ unsigned int i;
+
+ if (ns == &init_time_ns)
+ return;
+
+ /* Fast-path, taken by every task in namespace except the first. */
+ if (likely(ns->frozen_offsets))
+ return;
+
+ mutex_lock(&offset_lock);
+ /* Nothing to-do: vvar_page has been already initialized. */
+ if (ns->frozen_offsets)
+ goto out;
+
+ ns->frozen_offsets = true;
+ vdata = arch_get_vdso_data(page_address(ns->vvar_page));
+
+ for (i = 0; i < CS_BASES; i++)
+ timens_setup_vdso_data(&vdata[i], ns);
+
+out:
+ mutex_unlock(&offset_lock);
+}
+
void free_time_ns(struct kref *kref)
{
struct time_namespace *ns;
@@ -136,6 +232,7 @@ void free_time_ns(struct kref *kref)
dec_time_namespaces(ns->ucounts);
put_user_ns(ns->user_ns);
ns_free_inum(&ns->ns);
+ __free_page(ns->vvar_page);
kfree(ns);
}
@@ -192,6 +289,8 @@ static int timens_install(struct nsproxy *nsproxy, struct ns_common *new)
!ns_capable(current_user_ns(), CAP_SYS_ADMIN))
return -EPERM;
+ timens_set_vvar_page(current, ns);
+
get_time_ns(ns);
put_time_ns(nsproxy->time_ns);
nsproxy->time_ns = ns;
@@ -211,6 +310,8 @@ int timens_on_fork(struct nsproxy *nsproxy, struct task_struct *tsk)
if (nsproxy->time_ns == nsproxy->time_ns_for_children)
return 0;
+ timens_set_vvar_page(tsk, ns);
+
get_time_ns(ns);
put_time_ns(nsproxy->time_ns);
nsproxy->time_ns = ns;
@@ -246,6 +347,7 @@ struct time_namespace init_time_ns = {
.user_ns = &init_user_ns,
.ns.inum = PROC_TIME_INIT_INO,
.ns.ops = &timens_operations,
+ .frozen_offsets = true,
};
static int __init time_ns_init(void)