aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/x86/include/asm/fpu/types.h72
-rw-r--r--arch/x86/include/asm/processor.h10
-rw-r--r--arch/x86/kernel/fpu/init.c39
-rw-r--r--arch/x86/kernel/process.c2
-rw-r--r--fs/proc/kcore.c4
-rw-r--r--include/linux/sched.h12
-rw-r--r--kernel/fork.c8
7 files changed, 104 insertions, 43 deletions
diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index 0637826292de..c49c5173158e 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -189,6 +189,7 @@ union fpregs_state {
struct fxregs_state fxsave;
struct swregs_state soft;
struct xregs_state xsave;
+ u8 __padding[PAGE_SIZE];
};
/*
@@ -198,40 +199,6 @@ union fpregs_state {
*/
struct fpu {
/*
- * @state:
- *
- * In-memory copy of all FPU registers that we save/restore
- * over context switches. If the task is using the FPU then
- * the registers in the FPU are more recent than this state
- * copy. If the task context-switches away then they get
- * saved here and represent the FPU state.
- *
- * After context switches there may be a (short) time period
- * during which the in-FPU hardware registers are unchanged
- * and still perfectly match this state, if the tasks
- * scheduled afterwards are not using the FPU.
- *
- * This is the 'lazy restore' window of optimization, which
- * we track though 'fpu_fpregs_owner_ctx' and 'fpu->last_cpu'.
- *
- * We detect whether a subsequent task uses the FPU via setting
- * CR0::TS to 1, which causes any FPU use to raise a #NM fault.
- *
- * During this window, if the task gets scheduled again, we
- * might be able to skip having to do a restore from this
- * memory buffer to the hardware registers - at the cost of
- * incurring the overhead of #NM fault traps.
- *
- * Note that on modern CPUs that support the XSAVEOPT (or other
- * optimized XSAVE instructions), we don't use #NM traps anymore,
- * as the hardware can track whether FPU registers need saving
- * or not. On such CPUs we activate the non-lazy ('eagerfpu')
- * logic, which unconditionally saves/restores all FPU state
- * across context switches. (if FPU state exists.)
- */
- union fpregs_state state;
-
- /*
* @last_cpu:
*
* Records the last CPU on which this context was loaded into
@@ -288,6 +255,43 @@ struct fpu {
* deal with bursty apps that only use the FPU for a short time:
*/
unsigned char counter;
+ /*
+ * @state:
+ *
+ * In-memory copy of all FPU registers that we save/restore
+ * over context switches. If the task is using the FPU then
+ * the registers in the FPU are more recent than this state
+ * copy. If the task context-switches away then they get
+ * saved here and represent the FPU state.
+ *
+ * After context switches there may be a (short) time period
+ * during which the in-FPU hardware registers are unchanged
+ * and still perfectly match this state, if the tasks
+ * scheduled afterwards are not using the FPU.
+ *
+ * This is the 'lazy restore' window of optimization, which
+ * we track though 'fpu_fpregs_owner_ctx' and 'fpu->last_cpu'.
+ *
+ * We detect whether a subsequent task uses the FPU via setting
+ * CR0::TS to 1, which causes any FPU use to raise a #NM fault.
+ *
+ * During this window, if the task gets scheduled again, we
+ * might be able to skip having to do a restore from this
+ * memory buffer to the hardware registers - at the cost of
+ * incurring the overhead of #NM fault traps.
+ *
+ * Note that on modern CPUs that support the XSAVEOPT (or other
+ * optimized XSAVE instructions), we don't use #NM traps anymore,
+ * as the hardware can track whether FPU registers need saving
+ * or not. On such CPUs we activate the non-lazy ('eagerfpu')
+ * logic, which unconditionally saves/restores all FPU state
+ * across context switches. (if FPU state exists.)
+ */
+ union fpregs_state state;
+ /*
+ * WARNING: 'state' is dynamically-sized. Do not put
+ * anything after it here.
+ */
};
#endif /* _ASM_X86_FPU_H */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 43e6519df0d5..944f1785ed0d 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -390,9 +390,6 @@ struct thread_struct {
#endif
unsigned long gs;
- /* Floating point and extended processor state */
- struct fpu fpu;
-
/* Save middle states of ptrace breakpoints */
struct perf_event *ptrace_bps[HBP_NUM];
/* Debug status used for traps, single steps, etc... */
@@ -418,6 +415,13 @@ struct thread_struct {
unsigned long iopl;
/* Max allowed port in the bitmap, in bytes: */
unsigned io_bitmap_max;
+
+ /* Floating point and extended processor state */
+ struct fpu fpu;
+ /*
+ * WARNING: 'fpu' is dynamically-sized. It *MUST* be at
+ * the end.
+ */
};
/*
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 32826791e675..deacbfa6b33e 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -136,6 +136,45 @@ static void __init fpu__init_system_generic(void)
unsigned int xstate_size;
EXPORT_SYMBOL_GPL(xstate_size);
+#define CHECK_MEMBER_AT_END_OF(TYPE, MEMBER) \
+ BUILD_BUG_ON((sizeof(TYPE) - \
+ offsetof(TYPE, MEMBER) - \
+ sizeof(((TYPE *)0)->MEMBER)) > \
+ 0) \
+
+/*
+ * We append the 'struct fpu' to the task_struct.
+ */
+int __weak arch_task_struct_size(void)
+{
+ int task_size = sizeof(struct task_struct);
+
+ /*
+ * Subtract off the static size of the register state.
+ * It potentially has a bunch of padding.
+ */
+ task_size -= sizeof(((struct task_struct *)0)->thread.fpu.state);
+
+ /*
+ * Add back the dynamically-calculated register state
+ * size.
+ */
+ task_size += xstate_size;
+
+ /*
+ * We dynamically size 'struct fpu', so we require that
+ * it be at the end of 'thread_struct' and that
+ * 'thread_struct' be at the end of 'task_struct'. If
+ * you hit a compile error here, check the structure to
+ * see if something got added to the end.
+ */
+ CHECK_MEMBER_AT_END_OF(struct fpu, state);
+ CHECK_MEMBER_AT_END_OF(struct thread_struct, fpu);
+ CHECK_MEMBER_AT_END_OF(struct task_struct, thread);
+
+ return task_size;
+}
+
/*
* Set up the xstate_size based on the legacy FPU context size.
*
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 9cad694ed7c4..975420eac105 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -81,7 +81,7 @@ EXPORT_SYMBOL_GPL(idle_notifier_unregister);
*/
int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
{
- *dst = *src;
+ memcpy(dst, src, arch_task_struct_size());
return fpu__copy(&dst->thread.fpu, &src->thread.fpu);
}
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 91a4e6426321..a0fe99485687 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -92,7 +92,7 @@ static size_t get_kcore_size(int *nphdr, size_t *elf_buflen)
roundup(sizeof(CORE_STR), 4)) +
roundup(sizeof(struct elf_prstatus), 4) +
roundup(sizeof(struct elf_prpsinfo), 4) +
- roundup(sizeof(struct task_struct), 4);
+ roundup(arch_task_struct_size(), 4);
*elf_buflen = PAGE_ALIGN(*elf_buflen);
return size + *elf_buflen;
}
@@ -415,7 +415,7 @@ static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff)
/* set up the task structure */
notes[2].name = CORE_STR;
notes[2].type = NT_TASKSTRUCT;
- notes[2].datasz = sizeof(struct task_struct);
+ notes[2].datasz = arch_task_struct_size();
notes[2].data = current;
nhdr->p_filesz += notesize(&notes[2]);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index ae21f1591615..e43a41d892b6 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1522,8 +1522,6 @@ struct task_struct {
/* hung task detection */
unsigned long last_switch_count;
#endif
-/* CPU-specific state of this task */
- struct thread_struct thread;
/* filesystem information */
struct fs_struct *fs;
/* open file information */
@@ -1778,8 +1776,18 @@ struct task_struct {
unsigned long task_state_change;
#endif
int pagefault_disabled;
+/* CPU-specific state of this task */
+ struct thread_struct thread;
+/*
+ * WARNING: on x86, 'thread_struct' contains a variable-sized
+ * structure. It *MUST* be at the end of 'task_struct'.
+ *
+ * Do not put anything below here!
+ */
};
+extern int arch_task_struct_size(void);
+
/* Future-safe accessor for struct task_struct's cpus_allowed. */
#define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed)
diff --git a/kernel/fork.c b/kernel/fork.c
index 1bfefc6f96a4..431b67a6098c 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -287,15 +287,21 @@ static void set_max_threads(unsigned int max_threads_suggested)
max_threads = clamp_t(u64, threads, MIN_THREADS, MAX_THREADS);
}
+int __weak arch_task_struct_size(void)
+{
+ return sizeof(struct task_struct);
+}
+
void __init fork_init(void)
{
+ int task_struct_size = arch_task_struct_size();
#ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR
#ifndef ARCH_MIN_TASKALIGN
#define ARCH_MIN_TASKALIGN L1_CACHE_BYTES
#endif
/* create a slab on which task_structs can be allocated */
task_struct_cachep =
- kmem_cache_create("task_struct", sizeof(struct task_struct),
+ kmem_cache_create("task_struct", task_struct_size,
ARCH_MIN_TASKALIGN, SLAB_PANIC | SLAB_NOTRACK, NULL);
#endif