aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel/fpu/xstate.c
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- arch/x86/kernel/fpu/xstate.c 469
1 files changed, 224 insertions, 245 deletions
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index d28829403ed0..59e543b95a3c 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -81,10 +81,10 @@ static unsigned int xstate_offsets[XFEATURE_MAX] __ro_after_init =
{ [ 0 ... XFEATURE_MAX - 1] = -1};
static unsigned int xstate_sizes[XFEATURE_MAX] __ro_after_init =
{ [ 0 ... XFEATURE_MAX - 1] = -1};
-static unsigned int xstate_comp_offsets[XFEATURE_MAX] __ro_after_init =
- { [ 0 ... XFEATURE_MAX - 1] = -1};
-static unsigned int xstate_supervisor_only_offsets[XFEATURE_MAX] __ro_after_init =
- { [ 0 ... XFEATURE_MAX - 1] = -1};
+static unsigned int xstate_flags[XFEATURE_MAX] __ro_after_init;
+
+#define XSTATE_FLAG_SUPERVISOR BIT(0)
+#define XSTATE_FLAG_ALIGNED64 BIT(1)
/*
* Return whether the system supports a given xfeature.
@@ -124,17 +124,42 @@ int cpu_has_xfeatures(u64 xfeatures_needed, const char **feature_name)
}
EXPORT_SYMBOL_GPL(cpu_has_xfeatures);
+static bool xfeature_is_aligned64(int xfeature_nr)
+{
+ return xstate_flags[xfeature_nr] & XSTATE_FLAG_ALIGNED64;
+}
+
static bool xfeature_is_supervisor(int xfeature_nr)
{
+ return xstate_flags[xfeature_nr] & XSTATE_FLAG_SUPERVISOR;
+}
+
+static unsigned int xfeature_get_offset(u64 xcomp_bv, int xfeature)
+{
+ unsigned int offs, i;
+
/*
- * Extended State Enumeration Sub-leaves (EAX = 0DH, ECX = n, n > 1)
- * returns ECX[0] set to (1) for a supervisor state, and cleared (0)
- * for a user state.
+ * Non-compacted format and legacy features use the cached fixed
+ * offsets.
*/
- u32 eax, ebx, ecx, edx;
+ if (!cpu_feature_enabled(X86_FEATURE_XCOMPACTED) ||
+ xfeature <= XFEATURE_SSE)
+ return xstate_offsets[xfeature];
- cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx);
- return ecx & 1;
+ /*
+ * Compacted format offsets depend on the actual content of the
+ * compacted xsave area which is determined by the xcomp_bv header
+ * field.
+ */
+ offs = FXSAVE_SIZE + XSAVE_HDR_SIZE;
+ for_each_extended_xfeature(i, xcomp_bv) {
+ if (xfeature_is_aligned64(i))
+ offs = ALIGN(offs, 64);
+ if (i == xfeature)
+ break;
+ offs += xstate_sizes[i];
+ }
+ return offs;
}
/*
@@ -182,7 +207,7 @@ static bool xfeature_enabled(enum xfeature xfeature)
* Record the offsets and sizes of various xstates contained
* in the XSAVE state memory layout.
*/
-static void __init setup_xstate_features(void)
+static void __init setup_xstate_cache(void)
{
u32 eax, ebx, ecx, edx, i;
/* start at the beginning of the "extended state" */
@@ -205,6 +230,7 @@ static void __init setup_xstate_features(void)
cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx);
xstate_sizes[i] = eax;
+ xstate_flags[i] = ecx;
/*
* If an xfeature is supervisor state, the offset in EBX is
@@ -264,94 +290,6 @@ static void __init print_xstate_features(void)
} while (0)
/*
- * We could cache this like xstate_size[], but we only use
- * it here, so it would be a waste of space.
- */
-static int xfeature_is_aligned(int xfeature_nr)
-{
- u32 eax, ebx, ecx, edx;
-
- CHECK_XFEATURE(xfeature_nr);
-
- if (!xfeature_enabled(xfeature_nr)) {
- WARN_ONCE(1, "Checking alignment of disabled xfeature %d\n",
- xfeature_nr);
- return 0;
- }
-
- cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx);
- /*
- * The value returned by ECX[1] indicates the alignment
- * of state component 'i' when the compacted format
- * of the extended region of an XSAVE area is used:
- */
- return !!(ecx & 2);
-}
-
-/*
- * This function sets up offsets and sizes of all extended states in
- * xsave area. This supports both standard format and compacted format
- * of the xsave area.
- */
-static void __init setup_xstate_comp_offsets(void)
-{
- unsigned int next_offset;
- int i;
-
- /*
- * The FP xstates and SSE xstates are legacy states. They are always
- * in the fixed offsets in the xsave area in either compacted form
- * or standard form.
- */
- xstate_comp_offsets[XFEATURE_FP] = 0;
- xstate_comp_offsets[XFEATURE_SSE] = offsetof(struct fxregs_state,
- xmm_space);
-
- if (!cpu_feature_enabled(X86_FEATURE_XSAVES)) {
- for_each_extended_xfeature(i, fpu_kernel_cfg.max_features)
- xstate_comp_offsets[i] = xstate_offsets[i];
- return;
- }
-
- next_offset = FXSAVE_SIZE + XSAVE_HDR_SIZE;
-
- for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) {
- if (xfeature_is_aligned(i))
- next_offset = ALIGN(next_offset, 64);
-
- xstate_comp_offsets[i] = next_offset;
- next_offset += xstate_sizes[i];
- }
-}
-
-/*
- * Setup offsets of a supervisor-state-only XSAVES buffer:
- *
- * The offsets stored in xstate_comp_offsets[] only work for one specific
- * value of the Requested Feature BitMap (RFBM). In cases where a different
- * RFBM value is used, a different set of offsets is required. This set of
- * offsets is for when RFBM=xfeatures_mask_supervisor().
- */
-static void __init setup_supervisor_only_offsets(void)
-{
- unsigned int next_offset;
- int i;
-
- next_offset = FXSAVE_SIZE + XSAVE_HDR_SIZE;
-
- for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) {
- if (!xfeature_is_supervisor(i))
- continue;
-
- if (xfeature_is_aligned(i))
- next_offset = ALIGN(next_offset, 64);
-
- xstate_supervisor_only_offsets[i] = next_offset;
- next_offset += xstate_sizes[i];
- }
-}
-
-/*
* Print out xstate component offsets and sizes
*/
static void __init print_xstate_offset_size(void)
@@ -360,7 +298,8 @@ static void __init print_xstate_offset_size(void)
for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) {
pr_info("x86/fpu: xstate_offset[%d]: %4d, xstate_sizes[%d]: %4d\n",
- i, xstate_comp_offsets[i], i, xstate_sizes[i]);
+ i, xfeature_get_offset(fpu_kernel_cfg.max_features, i),
+ i, xstate_sizes[i]);
}
}
@@ -419,10 +358,9 @@ static void __init setup_init_fpu_buf(void)
if (!boot_cpu_has(X86_FEATURE_XSAVE))
return;
- setup_xstate_features();
print_xstate_features();
- xstate_init_xcomp_bv(&init_fpstate.regs.xsave, fpu_kernel_cfg.max_features);
+ xstate_init_xcomp_bv(&init_fpstate.regs.xsave, init_fpstate.xfeatures);
/*
* Init all the features state with header.xfeatures being 0x0
@@ -432,12 +370,12 @@ static void __init setup_init_fpu_buf(void)
/*
* All components are now in init state. Read the state back so
* that init_fpstate contains all non-zero init state. This only
- * works with XSAVE, but not with XSAVEOPT and XSAVES because
+ * works with XSAVE, but not with XSAVEOPT and XSAVEC/S because
* those use the init optimization which skips writing data for
* components in init state.
*
* XSAVE could be used, but that would require to reshuffle the
- * data when XSAVES is available because XSAVES uses xstate
+ * data when XSAVEC/S is available because XSAVEC/S uses xstate
* compaction. But doing so is a pointless exercise because most
* components have an all zeros init state except for the legacy
* ones (FP and SSE). Those can be saved with FXSAVE into the
@@ -448,25 +386,6 @@ static void __init setup_init_fpu_buf(void)
fxsave(&init_fpstate.regs.fxsave);
}
-static int xfeature_uncompacted_offset(int xfeature_nr)
-{
- u32 eax, ebx, ecx, edx;
-
- /*
- * Only XSAVES supports supervisor states and it uses compacted
- * format. Checking a supervisor state's uncompacted offset is
- * an error.
- */
- if (XFEATURE_MASK_SUPERVISOR_ALL & BIT_ULL(xfeature_nr)) {
- WARN_ONCE(1, "No fixed offset for xstate %d\n", xfeature_nr);
- return -1;
- }
-
- CHECK_XFEATURE(xfeature_nr);
- cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx);
- return ebx;
-}
-
int xfeature_size(int xfeature_nr)
{
u32 eax, ebx, ecx, edx;
@@ -644,29 +563,15 @@ static bool __init check_xstate_against_struct(int nr)
static unsigned int xstate_calculate_size(u64 xfeatures, bool compacted)
{
- unsigned int size = FXSAVE_SIZE + XSAVE_HDR_SIZE;
- int i;
+ unsigned int topmost = fls64(xfeatures) - 1;
+ unsigned int offset = xstate_offsets[topmost];
- for_each_extended_xfeature(i, xfeatures) {
- /* Align from the end of the previous feature */
- if (xfeature_is_aligned(i))
- size = ALIGN(size, 64);
- /*
- * In compacted format the enabled features are packed,
- * i.e. disabled features do not occupy space.
- *
- * In non-compacted format the offsets are fixed and
- * disabled states still occupy space in the memory buffer.
- */
- if (!compacted)
- size = xfeature_uncompacted_offset(i);
- /*
- * Add the feature size even for non-compacted format
- * to make the end result correct
- */
- size += xfeature_size(i);
- }
- return size;
+ if (topmost <= XFEATURE_SSE)
+ return sizeof(struct xregs_state);
+
+ if (compacted)
+ offset = xfeature_get_offset(xfeatures, topmost);
+ return offset + xstate_sizes[topmost];
}
/*
@@ -680,7 +585,8 @@ static unsigned int xstate_calculate_size(u64 xfeatures, bool compacted)
*/
static bool __init paranoid_xstate_size_valid(unsigned int kernel_size)
{
- bool compacted = cpu_feature_enabled(X86_FEATURE_XSAVES);
+ bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED);
+ bool xsaves = cpu_feature_enabled(X86_FEATURE_XSAVES);
unsigned int size = FXSAVE_SIZE + XSAVE_HDR_SIZE;
int i;
@@ -691,7 +597,7 @@ static bool __init paranoid_xstate_size_valid(unsigned int kernel_size)
* Supervisor state components can be managed only by
* XSAVES.
*/
- if (!compacted && xfeature_is_supervisor(i)) {
+ if (!xsaves && xfeature_is_supervisor(i)) {
XSTATE_WARN_ON(1);
return false;
}
@@ -708,8 +614,11 @@ static bool __init paranoid_xstate_size_valid(unsigned int kernel_size)
* the size of the *user* states. If we use it to size a buffer
* that we use 'XSAVES' on, we could potentially overflow the
* buffer because 'XSAVES' saves system states too.
+ *
+ * This also takes compaction into account. So this works for
+ * XSAVEC as well.
*/
-static unsigned int __init get_xsaves_size(void)
+static unsigned int __init get_compacted_size(void)
{
unsigned int eax, ebx, ecx, edx;
/*
@@ -719,6 +628,10 @@ static unsigned int __init get_xsaves_size(void)
* containing all the state components
* corresponding to bits currently set in
* XCR0 | IA32_XSS.
+ *
+ * When XSAVES is not available but XSAVEC is (virt), then there
+ * are no supervisor states, but XSAVEC still uses compacted
+ * format.
*/
cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx);
return ebx;
@@ -728,13 +641,13 @@ static unsigned int __init get_xsaves_size(void)
* Get the total size of the enabled xstates without the independent supervisor
* features.
*/
-static unsigned int __init get_xsaves_size_no_independent(void)
+static unsigned int __init get_xsave_compacted_size(void)
{
u64 mask = xfeatures_mask_independent();
unsigned int size;
if (!mask)
- return get_xsaves_size();
+ return get_compacted_size();
/* Disable independent features. */
wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor());
@@ -743,7 +656,7 @@ static unsigned int __init get_xsaves_size_no_independent(void)
* Ask the hardware what size is required of the buffer.
* This is the size required for the task->fpu buffer.
*/
- size = get_xsaves_size();
+ size = get_compacted_size();
/* Re-enable independent features so XSAVES will work on them again. */
wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() | mask);
@@ -765,48 +678,31 @@ static unsigned int __init get_xsave_size_user(void)
return ebx;
}
-/*
- * Will the runtime-enumerated 'xstate_size' fit in the init
- * task's statically-allocated buffer?
- */
-static bool __init is_supported_xstate_size(unsigned int test_xstate_size)
-{
- if (test_xstate_size <= sizeof(init_fpstate.regs))
- return true;
-
- pr_warn("x86/fpu: xstate buffer too small (%zu < %d), disabling xsave\n",
- sizeof(init_fpstate.regs), test_xstate_size);
- return false;
-}
-
static int __init init_xstate_size(void)
{
/* Recompute the context size for enabled features: */
unsigned int user_size, kernel_size, kernel_default_size;
- bool compacted = cpu_feature_enabled(X86_FEATURE_XSAVES);
+ bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED);
/* Uncompacted user space size */
user_size = get_xsave_size_user();
/*
- * XSAVES kernel size includes supervisor states and
- * uses compacted format when available.
+ * XSAVES kernel size includes supervisor states and uses compacted
+ * format. XSAVEC uses compacted format, but does not save
+ * supervisor states.
*
- * XSAVE does not support supervisor states so
- * kernel and user size is identical.
+ * XSAVE[OPT] do not support supervisor states so kernel and user
+ * size is identical.
*/
if (compacted)
- kernel_size = get_xsaves_size_no_independent();
+ kernel_size = get_xsave_compacted_size();
else
kernel_size = user_size;
kernel_default_size =
xstate_calculate_size(fpu_kernel_cfg.default_features, compacted);
- /* Ensure we have the space to store all default enabled features. */
- if (!is_supported_xstate_size(kernel_default_size))
- return -EINVAL;
-
if (!paranoid_xstate_size_valid(kernel_size))
return -EINVAL;
@@ -909,8 +805,11 @@ void __init fpu__init_system_xstate(unsigned int legacy_size)
if (!cpu_feature_enabled(X86_FEATURE_XFD))
fpu_kernel_cfg.max_features &= ~XFEATURE_MASK_USER_DYNAMIC;
- fpu_kernel_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED |
- XFEATURE_MASK_SUPERVISOR_SUPPORTED;
+ if (!cpu_feature_enabled(X86_FEATURE_XSAVES))
+ fpu_kernel_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED;
+ else
+ fpu_kernel_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED |
+ XFEATURE_MASK_SUPERVISOR_SUPPORTED;
fpu_user_cfg.max_features = fpu_kernel_cfg.max_features;
fpu_user_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED;
@@ -933,8 +832,17 @@ void __init fpu__init_system_xstate(unsigned int legacy_size)
*/
init_fpstate.xfd = fpu_user_cfg.max_features & XFEATURE_MASK_USER_DYNAMIC;
+ /* Set up compaction feature bit */
+ if (cpu_feature_enabled(X86_FEATURE_XSAVEC) ||
+ cpu_feature_enabled(X86_FEATURE_XSAVES))
+ setup_force_cpu_cap(X86_FEATURE_XCOMPACTED);
+
/* Enable xstate instructions to be able to continue with initialization: */
fpu__init_cpu_xstate();
+
+ /* Cache size, offset and flags for initialization */
+ setup_xstate_cache();
+
err = init_xstate_size();
if (err)
goto out_disable;
@@ -949,9 +857,20 @@ void __init fpu__init_system_xstate(unsigned int legacy_size)
update_regset_xstate_info(fpu_user_cfg.max_size,
fpu_user_cfg.max_features);
+ /*
+ * init_fpstate excludes dynamic states as they are large but their
+ * init state is all zeros.
+ */
+ init_fpstate.size = fpu_kernel_cfg.default_size;
+ init_fpstate.xfeatures = fpu_kernel_cfg.default_features;
+
+ if (init_fpstate.size > sizeof(init_fpstate.regs)) {
+ pr_warn("x86/fpu: init_fpstate buffer too small (%zu < %d), disabling XSAVE\n",
+ sizeof(init_fpstate.regs), init_fpstate.size);
+ goto out_disable;
+ }
+
setup_init_fpu_buf();
- setup_xstate_comp_offsets();
- setup_supervisor_only_offsets();
/*
* Paranoia check whether something in the setup modified the
@@ -967,7 +886,7 @@ void __init fpu__init_system_xstate(unsigned int legacy_size)
pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n",
fpu_kernel_cfg.max_features,
fpu_kernel_cfg.max_size,
- boot_cpu_has(X86_FEATURE_XSAVES) ? "compacted" : "standard");
+ boot_cpu_has(X86_FEATURE_XCOMPACTED) ? "compacted" : "standard");
return;
out_disable:
@@ -1006,13 +925,19 @@ void fpu__resume_cpu(void)
*/
static void *__raw_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
{
- if (!xfeature_enabled(xfeature_nr)) {
- WARN_ON_FPU(1);
+ u64 xcomp_bv = xsave->header.xcomp_bv;
+
+ if (WARN_ON_ONCE(!xfeature_enabled(xfeature_nr)))
return NULL;
+
+ if (cpu_feature_enabled(X86_FEATURE_XCOMPACTED)) {
+ if (WARN_ON_ONCE(!(xcomp_bv & BIT_ULL(xfeature_nr))))
+ return NULL;
}
- return (void *)xsave + xstate_comp_offsets[xfeature_nr];
+ return (void *)xsave + xfeature_get_offset(xcomp_bv, xfeature_nr);
}
+
/*
* Given the xsave area and a state inside, this function returns the
* address of the state.
@@ -1043,8 +968,9 @@ void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
* We should not ever be requesting features that we
* have not enabled.
*/
- WARN_ONCE(!(fpu_kernel_cfg.max_features & BIT_ULL(xfeature_nr)),
- "get of unsupported state");
+ if (WARN_ON_ONCE(!xfeature_enabled(xfeature_nr)))
+ return NULL;
+
/*
* This assumes the last 'xsave*' instruction to
* have requested that 'xfeature_nr' be saved.
@@ -1199,6 +1125,15 @@ void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate,
*/
mask = fpstate->user_xfeatures;
+ /*
+ * Dynamic features are not present in init_fpstate. When they are
+ * in an all zeros init state, remove those from 'mask' to zero
+ * those features in the user buffer instead of retrieving them
+ * from init_fpstate.
+ */
+ if (fpu_state_size_dynamic())
+ mask &= (header.xfeatures | xinit->header.xcomp_bv);
+
for_each_extended_xfeature(i, mask) {
/*
* If there was a feature or alignment gap, zero the space
@@ -1302,7 +1237,7 @@ static int copy_uabi_to_xstate(struct fpstate *fpstate, const void *kbuf,
}
for (i = 0; i < XFEATURE_MAX; i++) {
- u64 mask = ((u64)1 << i);
+ mask = BIT_ULL(i);
if (hdr.xfeatures & mask) {
void *dst = __raw_xsave_addr(xsave, i);
@@ -1500,35 +1435,13 @@ void fpstate_free(struct fpu *fpu)
}
/**
- * fpu_install_fpstate - Update the active fpstate in the FPU
- *
- * @fpu: A struct fpu * pointer
- * @newfps: A struct fpstate * pointer
- *
- * Returns: A null pointer if the last active fpstate is the embedded
- * one or the new fpstate is already installed;
- * otherwise, a pointer to the old fpstate which has to
- * be freed by the caller.
- */
-static struct fpstate *fpu_install_fpstate(struct fpu *fpu,
- struct fpstate *newfps)
-{
- struct fpstate *oldfps = fpu->fpstate;
-
- if (fpu->fpstate == newfps)
- return NULL;
-
- fpu->fpstate = newfps;
- return oldfps != &fpu->__fpstate ? oldfps : NULL;
-}
-
-/**
* fpstate_realloc - Reallocate struct fpstate for the requested new features
*
* @xfeatures: A bitmap of xstate features which extend the enabled features
* of that task
* @ksize: The required size for the kernel buffer
* @usize: The required size for user space buffers
+ * @guest_fpu: Pointer to a guest FPU container. NULL for host allocations
*
* Note vs. vmalloc(): If the task with a vzalloc()-allocated buffer
* terminates quickly, vfree()-induced IPIs may be a concern, but tasks
@@ -1537,13 +1450,13 @@ static struct fpstate *fpu_install_fpstate(struct fpu *fpu,
* Returns: 0 on success, -ENOMEM on allocation error.
*/
static int fpstate_realloc(u64 xfeatures, unsigned int ksize,
- unsigned int usize)
+ unsigned int usize, struct fpu_guest *guest_fpu)
{
struct fpu *fpu = &current->thread.fpu;
struct fpstate *curfps, *newfps = NULL;
unsigned int fpsize;
+ bool in_use;
- curfps = fpu->fpstate;
fpsize = ksize + ALIGN(offsetof(struct fpstate, regs), 64);
newfps = vzalloc(fpsize);
@@ -1553,28 +1466,59 @@ static int fpstate_realloc(u64 xfeatures, unsigned int ksize,
newfps->user_size = usize;
newfps->is_valloc = true;
+ /*
+ * When a guest FPU is supplied, use @guest_fpu->fpstate
+ * as reference, independent of whether it is in use or not.
+ */
+ curfps = guest_fpu ? guest_fpu->fpstate : fpu->fpstate;
+
+ /* Determine whether @curfps is the active fpstate */
+ in_use = fpu->fpstate == curfps;
+
+ if (guest_fpu) {
+ newfps->is_guest = true;
+ newfps->is_confidential = curfps->is_confidential;
+ newfps->in_use = curfps->in_use;
+ guest_fpu->xfeatures |= xfeatures;
+ guest_fpu->uabi_size = usize;
+ }
+
fpregs_lock();
/*
- * Ensure that the current state is in the registers before
- * swapping fpstate as that might invalidate it due to layout
- * changes.
+ * If @curfps is in use, ensure that the current state is in the
+ * registers before swapping fpstate as that might invalidate it
+ * due to layout changes.
*/
- if (test_thread_flag(TIF_NEED_FPU_LOAD))
+ if (in_use && test_thread_flag(TIF_NEED_FPU_LOAD))
fpregs_restore_userregs();
newfps->xfeatures = curfps->xfeatures | xfeatures;
- newfps->user_xfeatures = curfps->user_xfeatures | xfeatures;
- newfps->xfd = curfps->xfd & ~xfeatures;
- curfps = fpu_install_fpstate(fpu, newfps);
+ if (!guest_fpu)
+ newfps->user_xfeatures = curfps->user_xfeatures | xfeatures;
+
+ newfps->xfd = curfps->xfd & ~xfeatures;
/* Do the final updates within the locked region */
xstate_init_xcomp_bv(&newfps->regs.xsave, newfps->xfeatures);
- xfd_update_state(newfps);
+ if (guest_fpu) {
+ guest_fpu->fpstate = newfps;
+ /* If curfps is active, update the FPU fpstate pointer */
+ if (in_use)
+ fpu->fpstate = newfps;
+ } else {
+ fpu->fpstate = newfps;
+ }
+
+ if (in_use)
+ xfd_update_state(fpu->fpstate);
fpregs_unlock();
- vfree(curfps);
+ /* Only free valloc'ed state */
+ if (curfps && curfps->is_valloc)
+ vfree(curfps);
+
return 0;
}
@@ -1595,7 +1539,7 @@ static int validate_sigaltstack(unsigned int usize)
return 0;
}
-static int __xstate_request_perm(u64 permitted, u64 requested)
+static int __xstate_request_perm(u64 permitted, u64 requested, bool guest)
{
/*
* This deliberately does not exclude !XSAVES as we still might
@@ -1603,11 +1547,12 @@ static int __xstate_request_perm(u64 permitted, u64 requested)
* vendors into extending XFD for the pre AMX states, especially
* AVX512.
*/
- bool compacted = cpu_feature_enabled(X86_FEATURE_XSAVES);
+ bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED);
struct fpu *fpu = &current->group_leader->thread.fpu;
+ struct fpu_state_perm *perm;
unsigned int ksize, usize;
u64 mask;
- int ret;
+ int ret = 0;
/* Check whether fully enabled */
if ((permitted & requested) == requested)
@@ -1615,21 +1560,27 @@ static int __xstate_request_perm(u64 permitted, u64 requested)
/* Calculate the resulting kernel state size */
mask = permitted | requested;
+ /* Take supervisor states into account on the host */
+ if (!guest)
+ mask |= xfeatures_mask_supervisor();
ksize = xstate_calculate_size(mask, compacted);
/* Calculate the resulting user state size */
mask &= XFEATURE_MASK_USER_SUPPORTED;
usize = xstate_calculate_size(mask, false);
- ret = validate_sigaltstack(usize);
- if (ret)
- return ret;
+ if (!guest) {
+ ret = validate_sigaltstack(usize);
+ if (ret)
+ return ret;
+ }
+ perm = guest ? &fpu->guest_perm : &fpu->perm;
/* Pairs with the READ_ONCE() in xstate_get_group_perm() */
- WRITE_ONCE(fpu->perm.__state_perm, requested);
+ WRITE_ONCE(perm->__state_perm, mask);
/* Protected by sighand lock */
- fpu->perm.__state_size = ksize;
- fpu->perm.__user_state_size = usize;
+ perm->__state_size = ksize;
+ perm->__user_state_size = usize;
return ret;
}
@@ -1640,7 +1591,7 @@ static const u64 xstate_prctl_req[XFEATURE_MAX] = {
[XFEATURE_XTILE_DATA] = XFEATURE_MASK_XTILE_DATA,
};
-static int xstate_request_perm(unsigned long idx)
+static int xstate_request_perm(unsigned long idx, bool guest)
{
u64 permitted, requested;
int ret;
@@ -1661,26 +1612,33 @@ static int xstate_request_perm(unsigned long idx)
return -EOPNOTSUPP;
/* Lockless quick check */
- permitted = xstate_get_host_group_perm();
+ permitted = xstate_get_group_perm(guest);
if ((permitted & requested) == requested)
return 0;
/* Protect against concurrent modifications */
spin_lock_irq(&current->sighand->siglock);
- permitted = xstate_get_host_group_perm();
- ret = __xstate_request_perm(permitted, requested);
+ permitted = xstate_get_group_perm(guest);
+
+ /* First vCPU allocation locks the permissions. */
+ if (guest && (permitted & FPU_GUEST_PERM_LOCKED))
+ ret = -EBUSY;
+ else
+ ret = __xstate_request_perm(permitted, requested, guest);
spin_unlock_irq(&current->sighand->siglock);
return ret;
}
-int xfd_enable_feature(u64 xfd_err)
+int __xfd_enable_feature(u64 xfd_err, struct fpu_guest *guest_fpu)
{
u64 xfd_event = xfd_err & XFEATURE_MASK_USER_DYNAMIC;
+ struct fpu_state_perm *perm;
unsigned int ksize, usize;
struct fpu *fpu;
if (!xfd_event) {
- pr_err_once("XFD: Invalid xfd error: %016llx\n", xfd_err);
+ if (!guest_fpu)
+ pr_err_once("XFD: Invalid xfd error: %016llx\n", xfd_err);
return 0;
}
@@ -1688,14 +1646,16 @@ int xfd_enable_feature(u64 xfd_err)
spin_lock_irq(&current->sighand->siglock);
/* If not permitted let it die */
- if ((xstate_get_host_group_perm() & xfd_event) != xfd_event) {
+ if ((xstate_get_group_perm(!!guest_fpu) & xfd_event) != xfd_event) {
spin_unlock_irq(&current->sighand->siglock);
return -EPERM;
}
fpu = &current->group_leader->thread.fpu;
- ksize = fpu->perm.__state_size;
- usize = fpu->perm.__user_state_size;
+ perm = guest_fpu ? &fpu->guest_perm : &fpu->perm;
+ ksize = perm->__state_size;
+ usize = perm->__user_state_size;
+
/*
* The feature is permitted. State size is sufficient. Dropping
* the lock is safe here even if more features are added from
@@ -1708,17 +1668,29 @@ int xfd_enable_feature(u64 xfd_err)
* Try to allocate a new fpstate. If that fails there is no way
* out.
*/
- if (fpstate_realloc(xfd_event, ksize, usize))
+ if (fpstate_realloc(xfd_event, ksize, usize, guest_fpu))
return -EFAULT;
return 0;
}
+
+int xfd_enable_feature(u64 xfd_err)
+{
+ return __xfd_enable_feature(xfd_err, NULL);
+}
+
#else /* CONFIG_X86_64 */
-static inline int xstate_request_perm(unsigned long idx)
+static inline int xstate_request_perm(unsigned long idx, bool guest)
{
return -EPERM;
}
#endif /* !CONFIG_X86_64 */
+u64 xstate_get_guest_group_perm(void)
+{
+ return xstate_get_group_perm(true);
+}
+EXPORT_SYMBOL_GPL(xstate_get_guest_group_perm);
+
/**
* fpu_xstate_prctl - xstate permission operations
* @tsk: Redundant pointer to current
@@ -1737,14 +1709,12 @@ static inline int xstate_request_perm(unsigned long idx)
* e.g. for AMX which requires XFEATURE_XTILE_CFG(17) and
* XFEATURE_XTILE_DATA(18) this would be XFEATURE_XTILE_DATA(18).
*/
-long fpu_xstate_prctl(struct task_struct *tsk, int option, unsigned long arg2)
+long fpu_xstate_prctl(int option, unsigned long arg2)
{
u64 __user *uptr = (u64 __user *)arg2;
u64 permitted, supported;
unsigned long idx = arg2;
-
- if (tsk != current)
- return -EPERM;
+ bool guest = false;
switch (option) {
case ARCH_GET_XCOMP_SUPP:
@@ -1760,11 +1730,20 @@ long fpu_xstate_prctl(struct task_struct *tsk, int option, unsigned long arg2)
permitted &= XFEATURE_MASK_USER_SUPPORTED;
return put_user(permitted, uptr);
+ case ARCH_GET_XCOMP_GUEST_PERM:
+ permitted = xstate_get_guest_group_perm();
+ permitted &= XFEATURE_MASK_USER_SUPPORTED;
+ return put_user(permitted, uptr);
+
+ case ARCH_REQ_XCOMP_GUEST_PERM:
+ guest = true;
+ fallthrough;
+
case ARCH_REQ_XCOMP_PERM:
if (!IS_ENABLED(CONFIG_X86_64))
return -EOPNOTSUPP;
- return xstate_request_perm(idx);
+ return xstate_request_perm(idx, guest);
default:
return -EINVAL;