aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/arch/arm64/kernel/fpsimd.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm64/kernel/fpsimd.c')
-rw-r--r--arch/arm64/kernel/fpsimd.c633
1 files changed, 370 insertions, 263 deletions
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index dd63ffc3a2fa..ebb0158997ca 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -85,13 +85,13 @@
* softirq kicks in. Upon vcpu_put(), KVM will save the vcpu FP state and
* flag the register state as invalid.
*
- * In order to allow softirq handlers to use FPSIMD, kernel_neon_begin() may
- * save the task's FPSIMD context back to task_struct from softirq context.
- * To prevent this from racing with the manipulation of the task's FPSIMD state
- * from task context and thereby corrupting the state, it is necessary to
- * protect any manipulation of a task's fpsimd_state or TIF_FOREIGN_FPSTATE
- * flag with {, __}get_cpu_fpsimd_context(). This will still allow softirqs to
- * run but prevent them to use FPSIMD.
+ * In order to allow softirq handlers to use FPSIMD, kernel_neon_begin() may be
+ * called from softirq context, which will save the task's FPSIMD context back
+ * to task_struct. To prevent this from racing with the manipulation of the
+ * task's FPSIMD state from task context and thereby corrupting the state, it
+ * is necessary to protect any manipulation of a task's fpsimd_state or
+ * TIF_FOREIGN_FPSTATE flag with get_cpu_fpsimd_context(), which will suspend
+ * softirq servicing entirely until put_cpu_fpsimd_context() is called.
*
* For a certain task, the sequence may look something like this:
* - the task gets scheduled in; if both the task's fpsimd_cpu field
@@ -118,16 +118,8 @@
* returned from the 2nd syscall yet, TIF_FOREIGN_FPSTATE is still set so
* whatever is in the FPSIMD registers is not saved to memory, but discarded.
*/
-struct fpsimd_last_state_struct {
- struct user_fpsimd_state *st;
- void *sve_state;
- void *za_state;
- u64 *svcr;
- unsigned int sve_vl;
- unsigned int sme_vl;
-};
-static DEFINE_PER_CPU(struct fpsimd_last_state_struct, fpsimd_last_state);
+static DEFINE_PER_CPU(struct cpu_fp_state, fpsimd_last_state);
__ro_after_init struct vl_info vl_info[ARM64_VEC_MAX] = {
#ifdef CONFIG_ARM64_SVE
@@ -217,27 +209,14 @@ static inline void sme_free(struct task_struct *t) { }
#endif
-DEFINE_PER_CPU(bool, fpsimd_context_busy);
-EXPORT_PER_CPU_SYMBOL(fpsimd_context_busy);
-
static void fpsimd_bind_task_to_cpu(void);
-static void __get_cpu_fpsimd_context(void)
-{
- bool busy = __this_cpu_xchg(fpsimd_context_busy, true);
-
- WARN_ON(busy);
-}
-
/*
* Claim ownership of the CPU FPSIMD context for use by the calling context.
*
* The caller may freely manipulate the FPSIMD context metadata until
* put_cpu_fpsimd_context() is called.
*
- * The double-underscore version must only be called if you know the task
- * can't be preempted.
- *
* On RT kernels local_bh_disable() is not sufficient because it only
* serializes soft interrupt related sections via a local lock, but stays
* preemptible. Disabling preemption is the right choice here as bottom
@@ -250,14 +229,6 @@ static void get_cpu_fpsimd_context(void)
local_bh_disable();
else
preempt_disable();
- __get_cpu_fpsimd_context();
-}
-
-static void __put_cpu_fpsimd_context(void)
-{
- bool busy = __this_cpu_xchg(fpsimd_context_busy, false);
-
- WARN_ON(!busy); /* No matching get_cpu_fpsimd_context()? */
}
/*
@@ -269,18 +240,12 @@ static void __put_cpu_fpsimd_context(void)
*/
static void put_cpu_fpsimd_context(void)
{
- __put_cpu_fpsimd_context();
if (!IS_ENABLED(CONFIG_PREEMPT_RT))
local_bh_enable();
else
preempt_enable();
}
-static bool have_cpu_fpsimd_context(void)
-{
- return !preemptible() && __this_cpu_read(fpsimd_context_busy);
-}
-
unsigned int task_get_vl(const struct task_struct *task, enum vec_type type)
{
return task->thread.vl[type];
@@ -307,7 +272,7 @@ void task_set_vl_onexec(struct task_struct *task, enum vec_type type,
/*
* TIF_SME controls whether a task can use SME without trapping while
* in userspace, when TIF_SME is set then we must have storage
- * alocated in sve_state and za_state to store the contents of both ZA
+ * allocated in sve_state and sme_state to store the contents of both ZA
* and the SVE registers for both streaming and non-streaming modes.
*
* If both SVCR.ZA and SVCR.SM are disabled then at any point we
@@ -330,15 +295,6 @@ void task_set_vl_onexec(struct task_struct *task, enum vec_type type,
* The task can execute SVE instructions while in userspace without
* trapping to the kernel.
*
- * When stored, Z0-Z31 (incorporating Vn in bits[127:0] or the
- * corresponding Zn), P0-P15 and FFR are encoded in
- * task->thread.sve_state, formatted appropriately for vector
- * length task->thread.sve_vl or, if SVCR.SM is set,
- * task->thread.sme_vl.
- *
- * task->thread.sve_state must point to a valid buffer at least
- * sve_state_size(task) bytes in size.
- *
* During any syscall, the kernel may optionally clear TIF_SVE and
* discard the vector state except for the FPSIMD subset.
*
@@ -348,7 +304,15 @@ void task_set_vl_onexec(struct task_struct *task, enum vec_type type,
* do_sve_acc() to be called, which does some preparation and then
* sets TIF_SVE.
*
- * When stored, FPSIMD registers V0-V31 are encoded in
+ * During any syscall, the kernel may optionally clear TIF_SVE and
+ * discard the vector state except for the FPSIMD subset.
+ *
+ * The data will be stored in one of two formats:
+ *
+ * * FPSIMD only - FP_STATE_FPSIMD:
+ *
+ * When the FPSIMD only state stored task->thread.fp_type is set to
+ * FP_STATE_FPSIMD, the FPSIMD registers V0-V31 are encoded in
* task->thread.uw.fpsimd_state; bits [max : 128] for each of Z0-Z31 are
* logically zero but not stored anywhere; P0-P15 and FFR are not
* stored and have unspecified values from userspace's point of
@@ -356,7 +320,23 @@ void task_set_vl_onexec(struct task_struct *task, enum vec_type type,
* but userspace is discouraged from relying on this.
*
* task->thread.sve_state does not need to be non-NULL, valid or any
- * particular size: it must not be dereferenced.
+ * particular size: it must not be dereferenced and any data stored
+ * there should be considered stale and not referenced.
+ *
+ * * SVE state - FP_STATE_SVE:
+ *
+ * When the full SVE state is stored task->thread.fp_type is set to
+ * FP_STATE_SVE and Z0-Z31 (incorporating Vn in bits[127:0] or the
+ * corresponding Zn), P0-P15 and FFR are encoded in in
+ * task->thread.sve_state, formatted appropriately for vector
+ * length task->thread.sve_vl or, if SVCR.SM is set,
+ * task->thread.sme_vl. The storage for the vector registers in
+ * task->thread.uw.fpsimd_state should be ignored.
+ *
+ * task->thread.sve_state must point to a valid buffer at least
+ * sve_state_size(task) bytes in size. The data stored in
+ * task->thread.uw.fpsimd_state.vregs should be considered stale
+ * and not referenced.
*
* * FPSR and FPCR are always stored in task->thread.uw.fpsimd_state
* irrespective of whether TIF_SVE is clear or set, since these are
@@ -376,13 +356,43 @@ static void task_fpsimd_load(void)
bool restore_ffr;
WARN_ON(!system_supports_fpsimd());
- WARN_ON(!have_cpu_fpsimd_context());
+ WARN_ON(preemptible());
+ WARN_ON(test_thread_flag(TIF_KERNEL_FPSTATE));
- /* Check if we should restore SVE first */
- if (IS_ENABLED(CONFIG_ARM64_SVE) && test_thread_flag(TIF_SVE)) {
- sve_set_vq(sve_vq_from_vl(task_get_sve_vl(current)) - 1);
- restore_sve_regs = true;
- restore_ffr = true;
+ if (system_supports_fpmr())
+ write_sysreg_s(current->thread.uw.fpmr, SYS_FPMR);
+
+ if (system_supports_sve() || system_supports_sme()) {
+ switch (current->thread.fp_type) {
+ case FP_STATE_FPSIMD:
+ /* Stop tracking SVE for this task until next use. */
+ if (test_and_clear_thread_flag(TIF_SVE))
+ sve_user_disable();
+ break;
+ case FP_STATE_SVE:
+ if (!thread_sm_enabled(&current->thread) &&
+ !WARN_ON_ONCE(!test_and_set_thread_flag(TIF_SVE)))
+ sve_user_enable();
+
+ if (test_thread_flag(TIF_SVE))
+ sve_set_vq(sve_vq_from_vl(task_get_sve_vl(current)) - 1);
+
+ restore_sve_regs = true;
+ restore_ffr = true;
+ break;
+ default:
+ /*
+ * This indicates either a bug in
+ * fpsimd_save_user_state() or memory corruption, we
+ * should always record an explicit format
+ * when we save. We always at least have the
+ * memory allocated for FPSMID registers so
+ * try that and hope for the best.
+ */
+ WARN_ON_ONCE(1);
+ clear_thread_flag(TIF_SVE);
+ break;
+ }
}
/* Restore SME, override SVE register configuration if needed */
@@ -396,20 +406,22 @@ static void task_fpsimd_load(void)
write_sysreg_s(current->thread.svcr, SYS_SVCR);
if (thread_za_enabled(&current->thread))
- za_load_state(current->thread.za_state);
+ sme_load_state(current->thread.sme_state,
+ system_supports_sme2());
- if (thread_sm_enabled(&current->thread)) {
- restore_sve_regs = true;
+ if (thread_sm_enabled(&current->thread))
restore_ffr = system_supports_fa64();
- }
}
- if (restore_sve_regs)
+ if (restore_sve_regs) {
+ WARN_ON_ONCE(current->thread.fp_type != FP_STATE_SVE);
sve_load_state(sve_pffr(&current->thread),
&current->thread.uw.fpsimd_state.fpsr,
restore_ffr);
- else
+ } else {
+ WARN_ON_ONCE(current->thread.fp_type != FP_STATE_FPSIMD);
fpsimd_load_state(&current->thread.uw.fpsimd_state);
+ }
}
/*
@@ -419,12 +431,12 @@ static void task_fpsimd_load(void)
* last, if KVM is involved this may be the guest VM context rather
* than the host thread for the VM pointed to by current. This means
* that we must always reference the state storage via last rather
- * than via current, other than the TIF_ flags which KVM will
- * carefully maintain for us.
+ * than via current, if we are saving KVM state then it will have
+ * ensured that the type of registers to save is set in last->to_save.
*/
-static void fpsimd_save(void)
+static void fpsimd_save_user_state(void)
{
- struct fpsimd_last_state_struct const *last =
+ struct cpu_fp_state const *last =
this_cpu_ptr(&fpsimd_last_state);
/* set by fpsimd_bind_task_to_cpu() or fpsimd_bind_state_to_cpu() */
bool save_sve_regs = false;
@@ -432,12 +444,22 @@ static void fpsimd_save(void)
unsigned int vl;
WARN_ON(!system_supports_fpsimd());
- WARN_ON(!have_cpu_fpsimd_context());
+ WARN_ON(preemptible());
if (test_thread_flag(TIF_FOREIGN_FPSTATE))
return;
- if (test_thread_flag(TIF_SVE)) {
+ if (system_supports_fpmr())
+ *(last->fpmr) = read_sysreg_s(SYS_FPMR);
+
+ /*
+ * If a task is in a syscall the ABI allows us to only
+ * preserve the state shared with FPSIMD so don't bother
+ * saving the full SVE state in that case.
+ */
+ if ((last->to_save == FP_STATE_CURRENT && test_thread_flag(TIF_SVE) &&
+ !in_syscall(current_pt_regs())) ||
+ last->to_save == FP_STATE_SVE) {
save_sve_regs = true;
save_ffr = true;
vl = last->sve_vl;
@@ -449,7 +471,8 @@ static void fpsimd_save(void)
*svcr = read_sysreg_s(SYS_SVCR);
if (*svcr & SVCR_ZA_MASK)
- za_save_state(last->za_state);
+ sme_save_state(last->sme_state,
+ system_supports_sme2());
/* If we are in streaming mode override regular SVE. */
if (*svcr & SVCR_SM_MASK) {
@@ -474,8 +497,10 @@ static void fpsimd_save(void)
sve_save_state((char *)last->sve_state +
sve_ffr_offset(vl),
&last->st->fpsr, save_ffr);
+ *last->fp_type = FP_STATE_SVE;
} else {
fpsimd_save_state(last->st);
+ *last->fp_type = FP_STATE_FPSIMD;
}
}
@@ -544,7 +569,6 @@ static struct ctl_table sve_default_vl_table[] = {
.proc_handler = vec_proc_do_default_vl,
.extra1 = &vl_info[ARM64_VEC_SVE],
},
- { }
};
static int __init sve_sysctl_init(void)
@@ -568,7 +592,6 @@ static struct ctl_table sme_default_vl_table[] = {
.proc_handler = vec_proc_do_default_vl,
.extra1 = &vl_info[ARM64_VEC_SME],
},
- { }
};
static int __init sme_sysctl_init(void)
@@ -634,7 +657,7 @@ static void fpsimd_to_sve(struct task_struct *task)
void *sst = task->thread.sve_state;
struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state;
- if (!system_supports_sve())
+ if (!system_supports_sve() && !system_supports_sme())
return;
vq = sve_vq_from_vl(thread_get_cur_vl(&task->thread));
@@ -660,7 +683,7 @@ static void sve_to_fpsimd(struct task_struct *task)
unsigned int i;
__uint128_t const *p;
- if (!system_supports_sve())
+ if (!system_supports_sve() && !system_supports_sme())
return;
vl = thread_get_cur_vl(&task->thread);
@@ -671,6 +694,12 @@ static void sve_to_fpsimd(struct task_struct *task)
}
}
+void cpu_enable_fpmr(const struct arm64_cpu_capabilities *__always_unused p)
+{
+ write_sysreg_s(read_sysreg_s(SYS_SCTLR_EL1) | SCTLR_EL1_EnFPM_MASK,
+ SYS_SCTLR_EL1);
+}
+
#ifdef CONFIG_ARM64_SVE
/*
* Call __sve_free() directly only if you know task can't be scheduled
@@ -715,10 +744,12 @@ size_t sve_state_size(struct task_struct const *task)
* do_sve_acc() case, there is no ABI requirement to hide stale data
* written previously be task.
*/
-void sve_alloc(struct task_struct *task)
+void sve_alloc(struct task_struct *task, bool flush)
{
if (task->thread.sve_state) {
- memset(task->thread.sve_state, 0, sve_state_size(task));
+ if (flush)
+ memset(task->thread.sve_state, 0,
+ sve_state_size(task));
return;
}
@@ -766,8 +797,7 @@ void fpsimd_sync_to_sve(struct task_struct *task)
*/
void sve_sync_to_fpsimd(struct task_struct *task)
{
- if (test_tsk_thread_flag(task, TIF_SVE) ||
- thread_sm_enabled(&task->thread))
+ if (task->thread.fp_type == FP_STATE_SVE)
sve_to_fpsimd(task);
}
@@ -789,7 +819,8 @@ void sve_sync_from_fpsimd_zeropad(struct task_struct *task)
void *sst = task->thread.sve_state;
struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state;
- if (!test_tsk_thread_flag(task, TIF_SVE))
+ if (!test_tsk_thread_flag(task, TIF_SVE) &&
+ !thread_sm_enabled(&task->thread))
return;
vq = sve_vq_from_vl(thread_get_cur_vl(&task->thread));
@@ -801,6 +832,8 @@ void sve_sync_from_fpsimd_zeropad(struct task_struct *task)
int vec_set_vector_length(struct task_struct *task, enum vec_type type,
unsigned long vl, unsigned long flags)
{
+ bool free_sme = false;
+
if (flags & ~(unsigned long)(PR_SVE_VL_INHERIT |
PR_SVE_SET_VL_ONEXEC))
return -EINVAL;
@@ -841,32 +874,47 @@ int vec_set_vector_length(struct task_struct *task, enum vec_type type,
if (task == current) {
get_cpu_fpsimd_context();
- fpsimd_save();
+ fpsimd_save_user_state();
}
fpsimd_flush_task_state(task);
if (test_and_clear_tsk_thread_flag(task, TIF_SVE) ||
- thread_sm_enabled(&task->thread))
+ thread_sm_enabled(&task->thread)) {
sve_to_fpsimd(task);
+ task->thread.fp_type = FP_STATE_FPSIMD;
+ }
- if (system_supports_sme() && type == ARM64_VEC_SME) {
- task->thread.svcr &= ~(SVCR_SM_MASK |
- SVCR_ZA_MASK);
- clear_thread_flag(TIF_SME);
+ if (system_supports_sme()) {
+ if (type == ARM64_VEC_SME ||
+ !(task->thread.svcr & (SVCR_SM_MASK | SVCR_ZA_MASK))) {
+ /*
+ * We are changing the SME VL or weren't using
+ * SME anyway, discard the state and force a
+ * reallocation.
+ */
+ task->thread.svcr &= ~(SVCR_SM_MASK |
+ SVCR_ZA_MASK);
+ clear_tsk_thread_flag(task, TIF_SME);
+ free_sme = true;
+ }
}
if (task == current)
put_cpu_fpsimd_context();
+ task_set_vl(task, type, vl);
+
/*
- * Force reallocation of task SVE and SME state to the correct
- * size on next use:
+ * Free the changed states if they are not in use, SME will be
+ * reallocated to the correct size on next use and we just
+ * allocate SVE now in case it is needed for use in streaming
+ * mode.
*/
sve_free(task);
- if (system_supports_sme() && type == ARM64_VEC_SME)
- sme_free(task);
+ sve_alloc(task, true);
- task_set_vl(task, type, vl);
+ if (free_sme)
+ sme_free(task);
out:
update_tsk_thread_flag(task, vec_vl_inherit_flag(type),
@@ -1094,49 +1142,20 @@ fail:
panic("Cannot allocate percpu memory for EFI SVE save/restore");
}
-/*
- * Enable SVE for EL1.
- * Intended for use by the cpufeatures code during CPU boot.
- */
-void sve_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
+void cpu_enable_sve(const struct arm64_cpu_capabilities *__always_unused p)
{
write_sysreg(read_sysreg(CPACR_EL1) | CPACR_EL1_ZEN_EL1EN, CPACR_EL1);
isb();
-}
-
-/*
- * Read the pseudo-ZCR used by cpufeatures to identify the supported SVE
- * vector length.
- *
- * Use only if SVE is present.
- * This function clobbers the SVE vector length.
- */
-u64 read_zcr_features(void)
-{
- u64 zcr;
- unsigned int vq_max;
-
- /*
- * Set the maximum possible VL, and write zeroes to all other
- * bits to see if they stick.
- */
- sve_kernel_enable(NULL);
- write_sysreg_s(ZCR_ELx_LEN_MASK, SYS_ZCR_EL1);
-
- zcr = read_sysreg_s(SYS_ZCR_EL1);
- zcr &= ~(u64)ZCR_ELx_LEN_MASK; /* find sticky 1s outside LEN field */
- vq_max = sve_vq_from_vl(sve_get_vl());
- zcr |= vq_max - 1; /* set LEN field to maximum effective value */
- return zcr;
+ write_sysreg_s(0, SYS_ZCR_EL1);
}
void __init sve_setup(void)
{
struct vl_info *info = &vl_info[ARM64_VEC_SVE];
- u64 zcr;
DECLARE_BITMAP(tmp_map, SVE_VQ_MAX);
unsigned long b;
+ int max_bit;
if (!system_supports_sve())
return;
@@ -1149,17 +1168,8 @@ void __init sve_setup(void)
if (WARN_ON(!test_bit(__vq_to_bit(SVE_VQ_MIN), info->vq_map)))
set_bit(__vq_to_bit(SVE_VQ_MIN), info->vq_map);
- zcr = read_sanitised_ftr_reg(SYS_ZCR_EL1);
- info->max_vl = sve_vl_from_vq((zcr & ZCR_ELx_LEN_MASK) + 1);
-
- /*
- * Sanity-check that the max VL we determined through CPU features
- * corresponds properly to sve_vq_map. If not, do our best:
- */
- if (WARN_ON(info->max_vl != find_supported_vector_length(ARM64_VEC_SVE,
- info->max_vl)))
- info->max_vl = find_supported_vector_length(ARM64_VEC_SVE,
- info->max_vl);
+ max_bit = find_first_bit(info->vq_map, SVE_VQ_MAX);
+ info->max_vl = sve_vl_from_vq(__bit_to_vq(max_bit));
/*
* For the default VL, pick the maximum supported value <= 64.
@@ -1211,33 +1221,35 @@ void fpsimd_release_task(struct task_struct *dead_task)
#ifdef CONFIG_ARM64_SME
/*
- * Ensure that task->thread.za_state is allocated and sufficiently large.
+ * Ensure that task->thread.sme_state is allocated and sufficiently large.
*
* This function should be used only in preparation for replacing
- * task->thread.za_state with new data. The memory is always zeroed
+ * task->thread.sme_state with new data. The memory is always zeroed
* here to prevent stale data from showing through: this is done in
* the interest of testability and predictability, the architecture
* guarantees that when ZA is enabled it will be zeroed.
*/
-void sme_alloc(struct task_struct *task)
+void sme_alloc(struct task_struct *task, bool flush)
{
- if (task->thread.za_state) {
- memset(task->thread.za_state, 0, za_state_size(task));
+ if (task->thread.sme_state) {
+ if (flush)
+ memset(task->thread.sme_state, 0,
+ sme_state_size(task));
return;
}
/* This could potentially be up to 64K. */
- task->thread.za_state =
- kzalloc(za_state_size(task), GFP_KERNEL);
+ task->thread.sme_state =
+ kzalloc(sme_state_size(task), GFP_KERNEL);
}
static void sme_free(struct task_struct *task)
{
- kfree(task->thread.za_state);
- task->thread.za_state = NULL;
+ kfree(task->thread.sme_state);
+ task->thread.sme_state = NULL;
}
-void sme_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
+void cpu_enable_sme(const struct arm64_cpu_capabilities *__always_unused p)
{
/* Set priority for all PEs to architecturally defined minimum */
write_sysreg_s(read_sysreg_s(SYS_SMPRI_EL1) & ~SMPRI_EL1_PRIORITY_MASK,
@@ -1247,58 +1259,38 @@ void sme_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
write_sysreg(read_sysreg(CPACR_EL1) | CPACR_EL1_SMEN_EL1EN, CPACR_EL1);
isb();
+ /* Ensure all bits in SMCR are set to known values */
+ write_sysreg_s(0, SYS_SMCR_EL1);
+
/* Allow EL0 to access TPIDR2 */
write_sysreg(read_sysreg(SCTLR_EL1) | SCTLR_ELx_ENTP2, SCTLR_EL1);
isb();
}
-/*
- * This must be called after sme_kernel_enable(), we rely on the
- * feature table being sorted to ensure this.
- */
-void fa64_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
+void cpu_enable_sme2(const struct arm64_cpu_capabilities *__always_unused p)
{
- /* Allow use of FA64 */
- write_sysreg_s(read_sysreg_s(SYS_SMCR_EL1) | SMCR_ELx_FA64_MASK,
+ /* This must be enabled after SME */
+ BUILD_BUG_ON(ARM64_SME2 <= ARM64_SME);
+
+ /* Allow use of ZT0 */
+ write_sysreg_s(read_sysreg_s(SYS_SMCR_EL1) | SMCR_ELx_EZT0_MASK,
SYS_SMCR_EL1);
}
-/*
- * Read the pseudo-SMCR used by cpufeatures to identify the supported
- * vector length.
- *
- * Use only if SME is present.
- * This function clobbers the SME vector length.
- */
-u64 read_smcr_features(void)
+void cpu_enable_fa64(const struct arm64_cpu_capabilities *__always_unused p)
{
- u64 smcr;
- unsigned int vq_max;
-
- sme_kernel_enable(NULL);
- sme_smstart_sm();
+ /* This must be enabled after SME */
+ BUILD_BUG_ON(ARM64_SME_FA64 <= ARM64_SME);
- /*
- * Set the maximum possible VL.
- */
- write_sysreg_s(read_sysreg_s(SYS_SMCR_EL1) | SMCR_ELx_LEN_MASK,
+ /* Allow use of FA64 */
+ write_sysreg_s(read_sysreg_s(SYS_SMCR_EL1) | SMCR_ELx_FA64_MASK,
SYS_SMCR_EL1);
-
- smcr = read_sysreg_s(SYS_SMCR_EL1);
- smcr &= ~(u64)SMCR_ELx_LEN_MASK; /* Only the LEN field */
- vq_max = sve_vq_from_vl(sve_get_vl());
- smcr |= vq_max - 1; /* set LEN field to maximum effective value */
-
- sme_smstop_sm();
-
- return smcr;
}
void __init sme_setup(void)
{
struct vl_info *info = &vl_info[ARM64_VEC_SME];
- u64 smcr;
- int min_bit;
+ int min_bit, max_bit;
if (!system_supports_sme())
return;
@@ -1307,24 +1299,16 @@ void __init sme_setup(void)
* SME doesn't require any particular vector length be
* supported but it does require at least one. We should have
* disabled the feature entirely while bringing up CPUs but
- * let's double check here.
+ * let's double check here. The bitmap is SVE_VQ_MAP sized for
+ * sharing with SVE.
*/
WARN_ON(bitmap_empty(info->vq_map, SVE_VQ_MAX));
min_bit = find_last_bit(info->vq_map, SVE_VQ_MAX);
info->min_vl = sve_vl_from_vq(__bit_to_vq(min_bit));
- smcr = read_sanitised_ftr_reg(SYS_SMCR_EL1);
- info->max_vl = sve_vl_from_vq((smcr & SMCR_ELx_LEN_MASK) + 1);
-
- /*
- * Sanity-check that the max VL we determined through CPU features
- * corresponds properly to sme_vq_map. If not, do our best:
- */
- if (WARN_ON(info->max_vl != find_supported_vector_length(ARM64_VEC_SME,
- info->max_vl)))
- info->max_vl = find_supported_vector_length(ARM64_VEC_SME,
- info->max_vl);
+ max_bit = find_first_bit(info->vq_map, SVE_VQ_MAX);
+ info->max_vl = sve_vl_from_vq(__bit_to_vq(max_bit));
WARN_ON(info->min_vl > info->max_vl);
@@ -1344,6 +1328,22 @@ void __init sme_setup(void)
get_sme_default_vl());
}
+void sme_suspend_exit(void)
+{
+ u64 smcr = 0;
+
+ if (!system_supports_sme())
+ return;
+
+ if (system_supports_fa64())
+ smcr |= SMCR_ELx_FA64;
+ if (system_supports_sme2())
+ smcr |= SMCR_ELx_EZT0;
+
+ write_sysreg_s(smcr, SYS_SMCR_EL1);
+ write_sysreg_s(0, SYS_SMPRI_EL1);
+}
+
#endif /* CONFIG_ARM64_SME */
static void sve_init_regs(void)
@@ -1366,6 +1366,7 @@ static void sve_init_regs(void)
fpsimd_bind_task_to_cpu();
} else {
fpsimd_to_sve(current);
+ current->thread.fp_type = FP_STATE_SVE;
}
}
@@ -1388,7 +1389,7 @@ void do_sve_acc(unsigned long esr, struct pt_regs *regs)
return;
}
- sve_alloc(current);
+ sve_alloc(current, true);
if (!current->thread.sve_state) {
force_sig(SIGKILL);
return;
@@ -1420,7 +1421,7 @@ void do_sve_acc(unsigned long esr, struct pt_regs *regs)
*
* TIF_SME should be clear on entry: otherwise, fpsimd_restore_current_state()
* would have disabled the SME access trap for userspace during
- * ret_to_user, making an SVE access trap impossible in that case.
+ * ret_to_user, making an SME access trap impossible in that case.
*/
void do_sme_acc(unsigned long esr, struct pt_regs *regs)
{
@@ -1439,9 +1440,9 @@ void do_sme_acc(unsigned long esr, struct pt_regs *regs)
return;
}
- sve_alloc(current);
- sme_alloc(current);
- if (!current->thread.sve_state || !current->thread.za_state) {
+ sve_alloc(current, false);
+ sme_alloc(current, true);
+ if (!current->thread.sve_state || !current->thread.sme_state) {
force_sig(SIGKILL);
return;
}
@@ -1460,17 +1461,6 @@ void do_sme_acc(unsigned long esr, struct pt_regs *regs)
fpsimd_bind_task_to_cpu();
}
- /*
- * If SVE was not already active initialise the SVE registers,
- * any non-shared state between the streaming and regular SVE
- * registers is architecturally guaranteed to be zeroed when
- * we enter streaming mode. We do not need to initialize ZA
- * since ZA must be disabled at this point and enabling ZA is
- * architecturally defined to zero ZA.
- */
- if (system_supports_sve() && !test_thread_flag(TIF_SVE))
- sve_init_regs();
-
put_cpu_fpsimd_context();
}
@@ -1479,8 +1469,17 @@ void do_sme_acc(unsigned long esr, struct pt_regs *regs)
*/
void do_fpsimd_acc(unsigned long esr, struct pt_regs *regs)
{
- /* TODO: implement lazy context saving/restoring */
- WARN_ON(1);
+ /* Even if we chose not to use FPSIMD, the hardware could still trap: */
+ if (!system_supports_fpsimd()) {
+ force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0);
+ return;
+ }
+
+ /*
+ * When FPSIMD is enabled, we should never take a trap unless something
+ * has gone very wrong.
+ */
+ BUG();
}
/*
@@ -1508,6 +1507,34 @@ void do_fpsimd_exc(unsigned long esr, struct pt_regs *regs)
current);
}
+static void fpsimd_load_kernel_state(struct task_struct *task)
+{
+ struct cpu_fp_state *last = this_cpu_ptr(&fpsimd_last_state);
+
+ /*
+ * Elide the load if this CPU holds the most recent kernel mode
+ * FPSIMD context of the current task.
+ */
+ if (last->st == &task->thread.kernel_fpsimd_state &&
+ task->thread.kernel_fpsimd_cpu == smp_processor_id())
+ return;
+
+ fpsimd_load_state(&task->thread.kernel_fpsimd_state);
+}
+
+static void fpsimd_save_kernel_state(struct task_struct *task)
+{
+ struct cpu_fp_state cpu_fp_state = {
+ .st = &task->thread.kernel_fpsimd_state,
+ .to_save = FP_STATE_FPSIMD,
+ };
+
+ fpsimd_save_state(&task->thread.kernel_fpsimd_state);
+ fpsimd_bind_state_to_cpu(&cpu_fp_state);
+
+ task->thread.kernel_fpsimd_cpu = smp_processor_id();
+}
+
void fpsimd_thread_switch(struct task_struct *next)
{
bool wrong_task, wrong_cpu;
@@ -1515,24 +1542,31 @@ void fpsimd_thread_switch(struct task_struct *next)
if (!system_supports_fpsimd())
return;
- __get_cpu_fpsimd_context();
+ WARN_ON_ONCE(!irqs_disabled());
/* Save unsaved fpsimd state, if any: */
- fpsimd_save();
-
- /*
- * Fix up TIF_FOREIGN_FPSTATE to correctly describe next's
- * state. For kernel threads, FPSIMD registers are never loaded
- * and wrong_task and wrong_cpu will always be true.
- */
- wrong_task = __this_cpu_read(fpsimd_last_state.st) !=
- &next->thread.uw.fpsimd_state;
- wrong_cpu = next->thread.fpsimd_cpu != smp_processor_id();
+ if (test_thread_flag(TIF_KERNEL_FPSTATE))
+ fpsimd_save_kernel_state(current);
+ else
+ fpsimd_save_user_state();
- update_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE,
- wrong_task || wrong_cpu);
+ if (test_tsk_thread_flag(next, TIF_KERNEL_FPSTATE)) {
+ fpsimd_load_kernel_state(next);
+ set_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE);
+ } else {
+ /*
+ * Fix up TIF_FOREIGN_FPSTATE to correctly describe next's
+ * state. For kernel threads, FPSIMD registers are never
+ * loaded with user mode FPSIMD state and so wrong_task and
+ * wrong_cpu will always be true.
+ */
+ wrong_task = __this_cpu_read(fpsimd_last_state.st) !=
+ &next->thread.uw.fpsimd_state;
+ wrong_cpu = next->thread.fpsimd_cpu != smp_processor_id();
- __put_cpu_fpsimd_context();
+ update_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE,
+ wrong_task || wrong_cpu);
+ }
}
static void fpsimd_flush_thread_vl(enum vec_type type)
@@ -1573,7 +1607,7 @@ static void fpsimd_flush_thread_vl(enum vec_type type)
void fpsimd_flush_thread(void)
{
void *sve_state = NULL;
- void *za_state = NULL;
+ void *sme_state = NULL;
if (!system_supports_fpsimd())
return;
@@ -1598,16 +1632,18 @@ void fpsimd_flush_thread(void)
clear_thread_flag(TIF_SME);
/* Defer kfree() while in atomic context */
- za_state = current->thread.za_state;
- current->thread.za_state = NULL;
+ sme_state = current->thread.sme_state;
+ current->thread.sme_state = NULL;
fpsimd_flush_thread_vl(ARM64_VEC_SME);
current->thread.svcr = 0;
}
+ current->thread.fp_type = FP_STATE_FPSIMD;
+
put_cpu_fpsimd_context();
kfree(sve_state);
- kfree(za_state);
+ kfree(sme_state);
}
/*
@@ -1620,7 +1656,7 @@ void fpsimd_preserve_current_state(void)
return;
get_cpu_fpsimd_context();
- fpsimd_save();
+ fpsimd_save_user_state();
put_cpu_fpsimd_context();
}
@@ -1632,27 +1668,54 @@ void fpsimd_preserve_current_state(void)
void fpsimd_signal_preserve_current_state(void)
{
fpsimd_preserve_current_state();
- if (test_thread_flag(TIF_SVE))
+ if (current->thread.fp_type == FP_STATE_SVE)
sve_to_fpsimd(current);
}
/*
+ * Called by KVM when entering the guest.
+ */
+void fpsimd_kvm_prepare(void)
+{
+ if (!system_supports_sve())
+ return;
+
+ /*
+ * KVM does not save host SVE state since we can only enter
+ * the guest from a syscall so the ABI means that only the
+ * non-saved SVE state needs to be saved. If we have left
+ * SVE enabled for performance reasons then update the task
+ * state to be FPSIMD only.
+ */
+ get_cpu_fpsimd_context();
+
+ if (test_and_clear_thread_flag(TIF_SVE)) {
+ sve_to_fpsimd(current);
+ current->thread.fp_type = FP_STATE_FPSIMD;
+ }
+
+ put_cpu_fpsimd_context();
+}
+
+/*
* Associate current's FPSIMD context with this cpu
* The caller must have ownership of the cpu FPSIMD context before calling
* this function.
*/
static void fpsimd_bind_task_to_cpu(void)
{
- struct fpsimd_last_state_struct *last =
- this_cpu_ptr(&fpsimd_last_state);
+ struct cpu_fp_state *last = this_cpu_ptr(&fpsimd_last_state);
WARN_ON(!system_supports_fpsimd());
last->st = &current->thread.uw.fpsimd_state;
last->sve_state = current->thread.sve_state;
- last->za_state = current->thread.za_state;
+ last->sme_state = current->thread.sme_state;
last->sve_vl = task_get_sve_vl(current);
last->sme_vl = task_get_sme_vl(current);
last->svcr = &current->thread.svcr;
+ last->fpmr = &current->thread.uw.fpmr;
+ last->fp_type = &current->thread.fp_type;
+ last->to_save = FP_STATE_CURRENT;
current->thread.fpsimd_cpu = smp_processor_id();
/*
@@ -1674,22 +1737,14 @@ static void fpsimd_bind_task_to_cpu(void)
}
}
-void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state,
- unsigned int sve_vl, void *za_state,
- unsigned int sme_vl, u64 *svcr)
+void fpsimd_bind_state_to_cpu(struct cpu_fp_state *state)
{
- struct fpsimd_last_state_struct *last =
- this_cpu_ptr(&fpsimd_last_state);
+ struct cpu_fp_state *last = this_cpu_ptr(&fpsimd_last_state);
WARN_ON(!system_supports_fpsimd());
WARN_ON(!in_softirq() && !irqs_disabled());
- last->st = st;
- last->svcr = svcr;
- last->sve_state = sve_state;
- last->za_state = za_state;
- last->sve_vl = sve_vl;
- last->sme_vl = sme_vl;
+ *last = *state;
}
/*
@@ -1701,13 +1756,23 @@ void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state,
void fpsimd_restore_current_state(void)
{
/*
- * For the tasks that were created before we detected the absence of
- * FP/SIMD, the TIF_FOREIGN_FPSTATE could be set via fpsimd_thread_switch(),
- * e.g, init. This could be then inherited by the children processes.
- * If we later detect that the system doesn't support FP/SIMD,
- * we must clear the flag for all the tasks to indicate that the
- * FPSTATE is clean (as we can't have one) to avoid looping for ever in
- * do_notify_resume().
+ * TIF_FOREIGN_FPSTATE is set on the init task and copied by
+ * arch_dup_task_struct() regardless of whether FP/SIMD is detected.
+ * Thus user threads can have this set even when FP/SIMD hasn't been
+ * detected.
+ *
+ * When FP/SIMD is detected, begin_new_exec() will set
+ * TIF_FOREIGN_FPSTATE via flush_thread() -> fpsimd_flush_thread(),
+ * and fpsimd_thread_switch() will set TIF_FOREIGN_FPSTATE when
+ * switching tasks. We detect FP/SIMD before we exec the first user
+ * process, ensuring this has TIF_FOREIGN_FPSTATE set and
+ * do_notify_resume() will call fpsimd_restore_current_state() to
+ * install the user FP/SIMD context.
+ *
+ * When FP/SIMD is not detected, nothing else will clear or set
+ * TIF_FOREIGN_FPSTATE prior to the first return to userspace, and
+ * we must clear TIF_FOREIGN_FPSTATE to avoid do_notify_resume()
+ * looping forever calling fpsimd_restore_current_state().
*/
if (!system_supports_fpsimd()) {
clear_thread_flag(TIF_FOREIGN_FPSTATE);
@@ -1804,13 +1869,15 @@ static void fpsimd_flush_cpu_state(void)
*/
void fpsimd_save_and_flush_cpu_state(void)
{
+ unsigned long flags;
+
if (!system_supports_fpsimd())
return;
WARN_ON(preemptible());
- __get_cpu_fpsimd_context();
- fpsimd_save();
+ local_irq_save(flags);
+ fpsimd_save_user_state();
fpsimd_flush_cpu_state();
- __put_cpu_fpsimd_context();
+ local_irq_restore(flags);
}
#ifdef CONFIG_KERNEL_MODE_NEON
@@ -1842,12 +1909,39 @@ void kernel_neon_begin(void)
get_cpu_fpsimd_context();
/* Save unsaved fpsimd state, if any: */
- fpsimd_save();
+ if (test_thread_flag(TIF_KERNEL_FPSTATE)) {
+ BUG_ON(IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq());
+ fpsimd_save_kernel_state(current);
+ } else {
+ fpsimd_save_user_state();
+
+ /*
+ * Set the thread flag so that the kernel mode FPSIMD state
+ * will be context switched along with the rest of the task
+ * state.
+ *
+ * On non-PREEMPT_RT, softirqs may interrupt task level kernel
+ * mode FPSIMD, but the task will not be preemptible so setting
+ * TIF_KERNEL_FPSTATE for those would be both wrong (as it
+ * would mark the task context FPSIMD state as requiring a
+ * context switch) and unnecessary.
+ *
+ * On PREEMPT_RT, softirqs are serviced from a separate thread,
+ * which is scheduled as usual, and this guarantees that these
+ * softirqs are not interrupting use of the FPSIMD in kernel
+ * mode in task context. So in this case, setting the flag here
+ * is always appropriate.
+ */
+ if (IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq())
+ set_thread_flag(TIF_KERNEL_FPSTATE);
+ }
/* Invalidate any task state remaining in the fpsimd regs: */
fpsimd_flush_cpu_state();
+
+ put_cpu_fpsimd_context();
}
-EXPORT_SYMBOL(kernel_neon_begin);
+EXPORT_SYMBOL_GPL(kernel_neon_begin);
/*
* kernel_neon_end(): give the CPU FPSIMD registers back to the current task
@@ -1863,9 +1957,18 @@ void kernel_neon_end(void)
if (!system_supports_fpsimd())
return;
- put_cpu_fpsimd_context();
+ /*
+ * If we are returning from a nested use of kernel mode FPSIMD, restore
+ * the task context kernel mode FPSIMD state. This can only happen when
+ * running in softirq context on non-PREEMPT_RT.
+ */
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT) && in_serving_softirq() &&
+ test_thread_flag(TIF_KERNEL_FPSTATE))
+ fpsimd_load_kernel_state(current);
+ else
+ clear_thread_flag(TIF_KERNEL_FPSTATE);
}
-EXPORT_SYMBOL(kernel_neon_end);
+EXPORT_SYMBOL_GPL(kernel_neon_end);
#ifdef CONFIG_EFI
@@ -2040,6 +2143,13 @@ static inline void fpsimd_hotplug_init(void)
static inline void fpsimd_hotplug_init(void) { }
#endif
+void cpu_enable_fpsimd(const struct arm64_cpu_capabilities *__always_unused p)
+{
+ unsigned long enable = CPACR_EL1_FPEN_EL1EN | CPACR_EL1_FPEN_EL0EN;
+ write_sysreg(read_sysreg(CPACR_EL1) | enable, CPACR_EL1);
+ isb();
+}
+
/*
* FP/SIMD support code initialisation.
*/
@@ -2056,9 +2166,6 @@ static int __init fpsimd_init(void)
pr_notice("Advanced SIMD is not implemented\n");
- if (cpu_have_named_feature(SME) && !cpu_have_named_feature(SVE))
- pr_notice("SME is implemented but not SVE\n");
-
sve_sysctl_init();
sme_sysctl_init();