From 06f7c88c107fb469f4f1344142e80df5175c6836 Mon Sep 17 00:00:00 2001 From: Beni Lev Date: Tue, 19 Jul 2016 19:28:56 +0300 Subject: cfg80211: consider VHT opmode on station update Currently, this attribute is only fetched on station addition, but not on station change. Since this info is only present in the assoc request, with full station state support in the driver it cannot be present when the station is added. Thus, add support for changing the VHT opmode on station update if done before (or while) the station is marked as associated. After this, ignore it, since it used to be ignored. Signed-off-by: Beni Lev Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 6b76e3b0c18e..bea982af9cfb 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1772,7 +1772,9 @@ enum nl80211_commands { * * @NL80211_ATTR_OPMODE_NOTIF: Operating mode field from Operating Mode * Notification Element based on association request when used with - * %NL80211_CMD_NEW_STATION; u8 attribute. + * %NL80211_CMD_NEW_STATION or %NL80211_CMD_SET_STATION (only when + * %NL80211_FEATURE_FULL_AP_CLIENT_STATE is supported, or with TDLS); + * u8 attribute. * * @NL80211_ATTR_VENDOR_ID: The vendor ID, either a 24-bit OUI or, if * %NL80211_VENDOR_ID_IS_LINUX is set, a special Linux ID (not used yet) -- cgit v1.3-6-gb490 From 546125d1614264d26080817d0c8cddb9b25081fa Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Thu, 5 Jan 2017 16:34:51 -0500 Subject: sunrpc: don't call sleeping functions from the notifier block callbacks The inet6addr_chain is an atomic notifier chain, so we can't call anything that might sleep (like lock_sock)... instead of closing the socket from svc_age_temp_xprts_now (which is called by the notifier function), just have the rpc service threads do it instead. Cc: stable@vger.kernel.org Fixes: c3d4879e01be "sunrpc: Add a function to close..." Signed-off-by: Scott Mayhew Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_xprt.h | 1 + net/sunrpc/svc_xprt.c | 10 +++++++--- 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index e5d193440374..7440290f64ac 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -66,6 +66,7 @@ struct svc_xprt { #define XPT_LISTENER 10 /* listening endpoint */ #define XPT_CACHE_AUTH 11 /* cache auth info */ #define XPT_LOCAL 12 /* connection from loopback interface */ +#define XPT_KILL_TEMP 13 /* call xpo_kill_temp_xprt before closing */ struct svc_serv *xpt_server; /* service for transport */ atomic_t xpt_reserved; /* space on outq that is rsvd */ diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 3bc1d61694cb..9c9db55a0c1e 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -799,6 +799,8 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt) if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) { dprintk("svc_recv: found XPT_CLOSE\n"); + if (test_and_clear_bit(XPT_KILL_TEMP, &xprt->xpt_flags)) + xprt->xpt_ops->xpo_kill_temp_xprt(xprt); svc_delete_xprt(xprt); /* Leave XPT_BUSY set on the dead xprt: */ goto out; @@ -1020,9 +1022,11 @@ void svc_age_temp_xprts_now(struct svc_serv *serv, struct sockaddr *server_addr) le = to_be_closed.next; list_del_init(le); xprt = list_entry(le, struct svc_xprt, xpt_list); - dprintk("svc_age_temp_xprts_now: closing %p\n", xprt); - xprt->xpt_ops->xpo_kill_temp_xprt(xprt); - svc_close_xprt(xprt); + set_bit(XPT_CLOSE, &xprt->xpt_flags); + set_bit(XPT_KILL_TEMP, &xprt->xpt_flags); + dprintk("svc_age_temp_xprts_now: queuing xprt %p for closing\n", + xprt); + svc_xprt_enqueue(xprt); } } EXPORT_SYMBOL_GPL(svc_age_temp_xprts_now); -- cgit v1.3-6-gb490 From 488f94d7212b00a2ec72fb886b155f1b04c5aa98 Mon Sep 17 00:00:00 2001 From: Jintack Lim Date: Thu, 1 Dec 2016 14:32:05 -0500 Subject: KVM: arm64: Access CNTHCTL_EL2 bit fields correctly on VHE systems Current KVM world switch code is unintentionally setting wrong bits to CNTHCTL_EL2 when E2H == 1, which may allow guest OS to access physical timer. Bit positions of CNTHCTL_EL2 are changing depending on HCR_EL2.E2H bit. EL1PCEN and EL1PCTEN are 1st and 0th bits when E2H is not set, but they are 11th and 10th bits respectively when E2H is set. In fact, on VHE we only need to set those bits once, not for every world switch. This is because the host kernel runs in EL2 with HCR_EL2.TGE == 1, which makes those bits have no effect for the host kernel execution. So we just set those bits once for guests, and that's it. Signed-off-by: Jintack Lim Reviewed-by: Marc Zyngier Signed-off-by: Marc Zyngier --- arch/arm/include/asm/virt.h | 5 +++++ arch/arm/kvm/arm.c | 3 +++ arch/arm64/include/asm/virt.h | 9 +++++++++ include/kvm/arm_arch_timer.h | 1 + virt/kvm/arm/arch_timer.c | 23 +++++++++++++++++++++++ virt/kvm/arm/hyp/timer-sr.c | 33 +++++++++++++++++++++------------ 6 files changed, 62 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/arch/arm/include/asm/virt.h b/arch/arm/include/asm/virt.h index a2e75b84e2ae..6dae1956c74d 100644 --- a/arch/arm/include/asm/virt.h +++ b/arch/arm/include/asm/virt.h @@ -80,6 +80,11 @@ static inline bool is_kernel_in_hyp_mode(void) return false; } +static inline bool has_vhe(void) +{ + return false; +} + /* The section containing the hypervisor idmap text */ extern char __hyp_idmap_text_start[]; extern char __hyp_idmap_text_end[]; diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 11676787ad49..9d7446456e0c 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -1099,6 +1099,9 @@ static void cpu_init_hyp_mode(void *dummy) __cpu_init_hyp_mode(pgd_ptr, hyp_stack_ptr, vector_ptr); __cpu_init_stage2(); + if (is_kernel_in_hyp_mode()) + kvm_timer_init_vhe(); + kvm_arm_init_debug(); } diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h index fea10736b11f..439f6b5d31f6 100644 --- a/arch/arm64/include/asm/virt.h +++ b/arch/arm64/include/asm/virt.h @@ -47,6 +47,7 @@ #include #include #include +#include /* * __boot_cpu_mode records what mode CPUs were booted in. @@ -80,6 +81,14 @@ static inline bool is_kernel_in_hyp_mode(void) return read_sysreg(CurrentEL) == CurrentEL_EL2; } +static inline bool has_vhe(void) +{ + if (cpus_have_const_cap(ARM64_HAS_VIRT_HOST_EXTN)) + return true; + + return false; +} + #ifdef CONFIG_ARM64_VHE extern void verify_cpu_run_el(void); #else diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index b717ed9d2b75..5c970ce67949 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -76,4 +76,5 @@ void kvm_timer_unschedule(struct kvm_vcpu *vcpu); void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu); +void kvm_timer_init_vhe(void); #endif diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index a7fe6062b65a..6a084cd57b88 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -24,6 +24,7 @@ #include #include +#include #include #include @@ -509,3 +510,25 @@ void kvm_timer_init(struct kvm *kvm) { kvm->arch.timer.cntvoff = kvm_phys_timer_read(); } + +/* + * On VHE system, we only need to configure trap on physical timer and counter + * accesses in EL0 and EL1 once, not for every world switch. + * The host kernel runs at EL2 with HCR_EL2.TGE == 1, + * and this makes those bits have no effect for the host kernel execution. + */ +void kvm_timer_init_vhe(void) +{ + /* When HCR_EL2.E2H ==1, EL1PCEN and EL1PCTEN are shifted by 10 */ + u32 cnthctl_shift = 10; + u64 val; + + /* + * Disallow physical timer access for the guest. + * Physical counter access is allowed. + */ + val = read_sysreg(cnthctl_el2); + val &= ~(CNTHCTL_EL1PCEN << cnthctl_shift); + val |= (CNTHCTL_EL1PCTEN << cnthctl_shift); + write_sysreg(val, cnthctl_el2); +} diff --git a/virt/kvm/arm/hyp/timer-sr.c b/virt/kvm/arm/hyp/timer-sr.c index 798866a8d875..63e28dd18bb0 100644 --- a/virt/kvm/arm/hyp/timer-sr.c +++ b/virt/kvm/arm/hyp/timer-sr.c @@ -35,10 +35,16 @@ void __hyp_text __timer_save_state(struct kvm_vcpu *vcpu) /* Disable the virtual timer */ write_sysreg_el0(0, cntv_ctl); - /* Allow physical timer/counter access for the host */ - val = read_sysreg(cnthctl_el2); - val |= CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN; - write_sysreg(val, cnthctl_el2); + /* + * We don't need to do this for VHE since the host kernel runs in EL2 + * with HCR_EL2.TGE ==1, which makes those bits have no impact. + */ + if (!has_vhe()) { + /* Allow physical timer/counter access for the host */ + val = read_sysreg(cnthctl_el2); + val |= CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN; + write_sysreg(val, cnthctl_el2); + } /* Clear cntvoff for the host */ write_sysreg(0, cntvoff_el2); @@ -50,14 +56,17 @@ void __hyp_text __timer_restore_state(struct kvm_vcpu *vcpu) struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; u64 val; - /* - * Disallow physical timer access for the guest - * Physical counter access is allowed - */ - val = read_sysreg(cnthctl_el2); - val &= ~CNTHCTL_EL1PCEN; - val |= CNTHCTL_EL1PCTEN; - write_sysreg(val, cnthctl_el2); + /* Those bits are already configured at boot on VHE-system */ + if (!has_vhe()) { + /* + * Disallow physical timer access for the guest + * Physical counter access is allowed + */ + val = read_sysreg(cnthctl_el2); + val &= ~CNTHCTL_EL1PCEN; + val |= CNTHCTL_EL1PCTEN; + write_sysreg(val, cnthctl_el2); + } if (timer->enabled) { write_sysreg(kvm->arch.timer.cntvoff, cntvoff_el2); -- cgit v1.3-6-gb490 From 003c941057eaa868ca6fedd29a274c863167230d Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Thu, 12 Jan 2017 14:24:58 -0800 Subject: tcp: fix tcp_fastopen unaligned access complaints on sparc Fix up a data alignment issue on sparc by swapping the order of the cookie byte array field with the length field in struct tcp_fastopen_cookie, and making it a proper union to clean up the typecasting. This addresses log complaints like these: log_unaligned: 113 callbacks suppressed Kernel unaligned access at TPC[976490] tcp_try_fastopen+0x2d0/0x360 Kernel unaligned access at TPC[9764ac] tcp_try_fastopen+0x2ec/0x360 Kernel unaligned access at TPC[9764c8] tcp_try_fastopen+0x308/0x360 Kernel unaligned access at TPC[9764e4] tcp_try_fastopen+0x324/0x360 Kernel unaligned access at TPC[976490] tcp_try_fastopen+0x2d0/0x360 Cc: Eric Dumazet Signed-off-by: Shannon Nelson Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 7 ++++++- net/ipv4/tcp_fastopen.c | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index fc5848dad7a4..c93f4b3a59cb 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -62,8 +62,13 @@ static inline unsigned int tcp_optlen(const struct sk_buff *skb) /* TCP Fast Open Cookie as stored in memory */ struct tcp_fastopen_cookie { + union { + u8 val[TCP_FASTOPEN_COOKIE_MAX]; +#if IS_ENABLED(CONFIG_IPV6) + struct in6_addr addr; +#endif + }; s8 len; - u8 val[TCP_FASTOPEN_COOKIE_MAX]; bool exp; /* In RFC6994 experimental option format */ }; diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 4e777a3243f9..f51919535ca7 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -113,7 +113,7 @@ static bool tcp_fastopen_cookie_gen(struct request_sock *req, struct tcp_fastopen_cookie tmp; if (__tcp_fastopen_cookie_gen(&ip6h->saddr, &tmp)) { - struct in6_addr *buf = (struct in6_addr *) tmp.val; + struct in6_addr *buf = &tmp.addr; int i; for (i = 0; i < 4; i++) -- cgit v1.3-6-gb490 From 52d7e48b86fc108e45a656d8e53e4237993c481d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 10 Jan 2017 02:28:26 -0800 Subject: rcu: Narrow early boot window of illegal synchronous grace periods The current preemptible RCU implementation goes through three phases during bootup. In the first phase, there is only one CPU that is running with preemption disabled, so that a no-op is a synchronous grace period. In the second mid-boot phase, the scheduler is running, but RCU has not yet gotten its kthreads spawned (and, for expedited grace periods, workqueues are not yet running. During this time, any attempt to do a synchronous grace period will hang the system (or complain bitterly, depending). In the third and final phase, RCU is fully operational and everything works normally. This has been OK for some time, but there has recently been some synchronous grace periods showing up during the second mid-boot phase. This code worked "by accident" for awhile, but started failing as soon as expedited RCU grace periods switched over to workqueues in commit 8b355e3bc140 ("rcu: Drive expedited grace periods from workqueue"). Note that the code was buggy even before this commit, as it was subject to failure on real-time systems that forced all expedited grace periods to run as normal grace periods (for example, using the rcu_normal ksysfs parameter). The callchain from the failure case is as follows: early_amd_iommu_init() |-> acpi_put_table(ivrs_base); |-> acpi_tb_put_table(table_desc); |-> acpi_tb_invalidate_table(table_desc); |-> acpi_tb_release_table(...) |-> acpi_os_unmap_memory |-> acpi_os_unmap_iomem |-> acpi_os_map_cleanup |-> synchronize_rcu_expedited The kernel showing this callchain was built with CONFIG_PREEMPT_RCU=y, which caused the code to try using workqueues before they were initialized, which did not go well. This commit therefore reworks RCU to permit synchronous grace periods to proceed during this mid-boot phase. This commit is therefore a fix to a regression introduced in v4.9, and is therefore being put forward post-merge-window in v4.10. This commit sets a flag from the existing rcu_scheduler_starting() function which causes all synchronous grace periods to take the expedited path. The expedited path now checks this flag, using the requesting task to drive the expedited grace period forward during the mid-boot phase. Finally, this flag is updated by a core_initcall() function named rcu_exp_runtime_mode(), which causes the runtime codepaths to be used. Note that this arrangement assumes that tasks are not sent POSIX signals (or anything similar) from the time that the first task is spawned through core_initcall() time. Fixes: 8b355e3bc140 ("rcu: Drive expedited grace periods from workqueue") Reported-by: "Zheng, Lv" Reported-by: Borislav Petkov Signed-off-by: Paul E. McKenney Tested-by: Stan Kain Tested-by: Ivan Tested-by: Emanuel Castelo Tested-by: Bruno Pesavento Tested-by: Borislav Petkov Tested-by: Frederic Bezies Cc: # 4.9.0- --- include/linux/rcupdate.h | 4 ++++ kernel/rcu/rcu.h | 1 + kernel/rcu/tiny_plugin.h | 9 +++++++-- kernel/rcu/tree.c | 33 ++++++++++++++++++------------ kernel/rcu/tree_exp.h | 52 ++++++++++++++++++++++++++++++++++++++---------- kernel/rcu/tree_plugin.h | 2 +- kernel/rcu/update.c | 38 +++++++++++++++++++++++++++-------- 7 files changed, 104 insertions(+), 35 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 321f9ed552a9..01f71e1d2e94 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -444,6 +444,10 @@ bool __rcu_is_watching(void); #error "Unknown RCU implementation specified to kernel configuration" #endif +#define RCU_SCHEDULER_INACTIVE 0 +#define RCU_SCHEDULER_INIT 1 +#define RCU_SCHEDULER_RUNNING 2 + /* * init_rcu_head_on_stack()/destroy_rcu_head_on_stack() are needed for dynamic * initialization and destruction of rcu_head on the stack. rcu_head structures diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index 80adef7d4c3d..0d6ff3e471be 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -136,6 +136,7 @@ int rcu_jiffies_till_stall_check(void); #define TPS(x) tracepoint_string(x) void rcu_early_boot_tests(void); +void rcu_test_sync_prims(void); /* * This function really isn't for public consumption, but RCU is special in diff --git a/kernel/rcu/tiny_plugin.h b/kernel/rcu/tiny_plugin.h index 196f0302e2f4..c64b827ecbca 100644 --- a/kernel/rcu/tiny_plugin.h +++ b/kernel/rcu/tiny_plugin.h @@ -60,12 +60,17 @@ EXPORT_SYMBOL_GPL(rcu_scheduler_active); /* * During boot, we forgive RCU lockdep issues. After this function is - * invoked, we start taking RCU lockdep issues seriously. + * invoked, we start taking RCU lockdep issues seriously. Note that unlike + * Tree RCU, Tiny RCU transitions directly from RCU_SCHEDULER_INACTIVE + * to RCU_SCHEDULER_RUNNING, skipping the RCU_SCHEDULER_INIT stage. + * The reason for this is that Tiny RCU does not need kthreads, so does + * not have to care about the fact that the scheduler is half-initialized + * at a certain phase of the boot process. */ void __init rcu_scheduler_starting(void) { WARN_ON(nr_context_switches() > 0); - rcu_scheduler_active = 1; + rcu_scheduler_active = RCU_SCHEDULER_RUNNING; } #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 96c52e43f7ca..cb4e2056ccf3 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -127,13 +127,16 @@ int rcu_num_nodes __read_mostly = NUM_RCU_NODES; /* Total # rcu_nodes in use. */ int sysctl_panic_on_rcu_stall __read_mostly; /* - * The rcu_scheduler_active variable transitions from zero to one just - * before the first task is spawned. So when this variable is zero, RCU - * can assume that there is but one task, allowing RCU to (for example) + * The rcu_scheduler_active variable is initialized to the value + * RCU_SCHEDULER_INACTIVE and transitions RCU_SCHEDULER_INIT just before the + * first task is spawned. So when this variable is RCU_SCHEDULER_INACTIVE, + * RCU can assume that there is but one task, allowing RCU to (for example) * optimize synchronize_rcu() to a simple barrier(). When this variable - * is one, RCU must actually do all the hard work required to detect real - * grace periods. This variable is also used to suppress boot-time false - * positives from lockdep-RCU error checking. + * is RCU_SCHEDULER_INIT, RCU must actually do all the hard work required + * to detect real grace periods. This variable is also used to suppress + * boot-time false positives from lockdep-RCU error checking. Finally, it + * transitions from RCU_SCHEDULER_INIT to RCU_SCHEDULER_RUNNING after RCU + * is fully initialized, including all of its kthreads having been spawned. */ int rcu_scheduler_active __read_mostly; EXPORT_SYMBOL_GPL(rcu_scheduler_active); @@ -3980,18 +3983,22 @@ static int __init rcu_spawn_gp_kthread(void) early_initcall(rcu_spawn_gp_kthread); /* - * This function is invoked towards the end of the scheduler's initialization - * process. Before this is called, the idle task might contain - * RCU read-side critical sections (during which time, this idle - * task is booting the system). After this function is called, the - * idle tasks are prohibited from containing RCU read-side critical - * sections. This function also enables RCU lockdep checking. + * This function is invoked towards the end of the scheduler's + * initialization process. Before this is called, the idle task might + * contain synchronous grace-period primitives (during which time, this idle + * task is booting the system, and such primitives are no-ops). After this + * function is called, any synchronous grace-period primitives are run as + * expedited, with the requesting task driving the grace period forward. + * A later core_initcall() rcu_exp_runtime_mode() will switch to full + * runtime RCU functionality. */ void rcu_scheduler_starting(void) { WARN_ON(num_online_cpus() != 1); WARN_ON(nr_context_switches() > 0); - rcu_scheduler_active = 1; + rcu_test_sync_prims(); + rcu_scheduler_active = RCU_SCHEDULER_INIT; + rcu_test_sync_prims(); } /* diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index d3053e99fdb6..e59e1849b89a 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -531,6 +531,20 @@ struct rcu_exp_work { struct work_struct rew_work; }; +/* + * Common code to drive an expedited grace period forward, used by + * workqueues and mid-boot-time tasks. + */ +static void rcu_exp_sel_wait_wake(struct rcu_state *rsp, + smp_call_func_t func, unsigned long s) +{ + /* Initialize the rcu_node tree in preparation for the wait. */ + sync_rcu_exp_select_cpus(rsp, func); + + /* Wait and clean up, including waking everyone. */ + rcu_exp_wait_wake(rsp, s); +} + /* * Work-queue handler to drive an expedited grace period forward. */ @@ -538,12 +552,8 @@ static void wait_rcu_exp_gp(struct work_struct *wp) { struct rcu_exp_work *rewp; - /* Initialize the rcu_node tree in preparation for the wait. */ rewp = container_of(wp, struct rcu_exp_work, rew_work); - sync_rcu_exp_select_cpus(rewp->rew_rsp, rewp->rew_func); - - /* Wait and clean up, including waking everyone. */ - rcu_exp_wait_wake(rewp->rew_rsp, rewp->rew_s); + rcu_exp_sel_wait_wake(rewp->rew_rsp, rewp->rew_func, rewp->rew_s); } /* @@ -569,12 +579,18 @@ static void _synchronize_rcu_expedited(struct rcu_state *rsp, if (exp_funnel_lock(rsp, s)) return; /* Someone else did our work for us. */ - /* Marshall arguments and schedule the expedited grace period. */ - rew.rew_func = func; - rew.rew_rsp = rsp; - rew.rew_s = s; - INIT_WORK_ONSTACK(&rew.rew_work, wait_rcu_exp_gp); - schedule_work(&rew.rew_work); + /* Ensure that load happens before action based on it. */ + if (unlikely(rcu_scheduler_active == RCU_SCHEDULER_INIT)) { + /* Direct call during scheduler init and early_initcalls(). */ + rcu_exp_sel_wait_wake(rsp, func, s); + } else { + /* Marshall arguments & schedule the expedited grace period. */ + rew.rew_func = func; + rew.rew_rsp = rsp; + rew.rew_s = s; + INIT_WORK_ONSTACK(&rew.rew_work, wait_rcu_exp_gp); + schedule_work(&rew.rew_work); + } /* Wait for expedited grace period to complete. */ rdp = per_cpu_ptr(rsp->rda, raw_smp_processor_id()); @@ -676,6 +692,8 @@ void synchronize_rcu_expedited(void) { struct rcu_state *rsp = rcu_state_p; + if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE) + return; _synchronize_rcu_expedited(rsp, sync_rcu_exp_handler); } EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); @@ -693,3 +711,15 @@ void synchronize_rcu_expedited(void) EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ + +/* + * Switch to run-time mode once Tree RCU has fully initialized. + */ +static int __init rcu_exp_runtime_mode(void) +{ + rcu_test_sync_prims(); + rcu_scheduler_active = RCU_SCHEDULER_RUNNING; + rcu_test_sync_prims(); + return 0; +} +core_initcall(rcu_exp_runtime_mode); diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 85c5a883c6e3..56583e764ebf 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -670,7 +670,7 @@ void synchronize_rcu(void) lock_is_held(&rcu_lock_map) || lock_is_held(&rcu_sched_lock_map), "Illegal synchronize_rcu() in RCU read-side critical section"); - if (!rcu_scheduler_active) + if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE) return; if (rcu_gp_is_expedited()) synchronize_rcu_expedited(); diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index f19271dce0a9..4f6db7e6a117 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -121,11 +121,14 @@ EXPORT_SYMBOL(rcu_read_lock_sched_held); * Should expedited grace-period primitives always fall back to their * non-expedited counterparts? Intended for use within RCU. Note * that if the user specifies both rcu_expedited and rcu_normal, then - * rcu_normal wins. + * rcu_normal wins. (Except during the time period during boot from + * when the first task is spawned until the rcu_exp_runtime_mode() + * core_initcall() is invoked, at which point everything is expedited.) */ bool rcu_gp_is_normal(void) { - return READ_ONCE(rcu_normal); + return READ_ONCE(rcu_normal) && + rcu_scheduler_active != RCU_SCHEDULER_INIT; } EXPORT_SYMBOL_GPL(rcu_gp_is_normal); @@ -135,13 +138,14 @@ static atomic_t rcu_expedited_nesting = /* * Should normal grace-period primitives be expedited? Intended for * use within RCU. Note that this function takes the rcu_expedited - * sysfs/boot variable into account as well as the rcu_expedite_gp() - * nesting. So looping on rcu_unexpedite_gp() until rcu_gp_is_expedited() - * returns false is a -really- bad idea. + * sysfs/boot variable and rcu_scheduler_active into account as well + * as the rcu_expedite_gp() nesting. So looping on rcu_unexpedite_gp() + * until rcu_gp_is_expedited() returns false is a -really- bad idea. */ bool rcu_gp_is_expedited(void) { - return rcu_expedited || atomic_read(&rcu_expedited_nesting); + return rcu_expedited || atomic_read(&rcu_expedited_nesting) || + rcu_scheduler_active == RCU_SCHEDULER_INIT; } EXPORT_SYMBOL_GPL(rcu_gp_is_expedited); @@ -257,7 +261,7 @@ EXPORT_SYMBOL_GPL(rcu_callback_map); int notrace debug_lockdep_rcu_enabled(void) { - return rcu_scheduler_active && debug_locks && + return rcu_scheduler_active != RCU_SCHEDULER_INACTIVE && debug_locks && current->lockdep_recursion == 0; } EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled); @@ -591,7 +595,7 @@ EXPORT_SYMBOL_GPL(call_rcu_tasks); void synchronize_rcu_tasks(void) { /* Complain if the scheduler has not started. */ - RCU_LOCKDEP_WARN(!rcu_scheduler_active, + RCU_LOCKDEP_WARN(rcu_scheduler_active == RCU_SCHEDULER_INACTIVE, "synchronize_rcu_tasks called too soon"); /* Wait for the grace period. */ @@ -813,6 +817,23 @@ static void rcu_spawn_tasks_kthread(void) #endif /* #ifdef CONFIG_TASKS_RCU */ +/* + * Test each non-SRCU synchronous grace-period wait API. This is + * useful just after a change in mode for these primitives, and + * during early boot. + */ +void rcu_test_sync_prims(void) +{ + if (!IS_ENABLED(CONFIG_PROVE_RCU)) + return; + synchronize_rcu(); + synchronize_rcu_bh(); + synchronize_sched(); + synchronize_rcu_expedited(); + synchronize_rcu_bh_expedited(); + synchronize_sched_expedited(); +} + #ifdef CONFIG_PROVE_RCU /* @@ -865,6 +886,7 @@ void rcu_early_boot_tests(void) early_boot_test_call_rcu_bh(); if (rcu_self_test_sched) early_boot_test_call_rcu_sched(); + rcu_test_sync_prims(); } static int rcu_verify_early_boot_tests(void) -- cgit v1.3-6-gb490 From 4205e4786d0b9fc3b4fec7b1910cf645a0468307 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 10 Jan 2017 14:01:05 +0100 Subject: cpu/hotplug: Provide dynamic range for prepare stage Mathieu reported that the LTTNG modules are broken as of 4.10-rc1 due to the removal of the cpu hotplug notifiers. Usually I don't care much about out of tree modules, but LTTNG is widely used in distros. There are two ways to solve that: 1) Reserve a hotplug state for LTTNG 2) Add a dynamic range for the prepare states. While #1 is the simplest solution, #2 is the proper one as we can convert in tree users, which do not care about ordering, to the dynamic range as well. Add a dynamic range which allows LTTNG to request states in the prepare stage. Reported-and-tested-by: Mathieu Desnoyers Signed-off-by: Thomas Gleixner Reviewed-by: Mathieu Desnoyers Cc: Peter Zijlstra Cc: Sebastian Sewior Cc: Steven Rostedt Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1701101353010.3401@nanos Signed-off-by: Thomas Gleixner --- include/linux/cpuhotplug.h | 2 ++ kernel/cpu.c | 22 ++++++++++++++++++---- 2 files changed, 20 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 20bfefbe7594..d936a0021839 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -74,6 +74,8 @@ enum cpuhp_state { CPUHP_ZCOMP_PREPARE, CPUHP_TIMERS_DEAD, CPUHP_MIPS_SOC_PREPARE, + CPUHP_BP_PREPARE_DYN, + CPUHP_BP_PREPARE_DYN_END = CPUHP_BP_PREPARE_DYN + 20, CPUHP_BRINGUP_CPU, CPUHP_AP_IDLE_DEAD, CPUHP_AP_OFFLINE, diff --git a/kernel/cpu.c b/kernel/cpu.c index f75c4d031eeb..c47506357519 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -1302,10 +1302,24 @@ static int cpuhp_cb_check(enum cpuhp_state state) */ static int cpuhp_reserve_state(enum cpuhp_state state) { - enum cpuhp_state i; + enum cpuhp_state i, end; + struct cpuhp_step *step; - for (i = CPUHP_AP_ONLINE_DYN; i <= CPUHP_AP_ONLINE_DYN_END; i++) { - if (!cpuhp_ap_states[i].name) + switch (state) { + case CPUHP_AP_ONLINE_DYN: + step = cpuhp_ap_states + CPUHP_AP_ONLINE_DYN; + end = CPUHP_AP_ONLINE_DYN_END; + break; + case CPUHP_BP_PREPARE_DYN: + step = cpuhp_bp_states + CPUHP_BP_PREPARE_DYN; + end = CPUHP_BP_PREPARE_DYN_END; + break; + default: + return -EINVAL; + } + + for (i = state; i <= end; i++, step++) { + if (!step->name) return i; } WARN(1, "No more dynamic states available for CPU hotplug\n"); @@ -1323,7 +1337,7 @@ static int cpuhp_store_callbacks(enum cpuhp_state state, const char *name, mutex_lock(&cpuhp_state_mutex); - if (state == CPUHP_AP_ONLINE_DYN) { + if (state == CPUHP_AP_ONLINE_DYN || state == CPUHP_BP_PREPARE_DYN) { ret = cpuhp_reserve_state(state); if (ret < 0) goto out; -- cgit v1.3-6-gb490 From f1f7714ea51c56b7163fb1a5acf39c6a204dd758 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 13 Jan 2017 23:38:15 +0100 Subject: bpf: rework prog_digest into prog_tag Commit 7bd509e311f4 ("bpf: add prog_digest and expose it via fdinfo/netlink") was recently discussed, partially due to admittedly suboptimal name of "prog_digest" in combination with sha1 hash usage, thus inevitably and rightfully concerns about its security in terms of collision resistance were raised with regards to use-cases. The intended use cases are for debugging resp. introspection only for providing a stable "tag" over the instruction sequence that both kernel and user space can calculate independently. It's not usable at all for making a security relevant decision. So collisions where two different instruction sequences generate the same tag can happen, but ideally at a rather low rate. The "tag" will be dumped in hex and is short enough to introspect in tracepoints or kallsyms output along with other data such as stack trace, etc. Thus, this patch performs a rename into prog_tag and truncates the tag to a short output (64 bits) to make it obvious it's not collision-free. Should in future a hash or facility be needed with a security relevant focus, then we can think about requirements, constraints, etc that would fit to that situation. For now, rework the exposed parts for the current use cases as long as nothing has been released yet. Tested on x86_64 and s390x. Fixes: 7bd509e311f4 ("bpf: add prog_digest and expose it via fdinfo/netlink") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Cc: Andy Lutomirski Signed-off-by: David S. Miller --- include/linux/bpf.h | 2 +- include/linux/filter.h | 6 ++++-- include/uapi/linux/pkt_cls.h | 2 +- include/uapi/linux/tc_act/tc_bpf.h | 2 +- kernel/bpf/core.c | 14 ++++++++------ kernel/bpf/syscall.c | 8 ++++---- kernel/bpf/verifier.c | 2 +- net/sched/act_bpf.c | 5 ++--- net/sched/cls_bpf.c | 4 ++-- 9 files changed, 24 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index f74ae68086dc..05cf951df3fe 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -216,7 +216,7 @@ u64 bpf_tail_call(u64 ctx, u64 r2, u64 index, u64 r4, u64 r5); u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp); -int bpf_prog_calc_digest(struct bpf_prog *fp); +int bpf_prog_calc_tag(struct bpf_prog *fp); const struct bpf_func_proto *bpf_get_trace_printk_proto(void); diff --git a/include/linux/filter.h b/include/linux/filter.h index a0934e6c9bab..e4eb2546339a 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -57,6 +57,8 @@ struct bpf_prog_aux; /* BPF program can access up to 512 bytes of stack space. */ #define MAX_BPF_STACK 512 +#define BPF_TAG_SIZE 8 + /* Helper macros for filter block array initializers. */ /* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */ @@ -408,7 +410,7 @@ struct bpf_prog { kmemcheck_bitfield_end(meta); enum bpf_prog_type type; /* Type of BPF program */ u32 len; /* Number of filter blocks */ - u32 digest[SHA_DIGEST_WORDS]; /* Program digest */ + u8 tag[BPF_TAG_SIZE]; struct bpf_prog_aux *aux; /* Auxiliary fields */ struct sock_fprog_kern *orig_prog; /* Original BPF program */ unsigned int (*bpf_func)(const void *ctx, @@ -519,7 +521,7 @@ static inline u32 bpf_prog_insn_size(const struct bpf_prog *prog) return prog->len * sizeof(struct bpf_insn); } -static inline u32 bpf_prog_digest_scratch_size(const struct bpf_prog *prog) +static inline u32 bpf_prog_tag_scratch_size(const struct bpf_prog *prog) { return round_up(bpf_prog_insn_size(prog) + sizeof(__be64) + 1, SHA_MESSAGE_BYTES); diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index cb4bcdc58543..a4dcd88ec271 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -397,7 +397,7 @@ enum { TCA_BPF_NAME, TCA_BPF_FLAGS, TCA_BPF_FLAGS_GEN, - TCA_BPF_DIGEST, + TCA_BPF_TAG, __TCA_BPF_MAX, }; diff --git a/include/uapi/linux/tc_act/tc_bpf.h b/include/uapi/linux/tc_act/tc_bpf.h index a6b88a6f7f71..975b50dc8d1d 100644 --- a/include/uapi/linux/tc_act/tc_bpf.h +++ b/include/uapi/linux/tc_act/tc_bpf.h @@ -27,7 +27,7 @@ enum { TCA_ACT_BPF_FD, TCA_ACT_BPF_NAME, TCA_ACT_BPF_PAD, - TCA_ACT_BPF_DIGEST, + TCA_ACT_BPF_TAG, __TCA_ACT_BPF_MAX, }; #define TCA_ACT_BPF_MAX (__TCA_ACT_BPF_MAX - 1) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 1eb4f1303756..503d4211988a 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -146,10 +146,11 @@ void __bpf_prog_free(struct bpf_prog *fp) vfree(fp); } -int bpf_prog_calc_digest(struct bpf_prog *fp) +int bpf_prog_calc_tag(struct bpf_prog *fp) { const u32 bits_offset = SHA_MESSAGE_BYTES - sizeof(__be64); - u32 raw_size = bpf_prog_digest_scratch_size(fp); + u32 raw_size = bpf_prog_tag_scratch_size(fp); + u32 digest[SHA_DIGEST_WORDS]; u32 ws[SHA_WORKSPACE_WORDS]; u32 i, bsize, psize, blocks; struct bpf_insn *dst; @@ -162,7 +163,7 @@ int bpf_prog_calc_digest(struct bpf_prog *fp) if (!raw) return -ENOMEM; - sha_init(fp->digest); + sha_init(digest); memset(ws, 0, sizeof(ws)); /* We need to take out the map fd for the digest calculation @@ -204,13 +205,14 @@ int bpf_prog_calc_digest(struct bpf_prog *fp) *bits = cpu_to_be64((psize - 1) << 3); while (blocks--) { - sha_transform(fp->digest, todo, ws); + sha_transform(digest, todo, ws); todo += SHA_MESSAGE_BYTES; } - result = (__force __be32 *)fp->digest; + result = (__force __be32 *)digest; for (i = 0; i < SHA_DIGEST_WORDS; i++) - result[i] = cpu_to_be32(fp->digest[i]); + result[i] = cpu_to_be32(digest[i]); + memcpy(fp->tag, result, sizeof(fp->tag)); vfree(raw); return 0; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index e89acea22ecf..1d6b29e4e2c3 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -688,17 +688,17 @@ static int bpf_prog_release(struct inode *inode, struct file *filp) static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp) { const struct bpf_prog *prog = filp->private_data; - char prog_digest[sizeof(prog->digest) * 2 + 1] = { }; + char prog_tag[sizeof(prog->tag) * 2 + 1] = { }; - bin2hex(prog_digest, prog->digest, sizeof(prog->digest)); + bin2hex(prog_tag, prog->tag, sizeof(prog->tag)); seq_printf(m, "prog_type:\t%u\n" "prog_jited:\t%u\n" - "prog_digest:\t%s\n" + "prog_tag:\t%s\n" "memlock:\t%llu\n", prog->type, prog->jited, - prog_digest, + prog_tag, prog->pages * 1ULL << PAGE_SHIFT); } #endif diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 83ed2f8f6f22..cdc43b899f28 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2936,7 +2936,7 @@ static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env) int insn_cnt = env->prog->len; int i, j, err; - err = bpf_prog_calc_digest(env->prog); + err = bpf_prog_calc_tag(env->prog); if (err) return err; diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index 1c60317f0121..520baa41cba3 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -123,12 +123,11 @@ static int tcf_bpf_dump_ebpf_info(const struct tcf_bpf *prog, nla_put_string(skb, TCA_ACT_BPF_NAME, prog->bpf_name)) return -EMSGSIZE; - nla = nla_reserve(skb, TCA_ACT_BPF_DIGEST, - sizeof(prog->filter->digest)); + nla = nla_reserve(skb, TCA_ACT_BPF_TAG, sizeof(prog->filter->tag)); if (nla == NULL) return -EMSGSIZE; - memcpy(nla_data(nla), prog->filter->digest, nla_len(nla)); + memcpy(nla_data(nla), prog->filter->tag, nla_len(nla)); return 0; } diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index adc776048d1a..d9c97018317d 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -555,11 +555,11 @@ static int cls_bpf_dump_ebpf_info(const struct cls_bpf_prog *prog, nla_put_string(skb, TCA_BPF_NAME, prog->bpf_name)) return -EMSGSIZE; - nla = nla_reserve(skb, TCA_BPF_DIGEST, sizeof(prog->filter->digest)); + nla = nla_reserve(skb, TCA_BPF_TAG, sizeof(prog->filter->tag)); if (nla == NULL) return -EMSGSIZE; - memcpy(nla_data(nla), prog->filter->digest, nla_len(nla)); + memcpy(nla_data(nla), prog->filter->tag, nla_len(nla)); return 0; } -- cgit v1.3-6-gb490 From 67d35e70af9cabb663c827e03bc5c1e89b43db72 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Fri, 13 Jan 2017 11:40:01 +0800 Subject: scsi: libfc: Fix variable name in fc_set_wwpn The parameter name should be wwpn instead of wwnn. Signed-off-by: Fam Zheng Acked-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen --- include/scsi/libfc.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/scsi/libfc.h b/include/scsi/libfc.h index 96dd0b3f70d7..da5033dd8cbc 100644 --- a/include/scsi/libfc.h +++ b/include/scsi/libfc.h @@ -809,11 +809,11 @@ static inline void fc_set_wwnn(struct fc_lport *lport, u64 wwnn) /** * fc_set_wwpn() - Set the World Wide Port Name of a local port * @lport: The local port whose WWPN is to be set - * @wwnn: The new WWPN + * @wwpn: The new WWPN */ -static inline void fc_set_wwpn(struct fc_lport *lport, u64 wwnn) +static inline void fc_set_wwpn(struct fc_lport *lport, u64 wwpn) { - lport->wwpn = wwnn; + lport->wwpn = wwpn; } /** -- cgit v1.3-6-gb490 From 5eb7c0d04f04a667c049fe090a95494a8de2955c Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Sun, 1 Jan 2017 20:25:25 -0600 Subject: taint/module: Fix problems when out-of-kernel driver defines true or false Commit 7fd8329ba502 ("taint/module: Clean up global and module taint flags handling") used the key words true and false as character members of a new struct. These names cause problems when out-of-kernel modules such as VirtualBox include their own definitions of true and false. Fixes: 7fd8329ba502 ("taint/module: Clean up global and module taint flags handling") Signed-off-by: Larry Finger Cc: Petr Mladek Cc: Jessica Yu Cc: Rusty Russell Reported-by: Valdis Kletnieks Reviewed-by: Petr Mladek Acked-by: Rusty Russell Signed-off-by: Jessica Yu --- include/linux/kernel.h | 4 ++-- kernel/module.c | 2 +- kernel/panic.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 56aec84237ad..cb09238f6d32 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -514,8 +514,8 @@ extern enum system_states { #define TAINT_FLAGS_COUNT 16 struct taint_flag { - char true; /* character printed when tainted */ - char false; /* character printed when not tainted */ + char c_true; /* character printed when tainted */ + char c_false; /* character printed when not tainted */ bool module; /* also show as a per-module taint flag */ }; diff --git a/kernel/module.c b/kernel/module.c index 5088784c0cf9..38d4270925d4 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1145,7 +1145,7 @@ static size_t module_flags_taint(struct module *mod, char *buf) for (i = 0; i < TAINT_FLAGS_COUNT; i++) { if (taint_flags[i].module && test_bit(i, &mod->taints)) - buf[l++] = taint_flags[i].true; + buf[l++] = taint_flags[i].c_true; } return l; diff --git a/kernel/panic.c b/kernel/panic.c index c51edaa04fce..901c4fb46002 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -355,7 +355,7 @@ const char *print_tainted(void) for (i = 0; i < TAINT_FLAGS_COUNT; i++) { const struct taint_flag *t = &taint_flags[i]; *s++ = test_bit(i, &tainted_mask) ? - t->true : t->false; + t->c_true : t->c_false; } *s = 0; } else -- cgit v1.3-6-gb490 From 8694e4da66a636665f51b94a6a7a40c9fc0dc5ec Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Mon, 23 Jan 2017 14:07:18 +0100 Subject: KVM: arm/arm64: Remove struct vgic_irq pending field One of the goals behind the VGIC redesign was to get rid of cached or intermediate state in the data structures, but we decided to allow ourselves to precompute the pending value of an IRQ based on the line level and pending latch state. However, this has now become difficult to base proper GICv3 save/restore on, because there is a potential to modify the pending state without knowing if an interrupt is edge or level configured. See the following post and related message for more background: https://lists.cs.columbia.edu/pipermail/kvmarm/2017-January/023195.html This commit gets rid of the precomputed pending field in favor of a function that calculates the value when needed, irq_is_pending(). The soft_pending field is renamed to pending_latch to represent that this latch is the equivalent hardware latch which gets manipulated by the input signal for edge-triggered interrupts and when writing to the SPENDR/CPENDR registers. After this commit save/restore code should be able to simply restore the pending_latch state, line_level state, and config state in any order and get the desired result. Reviewed-by: Andre Przywara Reviewed-by: Marc Zyngier Tested-by: Andre Przywara Signed-off-by: Christoffer Dall --- include/kvm/arm_vgic.h | 5 +++-- virt/kvm/arm/vgic/vgic-its.c | 6 +++--- virt/kvm/arm/vgic/vgic-mmio-v2.c | 6 +++--- virt/kvm/arm/vgic/vgic-mmio-v3.c | 2 +- virt/kvm/arm/vgic/vgic-mmio.c | 19 +++++-------------- virt/kvm/arm/vgic/vgic-v2.c | 12 +++++------- virt/kvm/arm/vgic/vgic-v3.c | 12 +++++------- virt/kvm/arm/vgic/vgic.c | 16 +++++++--------- virt/kvm/arm/vgic/vgic.h | 8 ++++++++ 9 files changed, 40 insertions(+), 46 deletions(-) (limited to 'include') diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 002f0922cd92..da2ce086ce31 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -101,9 +101,10 @@ struct vgic_irq { */ u32 intid; /* Guest visible INTID */ - bool pending; bool line_level; /* Level only */ - bool soft_pending; /* Level only */ + bool pending_latch; /* The pending latch state used to calculate + * the pending state for both level + * and edge triggered IRQs. */ bool active; /* not used for LPIs */ bool enabled; bool hw; /* Tied to HW IRQ */ diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index 8c2b3cdcb2c5..571b64a01c50 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -350,7 +350,7 @@ static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu) irq = vgic_get_irq(vcpu->kvm, NULL, intids[i]); spin_lock(&irq->irq_lock); - irq->pending = pendmask & (1U << bit_nr); + irq->pending_latch = pendmask & (1U << bit_nr); vgic_queue_irq_unlock(vcpu->kvm, irq); vgic_put_irq(vcpu->kvm, irq); } @@ -465,7 +465,7 @@ static int vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its, return -EBUSY; spin_lock(&itte->irq->irq_lock); - itte->irq->pending = true; + itte->irq->pending_latch = true; vgic_queue_irq_unlock(kvm, itte->irq); return 0; @@ -913,7 +913,7 @@ static int vgic_its_cmd_handle_clear(struct kvm *kvm, struct vgic_its *its, if (!itte) return E_ITS_CLEAR_UNMAPPED_INTERRUPT; - itte->irq->pending = false; + itte->irq->pending_latch = false; return 0; } diff --git a/virt/kvm/arm/vgic/vgic-mmio-v2.c b/virt/kvm/arm/vgic/vgic-mmio-v2.c index 78e34bc4d89b..07e67f1e25ef 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v2.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v2.c @@ -98,7 +98,7 @@ static void vgic_mmio_write_sgir(struct kvm_vcpu *source_vcpu, irq = vgic_get_irq(source_vcpu->kvm, vcpu, intid); spin_lock(&irq->irq_lock); - irq->pending = true; + irq->pending_latch = true; irq->source |= 1U << source_vcpu->vcpu_id; vgic_queue_irq_unlock(source_vcpu->kvm, irq); @@ -182,7 +182,7 @@ static void vgic_mmio_write_sgipendc(struct kvm_vcpu *vcpu, irq->source &= ~((val >> (i * 8)) & 0xff); if (!irq->source) - irq->pending = false; + irq->pending_latch = false; spin_unlock(&irq->irq_lock); vgic_put_irq(vcpu->kvm, irq); @@ -204,7 +204,7 @@ static void vgic_mmio_write_sgipends(struct kvm_vcpu *vcpu, irq->source |= (val >> (i * 8)) & 0xff; if (irq->source) { - irq->pending = true; + irq->pending_latch = true; vgic_queue_irq_unlock(vcpu->kvm, irq); } else { spin_unlock(&irq->irq_lock); diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c index 50f42f0f8c4f..2aca52a6c9eb 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c @@ -646,7 +646,7 @@ void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg) irq = vgic_get_irq(vcpu->kvm, c_vcpu, sgi); spin_lock(&irq->irq_lock); - irq->pending = true; + irq->pending_latch = true; vgic_queue_irq_unlock(vcpu->kvm, irq); vgic_put_irq(vcpu->kvm, irq); diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c index ebe1b9fa3c4d..2670d39d1f86 100644 --- a/virt/kvm/arm/vgic/vgic-mmio.c +++ b/virt/kvm/arm/vgic/vgic-mmio.c @@ -111,7 +111,7 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu, for (i = 0; i < len * 8; i++) { struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); - if (irq->pending) + if (irq_is_pending(irq)) value |= (1U << i); vgic_put_irq(vcpu->kvm, irq); @@ -131,9 +131,7 @@ void vgic_mmio_write_spending(struct kvm_vcpu *vcpu, struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); spin_lock(&irq->irq_lock); - irq->pending = true; - if (irq->config == VGIC_CONFIG_LEVEL) - irq->soft_pending = true; + irq->pending_latch = true; vgic_queue_irq_unlock(vcpu->kvm, irq); vgic_put_irq(vcpu->kvm, irq); @@ -152,12 +150,7 @@ void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu, spin_lock(&irq->irq_lock); - if (irq->config == VGIC_CONFIG_LEVEL) { - irq->soft_pending = false; - irq->pending = irq->line_level; - } else { - irq->pending = false; - } + irq->pending_latch = false; spin_unlock(&irq->irq_lock); vgic_put_irq(vcpu->kvm, irq); @@ -359,12 +352,10 @@ void vgic_mmio_write_config(struct kvm_vcpu *vcpu, irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); spin_lock(&irq->irq_lock); - if (test_bit(i * 2 + 1, &val)) { + if (test_bit(i * 2 + 1, &val)) irq->config = VGIC_CONFIG_EDGE; - } else { + else irq->config = VGIC_CONFIG_LEVEL; - irq->pending = irq->line_level | irq->soft_pending; - } spin_unlock(&irq->irq_lock); vgic_put_irq(vcpu->kvm, irq); diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c index 834137e7b83f..b834ecdf3225 100644 --- a/virt/kvm/arm/vgic/vgic-v2.c +++ b/virt/kvm/arm/vgic/vgic-v2.c @@ -104,7 +104,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu) /* Edge is the only case where we preserve the pending bit */ if (irq->config == VGIC_CONFIG_EDGE && (val & GICH_LR_PENDING_BIT)) { - irq->pending = true; + irq->pending_latch = true; if (vgic_irq_is_sgi(intid)) { u32 cpuid = val & GICH_LR_PHYSID_CPUID; @@ -120,9 +120,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu) */ if (irq->config == VGIC_CONFIG_LEVEL) { if (!(val & GICH_LR_PENDING_BIT)) - irq->soft_pending = false; - - irq->pending = irq->line_level || irq->soft_pending; + irq->pending_latch = false; } spin_unlock(&irq->irq_lock); @@ -145,11 +143,11 @@ void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) { u32 val = irq->intid; - if (irq->pending) { + if (irq_is_pending(irq)) { val |= GICH_LR_PENDING_BIT; if (irq->config == VGIC_CONFIG_EDGE) - irq->pending = false; + irq->pending_latch = false; if (vgic_irq_is_sgi(irq->intid)) { u32 src = ffs(irq->source); @@ -158,7 +156,7 @@ void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT; irq->source &= ~(1 << (src - 1)); if (irq->source) - irq->pending = true; + irq->pending_latch = true; } } diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index e6b03fd8c374..679ba935698f 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -94,7 +94,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) /* Edge is the only case where we preserve the pending bit */ if (irq->config == VGIC_CONFIG_EDGE && (val & ICH_LR_PENDING_BIT)) { - irq->pending = true; + irq->pending_latch = true; if (vgic_irq_is_sgi(intid) && model == KVM_DEV_TYPE_ARM_VGIC_V2) { @@ -111,9 +111,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) */ if (irq->config == VGIC_CONFIG_LEVEL) { if (!(val & ICH_LR_PENDING_BIT)) - irq->soft_pending = false; - - irq->pending = irq->line_level || irq->soft_pending; + irq->pending_latch = false; } spin_unlock(&irq->irq_lock); @@ -127,11 +125,11 @@ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) u32 model = vcpu->kvm->arch.vgic.vgic_model; u64 val = irq->intid; - if (irq->pending) { + if (irq_is_pending(irq)) { val |= ICH_LR_PENDING_BIT; if (irq->config == VGIC_CONFIG_EDGE) - irq->pending = false; + irq->pending_latch = false; if (vgic_irq_is_sgi(irq->intid) && model == KVM_DEV_TYPE_ARM_VGIC_V2) { @@ -141,7 +139,7 @@ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT; irq->source &= ~(1 << (src - 1)); if (irq->source) - irq->pending = true; + irq->pending_latch = true; } } diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index 6440b56ec90e..dea12df2b879 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c @@ -160,7 +160,7 @@ static struct kvm_vcpu *vgic_target_oracle(struct vgic_irq *irq) * If the distributor is disabled, pending interrupts shouldn't be * forwarded. */ - if (irq->enabled && irq->pending) { + if (irq->enabled && irq_is_pending(irq)) { if (unlikely(irq->target_vcpu && !irq->target_vcpu->kvm->arch.vgic.enabled)) return NULL; @@ -204,8 +204,8 @@ static int vgic_irq_cmp(void *priv, struct list_head *a, struct list_head *b) goto out; } - penda = irqa->enabled && irqa->pending; - pendb = irqb->enabled && irqb->pending; + penda = irqa->enabled && irq_is_pending(irqa); + pendb = irqb->enabled && irq_is_pending(irqb); if (!penda || !pendb) { ret = (int)pendb - (int)penda; @@ -371,12 +371,10 @@ static int vgic_update_irq_pending(struct kvm *kvm, int cpuid, return 0; } - if (irq->config == VGIC_CONFIG_LEVEL) { + if (irq->config == VGIC_CONFIG_LEVEL) irq->line_level = level; - irq->pending = level || irq->soft_pending; - } else { - irq->pending = true; - } + else + irq->pending_latch = true; vgic_queue_irq_unlock(kvm, irq); vgic_put_irq(kvm, irq); @@ -689,7 +687,7 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu) list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) { spin_lock(&irq->irq_lock); - pending = irq->pending && irq->enabled; + pending = irq_is_pending(irq) && irq->enabled; spin_unlock(&irq->irq_lock); if (pending) diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h index 859f65c6e056..b2cf34bde243 100644 --- a/virt/kvm/arm/vgic/vgic.h +++ b/virt/kvm/arm/vgic/vgic.h @@ -30,6 +30,14 @@ #define vgic_irq_is_sgi(intid) ((intid) < VGIC_NR_SGIS) +static inline bool irq_is_pending(struct vgic_irq *irq) +{ + if (irq->config == VGIC_CONFIG_EDGE) + return irq->pending_latch; + else + return irq->pending_latch || irq->line_level; +} + struct vgic_vmcr { u32 ctlr; u32 abpr; -- cgit v1.3-6-gb490 From 10f92c4c537794f4e2b5f545a8953790c5445d0f Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 17 Jan 2017 23:09:13 +0100 Subject: KVM: arm/arm64: vgic: Add debugfs vgic-state file Add a file to debugfs to read the in-kernel state of the vgic. We don't do any locking of the entire VGIC state while traversing all the IRQs, so if the VM is running the user/developer may not see a quiesced state, but should take care to pause the VM using facilities in user space for that purpose. We also don't support LPIs yet, but they can be added easily if needed. Reviewed-by: Eric Auger Tested-by: Eric Auger Tested-by: Andre Przywara Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall --- arch/arm/kvm/Makefile | 1 + arch/arm64/kvm/Makefile | 1 + include/kvm/arm_vgic.h | 5 + virt/kvm/arm/vgic/vgic-debug.c | 283 +++++++++++++++++++++++++++++++++++++++++ virt/kvm/arm/vgic/vgic-init.c | 4 + virt/kvm/arm/vgic/vgic.h | 3 + 6 files changed, 297 insertions(+) create mode 100644 virt/kvm/arm/vgic/vgic-debug.c (limited to 'include') diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile index d571243ab4d1..12b628187e90 100644 --- a/arch/arm/kvm/Makefile +++ b/arch/arm/kvm/Makefile @@ -33,5 +33,6 @@ obj-y += $(KVM)/arm/vgic/vgic-mmio-v2.o obj-y += $(KVM)/arm/vgic/vgic-mmio-v3.o obj-y += $(KVM)/arm/vgic/vgic-kvm-device.o obj-y += $(KVM)/arm/vgic/vgic-its.o +obj-y += $(KVM)/arm/vgic/vgic-debug.o obj-y += $(KVM)/irqchip.o obj-y += $(KVM)/arm/arch_timer.o diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index d50a82a16ff6..e025beceda39 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -31,6 +31,7 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio-v2.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio-v3.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-kvm-device.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-its.o +kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-debug.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/irqchip.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arch_timer.o kvm-$(CONFIG_KVM_ARM_PMU) += $(KVM)/arm/pmu.o diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index da2ce086ce31..0af1477cfe8d 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -166,6 +166,8 @@ struct vgic_its { struct list_head collection_list; }; +struct vgic_state_iter; + struct vgic_dist { bool in_kernel; bool ready; @@ -213,6 +215,9 @@ struct vgic_dist { spinlock_t lpi_list_lock; struct list_head lpi_list_head; int lpi_list_count; + + /* used by vgic-debug */ + struct vgic_state_iter *iter; }; struct vgic_v2_cpu_if { diff --git a/virt/kvm/arm/vgic/vgic-debug.c b/virt/kvm/arm/vgic/vgic-debug.c new file mode 100644 index 000000000000..7072ab743332 --- /dev/null +++ b/virt/kvm/arm/vgic/vgic-debug.c @@ -0,0 +1,283 @@ +/* + * Copyright (C) 2016 Linaro + * Author: Christoffer Dall + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include +#include +#include +#include "vgic.h" + +/* + * Structure to control looping through the entire vgic state. We start at + * zero for each field and move upwards. So, if dist_id is 0 we print the + * distributor info. When dist_id is 1, we have already printed it and move + * on. + * + * When vcpu_id < nr_cpus we print the vcpu info until vcpu_id == nr_cpus and + * so on. + */ +struct vgic_state_iter { + int nr_cpus; + int nr_spis; + int dist_id; + int vcpu_id; + int intid; +}; + +static void iter_next(struct vgic_state_iter *iter) +{ + if (iter->dist_id == 0) { + iter->dist_id++; + return; + } + + iter->intid++; + if (iter->intid == VGIC_NR_PRIVATE_IRQS && + ++iter->vcpu_id < iter->nr_cpus) + iter->intid = 0; +} + +static void iter_init(struct kvm *kvm, struct vgic_state_iter *iter, + loff_t pos) +{ + int nr_cpus = atomic_read(&kvm->online_vcpus); + + memset(iter, 0, sizeof(*iter)); + + iter->nr_cpus = nr_cpus; + iter->nr_spis = kvm->arch.vgic.nr_spis; + + /* Fast forward to the right position if needed */ + while (pos--) + iter_next(iter); +} + +static bool end_of_vgic(struct vgic_state_iter *iter) +{ + return iter->dist_id > 0 && + iter->vcpu_id == iter->nr_cpus && + (iter->intid - VGIC_NR_PRIVATE_IRQS) == iter->nr_spis; +} + +static void *vgic_debug_start(struct seq_file *s, loff_t *pos) +{ + struct kvm *kvm = (struct kvm *)s->private; + struct vgic_state_iter *iter; + + mutex_lock(&kvm->lock); + iter = kvm->arch.vgic.iter; + if (iter) { + iter = ERR_PTR(-EBUSY); + goto out; + } + + iter = kmalloc(sizeof(*iter), GFP_KERNEL); + if (!iter) { + iter = ERR_PTR(-ENOMEM); + goto out; + } + + iter_init(kvm, iter, *pos); + kvm->arch.vgic.iter = iter; + + if (end_of_vgic(iter)) + iter = NULL; +out: + mutex_unlock(&kvm->lock); + return iter; +} + +static void *vgic_debug_next(struct seq_file *s, void *v, loff_t *pos) +{ + struct kvm *kvm = (struct kvm *)s->private; + struct vgic_state_iter *iter = kvm->arch.vgic.iter; + + ++*pos; + iter_next(iter); + if (end_of_vgic(iter)) + iter = NULL; + return iter; +} + +static void vgic_debug_stop(struct seq_file *s, void *v) +{ + struct kvm *kvm = (struct kvm *)s->private; + struct vgic_state_iter *iter; + + /* + * If the seq file wasn't properly opened, there's nothing to clearn + * up. + */ + if (IS_ERR(v)) + return; + + mutex_lock(&kvm->lock); + iter = kvm->arch.vgic.iter; + kfree(iter); + kvm->arch.vgic.iter = NULL; + mutex_unlock(&kvm->lock); +} + +static void print_dist_state(struct seq_file *s, struct vgic_dist *dist) +{ + seq_printf(s, "Distributor\n"); + seq_printf(s, "===========\n"); + seq_printf(s, "vgic_model:\t%s\n", + (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) ? + "GICv3" : "GICv2"); + seq_printf(s, "nr_spis:\t%d\n", dist->nr_spis); + seq_printf(s, "enabled:\t%d\n", dist->enabled); + seq_printf(s, "\n"); + + seq_printf(s, "P=pending_latch, L=line_level, A=active\n"); + seq_printf(s, "E=enabled, H=hw, C=config (level=1, edge=0)\n"); +} + +static void print_header(struct seq_file *s, struct vgic_irq *irq, + struct kvm_vcpu *vcpu) +{ + int id = 0; + char *hdr = "SPI "; + + if (vcpu) { + hdr = "VCPU"; + id = vcpu->vcpu_id; + } + + seq_printf(s, "\n"); + seq_printf(s, "%s%2d TYP ID TGT_ID PLAEHC HWID TARGET SRC PRI VCPU_ID\n", hdr, id); + seq_printf(s, "---------------------------------------------------------------\n"); +} + +static void print_irq_state(struct seq_file *s, struct vgic_irq *irq, + struct kvm_vcpu *vcpu) +{ + char *type; + if (irq->intid < VGIC_NR_SGIS) + type = "SGI"; + else if (irq->intid < VGIC_NR_PRIVATE_IRQS) + type = "PPI"; + else + type = "SPI"; + + if (irq->intid ==0 || irq->intid == VGIC_NR_PRIVATE_IRQS) + print_header(s, irq, vcpu); + + seq_printf(s, " %s %4d " + " %2d " + "%d%d%d%d%d%d " + "%8d " + "%8x " + " %2x " + "%3d " + " %2d " + "\n", + type, irq->intid, + (irq->target_vcpu) ? irq->target_vcpu->vcpu_id : -1, + irq->pending_latch, + irq->line_level, + irq->active, + irq->enabled, + irq->hw, + irq->config == VGIC_CONFIG_LEVEL, + irq->hwintid, + irq->mpidr, + irq->source, + irq->priority, + (irq->vcpu) ? irq->vcpu->vcpu_id : -1); + +} + +static int vgic_debug_show(struct seq_file *s, void *v) +{ + struct kvm *kvm = (struct kvm *)s->private; + struct vgic_state_iter *iter = (struct vgic_state_iter *)v; + struct vgic_irq *irq; + struct kvm_vcpu *vcpu = NULL; + + if (iter->dist_id == 0) { + print_dist_state(s, &kvm->arch.vgic); + return 0; + } + + if (!kvm->arch.vgic.initialized) + return 0; + + if (iter->vcpu_id < iter->nr_cpus) { + vcpu = kvm_get_vcpu(kvm, iter->vcpu_id); + irq = &vcpu->arch.vgic_cpu.private_irqs[iter->intid]; + } else { + irq = &kvm->arch.vgic.spis[iter->intid - VGIC_NR_PRIVATE_IRQS]; + } + + spin_lock(&irq->irq_lock); + print_irq_state(s, irq, vcpu); + spin_unlock(&irq->irq_lock); + + return 0; +} + +static struct seq_operations vgic_debug_seq_ops = { + .start = vgic_debug_start, + .next = vgic_debug_next, + .stop = vgic_debug_stop, + .show = vgic_debug_show +}; + +static int debug_open(struct inode *inode, struct file *file) +{ + int ret; + ret = seq_open(file, &vgic_debug_seq_ops); + if (!ret) { + struct seq_file *seq; + /* seq_open will have modified file->private_data */ + seq = file->private_data; + seq->private = inode->i_private; + } + + return ret; +}; + +static struct file_operations vgic_debug_fops = { + .owner = THIS_MODULE, + .open = debug_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release +}; + +int vgic_debug_init(struct kvm *kvm) +{ + if (!kvm->debugfs_dentry) + return -ENOENT; + + if (!debugfs_create_file("vgic-state", 0444, + kvm->debugfs_dentry, + kvm, + &vgic_debug_fops)) + return -ENOMEM; + + return 0; +} + +int vgic_debug_destroy(struct kvm *kvm) +{ + return 0; +} diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c index c737ea0a310a..276139a24e6f 100644 --- a/virt/kvm/arm/vgic/vgic-init.c +++ b/virt/kvm/arm/vgic/vgic-init.c @@ -259,6 +259,8 @@ int vgic_init(struct kvm *kvm) if (ret) goto out; + vgic_debug_init(kvm); + dist->initialized = true; out: return ret; @@ -288,6 +290,8 @@ static void __kvm_vgic_destroy(struct kvm *kvm) struct kvm_vcpu *vcpu; int i; + vgic_debug_destroy(kvm); + kvm_vgic_dist_destroy(kvm); kvm_for_each_vcpu(i, vcpu, kvm) diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h index b2cf34bde243..48da1f65a6c3 100644 --- a/virt/kvm/arm/vgic/vgic.h +++ b/virt/kvm/arm/vgic/vgic.h @@ -102,4 +102,7 @@ int kvm_register_vgic_device(unsigned long type); int vgic_lazy_init(struct kvm *kvm); int vgic_init(struct kvm *kvm); +int vgic_debug_init(struct kvm *kvm); +int vgic_debug_destroy(struct kvm *kvm); + #endif -- cgit v1.3-6-gb490 From 5c34153704898ed7ec0f8c0dceb651cbe4b713fd Mon Sep 17 00:00:00 2001 From: Vijaya Kumar K Date: Thu, 26 Jan 2017 19:50:49 +0530 Subject: irqchip/gic-v3: Add missing system register definitions Define register definitions for ICH_VMCR_EL2, ICC_CTLR_EL1 and ICH_VTR_EL2, ICC_BPR0_EL1, ICC_BPR1_EL1 registers. Signed-off-by: Vijaya Kumar K Reviewed-by: Eric Auger Signed-off-by: Marc Zyngier --- include/linux/irqchip/arm-gic-v3.h | 43 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 41 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index e808f8ae6f14..7f6d904a62c6 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -352,8 +352,30 @@ /* * CPU interface registers */ -#define ICC_CTLR_EL1_EOImode_drop_dir (0U << 1) -#define ICC_CTLR_EL1_EOImode_drop (1U << 1) +#define ICC_CTLR_EL1_EOImode_SHIFT (1) +#define ICC_CTLR_EL1_EOImode_drop_dir (0U << ICC_CTLR_EL1_EOImode_SHIFT) +#define ICC_CTLR_EL1_EOImode_drop (1U << ICC_CTLR_EL1_EOImode_SHIFT) +#define ICC_CTLR_EL1_EOImode_MASK (1 << ICC_CTLR_EL1_EOImode_SHIFT) +#define ICC_CTLR_EL1_CBPR_SHIFT 0 +#define ICC_CTLR_EL1_CBPR_MASK (1 << ICC_CTLR_EL1_CBPR_SHIFT) +#define ICC_CTLR_EL1_PRI_BITS_SHIFT 8 +#define ICC_CTLR_EL1_PRI_BITS_MASK (0x7 << ICC_CTLR_EL1_PRI_BITS_SHIFT) +#define ICC_CTLR_EL1_ID_BITS_SHIFT 11 +#define ICC_CTLR_EL1_ID_BITS_MASK (0x7 << ICC_CTLR_EL1_ID_BITS_SHIFT) +#define ICC_CTLR_EL1_SEIS_SHIFT 14 +#define ICC_CTLR_EL1_SEIS_MASK (0x1 << ICC_CTLR_EL1_SEIS_SHIFT) +#define ICC_CTLR_EL1_A3V_SHIFT 15 +#define ICC_CTLR_EL1_A3V_MASK (0x1 << ICC_CTLR_EL1_A3V_SHIFT) +#define ICC_PMR_EL1_SHIFT 0 +#define ICC_PMR_EL1_MASK (0xff << ICC_PMR_EL1_SHIFT) +#define ICC_BPR0_EL1_SHIFT 0 +#define ICC_BPR0_EL1_MASK (0x7 << ICC_BPR0_EL1_SHIFT) +#define ICC_BPR1_EL1_SHIFT 0 +#define ICC_BPR1_EL1_MASK (0x7 << ICC_BPR1_EL1_SHIFT) +#define ICC_IGRPEN0_EL1_SHIFT 0 +#define ICC_IGRPEN0_EL1_MASK (1 << ICC_IGRPEN0_EL1_SHIFT) +#define ICC_IGRPEN1_EL1_SHIFT 0 +#define ICC_IGRPEN1_EL1_MASK (1 << ICC_IGRPEN1_EL1_SHIFT) #define ICC_SRE_EL1_SRE (1U << 0) /* @@ -384,12 +406,29 @@ #define ICH_VMCR_CTLR_SHIFT 0 #define ICH_VMCR_CTLR_MASK (0x21f << ICH_VMCR_CTLR_SHIFT) +#define ICH_VMCR_CBPR_SHIFT 4 +#define ICH_VMCR_CBPR_MASK (1 << ICH_VMCR_CBPR_SHIFT) +#define ICH_VMCR_EOIM_SHIFT 9 +#define ICH_VMCR_EOIM_MASK (1 << ICH_VMCR_EOIM_SHIFT) #define ICH_VMCR_BPR1_SHIFT 18 #define ICH_VMCR_BPR1_MASK (7 << ICH_VMCR_BPR1_SHIFT) #define ICH_VMCR_BPR0_SHIFT 21 #define ICH_VMCR_BPR0_MASK (7 << ICH_VMCR_BPR0_SHIFT) #define ICH_VMCR_PMR_SHIFT 24 #define ICH_VMCR_PMR_MASK (0xffUL << ICH_VMCR_PMR_SHIFT) +#define ICH_VMCR_ENG0_SHIFT 0 +#define ICH_VMCR_ENG0_MASK (1 << ICH_VMCR_ENG0_SHIFT) +#define ICH_VMCR_ENG1_SHIFT 1 +#define ICH_VMCR_ENG1_MASK (1 << ICH_VMCR_ENG1_SHIFT) + +#define ICH_VTR_PRI_BITS_SHIFT 29 +#define ICH_VTR_PRI_BITS_MASK (7 << ICH_VTR_PRI_BITS_SHIFT) +#define ICH_VTR_ID_BITS_SHIFT 23 +#define ICH_VTR_ID_BITS_MASK (7 << ICH_VTR_ID_BITS_SHIFT) +#define ICH_VTR_SEIS_SHIFT 22 +#define ICH_VTR_SEIS_MASK (1 << ICH_VTR_SEIS_SHIFT) +#define ICH_VTR_A3V_SHIFT 21 +#define ICH_VTR_A3V_MASK (1 << ICH_VTR_A3V_SHIFT) #define ICC_IAR1_EL1_SPURIOUS 0x3ff -- cgit v1.3-6-gb490 From 5fb247d79c04240dce86c842976cde1edde7f7ed Mon Sep 17 00:00:00 2001 From: Vijaya Kumar K Date: Thu, 26 Jan 2017 19:50:50 +0530 Subject: KVM: arm/arm64: vgic: Introduce VENG0 and VENG1 fields to vmcr struct ICC_VMCR_EL2 supports virtual access to ICC_IGRPEN1_EL1.Enable and ICC_IGRPEN0_EL1.Enable fields. Add grpen0 and grpen1 member variables to struct vmcr to support read and write of these fields. Also refactor vgic_set_vmcr and vgic_get_vmcr() code. Drop ICH_VMCR_CTLR_SHIFT and ICH_VMCR_CTLR_MASK macros and instead use ICH_VMCR_EOI* and ICH_VMCR_CBPR* macros. Signed-off-by: Vijaya Kumar K Reviewed-by: Christoffer Dall Reviewed-by: Eric Auger Signed-off-by: Marc Zyngier --- include/linux/irqchip/arm-gic-v3.h | 2 -- virt/kvm/arm/vgic/vgic-mmio-v2.c | 16 ---------------- virt/kvm/arm/vgic/vgic-mmio.c | 16 ++++++++++++++++ virt/kvm/arm/vgic/vgic-v3.c | 20 ++++++++++++++++++-- virt/kvm/arm/vgic/vgic.h | 5 +++++ 5 files changed, 39 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 7f6d904a62c6..170e00a40826 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -404,8 +404,6 @@ #define ICH_HCR_EN (1 << 0) #define ICH_HCR_UIE (1 << 1) -#define ICH_VMCR_CTLR_SHIFT 0 -#define ICH_VMCR_CTLR_MASK (0x21f << ICH_VMCR_CTLR_SHIFT) #define ICH_VMCR_CBPR_SHIFT 4 #define ICH_VMCR_CBPR_MASK (1 << ICH_VMCR_CBPR_SHIFT) #define ICH_VMCR_EOIM_SHIFT 9 diff --git a/virt/kvm/arm/vgic/vgic-mmio-v2.c b/virt/kvm/arm/vgic/vgic-mmio-v2.c index fa68dd41756e..a3ad7ff95c9b 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v2.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v2.c @@ -213,22 +213,6 @@ static void vgic_mmio_write_sgipends(struct kvm_vcpu *vcpu, } } -static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr) -{ - if (kvm_vgic_global_state.type == VGIC_V2) - vgic_v2_set_vmcr(vcpu, vmcr); - else - vgic_v3_set_vmcr(vcpu, vmcr); -} - -static void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr) -{ - if (kvm_vgic_global_state.type == VGIC_V2) - vgic_v2_get_vmcr(vcpu, vmcr); - else - vgic_v3_get_vmcr(vcpu, vmcr); -} - #define GICC_ARCH_VERSION_V2 0x2 /* These are for userland accesses only, there is no guest-facing emulation. */ diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c index 746c8afca718..1d1886e7d640 100644 --- a/virt/kvm/arm/vgic/vgic-mmio.c +++ b/virt/kvm/arm/vgic/vgic-mmio.c @@ -385,6 +385,22 @@ vgic_find_mmio_region(const struct vgic_register_region *region, int nr_regions, sizeof(region[0]), match_region); } +void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr) +{ + if (kvm_vgic_global_state.type == VGIC_V2) + vgic_v2_set_vmcr(vcpu, vmcr); + else + vgic_v3_set_vmcr(vcpu, vmcr); +} + +void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr) +{ + if (kvm_vgic_global_state.type == VGIC_V2) + vgic_v2_get_vmcr(vcpu, vmcr); + else + vgic_v3_get_vmcr(vcpu, vmcr); +} + /* * kvm_mmio_read_buf() returns a value in a format where it can be converted * to a byte array and be directly observed as the guest wanted it to appear diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index 679ba935698f..42ff9c9826d3 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -175,10 +175,18 @@ void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) { u32 vmcr; - vmcr = (vmcrp->ctlr << ICH_VMCR_CTLR_SHIFT) & ICH_VMCR_CTLR_MASK; + /* + * Ignore the FIQen bit, because GIC emulation always implies + * SRE=1 which means the vFIQEn bit is also RES1. + */ + vmcr = ((vmcrp->ctlr >> ICC_CTLR_EL1_EOImode_SHIFT) << + ICH_VMCR_EOIM_SHIFT) & ICH_VMCR_EOIM_MASK; + vmcr |= (vmcrp->ctlr << ICH_VMCR_CBPR_SHIFT) & ICH_VMCR_CBPR_MASK; vmcr |= (vmcrp->abpr << ICH_VMCR_BPR1_SHIFT) & ICH_VMCR_BPR1_MASK; vmcr |= (vmcrp->bpr << ICH_VMCR_BPR0_SHIFT) & ICH_VMCR_BPR0_MASK; vmcr |= (vmcrp->pmr << ICH_VMCR_PMR_SHIFT) & ICH_VMCR_PMR_MASK; + vmcr |= (vmcrp->grpen0 << ICH_VMCR_ENG0_SHIFT) & ICH_VMCR_ENG0_MASK; + vmcr |= (vmcrp->grpen1 << ICH_VMCR_ENG1_SHIFT) & ICH_VMCR_ENG1_MASK; vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr = vmcr; } @@ -187,10 +195,18 @@ void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) { u32 vmcr = vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr; - vmcrp->ctlr = (vmcr & ICH_VMCR_CTLR_MASK) >> ICH_VMCR_CTLR_SHIFT; + /* + * Ignore the FIQen bit, because GIC emulation always implies + * SRE=1 which means the vFIQEn bit is also RES1. + */ + vmcrp->ctlr = ((vmcr >> ICH_VMCR_EOIM_SHIFT) << + ICC_CTLR_EL1_EOImode_SHIFT) & ICC_CTLR_EL1_EOImode_MASK; + vmcrp->ctlr |= (vmcr & ICH_VMCR_CBPR_MASK) >> ICH_VMCR_CBPR_SHIFT; vmcrp->abpr = (vmcr & ICH_VMCR_BPR1_MASK) >> ICH_VMCR_BPR1_SHIFT; vmcrp->bpr = (vmcr & ICH_VMCR_BPR0_MASK) >> ICH_VMCR_BPR0_SHIFT; vmcrp->pmr = (vmcr & ICH_VMCR_PMR_MASK) >> ICH_VMCR_PMR_SHIFT; + vmcrp->grpen0 = (vmcr & ICH_VMCR_ENG0_MASK) >> ICH_VMCR_ENG0_SHIFT; + vmcrp->grpen1 = (vmcr & ICH_VMCR_ENG1_MASK) >> ICH_VMCR_ENG1_SHIFT; } #define INITIAL_PENDBASER_VALUE \ diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h index 4505fd4919fd..ecfe1a6c841a 100644 --- a/virt/kvm/arm/vgic/vgic.h +++ b/virt/kvm/arm/vgic/vgic.h @@ -65,6 +65,9 @@ struct vgic_vmcr { u32 abpr; u32 bpr; u32 pmr; + /* Below member variable are valid only for GICv3 */ + u32 grpen0; + u32 grpen1; }; struct vgic_reg_attr { @@ -137,6 +140,8 @@ int vgic_v3_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write, int vgic_v3_redist_uaccess(struct kvm_vcpu *vcpu, bool is_write, int offset, u32 *val); int kvm_register_vgic_device(unsigned long type); +void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); +void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); int vgic_lazy_init(struct kvm *kvm); int vgic_init(struct kvm *kvm); -- cgit v1.3-6-gb490 From d017d7b0bd7ab32644d35666a6c4412daa0b0a1d Mon Sep 17 00:00:00 2001 From: Vijaya Kumar K Date: Thu, 26 Jan 2017 19:50:51 +0530 Subject: KVM: arm/arm64: vgic: Implement VGICv3 CPU interface access VGICv3 CPU interface registers are accessed using KVM_DEV_ARM_VGIC_CPU_SYSREGS ioctl. These registers are accessed as 64-bit. The cpu MPIDR value is passed along with register id. It is used to identify the cpu for registers access. The VM that supports SEIs expect it on destination machine to handle guest aborts and hence checked for ICC_CTLR_EL1.SEIS compatibility. Similarly, VM that supports Affinity Level 3 that is required for AArch64 mode, is required to be supported on destination machine. Hence checked for ICC_CTLR_EL1.A3V compatibility. The arch/arm64/kvm/vgic-sys-reg-v3.c handles read and write of VGIC CPU registers for AArch64. For AArch32 mode, arch/arm/kvm/vgic-v3-coproc.c file is created but APIs are not implemented. Updated arch/arm/include/uapi/asm/kvm.h with new definitions required to compile for AArch32. The version of VGIC v3 specification is defined here Documentation/virtual/kvm/devices/arm-vgic-v3.txt Acked-by: Christoffer Dall Reviewed-by: Eric Auger Signed-off-by: Pavel Fedin Signed-off-by: Vijaya Kumar K Signed-off-by: Marc Zyngier --- arch/arm/include/uapi/asm/kvm.h | 3 + arch/arm/kvm/Makefile | 4 +- arch/arm/kvm/vgic-v3-coproc.c | 35 ++++ arch/arm64/include/uapi/asm/kvm.h | 3 + arch/arm64/kvm/Makefile | 3 +- arch/arm64/kvm/vgic-sys-reg-v3.c | 346 ++++++++++++++++++++++++++++++++++++ include/kvm/arm_vgic.h | 8 + virt/kvm/arm/vgic/vgic-kvm-device.c | 27 +++ virt/kvm/arm/vgic/vgic-mmio-v3.c | 6 + virt/kvm/arm/vgic/vgic-v3.c | 8 + virt/kvm/arm/vgic/vgic.h | 25 +++ 11 files changed, 465 insertions(+), 3 deletions(-) create mode 100644 arch/arm/kvm/vgic-v3-coproc.c create mode 100644 arch/arm64/kvm/vgic-sys-reg-v3.c (limited to 'include') diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h index 0ae6035e0604..7a3e5370ba68 100644 --- a/arch/arm/include/uapi/asm/kvm.h +++ b/arch/arm/include/uapi/asm/kvm.h @@ -186,9 +186,12 @@ struct kvm_arch_memory_slot { (0xffffffffULL << KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT) #define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0 #define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT) +#define KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK (0xffff) #define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3 #define KVM_DEV_ARM_VGIC_GRP_CTRL 4 #define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5 +#define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6 + #define KVM_DEV_ARM_VGIC_CTRL_INIT 0 /* KVM_IRQ_LINE irq field index values */ diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile index 12b628187e90..7b3670c2ae7b 100644 --- a/arch/arm/kvm/Makefile +++ b/arch/arm/kvm/Makefile @@ -7,7 +7,7 @@ ifeq ($(plus_virt),+virt) plus_virt_def := -DREQUIRES_VIRT=1 endif -ccflags-y += -Iarch/arm/kvm +ccflags-y += -Iarch/arm/kvm -Ivirt/kvm/arm/vgic CFLAGS_arm.o := -I. $(plus_virt_def) CFLAGS_mmu.o := -I. @@ -20,7 +20,7 @@ kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o $(KVM)/vf obj-$(CONFIG_KVM_ARM_HOST) += hyp/ obj-y += kvm-arm.o init.o interrupts.o obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o -obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o +obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o vgic-v3-coproc.o obj-y += $(KVM)/arm/aarch32.o obj-y += $(KVM)/arm/vgic/vgic.o diff --git a/arch/arm/kvm/vgic-v3-coproc.c b/arch/arm/kvm/vgic-v3-coproc.c new file mode 100644 index 000000000000..f41abf76366f --- /dev/null +++ b/arch/arm/kvm/vgic-v3-coproc.c @@ -0,0 +1,35 @@ +/* + * VGIC system registers handling functions for AArch32 mode + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include +#include "vgic.h" + +int vgic_v3_has_cpu_sysregs_attr(struct kvm_vcpu *vcpu, bool is_write, u64 id, + u64 *reg) +{ + /* + * TODO: Implement for AArch32 + */ + return -ENXIO; +} + +int vgic_v3_cpu_sysregs_uaccess(struct kvm_vcpu *vcpu, bool is_write, u64 id, + u64 *reg) +{ + /* + * TODO: Implement for AArch32 + */ + return -ENXIO; +} diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 56dc08db6492..be379d7bc9f1 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -206,9 +206,12 @@ struct kvm_arch_memory_slot { (0xffffffffULL << KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT) #define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0 #define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT) +#define KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK (0xffff) #define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3 #define KVM_DEV_ARM_VGIC_GRP_CTRL 4 #define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5 +#define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6 + #define KVM_DEV_ARM_VGIC_CTRL_INIT 0 /* Device Control API on vcpu fd */ diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index e025beceda39..afd51bebb9c5 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -2,7 +2,7 @@ # Makefile for Kernel-based Virtual Machine module # -ccflags-y += -Iarch/arm64/kvm +ccflags-y += -Iarch/arm64/kvm -Ivirt/kvm/arm/vgic CFLAGS_arm.o := -I. CFLAGS_mmu.o := -I. @@ -19,6 +19,7 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/psci.o $(ARM)/perf.o kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o regmap.o kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o kvm-$(CONFIG_KVM_ARM_HOST) += guest.o debug.o reset.o sys_regs.o sys_regs_generic_v8.o +kvm-$(CONFIG_KVM_ARM_HOST) += vgic-sys-reg-v3.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/aarch32.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic.o diff --git a/arch/arm64/kvm/vgic-sys-reg-v3.c b/arch/arm64/kvm/vgic-sys-reg-v3.c new file mode 100644 index 000000000000..79f37e37d367 --- /dev/null +++ b/arch/arm64/kvm/vgic-sys-reg-v3.c @@ -0,0 +1,346 @@ +/* + * VGIC system registers handling functions for AArch64 mode + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include +#include +#include "vgic.h" +#include "sys_regs.h" + +static bool access_gic_ctlr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + u32 host_pri_bits, host_id_bits, host_seis, host_a3v, seis, a3v; + struct vgic_cpu *vgic_v3_cpu = &vcpu->arch.vgic_cpu; + struct vgic_vmcr vmcr; + u64 val; + + vgic_get_vmcr(vcpu, &vmcr); + if (p->is_write) { + val = p->regval; + + /* + * Disallow restoring VM state if not supported by this + * hardware. + */ + host_pri_bits = ((val & ICC_CTLR_EL1_PRI_BITS_MASK) >> + ICC_CTLR_EL1_PRI_BITS_SHIFT) + 1; + if (host_pri_bits > vgic_v3_cpu->num_pri_bits) + return false; + + vgic_v3_cpu->num_pri_bits = host_pri_bits; + + host_id_bits = (val & ICC_CTLR_EL1_ID_BITS_MASK) >> + ICC_CTLR_EL1_ID_BITS_SHIFT; + if (host_id_bits > vgic_v3_cpu->num_id_bits) + return false; + + vgic_v3_cpu->num_id_bits = host_id_bits; + + host_seis = ((kvm_vgic_global_state.ich_vtr_el2 & + ICH_VTR_SEIS_MASK) >> ICH_VTR_SEIS_SHIFT); + seis = (val & ICC_CTLR_EL1_SEIS_MASK) >> + ICC_CTLR_EL1_SEIS_SHIFT; + if (host_seis != seis) + return false; + + host_a3v = ((kvm_vgic_global_state.ich_vtr_el2 & + ICH_VTR_A3V_MASK) >> ICH_VTR_A3V_SHIFT); + a3v = (val & ICC_CTLR_EL1_A3V_MASK) >> ICC_CTLR_EL1_A3V_SHIFT; + if (host_a3v != a3v) + return false; + + /* + * Here set VMCR.CTLR in ICC_CTLR_EL1 layout. + * The vgic_set_vmcr() will convert to ICH_VMCR layout. + */ + vmcr.ctlr = val & ICC_CTLR_EL1_CBPR_MASK; + vmcr.ctlr |= val & ICC_CTLR_EL1_EOImode_MASK; + vgic_set_vmcr(vcpu, &vmcr); + } else { + val = 0; + val |= (vgic_v3_cpu->num_pri_bits - 1) << + ICC_CTLR_EL1_PRI_BITS_SHIFT; + val |= vgic_v3_cpu->num_id_bits << ICC_CTLR_EL1_ID_BITS_SHIFT; + val |= ((kvm_vgic_global_state.ich_vtr_el2 & + ICH_VTR_SEIS_MASK) >> ICH_VTR_SEIS_SHIFT) << + ICC_CTLR_EL1_SEIS_SHIFT; + val |= ((kvm_vgic_global_state.ich_vtr_el2 & + ICH_VTR_A3V_MASK) >> ICH_VTR_A3V_SHIFT) << + ICC_CTLR_EL1_A3V_SHIFT; + /* + * The VMCR.CTLR value is in ICC_CTLR_EL1 layout. + * Extract it directly using ICC_CTLR_EL1 reg definitions. + */ + val |= vmcr.ctlr & ICC_CTLR_EL1_CBPR_MASK; + val |= vmcr.ctlr & ICC_CTLR_EL1_EOImode_MASK; + + p->regval = val; + } + + return true; +} + +static bool access_gic_pmr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + struct vgic_vmcr vmcr; + + vgic_get_vmcr(vcpu, &vmcr); + if (p->is_write) { + vmcr.pmr = (p->regval & ICC_PMR_EL1_MASK) >> ICC_PMR_EL1_SHIFT; + vgic_set_vmcr(vcpu, &vmcr); + } else { + p->regval = (vmcr.pmr << ICC_PMR_EL1_SHIFT) & ICC_PMR_EL1_MASK; + } + + return true; +} + +static bool access_gic_bpr0(struct kvm_vcpu *vcpu, struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + struct vgic_vmcr vmcr; + + vgic_get_vmcr(vcpu, &vmcr); + if (p->is_write) { + vmcr.bpr = (p->regval & ICC_BPR0_EL1_MASK) >> + ICC_BPR0_EL1_SHIFT; + vgic_set_vmcr(vcpu, &vmcr); + } else { + p->regval = (vmcr.bpr << ICC_BPR0_EL1_SHIFT) & + ICC_BPR0_EL1_MASK; + } + + return true; +} + +static bool access_gic_bpr1(struct kvm_vcpu *vcpu, struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + struct vgic_vmcr vmcr; + + if (!p->is_write) + p->regval = 0; + + vgic_get_vmcr(vcpu, &vmcr); + if (!((vmcr.ctlr & ICH_VMCR_CBPR_MASK) >> ICH_VMCR_CBPR_SHIFT)) { + if (p->is_write) { + vmcr.abpr = (p->regval & ICC_BPR1_EL1_MASK) >> + ICC_BPR1_EL1_SHIFT; + vgic_set_vmcr(vcpu, &vmcr); + } else { + p->regval = (vmcr.abpr << ICC_BPR1_EL1_SHIFT) & + ICC_BPR1_EL1_MASK; + } + } else { + if (!p->is_write) + p->regval = min((vmcr.bpr + 1), 7U); + } + + return true; +} + +static bool access_gic_grpen0(struct kvm_vcpu *vcpu, struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + struct vgic_vmcr vmcr; + + vgic_get_vmcr(vcpu, &vmcr); + if (p->is_write) { + vmcr.grpen0 = (p->regval & ICC_IGRPEN0_EL1_MASK) >> + ICC_IGRPEN0_EL1_SHIFT; + vgic_set_vmcr(vcpu, &vmcr); + } else { + p->regval = (vmcr.grpen0 << ICC_IGRPEN0_EL1_SHIFT) & + ICC_IGRPEN0_EL1_MASK; + } + + return true; +} + +static bool access_gic_grpen1(struct kvm_vcpu *vcpu, struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + struct vgic_vmcr vmcr; + + vgic_get_vmcr(vcpu, &vmcr); + if (p->is_write) { + vmcr.grpen1 = (p->regval & ICC_IGRPEN1_EL1_MASK) >> + ICC_IGRPEN1_EL1_SHIFT; + vgic_set_vmcr(vcpu, &vmcr); + } else { + p->regval = (vmcr.grpen1 << ICC_IGRPEN1_EL1_SHIFT) & + ICC_IGRPEN1_EL1_MASK; + } + + return true; +} + +static void vgic_v3_access_apr_reg(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, u8 apr, u8 idx) +{ + struct vgic_v3_cpu_if *vgicv3 = &vcpu->arch.vgic_cpu.vgic_v3; + uint32_t *ap_reg; + + if (apr) + ap_reg = &vgicv3->vgic_ap1r[idx]; + else + ap_reg = &vgicv3->vgic_ap0r[idx]; + + if (p->is_write) + *ap_reg = p->regval; + else + p->regval = *ap_reg; +} + +static bool access_gic_aprn(struct kvm_vcpu *vcpu, struct sys_reg_params *p, + const struct sys_reg_desc *r, u8 apr) +{ + struct vgic_cpu *vgic_v3_cpu = &vcpu->arch.vgic_cpu; + u8 idx = r->Op2 & 3; + + /* + * num_pri_bits are initialized with HW supported values. + * We can rely safely on num_pri_bits even if VM has not + * restored ICC_CTLR_EL1 before restoring APnR registers. + */ + switch (vgic_v3_cpu->num_pri_bits) { + case 7: + vgic_v3_access_apr_reg(vcpu, p, apr, idx); + break; + case 6: + if (idx > 1) + goto err; + vgic_v3_access_apr_reg(vcpu, p, apr, idx); + break; + default: + if (idx > 0) + goto err; + vgic_v3_access_apr_reg(vcpu, p, apr, idx); + } + + return true; +err: + if (!p->is_write) + p->regval = 0; + + return false; +} + +static bool access_gic_ap0r(struct kvm_vcpu *vcpu, struct sys_reg_params *p, + const struct sys_reg_desc *r) + +{ + return access_gic_aprn(vcpu, p, r, 0); +} + +static bool access_gic_ap1r(struct kvm_vcpu *vcpu, struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + return access_gic_aprn(vcpu, p, r, 1); +} + +static bool access_gic_sre(struct kvm_vcpu *vcpu, struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + struct vgic_v3_cpu_if *vgicv3 = &vcpu->arch.vgic_cpu.vgic_v3; + + /* Validate SRE bit */ + if (p->is_write) { + if (!(p->regval & ICC_SRE_EL1_SRE)) + return false; + } else { + p->regval = vgicv3->vgic_sre; + } + + return true; +} +static const struct sys_reg_desc gic_v3_icc_reg_descs[] = { + /* ICC_PMR_EL1 */ + { Op0(3), Op1(0), CRn(4), CRm(6), Op2(0), access_gic_pmr }, + /* ICC_BPR0_EL1 */ + { Op0(3), Op1(0), CRn(12), CRm(8), Op2(3), access_gic_bpr0 }, + /* ICC_AP0R0_EL1 */ + { Op0(3), Op1(0), CRn(12), CRm(8), Op2(4), access_gic_ap0r }, + /* ICC_AP0R1_EL1 */ + { Op0(3), Op1(0), CRn(12), CRm(8), Op2(5), access_gic_ap0r }, + /* ICC_AP0R2_EL1 */ + { Op0(3), Op1(0), CRn(12), CRm(8), Op2(6), access_gic_ap0r }, + /* ICC_AP0R3_EL1 */ + { Op0(3), Op1(0), CRn(12), CRm(8), Op2(7), access_gic_ap0r }, + /* ICC_AP1R0_EL1 */ + { Op0(3), Op1(0), CRn(12), CRm(9), Op2(0), access_gic_ap1r }, + /* ICC_AP1R1_EL1 */ + { Op0(3), Op1(0), CRn(12), CRm(9), Op2(1), access_gic_ap1r }, + /* ICC_AP1R2_EL1 */ + { Op0(3), Op1(0), CRn(12), CRm(9), Op2(2), access_gic_ap1r }, + /* ICC_AP1R3_EL1 */ + { Op0(3), Op1(0), CRn(12), CRm(9), Op2(3), access_gic_ap1r }, + /* ICC_BPR1_EL1 */ + { Op0(3), Op1(0), CRn(12), CRm(12), Op2(3), access_gic_bpr1 }, + /* ICC_CTLR_EL1 */ + { Op0(3), Op1(0), CRn(12), CRm(12), Op2(4), access_gic_ctlr }, + /* ICC_SRE_EL1 */ + { Op0(3), Op1(0), CRn(12), CRm(12), Op2(5), access_gic_sre }, + /* ICC_IGRPEN0_EL1 */ + { Op0(3), Op1(0), CRn(12), CRm(12), Op2(6), access_gic_grpen0 }, + /* ICC_GRPEN1_EL1 */ + { Op0(3), Op1(0), CRn(12), CRm(12), Op2(7), access_gic_grpen1 }, +}; + +int vgic_v3_has_cpu_sysregs_attr(struct kvm_vcpu *vcpu, bool is_write, u64 id, + u64 *reg) +{ + struct sys_reg_params params; + u64 sysreg = (id & KVM_DEV_ARM_VGIC_SYSREG_MASK) | KVM_REG_SIZE_U64; + + params.regval = *reg; + params.is_write = is_write; + params.is_aarch32 = false; + params.is_32bit = false; + + if (find_reg_by_id(sysreg, ¶ms, gic_v3_icc_reg_descs, + ARRAY_SIZE(gic_v3_icc_reg_descs))) + return 0; + + return -ENXIO; +} + +int vgic_v3_cpu_sysregs_uaccess(struct kvm_vcpu *vcpu, bool is_write, u64 id, + u64 *reg) +{ + struct sys_reg_params params; + const struct sys_reg_desc *r; + u64 sysreg = (id & KVM_DEV_ARM_VGIC_SYSREG_MASK) | KVM_REG_SIZE_U64; + + if (is_write) + params.regval = *reg; + params.is_write = is_write; + params.is_aarch32 = false; + params.is_32bit = false; + + r = find_reg_by_id(sysreg, ¶ms, gic_v3_icc_reg_descs, + ARRAY_SIZE(gic_v3_icc_reg_descs)); + if (!r) + return -ENXIO; + + if (!r->access(vcpu, ¶ms, r)) + return -EINVAL; + + if (!is_write) + *reg = params.regval; + + return 0; +} diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 0af1477cfe8d..b72dd2ad5f44 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -71,6 +71,8 @@ struct vgic_global { /* GIC system register CPU interface */ struct static_key_false gicv3_cpuif; + + u32 ich_vtr_el2; }; extern struct vgic_global kvm_vgic_global_state; @@ -275,6 +277,12 @@ struct vgic_cpu { u64 pendbaser; bool lpis_enabled; + + /* Cache guest priority bits */ + u32 num_pri_bits; + + /* Cache guest interrupt ID bits */ + u32 num_id_bits; }; extern struct static_key_false vgic_v2_cpuif_trap; diff --git a/virt/kvm/arm/vgic/vgic-kvm-device.c b/virt/kvm/arm/vgic/vgic-kvm-device.c index 227337fd5156..b30372b59dc2 100644 --- a/virt/kvm/arm/vgic/vgic-kvm-device.c +++ b/virt/kvm/arm/vgic/vgic-kvm-device.c @@ -504,6 +504,14 @@ static int vgic_v3_attr_regs_access(struct kvm_device *dev, if (!is_write) *reg = tmp32; break; + case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: { + u64 regid; + + regid = (attr->attr & KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK); + ret = vgic_v3_cpu_sysregs_uaccess(vcpu, is_write, + regid, reg); + break; + } default: ret = -EINVAL; break; @@ -537,6 +545,15 @@ static int vgic_v3_set_attr(struct kvm_device *dev, reg = tmp32; return vgic_v3_attr_regs_access(dev, attr, ®, true); } + case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: { + u64 __user *uaddr = (u64 __user *)(long)attr->addr; + u64 reg; + + if (get_user(reg, uaddr)) + return -EFAULT; + + return vgic_v3_attr_regs_access(dev, attr, ®, true); + } } return -ENXIO; } @@ -563,6 +580,15 @@ static int vgic_v3_get_attr(struct kvm_device *dev, tmp32 = reg; return put_user(tmp32, uaddr); } + case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: { + u64 __user *uaddr = (u64 __user *)(long)attr->addr; + u64 reg; + + ret = vgic_v3_attr_regs_access(dev, attr, ®, false); + if (ret) + return ret; + return put_user(reg, uaddr); + } } return -ENXIO; @@ -581,6 +607,7 @@ static int vgic_v3_has_attr(struct kvm_device *dev, break; case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS: + case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: return vgic_v3_has_attr_regs(dev, attr); case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: return 0; diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c index 2031138cfcf4..549ae459ca0e 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c @@ -645,6 +645,12 @@ int vgic_v3_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr) iodev.base_addr = 0; break; } + case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: { + u64 reg, id; + + id = (attr->attr & KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK); + return vgic_v3_has_cpu_sysregs_attr(vcpu, 0, id, ®); + } default: return -ENXIO; } diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index 42ff9c9826d3..edc6ee2dc852 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -238,6 +238,13 @@ void vgic_v3_enable(struct kvm_vcpu *vcpu) vgic_v3->vgic_sre = 0; } + vcpu->arch.vgic_cpu.num_id_bits = (kvm_vgic_global_state.ich_vtr_el2 & + ICH_VTR_ID_BITS_MASK) >> + ICH_VTR_ID_BITS_SHIFT; + vcpu->arch.vgic_cpu.num_pri_bits = ((kvm_vgic_global_state.ich_vtr_el2 & + ICH_VTR_PRI_BITS_MASK) >> + ICH_VTR_PRI_BITS_SHIFT) + 1; + /* Get the show on the road... */ vgic_v3->vgic_hcr = ICH_HCR_EN; } @@ -336,6 +343,7 @@ int vgic_v3_probe(const struct gic_kvm_info *info) */ kvm_vgic_global_state.nr_lr = (ich_vtr_el2 & 0xf) + 1; kvm_vgic_global_state.can_emulate_gicv2 = false; + kvm_vgic_global_state.ich_vtr_el2 = ich_vtr_el2; if (!info->vcpu.start) { kvm_info("GICv3: no GICV resource entry\n"); diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h index ecfe1a6c841a..a5a45f66c058 100644 --- a/virt/kvm/arm/vgic/vgic.h +++ b/virt/kvm/arm/vgic/vgic.h @@ -52,6 +52,27 @@ VGIC_AFFINITY_LEVEL(val, 2) | \ VGIC_AFFINITY_LEVEL(val, 3)) +/* + * As per Documentation/virtual/kvm/devices/arm-vgic-v3.txt, + * below macros are defined for CPUREG encoding. + */ +#define KVM_REG_ARM_VGIC_SYSREG_OP0_MASK 0x000000000000c000 +#define KVM_REG_ARM_VGIC_SYSREG_OP0_SHIFT 14 +#define KVM_REG_ARM_VGIC_SYSREG_OP1_MASK 0x0000000000003800 +#define KVM_REG_ARM_VGIC_SYSREG_OP1_SHIFT 11 +#define KVM_REG_ARM_VGIC_SYSREG_CRN_MASK 0x0000000000000780 +#define KVM_REG_ARM_VGIC_SYSREG_CRN_SHIFT 7 +#define KVM_REG_ARM_VGIC_SYSREG_CRM_MASK 0x0000000000000078 +#define KVM_REG_ARM_VGIC_SYSREG_CRM_SHIFT 3 +#define KVM_REG_ARM_VGIC_SYSREG_OP2_MASK 0x0000000000000007 +#define KVM_REG_ARM_VGIC_SYSREG_OP2_SHIFT 0 + +#define KVM_DEV_ARM_VGIC_SYSREG_MASK (KVM_REG_ARM_VGIC_SYSREG_OP0_MASK | \ + KVM_REG_ARM_VGIC_SYSREG_OP1_MASK | \ + KVM_REG_ARM_VGIC_SYSREG_CRN_MASK | \ + KVM_REG_ARM_VGIC_SYSREG_CRM_MASK | \ + KVM_REG_ARM_VGIC_SYSREG_OP2_MASK) + static inline bool irq_is_pending(struct vgic_irq *irq) { if (irq->config == VGIC_CONFIG_EDGE) @@ -139,6 +160,10 @@ int vgic_v3_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write, int offset, u32 *val); int vgic_v3_redist_uaccess(struct kvm_vcpu *vcpu, bool is_write, int offset, u32 *val); +int vgic_v3_cpu_sysregs_uaccess(struct kvm_vcpu *vcpu, bool is_write, + u64 id, u64 *val); +int vgic_v3_has_cpu_sysregs_attr(struct kvm_vcpu *vcpu, bool is_write, u64 id, + u64 *reg); int kvm_register_vgic_device(unsigned long type); void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); -- cgit v1.3-6-gb490 From 9d93dc1c96ec446bef9c34a189ea24556f1af89a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 25 Jan 2017 13:47:43 +0000 Subject: arm/arm64: KVM: Get rid of KVM_MEMSLOT_INCOHERENT KVM_MEMSLOT_INCOHERENT is not used anymore, as we've killed its only use in the arm/arm64 MMU code. Let's remove the last artifacts. Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier --- arch/arm/kvm/mmu.c | 9 --------- include/linux/kvm_host.h | 1 - 2 files changed, 10 deletions(-) (limited to 'include') diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 5cc35080cbf7..962616fd4ddd 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -1876,15 +1876,6 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, unsigned long npages) { - /* - * Readonly memslots are not incoherent with the caches by definition, - * but in practice, they are used mostly to emulate ROMs or NOR flashes - * that the guest may consider devices and hence map as uncached. - * To prevent incoherency issues in these cases, tag all readonly - * regions as incoherent. - */ - if (slot->flags & KVM_MEM_READONLY) - slot->flags |= KVM_MEMSLOT_INCOHERENT; return 0; } diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 1c5190dab2c1..cda457bcedc1 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -45,7 +45,6 @@ * include/linux/kvm_h. */ #define KVM_MEMSLOT_INVALID (1UL << 16) -#define KVM_MEMSLOT_INCOHERENT (1UL << 17) /* Two fragments for cross MMIO pages. */ #define KVM_MAX_MMIO_FRAGMENTS 2 -- cgit v1.3-6-gb490 From fbb4aeec5fc2ab47615b2a0cbabc503e1eef4c60 Mon Sep 17 00:00:00 2001 From: Jintack Lim Date: Fri, 3 Feb 2017 10:19:59 -0500 Subject: KVM: arm/arm64: Abstract virtual timer context into separate structure Abstract virtual timer context into a separate structure and change all callers referring to timer registers, irq state and so on. No change in functionality. This is about to become very handy when adding the EL1 physical timer. Signed-off-by: Jintack Lim Acked-by: Christoffer Dall Acked-by: Marc Zyngier Signed-off-by: Marc Zyngier --- include/kvm/arm_arch_timer.h | 27 ++++++++--------- virt/kvm/arm/arch_timer.c | 69 +++++++++++++++++++++++--------------------- virt/kvm/arm/hyp/timer-sr.c | 10 ++++--- 3 files changed, 56 insertions(+), 50 deletions(-) (limited to 'include') diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index 5c970ce67949..daad3c133b9f 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -28,15 +28,20 @@ struct arch_timer_kvm { u64 cntvoff; }; -struct arch_timer_cpu { +struct arch_timer_context { /* Registers: control register, timer value */ - u32 cntv_ctl; /* Saved/restored */ - u64 cntv_cval; /* Saved/restored */ + u32 cnt_ctl; + u64 cnt_cval; + + /* Timer IRQ */ + struct kvm_irq_level irq; + + /* Active IRQ state caching */ + bool active_cleared_last; +}; - /* - * Anything that is not used directly from assembly code goes - * here. - */ +struct arch_timer_cpu { + struct arch_timer_context vtimer; /* Background timer used when the guest is not running */ struct hrtimer timer; @@ -47,12 +52,6 @@ struct arch_timer_cpu { /* Background timer active */ bool armed; - /* Timer IRQ */ - struct kvm_irq_level irq; - - /* Active IRQ state caching */ - bool active_cleared_last; - /* Is the timer enabled */ bool enabled; }; @@ -77,4 +76,6 @@ void kvm_timer_unschedule(struct kvm_vcpu *vcpu); void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu); void kvm_timer_init_vhe(void); + +#define vcpu_vtimer(v) (&(v)->arch.timer_cpu.vtimer) #endif diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 91ecf48f7fe2..d3556b3ca694 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -37,7 +37,7 @@ static u32 host_vtimer_irq_flags; void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu) { - vcpu->arch.timer_cpu.active_cleared_last = false; + vcpu_vtimer(vcpu)->active_cleared_last = false; } static u64 kvm_phys_timer_read(void) @@ -102,7 +102,7 @@ static u64 kvm_timer_compute_delta(struct kvm_vcpu *vcpu) { u64 cval, now; - cval = vcpu->arch.timer_cpu.cntv_cval; + cval = vcpu_vtimer(vcpu)->cnt_cval; now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff; if (now < cval) { @@ -144,21 +144,21 @@ static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt) static bool kvm_timer_irq_can_fire(struct kvm_vcpu *vcpu) { - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); - return !(timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) && - (timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE); + return !(vtimer->cnt_ctl & ARCH_TIMER_CTRL_IT_MASK) && + (vtimer->cnt_ctl & ARCH_TIMER_CTRL_ENABLE); } bool kvm_timer_should_fire(struct kvm_vcpu *vcpu) { - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); u64 cval, now; if (!kvm_timer_irq_can_fire(vcpu)) return false; - cval = timer->cntv_cval; + cval = vtimer->cnt_cval; now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff; return cval <= now; @@ -167,18 +167,18 @@ bool kvm_timer_should_fire(struct kvm_vcpu *vcpu) static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level) { int ret; - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); BUG_ON(!vgic_initialized(vcpu->kvm)); - timer->active_cleared_last = false; - timer->irq.level = new_level; - trace_kvm_timer_update_irq(vcpu->vcpu_id, timer->irq.irq, - timer->irq.level); + vtimer->active_cleared_last = false; + vtimer->irq.level = new_level; + trace_kvm_timer_update_irq(vcpu->vcpu_id, vtimer->irq.irq, + vtimer->irq.level); ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, - timer->irq.irq, - timer->irq.level); + vtimer->irq.irq, + vtimer->irq.level); WARN_ON(ret); } @@ -189,18 +189,19 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level) static int kvm_timer_update_state(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); /* * If userspace modified the timer registers via SET_ONE_REG before - * the vgic was initialized, we mustn't set the timer->irq.level value + * the vgic was initialized, we mustn't set the vtimer->irq.level value * because the guest would never see the interrupt. Instead wait * until we call this function from kvm_timer_flush_hwstate. */ if (!vgic_initialized(vcpu->kvm) || !timer->enabled) return -ENODEV; - if (kvm_timer_should_fire(vcpu) != timer->irq.level) - kvm_timer_update_irq(vcpu, !timer->irq.level); + if (kvm_timer_should_fire(vcpu) != vtimer->irq.level) + kvm_timer_update_irq(vcpu, !vtimer->irq.level); return 0; } @@ -250,7 +251,7 @@ void kvm_timer_unschedule(struct kvm_vcpu *vcpu) */ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) { - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); bool phys_active; int ret; @@ -274,8 +275,8 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) * to ensure that hardware interrupts from the timer triggers a guest * exit. */ - phys_active = timer->irq.level || - kvm_vgic_map_is_active(vcpu, timer->irq.irq); + phys_active = vtimer->irq.level || + kvm_vgic_map_is_active(vcpu, vtimer->irq.irq); /* * We want to avoid hitting the (re)distributor as much as @@ -297,7 +298,7 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) * - cached value is "active clear" * - value to be programmed is "active clear" */ - if (timer->active_cleared_last && !phys_active) + if (vtimer->active_cleared_last && !phys_active) return; ret = irq_set_irqchip_state(host_vtimer_irq, @@ -305,7 +306,7 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) phys_active); WARN_ON(ret); - timer->active_cleared_last = !phys_active; + vtimer->active_cleared_last = !phys_active; } /** @@ -331,7 +332,7 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, const struct kvm_irq_level *irq) { - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); /* * The vcpu timer irq number cannot be determined in @@ -339,7 +340,7 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, * kvm_vcpu_set_target(). To handle this, we determine * vcpu timer irq number when the vcpu is reset. */ - timer->irq.irq = irq->irq; + vtimer->irq.irq = irq->irq; /* * The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8 @@ -347,7 +348,7 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, * resets the timer to be disabled and unmasked and is compliant with * the ARMv7 architecture. */ - timer->cntv_ctl = 0; + vtimer->cnt_ctl = 0; kvm_timer_update_state(vcpu); return 0; @@ -369,17 +370,17 @@ static void kvm_timer_init_interrupt(void *info) int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value) { - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); switch (regid) { case KVM_REG_ARM_TIMER_CTL: - timer->cntv_ctl = value; + vtimer->cnt_ctl = value; break; case KVM_REG_ARM_TIMER_CNT: vcpu->kvm->arch.timer.cntvoff = kvm_phys_timer_read() - value; break; case KVM_REG_ARM_TIMER_CVAL: - timer->cntv_cval = value; + vtimer->cnt_cval = value; break; default: return -1; @@ -391,15 +392,15 @@ int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value) u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid) { - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); switch (regid) { case KVM_REG_ARM_TIMER_CTL: - return timer->cntv_ctl; + return vtimer->cnt_ctl; case KVM_REG_ARM_TIMER_CNT: return kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff; case KVM_REG_ARM_TIMER_CVAL: - return timer->cntv_cval; + return vtimer->cnt_cval; } return (u64)-1; } @@ -463,14 +464,16 @@ int kvm_timer_hyp_init(void) void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); timer_disarm(timer); - kvm_vgic_unmap_phys_irq(vcpu, timer->irq.irq); + kvm_vgic_unmap_phys_irq(vcpu, vtimer->irq.irq); } int kvm_timer_enable(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); struct irq_desc *desc; struct irq_data *data; int phys_irq; @@ -498,7 +501,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu) * Tell the VGIC that the virtual interrupt is tied to a * physical interrupt. We do that once per VCPU. */ - ret = kvm_vgic_map_phys_irq(vcpu, timer->irq.irq, phys_irq); + ret = kvm_vgic_map_phys_irq(vcpu, vtimer->irq.irq, phys_irq); if (ret) return ret; diff --git a/virt/kvm/arm/hyp/timer-sr.c b/virt/kvm/arm/hyp/timer-sr.c index 63e28dd18bb0..0cf08953e81c 100644 --- a/virt/kvm/arm/hyp/timer-sr.c +++ b/virt/kvm/arm/hyp/timer-sr.c @@ -25,11 +25,12 @@ void __hyp_text __timer_save_state(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); u64 val; if (timer->enabled) { - timer->cntv_ctl = read_sysreg_el0(cntv_ctl); - timer->cntv_cval = read_sysreg_el0(cntv_cval); + vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl); + vtimer->cnt_cval = read_sysreg_el0(cntv_cval); } /* Disable the virtual timer */ @@ -54,6 +55,7 @@ void __hyp_text __timer_restore_state(struct kvm_vcpu *vcpu) { struct kvm *kvm = kern_hyp_va(vcpu->kvm); struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); u64 val; /* Those bits are already configured at boot on VHE-system */ @@ -70,8 +72,8 @@ void __hyp_text __timer_restore_state(struct kvm_vcpu *vcpu) if (timer->enabled) { write_sysreg(kvm->arch.timer.cntvoff, cntvoff_el2); - write_sysreg_el0(timer->cntv_cval, cntv_cval); + write_sysreg_el0(vtimer->cnt_cval, cntv_cval); isb(); - write_sysreg_el0(timer->cntv_ctl, cntv_ctl); + write_sysreg_el0(vtimer->cnt_ctl, cntv_ctl); } } -- cgit v1.3-6-gb490 From 90de943a430028ee389b22bf4a7ae5867c32ce0c Mon Sep 17 00:00:00 2001 From: Jintack Lim Date: Fri, 3 Feb 2017 10:20:00 -0500 Subject: KVM: arm/arm64: Move cntvoff to each timer context Make cntvoff per each timer context. This is helpful to abstract kvm timer functions to work with timer context without considering timer types (e.g. physical timer or virtual timer). This also would pave the way for ever doing adjustments of the cntvoff on a per-CPU basis if that should ever make sense. Signed-off-by: Jintack Lim Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier --- arch/arm/include/asm/kvm_host.h | 3 --- arch/arm/kvm/arm.c | 1 - arch/arm64/include/asm/kvm_host.h | 3 --- include/kvm/arm_arch_timer.h | 9 +++------ virt/kvm/arm/arch_timer.c | 38 ++++++++++++++++++++++++++++---------- virt/kvm/arm/hyp/timer-sr.c | 3 +-- 6 files changed, 32 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index d5423ab15ed5..cc495d799c67 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -60,9 +60,6 @@ struct kvm_arch { /* The last vcpu id that ran on each physical CPU */ int __percpu *last_vcpu_ran; - /* Timer */ - struct arch_timer_kvm timer; - /* * Anything that is not used directly from assembly code goes * here. diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 9d7446456e0c..f93f2171a48b 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -135,7 +135,6 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) goto out_free_stage2_pgd; kvm_vgic_early_init(kvm); - kvm_timer_init(kvm); /* Mark the initial VMID generation invalid */ kvm->arch.vmid_gen = 0; diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index e5050388e062..4a758cba1262 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -70,9 +70,6 @@ struct kvm_arch { /* Interrupt controller */ struct vgic_dist vgic; - - /* Timer */ - struct arch_timer_kvm timer; }; #define KVM_NR_MEM_OBJS 40 diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index daad3c133b9f..2c8560b4642a 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -23,11 +23,6 @@ #include #include -struct arch_timer_kvm { - /* Virtual offset */ - u64 cntvoff; -}; - struct arch_timer_context { /* Registers: control register, timer value */ u32 cnt_ctl; @@ -38,6 +33,9 @@ struct arch_timer_context { /* Active IRQ state caching */ bool active_cleared_last; + + /* Virtual offset */ + u64 cntvoff; }; struct arch_timer_cpu { @@ -58,7 +56,6 @@ struct arch_timer_cpu { int kvm_timer_hyp_init(void); int kvm_timer_enable(struct kvm_vcpu *vcpu); -void kvm_timer_init(struct kvm *kvm); int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, const struct kvm_irq_level *irq); void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu); diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index d3556b3ca694..5004a679b125 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -101,9 +101,10 @@ static void kvm_timer_inject_irq_work(struct work_struct *work) static u64 kvm_timer_compute_delta(struct kvm_vcpu *vcpu) { u64 cval, now; + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); - cval = vcpu_vtimer(vcpu)->cnt_cval; - now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff; + cval = vtimer->cnt_cval; + now = kvm_phys_timer_read() - vtimer->cntvoff; if (now < cval) { u64 ns; @@ -159,7 +160,7 @@ bool kvm_timer_should_fire(struct kvm_vcpu *vcpu) return false; cval = vtimer->cnt_cval; - now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff; + now = kvm_phys_timer_read() - vtimer->cntvoff; return cval <= now; } @@ -354,10 +355,32 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, return 0; } +/* Make the updates of cntvoff for all vtimer contexts atomic */ +static void update_vtimer_cntvoff(struct kvm_vcpu *vcpu, u64 cntvoff) +{ + int i; + struct kvm *kvm = vcpu->kvm; + struct kvm_vcpu *tmp; + + mutex_lock(&kvm->lock); + kvm_for_each_vcpu(i, tmp, kvm) + vcpu_vtimer(tmp)->cntvoff = cntvoff; + + /* + * When called from the vcpu create path, the CPU being created is not + * included in the loop above, so we just set it here as well. + */ + vcpu_vtimer(vcpu)->cntvoff = cntvoff; + mutex_unlock(&kvm->lock); +} + void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + /* Synchronize cntvoff across all vtimers of a VM. */ + update_vtimer_cntvoff(vcpu, kvm_phys_timer_read()); + INIT_WORK(&timer->expired, kvm_timer_inject_irq_work); hrtimer_init(&timer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); timer->timer.function = kvm_timer_expire; @@ -377,7 +400,7 @@ int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value) vtimer->cnt_ctl = value; break; case KVM_REG_ARM_TIMER_CNT: - vcpu->kvm->arch.timer.cntvoff = kvm_phys_timer_read() - value; + update_vtimer_cntvoff(vcpu, kvm_phys_timer_read() - value); break; case KVM_REG_ARM_TIMER_CVAL: vtimer->cnt_cval = value; @@ -398,7 +421,7 @@ u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid) case KVM_REG_ARM_TIMER_CTL: return vtimer->cnt_ctl; case KVM_REG_ARM_TIMER_CNT: - return kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff; + return kvm_phys_timer_read() - vtimer->cntvoff; case KVM_REG_ARM_TIMER_CVAL: return vtimer->cnt_cval; } @@ -510,11 +533,6 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu) return 0; } -void kvm_timer_init(struct kvm *kvm) -{ - kvm->arch.timer.cntvoff = kvm_phys_timer_read(); -} - /* * On VHE system, we only need to configure trap on physical timer and counter * accesses in EL0 and EL1 once, not for every world switch. diff --git a/virt/kvm/arm/hyp/timer-sr.c b/virt/kvm/arm/hyp/timer-sr.c index 0cf08953e81c..4734915ab71f 100644 --- a/virt/kvm/arm/hyp/timer-sr.c +++ b/virt/kvm/arm/hyp/timer-sr.c @@ -53,7 +53,6 @@ void __hyp_text __timer_save_state(struct kvm_vcpu *vcpu) void __hyp_text __timer_restore_state(struct kvm_vcpu *vcpu) { - struct kvm *kvm = kern_hyp_va(vcpu->kvm); struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); u64 val; @@ -71,7 +70,7 @@ void __hyp_text __timer_restore_state(struct kvm_vcpu *vcpu) } if (timer->enabled) { - write_sysreg(kvm->arch.timer.cntvoff, cntvoff_el2); + write_sysreg(vtimer->cntvoff, cntvoff_el2); write_sysreg_el0(vtimer->cnt_cval, cntv_cval); isb(); write_sysreg_el0(vtimer->cnt_ctl, cntv_ctl); -- cgit v1.3-6-gb490 From 9171fa2e0951b0cb6c443e0dce2c4620de3b1dfe Mon Sep 17 00:00:00 2001 From: Jintack Lim Date: Fri, 3 Feb 2017 10:20:01 -0500 Subject: KVM: arm/arm64: Decouple kvm timer functions from virtual timer Now that we have a separate structure for timer context, make functions generic so that they can work with any timer context, not just the virtual timer context. This does not change the virtual timer functionality. Signed-off-by: Jintack Lim Acked-by: Marc Zyngier Acked-by: Christoffer Dall Signed-off-by: Marc Zyngier --- arch/arm/kvm/arm.c | 2 +- include/kvm/arm_arch_timer.h | 2 +- virt/kvm/arm/arch_timer.c | 54 ++++++++++++++++++++------------------------ 3 files changed, 27 insertions(+), 31 deletions(-) (limited to 'include') diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index f93f2171a48b..0ecd6cf362fc 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -300,7 +300,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) { - return kvm_timer_should_fire(vcpu); + return kvm_timer_should_fire(vcpu_vtimer(vcpu)); } void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index 2c8560b4642a..f46fa3b62b06 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -66,7 +66,7 @@ void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu); u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid); int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value); -bool kvm_timer_should_fire(struct kvm_vcpu *vcpu); +bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx); void kvm_timer_schedule(struct kvm_vcpu *vcpu); void kvm_timer_unschedule(struct kvm_vcpu *vcpu); diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 5004a679b125..5261f98ae686 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -98,13 +98,12 @@ static void kvm_timer_inject_irq_work(struct work_struct *work) kvm_vcpu_kick(vcpu); } -static u64 kvm_timer_compute_delta(struct kvm_vcpu *vcpu) +static u64 kvm_timer_compute_delta(struct arch_timer_context *timer_ctx) { u64 cval, now; - struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); - cval = vtimer->cnt_cval; - now = kvm_phys_timer_read() - vtimer->cntvoff; + cval = timer_ctx->cnt_cval; + now = kvm_phys_timer_read() - timer_ctx->cntvoff; if (now < cval) { u64 ns; @@ -133,7 +132,7 @@ static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt) * PoV (NTP on the host may have forced it to expire * early). If we should have slept longer, restart it. */ - ns = kvm_timer_compute_delta(vcpu); + ns = kvm_timer_compute_delta(vcpu_vtimer(vcpu)); if (unlikely(ns)) { hrtimer_forward_now(hrt, ns_to_ktime(ns)); return HRTIMER_RESTART; @@ -143,43 +142,39 @@ static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt) return HRTIMER_NORESTART; } -static bool kvm_timer_irq_can_fire(struct kvm_vcpu *vcpu) +static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx) { - struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); - - return !(vtimer->cnt_ctl & ARCH_TIMER_CTRL_IT_MASK) && - (vtimer->cnt_ctl & ARCH_TIMER_CTRL_ENABLE); + return !(timer_ctx->cnt_ctl & ARCH_TIMER_CTRL_IT_MASK) && + (timer_ctx->cnt_ctl & ARCH_TIMER_CTRL_ENABLE); } -bool kvm_timer_should_fire(struct kvm_vcpu *vcpu) +bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx) { - struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); u64 cval, now; - if (!kvm_timer_irq_can_fire(vcpu)) + if (!kvm_timer_irq_can_fire(timer_ctx)) return false; - cval = vtimer->cnt_cval; - now = kvm_phys_timer_read() - vtimer->cntvoff; + cval = timer_ctx->cnt_cval; + now = kvm_phys_timer_read() - timer_ctx->cntvoff; return cval <= now; } -static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level) +static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level, + struct arch_timer_context *timer_ctx) { int ret; - struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); BUG_ON(!vgic_initialized(vcpu->kvm)); - vtimer->active_cleared_last = false; - vtimer->irq.level = new_level; - trace_kvm_timer_update_irq(vcpu->vcpu_id, vtimer->irq.irq, - vtimer->irq.level); + timer_ctx->active_cleared_last = false; + timer_ctx->irq.level = new_level; + trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_ctx->irq.irq, + timer_ctx->irq.level); - ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, - vtimer->irq.irq, - vtimer->irq.level); + ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, timer_ctx->irq.irq, + timer_ctx->irq.level); WARN_ON(ret); } @@ -201,8 +196,8 @@ static int kvm_timer_update_state(struct kvm_vcpu *vcpu) if (!vgic_initialized(vcpu->kvm) || !timer->enabled) return -ENODEV; - if (kvm_timer_should_fire(vcpu) != vtimer->irq.level) - kvm_timer_update_irq(vcpu, !vtimer->irq.level); + if (kvm_timer_should_fire(vtimer) != vtimer->irq.level) + kvm_timer_update_irq(vcpu, !vtimer->irq.level, vtimer); return 0; } @@ -215,6 +210,7 @@ static int kvm_timer_update_state(struct kvm_vcpu *vcpu) void kvm_timer_schedule(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); BUG_ON(timer_is_armed(timer)); @@ -223,18 +219,18 @@ void kvm_timer_schedule(struct kvm_vcpu *vcpu) * already expired, because kvm_vcpu_block will return before putting * the thread to sleep. */ - if (kvm_timer_should_fire(vcpu)) + if (kvm_timer_should_fire(vtimer)) return; /* * If the timer is not capable of raising interrupts (disabled or * masked), then there's no more work for us to do. */ - if (!kvm_timer_irq_can_fire(vcpu)) + if (!kvm_timer_irq_can_fire(vtimer)) return; /* The timer has not yet expired, schedule a background timer */ - timer_arm(timer, kvm_timer_compute_delta(vcpu)); + timer_arm(timer, kvm_timer_compute_delta(vtimer)); } void kvm_timer_unschedule(struct kvm_vcpu *vcpu) -- cgit v1.3-6-gb490 From 009a5701bb2d166073f75643bc9237fe014c6bf5 Mon Sep 17 00:00:00 2001 From: Jintack Lim Date: Fri, 3 Feb 2017 10:20:02 -0500 Subject: KVM: arm/arm64: Add the EL1 physical timer context Add the EL1 physical timer context. Signed-off-by: Jintack Lim Acked-by: Christoffer Dall Signed-off-by: Marc Zyngier --- include/kvm/arm_arch_timer.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index f46fa3b62b06..6445a3d9a6e2 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -40,6 +40,7 @@ struct arch_timer_context { struct arch_timer_cpu { struct arch_timer_context vtimer; + struct arch_timer_context ptimer; /* Background timer used when the guest is not running */ struct hrtimer timer; @@ -75,4 +76,5 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu); void kvm_timer_init_vhe(void); #define vcpu_vtimer(v) (&(v)->arch.timer_cpu.vtimer) +#define vcpu_ptimer(v) (&(v)->arch.timer_cpu.ptimer) #endif -- cgit v1.3-6-gb490 From a91d18551e7b35e34a04b6fd199ca8568e7e9315 Mon Sep 17 00:00:00 2001 From: Jintack Lim Date: Fri, 3 Feb 2017 10:20:03 -0500 Subject: KVM: arm/arm64: Initialize the emulated EL1 physical timer Initialize the emulated EL1 physical timer with the default irq number. Signed-off-by: Jintack Lim Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier --- arch/arm/kvm/reset.c | 9 ++++++++- arch/arm64/kvm/reset.c | 9 ++++++++- include/kvm/arm_arch_timer.h | 3 ++- virt/kvm/arm/arch_timer.c | 9 +++++++-- 4 files changed, 25 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/arch/arm/kvm/reset.c b/arch/arm/kvm/reset.c index 4b5e802e57d1..1da8b2d14550 100644 --- a/arch/arm/kvm/reset.c +++ b/arch/arm/kvm/reset.c @@ -37,6 +37,11 @@ static struct kvm_regs cortexa_regs_reset = { .usr_regs.ARM_cpsr = SVC_MODE | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT, }; +static const struct kvm_irq_level cortexa_ptimer_irq = { + { .irq = 30 }, + .level = 1, +}; + static const struct kvm_irq_level cortexa_vtimer_irq = { { .irq = 27 }, .level = 1, @@ -58,6 +63,7 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) { struct kvm_regs *reset_regs; const struct kvm_irq_level *cpu_vtimer_irq; + const struct kvm_irq_level *cpu_ptimer_irq; switch (vcpu->arch.target) { case KVM_ARM_TARGET_CORTEX_A7: @@ -65,6 +71,7 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) reset_regs = &cortexa_regs_reset; vcpu->arch.midr = read_cpuid_id(); cpu_vtimer_irq = &cortexa_vtimer_irq; + cpu_ptimer_irq = &cortexa_ptimer_irq; break; default: return -ENODEV; @@ -77,5 +84,5 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) kvm_reset_coprocs(vcpu); /* Reset arch_timer context */ - return kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq); + return kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq, cpu_ptimer_irq); } diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index e95d4f68bf54..d9e9697de1b2 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -46,6 +46,11 @@ static const struct kvm_regs default_regs_reset32 = { COMPAT_PSR_I_BIT | COMPAT_PSR_F_BIT), }; +static const struct kvm_irq_level default_ptimer_irq = { + .irq = 30, + .level = 1, +}; + static const struct kvm_irq_level default_vtimer_irq = { .irq = 27, .level = 1, @@ -104,6 +109,7 @@ int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext) int kvm_reset_vcpu(struct kvm_vcpu *vcpu) { const struct kvm_irq_level *cpu_vtimer_irq; + const struct kvm_irq_level *cpu_ptimer_irq; const struct kvm_regs *cpu_reset; switch (vcpu->arch.target) { @@ -117,6 +123,7 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) } cpu_vtimer_irq = &default_vtimer_irq; + cpu_ptimer_irq = &default_ptimer_irq; break; } @@ -130,5 +137,5 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) kvm_pmu_vcpu_reset(vcpu); /* Reset timer */ - return kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq); + return kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq, cpu_ptimer_irq); } diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index 6445a3d9a6e2..f1d2fba0b9c6 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -58,7 +58,8 @@ struct arch_timer_cpu { int kvm_timer_hyp_init(void); int kvm_timer_enable(struct kvm_vcpu *vcpu); int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, - const struct kvm_irq_level *irq); + const struct kvm_irq_level *virt_irq, + const struct kvm_irq_level *phys_irq); void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu); void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu); void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu); diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 5261f98ae686..dbd0af19d27e 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -327,9 +327,11 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) } int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, - const struct kvm_irq_level *irq) + const struct kvm_irq_level *virt_irq, + const struct kvm_irq_level *phys_irq) { struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); + struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); /* * The vcpu timer irq number cannot be determined in @@ -337,7 +339,8 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, * kvm_vcpu_set_target(). To handle this, we determine * vcpu timer irq number when the vcpu is reset. */ - vtimer->irq.irq = irq->irq; + vtimer->irq.irq = virt_irq->irq; + ptimer->irq.irq = phys_irq->irq; /* * The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8 @@ -346,6 +349,7 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, * the ARMv7 architecture. */ vtimer->cnt_ctl = 0; + ptimer->cnt_ctl = 0; kvm_timer_update_state(vcpu); return 0; @@ -376,6 +380,7 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) /* Synchronize cntvoff across all vtimers of a VM. */ update_vtimer_cntvoff(vcpu, kvm_phys_timer_read()); + vcpu_ptimer(vcpu)->cntvoff = 0; INIT_WORK(&timer->expired, kvm_timer_inject_irq_work); hrtimer_init(&timer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); -- cgit v1.3-6-gb490 From 7b6b46311a8562fb3a9e035ed6ffab6d49c28886 Mon Sep 17 00:00:00 2001 From: Jintack Lim Date: Fri, 3 Feb 2017 10:20:08 -0500 Subject: KVM: arm/arm64: Emulate the EL1 phys timer registers Emulate read and write operations to CNTP_TVAL, CNTP_CVAL and CNTP_CTL. Now VMs are able to use the EL1 physical timer. Signed-off-by: Jintack Lim Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier --- arch/arm64/kvm/sys_regs.c | 37 ++++++++++++++++++++++++++++++++++--- include/kvm/arm_arch_timer.h | 2 ++ virt/kvm/arm/arch_timer.c | 2 +- 3 files changed, 37 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 1cd3464ff88d..0e26f8c2b56f 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -824,7 +824,14 @@ static bool access_cntp_tval(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *r) { - kvm_inject_undefined(vcpu); + struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); + u64 now = kvm_phys_timer_read(); + + if (p->is_write) + ptimer->cnt_cval = p->regval + now; + else + p->regval = ptimer->cnt_cval - now; + return true; } @@ -832,7 +839,25 @@ static bool access_cntp_ctl(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *r) { - kvm_inject_undefined(vcpu); + struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); + + if (p->is_write) { + /* ISTATUS bit is read-only */ + ptimer->cnt_ctl = p->regval & ~ARCH_TIMER_CTRL_IT_STAT; + } else { + u64 now = kvm_phys_timer_read(); + + p->regval = ptimer->cnt_ctl; + /* + * Set ISTATUS bit if it's expired. + * Note that according to ARMv8 ARM Issue A.k, ISTATUS bit is + * UNKNOWN when ENABLE bit is 0, so we chose to set ISTATUS bit + * regardless of ENABLE bit for our implementation convenience. + */ + if (ptimer->cnt_cval <= now) + p->regval |= ARCH_TIMER_CTRL_IT_STAT; + } + return true; } @@ -840,7 +865,13 @@ static bool access_cntp_cval(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *r) { - kvm_inject_undefined(vcpu); + struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); + + if (p->is_write) + ptimer->cnt_cval = p->regval; + else + p->regval = ptimer->cnt_cval; + return true; } diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index f1d2fba0b9c6..fe797d6ef89d 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -72,6 +72,8 @@ bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx); void kvm_timer_schedule(struct kvm_vcpu *vcpu); void kvm_timer_unschedule(struct kvm_vcpu *vcpu); +u64 kvm_phys_timer_read(void); + void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu); void kvm_timer_init_vhe(void); diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 33257b560f74..35d7100e0815 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -40,7 +40,7 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu) vcpu_vtimer(vcpu)->active_cleared_last = false; } -static u64 kvm_phys_timer_read(void) +u64 kvm_phys_timer_read(void) { return timecounter->cc->read(timecounter->cc); } -- cgit v1.3-6-gb490