aboutsummaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
Diffstat (limited to '')
-rwxr-xr-xtools/debugging/kernel-chktaint9
-rw-r--r--tools/hv/hv_kvp_daemon.c6
-rw-r--r--tools/include/uapi/asm/errno.h4
-rw-r--r--tools/lib/perf/evlist.c50
-rw-r--r--tools/perf/Makefile.perf24
-rw-r--r--tools/perf/builtin-c2c.c12
-rw-r--r--tools/perf/builtin-lock.c3
-rw-r--r--tools/perf/builtin-record.c34
-rw-r--r--tools/perf/builtin-script.c5
-rw-r--r--tools/perf/builtin-stat.c5
-rw-r--r--tools/perf/dlfilters/dlfilter-show-cycles.c4
-rw-r--r--tools/perf/util/affinity.c8
-rw-r--r--tools/perf/util/genelf.c20
-rw-r--r--tools/perf/util/metricgroup.c3
-rw-r--r--tools/testing/selftests/bpf/DENYLIST.s390x1
-rw-r--r--tools/testing/selftests/bpf/verifier/precise.c25
-rw-r--r--tools/testing/selftests/kvm/.gitignore1
-rw-r--r--tools/testing/selftests/kvm/Makefile12
-rw-r--r--tools/testing/selftests/kvm/access_tracking_perf_test.c25
-rw-r--r--tools/testing/selftests/kvm/include/kvm_util_base.h4
-rw-r--r--tools/testing/selftests/kvm/include/test_util.h6
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/evmcs.h45
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/processor.h8
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/svm_util.h7
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/vmx.h54
-rw-r--r--tools/testing/selftests/kvm/lib/assert.c20
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util.c39
-rw-r--r--tools/testing/selftests/kvm/lib/string_override.c39
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/processor.c40
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/svm.c14
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/vmx.c20
-rw-r--r--tools/testing/selftests/kvm/rseq_test.c2
-rw-r--r--tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c134
-rw-r--r--tools/testing/selftests/kvm/x86_64/hyperv_features.c13
-rw-r--r--tools/testing/selftests/kvm/x86_64/nested_exceptions_test.c295
-rw-r--r--tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c43
-rw-r--r--tools/testing/selftests/landlock/fs_test.c155
-rw-r--r--tools/testing/selftests/net/.gitignore50
-rw-r--r--tools/testing/selftests/net/io_uring_zerocopy_tx.c101
-rwxr-xr-xtools/testing/selftests/net/io_uring_zerocopy_tx.sh10
-rwxr-xr-xtools/testing/selftests/netfilter/nft_conntrack_helper.sh36
41 files changed, 1016 insertions, 370 deletions
diff --git a/tools/debugging/kernel-chktaint b/tools/debugging/kernel-chktaint
index f1af27ce9f20..279be06332be 100755
--- a/tools/debugging/kernel-chktaint
+++ b/tools/debugging/kernel-chktaint
@@ -187,6 +187,7 @@ else
echo " * auxiliary taint, defined for and used by distros (#16)"
fi
+
T=`expr $T / 2`
if [ `expr $T % 2` -eq 0 ]; then
addout " "
@@ -195,6 +196,14 @@ else
echo " * kernel was built with the struct randomization plugin (#17)"
fi
+T=`expr $T / 2`
+if [ `expr $T % 2` -eq 0 ]; then
+ addout " "
+else
+ addout "N"
+ echo " * an in-kernel test (such as a KUnit test) has been run (#18)"
+fi
+
echo "For a more detailed explanation of the various taint flags see"
echo " Documentation/admin-guide/tainted-kernels.rst in the Linux kernel sources"
echo " or https://kernel.org/doc/html/latest/admin-guide/tainted-kernels.html"
diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c
index 1e6fd6ca513b..27f5e7dfc2f7 100644
--- a/tools/hv/hv_kvp_daemon.c
+++ b/tools/hv/hv_kvp_daemon.c
@@ -44,7 +44,7 @@
/*
* KVP protocol: The user mode component first registers with the
- * the kernel component. Subsequently, the kernel component requests, data
+ * kernel component. Subsequently, the kernel component requests, data
* for the specified keys. In response to this message the user mode component
* fills in the value corresponding to the specified key. We overload the
* sequence field in the cn_msg header to define our KVP message types.
@@ -772,11 +772,11 @@ static int kvp_process_ip_address(void *addrp,
const char *str;
if (family == AF_INET) {
- addr = (struct sockaddr_in *)addrp;
+ addr = addrp;
str = inet_ntop(family, &addr->sin_addr, tmp, 50);
addr_length = INET_ADDRSTRLEN;
} else {
- addr6 = (struct sockaddr_in6 *)addrp;
+ addr6 = addrp;
str = inet_ntop(family, &addr6->sin6_addr.s6_addr, tmp, 50);
addr_length = INET6_ADDRSTRLEN;
}
diff --git a/tools/include/uapi/asm/errno.h b/tools/include/uapi/asm/errno.h
index d30439b4b8ab..869379f91fe4 100644
--- a/tools/include/uapi/asm/errno.h
+++ b/tools/include/uapi/asm/errno.h
@@ -9,8 +9,8 @@
#include "../../../arch/alpha/include/uapi/asm/errno.h"
#elif defined(__mips__)
#include "../../../arch/mips/include/uapi/asm/errno.h"
-#elif defined(__xtensa__)
-#include "../../../arch/xtensa/include/uapi/asm/errno.h"
+#elif defined(__hppa__)
+#include "../../../arch/parisc/include/uapi/asm/errno.h"
#else
#include <asm-generic/errno.h>
#endif
diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c
index e6c98a6e3908..6b1bafe267a4 100644
--- a/tools/lib/perf/evlist.c
+++ b/tools/lib/perf/evlist.c
@@ -486,6 +486,7 @@ mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
if (ops->idx)
ops->idx(evlist, evsel, mp, idx);
+ pr_debug("idx %d: mmapping fd %d\n", idx, *output);
if (ops->mmap(map, mp, *output, evlist_cpu) < 0)
return -1;
@@ -494,6 +495,7 @@ mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
if (!idx)
perf_evlist__set_mmap_first(evlist, map, overwrite);
} else {
+ pr_debug("idx %d: set output fd %d -> %d\n", idx, fd, *output);
if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0)
return -1;
@@ -520,6 +522,48 @@ mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
}
static int
+mmap_per_thread(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
+ struct perf_mmap_param *mp)
+{
+ int nr_threads = perf_thread_map__nr(evlist->threads);
+ int nr_cpus = perf_cpu_map__nr(evlist->all_cpus);
+ int cpu, thread, idx = 0;
+ int nr_mmaps = 0;
+
+ pr_debug("%s: nr cpu values (may include -1) %d nr threads %d\n",
+ __func__, nr_cpus, nr_threads);
+
+ /* per-thread mmaps */
+ for (thread = 0; thread < nr_threads; thread++, idx++) {
+ int output = -1;
+ int output_overwrite = -1;
+
+ if (mmap_per_evsel(evlist, ops, idx, mp, 0, thread, &output,
+ &output_overwrite, &nr_mmaps))
+ goto out_unmap;
+ }
+
+ /* system-wide mmaps i.e. per-cpu */
+ for (cpu = 1; cpu < nr_cpus; cpu++, idx++) {
+ int output = -1;
+ int output_overwrite = -1;
+
+ if (mmap_per_evsel(evlist, ops, idx, mp, cpu, 0, &output,
+ &output_overwrite, &nr_mmaps))
+ goto out_unmap;
+ }
+
+ if (nr_mmaps != evlist->nr_mmaps)
+ pr_err("Miscounted nr_mmaps %d vs %d\n", nr_mmaps, evlist->nr_mmaps);
+
+ return 0;
+
+out_unmap:
+ perf_evlist__munmap(evlist);
+ return -1;
+}
+
+static int
mmap_per_cpu(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
struct perf_mmap_param *mp)
{
@@ -528,6 +572,8 @@ mmap_per_cpu(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
int nr_mmaps = 0;
int cpu, thread;
+ pr_debug("%s: nr cpu values %d nr threads %d\n", __func__, nr_cpus, nr_threads);
+
for (cpu = 0; cpu < nr_cpus; cpu++) {
int output = -1;
int output_overwrite = -1;
@@ -569,6 +615,7 @@ int perf_evlist__mmap_ops(struct perf_evlist *evlist,
struct perf_evlist_mmap_ops *ops,
struct perf_mmap_param *mp)
{
+ const struct perf_cpu_map *cpus = evlist->all_cpus;
struct perf_evsel *evsel;
if (!ops || !ops->get || !ops->mmap)
@@ -588,6 +635,9 @@ int perf_evlist__mmap_ops(struct perf_evlist *evlist,
if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0)
return -ENOMEM;
+ if (perf_cpu_map__empty(cpus))
+ return mmap_per_thread(evlist, ops, mp);
+
return mmap_per_cpu(evlist, ops, mp);
}
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index e5921b347153..bd947885a639 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -954,11 +954,11 @@ ifndef NO_LIBBPF
$(call QUIET_INSTALL, bpf-headers) \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf'; \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf/linux'; \
- $(INSTALL) include/bpf/*.h -t '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf'; \
- $(INSTALL) include/bpf/linux/*.h -t '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf/linux'
+ $(INSTALL) include/bpf/*.h -m 644 -t '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf'; \
+ $(INSTALL) include/bpf/linux/*.h -m 644 -t '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf/linux'
$(call QUIET_INSTALL, bpf-examples) \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perf_examples_instdir_SQ)/bpf'; \
- $(INSTALL) examples/bpf/*.c -t '$(DESTDIR_SQ)$(perf_examples_instdir_SQ)/bpf'
+ $(INSTALL) examples/bpf/*.c -m 644 -t '$(DESTDIR_SQ)$(perf_examples_instdir_SQ)/bpf'
endif
$(call QUIET_INSTALL, perf-archive) \
$(INSTALL) $(OUTPUT)perf-archive -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
@@ -967,13 +967,13 @@ endif
ifndef NO_LIBAUDIT
$(call QUIET_INSTALL, strace/groups) \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(STRACE_GROUPS_INSTDIR_SQ)'; \
- $(INSTALL) trace/strace/groups/* -t '$(DESTDIR_SQ)$(STRACE_GROUPS_INSTDIR_SQ)'
+ $(INSTALL) trace/strace/groups/* -m 644 -t '$(DESTDIR_SQ)$(STRACE_GROUPS_INSTDIR_SQ)'
endif
ifndef NO_LIBPERL
$(call QUIET_INSTALL, perl-scripts) \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace'; \
- $(INSTALL) scripts/perl/Perf-Trace-Util/lib/Perf/Trace/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace'; \
- $(INSTALL) scripts/perl/*.pl -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl'; \
+ $(INSTALL) scripts/perl/Perf-Trace-Util/lib/Perf/Trace/* -m 644 -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace'; \
+ $(INSTALL) scripts/perl/*.pl -m 644 -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl'; \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin'; \
$(INSTALL) scripts/perl/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin'
endif
@@ -990,23 +990,23 @@ endif
$(INSTALL) $(DLFILTERS) '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/dlfilters';
$(call QUIET_INSTALL, perf_completion-script) \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d'; \
- $(INSTALL) perf-completion.sh '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d/perf'
+ $(INSTALL) perf-completion.sh -m 644 '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d/perf'
$(call QUIET_INSTALL, perf-tip) \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(tip_instdir_SQ)'; \
- $(INSTALL) Documentation/tips.txt -t '$(DESTDIR_SQ)$(tip_instdir_SQ)'
+ $(INSTALL) Documentation/tips.txt -m 644 -t '$(DESTDIR_SQ)$(tip_instdir_SQ)'
install-tests: all install-gtk
$(call QUIET_INSTALL, tests) \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests'; \
- $(INSTALL) tests/attr.py '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests'; \
+ $(INSTALL) tests/attr.py -m 644 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests'; \
$(INSTALL) tests/pe-file.exe* '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests'; \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'; \
- $(INSTALL) tests/attr/* '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'; \
+ $(INSTALL) tests/attr/* -m 644 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'; \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell'; \
$(INSTALL) tests/shell/*.sh '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell'; \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/lib'; \
- $(INSTALL) tests/shell/lib/*.sh '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/lib'; \
- $(INSTALL) tests/shell/lib/*.py '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/lib'
+ $(INSTALL) tests/shell/lib/*.sh -m 644 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/lib'; \
+ $(INSTALL) tests/shell/lib/*.py -m 644 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/lib'
install-bin: install-tools install-tests install-traceevent-plugins
diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 653e13b5037e..438fc222e213 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -146,15 +146,15 @@ static void *c2c_he_zalloc(size_t size)
c2c_he->cpuset = bitmap_zalloc(c2c.cpus_cnt);
if (!c2c_he->cpuset)
- return NULL;
+ goto out_free;
c2c_he->nodeset = bitmap_zalloc(c2c.nodes_cnt);
if (!c2c_he->nodeset)
- return NULL;
+ goto out_free;
c2c_he->node_stats = zalloc(c2c.nodes_cnt * sizeof(*c2c_he->node_stats));
if (!c2c_he->node_stats)
- return NULL;
+ goto out_free;
init_stats(&c2c_he->cstats.lcl_hitm);
init_stats(&c2c_he->cstats.rmt_hitm);
@@ -163,6 +163,12 @@ static void *c2c_he_zalloc(size_t size)
init_stats(&c2c_he->cstats.load);
return &c2c_he->he;
+
+out_free:
+ free(c2c_he->nodeset);
+ free(c2c_he->cpuset);
+ free(c2c_he);
+ return NULL;
}
static void c2c_he_free(void *he)
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index dd11d3471baf..ea40ae52cd2c 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -1874,8 +1874,7 @@ int cmd_lock(int argc, const char **argv)
NULL
};
const char *const lock_subcommands[] = { "record", "report", "script",
- "info", "contention",
- "contention", NULL };
+ "info", "contention", NULL };
const char *lock_usage[] = {
NULL,
NULL
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 4713f0f3a6cf..f87ef43eb820 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -1906,14 +1906,18 @@ static int record__synthesize(struct record *rec, bool tail)
err = perf_event__synthesize_bpf_events(session, process_synthesized_event,
machine, opts);
- if (err < 0)
+ if (err < 0) {
pr_warning("Couldn't synthesize bpf events.\n");
+ err = 0;
+ }
if (rec->opts.synth & PERF_SYNTH_CGROUP) {
err = perf_event__synthesize_cgroups(tool, process_synthesized_event,
machine);
- if (err < 0)
+ if (err < 0) {
pr_warning("Couldn't synthesize cgroup events.\n");
+ err = 0;
+ }
}
if (rec->opts.nr_threads_synthesize > 1) {
@@ -3358,16 +3362,22 @@ static struct option __record_options[] = {
struct option *record_options = __record_options;
-static void record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_cpu_map *cpus)
+static int record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_cpu_map *cpus)
{
struct perf_cpu cpu;
int idx;
if (cpu_map__is_dummy(cpus))
- return;
+ return 0;
- perf_cpu_map__for_each_cpu(cpu, idx, cpus)
+ perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
+ /* Return ENODEV is input cpu is greater than max cpu */
+ if ((unsigned long)cpu.cpu > mask->nbits)
+ return -ENODEV;
set_bit(cpu.cpu, mask->bits);
+ }
+
+ return 0;
}
static int record__mmap_cpu_mask_init_spec(struct mmap_cpu_mask *mask, const char *mask_spec)
@@ -3379,7 +3389,9 @@ static int record__mmap_cpu_mask_init_spec(struct mmap_cpu_mask *mask, const cha
return -ENOMEM;
bitmap_zero(mask->bits, mask->nbits);
- record__mmap_cpu_mask_init(mask, cpus);
+ if (record__mmap_cpu_mask_init(mask, cpus))
+ return -ENODEV;
+
perf_cpu_map__put(cpus);
return 0;
@@ -3461,7 +3473,12 @@ static int record__init_thread_masks_spec(struct record *rec, struct perf_cpu_ma
pr_err("Failed to allocate CPUs mask\n");
return ret;
}
- record__mmap_cpu_mask_init(&cpus_mask, cpus);
+
+ ret = record__mmap_cpu_mask_init(&cpus_mask, cpus);
+ if (ret) {
+ pr_err("Failed to init cpu mask\n");
+ goto out_free_cpu_mask;
+ }
ret = record__thread_mask_alloc(&full_mask, cpu__max_cpu().cpu);
if (ret) {
@@ -3702,7 +3719,8 @@ static int record__init_thread_default_masks(struct record *rec, struct perf_cpu
if (ret)
return ret;
- record__mmap_cpu_mask_init(&rec->thread_masks->maps, cpus);
+ if (record__mmap_cpu_mask_init(&rec->thread_masks->maps, cpus))
+ return -ENODEV;
rec->nr_threads = 1;
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 13580a9c50b8..029b4330e59b 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -445,6 +445,9 @@ static int evsel__check_attr(struct evsel *evsel, struct perf_session *session)
struct perf_event_attr *attr = &evsel->core.attr;
bool allow_user_set;
+ if (evsel__is_dummy_event(evsel))
+ return 0;
+
if (perf_header__has_feat(&session->header, HEADER_STAT))
return 0;
@@ -566,6 +569,8 @@ static struct evsel *find_first_output_type(struct evlist *evlist,
struct evsel *evsel;
evlist__for_each_entry(evlist, evsel) {
+ if (evsel__is_dummy_event(evsel))
+ continue;
if (output_type(evsel->core.attr.type) == (int)type)
return evsel;
}
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 54cd29d07ca8..0b4a62e4ff67 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -1932,6 +1932,9 @@ setup_metrics:
free(str);
}
+ if (!stat_config.topdown_level)
+ stat_config.topdown_level = TOPDOWN_MAX_LEVEL;
+
if (!evsel_list->core.nr_entries) {
if (target__has_cpu(&target))
default_attrs0[0].config = PERF_COUNT_SW_CPU_CLOCK;
@@ -1948,8 +1951,6 @@ setup_metrics:
}
if (evlist__add_default_attrs(evsel_list, default_attrs1) < 0)
return -1;
-
- stat_config.topdown_level = TOPDOWN_MAX_LEVEL;
/* Platform specific attrs */
if (evlist__add_default_attrs(evsel_list, default_null_attrs) < 0)
return -1;
diff --git a/tools/perf/dlfilters/dlfilter-show-cycles.c b/tools/perf/dlfilters/dlfilter-show-cycles.c
index 9eccc97bff82..6d47298ebe9f 100644
--- a/tools/perf/dlfilters/dlfilter-show-cycles.c
+++ b/tools/perf/dlfilters/dlfilter-show-cycles.c
@@ -98,9 +98,9 @@ int filter_event_early(void *data, const struct perf_dlfilter_sample *sample, vo
static void print_vals(__u64 cycles, __u64 delta)
{
if (delta)
- printf("%10llu %10llu ", cycles, delta);
+ printf("%10llu %10llu ", (unsigned long long)cycles, (unsigned long long)delta);
else
- printf("%10llu %10s ", cycles, "");
+ printf("%10llu %10s ", (unsigned long long)cycles, "");
}
int filter_event(void *data, const struct perf_dlfilter_sample *sample, void *ctx)
diff --git a/tools/perf/util/affinity.c b/tools/perf/util/affinity.c
index 4d216c0dc425..4ee96b3c755b 100644
--- a/tools/perf/util/affinity.c
+++ b/tools/perf/util/affinity.c
@@ -49,8 +49,14 @@ void affinity__set(struct affinity *a, int cpu)
{
int cpu_set_size = get_cpu_set_size();
- if (cpu == -1)
+ /*
+ * Return:
+ * - if cpu is -1
+ * - restrict out of bound access to sched_cpus
+ */
+ if (cpu == -1 || ((cpu >= (cpu_set_size * 8))))
return;
+
a->changed = true;
set_bit(cpu, a->sched_cpus);
/*
diff --git a/tools/perf/util/genelf.c b/tools/perf/util/genelf.c
index 953338b9e887..ed28a0dbcb7f 100644
--- a/tools/perf/util/genelf.c
+++ b/tools/perf/util/genelf.c
@@ -30,10 +30,6 @@
#define BUILD_ID_URANDOM /* different uuid for each run */
-// FIXME, remove this and fix the deprecation warnings before its removed and
-// We'll break for good here...
-#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
-
#ifdef HAVE_LIBCRYPTO_SUPPORT
#define BUILD_ID_MD5
@@ -45,6 +41,7 @@
#endif
#ifdef BUILD_ID_MD5
+#include <openssl/evp.h>
#include <openssl/md5.h>
#endif
#endif
@@ -142,15 +139,20 @@ gen_build_id(struct buildid_note *note,
static void
gen_build_id(struct buildid_note *note, unsigned long load_addr, const void *code, size_t csize)
{
- MD5_CTX context;
+ EVP_MD_CTX *mdctx;
if (sizeof(note->build_id) < 16)
errx(1, "build_id too small for MD5");
- MD5_Init(&context);
- MD5_Update(&context, &load_addr, sizeof(load_addr));
- MD5_Update(&context, code, csize);
- MD5_Final((unsigned char *)note->build_id, &context);
+ mdctx = EVP_MD_CTX_new();
+ if (!mdctx)
+ errx(2, "failed to create EVP_MD_CTX");
+
+ EVP_DigestInit_ex(mdctx, EVP_md5(), NULL);
+ EVP_DigestUpdate(mdctx, &load_addr, sizeof(load_addr));
+ EVP_DigestUpdate(mdctx, code, csize);
+ EVP_DigestFinal_ex(mdctx, (unsigned char *)note->build_id, NULL);
+ EVP_MD_CTX_free(mdctx);
}
#endif
diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index 464475fd6b9a..c93bcaf6d55d 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -1655,6 +1655,9 @@ int metricgroup__parse_groups(const struct option *opt,
struct evlist *perf_evlist = *(struct evlist **)opt->value;
const struct pmu_events_table *table = pmu_events_table__find();
+ if (!table)
+ return -EINVAL;
+
return parse_groups(perf_evlist, str, metric_no_group,
metric_no_merge, NULL, metric_events, table);
}
diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x
index e33cab34d22f..db9810611788 100644
--- a/tools/testing/selftests/bpf/DENYLIST.s390x
+++ b/tools/testing/selftests/bpf/DENYLIST.s390x
@@ -65,3 +65,4 @@ send_signal # intermittently fails to receive signa
select_reuseport # intermittently fails on new s390x setup
xdp_synproxy # JIT does not support calling kernel function (kfunc)
unpriv_bpf_disabled # fentry
+lru_bug # prog 'printk': failed to auto-attach: -524
diff --git a/tools/testing/selftests/bpf/verifier/precise.c b/tools/testing/selftests/bpf/verifier/precise.c
index 9e754423fa8b..6c03a7d805f9 100644
--- a/tools/testing/selftests/bpf/verifier/precise.c
+++ b/tools/testing/selftests/bpf/verifier/precise.c
@@ -192,3 +192,28 @@
.result = VERBOSE_ACCEPT,
.retval = -1,
},
+{
+ "precise: mark_chain_precision for ARG_CONST_ALLOC_SIZE_OR_ZERO",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, offsetof(struct xdp_md, ingress_ifindex)),
+ BPF_LD_MAP_FD(BPF_REG_6, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_MOV64_IMM(BPF_REG_2, 1),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 0, 1),
+ BPF_MOV64_IMM(BPF_REG_2, 0x1000),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_reserve),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 42),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_submit),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_ringbuf = { 1 },
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = BPF_F_TEST_STATE_FREQ,
+ .errstr = "invalid access to memory, mem_size=1 off=42 size=8",
+ .result = REJECT,
+},
diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index 87d1a0b1bae0..2f0d705db9db 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -29,6 +29,7 @@
/x86_64/max_vcpuid_cap_test
/x86_64/mmio_warning_test
/x86_64/monitor_mwait_test
+/x86_64/nested_exceptions_test
/x86_64/nx_huge_pages_test
/x86_64/platform_info_test
/x86_64/pmu_event_filter_test
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 784abe7f0962..0172eb6cb6ee 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -48,6 +48,8 @@ LIBKVM += lib/rbtree.c
LIBKVM += lib/sparsebit.c
LIBKVM += lib/test_util.c
+LIBKVM_STRING += lib/string_override.c
+
LIBKVM_x86_64 += lib/x86_64/apic.c
LIBKVM_x86_64 += lib/x86_64/handlers.S
LIBKVM_x86_64 += lib/x86_64/perf_test_util.c
@@ -89,6 +91,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/kvm_clock_test
TEST_GEN_PROGS_x86_64 += x86_64/kvm_pv_test
TEST_GEN_PROGS_x86_64 += x86_64/mmio_warning_test
TEST_GEN_PROGS_x86_64 += x86_64/monitor_mwait_test
+TEST_GEN_PROGS_x86_64 += x86_64/nested_exceptions_test
TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test
TEST_GEN_PROGS_x86_64 += x86_64/pmu_event_filter_test
TEST_GEN_PROGS_x86_64 += x86_64/set_boot_cpu_id
@@ -221,7 +224,8 @@ LIBKVM_C := $(filter %.c,$(LIBKVM))
LIBKVM_S := $(filter %.S,$(LIBKVM))
LIBKVM_C_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_C))
LIBKVM_S_OBJ := $(patsubst %.S, $(OUTPUT)/%.o, $(LIBKVM_S))
-LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ)
+LIBKVM_STRING_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_STRING))
+LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) $(LIBKVM_STRING_OBJ)
EXTRA_CLEAN += $(LIBKVM_OBJS) cscope.*
@@ -232,6 +236,12 @@ $(LIBKVM_C_OBJ): $(OUTPUT)/%.o: %.c
$(LIBKVM_S_OBJ): $(OUTPUT)/%.o: %.S
$(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
+# Compile the string overrides as freestanding to prevent the compiler from
+# generating self-referential code, e.g. without "freestanding" the compiler may
+# "optimize" memcmp() by invoking memcmp(), thus causing infinite recursion.
+$(LIBKVM_STRING_OBJ): $(OUTPUT)/%.o: %.c
+ $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c -ffreestanding $< -o $@
+
x := $(shell mkdir -p $(sort $(dir $(TEST_GEN_PROGS))))
$(TEST_GEN_PROGS): $(LIBKVM_OBJS)
$(TEST_GEN_PROGS_EXTENDED): $(LIBKVM_OBJS)
diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c
index 1c2749b1481a..76c583a07ea2 100644
--- a/tools/testing/selftests/kvm/access_tracking_perf_test.c
+++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c
@@ -31,8 +31,9 @@
* These limitations are worked around in this test by using a large enough
* region of memory for each vCPU such that the number of translations cached in
* the TLB and the number of pages held in pagevecs are a small fraction of the
- * overall workload. And if either of those conditions are not true this test
- * will fail rather than silently passing.
+ * overall workload. And if either of those conditions are not true (for example
+ * in nesting, where TLB size is unlimited) this test will print a warning
+ * rather than silently passing.
*/
#include <inttypes.h>
#include <limits.h>
@@ -172,17 +173,23 @@ static void mark_vcpu_memory_idle(struct kvm_vm *vm,
vcpu_idx, no_pfn, pages);
/*
- * Test that at least 90% of memory has been marked idle (the rest might
- * not be marked idle because the pages have not yet made it to an LRU
- * list or the translations are still cached in the TLB). 90% is
+ * Check that at least 90% of memory has been marked idle (the rest
+ * might not be marked idle because the pages have not yet made it to an
+ * LRU list or the translations are still cached in the TLB). 90% is
* arbitrary; high enough that we ensure most memory access went through
* access tracking but low enough as to not make the test too brittle
* over time and across architectures.
+ *
+ * Note that when run in nested virtualization, this check will trigger
+ * much more frequently because TLB size is unlimited and since no flush
+ * happens, much more pages are cached there and guest won't see the
+ * "idle" bit cleared.
*/
- TEST_ASSERT(still_idle < pages / 10,
- "vCPU%d: Too many pages still idle (%"PRIu64 " out of %"
- PRIu64 ").\n",
- vcpu_idx, still_idle, pages);
+ if (still_idle < pages / 10)
+ printf("WARNING: vCPU%d: Too many pages still idle (%" PRIu64
+ "out of %" PRIu64 "), this will affect performance results"
+ ".\n",
+ vcpu_idx, still_idle, pages);
close(page_idle_fd);
close(pagemap_fd);
diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h
index 24fde97f6121..e42a09cd24a0 100644
--- a/tools/testing/selftests/kvm/include/kvm_util_base.h
+++ b/tools/testing/selftests/kvm/include/kvm_util_base.h
@@ -175,6 +175,10 @@ extern const struct vm_guest_mode_params vm_guest_mode_params[];
int open_path_or_exit(const char *path, int flags);
int open_kvm_dev_path_or_exit(void);
+
+bool get_kvm_intel_param_bool(const char *param);
+bool get_kvm_amd_param_bool(const char *param);
+
unsigned int kvm_check_cap(long cap);
static inline bool kvm_has_cap(long cap)
diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h
index 5c5a88180b6c..befc754ce9b3 100644
--- a/tools/testing/selftests/kvm/include/test_util.h
+++ b/tools/testing/selftests/kvm/include/test_util.h
@@ -63,8 +63,10 @@ void test_assert(bool exp, const char *exp_str,
#a, #b, #a, (unsigned long) __a, #b, (unsigned long) __b); \
} while (0)
-#define TEST_FAIL(fmt, ...) \
- TEST_ASSERT(false, fmt, ##__VA_ARGS__)
+#define TEST_FAIL(fmt, ...) do { \
+ TEST_ASSERT(false, fmt, ##__VA_ARGS__); \
+ __builtin_unreachable(); \
+} while (0)
size_t parse_size(const char *size);
diff --git a/tools/testing/selftests/kvm/include/x86_64/evmcs.h b/tools/testing/selftests/kvm/include/x86_64/evmcs.h
index 3c9260f8e116..58db74f68af2 100644
--- a/tools/testing/selftests/kvm/include/x86_64/evmcs.h
+++ b/tools/testing/selftests/kvm/include/x86_64/evmcs.h
@@ -203,14 +203,25 @@ struct hv_enlightened_vmcs {
u32 reserved:30;
} hv_enlightenments_control;
u32 hv_vp_id;
-
+ u32 padding32_2;
u64 hv_vm_id;
u64 partition_assist_page;
u64 padding64_4[4];
u64 guest_bndcfgs;
- u64 padding64_5[7];
+ u64 guest_ia32_perf_global_ctrl;
+ u64 guest_ia32_s_cet;
+ u64 guest_ssp;
+ u64 guest_ia32_int_ssp_table_addr;
+ u64 guest_ia32_lbr_ctl;
+ u64 padding64_5[2];
u64 xss_exit_bitmap;
- u64 padding64_6[7];
+ u64 encls_exiting_bitmap;
+ u64 host_ia32_perf_global_ctrl;
+ u64 tsc_multiplier;
+ u64 host_ia32_s_cet;
+ u64 host_ssp;
+ u64 host_ia32_int_ssp_table_addr;
+ u64 padding64_6;
};
#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE 0
@@ -656,6 +667,18 @@ static inline int evmcs_vmread(uint64_t encoding, uint64_t *value)
case VIRTUAL_PROCESSOR_ID:
*value = current_evmcs->virtual_processor_id;
break;
+ case HOST_IA32_PERF_GLOBAL_CTRL:
+ *value = current_evmcs->host_ia32_perf_global_ctrl;
+ break;
+ case GUEST_IA32_PERF_GLOBAL_CTRL:
+ *value = current_evmcs->guest_ia32_perf_global_ctrl;
+ break;
+ case ENCLS_EXITING_BITMAP:
+ *value = current_evmcs->encls_exiting_bitmap;
+ break;
+ case TSC_MULTIPLIER:
+ *value = current_evmcs->tsc_multiplier;
+ break;
default: return 1;
}
@@ -1169,6 +1192,22 @@ static inline int evmcs_vmwrite(uint64_t encoding, uint64_t value)
current_evmcs->virtual_processor_id = value;
current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT;
break;
+ case HOST_IA32_PERF_GLOBAL_CTRL:
+ current_evmcs->host_ia32_perf_global_ctrl = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
+ break;
+ case GUEST_IA32_PERF_GLOBAL_CTRL:
+ current_evmcs->guest_ia32_perf_global_ctrl = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
+ break;
+ case ENCLS_EXITING_BITMAP:
+ current_evmcs->encls_exiting_bitmap = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2;
+ break;
+ case TSC_MULTIPLIER:
+ current_evmcs->tsc_multiplier = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2;
+ break;
default: return 1;
}
diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 45edf45821d0..e8ca0d8a6a7e 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -754,7 +754,7 @@ void vm_install_exception_handler(struct kvm_vm *vm, int vector,
void (*handler)(struct ex_regs *));
/* If a toddler were to say "abracadabra". */
-#define KVM_EXCEPTION_MAGIC 0xabacadabaull
+#define KVM_EXCEPTION_MAGIC 0xabacadabaULL
/*
* KVM selftest exception fixup uses registers to coordinate with the exception
@@ -786,7 +786,7 @@ void vm_install_exception_handler(struct kvm_vm *vm, int vector,
"lea 1f(%%rip), %%r10\n\t" \
"lea 2f(%%rip), %%r11\n\t" \
"1: " insn "\n\t" \
- "mov $0, %[vector]\n\t" \
+ "movb $0, %[vector]\n\t" \
"jmp 3f\n\t" \
"2:\n\t" \
"mov %%r9b, %[vector]\n\t" \
@@ -825,6 +825,8 @@ static inline uint8_t wrmsr_safe(uint32_t msr, uint64_t val)
return kvm_asm_safe("wrmsr", "a"(val & -1u), "d"(val >> 32), "c"(msr));
}
+bool kvm_is_tdp_enabled(void);
+
uint64_t vm_get_page_table_entry(struct kvm_vm *vm, struct kvm_vcpu *vcpu,
uint64_t vaddr);
void vm_set_page_table_entry(struct kvm_vm *vm, struct kvm_vcpu *vcpu,
@@ -855,6 +857,8 @@ enum pg_level {
#define PG_SIZE_1G PG_LEVEL_SIZE(PG_LEVEL_1G)
void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level);
+void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+ uint64_t nr_bytes, int level);
/*
* Basic CPU control in CR0
diff --git a/tools/testing/selftests/kvm/include/x86_64/svm_util.h b/tools/testing/selftests/kvm/include/x86_64/svm_util.h
index a339b537a575..7aee6244ab6a 100644
--- a/tools/testing/selftests/kvm/include/x86_64/svm_util.h
+++ b/tools/testing/selftests/kvm/include/x86_64/svm_util.h
@@ -9,15 +9,12 @@
#ifndef SELFTEST_KVM_SVM_UTILS_H
#define SELFTEST_KVM_SVM_UTILS_H
+#include <asm/svm.h>
+
#include <stdint.h>
#include "svm.h"
#include "processor.h"
-#define SVM_EXIT_EXCP_BASE 0x040
-#define SVM_EXIT_HLT 0x078
-#define SVM_EXIT_MSR 0x07c
-#define SVM_EXIT_VMMCALL 0x081
-
struct svm_test_data {
/* VMCB */
struct vmcb *vmcb; /* gva */
diff --git a/tools/testing/selftests/kvm/include/x86_64/vmx.h b/tools/testing/selftests/kvm/include/x86_64/vmx.h
index 99fa1410964c..71b290b6469d 100644
--- a/tools/testing/selftests/kvm/include/x86_64/vmx.h
+++ b/tools/testing/selftests/kvm/include/x86_64/vmx.h
@@ -8,6 +8,8 @@
#ifndef SELFTEST_KVM_VMX_H
#define SELFTEST_KVM_VMX_H
+#include <asm/vmx.h>
+
#include <stdint.h>
#include "processor.h"
#include "apic.h"
@@ -100,55 +102,6 @@
#define VMX_EPT_VPID_CAP_AD_BITS 0x00200000
#define EXIT_REASON_FAILED_VMENTRY 0x80000000
-#define EXIT_REASON_EXCEPTION_NMI 0
-#define EXIT_REASON_EXTERNAL_INTERRUPT 1
-#define EXIT_REASON_TRIPLE_FAULT 2
-#define EXIT_REASON_INTERRUPT_WINDOW 7
-#define EXIT_REASON_NMI_WINDOW 8
-#define EXIT_REASON_TASK_SWITCH 9
-#define EXIT_REASON_CPUID 10
-#define EXIT_REASON_HLT 12
-#define EXIT_REASON_INVD 13
-#define EXIT_REASON_INVLPG 14
-#define EXIT_REASON_RDPMC 15
-#define EXIT_REASON_RDTSC 16
-#define EXIT_REASON_VMCALL 18
-#define EXIT_REASON_VMCLEAR 19
-#define EXIT_REASON_VMLAUNCH 20
-#define EXIT_REASON_VMPTRLD 21
-#define EXIT_REASON_VMPTRST 22
-#define EXIT_REASON_VMREAD 23
-#define EXIT_REASON_VMRESUME 24
-#define EXIT_REASON_VMWRITE 25
-#define EXIT_REASON_VMOFF 26
-#define EXIT_REASON_VMON 27
-#define EXIT_REASON_CR_ACCESS 28
-#define EXIT_REASON_DR_ACCESS 29
-#define EXIT_REASON_IO_INSTRUCTION 30
-#define EXIT_REASON_MSR_READ 31
-#define EXIT_REASON_MSR_WRITE 32
-#define EXIT_REASON_INVALID_STATE 33
-#define EXIT_REASON_MWAIT_INSTRUCTION 36
-#define EXIT_REASON_MONITOR_INSTRUCTION 39
-#define EXIT_REASON_PAUSE_INSTRUCTION 40
-#define EXIT_REASON_MCE_DURING_VMENTRY 41
-#define EXIT_REASON_TPR_BELOW_THRESHOLD 43
-#define EXIT_REASON_APIC_ACCESS 44
-#define EXIT_REASON_EOI_INDUCED 45
-#define EXIT_REASON_EPT_VIOLATION 48
-#define EXIT_REASON_EPT_MISCONFIG 49
-#define EXIT_REASON_INVEPT 50
-#define EXIT_REASON_RDTSCP 51
-#define EXIT_REASON_PREEMPTION_TIMER 52
-#define EXIT_REASON_INVVPID 53
-#define EXIT_REASON_WBINVD 54
-#define EXIT_REASON_XSETBV 55
-#define EXIT_REASON_APIC_WRITE 56
-#define EXIT_REASON_INVPCID 58
-#define EXIT_REASON_PML_FULL 62
-#define EXIT_REASON_XSAVES 63
-#define EXIT_REASON_XRSTORS 64
-#define LAST_EXIT_REASON 64
enum vmcs_field {
VIRTUAL_PROCESSOR_ID = 0x00000000,
@@ -208,6 +161,8 @@ enum vmcs_field {
VMWRITE_BITMAP_HIGH = 0x00002029,
XSS_EXIT_BITMAP = 0x0000202C,
XSS_EXIT_BITMAP_HIGH = 0x0000202D,
+ ENCLS_EXITING_BITMAP = 0x0000202E,
+ ENCLS_EXITING_BITMAP_HIGH = 0x0000202F,
TSC_MULTIPLIER = 0x00002032,
TSC_MULTIPLIER_HIGH = 0x00002033,
GUEST_PHYSICAL_ADDRESS = 0x00002400,
@@ -617,6 +572,7 @@ void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm,
uint32_t memslot);
void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm,
uint64_t addr, uint64_t size);
+bool kvm_vm_has_ept(struct kvm_vm *vm);
void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm,
uint32_t eptp_memslot);
void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm);
diff --git a/tools/testing/selftests/kvm/lib/assert.c b/tools/testing/selftests/kvm/lib/assert.c
index 71ade6100fd3..2bd25b191d15 100644
--- a/tools/testing/selftests/kvm/lib/assert.c
+++ b/tools/testing/selftests/kvm/lib/assert.c
@@ -22,7 +22,7 @@ static void test_dump_stack(void)
* Build and run this command:
*
* addr2line -s -e /proc/$PPID/exe -fpai {backtrace addresses} | \
- * grep -v test_dump_stack | cat -n 1>&2
+ * cat -n 1>&2
*
* Note that the spacing is different and there's no newline.
*/
@@ -36,18 +36,24 @@ static void test_dump_stack(void)
n * (((sizeof(void *)) * 2) + 1) +
/* Null terminator: */
1];
- char *c;
+ char *c = cmd;
n = backtrace(stack, n);
- c = &cmd[0];
- c += sprintf(c, "%s", addr2line);
/*
- * Skip the first 3 frames: backtrace, test_dump_stack, and
- * test_assert. We hope that backtrace isn't inlined and the other two
- * we've declared noinline.
+ * Skip the first 2 frames, which should be test_dump_stack() and
+ * test_assert(); both of which are declared noinline. Bail if the
+ * resulting stack trace would be empty. Otherwise, addr2line will block
+ * waiting for addresses to be passed in via stdin.
*/
+ if (n <= 2) {
+ fputs(" (stack trace empty)\n", stderr);
+ return;
+ }
+
+ c += sprintf(c, "%s", addr2line);
for (i = 2; i < n; i++)
c += sprintf(c, " %lx", ((unsigned long) stack[i]) - 1);
+
c += sprintf(c, "%s", pipeline);
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-result"
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 411a4c0bc81c..f1cb1627161f 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -50,6 +50,45 @@ int open_kvm_dev_path_or_exit(void)
return _open_kvm_dev_path_or_exit(O_RDONLY);
}
+static bool get_module_param_bool(const char *module_name, const char *param)
+{
+ const int path_size = 128;
+ char path[path_size];
+ char value;
+ ssize_t r;
+ int fd;
+
+ r = snprintf(path, path_size, "/sys/module/%s/parameters/%s",
+ module_name, param);
+ TEST_ASSERT(r < path_size,
+ "Failed to construct sysfs path in %d bytes.", path_size);
+
+ fd = open_path_or_exit(path, O_RDONLY);
+
+ r = read(fd, &value, 1);
+ TEST_ASSERT(r == 1, "read(%s) failed", path);
+
+ r = close(fd);
+ TEST_ASSERT(!r, "close(%s) failed", path);
+
+ if (value == 'Y')
+ return true;
+ else if (value == 'N')
+ return false;
+
+ TEST_FAIL("Unrecognized value '%c' for boolean module param", value);
+}
+
+bool get_kvm_intel_param_bool(const char *param)
+{
+ return get_module_param_bool("kvm_intel", param);
+}
+
+bool get_kvm_amd_param_bool(const char *param)
+{
+ return get_module_param_bool("kvm_amd", param);
+}
+
/*
* Capability
*
diff --git a/tools/testing/selftests/kvm/lib/string_override.c b/tools/testing/selftests/kvm/lib/string_override.c
new file mode 100644
index 000000000000..632398adc229
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/string_override.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <stddef.h>
+
+/*
+ * Override the "basic" built-in string helpers so that they can be used in
+ * guest code. KVM selftests don't support dynamic loading in guest code and
+ * will jump into the weeds if the compiler decides to insert an out-of-line
+ * call via the PLT.
+ */
+int memcmp(const void *cs, const void *ct, size_t count)
+{
+ const unsigned char *su1, *su2;
+ int res = 0;
+
+ for (su1 = cs, su2 = ct; 0 < count; ++su1, ++su2, count--) {
+ if ((res = *su1 - *su2) != 0)
+ break;
+ }
+ return res;
+}
+
+void *memcpy(void *dest, const void *src, size_t count)
+{
+ char *tmp = dest;
+ const char *s = src;
+
+ while (count--)
+ *tmp++ = *s++;
+ return dest;
+}
+
+void *memset(void *s, int c, size_t count)
+{
+ char *xs = s;
+
+ while (count--)
+ *xs++ = c;
+ return s;
+}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index 2e6e61bbe81b..39c4409ef56a 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -111,6 +111,14 @@ static void sregs_dump(FILE *stream, struct kvm_sregs *sregs, uint8_t indent)
}
}
+bool kvm_is_tdp_enabled(void)
+{
+ if (is_intel_cpu())
+ return get_kvm_intel_param_bool("ept");
+ else
+ return get_kvm_amd_param_bool("npt");
+}
+
void virt_arch_pgd_alloc(struct kvm_vm *vm)
{
TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
@@ -214,6 +222,25 @@ void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
__virt_pg_map(vm, vaddr, paddr, PG_LEVEL_4K);
}
+void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+ uint64_t nr_bytes, int level)
+{
+ uint64_t pg_size = PG_LEVEL_SIZE(level);
+ uint64_t nr_pages = nr_bytes / pg_size;
+ int i;
+
+ TEST_ASSERT(nr_bytes % pg_size == 0,
+ "Region size not aligned: nr_bytes: 0x%lx, page size: 0x%lx",
+ nr_bytes, pg_size);
+
+ for (i = 0; i < nr_pages; i++) {
+ __virt_pg_map(vm, vaddr, paddr, level);
+
+ vaddr += pg_size;
+ paddr += pg_size;
+ }
+}
+
static uint64_t *_vm_get_page_table_entry(struct kvm_vm *vm,
struct kvm_vcpu *vcpu,
uint64_t vaddr)
@@ -1294,20 +1321,9 @@ done:
/* Returns true if kvm_intel was loaded with unrestricted_guest=1. */
bool vm_is_unrestricted_guest(struct kvm_vm *vm)
{
- char val = 'N';
- size_t count;
- FILE *f;
-
/* Ensure that a KVM vendor-specific module is loaded. */
if (vm == NULL)
close(open_kvm_dev_path_or_exit());
- f = fopen("/sys/module/kvm_intel/parameters/unrestricted_guest", "r");
- if (f) {
- count = fread(&val, sizeof(char), 1, f);
- TEST_ASSERT(count == 1, "Unable to read from param file.");
- fclose(f);
- }
-
- return val == 'Y';
+ return get_kvm_intel_param_bool("unrestricted_guest");
}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/svm.c b/tools/testing/selftests/kvm/lib/x86_64/svm.c
index 6d445886e16c..5495a92dfd5a 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/svm.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/svm.c
@@ -60,18 +60,6 @@ static void vmcb_set_seg(struct vmcb_seg *seg, u16 selector,
seg->base = base;
}
-/*
- * Avoid using memset to clear the vmcb, since libc may not be
- * available in L1 (and, even if it is, features that libc memset may
- * want to use, like AVX, may not be enabled).
- */
-static void clear_vmcb(struct vmcb *vmcb)
-{
- int n = sizeof(*vmcb) / sizeof(u32);
-
- asm volatile ("rep stosl" : "+c"(n), "+D"(vmcb) : "a"(0) : "memory");
-}
-
void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_rsp)
{
struct vmcb *vmcb = svm->vmcb;
@@ -88,7 +76,7 @@ void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_r
wrmsr(MSR_EFER, efer | EFER_SVME);
wrmsr(MSR_VM_HSAVE_PA, svm->save_area_gpa);
- clear_vmcb(vmcb);
+ memset(vmcb, 0, sizeof(*vmcb));
asm volatile ("vmsave %0\n\t" : : "a" (vmcb_gpa) : "memory");
vmcb_set_seg(&save->es, get_es(), 0, -1U, data_seg_attr);
vmcb_set_seg(&save->cs, get_cs(), 0, -1U, code_seg_attr);
diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
index 80a568c439b8..d21049c38fc5 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
@@ -5,6 +5,8 @@
* Copyright (C) 2018, Google LLC.
*/
+#include <asm/msr-index.h>
+
#include "test_util.h"
#include "kvm_util.h"
#include "processor.h"
@@ -542,9 +544,27 @@ void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm,
__nested_map(vmx, vm, addr, addr, size, PG_LEVEL_1G);
}
+bool kvm_vm_has_ept(struct kvm_vm *vm)
+{
+ struct kvm_vcpu *vcpu;
+ uint64_t ctrl;
+
+ vcpu = list_first_entry(&vm->vcpus, struct kvm_vcpu, list);
+ TEST_ASSERT(vcpu, "Cannot determine EPT support without vCPUs.\n");
+
+ ctrl = vcpu_get_msr(vcpu, MSR_IA32_VMX_TRUE_PROCBASED_CTLS) >> 32;
+ if (!(ctrl & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS))
+ return false;
+
+ ctrl = vcpu_get_msr(vcpu, MSR_IA32_VMX_PROCBASED_CTLS2) >> 32;
+ return ctrl & SECONDARY_EXEC_ENABLE_EPT;
+}
+
void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm,
uint32_t eptp_memslot)
{
+ TEST_REQUIRE(kvm_vm_has_ept(vm));
+
vmx->eptp = (void *)vm_vaddr_alloc_page(vm);
vmx->eptp_hva = addr_gva2hva(vm, (uintptr_t)vmx->eptp);
vmx->eptp_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->eptp);
diff --git a/tools/testing/selftests/kvm/rseq_test.c b/tools/testing/selftests/kvm/rseq_test.c
index fac248a43666..6f88da7e60be 100644
--- a/tools/testing/selftests/kvm/rseq_test.c
+++ b/tools/testing/selftests/kvm/rseq_test.c
@@ -227,7 +227,7 @@ int main(int argc, char *argv[])
ucall_init(vm, NULL);
pthread_create(&migration_thread, NULL, migration_worker,
- (void *)(unsigned long)gettid());
+ (void *)(unsigned long)syscall(SYS_gettid));
for (i = 0; !done; i++) {
vcpu_run(vcpu);
diff --git a/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c b/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c
index b1905d280ef5..32f7e09ef67c 100644
--- a/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c
+++ b/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c
@@ -14,89 +14,73 @@
#include "kvm_util.h"
#include "processor.h"
-static bool ud_expected;
+/* VMCALL and VMMCALL are both 3-byte opcodes. */
+#define HYPERCALL_INSN_SIZE 3
+
+static bool quirk_disabled;
static void guest_ud_handler(struct ex_regs *regs)
{
- GUEST_ASSERT(ud_expected);
- GUEST_DONE();
+ regs->rax = -EFAULT;
+ regs->rip += HYPERCALL_INSN_SIZE;
}
-extern unsigned char svm_hypercall_insn;
-static uint64_t svm_do_sched_yield(uint8_t apic_id)
-{
- uint64_t ret;
-
- asm volatile("mov %1, %%rax\n\t"
- "mov %2, %%rbx\n\t"
- "svm_hypercall_insn:\n\t"
- "vmmcall\n\t"
- "mov %%rax, %0\n\t"
- : "=r"(ret)
- : "r"((uint64_t)KVM_HC_SCHED_YIELD), "r"((uint64_t)apic_id)
- : "rax", "rbx", "memory");
+static const uint8_t vmx_vmcall[HYPERCALL_INSN_SIZE] = { 0x0f, 0x01, 0xc1 };
+static const uint8_t svm_vmmcall[HYPERCALL_INSN_SIZE] = { 0x0f, 0x01, 0xd9 };
- return ret;
-}
-
-extern unsigned char vmx_hypercall_insn;
-static uint64_t vmx_do_sched_yield(uint8_t apic_id)
+extern uint8_t hypercall_insn[HYPERCALL_INSN_SIZE];
+static uint64_t do_sched_yield(uint8_t apic_id)
{
uint64_t ret;
- asm volatile("mov %1, %%rax\n\t"
- "mov %2, %%rbx\n\t"
- "vmx_hypercall_insn:\n\t"
- "vmcall\n\t"
- "mov %%rax, %0\n\t"
- : "=r"(ret)
- : "r"((uint64_t)KVM_HC_SCHED_YIELD), "r"((uint64_t)apic_id)
- : "rax", "rbx", "memory");
+ asm volatile("hypercall_insn:\n\t"
+ ".byte 0xcc,0xcc,0xcc\n\t"
+ : "=a"(ret)
+ : "a"((uint64_t)KVM_HC_SCHED_YIELD), "b"((uint64_t)apic_id)
+ : "memory");
return ret;
}
-static void assert_hypercall_insn(unsigned char *exp_insn, unsigned char *obs_insn)
-{
- uint32_t exp = 0, obs = 0;
-
- memcpy(&exp, exp_insn, sizeof(exp));
- memcpy(&obs, obs_insn, sizeof(obs));
-
- GUEST_ASSERT_EQ(exp, obs);
-}
-
static void guest_main(void)
{
- unsigned char *native_hypercall_insn, *hypercall_insn;
- uint8_t apic_id;
-
- apic_id = GET_APIC_ID_FIELD(xapic_read_reg(APIC_ID));
+ const uint8_t *native_hypercall_insn;
+ const uint8_t *other_hypercall_insn;
+ uint64_t ret;
if (is_intel_cpu()) {
- native_hypercall_insn = &vmx_hypercall_insn;
- hypercall_insn = &svm_hypercall_insn;
- svm_do_sched_yield(apic_id);
+ native_hypercall_insn = vmx_vmcall;
+ other_hypercall_insn = svm_vmmcall;
} else if (is_amd_cpu()) {
- native_hypercall_insn = &svm_hypercall_insn;
- hypercall_insn = &vmx_hypercall_insn;
- vmx_do_sched_yield(apic_id);
+ native_hypercall_insn = svm_vmmcall;
+ other_hypercall_insn = vmx_vmcall;
} else {
GUEST_ASSERT(0);
/* unreachable */
return;
}
- GUEST_ASSERT(!ud_expected);
- assert_hypercall_insn(native_hypercall_insn, hypercall_insn);
- GUEST_DONE();
-}
+ memcpy(hypercall_insn, other_hypercall_insn, HYPERCALL_INSN_SIZE);
-static void setup_ud_vector(struct kvm_vcpu *vcpu)
-{
- vm_init_descriptor_tables(vcpu->vm);
- vcpu_init_descriptor_tables(vcpu);
- vm_install_exception_handler(vcpu->vm, UD_VECTOR, guest_ud_handler);
+ ret = do_sched_yield(GET_APIC_ID_FIELD(xapic_read_reg(APIC_ID)));
+
+ /*
+ * If the quirk is disabled, verify that guest_ud_handler() "returned"
+ * -EFAULT and that KVM did NOT patch the hypercall. If the quirk is
+ * enabled, verify that the hypercall succeeded and that KVM patched in
+ * the "right" hypercall.
+ */
+ if (quirk_disabled) {
+ GUEST_ASSERT(ret == (uint64_t)-EFAULT);
+ GUEST_ASSERT(!memcmp(other_hypercall_insn, hypercall_insn,
+ HYPERCALL_INSN_SIZE));
+ } else {
+ GUEST_ASSERT(!ret);
+ GUEST_ASSERT(!memcmp(native_hypercall_insn, hypercall_insn,
+ HYPERCALL_INSN_SIZE));
+ }
+
+ GUEST_DONE();
}
static void enter_guest(struct kvm_vcpu *vcpu)
@@ -119,35 +103,23 @@ static void enter_guest(struct kvm_vcpu *vcpu)
}
}
-static void test_fix_hypercall(void)
+static void test_fix_hypercall(bool disable_quirk)
{
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
vm = vm_create_with_one_vcpu(&vcpu, guest_main);
- setup_ud_vector(vcpu);
-
- ud_expected = false;
- sync_global_to_guest(vm, ud_expected);
-
- virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
-
- enter_guest(vcpu);
-}
-
-static void test_fix_hypercall_disabled(void)
-{
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm;
- vm = vm_create_with_one_vcpu(&vcpu, guest_main);
- setup_ud_vector(vcpu);
+ vm_init_descriptor_tables(vcpu->vm);
+ vcpu_init_descriptor_tables(vcpu);
+ vm_install_exception_handler(vcpu->vm, UD_VECTOR, guest_ud_handler);
- vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2,
- KVM_X86_QUIRK_FIX_HYPERCALL_INSN);
+ if (disable_quirk)
+ vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2,
+ KVM_X86_QUIRK_FIX_HYPERCALL_INSN);
- ud_expected = true;
- sync_global_to_guest(vm, ud_expected);
+ quirk_disabled = disable_quirk;
+ sync_global_to_guest(vm, quirk_disabled);
virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
@@ -158,6 +130,6 @@ int main(void)
{
TEST_REQUIRE(kvm_check_cap(KVM_CAP_DISABLE_QUIRKS2) & KVM_X86_QUIRK_FIX_HYPERCALL_INSN);
- test_fix_hypercall();
- test_fix_hypercall_disabled();
+ test_fix_hypercall(false);
+ test_fix_hypercall(true);
}
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_features.c b/tools/testing/selftests/kvm/x86_64/hyperv_features.c
index 79ab0152d281..05b32e550a80 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_features.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_features.c
@@ -26,7 +26,8 @@ static inline uint8_t hypercall(u64 control, vm_vaddr_t input_address,
: "=a" (*hv_status),
"+c" (control), "+d" (input_address),
KVM_ASM_SAFE_OUTPUTS(vector)
- : [output_address] "r"(output_address)
+ : [output_address] "r"(output_address),
+ "a" (-EFAULT)
: "cc", "memory", "r8", KVM_ASM_SAFE_CLOBBERS);
return vector;
}
@@ -81,13 +82,13 @@ static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall)
}
vector = hypercall(hcall->control, input, output, &res);
- if (hcall->ud_expected)
+ if (hcall->ud_expected) {
GUEST_ASSERT_2(vector == UD_VECTOR, hcall->control, vector);
- else
+ } else {
GUEST_ASSERT_2(!vector, hcall->control, vector);
+ GUEST_ASSERT_2(res == hcall->expect, hcall->expect, res);
+ }
- GUEST_ASSERT_2(!hcall->ud_expected || res == hcall->expect,
- hcall->expect, res);
GUEST_DONE();
}
@@ -507,7 +508,7 @@ static void guest_test_hcalls_access(void)
switch (stage) {
case 0:
feat->eax |= HV_MSR_HYPERCALL_AVAILABLE;
- hcall->control = 0xdeadbeef;
+ hcall->control = 0xbeef;
hcall->expect = HV_STATUS_INVALID_HYPERCALL_CODE;
break;
diff --git a/tools/testing/selftests/kvm/x86_64/nested_exceptions_test.c b/tools/testing/selftests/kvm/x86_64/nested_exceptions_test.c
new file mode 100644
index 000000000000..ac33835f78f4
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/nested_exceptions_test.c
@@ -0,0 +1,295 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#define _GNU_SOURCE /* for program_invocation_short_name */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+#include "svm_util.h"
+
+#define L2_GUEST_STACK_SIZE 256
+
+/*
+ * Arbitrary, never shoved into KVM/hardware, just need to avoid conflict with
+ * the "real" exceptions used, #SS/#GP/#DF (12/13/8).
+ */
+#define FAKE_TRIPLE_FAULT_VECTOR 0xaa
+
+/* Arbitrary 32-bit error code injected by this test. */
+#define SS_ERROR_CODE 0xdeadbeef
+
+/*
+ * Bit '0' is set on Intel if the exception occurs while delivering a previous
+ * event/exception. AMD's wording is ambiguous, but presumably the bit is set
+ * if the exception occurs while delivering an external event, e.g. NMI or INTR,
+ * but not for exceptions that occur when delivering other exceptions or
+ * software interrupts.
+ *
+ * Note, Intel's name for it, "External event", is misleading and much more
+ * aligned with AMD's behavior, but the SDM is quite clear on its behavior.
+ */
+#define ERROR_CODE_EXT_FLAG BIT(0)
+
+/*
+ * Bit '1' is set if the fault occurred when looking up a descriptor in the
+ * IDT, which is the case here as the IDT is empty/NULL.
+ */
+#define ERROR_CODE_IDT_FLAG BIT(1)
+
+/*
+ * The #GP that occurs when vectoring #SS should show the index into the IDT
+ * for #SS, plus have the "IDT flag" set.
+ */
+#define GP_ERROR_CODE_AMD ((SS_VECTOR * 8) | ERROR_CODE_IDT_FLAG)
+#define GP_ERROR_CODE_INTEL ((SS_VECTOR * 8) | ERROR_CODE_IDT_FLAG | ERROR_CODE_EXT_FLAG)
+
+/*
+ * Intel and AMD both shove '0' into the error code on #DF, regardless of what
+ * led to the double fault.
+ */
+#define DF_ERROR_CODE 0
+
+#define INTERCEPT_SS (BIT_ULL(SS_VECTOR))
+#define INTERCEPT_SS_DF (INTERCEPT_SS | BIT_ULL(DF_VECTOR))
+#define INTERCEPT_SS_GP_DF (INTERCEPT_SS_DF | BIT_ULL(GP_VECTOR))
+
+static void l2_ss_pending_test(void)
+{
+ GUEST_SYNC(SS_VECTOR);
+}
+
+static void l2_ss_injected_gp_test(void)
+{
+ GUEST_SYNC(GP_VECTOR);
+}
+
+static void l2_ss_injected_df_test(void)
+{
+ GUEST_SYNC(DF_VECTOR);
+}
+
+static void l2_ss_injected_tf_test(void)
+{
+ GUEST_SYNC(FAKE_TRIPLE_FAULT_VECTOR);
+}
+
+static void svm_run_l2(struct svm_test_data *svm, void *l2_code, int vector,
+ uint32_t error_code)
+{
+ struct vmcb *vmcb = svm->vmcb;
+ struct vmcb_control_area *ctrl = &vmcb->control;
+
+ vmcb->save.rip = (u64)l2_code;
+ run_guest(vmcb, svm->vmcb_gpa);
+
+ if (vector == FAKE_TRIPLE_FAULT_VECTOR)
+ return;
+
+ GUEST_ASSERT_EQ(ctrl->exit_code, (SVM_EXIT_EXCP_BASE + vector));
+ GUEST_ASSERT_EQ(ctrl->exit_info_1, error_code);
+}
+
+static void l1_svm_code(struct svm_test_data *svm)
+{
+ struct vmcb_control_area *ctrl = &svm->vmcb->control;
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+ generic_svm_setup(svm, NULL, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+ svm->vmcb->save.idtr.limit = 0;
+ ctrl->intercept |= BIT_ULL(INTERCEPT_SHUTDOWN);
+
+ ctrl->intercept_exceptions = INTERCEPT_SS_GP_DF;
+ svm_run_l2(svm, l2_ss_pending_test, SS_VECTOR, SS_ERROR_CODE);
+ svm_run_l2(svm, l2_ss_injected_gp_test, GP_VECTOR, GP_ERROR_CODE_AMD);
+
+ ctrl->intercept_exceptions = INTERCEPT_SS_DF;
+ svm_run_l2(svm, l2_ss_injected_df_test, DF_VECTOR, DF_ERROR_CODE);
+
+ ctrl->intercept_exceptions = INTERCEPT_SS;
+ svm_run_l2(svm, l2_ss_injected_tf_test, FAKE_TRIPLE_FAULT_VECTOR, 0);
+ GUEST_ASSERT_EQ(ctrl->exit_code, SVM_EXIT_SHUTDOWN);
+
+ GUEST_DONE();
+}
+
+static void vmx_run_l2(void *l2_code, int vector, uint32_t error_code)
+{
+ GUEST_ASSERT(!vmwrite(GUEST_RIP, (u64)l2_code));
+
+ GUEST_ASSERT_EQ(vector == SS_VECTOR ? vmlaunch() : vmresume(), 0);
+
+ if (vector == FAKE_TRIPLE_FAULT_VECTOR)
+ return;
+
+ GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_EXCEPTION_NMI);
+ GUEST_ASSERT_EQ((vmreadz(VM_EXIT_INTR_INFO) & 0xff), vector);
+ GUEST_ASSERT_EQ(vmreadz(VM_EXIT_INTR_ERROR_CODE), error_code);
+}
+
+static void l1_vmx_code(struct vmx_pages *vmx)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+ GUEST_ASSERT_EQ(prepare_for_vmx_operation(vmx), true);
+
+ GUEST_ASSERT_EQ(load_vmcs(vmx), true);
+
+ prepare_vmcs(vmx, NULL, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+ GUEST_ASSERT_EQ(vmwrite(GUEST_IDTR_LIMIT, 0), 0);
+
+ /*
+ * VMX disallows injecting an exception with error_code[31:16] != 0,
+ * and hardware will never generate a VM-Exit with bits 31:16 set.
+ * KVM should likewise truncate the "bad" userspace value.
+ */
+ GUEST_ASSERT_EQ(vmwrite(EXCEPTION_BITMAP, INTERCEPT_SS_GP_DF), 0);
+ vmx_run_l2(l2_ss_pending_test, SS_VECTOR, (u16)SS_ERROR_CODE);
+ vmx_run_l2(l2_ss_injected_gp_test, GP_VECTOR, GP_ERROR_CODE_INTEL);
+
+ GUEST_ASSERT_EQ(vmwrite(EXCEPTION_BITMAP, INTERCEPT_SS_DF), 0);
+ vmx_run_l2(l2_ss_injected_df_test, DF_VECTOR, DF_ERROR_CODE);
+
+ GUEST_ASSERT_EQ(vmwrite(EXCEPTION_BITMAP, INTERCEPT_SS), 0);
+ vmx_run_l2(l2_ss_injected_tf_test, FAKE_TRIPLE_FAULT_VECTOR, 0);
+ GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_TRIPLE_FAULT);
+
+ GUEST_DONE();
+}
+
+static void __attribute__((__flatten__)) l1_guest_code(void *test_data)
+{
+ if (this_cpu_has(X86_FEATURE_SVM))
+ l1_svm_code(test_data);
+ else
+ l1_vmx_code(test_data);
+}
+
+static void assert_ucall_vector(struct kvm_vcpu *vcpu, int vector)
+{
+ struct kvm_run *run = vcpu->run;
+ struct ucall uc;
+
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Unexpected exit reason: %u (%s),\n",
+ run->exit_reason, exit_reason_str(run->exit_reason));
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ TEST_ASSERT(vector == uc.args[1],
+ "Expected L2 to ask for %d, got %ld", vector, uc.args[1]);
+ break;
+ case UCALL_DONE:
+ TEST_ASSERT(vector == -1,
+ "Expected L2 to ask for %d, L2 says it's done", vector);
+ break;
+ case UCALL_ABORT:
+ TEST_FAIL("%s at %s:%ld (0x%lx != 0x%lx)",
+ (const char *)uc.args[0], __FILE__, uc.args[1],
+ uc.args[2], uc.args[3]);
+ break;
+ default:
+ TEST_FAIL("Expected L2 to ask for %d, got unexpected ucall %lu", vector, uc.cmd);
+ }
+}
+
+static void queue_ss_exception(struct kvm_vcpu *vcpu, bool inject)
+{
+ struct kvm_vcpu_events events;
+
+ vcpu_events_get(vcpu, &events);
+
+ TEST_ASSERT(!events.exception.pending,
+ "Vector %d unexpectedlt pending", events.exception.nr);
+ TEST_ASSERT(!events.exception.injected,
+ "Vector %d unexpectedly injected", events.exception.nr);
+
+ events.flags = KVM_VCPUEVENT_VALID_PAYLOAD;
+ events.exception.pending = !inject;
+ events.exception.injected = inject;
+ events.exception.nr = SS_VECTOR;
+ events.exception.has_error_code = true;
+ events.exception.error_code = SS_ERROR_CODE;
+ vcpu_events_set(vcpu, &events);
+}
+
+/*
+ * Verify KVM_{G,S}ET_EVENTS play nice with pending vs. injected exceptions
+ * when an exception is being queued for L2. Specifically, verify that KVM
+ * honors L1 exception intercept controls when a #SS is pending/injected,
+ * triggers a #GP on vectoring the #SS, morphs to #DF if #GP isn't intercepted
+ * by L1, and finally causes (nested) SHUTDOWN if #DF isn't intercepted by L1.
+ */
+int main(int argc, char *argv[])
+{
+ vm_vaddr_t nested_test_data_gva;
+ struct kvm_vcpu_events events;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_EXCEPTION_PAYLOAD));
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM) || kvm_cpu_has(X86_FEATURE_VMX));
+
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+ vm_enable_cap(vm, KVM_CAP_EXCEPTION_PAYLOAD, -2ul);
+
+ if (kvm_cpu_has(X86_FEATURE_SVM))
+ vcpu_alloc_svm(vm, &nested_test_data_gva);
+ else
+ vcpu_alloc_vmx(vm, &nested_test_data_gva);
+
+ vcpu_args_set(vcpu, 1, nested_test_data_gva);
+
+ /* Run L1 => L2. L2 should sync and request #SS. */
+ vcpu_run(vcpu);
+ assert_ucall_vector(vcpu, SS_VECTOR);
+
+ /* Pend #SS and request immediate exit. #SS should still be pending. */
+ queue_ss_exception(vcpu, false);
+ vcpu->run->immediate_exit = true;
+ vcpu_run_complete_io(vcpu);
+
+ /* Verify the pending events comes back out the same as it went in. */
+ vcpu_events_get(vcpu, &events);
+ ASSERT_EQ(events.flags & KVM_VCPUEVENT_VALID_PAYLOAD,
+ KVM_VCPUEVENT_VALID_PAYLOAD);
+ ASSERT_EQ(events.exception.pending, true);
+ ASSERT_EQ(events.exception.nr, SS_VECTOR);
+ ASSERT_EQ(events.exception.has_error_code, true);
+ ASSERT_EQ(events.exception.error_code, SS_ERROR_CODE);
+
+ /*
+ * Run for real with the pending #SS, L1 should get a VM-Exit due to
+ * #SS interception and re-enter L2 to request #GP (via injected #SS).
+ */
+ vcpu->run->immediate_exit = false;
+ vcpu_run(vcpu);
+ assert_ucall_vector(vcpu, GP_VECTOR);
+
+ /*
+ * Inject #SS, the #SS should bypass interception and cause #GP, which
+ * L1 should intercept before KVM morphs it to #DF. L1 should then
+ * disable #GP interception and run L2 to request #DF (via #SS => #GP).
+ */
+ queue_ss_exception(vcpu, true);
+ vcpu_run(vcpu);
+ assert_ucall_vector(vcpu, DF_VECTOR);
+
+ /*
+ * Inject #SS, the #SS should bypass interception and cause #GP, which
+ * L1 is no longer interception, and so should see a #DF VM-Exit. L1
+ * should then signal that is done.
+ */
+ queue_ss_exception(vcpu, true);
+ vcpu_run(vcpu);
+ assert_ucall_vector(vcpu, FAKE_TRIPLE_FAULT_VECTOR);
+
+ /*
+ * Inject #SS yet again. L1 is not intercepting #GP or #DF, and so
+ * should see nested TRIPLE_FAULT / SHUTDOWN.
+ */
+ queue_ss_exception(vcpu, true);
+ vcpu_run(vcpu);
+ assert_ucall_vector(vcpu, -1);
+
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c
index cc6421716400..59ffe7fd354f 100644
--- a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c
+++ b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c
@@ -112,19 +112,13 @@ void run_test(int reclaim_period_ms, bool disable_nx_huge_pages,
{
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
+ uint64_t nr_bytes;
void *hva;
int r;
vm = vm_create(1);
if (disable_nx_huge_pages) {
- /*
- * Cannot run the test without NX huge pages if the kernel
- * does not support it.
- */
- if (!kvm_check_cap(KVM_CAP_VM_DISABLE_NX_HUGE_PAGES))
- return;
-
r = __vm_disable_nx_huge_pages(vm);
if (reboot_permissions) {
TEST_ASSERT(!r, "Disabling NX huge pages should succeed if process has reboot permissions");
@@ -141,10 +135,24 @@ void run_test(int reclaim_period_ms, bool disable_nx_huge_pages,
HPAGE_GPA, HPAGE_SLOT,
HPAGE_SLOT_NPAGES, 0);
- virt_map(vm, HPAGE_GVA, HPAGE_GPA, HPAGE_SLOT_NPAGES);
+ nr_bytes = HPAGE_SLOT_NPAGES * vm->page_size;
+
+ /*
+ * Ensure that KVM can map HPAGE_SLOT with huge pages by mapping the
+ * region into the guest with 2MiB pages whenever TDP is disabled (i.e.
+ * whenever KVM is shadowing the guest page tables).
+ *
+ * When TDP is enabled, KVM should be able to map HPAGE_SLOT with huge
+ * pages irrespective of the guest page size, so map with 4KiB pages
+ * to test that that is the case.
+ */
+ if (kvm_is_tdp_enabled())
+ virt_map_level(vm, HPAGE_GVA, HPAGE_GPA, nr_bytes, PG_LEVEL_4K);
+ else
+ virt_map_level(vm, HPAGE_GVA, HPAGE_GPA, nr_bytes, PG_LEVEL_2M);
hva = addr_gpa2hva(vm, HPAGE_GPA);
- memset(hva, RETURN_OPCODE, HPAGE_SLOT_NPAGES * PAGE_SIZE);
+ memset(hva, RETURN_OPCODE, nr_bytes);
check_2m_page_count(vm, 0);
check_split_count(vm, 0);
@@ -248,18 +256,13 @@ int main(int argc, char **argv)
}
}
- if (token != MAGIC_TOKEN) {
- print_skip("This test must be run with the magic token %d.\n"
- "This is done by nx_huge_pages_test.sh, which\n"
- "also handles environment setup for the test.",
- MAGIC_TOKEN);
- exit(KSFT_SKIP);
- }
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_VM_DISABLE_NX_HUGE_PAGES));
+ TEST_REQUIRE(reclaim_period_ms > 0);
- if (!reclaim_period_ms) {
- print_skip("The NX reclaim period must be specified and non-zero");
- exit(KSFT_SKIP);
- }
+ __TEST_REQUIRE(token == MAGIC_TOKEN,
+ "This test must be run with the magic token %d.\n"
+ "This is done by nx_huge_pages_test.sh, which\n"
+ "also handles environment setup for the test.");
run_test(reclaim_period_ms, false, reboot_permissions);
run_test(reclaim_period_ms, true, reboot_permissions);
diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c
index 21a2ce8fa739..45de42a027c5 100644
--- a/tools/testing/selftests/landlock/fs_test.c
+++ b/tools/testing/selftests/landlock/fs_test.c
@@ -4,7 +4,7 @@
*
* Copyright © 2017-2020 Mickaël Salaün <mic@digikod.net>
* Copyright © 2020 ANSSI
- * Copyright © 2020-2021 Microsoft Corporation
+ * Copyright © 2020-2022 Microsoft Corporation
*/
#define _GNU_SOURCE
@@ -371,6 +371,13 @@ TEST_F_FORK(layout1, inval)
ASSERT_EQ(EINVAL, errno);
path_beneath.allowed_access &= ~LANDLOCK_ACCESS_FS_EXECUTE;
+ /* Tests with denied-by-default access right. */
+ path_beneath.allowed_access |= LANDLOCK_ACCESS_FS_REFER;
+ ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+ &path_beneath, 0));
+ ASSERT_EQ(EINVAL, errno);
+ path_beneath.allowed_access &= ~LANDLOCK_ACCESS_FS_REFER;
+
/* Test with unknown (64-bits) value. */
path_beneath.allowed_access |= (1ULL << 60);
ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
@@ -1826,6 +1833,20 @@ TEST_F_FORK(layout1, link)
ASSERT_EQ(0, link(file1_s1d3, file2_s1d3));
}
+static int test_rename(const char *const oldpath, const char *const newpath)
+{
+ if (rename(oldpath, newpath))
+ return errno;
+ return 0;
+}
+
+static int test_exchange(const char *const oldpath, const char *const newpath)
+{
+ if (renameat2(AT_FDCWD, oldpath, AT_FDCWD, newpath, RENAME_EXCHANGE))
+ return errno;
+ return 0;
+}
+
TEST_F_FORK(layout1, rename_file)
{
const struct rule rules[] = {
@@ -1867,10 +1888,10 @@ TEST_F_FORK(layout1, rename_file)
* to a different directory (which allows file removal).
*/
ASSERT_EQ(-1, rename(file1_s2d1, file1_s1d3));
- ASSERT_EQ(EXDEV, errno);
+ ASSERT_EQ(EACCES, errno);
ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d1, AT_FDCWD, file1_s1d3,
RENAME_EXCHANGE));
- ASSERT_EQ(EXDEV, errno);
+ ASSERT_EQ(EACCES, errno);
ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_s2d2, AT_FDCWD, file1_s1d3,
RENAME_EXCHANGE));
ASSERT_EQ(EXDEV, errno);
@@ -1894,7 +1915,7 @@ TEST_F_FORK(layout1, rename_file)
ASSERT_EQ(EXDEV, errno);
ASSERT_EQ(0, unlink(file1_s1d3));
ASSERT_EQ(-1, rename(file1_s2d1, file1_s1d3));
- ASSERT_EQ(EXDEV, errno);
+ ASSERT_EQ(EACCES, errno);
/* Exchanges and renames files with same parent. */
ASSERT_EQ(0, renameat2(AT_FDCWD, file2_s2d3, AT_FDCWD, file1_s2d3,
@@ -2014,6 +2035,115 @@ TEST_F_FORK(layout1, reparent_refer)
ASSERT_EQ(0, rename(dir_s1d3, dir_s2d3));
}
+/* Checks renames beneath dir_s1d1. */
+static void refer_denied_by_default(struct __test_metadata *const _metadata,
+ const struct rule layer1[],
+ const int layer1_err,
+ const struct rule layer2[])
+{
+ int ruleset_fd;
+
+ ASSERT_EQ(0, unlink(file1_s1d2));
+
+ ruleset_fd = create_ruleset(_metadata, layer1[0].access, layer1);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /*
+ * If the first layer handles LANDLOCK_ACCESS_FS_REFER (according to
+ * layer1_err), then it allows some different-parent renames and links.
+ */
+ ASSERT_EQ(layer1_err, test_rename(file1_s1d1, file1_s1d2));
+ if (layer1_err == 0)
+ ASSERT_EQ(layer1_err, test_rename(file1_s1d2, file1_s1d1));
+ ASSERT_EQ(layer1_err, test_exchange(file2_s1d1, file2_s1d2));
+ ASSERT_EQ(layer1_err, test_exchange(file2_s1d2, file2_s1d1));
+
+ ruleset_fd = create_ruleset(_metadata, layer2[0].access, layer2);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /*
+ * Now, either the first or the second layer does not handle
+ * LANDLOCK_ACCESS_FS_REFER, which means that any different-parent
+ * renames and links are denied, thus making the layer handling
+ * LANDLOCK_ACCESS_FS_REFER null and void.
+ */
+ ASSERT_EQ(EXDEV, test_rename(file1_s1d1, file1_s1d2));
+ ASSERT_EQ(EXDEV, test_exchange(file2_s1d1, file2_s1d2));
+ ASSERT_EQ(EXDEV, test_exchange(file2_s1d2, file2_s1d1));
+}
+
+const struct rule layer_dir_s1d1_refer[] = {
+ {
+ .path = dir_s1d1,
+ .access = LANDLOCK_ACCESS_FS_REFER,
+ },
+ {},
+};
+
+const struct rule layer_dir_s1d1_execute[] = {
+ {
+ /* Matches a parent directory. */
+ .path = dir_s1d1,
+ .access = LANDLOCK_ACCESS_FS_EXECUTE,
+ },
+ {},
+};
+
+const struct rule layer_dir_s2d1_execute[] = {
+ {
+ /* Does not match a parent directory. */
+ .path = dir_s2d1,
+ .access = LANDLOCK_ACCESS_FS_EXECUTE,
+ },
+ {},
+};
+
+/*
+ * Tests precedence over renames: denied by default for different parent
+ * directories, *with* a rule matching a parent directory, but not directly
+ * denying access (with MAKE_REG nor REMOVE).
+ */
+TEST_F_FORK(layout1, refer_denied_by_default1)
+{
+ refer_denied_by_default(_metadata, layer_dir_s1d1_refer, 0,
+ layer_dir_s1d1_execute);
+}
+
+/*
+ * Same test but this time turning around the ABI version order: the first
+ * layer does not handle LANDLOCK_ACCESS_FS_REFER.
+ */
+TEST_F_FORK(layout1, refer_denied_by_default2)
+{
+ refer_denied_by_default(_metadata, layer_dir_s1d1_execute, EXDEV,
+ layer_dir_s1d1_refer);
+}
+
+/*
+ * Tests precedence over renames: denied by default for different parent
+ * directories, *without* a rule matching a parent directory, but not directly
+ * denying access (with MAKE_REG nor REMOVE).
+ */
+TEST_F_FORK(layout1, refer_denied_by_default3)
+{
+ refer_denied_by_default(_metadata, layer_dir_s1d1_refer, 0,
+ layer_dir_s2d1_execute);
+}
+
+/*
+ * Same test but this time turning around the ABI version order: the first
+ * layer does not handle LANDLOCK_ACCESS_FS_REFER.
+ */
+TEST_F_FORK(layout1, refer_denied_by_default4)
+{
+ refer_denied_by_default(_metadata, layer_dir_s2d1_execute, EXDEV,
+ layer_dir_s1d1_refer);
+}
+
TEST_F_FORK(layout1, reparent_link)
{
const struct rule layer1[] = {
@@ -2336,11 +2466,12 @@ TEST_F_FORK(layout1, reparent_exdev_layers_rename1)
ASSERT_EQ(EXDEV, errno);
/*
- * However, moving the file2_s1d3 file below dir_s2d3 is allowed
- * because it cannot inherit MAKE_REG nor MAKE_DIR rights (which are
- * dedicated to directories).
+ * Moving the file2_s1d3 file below dir_s2d3 is denied because the
+ * second layer does not handle REFER, which is always denied by
+ * default.
*/
- ASSERT_EQ(0, rename(file2_s1d3, file1_s2d3));
+ ASSERT_EQ(-1, rename(file2_s1d3, file1_s2d3));
+ ASSERT_EQ(EXDEV, errno);
}
TEST_F_FORK(layout1, reparent_exdev_layers_rename2)
@@ -2373,8 +2504,12 @@ TEST_F_FORK(layout1, reparent_exdev_layers_rename2)
ASSERT_EQ(EACCES, errno);
ASSERT_EQ(-1, rename(file1_s1d1, file1_s2d3));
ASSERT_EQ(EXDEV, errno);
- /* Modify layout! */
- ASSERT_EQ(0, rename(file2_s1d2, file1_s2d3));
+ /*
+ * Modifying the layout is now denied because the second layer does not
+ * handle REFER, which is always denied by default.
+ */
+ ASSERT_EQ(-1, rename(file2_s1d2, file1_s2d3));
+ ASSERT_EQ(EXDEV, errno);
/* Without REFER source, EACCES wins over EXDEV. */
ASSERT_EQ(-1, rename(dir_s1d1, file1_s2d2));
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index 0e5751af6247..de7d5cc15f85 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -1,42 +1,42 @@
# SPDX-License-Identifier: GPL-2.0-only
+cmsg_sender
+fin_ack_lat
+gro
+hwtstamp_config
+ioam6_parser
+ip_defrag
ipsec
+ipv6_flowlabel
+ipv6_flowlabel_mgr
msg_zerocopy
-socket
+nettest
psock_fanout
psock_snd
psock_tpacket
-stress_reuseport_listen
+reuseaddr_conflict
+reuseaddr_ports_exhausted
reuseport_addr_any
reuseport_bpf
reuseport_bpf_cpu
reuseport_bpf_numa
reuseport_dualstack
-reuseaddr_conflict
-tcp_mmap
-udpgso
-udpgso_bench_rx
-udpgso_bench_tx
-tcp_inq
-tls
-txring_overwrite
-ip_defrag
-ipv6_flowlabel
-ipv6_flowlabel_mgr
-so_txtime
-tcp_fastopen_backup_key
-nettest
-fin_ack_lat
-reuseaddr_ports_exhausted
-hwtstamp_config
rxtimestamp
-timestamping
-txtimestamp
+socket
so_netns_cookie
+so_txtime
+stress_reuseport_listen
+tap
+tcp_fastopen_backup_key
+tcp_inq
+tcp_mmap
test_unix_oob
-gro
-ioam6_parser
+timestamping
+tls
toeplitz
tun
-cmsg_sender
+txring_overwrite
+txtimestamp
+udpgso
+udpgso_bench_rx
+udpgso_bench_tx
unix_connect
-tap \ No newline at end of file
diff --git a/tools/testing/selftests/net/io_uring_zerocopy_tx.c b/tools/testing/selftests/net/io_uring_zerocopy_tx.c
index 9d64c560a2d6..8ce48aca8321 100644
--- a/tools/testing/selftests/net/io_uring_zerocopy_tx.c
+++ b/tools/testing/selftests/net/io_uring_zerocopy_tx.c
@@ -47,7 +47,6 @@ enum {
MODE_MIXED = 3,
};
-static bool cfg_flush = false;
static bool cfg_cork = false;
static int cfg_mode = MODE_ZC_FIXED;
static int cfg_nr_reqs = 8;
@@ -166,21 +165,6 @@ static int io_uring_register_buffers(struct io_uring *ring,
return (ret < 0) ? -errno : ret;
}
-static int io_uring_register_notifications(struct io_uring *ring,
- unsigned nr,
- struct io_uring_notification_slot *slots)
-{
- int ret;
- struct io_uring_notification_register r = {
- .nr_slots = nr,
- .data = (unsigned long)slots,
- };
-
- ret = syscall(__NR_io_uring_register, ring->ring_fd,
- IORING_REGISTER_NOTIFIERS, &r, sizeof(r));
- return (ret < 0) ? -errno : ret;
-}
-
static int io_uring_mmap(int fd, struct io_uring_params *p,
struct io_uring_sq *sq, struct io_uring_cq *cq)
{
@@ -297,11 +281,10 @@ static inline void io_uring_prep_send(struct io_uring_sqe *sqe, int sockfd,
static inline void io_uring_prep_sendzc(struct io_uring_sqe *sqe, int sockfd,
const void *buf, size_t len, int flags,
- unsigned slot_idx, unsigned zc_flags)
+ unsigned zc_flags)
{
io_uring_prep_send(sqe, sockfd, buf, len, flags);
- sqe->opcode = (__u8) IORING_OP_SENDZC_NOTIF;
- sqe->notification_idx = slot_idx;
+ sqe->opcode = (__u8) IORING_OP_SEND_ZC;
sqe->ioprio = zc_flags;
}
@@ -374,7 +357,6 @@ static int do_setup_tx(int domain, int type, int protocol)
static void do_tx(int domain, int type, int protocol)
{
- struct io_uring_notification_slot b[1] = {{.tag = NOTIF_TAG}};
struct io_uring_sqe *sqe;
struct io_uring_cqe *cqe;
unsigned long packets = 0, bytes = 0;
@@ -390,10 +372,6 @@ static void do_tx(int domain, int type, int protocol)
if (ret)
error(1, ret, "io_uring: queue init");
- ret = io_uring_register_notifications(&ring, 1, b);
- if (ret)
- error(1, ret, "io_uring: tx ctx registration");
-
iov.iov_base = payload;
iov.iov_len = cfg_payload_len;
@@ -409,9 +387,8 @@ static void do_tx(int domain, int type, int protocol)
for (i = 0; i < cfg_nr_reqs; i++) {
unsigned zc_flags = 0;
unsigned buf_idx = 0;
- unsigned slot_idx = 0;
unsigned mode = cfg_mode;
- unsigned msg_flags = 0;
+ unsigned msg_flags = MSG_WAITALL;
if (cfg_mode == MODE_MIXED)
mode = rand() % 3;
@@ -423,13 +400,10 @@ static void do_tx(int domain, int type, int protocol)
cfg_payload_len, msg_flags);
sqe->user_data = NONZC_TAG;
} else {
- if (cfg_flush) {
- zc_flags |= IORING_RECVSEND_NOTIF_FLUSH;
- compl_cqes++;
- }
+ compl_cqes++;
io_uring_prep_sendzc(sqe, fd, payload,
cfg_payload_len,
- msg_flags, slot_idx, zc_flags);
+ msg_flags, zc_flags);
if (mode == MODE_ZC_FIXED) {
sqe->ioprio |= IORING_RECVSEND_FIXED_BUF;
sqe->buf_index = buf_idx;
@@ -442,51 +416,57 @@ static void do_tx(int domain, int type, int protocol)
if (ret != cfg_nr_reqs)
error(1, ret, "submit");
+ if (cfg_cork)
+ do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 0);
for (i = 0; i < cfg_nr_reqs; i++) {
ret = io_uring_wait_cqe(&ring, &cqe);
if (ret)
error(1, ret, "wait cqe");
- if (cqe->user_data == NOTIF_TAG) {
+ if (cqe->user_data != NONZC_TAG &&
+ cqe->user_data != ZC_TAG)
+ error(1, -EINVAL, "invalid cqe->user_data");
+
+ if (cqe->flags & IORING_CQE_F_NOTIF) {
+ if (cqe->flags & IORING_CQE_F_MORE)
+ error(1, -EINVAL, "invalid notif flags");
compl_cqes--;
i--;
- } else if (cqe->user_data != NONZC_TAG &&
- cqe->user_data != ZC_TAG) {
- error(1, cqe->res, "invalid user_data");
- } else if (cqe->res <= 0 && cqe->res != -EAGAIN) {
+ } else if (cqe->res <= 0) {
+ if (cqe->flags & IORING_CQE_F_MORE)
+ error(1, cqe->res, "more with a failed send");
error(1, cqe->res, "send failed");
} else {
- if (cqe->res > 0) {
- packets++;
- bytes += cqe->res;
- }
- /* failed requests don't flush */
- if (cfg_flush &&
- cqe->res <= 0 &&
- cqe->user_data == ZC_TAG)
- compl_cqes--;
+ if (cqe->user_data == ZC_TAG &&
+ !(cqe->flags & IORING_CQE_F_MORE))
+ error(1, cqe->res, "missing more flag");
+ packets++;
+ bytes += cqe->res;
}
io_uring_cqe_seen(&ring);
}
- if (cfg_cork)
- do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 0);
} while (gettimeofday_ms() < tstop);
- if (close(fd))
- error(1, errno, "close");
-
- fprintf(stderr, "tx=%lu (MB=%lu), tx/s=%lu (MB/s=%lu)\n",
- packets, bytes >> 20,
- packets / (cfg_runtime_ms / 1000),
- (bytes >> 20) / (cfg_runtime_ms / 1000));
-
while (compl_cqes) {
ret = io_uring_wait_cqe(&ring, &cqe);
if (ret)
error(1, ret, "wait cqe");
+ if (cqe->flags & IORING_CQE_F_MORE)
+ error(1, -EINVAL, "invalid notif flags");
+ if (!(cqe->flags & IORING_CQE_F_NOTIF))
+ error(1, -EINVAL, "missing notif flag");
+
io_uring_cqe_seen(&ring);
compl_cqes--;
}
+
+ fprintf(stderr, "tx=%lu (MB=%lu), tx/s=%lu (MB/s=%lu)\n",
+ packets, bytes >> 20,
+ packets / (cfg_runtime_ms / 1000),
+ (bytes >> 20) / (cfg_runtime_ms / 1000));
+
+ if (close(fd))
+ error(1, errno, "close");
}
static void do_test(int domain, int type, int protocol)
@@ -500,8 +480,8 @@ static void do_test(int domain, int type, int protocol)
static void usage(const char *filepath)
{
- error(1, 0, "Usage: %s [-f] [-n<N>] [-z0] [-s<payload size>] "
- "(-4|-6) [-t<time s>] -D<dst_ip> udp", filepath);
+ error(1, 0, "Usage: %s (-4|-6) (udp|tcp) -D<dst_ip> [-s<payload size>] "
+ "[-t<time s>] [-n<batch>] [-p<port>] [-m<mode>]", filepath);
}
static void parse_opts(int argc, char **argv)
@@ -519,7 +499,7 @@ static void parse_opts(int argc, char **argv)
usage(argv[0]);
cfg_payload_len = max_payload_len;
- while ((c = getopt(argc, argv, "46D:p:s:t:n:fc:m:")) != -1) {
+ while ((c = getopt(argc, argv, "46D:p:s:t:n:c:m:")) != -1) {
switch (c) {
case '4':
if (cfg_family != PF_UNSPEC)
@@ -548,9 +528,6 @@ static void parse_opts(int argc, char **argv)
case 'n':
cfg_nr_reqs = strtoul(optarg, NULL, 0);
break;
- case 'f':
- cfg_flush = 1;
- break;
case 'c':
cfg_cork = strtol(optarg, NULL, 0);
break;
@@ -583,8 +560,6 @@ static void parse_opts(int argc, char **argv)
if (cfg_payload_len > max_payload_len)
error(1, 0, "-s: payload exceeds max (%d)", max_payload_len);
- if (cfg_mode == MODE_NONZC && cfg_flush)
- error(1, 0, "-f: only zerocopy modes support notifications");
if (optind != argc - 1)
usage(argv[0]);
}
diff --git a/tools/testing/selftests/net/io_uring_zerocopy_tx.sh b/tools/testing/selftests/net/io_uring_zerocopy_tx.sh
index 6a65e4437640..32aa6e9dacc2 100755
--- a/tools/testing/selftests/net/io_uring_zerocopy_tx.sh
+++ b/tools/testing/selftests/net/io_uring_zerocopy_tx.sh
@@ -25,15 +25,11 @@ readonly path_sysctl_mem="net.core.optmem_max"
# No arguments: automated test
if [[ "$#" -eq "0" ]]; then
IPs=( "4" "6" )
- protocols=( "tcp" "udp" )
for IP in "${IPs[@]}"; do
- for proto in "${protocols[@]}"; do
- for mode in $(seq 1 3); do
- $0 "$IP" "$proto" -m "$mode" -t 1 -n 32
- $0 "$IP" "$proto" -m "$mode" -t 1 -n 32 -f
- $0 "$IP" "$proto" -m "$mode" -t 1 -n 32 -c -f
- done
+ for mode in $(seq 1 3); do
+ $0 "$IP" udp -m "$mode" -t 1 -n 32
+ $0 "$IP" tcp -m "$mode" -t 1 -n 32
done
done
diff --git a/tools/testing/selftests/netfilter/nft_conntrack_helper.sh b/tools/testing/selftests/netfilter/nft_conntrack_helper.sh
index bf6b9626c7dd..faa7778d7bd1 100755
--- a/tools/testing/selftests/netfilter/nft_conntrack_helper.sh
+++ b/tools/testing/selftests/netfilter/nft_conntrack_helper.sh
@@ -102,26 +102,42 @@ check_for_helper()
ip netns exec ${netns} conntrack -L -f $family -p tcp --dport $port 2> /dev/null |grep -q 'helper=ftp'
if [ $? -ne 0 ] ; then
- echo "FAIL: ${netns} did not show attached helper $message" 1>&2
- ret=1
+ if [ $autoassign -eq 0 ] ;then
+ echo "FAIL: ${netns} did not show attached helper $message" 1>&2
+ ret=1
+ else
+ echo "PASS: ${netns} did not show attached helper $message" 1>&2
+ fi
+ else
+ if [ $autoassign -eq 0 ] ;then
+ echo "PASS: ${netns} connection on port $port has ftp helper attached" 1>&2
+ else
+ echo "FAIL: ${netns} connection on port $port has ftp helper attached" 1>&2
+ ret=1
+ fi
fi
- echo "PASS: ${netns} connection on port $port has ftp helper attached" 1>&2
return 0
}
test_helper()
{
local port=$1
- local msg=$2
+ local autoassign=$2
+
+ if [ $autoassign -eq 0 ] ;then
+ msg="set via ruleset"
+ else
+ msg="auto-assign"
+ fi
sleep 3 | ip netns exec ${ns2} nc -w 2 -l -p $port > /dev/null &
sleep 1 | ip netns exec ${ns1} nc -w 2 10.0.1.2 $port > /dev/null &
sleep 1
- check_for_helper "$ns1" "ip $msg" $port
- check_for_helper "$ns2" "ip $msg" $port
+ check_for_helper "$ns1" "ip $msg" $port $autoassign
+ check_for_helper "$ns2" "ip $msg" $port $autoassign
wait
@@ -173,9 +189,9 @@ if [ $? -ne 0 ];then
fi
fi
-test_helper 2121 "set via ruleset"
-ip netns exec ${ns1} sysctl -q 'net.netfilter.nf_conntrack_helper=1'
-ip netns exec ${ns2} sysctl -q 'net.netfilter.nf_conntrack_helper=1'
-test_helper 21 "auto-assign"
+test_helper 2121 0
+ip netns exec ${ns1} sysctl -qe 'net.netfilter.nf_conntrack_helper=1'
+ip netns exec ${ns2} sysctl -qe 'net.netfilter.nf_conntrack_helper=1'
+test_helper 21 1
exit $ret