482 files changed, 26143 insertions, 8547 deletions
diff --git a/tools/Makefile b/tools/Makefile
index be02c8b904db..abb358a70ad0 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -21,6 +21,7 @@ help:
 	@echo '  leds                   - LEDs  tools'
 	@echo '  liblockdep             - user-space wrapper for kernel locking-validator'
 	@echo '  bpf                    - misc BPF tools'
+	@echo '  pci                    - PCI tools'
 	@echo '  perf                   - Linux performance measurement and analysis tool'
 	@echo '  selftests              - various kernel selftests'
 	@echo '  spi                    - spi tools'
@@ -59,7 +60,7 @@ acpi: FORCE
 cpupower: FORCE
 	$(call descend,power/$@)
 
-cgroup firewire hv guest spi usb virtio vm bpf iio gpio objtool leds wmi: FORCE
+cgroup firewire hv guest spi usb virtio vm bpf iio gpio objtool leds wmi pci: FORCE
 	$(call descend,$@)
 
 liblockdep: FORCE
@@ -94,7 +95,7 @@ kvm_stat: FORCE
 all: acpi cgroup cpupower gpio hv firewire liblockdep \
 		perf selftests spi turbostat usb \
 		virtio vm bpf x86_energy_perf_policy \
-		tmon freefall iio objtool kvm_stat wmi
+		tmon freefall iio objtool kvm_stat wmi pci
 
 acpi_install:
 	$(call descend,power/$(@:_install=),install)
@@ -102,7 +103,7 @@ acpi_install:
 cpupower_install:
 	$(call descend,power/$(@:_install=),install)
 
-cgroup_install firewire_install gpio_install hv_install iio_install perf_install spi_install usb_install virtio_install vm_install bpf_install objtool_install wmi_install:
+cgroup_install firewire_install gpio_install hv_install iio_install perf_install spi_install usb_install virtio_install vm_install bpf_install objtool_install wmi_install pci_install:
 	$(call descend,$(@:_install=),install)
 
 liblockdep_install:
@@ -128,7 +129,7 @@ install: acpi_install cgroup_install cpupower_install gpio_install \
 		perf_install selftests_install turbostat_install usb_install \
 		virtio_install vm_install bpf_install x86_energy_perf_policy_install \
 		tmon_install freefall_install objtool_install kvm_stat_install \
-		wmi_install
+		wmi_install pci_install
 
 acpi_clean:
 	$(call descend,power/acpi,clean)
@@ -136,7 +137,7 @@ acpi_clean:
 cpupower_clean:
 	$(call descend,power/cpupower,clean)
 
-cgroup_clean hv_clean firewire_clean spi_clean usb_clean virtio_clean vm_clean wmi_clean bpf_clean iio_clean gpio_clean objtool_clean leds_clean:
+cgroup_clean hv_clean firewire_clean spi_clean usb_clean virtio_clean vm_clean wmi_clean bpf_clean iio_clean gpio_clean objtool_clean leds_clean pci_clean:
 	$(call descend,$(@:_clean=),clean)
 
 liblockdep_clean:
@@ -174,6 +175,6 @@ clean: acpi_clean cgroup_clean cpupower_clean hv_clean firewire_clean \
 		perf_clean selftests_clean turbostat_clean spi_clean usb_clean virtio_clean \
 		vm_clean bpf_clean iio_clean x86_energy_perf_policy_clean tmon_clean \
 		freefall_clean build_clean libbpf_clean libsubcmd_clean liblockdep_clean \
-		gpio_clean objtool_clean leds_clean wmi_clean
+		gpio_clean objtool_clean leds_clean wmi_clean pci_clean
 
 .PHONY: FORCE
diff --git a/tools/accounting/getdelays.c b/tools/accounting/getdelays.c
index 9f420d98b5fb..8cb504d30384 100644
--- a/tools/accounting/getdelays.c
+++ b/tools/accounting/getdelays.c
@@ -203,6 +203,8 @@ static void print_delayacct(struct taskstats *t)
 	       "SWAP  %15s%15s%15s\n"
 	       "      %15llu%15llu%15llums\n"
 	       "RECLAIM  %12s%15s%15s\n"
+	       "      %15llu%15llu%15llums\n"
+	       "THRASHING%12s%15s%15s\n"
 	       "      %15llu%15llu%15llums\n",
 	       "count", "real total", "virtual total",
 	       "delay total", "delay average",
@@ -222,7 +224,11 @@ static void print_delayacct(struct taskstats *t)
 	       "count", "delay total", "delay average",
 	       (unsigned long long)t->freepages_count,
 	       (unsigned long long)t->freepages_delay_total,
-	       average_ms(t->freepages_delay_total, t->freepages_count));
+	       average_ms(t->freepages_delay_total, t->freepages_count),
+	       "count", "delay total", "delay average",
+	       (unsigned long long)t->thrashing_count,
+	       (unsigned long long)t->thrashing_delay_total,
+	       average_ms(t->thrashing_delay_total, t->thrashing_count));
 }
 
 static void task_context_switch_counts(struct taskstats *t)
diff --git a/tools/arch/arm64/include/asm/barrier.h b/tools/arch/arm64/include/asm/barrier.h
index 40bde6b23501..378c051fa177 100644
--- a/tools/arch/arm64/include/asm/barrier.h
+++ b/tools/arch/arm64/include/asm/barrier.h
@@ -14,4 +14,75 @@
 #define wmb()		asm volatile("dmb ishst" ::: "memory")
 #define rmb()		asm volatile("dmb ishld" ::: "memory")
 
+#define smp_store_release(p, v)						\
+do {									\
+	union { typeof(*p) __val; char __c[1]; } __u =			\
+		{ .__val = (v) }; 					\
+									\
+	switch (sizeof(*p)) {						\
+	case 1:								\
+		asm volatile ("stlrb %w1, %0"				\
+				: "=Q" (*p)				\
+				: "r" (*(__u8_alias_t *)__u.__c)	\
+				: "memory");				\
+		break;							\
+	case 2:								\
+		asm volatile ("stlrh %w1, %0"				\
+				: "=Q" (*p)				\
+				: "r" (*(__u16_alias_t *)__u.__c)	\
+				: "memory");				\
+		break;							\
+	case 4:								\
+		asm volatile ("stlr %w1, %0"				\
+				: "=Q" (*p)				\
+				: "r" (*(__u32_alias_t *)__u.__c)	\
+				: "memory");				\
+		break;							\
+	case 8:								\
+		asm volatile ("stlr %1, %0"				\
+				: "=Q" (*p)				\
+				: "r" (*(__u64_alias_t *)__u.__c)	\
+				: "memory");				\
+		break;							\
+	default:							\
+		/* Only to shut up gcc ... */				\
+		mb();							\
+		break;							\
+	}								\
+} while (0)
+
+#define smp_load_acquire(p)						\
+({									\
+	union { typeof(*p) __val; char __c[1]; } __u =			\
+		{ .__c = { 0 } };					\
+									\
+	switch (sizeof(*p)) {						\
+	case 1:								\
+		asm volatile ("ldarb %w0, %1"				\
+			: "=r" (*(__u8_alias_t *)__u.__c)		\
+			: "Q" (*p) : "memory");				\
+		break;							\
+	case 2:								\
+		asm volatile ("ldarh %w0, %1"				\
+			: "=r" (*(__u16_alias_t *)__u.__c)		\
+			: "Q" (*p) : "memory");				\
+		break;							\
+	case 4:								\
+		asm volatile ("ldar %w0, %1"				\
+			: "=r" (*(__u32_alias_t *)__u.__c)		\
+			: "Q" (*p) : "memory");				\
+		break;							\
+	case 8:								\
+		asm volatile ("ldar %0, %1"				\
+			: "=r" (*(__u64_alias_t *)__u.__c)		\
+			: "Q" (*p) : "memory");				\
+		break;							\
+	default:							\
+		/* Only to shut up gcc ... */				\
+		mb();							\
+		break;							\
+	}								\
+	__u.__val;							\
+})
+
 #endif /* _TOOLS_LINUX_ASM_AARCH64_BARRIER_H */
diff --git a/tools/arch/arm64/include/uapi/asm/unistd.h b/tools/arch/arm64/include/uapi/asm/unistd.h
index 5072cbd15c82..dae1584cf017 100644
--- a/tools/arch/arm64/include/uapi/asm/unistd.h
+++ b/tools/arch/arm64/include/uapi/asm/unistd.h
@@ -16,5 +16,6 @@
  */
 
 #define __ARCH_WANT_RENAMEAT
+#define __ARCH_WANT_NEW_STAT
 
 #include <asm-generic/unistd.h>
diff --git a/tools/arch/ia64/include/asm/barrier.h b/tools/arch/ia64/include/asm/barrier.h
index d808ee0e77b5..4d471d9511a5 100644
--- a/tools/arch/ia64/include/asm/barrier.h
+++ b/tools/arch/ia64/include/asm/barrier.h
@@ -46,4 +46,17 @@
 #define rmb()		mb()
 #define wmb()		mb()
 
+#define smp_store_release(p, v)			\
+do {						\
+	barrier();				\
+	WRITE_ONCE(*p, v);			\
+} while (0)
+
+#define smp_load_acquire(p)			\
+({						\
+	typeof(*p) ___p1 = READ_ONCE(*p);	\
+	barrier();				\
+	___p1;					\
+})
+
 #endif /* _TOOLS_LINUX_ASM_IA64_BARRIER_H */
diff --git a/tools/arch/powerpc/include/asm/barrier.h b/tools/arch/powerpc/include/asm/barrier.h
index a634da05bc97..905a2c66d96d 100644
--- a/tools/arch/powerpc/include/asm/barrier.h
+++ b/tools/arch/powerpc/include/asm/barrier.h
@@ -27,4 +27,20 @@
 #define rmb()  __asm__ __volatile__ ("sync" : : : "memory")
 #define wmb()  __asm__ __volatile__ ("sync" : : : "memory")
 
+#if defined(__powerpc64__)
+#define smp_lwsync()	__asm__ __volatile__ ("lwsync" : : : "memory")
+
+#define smp_store_release(p, v)			\
+do {						\
+	smp_lwsync();				\
+	WRITE_ONCE(*p, v);			\
+} while (0)
+
+#define smp_load_acquire(p)			\
+({						\
+	typeof(*p) ___p1 = READ_ONCE(*p);	\
+	smp_lwsync();				\
+	___p1;					\
+})
+#endif /* defined(__powerpc64__) */
 #endif /* _TOOLS_LINUX_ASM_POWERPC_BARRIER_H */
diff --git a/tools/arch/powerpc/include/uapi/asm/kvm.h b/tools/arch/powerpc/include/uapi/asm/kvm.h
index 1b32b56a03d3..8c876c166ef2 100644
--- a/tools/arch/powerpc/include/uapi/asm/kvm.h
+++ b/tools/arch/powerpc/include/uapi/asm/kvm.h
@@ -634,6 +634,7 @@ struct kvm_ppc_cpu_char {
 
 #define KVM_REG_PPC_DEC_EXPIRY	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbe)
 #define KVM_REG_PPC_ONLINE	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xbf)
+#define KVM_REG_PPC_PTCR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc0)
 
 /* Transactional Memory checkpointed state:
  * This is all GPRs, all VSX regs and a subset of SPRs
diff --git a/tools/arch/s390/include/asm/barrier.h b/tools/arch/s390/include/asm/barrier.h
index 5030c99f47d2..de362fa664d4 100644
--- a/tools/arch/s390/include/asm/barrier.h
+++ b/tools/arch/s390/include/asm/barrier.h
@@ -28,4 +28,17 @@
 #define rmb()				mb()
 #define wmb()				mb()
 
+#define smp_store_release(p, v)			\
+do {						\
+	barrier();				\
+	WRITE_ONCE(*p, v);			\
+} while (0)
+
+#define smp_load_acquire(p)			\
+({						\
+	typeof(*p) ___p1 = READ_ONCE(*p);	\
+	barrier();				\
+	___p1;					\
+})
+
 #endif /* __TOOLS_LIB_ASM_BARRIER_H */
diff --git a/tools/arch/s390/include/uapi/asm/kvm.h b/tools/arch/s390/include/uapi/asm/kvm.h
index 9a50f02b9894..16511d97e8dc 100644
--- a/tools/arch/s390/include/uapi/asm/kvm.h
+++ b/tools/arch/s390/include/uapi/asm/kvm.h
@@ -160,6 +160,8 @@ struct kvm_s390_vm_cpu_subfunc {
 #define KVM_S390_VM_CRYPTO_ENABLE_DEA_KW	1
 #define KVM_S390_VM_CRYPTO_DISABLE_AES_KW	2
 #define KVM_S390_VM_CRYPTO_DISABLE_DEA_KW	3
+#define KVM_S390_VM_CRYPTO_ENABLE_APIE		4
+#define KVM_S390_VM_CRYPTO_DISABLE_APIE		5
 
 /* kvm attributes for migration mode */
 #define KVM_S390_VM_MIGRATION_STOP	0
diff --git a/tools/arch/sparc/include/asm/barrier_64.h b/tools/arch/sparc/include/asm/barrier_64.h
index ba61344287d5..cfb0fdc8ccf0 100644
--- a/tools/arch/sparc/include/asm/barrier_64.h
+++ b/tools/arch/sparc/include/asm/barrier_64.h
@@ -40,4 +40,17 @@ do {	__asm__ __volatile__("ba,pt	%%xcc, 1f\n\t" \
 #define rmb()	__asm__ __volatile__("":::"memory")
 #define wmb()	__asm__ __volatile__("":::"memory")
 
+#define smp_store_release(p, v)			\
+do {						\
+	barrier();				\
+	WRITE_ONCE(*p, v);			\
+} while (0)
+
+#define smp_load_acquire(p)			\
+({						\
+	typeof(*p) ___p1 = READ_ONCE(*p);	\
+	barrier();				\
+	___p1;					\
+})
+
 #endif /* !(__TOOLS_LINUX_SPARC64_BARRIER_H) */
diff --git a/tools/arch/x86/include/asm/barrier.h b/tools/arch/x86/include/asm/barrier.h
index 8774dee27471..58919868473c 100644
--- a/tools/arch/x86/include/asm/barrier.h
+++ b/tools/arch/x86/include/asm/barrier.h
@@ -26,4 +26,18 @@
 #define wmb()	asm volatile("sfence" ::: "memory")
 #endif
 
+#if defined(__x86_64__)
+#define smp_store_release(p, v)			\
+do {						\
+	barrier();				\
+	WRITE_ONCE(*p, v);			\
+} while (0)
+
+#define smp_load_acquire(p)			\
+({						\
+	typeof(*p) ___p1 = READ_ONCE(*p);	\
+	barrier();				\
+	___p1;					\
+})
+#endif /* defined(__x86_64__) */
 #endif /* _TOOLS_LINUX_ASM_X86_BARRIER_H */
diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h
index fd23d5778ea1..dabfcf7c3941 100644
--- a/tools/arch/x86/include/uapi/asm/kvm.h
+++ b/tools/arch/x86/include/uapi/asm/kvm.h
@@ -288,6 +288,7 @@ struct kvm_reinject_control {
 #define KVM_VCPUEVENT_VALID_SIPI_VECTOR	0x00000002
 #define KVM_VCPUEVENT_VALID_SHADOW	0x00000004
 #define KVM_VCPUEVENT_VALID_SMM		0x00000008
+#define KVM_VCPUEVENT_VALID_PAYLOAD	0x00000010
 
 /* Interrupt shadow states */
 #define KVM_X86_SHADOW_INT_MOV_SS	0x01
@@ -299,7 +300,7 @@ struct kvm_vcpu_events {
 		__u8 injected;
 		__u8 nr;
 		__u8 has_error_code;
-		__u8 pad;
+		__u8 pending;
 		__u32 error_code;
 	} exception;
 	struct {
@@ -322,7 +323,9 @@ struct kvm_vcpu_events {
 		__u8 smm_inside_nmi;
 		__u8 latched_init;
 	} smi;
-	__u32 reserved[9];
+	__u8 reserved[27];
+	__u8 exception_has_payload;
+	__u64 exception_payload;
 };
 
 /* for KVM_GET/SET_DEBUGREGS */
@@ -381,6 +384,7 @@ struct kvm_sync_regs {
 
 #define KVM_STATE_NESTED_GUEST_MODE	0x00000001
 #define KVM_STATE_NESTED_RUN_PENDING	0x00000002
+#define KVM_STATE_NESTED_EVMCS		0x00000004
 
 #define KVM_STATE_NESTED_SMM_GUEST_MODE	0x00000001
 #define KVM_STATE_NESTED_SMM_VMXON	0x00000002
diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst
index a6258bc8ec4f..f55a2daed59b 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-map.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst
@@ -15,13 +15,15 @@ SYNOPSIS
 	*OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } }
 
 	*COMMANDS* :=
-	{ **show** | **list** | **dump** | **update** | **lookup** | **getnext** | **delete**
-	| **pin** | **help** }
+	{ **show** | **list** | **create** | **dump** | **update** | **lookup** | **getnext**
+	| **delete** | **pin** | **help** }
 
 MAP COMMANDS
 =============
 
 |	**bpftool** **map { show | list }**   [*MAP*]
+|	**bpftool** **map create**     *FILE* **type** *TYPE* **key** *KEY_SIZE* **value** *VALUE_SIZE* \
+|		**entries** *MAX_ENTRIES* **name** *NAME* [**flags** *FLAGS*] [**dev** *NAME*]
 |	**bpftool** **map dump**       *MAP*
 |	**bpftool** **map update**     *MAP*  **key** *DATA*   **value** *VALUE* [*UPDATE_FLAGS*]
 |	**bpftool** **map lookup**     *MAP*  **key** *DATA*
@@ -36,6 +38,11 @@ MAP COMMANDS
 |	*PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* }
 |	*VALUE* := { *DATA* | *MAP* | *PROG* }
 |	*UPDATE_FLAGS* := { **any** | **exist** | **noexist** }
+|	*TYPE* := { **hash** | **array** | **prog_array** | **perf_event_array** | **percpu_hash**
+|		| **percpu_array** | **stack_trace** | **cgroup_array** | **lru_hash**
+|		| **lru_percpu_hash** | **lpm_trie** | **array_of_maps** | **hash_of_maps**
+|		| **devmap** | **sockmap** | **cpumap** | **xskmap** | **sockhash**
+|		| **cgroup_storage** | **reuseport_sockarray** | **percpu_cgroup_storage** }
 
 DESCRIPTION
 ===========
@@ -47,6 +54,10 @@ DESCRIPTION
 		  Output will start with map ID followed by map type and
 		  zero or more named attributes (depending on kernel version).
 
+	**bpftool map create** *FILE* **type** *TYPE* **key** *KEY_SIZE* **value** *VALUE_SIZE*  **entries** *MAX_ENTRIES* **name** *NAME* [**flags** *FLAGS*] [**dev** *NAME*]
+		  Create a new map with given parameters and pin it to *bpffs*
+		  as *FILE*.
+
 	**bpftool map dump**    *MAP*
 		  Dump all entries in a given *MAP*.
 
@@ -75,7 +86,9 @@ DESCRIPTION
 	**bpftool map pin**     *MAP*  *FILE*
 		  Pin map *MAP* as *FILE*.
 
-		  Note: *FILE* must be located in *bpffs* mount.
+		  Note: *FILE* must be located in *bpffs* mount. It must not
+		  contain a dot character ('.'), which is reserved for future
+		  extensions of *bpffs*.
 
 	**bpftool** **map event_pipe** *MAP* [**cpu** *N* **index** *M*]
 		  Read events from a BPF_MAP_TYPE_PERF_EVENT_ARRAY map.
diff --git a/tools/bpf/bpftool/Documentation/bpftool-net.rst b/tools/bpf/bpftool/Documentation/bpftool-net.rst
new file mode 100644
index 000000000000..408ec30d8872
--- /dev/null
+++ b/tools/bpf/bpftool/Documentation/bpftool-net.rst
@@ -0,0 +1,139 @@
+================
+bpftool-net
+================
+-------------------------------------------------------------------------------
+tool for inspection of netdev/tc related bpf prog attachments
+-------------------------------------------------------------------------------
+
+:Manual section: 8
+
+SYNOPSIS
+========
+
+	**bpftool** [*OPTIONS*] **net** *COMMAND*
+
+	*OPTIONS* := { [{ **-j** | **--json** }] [{ **-p** | **--pretty** }] }
+
+	*COMMANDS* :=
+	{ **show** | **list** } [ **dev** name ] | **help**
+
+NET COMMANDS
+============
+
+|	**bpftool** **net { show | list } [ dev name ]**
+|	**bpftool** **net help**
+
+DESCRIPTION
+===========
+	**bpftool net { show | list } [ dev name ]**
+                  List bpf program attachments in the kernel networking subsystem.
+
+                  Currently, only device driver xdp attachments and tc filter
+                  classification/action attachments are implemented, i.e., for
+                  program types **BPF_PROG_TYPE_SCHED_CLS**,
+                  **BPF_PROG_TYPE_SCHED_ACT** and **BPF_PROG_TYPE_XDP**.
+                  For programs attached to a particular cgroup, e.g.,
+                  **BPF_PROG_TYPE_CGROUP_SKB**, **BPF_PROG_TYPE_CGROUP_SOCK**,
+                  **BPF_PROG_TYPE_SOCK_OPS** and **BPF_PROG_TYPE_CGROUP_SOCK_ADDR**,
+                  users can use **bpftool cgroup** to dump cgroup attachments.
+                  For sk_{filter, skb, msg, reuseport} and lwt/seg6
+                  bpf programs, users should consult other tools, e.g., iproute2.
+
+                  The current output will start with all xdp program attachments, followed by
+                  all tc class/qdisc bpf program attachments. Both xdp programs and
+                  tc programs are ordered based on ifindex number. If multiple bpf
+                  programs attached to the same networking device through **tc filter**,
+                  the order will be first all bpf programs attached to tc classes, then
+                  all bpf programs attached to non clsact qdiscs, and finally all
+                  bpf programs attached to root and clsact qdisc.
+
+	**bpftool net help**
+		  Print short help message.
+
+OPTIONS
+=======
+	-h, --help
+		  Print short generic help message (similar to **bpftool help**).
+
+	-v, --version
+		  Print version number (similar to **bpftool version**).
+
+	-j, --json
+		  Generate JSON output. For commands that cannot produce JSON, this
+		  option has no effect.
+
+	-p, --pretty
+		  Generate human-readable JSON output. Implies **-j**.
+
+EXAMPLES
+========
+
+| **# bpftool net**
+
+::
+
+      xdp:
+      eth0(2) driver id 198
+
+      tc:
+      eth0(2) htb name prefix_matcher.o:[cls_prefix_matcher_htb] id 111727 act []
+      eth0(2) clsact/ingress fbflow_icmp id 130246 act []
+      eth0(2) clsact/egress prefix_matcher.o:[cls_prefix_matcher_clsact] id 111726
+      eth0(2) clsact/egress cls_fg_dscp id 108619 act []
+      eth0(2) clsact/egress fbflow_egress id 130245
+
+|
+| **# bpftool -jp net**
+
+::
+
+    [{
+            "xdp": [{
+                    "devname": "eth0",
+                    "ifindex": 2,
+                    "mode": "driver",
+                    "id": 198
+                }
+            ],
+            "tc": [{
+                    "devname": "eth0",
+                    "ifindex": 2,
+                    "kind": "htb",
+                    "name": "prefix_matcher.o:[cls_prefix_matcher_htb]",
+                    "id": 111727,
+                    "act": []
+                },{
+                    "devname": "eth0",
+                    "ifindex": 2,
+                    "kind": "clsact/ingress",
+                    "name": "fbflow_icmp",
+                    "id": 130246,
+                    "act": []
+                },{
+                    "devname": "eth0",
+                    "ifindex": 2,
+                    "kind": "clsact/egress",
+                    "name": "prefix_matcher.o:[cls_prefix_matcher_clsact]",
+                    "id": 111726,
+                },{
+                    "devname": "eth0",
+                    "ifindex": 2,
+                    "kind": "clsact/egress",
+                    "name": "cls_fg_dscp",
+                    "id": 108619,
+                    "act": []
+                },{
+                    "devname": "eth0",
+                    "ifindex": 2,
+                    "kind": "clsact/egress",
+                    "name": "fbflow_egress",
+                    "id": 130245,
+                }
+            ]
+        }
+    ]
+
+
+SEE ALSO
+========
+	**bpftool**\ (8), **bpftool-prog**\ (8), **bpftool-map**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
index 64156a16d530..ac4e904b10fb 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
@@ -25,6 +25,8 @@ MAP COMMANDS
 |	**bpftool** **prog dump jited**  *PROG* [{**file** *FILE* | **opcodes**}]
 |	**bpftool** **prog pin** *PROG* *FILE*
 |	**bpftool** **prog load** *OBJ* *FILE* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*]
+|       **bpftool** **prog attach** *PROG* *ATTACH_TYPE* *MAP*
+|       **bpftool** **prog detach** *PROG* *ATTACH_TYPE* *MAP*
 |	**bpftool** **prog help**
 |
 |	*MAP* := { **id** *MAP_ID* | **pinned** *FILE* }
@@ -37,6 +39,7 @@ MAP COMMANDS
 |		**cgroup/bind4** | **cgroup/bind6** | **cgroup/post_bind4** | **cgroup/post_bind6** |
 |		**cgroup/connect4** | **cgroup/connect6** | **cgroup/sendmsg4** | **cgroup/sendmsg6**
 |	}
+|       *ATTACH_TYPE* := { **msg_verdict** | **skb_verdict** | **skb_parse** }
 
 
 DESCRIPTION
@@ -72,7 +75,9 @@ DESCRIPTION
 	**bpftool prog pin** *PROG* *FILE*
 		  Pin program *PROG* as *FILE*.
 
-		  Note: *FILE* must be located in *bpffs* mount.
+		  Note: *FILE* must be located in *bpffs* mount. It must not
+		  contain a dot character ('.'), which is reserved for future
+		  extensions of *bpffs*.
 
 	**bpftool prog load** *OBJ* *FILE* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*]
 		  Load bpf program from binary *OBJ* and pin as *FILE*.
@@ -88,7 +93,17 @@ DESCRIPTION
 		  If **dev** *NAME* is specified program will be loaded onto
 		  given networking device (offload).
 
-		  Note: *FILE* must be located in *bpffs* mount.
+		  Note: *FILE* must be located in *bpffs* mount. It must not
+		  contain a dot character ('.'), which is reserved for future
+		  extensions of *bpffs*.
+
+        **bpftool prog attach** *PROG* *ATTACH_TYPE* *MAP*
+                  Attach bpf program *PROG* (with type specified by *ATTACH_TYPE*)
+                  to the map *MAP*.
+
+        **bpftool prog detach** *PROG* *ATTACH_TYPE* *MAP*
+                  Detach bpf program *PROG* (with type specified by *ATTACH_TYPE*)
+                  from the map *MAP*.
 
 	**bpftool prog help**
 		  Print short help message.
diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst
index b6f5d560460d..04cd4f92ab89 100644
--- a/tools/bpf/bpftool/Documentation/bpftool.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool.rst
@@ -16,22 +16,24 @@ SYNOPSIS
 
 	**bpftool** **version**
 
-	*OBJECT* := { **map** | **program** | **cgroup** | **perf** }
+	*OBJECT* := { **map** | **program** | **cgroup** | **perf** | **net** }
 
 	*OPTIONS* := { { **-V** | **--version** } | { **-h** | **--help** }
 	| { **-j** | **--json** } [{ **-p** | **--pretty** }] }
 
 	*MAP-COMMANDS* :=
-	{ **show** | **list** | **dump** | **update** | **lookup** | **getnext** | **delete**
-	| **pin** | **event_pipe** | **help** }
+	{ **show** | **list** | **create** | **dump** | **update** | **lookup** | **getnext**
+	| **delete** | **pin** | **event_pipe** | **help** }
 
 	*PROG-COMMANDS* := { **show** | **list** | **dump jited** | **dump xlated** | **pin**
-	| **load** | **help** }
+	| **load** | **attach** | **detach** | **help** }
 
 	*CGROUP-COMMANDS* := { **show** | **list** | **attach** | **detach** | **help** }
 
 	*PERF-COMMANDS* := { **show** | **list** | **help** }
 
+	*NET-COMMANDS* := { **show** | **list** | **help** }
+
 DESCRIPTION
 ===========
 	*bpftool* allows for inspection and simple modification of BPF objects
@@ -55,7 +57,11 @@ OPTIONS
 	-p, --pretty
 		  Generate human-readable JSON output. Implies **-j**.
 
+	-m, --mapcompat
+		  Allow loading maps with unknown map definitions.
+
+
 SEE ALSO
 ========
 	**bpftool-map**\ (8), **bpftool-prog**\ (8), **bpftool-cgroup**\ (8)
-        **bpftool-perf**\ (8)
+        **bpftool-perf**\ (8), **bpftool-net**\ (8)
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index 74288a2197ab..dac7eff4c7e5 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -46,6 +46,13 @@ CFLAGS += -DPACKAGE='"bpftool"' -D__EXPORTED_HEADERS__ \
 	-I$(srctree)/tools/lib/bpf \
 	-I$(srctree)/tools/perf
 CFLAGS += -DBPFTOOL_VERSION='"$(BPFTOOL_VERSION)"'
+ifneq ($(EXTRA_CFLAGS),)
+CFLAGS += $(EXTRA_CFLAGS)
+endif
+ifneq ($(EXTRA_LDFLAGS),)
+LDFLAGS += $(EXTRA_LDFLAGS)
+endif
+
 LIBS = -lelf -lbfd -lopcodes $(LIBBPF)
 
 INSTALL ?= install
@@ -90,7 +97,7 @@ $(OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c
 	$(QUIET_CC)$(COMPILE.c) -MMD -o $@ $<
 
 $(OUTPUT)bpftool: $(OBJS) $(LIBBPF)
-	$(QUIET_LINK)$(CC) $(CFLAGS) -o $@ $^ $(LIBS)
+	$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^ $(LIBS)
 
 $(OUTPUT)%.o: %.c
 	$(QUIET_CC)$(COMPILE.c) -MMD -o $@ $<
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index 598066c40191..3f78e6404589 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -143,7 +143,7 @@ _bpftool_map_update_map_type()
     local type
     type=$(bpftool -jp map show $keyword $ref | \
         command sed -n 's/.*"type": "\(.*\)",$/\1/p')
-    printf $type
+    [[ -n $type ]] && printf $type
 }
 
 _bpftool_map_update_get_id()
@@ -184,7 +184,7 @@ _bpftool()
 
     # Deal with options
     if [[ ${words[cword]} == -* ]]; then
-        local c='--version --json --pretty --bpffs'
+        local c='--version --json --pretty --bpffs --mapcompat'
         COMPREPLY=( $( compgen -W "$c" -- "$cur" ) )
         return 0
     fi
@@ -292,6 +292,23 @@ _bpftool()
                     fi
                     return 0
                     ;;
+                attach|detach)
+                    if [[ ${#words[@]} == 7 ]]; then
+                        COMPREPLY=( $( compgen -W "id pinned" -- "$cur" ) )
+                        return 0
+                    fi
+
+                    if [[ ${#words[@]} == 6 ]]; then
+                        COMPREPLY=( $( compgen -W "msg_verdict skb_verdict skb_parse" -- "$cur" ) )
+                        return 0
+                    fi
+
+                    if [[ $prev == "$command" ]]; then
+                        COMPREPLY=( $( compgen -W "id pinned" -- "$cur" ) )
+                        return 0
+                    fi
+                    return 0
+                    ;;
                 load)
                     local obj
 
@@ -347,7 +364,7 @@ _bpftool()
                     ;;
                 *)
                     [[ $prev == $object ]] && \
-                        COMPREPLY=( $( compgen -W 'dump help pin load \
+                        COMPREPLY=( $( compgen -W 'dump help pin attach detach load \
                             show list' -- "$cur" ) )
                     ;;
             esac
@@ -370,6 +387,42 @@ _bpftool()
                             ;;
                     esac
                     ;;
+                create)
+                    case $prev in
+                        $command)
+                            _filedir
+                            return 0
+                            ;;
+                        type)
+                            COMPREPLY=( $( compgen -W 'hash array prog_array \
+                                perf_event_array percpu_hash percpu_array \
+                                stack_trace cgroup_array lru_hash \
+                                lru_percpu_hash lpm_trie array_of_maps \
+                                hash_of_maps devmap sockmap cpumap xskmap \
+                                sockhash cgroup_storage reuseport_sockarray \
+                                percpu_cgroup_storage' -- \
+                                                   "$cur" ) )
+                            return 0
+                            ;;
+                        key|value|flags|name|entries)
+                            return 0
+                            ;;
+                        dev)
+                            _sysfs_get_netdevs
+                            return 0
+                            ;;
+                        *)
+                            _bpftool_once_attr 'type'
+                            _bpftool_once_attr 'key'
+                            _bpftool_once_attr 'value'
+                            _bpftool_once_attr 'entries'
+                            _bpftool_once_attr 'name'
+                            _bpftool_once_attr 'flags'
+                            _bpftool_once_attr 'dev'
+                            return 0
+                            ;;
+                    esac
+                    ;;
                 lookup|getnext|delete)
                     case $prev in
                         $command)
@@ -483,7 +536,7 @@ _bpftool()
                 *)
                     [[ $prev == $object ]] && \
                         COMPREPLY=( $( compgen -W 'delete dump getnext help \
-                            lookup pin event_pipe show list update' -- \
+                            lookup pin event_pipe show list update create' -- \
                             "$cur" ) )
                     ;;
             esac
@@ -494,10 +547,10 @@ _bpftool()
                     _filedir
                     return 0
                     ;;
-		tree)
-		    _filedir
-		    return 0
-		    ;;
+                tree)
+                    _filedir
+                    return 0
+                    ;;
                 attach|detach)
                     local ATTACH_TYPES='ingress egress sock_create sock_ops \
                         device bind4 bind6 post_bind4 post_bind6 connect4 \
@@ -552,6 +605,15 @@ _bpftool()
                     ;;
             esac
             ;;
+        net)
+            case $command in
+                *)
+                    [[ $prev == $object ]] && \
+                        COMPREPLY=( $( compgen -W 'help \
+                            show list' -- "$cur" ) )
+                    ;;
+            esac
+            ;;
     esac
 } &&
 complete -F _bpftool bpftool
diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
index b3a0709ea7ed..25af85304ebe 100644
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -554,7 +554,9 @@ static int read_sysfs_netdev_hex_int(char *devname, const char *entry_name)
 	return read_sysfs_hex_int(full_path);
 }
 
-const char *ifindex_to_bfd_name_ns(__u32 ifindex, __u64 ns_dev, __u64 ns_ino)
+const char *
+ifindex_to_bfd_params(__u32 ifindex, __u64 ns_dev, __u64 ns_ino,
+		      const char **opt)
 {
 	char devname[IF_NAMESIZE];
 	int vendor_id;
@@ -579,6 +581,7 @@ const char *ifindex_to_bfd_name_ns(__u32 ifindex, __u64 ns_dev, __u64 ns_ino)
 		    device_id != 0x6000 &&
 		    device_id != 0x6003)
 			p_info("Unknown NFP device ID, assuming it is NFP-6xxx arch");
+		*opt = "ctx4";
 		return "NFP-6xxx";
 	default:
 		p_err("Can't get bfd arch name for device vendor id 0x%04x",
@@ -618,3 +621,24 @@ void print_dev_json(__u32 ifindex, __u64 ns_dev, __u64 ns_inode)
 		jsonw_string_field(json_wtr, "ifname", name);
 	jsonw_end_object(json_wtr);
 }
+
+int parse_u32_arg(int *argc, char ***argv, __u32 *val, const char *what)
+{
+	char *endptr;
+
+	NEXT_ARGP();
+
+	if (*val) {
+		p_err("%s already specified", what);
+		return -1;
+	}
+
+	*val = strtoul(**argv, &endptr, 0);
+	if (*endptr) {
+		p_err("can't parse %s as %s", **argv, what);
+		return -1;
+	}
+	NEXT_ARGP();
+
+	return 0;
+}
diff --git a/tools/bpf/bpftool/jit_disasm.c b/tools/bpf/bpftool/jit_disasm.c
index 87439320ef70..c75ffd9ce2bb 100644
--- a/tools/bpf/bpftool/jit_disasm.c
+++ b/tools/bpf/bpftool/jit_disasm.c
@@ -77,7 +77,7 @@ static int fprintf_json(void *out, const char *fmt, ...)
 }
 
 void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes,
-		       const char *arch)
+		       const char *arch, const char *disassembler_options)
 {
 	disassembler_ftype disassemble;
 	struct disassemble_info info;
@@ -116,6 +116,8 @@ void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes,
 
 	info.arch = bfd_get_arch(bfdf);
 	info.mach = bfd_get_mach(bfdf);
+	if (disassembler_options)
+		info.disassembler_options = disassembler_options;
 	info.buffer = image;
 	info.buffer_length = len;
 
diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index d15a62be6cf0..75a3296dc0bc 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -55,6 +55,7 @@ json_writer_t *json_wtr;
 bool pretty_output;
 bool json_output;
 bool show_pinned;
+int bpf_flags;
 struct pinned_obj_table prog_table;
 struct pinned_obj_table map_table;
 
@@ -85,7 +86,7 @@ static int do_help(int argc, char **argv)
 		"       %s batch file FILE\n"
 		"       %s version\n"
 		"\n"
-		"       OBJECT := { prog | map | cgroup | perf }\n"
+		"       OBJECT := { prog | map | cgroup | perf | net }\n"
 		"       " HELP_SPEC_OPTIONS "\n"
 		"",
 		bin_name, bin_name, bin_name);
@@ -215,6 +216,7 @@ static const struct cmd cmds[] = {
 	{ "map",	do_map },
 	{ "cgroup",	do_cgroup },
 	{ "perf",	do_perf },
+	{ "net",	do_net },
 	{ "version",	do_version },
 	{ 0 }
 };
@@ -319,7 +321,8 @@ static int do_batch(int argc, char **argv)
 		p_err("reading batch file failed: %s", strerror(errno));
 		err = -1;
 	} else {
-		p_info("processed %d commands", lines);
+		if (!json_output)
+			printf("processed %d commands\n", lines);
 		err = 0;
 	}
 err_close:
@@ -340,6 +343,7 @@ int main(int argc, char **argv)
 		{ "pretty",	no_argument,	NULL,	'p' },
 		{ "version",	no_argument,	NULL,	'V' },
 		{ "bpffs",	no_argument,	NULL,	'f' },
+		{ "mapcompat",	no_argument,	NULL,	'm' },
 		{ 0 }
 	};
 	int opt, ret;
@@ -354,7 +358,7 @@ int main(int argc, char **argv)
 	hash_init(map_table.table);
 
 	opterr = 0;
-	while ((opt = getopt_long(argc, argv, "Vhpjf",
+	while ((opt = getopt_long(argc, argv, "Vhpjfm",
 				  options, NULL)) >= 0) {
 		switch (opt) {
 		case 'V':
@@ -378,6 +382,9 @@ int main(int argc, char **argv)
 		case 'f':
 			show_pinned = true;
 			break;
+		case 'm':
+			bpf_flags = MAPS_RELAX_COMPAT;
+			break;
 		default:
 			p_err("unrecognized option '%s'", argv[optind - 1]);
 			if (json_output)
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index 238e734d75b3..28322ace2856 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -74,7 +74,7 @@
 #define HELP_SPEC_PROGRAM						\
 	"PROG := { id PROG_ID | pinned FILE | tag PROG_TAG }"
 #define HELP_SPEC_OPTIONS						\
-	"OPTIONS := { {-j|--json} [{-p|--pretty}] | {-f|--bpffs} }"
+	"OPTIONS := { {-j|--json} [{-p|--pretty}] | {-f|--bpffs} | {-m|--mapcompat}"
 #define HELP_SPEC_MAP							\
 	"MAP := { id MAP_ID | pinned FILE }"
 
@@ -89,6 +89,7 @@ extern const char *bin_name;
 extern json_writer_t *json_wtr;
 extern bool json_output;
 extern bool show_pinned;
+extern int bpf_flags;
 extern struct pinned_obj_table prog_table;
 extern struct pinned_obj_table map_table;
 
@@ -136,19 +137,23 @@ int do_map(int argc, char **arg);
 int do_event_pipe(int argc, char **argv);
 int do_cgroup(int argc, char **arg);
 int do_perf(int argc, char **arg);
+int do_net(int argc, char **arg);
 
+int parse_u32_arg(int *argc, char ***argv, __u32 *val, const char *what);
 int prog_parse_fd(int *argc, char ***argv);
 int map_parse_fd(int *argc, char ***argv);
 int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len);
 
 void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes,
-		       const char *arch);
+		       const char *arch, const char *disassembler_options);
 void print_data_json(uint8_t *data, size_t len);
 void print_hex_data_json(uint8_t *data, size_t len);
 
 unsigned int get_page_size(void);
 unsigned int get_possible_cpus(void);
-const char *ifindex_to_bfd_name_ns(__u32 ifindex, __u64 ns_dev, __u64 ns_ino);
+const char *
+ifindex_to_bfd_params(__u32 ifindex, __u64 ns_dev, __u64 ns_ino,
+		      const char **opt);
 
 struct btf_dumper {
 	const struct btf *btf;
@@ -165,4 +170,11 @@ struct btf_dumper {
  */
 int btf_dumper_type(const struct btf_dumper *d, __u32 type_id,
 		    const void *data);
+
+struct nlattr;
+struct ifinfomsg;
+struct tcmsg;
+int do_xdp_dump(struct ifinfomsg *ifinfo, struct nlattr **tb);
+int do_filter_dump(struct tcmsg *ifinfo, struct nlattr **tb, const char *kind,
+		   const char *devname, int ifindex);
 #endif
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index b455930a3eaf..7bf38f0e152e 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -36,6 +36,7 @@
 #include <fcntl.h>
 #include <linux/err.h>
 #include <linux/kernel.h>
+#include <net/if.h>
 #include <stdbool.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -71,13 +72,16 @@ static const char * const map_type_name[] = {
 	[BPF_MAP_TYPE_XSKMAP]           = "xskmap",
 	[BPF_MAP_TYPE_SOCKHASH]		= "sockhash",
 	[BPF_MAP_TYPE_CGROUP_STORAGE]	= "cgroup_storage",
+	[BPF_MAP_TYPE_REUSEPORT_SOCKARRAY] = "reuseport_sockarray",
+	[BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE]	= "percpu_cgroup_storage",
 };
 
 static bool map_is_per_cpu(__u32 type)
 {
 	return type == BPF_MAP_TYPE_PERCPU_HASH ||
 	       type == BPF_MAP_TYPE_PERCPU_ARRAY ||
-	       type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
+	       type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
+	       type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE;
 }
 
 static bool map_is_map_of_maps(__u32 type)
@@ -91,6 +95,17 @@ static bool map_is_map_of_progs(__u32 type)
 	return type == BPF_MAP_TYPE_PROG_ARRAY;
 }
 
+static int map_type_from_str(const char *type)
+{
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(map_type_name); i++)
+		/* Don't allow prefixing in case of possible future shadowing */
+		if (map_type_name[i] && !strcmp(map_type_name[i], type))
+			return i;
+	return -1;
+}
+
 static void *alloc_value(struct bpf_map_info *info)
 {
 	if (map_is_per_cpu(info->type))
@@ -170,9 +185,28 @@ static int do_dump_btf(const struct btf_dumper *d,
 	if (ret)
 		goto err_end_obj;
 
-	jsonw_name(d->jw, "value");
+	if (!map_is_per_cpu(map_info->type)) {
+		jsonw_name(d->jw, "value");
+		ret = btf_dumper_type(d, map_info->btf_value_type_id, value);
+	} else {
+		unsigned int i, n, step;
 
-	ret = btf_dumper_type(d, map_info->btf_value_type_id, value);
+		jsonw_name(d->jw, "values");
+		jsonw_start_array(d->jw);
+		n = get_possible_cpus();
+		step = round_up(map_info->value_size, 8);
+		for (i = 0; i < n; i++) {
+			jsonw_start_object(d->jw);
+			jsonw_int_field(d->jw, "cpu", i);
+			jsonw_name(d->jw, "value");
+			ret = btf_dumper_type(d, map_info->btf_value_type_id,
+					      value + i * step);
+			jsonw_end_object(d->jw);
+			if (ret)
+				break;
+		}
+		jsonw_end_array(d->jw);
+	}
 
 err_end_obj:
 	/* end of key-value pair */
@@ -299,11 +333,40 @@ static void print_entry_json(struct bpf_map_info *info, unsigned char *key,
 			jsonw_end_object(json_wtr);
 		}
 		jsonw_end_array(json_wtr);
+		if (btf) {
+			struct btf_dumper d = {
+				.btf = btf,
+				.jw = json_wtr,
+				.is_plain_text = false,
+			};
+
+			jsonw_name(json_wtr, "formatted");
+			do_dump_btf(&d, info, key, value);
+		}
 	}
 
 	jsonw_end_object(json_wtr);
 }
 
+static void print_entry_error(struct bpf_map_info *info, unsigned char *key,
+			      const char *value)
+{
+	int value_size = strlen(value);
+	bool single_line, break_names;
+
+	break_names = info->key_size > 16 || value_size > 16;
+	single_line = info->key_size + value_size <= 24 && !break_names;
+
+	printf("key:%c", break_names ? '\n' : ' ');
+	fprint_hex(stdout, key, info->key_size, " ");
+
+	printf(single_line ? "  " : "\n");
+
+	printf("value:%c%s", break_names ? '\n' : ' ', value);
+
+	printf("\n");
+}
+
 static void print_entry_plain(struct bpf_map_info *info, unsigned char *key,
 			      unsigned char *value)
 {
@@ -626,6 +689,54 @@ static int do_show(int argc, char **argv)
 	return errno == ENOENT ? 0 : -1;
 }
 
+static int dump_map_elem(int fd, void *key, void *value,
+			 struct bpf_map_info *map_info, struct btf *btf,
+			 json_writer_t *btf_wtr)
+{
+	int num_elems = 0;
+	int lookup_errno;
+
+	if (!bpf_map_lookup_elem(fd, key, value)) {
+		if (json_output) {
+			print_entry_json(map_info, key, value, btf);
+		} else {
+			if (btf) {
+				struct btf_dumper d = {
+					.btf = btf,
+					.jw = btf_wtr,
+					.is_plain_text = true,
+				};
+
+				do_dump_btf(&d, map_info, key, value);
+			} else {
+				print_entry_plain(map_info, key, value);
+			}
+			num_elems++;
+		}
+		return num_elems;
+	}
+
+	/* lookup error handling */
+	lookup_errno = errno;
+
+	if (map_is_map_of_maps(map_info->type) ||
+	    map_is_map_of_progs(map_info->type))
+		return 0;
+
+	if (json_output) {
+		jsonw_name(json_wtr, "key");
+		print_hex_data_json(key, map_info->key_size);
+		jsonw_name(json_wtr, "value");
+		jsonw_start_object(json_wtr);
+		jsonw_string_field(json_wtr, "error", strerror(lookup_errno));
+		jsonw_end_object(json_wtr);
+	} else {
+		print_entry_error(map_info, key, strerror(lookup_errno));
+	}
+
+	return 0;
+}
+
 static int do_dump(int argc, char **argv)
 {
 	struct bpf_map_info info = {};
@@ -644,12 +755,6 @@ static int do_dump(int argc, char **argv)
 	if (fd < 0)
 		return -1;
 
-	if (map_is_map_of_maps(info.type) || map_is_map_of_progs(info.type)) {
-		p_err("Dumping maps of maps and program maps not supported");
-		close(fd);
-		return -1;
-	}
-
 	key = malloc(info.key_size);
 	value = alloc_value(&info);
 	if (!key || !value) {
@@ -687,40 +792,8 @@ static int do_dump(int argc, char **argv)
 				err = 0;
 			break;
 		}
-
-		if (!bpf_map_lookup_elem(fd, key, value)) {
-			if (json_output)
-				print_entry_json(&info, key, value, btf);
-			else
-				if (btf) {
-					struct btf_dumper d = {
-						.btf = btf,
-						.jw = btf_wtr,
-						.is_plain_text = true,
-					};
-
-					do_dump_btf(&d, &info, key, value);
-				} else {
-					print_entry_plain(&info, key, value);
-				}
-		} else {
-			if (json_output) {
-				jsonw_name(json_wtr, "key");
-				print_hex_data_json(key, info.key_size);
-				jsonw_name(json_wtr, "value");
-				jsonw_start_object(json_wtr);
-				jsonw_string_field(json_wtr, "error",
-						   "can't lookup element");
-				jsonw_end_object(json_wtr);
-			} else {
-				p_info("can't lookup element with key: ");
-				fprint_hex(stderr, key, info.key_size, " ");
-				fprintf(stderr, "\n");
-			}
-		}
-
+		num_elems += dump_map_elem(fd, key, value, &info, btf, btf_wtr);
 		prev_key = key;
-		num_elems++;
 	}
 
 	if (json_output)
@@ -997,6 +1070,92 @@ static int do_pin(int argc, char **argv)
 	return err;
 }
 
+static int do_create(int argc, char **argv)
+{
+	struct bpf_create_map_attr attr = { NULL, };
+	const char *pinfile;
+	int err, fd;
+
+	if (!REQ_ARGS(7))
+		return -1;
+	pinfile = GET_ARG();
+
+	while (argc) {
+		if (!REQ_ARGS(2))
+			return -1;
+
+		if (is_prefix(*argv, "type")) {
+			NEXT_ARG();
+
+			if (attr.map_type) {
+				p_err("map type already specified");
+				return -1;
+			}
+
+			attr.map_type = map_type_from_str(*argv);
+			if ((int)attr.map_type < 0) {
+				p_err("unrecognized map type: %s", *argv);
+				return -1;
+			}
+			NEXT_ARG();
+		} else if (is_prefix(*argv, "name")) {
+			NEXT_ARG();
+			attr.name = GET_ARG();
+		} else if (is_prefix(*argv, "key")) {
+			if (parse_u32_arg(&argc, &argv, &attr.key_size,
+					  "key size"))
+				return -1;
+		} else if (is_prefix(*argv, "value")) {
+			if (parse_u32_arg(&argc, &argv, &attr.value_size,
+					  "value size"))
+				return -1;
+		} else if (is_prefix(*argv, "entries")) {
+			if (parse_u32_arg(&argc, &argv, &attr.max_entries,
+					  "max entries"))
+				return -1;
+		} else if (is_prefix(*argv, "flags")) {
+			if (parse_u32_arg(&argc, &argv, &attr.map_flags,
+					  "flags"))
+				return -1;
+		} else if (is_prefix(*argv, "dev")) {
+			NEXT_ARG();
+
+			if (attr.map_ifindex) {
+				p_err("offload device already specified");
+				return -1;
+			}
+
+			attr.map_ifindex = if_nametoindex(*argv);
+			if (!attr.map_ifindex) {
+				p_err("unrecognized netdevice '%s': %s",
+				      *argv, strerror(errno));
+				return -1;
+			}
+			NEXT_ARG();
+		}
+	}
+
+	if (!attr.name) {
+		p_err("map name not specified");
+		return -1;
+	}
+
+	fd = bpf_create_map_xattr(&attr);
+	if (fd < 0) {
+		p_err("map create failed: %s", strerror(errno));
+		return -1;
+	}
+
+	err = do_pin_fd(fd, pinfile);
+	close(fd);
+	if (err)
+		return err;
+
+	if (json_output)
+		jsonw_null(json_wtr);
+	return 0;
+}
+
 static int do_help(int argc, char **argv)
 {
 	if (json_output) {
@@ -1006,6 +1165,9 @@ static int do_help(int argc, char **argv)
 
 	fprintf(stderr,
 		"Usage: %s %s { show | list }   [MAP]\n"
+		"       %s %s create     FILE type TYPE key KEY_SIZE value VALUE_SIZE \\\n"
+		"                              entries MAX_ENTRIES name NAME [flags FLAGS] \\\n"
+		"                              [dev NAME]\n"
 		"       %s %s dump       MAP\n"
 		"       %s %s update     MAP  key DATA value VALUE [UPDATE_FLAGS]\n"
 		"       %s %s lookup     MAP  key DATA\n"
@@ -1020,11 +1182,17 @@ static int do_help(int argc, char **argv)
 		"       " HELP_SPEC_PROGRAM "\n"
 		"       VALUE := { DATA | MAP | PROG }\n"
 		"       UPDATE_FLAGS := { any | exist | noexist }\n"
+		"       TYPE := { hash | array | prog_array | perf_event_array | percpu_hash |\n"
+		"                 percpu_array | stack_trace | cgroup_array | lru_hash |\n"
+		"                 lru_percpu_hash | lpm_trie | array_of_maps | hash_of_maps |\n"
+		"                 devmap | sockmap | cpumap | xskmap | sockhash |\n"
+		"                 cgroup_storage | reuseport_sockarray | percpu_cgroup_storage }\n"
 		"       " HELP_SPEC_OPTIONS "\n"
 		"",
 		bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
 		bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
-		bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2]);
+		bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
+		bin_name, argv[-2]);
 
 	return 0;
 }
@@ -1040,6 +1208,7 @@ static const struct cmd cmds[] = {
 	{ "delete",	do_delete },
 	{ "pin",	do_pin },
 	{ "event_pipe",	do_event_pipe },
+	{ "create",	do_create },
 	{ 0 }
 };
 
diff --git a/tools/bpf/bpftool/map_perf_ring.c b/tools/bpf/bpftool/map_perf_ring.c
index 6d41323be291..bdaf4062e26e 100644
--- a/tools/bpf/bpftool/map_perf_ring.c
+++ b/tools/bpf/bpftool/map_perf_ring.c
@@ -50,15 +50,17 @@ static void int_exit(int signo)
 	stop = true;
 }
 
-static enum bpf_perf_event_ret print_bpf_output(void *event, void *priv)
+static enum bpf_perf_event_ret
+print_bpf_output(struct perf_event_header *event, void *private_data)
 {
-	struct event_ring_info *ring = priv;
-	struct perf_event_sample *e = event;
+	struct perf_event_sample *e = container_of(event, struct perf_event_sample,
+						   header);
+	struct event_ring_info *ring = private_data;
 	struct {
 		struct perf_event_header header;
 		__u64 id;
 		__u64 lost;
-	} *lost = event;
+	} *lost = (typeof(lost))event;
 
 	if (json_output) {
 		jsonw_start_object(json_wtr);
diff --git a/tools/bpf/bpftool/net.c b/tools/bpf/bpftool/net.c
new file mode 100644
index 000000000000..d441bb7035ca
--- /dev/null
+++ b/tools/bpf/bpftool/net.c
@@ -0,0 +1,275 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright (C) 2018 Facebook
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <libbpf.h>
+#include <net/if.h>
+#include <linux/if.h>
+#include <linux/rtnetlink.h>
+#include <linux/tc_act/tc_bpf.h>
+#include <sys/socket.h>
+
+#include <bpf.h>
+#include <nlattr.h>
+#include "main.h"
+#include "netlink_dumper.h"
+
+struct ip_devname_ifindex {
+	char	devname[64];
+	int	ifindex;
+};
+
+struct bpf_netdev_t {
+	struct ip_devname_ifindex *devices;
+	int	used_len;
+	int	array_len;
+	int	filter_idx;
+};
+
+struct tc_kind_handle {
+	char	kind[64];
+	int	handle;
+};
+
+struct bpf_tcinfo_t {
+	struct tc_kind_handle	*handle_array;
+	int			used_len;
+	int			array_len;
+	bool			is_qdisc;
+};
+
+struct bpf_filter_t {
+	const char	*kind;
+	const char	*devname;
+	int		ifindex;
+};
+
+static int dump_link_nlmsg(void *cookie, void *msg, struct nlattr **tb)
+{
+	struct bpf_netdev_t *netinfo = cookie;
+	struct ifinfomsg *ifinfo = msg;
+
+	if (netinfo->filter_idx > 0 && netinfo->filter_idx != ifinfo->ifi_index)
+		return 0;
+
+	if (netinfo->used_len == netinfo->array_len) {
+		netinfo->devices = realloc(netinfo->devices,
+			(netinfo->array_len + 16) *
+			sizeof(struct ip_devname_ifindex));
+		if (!netinfo->devices)
+			return -ENOMEM;
+
+		netinfo->array_len += 16;
+	}
+	netinfo->devices[netinfo->used_len].ifindex = ifinfo->ifi_index;
+	snprintf(netinfo->devices[netinfo->used_len].devname,
+		 sizeof(netinfo->devices[netinfo->used_len].devname),
+		 "%s",
+		 tb[IFLA_IFNAME]
+			 ? libbpf_nla_getattr_str(tb[IFLA_IFNAME])
+			 : "");
+	netinfo->used_len++;
+
+	return do_xdp_dump(ifinfo, tb);
+}
+
+static int dump_class_qdisc_nlmsg(void *cookie, void *msg, struct nlattr **tb)
+{
+	struct bpf_tcinfo_t *tcinfo = cookie;
+	struct tcmsg *info = msg;
+
+	if (tcinfo->is_qdisc) {
+		/* skip clsact qdisc */
+		if (tb[TCA_KIND] &&
+		    strcmp(libbpf_nla_data(tb[TCA_KIND]), "clsact") == 0)
+			return 0;
+		if (info->tcm_handle == 0)
+			return 0;
+	}
+
+	if (tcinfo->used_len == tcinfo->array_len) {
+		tcinfo->handle_array = realloc(tcinfo->handle_array,
+			(tcinfo->array_len + 16) * sizeof(struct tc_kind_handle));
+		if (!tcinfo->handle_array)
+			return -ENOMEM;
+
+		tcinfo->array_len += 16;
+	}
+	tcinfo->handle_array[tcinfo->used_len].handle = info->tcm_handle;
+	snprintf(tcinfo->handle_array[tcinfo->used_len].kind,
+		 sizeof(tcinfo->handle_array[tcinfo->used_len].kind),
+		 "%s",
+		 tb[TCA_KIND]
+			 ? libbpf_nla_getattr_str(tb[TCA_KIND])
+			 : "unknown");
+	tcinfo->used_len++;
+
+	return 0;
+}
+
+static int dump_filter_nlmsg(void *cookie, void *msg, struct nlattr **tb)
+{
+	const struct bpf_filter_t *filter_info = cookie;
+
+	return do_filter_dump((struct tcmsg *)msg, tb, filter_info->kind,
+			      filter_info->devname, filter_info->ifindex);
+}
+
+static int show_dev_tc_bpf(int sock, unsigned int nl_pid,
+			   struct ip_devname_ifindex *dev)
+{
+	struct bpf_filter_t filter_info;
+	struct bpf_tcinfo_t tcinfo;
+	int i, handle, ret = 0;
+
+	tcinfo.handle_array = NULL;
+	tcinfo.used_len = 0;
+	tcinfo.array_len = 0;
+
+	tcinfo.is_qdisc = false;
+	ret = libbpf_nl_get_class(sock, nl_pid, dev->ifindex,
+				  dump_class_qdisc_nlmsg, &tcinfo);
+	if (ret)
+		goto out;
+
+	tcinfo.is_qdisc = true;
+	ret = libbpf_nl_get_qdisc(sock, nl_pid, dev->ifindex,
+				  dump_class_qdisc_nlmsg, &tcinfo);
+	if (ret)
+		goto out;
+
+	filter_info.devname = dev->devname;
+	filter_info.ifindex = dev->ifindex;
+	for (i = 0; i < tcinfo.used_len; i++) {
+		filter_info.kind = tcinfo.handle_array[i].kind;
+		ret = libbpf_nl_get_filter(sock, nl_pid, dev->ifindex,
+					   tcinfo.handle_array[i].handle,
+					   dump_filter_nlmsg, &filter_info);
+		if (ret)
+			goto out;
+	}
+
+	/* root, ingress and egress handle */
+	handle = TC_H_ROOT;
+	filter_info.kind = "root";
+	ret = libbpf_nl_get_filter(sock, nl_pid, dev->ifindex, handle,
+				   dump_filter_nlmsg, &filter_info);
+	if (ret)
+		goto out;
+
+	handle = TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS);
+	filter_info.kind = "clsact/ingress";
+	ret = libbpf_nl_get_filter(sock, nl_pid, dev->ifindex, handle,
+				   dump_filter_nlmsg, &filter_info);
+	if (ret)
+		goto out;
+
+	handle = TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_EGRESS);
+	filter_info.kind = "clsact/egress";
+	ret = libbpf_nl_get_filter(sock, nl_pid, dev->ifindex, handle,
+				   dump_filter_nlmsg, &filter_info);
+	if (ret)
+		goto out;
+
+out:
+	free(tcinfo.handle_array);
+	return 0;
+}
+
+static int do_show(int argc, char **argv)
+{
+	int i, sock, ret, filter_idx = -1;
+	struct bpf_netdev_t dev_array;
+	unsigned int nl_pid;
+	char err_buf[256];
+
+	if (argc == 2) {
+		if (strcmp(argv[0], "dev") != 0)
+			usage();
+		filter_idx = if_nametoindex(argv[1]);
+		if (filter_idx == 0) {
+			fprintf(stderr, "invalid dev name %s\n", argv[1]);
+			return -1;
+		}
+	} else if (argc != 0) {
+		usage();
+	}
+
+	sock = libbpf_netlink_open(&nl_pid);
+	if (sock < 0) {
+		fprintf(stderr, "failed to open netlink sock\n");
+		return -1;
+	}
+
+	dev_array.devices = NULL;
+	dev_array.used_len = 0;
+	dev_array.array_len = 0;
+	dev_array.filter_idx = filter_idx;
+
+	if (json_output)
+		jsonw_start_array(json_wtr);
+	NET_START_OBJECT;
+	NET_START_ARRAY("xdp", "%s:\n");
+	ret = libbpf_nl_get_link(sock, nl_pid, dump_link_nlmsg, &dev_array);
+	NET_END_ARRAY("\n");
+
+	if (!ret) {
+		NET_START_ARRAY("tc", "%s:\n");
+		for (i = 0; i < dev_array.used_len; i++) {
+			ret = show_dev_tc_bpf(sock, nl_pid,
+					      &dev_array.devices[i]);
+			if (ret)
+				break;
+		}
+		NET_END_ARRAY("\n");
+	}
+	NET_END_OBJECT;
+	if (json_output)
+		jsonw_end_array(json_wtr);
+
+	if (ret) {
+		if (json_output)
+			jsonw_null(json_wtr);
+		libbpf_strerror(ret, err_buf, sizeof(err_buf));
+		fprintf(stderr, "Error: %s\n", err_buf);
+	}
+	free(dev_array.devices);
+	close(sock);
+	return ret;
+}
+
+static int do_help(int argc, char **argv)
+{
+	if (json_output) {
+		jsonw_null(json_wtr);
+		return 0;
+	}
+
+	fprintf(stderr,
+		"Usage: %s %s { show | list } [dev <devname>]\n"
+		"       %s %s help\n"
+		"Note: Only xdp and tc attachments are supported now.\n"
+		"      For progs attached to cgroups, use \"bpftool cgroup\"\n"
+		"      to dump program attachments. For program types\n"
+		"      sk_{filter,skb,msg,reuseport} and lwt/seg6, please\n"
+		"      consult iproute2.\n",
+		bin_name, argv[-2], bin_name, argv[-2]);
+
+	return 0;
+}
+
+static const struct cmd cmds[] = {
+	{ "show",	do_show },
+	{ "list",	do_show },
+	{ "help",	do_help },
+	{ 0 }
+};
+
+int do_net(int argc, char **argv)
+{
+	return cmd_select(cmds, argc, argv, do_help);
+}
diff --git a/tools/bpf/bpftool/netlink_dumper.c b/tools/bpf/bpftool/netlink_dumper.c
new file mode 100644
index 000000000000..4e9f4531269f
--- /dev/null
+++ b/tools/bpf/bpftool/netlink_dumper.c
@@ -0,0 +1,178 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright (C) 2018 Facebook
+
+#include <stdlib.h>
+#include <string.h>
+#include <libbpf.h>
+#include <linux/rtnetlink.h>
+#include <linux/tc_act/tc_bpf.h>
+
+#include <nlattr.h>
+#include "main.h"
+#include "netlink_dumper.h"
+
+static void xdp_dump_prog_id(struct nlattr **tb, int attr,
+			     const char *mode,
+			     bool new_json_object)
+{
+	if (!tb[attr])
+		return;
+
+	if (new_json_object)
+		NET_START_OBJECT
+	NET_DUMP_STR("mode", " %s", mode);
+	NET_DUMP_UINT("id", " id %u", libbpf_nla_getattr_u32(tb[attr]))
+	if (new_json_object)
+		NET_END_OBJECT
+}
+
+static int do_xdp_dump_one(struct nlattr *attr, unsigned int ifindex,
+			   const char *name)
+{
+	struct nlattr *tb[IFLA_XDP_MAX + 1];
+	unsigned char mode;
+
+	if (libbpf_nla_parse_nested(tb, IFLA_XDP_MAX, attr, NULL) < 0)
+		return -1;
+
+	if (!tb[IFLA_XDP_ATTACHED])
+		return 0;
+
+	mode = libbpf_nla_getattr_u8(tb[IFLA_XDP_ATTACHED]);
+	if (mode == XDP_ATTACHED_NONE)
+		return 0;
+
+	NET_START_OBJECT;
+	if (name)
+		NET_DUMP_STR("devname", "%s", name);
+	NET_DUMP_UINT("ifindex", "(%d)", ifindex);
+
+	if (mode == XDP_ATTACHED_MULTI) {
+		if (json_output) {
+			jsonw_name(json_wtr, "multi_attachments");
+			jsonw_start_array(json_wtr);
+		}
+		xdp_dump_prog_id(tb, IFLA_XDP_SKB_PROG_ID, "generic", true);
+		xdp_dump_prog_id(tb, IFLA_XDP_DRV_PROG_ID, "driver", true);
+		xdp_dump_prog_id(tb, IFLA_XDP_HW_PROG_ID, "offload", true);
+		if (json_output)
+			jsonw_end_array(json_wtr);
+	} else if (mode == XDP_ATTACHED_DRV) {
+		xdp_dump_prog_id(tb, IFLA_XDP_PROG_ID, "driver", false);
+	} else if (mode == XDP_ATTACHED_SKB) {
+		xdp_dump_prog_id(tb, IFLA_XDP_PROG_ID, "generic", false);
+	} else if (mode == XDP_ATTACHED_HW) {
+		xdp_dump_prog_id(tb, IFLA_XDP_PROG_ID, "offload", false);
+	}
+
+	NET_END_OBJECT_FINAL;
+	return 0;
+}
+
+int do_xdp_dump(struct ifinfomsg *ifinfo, struct nlattr **tb)
+{
+	if (!tb[IFLA_XDP])
+		return 0;
+
+	return do_xdp_dump_one(tb[IFLA_XDP], ifinfo->ifi_index,
+			       libbpf_nla_getattr_str(tb[IFLA_IFNAME]));
+}
+
+static int do_bpf_dump_one_act(struct nlattr *attr)
+{
+	struct nlattr *tb[TCA_ACT_BPF_MAX + 1];
+
+	if (libbpf_nla_parse_nested(tb, TCA_ACT_BPF_MAX, attr, NULL) < 0)
+		return -LIBBPF_ERRNO__NLPARSE;
+
+	if (!tb[TCA_ACT_BPF_PARMS])
+		return -LIBBPF_ERRNO__NLPARSE;
+
+	NET_START_OBJECT_NESTED2;
+	if (tb[TCA_ACT_BPF_NAME])
+		NET_DUMP_STR("name", "%s",
+			     libbpf_nla_getattr_str(tb[TCA_ACT_BPF_NAME]));
+	if (tb[TCA_ACT_BPF_ID])
+		NET_DUMP_UINT("id", " id %u",
+			      libbpf_nla_getattr_u32(tb[TCA_ACT_BPF_ID]));
+	NET_END_OBJECT_NESTED;
+	return 0;
+}
+
+static int do_dump_one_act(struct nlattr *attr)
+{
+	struct nlattr *tb[TCA_ACT_MAX + 1];
+
+	if (!attr)
+		return 0;
+
+	if (libbpf_nla_parse_nested(tb, TCA_ACT_MAX, attr, NULL) < 0)
+		return -LIBBPF_ERRNO__NLPARSE;
+
+	if (tb[TCA_ACT_KIND] &&
+	    strcmp(libbpf_nla_data(tb[TCA_ACT_KIND]), "bpf") == 0)
+		return do_bpf_dump_one_act(tb[TCA_ACT_OPTIONS]);
+
+	return 0;
+}
+
+static int do_bpf_act_dump(struct nlattr *attr)
+{
+	struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
+	int act, ret;
+
+	if (libbpf_nla_parse_nested(tb, TCA_ACT_MAX_PRIO, attr, NULL) < 0)
+		return -LIBBPF_ERRNO__NLPARSE;
+
+	NET_START_ARRAY("act", " %s [");
+	for (act = 0; act <= TCA_ACT_MAX_PRIO; act++) {
+		ret = do_dump_one_act(tb[act]);
+		if (ret)
+			break;
+	}
+	NET_END_ARRAY("] ");
+
+	return ret;
+}
+
+static int do_bpf_filter_dump(struct nlattr *attr)
+{
+	struct nlattr *tb[TCA_BPF_MAX + 1];
+	int ret;
+
+	if (libbpf_nla_parse_nested(tb, TCA_BPF_MAX, attr, NULL) < 0)
+		return -LIBBPF_ERRNO__NLPARSE;
+
+	if (tb[TCA_BPF_NAME])
+		NET_DUMP_STR("name", " %s",
+			     libbpf_nla_getattr_str(tb[TCA_BPF_NAME]));
+	if (tb[TCA_BPF_ID])
+		NET_DUMP_UINT("id", " id %u",
+			      libbpf_nla_getattr_u32(tb[TCA_BPF_ID]));
+	if (tb[TCA_BPF_ACT]) {
+		ret = do_bpf_act_dump(tb[TCA_BPF_ACT]);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+int do_filter_dump(struct tcmsg *info, struct nlattr **tb, const char *kind,
+		   const char *devname, int ifindex)
+{
+	int ret = 0;
+
+	if (tb[TCA_OPTIONS] &&
+	    strcmp(libbpf_nla_data(tb[TCA_KIND]), "bpf") == 0) {
+		NET_START_OBJECT;
+		if (devname[0] != '\0')
+			NET_DUMP_STR("devname", "%s", devname);
+		NET_DUMP_UINT("ifindex", "(%u)", ifindex);
+		NET_DUMP_STR("kind", " %s", kind);
+		ret = do_bpf_filter_dump(tb[TCA_OPTIONS]);
+		NET_END_OBJECT_FINAL;
+	}
+
+	return ret;
+}
diff --git a/tools/bpf/bpftool/netlink_dumper.h b/tools/bpf/bpftool/netlink_dumper.h
new file mode 100644
index 000000000000..e3516b586a34
--- /dev/null
+++ b/tools/bpf/bpftool/netlink_dumper.h
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright (C) 2018 Facebook
+
+#ifndef _NETLINK_DUMPER_H_
+#define _NETLINK_DUMPER_H_
+
+#define NET_START_OBJECT				\
+{							\
+	if (json_output)				\
+		jsonw_start_object(json_wtr);		\
+}
+
+#define NET_START_OBJECT_NESTED(name)			\
+{							\
+	if (json_output) {				\
+		jsonw_name(json_wtr, name);		\
+		jsonw_start_object(json_wtr);		\
+	} else {					\
+		fprintf(stdout, "%s {", name);		\
+	}						\
+}
+
+#define NET_START_OBJECT_NESTED2			\
+{							\
+	if (json_output)				\
+		jsonw_start_object(json_wtr);		\
+	else						\
+		fprintf(stdout, "{");			\
+}
+
+#define NET_END_OBJECT_NESTED				\
+{							\
+	if (json_output)				\
+		jsonw_end_object(json_wtr);		\
+	else						\
+		fprintf(stdout, "}");			\
+}
+
+#define NET_END_OBJECT					\
+{							\
+	if (json_output)				\
+		jsonw_end_object(json_wtr);		\
+}
+
+#define NET_END_OBJECT_FINAL				\
+{							\
+	if (json_output)				\
+		jsonw_end_object(json_wtr);		\
+	else						\
+		fprintf(stdout, "\n");			\
+}
+
+#define NET_START_ARRAY(name, fmt_str)			\
+{							\
+	if (json_output) {				\
+		jsonw_name(json_wtr, name);		\
+		jsonw_start_array(json_wtr);		\
+	} else {					\
+		fprintf(stdout, fmt_str, name);		\
+	}						\
+}
+
+#define NET_END_ARRAY(endstr)				\
+{							\
+	if (json_output)				\
+		jsonw_end_array(json_wtr);		\
+	else						\
+		fprintf(stdout, "%s", endstr);		\
+}
+
+#define NET_DUMP_UINT(name, fmt_str, val)		\
+{							\
+	if (json_output)				\
+		jsonw_uint_field(json_wtr, name, val);	\
+	else						\
+		fprintf(stdout, fmt_str, val);		\
+}
+
+#define NET_DUMP_STR(name, fmt_str, str)		\
+{							\
+	if (json_output)				\
+		jsonw_string_field(json_wtr, name, str);\
+	else						\
+		fprintf(stdout, fmt_str, str);		\
+}
+
+#define NET_DUMP_STR_ONLY(str)				\
+{							\
+	if (json_output)				\
+		jsonw_string(json_wtr, str);		\
+	else						\
+		fprintf(stdout, "%s ", str);		\
+}
+
+#endif
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index dce960d22106..5302ee282409 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -74,8 +74,29 @@ static const char * const prog_type_name[] = {
 	[BPF_PROG_TYPE_RAW_TRACEPOINT]	= "raw_tracepoint",
 	[BPF_PROG_TYPE_CGROUP_SOCK_ADDR] = "cgroup_sock_addr",
 	[BPF_PROG_TYPE_LIRC_MODE2]	= "lirc_mode2",
+	[BPF_PROG_TYPE_FLOW_DISSECTOR]	= "flow_dissector",
 };
 
+static const char * const attach_type_strings[] = {
+	[BPF_SK_SKB_STREAM_PARSER] = "stream_parser",
+	[BPF_SK_SKB_STREAM_VERDICT] = "stream_verdict",
+	[BPF_SK_MSG_VERDICT] = "msg_verdict",
+	[__MAX_BPF_ATTACH_TYPE] = NULL,
+};
+
+enum bpf_attach_type parse_attach_type(const char *str)
+{
+	enum bpf_attach_type type;
+
+	for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++) {
+		if (attach_type_strings[type] &&
+		    is_prefix(str, attach_type_strings[type]))
+			return type;
+	}
+
+	return __MAX_BPF_ATTACH_TYPE;
+}
+
 static void print_boot_time(__u64 nsecs, char *buf, unsigned int size)
 {
 	struct timespec real_time_ts, boot_time_ts;
@@ -428,6 +449,7 @@ static int do_dump(int argc, char **argv)
 	unsigned long *func_ksyms = NULL;
 	struct bpf_prog_info info = {};
 	unsigned int *func_lens = NULL;
+	const char *disasm_opt = NULL;
 	unsigned int nr_func_ksyms;
 	unsigned int nr_func_lens;
 	struct dump_data dd = {};
@@ -586,9 +608,10 @@ static int do_dump(int argc, char **argv)
 		const char *name = NULL;
 
 		if (info.ifindex) {
-			name = ifindex_to_bfd_name_ns(info.ifindex,
-						      info.netns_dev,
-						      info.netns_ino);
+			name = ifindex_to_bfd_params(info.ifindex,
+						     info.netns_dev,
+						     info.netns_ino,
+						     &disasm_opt);
 			if (!name)
 				goto err_free;
 		}
@@ -630,7 +653,8 @@ static int do_dump(int argc, char **argv)
 					printf("%s:\n", sym_name);
 				}
 
-				disasm_print_insn(img, lens[i], opcodes, name);
+				disasm_print_insn(img, lens[i], opcodes, name,
+						  disasm_opt);
 				img += lens[i];
 
 				if (json_output)
@@ -642,7 +666,8 @@ static int do_dump(int argc, char **argv)
 			if (json_output)
 				jsonw_end_array(json_wtr);
 		} else {
-			disasm_print_insn(buf, *member_len, opcodes, name);
+			disasm_print_insn(buf, *member_len, opcodes, name,
+					  disasm_opt);
 		}
 	} else if (visual) {
 		if (json_output)
@@ -696,6 +721,77 @@ int map_replace_compar(const void *p1, const void *p2)
 	return a->idx - b->idx;
 }
 
+static int do_attach(int argc, char **argv)
+{
+	enum bpf_attach_type attach_type;
+	int err, mapfd, progfd;
+
+	if (!REQ_ARGS(5)) {
+		p_err("too few parameters for map attach");
+		return -EINVAL;
+	}
+
+	progfd = prog_parse_fd(&argc, &argv);
+	if (progfd < 0)
+		return progfd;
+
+	attach_type = parse_attach_type(*argv);
+	if (attach_type == __MAX_BPF_ATTACH_TYPE) {
+		p_err("invalid attach type");
+		return -EINVAL;
+	}
+	NEXT_ARG();
+
+	mapfd = map_parse_fd(&argc, &argv);
+	if (mapfd < 0)
+		return mapfd;
+
+	err = bpf_prog_attach(progfd, mapfd, attach_type, 0);
+	if (err) {
+		p_err("failed prog attach to map");
+		return -EINVAL;
+	}
+
+	if (json_output)
+		jsonw_null(json_wtr);
+	return 0;
+}
+
+static int do_detach(int argc, char **argv)
+{
+	enum bpf_attach_type attach_type;
+	int err, mapfd, progfd;
+
+	if (!REQ_ARGS(5)) {
+		p_err("too few parameters for map detach");
+		return -EINVAL;
+	}
+
+	progfd = prog_parse_fd(&argc, &argv);
+	if (progfd < 0)
+		return progfd;
+
+	attach_type = parse_attach_type(*argv);
+	if (attach_type == __MAX_BPF_ATTACH_TYPE) {
+		p_err("invalid attach type");
+		return -EINVAL;
+	}
+	NEXT_ARG();
+
+	mapfd = map_parse_fd(&argc, &argv);
+	if (mapfd < 0)
+		return mapfd;
+
+	err = bpf_prog_detach2(progfd, mapfd, attach_type);
+	if (err) {
+		p_err("failed prog detach from map");
+		return -EINVAL;
+	}
+
+	if (json_output)
+		jsonw_null(json_wtr);
+	return 0;
+}
 static int do_load(int argc, char **argv)
 {
 	enum bpf_attach_type expected_attach_type;
@@ -816,7 +912,7 @@ static int do_load(int argc, char **argv)
 		}
 	}
 
-	obj = bpf_object__open_xattr(&attr);
+	obj = __bpf_object__open_xattr(&attr, bpf_flags);
 	if (IS_ERR_OR_NULL(obj)) {
 		p_err("failed to open object file");
 		goto err_free_reuse_maps;
@@ -941,6 +1037,8 @@ static int do_help(int argc, char **argv)
 		"       %s %s pin   PROG FILE\n"
 		"       %s %s load  OBJ  FILE [type TYPE] [dev NAME] \\\n"
 		"                         [map { idx IDX | name NAME } MAP]\n"
+		"       %s %s attach PROG ATTACH_TYPE MAP\n"
+		"       %s %s detach PROG ATTACH_TYPE MAP\n"
 		"       %s %s help\n"
 		"\n"
 		"       " HELP_SPEC_MAP "\n"
@@ -952,10 +1050,12 @@ static int do_help(int argc, char **argv)
 		"                 cgroup/bind4 | cgroup/bind6 | cgroup/post_bind4 |\n"
 		"                 cgroup/post_bind6 | cgroup/connect4 | cgroup/connect6 |\n"
 		"                 cgroup/sendmsg4 | cgroup/sendmsg6 }\n"
+		"       ATTACH_TYPE := { msg_verdict | skb_verdict | skb_parse }\n"
 		"       " HELP_SPEC_OPTIONS "\n"
 		"",
 		bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
-		bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2]);
+		bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
+		bin_name, argv[-2], bin_name, argv[-2]);
 
 	return 0;
 }
@@ -967,6 +1067,8 @@ static const struct cmd cmds[] = {
 	{ "dump",	do_dump },
 	{ "pin",	do_pin },
 	{ "load",	do_load },
+	{ "attach",	do_attach },
+	{ "detach",	do_detach },
 	{ 0 }
 };
 
diff --git a/tools/crypto/getstat.c b/tools/crypto/getstat.c
new file mode 100644
index 000000000000..24115173a483
--- /dev/null
+++ b/tools/crypto/getstat.c
@@ -0,0 +1,294 @@
+/* Heavily copied from libkcapi 2015 - 2017, Stephan Mueller <smueller@chronox.de> */
+#include <errno.h>
+#include <linux/cryptouser.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+
+#define CR_RTA(x)  ((struct rtattr *)(((char *)(x)) + NLMSG_ALIGN(sizeof(struct crypto_user_alg))))
+
+static int get_stat(const char *drivername)
+{
+	struct {
+		struct nlmsghdr n;
+		struct crypto_user_alg cru;
+	} req;
+	struct sockaddr_nl nl;
+	int sd = 0, ret;
+	socklen_t addr_len;
+	struct iovec iov;
+	struct msghdr msg;
+	char buf[4096];
+	struct nlmsghdr *res_n = (struct nlmsghdr *)buf;
+	struct crypto_user_alg *cru_res = NULL;
+	int res_len = 0;
+	struct rtattr *tb[CRYPTOCFGA_MAX + 1];
+	struct rtattr *rta;
+	struct nlmsgerr *errmsg;
+
+	memset(&req, 0, sizeof(req));
+	memset(&buf, 0, sizeof(buf));
+	memset(&msg, 0, sizeof(msg));
+
+	req.n.nlmsg_len = NLMSG_LENGTH(sizeof(req.cru));
+	req.n.nlmsg_flags = NLM_F_REQUEST;
+	req.n.nlmsg_type = CRYPTO_MSG_GETSTAT;
+	req.n.nlmsg_seq = time(NULL);
+
+	strncpy(req.cru.cru_driver_name, drivername, strlen(drivername));
+
+	sd =  socket(AF_NETLINK, SOCK_RAW, NETLINK_CRYPTO);
+	if (sd < 0) {
+		fprintf(stderr, "Netlink error: cannot open netlink socket");
+		return -errno;
+	}
+	memset(&nl, 0, sizeof(nl));
+	nl.nl_family = AF_NETLINK;
+	if (bind(sd, (struct sockaddr *)&nl, sizeof(nl)) < 0) {
+		ret = -errno;
+		fprintf(stderr, "Netlink error: cannot bind netlink socket");
+		goto out;
+	}
+
+	/* sanity check that netlink socket was successfully opened */
+	addr_len = sizeof(nl);
+	if (getsockname(sd, (struct sockaddr *)&nl, &addr_len) < 0) {
+		ret = -errno;
+		printf("Netlink error: cannot getsockname");
+		goto out;
+	}
+	if (addr_len != sizeof(nl)) {
+		ret = -errno;
+		printf("Netlink error: wrong address length %d", addr_len);
+		goto out;
+	}
+	if (nl.nl_family != AF_NETLINK) {
+		ret = -errno;
+		printf("Netlink error: wrong address family %d",
+				nl.nl_family);
+		goto out;
+	}
+
+	memset(&nl, 0, sizeof(nl));
+	nl.nl_family = AF_NETLINK;
+	iov.iov_base = (void *)&req.n;
+	iov.iov_len = req.n.nlmsg_len;
+	msg.msg_name = &nl;
+	msg.msg_namelen = sizeof(nl);
+	msg.msg_iov = &iov;
+	msg.msg_iovlen = 1;
+	if (sendmsg(sd, &msg, 0) < 0) {
+		ret = -errno;
+		printf("Netlink error: sendmsg failed");
+		goto out;
+	}
+	memset(buf, 0, sizeof(buf));
+	iov.iov_base = buf;
+	while (1) {
+		iov.iov_len = sizeof(buf);
+		ret = recvmsg(sd, &msg, 0);
+		if (ret < 0) {
+			if (errno == EINTR || errno == EAGAIN)
+				continue;
+			ret = -errno;
+			printf("Netlink error: netlink receive error");
+			goto out;
+		}
+		if (ret == 0) {
+			ret = -errno;
+			printf("Netlink error: no data");
+			goto out;
+		}
+		if (ret > sizeof(buf)) {
+			ret = -errno;
+			printf("Netlink error: received too much data");
+			goto out;
+		}
+		break;
+	}
+
+	ret = -EFAULT;
+	res_len = res_n->nlmsg_len;
+	if (res_n->nlmsg_type == NLMSG_ERROR) {
+		errmsg = NLMSG_DATA(res_n);
+		fprintf(stderr, "Fail with %d\n", errmsg->error);
+		ret = errmsg->error;
+		goto out;
+	}
+
+	if (res_n->nlmsg_type == CRYPTO_MSG_GETSTAT) {
+		cru_res = NLMSG_DATA(res_n);
+		res_len -= NLMSG_SPACE(sizeof(*cru_res));
+	}
+	if (res_len < 0) {
+		printf("Netlink error: nlmsg len %d\n", res_len);
+		goto out;
+	}
+
+	if (!cru_res) {
+		ret = -EFAULT;
+		printf("Netlink error: no cru_res\n");
+		goto out;
+	}
+
+	rta = CR_RTA(cru_res);
+	memset(tb, 0, sizeof(struct rtattr *) * (CRYPTOCFGA_MAX + 1));
+	while (RTA_OK(rta, res_len)) {
+		if ((rta->rta_type <= CRYPTOCFGA_MAX) && (!tb[rta->rta_type]))
+			tb[rta->rta_type] = rta;
+		rta = RTA_NEXT(rta, res_len);
+	}
+	if (res_len) {
+		printf("Netlink error: unprocessed data %d",
+				res_len);
+		goto out;
+	}
+
+	if (tb[CRYPTOCFGA_STAT_HASH]) {
+		struct rtattr *rta = tb[CRYPTOCFGA_STAT_HASH];
+		struct crypto_stat *rhash =
+			(struct crypto_stat *)RTA_DATA(rta);
+		printf("%s\tHash\n\tHash: %u bytes: %llu\n\tErrors: %u\n",
+			drivername,
+			rhash->stat_hash_cnt, rhash->stat_hash_tlen,
+			rhash->stat_hash_err_cnt);
+	} else if (tb[CRYPTOCFGA_STAT_COMPRESS]) {
+		struct rtattr *rta = tb[CRYPTOCFGA_STAT_COMPRESS];
+		struct crypto_stat *rblk =
+			(struct crypto_stat *)RTA_DATA(rta);
+		printf("%s\tCompress\n\tCompress: %u bytes: %llu\n\tDecompress: %u bytes: %llu\n\tErrors: %u\n",
+			drivername,
+			rblk->stat_compress_cnt, rblk->stat_compress_tlen,
+			rblk->stat_decompress_cnt, rblk->stat_decompress_tlen,
+			rblk->stat_compress_err_cnt);
+	} else if (tb[CRYPTOCFGA_STAT_ACOMP]) {
+		struct rtattr *rta = tb[CRYPTOCFGA_STAT_ACOMP];
+		struct crypto_stat *rcomp =
+			(struct crypto_stat *)RTA_DATA(rta);
+		printf("%s\tACompress\n\tCompress: %u bytes: %llu\n\tDecompress: %u bytes: %llu\n\tErrors: %u\n",
+			drivername,
+			rcomp->stat_compress_cnt, rcomp->stat_compress_tlen,
+			rcomp->stat_decompress_cnt, rcomp->stat_decompress_tlen,
+			rcomp->stat_compress_err_cnt);
+	} else if (tb[CRYPTOCFGA_STAT_AEAD]) {
+		struct rtattr *rta = tb[CRYPTOCFGA_STAT_AEAD];
+		struct crypto_stat *raead =
+			(struct crypto_stat *)RTA_DATA(rta);
+		printf("%s\tAEAD\n\tEncrypt: %u bytes: %llu\n\tDecrypt: %u bytes: %llu\n\tErrors: %u\n",
+			drivername,
+			raead->stat_encrypt_cnt, raead->stat_encrypt_tlen,
+			raead->stat_decrypt_cnt, raead->stat_decrypt_tlen,
+			raead->stat_aead_err_cnt);
+	} else if (tb[CRYPTOCFGA_STAT_BLKCIPHER]) {
+		struct rtattr *rta = tb[CRYPTOCFGA_STAT_BLKCIPHER];
+		struct crypto_stat *rblk =
+			(struct crypto_stat *)RTA_DATA(rta);
+		printf("%s\tCipher\n\tEncrypt: %u bytes: %llu\n\tDecrypt: %u bytes: %llu\n\tErrors: %u\n",
+			drivername,
+			rblk->stat_encrypt_cnt, rblk->stat_encrypt_tlen,
+			rblk->stat_decrypt_cnt, rblk->stat_decrypt_tlen,
+			rblk->stat_cipher_err_cnt);
+	} else if (tb[CRYPTOCFGA_STAT_AKCIPHER]) {
+		struct rtattr *rta = tb[CRYPTOCFGA_STAT_AKCIPHER];
+		struct crypto_stat *rblk =
+			(struct crypto_stat *)RTA_DATA(rta);
+		printf("%s\tAkcipher\n\tEncrypt: %u bytes: %llu\n\tDecrypt: %u bytes: %llu\n\tSign: %u\n\tVerify: %u\n\tErrors: %u\n",
+			drivername,
+			rblk->stat_encrypt_cnt, rblk->stat_encrypt_tlen,
+			rblk->stat_decrypt_cnt, rblk->stat_decrypt_tlen,
+			rblk->stat_sign_cnt, rblk->stat_verify_cnt,
+			rblk->stat_akcipher_err_cnt);
+	} else if (tb[CRYPTOCFGA_STAT_CIPHER]) {
+		struct rtattr *rta = tb[CRYPTOCFGA_STAT_CIPHER];
+		struct crypto_stat *rblk =
+			(struct crypto_stat *)RTA_DATA(rta);
+		printf("%s\tcipher\n\tEncrypt: %u bytes: %llu\n\tDecrypt: %u bytes: %llu\n\tErrors: %u\n",
+			drivername,
+			rblk->stat_encrypt_cnt, rblk->stat_encrypt_tlen,
+			rblk->stat_decrypt_cnt, rblk->stat_decrypt_tlen,
+			rblk->stat_cipher_err_cnt);
+	} else if (tb[CRYPTOCFGA_STAT_RNG]) {
+		struct rtattr *rta = tb[CRYPTOCFGA_STAT_RNG];
+		struct crypto_stat *rrng =
+			(struct crypto_stat *)RTA_DATA(rta);
+		printf("%s\tRNG\n\tSeed: %u\n\tGenerate: %u bytes: %llu\n\tErrors: %u\n",
+			drivername,
+			rrng->stat_seed_cnt,
+			rrng->stat_generate_cnt, rrng->stat_generate_tlen,
+			rrng->stat_rng_err_cnt);
+	} else if (tb[CRYPTOCFGA_STAT_KPP]) {
+		struct rtattr *rta = tb[CRYPTOCFGA_STAT_KPP];
+		struct crypto_stat *rkpp =
+			(struct crypto_stat *)RTA_DATA(rta);
+		printf("%s\tKPP\n\tSetsecret: %u\n\tGenerate public key: %u\n\tCompute_shared_secret: %u\n\tErrors: %u\n",
+			drivername,
+			rkpp->stat_setsecret_cnt,
+			rkpp->stat_generate_public_key_cnt,
+			rkpp->stat_compute_shared_secret_cnt,
+			rkpp->stat_kpp_err_cnt);
+	} else {
+		fprintf(stderr, "%s is of an unknown algorithm\n", drivername);
+	}
+	ret = 0;
+out:
+	close(sd);
+	return ret;
+}
+
+int main(int argc, const char *argv[])
+{
+	char buf[4096];
+	FILE *procfd;
+	int i, lastspace;
+	int ret;
+
+	procfd = fopen("/proc/crypto", "r");
+	if (!procfd) {
+		ret = errno;
+		fprintf(stderr, "Cannot open /proc/crypto %s\n", strerror(errno));
+		return ret;
+	}
+	if (argc > 1) {
+		if (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) {
+			printf("Usage: %s [-h|--help] display this help\n", argv[0]);
+			printf("Usage: %s display all crypto statistics\n", argv[0]);
+			printf("Usage: %s drivername1 drivername2 ... = display crypto statistics about drivername1 ...\n", argv[0]);
+			return 0;
+		}
+		for (i = 1; i < argc; i++) {
+			ret = get_stat(argv[i]);
+			if (ret) {
+				fprintf(stderr, "Failed with %s\n", strerror(-ret));
+				return ret;
+			}
+		}
+		return 0;
+	}
+
+	while (fgets(buf, sizeof(buf), procfd)) {
+		if (!strncmp(buf, "driver", 6)) {
+			lastspace = 0;
+			i = 0;
+			while (i < strlen(buf)) {
+				i++;
+				if (buf[i] == ' ')
+					lastspace = i;
+			}
+			buf[strlen(buf) - 1] = '\0';
+			ret = get_stat(buf + lastspace + 1);
+			if (ret) {
+				fprintf(stderr, "Failed with %s\n", strerror(-ret));
+				goto out;
+			}
+		}
+	}
+out:
+	fclose(procfd);
+	return ret;
+}
diff --git a/tools/include/asm-generic/bitops.h b/tools/include/asm-generic/bitops.h
index 9bce3b56b5e7..5d2ab38965cc 100644
--- a/tools/include/asm-generic/bitops.h
+++ b/tools/include/asm-generic/bitops.h
@@ -27,5 +27,6 @@
 #include <asm-generic/bitops/hweight.h>
 
 #include <asm-generic/bitops/atomic.h>
+#include <asm-generic/bitops/non-atomic.h>
 
 #endif /* __TOOLS_ASM_GENERIC_BITOPS_H */
diff --git a/tools/include/asm-generic/bitops/atomic.h b/tools/include/asm-generic/bitops/atomic.h
index 21c41ccd1266..2f6ea28764a7 100644
--- a/tools/include/asm-generic/bitops/atomic.h
+++ b/tools/include/asm-generic/bitops/atomic.h
@@ -15,13 +15,4 @@ static inline void clear_bit(int nr, unsigned long *addr)
 	addr[nr / __BITS_PER_LONG] &= ~(1UL << (nr % __BITS_PER_LONG));
 }
 
-static __always_inline int test_bit(unsigned int nr, const unsigned long *addr)
-{
-	return ((1UL << (nr % __BITS_PER_LONG)) &
-		(((unsigned long *)addr)[nr / __BITS_PER_LONG])) != 0;
-}
-
-#define __set_bit(nr, addr)	set_bit(nr, addr)
-#define __clear_bit(nr, addr)	clear_bit(nr, addr)
-
 #endif /* _TOOLS_LINUX_ASM_GENERIC_BITOPS_ATOMIC_H_ */
diff --git a/tools/include/asm-generic/bitops/non-atomic.h b/tools/include/asm-generic/bitops/non-atomic.h
new file mode 100644
index 000000000000..7e10c4b50c5d
--- /dev/null
+++ b/tools/include/asm-generic/bitops/non-atomic.h
@@ -0,0 +1,109 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_GENERIC_BITOPS_NON_ATOMIC_H_
+#define _ASM_GENERIC_BITOPS_NON_ATOMIC_H_
+
+#include <asm/types.h>
+
+/**
+ * __set_bit - Set a bit in memory
+ * @nr: the bit to set
+ * @addr: the address to start counting from
+ *
+ * Unlike set_bit(), this function is non-atomic and may be reordered.
+ * If it's called on the same region of memory simultaneously, the effect
+ * may be that only one operation succeeds.
+ */
+static inline void __set_bit(int nr, volatile unsigned long *addr)
+{
+	unsigned long mask = BIT_MASK(nr);
+	unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
+
+	*p  |= mask;
+}
+
+static inline void __clear_bit(int nr, volatile unsigned long *addr)
+{
+	unsigned long mask = BIT_MASK(nr);
+	unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
+
+	*p &= ~mask;
+}
+
+/**
+ * __change_bit - Toggle a bit in memory
+ * @nr: the bit to change
+ * @addr: the address to start counting from
+ *
+ * Unlike change_bit(), this function is non-atomic and may be reordered.
+ * If it's called on the same region of memory simultaneously, the effect
+ * may be that only one operation succeeds.
+ */
+static inline void __change_bit(int nr, volatile unsigned long *addr)
+{
+	unsigned long mask = BIT_MASK(nr);
+	unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
+
+	*p ^= mask;
+}
+
+/**
+ * __test_and_set_bit - Set a bit and return its old value
+ * @nr: Bit to set
+ * @addr: Address to count from
+ *
+ * This operation is non-atomic and can be reordered.
+ * If two examples of this operation race, one can appear to succeed
+ * but actually fail.  You must protect multiple accesses with a lock.
+ */
+static inline int __test_and_set_bit(int nr, volatile unsigned long *addr)
+{
+	unsigned long mask = BIT_MASK(nr);
+	unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
+	unsigned long old = *p;
+
+	*p = old | mask;
+	return (old & mask) != 0;
+}
+
+/**
+ * __test_and_clear_bit - Clear a bit and return its old value
+ * @nr: Bit to clear
+ * @addr: Address to count from
+ *
+ * This operation is non-atomic and can be reordered.
+ * If two examples of this operation race, one can appear to succeed
+ * but actually fail.  You must protect multiple accesses with a lock.
+ */
+static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr)
+{
+	unsigned long mask = BIT_MASK(nr);
+	unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
+	unsigned long old = *p;
+
+	*p = old & ~mask;
+	return (old & mask) != 0;
+}
+
+/* WARNING: non atomic and it can be reordered! */
+static inline int __test_and_change_bit(int nr,
+					    volatile unsigned long *addr)
+{
+	unsigned long mask = BIT_MASK(nr);
+	unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
+	unsigned long old = *p;
+
+	*p = old ^ mask;
+	return (old & mask) != 0;
+}
+
+/**
+ * test_bit - Determine whether a bit is set
+ * @nr: bit number to test
+ * @addr: Address to start counting from
+ */
+static inline int test_bit(int nr, const volatile unsigned long *addr)
+{
+	return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1)));
+}
+
+#endif /* _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ */
diff --git a/tools/include/asm/barrier.h b/tools/include/asm/barrier.h
index 391d942536e5..8d378c57cb01 100644
--- a/tools/include/asm/barrier.h
+++ b/tools/include/asm/barrier.h
@@ -1,4 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/compiler.h>
 #if defined(__i386__) || defined(__x86_64__)
 #include "../../arch/x86/include/asm/barrier.h"
 #elif defined(__arm__)
@@ -26,3 +27,37 @@
 #else
 #include <asm-generic/barrier.h>
 #endif
+
+/*
+ * Generic fallback smp_*() definitions for archs that haven't
+ * been updated yet.
+ */
+
+#ifndef smp_rmb
+# define smp_rmb()	rmb()
+#endif
+
+#ifndef smp_wmb
+# define smp_wmb()	wmb()
+#endif
+
+#ifndef smp_mb
+# define smp_mb()	mb()
+#endif
+
+#ifndef smp_store_release
+# define smp_store_release(p, v)		\
+do {						\
+	smp_mb();				\
+	WRITE_ONCE(*p, v);			\
+} while (0)
+#endif
+
+#ifndef smp_load_acquire
+# define smp_load_acquire(p)			\
+({						\
+	typeof(*p) ___p1 = READ_ONCE(*p);	\
+	smp_mb();				\
+	___p1;					\
+})
+#endif
diff --git a/tools/include/linux/bitmap.h b/tools/include/linux/bitmap.h
index e63662db131b..05dca5c203f3 100644
--- a/tools/include/linux/bitmap.h
+++ b/tools/include/linux/bitmap.h
@@ -15,6 +15,7 @@ void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
 		 const unsigned long *bitmap2, int bits);
 int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
 		 const unsigned long *bitmap2, unsigned int bits);
+void bitmap_clear(unsigned long *map, unsigned int start, int len);
 
 #define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) & (BITS_PER_LONG - 1)))
 
diff --git a/tools/include/linux/bitops.h b/tools/include/linux/bitops.h
index acc704bd3998..0b0ef3abc966 100644
--- a/tools/include/linux/bitops.h
+++ b/tools/include/linux/bitops.h
@@ -3,8 +3,6 @@
 #define _TOOLS_LINUX_BITOPS_H_
 
 #include <asm/types.h>
-#include <linux/compiler.h>
-
 #ifndef __WORDSIZE
 #define __WORDSIZE (__SIZEOF_LONG__ * 8)
 #endif
@@ -12,10 +10,9 @@
 #ifndef BITS_PER_LONG
 # define BITS_PER_LONG __WORDSIZE
 #endif
+#include <linux/bits.h>
+#include <linux/compiler.h>
 
-#define BIT_MASK(nr)		(1UL << ((nr) % BITS_PER_LONG))
-#define BIT_WORD(nr)		((nr) / BITS_PER_LONG)
-#define BITS_PER_BYTE		8
 #define BITS_TO_LONGS(nr)	DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long))
 #define BITS_TO_U64(nr)		DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u64))
 #define BITS_TO_U32(nr)		DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u32))
diff --git a/tools/include/linux/bits.h b/tools/include/linux/bits.h
new file mode 100644
index 000000000000..2b7b532c1d51
--- /dev/null
+++ b/tools/include/linux/bits.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LINUX_BITS_H
+#define __LINUX_BITS_H
+#include <asm/bitsperlong.h>
+
+#define BIT(nr)			(1UL << (nr))
+#define BIT_ULL(nr)		(1ULL << (nr))
+#define BIT_MASK(nr)		(1UL << ((nr) % BITS_PER_LONG))
+#define BIT_WORD(nr)		((nr) / BITS_PER_LONG)
+#define BIT_ULL_MASK(nr)	(1ULL << ((nr) % BITS_PER_LONG_LONG))
+#define BIT_ULL_WORD(nr)	((nr) / BITS_PER_LONG_LONG)
+#define BITS_PER_BYTE		8
+
+/*
+ * Create a contiguous bitmask starting at bit position @l and ending at
+ * position @h. For example
+ * GENMASK_ULL(39, 21) gives us the 64bit vector 0x000000ffffe00000.
+ */
+#define GENMASK(h, l) \
+	(((~0UL) - (1UL << (l)) + 1) & (~0UL >> (BITS_PER_LONG - 1 - (h))))
+
+#define GENMASK_ULL(h, l) \
+	(((~0ULL) - (1ULL << (l)) + 1) & \
+	 (~0ULL >> (BITS_PER_LONG_LONG - 1 - (h))))
+
+#endif	/* __LINUX_BITS_H */
diff --git a/tools/include/linux/err.h b/tools/include/linux/err.h
index 7a8b61ad44cb..094649667bae 100644
--- a/tools/include/linux/err.h
+++ b/tools/include/linux/err.h
@@ -52,4 +52,11 @@ static inline bool __must_check IS_ERR_OR_NULL(__force const void *ptr)
 	return unlikely(!ptr) || IS_ERR_VALUE((unsigned long)ptr);
 }
 
+static inline int __must_check PTR_ERR_OR_ZERO(__force const void *ptr)
+{
+	if (IS_ERR(ptr))
+		return PTR_ERR(ptr);
+	else
+		return 0;
+}
 #endif /* _LINUX_ERR_H */
diff --git a/tools/include/linux/kernel.h b/tools/include/linux/kernel.h
index 0ad884452c5c..6935ef94e77a 100644
--- a/tools/include/linux/kernel.h
+++ b/tools/include/linux/kernel.h
@@ -70,6 +70,7 @@
 #define BUG_ON(cond) assert(!(cond))
 #endif
 #endif
+#define BUG()	BUG_ON(1)
 
 #if __BYTE_ORDER == __BIG_ENDIAN
 #define cpu_to_le16 bswap_16
diff --git a/tools/include/linux/ring_buffer.h b/tools/include/linux/ring_buffer.h
new file mode 100644
index 000000000000..9a083ae60473
--- /dev/null
+++ b/tools/include/linux/ring_buffer.h
@@ -0,0 +1,73 @@
+#ifndef _TOOLS_LINUX_RING_BUFFER_H_
+#define _TOOLS_LINUX_RING_BUFFER_H_
+
+#include <asm/barrier.h>
+
+/*
+ * Contract with kernel for walking the perf ring buffer from
+ * user space requires the following barrier pairing (quote
+ * from kernel/events/ring_buffer.c):
+ *
+ *   Since the mmap() consumer (userspace) can run on a
+ *   different CPU:
+ *
+ *   kernel                             user
+ *
+ *   if (LOAD ->data_tail) {            LOAD ->data_head
+ *                      (A)             smp_rmb()       (C)
+ *      STORE $data                     LOAD $data
+ *      smp_wmb()       (B)             smp_mb()        (D)
+ *      STORE ->data_head               STORE ->data_tail
+ *   }
+ *
+ *   Where A pairs with D, and B pairs with C.
+ *
+ *   In our case A is a control dependency that separates the
+ *   load of the ->data_tail and the stores of $data. In case
+ *   ->data_tail indicates there is no room in the buffer to
+ *   store $data we do not.
+ *
+ *   D needs to be a full barrier since it separates the data
+ *   READ from the tail WRITE.
+ *
+ *   For B a WMB is sufficient since it separates two WRITEs,
+ *   and for C an RMB is sufficient since it separates two READs.
+ *
+ * Note, instead of B, C, D we could also use smp_store_release()
+ * in B and D as well as smp_load_acquire() in C.
+ *
+ * However, this optimization does not make sense for all kernel
+ * supported architectures since for a fair number it would
+ * resolve into READ_ONCE() + smp_mb() pair for smp_load_acquire(),
+ * and smp_mb() + WRITE_ONCE() pair for smp_store_release().
+ *
+ * Thus for those smp_wmb() in B and smp_rmb() in C would still
+ * be less expensive. For the case of D this has either the same
+ * cost or is less expensive, for example, due to TSO x86 can
+ * avoid the CPU barrier entirely.
+ */
+
+static inline u64 ring_buffer_read_head(struct perf_event_mmap_page *base)
+{
+/*
+ * Architectures where smp_load_acquire() does not fallback to
+ * READ_ONCE() + smp_mb() pair.
+ */
+#if defined(__x86_64__) || defined(__aarch64__) || defined(__powerpc64__) || \
+    defined(__ia64__) || defined(__sparc__) && defined(__arch64__)
+	return smp_load_acquire(&base->data_head);
+#else
+	u64 head = READ_ONCE(base->data_head);
+
+	smp_rmb();
+	return head;
+#endif
+}
+
+static inline void ring_buffer_write_tail(struct perf_event_mmap_page *base,
+					  u64 tail)
+{
+	smp_store_release(&base->data_tail, tail);
+}
+
+#endif /* _TOOLS_LINUX_RING_BUFFER_H_ */
diff --git a/tools/include/linux/spinlock.h b/tools/include/linux/spinlock.h
index 1738c0391da4..c934572d935c 100644
--- a/tools/include/linux/spinlock.h
+++ b/tools/include/linux/spinlock.h
@@ -8,8 +8,14 @@
 #define spinlock_t		pthread_mutex_t
 #define DEFINE_SPINLOCK(x)	pthread_mutex_t x = PTHREAD_MUTEX_INITIALIZER
 #define __SPIN_LOCK_UNLOCKED(x)	(pthread_mutex_t)PTHREAD_MUTEX_INITIALIZER
-#define spin_lock_init(x)      pthread_mutex_init(x, NULL)
-
+#define spin_lock_init(x)	pthread_mutex_init(x, NULL)
+
+#define spin_lock(x)			pthread_mutex_lock(x)
+#define spin_unlock(x)			pthread_mutex_unlock(x)
+#define spin_lock_bh(x)			pthread_mutex_lock(x)
+#define spin_unlock_bh(x)		pthread_mutex_unlock(x)
+#define spin_lock_irq(x)		pthread_mutex_lock(x)
+#define spin_unlock_irq(x)		pthread_mutex_unlock(x)
 #define spin_lock_irqsave(x, f)		(void)f, pthread_mutex_lock(x)
 #define spin_unlock_irqrestore(x, f)	(void)f, pthread_mutex_unlock(x)
 
@@ -31,4 +37,6 @@ static inline bool arch_spin_is_locked(arch_spinlock_t *mutex)
 	return true;
 }
 
+#include <linux/lockdep.h>
+
 #endif
diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h
index df4bedb9b01c..538546edbfbd 100644
--- a/tools/include/uapi/asm-generic/unistd.h
+++ b/tools/include/uapi/asm-generic/unistd.h
@@ -242,10 +242,12 @@ __SYSCALL(__NR_tee, sys_tee)
 /* fs/stat.c */
 #define __NR_readlinkat 78
 __SYSCALL(__NR_readlinkat, sys_readlinkat)
+#if defined(__ARCH_WANT_NEW_STAT) || defined(__ARCH_WANT_STAT64)
 #define __NR3264_fstatat 79
 __SC_3264(__NR3264_fstatat, sys_fstatat64, sys_newfstatat)
 #define __NR3264_fstat 80
 __SC_3264(__NR3264_fstat, sys_fstat64, sys_newfstat)
+#endif
 
 /* fs/sync.c */
 #define __NR_sync 81
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 66917a4eba27..852dc17ab47a 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -103,6 +103,7 @@ enum bpf_cmd {
 	BPF_BTF_LOAD,
 	BPF_BTF_GET_FD_BY_ID,
 	BPF_TASK_FD_QUERY,
+	BPF_MAP_LOOKUP_AND_DELETE_ELEM,
 };
 
 enum bpf_map_type {
@@ -127,6 +128,9 @@ enum bpf_map_type {
 	BPF_MAP_TYPE_SOCKHASH,
 	BPF_MAP_TYPE_CGROUP_STORAGE,
 	BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
+	BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
+	BPF_MAP_TYPE_QUEUE,
+	BPF_MAP_TYPE_STACK,
 };
 
 enum bpf_prog_type {
@@ -152,6 +156,7 @@ enum bpf_prog_type {
 	BPF_PROG_TYPE_LWT_SEG6LOCAL,
 	BPF_PROG_TYPE_LIRC_MODE2,
 	BPF_PROG_TYPE_SK_REUSEPORT,
+	BPF_PROG_TYPE_FLOW_DISSECTOR,
 };
 
 enum bpf_attach_type {
@@ -172,6 +177,7 @@ enum bpf_attach_type {
 	BPF_CGROUP_UDP4_SENDMSG,
 	BPF_CGROUP_UDP6_SENDMSG,
 	BPF_LIRC_MODE2,
+	BPF_FLOW_DISSECTOR,
 	__MAX_BPF_ATTACH_TYPE
 };
 
@@ -459,6 +465,28 @@ union bpf_attr {
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
+ * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags)
+ * 	Description
+ * 		Push an element *value* in *map*. *flags* is one of:
+ *
+ * 		**BPF_EXIST**
+ * 		If the queue/stack is full, the oldest element is removed to
+ * 		make room for this.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_map_pop_elem(struct bpf_map *map, void *value)
+ * 	Description
+ * 		Pop an element from *map*.
+ * Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_map_peek_elem(struct bpf_map *map, void *value)
+ * 	Description
+ * 		Get an element from *map* without removing it.
+ * Return
+ * 		0 on success, or a negative error in case of failure.
+ *
  * int bpf_probe_read(void *dst, u32 size, const void *src)
  * 	Description
  * 		For tracing programs, safely attempt to read *size* bytes from
@@ -1430,7 +1458,7 @@ union bpf_attr {
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_skb_adjust_room(struct sk_buff *skb, u32 len_diff, u32 mode, u64 flags)
+ * int bpf_skb_adjust_room(struct sk_buff *skb, s32 len_diff, u32 mode, u64 flags)
  * 	Description
  * 		Grow or shrink the room for data in the packet associated to
  * 		*skb* by *len_diff*, and according to the selected *mode*.
@@ -2141,6 +2169,94 @@ union bpf_attr {
  *		request in the skb.
  *	Return
  *		0 on success, or a negative error in case of failure.
+ *
+ * struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u32 netns, u64 flags)
+ *	Description
+ *		Look for TCP socket matching *tuple*, optionally in a child
+ *		network namespace *netns*. The return value must be checked,
+ *		and if non-NULL, released via **bpf_sk_release**\ ().
+ *
+ *		The *ctx* should point to the context of the program, such as
+ *		the skb or socket (depending on the hook in use). This is used
+ *		to determine the base network namespace for the lookup.
+ *
+ *		*tuple_size* must be one of:
+ *
+ *		**sizeof**\ (*tuple*\ **->ipv4**)
+ *			Look for an IPv4 socket.
+ *		**sizeof**\ (*tuple*\ **->ipv6**)
+ *			Look for an IPv6 socket.
+ *
+ *		If the *netns* is zero, then the socket lookup table in the
+ *		netns associated with the *ctx* will be used. For the TC hooks,
+ *		this in the netns of the device in the skb. For socket hooks,
+ *		this in the netns of the socket. If *netns* is non-zero, then
+ *		it specifies the ID of the netns relative to the netns
+ *		associated with the *ctx*.
+ *
+ *		All values for *flags* are reserved for future usage, and must
+ *		be left at zero.
+ *
+ *		This helper is available only if the kernel was compiled with
+ *		**CONFIG_NET** configuration option.
+ *	Return
+ *		Pointer to *struct bpf_sock*, or NULL in case of failure.
+ *
+ * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u32 netns, u64 flags)
+ *	Description
+ *		Look for UDP socket matching *tuple*, optionally in a child
+ *		network namespace *netns*. The return value must be checked,
+ *		and if non-NULL, released via **bpf_sk_release**\ ().
+ *
+ *		The *ctx* should point to the context of the program, such as
+ *		the skb or socket (depending on the hook in use). This is used
+ *		to determine the base network namespace for the lookup.
+ *
+ *		*tuple_size* must be one of:
+ *
+ *		**sizeof**\ (*tuple*\ **->ipv4**)
+ *			Look for an IPv4 socket.
+ *		**sizeof**\ (*tuple*\ **->ipv6**)
+ *			Look for an IPv6 socket.
+ *
+ *		If the *netns* is zero, then the socket lookup table in the
+ *		netns associated with the *ctx* will be used. For the TC hooks,
+ *		this in the netns of the device in the skb. For socket hooks,
+ *		this in the netns of the socket. If *netns* is non-zero, then
+ *		it specifies the ID of the netns relative to the netns
+ *		associated with the *ctx*.
+ *
+ *		All values for *flags* are reserved for future usage, and must
+ *		be left at zero.
+ *
+ *		This helper is available only if the kernel was compiled with
+ *		**CONFIG_NET** configuration option.
+ *	Return
+ *		Pointer to *struct bpf_sock*, or NULL in case of failure.
+ *
+ * int bpf_sk_release(struct bpf_sock *sk)
+ *	Description
+ *		Release the reference held by *sock*. *sock* must be a non-NULL
+ *		pointer that was returned from bpf_sk_lookup_xxx\ ().
+ *	Return
+ *		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_msg_push_data(struct sk_buff *skb, u32 start, u32 len, u64 flags)
+ *	Description
+ *		For socket policies, insert *len* bytes into msg at offset
+ *		*start*.
+ *
+ *		If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a
+ *		*msg* it may want to insert metadata or options into the msg.
+ *		This can later be read and used by any of the lower layer BPF
+ *		hooks.
+ *
+ *		This helper may fail if under memory pressure (a malloc
+ *		fails) in these cases BPF programs will get an appropriate
+ *		error and BPF programs will need to handle them.
+ *
+ *	Return
+ *		0 on success, or a negative error in case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -2226,7 +2342,14 @@ union bpf_attr {
 	FN(get_current_cgroup_id),	\
 	FN(get_local_storage),		\
 	FN(sk_select_reuseport),	\
-	FN(skb_ancestor_cgroup_id),
+	FN(skb_ancestor_cgroup_id),	\
+	FN(sk_lookup_tcp),		\
+	FN(sk_lookup_udp),		\
+	FN(sk_release),			\
+	FN(map_push_elem),		\
+	FN(map_pop_elem),		\
+	FN(map_peek_elem),		\
+	FN(msg_push_data),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -2333,6 +2456,7 @@ struct __sk_buff {
 	/* ... here. */
 
 	__u32 data_meta;
+	struct bpf_flow_keys *flow_keys;
 };
 
 struct bpf_tunnel_key {
@@ -2395,6 +2519,23 @@ struct bpf_sock {
 				 */
 };
 
+struct bpf_sock_tuple {
+	union {
+		struct {
+			__be32 saddr;
+			__be32 daddr;
+			__be16 sport;
+			__be16 dport;
+		} ipv4;
+		struct {
+			__be32 saddr[4];
+			__be32 daddr[4];
+			__be16 sport;
+			__be16 dport;
+		} ipv6;
+	};
+};
+
 #define XDP_PACKET_HEADROOM 256
 
 /* User return codes for XDP prog type.
@@ -2778,4 +2919,27 @@ enum bpf_task_fd_type {
 	BPF_FD_TYPE_URETPROBE,		/* filename + offset */
 };
 
+struct bpf_flow_keys {
+	__u16	nhoff;
+	__u16	thoff;
+	__u16	addr_proto;			/* ETH_P_* of valid addrs */
+	__u8	is_frag;
+	__u8	is_first_frag;
+	__u8	is_encap;
+	__u8	ip_proto;
+	__be16	n_proto;
+	__be16	sport;
+	__be16	dport;
+	union {
+		struct {
+			__be32	ipv4_src;
+			__be32	ipv4_dst;
+		};
+		struct {
+			__u32	ipv6_src[4];	/* in6_addr; network order */
+			__u32	ipv6_dst[4];	/* in6_addr; network order */
+		};
+	};
+};
+
 #endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/tools/include/uapi/linux/fs.h b/tools/include/uapi/linux/fs.h
new file mode 100644
index 000000000000..a441ea1bfe6d
--- /dev/null
+++ b/tools/include/uapi/linux/fs.h
@@ -0,0 +1,393 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_LINUX_FS_H
+#define _UAPI_LINUX_FS_H
+
+/*
+ * This file has definitions for some important file table structures
+ * and constants and structures used by various generic file system
+ * ioctl's.  Please do not make any changes in this file before
+ * sending patches for review to linux-fsdevel@vger.kernel.org and
+ * linux-api@vger.kernel.org.
+ */
+
+#include <linux/limits.h>
+#include <linux/ioctl.h>
+#include <linux/types.h>
+
+/*
+ * It's silly to have NR_OPEN bigger than NR_FILE, but you can change
+ * the file limit at runtime and only root can increase the per-process
+ * nr_file rlimit, so it's safe to set up a ridiculously high absolute
+ * upper limit on files-per-process.
+ *
+ * Some programs (notably those using select()) may have to be 
+ * recompiled to take full advantage of the new limits..  
+ */
+
+/* Fixed constants first: */
+#undef NR_OPEN
+#define INR_OPEN_CUR 1024	/* Initial setting for nfile rlimits */
+#define INR_OPEN_MAX 4096	/* Hard limit for nfile rlimits */
+
+#define BLOCK_SIZE_BITS 10
+#define BLOCK_SIZE (1<<BLOCK_SIZE_BITS)
+
+#define SEEK_SET	0	/* seek relative to beginning of file */
+#define SEEK_CUR	1	/* seek relative to current file position */
+#define SEEK_END	2	/* seek relative to end of file */
+#define SEEK_DATA	3	/* seek to the next data */
+#define SEEK_HOLE	4	/* seek to the next hole */
+#define SEEK_MAX	SEEK_HOLE
+
+#define RENAME_NOREPLACE	(1 << 0)	/* Don't overwrite target */
+#define RENAME_EXCHANGE		(1 << 1)	/* Exchange source and dest */
+#define RENAME_WHITEOUT		(1 << 2)	/* Whiteout source */
+
+struct file_clone_range {
+	__s64 src_fd;
+	__u64 src_offset;
+	__u64 src_length;
+	__u64 dest_offset;
+};
+
+struct fstrim_range {
+	__u64 start;
+	__u64 len;
+	__u64 minlen;
+};
+
+/* extent-same (dedupe) ioctls; these MUST match the btrfs ioctl definitions */
+#define FILE_DEDUPE_RANGE_SAME		0
+#define FILE_DEDUPE_RANGE_DIFFERS	1
+
+/* from struct btrfs_ioctl_file_extent_same_info */
+struct file_dedupe_range_info {
+	__s64 dest_fd;		/* in - destination file */
+	__u64 dest_offset;	/* in - start of extent in destination */
+	__u64 bytes_deduped;	/* out - total # of bytes we were able
+				 * to dedupe from this file. */
+	/* status of this dedupe operation:
+	 * < 0 for error
+	 * == FILE_DEDUPE_RANGE_SAME if dedupe succeeds
+	 * == FILE_DEDUPE_RANGE_DIFFERS if data differs
+	 */
+	__s32 status;		/* out - see above description */
+	__u32 reserved;		/* must be zero */
+};
+
+/* from struct btrfs_ioctl_file_extent_same_args */
+struct file_dedupe_range {
+	__u64 src_offset;	/* in - start of extent in source */
+	__u64 src_length;	/* in - length of extent */
+	__u16 dest_count;	/* in - total elements in info array */
+	__u16 reserved1;	/* must be zero */
+	__u32 reserved2;	/* must be zero */
+	struct file_dedupe_range_info info[0];
+};
+
+/* And dynamically-tunable limits and defaults: */
+struct files_stat_struct {
+	unsigned long nr_files;		/* read only */
+	unsigned long nr_free_files;	/* read only */
+	unsigned long max_files;		/* tunable */
+};
+
+struct inodes_stat_t {
+	long nr_inodes;
+	long nr_unused;
+	long dummy[5];		/* padding for sysctl ABI compatibility */
+};
+
+
+#define NR_FILE  8192	/* this can well be larger on a larger system */
+
+
+/*
+ * These are the fs-independent mount-flags: up to 32 flags are supported
+ */
+#define MS_RDONLY	 1	/* Mount read-only */
+#define MS_NOSUID	 2	/* Ignore suid and sgid bits */
+#define MS_NODEV	 4	/* Disallow access to device special files */
+#define MS_NOEXEC	 8	/* Disallow program execution */
+#define MS_SYNCHRONOUS	16	/* Writes are synced at once */
+#define MS_REMOUNT	32	/* Alter flags of a mounted FS */
+#define MS_MANDLOCK	64	/* Allow mandatory locks on an FS */
+#define MS_DIRSYNC	128	/* Directory modifications are synchronous */
+#define MS_NOATIME	1024	/* Do not update access times. */
+#define MS_NODIRATIME	2048	/* Do not update directory access times */
+#define MS_BIND		4096
+#define MS_MOVE		8192
+#define MS_REC		16384
+#define MS_VERBOSE	32768	/* War is peace. Verbosity is silence.
+				   MS_VERBOSE is deprecated. */
+#define MS_SILENT	32768
+#define MS_POSIXACL	(1<<16)	/* VFS does not apply the umask */
+#define MS_UNBINDABLE	(1<<17)	/* change to unbindable */
+#define MS_PRIVATE	(1<<18)	/* change to private */
+#define MS_SLAVE	(1<<19)	/* change to slave */
+#define MS_SHARED	(1<<20)	/* change to shared */
+#define MS_RELATIME	(1<<21)	/* Update atime relative to mtime/ctime. */
+#define MS_KERNMOUNT	(1<<22) /* this is a kern_mount call */
+#define MS_I_VERSION	(1<<23) /* Update inode I_version field */
+#define MS_STRICTATIME	(1<<24) /* Always perform atime updates */
+#define MS_LAZYTIME	(1<<25) /* Update the on-disk [acm]times lazily */
+
+/* These sb flags are internal to the kernel */
+#define MS_SUBMOUNT     (1<<26)
+#define MS_NOREMOTELOCK	(1<<27)
+#define MS_NOSEC	(1<<28)
+#define MS_BORN		(1<<29)
+#define MS_ACTIVE	(1<<30)
+#define MS_NOUSER	(1<<31)
+
+/*
+ * Superblock flags that can be altered by MS_REMOUNT
+ */
+#define MS_RMT_MASK	(MS_RDONLY|MS_SYNCHRONOUS|MS_MANDLOCK|MS_I_VERSION|\
+			 MS_LAZYTIME)
+
+/*
+ * Old magic mount flag and mask
+ */
+#define MS_MGC_VAL 0xC0ED0000
+#define MS_MGC_MSK 0xffff0000
+
+/*
+ * Structure for FS_IOC_FSGETXATTR[A] and FS_IOC_FSSETXATTR.
+ */
+struct fsxattr {
+	__u32		fsx_xflags;	/* xflags field value (get/set) */
+	__u32		fsx_extsize;	/* extsize field value (get/set)*/
+	__u32		fsx_nextents;	/* nextents field value (get)	*/
+	__u32		fsx_projid;	/* project identifier (get/set) */
+	__u32		fsx_cowextsize;	/* CoW extsize field value (get/set)*/
+	unsigned char	fsx_pad[8];
+};
+
+/*
+ * Flags for the fsx_xflags field
+ */
+#define FS_XFLAG_REALTIME	0x00000001	/* data in realtime volume */
+#define FS_XFLAG_PREALLOC	0x00000002	/* preallocated file extents */
+#define FS_XFLAG_IMMUTABLE	0x00000008	/* file cannot be modified */
+#define FS_XFLAG_APPEND		0x00000010	/* all writes append */
+#define FS_XFLAG_SYNC		0x00000020	/* all writes synchronous */
+#define FS_XFLAG_NOATIME	0x00000040	/* do not update access time */
+#define FS_XFLAG_NODUMP		0x00000080	/* do not include in backups */
+#define FS_XFLAG_RTINHERIT	0x00000100	/* create with rt bit set */
+#define FS_XFLAG_PROJINHERIT	0x00000200	/* create with parents projid */
+#define FS_XFLAG_NOSYMLINKS	0x00000400	/* disallow symlink creation */
+#define FS_XFLAG_EXTSIZE	0x00000800	/* extent size allocator hint */
+#define FS_XFLAG_EXTSZINHERIT	0x00001000	/* inherit inode extent size */
+#define FS_XFLAG_NODEFRAG	0x00002000	/* do not defragment */
+#define FS_XFLAG_FILESTREAM	0x00004000	/* use filestream allocator */
+#define FS_XFLAG_DAX		0x00008000	/* use DAX for IO */
+#define FS_XFLAG_COWEXTSIZE	0x00010000	/* CoW extent size allocator hint */
+#define FS_XFLAG_HASATTR	0x80000000	/* no DIFLAG for this	*/
+
+/* the read-only stuff doesn't really belong here, but any other place is
+   probably as bad and I don't want to create yet another include file. */
+
+#define BLKROSET   _IO(0x12,93)	/* set device read-only (0 = read-write) */
+#define BLKROGET   _IO(0x12,94)	/* get read-only status (0 = read_write) */
+#define BLKRRPART  _IO(0x12,95)	/* re-read partition table */
+#define BLKGETSIZE _IO(0x12,96)	/* return device size /512 (long *arg) */
+#define BLKFLSBUF  _IO(0x12,97)	/* flush buffer cache */
+#define BLKRASET   _IO(0x12,98)	/* set read ahead for block device */
+#define BLKRAGET   _IO(0x12,99)	/* get current read ahead setting */
+#define BLKFRASET  _IO(0x12,100)/* set filesystem (mm/filemap.c) read-ahead */
+#define BLKFRAGET  _IO(0x12,101)/* get filesystem (mm/filemap.c) read-ahead */
+#define BLKSECTSET _IO(0x12,102)/* set max sectors per request (ll_rw_blk.c) */
+#define BLKSECTGET _IO(0x12,103)/* get max sectors per request (ll_rw_blk.c) */
+#define BLKSSZGET  _IO(0x12,104)/* get block device sector size */
+#if 0
+#define BLKPG      _IO(0x12,105)/* See blkpg.h */
+
+/* Some people are morons.  Do not use sizeof! */
+
+#define BLKELVGET  _IOR(0x12,106,size_t)/* elevator get */
+#define BLKELVSET  _IOW(0x12,107,size_t)/* elevator set */
+/* This was here just to show that the number is taken -
+   probably all these _IO(0x12,*) ioctls should be moved to blkpg.h. */
+#endif
+/* A jump here: 108-111 have been used for various private purposes. */
+#define BLKBSZGET  _IOR(0x12,112,size_t)
+#define BLKBSZSET  _IOW(0x12,113,size_t)
+#define BLKGETSIZE64 _IOR(0x12,114,size_t)	/* return device size in bytes (u64 *arg) */
+#define BLKTRACESETUP _IOWR(0x12,115,struct blk_user_trace_setup)
+#define BLKTRACESTART _IO(0x12,116)
+#define BLKTRACESTOP _IO(0x12,117)
+#define BLKTRACETEARDOWN _IO(0x12,118)
+#define BLKDISCARD _IO(0x12,119)
+#define BLKIOMIN _IO(0x12,120)
+#define BLKIOOPT _IO(0x12,121)
+#define BLKALIGNOFF _IO(0x12,122)
+#define BLKPBSZGET _IO(0x12,123)
+#define BLKDISCARDZEROES _IO(0x12,124)
+#define BLKSECDISCARD _IO(0x12,125)
+#define BLKROTATIONAL _IO(0x12,126)
+#define BLKZEROOUT _IO(0x12,127)
+/*
+ * A jump here: 130-131 are reserved for zoned block devices
+ * (see uapi/linux/blkzoned.h)
+ */
+
+#define BMAP_IOCTL 1		/* obsolete - kept for compatibility */
+#define FIBMAP	   _IO(0x00,1)	/* bmap access */
+#define FIGETBSZ   _IO(0x00,2)	/* get the block size used for bmap */
+#define FIFREEZE	_IOWR('X', 119, int)	/* Freeze */
+#define FITHAW		_IOWR('X', 120, int)	/* Thaw */
+#define FITRIM		_IOWR('X', 121, struct fstrim_range)	/* Trim */
+#define FICLONE		_IOW(0x94, 9, int)
+#define FICLONERANGE	_IOW(0x94, 13, struct file_clone_range)
+#define FIDEDUPERANGE	_IOWR(0x94, 54, struct file_dedupe_range)
+
+#define FSLABEL_MAX 256	/* Max chars for the interface; each fs may differ */
+
+#define	FS_IOC_GETFLAGS			_IOR('f', 1, long)
+#define	FS_IOC_SETFLAGS			_IOW('f', 2, long)
+#define	FS_IOC_GETVERSION		_IOR('v', 1, long)
+#define	FS_IOC_SETVERSION		_IOW('v', 2, long)
+#define FS_IOC_FIEMAP			_IOWR('f', 11, struct fiemap)
+#define FS_IOC32_GETFLAGS		_IOR('f', 1, int)
+#define FS_IOC32_SETFLAGS		_IOW('f', 2, int)
+#define FS_IOC32_GETVERSION		_IOR('v', 1, int)
+#define FS_IOC32_SETVERSION		_IOW('v', 2, int)
+#define FS_IOC_FSGETXATTR		_IOR('X', 31, struct fsxattr)
+#define FS_IOC_FSSETXATTR		_IOW('X', 32, struct fsxattr)
+#define FS_IOC_GETFSLABEL		_IOR(0x94, 49, char[FSLABEL_MAX])
+#define FS_IOC_SETFSLABEL		_IOW(0x94, 50, char[FSLABEL_MAX])
+
+/*
+ * File system encryption support
+ */
+/* Policy provided via an ioctl on the topmost directory */
+#define FS_KEY_DESCRIPTOR_SIZE	8
+
+#define FS_POLICY_FLAGS_PAD_4		0x00
+#define FS_POLICY_FLAGS_PAD_8		0x01
+#define FS_POLICY_FLAGS_PAD_16		0x02
+#define FS_POLICY_FLAGS_PAD_32		0x03
+#define FS_POLICY_FLAGS_PAD_MASK	0x03
+#define FS_POLICY_FLAGS_VALID		0x03
+
+/* Encryption algorithms */
+#define FS_ENCRYPTION_MODE_INVALID		0
+#define FS_ENCRYPTION_MODE_AES_256_XTS		1
+#define FS_ENCRYPTION_MODE_AES_256_GCM		2
+#define FS_ENCRYPTION_MODE_AES_256_CBC		3
+#define FS_ENCRYPTION_MODE_AES_256_CTS		4
+#define FS_ENCRYPTION_MODE_AES_128_CBC		5
+#define FS_ENCRYPTION_MODE_AES_128_CTS		6
+#define FS_ENCRYPTION_MODE_SPECK128_256_XTS	7 /* Removed, do not use. */
+#define FS_ENCRYPTION_MODE_SPECK128_256_CTS	8 /* Removed, do not use. */
+
+struct fscrypt_policy {
+	__u8 version;
+	__u8 contents_encryption_mode;
+	__u8 filenames_encryption_mode;
+	__u8 flags;
+	__u8 master_key_descriptor[FS_KEY_DESCRIPTOR_SIZE];
+};
+
+#define FS_IOC_SET_ENCRYPTION_POLICY	_IOR('f', 19, struct fscrypt_policy)
+#define FS_IOC_GET_ENCRYPTION_PWSALT	_IOW('f', 20, __u8[16])
+#define FS_IOC_GET_ENCRYPTION_POLICY	_IOW('f', 21, struct fscrypt_policy)
+
+/* Parameters for passing an encryption key into the kernel keyring */
+#define FS_KEY_DESC_PREFIX		"fscrypt:"
+#define FS_KEY_DESC_PREFIX_SIZE		8
+
+/* Structure that userspace passes to the kernel keyring */
+#define FS_MAX_KEY_SIZE			64
+
+struct fscrypt_key {
+	__u32 mode;
+	__u8 raw[FS_MAX_KEY_SIZE];
+	__u32 size;
+};
+
+/*
+ * Inode flags (FS_IOC_GETFLAGS / FS_IOC_SETFLAGS)
+ *
+ * Note: for historical reasons, these flags were originally used and
+ * defined for use by ext2/ext3, and then other file systems started
+ * using these flags so they wouldn't need to write their own version
+ * of chattr/lsattr (which was shipped as part of e2fsprogs).  You
+ * should think twice before trying to use these flags in new
+ * contexts, or trying to assign these flags, since they are used both
+ * as the UAPI and the on-disk encoding for ext2/3/4.  Also, we are
+ * almost out of 32-bit flags.  :-)
+ *
+ * We have recently hoisted FS_IOC_FSGETXATTR / FS_IOC_FSSETXATTR from
+ * XFS to the generic FS level interface.  This uses a structure that
+ * has padding and hence has more room to grow, so it may be more
+ * appropriate for many new use cases.
+ *
+ * Please do not change these flags or interfaces before checking with
+ * linux-fsdevel@vger.kernel.org and linux-api@vger.kernel.org.
+ */
+#define	FS_SECRM_FL			0x00000001 /* Secure deletion */
+#define	FS_UNRM_FL			0x00000002 /* Undelete */
+#define	FS_COMPR_FL			0x00000004 /* Compress file */
+#define FS_SYNC_FL			0x00000008 /* Synchronous updates */
+#define FS_IMMUTABLE_FL			0x00000010 /* Immutable file */
+#define FS_APPEND_FL			0x00000020 /* writes to file may only append */
+#define FS_NODUMP_FL			0x00000040 /* do not dump file */
+#define FS_NOATIME_FL			0x00000080 /* do not update atime */
+/* Reserved for compression usage... */
+#define FS_DIRTY_FL			0x00000100
+#define FS_COMPRBLK_FL			0x00000200 /* One or more compressed clusters */
+#define FS_NOCOMP_FL			0x00000400 /* Don't compress */
+/* End compression flags --- maybe not all used */
+#define FS_ENCRYPT_FL			0x00000800 /* Encrypted file */
+#define FS_BTREE_FL			0x00001000 /* btree format dir */
+#define FS_INDEX_FL			0x00001000 /* hash-indexed directory */
+#define FS_IMAGIC_FL			0x00002000 /* AFS directory */
+#define FS_JOURNAL_DATA_FL		0x00004000 /* Reserved for ext3 */
+#define FS_NOTAIL_FL			0x00008000 /* file tail should not be merged */
+#define FS_DIRSYNC_FL			0x00010000 /* dirsync behaviour (directories only) */
+#define FS_TOPDIR_FL			0x00020000 /* Top of directory hierarchies*/
+#define FS_HUGE_FILE_FL			0x00040000 /* Reserved for ext4 */
+#define FS_EXTENT_FL			0x00080000 /* Extents */
+#define FS_EA_INODE_FL			0x00200000 /* Inode used for large EA */
+#define FS_EOFBLOCKS_FL			0x00400000 /* Reserved for ext4 */
+#define FS_NOCOW_FL			0x00800000 /* Do not cow file */
+#define FS_INLINE_DATA_FL		0x10000000 /* Reserved for ext4 */
+#define FS_PROJINHERIT_FL		0x20000000 /* Create with parents projid */
+#define FS_RESERVED_FL			0x80000000 /* reserved for ext2 lib */
+
+#define FS_FL_USER_VISIBLE		0x0003DFFF /* User visible flags */
+#define FS_FL_USER_MODIFIABLE		0x000380FF /* User modifiable flags */
+
+
+#define SYNC_FILE_RANGE_WAIT_BEFORE	1
+#define SYNC_FILE_RANGE_WRITE		2
+#define SYNC_FILE_RANGE_WAIT_AFTER	4
+
+/*
+ * Flags for preadv2/pwritev2:
+ */
+
+typedef int __bitwise __kernel_rwf_t;
+
+/* high priority request, poll if possible */
+#define RWF_HIPRI	((__force __kernel_rwf_t)0x00000001)
+
+/* per-IO O_DSYNC */
+#define RWF_DSYNC	((__force __kernel_rwf_t)0x00000002)
+
+/* per-IO O_SYNC */
+#define RWF_SYNC	((__force __kernel_rwf_t)0x00000004)
+
+/* per-IO, return -EAGAIN if operation would block */
+#define RWF_NOWAIT	((__force __kernel_rwf_t)0x00000008)
+
+/* per-IO O_APPEND */
+#define RWF_APPEND	((__force __kernel_rwf_t)0x00000010)
+
+/* mask of flags supported by the kernel */
+#define RWF_SUPPORTED	(RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT |\
+			 RWF_APPEND)
+
+#endif /* _UAPI_LINUX_FS_H */
diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h
index 43391e2d1153..1debfa42cba1 100644
--- a/tools/include/uapi/linux/if_link.h
+++ b/tools/include/uapi/linux/if_link.h
@@ -161,6 +161,7 @@ enum {
 	IFLA_EVENT,
 	IFLA_NEW_NETNSID,
 	IFLA_IF_NETNSID,
+	IFLA_TARGET_NETNSID = IFLA_IF_NETNSID, /* new alias */
 	IFLA_CARRIER_UP_COUNT,
 	IFLA_CARRIER_DOWN_COUNT,
 	IFLA_NEW_IFINDEX,
@@ -286,6 +287,7 @@ enum {
 	IFLA_BR_MCAST_STATS_ENABLED,
 	IFLA_BR_MCAST_IGMP_VERSION,
 	IFLA_BR_MCAST_MLD_VERSION,
+	IFLA_BR_VLAN_STATS_PER_PORT,
 	__IFLA_BR_MAX,
 };
 
@@ -554,6 +556,7 @@ enum {
 	IFLA_GENEVE_UDP_ZERO_CSUM6_TX,
 	IFLA_GENEVE_UDP_ZERO_CSUM6_RX,
 	IFLA_GENEVE_LABEL,
+	IFLA_GENEVE_TTL_INHERIT,
 	__IFLA_GENEVE_MAX
 };
 #define IFLA_GENEVE_MAX	(__IFLA_GENEVE_MAX - 1)
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 251be353f950..2b7a652c9fa4 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -420,13 +420,19 @@ struct kvm_run {
 struct kvm_coalesced_mmio_zone {
 	__u64 addr;
 	__u32 size;
-	__u32 pad;
+	union {
+		__u32 pad;
+		__u32 pio;
+	};
 };
 
 struct kvm_coalesced_mmio {
 	__u64 phys_addr;
 	__u32 len;
-	__u32 pad;
+	union {
+		__u32 pad;
+		__u32 pio;
+	};
 	__u8  data[8];
 };
 
@@ -719,6 +725,7 @@ struct kvm_ppc_one_seg_page_size {
 
 #define KVM_PPC_PAGE_SIZES_REAL		0x00000001
 #define KVM_PPC_1T_SEGMENTS		0x00000002
+#define KVM_PPC_NO_HASH			0x00000004
 
 struct kvm_ppc_smmu_info {
 	__u64 flags;
@@ -751,6 +758,15 @@ struct kvm_ppc_resize_hpt {
 #define KVM_S390_SIE_PAGE_OFFSET 1
 
 /*
+ * On arm64, machine type can be used to request the physical
+ * address size for the VM. Bits[7-0] are reserved for the guest
+ * PA size shift (i.e, log2(PA_Size)). For backward compatibility,
+ * value 0 implies the default IPA size, 40bits.
+ */
+#define KVM_VM_TYPE_ARM_IPA_SIZE_MASK	0xffULL
+#define KVM_VM_TYPE_ARM_IPA_SIZE(x)		\
+	((x) & KVM_VM_TYPE_ARM_IPA_SIZE_MASK)
+/*
  * ioctls for /dev/kvm fds:
  */
 #define KVM_GET_API_VERSION       _IO(KVMIO,   0x00)
@@ -953,6 +969,12 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_NESTED_STATE 157
 #define KVM_CAP_ARM_INJECT_SERROR_ESR 158
 #define KVM_CAP_MSR_PLATFORM_INFO 159
+#define KVM_CAP_PPC_NESTED_HV 160
+#define KVM_CAP_HYPERV_SEND_IPI 161
+#define KVM_CAP_COALESCED_PIO 162
+#define KVM_CAP_HYPERV_ENLIGHTENED_VMCS 163
+#define KVM_CAP_EXCEPTION_PAYLOAD 164
+#define KVM_CAP_ARM_VM_IPA_SIZE 165
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
diff --git a/tools/include/uapi/linux/mman.h b/tools/include/uapi/linux/mman.h
index bfd5938fede6..d0f515d53299 100644
--- a/tools/include/uapi/linux/mman.h
+++ b/tools/include/uapi/linux/mman.h
@@ -28,7 +28,9 @@
 #define MAP_HUGE_2MB	HUGETLB_FLAG_ENCODE_2MB
 #define MAP_HUGE_8MB	HUGETLB_FLAG_ENCODE_8MB
 #define MAP_HUGE_16MB	HUGETLB_FLAG_ENCODE_16MB
+#define MAP_HUGE_32MB	HUGETLB_FLAG_ENCODE_32MB
 #define MAP_HUGE_256MB	HUGETLB_FLAG_ENCODE_256MB
+#define MAP_HUGE_512MB	HUGETLB_FLAG_ENCODE_512MB
 #define MAP_HUGE_1GB	HUGETLB_FLAG_ENCODE_1GB
 #define MAP_HUGE_2GB	HUGETLB_FLAG_ENCODE_2GB
 #define MAP_HUGE_16GB	HUGETLB_FLAG_ENCODE_16GB
diff --git a/tools/include/uapi/linux/netlink.h b/tools/include/uapi/linux/netlink.h
index 776bc92e9118..486ed1f0c0bc 100644
--- a/tools/include/uapi/linux/netlink.h
+++ b/tools/include/uapi/linux/netlink.h
@@ -155,6 +155,7 @@ enum nlmsgerr_attrs {
 #define NETLINK_LIST_MEMBERSHIPS	9
 #define NETLINK_CAP_ACK			10
 #define NETLINK_EXT_ACK			11
+#define NETLINK_DUMP_STRICT_CHK		12
 
 struct nl_pktinfo {
 	__u32	group;
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index f35eb72739c0..9de8780ac8d9 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -646,10 +646,12 @@ struct perf_event_mmap_page {
  *
  *   PERF_RECORD_MISC_MMAP_DATA  - PERF_RECORD_MMAP* events
  *   PERF_RECORD_MISC_COMM_EXEC  - PERF_RECORD_COMM event
+ *   PERF_RECORD_MISC_FORK_EXEC  - PERF_RECORD_FORK event (perf internal)
  *   PERF_RECORD_MISC_SWITCH_OUT - PERF_RECORD_SWITCH* events
  */
 #define PERF_RECORD_MISC_MMAP_DATA		(1 << 13)
 #define PERF_RECORD_MISC_COMM_EXEC		(1 << 13)
+#define PERF_RECORD_MISC_FORK_EXEC		(1 << 13)
 #define PERF_RECORD_MISC_SWITCH_OUT		(1 << 13)
 /*
  * These PERF_RECORD_MISC_* flags below are safely reused
diff --git a/tools/include/uapi/linux/tls.h b/tools/include/uapi/linux/tls.h
new file mode 100644
index 000000000000..ff02287495ac
--- /dev/null
+++ b/tools/include/uapi/linux/tls.h
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */
+/*
+ * Copyright (c) 2016-2017, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _UAPI_LINUX_TLS_H
+#define _UAPI_LINUX_TLS_H
+
+#include <linux/types.h>
+
+/* TLS socket options */
+#define TLS_TX			1	/* Set transmit parameters */
+#define TLS_RX			2	/* Set receive parameters */
+
+/* Supported versions */
+#define TLS_VERSION_MINOR(ver)	((ver) & 0xFF)
+#define TLS_VERSION_MAJOR(ver)	(((ver) >> 8) & 0xFF)
+
+#define TLS_VERSION_NUMBER(id)	((((id##_VERSION_MAJOR) & 0xFF) << 8) |	\
+				 ((id##_VERSION_MINOR) & 0xFF))
+
+#define TLS_1_2_VERSION_MAJOR	0x3
+#define TLS_1_2_VERSION_MINOR	0x3
+#define TLS_1_2_VERSION		TLS_VERSION_NUMBER(TLS_1_2)
+
+/* Supported ciphers */
+#define TLS_CIPHER_AES_GCM_128				51
+#define TLS_CIPHER_AES_GCM_128_IV_SIZE			8
+#define TLS_CIPHER_AES_GCM_128_KEY_SIZE		16
+#define TLS_CIPHER_AES_GCM_128_SALT_SIZE		4
+#define TLS_CIPHER_AES_GCM_128_TAG_SIZE		16
+#define TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE		8
+
+#define TLS_SET_RECORD_TYPE	1
+#define TLS_GET_RECORD_TYPE	2
+
+struct tls_crypto_info {
+	__u16 version;
+	__u16 cipher_type;
+};
+
+struct tls12_crypto_info_aes_gcm_128 {
+	struct tls_crypto_info info;
+	unsigned char iv[TLS_CIPHER_AES_GCM_128_IV_SIZE];
+	unsigned char key[TLS_CIPHER_AES_GCM_128_KEY_SIZE];
+	unsigned char salt[TLS_CIPHER_AES_GCM_128_SALT_SIZE];
+	unsigned char rec_seq[TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE];
+};
+
+#endif /* _UAPI_LINUX_TLS_H */
diff --git a/tools/include/uapi/sound/asound.h b/tools/include/uapi/sound/asound.h
index ed0a120d4f08..404d4b9ffe76 100644
--- a/tools/include/uapi/sound/asound.h
+++ b/tools/include/uapi/sound/asound.h
@@ -752,7 +752,7 @@ struct snd_timer_info {
 #define SNDRV_TIMER_PSFLG_EARLY_EVENT	(1<<2)	/* write early event to the poll queue */
 
 struct snd_timer_params {
-	unsigned int flags;		/* flags - SNDRV_MIXER_PSFLG_* */
+	unsigned int flags;		/* flags - SNDRV_TIMER_PSFLG_* */
 	unsigned int ticks;		/* requested resolution in ticks */
 	unsigned int queue_size;	/* total size of queue (32-1024) */
 	unsigned int reserved0;		/* reserved, was: failure locations */
diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build
index 6eb9bacd1948..7bc31c905018 100644
--- a/tools/lib/bpf/Build
+++ b/tools/lib/bpf/Build
@@ -1 +1 @@
-libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o
+libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o netlink.o
diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index d49902e818b5..425b480bda75 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -1,4 +1,4 @@
-# SPDX-License-Identifier: GPL-2.0
+# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
 # Most of this file is copied from tools/lib/traceevent/Makefile
 
 BPF_VERSION = 0
@@ -69,7 +69,7 @@ FEATURE_USER = .libbpf
 FEATURE_TESTS = libelf libelf-mmap bpf reallocarray
 FEATURE_DISPLAY = libelf bpf
 
-INCLUDES = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(ARCH)/include/uapi -I$(srctree)/tools/include/uapi -I$(srctree)/tools/perf
+INCLUDES = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(ARCH)/include/uapi -I$(srctree)/tools/include/uapi
 FEATURE_CHECK_CFLAGS-bpf = $(INCLUDES)
 
 check_feat := 1
@@ -125,6 +125,7 @@ override CFLAGS += $(EXTRA_WARNINGS)
 override CFLAGS += -Werror -Wall
 override CFLAGS += -fPIC
 override CFLAGS += $(INCLUDES)
+override CFLAGS += -fvisibility=hidden
 
 ifeq ($(VERBOSE),1)
   Q =
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 60aa4ca8b2c5..03f9bcc4ef50 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: LGPL-2.1
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
 
 /*
  * common eBPF ELF operations.
@@ -28,16 +28,8 @@
 #include <linux/bpf.h>
 #include "bpf.h"
 #include "libbpf.h"
-#include "nlattr.h"
-#include <linux/rtnetlink.h>
-#include <linux/if_link.h>
-#include <sys/socket.h>
 #include <errno.h>
 
-#ifndef SOL_NETLINK
-#define SOL_NETLINK 270
-#endif
-
 /*
  * When building perf, unistd.h is overridden. __NR_bpf is
  * required to be defined explicitly.
@@ -286,6 +278,18 @@ int bpf_map_lookup_elem(int fd, const void *key, void *value)
 	return sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
 }
 
+int bpf_map_lookup_and_delete_elem(int fd, const void *key, void *value)
+{
+	union bpf_attr attr;
+
+	bzero(&attr, sizeof(attr));
+	attr.map_fd = fd;
+	attr.key = ptr_to_u64(key);
+	attr.value = ptr_to_u64(value);
+
+	return sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, &attr, sizeof(attr));
+}
+
 int bpf_map_delete_elem(int fd, const void *key)
 {
 	union bpf_attr attr;
@@ -499,127 +503,6 @@ int bpf_raw_tracepoint_open(const char *name, int prog_fd)
 	return sys_bpf(BPF_RAW_TRACEPOINT_OPEN, &attr, sizeof(attr));
 }
 
-int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags)
-{
-	struct sockaddr_nl sa;
-	int sock, seq = 0, len, ret = -1;
-	char buf[4096];
-	struct nlattr *nla, *nla_xdp;
-	struct {
-		struct nlmsghdr  nh;
-		struct ifinfomsg ifinfo;
-		char             attrbuf[64];
-	} req;
-	struct nlmsghdr *nh;
-	struct nlmsgerr *err;
-	socklen_t addrlen;
-	int one = 1;
-
-	memset(&sa, 0, sizeof(sa));
-	sa.nl_family = AF_NETLINK;
-
-	sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
-	if (sock < 0) {
-		return -errno;
-	}
-
-	if (setsockopt(sock, SOL_NETLINK, NETLINK_EXT_ACK,
-		       &one, sizeof(one)) < 0) {
-		fprintf(stderr, "Netlink error reporting not supported\n");
-	}
-
-	if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
-		ret = -errno;
-		goto cleanup;
-	}
-
-	addrlen = sizeof(sa);
-	if (getsockname(sock, (struct sockaddr *)&sa, &addrlen) < 0) {
-		ret = -errno;
-		goto cleanup;
-	}
-
-	if (addrlen != sizeof(sa)) {
-		ret = -LIBBPF_ERRNO__INTERNAL;
-		goto cleanup;
-	}
-
-	memset(&req, 0, sizeof(req));
-	req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
-	req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
-	req.nh.nlmsg_type = RTM_SETLINK;
-	req.nh.nlmsg_pid = 0;
-	req.nh.nlmsg_seq = ++seq;
-	req.ifinfo.ifi_family = AF_UNSPEC;
-	req.ifinfo.ifi_index = ifindex;
-
-	/* started nested attribute for XDP */
-	nla = (struct nlattr *)(((char *)&req)
-				+ NLMSG_ALIGN(req.nh.nlmsg_len));
-	nla->nla_type = NLA_F_NESTED | IFLA_XDP;
-	nla->nla_len = NLA_HDRLEN;
-
-	/* add XDP fd */
-	nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len);
-	nla_xdp->nla_type = IFLA_XDP_FD;
-	nla_xdp->nla_len = NLA_HDRLEN + sizeof(int);
-	memcpy((char *)nla_xdp + NLA_HDRLEN, &fd, sizeof(fd));
-	nla->nla_len += nla_xdp->nla_len;
-
-	/* if user passed in any flags, add those too */
-	if (flags) {
-		nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len);
-		nla_xdp->nla_type = IFLA_XDP_FLAGS;
-		nla_xdp->nla_len = NLA_HDRLEN + sizeof(flags);
-		memcpy((char *)nla_xdp + NLA_HDRLEN, &flags, sizeof(flags));
-		nla->nla_len += nla_xdp->nla_len;
-	}
-
-	req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len);
-
-	if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
-		ret = -errno;
-		goto cleanup;
-	}
-
-	len = recv(sock, buf, sizeof(buf), 0);
-	if (len < 0) {
-		ret = -errno;
-		goto cleanup;
-	}
-
-	for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len);
-	     nh = NLMSG_NEXT(nh, len)) {
-		if (nh->nlmsg_pid != sa.nl_pid) {
-			ret = -LIBBPF_ERRNO__WRNGPID;
-			goto cleanup;
-		}
-		if (nh->nlmsg_seq != seq) {
-			ret = -LIBBPF_ERRNO__INVSEQ;
-			goto cleanup;
-		}
-		switch (nh->nlmsg_type) {
-		case NLMSG_ERROR:
-			err = (struct nlmsgerr *)NLMSG_DATA(nh);
-			if (!err->error)
-				continue;
-			ret = err->error;
-			nla_dump_errormsg(nh);
-			goto cleanup;
-		case NLMSG_DONE:
-			break;
-		default:
-			break;
-		}
-	}
-
-	ret = 0;
-
-cleanup:
-	close(sock);
-	return ret;
-}
-
 int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size,
 		 bool do_log)
 {
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 6f38164b2618..26a51538213c 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: LGPL-2.1 */
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
 
 /*
  * common eBPF ELF operations.
@@ -20,13 +20,17 @@
  * You should have received a copy of the GNU Lesser General Public
  * License along with this program; if not,  see <http://www.gnu.org/licenses>
  */
-#ifndef __BPF_BPF_H
-#define __BPF_BPF_H
+#ifndef __LIBBPF_BPF_H
+#define __LIBBPF_BPF_H
 
 #include <linux/bpf.h>
 #include <stdbool.h>
 #include <stddef.h>
 
+#ifndef LIBBPF_API
+#define LIBBPF_API __attribute__((visibility("default")))
+#endif
+
 struct bpf_create_map_attr {
 	const char *name;
 	enum bpf_map_type map_type;
@@ -42,21 +46,24 @@ struct bpf_create_map_attr {
 	__u32 inner_map_fd;
 };
 
-int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr);
-int bpf_create_map_node(enum bpf_map_type map_type, const char *name,
-			int key_size, int value_size, int max_entries,
-			__u32 map_flags, int node);
-int bpf_create_map_name(enum bpf_map_type map_type, const char *name,
-			int key_size, int value_size, int max_entries,
-			__u32 map_flags);
-int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size,
-		   int max_entries, __u32 map_flags);
-int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name,
-			       int key_size, int inner_map_fd, int max_entries,
-			       __u32 map_flags, int node);
-int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name,
-			  int key_size, int inner_map_fd, int max_entries,
-			  __u32 map_flags);
+LIBBPF_API int
+bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr);
+LIBBPF_API int bpf_create_map_node(enum bpf_map_type map_type, const char *name,
+				   int key_size, int value_size,
+				   int max_entries, __u32 map_flags, int node);
+LIBBPF_API int bpf_create_map_name(enum bpf_map_type map_type, const char *name,
+				   int key_size, int value_size,
+				   int max_entries, __u32 map_flags);
+LIBBPF_API int bpf_create_map(enum bpf_map_type map_type, int key_size,
+			      int value_size, int max_entries, __u32 map_flags);
+LIBBPF_API int bpf_create_map_in_map_node(enum bpf_map_type map_type,
+					  const char *name, int key_size,
+					  int inner_map_fd, int max_entries,
+					  __u32 map_flags, int node);
+LIBBPF_API int bpf_create_map_in_map(enum bpf_map_type map_type,
+				     const char *name, int key_size,
+				     int inner_map_fd, int max_entries,
+				     __u32 map_flags);
 
 struct bpf_load_program_attr {
 	enum bpf_prog_type prog_type;
@@ -69,46 +76,56 @@ struct bpf_load_program_attr {
 	__u32 prog_ifindex;
 };
 
+/* Flags to direct loading requirements */
+#define MAPS_RELAX_COMPAT	0x01
+
 /* Recommend log buffer size */
 #define BPF_LOG_BUF_SIZE (256 * 1024)
-int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
-			   char *log_buf, size_t log_buf_sz);
-int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
-		     size_t insns_cnt, const char *license,
-		     __u32 kern_version, char *log_buf,
-		     size_t log_buf_sz);
-int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns,
-		       size_t insns_cnt, int strict_alignment,
-		       const char *license, __u32 kern_version,
-		       char *log_buf, size_t log_buf_sz, int log_level);
+LIBBPF_API int
+bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
+		       char *log_buf, size_t log_buf_sz);
+LIBBPF_API int bpf_load_program(enum bpf_prog_type type,
+				const struct bpf_insn *insns, size_t insns_cnt,
+				const char *license, __u32 kern_version,
+				char *log_buf, size_t log_buf_sz);
+LIBBPF_API int bpf_verify_program(enum bpf_prog_type type,
+				  const struct bpf_insn *insns,
+				  size_t insns_cnt, int strict_alignment,
+				  const char *license, __u32 kern_version,
+				  char *log_buf, size_t log_buf_sz,
+				  int log_level);
 
-int bpf_map_update_elem(int fd, const void *key, const void *value,
-			__u64 flags);
+LIBBPF_API int bpf_map_update_elem(int fd, const void *key, const void *value,
+				   __u64 flags);
 
-int bpf_map_lookup_elem(int fd, const void *key, void *value);
-int bpf_map_delete_elem(int fd, const void *key);
-int bpf_map_get_next_key(int fd, const void *key, void *next_key);
-int bpf_obj_pin(int fd, const char *pathname);
-int bpf_obj_get(const char *pathname);
-int bpf_prog_attach(int prog_fd, int attachable_fd, enum bpf_attach_type type,
-		    unsigned int flags);
-int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type);
-int bpf_prog_detach2(int prog_fd, int attachable_fd, enum bpf_attach_type type);
-int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size,
-		      void *data_out, __u32 *size_out, __u32 *retval,
-		      __u32 *duration);
-int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id);
-int bpf_map_get_next_id(__u32 start_id, __u32 *next_id);
-int bpf_prog_get_fd_by_id(__u32 id);
-int bpf_map_get_fd_by_id(__u32 id);
-int bpf_btf_get_fd_by_id(__u32 id);
-int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len);
-int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags,
-		   __u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt);
-int bpf_raw_tracepoint_open(const char *name, int prog_fd);
-int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size,
-		 bool do_log);
-int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len,
-		      __u32 *prog_id, __u32 *fd_type, __u64 *probe_offset,
-		      __u64 *probe_addr);
-#endif
+LIBBPF_API int bpf_map_lookup_elem(int fd, const void *key, void *value);
+LIBBPF_API int bpf_map_lookup_and_delete_elem(int fd, const void *key,
+					      void *value);
+LIBBPF_API int bpf_map_delete_elem(int fd, const void *key);
+LIBBPF_API int bpf_map_get_next_key(int fd, const void *key, void *next_key);
+LIBBPF_API int bpf_obj_pin(int fd, const char *pathname);
+LIBBPF_API int bpf_obj_get(const char *pathname);
+LIBBPF_API int bpf_prog_attach(int prog_fd, int attachable_fd,
+			       enum bpf_attach_type type, unsigned int flags);
+LIBBPF_API int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type);
+LIBBPF_API int bpf_prog_detach2(int prog_fd, int attachable_fd,
+				enum bpf_attach_type type);
+LIBBPF_API int bpf_prog_test_run(int prog_fd, int repeat, void *data,
+				 __u32 size, void *data_out, __u32 *size_out,
+				 __u32 *retval, __u32 *duration);
+LIBBPF_API int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id);
+LIBBPF_API int bpf_map_get_next_id(__u32 start_id, __u32 *next_id);
+LIBBPF_API int bpf_prog_get_fd_by_id(__u32 id);
+LIBBPF_API int bpf_map_get_fd_by_id(__u32 id);
+LIBBPF_API int bpf_btf_get_fd_by_id(__u32 id);
+LIBBPF_API int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len);
+LIBBPF_API int bpf_prog_query(int target_fd, enum bpf_attach_type type,
+			      __u32 query_flags, __u32 *attach_flags,
+			      __u32 *prog_ids, __u32 *prog_cnt);
+LIBBPF_API int bpf_raw_tracepoint_open(const char *name, int prog_fd);
+LIBBPF_API int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf,
+			    __u32 log_buf_size, bool do_log);
+LIBBPF_API int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf,
+				 __u32 *buf_len, __u32 *prog_id, __u32 *fd_type,
+				 __u64 *probe_offset, __u64 *probe_addr);
+#endif /* __LIBBPF_BPF_H */
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index cf94b0770522..449591aa9900 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: LGPL-2.1
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
 /* Copyright (c) 2018 Facebook */
 
 #include <stdlib.h>
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index 4897e0724d4e..b77e7080f7e7 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -1,11 +1,15 @@
-/* SPDX-License-Identifier: LGPL-2.1 */
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
 /* Copyright (c) 2018 Facebook */
 
-#ifndef __BPF_BTF_H
-#define __BPF_BTF_H
+#ifndef __LIBBPF_BTF_H
+#define __LIBBPF_BTF_H
 
 #include <linux/types.h>
 
+#ifndef LIBBPF_API
+#define LIBBPF_API __attribute__((visibility("default")))
+#endif
+
 #define BTF_ELF_SEC ".BTF"
 
 struct btf;
@@ -14,13 +18,15 @@ struct btf_type;
 typedef int (*btf_print_fn_t)(const char *, ...)
 	__attribute__((format(printf, 1, 2)));
 
-void btf__free(struct btf *btf);
-struct btf *btf__new(__u8 *data, __u32 size, btf_print_fn_t err_log);
-__s32 btf__find_by_name(const struct btf *btf, const char *type_name);
-const struct btf_type *btf__type_by_id(const struct btf *btf, __u32 id);
-__s64 btf__resolve_size(const struct btf *btf, __u32 type_id);
-int btf__resolve_type(const struct btf *btf, __u32 type_id);
-int btf__fd(const struct btf *btf);
-const char *btf__name_by_offset(const struct btf *btf, __u32 offset);
+LIBBPF_API void btf__free(struct btf *btf);
+LIBBPF_API struct btf *btf__new(__u8 *data, __u32 size, btf_print_fn_t err_log);
+LIBBPF_API __s32 btf__find_by_name(const struct btf *btf,
+				   const char *type_name);
+LIBBPF_API const struct btf_type *btf__type_by_id(const struct btf *btf,
+						  __u32 id);
+LIBBPF_API __s64 btf__resolve_size(const struct btf *btf, __u32 type_id);
+LIBBPF_API int btf__resolve_type(const struct btf *btf, __u32 type_id);
+LIBBPF_API int btf__fd(const struct btf *btf);
+LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset);
 
-#endif
+#endif /* __LIBBPF_BTF_H */
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index bdb94939fd60..d6e62e90e8d4 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: LGPL-2.1
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
 
 /*
  * Common eBPF ELF object loading operations.
@@ -7,19 +7,6 @@
  * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
  * Copyright (C) 2015 Huawei Inc.
  * Copyright (C) 2017 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License (not later!)
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not,  see <http://www.gnu.org/licenses>
  */
 
 #define _GNU_SOURCE
@@ -32,7 +19,6 @@
 #include <unistd.h>
 #include <fcntl.h>
 #include <errno.h>
-#include <perf-sys.h>
 #include <asm/unistd.h>
 #include <linux/err.h>
 #include <linux/kernel.h>
@@ -40,6 +26,8 @@
 #include <linux/btf.h>
 #include <linux/list.h>
 #include <linux/limits.h>
+#include <linux/perf_event.h>
+#include <linux/ring_buffer.h>
 #include <sys/stat.h>
 #include <sys/types.h>
 #include <sys/vfs.h>
@@ -182,7 +170,7 @@ static LIST_HEAD(bpf_objects_list);
 
 struct bpf_object {
 	char license[64];
-	u32 kern_version;
+	__u32 kern_version;
 
 	struct bpf_program *programs;
 	size_t nr_programs;
@@ -228,7 +216,7 @@ struct bpf_object {
 };
 #define obj_elf_valid(o)	((o)->efile.elf)
 
-static void bpf_program__unload(struct bpf_program *prog)
+void bpf_program__unload(struct bpf_program *prog)
 {
 	int i;
 
@@ -470,7 +458,8 @@ static int bpf_object__elf_init(struct bpf_object *obj)
 		obj->efile.fd = open(obj->path, O_RDONLY);
 		if (obj->efile.fd < 0) {
 			char errmsg[STRERR_BUFSIZE];
-			char *cp = str_error(errno, errmsg, sizeof(errmsg));
+			char *cp = libbpf_strerror_r(errno, errmsg,
+						     sizeof(errmsg));
 
 			pr_warning("failed to open %s: %s\n", obj->path, cp);
 			return -errno;
@@ -552,7 +541,7 @@ static int
 bpf_object__init_kversion(struct bpf_object *obj,
 			  void *data, size_t size)
 {
-	u32 kver;
+	__u32 kver;
 
 	if (size != sizeof(kver)) {
 		pr_warning("invalid kver section in %s\n", obj->path);
@@ -574,8 +563,9 @@ static int compare_bpf_map(const void *_a, const void *_b)
 }
 
 static int
-bpf_object__init_maps(struct bpf_object *obj)
+bpf_object__init_maps(struct bpf_object *obj, int flags)
 {
+	bool strict = !(flags & MAPS_RELAX_COMPAT);
 	int i, map_idx, map_def_sz, nr_maps = 0;
 	Elf_Scn *scn;
 	Elf_Data *data;
@@ -697,7 +687,8 @@ bpf_object__init_maps(struct bpf_object *obj)
 						   "has unrecognized, non-zero "
 						   "options\n",
 						   obj->path, map_name);
-					return -EINVAL;
+					if (strict)
+						return -EINVAL;
 				}
 			}
 			memcpy(&obj->maps[map_idx].def, def,
@@ -728,7 +719,7 @@ static bool section_have_execinstr(struct bpf_object *obj, int idx)
 	return false;
 }
 
-static int bpf_object__elf_collect(struct bpf_object *obj)
+static int bpf_object__elf_collect(struct bpf_object *obj, int flags)
 {
 	Elf *elf = obj->efile.elf;
 	GElf_Ehdr *ep = &obj->efile.ehdr;
@@ -811,7 +802,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
 						      data->d_size, name, idx);
 			if (err) {
 				char errmsg[STRERR_BUFSIZE];
-				char *cp = str_error(-err, errmsg, sizeof(errmsg));
+				char *cp = libbpf_strerror_r(-err, errmsg,
+							     sizeof(errmsg));
 
 				pr_warning("failed to alloc program %s (%s): %s",
 					   name, obj->path, cp);
@@ -854,7 +846,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
 		return LIBBPF_ERRNO__FORMAT;
 	}
 	if (obj->efile.maps_shndx >= 0) {
-		err = bpf_object__init_maps(obj);
+		err = bpf_object__init_maps(obj, flags);
 		if (err)
 			goto out;
 	}
@@ -1140,7 +1132,7 @@ bpf_object__create_maps(struct bpf_object *obj)
 
 		*pfd = bpf_create_map_xattr(&create_attr);
 		if (*pfd < 0 && create_attr.btf_key_type_id) {
-			cp = str_error(errno, errmsg, sizeof(errmsg));
+			cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
 			pr_warning("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n",
 				   map->name, cp, errno);
 			create_attr.btf_fd = 0;
@@ -1155,7 +1147,7 @@ bpf_object__create_maps(struct bpf_object *obj)
 			size_t j;
 
 			err = *pfd;
-			cp = str_error(errno, errmsg, sizeof(errmsg));
+			cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
 			pr_warning("failed to create map (name: '%s'): %s\n",
 				   map->name, cp);
 			for (j = 0; j < i; j++)
@@ -1306,7 +1298,7 @@ static int bpf_object__collect_reloc(struct bpf_object *obj)
 static int
 load_program(enum bpf_prog_type type, enum bpf_attach_type expected_attach_type,
 	     const char *name, struct bpf_insn *insns, int insns_cnt,
-	     char *license, u32 kern_version, int *pfd, int prog_ifindex)
+	     char *license, __u32 kern_version, int *pfd, int prog_ifindex)
 {
 	struct bpf_load_program_attr load_attr;
 	char *cp, errmsg[STRERR_BUFSIZE];
@@ -1339,7 +1331,7 @@ load_program(enum bpf_prog_type type, enum bpf_attach_type expected_attach_type,
 	}
 
 	ret = -LIBBPF_ERRNO__LOAD;
-	cp = str_error(errno, errmsg, sizeof(errmsg));
+	cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
 	pr_warning("load bpf program failed: %s\n", cp);
 
 	if (log_buf && log_buf[0] != '\0') {
@@ -1375,9 +1367,9 @@ out:
 	return ret;
 }
 
-static int
+int
 bpf_program__load(struct bpf_program *prog,
-		  char *license, u32 kern_version)
+		  char *license, __u32 kern_version)
 {
 	int err = 0, fd, i;
 
@@ -1502,6 +1494,7 @@ static bool bpf_prog_type__needs_kver(enum bpf_prog_type type)
 	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
 	case BPF_PROG_TYPE_LIRC_MODE2:
 	case BPF_PROG_TYPE_SK_REUSEPORT:
+	case BPF_PROG_TYPE_FLOW_DISSECTOR:
 		return false;
 	case BPF_PROG_TYPE_UNSPEC:
 	case BPF_PROG_TYPE_KPROBE:
@@ -1525,7 +1518,7 @@ static int bpf_object__validate(struct bpf_object *obj, bool needs_kver)
 
 static struct bpf_object *
 __bpf_object__open(const char *path, void *obj_buf, size_t obj_buf_sz,
-		   bool needs_kver)
+		   bool needs_kver, int flags)
 {
 	struct bpf_object *obj;
 	int err;
@@ -1541,7 +1534,7 @@ __bpf_object__open(const char *path, void *obj_buf, size_t obj_buf_sz,
 
 	CHECK_ERR(bpf_object__elf_init(obj), err, out);
 	CHECK_ERR(bpf_object__check_endianness(obj), err, out);
-	CHECK_ERR(bpf_object__elf_collect(obj), err, out);
+	CHECK_ERR(bpf_object__elf_collect(obj, flags), err, out);
 	CHECK_ERR(bpf_object__collect_reloc(obj), err, out);
 	CHECK_ERR(bpf_object__validate(obj, needs_kver), err, out);
 
@@ -1552,7 +1545,8 @@ out:
 	return ERR_PTR(err);
 }
 
-struct bpf_object *bpf_object__open_xattr(struct bpf_object_open_attr *attr)
+struct bpf_object *__bpf_object__open_xattr(struct bpf_object_open_attr *attr,
+					    int flags)
 {
 	/* param validation */
 	if (!attr->file)
@@ -1561,7 +1555,13 @@ struct bpf_object *bpf_object__open_xattr(struct bpf_object_open_attr *attr)
 	pr_debug("loading %s\n", attr->file);
 
 	return __bpf_object__open(attr->file, NULL, 0,
-				  bpf_prog_type__needs_kver(attr->prog_type));
+				  bpf_prog_type__needs_kver(attr->prog_type),
+				  flags);
+}
+
+struct bpf_object *bpf_object__open_xattr(struct bpf_object_open_attr *attr)
+{
+	return __bpf_object__open_xattr(attr, 0);
 }
 
 struct bpf_object *bpf_object__open(const char *path)
@@ -1594,7 +1594,7 @@ struct bpf_object *bpf_object__open_buffer(void *obj_buf,
 	pr_debug("loading object '%s' from buffer\n",
 		 name);
 
-	return __bpf_object__open(name, obj_buf, obj_buf_sz, true);
+	return __bpf_object__open(name, obj_buf, obj_buf_sz, true, true);
 }
 
 int bpf_object__unload(struct bpf_object *obj)
@@ -1654,7 +1654,7 @@ static int check_path(const char *path)
 
 	dir = dirname(dname);
 	if (statfs(dir, &st_fs)) {
-		cp = str_error(errno, errmsg, sizeof(errmsg));
+		cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
 		pr_warning("failed to statfs %s: %s\n", dir, cp);
 		err = -errno;
 	}
@@ -1690,7 +1690,7 @@ int bpf_program__pin_instance(struct bpf_program *prog, const char *path,
 	}
 
 	if (bpf_obj_pin(prog->instances.fds[instance], path)) {
-		cp = str_error(errno, errmsg, sizeof(errmsg));
+		cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
 		pr_warning("failed to pin program: %s\n", cp);
 		return -errno;
 	}
@@ -1708,7 +1708,7 @@ static int make_dir(const char *path)
 		err = -errno;
 
 	if (err) {
-		cp = str_error(-err, errmsg, sizeof(errmsg));
+		cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
 		pr_warning("failed to mkdir %s: %s\n", path, cp);
 	}
 	return err;
@@ -1770,7 +1770,7 @@ int bpf_map__pin(struct bpf_map *map, const char *path)
 	}
 
 	if (bpf_obj_pin(map->fd, path)) {
-		cp = str_error(errno, errmsg, sizeof(errmsg));
+		cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
 		pr_warning("failed to pin map: %s\n", cp);
 		return -errno;
 	}
@@ -2084,57 +2084,91 @@ void bpf_program__set_expected_attach_type(struct bpf_program *prog,
 	prog->expected_attach_type = type;
 }
 
-#define BPF_PROG_SEC_FULL(string, ptype, atype) \
-	{ string, sizeof(string) - 1, ptype, atype }
+#define BPF_PROG_SEC_IMPL(string, ptype, eatype, is_attachable, atype) \
+	{ string, sizeof(string) - 1, ptype, eatype, is_attachable, atype }
 
-#define BPF_PROG_SEC(string, ptype) BPF_PROG_SEC_FULL(string, ptype, 0)
+/* Programs that can NOT be attached. */
+#define BPF_PROG_SEC(string, ptype) BPF_PROG_SEC_IMPL(string, ptype, 0, 0, 0)
 
-#define BPF_S_PROG_SEC(string, ptype) \
-	BPF_PROG_SEC_FULL(string, BPF_PROG_TYPE_CGROUP_SOCK, ptype)
+/* Programs that can be attached. */
+#define BPF_APROG_SEC(string, ptype, atype) \
+	BPF_PROG_SEC_IMPL(string, ptype, 0, 1, atype)
 
-#define BPF_SA_PROG_SEC(string, ptype) \
-	BPF_PROG_SEC_FULL(string, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, ptype)
+/* Programs that must specify expected attach type at load time. */
+#define BPF_EAPROG_SEC(string, ptype, eatype) \
+	BPF_PROG_SEC_IMPL(string, ptype, eatype, 1, eatype)
+
+/* Programs that can be attached but attach type can't be identified by section
+ * name. Kept for backward compatibility.
+ */
+#define BPF_APROG_COMPAT(string, ptype) BPF_PROG_SEC(string, ptype)
 
 static const struct {
 	const char *sec;
 	size_t len;
 	enum bpf_prog_type prog_type;
 	enum bpf_attach_type expected_attach_type;
+	int is_attachable;
+	enum bpf_attach_type attach_type;
 } section_names[] = {
-	BPF_PROG_SEC("socket",		BPF_PROG_TYPE_SOCKET_FILTER),
-	BPF_PROG_SEC("kprobe/",		BPF_PROG_TYPE_KPROBE),
-	BPF_PROG_SEC("kretprobe/",	BPF_PROG_TYPE_KPROBE),
-	BPF_PROG_SEC("classifier",	BPF_PROG_TYPE_SCHED_CLS),
-	BPF_PROG_SEC("action",		BPF_PROG_TYPE_SCHED_ACT),
-	BPF_PROG_SEC("tracepoint/",	BPF_PROG_TYPE_TRACEPOINT),
-	BPF_PROG_SEC("raw_tracepoint/",	BPF_PROG_TYPE_RAW_TRACEPOINT),
-	BPF_PROG_SEC("xdp",		BPF_PROG_TYPE_XDP),
-	BPF_PROG_SEC("perf_event",	BPF_PROG_TYPE_PERF_EVENT),
-	BPF_PROG_SEC("cgroup/skb",	BPF_PROG_TYPE_CGROUP_SKB),
-	BPF_PROG_SEC("cgroup/sock",	BPF_PROG_TYPE_CGROUP_SOCK),
-	BPF_PROG_SEC("cgroup/dev",	BPF_PROG_TYPE_CGROUP_DEVICE),
-	BPF_PROG_SEC("lwt_in",		BPF_PROG_TYPE_LWT_IN),
-	BPF_PROG_SEC("lwt_out",		BPF_PROG_TYPE_LWT_OUT),
-	BPF_PROG_SEC("lwt_xmit",	BPF_PROG_TYPE_LWT_XMIT),
-	BPF_PROG_SEC("lwt_seg6local",	BPF_PROG_TYPE_LWT_SEG6LOCAL),
-	BPF_PROG_SEC("sockops",		BPF_PROG_TYPE_SOCK_OPS),
-	BPF_PROG_SEC("sk_skb",		BPF_PROG_TYPE_SK_SKB),
-	BPF_PROG_SEC("sk_msg",		BPF_PROG_TYPE_SK_MSG),
-	BPF_PROG_SEC("lirc_mode2",	BPF_PROG_TYPE_LIRC_MODE2),
-	BPF_SA_PROG_SEC("cgroup/bind4",	BPF_CGROUP_INET4_BIND),
-	BPF_SA_PROG_SEC("cgroup/bind6",	BPF_CGROUP_INET6_BIND),
-	BPF_SA_PROG_SEC("cgroup/connect4", BPF_CGROUP_INET4_CONNECT),
-	BPF_SA_PROG_SEC("cgroup/connect6", BPF_CGROUP_INET6_CONNECT),
-	BPF_SA_PROG_SEC("cgroup/sendmsg4", BPF_CGROUP_UDP4_SENDMSG),
-	BPF_SA_PROG_SEC("cgroup/sendmsg6", BPF_CGROUP_UDP6_SENDMSG),
-	BPF_S_PROG_SEC("cgroup/post_bind4", BPF_CGROUP_INET4_POST_BIND),
-	BPF_S_PROG_SEC("cgroup/post_bind6", BPF_CGROUP_INET6_POST_BIND),
+	BPF_PROG_SEC("socket",			BPF_PROG_TYPE_SOCKET_FILTER),
+	BPF_PROG_SEC("kprobe/",			BPF_PROG_TYPE_KPROBE),
+	BPF_PROG_SEC("kretprobe/",		BPF_PROG_TYPE_KPROBE),
+	BPF_PROG_SEC("classifier",		BPF_PROG_TYPE_SCHED_CLS),
+	BPF_PROG_SEC("action",			BPF_PROG_TYPE_SCHED_ACT),
+	BPF_PROG_SEC("tracepoint/",		BPF_PROG_TYPE_TRACEPOINT),
+	BPF_PROG_SEC("raw_tracepoint/",		BPF_PROG_TYPE_RAW_TRACEPOINT),
+	BPF_PROG_SEC("xdp",			BPF_PROG_TYPE_XDP),
+	BPF_PROG_SEC("perf_event",		BPF_PROG_TYPE_PERF_EVENT),
+	BPF_PROG_SEC("lwt_in",			BPF_PROG_TYPE_LWT_IN),
+	BPF_PROG_SEC("lwt_out",			BPF_PROG_TYPE_LWT_OUT),
+	BPF_PROG_SEC("lwt_xmit",		BPF_PROG_TYPE_LWT_XMIT),
+	BPF_PROG_SEC("lwt_seg6local",		BPF_PROG_TYPE_LWT_SEG6LOCAL),
+	BPF_APROG_SEC("cgroup_skb/ingress",	BPF_PROG_TYPE_CGROUP_SKB,
+						BPF_CGROUP_INET_INGRESS),
+	BPF_APROG_SEC("cgroup_skb/egress",	BPF_PROG_TYPE_CGROUP_SKB,
+						BPF_CGROUP_INET_EGRESS),
+	BPF_APROG_COMPAT("cgroup/skb",		BPF_PROG_TYPE_CGROUP_SKB),
+	BPF_APROG_SEC("cgroup/sock",		BPF_PROG_TYPE_CGROUP_SOCK,
+						BPF_CGROUP_INET_SOCK_CREATE),
+	BPF_EAPROG_SEC("cgroup/post_bind4",	BPF_PROG_TYPE_CGROUP_SOCK,
+						BPF_CGROUP_INET4_POST_BIND),
+	BPF_EAPROG_SEC("cgroup/post_bind6",	BPF_PROG_TYPE_CGROUP_SOCK,
+						BPF_CGROUP_INET6_POST_BIND),
+	BPF_APROG_SEC("cgroup/dev",		BPF_PROG_TYPE_CGROUP_DEVICE,
+						BPF_CGROUP_DEVICE),
+	BPF_APROG_SEC("sockops",		BPF_PROG_TYPE_SOCK_OPS,
+						BPF_CGROUP_SOCK_OPS),
+	BPF_APROG_SEC("sk_skb/stream_parser",	BPF_PROG_TYPE_SK_SKB,
+						BPF_SK_SKB_STREAM_PARSER),
+	BPF_APROG_SEC("sk_skb/stream_verdict",	BPF_PROG_TYPE_SK_SKB,
+						BPF_SK_SKB_STREAM_VERDICT),
+	BPF_APROG_COMPAT("sk_skb",		BPF_PROG_TYPE_SK_SKB),
+	BPF_APROG_SEC("sk_msg",			BPF_PROG_TYPE_SK_MSG,
+						BPF_SK_MSG_VERDICT),
+	BPF_APROG_SEC("lirc_mode2",		BPF_PROG_TYPE_LIRC_MODE2,
+						BPF_LIRC_MODE2),
+	BPF_APROG_SEC("flow_dissector",		BPF_PROG_TYPE_FLOW_DISSECTOR,
+						BPF_FLOW_DISSECTOR),
+	BPF_EAPROG_SEC("cgroup/bind4",		BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+						BPF_CGROUP_INET4_BIND),
+	BPF_EAPROG_SEC("cgroup/bind6",		BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+						BPF_CGROUP_INET6_BIND),
+	BPF_EAPROG_SEC("cgroup/connect4",	BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+						BPF_CGROUP_INET4_CONNECT),
+	BPF_EAPROG_SEC("cgroup/connect6",	BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+						BPF_CGROUP_INET6_CONNECT),
+	BPF_EAPROG_SEC("cgroup/sendmsg4",	BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+						BPF_CGROUP_UDP4_SENDMSG),
+	BPF_EAPROG_SEC("cgroup/sendmsg6",	BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+						BPF_CGROUP_UDP6_SENDMSG),
 };
 
+#undef BPF_PROG_SEC_IMPL
 #undef BPF_PROG_SEC
-#undef BPF_PROG_SEC_FULL
-#undef BPF_S_PROG_SEC
-#undef BPF_SA_PROG_SEC
+#undef BPF_APROG_SEC
+#undef BPF_EAPROG_SEC
+#undef BPF_APROG_COMPAT
 
 int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
 			     enum bpf_attach_type *expected_attach_type)
@@ -2154,6 +2188,25 @@ int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
 	return -EINVAL;
 }
 
+int libbpf_attach_type_by_name(const char *name,
+			       enum bpf_attach_type *attach_type)
+{
+	int i;
+
+	if (!name)
+		return -EINVAL;
+
+	for (i = 0; i < ARRAY_SIZE(section_names); i++) {
+		if (strncmp(name, section_names[i].sec, section_names[i].len))
+			continue;
+		if (!section_names[i].is_attachable)
+			return -EINVAL;
+		*attach_type = section_names[i].attach_type;
+		return 0;
+	}
+	return -EINVAL;
+}
+
 static int
 bpf_program__identify_section(struct bpf_program *prog,
 			      enum bpf_prog_type *prog_type,
@@ -2336,7 +2389,7 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
 		bpf_program__set_expected_attach_type(prog,
 						      expected_attach_type);
 
-		if (!bpf_program__is_function_storage(prog, obj) && !first_prog)
+		if (!first_prog)
 			first_prog = prog;
 	}
 
@@ -2363,61 +2416,49 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
 }
 
 enum bpf_perf_event_ret
-bpf_perf_event_read_simple(void *mem, unsigned long size,
-			   unsigned long page_size, void **buf, size_t *buf_len,
-			   bpf_perf_event_print_t fn, void *priv)
+bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
+			   void **copy_mem, size_t *copy_size,
+			   bpf_perf_event_print_t fn, void *private_data)
 {
-	volatile struct perf_event_mmap_page *header = mem;
+	struct perf_event_mmap_page *header = mmap_mem;
+	__u64 data_head = ring_buffer_read_head(header);
 	__u64 data_tail = header->data_tail;
-	__u64 data_head = header->data_head;
-	int ret = LIBBPF_PERF_EVENT_ERROR;
-	void *base, *begin, *end;
-
-	asm volatile("" ::: "memory"); /* in real code it should be smp_rmb() */
-	if (data_head == data_tail)
-		return LIBBPF_PERF_EVENT_CONT;
-
-	base = ((char *)header) + page_size;
-
-	begin = base + data_tail % size;
-	end = base + data_head % size;
-
-	while (begin != end) {
-		struct perf_event_header *ehdr;
-
-		ehdr = begin;
-		if (begin + ehdr->size > base + size) {
-			long len = base + size - begin;
-
-			if (*buf_len < ehdr->size) {
-				free(*buf);
-				*buf = malloc(ehdr->size);
-				if (!*buf) {
+	void *base = ((__u8 *)header) + page_size;
+	int ret = LIBBPF_PERF_EVENT_CONT;
+	struct perf_event_header *ehdr;
+	size_t ehdr_size;
+
+	while (data_head != data_tail) {
+		ehdr = base + (data_tail & (mmap_size - 1));
+		ehdr_size = ehdr->size;
+
+		if (((void *)ehdr) + ehdr_size > base + mmap_size) {
+			void *copy_start = ehdr;
+			size_t len_first = base + mmap_size - copy_start;
+			size_t len_secnd = ehdr_size - len_first;
+
+			if (*copy_size < ehdr_size) {
+				free(*copy_mem);
+				*copy_mem = malloc(ehdr_size);
+				if (!*copy_mem) {
+					*copy_size = 0;
 					ret = LIBBPF_PERF_EVENT_ERROR;
 					break;
 				}
-				*buf_len = ehdr->size;
+				*copy_size = ehdr_size;
 			}
 
-			memcpy(*buf, begin, len);
-			memcpy(*buf + len, base, ehdr->size - len);
-			ehdr = (void *)*buf;
-			begin = base + ehdr->size - len;
-		} else if (begin + ehdr->size == base + size) {
-			begin = base;
-		} else {
-			begin += ehdr->size;
+			memcpy(*copy_mem, copy_start, len_first);
+			memcpy(*copy_mem + len_first, base, len_secnd);
+			ehdr = *copy_mem;
 		}
 
-		ret = fn(ehdr, priv);
+		ret = fn(ehdr, private_data);
+		data_tail += ehdr_size;
 		if (ret != LIBBPF_PERF_EVENT_CONT)
 			break;
-
-		data_tail += ehdr->size;
 	}
 
-	__sync_synchronize(); /* smp_mb() */
-	header->data_tail = data_tail;
-
+	ring_buffer_write_tail(header, data_tail);
 	return ret;
 }
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 96c55fac54c3..1f3468dad8b2 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: LGPL-2.1 */
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
 
 /*
  * Common eBPF ELF object loading operations.
@@ -6,22 +6,9 @@
  * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
  * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
  * Copyright (C) 2015 Huawei Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License (not later!)
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not,  see <http://www.gnu.org/licenses>
  */
-#ifndef __BPF_LIBBPF_H
-#define __BPF_LIBBPF_H
+#ifndef __LIBBPF_LIBBPF_H
+#define __LIBBPF_LIBBPF_H
 
 #include <stdio.h>
 #include <stdint.h>
@@ -29,6 +16,10 @@
 #include <sys/types.h>  // for size_t
 #include <linux/bpf.h>
 
+#ifndef LIBBPF_API
+#define LIBBPF_API __attribute__((visibility("default")))
+#endif
+
 enum libbpf_errno {
 	__LIBBPF_ERRNO__START = 4000,
 
@@ -46,10 +37,11 @@ enum libbpf_errno {
 	LIBBPF_ERRNO__PROGTYPE,	/* Kernel doesn't support this program type */
 	LIBBPF_ERRNO__WRNGPID,	/* Wrong pid in netlink message */
 	LIBBPF_ERRNO__INVSEQ,	/* Invalid netlink sequence */
+	LIBBPF_ERRNO__NLPARSE,	/* netlink parsing error */
 	__LIBBPF_ERRNO__END,
 };
 
-int libbpf_strerror(int err, char *buf, size_t size);
+LIBBPF_API int libbpf_strerror(int err, char *buf, size_t size);
 
 /*
  * __printf is defined in include/linux/compiler-gcc.h. However,
@@ -59,9 +51,9 @@ int libbpf_strerror(int err, char *buf, size_t size);
 typedef int (*libbpf_print_fn_t)(const char *, ...)
 	__attribute__((format(printf, 1, 2)));
 
-void libbpf_set_print(libbpf_print_fn_t warn,
-		      libbpf_print_fn_t info,
-		      libbpf_print_fn_t debug);
+LIBBPF_API void libbpf_set_print(libbpf_print_fn_t warn,
+				 libbpf_print_fn_t info,
+				 libbpf_print_fn_t debug);
 
 /* Hide internal to user */
 struct bpf_object;
@@ -71,25 +63,28 @@ struct bpf_object_open_attr {
 	enum bpf_prog_type prog_type;
 };
 
-struct bpf_object *bpf_object__open(const char *path);
-struct bpf_object *bpf_object__open_xattr(struct bpf_object_open_attr *attr);
-struct bpf_object *bpf_object__open_buffer(void *obj_buf,
-					   size_t obj_buf_sz,
-					   const char *name);
-int bpf_object__pin(struct bpf_object *object, const char *path);
-void bpf_object__close(struct bpf_object *object);
+LIBBPF_API struct bpf_object *bpf_object__open(const char *path);
+LIBBPF_API struct bpf_object *
+bpf_object__open_xattr(struct bpf_object_open_attr *attr);
+struct bpf_object *__bpf_object__open_xattr(struct bpf_object_open_attr *attr,
+					    int flags);
+LIBBPF_API struct bpf_object *bpf_object__open_buffer(void *obj_buf,
+						      size_t obj_buf_sz,
+						      const char *name);
+LIBBPF_API int bpf_object__pin(struct bpf_object *object, const char *path);
+LIBBPF_API void bpf_object__close(struct bpf_object *object);
 
 /* Load/unload object into/from kernel */
-int bpf_object__load(struct bpf_object *obj);
-int bpf_object__unload(struct bpf_object *obj);
-const char *bpf_object__name(struct bpf_object *obj);
-unsigned int bpf_object__kversion(struct bpf_object *obj);
-int bpf_object__btf_fd(const struct bpf_object *obj);
+LIBBPF_API int bpf_object__load(struct bpf_object *obj);
+LIBBPF_API int bpf_object__unload(struct bpf_object *obj);
+LIBBPF_API const char *bpf_object__name(struct bpf_object *obj);
+LIBBPF_API unsigned int bpf_object__kversion(struct bpf_object *obj);
+LIBBPF_API int bpf_object__btf_fd(const struct bpf_object *obj);
 
-struct bpf_program *
+LIBBPF_API struct bpf_program *
 bpf_object__find_program_by_title(struct bpf_object *obj, const char *title);
 
-struct bpf_object *bpf_object__next(struct bpf_object *prev);
+LIBBPF_API struct bpf_object *bpf_object__next(struct bpf_object *prev);
 #define bpf_object__for_each_safe(pos, tmp)			\
 	for ((pos) = bpf_object__next(NULL),		\
 		(tmp) = bpf_object__next(pos);		\
@@ -97,17 +92,20 @@ struct bpf_object *bpf_object__next(struct bpf_object *prev);
 	     (pos) = (tmp), (tmp) = bpf_object__next(tmp))
 
 typedef void (*bpf_object_clear_priv_t)(struct bpf_object *, void *);
-int bpf_object__set_priv(struct bpf_object *obj, void *priv,
-			 bpf_object_clear_priv_t clear_priv);
-void *bpf_object__priv(struct bpf_object *prog);
+LIBBPF_API int bpf_object__set_priv(struct bpf_object *obj, void *priv,
+				    bpf_object_clear_priv_t clear_priv);
+LIBBPF_API void *bpf_object__priv(struct bpf_object *prog);
 
-int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
-			     enum bpf_attach_type *expected_attach_type);
+LIBBPF_API int
+libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
+			 enum bpf_attach_type *expected_attach_type);
+LIBBPF_API int libbpf_attach_type_by_name(const char *name,
+					  enum bpf_attach_type *attach_type);
 
 /* Accessors of bpf_program */
 struct bpf_program;
-struct bpf_program *bpf_program__next(struct bpf_program *prog,
-				      struct bpf_object *obj);
+LIBBPF_API struct bpf_program *bpf_program__next(struct bpf_program *prog,
+						 struct bpf_object *obj);
 
 #define bpf_object__for_each_program(pos, obj)		\
 	for ((pos) = bpf_program__next(NULL, (obj));	\
@@ -117,18 +115,24 @@ struct bpf_program *bpf_program__next(struct bpf_program *prog,
 typedef void (*bpf_program_clear_priv_t)(struct bpf_program *,
 					 void *);
 
-int bpf_program__set_priv(struct bpf_program *prog, void *priv,
-			  bpf_program_clear_priv_t clear_priv);
+LIBBPF_API int bpf_program__set_priv(struct bpf_program *prog, void *priv,
+				     bpf_program_clear_priv_t clear_priv);
 
-void *bpf_program__priv(struct bpf_program *prog);
-void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex);
+LIBBPF_API void *bpf_program__priv(struct bpf_program *prog);
+LIBBPF_API void bpf_program__set_ifindex(struct bpf_program *prog,
+					 __u32 ifindex);
 
-const char *bpf_program__title(struct bpf_program *prog, bool needs_copy);
+LIBBPF_API const char *bpf_program__title(struct bpf_program *prog,
+					  bool needs_copy);
 
-int bpf_program__fd(struct bpf_program *prog);
-int bpf_program__pin_instance(struct bpf_program *prog, const char *path,
-			      int instance);
-int bpf_program__pin(struct bpf_program *prog, const char *path);
+LIBBPF_API int bpf_program__load(struct bpf_program *prog, char *license,
+				 __u32 kern_version);
+LIBBPF_API int bpf_program__fd(struct bpf_program *prog);
+LIBBPF_API int bpf_program__pin_instance(struct bpf_program *prog,
+					 const char *path,
+					 int instance);
+LIBBPF_API int bpf_program__pin(struct bpf_program *prog, const char *path);
+LIBBPF_API void bpf_program__unload(struct bpf_program *prog);
 
 struct bpf_insn;
 
@@ -189,34 +193,36 @@ typedef int (*bpf_program_prep_t)(struct bpf_program *prog, int n,
 				  struct bpf_insn *insns, int insns_cnt,
 				  struct bpf_prog_prep_result *res);
 
-int bpf_program__set_prep(struct bpf_program *prog, int nr_instance,
-			  bpf_program_prep_t prep);
+LIBBPF_API int bpf_program__set_prep(struct bpf_program *prog, int nr_instance,
+				     bpf_program_prep_t prep);
 
-int bpf_program__nth_fd(struct bpf_program *prog, int n);
+LIBBPF_API int bpf_program__nth_fd(struct bpf_program *prog, int n);
 
 /*
  * Adjust type of BPF program. Default is kprobe.
  */
-int bpf_program__set_socket_filter(struct bpf_program *prog);
-int bpf_program__set_tracepoint(struct bpf_program *prog);
-int bpf_program__set_raw_tracepoint(struct bpf_program *prog);
-int bpf_program__set_kprobe(struct bpf_program *prog);
-int bpf_program__set_sched_cls(struct bpf_program *prog);
-int bpf_program__set_sched_act(struct bpf_program *prog);
-int bpf_program__set_xdp(struct bpf_program *prog);
-int bpf_program__set_perf_event(struct bpf_program *prog);
-void bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type);
-void bpf_program__set_expected_attach_type(struct bpf_program *prog,
-					   enum bpf_attach_type type);
-
-bool bpf_program__is_socket_filter(struct bpf_program *prog);
-bool bpf_program__is_tracepoint(struct bpf_program *prog);
-bool bpf_program__is_raw_tracepoint(struct bpf_program *prog);
-bool bpf_program__is_kprobe(struct bpf_program *prog);
-bool bpf_program__is_sched_cls(struct bpf_program *prog);
-bool bpf_program__is_sched_act(struct bpf_program *prog);
-bool bpf_program__is_xdp(struct bpf_program *prog);
-bool bpf_program__is_perf_event(struct bpf_program *prog);
+LIBBPF_API int bpf_program__set_socket_filter(struct bpf_program *prog);
+LIBBPF_API int bpf_program__set_tracepoint(struct bpf_program *prog);
+LIBBPF_API int bpf_program__set_raw_tracepoint(struct bpf_program *prog);
+LIBBPF_API int bpf_program__set_kprobe(struct bpf_program *prog);
+LIBBPF_API int bpf_program__set_sched_cls(struct bpf_program *prog);
+LIBBPF_API int bpf_program__set_sched_act(struct bpf_program *prog);
+LIBBPF_API int bpf_program__set_xdp(struct bpf_program *prog);
+LIBBPF_API int bpf_program__set_perf_event(struct bpf_program *prog);
+LIBBPF_API void bpf_program__set_type(struct bpf_program *prog,
+				      enum bpf_prog_type type);
+LIBBPF_API void
+bpf_program__set_expected_attach_type(struct bpf_program *prog,
+				      enum bpf_attach_type type);
+
+LIBBPF_API bool bpf_program__is_socket_filter(struct bpf_program *prog);
+LIBBPF_API bool bpf_program__is_tracepoint(struct bpf_program *prog);
+LIBBPF_API bool bpf_program__is_raw_tracepoint(struct bpf_program *prog);
+LIBBPF_API bool bpf_program__is_kprobe(struct bpf_program *prog);
+LIBBPF_API bool bpf_program__is_sched_cls(struct bpf_program *prog);
+LIBBPF_API bool bpf_program__is_sched_act(struct bpf_program *prog);
+LIBBPF_API bool bpf_program__is_xdp(struct bpf_program *prog);
+LIBBPF_API bool bpf_program__is_perf_event(struct bpf_program *prog);
 
 /*
  * No need for __attribute__((packed)), all members of 'bpf_map_def'
@@ -237,39 +243,39 @@ struct bpf_map_def {
  * so no need to worry about a name clash.
  */
 struct bpf_map;
-struct bpf_map *
+LIBBPF_API struct bpf_map *
 bpf_object__find_map_by_name(struct bpf_object *obj, const char *name);
 
 /*
  * Get bpf_map through the offset of corresponding struct bpf_map_def
  * in the BPF object file.
  */
-struct bpf_map *
+LIBBPF_API struct bpf_map *
 bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset);
 
-struct bpf_map *
+LIBBPF_API struct bpf_map *
 bpf_map__next(struct bpf_map *map, struct bpf_object *obj);
 #define bpf_map__for_each(pos, obj)		\
 	for ((pos) = bpf_map__next(NULL, (obj));	\
 	     (pos) != NULL;				\
 	     (pos) = bpf_map__next((pos), (obj)))
 
-int bpf_map__fd(struct bpf_map *map);
-const struct bpf_map_def *bpf_map__def(struct bpf_map *map);
-const char *bpf_map__name(struct bpf_map *map);
-__u32 bpf_map__btf_key_type_id(const struct bpf_map *map);
-__u32 bpf_map__btf_value_type_id(const struct bpf_map *map);
+LIBBPF_API int bpf_map__fd(struct bpf_map *map);
+LIBBPF_API const struct bpf_map_def *bpf_map__def(struct bpf_map *map);
+LIBBPF_API const char *bpf_map__name(struct bpf_map *map);
+LIBBPF_API __u32 bpf_map__btf_key_type_id(const struct bpf_map *map);
+LIBBPF_API __u32 bpf_map__btf_value_type_id(const struct bpf_map *map);
 
 typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *);
-int bpf_map__set_priv(struct bpf_map *map, void *priv,
-		      bpf_map_clear_priv_t clear_priv);
-void *bpf_map__priv(struct bpf_map *map);
-int bpf_map__reuse_fd(struct bpf_map *map, int fd);
-bool bpf_map__is_offload_neutral(struct bpf_map *map);
-void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex);
-int bpf_map__pin(struct bpf_map *map, const char *path);
+LIBBPF_API int bpf_map__set_priv(struct bpf_map *map, void *priv,
+				 bpf_map_clear_priv_t clear_priv);
+LIBBPF_API void *bpf_map__priv(struct bpf_map *map);
+LIBBPF_API int bpf_map__reuse_fd(struct bpf_map *map, int fd);
+LIBBPF_API bool bpf_map__is_offload_neutral(struct bpf_map *map);
+LIBBPF_API void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex);
+LIBBPF_API int bpf_map__pin(struct bpf_map *map, const char *path);
 
-long libbpf_get_error(const void *ptr);
+LIBBPF_API long libbpf_get_error(const void *ptr);
 
 struct bpf_prog_load_attr {
 	const char *file;
@@ -278,12 +284,12 @@ struct bpf_prog_load_attr {
 	int ifindex;
 };
 
-int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
-			struct bpf_object **pobj, int *prog_fd);
-int bpf_prog_load(const char *file, enum bpf_prog_type type,
-		  struct bpf_object **pobj, int *prog_fd);
+LIBBPF_API int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
+				   struct bpf_object **pobj, int *prog_fd);
+LIBBPF_API int bpf_prog_load(const char *file, enum bpf_prog_type type,
+			     struct bpf_object **pobj, int *prog_fd);
 
-int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags);
+LIBBPF_API int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags);
 
 enum bpf_perf_event_ret {
 	LIBBPF_PERF_EVENT_DONE	= 0,
@@ -291,10 +297,24 @@ enum bpf_perf_event_ret {
 	LIBBPF_PERF_EVENT_CONT	= -2,
 };
 
-typedef enum bpf_perf_event_ret (*bpf_perf_event_print_t)(void *event,
-							  void *priv);
-int bpf_perf_event_read_simple(void *mem, unsigned long size,
-			       unsigned long page_size,
-			       void **buf, size_t *buf_len,
-			       bpf_perf_event_print_t fn, void *priv);
-#endif
+struct perf_event_header;
+typedef enum bpf_perf_event_ret
+	(*bpf_perf_event_print_t)(struct perf_event_header *hdr,
+				  void *private_data);
+LIBBPF_API enum bpf_perf_event_ret
+bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
+			   void **copy_mem, size_t *copy_size,
+			   bpf_perf_event_print_t fn, void *private_data);
+
+struct nlattr;
+typedef int (*libbpf_dump_nlmsg_t)(void *cookie, void *msg, struct nlattr **tb);
+int libbpf_netlink_open(unsigned int *nl_pid);
+int libbpf_nl_get_link(int sock, unsigned int nl_pid,
+		       libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie);
+int libbpf_nl_get_class(int sock, unsigned int nl_pid, int ifindex,
+			libbpf_dump_nlmsg_t dump_class_nlmsg, void *cookie);
+int libbpf_nl_get_qdisc(int sock, unsigned int nl_pid, int ifindex,
+			libbpf_dump_nlmsg_t dump_qdisc_nlmsg, void *cookie);
+int libbpf_nl_get_filter(int sock, unsigned int nl_pid, int ifindex, int handle,
+			 libbpf_dump_nlmsg_t dump_filter_nlmsg, void *cookie);
+#endif /* __LIBBPF_LIBBPF_H */
diff --git a/tools/lib/bpf/libbpf_errno.c b/tools/lib/bpf/libbpf_errno.c
index d9ba851bd7f9..d83b17f8435c 100644
--- a/tools/lib/bpf/libbpf_errno.c
+++ b/tools/lib/bpf/libbpf_errno.c
@@ -1,23 +1,10 @@
-// SPDX-License-Identifier: LGPL-2.1
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
 
 /*
  * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
  * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
  * Copyright (C) 2015 Huawei Inc.
  * Copyright (C) 2017 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License (not later!)
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not,  see <http://www.gnu.org/licenses>
  */
 
 #include <stdio.h>
@@ -42,6 +29,7 @@ static const char *libbpf_strerror_table[NR_ERRNO] = {
 	[ERRCODE_OFFSET(PROGTYPE)]	= "Kernel doesn't support this program type",
 	[ERRCODE_OFFSET(WRNGPID)]	= "Wrong pid in netlink message",
 	[ERRCODE_OFFSET(INVSEQ)]	= "Invalid netlink sequence",
+	[ERRCODE_OFFSET(NLPARSE)]	= "Incorrect netlink message parsing",
 };
 
 int libbpf_strerror(int err, char *buf, size_t size)
diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c
new file mode 100644
index 000000000000..0ce67aea8f3b
--- /dev/null
+++ b/tools/lib/bpf/netlink.c
@@ -0,0 +1,337 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+/* Copyright (c) 2018 Facebook */
+
+#include <stdlib.h>
+#include <memory.h>
+#include <unistd.h>
+#include <linux/bpf.h>
+#include <linux/rtnetlink.h>
+#include <sys/socket.h>
+#include <errno.h>
+#include <time.h>
+
+#include "bpf.h"
+#include "libbpf.h"
+#include "nlattr.h"
+
+#ifndef SOL_NETLINK
+#define SOL_NETLINK 270
+#endif
+
+typedef int (*__dump_nlmsg_t)(struct nlmsghdr *nlmsg, libbpf_dump_nlmsg_t,
+			      void *cookie);
+
+int libbpf_netlink_open(__u32 *nl_pid)
+{
+	struct sockaddr_nl sa;
+	socklen_t addrlen;
+	int one = 1, ret;
+	int sock;
+
+	memset(&sa, 0, sizeof(sa));
+	sa.nl_family = AF_NETLINK;
+
+	sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+	if (sock < 0)
+		return -errno;
+
+	if (setsockopt(sock, SOL_NETLINK, NETLINK_EXT_ACK,
+		       &one, sizeof(one)) < 0) {
+		fprintf(stderr, "Netlink error reporting not supported\n");
+	}
+
+	if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
+		ret = -errno;
+		goto cleanup;
+	}
+
+	addrlen = sizeof(sa);
+	if (getsockname(sock, (struct sockaddr *)&sa, &addrlen) < 0) {
+		ret = -errno;
+		goto cleanup;
+	}
+
+	if (addrlen != sizeof(sa)) {
+		ret = -LIBBPF_ERRNO__INTERNAL;
+		goto cleanup;
+	}
+
+	*nl_pid = sa.nl_pid;
+	return sock;
+
+cleanup:
+	close(sock);
+	return ret;
+}
+
+static int bpf_netlink_recv(int sock, __u32 nl_pid, int seq,
+			    __dump_nlmsg_t _fn, libbpf_dump_nlmsg_t fn,
+			    void *cookie)
+{
+	bool multipart = true;
+	struct nlmsgerr *err;
+	struct nlmsghdr *nh;
+	char buf[4096];
+	int len, ret;
+
+	while (multipart) {
+		multipart = false;
+		len = recv(sock, buf, sizeof(buf), 0);
+		if (len < 0) {
+			ret = -errno;
+			goto done;
+		}
+
+		if (len == 0)
+			break;
+
+		for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len);
+		     nh = NLMSG_NEXT(nh, len)) {
+			if (nh->nlmsg_pid != nl_pid) {
+				ret = -LIBBPF_ERRNO__WRNGPID;
+				goto done;
+			}
+			if (nh->nlmsg_seq != seq) {
+				ret = -LIBBPF_ERRNO__INVSEQ;
+				goto done;
+			}
+			if (nh->nlmsg_flags & NLM_F_MULTI)
+				multipart = true;
+			switch (nh->nlmsg_type) {
+			case NLMSG_ERROR:
+				err = (struct nlmsgerr *)NLMSG_DATA(nh);
+				if (!err->error)
+					continue;
+				ret = err->error;
+				libbpf_nla_dump_errormsg(nh);
+				goto done;
+			case NLMSG_DONE:
+				return 0;
+			default:
+				break;
+			}
+			if (_fn) {
+				ret = _fn(nh, fn, cookie);
+				if (ret)
+					return ret;
+			}
+		}
+	}
+	ret = 0;
+done:
+	return ret;
+}
+
+int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags)
+{
+	int sock, seq = 0, ret;
+	struct nlattr *nla, *nla_xdp;
+	struct {
+		struct nlmsghdr  nh;
+		struct ifinfomsg ifinfo;
+		char             attrbuf[64];
+	} req;
+	__u32 nl_pid;
+
+	sock = libbpf_netlink_open(&nl_pid);
+	if (sock < 0)
+		return sock;
+
+	memset(&req, 0, sizeof(req));
+	req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
+	req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+	req.nh.nlmsg_type = RTM_SETLINK;
+	req.nh.nlmsg_pid = 0;
+	req.nh.nlmsg_seq = ++seq;
+	req.ifinfo.ifi_family = AF_UNSPEC;
+	req.ifinfo.ifi_index = ifindex;
+
+	/* started nested attribute for XDP */
+	nla = (struct nlattr *)(((char *)&req)
+				+ NLMSG_ALIGN(req.nh.nlmsg_len));
+	nla->nla_type = NLA_F_NESTED | IFLA_XDP;
+	nla->nla_len = NLA_HDRLEN;
+
+	/* add XDP fd */
+	nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len);
+	nla_xdp->nla_type = IFLA_XDP_FD;
+	nla_xdp->nla_len = NLA_HDRLEN + sizeof(int);
+	memcpy((char *)nla_xdp + NLA_HDRLEN, &fd, sizeof(fd));
+	nla->nla_len += nla_xdp->nla_len;
+
+	/* if user passed in any flags, add those too */
+	if (flags) {
+		nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len);
+		nla_xdp->nla_type = IFLA_XDP_FLAGS;
+		nla_xdp->nla_len = NLA_HDRLEN + sizeof(flags);
+		memcpy((char *)nla_xdp + NLA_HDRLEN, &flags, sizeof(flags));
+		nla->nla_len += nla_xdp->nla_len;
+	}
+
+	req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len);
+
+	if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
+		ret = -errno;
+		goto cleanup;
+	}
+	ret = bpf_netlink_recv(sock, nl_pid, seq, NULL, NULL, NULL);
+
+cleanup:
+	close(sock);
+	return ret;
+}
+
+static int __dump_link_nlmsg(struct nlmsghdr *nlh,
+			     libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie)
+{
+	struct nlattr *tb[IFLA_MAX + 1], *attr;
+	struct ifinfomsg *ifi = NLMSG_DATA(nlh);
+	int len;
+
+	len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*ifi));
+	attr = (struct nlattr *) ((void *) ifi + NLMSG_ALIGN(sizeof(*ifi)));
+	if (libbpf_nla_parse(tb, IFLA_MAX, attr, len, NULL) != 0)
+		return -LIBBPF_ERRNO__NLPARSE;
+
+	return dump_link_nlmsg(cookie, ifi, tb);
+}
+
+int libbpf_nl_get_link(int sock, unsigned int nl_pid,
+		       libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie)
+{
+	struct {
+		struct nlmsghdr nlh;
+		struct ifinfomsg ifm;
+	} req = {
+		.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
+		.nlh.nlmsg_type = RTM_GETLINK,
+		.nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
+		.ifm.ifi_family = AF_PACKET,
+	};
+	int seq = time(NULL);
+
+	req.nlh.nlmsg_seq = seq;
+	if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0)
+		return -errno;
+
+	return bpf_netlink_recv(sock, nl_pid, seq, __dump_link_nlmsg,
+				dump_link_nlmsg, cookie);
+}
+
+static int __dump_class_nlmsg(struct nlmsghdr *nlh,
+			      libbpf_dump_nlmsg_t dump_class_nlmsg,
+			      void *cookie)
+{
+	struct nlattr *tb[TCA_MAX + 1], *attr;
+	struct tcmsg *t = NLMSG_DATA(nlh);
+	int len;
+
+	len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*t));
+	attr = (struct nlattr *) ((void *) t + NLMSG_ALIGN(sizeof(*t)));
+	if (libbpf_nla_parse(tb, TCA_MAX, attr, len, NULL) != 0)
+		return -LIBBPF_ERRNO__NLPARSE;
+
+	return dump_class_nlmsg(cookie, t, tb);
+}
+
+int libbpf_nl_get_class(int sock, unsigned int nl_pid, int ifindex,
+			libbpf_dump_nlmsg_t dump_class_nlmsg, void *cookie)
+{
+	struct {
+		struct nlmsghdr nlh;
+		struct tcmsg t;
+	} req = {
+		.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)),
+		.nlh.nlmsg_type = RTM_GETTCLASS,
+		.nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
+		.t.tcm_family = AF_UNSPEC,
+		.t.tcm_ifindex = ifindex,
+	};
+	int seq = time(NULL);
+
+	req.nlh.nlmsg_seq = seq;
+	if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0)
+		return -errno;
+
+	return bpf_netlink_recv(sock, nl_pid, seq, __dump_class_nlmsg,
+				dump_class_nlmsg, cookie);
+}
+
+static int __dump_qdisc_nlmsg(struct nlmsghdr *nlh,
+			      libbpf_dump_nlmsg_t dump_qdisc_nlmsg,
+			      void *cookie)
+{
+	struct nlattr *tb[TCA_MAX + 1], *attr;
+	struct tcmsg *t = NLMSG_DATA(nlh);
+	int len;
+
+	len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*t));
+	attr = (struct nlattr *) ((void *) t + NLMSG_ALIGN(sizeof(*t)));
+	if (libbpf_nla_parse(tb, TCA_MAX, attr, len, NULL) != 0)
+		return -LIBBPF_ERRNO__NLPARSE;
+
+	return dump_qdisc_nlmsg(cookie, t, tb);
+}
+
+int libbpf_nl_get_qdisc(int sock, unsigned int nl_pid, int ifindex,
+			libbpf_dump_nlmsg_t dump_qdisc_nlmsg, void *cookie)
+{
+	struct {
+		struct nlmsghdr nlh;
+		struct tcmsg t;
+	} req = {
+		.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)),
+		.nlh.nlmsg_type = RTM_GETQDISC,
+		.nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
+		.t.tcm_family = AF_UNSPEC,
+		.t.tcm_ifindex = ifindex,
+	};
+	int seq = time(NULL);
+
+	req.nlh.nlmsg_seq = seq;
+	if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0)
+		return -errno;
+
+	return bpf_netlink_recv(sock, nl_pid, seq, __dump_qdisc_nlmsg,
+				dump_qdisc_nlmsg, cookie);
+}
+
+static int __dump_filter_nlmsg(struct nlmsghdr *nlh,
+			       libbpf_dump_nlmsg_t dump_filter_nlmsg,
+			       void *cookie)
+{
+	struct nlattr *tb[TCA_MAX + 1], *attr;
+	struct tcmsg *t = NLMSG_DATA(nlh);
+	int len;
+
+	len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*t));
+	attr = (struct nlattr *) ((void *) t + NLMSG_ALIGN(sizeof(*t)));
+	if (libbpf_nla_parse(tb, TCA_MAX, attr, len, NULL) != 0)
+		return -LIBBPF_ERRNO__NLPARSE;
+
+	return dump_filter_nlmsg(cookie, t, tb);
+}
+
+int libbpf_nl_get_filter(int sock, unsigned int nl_pid, int ifindex, int handle,
+			 libbpf_dump_nlmsg_t dump_filter_nlmsg, void *cookie)
+{
+	struct {
+		struct nlmsghdr nlh;
+		struct tcmsg t;
+	} req = {
+		.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)),
+		.nlh.nlmsg_type = RTM_GETTFILTER,
+		.nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
+		.t.tcm_family = AF_UNSPEC,
+		.t.tcm_ifindex = ifindex,
+		.t.tcm_parent = handle,
+	};
+	int seq = time(NULL);
+
+	req.nlh.nlmsg_seq = seq;
+	if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0)
+		return -errno;
+
+	return bpf_netlink_recv(sock, nl_pid, seq, __dump_filter_nlmsg,
+				dump_filter_nlmsg, cookie);
+}
diff --git a/tools/lib/bpf/nlattr.c b/tools/lib/bpf/nlattr.c
index 4719434278b2..1e69c0c8d413 100644
--- a/tools/lib/bpf/nlattr.c
+++ b/tools/lib/bpf/nlattr.c
@@ -1,13 +1,8 @@
-// SPDX-License-Identifier: LGPL-2.1
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
 
 /*
  * NETLINK      Netlink attributes
  *
- *	This library is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU Lesser General Public
- *	License as published by the Free Software Foundation version 2.1
- *	of the License.
- *
  * Copyright (c) 2003-2013 Thomas Graf <tgraf@suug.ch>
  */
 
@@ -17,20 +12,15 @@
 #include <string.h>
 #include <stdio.h>
 
-static uint16_t nla_attr_minlen[NLA_TYPE_MAX+1] = {
-	[NLA_U8]	= sizeof(uint8_t),
-	[NLA_U16]	= sizeof(uint16_t),
-	[NLA_U32]	= sizeof(uint32_t),
-	[NLA_U64]	= sizeof(uint64_t),
-	[NLA_STRING]	= 1,
-	[NLA_FLAG]	= 0,
+static uint16_t nla_attr_minlen[LIBBPF_NLA_TYPE_MAX+1] = {
+	[LIBBPF_NLA_U8]		= sizeof(uint8_t),
+	[LIBBPF_NLA_U16]	= sizeof(uint16_t),
+	[LIBBPF_NLA_U32]	= sizeof(uint32_t),
+	[LIBBPF_NLA_U64]	= sizeof(uint64_t),
+	[LIBBPF_NLA_STRING]	= 1,
+	[LIBBPF_NLA_FLAG]	= 0,
 };
 
-static int nla_len(const struct nlattr *nla)
-{
-	return nla->nla_len - NLA_HDRLEN;
-}
-
 static struct nlattr *nla_next(const struct nlattr *nla, int *remaining)
 {
 	int totlen = NLA_ALIGN(nla->nla_len);
@@ -46,20 +36,15 @@ static int nla_ok(const struct nlattr *nla, int remaining)
 	       nla->nla_len <= remaining;
 }
 
-static void *nla_data(const struct nlattr *nla)
-{
-	return (char *) nla + NLA_HDRLEN;
-}
-
 static int nla_type(const struct nlattr *nla)
 {
 	return nla->nla_type & NLA_TYPE_MASK;
 }
 
 static int validate_nla(struct nlattr *nla, int maxtype,
-			struct nla_policy *policy)
+			struct libbpf_nla_policy *policy)
 {
-	struct nla_policy *pt;
+	struct libbpf_nla_policy *pt;
 	unsigned int minlen = 0;
 	int type = nla_type(nla);
 
@@ -68,23 +53,24 @@ static int validate_nla(struct nlattr *nla, int maxtype,
 
 	pt = &policy[type];
 
-	if (pt->type > NLA_TYPE_MAX)
+	if (pt->type > LIBBPF_NLA_TYPE_MAX)
 		return 0;
 
 	if (pt->minlen)
 		minlen = pt->minlen;
-	else if (pt->type != NLA_UNSPEC)
+	else if (pt->type != LIBBPF_NLA_UNSPEC)
 		minlen = nla_attr_minlen[pt->type];
 
-	if (nla_len(nla) < minlen)
+	if (libbpf_nla_len(nla) < minlen)
 		return -1;
 
-	if (pt->maxlen && nla_len(nla) > pt->maxlen)
+	if (pt->maxlen && libbpf_nla_len(nla) > pt->maxlen)
 		return -1;
 
-	if (pt->type == NLA_STRING) {
-		char *data = nla_data(nla);
-		if (data[nla_len(nla) - 1] != '\0')
+	if (pt->type == LIBBPF_NLA_STRING) {
+		char *data = libbpf_nla_data(nla);
+
+		if (data[libbpf_nla_len(nla) - 1] != '\0')
 			return -1;
 	}
 
@@ -114,15 +100,15 @@ static inline int nlmsg_len(const struct nlmsghdr *nlh)
  * @see nla_validate
  * @return 0 on success or a negative error code.
  */
-static int nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, int len,
-		     struct nla_policy *policy)
+int libbpf_nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head,
+		     int len, struct libbpf_nla_policy *policy)
 {
 	struct nlattr *nla;
 	int rem, err;
 
 	memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
 
-	nla_for_each_attr(nla, head, len, rem) {
+	libbpf_nla_for_each_attr(nla, head, len, rem) {
 		int type = nla_type(nla);
 
 		if (type > maxtype)
@@ -146,12 +132,33 @@ errout:
 	return err;
 }
 
+/**
+ * Create attribute index based on nested attribute
+ * @arg tb              Index array to be filled (maxtype+1 elements).
+ * @arg maxtype         Maximum attribute type expected and accepted.
+ * @arg nla             Nested Attribute.
+ * @arg policy          Attribute validation policy.
+ *
+ * Feeds the stream of attributes nested into the specified attribute
+ * to libbpf_nla_parse().
+ *
+ * @see libbpf_nla_parse
+ * @return 0 on success or a negative error code.
+ */
+int libbpf_nla_parse_nested(struct nlattr *tb[], int maxtype,
+			    struct nlattr *nla,
+			    struct libbpf_nla_policy *policy)
+{
+	return libbpf_nla_parse(tb, maxtype, libbpf_nla_data(nla),
+				libbpf_nla_len(nla), policy);
+}
+
 /* dump netlink extended ack error message */
-int nla_dump_errormsg(struct nlmsghdr *nlh)
+int libbpf_nla_dump_errormsg(struct nlmsghdr *nlh)
 {
-	struct nla_policy extack_policy[NLMSGERR_ATTR_MAX + 1] = {
-		[NLMSGERR_ATTR_MSG]	= { .type = NLA_STRING },
-		[NLMSGERR_ATTR_OFFS]	= { .type = NLA_U32 },
+	struct libbpf_nla_policy extack_policy[NLMSGERR_ATTR_MAX + 1] = {
+		[NLMSGERR_ATTR_MSG]	= { .type = LIBBPF_NLA_STRING },
+		[NLMSGERR_ATTR_OFFS]	= { .type = LIBBPF_NLA_U32 },
 	};
 	struct nlattr *tb[NLMSGERR_ATTR_MAX + 1], *attr;
 	struct nlmsgerr *err;
@@ -172,14 +179,15 @@ int nla_dump_errormsg(struct nlmsghdr *nlh)
 	attr = (struct nlattr *) ((void *) err + hlen);
 	alen = nlh->nlmsg_len - hlen;
 
-	if (nla_parse(tb, NLMSGERR_ATTR_MAX, attr, alen, extack_policy) != 0) {
+	if (libbpf_nla_parse(tb, NLMSGERR_ATTR_MAX, attr, alen,
+			     extack_policy) != 0) {
 		fprintf(stderr,
 			"Failed to parse extended error attributes\n");
 		return 0;
 	}
 
 	if (tb[NLMSGERR_ATTR_MSG])
-		errmsg = (char *) nla_data(tb[NLMSGERR_ATTR_MSG]);
+		errmsg = (char *) libbpf_nla_data(tb[NLMSGERR_ATTR_MSG]);
 
 	fprintf(stderr, "Kernel error message: %s\n", errmsg);
 
diff --git a/tools/lib/bpf/nlattr.h b/tools/lib/bpf/nlattr.h
index 931a71f68f93..6cc3ac91690f 100644
--- a/tools/lib/bpf/nlattr.h
+++ b/tools/lib/bpf/nlattr.h
@@ -1,18 +1,13 @@
-/* SPDX-License-Identifier: LGPL-2.1 */
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
 
 /*
  * NETLINK      Netlink attributes
  *
- *	This library is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU Lesser General Public
- *	License as published by the Free Software Foundation version 2.1
- *	of the License.
- *
  * Copyright (c) 2003-2013 Thomas Graf <tgraf@suug.ch>
  */
 
-#ifndef __NLATTR_H
-#define __NLATTR_H
+#ifndef __LIBBPF_NLATTR_H
+#define __LIBBPF_NLATTR_H
 
 #include <stdint.h>
 #include <linux/netlink.h>
@@ -23,19 +18,19 @@
  * Standard attribute types to specify validation policy
  */
 enum {
-	NLA_UNSPEC,	/**< Unspecified type, binary data chunk */
-	NLA_U8,		/**< 8 bit integer */
-	NLA_U16,	/**< 16 bit integer */
-	NLA_U32,	/**< 32 bit integer */
-	NLA_U64,	/**< 64 bit integer */
-	NLA_STRING,	/**< NUL terminated character string */
-	NLA_FLAG,	/**< Flag */
-	NLA_MSECS,	/**< Micro seconds (64bit) */
-	NLA_NESTED,	/**< Nested attributes */
-	__NLA_TYPE_MAX,
+	LIBBPF_NLA_UNSPEC,	/**< Unspecified type, binary data chunk */
+	LIBBPF_NLA_U8,		/**< 8 bit integer */
+	LIBBPF_NLA_U16,		/**< 16 bit integer */
+	LIBBPF_NLA_U32,		/**< 32 bit integer */
+	LIBBPF_NLA_U64,		/**< 64 bit integer */
+	LIBBPF_NLA_STRING,	/**< NUL terminated character string */
+	LIBBPF_NLA_FLAG,	/**< Flag */
+	LIBBPF_NLA_MSECS,	/**< Micro seconds (64bit) */
+	LIBBPF_NLA_NESTED,	/**< Nested attributes */
+	__LIBBPF_NLA_TYPE_MAX,
 };
 
-#define NLA_TYPE_MAX (__NLA_TYPE_MAX - 1)
+#define LIBBPF_NLA_TYPE_MAX (__LIBBPF_NLA_TYPE_MAX - 1)
 
 /**
  * @ingroup attr
@@ -43,8 +38,8 @@ enum {
  *
  * See section @core_doc{core_attr_parse,Attribute Parsing} for more details.
  */
-struct nla_policy {
-	/** Type of attribute or NLA_UNSPEC */
+struct libbpf_nla_policy {
+	/** Type of attribute or LIBBPF_NLA_UNSPEC */
 	uint16_t	type;
 
 	/** Minimal length of payload required */
@@ -62,11 +57,50 @@ struct nla_policy {
  * @arg len	length of attribute stream
  * @arg rem	initialized to len, holds bytes currently remaining in stream
  */
-#define nla_for_each_attr(pos, head, len, rem) \
+#define libbpf_nla_for_each_attr(pos, head, len, rem) \
 	for (pos = head, rem = len; \
 	     nla_ok(pos, rem); \
 	     pos = nla_next(pos, &(rem)))
 
-int nla_dump_errormsg(struct nlmsghdr *nlh);
+/**
+ * libbpf_nla_data - head of payload
+ * @nla: netlink attribute
+ */
+static inline void *libbpf_nla_data(const struct nlattr *nla)
+{
+	return (char *) nla + NLA_HDRLEN;
+}
+
+static inline uint8_t libbpf_nla_getattr_u8(const struct nlattr *nla)
+{
+	return *(uint8_t *)libbpf_nla_data(nla);
+}
+
+static inline uint32_t libbpf_nla_getattr_u32(const struct nlattr *nla)
+{
+	return *(uint32_t *)libbpf_nla_data(nla);
+}
+
+static inline const char *libbpf_nla_getattr_str(const struct nlattr *nla)
+{
+	return (const char *)libbpf_nla_data(nla);
+}
+
+/**
+ * libbpf_nla_len - length of payload
+ * @nla: netlink attribute
+ */
+static inline int libbpf_nla_len(const struct nlattr *nla)
+{
+	return nla->nla_len - NLA_HDRLEN;
+}
+
+int libbpf_nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head,
+		     int len, struct libbpf_nla_policy *policy);
+int libbpf_nla_parse_nested(struct nlattr *tb[], int maxtype,
+			    struct nlattr *nla,
+			    struct libbpf_nla_policy *policy);
+
+int libbpf_nla_dump_errormsg(struct nlmsghdr *nlh);
 
-#endif /* __NLATTR_H */
+#endif /* __LIBBPF_NLATTR_H */
diff --git a/tools/lib/bpf/str_error.c b/tools/lib/bpf/str_error.c
index b8798114a357..00e48ac5b806 100644
--- a/tools/lib/bpf/str_error.c
+++ b/tools/lib/bpf/str_error.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: LGPL-2.1
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
 #undef _GNU_SOURCE
 #include <string.h>
 #include <stdio.h>
@@ -9,7 +9,7 @@
  * libc, while checking strerror_r() return to avoid having to check this in
  * all places calling it.
  */
-char *str_error(int err, char *dst, int len)
+char *libbpf_strerror_r(int err, char *dst, int len)
 {
 	int ret = strerror_r(err, dst, len);
 	if (ret)
diff --git a/tools/lib/bpf/str_error.h b/tools/lib/bpf/str_error.h
index 355b1db571d1..a139334d57b6 100644
--- a/tools/lib/bpf/str_error.h
+++ b/tools/lib/bpf/str_error.h
@@ -1,6 +1,6 @@
-// SPDX-License-Identifier: LGPL-2.1
-#ifndef BPF_STR_ERROR
-#define BPF_STR_ERROR
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+#ifndef __LIBBPF_STR_ERROR_H
+#define __LIBBPF_STR_ERROR_H
 
-char *str_error(int err, char *dst, int len);
-#endif // BPF_STR_ERROR
+char *libbpf_strerror_r(int err, char *dst, int len);
+#endif /* __LIBBPF_STR_ERROR_H */
diff --git a/tools/lib/subcmd/pager.c b/tools/lib/subcmd/pager.c
index 9997a8805a82..e3d47b59b14d 100644
--- a/tools/lib/subcmd/pager.c
+++ b/tools/lib/subcmd/pager.c
@@ -23,6 +23,13 @@ void pager_init(const char *pager_env)
 	subcmd_config.pager_env = pager_env;
 }
 
+static const char *forced_pager;
+
+void force_pager(const char *pager)
+{
+	forced_pager = pager;
+}
+
 static void pager_preexec(void)
 {
 	/*
@@ -66,7 +73,9 @@ void setup_pager(void)
 	const char *pager = getenv(subcmd_config.pager_env);
 	struct winsize sz;
 
-	if (!isatty(1))
+	if (forced_pager)
+		pager = forced_pager;
+	if (!isatty(1) && !forced_pager)
 		return;
 	if (ioctl(1, TIOCGWINSZ, &sz) == 0)
 		pager_columns = sz.ws_col;
diff --git a/tools/lib/subcmd/pager.h b/tools/lib/subcmd/pager.h
index f1a53cf29880..a818964693ab 100644
--- a/tools/lib/subcmd/pager.h
+++ b/tools/lib/subcmd/pager.h
@@ -7,5 +7,6 @@ extern void pager_init(const char *pager_env);
 extern void setup_pager(void);
 extern int pager_in_use(void);
 extern int pager_get_columns(void);
+extern void force_pager(const char *);
 
 #endif /* __SUBCMD_PAGER_H */
diff --git a/tools/lib/subcmd/parse-options.c b/tools/lib/subcmd/parse-options.c
index cb7154eccbdc..dbb9efbf718a 100644
--- a/tools/lib/subcmd/parse-options.c
+++ b/tools/lib/subcmd/parse-options.c
@@ -116,6 +116,7 @@ static int get_value(struct parse_opt_ctx_t *p,
 		case OPTION_INTEGER:
 		case OPTION_UINTEGER:
 		case OPTION_LONG:
+		case OPTION_ULONG:
 		case OPTION_U64:
 		default:
 			break;
@@ -166,6 +167,7 @@ static int get_value(struct parse_opt_ctx_t *p,
 		case OPTION_INTEGER:
 		case OPTION_UINTEGER:
 		case OPTION_LONG:
+		case OPTION_ULONG:
 		case OPTION_U64:
 		default:
 			break;
@@ -295,6 +297,22 @@ static int get_value(struct parse_opt_ctx_t *p,
 			return opterror(opt, "expects a numerical value", flags);
 		return 0;
 
+	case OPTION_ULONG:
+		if (unset) {
+			*(unsigned long *)opt->value = 0;
+			return 0;
+		}
+		if (opt->flags & PARSE_OPT_OPTARG && !p->opt) {
+			*(unsigned long *)opt->value = opt->defval;
+			return 0;
+		}
+		if (get_arg(p, opt, flags, &arg))
+			return -1;
+		*(unsigned long *)opt->value = strtoul(arg, (char **)&s, 10);
+		if (*s)
+			return opterror(opt, "expects a numerical value", flags);
+		return 0;
+
 	case OPTION_U64:
 		if (unset) {
 			*(u64 *)opt->value = 0;
@@ -703,6 +721,7 @@ static void print_option_help(const struct option *opts, int full)
 	case OPTION_ARGUMENT:
 		break;
 	case OPTION_LONG:
+	case OPTION_ULONG:
 	case OPTION_U64:
 	case OPTION_INTEGER:
 	case OPTION_UINTEGER:
diff --git a/tools/lib/subcmd/parse-options.h b/tools/lib/subcmd/parse-options.h
index 92fdbe1519f6..6ca2a8bfe716 100644
--- a/tools/lib/subcmd/parse-options.h
+++ b/tools/lib/subcmd/parse-options.h
@@ -25,6 +25,7 @@ enum parse_opt_type {
 	OPTION_STRING,
 	OPTION_INTEGER,
 	OPTION_LONG,
+	OPTION_ULONG,
 	OPTION_CALLBACK,
 	OPTION_U64,
 	OPTION_UINTEGER,
@@ -133,6 +134,7 @@ struct option {
 #define OPT_INTEGER(s, l, v, h)     { .type = OPTION_INTEGER, .short_name = (s), .long_name = (l), .value = check_vtype(v, int *), .help = (h) }
 #define OPT_UINTEGER(s, l, v, h)    { .type = OPTION_UINTEGER, .short_name = (s), .long_name = (l), .value = check_vtype(v, unsigned int *), .help = (h) }
 #define OPT_LONG(s, l, v, h)        { .type = OPTION_LONG, .short_name = (s), .long_name = (l), .value = check_vtype(v, long *), .help = (h) }
+#define OPT_ULONG(s, l, v, h)        { .type = OPTION_ULONG, .short_name = (s), .long_name = (l), .value = check_vtype(v, unsigned long *), .help = (h) }
 #define OPT_U64(s, l, v, h)         { .type = OPTION_U64, .short_name = (s), .long_name = (l), .value = check_vtype(v, u64 *), .help = (h) }
 #define OPT_STRING(s, l, v, a, h)   { .type = OPTION_STRING,  .short_name = (s), .long_name = (l), .value = check_vtype(v, const char **), .argh = (a), .help = (h) }
 #define OPT_STRING_OPTARG(s, l, v, a, h, d) \
diff --git a/tools/lib/traceevent/Build b/tools/lib/traceevent/Build
index c681d0575d16..ba54bfce0b0b 100644
--- a/tools/lib/traceevent/Build
+++ b/tools/lib/traceevent/Build
@@ -4,6 +4,8 @@ libtraceevent-y += trace-seq.o
 libtraceevent-y += parse-filter.o
 libtraceevent-y += parse-utils.o
 libtraceevent-y += kbuffer-parse.o
+libtraceevent-y += tep_strerror.o
+libtraceevent-y += event-parse-api.o
 
 plugin_jbd2-y         += plugin_jbd2.o
 plugin_hrtimer-y      += plugin_hrtimer.o
diff --git a/tools/lib/traceevent/event-parse-api.c b/tools/lib/traceevent/event-parse-api.c
new file mode 100644
index 000000000000..61f7149085ee
--- /dev/null
+++ b/tools/lib/traceevent/event-parse-api.c
@@ -0,0 +1,275 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+
+#include "event-parse.h"
+#include "event-parse-local.h"
+#include "event-utils.h"
+
+/**
+ * tep_get_first_event - returns the first event in the events array
+ * @tep: a handle to the tep_handle
+ *
+ * This returns pointer to the first element of the events array
+ * If @tep is NULL, NULL is returned.
+ */
+struct tep_event_format *tep_get_first_event(struct tep_handle *tep)
+{
+	if (tep && tep->events)
+		return tep->events[0];
+
+	return NULL;
+}
+
+/**
+ * tep_get_events_count - get the number of defined events
+ * @tep: a handle to the tep_handle
+ *
+ * This returns number of elements in event array
+ * If @tep is NULL, 0 is returned.
+ */
+int tep_get_events_count(struct tep_handle *tep)
+{
+	if(tep)
+		return tep->nr_events;
+	return 0;
+}
+
+/**
+ * tep_set_flag - set event parser flag
+ * @tep: a handle to the tep_handle
+ * @flag: flag, or combination of flags to be set
+ * can be any combination from enum tep_flag
+ *
+ * This sets a flag or mbination of flags  from enum tep_flag
+  */
+void tep_set_flag(struct tep_handle *tep, int flag)
+{
+	if(tep)
+		tep->flags |= flag;
+}
+
+unsigned short __tep_data2host2(struct tep_handle *pevent, unsigned short data)
+{
+	unsigned short swap;
+
+	if (!pevent || pevent->host_bigendian == pevent->file_bigendian)
+		return data;
+
+	swap = ((data & 0xffULL) << 8) |
+		((data & (0xffULL << 8)) >> 8);
+
+	return swap;
+}
+
+unsigned int __tep_data2host4(struct tep_handle *pevent, unsigned int data)
+{
+	unsigned int swap;
+
+	if (!pevent || pevent->host_bigendian == pevent->file_bigendian)
+		return data;
+
+	swap = ((data & 0xffULL) << 24) |
+		((data & (0xffULL << 8)) << 8) |
+		((data & (0xffULL << 16)) >> 8) |
+		((data & (0xffULL << 24)) >> 24);
+
+	return swap;
+}
+
+unsigned long long
+__tep_data2host8(struct tep_handle *pevent, unsigned long long data)
+{
+	unsigned long long swap;
+
+	if (!pevent || pevent->host_bigendian == pevent->file_bigendian)
+		return data;
+
+	swap = ((data & 0xffULL) << 56) |
+		((data & (0xffULL << 8)) << 40) |
+		((data & (0xffULL << 16)) << 24) |
+		((data & (0xffULL << 24)) << 8) |
+		((data & (0xffULL << 32)) >> 8) |
+		((data & (0xffULL << 40)) >> 24) |
+		((data & (0xffULL << 48)) >> 40) |
+		((data & (0xffULL << 56)) >> 56);
+
+	return swap;
+}
+
+/**
+ * tep_get_header_page_size - get size of the header page
+ * @pevent: a handle to the tep_handle
+ *
+ * This returns size of the header page
+ * If @pevent is NULL, 0 is returned.
+ */
+int tep_get_header_page_size(struct tep_handle *pevent)
+{
+	if(pevent)
+		return pevent->header_page_size_size;
+	return 0;
+}
+
+/**
+ * tep_get_cpus - get the number of CPUs
+ * @pevent: a handle to the tep_handle
+ *
+ * This returns the number of CPUs
+ * If @pevent is NULL, 0 is returned.
+ */
+int tep_get_cpus(struct tep_handle *pevent)
+{
+	if(pevent)
+		return pevent->cpus;
+	return 0;
+}
+
+/**
+ * tep_set_cpus - set the number of CPUs
+ * @pevent: a handle to the tep_handle
+ *
+ * This sets the number of CPUs
+ */
+void tep_set_cpus(struct tep_handle *pevent, int cpus)
+{
+	if(pevent)
+		pevent->cpus = cpus;
+}
+
+/**
+ * tep_get_long_size - get the size of a long integer on the current machine
+ * @pevent: a handle to the tep_handle
+ *
+ * This returns the size of a long integer on the current machine
+ * If @pevent is NULL, 0 is returned.
+ */
+int tep_get_long_size(struct tep_handle *pevent)
+{
+	if(pevent)
+		return pevent->long_size;
+	return 0;
+}
+
+/**
+ * tep_set_long_size - set the size of a long integer on the current machine
+ * @pevent: a handle to the tep_handle
+ * @size: size, in bytes, of a long integer
+ *
+ * This sets the size of a long integer on the current machine
+ */
+void tep_set_long_size(struct tep_handle *pevent, int long_size)
+{
+	if(pevent)
+		pevent->long_size = long_size;
+}
+
+/**
+ * tep_get_page_size - get the size of a memory page on the current machine
+ * @pevent: a handle to the tep_handle
+ *
+ * This returns the size of a memory page on the current machine
+ * If @pevent is NULL, 0 is returned.
+ */
+int tep_get_page_size(struct tep_handle *pevent)
+{
+	if(pevent)
+		return pevent->page_size;
+	return 0;
+}
+
+/**
+ * tep_set_page_size - set the size of a memory page on the current machine
+ * @pevent: a handle to the tep_handle
+ * @_page_size: size of a memory page, in bytes
+ *
+ * This sets the size of a memory page on the current machine
+ */
+void tep_set_page_size(struct tep_handle *pevent, int _page_size)
+{
+	if(pevent)
+		pevent->page_size = _page_size;
+}
+
+/**
+ * tep_is_file_bigendian - get if the file is in big endian order
+ * @pevent: a handle to the tep_handle
+ *
+ * This returns if the file is in big endian order
+ * If @pevent is NULL, 0 is returned.
+ */
+int tep_is_file_bigendian(struct tep_handle *pevent)
+{
+	if(pevent)
+		return pevent->file_bigendian;
+	return 0;
+}
+
+/**
+ * tep_set_file_bigendian - set if the file is in big endian order
+ * @pevent: a handle to the tep_handle
+ * @endian: non zero, if the file is in big endian order
+ *
+ * This sets if the file is in big endian order
+ */
+void tep_set_file_bigendian(struct tep_handle *pevent, enum tep_endian endian)
+{
+	if(pevent)
+		pevent->file_bigendian = endian;
+}
+
+/**
+ * tep_is_host_bigendian - get if the order of the current host is big endian
+ * @pevent: a handle to the tep_handle
+ *
+ * This gets if the order of the current host is big endian
+ * If @pevent is NULL, 0 is returned.
+ */
+int tep_is_host_bigendian(struct tep_handle *pevent)
+{
+	if(pevent)
+		return pevent->host_bigendian;
+	return 0;
+}
+
+/**
+ * tep_set_host_bigendian - set the order of the local host
+ * @pevent: a handle to the tep_handle
+ * @endian: non zero, if the local host has big endian order
+ *
+ * This sets the order of the local host
+ */
+void tep_set_host_bigendian(struct tep_handle *pevent, enum tep_endian endian)
+{
+	if(pevent)
+		pevent->host_bigendian = endian;
+}
+
+/**
+ * tep_is_latency_format - get if the latency output format is configured
+ * @pevent: a handle to the tep_handle
+ *
+ * This gets if the latency output format is configured
+ * If @pevent is NULL, 0 is returned.
+ */
+int tep_is_latency_format(struct tep_handle *pevent)
+{
+	if(pevent)
+		return pevent->latency_format;
+	return 0;
+}
+
+/**
+ * tep_set_latency_format - set the latency output format
+ * @pevent: a handle to the tep_handle
+ * @lat: non zero for latency output format
+ *
+ * This sets the latency output format
+  */
+void tep_set_latency_format(struct tep_handle *pevent, int lat)
+{
+	if(pevent)
+		pevent->latency_format = lat;
+}
diff --git a/tools/lib/traceevent/event-parse-local.h b/tools/lib/traceevent/event-parse-local.h
new file mode 100644
index 000000000000..b9bddde577f8
--- /dev/null
+++ b/tools/lib/traceevent/event-parse-local.h
@@ -0,0 +1,92 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+
+#ifndef _PARSE_EVENTS_INT_H
+#define _PARSE_EVENTS_INT_H
+
+struct cmdline;
+struct cmdline_list;
+struct func_map;
+struct func_list;
+struct event_handler;
+struct func_resolver;
+
+struct tep_handle {
+	int ref_count;
+
+	int header_page_ts_offset;
+	int header_page_ts_size;
+	int header_page_size_offset;
+	int header_page_size_size;
+	int header_page_data_offset;
+	int header_page_data_size;
+	int header_page_overwrite;
+
+	enum tep_endian file_bigendian;
+	enum tep_endian host_bigendian;
+
+	int latency_format;
+
+	int old_format;
+
+	int cpus;
+	int long_size;
+	int page_size;
+
+	struct cmdline *cmdlines;
+	struct cmdline_list *cmdlist;
+	int cmdline_count;
+
+	struct func_map *func_map;
+	struct func_resolver *func_resolver;
+	struct func_list *funclist;
+	unsigned int func_count;
+
+	struct printk_map *printk_map;
+	struct printk_list *printklist;
+	unsigned int printk_count;
+
+
+	struct tep_event_format **events;
+	int nr_events;
+	struct tep_event_format **sort_events;
+	enum tep_event_sort_type last_type;
+
+	int type_offset;
+	int type_size;
+
+	int pid_offset;
+	int pid_size;
+
+	int pc_offset;
+	int pc_size;
+
+	int flags_offset;
+	int flags_size;
+
+	int ld_offset;
+	int ld_size;
+
+	int print_raw;
+
+	int test_filters;
+
+	int flags;
+
+	struct tep_format_field *bprint_ip_field;
+	struct tep_format_field *bprint_fmt_field;
+	struct tep_format_field *bprint_buf_field;
+
+	struct event_handler *handlers;
+	struct tep_function_handler *func_handlers;
+
+	/* cache */
+	struct tep_event_format *last_event;
+
+	char *trace_clock;
+};
+
+#endif /* _PARSE_EVENTS_INT_H */
diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
index ce1e20227c64..3692f29fee46 100644
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -18,12 +18,14 @@
 #include <errno.h>
 #include <stdint.h>
 #include <limits.h>
-#include <linux/string.h>
 #include <linux/time64.h>
 
 #include <netinet/in.h>
 #include "event-parse.h"
+
+#include "event-parse-local.h"
 #include "event-utils.h"
+#include "trace-seq.h"
 
 static const char *input_buf;
 static unsigned long long input_buf_ptr;
@@ -94,7 +96,7 @@ struct tep_function_handler {
 
 static unsigned long long
 process_defined_func(struct trace_seq *s, void *data, int size,
-		     struct event_format *event, struct print_arg *arg);
+		     struct tep_event_format *event, struct tep_print_arg *arg);
 
 static void free_func_handle(struct tep_function_handler *func);
 
@@ -117,9 +119,9 @@ void breakpoint(void)
 	x++;
 }
 
-struct print_arg *alloc_arg(void)
+struct tep_print_arg *alloc_arg(void)
 {
-	return calloc(1, sizeof(struct print_arg));
+	return calloc(1, sizeof(struct tep_print_arg));
 }
 
 struct cmdline {
@@ -737,16 +739,16 @@ void tep_print_printk(struct tep_handle *pevent)
 	}
 }
 
-static struct event_format *alloc_event(void)
+static struct tep_event_format *alloc_event(void)
 {
-	return calloc(1, sizeof(struct event_format));
+	return calloc(1, sizeof(struct tep_event_format));
 }
 
-static int add_event(struct tep_handle *pevent, struct event_format *event)
+static int add_event(struct tep_handle *pevent, struct tep_event_format *event)
 {
 	int i;
-	struct event_format **events = realloc(pevent->events, sizeof(event) *
-					       (pevent->nr_events + 1));
+	struct tep_event_format **events = realloc(pevent->events, sizeof(event) *
+						  (pevent->nr_events + 1));
 	if (!events)
 		return -1;
 
@@ -769,20 +771,20 @@ static int add_event(struct tep_handle *pevent, struct event_format *event)
 	return 0;
 }
 
-static int event_item_type(enum event_type type)
+static int event_item_type(enum tep_event_type type)
 {
 	switch (type) {
-	case EVENT_ITEM ... EVENT_SQUOTE:
+	case TEP_EVENT_ITEM ... TEP_EVENT_SQUOTE:
 		return 1;
-	case EVENT_ERROR ... EVENT_DELIM:
+	case TEP_EVENT_ERROR ... TEP_EVENT_DELIM:
 	default:
 		return 0;
 	}
 }
 
-static void free_flag_sym(struct print_flag_sym *fsym)
+static void free_flag_sym(struct tep_print_flag_sym *fsym)
 {
-	struct print_flag_sym *next;
+	struct tep_print_flag_sym *next;
 
 	while (fsym) {
 		next = fsym->next;
@@ -793,60 +795,60 @@ static void free_flag_sym(struct print_flag_sym *fsym)
 	}
 }
 
-static void free_arg(struct print_arg *arg)
+static void free_arg(struct tep_print_arg *arg)
 {
-	struct print_arg *farg;
+	struct tep_print_arg *farg;
 
 	if (!arg)
 		return;
 
 	switch (arg->type) {
-	case PRINT_ATOM:
+	case TEP_PRINT_ATOM:
 		free(arg->atom.atom);
 		break;
-	case PRINT_FIELD:
+	case TEP_PRINT_FIELD:
 		free(arg->field.name);
 		break;
-	case PRINT_FLAGS:
+	case TEP_PRINT_FLAGS:
 		free_arg(arg->flags.field);
 		free(arg->flags.delim);
 		free_flag_sym(arg->flags.flags);
 		break;
-	case PRINT_SYMBOL:
+	case TEP_PRINT_SYMBOL:
 		free_arg(arg->symbol.field);
 		free_flag_sym(arg->symbol.symbols);
 		break;
-	case PRINT_HEX:
-	case PRINT_HEX_STR:
+	case TEP_PRINT_HEX:
+	case TEP_PRINT_HEX_STR:
 		free_arg(arg->hex.field);
 		free_arg(arg->hex.size);
 		break;
-	case PRINT_INT_ARRAY:
+	case TEP_PRINT_INT_ARRAY:
 		free_arg(arg->int_array.field);
 		free_arg(arg->int_array.count);
 		free_arg(arg->int_array.el_size);
 		break;
-	case PRINT_TYPE:
+	case TEP_PRINT_TYPE:
 		free(arg->typecast.type);
 		free_arg(arg->typecast.item);
 		break;
-	case PRINT_STRING:
-	case PRINT_BSTRING:
+	case TEP_PRINT_STRING:
+	case TEP_PRINT_BSTRING:
 		free(arg->string.string);
 		break;
-	case PRINT_BITMASK:
+	case TEP_PRINT_BITMASK:
 		free(arg->bitmask.bitmask);
 		break;
-	case PRINT_DYNAMIC_ARRAY:
-	case PRINT_DYNAMIC_ARRAY_LEN:
+	case TEP_PRINT_DYNAMIC_ARRAY:
+	case TEP_PRINT_DYNAMIC_ARRAY_LEN:
 		free(arg->dynarray.index);
 		break;
-	case PRINT_OP:
+	case TEP_PRINT_OP:
 		free(arg->op.op);
 		free_arg(arg->op.left);
 		free_arg(arg->op.right);
 		break;
-	case PRINT_FUNC:
+	case TEP_PRINT_FUNC:
 		while (arg->func.args) {
 			farg = arg->func.args;
 			arg->func.args = farg->next;
@@ -854,7 +856,7 @@ static void free_arg(struct print_arg *arg)
 		}
 		break;
 
-	case PRINT_NULL:
+	case TEP_PRINT_NULL:
 	default:
 		break;
 	}
@@ -862,24 +864,24 @@ static void free_arg(struct print_arg *arg)
 	free(arg);
 }
 
-static enum event_type get_type(int ch)
+static enum tep_event_type get_type(int ch)
 {
 	if (ch == '\n')
-		return EVENT_NEWLINE;
+		return TEP_EVENT_NEWLINE;
 	if (isspace(ch))
-		return EVENT_SPACE;
+		return TEP_EVENT_SPACE;
 	if (isalnum(ch) || ch == '_')
-		return EVENT_ITEM;
+		return TEP_EVENT_ITEM;
 	if (ch == '\'')
-		return EVENT_SQUOTE;
+		return TEP_EVENT_SQUOTE;
 	if (ch == '"')
-		return EVENT_DQUOTE;
+		return TEP_EVENT_DQUOTE;
 	if (!isprint(ch))
-		return EVENT_NONE;
+		return TEP_EVENT_NONE;
 	if (ch == '(' || ch == ')' || ch == ',')
-		return EVENT_DELIM;
+		return TEP_EVENT_DELIM;
 
-	return EVENT_OP;
+	return TEP_EVENT_OP;
 }
 
 static int __read_char(void)
@@ -927,38 +929,38 @@ static int extend_token(char **tok, char *buf, int size)
 	return 0;
 }
 
-static enum event_type force_token(const char *str, char **tok);
+static enum tep_event_type force_token(const char *str, char **tok);
 
-static enum event_type __read_token(char **tok)
+static enum tep_event_type __read_token(char **tok)
 {
 	char buf[BUFSIZ];
 	int ch, last_ch, quote_ch, next_ch;
 	int i = 0;
 	int tok_size = 0;
-	enum event_type type;
+	enum tep_event_type type;
 
 	*tok = NULL;
 
 
 	ch = __read_char();
 	if (ch < 0)
-		return EVENT_NONE;
+		return TEP_EVENT_NONE;
 
 	type = get_type(ch);
-	if (type == EVENT_NONE)
+	if (type == TEP_EVENT_NONE)
 		return type;
 
 	buf[i++] = ch;
 
 	switch (type) {
-	case EVENT_NEWLINE:
-	case EVENT_DELIM:
+	case TEP_EVENT_NEWLINE:
+	case TEP_EVENT_DELIM:
 		if (asprintf(tok, "%c", ch) < 0)
-			return EVENT_ERROR;
+			return TEP_EVENT_ERROR;
 
 		return type;
 
-	case EVENT_OP:
+	case TEP_EVENT_OP:
 		switch (ch) {
 		case '-':
 			next_ch = __peek_char();
@@ -1001,8 +1003,8 @@ static enum event_type __read_token(char **tok)
 			buf[i++] = __read_char();
 		goto out;
 
-	case EVENT_DQUOTE:
-	case EVENT_SQUOTE:
+	case TEP_EVENT_DQUOTE:
+	case TEP_EVENT_SQUOTE:
 		/* don't keep quotes */
 		i--;
 		quote_ch = ch;
@@ -1014,7 +1016,7 @@ static enum event_type __read_token(char **tok)
 				tok_size += BUFSIZ;
 
 				if (extend_token(tok, buf, tok_size) < 0)
-					return EVENT_NONE;
+					return TEP_EVENT_NONE;
 				i = 0;
 			}
 			last_ch = ch;
@@ -1031,7 +1033,7 @@ static enum event_type __read_token(char **tok)
 		 * For strings (double quotes) check the next token.
 		 * If it is another string, concatinate the two.
 		 */
-		if (type == EVENT_DQUOTE) {
+		if (type == TEP_EVENT_DQUOTE) {
 			unsigned long long save_input_buf_ptr = input_buf_ptr;
 
 			do {
@@ -1044,8 +1046,8 @@ static enum event_type __read_token(char **tok)
 
 		goto out;
 
-	case EVENT_ERROR ... EVENT_SPACE:
-	case EVENT_ITEM:
+	case TEP_EVENT_ERROR ... TEP_EVENT_SPACE:
+	case TEP_EVENT_ITEM:
 	default:
 		break;
 	}
@@ -1056,7 +1058,7 @@ static enum event_type __read_token(char **tok)
 			tok_size += BUFSIZ;
 
 			if (extend_token(tok, buf, tok_size) < 0)
-				return EVENT_NONE;
+				return TEP_EVENT_NONE;
 			i = 0;
 		}
 		ch = __read_char();
@@ -1066,9 +1068,9 @@ static enum event_type __read_token(char **tok)
  out:
 	buf[i] = 0;
 	if (extend_token(tok, buf, tok_size + i + 1) < 0)
-		return EVENT_NONE;
+		return TEP_EVENT_NONE;
 
-	if (type == EVENT_ITEM) {
+	if (type == TEP_EVENT_ITEM) {
 		/*
 		 * Older versions of the kernel has a bug that
 		 * creates invalid symbols and will break the mac80211
@@ -1095,12 +1097,12 @@ static enum event_type __read_token(char **tok)
 	return type;
 }
 
-static enum event_type force_token(const char *str, char **tok)
+static enum tep_event_type force_token(const char *str, char **tok)
 {
 	const char *save_input_buf;
 	unsigned long long save_input_buf_ptr;
 	unsigned long long save_input_buf_siz;
-	enum event_type type;
+	enum tep_event_type type;
 	
 	/* save off the current input pointers */
 	save_input_buf = input_buf;
@@ -1125,13 +1127,13 @@ static void free_token(char *tok)
 		free(tok);
 }
 
-static enum event_type read_token(char **tok)
+static enum tep_event_type read_token(char **tok)
 {
-	enum event_type type;
+	enum tep_event_type type;
 
 	for (;;) {
 		type = __read_token(tok);
-		if (type != EVENT_SPACE)
+		if (type != TEP_EVENT_SPACE)
 			return type;
 
 		free_token(*tok);
@@ -1139,7 +1141,7 @@ static enum event_type read_token(char **tok)
 
 	/* not reached */
 	*tok = NULL;
-	return EVENT_NONE;
+	return TEP_EVENT_NONE;
 }
 
 /**
@@ -1151,7 +1153,7 @@ static enum event_type read_token(char **tok)
  *
  * Returns the token type.
  */
-enum event_type tep_read_token(char **tok)
+enum tep_event_type tep_read_token(char **tok)
 {
 	return read_token(tok);
 }
@@ -1166,13 +1168,13 @@ void tep_free_token(char *token)
 }
 
 /* no newline */
-static enum event_type read_token_item(char **tok)
+static enum tep_event_type read_token_item(char **tok)
 {
-	enum event_type type;
+	enum tep_event_type type;
 
 	for (;;) {
 		type = __read_token(tok);
-		if (type != EVENT_SPACE && type != EVENT_NEWLINE)
+		if (type != TEP_EVENT_SPACE && type != TEP_EVENT_NEWLINE)
 			return type;
 		free_token(*tok);
 		*tok = NULL;
@@ -1180,10 +1182,10 @@ static enum event_type read_token_item(char **tok)
 
 	/* not reached */
 	*tok = NULL;
-	return EVENT_NONE;
+	return TEP_EVENT_NONE;
 }
 
-static int test_type(enum event_type type, enum event_type expect)
+static int test_type(enum tep_event_type type, enum tep_event_type expect)
 {
 	if (type != expect) {
 		do_warning("Error: expected type %d but read %d",
@@ -1193,8 +1195,8 @@ static int test_type(enum event_type type, enum event_type expect)
 	return 0;
 }
 
-static int test_type_token(enum event_type type, const char *token,
-		    enum event_type expect, const char *expect_tok)
+static int test_type_token(enum tep_event_type type, const char *token,
+		    enum tep_event_type expect, const char *expect_tok)
 {
 	if (type != expect) {
 		do_warning("Error: expected type %d but read %d",
@@ -1210,9 +1212,9 @@ static int test_type_token(enum event_type type, const char *token,
 	return 0;
 }
 
-static int __read_expect_type(enum event_type expect, char **tok, int newline_ok)
+static int __read_expect_type(enum tep_event_type expect, char **tok, int newline_ok)
 {
-	enum event_type type;
+	enum tep_event_type type;
 
 	if (newline_ok)
 		type = read_token(tok);
@@ -1221,15 +1223,15 @@ static int __read_expect_type(enum event_type expect, char **tok, int newline_ok
 	return test_type(type, expect);
 }
 
-static int read_expect_type(enum event_type expect, char **tok)
+static int read_expect_type(enum tep_event_type expect, char **tok)
 {
 	return __read_expect_type(expect, tok, 1);
 }
 
-static int __read_expected(enum event_type expect, const char *str,
+static int __read_expected(enum tep_event_type expect, const char *str,
 			   int newline_ok)
 {
-	enum event_type type;
+	enum tep_event_type type;
 	char *token;
 	int ret;
 
@@ -1245,12 +1247,12 @@ static int __read_expected(enum event_type expect, const char *str,
 	return ret;
 }
 
-static int read_expected(enum event_type expect, const char *str)
+static int read_expected(enum tep_event_type expect, const char *str)
 {
 	return __read_expected(expect, str, 1);
 }
 
-static int read_expected_item(enum event_type expect, const char *str)
+static int read_expected_item(enum tep_event_type expect, const char *str)
 {
 	return __read_expected(expect, str, 0);
 }
@@ -1259,13 +1261,13 @@ static char *event_read_name(void)
 {
 	char *token;
 
-	if (read_expected(EVENT_ITEM, "name") < 0)
+	if (read_expected(TEP_EVENT_ITEM, "name") < 0)
 		return NULL;
 
-	if (read_expected(EVENT_OP, ":") < 0)
+	if (read_expected(TEP_EVENT_OP, ":") < 0)
 		return NULL;
 
-	if (read_expect_type(EVENT_ITEM, &token) < 0)
+	if (read_expect_type(TEP_EVENT_ITEM, &token) < 0)
 		goto fail;
 
 	return token;
@@ -1280,13 +1282,13 @@ static int event_read_id(void)
 	char *token;
 	int id;
 
-	if (read_expected_item(EVENT_ITEM, "ID") < 0)
+	if (read_expected_item(TEP_EVENT_ITEM, "ID") < 0)
 		return -1;
 
-	if (read_expected(EVENT_OP, ":") < 0)
+	if (read_expected(TEP_EVENT_OP, ":") < 0)
 		return -1;
 
-	if (read_expect_type(EVENT_ITEM, &token) < 0)
+	if (read_expect_type(TEP_EVENT_ITEM, &token) < 0)
 		goto fail;
 
 	id = strtoul(token, NULL, 0);
@@ -1298,9 +1300,9 @@ static int event_read_id(void)
 	return -1;
 }
 
-static int field_is_string(struct format_field *field)
+static int field_is_string(struct tep_format_field *field)
 {
-	if ((field->flags & FIELD_IS_ARRAY) &&
+	if ((field->flags & TEP_FIELD_IS_ARRAY) &&
 	    (strstr(field->type, "char") || strstr(field->type, "u8") ||
 	     strstr(field->type, "s8")))
 		return 1;
@@ -1308,7 +1310,7 @@ static int field_is_string(struct format_field *field)
 	return 0;
 }
 
-static int field_is_dynamic(struct format_field *field)
+static int field_is_dynamic(struct tep_format_field *field)
 {
 	if (strncmp(field->type, "__data_loc", 10) == 0)
 		return 1;
@@ -1316,7 +1318,7 @@ static int field_is_dynamic(struct format_field *field)
 	return 0;
 }
 
-static int field_is_long(struct format_field *field)
+static int field_is_long(struct tep_format_field *field)
 {
 	/* includes long long */
 	if (strstr(field->type, "long"))
@@ -1327,7 +1329,7 @@ static int field_is_long(struct format_field *field)
 
 static unsigned int type_size(const char *name)
 {
-	/* This covers all FIELD_IS_STRING types. */
+	/* This covers all TEP_FIELD_IS_STRING types. */
 	static struct {
 		const char *type;
 		unsigned int size;
@@ -1353,10 +1355,10 @@ static unsigned int type_size(const char *name)
 	return 0;
 }
 
-static int event_read_fields(struct event_format *event, struct format_field **fields)
+static int event_read_fields(struct tep_event_format *event, struct tep_format_field **fields)
 {
-	struct format_field *field = NULL;
-	enum event_type type;
+	struct tep_format_field *field = NULL;
+	enum tep_event_type type;
 	char *token;
 	char *last_token;
 	int count = 0;
@@ -1365,14 +1367,14 @@ static int event_read_fields(struct event_format *event, struct format_field **f
 		unsigned int size_dynamic = 0;
 
 		type = read_token(&token);
-		if (type == EVENT_NEWLINE) {
+		if (type == TEP_EVENT_NEWLINE) {
 			free_token(token);
 			return count;
 		}
 
 		count++;
 
-		if (test_type_token(type, token, EVENT_ITEM, "field"))
+		if (test_type_token(type, token, TEP_EVENT_ITEM, "field"))
 			goto fail;
 		free_token(token);
 
@@ -1381,17 +1383,17 @@ static int event_read_fields(struct event_format *event, struct format_field **f
 		 * The ftrace fields may still use the "special" name.
 		 * Just ignore it.
 		 */
-		if (event->flags & EVENT_FL_ISFTRACE &&
-		    type == EVENT_ITEM && strcmp(token, "special") == 0) {
+		if (event->flags & TEP_EVENT_FL_ISFTRACE &&
+		    type == TEP_EVENT_ITEM && strcmp(token, "special") == 0) {
 			free_token(token);
 			type = read_token(&token);
 		}
 
-		if (test_type_token(type, token, EVENT_OP, ":") < 0)
+		if (test_type_token(type, token, TEP_EVENT_OP, ":") < 0)
 			goto fail;
 
 		free_token(token);
-		if (read_expect_type(EVENT_ITEM, &token) < 0)
+		if (read_expect_type(TEP_EVENT_ITEM, &token) < 0)
 			goto fail;
 
 		last_token = token;
@@ -1405,17 +1407,17 @@ static int event_read_fields(struct event_format *event, struct format_field **f
 		/* read the rest of the type */
 		for (;;) {
 			type = read_token(&token);
-			if (type == EVENT_ITEM ||
-			    (type == EVENT_OP && strcmp(token, "*") == 0) ||
+			if (type == TEP_EVENT_ITEM ||
+			    (type == TEP_EVENT_OP && strcmp(token, "*") == 0) ||
 			    /*
 			     * Some of the ftrace fields are broken and have
 			     * an illegal "." in them.
 			     */
-			    (event->flags & EVENT_FL_ISFTRACE &&
-			     type == EVENT_OP && strcmp(token, ".") == 0)) {
+			    (event->flags & TEP_EVENT_FL_ISFTRACE &&
+			     type == TEP_EVENT_OP && strcmp(token, ".") == 0)) {
 
 				if (strcmp(token, "*") == 0)
-					field->flags |= FIELD_IS_POINTER;
+					field->flags |= TEP_FIELD_IS_POINTER;
 
 				if (field->type) {
 					char *new_type;
@@ -1445,27 +1447,27 @@ static int event_read_fields(struct event_format *event, struct format_field **f
 		}
 		field->name = field->alias = last_token;
 
-		if (test_type(type, EVENT_OP))
+		if (test_type(type, TEP_EVENT_OP))
 			goto fail;
 
 		if (strcmp(token, "[") == 0) {
-			enum event_type last_type = type;
+			enum tep_event_type last_type = type;
 			char *brackets = token;
 			char *new_brackets;
 			int len;
 
-			field->flags |= FIELD_IS_ARRAY;
+			field->flags |= TEP_FIELD_IS_ARRAY;
 
 			type = read_token(&token);
 
-			if (type == EVENT_ITEM)
+			if (type == TEP_EVENT_ITEM)
 				field->arraylen = strtoul(token, NULL, 0);
 			else
 				field->arraylen = 0;
 
 		        while (strcmp(token, "]") != 0) {
-				if (last_type == EVENT_ITEM &&
-				    type == EVENT_ITEM)
+				if (last_type == TEP_EVENT_ITEM &&
+				    type == TEP_EVENT_ITEM)
 					len = 2;
 				else
 					len = 1;
@@ -1486,7 +1488,7 @@ static int event_read_fields(struct event_format *event, struct format_field **f
 				field->arraylen = strtoul(token, NULL, 0);
 				free_token(token);
 				type = read_token(&token);
-				if (type == EVENT_NONE) {
+				if (type == TEP_EVENT_NONE) {
 					do_warning_event(event, "failed to find token");
 					goto fail;
 				}
@@ -1509,7 +1511,7 @@ static int event_read_fields(struct event_format *event, struct format_field **f
 			 * If the next token is not an OP, then it is of
 			 * the format: type [] item;
 			 */
-			if (type == EVENT_ITEM) {
+			if (type == TEP_EVENT_ITEM) {
 				char *new_type;
 				new_type = realloc(field->type,
 						   strlen(field->type) +
@@ -1543,79 +1545,79 @@ static int event_read_fields(struct event_format *event, struct format_field **f
 		}
 
 		if (field_is_string(field))
-			field->flags |= FIELD_IS_STRING;
+			field->flags |= TEP_FIELD_IS_STRING;
 		if (field_is_dynamic(field))
-			field->flags |= FIELD_IS_DYNAMIC;
+			field->flags |= TEP_FIELD_IS_DYNAMIC;
 		if (field_is_long(field))
-			field->flags |= FIELD_IS_LONG;
+			field->flags |= TEP_FIELD_IS_LONG;
 
-		if (test_type_token(type, token,  EVENT_OP, ";"))
+		if (test_type_token(type, token,  TEP_EVENT_OP, ";"))
 			goto fail;
 		free_token(token);
 
-		if (read_expected(EVENT_ITEM, "offset") < 0)
+		if (read_expected(TEP_EVENT_ITEM, "offset") < 0)
 			goto fail_expect;
 
-		if (read_expected(EVENT_OP, ":") < 0)
+		if (read_expected(TEP_EVENT_OP, ":") < 0)
 			goto fail_expect;
 
-		if (read_expect_type(EVENT_ITEM, &token))
+		if (read_expect_type(TEP_EVENT_ITEM, &token))
 			goto fail;
 		field->offset = strtoul(token, NULL, 0);
 		free_token(token);
 
-		if (read_expected(EVENT_OP, ";") < 0)
+		if (read_expected(TEP_EVENT_OP, ";") < 0)
 			goto fail_expect;
 
-		if (read_expected(EVENT_ITEM, "size") < 0)
+		if (read_expected(TEP_EVENT_ITEM, "size") < 0)
 			goto fail_expect;
 
-		if (read_expected(EVENT_OP, ":") < 0)
+		if (read_expected(TEP_EVENT_OP, ":") < 0)
 			goto fail_expect;
 
-		if (read_expect_type(EVENT_ITEM, &token))
+		if (read_expect_type(TEP_EVENT_ITEM, &token))
 			goto fail;
 		field->size = strtoul(token, NULL, 0);
 		free_token(token);
 
-		if (read_expected(EVENT_OP, ";") < 0)
+		if (read_expected(TEP_EVENT_OP, ";") < 0)
 			goto fail_expect;
 
 		type = read_token(&token);
-		if (type != EVENT_NEWLINE) {
+		if (type != TEP_EVENT_NEWLINE) {
 			/* newer versions of the kernel have a "signed" type */
-			if (test_type_token(type, token, EVENT_ITEM, "signed"))
+			if (test_type_token(type, token, TEP_EVENT_ITEM, "signed"))
 				goto fail;
 
 			free_token(token);
 
-			if (read_expected(EVENT_OP, ":") < 0)
+			if (read_expected(TEP_EVENT_OP, ":") < 0)
 				goto fail_expect;
 
-			if (read_expect_type(EVENT_ITEM, &token))
+			if (read_expect_type(TEP_EVENT_ITEM, &token))
 				goto fail;
 
 			if (strtoul(token, NULL, 0))
-				field->flags |= FIELD_IS_SIGNED;
+				field->flags |= TEP_FIELD_IS_SIGNED;
 
 			free_token(token);
-			if (read_expected(EVENT_OP, ";") < 0)
+			if (read_expected(TEP_EVENT_OP, ";") < 0)
 				goto fail_expect;
 
-			if (read_expect_type(EVENT_NEWLINE, &token))
+			if (read_expect_type(TEP_EVENT_NEWLINE, &token))
 				goto fail;
 		}
 
 		free_token(token);
 
-		if (field->flags & FIELD_IS_ARRAY) {
+		if (field->flags & TEP_FIELD_IS_ARRAY) {
 			if (field->arraylen)
 				field->elementsize = field->size / field->arraylen;
-			else if (field->flags & FIELD_IS_DYNAMIC)
+			else if (field->flags & TEP_FIELD_IS_DYNAMIC)
 				field->elementsize = size_dynamic;
-			else if (field->flags & FIELD_IS_STRING)
+			else if (field->flags & TEP_FIELD_IS_STRING)
 				field->elementsize = 1;
-			else if (field->flags & FIELD_IS_LONG)
+			else if (field->flags & TEP_FIELD_IS_LONG)
 				field->elementsize = event->pevent ?
 						     event->pevent->long_size :
 						     sizeof(long);
@@ -1640,18 +1642,18 @@ fail_expect:
 	return -1;
 }
 
-static int event_read_format(struct event_format *event)
+static int event_read_format(struct tep_event_format *event)
 {
 	char *token;
 	int ret;
 
-	if (read_expected_item(EVENT_ITEM, "format") < 0)
+	if (read_expected_item(TEP_EVENT_ITEM, "format") < 0)
 		return -1;
 
-	if (read_expected(EVENT_OP, ":") < 0)
+	if (read_expected(TEP_EVENT_OP, ":") < 0)
 		return -1;
 
-	if (read_expect_type(EVENT_NEWLINE, &token))
+	if (read_expect_type(TEP_EVENT_NEWLINE, &token))
 		goto fail;
 	free_token(token);
 
@@ -1672,14 +1674,14 @@ static int event_read_format(struct event_format *event)
 	return -1;
 }
 
-static enum event_type
-process_arg_token(struct event_format *event, struct print_arg *arg,
-		  char **tok, enum event_type type);
+static enum tep_event_type
+process_arg_token(struct tep_event_format *event, struct tep_print_arg *arg,
+		  char **tok, enum tep_event_type type);
 
-static enum event_type
-process_arg(struct event_format *event, struct print_arg *arg, char **tok)
+static enum tep_event_type
+process_arg(struct tep_event_format *event, struct tep_print_arg *arg, char **tok)
 {
-	enum event_type type;
+	enum tep_event_type type;
 	char *token;
 
 	type = read_token(&token);
@@ -1688,32 +1690,32 @@ process_arg(struct event_format *event, struct print_arg *arg, char **tok)
 	return process_arg_token(event, arg, tok, type);
 }
 
-static enum event_type
-process_op(struct event_format *event, struct print_arg *arg, char **tok);
+static enum tep_event_type
+process_op(struct tep_event_format *event, struct tep_print_arg *arg, char **tok);
 
 /*
  * For __print_symbolic() and __print_flags, we need to completely
  * evaluate the first argument, which defines what to print next.
  */
-static enum event_type
-process_field_arg(struct event_format *event, struct print_arg *arg, char **tok)
+static enum tep_event_type
+process_field_arg(struct tep_event_format *event, struct tep_print_arg *arg, char **tok)
 {
-	enum event_type type;
+	enum tep_event_type type;
 
 	type = process_arg(event, arg, tok);
 
-	while (type == EVENT_OP) {
+	while (type == TEP_EVENT_OP) {
 		type = process_op(event, arg, tok);
 	}
 
 	return type;
 }
 
-static enum event_type
-process_cond(struct event_format *event, struct print_arg *top, char **tok)
+static enum tep_event_type
+process_cond(struct tep_event_format *event, struct tep_print_arg *top, char **tok)
 {
-	struct print_arg *arg, *left, *right;
-	enum event_type type;
+	struct tep_print_arg *arg, *left, *right;
+	enum tep_event_type type;
 	char *token = NULL;
 
 	arg = alloc_arg();
@@ -1728,7 +1730,7 @@ process_cond(struct event_format *event, struct print_arg *top, char **tok)
 		goto out_free;
 	}
 
-	arg->type = PRINT_OP;
+	arg->type = TEP_PRINT_OP;
 	arg->op.left = left;
 	arg->op.right = right;
 
@@ -1736,16 +1738,16 @@ process_cond(struct event_format *event, struct print_arg *top, char **tok)
 	type = process_arg(event, left, &token);
 
  again:
-	if (type == EVENT_ERROR)
+	if (type == TEP_EVENT_ERROR)
 		goto out_free;
 
 	/* Handle other operations in the arguments */
-	if (type == EVENT_OP && strcmp(token, ":") != 0) {
+	if (type == TEP_EVENT_OP && strcmp(token, ":") != 0) {
 		type = process_op(event, left, &token);
 		goto again;
 	}
 
-	if (test_type_token(type, token, EVENT_OP, ":"))
+	if (test_type_token(type, token, TEP_EVENT_OP, ":"))
 		goto out_free;
 
 	arg->op.op = token;
@@ -1762,14 +1764,14 @@ out_free:
 	top->op.right = NULL;
 	free_token(token);
 	free_arg(arg);
-	return EVENT_ERROR;
+	return TEP_EVENT_ERROR;
 }
 
-static enum event_type
-process_array(struct event_format *event, struct print_arg *top, char **tok)
+static enum tep_event_type
+process_array(struct tep_event_format *event, struct tep_print_arg *top, char **tok)
 {
-	struct print_arg *arg;
-	enum event_type type;
+	struct tep_print_arg *arg;
+	enum tep_event_type type;
 	char *token = NULL;
 
 	arg = alloc_arg();
@@ -1777,12 +1779,12 @@ process_array(struct event_format *event, struct print_arg *top, char **tok)
 		do_warning_event(event, "%s: not enough memory!", __func__);
 		/* '*tok' is set to top->op.op.  No need to free. */
 		*tok = NULL;
-		return EVENT_ERROR;
+		return TEP_EVENT_ERROR;
 	}
 
 	*tok = NULL;
 	type = process_arg(event, arg, &token);
-	if (test_type_token(type, token, EVENT_OP, "]"))
+	if (test_type_token(type, token, TEP_EVENT_OP, "]"))
 		goto out_free;
 
 	top->op.right = arg;
@@ -1796,7 +1798,7 @@ process_array(struct event_format *event, struct print_arg *top, char **tok)
 out_free:
 	free_token(token);
 	free_arg(arg);
-	return EVENT_ERROR;
+	return TEP_EVENT_ERROR;
 }
 
 static int get_op_prio(char *op)
@@ -1854,11 +1856,11 @@ static int get_op_prio(char *op)
 	}
 }
 
-static int set_op_prio(struct print_arg *arg)
+static int set_op_prio(struct tep_print_arg *arg)
 {
 
 	/* single ops are the greatest */
-	if (!arg->op.left || arg->op.left->type == PRINT_NULL)
+	if (!arg->op.left || arg->op.left->type == TEP_PRINT_NULL)
 		arg->op.prio = 0;
 	else
 		arg->op.prio = get_op_prio(arg->op.op);
@@ -1867,17 +1869,17 @@ static int set_op_prio(struct print_arg *arg)
 }
 
 /* Note, *tok does not get freed, but will most likely be saved */
-static enum event_type
-process_op(struct event_format *event, struct print_arg *arg, char **tok)
+static enum tep_event_type
+process_op(struct tep_event_format *event, struct tep_print_arg *arg, char **tok)
 {
-	struct print_arg *left, *right = NULL;
-	enum event_type type;
+	struct tep_print_arg *left, *right = NULL;
+	enum tep_event_type type;
 	char *token;
 
 	/* the op is passed in via tok */
 	token = *tok;
 
-	if (arg->type == PRINT_OP && !arg->op.left) {
+	if (arg->type == TEP_PRINT_OP && !arg->op.left) {
 		/* handle single op */
 		if (token[1]) {
 			do_warning_event(event, "bad op token %s", token);
@@ -1900,7 +1902,7 @@ process_op(struct event_format *event, struct print_arg *arg, char **tok)
 		if (!left)
 			goto out_warn_free;
 
-		left->type = PRINT_NULL;
+		left->type = TEP_PRINT_NULL;
 		arg->op.left = left;
 
 		right = alloc_arg();
@@ -1922,7 +1924,7 @@ process_op(struct event_format *event, struct print_arg *arg, char **tok)
 		/* copy the top arg to the left */
 		*left = *arg;
 
-		arg->type = PRINT_OP;
+		arg->type = TEP_PRINT_OP;
 		arg->op.op = token;
 		arg->op.left = left;
 		arg->op.prio = 0;
@@ -1956,13 +1958,13 @@ process_op(struct event_format *event, struct print_arg *arg, char **tok)
 		/* copy the top arg to the left */
 		*left = *arg;
 
-		arg->type = PRINT_OP;
+		arg->type = TEP_PRINT_OP;
 		arg->op.op = token;
 		arg->op.left = left;
 		arg->op.right = NULL;
 
 		if (set_op_prio(arg) == -1) {
-			event->flags |= EVENT_FL_FAILED;
+			event->flags |= TEP_EVENT_FL_FAILED;
 			/* arg->op.op (= token) will be freed at out_free */
 			arg->op.op = NULL;
 			goto out_free;
@@ -1973,10 +1975,10 @@ process_op(struct event_format *event, struct print_arg *arg, char **tok)
 
 		/* could just be a type pointer */
 		if ((strcmp(arg->op.op, "*") == 0) &&
-		    type == EVENT_DELIM && (strcmp(token, ")") == 0)) {
+		    type == TEP_EVENT_DELIM && (strcmp(token, ")") == 0)) {
 			char *new_atom;
 
-			if (left->type != PRINT_ATOM) {
+			if (left->type != TEP_PRINT_ATOM) {
 				do_warning_event(event, "bad pointer type");
 				goto out_free;
 			}
@@ -1999,16 +2001,16 @@ process_op(struct event_format *event, struct print_arg *arg, char **tok)
 			goto out_warn_free;
 
 		type = process_arg_token(event, right, tok, type);
-		if (type == EVENT_ERROR) {
+		if (type == TEP_EVENT_ERROR) {
 			free_arg(right);
 			/* token was freed in process_arg_token() via *tok */
 			token = NULL;
 			goto out_free;
 		}
 
-		if (right->type == PRINT_OP &&
+		if (right->type == TEP_PRINT_OP &&
 		    get_op_prio(arg->op.op) < get_op_prio(right->op.op)) {
-			struct print_arg tmp;
+			struct tep_print_arg tmp;
 
 			/* rotate ops according to the priority */
 			arg->op.right = right->op.left;
@@ -2030,7 +2032,7 @@ process_op(struct event_format *event, struct print_arg *arg, char **tok)
 
 		*left = *arg;
 
-		arg->type = PRINT_OP;
+		arg->type = TEP_PRINT_OP;
 		arg->op.op = token;
 		arg->op.left = left;
 
@@ -2041,12 +2043,12 @@ process_op(struct event_format *event, struct print_arg *arg, char **tok)
 
 	} else {
 		do_warning_event(event, "unknown op '%s'", token);
-		event->flags |= EVENT_FL_FAILED;
+		event->flags |= TEP_EVENT_FL_FAILED;
 		/* the arg is now the left side */
 		goto out_free;
 	}
 
-	if (type == EVENT_OP && strcmp(*tok, ":") != 0) {
+	if (type == TEP_EVENT_OP && strcmp(*tok, ":") != 0) {
 		int prio;
 
 		/* higher prios need to be closer to the root */
@@ -2065,34 +2067,34 @@ out_warn_free:
 out_free:
 	free_token(token);
 	*tok = NULL;
-	return EVENT_ERROR;
+	return TEP_EVENT_ERROR;
 }
 
-static enum event_type
-process_entry(struct event_format *event __maybe_unused, struct print_arg *arg,
+static enum tep_event_type
+process_entry(struct tep_event_format *event __maybe_unused, struct tep_print_arg *arg,
 	      char **tok)
 {
-	enum event_type type;
+	enum tep_event_type type;
 	char *field;
 	char *token;
 
-	if (read_expected(EVENT_OP, "->") < 0)
+	if (read_expected(TEP_EVENT_OP, "->") < 0)
 		goto out_err;
 
-	if (read_expect_type(EVENT_ITEM, &token) < 0)
+	if (read_expect_type(TEP_EVENT_ITEM, &token) < 0)
 		goto out_free;
 	field = token;
 
-	arg->type = PRINT_FIELD;
+	arg->type = TEP_PRINT_FIELD;
 	arg->field.name = field;
 
 	if (is_flag_field) {
 		arg->field.field = tep_find_any_field(event, arg->field.name);
-		arg->field.field->flags |= FIELD_IS_FLAG;
+		arg->field.field->flags |= TEP_FIELD_IS_FLAG;
 		is_flag_field = 0;
 	} else if (is_symbolic_field) {
 		arg->field.field = tep_find_any_field(event, arg->field.name);
-		arg->field.field->flags |= FIELD_IS_SYMBOLIC;
+		arg->field.field->flags |= TEP_FIELD_IS_SYMBOLIC;
 		is_symbolic_field = 0;
 	}
 
@@ -2105,14 +2107,14 @@ process_entry(struct event_format *event __maybe_unused, struct print_arg *arg,
 	free_token(token);
  out_err:
 	*tok = NULL;
-	return EVENT_ERROR;
+	return TEP_EVENT_ERROR;
 }
 
-static int alloc_and_process_delim(struct event_format *event, char *next_token,
-				   struct print_arg **print_arg)
+static int alloc_and_process_delim(struct tep_event_format *event, char *next_token,
+				   struct tep_print_arg **print_arg)
 {
-	struct print_arg *field;
-	enum event_type type;
+	struct tep_print_arg *field;
+	enum tep_event_type type;
 	char *token;
 	int ret = 0;
 
@@ -2125,7 +2127,7 @@ static int alloc_and_process_delim(struct event_format *event, char *next_token,
 
 	type = process_arg(event, field, &token);
 
-	if (test_type_token(type, token, EVENT_DELIM, next_token)) {
+	if (test_type_token(type, token, TEP_EVENT_DELIM, next_token)) {
 		errno = EINVAL;
 		ret = -1;
 		free_arg(field);
@@ -2140,7 +2142,7 @@ out_free_token:
 	return ret;
 }
 
-static char *arg_eval (struct print_arg *arg);
+static char *arg_eval (struct tep_print_arg *arg);
 
 static unsigned long long
 eval_type_str(unsigned long long val, const char *type, int pointer)
@@ -2237,9 +2239,9 @@ eval_type_str(unsigned long long val, const char *type, int pointer)
  * Try to figure out the type.
  */
 static unsigned long long
-eval_type(unsigned long long val, struct print_arg *arg, int pointer)
+eval_type(unsigned long long val, struct tep_print_arg *arg, int pointer)
 {
-	if (arg->type != PRINT_TYPE) {
+	if (arg->type != TEP_PRINT_TYPE) {
 		do_warning("expected type argument");
 		return 0;
 	}
@@ -2247,22 +2249,22 @@ eval_type(unsigned long long val, struct print_arg *arg, int pointer)
 	return eval_type_str(val, arg->typecast.type, pointer);
 }
 
-static int arg_num_eval(struct print_arg *arg, long long *val)
+static int arg_num_eval(struct tep_print_arg *arg, long long *val)
 {
 	long long left, right;
 	int ret = 1;
 
 	switch (arg->type) {
-	case PRINT_ATOM:
+	case TEP_PRINT_ATOM:
 		*val = strtoll(arg->atom.atom, NULL, 0);
 		break;
-	case PRINT_TYPE:
+	case TEP_PRINT_TYPE:
 		ret = arg_num_eval(arg->typecast.item, val);
 		if (!ret)
 			break;
 		*val = eval_type(*val, arg, 0);
 		break;
-	case PRINT_OP:
+	case TEP_PRINT_OP:
 		switch (arg->op.op[0]) {
 		case '|':
 			ret = arg_num_eval(arg->op.left, &left);
@@ -2365,7 +2367,7 @@ static int arg_num_eval(struct print_arg *arg, long long *val)
 			break;
 		case '-':
 			/* check for negative */
-			if (arg->op.left->type == PRINT_NULL)
+			if (arg->op.left->type == TEP_PRINT_NULL)
 				left = 0;
 			else
 				ret = arg_num_eval(arg->op.left, &left);
@@ -2377,7 +2379,7 @@ static int arg_num_eval(struct print_arg *arg, long long *val)
 			*val = left - right;
 			break;
 		case '+':
-			if (arg->op.left->type == PRINT_NULL)
+			if (arg->op.left->type == TEP_PRINT_NULL)
 				left = 0;
 			else
 				ret = arg_num_eval(arg->op.left, &left);
@@ -2400,11 +2402,11 @@ static int arg_num_eval(struct print_arg *arg, long long *val)
 		}
 		break;
 
-	case PRINT_NULL:
-	case PRINT_FIELD ... PRINT_SYMBOL:
-	case PRINT_STRING:
-	case PRINT_BSTRING:
-	case PRINT_BITMASK:
+	case TEP_PRINT_NULL:
+	case TEP_PRINT_FIELD ... TEP_PRINT_SYMBOL:
+	case TEP_PRINT_STRING:
+	case TEP_PRINT_BSTRING:
+	case TEP_PRINT_BITMASK:
 	default:
 		do_warning("invalid eval type %d", arg->type);
 		ret = 0;
@@ -2413,27 +2415,27 @@ static int arg_num_eval(struct print_arg *arg, long long *val)
 	return ret;
 }
 
-static char *arg_eval (struct print_arg *arg)
+static char *arg_eval (struct tep_print_arg *arg)
 {
 	long long val;
 	static char buf[20];
 
 	switch (arg->type) {
-	case PRINT_ATOM:
+	case TEP_PRINT_ATOM:
 		return arg->atom.atom;
-	case PRINT_TYPE:
+	case TEP_PRINT_TYPE:
 		return arg_eval(arg->typecast.item);
-	case PRINT_OP:
+	case TEP_PRINT_OP:
 		if (!arg_num_eval(arg, &val))
 			break;
 		sprintf(buf, "%lld", val);
 		return buf;
 
-	case PRINT_NULL:
-	case PRINT_FIELD ... PRINT_SYMBOL:
-	case PRINT_STRING:
-	case PRINT_BSTRING:
-	case PRINT_BITMASK:
+	case TEP_PRINT_NULL:
+	case TEP_PRINT_FIELD ... TEP_PRINT_SYMBOL:
+	case TEP_PRINT_STRING:
+	case TEP_PRINT_BSTRING:
+	case TEP_PRINT_BITMASK:
 	default:
 		do_warning("invalid eval type %d", arg->type);
 		break;
@@ -2442,19 +2444,19 @@ static char *arg_eval (struct print_arg *arg)
 	return NULL;
 }
 
-static enum event_type
-process_fields(struct event_format *event, struct print_flag_sym **list, char **tok)
+static enum tep_event_type
+process_fields(struct tep_event_format *event, struct tep_print_flag_sym **list, char **tok)
 {
-	enum event_type type;
-	struct print_arg *arg = NULL;
-	struct print_flag_sym *field;
+	enum tep_event_type type;
+	struct tep_print_arg *arg = NULL;
+	struct tep_print_flag_sym *field;
 	char *token = *tok;
 	char *value;
 
 	do {
 		free_token(token);
 		type = read_token_item(&token);
-		if (test_type_token(type, token, EVENT_OP, "{"))
+		if (test_type_token(type, token, TEP_EVENT_OP, "{"))
 			break;
 
 		arg = alloc_arg();
@@ -2464,13 +2466,13 @@ process_fields(struct event_format *event, struct print_flag_sym **list, char **
 		free_token(token);
 		type = process_arg(event, arg, &token);
 
-		if (type == EVENT_OP)
+		if (type == TEP_EVENT_OP)
 			type = process_op(event, arg, &token);
 
-		if (type == EVENT_ERROR)
+		if (type == TEP_EVENT_ERROR)
 			goto out_free;
 
-		if (test_type_token(type, token, EVENT_DELIM, ","))
+		if (test_type_token(type, token, TEP_EVENT_DELIM, ","))
 			goto out_free;
 
 		field = calloc(1, sizeof(*field));
@@ -2491,7 +2493,7 @@ process_fields(struct event_format *event, struct print_flag_sym **list, char **
 
 		free_token(token);
 		type = process_arg(event, arg, &token);
-		if (test_type_token(type, token, EVENT_OP, "}"))
+		if (test_type_token(type, token, TEP_EVENT_OP, "}"))
 			goto out_free_field;
 
 		value = arg_eval(arg);
@@ -2508,7 +2510,7 @@ process_fields(struct event_format *event, struct print_flag_sym **list, char **
 
 		free_token(token);
 		type = read_token_item(&token);
-	} while (type == EVENT_DELIM && strcmp(token, ",") == 0);
+	} while (type == TEP_EVENT_DELIM && strcmp(token, ",") == 0);
 
 	*tok = token;
 	return type;
@@ -2520,18 +2522,18 @@ out_free:
 	free_token(token);
 	*tok = NULL;
 
-	return EVENT_ERROR;
+	return TEP_EVENT_ERROR;
 }
 
-static enum event_type
-process_flags(struct event_format *event, struct print_arg *arg, char **tok)
+static enum tep_event_type
+process_flags(struct tep_event_format *event, struct tep_print_arg *arg, char **tok)
 {
-	struct print_arg *field;
-	enum event_type type;
+	struct tep_print_arg *field;
+	enum tep_event_type type;
 	char *token = NULL;
 
 	memset(arg, 0, sizeof(*arg));
-	arg->type = PRINT_FLAGS;
+	arg->type = TEP_PRINT_FLAGS;
 
 	field = alloc_arg();
 	if (!field) {
@@ -2542,10 +2544,10 @@ process_flags(struct event_format *event, struct print_arg *arg, char **tok)
 	type = process_field_arg(event, field, &token);
 
 	/* Handle operations in the first argument */
-	while (type == EVENT_OP)
+	while (type == TEP_EVENT_OP)
 		type = process_op(event, field, &token);
 
-	if (test_type_token(type, token, EVENT_DELIM, ","))
+	if (test_type_token(type, token, TEP_EVENT_DELIM, ","))
 		goto out_free_field;
 	free_token(token);
 
@@ -2557,11 +2559,11 @@ process_flags(struct event_format *event, struct print_arg *arg, char **tok)
 		type = read_token_item(&token);
 	}
 
-	if (test_type_token(type, token, EVENT_DELIM, ","))
+	if (test_type_token(type, token, TEP_EVENT_DELIM, ","))
 		goto out_free;
 
 	type = process_fields(event, &arg->flags.flags, &token);
-	if (test_type_token(type, token, EVENT_DELIM, ")"))
+	if (test_type_token(type, token, TEP_EVENT_DELIM, ")"))
 		goto out_free;
 
 	free_token(token);
@@ -2573,18 +2575,18 @@ out_free_field:
 out_free:
 	free_token(token);
 	*tok = NULL;
-	return EVENT_ERROR;
+	return TEP_EVENT_ERROR;
 }
 
-static enum event_type
-process_symbols(struct event_format *event, struct print_arg *arg, char **tok)
+static enum tep_event_type
+process_symbols(struct tep_event_format *event, struct tep_print_arg *arg, char **tok)
 {
-	struct print_arg *field;
-	enum event_type type;
+	struct tep_print_arg *field;
+	enum tep_event_type type;
 	char *token = NULL;
 
 	memset(arg, 0, sizeof(*arg));
-	arg->type = PRINT_SYMBOL;
+	arg->type = TEP_PRINT_SYMBOL;
 
 	field = alloc_arg();
 	if (!field) {
@@ -2594,13 +2596,13 @@ process_symbols(struct event_format *event, struct print_arg *arg, char **tok)
 
 	type = process_field_arg(event, field, &token);
 
-	if (test_type_token(type, token, EVENT_DELIM, ","))
+	if (test_type_token(type, token, TEP_EVENT_DELIM, ","))
 		goto out_free_field;
 
 	arg->symbol.field = field;
 
 	type = process_fields(event, &arg->symbol.symbols, &token);
-	if (test_type_token(type, token, EVENT_DELIM, ")"))
+	if (test_type_token(type, token, TEP_EVENT_DELIM, ")"))
 		goto out_free;
 
 	free_token(token);
@@ -2612,12 +2614,12 @@ out_free_field:
 out_free:
 	free_token(token);
 	*tok = NULL;
-	return EVENT_ERROR;
+	return TEP_EVENT_ERROR;
 }
 
-static enum event_type
-process_hex_common(struct event_format *event, struct print_arg *arg,
-		   char **tok, enum print_arg_type type)
+static enum tep_event_type
+process_hex_common(struct tep_event_format *event, struct tep_print_arg *arg,
+		   char **tok, enum tep_print_arg_type type)
 {
 	memset(arg, 0, sizeof(*arg));
 	arg->type = type;
@@ -2635,27 +2637,27 @@ free_field:
 	arg->hex.field = NULL;
 out:
 	*tok = NULL;
-	return EVENT_ERROR;
+	return TEP_EVENT_ERROR;
 }
 
-static enum event_type
-process_hex(struct event_format *event, struct print_arg *arg, char **tok)
+static enum tep_event_type
+process_hex(struct tep_event_format *event, struct tep_print_arg *arg, char **tok)
 {
-	return process_hex_common(event, arg, tok, PRINT_HEX);
+	return process_hex_common(event, arg, tok, TEP_PRINT_HEX);
 }
 
-static enum event_type
-process_hex_str(struct event_format *event, struct print_arg *arg,
+static enum tep_event_type
+process_hex_str(struct tep_event_format *event, struct tep_print_arg *arg,
 		char **tok)
 {
-	return process_hex_common(event, arg, tok, PRINT_HEX_STR);
+	return process_hex_common(event, arg, tok, TEP_PRINT_HEX_STR);
 }
 
-static enum event_type
-process_int_array(struct event_format *event, struct print_arg *arg, char **tok)
+static enum tep_event_type
+process_int_array(struct tep_event_format *event, struct tep_print_arg *arg, char **tok)
 {
 	memset(arg, 0, sizeof(*arg));
-	arg->type = PRINT_INT_ARRAY;
+	arg->type = TEP_PRINT_INT_ARRAY;
 
 	if (alloc_and_process_delim(event, ",", &arg->int_array.field))
 		goto out;
@@ -2676,18 +2678,18 @@ free_field:
 	arg->int_array.field = NULL;
 out:
 	*tok = NULL;
-	return EVENT_ERROR;
+	return TEP_EVENT_ERROR;
 }
 
-static enum event_type
-process_dynamic_array(struct event_format *event, struct print_arg *arg, char **tok)
+static enum tep_event_type
+process_dynamic_array(struct tep_event_format *event, struct tep_print_arg *arg, char **tok)
 {
-	struct format_field *field;
-	enum event_type type;
+	struct tep_format_field *field;
+	enum tep_event_type type;
 	char *token;
 
 	memset(arg, 0, sizeof(*arg));
-	arg->type = PRINT_DYNAMIC_ARRAY;
+	arg->type = TEP_PRINT_DYNAMIC_ARRAY;
 
 	/*
 	 * The item within the parenthesis is another field that holds
@@ -2695,7 +2697,7 @@ process_dynamic_array(struct event_format *event, struct print_arg *arg, char **
 	 */
 	type = read_token(&token);
 	*tok = token;
-	if (type != EVENT_ITEM)
+	if (type != TEP_EVENT_ITEM)
 		goto out_free;
 
 	/* Find the field */
@@ -2707,13 +2709,13 @@ process_dynamic_array(struct event_format *event, struct print_arg *arg, char **
 	arg->dynarray.field = field;
 	arg->dynarray.index = 0;
 
-	if (read_expected(EVENT_DELIM, ")") < 0)
+	if (read_expected(TEP_EVENT_DELIM, ")") < 0)
 		goto out_free;
 
 	free_token(token);
 	type = read_token_item(&token);
 	*tok = token;
-	if (type != EVENT_OP || strcmp(token, "[") != 0)
+	if (type != TEP_EVENT_OP || strcmp(token, "[") != 0)
 		return type;
 
 	free_token(token);
@@ -2721,14 +2723,14 @@ process_dynamic_array(struct event_format *event, struct print_arg *arg, char **
 	if (!arg) {
 		do_warning_event(event, "%s: not enough memory!", __func__);
 		*tok = NULL;
-		return EVENT_ERROR;
+		return TEP_EVENT_ERROR;
 	}
 
 	type = process_arg(event, arg, &token);
-	if (type == EVENT_ERROR)
+	if (type == TEP_EVENT_ERROR)
 		goto out_free_arg;
 
-	if (!test_type_token(type, token, EVENT_OP, "]"))
+	if (!test_type_token(type, token, TEP_EVENT_OP, "]"))
 		goto out_free_arg;
 
 	free_token(token);
@@ -2740,21 +2742,21 @@ process_dynamic_array(struct event_format *event, struct print_arg *arg, char **
  out_free:
 	free_token(token);
 	*tok = NULL;
-	return EVENT_ERROR;
+	return TEP_EVENT_ERROR;
 }
 
-static enum event_type
-process_dynamic_array_len(struct event_format *event, struct print_arg *arg,
+static enum tep_event_type
+process_dynamic_array_len(struct tep_event_format *event, struct tep_print_arg *arg,
 			  char **tok)
 {
-	struct format_field *field;
-	enum event_type type;
+	struct tep_format_field *field;
+	enum tep_event_type type;
 	char *token;
 
-	if (read_expect_type(EVENT_ITEM, &token) < 0)
+	if (read_expect_type(TEP_EVENT_ITEM, &token) < 0)
 		goto out_free;
 
-	arg->type = PRINT_DYNAMIC_ARRAY_LEN;
+	arg->type = TEP_PRINT_DYNAMIC_ARRAY_LEN;
 
 	/* Find the field */
 	field = tep_find_field(event, token);
@@ -2764,7 +2766,7 @@ process_dynamic_array_len(struct event_format *event, struct print_arg *arg,
 	arg->dynarray.field = field;
 	arg->dynarray.index = 0;
 
-	if (read_expected(EVENT_DELIM, ")") < 0)
+	if (read_expected(TEP_EVENT_DELIM, ")") < 0)
 		goto out_err;
 
 	type = read_token(&token);
@@ -2776,28 +2778,28 @@ process_dynamic_array_len(struct event_format *event, struct print_arg *arg,
 	free_token(token);
  out_err:
 	*tok = NULL;
-	return EVENT_ERROR;
+	return TEP_EVENT_ERROR;
 }
 
-static enum event_type
-process_paren(struct event_format *event, struct print_arg *arg, char **tok)
+static enum tep_event_type
+process_paren(struct tep_event_format *event, struct tep_print_arg *arg, char **tok)
 {
-	struct print_arg *item_arg;
-	enum event_type type;
+	struct tep_print_arg *item_arg;
+	enum tep_event_type type;
 	char *token;
 
 	type = process_arg(event, arg, &token);
 
-	if (type == EVENT_ERROR)
+	if (type == TEP_EVENT_ERROR)
 		goto out_free;
 
-	if (type == EVENT_OP)
+	if (type == TEP_EVENT_OP)
 		type = process_op(event, arg, &token);
 
-	if (type == EVENT_ERROR)
+	if (type == TEP_EVENT_ERROR)
 		goto out_free;
 
-	if (test_type_token(type, token, EVENT_DELIM, ")"))
+	if (test_type_token(type, token, TEP_EVENT_DELIM, ")"))
 		goto out_free;
 
 	free_token(token);
@@ -2808,13 +2810,13 @@ process_paren(struct event_format *event, struct print_arg *arg, char **tok)
 	 * this was a typecast.
 	 */
 	if (event_item_type(type) ||
-	    (type == EVENT_DELIM && strcmp(token, "(") == 0)) {
+	    (type == TEP_EVENT_DELIM && strcmp(token, "(") == 0)) {
 
 		/* make this a typecast and contine */
 
 		/* prevous must be an atom */
-		if (arg->type != PRINT_ATOM) {
-			do_warning_event(event, "previous needed to be PRINT_ATOM");
+		if (arg->type != TEP_PRINT_ATOM) {
+			do_warning_event(event, "previous needed to be TEP_PRINT_ATOM");
 			goto out_free;
 		}
 
@@ -2825,7 +2827,7 @@ process_paren(struct event_format *event, struct print_arg *arg, char **tok)
 			goto out_free;
 		}
 
-		arg->type = PRINT_TYPE;
+		arg->type = TEP_PRINT_TYPE;
 		arg->typecast.type = arg->atom.atom;
 		arg->typecast.item = item_arg;
 		type = process_arg_token(event, item_arg, &token, type);
@@ -2838,25 +2840,25 @@ process_paren(struct event_format *event, struct print_arg *arg, char **tok)
  out_free:
 	free_token(token);
 	*tok = NULL;
-	return EVENT_ERROR;
+	return TEP_EVENT_ERROR;
 }
 
 
-static enum event_type
-process_str(struct event_format *event __maybe_unused, struct print_arg *arg,
+static enum tep_event_type
+process_str(struct tep_event_format *event __maybe_unused, struct tep_print_arg *arg,
 	    char **tok)
 {
-	enum event_type type;
+	enum tep_event_type type;
 	char *token;
 
-	if (read_expect_type(EVENT_ITEM, &token) < 0)
+	if (read_expect_type(TEP_EVENT_ITEM, &token) < 0)
 		goto out_free;
 
-	arg->type = PRINT_STRING;
+	arg->type = TEP_PRINT_STRING;
 	arg->string.string = token;
 	arg->string.offset = -1;
 
-	if (read_expected(EVENT_DELIM, ")") < 0)
+	if (read_expected(TEP_EVENT_DELIM, ")") < 0)
 		goto out_err;
 
 	type = read_token(&token);
@@ -2868,24 +2870,24 @@ process_str(struct event_format *event __maybe_unused, struct print_arg *arg,
 	free_token(token);
  out_err:
 	*tok = NULL;
-	return EVENT_ERROR;
+	return TEP_EVENT_ERROR;
 }
 
-static enum event_type
-process_bitmask(struct event_format *event __maybe_unused, struct print_arg *arg,
-	    char **tok)
+static enum tep_event_type
+process_bitmask(struct tep_event_format *event __maybe_unused, struct tep_print_arg *arg,
+		char **tok)
 {
-	enum event_type type;
+	enum tep_event_type type;
 	char *token;
 
-	if (read_expect_type(EVENT_ITEM, &token) < 0)
+	if (read_expect_type(TEP_EVENT_ITEM, &token) < 0)
 		goto out_free;
 
-	arg->type = PRINT_BITMASK;
+	arg->type = TEP_PRINT_BITMASK;
 	arg->bitmask.bitmask = token;
 	arg->bitmask.offset = -1;
 
-	if (read_expected(EVENT_DELIM, ")") < 0)
+	if (read_expected(TEP_EVENT_DELIM, ")") < 0)
 		goto out_err;
 
 	type = read_token(&token);
@@ -2897,7 +2899,7 @@ process_bitmask(struct event_format *event __maybe_unused, struct print_arg *arg
 	free_token(token);
  out_err:
 	*tok = NULL;
-	return EVENT_ERROR;
+	return TEP_EVENT_ERROR;
 }
 
 static struct tep_function_handler *
@@ -2932,17 +2934,17 @@ static void remove_func_handler(struct tep_handle *pevent, char *func_name)
 	}
 }
 
-static enum event_type
-process_func_handler(struct event_format *event, struct tep_function_handler *func,
-		     struct print_arg *arg, char **tok)
+static enum tep_event_type
+process_func_handler(struct tep_event_format *event, struct tep_function_handler *func,
+		     struct tep_print_arg *arg, char **tok)
 {
-	struct print_arg **next_arg;
-	struct print_arg *farg;
-	enum event_type type;
+	struct tep_print_arg **next_arg;
+	struct tep_print_arg *farg;
+	enum tep_event_type type;
 	char *token;
 	int i;
 
-	arg->type = PRINT_FUNC;
+	arg->type = TEP_PRINT_FUNC;
 	arg->func.func = func;
 
 	*tok = NULL;
@@ -2953,12 +2955,12 @@ process_func_handler(struct event_format *event, struct tep_function_handler *fu
 		if (!farg) {
 			do_warning_event(event, "%s: not enough memory!",
 					 __func__);
-			return EVENT_ERROR;
+			return TEP_EVENT_ERROR;
 		}
 
 		type = process_arg(event, farg, &token);
 		if (i < (func->nr_args - 1)) {
-			if (type != EVENT_DELIM || strcmp(token, ",") != 0) {
+			if (type != TEP_EVENT_DELIM || strcmp(token, ",") != 0) {
 				do_warning_event(event,
 					"Error: function '%s()' expects %d arguments but event %s only uses %d",
 					func->name, func->nr_args,
@@ -2966,7 +2968,7 @@ process_func_handler(struct event_format *event, struct tep_function_handler *fu
 				goto err;
 			}
 		} else {
-			if (type != EVENT_DELIM || strcmp(token, ")") != 0) {
+			if (type != TEP_EVENT_DELIM || strcmp(token, ")") != 0) {
 				do_warning_event(event,
 					"Error: function '%s()' only expects %d arguments but event %s has more",
 					func->name, func->nr_args, event->name);
@@ -2987,11 +2989,11 @@ process_func_handler(struct event_format *event, struct tep_function_handler *fu
 err:
 	free_arg(farg);
 	free_token(token);
-	return EVENT_ERROR;
+	return TEP_EVENT_ERROR;
 }
 
-static enum event_type
-process_function(struct event_format *event, struct print_arg *arg,
+static enum tep_event_type
+process_function(struct tep_event_format *event, struct tep_print_arg *arg,
 		 char *token, char **tok)
 {
 	struct tep_function_handler *func;
@@ -3043,12 +3045,12 @@ process_function(struct event_format *event, struct print_arg *arg,
 
 	do_warning_event(event, "function %s not defined", token);
 	free_token(token);
-	return EVENT_ERROR;
+	return TEP_EVENT_ERROR;
 }
 
-static enum event_type
-process_arg_token(struct event_format *event, struct print_arg *arg,
-		  char **tok, enum event_type type)
+static enum tep_event_type
+process_arg_token(struct tep_event_format *event, struct tep_print_arg *arg,
+		  char **tok, enum tep_event_type type)
 {
 	char *token;
 	char *atom;
@@ -3056,7 +3058,7 @@ process_arg_token(struct event_format *event, struct print_arg *arg,
 	token = *tok;
 
 	switch (type) {
-	case EVENT_ITEM:
+	case TEP_EVENT_ITEM:
 		if (strcmp(token, "REC") == 0) {
 			free_token(token);
 			type = process_entry(event, arg, &token);
@@ -3070,7 +3072,7 @@ process_arg_token(struct event_format *event, struct print_arg *arg,
 		 * If the next token is a parenthesis, then this
 		 * is a function.
 		 */
-		if (type == EVENT_DELIM && strcmp(token, "(") == 0) {
+		if (type == TEP_EVENT_DELIM && strcmp(token, "(") == 0) {
 			free_token(token);
 			token = NULL;
 			/* this will free atom. */
@@ -3078,7 +3080,7 @@ process_arg_token(struct event_format *event, struct print_arg *arg,
 			break;
 		}
 		/* atoms can be more than one token long */
-		while (type == EVENT_ITEM) {
+		while (type == TEP_EVENT_ITEM) {
 			char *new_atom;
 			new_atom = realloc(atom,
 					   strlen(atom) + strlen(token) + 2);
@@ -3086,7 +3088,7 @@ process_arg_token(struct event_format *event, struct print_arg *arg,
 				free(atom);
 				*tok = NULL;
 				free_token(token);
-				return EVENT_ERROR;
+				return TEP_EVENT_ERROR;
 			}
 			atom = new_atom;
 			strcat(atom, " ");
@@ -3095,55 +3097,55 @@ process_arg_token(struct event_format *event, struct print_arg *arg,
 			type = read_token_item(&token);
 		}
 
-		arg->type = PRINT_ATOM;
+		arg->type = TEP_PRINT_ATOM;
 		arg->atom.atom = atom;
 		break;
 
-	case EVENT_DQUOTE:
-	case EVENT_SQUOTE:
-		arg->type = PRINT_ATOM;
+	case TEP_EVENT_DQUOTE:
+	case TEP_EVENT_SQUOTE:
+		arg->type = TEP_PRINT_ATOM;
 		arg->atom.atom = token;
 		type = read_token_item(&token);
 		break;
-	case EVENT_DELIM:
+	case TEP_EVENT_DELIM:
 		if (strcmp(token, "(") == 0) {
 			free_token(token);
 			type = process_paren(event, arg, &token);
 			break;
 		}
-	case EVENT_OP:
+	case TEP_EVENT_OP:
 		/* handle single ops */
-		arg->type = PRINT_OP;
+		arg->type = TEP_PRINT_OP;
 		arg->op.op = token;
 		arg->op.left = NULL;
 		type = process_op(event, arg, &token);
 
 		/* On error, the op is freed */
-		if (type == EVENT_ERROR)
+		if (type == TEP_EVENT_ERROR)
 			arg->op.op = NULL;
 
 		/* return error type if errored */
 		break;
 
-	case EVENT_ERROR ... EVENT_NEWLINE:
+	case TEP_EVENT_ERROR ... TEP_EVENT_NEWLINE:
 	default:
 		do_warning_event(event, "unexpected type %d", type);
-		return EVENT_ERROR;
+		return TEP_EVENT_ERROR;
 	}
 	*tok = token;
 
 	return type;
 }
 
-static int event_read_print_args(struct event_format *event, struct print_arg **list)
+static int event_read_print_args(struct tep_event_format *event, struct tep_print_arg **list)
 {
-	enum event_type type = EVENT_ERROR;
-	struct print_arg *arg;
+	enum tep_event_type type = TEP_EVENT_ERROR;
+	struct tep_print_arg *arg;
 	char *token;
 	int args = 0;
 
 	do {
-		if (type == EVENT_NEWLINE) {
+		if (type == TEP_EVENT_NEWLINE) {
 			type = read_token_item(&token);
 			continue;
 		}
@@ -3157,7 +3159,7 @@ static int event_read_print_args(struct event_format *event, struct print_arg **
 
 		type = process_arg(event, arg, &token);
 
-		if (type == EVENT_ERROR) {
+		if (type == TEP_EVENT_ERROR) {
 			free_token(token);
 			free_arg(arg);
 			return -1;
@@ -3166,10 +3168,10 @@ static int event_read_print_args(struct event_format *event, struct print_arg **
 		*list = arg;
 		args++;
 
-		if (type == EVENT_OP) {
+		if (type == TEP_EVENT_OP) {
 			type = process_op(event, arg, &token);
 			free_token(token);
-			if (type == EVENT_ERROR) {
+			if (type == TEP_EVENT_ERROR) {
 				*list = NULL;
 				free_arg(arg);
 				return -1;
@@ -3178,37 +3180,37 @@ static int event_read_print_args(struct event_format *event, struct print_arg **
 			continue;
 		}
 
-		if (type == EVENT_DELIM && strcmp(token, ",") == 0) {
+		if (type == TEP_EVENT_DELIM && strcmp(token, ",") == 0) {
 			free_token(token);
 			*list = arg;
 			list = &arg->next;
 			continue;
 		}
 		break;
-	} while (type != EVENT_NONE);
+	} while (type != TEP_EVENT_NONE);
 
-	if (type != EVENT_NONE && type != EVENT_ERROR)
+	if (type != TEP_EVENT_NONE && type != TEP_EVENT_ERROR)
 		free_token(token);
 
 	return args;
 }
 
-static int event_read_print(struct event_format *event)
+static int event_read_print(struct tep_event_format *event)
 {
-	enum event_type type;
+	enum tep_event_type type;
 	char *token;
 	int ret;
 
-	if (read_expected_item(EVENT_ITEM, "print") < 0)
+	if (read_expected_item(TEP_EVENT_ITEM, "print") < 0)
 		return -1;
 
-	if (read_expected(EVENT_ITEM, "fmt") < 0)
+	if (read_expected(TEP_EVENT_ITEM, "fmt") < 0)
 		return -1;
 
-	if (read_expected(EVENT_OP, ":") < 0)
+	if (read_expected(TEP_EVENT_OP, ":") < 0)
 		return -1;
 
-	if (read_expect_type(EVENT_DQUOTE, &token) < 0)
+	if (read_expect_type(TEP_EVENT_DQUOTE, &token) < 0)
 		goto fail;
 
  concat:
@@ -3218,11 +3220,11 @@ static int event_read_print(struct event_format *event)
 	/* ok to have no arg */
 	type = read_token_item(&token);
 
-	if (type == EVENT_NONE)
+	if (type == TEP_EVENT_NONE)
 		return 0;
 
 	/* Handle concatenation of print lines */
-	if (type == EVENT_DQUOTE) {
+	if (type == TEP_EVENT_DQUOTE) {
 		char *cat;
 
 		if (asprintf(&cat, "%s%s", event->print_fmt.format, token) < 0)
@@ -3234,7 +3236,7 @@ static int event_read_print(struct event_format *event)
 		goto concat;
 	}
 			     
-	if (test_type_token(type, token, EVENT_DELIM, ","))
+	if (test_type_token(type, token, TEP_EVENT_DELIM, ","))
 		goto fail;
 
 	free_token(token);
@@ -3258,10 +3260,10 @@ static int event_read_print(struct event_format *event)
  * Returns a common field from the event by the given @name.
  * This only searchs the common fields and not all field.
  */
-struct format_field *
-tep_find_common_field(struct event_format *event, const char *name)
+struct tep_format_field *
+tep_find_common_field(struct tep_event_format *event, const char *name)
 {
-	struct format_field *format;
+	struct tep_format_field *format;
 
 	for (format = event->format.common_fields;
 	     format; format = format->next) {
@@ -3280,10 +3282,10 @@ tep_find_common_field(struct event_format *event, const char *name)
  * Returns a non-common field by the given @name.
  * This does not search common fields.
  */
-struct format_field *
-tep_find_field(struct event_format *event, const char *name)
+struct tep_format_field *
+tep_find_field(struct tep_event_format *event, const char *name)
 {
-	struct format_field *format;
+	struct tep_format_field *format;
 
 	for (format = event->format.fields;
 	     format; format = format->next) {
@@ -3303,10 +3305,10 @@ tep_find_field(struct event_format *event, const char *name)
  * This searchs the common field names first, then
  * the non-common ones if a common one was not found.
  */
-struct format_field *
-tep_find_any_field(struct event_format *event, const char *name)
+struct tep_format_field *
+tep_find_any_field(struct tep_event_format *event, const char *name)
 {
-	struct format_field *format;
+	struct tep_format_field *format;
 
 	format = tep_find_common_field(event, name);
 	if (format)
@@ -3330,11 +3332,11 @@ unsigned long long tep_read_number(struct tep_handle *pevent,
 	case 1:
 		return *(unsigned char *)ptr;
 	case 2:
-		return data2host2(pevent, ptr);
+		return tep_data2host2(pevent, ptr);
 	case 4:
-		return data2host4(pevent, ptr);
+		return tep_data2host4(pevent, ptr);
 	case 8:
-		return data2host8(pevent, ptr);
+		return tep_data2host8(pevent, ptr);
 	default:
 		/* BUG! */
 		return 0;
@@ -3352,7 +3354,7 @@ unsigned long long tep_read_number(struct tep_handle *pevent,
  *
  * Returns 0 on success, -1 otherwise.
  */
-int tep_read_number_field(struct format_field *field, const void *data,
+int tep_read_number_field(struct tep_format_field *field, const void *data,
 			  unsigned long long *value)
 {
 	if (!field)
@@ -3373,8 +3375,8 @@ int tep_read_number_field(struct format_field *field, const void *data,
 static int get_common_info(struct tep_handle *pevent,
 			   const char *type, int *offset, int *size)
 {
-	struct event_format *event;
-	struct format_field *field;
+	struct tep_event_format *event;
+	struct tep_format_field *field;
 
 	/*
 	 * All events should have the same common elements.
@@ -3460,11 +3462,11 @@ static int events_id_cmp(const void *a, const void *b);
  *
  * Returns an event that has a given @id.
  */
-struct event_format *tep_find_event(struct tep_handle *pevent, int id)
+struct tep_event_format *tep_find_event(struct tep_handle *pevent, int id)
 {
-	struct event_format **eventptr;
-	struct event_format key;
-	struct event_format *pkey = &key;
+	struct tep_event_format **eventptr;
+	struct tep_event_format key;
+	struct tep_event_format *pkey = &key;
 
 	/* Check cache first */
 	if (pevent->last_event && pevent->last_event->id == id)
@@ -3492,11 +3494,11 @@ struct event_format *tep_find_event(struct tep_handle *pevent, int id)
  * This returns an event with a given @name and under the system
  * @sys. If @sys is NULL the first event with @name is returned.
  */
-struct event_format *
+struct tep_event_format *
 tep_find_event_by_name(struct tep_handle *pevent,
 		       const char *sys, const char *name)
 {
-	struct event_format *event;
+	struct tep_event_format *event;
 	int i;
 
 	if (pevent->last_event &&
@@ -3521,23 +3523,23 @@ tep_find_event_by_name(struct tep_handle *pevent,
 }
 
 static unsigned long long
-eval_num_arg(void *data, int size, struct event_format *event, struct print_arg *arg)
+eval_num_arg(void *data, int size, struct tep_event_format *event, struct tep_print_arg *arg)
 {
 	struct tep_handle *pevent = event->pevent;
 	unsigned long long val = 0;
 	unsigned long long left, right;
-	struct print_arg *typearg = NULL;
-	struct print_arg *larg;
+	struct tep_print_arg *typearg = NULL;
+	struct tep_print_arg *larg;
 	unsigned long offset;
 	unsigned int field_size;
 
 	switch (arg->type) {
-	case PRINT_NULL:
+	case TEP_PRINT_NULL:
 		/* ?? */
 		return 0;
-	case PRINT_ATOM:
+	case TEP_PRINT_ATOM:
 		return strtoull(arg->atom.atom, NULL, 0);
-	case PRINT_FIELD:
+	case TEP_PRINT_FIELD:
 		if (!arg->field.field) {
 			arg->field.field = tep_find_any_field(event, arg->field.name);
 			if (!arg->field.field)
@@ -3548,27 +3550,27 @@ eval_num_arg(void *data, int size, struct event_format *event, struct print_arg
 		val = tep_read_number(pevent, data + arg->field.field->offset,
 				      arg->field.field->size);
 		break;
-	case PRINT_FLAGS:
-	case PRINT_SYMBOL:
-	case PRINT_INT_ARRAY:
-	case PRINT_HEX:
-	case PRINT_HEX_STR:
+	case TEP_PRINT_FLAGS:
+	case TEP_PRINT_SYMBOL:
+	case TEP_PRINT_INT_ARRAY:
+	case TEP_PRINT_HEX:
+	case TEP_PRINT_HEX_STR:
 		break;
-	case PRINT_TYPE:
+	case TEP_PRINT_TYPE:
 		val = eval_num_arg(data, size, event, arg->typecast.item);
 		return eval_type(val, arg, 0);
-	case PRINT_STRING:
-	case PRINT_BSTRING:
-	case PRINT_BITMASK:
+	case TEP_PRINT_STRING:
+	case TEP_PRINT_BSTRING:
+	case TEP_PRINT_BITMASK:
 		return 0;
-	case PRINT_FUNC: {
+	case TEP_PRINT_FUNC: {
 		struct trace_seq s;
 		trace_seq_init(&s);
 		val = process_defined_func(&s, data, size, event, arg);
 		trace_seq_destroy(&s);
 		return val;
 	}
-	case PRINT_OP:
+	case TEP_PRINT_OP:
 		if (strcmp(arg->op.op, "[") == 0) {
 			/*
 			 * Arrays are special, since we don't want
@@ -3578,7 +3580,7 @@ eval_num_arg(void *data, int size, struct event_format *event, struct print_arg
 
 			/* handle typecasts */
 			larg = arg->op.left;
-			while (larg->type == PRINT_TYPE) {
+			while (larg->type == TEP_PRINT_TYPE) {
 				if (!typearg)
 					typearg = larg;
 				larg = larg->typecast.item;
@@ -3588,7 +3590,7 @@ eval_num_arg(void *data, int size, struct event_format *event, struct print_arg
 			field_size = pevent->long_size;
 
 			switch (larg->type) {
-			case PRINT_DYNAMIC_ARRAY:
+			case TEP_PRINT_DYNAMIC_ARRAY:
 				offset = tep_read_number(pevent,
 						   data + larg->dynarray.field->offset,
 						   larg->dynarray.field->size);
@@ -3602,7 +3604,7 @@ eval_num_arg(void *data, int size, struct event_format *event, struct print_arg
 				offset &= 0xffff;
 				offset += right;
 				break;
-			case PRINT_FIELD:
+			case TEP_PRINT_FIELD:
 				if (!larg->field.field) {
 					larg->field.field =
 						tep_find_any_field(event, larg->field.name);
@@ -3718,7 +3720,7 @@ eval_num_arg(void *data, int size, struct event_format *event, struct print_arg
 			goto out_warning_op;
 		}
 		break;
-	case PRINT_DYNAMIC_ARRAY_LEN:
+	case TEP_PRINT_DYNAMIC_ARRAY_LEN:
 		offset = tep_read_number(pevent,
 					 data + arg->dynarray.field->offset,
 					 arg->dynarray.field->size);
@@ -3729,7 +3731,7 @@ eval_num_arg(void *data, int size, struct event_format *event, struct print_arg
 		 */
 		val = (unsigned long long)(offset >> 16);
 		break;
-	case PRINT_DYNAMIC_ARRAY:
+	case TEP_PRINT_DYNAMIC_ARRAY:
 		/* Without [], we pass the address to the dynamic data */
 		offset = tep_read_number(pevent,
 					 data + arg->dynarray.field->offset,
@@ -3861,12 +3863,12 @@ static void print_bitmask_to_seq(struct tep_handle *pevent,
 }
 
 static void print_str_arg(struct trace_seq *s, void *data, int size,
-			  struct event_format *event, const char *format,
-			  int len_arg, struct print_arg *arg)
+			  struct tep_event_format *event, const char *format,
+			  int len_arg, struct tep_print_arg *arg)
 {
 	struct tep_handle *pevent = event->pevent;
-	struct print_flag_sym *flag;
-	struct format_field *field;
+	struct tep_print_flag_sym *flag;
+	struct tep_format_field *field;
 	struct printk_map *printk;
 	long long val, fval;
 	unsigned long long addr;
@@ -3876,13 +3878,13 @@ static void print_str_arg(struct trace_seq *s, void *data, int size,
 	int i, len;
 
 	switch (arg->type) {
-	case PRINT_NULL:
+	case TEP_PRINT_NULL:
 		/* ?? */
 		return;
-	case PRINT_ATOM:
+	case TEP_PRINT_ATOM:
 		print_str_to_seq(s, format, len_arg, arg->atom.atom);
 		return;
-	case PRINT_FIELD:
+	case TEP_PRINT_FIELD:
 		field = arg->field.field;
 		if (!field) {
 			field = tep_find_any_field(event, arg->field.name);
@@ -3900,7 +3902,7 @@ static void print_str_arg(struct trace_seq *s, void *data, int size,
 		 * and the size is the same as long_size, assume that it
 		 * is a pointer.
 		 */
-		if (!(field->flags & FIELD_IS_ARRAY) &&
+		if (!(field->flags & TEP_FIELD_IS_ARRAY) &&
 		    field->size == pevent->long_size) {
 
 			/* Handle heterogeneous recording and processing
@@ -3939,7 +3941,7 @@ static void print_str_arg(struct trace_seq *s, void *data, int size,
 		print_str_to_seq(s, format, len_arg, str);
 		free(str);
 		break;
-	case PRINT_FLAGS:
+	case TEP_PRINT_FLAGS:
 		val = eval_num_arg(data, size, event, arg->flags.field);
 		print = 0;
 		for (flag = arg->flags.flags; flag; flag = flag->next) {
@@ -3962,7 +3964,7 @@ static void print_str_arg(struct trace_seq *s, void *data, int size,
 			trace_seq_printf(s, "0x%llx", val);
 		}
 		break;
-	case PRINT_SYMBOL:
+	case TEP_PRINT_SYMBOL:
 		val = eval_num_arg(data, size, event, arg->symbol.field);
 		for (flag = arg->symbol.symbols; flag; flag = flag->next) {
 			fval = eval_flag(flag->value);
@@ -3974,9 +3976,9 @@ static void print_str_arg(struct trace_seq *s, void *data, int size,
 		if (!flag)
 			trace_seq_printf(s, "0x%llx", val);
 		break;
-	case PRINT_HEX:
-	case PRINT_HEX_STR:
-		if (arg->hex.field->type == PRINT_DYNAMIC_ARRAY) {
+	case TEP_PRINT_HEX:
+	case TEP_PRINT_HEX_STR:
+		if (arg->hex.field->type == TEP_PRINT_DYNAMIC_ARRAY) {
 			unsigned long offset;
 			offset = tep_read_number(pevent,
 				data + arg->hex.field->dynarray.field->offset,
@@ -3995,19 +3997,19 @@ static void print_str_arg(struct trace_seq *s, void *data, int size,
 		}
 		len = eval_num_arg(data, size, event, arg->hex.size);
 		for (i = 0; i < len; i++) {
-			if (i && arg->type == PRINT_HEX)
+			if (i && arg->type == TEP_PRINT_HEX)
 				trace_seq_putc(s, ' ');
 			trace_seq_printf(s, "%02x", hex[i]);
 		}
 		break;
 
-	case PRINT_INT_ARRAY: {
+	case TEP_PRINT_INT_ARRAY: {
 		void *num;
 		int el_size;
 
-		if (arg->int_array.field->type == PRINT_DYNAMIC_ARRAY) {
+		if (arg->int_array.field->type == TEP_PRINT_DYNAMIC_ARRAY) {
 			unsigned long offset;
-			struct format_field *field =
+			struct tep_format_field *field =
 				arg->int_array.field->dynarray.field;
 			offset = tep_read_number(pevent,
 						 data + field->offset,
@@ -4049,43 +4051,43 @@ static void print_str_arg(struct trace_seq *s, void *data, int size,
 		}
 		break;
 	}
-	case PRINT_TYPE:
+	case TEP_PRINT_TYPE:
 		break;
-	case PRINT_STRING: {
+	case TEP_PRINT_STRING: {
 		int str_offset;
 
 		if (arg->string.offset == -1) {
-			struct format_field *f;
+			struct tep_format_field *f;
 
 			f = tep_find_any_field(event, arg->string.string);
 			arg->string.offset = f->offset;
 		}
-		str_offset = data2host4(pevent, data + arg->string.offset);
+		str_offset = tep_data2host4(pevent, data + arg->string.offset);
 		str_offset &= 0xffff;
 		print_str_to_seq(s, format, len_arg, ((char *)data) + str_offset);
 		break;
 	}
-	case PRINT_BSTRING:
+	case TEP_PRINT_BSTRING:
 		print_str_to_seq(s, format, len_arg, arg->string.string);
 		break;
-	case PRINT_BITMASK: {
+	case TEP_PRINT_BITMASK: {
 		int bitmask_offset;
 		int bitmask_size;
 
 		if (arg->bitmask.offset == -1) {
-			struct format_field *f;
+			struct tep_format_field *f;
 
 			f = tep_find_any_field(event, arg->bitmask.bitmask);
 			arg->bitmask.offset = f->offset;
 		}
-		bitmask_offset = data2host4(pevent, data + arg->bitmask.offset);
+		bitmask_offset = tep_data2host4(pevent, data + arg->bitmask.offset);
 		bitmask_size = bitmask_offset >> 16;
 		bitmask_offset &= 0xffff;
 		print_bitmask_to_seq(pevent, s, format, len_arg,
 				     data + bitmask_offset, bitmask_size);
 		break;
 	}
-	case PRINT_OP:
+	case TEP_PRINT_OP:
 		/*
 		 * The only op for string should be ? :
 		 */
@@ -4099,7 +4101,7 @@ static void print_str_arg(struct trace_seq *s, void *data, int size,
 			print_str_arg(s, data, size, event,
 				      format, len_arg, arg->op.right->op.right);
 		break;
-	case PRINT_FUNC:
+	case TEP_PRINT_FUNC:
 		process_defined_func(s, data, size, event, arg);
 		break;
 	default:
@@ -4116,13 +4118,13 @@ out_warning_field:
 
 static unsigned long long
 process_defined_func(struct trace_seq *s, void *data, int size,
-		     struct event_format *event, struct print_arg *arg)
+		     struct tep_event_format *event, struct tep_print_arg *arg)
 {
 	struct tep_function_handler *func_handle = arg->func.func;
 	struct func_params *param;
 	unsigned long long *args;
 	unsigned long long ret;
-	struct print_arg *farg;
+	struct tep_print_arg *farg;
 	struct trace_seq str;
 	struct save_str {
 		struct save_str *next;
@@ -4199,9 +4201,9 @@ out_free:
 	return ret;
 }
 
-static void free_args(struct print_arg *args)
+static void free_args(struct tep_print_arg *args)
 {
-	struct print_arg *next;
+	struct tep_print_arg *next;
 
 	while (args) {
 		next = args->next;
@@ -4211,11 +4213,11 @@ static void free_args(struct print_arg *args)
 	}
 }
 
-static struct print_arg *make_bprint_args(char *fmt, void *data, int size, struct event_format *event)
+static struct tep_print_arg *make_bprint_args(char *fmt, void *data, int size, struct tep_event_format *event)
 {
 	struct tep_handle *pevent = event->pevent;
-	struct format_field *field, *ip_field;
-	struct print_arg *args, *arg, **next;
+	struct tep_format_field *field, *ip_field;
+	struct tep_print_arg *args, *arg, **next;
 	unsigned long long ip, val;
 	char *ptr;
 	void *bptr;
@@ -4254,7 +4256,7 @@ static struct print_arg *make_bprint_args(char *fmt, void *data, int size, struc
 	arg->next = NULL;
 	next = &arg->next;
 
-	arg->type = PRINT_ATOM;
+	arg->type = TEP_PRINT_ATOM;
 		
 	if (asprintf(&arg->atom.atom, "%lld", ip) < 0)
 		goto out_free;
@@ -4342,7 +4344,7 @@ static struct print_arg *make_bprint_args(char *fmt, void *data, int size, struc
 					goto out_free;
 				}
 				arg->next = NULL;
-				arg->type = PRINT_ATOM;
+				arg->type = TEP_PRINT_ATOM;
 				if (asprintf(&arg->atom.atom, "%lld", val) < 0) {
 					free(arg);
 					goto out_free;
@@ -4366,7 +4368,7 @@ static struct print_arg *make_bprint_args(char *fmt, void *data, int size, struc
 					goto out_free;
 				}
 				arg->next = NULL;
-				arg->type = PRINT_BSTRING;
+				arg->type = TEP_PRINT_BSTRING;
 				arg->string.string = strdup(bptr);
 				if (!arg->string.string)
 					goto out_free;
@@ -4388,11 +4390,11 @@ out_free:
 
 static char *
 get_bprint_format(void *data, int size __maybe_unused,
-		  struct event_format *event)
+		  struct tep_event_format *event)
 {
 	struct tep_handle *pevent = event->pevent;
 	unsigned long long addr;
-	struct format_field *field;
+	struct tep_format_field *field;
 	struct printk_map *printk;
 	char *format;
 
@@ -4423,17 +4425,17 @@ get_bprint_format(void *data, int size __maybe_unused,
 }
 
 static void print_mac_arg(struct trace_seq *s, int mac, void *data, int size,
-			  struct event_format *event, struct print_arg *arg)
+			  struct tep_event_format *event, struct tep_print_arg *arg)
 {
 	unsigned char *buf;
 	const char *fmt = "%.2x:%.2x:%.2x:%.2x:%.2x:%.2x";
 
-	if (arg->type == PRINT_FUNC) {
+	if (arg->type == TEP_PRINT_FUNC) {
 		process_defined_func(s, data, size, event, arg);
 		return;
 	}
 
-	if (arg->type != PRINT_FIELD) {
+	if (arg->type != TEP_PRINT_FIELD) {
 		trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d",
 				 arg->type);
 		return;
@@ -4576,17 +4578,17 @@ static void print_ip6_addr(struct trace_seq *s, char i, unsigned char *buf)
  * %pISpc print an IP address based on sockaddr; p adds port.
  */
 static int print_ipv4_arg(struct trace_seq *s, const char *ptr, char i,
-			  void *data, int size, struct event_format *event,
-			  struct print_arg *arg)
+			  void *data, int size, struct tep_event_format *event,
+			  struct tep_print_arg *arg)
 {
 	unsigned char *buf;
 
-	if (arg->type == PRINT_FUNC) {
+	if (arg->type == TEP_PRINT_FUNC) {
 		process_defined_func(s, data, size, event, arg);
 		return 0;
 	}
 
-	if (arg->type != PRINT_FIELD) {
+	if (arg->type != TEP_PRINT_FIELD) {
 		trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", arg->type);
 		return 0;
 	}
@@ -4613,8 +4615,8 @@ static int print_ipv4_arg(struct trace_seq *s, const char *ptr, char i,
 }
 
 static int print_ipv6_arg(struct trace_seq *s, const char *ptr, char i,
-			  void *data, int size, struct event_format *event,
-			  struct print_arg *arg)
+			  void *data, int size, struct tep_event_format *event,
+			  struct tep_print_arg *arg)
 {
 	char have_c = 0;
 	unsigned char *buf;
@@ -4627,12 +4629,12 @@ static int print_ipv6_arg(struct trace_seq *s, const char *ptr, char i,
 		rc++;
 	}
 
-	if (arg->type == PRINT_FUNC) {
+	if (arg->type == TEP_PRINT_FUNC) {
 		process_defined_func(s, data, size, event, arg);
 		return rc;
 	}
 
-	if (arg->type != PRINT_FIELD) {
+	if (arg->type != TEP_PRINT_FIELD) {
 		trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", arg->type);
 		return rc;
 	}
@@ -4663,8 +4665,8 @@ static int print_ipv6_arg(struct trace_seq *s, const char *ptr, char i,
 }
 
 static int print_ipsa_arg(struct trace_seq *s, const char *ptr, char i,
-			  void *data, int size, struct event_format *event,
-			  struct print_arg *arg)
+			  void *data, int size, struct tep_event_format *event,
+			  struct tep_print_arg *arg)
 {
 	char have_c = 0, have_p = 0;
 	unsigned char *buf;
@@ -4685,12 +4687,12 @@ static int print_ipsa_arg(struct trace_seq *s, const char *ptr, char i,
 		}
 	}
 
-	if (arg->type == PRINT_FUNC) {
+	if (arg->type == TEP_PRINT_FUNC) {
 		process_defined_func(s, data, size, event, arg);
 		return rc;
 	}
 
-	if (arg->type != PRINT_FIELD) {
+	if (arg->type != TEP_PRINT_FIELD) {
 		trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", arg->type);
 		return rc;
 	}
@@ -4745,8 +4747,8 @@ static int print_ipsa_arg(struct trace_seq *s, const char *ptr, char i,
 }
 
 static int print_ip_arg(struct trace_seq *s, const char *ptr,
-			void *data, int size, struct event_format *event,
-			struct print_arg *arg)
+			void *data, int size, struct tep_event_format *event,
+			struct tep_print_arg *arg)
 {
 	char i = *ptr;  /* 'i' or 'I' */
 	char ver;
@@ -4787,22 +4789,22 @@ static int is_printable_array(char *p, unsigned int len)
 }
 
 void tep_print_field(struct trace_seq *s, void *data,
-		     struct format_field *field)
+		     struct tep_format_field *field)
 {
 	unsigned long long val;
 	unsigned int offset, len, i;
 	struct tep_handle *pevent = field->event->pevent;
 
-	if (field->flags & FIELD_IS_ARRAY) {
+	if (field->flags & TEP_FIELD_IS_ARRAY) {
 		offset = field->offset;
 		len = field->size;
-		if (field->flags & FIELD_IS_DYNAMIC) {
+		if (field->flags & TEP_FIELD_IS_DYNAMIC) {
 			val = tep_read_number(pevent, data + offset, len);
 			offset = val;
 			len = offset >> 16;
 			offset &= 0xffff;
 		}
-		if (field->flags & FIELD_IS_STRING &&
+		if (field->flags & TEP_FIELD_IS_STRING &&
 		    is_printable_array(data + offset, len)) {
 			trace_seq_printf(s, "%s", (char *)data + offset);
 		} else {
@@ -4814,21 +4816,21 @@ void tep_print_field(struct trace_seq *s, void *data,
 						 *((unsigned char *)data + offset + i));
 			}
 			trace_seq_putc(s, ']');
-			field->flags &= ~FIELD_IS_STRING;
+			field->flags &= ~TEP_FIELD_IS_STRING;
 		}
 	} else {
 		val = tep_read_number(pevent, data + field->offset,
 				      field->size);
-		if (field->flags & FIELD_IS_POINTER) {
+		if (field->flags & TEP_FIELD_IS_POINTER) {
 			trace_seq_printf(s, "0x%llx", val);
-		} else if (field->flags & FIELD_IS_SIGNED) {
+		} else if (field->flags & TEP_FIELD_IS_SIGNED) {
 			switch (field->size) {
 			case 4:
 				/*
 				 * If field is long then print it in hex.
 				 * A long usually stores pointers.
 				 */
-				if (field->flags & FIELD_IS_LONG)
+				if (field->flags & TEP_FIELD_IS_LONG)
 					trace_seq_printf(s, "0x%x", (int)val);
 				else
 					trace_seq_printf(s, "%d", (int)val);
@@ -4843,7 +4845,7 @@ void tep_print_field(struct trace_seq *s, void *data,
 				trace_seq_printf(s, "%lld", val);
 			}
 		} else {
-			if (field->flags & FIELD_IS_LONG)
+			if (field->flags & TEP_FIELD_IS_LONG)
 				trace_seq_printf(s, "0x%llx", val);
 			else
 				trace_seq_printf(s, "%llu", val);
@@ -4852,9 +4854,9 @@ void tep_print_field(struct trace_seq *s, void *data,
 }
 
 void tep_print_fields(struct trace_seq *s, void *data,
-		      int size __maybe_unused, struct event_format *event)
+		      int size __maybe_unused, struct tep_event_format *event)
 {
-	struct format_field *field;
+	struct tep_format_field *field;
 
 	field = event->format.fields;
 	while (field) {
@@ -4864,12 +4866,12 @@ void tep_print_fields(struct trace_seq *s, void *data,
 	}
 }
 
-static void pretty_print(struct trace_seq *s, void *data, int size, struct event_format *event)
+static void pretty_print(struct trace_seq *s, void *data, int size, struct tep_event_format *event)
 {
 	struct tep_handle *pevent = event->pevent;
-	struct print_fmt *print_fmt = &event->print_fmt;
-	struct print_arg *arg = print_fmt->args;
-	struct print_arg *args = NULL;
+	struct tep_print_fmt *print_fmt = &event->print_fmt;
+	struct tep_print_arg *arg = print_fmt->args;
+	struct tep_print_arg *args = NULL;
 	const char *ptr = print_fmt->format;
 	unsigned long long val;
 	struct func_map *func;
@@ -4883,13 +4885,13 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct event
 	int len;
 	int ls;
 
-	if (event->flags & EVENT_FL_FAILED) {
+	if (event->flags & TEP_EVENT_FL_FAILED) {
 		trace_seq_printf(s, "[FAILED TO PARSE]");
 		tep_print_fields(s, data, size, event);
 		return;
 	}
 
-	if (event->flags & EVENT_FL_ISBPRINT) {
+	if (event->flags & TEP_EVENT_FL_ISBPRINT) {
 		bprint_fmt = get_bprint_format(data, size, event);
 		args = make_bprint_args(bprint_fmt, data, size, event);
 		arg = args;
@@ -4944,7 +4946,7 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct event
 				/* The argument is the length. */
 				if (!arg) {
 					do_warning_event(event, "no argument match");
-					event->flags |= EVENT_FL_FAILED;
+					event->flags |= TEP_EVENT_FL_FAILED;
 					goto out_failed;
 				}
 				len_arg = eval_num_arg(data, size, event, arg);
@@ -4966,7 +4968,7 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct event
 				if (isalnum(ptr[1]))
 					ptr++;
 
-				if (arg->type == PRINT_BSTRING) {
+				if (arg->type == TEP_PRINT_BSTRING) {
 					trace_seq_puts(s, arg->string.string);
 					break;
 				}
@@ -4997,7 +4999,7 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct event
 			case 'u':
 				if (!arg) {
 					do_warning_event(event, "no argument match");
-					event->flags |= EVENT_FL_FAILED;
+					event->flags |= TEP_EVENT_FL_FAILED;
 					goto out_failed;
 				}
 
@@ -5007,7 +5009,7 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct event
 				/* should never happen */
 				if (len > 31) {
 					do_warning_event(event, "bad format!");
-					event->flags |= EVENT_FL_FAILED;
+					event->flags |= TEP_EVENT_FL_FAILED;
 					len = 31;
 				}
 
@@ -5073,13 +5075,13 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct event
 					break;
 				default:
 					do_warning_event(event, "bad count (%d)", ls);
-					event->flags |= EVENT_FL_FAILED;
+					event->flags |= TEP_EVENT_FL_FAILED;
 				}
 				break;
 			case 's':
 				if (!arg) {
 					do_warning_event(event, "no matching argument");
-					event->flags |= EVENT_FL_FAILED;
+					event->flags |= TEP_EVENT_FL_FAILED;
 					goto out_failed;
 				}
 
@@ -5089,7 +5091,7 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct event
 				/* should never happen */
 				if (len > 31) {
 					do_warning_event(event, "bad format!");
-					event->flags |= EVENT_FL_FAILED;
+					event->flags |= TEP_EVENT_FL_FAILED;
 					len = 31;
 				}
 
@@ -5114,7 +5116,7 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct event
 			trace_seq_putc(s, *ptr);
 	}
 
-	if (event->flags & EVENT_FL_FAILED) {
+	if (event->flags & TEP_EVENT_FL_FAILED) {
 out_failed:
 		trace_seq_printf(s, "[FAILED TO PARSE]");
 	}
@@ -5227,7 +5229,7 @@ int tep_data_type(struct tep_handle *pevent, struct tep_record *rec)
  *
  * This returns the event form a given @type;
  */
-struct event_format *tep_data_event_from_type(struct tep_handle *pevent, int type)
+struct tep_event_format *tep_data_event_from_type(struct tep_handle *pevent, int type)
 {
 	return tep_find_event(pevent, type);
 }
@@ -5385,16 +5387,16 @@ int tep_cmdline_pid(struct tep_handle *pevent, struct cmdline *cmdline)
  * This parses the raw @data using the given @event information and
  * writes the print format into the trace_seq.
  */
-void tep_event_info(struct trace_seq *s, struct event_format *event,
+void tep_event_info(struct trace_seq *s, struct tep_event_format *event,
 		    struct tep_record *record)
 {
 	int print_pretty = 1;
 
-	if (event->pevent->print_raw || (event->flags & EVENT_FL_PRINTRAW))
+	if (event->pevent->print_raw || (event->flags & TEP_EVENT_FL_PRINTRAW))
 		tep_print_fields(s, record->data, record->size, event);
 	else {
 
-		if (event->handler && !(event->flags & EVENT_FL_NOHANDLE))
+		if (event->handler && !(event->flags & TEP_EVENT_FL_NOHANDLE))
 			print_pretty = event->handler(s, record, event,
 						      event->context);
 
@@ -5426,7 +5428,7 @@ static bool is_timestamp_in_us(char *trace_clock, bool use_trace_clock)
  * Returns the associated event for a given record, or NULL if non is
  * is found.
  */
-struct event_format *
+struct tep_event_format *
 tep_find_event_by_record(struct tep_handle *pevent, struct tep_record *record)
 {
 	int type;
@@ -5451,7 +5453,7 @@ tep_find_event_by_record(struct tep_handle *pevent, struct tep_record *record)
  * Writes the tasks comm, pid and CPU to @s.
  */
 void tep_print_event_task(struct tep_handle *pevent, struct trace_seq *s,
-			  struct event_format *event,
+			  struct tep_event_format *event,
 			  struct tep_record *record)
 {
 	void *data = record->data;
@@ -5479,7 +5481,7 @@ void tep_print_event_task(struct tep_handle *pevent, struct trace_seq *s,
  * Writes the timestamp of the record into @s.
  */
 void tep_print_event_time(struct tep_handle *pevent, struct trace_seq *s,
-			  struct event_format *event,
+			  struct tep_event_format *event,
 			  struct tep_record *record,
 			  bool use_trace_clock)
 {
@@ -5529,7 +5531,7 @@ void tep_print_event_time(struct tep_handle *pevent, struct trace_seq *s,
  * Writes the parsing of the record's data to @s.
  */
 void tep_print_event_data(struct tep_handle *pevent, struct trace_seq *s,
-			  struct event_format *event,
+			  struct tep_event_format *event,
 			  struct tep_record *record)
 {
 	static const char *spaces = "                    "; /* 20 spaces */
@@ -5548,7 +5550,7 @@ void tep_print_event_data(struct tep_handle *pevent, struct trace_seq *s,
 void tep_print_event(struct tep_handle *pevent, struct trace_seq *s,
 		     struct tep_record *record, bool use_trace_clock)
 {
-	struct event_format *event;
+	struct tep_event_format *event;
 
 	event = tep_find_event_by_record(pevent, record);
 	if (!event) {
@@ -5570,8 +5572,8 @@ void tep_print_event(struct tep_handle *pevent, struct trace_seq *s,
 
 static int events_id_cmp(const void *a, const void *b)
 {
-	struct event_format * const * ea = a;
-	struct event_format * const * eb = b;
+	struct tep_event_format * const * ea = a;
+	struct tep_event_format * const * eb = b;
 
 	if ((*ea)->id < (*eb)->id)
 		return -1;
@@ -5584,8 +5586,8 @@ static int events_id_cmp(const void *a, const void *b)
 
 static int events_name_cmp(const void *a, const void *b)
 {
-	struct event_format * const * ea = a;
-	struct event_format * const * eb = b;
+	struct tep_event_format * const * ea = a;
+	struct tep_event_format * const * eb = b;
 	int res;
 
 	res = strcmp((*ea)->name, (*eb)->name);
@@ -5601,8 +5603,8 @@ static int events_name_cmp(const void *a, const void *b)
 
 static int events_system_cmp(const void *a, const void *b)
 {
-	struct event_format * const * ea = a;
-	struct event_format * const * eb = b;
+	struct tep_event_format * const * ea = a;
+	struct tep_event_format * const * eb = b;
 	int res;
 
 	res = strcmp((*ea)->system, (*eb)->system);
@@ -5616,9 +5618,9 @@ static int events_system_cmp(const void *a, const void *b)
 	return events_id_cmp(a, b);
 }
 
-struct event_format **tep_list_events(struct tep_handle *pevent, enum event_sort_type sort_type)
+struct tep_event_format **tep_list_events(struct tep_handle *pevent, enum tep_event_sort_type sort_type)
 {
-	struct event_format **events;
+	struct tep_event_format **events;
 	int (*sort)(const void *a, const void *b);
 
 	events = pevent->sort_events;
@@ -5637,20 +5639,20 @@ struct event_format **tep_list_events(struct tep_handle *pevent, enum event_sort
 		pevent->sort_events = events;
 
 		/* the internal events are sorted by id */
-		if (sort_type == EVENT_SORT_ID) {
+		if (sort_type == TEP_EVENT_SORT_ID) {
 			pevent->last_type = sort_type;
 			return events;
 		}
 	}
 
 	switch (sort_type) {
-	case EVENT_SORT_ID:
+	case TEP_EVENT_SORT_ID:
 		sort = events_id_cmp;
 		break;
-	case EVENT_SORT_NAME:
+	case TEP_EVENT_SORT_NAME:
 		sort = events_name_cmp;
 		break;
-	case EVENT_SORT_SYSTEM:
+	case TEP_EVENT_SORT_SYSTEM:
 		sort = events_system_cmp;
 		break;
 	default:
@@ -5663,12 +5665,12 @@ struct event_format **tep_list_events(struct tep_handle *pevent, enum event_sort
 	return events;
 }
 
-static struct format_field **
+static struct tep_format_field **
 get_event_fields(const char *type, const char *name,
-		 int count, struct format_field *list)
+		 int count, struct tep_format_field *list)
 {
-	struct format_field **fields;
-	struct format_field *field;
+	struct tep_format_field **fields;
+	struct tep_format_field *field;
 	int i = 0;
 
 	fields = malloc(sizeof(*fields) * (count + 1));
@@ -5701,7 +5703,7 @@ get_event_fields(const char *type, const char *name,
  * Returns an allocated array of fields. The last item in the array is NULL.
  * The array must be freed with free().
  */
-struct format_field **tep_event_common_fields(struct event_format *event)
+struct tep_format_field **tep_event_common_fields(struct tep_event_format *event)
 {
 	return get_event_fields("common", event->name,
 				event->format.nr_common,
@@ -5715,14 +5717,14 @@ struct format_field **tep_event_common_fields(struct event_format *event)
  * Returns an allocated array of fields. The last item in the array is NULL.
  * The array must be freed with free().
  */
-struct format_field **tep_event_fields(struct event_format *event)
+struct tep_format_field **tep_event_fields(struct tep_event_format *event)
 {
 	return get_event_fields("event", event->name,
 				event->format.nr_fields,
 				event->format.fields);
 }
 
-static void print_fields(struct trace_seq *s, struct print_flag_sym *field)
+static void print_fields(struct trace_seq *s, struct tep_print_flag_sym *field)
 {
 	trace_seq_printf(s, "{ %s, %s }", field->value, field->str);
 	if (field->next) {
@@ -5732,22 +5734,22 @@ static void print_fields(struct trace_seq *s, struct print_flag_sym *field)
 }
 
 /* for debugging */
-static void print_args(struct print_arg *args)
+static void print_args(struct tep_print_arg *args)
 {
 	int print_paren = 1;
 	struct trace_seq s;
 
 	switch (args->type) {
-	case PRINT_NULL:
+	case TEP_PRINT_NULL:
 		printf("null");
 		break;
-	case PRINT_ATOM:
+	case TEP_PRINT_ATOM:
 		printf("%s", args->atom.atom);
 		break;
-	case PRINT_FIELD:
+	case TEP_PRINT_FIELD:
 		printf("REC->%s", args->field.name);
 		break;
-	case PRINT_FLAGS:
+	case TEP_PRINT_FLAGS:
 		printf("__print_flags(");
 		print_args(args->flags.field);
 		printf(", %s, ", args->flags.delim);
@@ -5757,7 +5759,7 @@ static void print_args(struct print_arg *args)
 		trace_seq_destroy(&s);
 		printf(")");
 		break;
-	case PRINT_SYMBOL:
+	case TEP_PRINT_SYMBOL:
 		printf("__print_symbolic(");
 		print_args(args->symbol.field);
 		printf(", ");
@@ -5767,21 +5769,21 @@ static void print_args(struct print_arg *args)
 		trace_seq_destroy(&s);
 		printf(")");
 		break;
-	case PRINT_HEX:
+	case TEP_PRINT_HEX:
 		printf("__print_hex(");
 		print_args(args->hex.field);
 		printf(", ");
 		print_args(args->hex.size);
 		printf(")");
 		break;
-	case PRINT_HEX_STR:
+	case TEP_PRINT_HEX_STR:
 		printf("__print_hex_str(");
 		print_args(args->hex.field);
 		printf(", ");
 		print_args(args->hex.size);
 		printf(")");
 		break;
-	case PRINT_INT_ARRAY:
+	case TEP_PRINT_INT_ARRAY:
 		printf("__print_array(");
 		print_args(args->int_array.field);
 		printf(", ");
@@ -5790,18 +5792,18 @@ static void print_args(struct print_arg *args)
 		print_args(args->int_array.el_size);
 		printf(")");
 		break;
-	case PRINT_STRING:
-	case PRINT_BSTRING:
+	case TEP_PRINT_STRING:
+	case TEP_PRINT_BSTRING:
 		printf("__get_str(%s)", args->string.string);
 		break;
-	case PRINT_BITMASK:
+	case TEP_PRINT_BITMASK:
 		printf("__get_bitmask(%s)", args->bitmask.bitmask);
 		break;
-	case PRINT_TYPE:
+	case TEP_PRINT_TYPE:
 		printf("(%s)", args->typecast.type);
 		print_args(args->typecast.item);
 		break;
-	case PRINT_OP:
+	case TEP_PRINT_OP:
 		if (strcmp(args->op.op, ":") == 0)
 			print_paren = 0;
 		if (print_paren)
@@ -5833,13 +5835,13 @@ static void parse_header_field(const char *field,
 	save_input_buf_ptr = input_buf_ptr;
 	save_input_buf_siz = input_buf_siz;
 
-	if (read_expected(EVENT_ITEM, "field") < 0)
+	if (read_expected(TEP_EVENT_ITEM, "field") < 0)
 		return;
-	if (read_expected(EVENT_OP, ":") < 0)
+	if (read_expected(TEP_EVENT_OP, ":") < 0)
 		return;
 
 	/* type */
-	if (read_expect_type(EVENT_ITEM, &token) < 0)
+	if (read_expect_type(TEP_EVENT_ITEM, &token) < 0)
 		goto fail;
 	free_token(token);
 
@@ -5847,42 +5849,42 @@ static void parse_header_field(const char *field,
 	 * If this is not a mandatory field, then test it first.
 	 */
 	if (mandatory) {
-		if (read_expected(EVENT_ITEM, field) < 0)
+		if (read_expected(TEP_EVENT_ITEM, field) < 0)
 			return;
 	} else {
-		if (read_expect_type(EVENT_ITEM, &token) < 0)
+		if (read_expect_type(TEP_EVENT_ITEM, &token) < 0)
 			goto fail;
 		if (strcmp(token, field) != 0)
 			goto discard;
 		free_token(token);
 	}
 
-	if (read_expected(EVENT_OP, ";") < 0)
+	if (read_expected(TEP_EVENT_OP, ";") < 0)
 		return;
-	if (read_expected(EVENT_ITEM, "offset") < 0)
+	if (read_expected(TEP_EVENT_ITEM, "offset") < 0)
 		return;
-	if (read_expected(EVENT_OP, ":") < 0)
+	if (read_expected(TEP_EVENT_OP, ":") < 0)
 		return;
-	if (read_expect_type(EVENT_ITEM, &token) < 0)
+	if (read_expect_type(TEP_EVENT_ITEM, &token) < 0)
 		goto fail;
 	*offset = atoi(token);
 	free_token(token);
-	if (read_expected(EVENT_OP, ";") < 0)
+	if (read_expected(TEP_EVENT_OP, ";") < 0)
 		return;
-	if (read_expected(EVENT_ITEM, "size") < 0)
+	if (read_expected(TEP_EVENT_ITEM, "size") < 0)
 		return;
-	if (read_expected(EVENT_OP, ":") < 0)
+	if (read_expected(TEP_EVENT_OP, ":") < 0)
 		return;
-	if (read_expect_type(EVENT_ITEM, &token) < 0)
+	if (read_expect_type(TEP_EVENT_ITEM, &token) < 0)
 		goto fail;
 	*size = atoi(token);
 	free_token(token);
-	if (read_expected(EVENT_OP, ";") < 0)
+	if (read_expected(TEP_EVENT_OP, ";") < 0)
 		return;
 	type = read_token(&token);
-	if (type != EVENT_NEWLINE) {
+	if (type != TEP_EVENT_NEWLINE) {
 		/* newer versions of the kernel have a "signed" type */
-		if (type != EVENT_ITEM)
+		if (type != TEP_EVENT_ITEM)
 			goto fail;
 
 		if (strcmp(token, "signed") != 0)
@@ -5890,17 +5892,17 @@ static void parse_header_field(const char *field,
 
 		free_token(token);
 
-		if (read_expected(EVENT_OP, ":") < 0)
+		if (read_expected(TEP_EVENT_OP, ":") < 0)
 			return;
 
-		if (read_expect_type(EVENT_ITEM, &token))
+		if (read_expect_type(TEP_EVENT_ITEM, &token))
 			goto fail;
 
 		free_token(token);
-		if (read_expected(EVENT_OP, ";") < 0)
+		if (read_expected(TEP_EVENT_OP, ";") < 0)
 			return;
 
-		if (read_expect_type(EVENT_NEWLINE, &token))
+		if (read_expect_type(TEP_EVENT_NEWLINE, &token))
 			goto fail;
 	}
  fail:
@@ -5957,7 +5959,7 @@ int tep_parse_header_page(struct tep_handle *pevent, char *buf, unsigned long si
 	return 0;
 }
 
-static int event_matches(struct event_format *event,
+static int event_matches(struct tep_event_format *event,
 			 int id, const char *sys_name,
 			 const char *event_name)
 {
@@ -5980,7 +5982,7 @@ static void free_handler(struct event_handler *handle)
 	free(handle);
 }
 
-static int find_event_handle(struct tep_handle *pevent, struct event_format *event)
+static int find_event_handle(struct tep_handle *pevent, struct tep_event_format *event)
 {
 	struct event_handler *handle, **next;
 
@@ -6021,11 +6023,11 @@ static int find_event_handle(struct tep_handle *pevent, struct event_format *eve
  *
  * /sys/kernel/debug/tracing/events/.../.../format
  */
-enum tep_errno __tep_parse_format(struct event_format **eventp,
+enum tep_errno __tep_parse_format(struct tep_event_format **eventp,
 				  struct tep_handle *pevent, const char *buf,
 				  unsigned long size, const char *sys)
 {
-	struct event_format *event;
+	struct tep_event_format *event;
 	int ret;
 
 	init_input_buf(buf, size);
@@ -6042,10 +6044,10 @@ enum tep_errno __tep_parse_format(struct event_format **eventp,
 	}
 
 	if (strcmp(sys, "ftrace") == 0) {
-		event->flags |= EVENT_FL_ISFTRACE;
+		event->flags |= TEP_EVENT_FL_ISFTRACE;
 
 		if (strcmp(event->name, "bprint") == 0)
-			event->flags |= EVENT_FL_ISBPRINT;
+			event->flags |= TEP_EVENT_FL_ISBPRINT;
 	}
 		
 	event->id = event_read_id();
@@ -6088,22 +6090,22 @@ enum tep_errno __tep_parse_format(struct event_format **eventp,
 		goto event_parse_failed;
 	}
 
-	if (!ret && (event->flags & EVENT_FL_ISFTRACE)) {
-		struct format_field *field;
-		struct print_arg *arg, **list;
+	if (!ret && (event->flags & TEP_EVENT_FL_ISFTRACE)) {
+		struct tep_format_field *field;
+		struct tep_print_arg *arg, **list;
 
 		/* old ftrace had no args */
 		list = &event->print_fmt.args;
 		for (field = event->format.fields; field; field = field->next) {
 			arg = alloc_arg();
 			if (!arg) {
-				event->flags |= EVENT_FL_FAILED;
+				event->flags |= TEP_EVENT_FL_FAILED;
 				return TEP_ERRNO__OLD_FTRACE_ARG_FAILED;
 			}
-			arg->type = PRINT_FIELD;
+			arg->type = TEP_PRINT_FIELD;
 			arg->field.name = strdup(field->name);
 			if (!arg->field.name) {
-				event->flags |= EVENT_FL_FAILED;
+				event->flags |= TEP_EVENT_FL_FAILED;
 				free_arg(arg);
 				return TEP_ERRNO__OLD_FTRACE_ARG_FAILED;
 			}
@@ -6117,7 +6119,7 @@ enum tep_errno __tep_parse_format(struct event_format **eventp,
 	return 0;
 
  event_parse_failed:
-	event->flags |= EVENT_FL_FAILED;
+	event->flags |= TEP_EVENT_FL_FAILED;
 	return ret;
 
  event_alloc_failed:
@@ -6130,12 +6132,12 @@ enum tep_errno __tep_parse_format(struct event_format **eventp,
 
 static enum tep_errno
 __parse_event(struct tep_handle *pevent,
-	      struct event_format **eventp,
+	      struct tep_event_format **eventp,
 	      const char *buf, unsigned long size,
 	      const char *sys)
 {
 	int ret = __tep_parse_format(eventp, pevent, buf, size, sys);
-	struct event_format *event = *eventp;
+	struct tep_event_format *event = *eventp;
 
 	if (event == NULL)
 		return ret;
@@ -6172,7 +6174,7 @@ event_add_failed:
  * /sys/kernel/debug/tracing/events/.../.../format
  */
 enum tep_errno tep_parse_format(struct tep_handle *pevent,
-				struct event_format **eventp,
+				struct tep_event_format **eventp,
 				const char *buf,
 				unsigned long size, const char *sys)
 {
@@ -6196,40 +6198,11 @@ enum tep_errno tep_parse_format(struct tep_handle *pevent,
 enum tep_errno tep_parse_event(struct tep_handle *pevent, const char *buf,
 			       unsigned long size, const char *sys)
 {
-	struct event_format *event = NULL;
+	struct tep_event_format *event = NULL;
 	return __parse_event(pevent, &event, buf, size, sys);
 }
 
-#undef _PE
-#define _PE(code, str) str
-static const char * const tep_error_str[] = {
-	TEP_ERRORS
-};
-#undef _PE
-
-int tep_strerror(struct tep_handle *pevent __maybe_unused,
-		 enum tep_errno errnum, char *buf, size_t buflen)
-{
-	int idx;
-	const char *msg;
-
-	if (errnum >= 0) {
-		str_error_r(errnum, buf, buflen);
-		return 0;
-	}
-
-	if (errnum <= __TEP_ERRNO__START ||
-	    errnum >= __TEP_ERRNO__END)
-		return -1;
-
-	idx = errnum - __TEP_ERRNO__START - 1;
-	msg = tep_error_str[idx];
-	snprintf(buf, buflen, "%s", msg);
-
-	return 0;
-}
-
-int get_field_val(struct trace_seq *s, struct format_field *field,
+int get_field_val(struct trace_seq *s, struct tep_format_field *field,
 		  const char *name, struct tep_record *record,
 		  unsigned long long *val, int err)
 {
@@ -6262,11 +6235,11 @@ int get_field_val(struct trace_seq *s, struct format_field *field,
  *
  * On failure, it returns NULL.
  */
-void *tep_get_field_raw(struct trace_seq *s, struct event_format *event,
+void *tep_get_field_raw(struct trace_seq *s, struct tep_event_format *event,
 			const char *name, struct tep_record *record,
 			int *len, int err)
 {
-	struct format_field *field;
+	struct tep_format_field *field;
 	void *data = record->data;
 	unsigned offset;
 	int dummy;
@@ -6287,7 +6260,7 @@ void *tep_get_field_raw(struct trace_seq *s, struct event_format *event,
 		len = &dummy;
 
 	offset = field->offset;
-	if (field->flags & FIELD_IS_DYNAMIC) {
+	if (field->flags & TEP_FIELD_IS_DYNAMIC) {
 		offset = tep_read_number(event->pevent,
 					    data + offset, field->size);
 		*len = offset >> 16;
@@ -6309,11 +6282,11 @@ void *tep_get_field_raw(struct trace_seq *s, struct event_format *event,
  *
  * Returns 0 on success -1 on field not found.
  */
-int tep_get_field_val(struct trace_seq *s, struct event_format *event,
+int tep_get_field_val(struct trace_seq *s, struct tep_event_format *event,
 		      const char *name, struct tep_record *record,
 		      unsigned long long *val, int err)
 {
-	struct format_field *field;
+	struct tep_format_field *field;
 
 	if (!event)
 		return -1;
@@ -6334,11 +6307,11 @@ int tep_get_field_val(struct trace_seq *s, struct event_format *event,
  *
  * Returns 0 on success -1 on field not found.
  */
-int tep_get_common_field_val(struct trace_seq *s, struct event_format *event,
+int tep_get_common_field_val(struct trace_seq *s, struct tep_event_format *event,
 			     const char *name, struct tep_record *record,
 			     unsigned long long *val, int err)
 {
-	struct format_field *field;
+	struct tep_format_field *field;
 
 	if (!event)
 		return -1;
@@ -6359,11 +6332,11 @@ int tep_get_common_field_val(struct trace_seq *s, struct event_format *event,
  *
  * Returns 0 on success -1 on field not found.
  */
-int tep_get_any_field_val(struct trace_seq *s, struct event_format *event,
+int tep_get_any_field_val(struct trace_seq *s, struct tep_event_format *event,
 			  const char *name, struct tep_record *record,
 			  unsigned long long *val, int err)
 {
-	struct format_field *field;
+	struct tep_format_field *field;
 
 	if (!event)
 		return -1;
@@ -6385,10 +6358,10 @@ int tep_get_any_field_val(struct trace_seq *s, struct event_format *event,
  * Returns: 0 on success, -1 field not found, or 1 if buffer is full.
  */
 int tep_print_num_field(struct trace_seq *s, const char *fmt,
-			struct event_format *event, const char *name,
+			struct tep_event_format *event, const char *name,
 			struct tep_record *record, int err)
 {
-	struct format_field *field = tep_find_field(event, name);
+	struct tep_format_field *field = tep_find_field(event, name);
 	unsigned long long val;
 
 	if (!field)
@@ -6417,10 +6390,10 @@ int tep_print_num_field(struct trace_seq *s, const char *fmt,
  * Returns: 0 on success, -1 field not found, or 1 if buffer is full.
  */
 int tep_print_func_field(struct trace_seq *s, const char *fmt,
-			 struct event_format *event, const char *name,
+			 struct tep_event_format *event, const char *name,
 			 struct tep_record *record, int err)
 {
-	struct format_field *field = tep_find_field(event, name);
+	struct tep_format_field *field = tep_find_field(event, name);
 	struct tep_handle *pevent = event->pevent;
 	unsigned long long val;
 	struct func_map *func;
@@ -6577,11 +6550,11 @@ int tep_unregister_print_function(struct tep_handle *pevent,
 	return -1;
 }
 
-static struct event_format *search_event(struct tep_handle *pevent, int id,
+static struct tep_event_format *search_event(struct tep_handle *pevent, int id,
 					 const char *sys_name,
 					 const char *event_name)
 {
-	struct event_format *event;
+	struct tep_event_format *event;
 
 	if (id >= 0) {
 		/* search by id */
@@ -6621,7 +6594,7 @@ int tep_register_event_handler(struct tep_handle *pevent, int id,
 			       const char *sys_name, const char *event_name,
 			       tep_event_handler_func func, void *context)
 {
-	struct event_format *event;
+	struct tep_event_format *event;
 	struct event_handler *handle;
 
 	event = search_event(pevent, id, sys_name, event_name);
@@ -6705,7 +6678,7 @@ int tep_unregister_event_handler(struct tep_handle *pevent, int id,
 				 const char *sys_name, const char *event_name,
 				 tep_event_handler_func func, void *context)
 {
-	struct event_format *event;
+	struct tep_event_format *event;
 	struct event_handler *handle;
 	struct event_handler **next;
 
@@ -6757,7 +6730,7 @@ void tep_ref(struct tep_handle *pevent)
 	pevent->ref_count++;
 }
 
-void tep_free_format_field(struct format_field *field)
+void tep_free_format_field(struct tep_format_field *field)
 {
 	free(field->type);
 	if (field->alias != field->name)
@@ -6766,9 +6739,9 @@ void tep_free_format_field(struct format_field *field)
 	free(field);
 }
 
-static void free_format_fields(struct format_field *field)
+static void free_format_fields(struct tep_format_field *field)
 {
-	struct format_field *next;
+	struct tep_format_field *next;
 
 	while (field) {
 		next = field->next;
@@ -6777,13 +6750,13 @@ static void free_format_fields(struct format_field *field)
 	}
 }
 
-static void free_formats(struct format *format)
+static void free_formats(struct tep_format *format)
 {
 	free_format_fields(format->common_fields);
 	free_format_fields(format->fields);
 }
 
-void tep_free_format(struct event_format *event)
+void tep_free_format(struct tep_event_format *event)
 {
 	free(event->name);
 	free(event->system);
diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h
index 44b7c2d41f9f..16bf4c890b6f 100644
--- a/tools/lib/traceevent/event-parse.h
+++ b/tools/lib/traceevent/event-parse.h
@@ -26,17 +26,12 @@
 #include <regex.h>
 #include <string.h>
 
+#include "trace-seq.h"
+
 #ifndef __maybe_unused
 #define __maybe_unused __attribute__((unused))
 #endif
 
-/* ----------------------- trace_seq ----------------------- */
-
-
-#ifndef TRACE_SEQ_BUF_SIZE
-#define TRACE_SEQ_BUF_SIZE 4096
-#endif
-
 #ifndef DEBUG_RECORD
 #define DEBUG_RECORD 0
 #endif
@@ -59,51 +54,14 @@ struct tep_record {
 #endif
 };
 
-enum trace_seq_fail {
-	TRACE_SEQ__GOOD,
-	TRACE_SEQ__BUFFER_POISONED,
-	TRACE_SEQ__MEM_ALLOC_FAILED,
-};
-
-/*
- * Trace sequences are used to allow a function to call several other functions
- * to create a string of data to use (up to a max of PAGE_SIZE).
- */
-
-struct trace_seq {
-	char			*buffer;
-	unsigned int		buffer_size;
-	unsigned int		len;
-	unsigned int		readpos;
-	enum trace_seq_fail	state;
-};
-
-void trace_seq_init(struct trace_seq *s);
-void trace_seq_reset(struct trace_seq *s);
-void trace_seq_destroy(struct trace_seq *s);
-
-extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
-	__attribute__ ((format (printf, 2, 3)));
-extern int trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args)
-	__attribute__ ((format (printf, 2, 0)));
-
-extern int trace_seq_puts(struct trace_seq *s, const char *str);
-extern int trace_seq_putc(struct trace_seq *s, unsigned char c);
-
-extern void trace_seq_terminate(struct trace_seq *s);
-
-extern int trace_seq_do_fprintf(struct trace_seq *s, FILE *fp);
-extern int trace_seq_do_printf(struct trace_seq *s);
-
-
-/* ----------------------- pevent ----------------------- */
+/* ----------------------- tep ----------------------- */
 
 struct tep_handle;
-struct event_format;
+struct tep_event_format;
 
 typedef int (*tep_event_handler_func)(struct trace_seq *s,
 				      struct tep_record *record,
-				      struct event_format *event,
+				      struct tep_event_format *event,
 				      void *context);
 
 typedef int (*tep_plugin_load_func)(struct tep_handle *pevent);
@@ -172,20 +130,20 @@ struct tep_plugin_option {
 #define TEP_PLUGIN_OPTIONS_NAME MAKE_STR(TEP_PLUGIN_OPTIONS)
 #define TEP_PLUGIN_ALIAS_NAME MAKE_STR(TEP_PLUGIN_ALIAS)
 
-enum format_flags {
-	FIELD_IS_ARRAY		= 1,
-	FIELD_IS_POINTER	= 2,
-	FIELD_IS_SIGNED		= 4,
-	FIELD_IS_STRING		= 8,
-	FIELD_IS_DYNAMIC	= 16,
-	FIELD_IS_LONG		= 32,
-	FIELD_IS_FLAG		= 64,
-	FIELD_IS_SYMBOLIC	= 128,
+enum tep_format_flags {
+	TEP_FIELD_IS_ARRAY	= 1,
+	TEP_FIELD_IS_POINTER	= 2,
+	TEP_FIELD_IS_SIGNED	= 4,
+	TEP_FIELD_IS_STRING	= 8,
+	TEP_FIELD_IS_DYNAMIC	= 16,
+	TEP_FIELD_IS_LONG	= 32,
+	TEP_FIELD_IS_FLAG	= 64,
+	TEP_FIELD_IS_SYMBOLIC	= 128,
 };
 
-struct format_field {
-	struct format_field	*next;
-	struct event_format	*event;
+struct tep_format_field {
+	struct tep_format_field	*next;
+	struct tep_event_format	*event;
 	char			*type;
 	char			*name;
 	char			*alias;
@@ -196,169 +154,169 @@ struct format_field {
 	unsigned long		flags;
 };
 
-struct format {
+struct tep_format {
 	int			nr_common;
 	int			nr_fields;
-	struct format_field	*common_fields;
-	struct format_field	*fields;
+	struct tep_format_field	*common_fields;
+	struct tep_format_field	*fields;
 };
 
-struct print_arg_atom {
+struct tep_print_arg_atom {
 	char			*atom;
 };
 
-struct print_arg_string {
+struct tep_print_arg_string {
 	char			*string;
 	int			offset;
 };
 
-struct print_arg_bitmask {
+struct tep_print_arg_bitmask {
 	char			*bitmask;
 	int			offset;
 };
 
-struct print_arg_field {
+struct tep_print_arg_field {
 	char			*name;
-	struct format_field	*field;
+	struct tep_format_field	*field;
 };
 
-struct print_flag_sym {
-	struct print_flag_sym	*next;
-	char			*value;
-	char			*str;
+struct tep_print_flag_sym {
+	struct tep_print_flag_sym	*next;
+	char				*value;
+	char				*str;
 };
 
-struct print_arg_typecast {
+struct tep_print_arg_typecast {
 	char 			*type;
-	struct print_arg	*item;
+	struct tep_print_arg	*item;
 };
 
-struct print_arg_flags {
-	struct print_arg	*field;
-	char			*delim;
-	struct print_flag_sym	*flags;
+struct tep_print_arg_flags {
+	struct tep_print_arg		*field;
+	char				*delim;
+	struct tep_print_flag_sym	*flags;
 };
 
-struct print_arg_symbol {
-	struct print_arg	*field;
-	struct print_flag_sym	*symbols;
+struct tep_print_arg_symbol {
+	struct tep_print_arg		*field;
+	struct tep_print_flag_sym	*symbols;
 };
 
-struct print_arg_hex {
-	struct print_arg	*field;
-	struct print_arg	*size;
+struct tep_print_arg_hex {
+	struct tep_print_arg	*field;
+	struct tep_print_arg	*size;
 };
 
-struct print_arg_int_array {
-	struct print_arg	*field;
-	struct print_arg	*count;
-	struct print_arg	*el_size;
+struct tep_print_arg_int_array {
+	struct tep_print_arg	*field;
+	struct tep_print_arg	*count;
+	struct tep_print_arg	*el_size;
 };
 
-struct print_arg_dynarray {
-	struct format_field	*field;
-	struct print_arg	*index;
+struct tep_print_arg_dynarray {
+	struct tep_format_field	*field;
+	struct tep_print_arg	*index;
 };
 
-struct print_arg;
+struct tep_print_arg;
 
-struct print_arg_op {
+struct tep_print_arg_op {
 	char			*op;
 	int			prio;
-	struct print_arg	*left;
-	struct print_arg	*right;
+	struct tep_print_arg	*left;
+	struct tep_print_arg	*right;
 };
 
 struct tep_function_handler;
 
-struct print_arg_func {
+struct tep_print_arg_func {
 	struct tep_function_handler	*func;
-	struct print_arg		*args;
-};
-
-enum print_arg_type {
-	PRINT_NULL,
-	PRINT_ATOM,
-	PRINT_FIELD,
-	PRINT_FLAGS,
-	PRINT_SYMBOL,
-	PRINT_HEX,
-	PRINT_INT_ARRAY,
-	PRINT_TYPE,
-	PRINT_STRING,
-	PRINT_BSTRING,
-	PRINT_DYNAMIC_ARRAY,
-	PRINT_OP,
-	PRINT_FUNC,
-	PRINT_BITMASK,
-	PRINT_DYNAMIC_ARRAY_LEN,
-	PRINT_HEX_STR,
-};
-
-struct print_arg {
-	struct print_arg		*next;
-	enum print_arg_type		type;
+	struct tep_print_arg		*args;
+};
+
+enum tep_print_arg_type {
+	TEP_PRINT_NULL,
+	TEP_PRINT_ATOM,
+	TEP_PRINT_FIELD,
+	TEP_PRINT_FLAGS,
+	TEP_PRINT_SYMBOL,
+	TEP_PRINT_HEX,
+	TEP_PRINT_INT_ARRAY,
+	TEP_PRINT_TYPE,
+	TEP_PRINT_STRING,
+	TEP_PRINT_BSTRING,
+	TEP_PRINT_DYNAMIC_ARRAY,
+	TEP_PRINT_OP,
+	TEP_PRINT_FUNC,
+	TEP_PRINT_BITMASK,
+	TEP_PRINT_DYNAMIC_ARRAY_LEN,
+	TEP_PRINT_HEX_STR,
+};
+
+struct tep_print_arg {
+	struct tep_print_arg		*next;
+	enum tep_print_arg_type		type;
 	union {
-		struct print_arg_atom		atom;
-		struct print_arg_field		field;
-		struct print_arg_typecast	typecast;
-		struct print_arg_flags		flags;
-		struct print_arg_symbol		symbol;
-		struct print_arg_hex		hex;
-		struct print_arg_int_array	int_array;
-		struct print_arg_func		func;
-		struct print_arg_string		string;
-		struct print_arg_bitmask	bitmask;
-		struct print_arg_op		op;
-		struct print_arg_dynarray	dynarray;
+		struct tep_print_arg_atom	atom;
+		struct tep_print_arg_field	field;
+		struct tep_print_arg_typecast	typecast;
+		struct tep_print_arg_flags	flags;
+		struct tep_print_arg_symbol	symbol;
+		struct tep_print_arg_hex	hex;
+		struct tep_print_arg_int_array	int_array;
+		struct tep_print_arg_func	func;
+		struct tep_print_arg_string	string;
+		struct tep_print_arg_bitmask	bitmask;
+		struct tep_print_arg_op		op;
+		struct tep_print_arg_dynarray	dynarray;
 	};
 };
 
-struct print_fmt {
+struct tep_print_fmt {
 	char			*format;
-	struct print_arg	*args;
+	struct tep_print_arg	*args;
 };
 
-struct event_format {
+struct tep_event_format {
 	struct tep_handle	*pevent;
 	char			*name;
 	int			id;
 	int			flags;
-	struct format		format;
-	struct print_fmt	print_fmt;
+	struct tep_format	format;
+	struct tep_print_fmt	print_fmt;
 	char			*system;
 	tep_event_handler_func	handler;
 	void			*context;
 };
 
 enum {
-	EVENT_FL_ISFTRACE	= 0x01,
-	EVENT_FL_ISPRINT	= 0x02,
-	EVENT_FL_ISBPRINT	= 0x04,
-	EVENT_FL_ISFUNCENT	= 0x10,
-	EVENT_FL_ISFUNCRET	= 0x20,
-	EVENT_FL_NOHANDLE	= 0x40,
-	EVENT_FL_PRINTRAW	= 0x80,
+	TEP_EVENT_FL_ISFTRACE	= 0x01,
+	TEP_EVENT_FL_ISPRINT	= 0x02,
+	TEP_EVENT_FL_ISBPRINT	= 0x04,
+	TEP_EVENT_FL_ISFUNCENT	= 0x10,
+	TEP_EVENT_FL_ISFUNCRET	= 0x20,
+	TEP_EVENT_FL_NOHANDLE	= 0x40,
+	TEP_EVENT_FL_PRINTRAW	= 0x80,
 
-	EVENT_FL_FAILED		= 0x80000000
+	TEP_EVENT_FL_FAILED	= 0x80000000
 };
 
-enum event_sort_type {
-	EVENT_SORT_ID,
-	EVENT_SORT_NAME,
-	EVENT_SORT_SYSTEM,
+enum tep_event_sort_type {
+	TEP_EVENT_SORT_ID,
+	TEP_EVENT_SORT_NAME,
+	TEP_EVENT_SORT_SYSTEM,
 };
 
-enum event_type {
-	EVENT_ERROR,
-	EVENT_NONE,
-	EVENT_SPACE,
-	EVENT_NEWLINE,
-	EVENT_OP,
-	EVENT_DELIM,
-	EVENT_ITEM,
-	EVENT_DQUOTE,
-	EVENT_SQUOTE,
+enum tep_event_type {
+	TEP_EVENT_ERROR,
+	TEP_EVENT_NONE,
+	TEP_EVENT_SPACE,
+	TEP_EVENT_NEWLINE,
+	TEP_EVENT_OP,
+	TEP_EVENT_DELIM,
+	TEP_EVENT_ITEM,
+	TEP_EVENT_DQUOTE,
+	TEP_EVENT_SQUOTE,
 };
 
 typedef unsigned long long (*tep_func_handler)(struct trace_seq *s,
@@ -431,12 +389,12 @@ enum tep_errno {
 };
 #undef _PE
 
-struct plugin_list;
+struct tep_plugin_list;
 
 #define INVALID_PLUGIN_LIST_OPTION	((char **)((unsigned long)-1))
 
-struct plugin_list *tep_load_plugins(struct tep_handle *pevent);
-void tep_unload_plugins(struct plugin_list *plugin_list,
+struct tep_plugin_list *tep_load_plugins(struct tep_handle *pevent);
+void tep_unload_plugins(struct tep_plugin_list *plugin_list,
 			struct tep_handle *pevent);
 char **tep_plugin_list_options(void);
 void tep_plugin_free_options_list(char **list);
@@ -445,156 +403,25 @@ int tep_plugin_add_options(const char *name,
 void tep_plugin_remove_options(struct tep_plugin_option *options);
 void tep_print_plugins(struct trace_seq *s,
 			const char *prefix, const char *suffix,
-			const struct plugin_list *list);
-
-struct cmdline;
-struct cmdline_list;
-struct func_map;
-struct func_list;
-struct event_handler;
-struct func_resolver;
+			const struct tep_plugin_list *list);
 
+/* tep_handle */
 typedef char *(tep_func_resolver_t)(void *priv,
 				    unsigned long long *addrp, char **modp);
+void tep_set_flag(struct tep_handle *tep, int flag);
+unsigned short __tep_data2host2(struct tep_handle *pevent, unsigned short data);
+unsigned int __tep_data2host4(struct tep_handle *pevent, unsigned int data);
+unsigned long long
+__tep_data2host8(struct tep_handle *pevent, unsigned long long data);
 
-struct tep_handle {
-	int ref_count;
-
-	int header_page_ts_offset;
-	int header_page_ts_size;
-	int header_page_size_offset;
-	int header_page_size_size;
-	int header_page_data_offset;
-	int header_page_data_size;
-	int header_page_overwrite;
-
-	int file_bigendian;
-	int host_bigendian;
-
-	int latency_format;
-
-	int old_format;
-
-	int cpus;
-	int long_size;
-	int page_size;
-
-	struct cmdline *cmdlines;
-	struct cmdline_list *cmdlist;
-	int cmdline_count;
-
-	struct func_map *func_map;
-	struct func_resolver *func_resolver;
-	struct func_list *funclist;
-	unsigned int func_count;
-
-	struct printk_map *printk_map;
-	struct printk_list *printklist;
-	unsigned int printk_count;
-
-
-	struct event_format **events;
-	int nr_events;
-	struct event_format **sort_events;
-	enum event_sort_type last_type;
-
-	int type_offset;
-	int type_size;
-
-	int pid_offset;
-	int pid_size;
-
- 	int pc_offset;
-	int pc_size;
-
-	int flags_offset;
-	int flags_size;
-
-	int ld_offset;
-	int ld_size;
-
-	int print_raw;
-
-	int test_filters;
-
-	int flags;
-
-	struct format_field *bprint_ip_field;
-	struct format_field *bprint_fmt_field;
-	struct format_field *bprint_buf_field;
-
-	struct event_handler *handlers;
-	struct tep_function_handler *func_handlers;
-
-	/* cache */
-	struct event_format *last_event;
-
-	char *trace_clock;
-};
-
-static inline void tep_set_flag(struct tep_handle *pevent, int flag)
-{
-	pevent->flags |= flag;
-}
-
-static inline unsigned short
-__data2host2(struct tep_handle *pevent, unsigned short data)
-{
-	unsigned short swap;
-
-	if (pevent->host_bigendian == pevent->file_bigendian)
-		return data;
-
-	swap = ((data & 0xffULL) << 8) |
-		((data & (0xffULL << 8)) >> 8);
-
-	return swap;
-}
-
-static inline unsigned int
-__data2host4(struct tep_handle *pevent, unsigned int data)
-{
-	unsigned int swap;
-
-	if (pevent->host_bigendian == pevent->file_bigendian)
-		return data;
-
-	swap = ((data & 0xffULL) << 24) |
-		((data & (0xffULL << 8)) << 8) |
-		((data & (0xffULL << 16)) >> 8) |
-		((data & (0xffULL << 24)) >> 24);
-
-	return swap;
-}
-
-static inline unsigned long long
-__data2host8(struct tep_handle *pevent, unsigned long long data)
-{
-	unsigned long long swap;
-
-	if (pevent->host_bigendian == pevent->file_bigendian)
-		return data;
-
-	swap = ((data & 0xffULL) << 56) |
-		((data & (0xffULL << 8)) << 40) |
-		((data & (0xffULL << 16)) << 24) |
-		((data & (0xffULL << 24)) << 8) |
-		((data & (0xffULL << 32)) >> 8) |
-		((data & (0xffULL << 40)) >> 24) |
-		((data & (0xffULL << 48)) >> 40) |
-		((data & (0xffULL << 56)) >> 56);
-
-	return swap;
-}
-
-#define data2host2(pevent, ptr)		__data2host2(pevent, *(unsigned short *)(ptr))
-#define data2host4(pevent, ptr)		__data2host4(pevent, *(unsigned int *)(ptr))
-#define data2host8(pevent, ptr)					\
+#define tep_data2host2(pevent, ptr)	__tep_data2host2(pevent, *(unsigned short *)(ptr))
+#define tep_data2host4(pevent, ptr)	__tep_data2host4(pevent, *(unsigned int *)(ptr))
+#define tep_data2host8(pevent, ptr)	\
 ({								\
 	unsigned long long __val;				\
 								\
 	memcpy(&__val, (ptr), sizeof(unsigned long long));	\
-	__data2host8(pevent, __val);				\
+	__tep_data2host8(pevent, __val);				\
 })
 
 static inline int tep_host_bigendian(void)
@@ -627,14 +454,14 @@ int tep_register_print_string(struct tep_handle *pevent, const char *fmt,
 int tep_pid_is_registered(struct tep_handle *pevent, int pid);
 
 void tep_print_event_task(struct tep_handle *pevent, struct trace_seq *s,
-			  struct event_format *event,
+			  struct tep_event_format *event,
 			  struct tep_record *record);
 void tep_print_event_time(struct tep_handle *pevent, struct trace_seq *s,
-			  struct event_format *event,
+			  struct tep_event_format *event,
 			  struct tep_record *record,
 			  bool use_trace_clock);
 void tep_print_event_data(struct tep_handle *pevent, struct trace_seq *s,
-			  struct event_format *event,
+			  struct tep_event_format *event,
 			  struct tep_record *record);
 void tep_print_event(struct tep_handle *pevent, struct trace_seq *s,
 		     struct tep_record *record, bool use_trace_clock);
@@ -645,32 +472,32 @@ int tep_parse_header_page(struct tep_handle *pevent, char *buf, unsigned long si
 enum tep_errno tep_parse_event(struct tep_handle *pevent, const char *buf,
 			       unsigned long size, const char *sys);
 enum tep_errno tep_parse_format(struct tep_handle *pevent,
-				struct event_format **eventp,
+				struct tep_event_format **eventp,
 				const char *buf,
 				unsigned long size, const char *sys);
-void tep_free_format(struct event_format *event);
-void tep_free_format_field(struct format_field *field);
+void tep_free_format(struct tep_event_format *event);
+void tep_free_format_field(struct tep_format_field *field);
 
-void *tep_get_field_raw(struct trace_seq *s, struct event_format *event,
+void *tep_get_field_raw(struct trace_seq *s, struct tep_event_format *event,
 			const char *name, struct tep_record *record,
 			int *len, int err);
 
-int tep_get_field_val(struct trace_seq *s, struct event_format *event,
+int tep_get_field_val(struct trace_seq *s, struct tep_event_format *event,
 		      const char *name, struct tep_record *record,
 		      unsigned long long *val, int err);
-int tep_get_common_field_val(struct trace_seq *s, struct event_format *event,
+int tep_get_common_field_val(struct trace_seq *s, struct tep_event_format *event,
 			     const char *name, struct tep_record *record,
 			     unsigned long long *val, int err);
-int tep_get_any_field_val(struct trace_seq *s, struct event_format *event,
+int tep_get_any_field_val(struct trace_seq *s, struct tep_event_format *event,
 			  const char *name, struct tep_record *record,
 			  unsigned long long *val, int err);
 
 int tep_print_num_field(struct trace_seq *s, const char *fmt,
-			   struct event_format *event, const char *name,
-			   struct tep_record *record, int err);
+			struct tep_event_format *event, const char *name,
+			struct tep_record *record, int err);
 
 int tep_print_func_field(struct trace_seq *s, const char *fmt,
-			 struct event_format *event, const char *name,
+			 struct tep_event_format *event, const char *name,
 			 struct tep_record *record, int err);
 
 int tep_register_event_handler(struct tep_handle *pevent, int id,
@@ -686,29 +513,30 @@ int tep_register_print_function(struct tep_handle *pevent,
 int tep_unregister_print_function(struct tep_handle *pevent,
 				  tep_func_handler func, char *name);
 
-struct format_field *tep_find_common_field(struct event_format *event, const char *name);
-struct format_field *tep_find_field(struct event_format *event, const char *name);
-struct format_field *tep_find_any_field(struct event_format *event, const char *name);
+struct tep_format_field *tep_find_common_field(struct tep_event_format *event, const char *name);
+struct tep_format_field *tep_find_field(struct tep_event_format *event, const char *name);
+struct tep_format_field *tep_find_any_field(struct tep_event_format *event, const char *name);
 
 const char *tep_find_function(struct tep_handle *pevent, unsigned long long addr);
 unsigned long long
 tep_find_function_address(struct tep_handle *pevent, unsigned long long addr);
 unsigned long long tep_read_number(struct tep_handle *pevent, const void *ptr, int size);
-int tep_read_number_field(struct format_field *field, const void *data,
+int tep_read_number_field(struct tep_format_field *field, const void *data,
 			  unsigned long long *value);
 
-struct event_format *tep_find_event(struct tep_handle *pevent, int id);
+struct tep_event_format *tep_get_first_event(struct tep_handle *tep);
+int tep_get_events_count(struct tep_handle *tep);
+struct tep_event_format *tep_find_event(struct tep_handle *pevent, int id);
 
-struct event_format *
+struct tep_event_format *
 tep_find_event_by_name(struct tep_handle *pevent, const char *sys, const char *name);
-
-struct event_format *
+struct tep_event_format *
 tep_find_event_by_record(struct tep_handle *pevent, struct tep_record *record);
 
 void tep_data_lat_fmt(struct tep_handle *pevent,
 		      struct trace_seq *s, struct tep_record *record);
 int tep_data_type(struct tep_handle *pevent, struct tep_record *rec);
-struct event_format *tep_data_event_from_type(struct tep_handle *pevent, int type);
+struct tep_event_format *tep_data_event_from_type(struct tep_handle *pevent, int type);
 int tep_data_pid(struct tep_handle *pevent, struct tep_record *rec);
 int tep_data_preempt_count(struct tep_handle *pevent, struct tep_record *rec);
 int tep_data_flags(struct tep_handle *pevent, struct tep_record *rec);
@@ -719,77 +547,35 @@ struct cmdline *tep_data_pid_from_comm(struct tep_handle *pevent, const char *co
 int tep_cmdline_pid(struct tep_handle *pevent, struct cmdline *cmdline);
 
 void tep_print_field(struct trace_seq *s, void *data,
-		     struct format_field *field);
+		     struct tep_format_field *field);
 void tep_print_fields(struct trace_seq *s, void *data,
-		      int size __maybe_unused, struct event_format *event);
-void tep_event_info(struct trace_seq *s, struct event_format *event,
+		      int size __maybe_unused, struct tep_event_format *event);
+void tep_event_info(struct trace_seq *s, struct tep_event_format *event,
 		       struct tep_record *record);
 int tep_strerror(struct tep_handle *pevent, enum tep_errno errnum,
 		    char *buf, size_t buflen);
 
-struct event_format **tep_list_events(struct tep_handle *pevent, enum event_sort_type);
-struct format_field **tep_event_common_fields(struct event_format *event);
-struct format_field **tep_event_fields(struct event_format *event);
-
-static inline int tep_get_cpus(struct tep_handle *pevent)
-{
-	return pevent->cpus;
-}
-
-static inline void tep_set_cpus(struct tep_handle *pevent, int cpus)
-{
-	pevent->cpus = cpus;
-}
-
-static inline int tep_get_long_size(struct tep_handle *pevent)
-{
-	return pevent->long_size;
-}
-
-static inline void tep_set_long_size(struct tep_handle *pevent, int long_size)
-{
-	pevent->long_size = long_size;
-}
-
-static inline int tep_get_page_size(struct tep_handle *pevent)
-{
-	return pevent->page_size;
-}
-
-static inline void tep_set_page_size(struct tep_handle *pevent, int _page_size)
-{
-	pevent->page_size = _page_size;
-}
-
-static inline int tep_is_file_bigendian(struct tep_handle *pevent)
-{
-	return pevent->file_bigendian;
-}
-
-static inline void tep_set_file_bigendian(struct tep_handle *pevent, int endian)
-{
-	pevent->file_bigendian = endian;
-}
-
-static inline int tep_is_host_bigendian(struct tep_handle *pevent)
-{
-	return pevent->host_bigendian;
-}
-
-static inline void tep_set_host_bigendian(struct tep_handle *pevent, int endian)
-{
-	pevent->host_bigendian = endian;
-}
-
-static inline int tep_is_latency_format(struct tep_handle *pevent)
-{
-	return pevent->latency_format;
-}
-
-static inline void tep_set_latency_format(struct tep_handle *pevent, int lat)
-{
-	pevent->latency_format = lat;
-}
+struct tep_event_format **tep_list_events(struct tep_handle *pevent, enum tep_event_sort_type);
+struct tep_format_field **tep_event_common_fields(struct tep_event_format *event);
+struct tep_format_field **tep_event_fields(struct tep_event_format *event);
+
+enum tep_endian {
+        TEP_LITTLE_ENDIAN = 0,
+        TEP_BIG_ENDIAN
+};
+int tep_get_cpus(struct tep_handle *pevent);
+void tep_set_cpus(struct tep_handle *pevent, int cpus);
+int tep_get_long_size(struct tep_handle *pevent);
+void tep_set_long_size(struct tep_handle *pevent, int long_size);
+int tep_get_page_size(struct tep_handle *pevent);
+void tep_set_page_size(struct tep_handle *pevent, int _page_size);
+int tep_is_file_bigendian(struct tep_handle *pevent);
+void tep_set_file_bigendian(struct tep_handle *pevent, enum tep_endian endian);
+int tep_is_host_bigendian(struct tep_handle *pevent);
+void tep_set_host_bigendian(struct tep_handle *pevent, enum tep_endian endian);
+int tep_is_latency_format(struct tep_handle *pevent);
+void tep_set_latency_format(struct tep_handle *pevent, int lat);
+int tep_get_header_page_size(struct tep_handle *pevent);
 
 struct tep_handle *tep_alloc(void);
 void tep_free(struct tep_handle *pevent);
@@ -798,7 +584,7 @@ void tep_unref(struct tep_handle *pevent);
 
 /* access to the internal parser */
 void tep_buffer_init(const char *buf, unsigned long long size);
-enum event_type tep_read_token(char **tok);
+enum tep_event_type tep_read_token(char **tok);
 void tep_free_token(char *token);
 int tep_peek_char(void);
 const char *tep_get_input_buf(void);
@@ -810,136 +596,136 @@ void tep_print_printk(struct tep_handle *pevent);
 
 /* ----------------------- filtering ----------------------- */
 
-enum filter_boolean_type {
-	FILTER_FALSE,
-	FILTER_TRUE,
+enum tep_filter_boolean_type {
+	TEP_FILTER_FALSE,
+	TEP_FILTER_TRUE,
 };
 
-enum filter_op_type {
-	FILTER_OP_AND = 1,
-	FILTER_OP_OR,
-	FILTER_OP_NOT,
+enum tep_filter_op_type {
+	TEP_FILTER_OP_AND = 1,
+	TEP_FILTER_OP_OR,
+	TEP_FILTER_OP_NOT,
 };
 
-enum filter_cmp_type {
-	FILTER_CMP_NONE,
-	FILTER_CMP_EQ,
-	FILTER_CMP_NE,
-	FILTER_CMP_GT,
-	FILTER_CMP_LT,
-	FILTER_CMP_GE,
-	FILTER_CMP_LE,
-	FILTER_CMP_MATCH,
-	FILTER_CMP_NOT_MATCH,
-	FILTER_CMP_REGEX,
-	FILTER_CMP_NOT_REGEX,
+enum tep_filter_cmp_type {
+	TEP_FILTER_CMP_NONE,
+	TEP_FILTER_CMP_EQ,
+	TEP_FILTER_CMP_NE,
+	TEP_FILTER_CMP_GT,
+	TEP_FILTER_CMP_LT,
+	TEP_FILTER_CMP_GE,
+	TEP_FILTER_CMP_LE,
+	TEP_FILTER_CMP_MATCH,
+	TEP_FILTER_CMP_NOT_MATCH,
+	TEP_FILTER_CMP_REGEX,
+	TEP_FILTER_CMP_NOT_REGEX,
 };
 
-enum filter_exp_type {
-	FILTER_EXP_NONE,
-	FILTER_EXP_ADD,
-	FILTER_EXP_SUB,
-	FILTER_EXP_MUL,
-	FILTER_EXP_DIV,
-	FILTER_EXP_MOD,
-	FILTER_EXP_RSHIFT,
-	FILTER_EXP_LSHIFT,
-	FILTER_EXP_AND,
-	FILTER_EXP_OR,
-	FILTER_EXP_XOR,
-	FILTER_EXP_NOT,
+enum tep_filter_exp_type {
+	TEP_FILTER_EXP_NONE,
+	TEP_FILTER_EXP_ADD,
+	TEP_FILTER_EXP_SUB,
+	TEP_FILTER_EXP_MUL,
+	TEP_FILTER_EXP_DIV,
+	TEP_FILTER_EXP_MOD,
+	TEP_FILTER_EXP_RSHIFT,
+	TEP_FILTER_EXP_LSHIFT,
+	TEP_FILTER_EXP_AND,
+	TEP_FILTER_EXP_OR,
+	TEP_FILTER_EXP_XOR,
+	TEP_FILTER_EXP_NOT,
 };
 
-enum filter_arg_type {
-	FILTER_ARG_NONE,
-	FILTER_ARG_BOOLEAN,
-	FILTER_ARG_VALUE,
-	FILTER_ARG_FIELD,
-	FILTER_ARG_EXP,
-	FILTER_ARG_OP,
-	FILTER_ARG_NUM,
-	FILTER_ARG_STR,
+enum tep_filter_arg_type {
+	TEP_FILTER_ARG_NONE,
+	TEP_FILTER_ARG_BOOLEAN,
+	TEP_FILTER_ARG_VALUE,
+	TEP_FILTER_ARG_FIELD,
+	TEP_FILTER_ARG_EXP,
+	TEP_FILTER_ARG_OP,
+	TEP_FILTER_ARG_NUM,
+	TEP_FILTER_ARG_STR,
 };
 
-enum filter_value_type {
-	FILTER_NUMBER,
-	FILTER_STRING,
-	FILTER_CHAR
+enum tep_filter_value_type {
+	TEP_FILTER_NUMBER,
+	TEP_FILTER_STRING,
+	TEP_FILTER_CHAR
 };
 
-struct fliter_arg;
+struct tep_filter_arg;
 
-struct filter_arg_boolean {
-	enum filter_boolean_type	value;
+struct tep_filter_arg_boolean {
+	enum tep_filter_boolean_type	value;
 };
 
-struct filter_arg_field {
-	struct format_field	*field;
+struct tep_filter_arg_field {
+	struct tep_format_field		*field;
 };
 
-struct filter_arg_value {
-	enum filter_value_type	type;
+struct tep_filter_arg_value {
+	enum tep_filter_value_type	type;
 	union {
 		char			*str;
 		unsigned long long	val;
 	};
 };
 
-struct filter_arg_op {
-	enum filter_op_type	type;
-	struct filter_arg	*left;
-	struct filter_arg	*right;
+struct tep_filter_arg_op {
+	enum tep_filter_op_type		type;
+	struct tep_filter_arg		*left;
+	struct tep_filter_arg		*right;
 };
 
-struct filter_arg_exp {
-	enum filter_exp_type	type;
-	struct filter_arg	*left;
-	struct filter_arg	*right;
+struct tep_filter_arg_exp {
+	enum tep_filter_exp_type	type;
+	struct tep_filter_arg		*left;
+	struct tep_filter_arg		*right;
 };
 
-struct filter_arg_num {
-	enum filter_cmp_type	type;
-	struct filter_arg	*left;
-	struct filter_arg	*right;
+struct tep_filter_arg_num {
+	enum tep_filter_cmp_type	type;
+	struct tep_filter_arg		*left;
+	struct tep_filter_arg		*right;
 };
 
-struct filter_arg_str {
-	enum filter_cmp_type	type;
-	struct format_field	*field;
-	char			*val;
-	char			*buffer;
-	regex_t			reg;
+struct tep_filter_arg_str {
+	enum tep_filter_cmp_type	type;
+	struct tep_format_field		*field;
+	char				*val;
+	char				*buffer;
+	regex_t				reg;
 };
 
-struct filter_arg {
-	enum filter_arg_type	type;
+struct tep_filter_arg {
+	enum tep_filter_arg_type		type;
 	union {
-		struct filter_arg_boolean	boolean;
-		struct filter_arg_field		field;
-		struct filter_arg_value		value;
-		struct filter_arg_op		op;
-		struct filter_arg_exp		exp;
-		struct filter_arg_num		num;
-		struct filter_arg_str		str;
+		struct tep_filter_arg_boolean	boolean;
+		struct tep_filter_arg_field	field;
+		struct tep_filter_arg_value	value;
+		struct tep_filter_arg_op	op;
+		struct tep_filter_arg_exp	exp;
+		struct tep_filter_arg_num	num;
+		struct tep_filter_arg_str	str;
 	};
 };
 
-struct filter_type {
+struct tep_filter_type {
 	int			event_id;
-	struct event_format	*event;
-	struct filter_arg	*filter;
+	struct tep_event_format	*event;
+	struct tep_filter_arg	*filter;
 };
 
 #define TEP_FILTER_ERROR_BUFSZ  1024
 
-struct event_filter {
+struct tep_event_filter {
 	struct tep_handle	*pevent;
 	int			filters;
-	struct filter_type	*event_filters;
+	struct tep_filter_type	*event_filters;
 	char			error_buffer[TEP_FILTER_ERROR_BUFSZ];
 };
 
-struct event_filter *tep_filter_alloc(struct tep_handle *pevent);
+struct tep_event_filter *tep_filter_alloc(struct tep_handle *pevent);
 
 /* for backward compatibility */
 #define FILTER_NONE		TEP_ERRNO__NO_FILTER
@@ -947,45 +733,45 @@ struct event_filter *tep_filter_alloc(struct tep_handle *pevent);
 #define FILTER_MISS		TEP_ERRNO__FILTER_MISS
 #define FILTER_MATCH		TEP_ERRNO__FILTER_MATCH
 
-enum filter_trivial_type {
-	FILTER_TRIVIAL_FALSE,
-	FILTER_TRIVIAL_TRUE,
-	FILTER_TRIVIAL_BOTH,
+enum tep_filter_trivial_type {
+	TEP_FILTER_TRIVIAL_FALSE,
+	TEP_FILTER_TRIVIAL_TRUE,
+	TEP_FILTER_TRIVIAL_BOTH,
 };
 
-enum tep_errno tep_filter_add_filter_str(struct event_filter *filter,
+enum tep_errno tep_filter_add_filter_str(struct tep_event_filter *filter,
 					 const char *filter_str);
 
-enum tep_errno tep_filter_match(struct event_filter *filter,
+enum tep_errno tep_filter_match(struct tep_event_filter *filter,
 				struct tep_record *record);
 
-int tep_filter_strerror(struct event_filter *filter, enum tep_errno err,
+int tep_filter_strerror(struct tep_event_filter *filter, enum tep_errno err,
 			char *buf, size_t buflen);
 
-int tep_event_filtered(struct event_filter *filter,
+int tep_event_filtered(struct tep_event_filter *filter,
 		       int event_id);
 
-void tep_filter_reset(struct event_filter *filter);
+void tep_filter_reset(struct tep_event_filter *filter);
 
-int tep_filter_clear_trivial(struct event_filter *filter,
-			     enum filter_trivial_type type);
+int tep_filter_clear_trivial(struct tep_event_filter *filter,
+			     enum tep_filter_trivial_type type);
 
-void tep_filter_free(struct event_filter *filter);
+void tep_filter_free(struct tep_event_filter *filter);
 
-char *tep_filter_make_string(struct event_filter *filter, int event_id);
+char *tep_filter_make_string(struct tep_event_filter *filter, int event_id);
 
-int tep_filter_remove_event(struct event_filter *filter,
+int tep_filter_remove_event(struct tep_event_filter *filter,
 			    int event_id);
 
-int tep_filter_event_has_trivial(struct event_filter *filter,
+int tep_filter_event_has_trivial(struct tep_event_filter *filter,
 				 int event_id,
-				 enum filter_trivial_type type);
+				 enum tep_filter_trivial_type type);
 
-int tep_filter_copy(struct event_filter *dest, struct event_filter *source);
+int tep_filter_copy(struct tep_event_filter *dest, struct tep_event_filter *source);
 
-int tep_update_trivial(struct event_filter *dest, struct event_filter *source,
-			enum filter_trivial_type type);
+int tep_update_trivial(struct tep_event_filter *dest, struct tep_event_filter *source,
+			enum tep_filter_trivial_type type);
 
-int tep_filter_compare(struct event_filter *filter1, struct event_filter *filter2);
+int tep_filter_compare(struct tep_event_filter *filter1, struct tep_event_filter *filter2);
 
 #endif /* _PARSE_EVENTS_H */
diff --git a/tools/lib/traceevent/event-plugin.c b/tools/lib/traceevent/event-plugin.c
index f17e25097e1e..e74f16c88398 100644
--- a/tools/lib/traceevent/event-plugin.c
+++ b/tools/lib/traceevent/event-plugin.c
@@ -14,7 +14,9 @@
 #include <unistd.h>
 #include <dirent.h>
 #include "event-parse.h"
+#include "event-parse-local.h"
 #include "event-utils.h"
+#include "trace-seq.h"
 
 #define LOCAL_PLUGIN_DIR ".traceevent/plugins"
 
@@ -30,8 +32,8 @@ static struct trace_plugin_options {
 	char				*value;
 } *trace_plugin_options;
 
-struct plugin_list {
-	struct plugin_list	*next;
+struct tep_plugin_list {
+	struct tep_plugin_list	*next;
 	char			*name;
 	void			*handle;
 };
@@ -258,7 +260,7 @@ void tep_plugin_remove_options(struct tep_plugin_option *options)
  */
 void tep_print_plugins(struct trace_seq *s,
 		       const char *prefix, const char *suffix,
-		       const struct plugin_list *list)
+		       const struct tep_plugin_list *list)
 {
 	while (list) {
 		trace_seq_printf(s, "%s%s%s", prefix, list->name, suffix);
@@ -270,9 +272,9 @@ static void
 load_plugin(struct tep_handle *pevent, const char *path,
 	    const char *file, void *data)
 {
-	struct plugin_list **plugin_list = data;
+	struct tep_plugin_list **plugin_list = data;
 	tep_plugin_load_func func;
-	struct plugin_list *list;
+	struct tep_plugin_list *list;
 	const char *alias;
 	char *plugin;
 	void *handle;
@@ -416,20 +418,20 @@ load_plugins(struct tep_handle *pevent, const char *suffix,
 	free(path);
 }
 
-struct plugin_list*
+struct tep_plugin_list*
 tep_load_plugins(struct tep_handle *pevent)
 {
-	struct plugin_list *list = NULL;
+	struct tep_plugin_list *list = NULL;
 
 	load_plugins(pevent, ".so", load_plugin, &list);
 	return list;
 }
 
 void
-tep_unload_plugins(struct plugin_list *plugin_list, struct tep_handle *pevent)
+tep_unload_plugins(struct tep_plugin_list *plugin_list, struct tep_handle *pevent)
 {
 	tep_plugin_unload_func func;
-	struct plugin_list *list;
+	struct tep_plugin_list *list;
 
 	while (plugin_list) {
 		list = plugin_list;
diff --git a/tools/lib/traceevent/parse-filter.c b/tools/lib/traceevent/parse-filter.c
index e76154c02ee7..ed87cb56713d 100644
--- a/tools/lib/traceevent/parse-filter.c
+++ b/tools/lib/traceevent/parse-filter.c
@@ -11,22 +11,23 @@
 #include <sys/types.h>
 
 #include "event-parse.h"
+#include "event-parse-local.h"
 #include "event-utils.h"
 
 #define COMM "COMM"
 #define CPU "CPU"
 
-static struct format_field comm = {
+static struct tep_format_field comm = {
 	.name = "COMM",
 };
 
-static struct format_field cpu = {
+static struct tep_format_field cpu = {
 	.name = "CPU",
 };
 
 struct event_list {
 	struct event_list	*next;
-	struct event_format	*event;
+	struct tep_event_format	*event;
 };
 
 static void show_error(char *error_buf, const char *fmt, ...)
@@ -61,15 +62,15 @@ static void free_token(char *token)
 	tep_free_token(token);
 }
 
-static enum event_type read_token(char **tok)
+static enum tep_event_type read_token(char **tok)
 {
-	enum event_type type;
+	enum tep_event_type type;
 	char *token = NULL;
 
 	do {
 		free_token(token);
 		type = tep_read_token(&token);
-	} while (type == EVENT_NEWLINE || type == EVENT_SPACE);
+	} while (type == TEP_EVENT_NEWLINE || type == TEP_EVENT_SPACE);
 
 	/* If token is = or ! check to see if the next char is ~ */
 	if (token &&
@@ -79,7 +80,7 @@ static enum event_type read_token(char **tok)
 		*tok = malloc(3);
 		if (*tok == NULL) {
 			free_token(token);
-			return EVENT_ERROR;
+			return TEP_EVENT_ERROR;
 		}
 		sprintf(*tok, "%c%c", *token, '~');
 		free_token(token);
@@ -94,8 +95,8 @@ static enum event_type read_token(char **tok)
 
 static int filter_cmp(const void *a, const void *b)
 {
-	const struct filter_type *ea = a;
-	const struct filter_type *eb = b;
+	const struct tep_filter_type *ea = a;
+	const struct tep_filter_type *eb = b;
 
 	if (ea->event_id < eb->event_id)
 		return -1;
@@ -106,11 +107,11 @@ static int filter_cmp(const void *a, const void *b)
 	return 0;
 }
 
-static struct filter_type *
-find_filter_type(struct event_filter *filter, int id)
+static struct tep_filter_type *
+find_filter_type(struct tep_event_filter *filter, int id)
 {
-	struct filter_type *filter_type;
-	struct filter_type key;
+	struct tep_filter_type *filter_type;
+	struct tep_filter_type key;
 
 	key.event_id = id;
 
@@ -122,10 +123,10 @@ find_filter_type(struct event_filter *filter, int id)
 	return filter_type;
 }
 
-static struct filter_type *
-add_filter_type(struct event_filter *filter, int id)
+static struct tep_filter_type *
+add_filter_type(struct tep_event_filter *filter, int id)
 {
-	struct filter_type *filter_type;
+	struct tep_filter_type *filter_type;
 	int i;
 
 	filter_type = find_filter_type(filter, id);
@@ -165,9 +166,9 @@ add_filter_type(struct event_filter *filter, int id)
  * tep_filter_alloc - create a new event filter
  * @pevent: The pevent that this filter is associated with
  */
-struct event_filter *tep_filter_alloc(struct tep_handle *pevent)
+struct tep_event_filter *tep_filter_alloc(struct tep_handle *pevent)
 {
-	struct event_filter *filter;
+	struct tep_event_filter *filter;
 
 	filter = malloc(sizeof(*filter));
 	if (filter == NULL)
@@ -180,44 +181,44 @@ struct event_filter *tep_filter_alloc(struct tep_handle *pevent)
 	return filter;
 }
 
-static struct filter_arg *allocate_arg(void)
+static struct tep_filter_arg *allocate_arg(void)
 {
-	return calloc(1, sizeof(struct filter_arg));
+	return calloc(1, sizeof(struct tep_filter_arg));
 }
 
-static void free_arg(struct filter_arg *arg)
+static void free_arg(struct tep_filter_arg *arg)
 {
 	if (!arg)
 		return;
 
 	switch (arg->type) {
-	case FILTER_ARG_NONE:
-	case FILTER_ARG_BOOLEAN:
+	case TEP_FILTER_ARG_NONE:
+	case TEP_FILTER_ARG_BOOLEAN:
 		break;
 
-	case FILTER_ARG_NUM:
+	case TEP_FILTER_ARG_NUM:
 		free_arg(arg->num.left);
 		free_arg(arg->num.right);
 		break;
 
-	case FILTER_ARG_EXP:
+	case TEP_FILTER_ARG_EXP:
 		free_arg(arg->exp.left);
 		free_arg(arg->exp.right);
 		break;
 
-	case FILTER_ARG_STR:
+	case TEP_FILTER_ARG_STR:
 		free(arg->str.val);
 		regfree(&arg->str.reg);
 		free(arg->str.buffer);
 		break;
 
-	case FILTER_ARG_VALUE:
-		if (arg->value.type == FILTER_STRING ||
-		    arg->value.type == FILTER_CHAR)
+	case TEP_FILTER_ARG_VALUE:
+		if (arg->value.type == TEP_FILTER_STRING ||
+		    arg->value.type == TEP_FILTER_CHAR)
 			free(arg->value.str);
 		break;
 
-	case FILTER_ARG_OP:
+	case TEP_FILTER_ARG_OP:
 		free_arg(arg->op.left);
 		free_arg(arg->op.right);
 	default:
@@ -228,7 +229,7 @@ static void free_arg(struct filter_arg *arg)
 }
 
 static int add_event(struct event_list **events,
-		      struct event_format *event)
+		     struct tep_event_format *event)
 {
 	struct event_list *list;
 
@@ -242,7 +243,7 @@ static int add_event(struct event_list **events,
 	return 0;
 }
 
-static int event_match(struct event_format *event,
+static int event_match(struct tep_event_format *event,
 		       regex_t *sreg, regex_t *ereg)
 {
 	if (sreg) {
@@ -258,7 +259,7 @@ static enum tep_errno
 find_event(struct tep_handle *pevent, struct event_list **events,
 	   char *sys_name, char *event_name)
 {
-	struct event_format *event;
+	struct tep_event_format *event;
 	regex_t ereg;
 	regex_t sreg;
 	int match = 0;
@@ -333,11 +334,11 @@ static void free_events(struct event_list *events)
 }
 
 static enum tep_errno
-create_arg_item(struct event_format *event, const char *token,
-		enum event_type type, struct filter_arg **parg, char *error_str)
+create_arg_item(struct tep_event_format *event, const char *token,
+		enum tep_event_type type, struct tep_filter_arg **parg, char *error_str)
 {
-	struct format_field *field;
-	struct filter_arg *arg;
+	struct tep_format_field *field;
+	struct tep_filter_arg *arg;
 
 	arg = allocate_arg();
 	if (arg == NULL) {
@@ -347,11 +348,11 @@ create_arg_item(struct event_format *event, const char *token,
 
 	switch (type) {
 
-	case EVENT_SQUOTE:
-	case EVENT_DQUOTE:
-		arg->type = FILTER_ARG_VALUE;
+	case TEP_EVENT_SQUOTE:
+	case TEP_EVENT_DQUOTE:
+		arg->type = TEP_FILTER_ARG_VALUE;
 		arg->value.type =
-			type == EVENT_DQUOTE ? FILTER_STRING : FILTER_CHAR;
+			type == TEP_EVENT_DQUOTE ? TEP_FILTER_STRING : TEP_FILTER_CHAR;
 		arg->value.str = strdup(token);
 		if (!arg->value.str) {
 			free_arg(arg);
@@ -359,11 +360,11 @@ create_arg_item(struct event_format *event, const char *token,
 			return TEP_ERRNO__MEM_ALLOC_FAILED;
 		}
 		break;
-	case EVENT_ITEM:
+	case TEP_EVENT_ITEM:
 		/* if it is a number, then convert it */
 		if (isdigit(token[0])) {
-			arg->type = FILTER_ARG_VALUE;
-			arg->value.type = FILTER_NUMBER;
+			arg->type = TEP_FILTER_ARG_VALUE;
+			arg->value.type = TEP_FILTER_NUMBER;
 			arg->value.val = strtoull(token, NULL, 0);
 			break;
 		}
@@ -377,12 +378,12 @@ create_arg_item(struct event_format *event, const char *token,
 				field = &cpu;
 			} else {
 				/* not a field, Make it false */
-				arg->type = FILTER_ARG_BOOLEAN;
-				arg->boolean.value = FILTER_FALSE;
+				arg->type = TEP_FILTER_ARG_BOOLEAN;
+				arg->boolean.value = TEP_FILTER_FALSE;
 				break;
 			}
 		}
-		arg->type = FILTER_ARG_FIELD;
+		arg->type = TEP_FILTER_ARG_FIELD;
 		arg->field.field = field;
 		break;
 	default:
@@ -394,82 +395,82 @@ create_arg_item(struct event_format *event, const char *token,
 	return 0;
 }
 
-static struct filter_arg *
-create_arg_op(enum filter_op_type btype)
+static struct tep_filter_arg *
+create_arg_op(enum tep_filter_op_type btype)
 {
-	struct filter_arg *arg;
+	struct tep_filter_arg *arg;
 
 	arg = allocate_arg();
 	if (!arg)
 		return NULL;
 
-	arg->type = FILTER_ARG_OP;
+	arg->type = TEP_FILTER_ARG_OP;
 	arg->op.type = btype;
 
 	return arg;
 }
 
-static struct filter_arg *
-create_arg_exp(enum filter_exp_type etype)
+static struct tep_filter_arg *
+create_arg_exp(enum tep_filter_exp_type etype)
 {
-	struct filter_arg *arg;
+	struct tep_filter_arg *arg;
 
 	arg = allocate_arg();
 	if (!arg)
 		return NULL;
 
-	arg->type = FILTER_ARG_EXP;
+	arg->type = TEP_FILTER_ARG_EXP;
 	arg->exp.type = etype;
 
 	return arg;
 }
 
-static struct filter_arg *
-create_arg_cmp(enum filter_cmp_type ctype)
+static struct tep_filter_arg *
+create_arg_cmp(enum tep_filter_cmp_type ctype)
 {
-	struct filter_arg *arg;
+	struct tep_filter_arg *arg;
 
 	arg = allocate_arg();
 	if (!arg)
 		return NULL;
 
 	/* Use NUM and change if necessary */
-	arg->type = FILTER_ARG_NUM;
+	arg->type = TEP_FILTER_ARG_NUM;
 	arg->num.type = ctype;
 
 	return arg;
 }
 
 static enum tep_errno
-add_right(struct filter_arg *op, struct filter_arg *arg, char *error_str)
+add_right(struct tep_filter_arg *op, struct tep_filter_arg *arg, char *error_str)
 {
-	struct filter_arg *left;
+	struct tep_filter_arg *left;
 	char *str;
 	int op_type;
 	int ret;
 
 	switch (op->type) {
-	case FILTER_ARG_EXP:
+	case TEP_FILTER_ARG_EXP:
 		if (op->exp.right)
 			goto out_fail;
 		op->exp.right = arg;
 		break;
 
-	case FILTER_ARG_OP:
+	case TEP_FILTER_ARG_OP:
 		if (op->op.right)
 			goto out_fail;
 		op->op.right = arg;
 		break;
 
-	case FILTER_ARG_NUM:
+	case TEP_FILTER_ARG_NUM:
 		if (op->op.right)
 			goto out_fail;
 		/*
 		 * The arg must be num, str, or field
 		 */
 		switch (arg->type) {
-		case FILTER_ARG_VALUE:
-		case FILTER_ARG_FIELD:
+		case TEP_FILTER_ARG_VALUE:
+		case TEP_FILTER_ARG_FIELD:
 			break;
 		default:
 			show_error(error_str, "Illegal rvalue");
@@ -481,20 +482,20 @@ add_right(struct filter_arg *op, struct filter_arg *arg, char *error_str)
 		 * convert this to a string or regex.
 		 */
 		switch (arg->value.type) {
-		case FILTER_CHAR:
+		case TEP_FILTER_CHAR:
 			/*
 			 * A char should be converted to number if
 			 * the string is 1 byte, and the compare
 			 * is not a REGEX.
 			 */
 			if (strlen(arg->value.str) == 1 &&
-			    op->num.type != FILTER_CMP_REGEX &&
-			    op->num.type != FILTER_CMP_NOT_REGEX) {
-				arg->value.type = FILTER_NUMBER;
+			    op->num.type != TEP_FILTER_CMP_REGEX &&
+			    op->num.type != TEP_FILTER_CMP_NOT_REGEX) {
+				arg->value.type = TEP_FILTER_NUMBER;
 				goto do_int;
 			}
 			/* fall through */
-		case FILTER_STRING:
+		case TEP_FILTER_STRING:
 
 			/* convert op to a string arg */
 			op_type = op->num.type;
@@ -508,16 +509,16 @@ add_right(struct filter_arg *op, struct filter_arg *arg, char *error_str)
 			 * If left arg was a field not found then
 			 * NULL the entire op.
 			 */
-			if (left->type == FILTER_ARG_BOOLEAN) {
+			if (left->type == TEP_FILTER_ARG_BOOLEAN) {
 				free_arg(left);
 				free_arg(arg);
-				op->type = FILTER_ARG_BOOLEAN;
-				op->boolean.value = FILTER_FALSE;
+				op->type = TEP_FILTER_ARG_BOOLEAN;
+				op->boolean.value = TEP_FILTER_FALSE;
 				break;
 			}
 
 			/* Left arg must be a field */
-			if (left->type != FILTER_ARG_FIELD) {
+			if (left->type != TEP_FILTER_ARG_FIELD) {
 				show_error(error_str,
 					   "Illegal lvalue for string comparison");
 				return TEP_ERRNO__ILLEGAL_LVALUE;
@@ -525,15 +526,15 @@ add_right(struct filter_arg *op, struct filter_arg *arg, char *error_str)
 
 			/* Make sure this is a valid string compare */
 			switch (op_type) {
-			case FILTER_CMP_EQ:
-				op_type = FILTER_CMP_MATCH;
+			case TEP_FILTER_CMP_EQ:
+				op_type = TEP_FILTER_CMP_MATCH;
 				break;
-			case FILTER_CMP_NE:
-				op_type = FILTER_CMP_NOT_MATCH;
+			case TEP_FILTER_CMP_NE:
+				op_type = TEP_FILTER_CMP_NOT_MATCH;
 				break;
 
-			case FILTER_CMP_REGEX:
-			case FILTER_CMP_NOT_REGEX:
+			case TEP_FILTER_CMP_REGEX:
+			case TEP_FILTER_CMP_NOT_REGEX:
 				ret = regcomp(&op->str.reg, str, REG_ICASE|REG_NOSUB);
 				if (ret) {
 					show_error(error_str,
@@ -548,7 +549,7 @@ add_right(struct filter_arg *op, struct filter_arg *arg, char *error_str)
 				return TEP_ERRNO__ILLEGAL_STRING_CMP;
 			}
 
-			op->type = FILTER_ARG_STR;
+			op->type = TEP_FILTER_ARG_STR;
 			op->str.type = op_type;
 			op->str.field = left->field.field;
 			op->str.val = strdup(str);
@@ -573,12 +574,12 @@ add_right(struct filter_arg *op, struct filter_arg *arg, char *error_str)
 
 			break;
 
-		case FILTER_NUMBER:
+		case TEP_FILTER_NUMBER:
 
  do_int:
 			switch (op->num.type) {
-			case FILTER_CMP_REGEX:
-			case FILTER_CMP_NOT_REGEX:
+			case TEP_FILTER_CMP_REGEX:
+			case TEP_FILTER_CMP_NOT_REGEX:
 				show_error(error_str,
 					   "Op not allowed with integers");
 				return TEP_ERRNO__ILLEGAL_INTEGER_CMP;
@@ -605,35 +606,35 @@ add_right(struct filter_arg *op, struct filter_arg *arg, char *error_str)
 	return TEP_ERRNO__SYNTAX_ERROR;
 }
 
-static struct filter_arg *
-rotate_op_right(struct filter_arg *a, struct filter_arg *b)
+static struct tep_filter_arg *
+rotate_op_right(struct tep_filter_arg *a, struct tep_filter_arg *b)
 {
-	struct filter_arg *arg;
+	struct tep_filter_arg *arg;
 
 	arg = a->op.right;
 	a->op.right = b;
 	return arg;
 }
 
-static enum tep_errno add_left(struct filter_arg *op, struct filter_arg *arg)
+static enum tep_errno add_left(struct tep_filter_arg *op, struct tep_filter_arg *arg)
 {
 	switch (op->type) {
-	case FILTER_ARG_EXP:
-		if (arg->type == FILTER_ARG_OP)
+	case TEP_FILTER_ARG_EXP:
+		if (arg->type == TEP_FILTER_ARG_OP)
 			arg = rotate_op_right(arg, op);
 		op->exp.left = arg;
 		break;
 
-	case FILTER_ARG_OP:
+	case TEP_FILTER_ARG_OP:
 		op->op.left = arg;
 		break;
-	case FILTER_ARG_NUM:
-		if (arg->type == FILTER_ARG_OP)
+	case TEP_FILTER_ARG_NUM:
+		if (arg->type == TEP_FILTER_ARG_OP)
 			arg = rotate_op_right(arg, op);
 
 		/* left arg of compares must be a field */
-		if (arg->type != FILTER_ARG_FIELD &&
-		    arg->type != FILTER_ARG_BOOLEAN)
+		if (arg->type != TEP_FILTER_ARG_FIELD &&
+		    arg->type != TEP_FILTER_ARG_BOOLEAN)
 			return TEP_ERRNO__INVALID_ARG_TYPE;
 		op->num.left = arg;
 		break;
@@ -652,91 +653,91 @@ enum op_type {
 };
 
 static enum op_type process_op(const char *token,
-			       enum filter_op_type *btype,
-			       enum filter_cmp_type *ctype,
-			       enum filter_exp_type *etype)
+			       enum tep_filter_op_type *btype,
+			       enum tep_filter_cmp_type *ctype,
+			       enum tep_filter_exp_type *etype)
 {
-	*btype = FILTER_OP_NOT;
-	*etype = FILTER_EXP_NONE;
-	*ctype = FILTER_CMP_NONE;
+	*btype = TEP_FILTER_OP_NOT;
+	*etype = TEP_FILTER_EXP_NONE;
+	*ctype = TEP_FILTER_CMP_NONE;
 
 	if (strcmp(token, "&&") == 0)
-		*btype = FILTER_OP_AND;
+		*btype = TEP_FILTER_OP_AND;
 	else if (strcmp(token, "||") == 0)
-		*btype = FILTER_OP_OR;
+		*btype = TEP_FILTER_OP_OR;
 	else if (strcmp(token, "!") == 0)
 		return OP_NOT;
 
-	if (*btype != FILTER_OP_NOT)
+	if (*btype != TEP_FILTER_OP_NOT)
 		return OP_BOOL;
 
 	/* Check for value expressions */
 	if (strcmp(token, "+") == 0) {
-		*etype = FILTER_EXP_ADD;
+		*etype = TEP_FILTER_EXP_ADD;
 	} else if (strcmp(token, "-") == 0) {
-		*etype = FILTER_EXP_SUB;
+		*etype = TEP_FILTER_EXP_SUB;
 	} else if (strcmp(token, "*") == 0) {
-		*etype = FILTER_EXP_MUL;
+		*etype = TEP_FILTER_EXP_MUL;
 	} else if (strcmp(token, "/") == 0) {
-		*etype = FILTER_EXP_DIV;
+		*etype = TEP_FILTER_EXP_DIV;
 	} else if (strcmp(token, "%") == 0) {
-		*etype = FILTER_EXP_MOD;
+		*etype = TEP_FILTER_EXP_MOD;
 	} else if (strcmp(token, ">>") == 0) {
-		*etype = FILTER_EXP_RSHIFT;
+		*etype = TEP_FILTER_EXP_RSHIFT;
 	} else if (strcmp(token, "<<") == 0) {
-		*etype = FILTER_EXP_LSHIFT;
+		*etype = TEP_FILTER_EXP_LSHIFT;
 	} else if (strcmp(token, "&") == 0) {
-		*etype = FILTER_EXP_AND;
+		*etype = TEP_FILTER_EXP_AND;
 	} else if (strcmp(token, "|") == 0) {
-		*etype = FILTER_EXP_OR;
+		*etype = TEP_FILTER_EXP_OR;
 	} else if (strcmp(token, "^") == 0) {
-		*etype = FILTER_EXP_XOR;
+		*etype = TEP_FILTER_EXP_XOR;
 	} else if (strcmp(token, "~") == 0)
-		*etype = FILTER_EXP_NOT;
+		*etype = TEP_FILTER_EXP_NOT;
 
-	if (*etype != FILTER_EXP_NONE)
+	if (*etype != TEP_FILTER_EXP_NONE)
 		return OP_EXP;
 
 	/* Check for compares */
 	if (strcmp(token, "==") == 0)
-		*ctype = FILTER_CMP_EQ;
+		*ctype = TEP_FILTER_CMP_EQ;
 	else if (strcmp(token, "!=") == 0)
-		*ctype = FILTER_CMP_NE;
+		*ctype = TEP_FILTER_CMP_NE;
 	else if (strcmp(token, "<") == 0)
-		*ctype = FILTER_CMP_LT;
+		*ctype = TEP_FILTER_CMP_LT;
 	else if (strcmp(token, ">") == 0)
-		*ctype = FILTER_CMP_GT;
+		*ctype = TEP_FILTER_CMP_GT;
 	else if (strcmp(token, "<=") == 0)
-		*ctype = FILTER_CMP_LE;
+		*ctype = TEP_FILTER_CMP_LE;
 	else if (strcmp(token, ">=") == 0)
-		*ctype = FILTER_CMP_GE;
+		*ctype = TEP_FILTER_CMP_GE;
 	else if (strcmp(token, "=~") == 0)
-		*ctype = FILTER_CMP_REGEX;
+		*ctype = TEP_FILTER_CMP_REGEX;
 	else if (strcmp(token, "!~") == 0)
-		*ctype = FILTER_CMP_NOT_REGEX;
+		*ctype = TEP_FILTER_CMP_NOT_REGEX;
 	else
 		return OP_NONE;
 
 	return OP_CMP;
 }
 
-static int check_op_done(struct filter_arg *arg)
+static int check_op_done(struct tep_filter_arg *arg)
 {
 	switch (arg->type) {
-	case FILTER_ARG_EXP:
+	case TEP_FILTER_ARG_EXP:
 		return arg->exp.right != NULL;
 
-	case FILTER_ARG_OP:
+	case TEP_FILTER_ARG_OP:
 		return arg->op.right != NULL;
 
-	case FILTER_ARG_NUM:
+	case TEP_FILTER_ARG_NUM:
 		return arg->num.right != NULL;
 
-	case FILTER_ARG_STR:
+	case TEP_FILTER_ARG_STR:
 		/* A string conversion is always done */
 		return 1;
 
-	case FILTER_ARG_BOOLEAN:
+	case TEP_FILTER_ARG_BOOLEAN:
 		/* field not found, is ok */
 		return 1;
 
@@ -752,14 +753,14 @@ enum filter_vals {
 };
 
 static enum tep_errno
-reparent_op_arg(struct filter_arg *parent, struct filter_arg *old_child,
-		struct filter_arg *arg, char *error_str)
+reparent_op_arg(struct tep_filter_arg *parent, struct tep_filter_arg *old_child,
+		struct tep_filter_arg *arg, char *error_str)
 {
-	struct filter_arg *other_child;
-	struct filter_arg **ptr;
+	struct tep_filter_arg *other_child;
+	struct tep_filter_arg **ptr;
 
-	if (parent->type != FILTER_ARG_OP &&
-	    arg->type != FILTER_ARG_OP) {
+	if (parent->type != TEP_FILTER_ARG_OP &&
+	    arg->type != TEP_FILTER_ARG_OP) {
 		show_error(error_str, "can not reparent other than OP");
 		return TEP_ERRNO__REPARENT_NOT_OP;
 	}
@@ -804,7 +805,7 @@ reparent_op_arg(struct filter_arg *parent, struct filter_arg *old_child,
 }
 
 /* Returns either filter_vals (success) or tep_errno (failfure) */
-static int test_arg(struct filter_arg *parent, struct filter_arg *arg,
+static int test_arg(struct tep_filter_arg *parent, struct tep_filter_arg *arg,
 		    char *error_str)
 {
 	int lval, rval;
@@ -812,16 +813,16 @@ static int test_arg(struct filter_arg *parent, struct filter_arg *arg,
 	switch (arg->type) {
 
 		/* bad case */
-	case FILTER_ARG_BOOLEAN:
+	case TEP_FILTER_ARG_BOOLEAN:
 		return FILTER_VAL_FALSE + arg->boolean.value;
 
 		/* good cases: */
-	case FILTER_ARG_STR:
-	case FILTER_ARG_VALUE:
-	case FILTER_ARG_FIELD:
+	case TEP_FILTER_ARG_STR:
+	case TEP_FILTER_ARG_VALUE:
+	case TEP_FILTER_ARG_FIELD:
 		return FILTER_VAL_NORM;
 
-	case FILTER_ARG_EXP:
+	case TEP_FILTER_ARG_EXP:
 		lval = test_arg(arg, arg->exp.left, error_str);
 		if (lval != FILTER_VAL_NORM)
 			return lval;
@@ -830,7 +831,7 @@ static int test_arg(struct filter_arg *parent, struct filter_arg *arg,
 			return rval;
 		return FILTER_VAL_NORM;
 
-	case FILTER_ARG_NUM:
+	case TEP_FILTER_ARG_NUM:
 		lval = test_arg(arg, arg->num.left, error_str);
 		if (lval != FILTER_VAL_NORM)
 			return lval;
@@ -839,14 +840,14 @@ static int test_arg(struct filter_arg *parent, struct filter_arg *arg,
 			return rval;
 		return FILTER_VAL_NORM;
 
-	case FILTER_ARG_OP:
-		if (arg->op.type != FILTER_OP_NOT) {
+	case TEP_FILTER_ARG_OP:
+		if (arg->op.type != TEP_FILTER_OP_NOT) {
 			lval = test_arg(arg, arg->op.left, error_str);
 			switch (lval) {
 			case FILTER_VAL_NORM:
 				break;
 			case FILTER_VAL_TRUE:
-				if (arg->op.type == FILTER_OP_OR)
+				if (arg->op.type == TEP_FILTER_OP_OR)
 					return FILTER_VAL_TRUE;
 				rval = test_arg(arg, arg->op.right, error_str);
 				if (rval != FILTER_VAL_NORM)
@@ -856,7 +857,7 @@ static int test_arg(struct filter_arg *parent, struct filter_arg *arg,
 						       error_str);
 
 			case FILTER_VAL_FALSE:
-				if (arg->op.type == FILTER_OP_AND)
+				if (arg->op.type == TEP_FILTER_OP_AND)
 					return FILTER_VAL_FALSE;
 				rval = test_arg(arg, arg->op.right, error_str);
 				if (rval != FILTER_VAL_NORM)
@@ -877,18 +878,18 @@ static int test_arg(struct filter_arg *parent, struct filter_arg *arg,
 			break;
 
 		case FILTER_VAL_TRUE:
-			if (arg->op.type == FILTER_OP_OR)
+			if (arg->op.type == TEP_FILTER_OP_OR)
 				return FILTER_VAL_TRUE;
-			if (arg->op.type == FILTER_OP_NOT)
+			if (arg->op.type == TEP_FILTER_OP_NOT)
 				return FILTER_VAL_FALSE;
 
 			return reparent_op_arg(parent, arg, arg->op.left,
 					       error_str);
 
 		case FILTER_VAL_FALSE:
-			if (arg->op.type == FILTER_OP_AND)
+			if (arg->op.type == TEP_FILTER_OP_AND)
 				return FILTER_VAL_FALSE;
-			if (arg->op.type == FILTER_OP_NOT)
+			if (arg->op.type == TEP_FILTER_OP_NOT)
 				return FILTER_VAL_TRUE;
 
 			return reparent_op_arg(parent, arg, arg->op.left,
@@ -904,8 +905,8 @@ static int test_arg(struct filter_arg *parent, struct filter_arg *arg,
 }
 
 /* Remove any unknown event fields */
-static int collapse_tree(struct filter_arg *arg,
-			 struct filter_arg **arg_collapsed, char *error_str)
+static int collapse_tree(struct tep_filter_arg *arg,
+			 struct tep_filter_arg **arg_collapsed, char *error_str)
 {
 	int ret;
 
@@ -919,7 +920,7 @@ static int collapse_tree(struct filter_arg *arg,
 		free_arg(arg);
 		arg = allocate_arg();
 		if (arg) {
-			arg->type = FILTER_ARG_BOOLEAN;
+			arg->type = TEP_FILTER_ARG_BOOLEAN;
 			arg->boolean.value = ret == FILTER_VAL_TRUE;
 		} else {
 			show_error(error_str, "Failed to allocate filter arg");
@@ -939,19 +940,19 @@ static int collapse_tree(struct filter_arg *arg,
 }
 
 static enum tep_errno
-process_filter(struct event_format *event, struct filter_arg **parg,
+process_filter(struct tep_event_format *event, struct tep_filter_arg **parg,
 	       char *error_str, int not)
 {
-	enum event_type type;
+	enum tep_event_type type;
 	char *token = NULL;
-	struct filter_arg *current_op = NULL;
-	struct filter_arg *current_exp = NULL;
-	struct filter_arg *left_item = NULL;
-	struct filter_arg *arg = NULL;
+	struct tep_filter_arg *current_op = NULL;
+	struct tep_filter_arg *current_exp = NULL;
+	struct tep_filter_arg *left_item = NULL;
+	struct tep_filter_arg *arg = NULL;
 	enum op_type op_type;
-	enum filter_op_type btype;
-	enum filter_exp_type etype;
-	enum filter_cmp_type ctype;
+	enum tep_filter_op_type btype;
+	enum tep_filter_exp_type etype;
+	enum tep_filter_cmp_type ctype;
 	enum tep_errno ret;
 
 	*parg = NULL;
@@ -960,9 +961,9 @@ process_filter(struct event_format *event, struct filter_arg **parg,
 		free(token);
 		type = read_token(&token);
 		switch (type) {
-		case EVENT_SQUOTE:
-		case EVENT_DQUOTE:
-		case EVENT_ITEM:
+		case TEP_EVENT_SQUOTE:
+		case TEP_EVENT_DQUOTE:
+		case TEP_EVENT_ITEM:
 			ret = create_arg_item(event, token, type, &arg, error_str);
 			if (ret < 0)
 				goto fail;
@@ -987,7 +988,7 @@ process_filter(struct event_format *event, struct filter_arg **parg,
 			arg = NULL;
 			break;
 
-		case EVENT_DELIM:
+		case TEP_EVENT_DELIM:
 			if (*token == ',') {
 				show_error(error_str, "Illegal token ','");
 				ret = TEP_ERRNO__ILLEGAL_TOKEN;
@@ -1054,7 +1055,7 @@ process_filter(struct event_format *event, struct filter_arg **parg,
 			}
 			break;
 
-		case EVENT_OP:
+		case TEP_EVENT_OP:
 			op_type = process_op(token, &btype, &ctype, &etype);
 
 			/* All expect a left arg except for NOT */
@@ -1139,14 +1140,14 @@ process_filter(struct event_format *event, struct filter_arg **parg,
 			if (ret < 0)
 				goto fail_syntax;
 			break;
-		case EVENT_NONE:
+		case TEP_EVENT_NONE:
 			break;
-		case EVENT_ERROR:
+		case TEP_EVENT_ERROR:
 			goto fail_alloc;
 		default:
 			goto fail_syntax;
 		}
-	} while (type != EVENT_NONE);
+	} while (type != TEP_EVENT_NONE);
 
 	if (!current_op && !current_exp)
 		goto fail_syntax;
@@ -1179,8 +1180,8 @@ process_filter(struct event_format *event, struct filter_arg **parg,
 }
 
 static enum tep_errno
-process_event(struct event_format *event, const char *filter_str,
-	      struct filter_arg **parg, char *error_str)
+process_event(struct tep_event_format *event, const char *filter_str,
+	      struct tep_filter_arg **parg, char *error_str)
 {
 	int ret;
 
@@ -1196,19 +1197,19 @@ process_event(struct event_format *event, const char *filter_str,
 		if (*parg == NULL)
 			return TEP_ERRNO__MEM_ALLOC_FAILED;
 
-		(*parg)->type = FILTER_ARG_BOOLEAN;
-		(*parg)->boolean.value = FILTER_FALSE;
+		(*parg)->type = TEP_FILTER_ARG_BOOLEAN;
+		(*parg)->boolean.value = TEP_FILTER_FALSE;
 	}
 
 	return 0;
 }
 
 static enum tep_errno
-filter_event(struct event_filter *filter, struct event_format *event,
+filter_event(struct tep_event_filter *filter, struct tep_event_format *event,
 	     const char *filter_str, char *error_str)
 {
-	struct filter_type *filter_type;
-	struct filter_arg *arg;
+	struct tep_filter_type *filter_type;
+	struct tep_filter_arg *arg;
 	enum tep_errno ret;
 
 	if (filter_str) {
@@ -1222,8 +1223,8 @@ filter_event(struct event_filter *filter, struct event_format *event,
 		if (arg == NULL)
 			return TEP_ERRNO__MEM_ALLOC_FAILED;
 
-		arg->type = FILTER_ARG_BOOLEAN;
-		arg->boolean.value = FILTER_TRUE;
+		arg->type = TEP_FILTER_ARG_BOOLEAN;
+		arg->boolean.value = TEP_FILTER_TRUE;
 	}
 
 	filter_type = add_filter_type(filter, event->id);
@@ -1237,7 +1238,7 @@ filter_event(struct event_filter *filter, struct event_format *event,
 	return 0;
 }
 
-static void filter_init_error_buf(struct event_filter *filter)
+static void filter_init_error_buf(struct tep_event_filter *filter)
 {
 	/* clear buffer to reset show error */
 	tep_buffer_init("", 0);
@@ -1253,7 +1254,7 @@ static void filter_init_error_buf(struct event_filter *filter)
  * negative error code.  Use tep_filter_strerror() to see
  * actual error message in case of error.
  */
-enum tep_errno tep_filter_add_filter_str(struct event_filter *filter,
+enum tep_errno tep_filter_add_filter_str(struct tep_event_filter *filter,
 					 const char *filter_str)
 {
 	struct tep_handle *pevent = filter->pevent;
@@ -1351,7 +1352,7 @@ enum tep_errno tep_filter_add_filter_str(struct event_filter *filter,
 	return rtn;
 }
 
-static void free_filter_type(struct filter_type *filter_type)
+static void free_filter_type(struct tep_filter_type *filter_type)
 {
 	free_arg(filter_type->filter);
 }
@@ -1365,7 +1366,7 @@ static void free_filter_type(struct filter_type *filter_type)
  *
  * Returns 0 if message was filled successfully, -1 if error
  */
-int tep_filter_strerror(struct event_filter *filter, enum tep_errno err,
+int tep_filter_strerror(struct tep_event_filter *filter, enum tep_errno err,
 			char *buf, size_t buflen)
 {
 	if (err <= __TEP_ERRNO__START || err >= __TEP_ERRNO__END)
@@ -1393,10 +1394,10 @@ int tep_filter_strerror(struct event_filter *filter, enum tep_errno err,
  * Returns 1: if an event was removed
  *   0: if the event was not found
  */
-int tep_filter_remove_event(struct event_filter *filter,
+int tep_filter_remove_event(struct tep_event_filter *filter,
 			    int event_id)
 {
-	struct filter_type *filter_type;
+	struct tep_filter_type *filter_type;
 	unsigned long len;
 
 	if (!filter->filters)
@@ -1428,7 +1429,7 @@ int tep_filter_remove_event(struct event_filter *filter,
  *
  * Removes all filters from a filter and resets it.
  */
-void tep_filter_reset(struct event_filter *filter)
+void tep_filter_reset(struct tep_event_filter *filter)
 {
 	int i;
 
@@ -1440,7 +1441,7 @@ void tep_filter_reset(struct event_filter *filter)
 	filter->event_filters = NULL;
 }
 
-void tep_filter_free(struct event_filter *filter)
+void tep_filter_free(struct tep_event_filter *filter)
 {
 	tep_unref(filter->pevent);
 
@@ -1449,14 +1450,14 @@ void tep_filter_free(struct event_filter *filter)
 	free(filter);
 }
 
-static char *arg_to_str(struct event_filter *filter, struct filter_arg *arg);
+static char *arg_to_str(struct tep_event_filter *filter, struct tep_filter_arg *arg);
 
-static int copy_filter_type(struct event_filter *filter,
-			     struct event_filter *source,
-			     struct filter_type *filter_type)
+static int copy_filter_type(struct tep_event_filter *filter,
+			    struct tep_event_filter *source,
+			    struct tep_filter_type *filter_type)
 {
-	struct filter_arg *arg;
-	struct event_format *event;
+	struct tep_filter_arg *arg;
+	struct tep_event_format *event;
 	const char *sys;
 	const char *name;
 	char *str;
@@ -1478,7 +1479,7 @@ static int copy_filter_type(struct event_filter *filter,
 		if (arg == NULL)
 			return -1;
 
-		arg->type = FILTER_ARG_BOOLEAN;
+		arg->type = TEP_FILTER_ARG_BOOLEAN;
 		if (strcmp(str, "TRUE") == 0)
 			arg->boolean.value = 1;
 		else
@@ -1507,7 +1508,7 @@ static int copy_filter_type(struct event_filter *filter,
  *
  * Returns 0 on success and -1 if not all filters were copied
  */
-int tep_filter_copy(struct event_filter *dest, struct event_filter *source)
+int tep_filter_copy(struct tep_event_filter *dest, struct tep_event_filter *source)
 {
 	int ret = 0;
 	int i;
@@ -1533,14 +1534,14 @@ int tep_filter_copy(struct event_filter *dest, struct event_filter *source)
  * Returns 0 on success and -1 if there was a problem updating, but
  *   events may have still been updated on error.
  */
-int tep_update_trivial(struct event_filter *dest, struct event_filter *source,
-		       enum filter_trivial_type type)
+int tep_update_trivial(struct tep_event_filter *dest, struct tep_event_filter *source,
+		       enum tep_filter_trivial_type type)
 {
 	struct tep_handle *src_pevent;
 	struct tep_handle *dest_pevent;
-	struct event_format *event;
-	struct filter_type *filter_type;
-	struct filter_arg *arg;
+	struct tep_event_format *event;
+	struct tep_filter_type *filter_type;
+	struct tep_filter_arg *arg;
 	char *str;
 	int i;
 
@@ -1554,10 +1555,10 @@ int tep_update_trivial(struct event_filter *dest, struct event_filter *source,
 	for (i = 0; i < dest->filters; i++) {
 		filter_type = &dest->event_filters[i];
 		arg = filter_type->filter;
-		if (arg->type != FILTER_ARG_BOOLEAN)
+		if (arg->type != TEP_FILTER_ARG_BOOLEAN)
 			continue;
-		if ((arg->boolean.value && type == FILTER_TRIVIAL_FALSE) ||
-		    (!arg->boolean.value && type == FILTER_TRIVIAL_TRUE))
+		if ((arg->boolean.value && type == TEP_FILTER_TRIVIAL_FALSE) ||
+		    (!arg->boolean.value && type == TEP_FILTER_TRIVIAL_TRUE))
 			continue;
 
 		event = filter_type->event;
@@ -1592,10 +1593,10 @@ int tep_update_trivial(struct event_filter *dest, struct event_filter *source,
  *
  * Returns 0 on success and -1 if there was a problem.
  */
-int tep_filter_clear_trivial(struct event_filter *filter,
-			     enum filter_trivial_type type)
+int tep_filter_clear_trivial(struct tep_event_filter *filter,
+			     enum tep_filter_trivial_type type)
 {
-	struct filter_type *filter_type;
+	struct tep_filter_type *filter_type;
 	int count = 0;
 	int *ids = NULL;
 	int i;
@@ -1611,14 +1612,14 @@ int tep_filter_clear_trivial(struct event_filter *filter,
 		int *new_ids;
 
 		filter_type = &filter->event_filters[i];
-		if (filter_type->filter->type != FILTER_ARG_BOOLEAN)
+		if (filter_type->filter->type != TEP_FILTER_ARG_BOOLEAN)
 			continue;
 		switch (type) {
-		case FILTER_TRIVIAL_FALSE:
+		case TEP_FILTER_TRIVIAL_FALSE:
 			if (filter_type->filter->boolean.value)
 				continue;
 			break;
-		case FILTER_TRIVIAL_TRUE:
+		case TEP_FILTER_TRIVIAL_TRUE:
 			if (!filter_type->filter->boolean.value)
 				continue;
 		default:
@@ -1654,11 +1655,11 @@ int tep_filter_clear_trivial(struct event_filter *filter,
  * Returns 1 if the event contains a matching trivial type
  *  otherwise 0.
  */
-int tep_filter_event_has_trivial(struct event_filter *filter,
+int tep_filter_event_has_trivial(struct tep_event_filter *filter,
 				 int event_id,
-				 enum filter_trivial_type type)
+				 enum tep_filter_trivial_type type)
 {
-	struct filter_type *filter_type;
+	struct tep_filter_type *filter_type;
 
 	if (!filter->filters)
 		return 0;
@@ -1668,25 +1669,25 @@ int tep_filter_event_has_trivial(struct event_filter *filter,
 	if (!filter_type)
 		return 0;
 
-	if (filter_type->filter->type != FILTER_ARG_BOOLEAN)
+	if (filter_type->filter->type != TEP_FILTER_ARG_BOOLEAN)
 		return 0;
 
 	switch (type) {
-	case FILTER_TRIVIAL_FALSE:
+	case TEP_FILTER_TRIVIAL_FALSE:
 		return !filter_type->filter->boolean.value;
 
-	case FILTER_TRIVIAL_TRUE:
+	case TEP_FILTER_TRIVIAL_TRUE:
 		return filter_type->filter->boolean.value;
 	default:
 		return 1;
 	}
 }
 
-static int test_filter(struct event_format *event, struct filter_arg *arg,
+static int test_filter(struct tep_event_format *event, struct tep_filter_arg *arg,
 		       struct tep_record *record, enum tep_errno *err);
 
 static const char *
-get_comm(struct event_format *event, struct tep_record *record)
+get_comm(struct tep_event_format *event, struct tep_record *record)
 {
 	const char *comm;
 	int pid;
@@ -1697,8 +1698,8 @@ get_comm(struct event_format *event, struct tep_record *record)
 }
 
 static unsigned long long
-get_value(struct event_format *event,
-	  struct format_field *field, struct tep_record *record)
+get_value(struct tep_event_format *event,
+	  struct tep_format_field *field, struct tep_record *record)
 {
 	unsigned long long val;
 
@@ -1716,7 +1717,7 @@ get_value(struct event_format *event,
 
 	tep_read_number_field(field, record->data, &val);
 
-	if (!(field->flags & FIELD_IS_SIGNED))
+	if (!(field->flags & TEP_FIELD_IS_SIGNED))
 		return val;
 
 	switch (field->size) {
@@ -1733,11 +1734,11 @@ get_value(struct event_format *event,
 }
 
 static unsigned long long
-get_arg_value(struct event_format *event, struct filter_arg *arg,
+get_arg_value(struct tep_event_format *event, struct tep_filter_arg *arg,
 	      struct tep_record *record, enum tep_errno *err);
 
 static unsigned long long
-get_exp_value(struct event_format *event, struct filter_arg *arg,
+get_exp_value(struct tep_event_format *event, struct tep_filter_arg *arg,
 	      struct tep_record *record, enum tep_errno *err)
 {
 	unsigned long long lval, rval;
@@ -1753,37 +1754,37 @@ get_exp_value(struct event_format *event, struct filter_arg *arg,
 	}
 
 	switch (arg->exp.type) {
-	case FILTER_EXP_ADD:
+	case TEP_FILTER_EXP_ADD:
 		return lval + rval;
 
-	case FILTER_EXP_SUB:
+	case TEP_FILTER_EXP_SUB:
 		return lval - rval;
 
-	case FILTER_EXP_MUL:
+	case TEP_FILTER_EXP_MUL:
 		return lval * rval;
 
-	case FILTER_EXP_DIV:
+	case TEP_FILTER_EXP_DIV:
 		return lval / rval;
 
-	case FILTER_EXP_MOD:
+	case TEP_FILTER_EXP_MOD:
 		return lval % rval;
 
-	case FILTER_EXP_RSHIFT:
+	case TEP_FILTER_EXP_RSHIFT:
 		return lval >> rval;
 
-	case FILTER_EXP_LSHIFT:
+	case TEP_FILTER_EXP_LSHIFT:
 		return lval << rval;
 
-	case FILTER_EXP_AND:
+	case TEP_FILTER_EXP_AND:
 		return lval & rval;
 
-	case FILTER_EXP_OR:
+	case TEP_FILTER_EXP_OR:
 		return lval | rval;
 
-	case FILTER_EXP_XOR:
+	case TEP_FILTER_EXP_XOR:
 		return lval ^ rval;
 
-	case FILTER_EXP_NOT:
+	case TEP_FILTER_EXP_NOT:
 	default:
 		if (!*err)
 			*err = TEP_ERRNO__INVALID_EXP_TYPE;
@@ -1792,21 +1793,21 @@ get_exp_value(struct event_format *event, struct filter_arg *arg,
 }
 
 static unsigned long long
-get_arg_value(struct event_format *event, struct filter_arg *arg,
+get_arg_value(struct tep_event_format *event, struct tep_filter_arg *arg,
 	      struct tep_record *record, enum tep_errno *err)
 {
 	switch (arg->type) {
-	case FILTER_ARG_FIELD:
+	case TEP_FILTER_ARG_FIELD:
 		return get_value(event, arg->field.field, record);
 
-	case FILTER_ARG_VALUE:
-		if (arg->value.type != FILTER_NUMBER) {
+	case TEP_FILTER_ARG_VALUE:
+		if (arg->value.type != TEP_FILTER_NUMBER) {
 			if (!*err)
 				*err = TEP_ERRNO__NOT_A_NUMBER;
 		}
 		return arg->value.val;
 
-	case FILTER_ARG_EXP:
+	case TEP_FILTER_ARG_EXP:
 		return get_exp_value(event, arg, record, err);
 
 	default:
@@ -1816,7 +1817,7 @@ get_arg_value(struct event_format *event, struct filter_arg *arg,
 	return 0;
 }
 
-static int test_num(struct event_format *event, struct filter_arg *arg,
+static int test_num(struct tep_event_format *event, struct tep_filter_arg *arg,
 		    struct tep_record *record, enum tep_errno *err)
 {
 	unsigned long long lval, rval;
@@ -1832,22 +1833,22 @@ static int test_num(struct event_format *event, struct filter_arg *arg,
 	}
 
 	switch (arg->num.type) {
-	case FILTER_CMP_EQ:
+	case TEP_FILTER_CMP_EQ:
 		return lval == rval;
 
-	case FILTER_CMP_NE:
+	case TEP_FILTER_CMP_NE:
 		return lval != rval;
 
-	case FILTER_CMP_GT:
+	case TEP_FILTER_CMP_GT:
 		return lval > rval;
 
-	case FILTER_CMP_LT:
+	case TEP_FILTER_CMP_LT:
 		return lval < rval;
 
-	case FILTER_CMP_GE:
+	case TEP_FILTER_CMP_GE:
 		return lval >= rval;
 
-	case FILTER_CMP_LE:
+	case TEP_FILTER_CMP_LE:
 		return lval <= rval;
 
 	default:
@@ -1857,9 +1858,9 @@ static int test_num(struct event_format *event, struct filter_arg *arg,
 	}
 }
 
-static const char *get_field_str(struct filter_arg *arg, struct tep_record *record)
+static const char *get_field_str(struct tep_filter_arg *arg, struct tep_record *record)
 {
-	struct event_format *event;
+	struct tep_event_format *event;
 	struct tep_handle *pevent;
 	unsigned long long addr;
 	const char *val = NULL;
@@ -1867,11 +1868,11 @@ static const char *get_field_str(struct filter_arg *arg, struct tep_record *reco
 	char hex[64];
 
 	/* If the field is not a string convert it */
-	if (arg->str.field->flags & FIELD_IS_STRING) {
+	if (arg->str.field->flags & TEP_FIELD_IS_STRING) {
 		val = record->data + arg->str.field->offset;
 		size = arg->str.field->size;
 
-		if (arg->str.field->flags & FIELD_IS_DYNAMIC) {
+		if (arg->str.field->flags & TEP_FIELD_IS_DYNAMIC) {
 			addr = *(unsigned int *)val;
 			val = record->data + (addr & 0xffff);
 			size = addr >> 16;
@@ -1893,7 +1894,7 @@ static const char *get_field_str(struct filter_arg *arg, struct tep_record *reco
 		pevent = event->pevent;
 		addr = get_value(event, arg->str.field, record);
 
-		if (arg->str.field->flags & (FIELD_IS_POINTER | FIELD_IS_LONG))
+		if (arg->str.field->flags & (TEP_FIELD_IS_POINTER | TEP_FIELD_IS_LONG))
 			/* convert to a kernel symbol */
 			val = tep_find_function(pevent, addr);
 
@@ -1907,7 +1908,7 @@ static const char *get_field_str(struct filter_arg *arg, struct tep_record *reco
 	return val;
 }
 
-static int test_str(struct event_format *event, struct filter_arg *arg,
+static int test_str(struct tep_event_format *event, struct tep_filter_arg *arg,
 		    struct tep_record *record, enum tep_errno *err)
 {
 	const char *val;
@@ -1918,17 +1919,17 @@ static int test_str(struct event_format *event, struct filter_arg *arg,
 		val = get_field_str(arg, record);
 
 	switch (arg->str.type) {
-	case FILTER_CMP_MATCH:
+	case TEP_FILTER_CMP_MATCH:
 		return strcmp(val, arg->str.val) == 0;
 
-	case FILTER_CMP_NOT_MATCH:
+	case TEP_FILTER_CMP_NOT_MATCH:
 		return strcmp(val, arg->str.val) != 0;
 
-	case FILTER_CMP_REGEX:
+	case TEP_FILTER_CMP_REGEX:
 		/* Returns zero on match */
 		return !regexec(&arg->str.reg, val, 0, NULL, 0);
 
-	case FILTER_CMP_NOT_REGEX:
+	case TEP_FILTER_CMP_NOT_REGEX:
 		return regexec(&arg->str.reg, val, 0, NULL, 0);
 
 	default:
@@ -1938,19 +1939,19 @@ static int test_str(struct event_format *event, struct filter_arg *arg,
 	}
 }
 
-static int test_op(struct event_format *event, struct filter_arg *arg,
+static int test_op(struct tep_event_format *event, struct tep_filter_arg *arg,
 		   struct tep_record *record, enum tep_errno *err)
 {
 	switch (arg->op.type) {
-	case FILTER_OP_AND:
+	case TEP_FILTER_OP_AND:
 		return test_filter(event, arg->op.left, record, err) &&
 			test_filter(event, arg->op.right, record, err);
 
-	case FILTER_OP_OR:
+	case TEP_FILTER_OP_OR:
 		return test_filter(event, arg->op.left, record, err) ||
 			test_filter(event, arg->op.right, record, err);
 
-	case FILTER_OP_NOT:
+	case TEP_FILTER_OP_NOT:
 		return !test_filter(event, arg->op.right, record, err);
 
 	default:
@@ -1960,7 +1961,7 @@ static int test_op(struct event_format *event, struct filter_arg *arg,
 	}
 }
 
-static int test_filter(struct event_format *event, struct filter_arg *arg,
+static int test_filter(struct tep_event_format *event, struct tep_filter_arg *arg,
 		       struct tep_record *record, enum tep_errno *err)
 {
 	if (*err) {
@@ -1971,22 +1972,22 @@ static int test_filter(struct event_format *event, struct filter_arg *arg,
 	}
 
 	switch (arg->type) {
-	case FILTER_ARG_BOOLEAN:
+	case TEP_FILTER_ARG_BOOLEAN:
 		/* easy case */
 		return arg->boolean.value;
 
-	case FILTER_ARG_OP:
+	case TEP_FILTER_ARG_OP:
 		return test_op(event, arg, record, err);
 
-	case FILTER_ARG_NUM:
+	case TEP_FILTER_ARG_NUM:
 		return test_num(event, arg, record, err);
 
-	case FILTER_ARG_STR:
+	case TEP_FILTER_ARG_STR:
 		return test_str(event, arg, record, err);
 
-	case FILTER_ARG_EXP:
-	case FILTER_ARG_VALUE:
-	case FILTER_ARG_FIELD:
+	case TEP_FILTER_ARG_EXP:
+	case TEP_FILTER_ARG_VALUE:
+	case TEP_FILTER_ARG_FIELD:
 		/*
 		 * Expressions, fields and values evaluate
 		 * to true if they return non zero
@@ -2008,9 +2009,9 @@ static int test_filter(struct event_format *event, struct filter_arg *arg,
  * Returns 1 if filter found for @event_id
  *   otherwise 0;
  */
-int tep_event_filtered(struct event_filter *filter, int event_id)
+int tep_event_filtered(struct tep_event_filter *filter, int event_id)
 {
-	struct filter_type *filter_type;
+	struct tep_filter_type *filter_type;
 
 	if (!filter->filters)
 		return 0;
@@ -2032,11 +2033,11 @@ int tep_event_filtered(struct event_filter *filter, int event_id)
  * NO_FILTER - if no filters exist
  * otherwise - error occurred during test
  */
-enum tep_errno tep_filter_match(struct event_filter *filter,
+enum tep_errno tep_filter_match(struct tep_event_filter *filter,
 				struct tep_record *record)
 {
 	struct tep_handle *pevent = filter->pevent;
-	struct filter_type *filter_type;
+	struct tep_filter_type *filter_type;
 	int event_id;
 	int ret;
 	enum tep_errno err = 0;
@@ -2059,7 +2060,7 @@ enum tep_errno tep_filter_match(struct event_filter *filter,
 	return ret ? TEP_ERRNO__FILTER_MATCH : TEP_ERRNO__FILTER_MISS;
 }
 
-static char *op_to_str(struct event_filter *filter, struct filter_arg *arg)
+static char *op_to_str(struct tep_event_filter *filter, struct tep_filter_arg *arg)
 {
 	char *str = NULL;
 	char *left = NULL;
@@ -2070,10 +2071,10 @@ static char *op_to_str(struct event_filter *filter, struct filter_arg *arg)
 	int val;
 
 	switch (arg->op.type) {
-	case FILTER_OP_AND:
+	case TEP_FILTER_OP_AND:
 		op = "&&";
 		/* fall through */
-	case FILTER_OP_OR:
+	case TEP_FILTER_OP_OR:
 		if (!op)
 			op = "||";
 
@@ -2094,8 +2095,8 @@ static char *op_to_str(struct event_filter *filter, struct filter_arg *arg)
 			right_val = 0;
 
 		if (left_val >= 0) {
-			if ((arg->op.type == FILTER_OP_AND && !left_val) ||
-			    (arg->op.type == FILTER_OP_OR && left_val)) {
+			if ((arg->op.type == TEP_FILTER_OP_AND && !left_val) ||
+			    (arg->op.type == TEP_FILTER_OP_OR && left_val)) {
 				/* Just return left value */
 				str = left;
 				left = NULL;
@@ -2105,10 +2106,10 @@ static char *op_to_str(struct event_filter *filter, struct filter_arg *arg)
 				/* just evaluate this. */
 				val = 0;
 				switch (arg->op.type) {
-				case FILTER_OP_AND:
+				case TEP_FILTER_OP_AND:
 					val = left_val && right_val;
 					break;
-				case FILTER_OP_OR:
+				case TEP_FILTER_OP_OR:
 					val = left_val || right_val;
 					break;
 				default:
@@ -2119,8 +2120,8 @@ static char *op_to_str(struct event_filter *filter, struct filter_arg *arg)
 			}
 		}
 		if (right_val >= 0) {
-			if ((arg->op.type == FILTER_OP_AND && !right_val) ||
-			    (arg->op.type == FILTER_OP_OR && right_val)) {
+			if ((arg->op.type == TEP_FILTER_OP_AND && !right_val) ||
+			    (arg->op.type == TEP_FILTER_OP_OR && right_val)) {
 				/* Just return right value */
 				str = right;
 				right = NULL;
@@ -2135,7 +2136,7 @@ static char *op_to_str(struct event_filter *filter, struct filter_arg *arg)
 		asprintf(&str, "(%s) %s (%s)", left, op, right);
 		break;
 
-	case FILTER_OP_NOT:
+	case TEP_FILTER_OP_NOT:
 		op = "!";
 		right = arg_to_str(filter, arg->op.right);
 		if (!right)
@@ -2163,7 +2164,7 @@ static char *op_to_str(struct event_filter *filter, struct filter_arg *arg)
 	return str;
 }
 
-static char *val_to_str(struct event_filter *filter, struct filter_arg *arg)
+static char *val_to_str(struct tep_event_filter *filter, struct tep_filter_arg *arg)
 {
 	char *str = NULL;
 
@@ -2172,12 +2173,12 @@ static char *val_to_str(struct event_filter *filter, struct filter_arg *arg)
 	return str;
 }
 
-static char *field_to_str(struct event_filter *filter, struct filter_arg *arg)
+static char *field_to_str(struct tep_event_filter *filter, struct tep_filter_arg *arg)
 {
 	return strdup(arg->field.field->name);
 }
 
-static char *exp_to_str(struct event_filter *filter, struct filter_arg *arg)
+static char *exp_to_str(struct tep_event_filter *filter, struct tep_filter_arg *arg)
 {
 	char *lstr;
 	char *rstr;
@@ -2190,34 +2191,34 @@ static char *exp_to_str(struct event_filter *filter, struct filter_arg *arg)
 		goto out;
 
 	switch (arg->exp.type) {
-	case FILTER_EXP_ADD:
+	case TEP_FILTER_EXP_ADD:
 		op = "+";
 		break;
-	case FILTER_EXP_SUB:
+	case TEP_FILTER_EXP_SUB:
 		op = "-";
 		break;
-	case FILTER_EXP_MUL:
+	case TEP_FILTER_EXP_MUL:
 		op = "*";
 		break;
-	case FILTER_EXP_DIV:
+	case TEP_FILTER_EXP_DIV:
 		op = "/";
 		break;
-	case FILTER_EXP_MOD:
+	case TEP_FILTER_EXP_MOD:
 		op = "%";
 		break;
-	case FILTER_EXP_RSHIFT:
+	case TEP_FILTER_EXP_RSHIFT:
 		op = ">>";
 		break;
-	case FILTER_EXP_LSHIFT:
+	case TEP_FILTER_EXP_LSHIFT:
 		op = "<<";
 		break;
-	case FILTER_EXP_AND:
+	case TEP_FILTER_EXP_AND:
 		op = "&";
 		break;
-	case FILTER_EXP_OR:
+	case TEP_FILTER_EXP_OR:
 		op = "|";
 		break;
-	case FILTER_EXP_XOR:
+	case TEP_FILTER_EXP_XOR:
 		op = "^";
 		break;
 	default:
@@ -2233,7 +2234,7 @@ out:
 	return str;
 }
 
-static char *num_to_str(struct event_filter *filter, struct filter_arg *arg)
+static char *num_to_str(struct tep_event_filter *filter, struct tep_filter_arg *arg)
 {
 	char *lstr;
 	char *rstr;
@@ -2246,26 +2247,26 @@ static char *num_to_str(struct event_filter *filter, struct filter_arg *arg)
 		goto out;
 
 	switch (arg->num.type) {
-	case FILTER_CMP_EQ:
+	case TEP_FILTER_CMP_EQ:
 		op = "==";
 		/* fall through */
-	case FILTER_CMP_NE:
+	case TEP_FILTER_CMP_NE:
 		if (!op)
 			op = "!=";
 		/* fall through */
-	case FILTER_CMP_GT:
+	case TEP_FILTER_CMP_GT:
 		if (!op)
 			op = ">";
 		/* fall through */
-	case FILTER_CMP_LT:
+	case TEP_FILTER_CMP_LT:
 		if (!op)
 			op = "<";
 		/* fall through */
-	case FILTER_CMP_GE:
+	case TEP_FILTER_CMP_GE:
 		if (!op)
 			op = ">=";
 		/* fall through */
-	case FILTER_CMP_LE:
+	case TEP_FILTER_CMP_LE:
 		if (!op)
 			op = "<=";
 
@@ -2283,24 +2284,24 @@ out:
 	return str;
 }
 
-static char *str_to_str(struct event_filter *filter, struct filter_arg *arg)
+static char *str_to_str(struct tep_event_filter *filter, struct tep_filter_arg *arg)
 {
 	char *str = NULL;
 	char *op = NULL;
 
 	switch (arg->str.type) {
-	case FILTER_CMP_MATCH:
+	case TEP_FILTER_CMP_MATCH:
 		op = "==";
 		/* fall through */
-	case FILTER_CMP_NOT_MATCH:
+	case TEP_FILTER_CMP_NOT_MATCH:
 		if (!op)
 			op = "!=";
 		/* fall through */
-	case FILTER_CMP_REGEX:
+	case TEP_FILTER_CMP_REGEX:
 		if (!op)
 			op = "=~";
 		/* fall through */
-	case FILTER_CMP_NOT_REGEX:
+	case TEP_FILTER_CMP_NOT_REGEX:
 		if (!op)
 			op = "!~";
 
@@ -2315,31 +2316,31 @@ static char *str_to_str(struct event_filter *filter, struct filter_arg *arg)
 	return str;
 }
 
-static char *arg_to_str(struct event_filter *filter, struct filter_arg *arg)
+static char *arg_to_str(struct tep_event_filter *filter, struct tep_filter_arg *arg)
 {
 	char *str = NULL;
 
 	switch (arg->type) {
-	case FILTER_ARG_BOOLEAN:
+	case TEP_FILTER_ARG_BOOLEAN:
 		asprintf(&str, arg->boolean.value ? "TRUE" : "FALSE");
 		return str;
 
-	case FILTER_ARG_OP:
+	case TEP_FILTER_ARG_OP:
 		return op_to_str(filter, arg);
 
-	case FILTER_ARG_NUM:
+	case TEP_FILTER_ARG_NUM:
 		return num_to_str(filter, arg);
 
-	case FILTER_ARG_STR:
+	case TEP_FILTER_ARG_STR:
 		return str_to_str(filter, arg);
 
-	case FILTER_ARG_VALUE:
+	case TEP_FILTER_ARG_VALUE:
 		return val_to_str(filter, arg);
 
-	case FILTER_ARG_FIELD:
+	case TEP_FILTER_ARG_FIELD:
 		return field_to_str(filter, arg);
 
-	case FILTER_ARG_EXP:
+	case TEP_FILTER_ARG_EXP:
 		return exp_to_str(filter, arg);
 
 	default:
@@ -2359,9 +2360,9 @@ static char *arg_to_str(struct event_filter *filter, struct filter_arg *arg)
  *  NULL is returned if no filter is found or allocation failed.
  */
 char *
-tep_filter_make_string(struct event_filter *filter, int event_id)
+tep_filter_make_string(struct tep_event_filter *filter, int event_id)
 {
-	struct filter_type *filter_type;
+	struct tep_filter_type *filter_type;
 
 	if (!filter->filters)
 		return NULL;
@@ -2383,10 +2384,10 @@ tep_filter_make_string(struct event_filter *filter, int event_id)
  *  1 if the two filters hold the same content.
  *  0 if they do not.
  */
-int tep_filter_compare(struct event_filter *filter1, struct event_filter *filter2)
+int tep_filter_compare(struct tep_event_filter *filter1, struct tep_event_filter *filter2)
 {
-	struct filter_type *filter_type1;
-	struct filter_type *filter_type2;
+	struct tep_filter_type *filter_type1;
+	struct tep_filter_type *filter_type2;
 	char *str1, *str2;
 	int result;
 	int i;
@@ -2409,8 +2410,8 @@ int tep_filter_compare(struct event_filter *filter1, struct event_filter *filter
 		if (filter_type1->filter->type != filter_type2->filter->type)
 			break;
 		switch (filter_type1->filter->type) {
-		case FILTER_TRIVIAL_FALSE:
-		case FILTER_TRIVIAL_TRUE:
+		case TEP_FILTER_TRIVIAL_FALSE:
+		case TEP_FILTER_TRIVIAL_TRUE:
 			/* trivial types just need the type compared */
 			continue;
 		default:
diff --git a/tools/lib/traceevent/plugin_function.c b/tools/lib/traceevent/plugin_function.c
index 424747475d37..528acc75d81a 100644
--- a/tools/lib/traceevent/plugin_function.c
+++ b/tools/lib/traceevent/plugin_function.c
@@ -23,6 +23,7 @@
 
 #include "event-parse.h"
 #include "event-utils.h"
+#include "trace-seq.h"
 
 static struct func_stack {
 	int size;
@@ -123,7 +124,7 @@ static int add_and_get_index(const char *parent, const char *child, int cpu)
 }
 
 static int function_handler(struct trace_seq *s, struct tep_record *record,
-			    struct event_format *event, void *context)
+			    struct tep_event_format *event, void *context)
 {
 	struct tep_handle *pevent = event->pevent;
 	unsigned long long function;
diff --git a/tools/lib/traceevent/plugin_hrtimer.c b/tools/lib/traceevent/plugin_hrtimer.c
index b43bfec565d8..9aa05b4ca811 100644
--- a/tools/lib/traceevent/plugin_hrtimer.c
+++ b/tools/lib/traceevent/plugin_hrtimer.c
@@ -23,10 +23,11 @@
 #include <string.h>
 
 #include "event-parse.h"
+#include "trace-seq.h"
 
 static int timer_expire_handler(struct trace_seq *s,
 				struct tep_record *record,
-				struct event_format *event, void *context)
+				struct tep_event_format *event, void *context)
 {
 	trace_seq_printf(s, "hrtimer=");
 
@@ -46,7 +47,7 @@ static int timer_expire_handler(struct trace_seq *s,
 
 static int timer_start_handler(struct trace_seq *s,
 			       struct tep_record *record,
-			       struct event_format *event, void *context)
+			       struct tep_event_format *event, void *context)
 {
 	trace_seq_printf(s, "hrtimer=");
 
diff --git a/tools/lib/traceevent/plugin_jbd2.c b/tools/lib/traceevent/plugin_jbd2.c
index 45a9acd19640..a5e34135dd6a 100644
--- a/tools/lib/traceevent/plugin_jbd2.c
+++ b/tools/lib/traceevent/plugin_jbd2.c
@@ -22,6 +22,7 @@
 #include <string.h>
 
 #include "event-parse.h"
+#include "trace-seq.h"
 
 #define MINORBITS	20
 #define MINORMASK	((1U << MINORBITS) - 1)
diff --git a/tools/lib/traceevent/plugin_kmem.c b/tools/lib/traceevent/plugin_kmem.c
index 73966b05abce..1beb4eaddfdf 100644
--- a/tools/lib/traceevent/plugin_kmem.c
+++ b/tools/lib/traceevent/plugin_kmem.c
@@ -22,11 +22,12 @@
 #include <string.h>
 
 #include "event-parse.h"
+#include "trace-seq.h"
 
 static int call_site_handler(struct trace_seq *s, struct tep_record *record,
-			     struct event_format *event, void *context)
+			     struct tep_event_format *event, void *context)
 {
-	struct format_field *field;
+	struct tep_format_field *field;
 	unsigned long long val, addr;
 	void *data = record->data;
 	const char *func;
diff --git a/tools/lib/traceevent/plugin_kvm.c b/tools/lib/traceevent/plugin_kvm.c
index 1d0d15906225..d13c22846fa9 100644
--- a/tools/lib/traceevent/plugin_kvm.c
+++ b/tools/lib/traceevent/plugin_kvm.c
@@ -23,6 +23,7 @@
 #include <stdint.h>
 
 #include "event-parse.h"
+#include "trace-seq.h"
 
 #ifdef HAVE_UDIS86
 
@@ -248,7 +249,7 @@ static const char *find_exit_reason(unsigned isa, int val)
 }
 
 static int print_exit_reason(struct trace_seq *s, struct tep_record *record,
-			     struct event_format *event, const char *field)
+			     struct tep_event_format *event, const char *field)
 {
 	unsigned long long isa;
 	unsigned long long val;
@@ -269,7 +270,7 @@ static int print_exit_reason(struct trace_seq *s, struct tep_record *record,
 }
 
 static int kvm_exit_handler(struct trace_seq *s, struct tep_record *record,
-			    struct event_format *event, void *context)
+			    struct tep_event_format *event, void *context)
 {
 	unsigned long long info1 = 0, info2 = 0;
 
@@ -292,7 +293,7 @@ static int kvm_exit_handler(struct trace_seq *s, struct tep_record *record,
 
 static int kvm_emulate_insn_handler(struct trace_seq *s,
 				    struct tep_record *record,
-				    struct event_format *event, void *context)
+				    struct tep_event_format *event, void *context)
 {
 	unsigned long long rip, csbase, len, flags, failed;
 	int llen;
@@ -331,7 +332,7 @@ static int kvm_emulate_insn_handler(struct trace_seq *s,
 
 
 static int kvm_nested_vmexit_inject_handler(struct trace_seq *s, struct tep_record *record,
-					    struct event_format *event, void *context)
+					    struct tep_event_format *event, void *context)
 {
 	if (print_exit_reason(s, record, event, "exit_code") < 0)
 		return -1;
@@ -345,7 +346,7 @@ static int kvm_nested_vmexit_inject_handler(struct trace_seq *s, struct tep_reco
 }
 
 static int kvm_nested_vmexit_handler(struct trace_seq *s, struct tep_record *record,
-				     struct event_format *event, void *context)
+				     struct tep_event_format *event, void *context)
 {
 	tep_print_num_field(s, "rip %llx ", event, "rip", record, 1);
 
@@ -371,7 +372,7 @@ union kvm_mmu_page_role {
 };
 
 static int kvm_mmu_print_role(struct trace_seq *s, struct tep_record *record,
-			      struct event_format *event, void *context)
+			      struct tep_event_format *event, void *context)
 {
 	unsigned long long val;
 	static const char *access_str[] = {
@@ -418,7 +419,7 @@ static int kvm_mmu_print_role(struct trace_seq *s, struct tep_record *record,
 
 static int kvm_mmu_get_page_handler(struct trace_seq *s,
 				    struct tep_record *record,
-				    struct event_format *event, void *context)
+				    struct tep_event_format *event, void *context)
 {
 	unsigned long long val;
 
diff --git a/tools/lib/traceevent/plugin_mac80211.c b/tools/lib/traceevent/plugin_mac80211.c
index de50a5316203..da3855e7b86f 100644
--- a/tools/lib/traceevent/plugin_mac80211.c
+++ b/tools/lib/traceevent/plugin_mac80211.c
@@ -22,13 +22,14 @@
 #include <string.h>
 
 #include "event-parse.h"
+#include "trace-seq.h"
 
 #define INDENT 65
 
-static void print_string(struct trace_seq *s, struct event_format *event,
+static void print_string(struct trace_seq *s, struct tep_event_format *event,
 			 const char *name, const void *data)
 {
-	struct format_field *f = tep_find_field(event, name);
+	struct tep_format_field *f = tep_find_field(event, name);
 	int offset;
 	int length;
 
@@ -59,7 +60,7 @@ static void print_string(struct trace_seq *s, struct event_format *event,
 
 static int drv_bss_info_changed(struct trace_seq *s,
 				struct tep_record *record,
-				struct event_format *event, void *context)
+				struct tep_event_format *event, void *context)
 {
 	void *data = record->data;
 
diff --git a/tools/lib/traceevent/plugin_sched_switch.c b/tools/lib/traceevent/plugin_sched_switch.c
index eecb4bd95c11..77882272672f 100644
--- a/tools/lib/traceevent/plugin_sched_switch.c
+++ b/tools/lib/traceevent/plugin_sched_switch.c
@@ -22,6 +22,7 @@
 #include <string.h>
 
 #include "event-parse.h"
+#include "trace-seq.h"
 
 static void write_state(struct trace_seq *s, int val)
 {
@@ -44,7 +45,7 @@ static void write_state(struct trace_seq *s, int val)
 		trace_seq_putc(s, 'R');
 }
 
-static void write_and_save_comm(struct format_field *field,
+static void write_and_save_comm(struct tep_format_field *field,
 				struct tep_record *record,
 				struct trace_seq *s, int pid)
 {
@@ -66,9 +67,9 @@ static void write_and_save_comm(struct format_field *field,
 
 static int sched_wakeup_handler(struct trace_seq *s,
 				struct tep_record *record,
-				struct event_format *event, void *context)
+				struct tep_event_format *event, void *context)
 {
-	struct format_field *field;
+	struct tep_format_field *field;
 	unsigned long long val;
 
 	if (tep_get_field_val(s, event, "pid", record, &val, 1))
@@ -95,9 +96,9 @@ static int sched_wakeup_handler(struct trace_seq *s,
 
 static int sched_switch_handler(struct trace_seq *s,
 				struct tep_record *record,
-				struct event_format *event, void *context)
+				struct tep_event_format *event, void *context)
 {
-	struct format_field *field;
+	struct tep_format_field *field;
 	unsigned long long val;
 
 	if (tep_get_field_val(s, event, "prev_pid", record, &val, 1))
diff --git a/tools/lib/traceevent/plugin_scsi.c b/tools/lib/traceevent/plugin_scsi.c
index 5ec346f6b842..4eba25cc1431 100644
--- a/tools/lib/traceevent/plugin_scsi.c
+++ b/tools/lib/traceevent/plugin_scsi.c
@@ -3,6 +3,7 @@
 #include <string.h>
 #include <inttypes.h>
 #include "event-parse.h"
+#include "trace-seq.h"
 
 typedef unsigned long sector_t;
 typedef uint64_t u64;
diff --git a/tools/lib/traceevent/plugin_xen.c b/tools/lib/traceevent/plugin_xen.c
index b2acbd6e9c86..bc0496e4c296 100644
--- a/tools/lib/traceevent/plugin_xen.c
+++ b/tools/lib/traceevent/plugin_xen.c
@@ -3,6 +3,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include "event-parse.h"
+#include "trace-seq.h"
 
 #define __HYPERVISOR_set_trap_table			0
 #define __HYPERVISOR_mmu_update				1
diff --git a/tools/lib/traceevent/tep_strerror.c b/tools/lib/traceevent/tep_strerror.c
new file mode 100644
index 000000000000..4ac26445b2f6
--- /dev/null
+++ b/tools/lib/traceevent/tep_strerror.c
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: LGPL-2.1
+#undef _GNU_SOURCE
+#include <string.h>
+#include <stdio.h>
+
+#include "event-parse.h"
+
+#undef _PE
+#define _PE(code, str) str
+static const char * const tep_error_str[] = {
+	TEP_ERRORS
+};
+#undef _PE
+
+/*
+ * The tools so far have been using the strerror_r() GNU variant, that returns
+ * a string, be it the buffer passed or something else.
+ *
+ * But that, besides being tricky in cases where we expect that the function
+ * using strerror_r() returns the error formatted in a provided buffer (we have
+ * to check if it returned something else and copy that instead), breaks the
+ * build on systems not using glibc, like Alpine Linux, where musl libc is
+ * used.
+ *
+ * So, introduce yet another wrapper, str_error_r(), that has the GNU
+ * interface, but uses the portable XSI variant of strerror_r(), so that users
+ * rest asured that the provided buffer is used and it is what is returned.
+ */
+int tep_strerror(struct tep_handle *tep __maybe_unused,
+		 enum tep_errno errnum, char *buf, size_t buflen)
+{
+	const char *msg;
+	int idx;
+
+	if (!buflen)
+		return 0;
+
+	if (errnum >= 0) {
+		int err = strerror_r(errnum, buf, buflen);
+		buf[buflen - 1] = 0;
+		return err;
+	}
+
+	if (errnum <= __TEP_ERRNO__START ||
+	    errnum >= __TEP_ERRNO__END)
+		return -1;
+
+	idx = errnum - __TEP_ERRNO__START - 1;
+	msg = tep_error_str[idx];
+	snprintf(buf, buflen, "%s", msg);
+
+	return 0;
+}
diff --git a/tools/lib/traceevent/trace-seq.c b/tools/lib/traceevent/trace-seq.c
index e3bac4543d3b..8ff1d55954d1 100644
--- a/tools/lib/traceevent/trace-seq.c
+++ b/tools/lib/traceevent/trace-seq.c
@@ -3,6 +3,8 @@
  * Copyright (C) 2009 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
  *
  */
+#include "trace-seq.h"
+
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
diff --git a/tools/lib/traceevent/trace-seq.h b/tools/lib/traceevent/trace-seq.h
new file mode 100644
index 000000000000..d68ec69f8d1a
--- /dev/null
+++ b/tools/lib/traceevent/trace-seq.h
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+
+#ifndef _TRACE_SEQ_H
+#define _TRACE_SEQ_H
+
+#include <stdarg.h>
+#include <stdio.h>
+
+/* ----------------------- trace_seq ----------------------- */
+
+#ifndef TRACE_SEQ_BUF_SIZE
+#define TRACE_SEQ_BUF_SIZE 4096
+#endif
+
+enum trace_seq_fail {
+	TRACE_SEQ__GOOD,
+	TRACE_SEQ__BUFFER_POISONED,
+	TRACE_SEQ__MEM_ALLOC_FAILED,
+};
+
+/*
+ * Trace sequences are used to allow a function to call several other functions
+ * to create a string of data to use (up to a max of PAGE_SIZE).
+ */
+
+struct trace_seq {
+	char			*buffer;
+	unsigned int		buffer_size;
+	unsigned int		len;
+	unsigned int		readpos;
+	enum trace_seq_fail	state;
+};
+
+void trace_seq_init(struct trace_seq *s);
+void trace_seq_reset(struct trace_seq *s);
+void trace_seq_destroy(struct trace_seq *s);
+
+extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
+	__attribute__ ((format (printf, 2, 3)));
+extern int trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args)
+	__attribute__ ((format (printf, 2, 0)));
+
+extern int trace_seq_puts(struct trace_seq *s, const char *str);
+extern int trace_seq_putc(struct trace_seq *s, unsigned char c);
+
+extern void trace_seq_terminate(struct trace_seq *s);
+
+extern int trace_seq_do_fprintf(struct trace_seq *s, FILE *fp);
+extern int trace_seq_do_printf(struct trace_seq *s);
+
+#endif /* _TRACE_SEQ_H */
diff --git a/tools/memory-model/Documentation/explanation.txt b/tools/memory-model/Documentation/explanation.txt
index 0cbd1ef8f86d..35bff92cc773 100644
--- a/tools/memory-model/Documentation/explanation.txt
+++ b/tools/memory-model/Documentation/explanation.txt
@@ -28,7 +28,8 @@ Explanation of the Linux-Kernel Memory Consistency Model
   20. THE HAPPENS-BEFORE RELATION: hb
   21. THE PROPAGATES-BEFORE RELATION: pb
   22. RCU RELATIONS: rcu-link, gp, rscs, rcu-fence, and rb
-  23. ODDS AND ENDS
+  23. LOCKING
+  24. ODDS AND ENDS
 
 
 
@@ -1067,28 +1068,6 @@ allowing out-of-order writes like this to occur.  The model avoided
 violating the write-write coherence rule by requiring the CPU not to
 send the W write to the memory subsystem at all!)
 
-There is one last example of preserved program order in the LKMM: when
-a load-acquire reads from an earlier store-release.  For example:
-
-	smp_store_release(&x, 123);
-	r1 = smp_load_acquire(&x);
-
-If the smp_load_acquire() ends up obtaining the 123 value that was
-stored by the smp_store_release(), the LKMM says that the load must be
-executed after the store; the store cannot be forwarded to the load.
-This requirement does not arise from the operational model, but it
-yields correct predictions on all architectures supported by the Linux
-kernel, although for differing reasons.
-
-On some architectures, including x86 and ARMv8, it is true that the
-store cannot be forwarded to the load.  On others, including PowerPC
-and ARMv7, smp_store_release() generates object code that starts with
-a fence and smp_load_acquire() generates object code that ends with a
-fence.  The upshot is that even though the store may be forwarded to
-the load, it is still true that any instruction preceding the store
-will be executed before the load or any following instructions, and
-the store will be executed before any instruction following the load.
-
 
 AND THEN THERE WAS ALPHA
 ------------------------
@@ -1766,6 +1745,147 @@ before it does, and the critical section in P2 both starts after P1's
 grace period does and ends after it does.
 
 
+LOCKING
+-------
+
+The LKMM includes locking.  In fact, there is special code for locking
+in the formal model, added in order to make tools run faster.
+However, this special code is intended to be more or less equivalent
+to concepts we have already covered.  A spinlock_t variable is treated
+the same as an int, and spin_lock(&s) is treated almost the same as:
+
+	while (cmpxchg_acquire(&s, 0, 1) != 0)
+		cpu_relax();
+
+This waits until s is equal to 0 and then atomically sets it to 1,
+and the read part of the cmpxchg operation acts as an acquire fence.
+An alternate way to express the same thing would be:
+
+	r = xchg_acquire(&s, 1);
+
+along with a requirement that at the end, r = 0.  Similarly,
+spin_trylock(&s) is treated almost the same as:
+
+	return !cmpxchg_acquire(&s, 0, 1);
+
+which atomically sets s to 1 if it is currently equal to 0 and returns
+true if it succeeds (the read part of the cmpxchg operation acts as an
+acquire fence only if the operation is successful).  spin_unlock(&s)
+is treated almost the same as:
+
+	smp_store_release(&s, 0);
+
+The "almost" qualifiers above need some explanation.  In the LKMM, the
+store-release in a spin_unlock() and the load-acquire which forms the
+first half of the atomic rmw update in a spin_lock() or a successful
+spin_trylock() -- we can call these things lock-releases and
+lock-acquires -- have two properties beyond those of ordinary releases
+and acquires.
+
+First, when a lock-acquire reads from a lock-release, the LKMM
+requires that every instruction po-before the lock-release must
+execute before any instruction po-after the lock-acquire.  This would
+naturally hold if the release and acquire operations were on different
+CPUs, but the LKMM says it holds even when they are on the same CPU.
+For example:
+
+	int x, y;
+	spinlock_t s;
+
+	P0()
+	{
+		int r1, r2;
+
+		spin_lock(&s);
+		r1 = READ_ONCE(x);
+		spin_unlock(&s);
+		spin_lock(&s);
+		r2 = READ_ONCE(y);
+		spin_unlock(&s);
+	}
+
+	P1()
+	{
+		WRITE_ONCE(y, 1);
+		smp_wmb();
+		WRITE_ONCE(x, 1);
+	}
+
+Here the second spin_lock() reads from the first spin_unlock(), and
+therefore the load of x must execute before the load of y.  Thus we
+cannot have r1 = 1 and r2 = 0 at the end (this is an instance of the
+MP pattern).
+
+This requirement does not apply to ordinary release and acquire
+fences, only to lock-related operations.  For instance, suppose P0()
+in the example had been written as:
+
+	P0()
+	{
+		int r1, r2, r3;
+
+		r1 = READ_ONCE(x);
+		smp_store_release(&s, 1);
+		r3 = smp_load_acquire(&s);
+		r2 = READ_ONCE(y);
+	}
+
+Then the CPU would be allowed to forward the s = 1 value from the
+smp_store_release() to the smp_load_acquire(), executing the
+instructions in the following order:
+
+		r3 = smp_load_acquire(&s);	// Obtains r3 = 1
+		r2 = READ_ONCE(y);
+		r1 = READ_ONCE(x);
+		smp_store_release(&s, 1);	// Value is forwarded
+
+and thus it could load y before x, obtaining r2 = 0 and r1 = 1.
+
+Second, when a lock-acquire reads from a lock-release, and some other
+stores W and W' occur po-before the lock-release and po-after the
+lock-acquire respectively, the LKMM requires that W must propagate to
+each CPU before W' does.  For example, consider:
+
+	int x, y;
+	spinlock_t x;
+
+	P0()
+	{
+		spin_lock(&s);
+		WRITE_ONCE(x, 1);
+		spin_unlock(&s);
+	}
+
+	P1()
+	{
+		int r1;
+
+		spin_lock(&s);
+		r1 = READ_ONCE(x);
+		WRITE_ONCE(y, 1);
+		spin_unlock(&s);
+	}
+
+	P2()
+	{
+		int r2, r3;
+
+		r2 = READ_ONCE(y);
+		smp_rmb();
+		r3 = READ_ONCE(x);
+	}
+
+If r1 = 1 at the end then the spin_lock() in P1 must have read from
+the spin_unlock() in P0.  Hence the store to x must propagate to P2
+before the store to y does, so we cannot have r2 = 1 and r3 = 0.
+
+These two special requirements for lock-release and lock-acquire do
+not arise from the operational model.  Nevertheless, kernel developers
+have come to expect and rely on them because they do hold on all
+architectures supported by the Linux kernel, albeit for various
+differing reasons.
+
+
 ODDS AND ENDS
 -------------
 
@@ -1831,26 +1951,6 @@ they behave as follows:
 	events and the events preceding them against all po-later
 	events.
 
-The LKMM includes locking.  In fact, there is special code for locking
-in the formal model, added in order to make tools run faster.
-However, this special code is intended to be exactly equivalent to
-concepts we have already covered.  A spinlock_t variable is treated
-the same as an int, and spin_lock(&s) is treated the same as:
-
-	while (cmpxchg_acquire(&s, 0, 1) != 0)
-		cpu_relax();
-
-which waits until s is equal to 0 and then atomically sets it to 1,
-and where the read part of the atomic update is also an acquire fence.
-An alternate way to express the same thing would be:
-
-	r = xchg_acquire(&s, 1);
-
-along with a requirement that at the end, r = 0.  spin_unlock(&s) is
-treated the same as:
-
-	smp_store_release(&s, 0);
-
 Interestingly, RCU and locking each introduce the possibility of
 deadlock.  When faced with code sequences such as:
 
diff --git a/tools/memory-model/Documentation/recipes.txt b/tools/memory-model/Documentation/recipes.txt
index af72700cc20a..7fe8d7aa3029 100644
--- a/tools/memory-model/Documentation/recipes.txt
+++ b/tools/memory-model/Documentation/recipes.txt
@@ -311,7 +311,7 @@ The smp_wmb() macro orders prior stores against later stores, and the
 smp_rmb() macro orders prior loads against later loads.  Therefore, if
 the final value of r0 is 1, the final value of r1 must also be 1.
 
-The the xlog_state_switch_iclogs() function in fs/xfs/xfs_log.c contains
+The xlog_state_switch_iclogs() function in fs/xfs/xfs_log.c contains
 the following write-side code fragment:
 
 	log->l_curr_block -= log->l_logBBsize;
diff --git a/tools/memory-model/README b/tools/memory-model/README
index ee987ce20aae..acf9077cffaa 100644
--- a/tools/memory-model/README
+++ b/tools/memory-model/README
@@ -171,6 +171,12 @@ The Linux-kernel memory model has the following limitations:
 	particular, the "THE PROGRAM ORDER RELATION: po AND po-loc"
 	and "A WARNING" sections).
 
+	Note that this limitation in turn limits LKMM's ability to
+	accurately model address, control, and data dependencies.
+	For example, if the compiler can deduce the value of some variable
+	carrying a dependency, then the compiler can break that dependency
+	by substituting a constant of that value.
+
 2.	Multiple access sizes for a single variable are not supported,
 	and neither are misaligned or partially overlapping accesses.
 
@@ -190,6 +196,36 @@ The Linux-kernel memory model has the following limitations:
 	However, a substantial amount of support is provided for these
 	operations, as shown in the linux-kernel.def file.
 
+	a.	When rcu_assign_pointer() is passed NULL, the Linux
+		kernel provides no ordering, but LKMM models this
+		case as a store release.
+
+	b.	The "unless" RMW operations are not currently modeled:
+		atomic_long_add_unless(), atomic_add_unless(),
+		atomic_inc_unless_negative(), and
+		atomic_dec_unless_positive().  These can be emulated
+		in litmus tests, for example, by using atomic_cmpxchg().
+
+	c.	The call_rcu() function is not modeled.  It can be
+		emulated in litmus tests by adding another process that
+		invokes synchronize_rcu() and the body of the callback
+		function, with (for example) a release-acquire from
+		the site of the emulated call_rcu() to the beginning
+		of the additional process.
+
+	d.	The rcu_barrier() function is not modeled.  It can be
+		emulated in litmus tests emulating call_rcu() via
+		(for example) a release-acquire from the end of each
+		additional call_rcu() process to the site of the
+		emulated rcu-barrier().
+
+	e.	Sleepable RCU (SRCU) is not modeled.  It can be
+		emulated, but perhaps not simply.
+
+	f.	Reader-writer locking is not modeled.  It can be
+		emulated in litmus tests using atomic read-modify-write
+		operations.
+
 The "herd7" tool has some additional limitations of its own, apart from
 the memory model:
 
@@ -204,3 +240,6 @@ the memory model:
 Some of these limitations may be overcome in the future, but others are
 more likely to be addressed by incorporating the Linux-kernel memory model
 into other tools.
+
+Finally, please note that LKMM is subject to change as hardware, use cases,
+and compilers evolve.
diff --git a/tools/memory-model/linux-kernel.cat b/tools/memory-model/linux-kernel.cat
index 59b5cbe6b624..882fc33274ac 100644
--- a/tools/memory-model/linux-kernel.cat
+++ b/tools/memory-model/linux-kernel.cat
@@ -38,7 +38,7 @@ let strong-fence = mb | gp
 (* Release Acquire *)
 let acq-po = [Acquire] ; po ; [M]
 let po-rel = [M] ; po ; [Release]
-let rfi-rel-acq = [Release] ; rfi ; [Acquire]
+let po-unlock-rf-lock-po = po ; [UL] ; rf ; [LKR] ; po
 
 (**********************************)
 (* Fundamental coherence ordering *)
@@ -60,13 +60,13 @@ let dep = addr | data
 let rwdep = (dep | ctrl) ; [W]
 let overwrite = co | fr
 let to-w = rwdep | (overwrite & int)
-let to-r = addr | (dep ; rfi) | rfi-rel-acq
+let to-r = addr | (dep ; rfi)
 let fence = strong-fence | wmb | po-rel | rmb | acq-po
-let ppo = to-r | to-w | fence
+let ppo = to-r | to-w | fence | (po-unlock-rf-lock-po & int)
 
 (* Propagation: Ordering from release operations and strong fences. *)
 let A-cumul(r) = rfe? ; r
-let cumul-fence = A-cumul(strong-fence | po-rel) | wmb
+let cumul-fence = A-cumul(strong-fence | po-rel) | wmb | po-unlock-rf-lock-po
 let prop = (overwrite & ext)? ; cumul-fence* ; rfe?
 
 (*
diff --git a/tools/memory-model/litmus-tests/ISA2+pooncelock+pooncelock+pombonce.litmus b/tools/memory-model/litmus-tests/ISA2+pooncelock+pooncelock+pombonce.litmus
index 0f749e419b34..094d58df7789 100644
--- a/tools/memory-model/litmus-tests/ISA2+pooncelock+pooncelock+pombonce.litmus
+++ b/tools/memory-model/litmus-tests/ISA2+pooncelock+pooncelock+pombonce.litmus
@@ -1,11 +1,10 @@
 C ISA2+pooncelock+pooncelock+pombonce
 
 (*
- * Result: Sometimes
+ * Result: Never
  *
- * This test shows that the ordering provided by a lock-protected S
- * litmus test (P0() and P1()) are not visible to external process P2().
- * This is likely to change soon.
+ * This test shows that write-write ordering provided by locks
+ * (in P0() and P1()) is visible to external process P2().
  *)
 
 {}
diff --git a/tools/memory-model/litmus-tests/README b/tools/memory-model/litmus-tests/README
index 4581ec2d3c57..5ee08f129094 100644
--- a/tools/memory-model/litmus-tests/README
+++ b/tools/memory-model/litmus-tests/README
@@ -1,4 +1,6 @@
-This directory contains the following litmus tests:
+============
+LITMUS TESTS
+============
 
 CoRR+poonceonce+Once.litmus
 	Test of read-read coherence, that is, whether or not two
@@ -36,7 +38,7 @@ IRIW+poonceonces+OnceOnce.litmus
 ISA2+pooncelock+pooncelock+pombonce.litmus
 	Tests whether the ordering provided by a lock-protected S
 	litmus test is visible to an external process whose accesses are
-	separated by smp_mb().	This addition of an external process to
+	separated by smp_mb().  This addition of an external process to
 	S is otherwise known as ISA2.
 
 ISA2+poonceonces.litmus
@@ -151,3 +153,101 @@ Z6.0+pooncerelease+poacquirerelease+fencembonceonce.litmus
 A great many more litmus tests are available here:
 
 	https://github.com/paulmckrcu/litmus
+
+==================
+LITMUS TEST NAMING
+==================
+
+Litmus tests are usually named based on their contents, which means that
+looking at the name tells you what the litmus test does.  The naming
+scheme covers litmus tests having a single cycle that passes through
+each process exactly once, so litmus tests not fitting this description
+are named on an ad-hoc basis.
+
+The structure of a litmus-test name is the litmus-test class, a plus
+sign ("+"), and one string for each process, separated by plus signs.
+The end of the name is ".litmus".
+
+The litmus-test classes may be found in the infamous test6.pdf:
+https://www.cl.cam.ac.uk/~pes20/ppc-supplemental/test6.pdf
+Each class defines the pattern of accesses and of the variables accessed.
+For example, if the one process writes to a pair of variables, and
+the other process reads from these same variables, the corresponding
+litmus-test class is "MP" (message passing), which may be found on the
+left-hand end of the second row of tests on page one of test6.pdf.
+
+The strings used to identify the actions carried out by each process are
+complex due to a desire to have short(er) names.  Thus, there is a tool to
+generate these strings from a given litmus test's actions.  For example,
+consider the processes from SB+rfionceonce-poonceonces.litmus:
+
+	P0(int *x, int *y)
+	{
+		int r1;
+		int r2;
+
+		WRITE_ONCE(*x, 1);
+		r1 = READ_ONCE(*x);
+		r2 = READ_ONCE(*y);
+	}
+
+	P1(int *x, int *y)
+	{
+		int r3;
+		int r4;
+
+		WRITE_ONCE(*y, 1);
+		r3 = READ_ONCE(*y);
+		r4 = READ_ONCE(*x);
+	}
+
+The next step is to construct a space-separated list of descriptors,
+interleaving descriptions of the relation between a pair of consecutive
+accesses with descriptions of the second access in the pair.
+
+P0()'s WRITE_ONCE() is read by its first READ_ONCE(), which is a
+reads-from link (rf) and internal to the P0() process.  This is
+"rfi", which is an abbreviation for "reads-from internal".  Because
+some of the tools string these abbreviations together with space
+characters separating processes, the first character is capitalized,
+resulting in "Rfi".
+
+P0()'s second access is a READ_ONCE(), as opposed to (for example)
+smp_load_acquire(), so next is "Once".  Thus far, we have "Rfi Once".
+
+P0()'s third access is also a READ_ONCE(), but to y rather than x.
+This is related to P0()'s second access by program order ("po"),
+to a different variable ("d"), and both accesses are reads ("RR").
+The resulting descriptor is "PodRR".  Because P0()'s third access is
+READ_ONCE(), we add another "Once" descriptor.
+
+A from-read ("fre") relation links P0()'s third to P1()'s first
+access, and the resulting descriptor is "Fre".  P1()'s first access is
+WRITE_ONCE(), which as before gives the descriptor "Once".  The string
+thus far is thus "Rfi Once PodRR Once Fre Once".
+
+The remainder of P1() is similar to P0(), which means we add
+"Rfi Once PodRR Once".  Another fre links P1()'s last access to
+P0()'s first access, which is WRITE_ONCE(), so we add "Fre Once".
+The full string is thus:
+
+	Rfi Once PodRR Once Fre Once Rfi Once PodRR Once Fre Once
+
+This string can be given to the "norm7" and "classify7" tools to
+produce the name:
+
+	$ norm7 -bell linux-kernel.bell \
+		Rfi Once PodRR Once Fre Once Rfi Once PodRR Once Fre Once | \
+	  sed -e 's/:.*//g'
+	SB+rfionceonce-poonceonces
+
+Adding the ".litmus" suffix: SB+rfionceonce-poonceonces.litmus
+
+The descriptors that describe connections between consecutive accesses
+within the cycle through a given litmus test can be provided by the herd
+tool (Rfi, Po, Fre, and so on) or by the linux-kernel.bell file (Once,
+Release, Acquire, and so on).
+
+To see the full list of descriptors, execute the following command:
+
+	$ diyone7 -bell linux-kernel.bell -show edges
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 2928939b98ec..0414a0d52262 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -836,7 +836,7 @@ static int add_switch_table(struct objtool_file *file, struct instruction *insn,
 	struct symbol *pfunc = insn->func->pfunc;
 	unsigned int prev_offset = 0;
 
-	list_for_each_entry_from(rela, &file->rodata->rela->rela_list, list) {
+	list_for_each_entry_from(rela, &table->rela_sec->rela_list, list) {
 		if (rela == next_table)
 			break;
 
@@ -926,6 +926,7 @@ static struct rela *find_switch_table(struct objtool_file *file,
 {
 	struct rela *text_rela, *rodata_rela;
 	struct instruction *orig_insn = insn;
+	struct section *rodata_sec;
 	unsigned long table_offset;
 
 	/*
@@ -953,10 +954,13 @@ static struct rela *find_switch_table(struct objtool_file *file,
 		/* look for a relocation which references .rodata */
 		text_rela = find_rela_by_dest_range(insn->sec, insn->offset,
 						    insn->len);
-		if (!text_rela || text_rela->sym != file->rodata->sym)
+		if (!text_rela || text_rela->sym->type != STT_SECTION ||
+		    !text_rela->sym->sec->rodata)
 			continue;
 
 		table_offset = text_rela->addend;
+		rodata_sec = text_rela->sym->sec;
+
 		if (text_rela->type == R_X86_64_PC32)
 			table_offset += 4;
 
@@ -964,10 +968,10 @@ static struct rela *find_switch_table(struct objtool_file *file,
 		 * Make sure the .rodata address isn't associated with a
 		 * symbol.  gcc jump tables are anonymous data.
 		 */
-		if (find_symbol_containing(file->rodata, table_offset))
+		if (find_symbol_containing(rodata_sec, table_offset))
 			continue;
 
-		rodata_rela = find_rela_by_dest(file->rodata, table_offset);
+		rodata_rela = find_rela_by_dest(rodata_sec, table_offset);
 		if (rodata_rela) {
 			/*
 			 * Use of RIP-relative switch jumps is quite rare, and
@@ -1052,7 +1056,7 @@ static int add_switch_table_alts(struct objtool_file *file)
 	struct symbol *func;
 	int ret;
 
-	if (!file->rodata || !file->rodata->rela)
+	if (!file->rodata)
 		return 0;
 
 	for_each_sec(file, sec) {
@@ -1198,10 +1202,33 @@ static int read_retpoline_hints(struct objtool_file *file)
 	return 0;
 }
 
+static void mark_rodata(struct objtool_file *file)
+{
+	struct section *sec;
+	bool found = false;
+
+	/*
+	 * This searches for the .rodata section or multiple .rodata.func_name
+	 * sections if -fdata-sections is being used. The .str.1.1 and .str.1.8
+	 * rodata sections are ignored as they don't contain jump tables.
+	 */
+	for_each_sec(file, sec) {
+		if (!strncmp(sec->name, ".rodata", 7) &&
+		    !strstr(sec->name, ".str1.")) {
+			sec->rodata = true;
+			found = true;
+		}
+	}
+
+	file->rodata = found;
+}
+
 static int decode_sections(struct objtool_file *file)
 {
 	int ret;
 
+	mark_rodata(file);
+
 	ret = decode_instructions(file);
 	if (ret)
 		return ret;
@@ -2171,7 +2198,6 @@ int check(const char *_objname, bool orc)
 	INIT_LIST_HEAD(&file.insn_list);
 	hash_init(file.insn_hash);
 	file.whitelist = find_section_by_name(file.elf, ".discard.func_stack_frame_non_standard");
-	file.rodata = find_section_by_name(file.elf, ".rodata");
 	file.c_file = find_section_by_name(file.elf, ".comment");
 	file.ignore_unreachables = no_unreachable;
 	file.hints = false;
diff --git a/tools/objtool/check.h b/tools/objtool/check.h
index 95700a2bcb7c..e6e8a655b556 100644
--- a/tools/objtool/check.h
+++ b/tools/objtool/check.h
@@ -60,8 +60,8 @@ struct objtool_file {
 	struct elf *elf;
 	struct list_head insn_list;
 	DECLARE_HASHTABLE(insn_hash, 16);
-	struct section *rodata, *whitelist;
-	bool ignore_unreachables, c_file, hints;
+	struct section *whitelist;
+	bool ignore_unreachables, c_file, hints, rodata;
 };
 
 int check(const char *objname, bool orc);
diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index 7ec85d567598..6dbb9fae0f9d 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -301,7 +301,7 @@ static int read_symbols(struct elf *elf)
 			if (sym->type != STT_FUNC)
 				continue;
 			sym->pfunc = sym->cfunc = sym;
-			coldstr = strstr(sym->name, ".cold.");
+			coldstr = strstr(sym->name, ".cold");
 			if (!coldstr)
 				continue;
 
@@ -379,6 +379,7 @@ static int read_relas(struct elf *elf)
 			rela->offset = rela->rela.r_offset;
 			symndx = GELF_R_SYM(rela->rela.r_info);
 			rela->sym = find_symbol_by_index(elf, symndx);
+			rela->rela_sec = sec;
 			if (!rela->sym) {
 				WARN("can't find rela entry symbol %d for %s",
 				     symndx, sec->name);
diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h
index de5cd2ddded9..bc97ed86b9cd 100644
--- a/tools/objtool/elf.h
+++ b/tools/objtool/elf.h
@@ -48,7 +48,7 @@ struct section {
 	char *name;
 	int idx;
 	unsigned int len;
-	bool changed, text;
+	bool changed, text, rodata;
 };
 
 struct symbol {
@@ -68,6 +68,7 @@ struct rela {
 	struct list_head list;
 	struct hlist_node hash;
 	GElf_Rela rela;
+	struct section *rela_sec;
 	struct symbol *sym;
 	unsigned int type;
 	unsigned long offset;
diff --git a/tools/objtool/special.c b/tools/objtool/special.c
index 84f001d52322..50af4e1274b3 100644
--- a/tools/objtool/special.c
+++ b/tools/objtool/special.c
@@ -30,9 +30,9 @@
 #define EX_ORIG_OFFSET		0
 #define EX_NEW_OFFSET		4
 
-#define JUMP_ENTRY_SIZE		24
+#define JUMP_ENTRY_SIZE		16
 #define JUMP_ORIG_OFFSET	0
-#define JUMP_NEW_OFFSET		8
+#define JUMP_NEW_OFFSET		4
 
 #define ALT_ENTRY_SIZE		13
 #define ALT_ORIG_OFFSET		0
diff --git a/tools/pci/Build b/tools/pci/Build
new file mode 100644
index 000000000000..c375aea21790
--- /dev/null
+++ b/tools/pci/Build
@@ -0,0 +1 @@
+pcitest-y += pcitest.o
diff --git a/tools/pci/Makefile b/tools/pci/Makefile
new file mode 100644
index 000000000000..46e4c2f318c9
--- /dev/null
+++ b/tools/pci/Makefile
@@ -0,0 +1,53 @@
+# SPDX-License-Identifier: GPL-2.0
+include ../scripts/Makefile.include
+
+bindir ?= /usr/bin
+
+ifeq ($(srctree),)
+srctree := $(patsubst %/,%,$(dir $(CURDIR)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+endif
+
+# Do not use make's built-in rules
+# (this improves performance and avoids hard-to-debug behaviour);
+MAKEFLAGS += -r
+
+CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include
+
+ALL_TARGETS := pcitest pcitest.sh
+ALL_PROGRAMS := $(patsubst %,$(OUTPUT)%,$(ALL_TARGETS))
+
+all: $(ALL_PROGRAMS)
+
+export srctree OUTPUT CC LD CFLAGS
+include $(srctree)/tools/build/Makefile.include
+
+#
+# We need the following to be outside of kernel tree
+#
+$(OUTPUT)include/linux/: ../../include/uapi/linux/
+	mkdir -p $(OUTPUT)include/linux/ 2>&1 || true
+	ln -sf $(CURDIR)/../../include/uapi/linux/pcitest.h $@
+
+prepare: $(OUTPUT)include/linux/
+
+PCITEST_IN := $(OUTPUT)pcitest-in.o
+$(PCITEST_IN): prepare FORCE
+	$(Q)$(MAKE) $(build)=pcitest
+$(OUTPUT)pcitest: $(PCITEST_IN)
+	$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@
+
+clean:
+	rm -f $(ALL_PROGRAMS)
+	rm -rf $(OUTPUT)include/
+	find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.d' -delete
+
+install: $(ALL_PROGRAMS)
+	install -d -m 755 $(DESTDIR)$(bindir);		\
+	for program in $(ALL_PROGRAMS); do		\
+		install $$program $(DESTDIR)$(bindir);	\
+	done
+
+FORCE:
+
+.PHONY: all install clean FORCE prepare
diff --git a/tools/pci/pcitest.c b/tools/pci/pcitest.c
index af146bb03b4d..ec4d51f3308b 100644
--- a/tools/pci/pcitest.c
+++ b/tools/pci/pcitest.c
@@ -23,7 +23,6 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <sys/ioctl.h>
-#include <time.h>
 #include <unistd.h>
 
 #include <linux/pcitest.h>
@@ -48,17 +47,15 @@ struct pci_test {
 	unsigned long	size;
 };
 
-static int run_test(struct pci_test *test)
+static void run_test(struct pci_test *test)
 {
 	long ret;
 	int fd;
-	struct timespec start, end;
-	double time;
 
 	fd = open(test->device, O_RDWR);
 	if (fd < 0) {
 		perror("can't open PCI Endpoint Test device");
-		return fd;
+		return;
 	}
 
 	if (test->barnum >= 0 && test->barnum <= 5) {
diff --git a/tools/perf/Documentation/build-xed.txt b/tools/perf/Documentation/build-xed.txt
new file mode 100644
index 000000000000..6222c1e7231f
--- /dev/null
+++ b/tools/perf/Documentation/build-xed.txt
@@ -0,0 +1,19 @@
+
+For --xed the xed tool is needed. Here is how to install it:
+
+  $ git clone https://github.com/intelxed/mbuild.git mbuild
+  $ git clone https://github.com/intelxed/xed
+  $ cd xed
+  $ ./mfile.py --share
+  $ ./mfile.py examples
+  $ sudo ./mfile.py --prefix=/usr/local install
+  $ sudo ldconfig
+  $ sudo cp obj/examples/xed /usr/local/bin
+
+Basic xed testing:
+
+  $ xed | head -3
+  ERROR: required argument(s) were missing
+  Copyright (C) 2017, Intel Corporation. All rights reserved.
+  XED version: [v10.0-328-g7d62c8c49b7b]
+  $
diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt
index 76971d2e4164..115eaacc455f 100644
--- a/tools/perf/Documentation/intel-pt.txt
+++ b/tools/perf/Documentation/intel-pt.txt
@@ -106,7 +106,7 @@ in transaction, respectively.
 While it is possible to create scripts to analyze the data, an alternative
 approach is available to export the data to a sqlite or postgresql database.
 Refer to script export-to-sqlite.py or export-to-postgresql.py for more details,
-and to script call-graph-from-sql.py for an example of using the database.
+and to script exported-sql-viewer.py for an example of using the database.
 
 There is also script intel-pt-events.py which provides an example of how to
 unpack the raw data for power events and PTWRITE.
diff --git a/tools/perf/Documentation/itrace.txt b/tools/perf/Documentation/itrace.txt
index a3abe04c779d..c2182cbabde3 100644
--- a/tools/perf/Documentation/itrace.txt
+++ b/tools/perf/Documentation/itrace.txt
@@ -11,10 +11,11 @@
 		l	synthesize last branch entries (use with i or x)
 		s       skip initial number of events
 
-	The default is all events i.e. the same as --itrace=ibxwpe
+	The default is all events i.e. the same as --itrace=ibxwpe,
+	except for perf script where it is --itrace=ce
 
-	In addition, the period (default 100000) for instructions events
-	can be specified in units of:
+	In addition, the period (default 100000, except for perf script where it is 1)
+	for instructions events can be specified in units of:
 
 		i	instructions
 		t	ticks
diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt
index 236b9b97dfdb..667c14e56031 100644
--- a/tools/perf/Documentation/perf-list.txt
+++ b/tools/perf/Documentation/perf-list.txt
@@ -55,7 +55,6 @@ counted. The following modifiers exist:
  S - read sample value (PERF_SAMPLE_READ)
  D - pin the event to the PMU
  W - group is weak and will fallback to non-group if not schedulable,
-     only supported in 'perf stat' for now.
 
 The 'p' modifier can be used for specifying how precise the instruction
 address should be. The 'p' modifier can be specified multiple times:
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index afdafe2110a1..a2b37ce48094 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -383,6 +383,24 @@ include::itrace.txt[]
 	will be printed. Each entry has function name and file/line. Enabled by
 	default, disable with --no-inline.
 
+--insn-trace::
+	Show instruction stream for intel_pt traces. Combine with --xed to
+	show disassembly.
+
+--xed::
+	Run xed disassembler on output. Requires installing the xed disassembler.
+
+--call-trace::
+	Show call stream for intel_pt traces. The CPUs are interleaved, but
+	can be filtered with -C.
+
+--call-ret-trace::
+	Show call and return stream for intel_pt traces.
+
+--graph-function::
+	For itrace only show specified functions and their callees for
+	itrace. Multiple functions can be separated by comma.
+
 SEE ALSO
 --------
 linkperf:perf-record[1], linkperf:perf-script-perl[1],
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index 114fda12aa49..808b664343c9 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -242,6 +242,16 @@ Default is to monitor all CPUS.
 --hierarchy::
 	Enable hierarchy output.
 
+--overwrite::
+	Enable this to use just the most recent records, which helps in high core count
+	machines such as Knights Landing/Mill, but right now is disabled by default as
+	the pausing used in this technique is leading to loss of metadata events such
+	as PERF_RECORD_MMAP which makes 'perf top' unable to resolve samples, leading
+	to lots of unknown samples appearing on the UI. Enable this if you are in such
+	machines and profiling a workload that doesn't creates short lived threads and/or
+	doesn't uses many executable mmap operations. Work is being planed to solve
+	this situation, till then, this will remain disabled by default.
+
 --force::
 	Don't do ownership validation.
 
diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt
index 115db9e06ecd..e113450503d2 100644
--- a/tools/perf/Documentation/perf-trace.txt
+++ b/tools/perf/Documentation/perf-trace.txt
@@ -171,6 +171,11 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs.
 --kernel-syscall-graph::
 	 Show the kernel callchains on the syscall exit path.
 
+--max-events=N::
+	Stop after processing N events. Note that strace-like events are considered
+	only at exit time or when a syscall is interrupted, i.e. in those cases this
+	option is equivalent to the number of lines printed.
+
 --max-stack::
         Set the stack depth limit when parsing the callchain, anything
         beyond the specified depth will be ignored. Note that at this point
@@ -238,6 +243,68 @@ Trace syscalls, major and minor pagefaults:
   As you can see, there was major pagefault in python process, from
   CRYPTO_push_info_ routine which faulted somewhere in libcrypto.so.
 
+Trace the first 4 open, openat or open_by_handle_at syscalls (in the future more syscalls may match here):
+
+  $ perf trace -e open* --max-events 4
+  [root@jouet perf]# trace -e open* --max-events 4
+  2272.992 ( 0.037 ms): gnome-shell/1370 openat(dfd: CWD, filename: /proc/self/stat) = 31
+  2277.481 ( 0.139 ms): gnome-shell/3039 openat(dfd: CWD, filename: /proc/self/stat) = 65
+  3026.398 ( 0.076 ms): gnome-shell/3039 openat(dfd: CWD, filename: /proc/self/stat) = 65
+  4294.665 ( 0.015 ms): sed/15879 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC) = 3
+  $
+
+Trace the first minor page fault when running a workload:
+
+  # perf trace -F min --max-stack=7 --max-events 1 sleep 1
+     0.000 ( 0.000 ms): sleep/18006 minfault [__clear_user+0x1a] => 0x5626efa56080 (?k)
+                                       __clear_user ([kernel.kallsyms])
+                                       load_elf_binary ([kernel.kallsyms])
+                                       search_binary_handler ([kernel.kallsyms])
+                                       __do_execve_file.isra.33 ([kernel.kallsyms])
+                                       __x64_sys_execve ([kernel.kallsyms])
+                                       do_syscall_64 ([kernel.kallsyms])
+                                       entry_SYSCALL_64 ([kernel.kallsyms])
+  #
+
+Trace the next min page page fault to take place on the first CPU:
+
+  # perf trace -F min --call-graph=dwarf --max-events 1 --cpu 0
+     0.000 ( 0.000 ms): Web Content/17136 minfault [js::gc::Chunk::fetchNextDecommittedArena+0x4b] => 0x7fbe6181b000 (?.)
+                                       js::gc::FreeSpan::initAsEmpty (inlined)
+                                       js::gc::Arena::setAsNotAllocated (inlined)
+                                       js::gc::Chunk::fetchNextDecommittedArena (/usr/lib64/firefox/libxul.so)
+                                       js::gc::Chunk::allocateArena (/usr/lib64/firefox/libxul.so)
+                                       js::gc::GCRuntime::allocateArena (/usr/lib64/firefox/libxul.so)
+                                       js::gc::ArenaLists::allocateFromArena (/usr/lib64/firefox/libxul.so)
+                                       js::gc::GCRuntime::tryNewTenuredThing<JSString, (js::AllowGC)1> (inlined)
+                                       js::AllocateString<JSString, (js::AllowGC)1> (/usr/lib64/firefox/libxul.so)
+                                       js::Allocate<JSThinInlineString, (js::AllowGC)1> (inlined)
+                                       JSThinInlineString::new_<(js::AllowGC)1> (inlined)
+                                       AllocateInlineString<(js::AllowGC)1, unsigned char> (inlined)
+                                       js::ConcatStrings<(js::AllowGC)1> (/usr/lib64/firefox/libxul.so)
+                                       [0x18b26e6bc2bd] (/tmp/perf-17136.map)
+  #
+
+Trace the next two sched:sched_switch events, four block:*_plug events, the
+next block:*_unplug and the next three net:*dev_queue events, this last one
+with a backtrace of at most 16 entries, system wide:
+
+  # perf trace -e sched:*switch/nr=2/,block:*_plug/nr=4/,block:*_unplug/nr=1/,net:*dev_queue/nr=3,max-stack=16/
+     0.000 :0/0 sched:sched_switch:swapper/2:0 [120] S ==> rcu_sched:10 [120]
+     0.015 rcu_sched/10 sched:sched_switch:rcu_sched:10 [120] R ==> swapper/2:0 [120]
+   254.198 irq/50-iwlwifi/680 net:net_dev_queue:dev=wlp3s0 skbaddr=0xffff93498051f600 len=66
+                                       __dev_queue_xmit ([kernel.kallsyms])
+   273.977 :0/0 net:net_dev_queue:dev=wlp3s0 skbaddr=0xffff93498051f600 len=78
+                                       __dev_queue_xmit ([kernel.kallsyms])
+   274.007 :0/0 net:net_dev_queue:dev=wlp3s0 skbaddr=0xffff93498051ff00 len=78
+                                       __dev_queue_xmit ([kernel.kallsyms])
+  2930.140 kworker/u16:58/2722 block:block_plug:[kworker/u16:58]
+  2930.162 kworker/u16:58/2722 block:block_unplug:[kworker/u16:58] 1
+  4466.094 jbd2/dm-2-8/748 block:block_plug:[jbd2/dm-2-8]
+  8050.123 kworker/u16:30/2694 block:block_plug:[kworker/u16:30]
+  8050.271 kworker/u16:30/2694 block:block_plug:[kworker/u16:30]
+  #
+
 SEE ALSO
 --------
 linkperf:perf-record[1], linkperf:perf-script[1]
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 0be411695379..d95655489f7e 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -1,4 +1,5 @@
 include ../scripts/Makefile.include
+include ../scripts/Makefile.arch
 
 # The default target of this Makefile is...
 all:
@@ -385,6 +386,8 @@ export INSTALL SHELL_PATH
 SHELL = $(SHELL_PATH)
 
 linux_uapi_dir := $(srctree)/tools/include/uapi/linux
+asm_generic_uapi_dir := $(srctree)/tools/include/uapi/asm-generic
+arch_asm_uapi_dir := $(srctree)/tools/arch/$(SRCARCH)/include/uapi/asm/
 
 beauty_outdir := $(OUTPUT)trace/beauty/generated
 beauty_ioctl_outdir := $(beauty_outdir)/ioctl
@@ -460,6 +463,18 @@ madvise_behavior_tbl := $(srctree)/tools/perf/trace/beauty/madvise_behavior.sh
 $(madvise_behavior_array): $(madvise_hdr_dir)/mman-common.h $(madvise_behavior_tbl)
 	$(Q)$(SHELL) '$(madvise_behavior_tbl)' $(madvise_hdr_dir) > $@
 
+mmap_flags_array := $(beauty_outdir)/mmap_flags_array.c
+mmap_flags_tbl := $(srctree)/tools/perf/trace/beauty/mmap_flags.sh
+
+$(mmap_flags_array): $(asm_generic_uapi_dir)/mman.h $(asm_generic_uapi_dir)/mman-common.h $(arch_asm_uapi_dir)/mman.h $(mmap_flags_tbl)
+	$(Q)$(SHELL) '$(mmap_flags_tbl)' $(asm_generic_uapi_dir) $(arch_asm_uapi_dir) > $@
+
+mount_flags_array := $(beauty_outdir)/mount_flags_array.c
+mount_flags_tbl := $(srctree)/tools/perf/trace/beauty/mount_flags.sh
+
+$(mount_flags_array): $(linux_uapi_dir)/fs.h $(mount_flags_tbl)
+	$(Q)$(SHELL) '$(mount_flags_tbl)' $(linux_uapi_dir) > $@
+
 prctl_option_array := $(beauty_outdir)/prctl_option_array.c
 prctl_hdr_dir := $(srctree)/tools/include/uapi/linux/
 prctl_option_tbl := $(srctree)/tools/perf/trace/beauty/prctl_option.sh
@@ -577,6 +592,8 @@ prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioc
 	$(socket_ipproto_array) \
 	$(vhost_virtio_ioctl_array) \
 	$(madvise_behavior_array) \
+	$(mmap_flags_array) \
+	$(mount_flags_array) \
 	$(perf_ioctl_array) \
 	$(prctl_option_array) \
 	$(arch_errno_name_array)
@@ -779,7 +796,9 @@ endif
 ifndef NO_LIBBPF
 	$(call QUIET_INSTALL, bpf-headers) \
 		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf'; \
-		$(INSTALL) include/bpf/*.h -t '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf'
+		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf/linux'; \
+		$(INSTALL) include/bpf/*.h -t '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf'; \
+		$(INSTALL) include/bpf/linux/*.h -t '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf/linux'
 	$(call QUIET_INSTALL, bpf-examples) \
 		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perf_examples_instdir_SQ)/bpf'; \
 		$(INSTALL) examples/bpf/*.c -t '$(DESTDIR_SQ)$(perf_examples_instdir_SQ)/bpf'
@@ -861,6 +880,8 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea
 		$(OUTPUT)tests/llvm-src-{base,kbuild,prologue,relocation}.c \
 		$(OUTPUT)pmu-events/pmu-events.c \
 		$(OUTPUT)$(madvise_behavior_array) \
+		$(OUTPUT)$(mmap_flags_array) \
+		$(OUTPUT)$(mount_flags_array) \
 		$(OUTPUT)$(drm_ioctl_array) \
 		$(OUTPUT)$(pkey_alloc_access_rights_array) \
 		$(OUTPUT)$(sndrv_ctl_ioctl_array) \
diff --git a/tools/perf/arch/arm64/annotate/instructions.c b/tools/perf/arch/arm64/annotate/instructions.c
index 6688977e4ac7..76c6345a57d5 100644
--- a/tools/perf/arch/arm64/annotate/instructions.c
+++ b/tools/perf/arch/arm64/annotate/instructions.c
@@ -8,6 +8,63 @@ struct arm64_annotate {
 		jump_insn;
 };
 
+static int arm64_mov__parse(struct arch *arch __maybe_unused,
+			    struct ins_operands *ops,
+			    struct map_symbol *ms __maybe_unused)
+{
+	char *s = strchr(ops->raw, ','), *target, *endptr;
+
+	if (s == NULL)
+		return -1;
+
+	*s = '\0';
+	ops->source.raw = strdup(ops->raw);
+	*s = ',';
+
+	if (ops->source.raw == NULL)
+		return -1;
+
+	target = ++s;
+	ops->target.raw = strdup(target);
+	if (ops->target.raw == NULL)
+		goto out_free_source;
+
+	ops->target.addr = strtoull(target, &endptr, 16);
+	if (endptr == target)
+		goto out_free_target;
+
+	s = strchr(endptr, '<');
+	if (s == NULL)
+		goto out_free_target;
+	endptr = strchr(s + 1, '>');
+	if (endptr == NULL)
+		goto out_free_target;
+
+	*endptr = '\0';
+	*s = ' ';
+	ops->target.name = strdup(s);
+	*s = '<';
+	*endptr = '>';
+	if (ops->target.name == NULL)
+		goto out_free_target;
+
+	return 0;
+
+out_free_target:
+	zfree(&ops->target.raw);
+out_free_source:
+	zfree(&ops->source.raw);
+	return -1;
+}
+
+static int mov__scnprintf(struct ins *ins, char *bf, size_t size,
+			  struct ins_operands *ops);
+
+static struct ins_ops arm64_mov_ops = {
+	.parse	   = arm64_mov__parse,
+	.scnprintf = mov__scnprintf,
+};
+
 static struct ins_ops *arm64__associate_instruction_ops(struct arch *arch, const char *name)
 {
 	struct arm64_annotate *arm = arch->priv;
@@ -21,7 +78,7 @@ static struct ins_ops *arm64__associate_instruction_ops(struct arch *arch, const
 	else if (!strcmp(name, "ret"))
 		ops = &ret_ops;
 	else
-		return NULL;
+		ops = &arm64_mov_ops;
 
 	arch__associate_ins_ops(arch, name, ops);
 	return ops;
diff --git a/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl b/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl
index 2dbb8cade048..c88fd32563eb 100755
--- a/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl
+++ b/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl
@@ -23,7 +23,7 @@ create_table_from_c()
 {
 	local sc nr last_sc
 
-	create_table_exe=`mktemp /tmp/create-table-XXXXXX`
+	create_table_exe=`mktemp ${TMPDIR:-/tmp}/create-table-XXXXXX`
 
 	{
 
diff --git a/tools/perf/arch/powerpc/util/book3s_hv_exits.h b/tools/perf/arch/powerpc/util/book3s_hv_exits.h
index 853b95d1e139..2011376c7ab5 100644
--- a/tools/perf/arch/powerpc/util/book3s_hv_exits.h
+++ b/tools/perf/arch/powerpc/util/book3s_hv_exits.h
@@ -15,7 +15,6 @@
 	{0x400, "INST_STORAGE"}, \
 	{0x480, "INST_SEGMENT"}, \
 	{0x500, "EXTERNAL"}, \
-	{0x501, "EXTERNAL_LEVEL"}, \
 	{0x502, "EXTERNAL_HV"}, \
 	{0x600, "ALIGNMENT"}, \
 	{0x700, "PROGRAM"}, \
diff --git a/tools/perf/arch/s390/annotate/instructions.c b/tools/perf/arch/s390/annotate/instructions.c
index cee4e2f7c057..de0dd66dbb48 100644
--- a/tools/perf/arch/s390/annotate/instructions.c
+++ b/tools/perf/arch/s390/annotate/instructions.c
@@ -100,8 +100,6 @@ out_free_source:
 	return -1;
 }
 
-static int mov__scnprintf(struct ins *ins, char *bf, size_t size,
-			  struct ins_operands *ops);
 
 static struct ins_ops s390_mov_ops = {
 	.parse	   = s390_mov__parse,
diff --git a/tools/perf/arch/sparc/Makefile b/tools/perf/arch/sparc/Makefile
index 7fbca175099e..275dea7ff59a 100644
--- a/tools/perf/arch/sparc/Makefile
+++ b/tools/perf/arch/sparc/Makefile
@@ -1,3 +1,5 @@
 ifndef NO_DWARF
 PERF_HAVE_DWARF_REGS := 1
 endif
+
+PERF_HAVE_JITDUMP := 1
diff --git a/tools/perf/arch/sparc/annotate/instructions.c b/tools/perf/arch/sparc/annotate/instructions.c
new file mode 100644
index 000000000000..2614c010c235
--- /dev/null
+++ b/tools/perf/arch/sparc/annotate/instructions.c
@@ -0,0 +1,169 @@
+// SPDX-License-Identifier: GPL-2.0
+
+static int is_branch_cond(const char *cond)
+{
+	if (cond[0] == '\0')
+		return 1;
+
+	if (cond[0] == 'a' && cond[1] == '\0')
+		return 1;
+
+	if (cond[0] == 'c' &&
+	    (cond[1] == 'c' || cond[1] == 's') &&
+	    cond[2] == '\0')
+		return 1;
+
+	if (cond[0] == 'e' &&
+	    (cond[1] == '\0' ||
+	     (cond[1] == 'q' && cond[2] == '\0')))
+		return 1;
+
+	if (cond[0] == 'g' &&
+	    (cond[1] == '\0' ||
+	     (cond[1] == 't' && cond[2] == '\0') ||
+	     (cond[1] == 'e' && cond[2] == '\0') ||
+	     (cond[1] == 'e' && cond[2] == 'u' && cond[3] == '\0')))
+		return 1;
+
+	if (cond[0] == 'l' &&
+	    (cond[1] == '\0' ||
+	     (cond[1] == 't' && cond[2] == '\0') ||
+	     (cond[1] == 'u' && cond[2] == '\0') ||
+	     (cond[1] == 'e' && cond[2] == '\0') ||
+	     (cond[1] == 'e' && cond[2] == 'u' && cond[3] == '\0')))
+		return 1;
+
+	if (cond[0] == 'n' &&
+	    (cond[1] == '\0' ||
+	     (cond[1] == 'e' && cond[2] == '\0') ||
+	     (cond[1] == 'z' && cond[2] == '\0') ||
+	     (cond[1] == 'e' && cond[2] == 'g' && cond[3] == '\0')))
+		return 1;
+
+	if (cond[0] == 'b' &&
+	    cond[1] == 'p' &&
+	    cond[2] == 'o' &&
+	    cond[3] == 's' &&
+	    cond[4] == '\0')
+		return 1;
+
+	if (cond[0] == 'v' &&
+	    (cond[1] == 'c' || cond[1] == 's') &&
+	    cond[2] == '\0')
+		return 1;
+
+	if (cond[0] == 'b' &&
+	    cond[1] == 'z' &&
+	    cond[2] == '\0')
+		return 1;
+
+	return 0;
+}
+
+static int is_branch_reg_cond(const char *cond)
+{
+	if ((cond[0] == 'n' || cond[0] == 'l') &&
+	    cond[1] == 'z' &&
+	    cond[2] == '\0')
+		return 1;
+
+	if (cond[0] == 'z' &&
+	    cond[1] == '\0')
+		return 1;
+
+	if ((cond[0] == 'g' || cond[0] == 'l') &&
+	    cond[1] == 'e' &&
+	    cond[2] == 'z' &&
+	    cond[3] == '\0')
+		return 1;
+
+	if (cond[0] == 'g' &&
+	    cond[1] == 'z' &&
+	    cond[2] == '\0')
+		return 1;
+
+	return 0;
+}
+
+static int is_branch_float_cond(const char *cond)
+{
+	if (cond[0] == '\0')
+		return 1;
+
+	if ((cond[0] == 'a' || cond[0] == 'e' ||
+	     cond[0] == 'z' || cond[0] == 'g' ||
+	     cond[0] == 'l' || cond[0] == 'n' ||
+	     cond[0] == 'o' || cond[0] == 'u') &&
+	    cond[1] == '\0')
+		return 1;
+
+	if (((cond[0] == 'g' && cond[1] == 'e') ||
+	     (cond[0] == 'l' && (cond[1] == 'e' ||
+				 cond[1] == 'g')) ||
+	     (cond[0] == 'n' && (cond[1] == 'e' ||
+				 cond[1] == 'z')) ||
+	     (cond[0] == 'u' && (cond[1] == 'e' ||
+				 cond[1] == 'g' ||
+				 cond[1] == 'l'))) &&
+	    cond[2] == '\0')
+		return 1;
+
+	if (cond[0] == 'u' &&
+	    (cond[1] == 'g' || cond[1] == 'l') &&
+	    cond[2] == 'e' &&
+	    cond[3] == '\0')
+		return 1;
+
+	return 0;
+}
+
+static struct ins_ops *sparc__associate_instruction_ops(struct arch *arch, const char *name)
+{
+	struct ins_ops *ops = NULL;
+
+	if (!strcmp(name, "call") ||
+	    !strcmp(name, "jmp") ||
+	    !strcmp(name, "jmpl")) {
+		ops = &call_ops;
+	} else if (!strcmp(name, "ret") ||
+		   !strcmp(name, "retl") ||
+		   !strcmp(name, "return")) {
+		ops = &ret_ops;
+	} else if (!strcmp(name, "mov")) {
+		ops = &mov_ops;
+	} else {
+		if (name[0] == 'c' &&
+		    (name[1] == 'w' || name[1] == 'x'))
+			name += 2;
+
+		if (name[0] == 'b') {
+			const char *cond = name + 1;
+
+			if (cond[0] == 'r') {
+				if (is_branch_reg_cond(cond + 1))
+					ops = &jump_ops;
+			} else if (is_branch_cond(cond)) {
+				ops = &jump_ops;
+			}
+		} else if (name[0] == 'f' && name[1] == 'b') {
+			if (is_branch_float_cond(name + 2))
+				ops = &jump_ops;
+		}
+	}
+
+	if (ops)
+		arch__associate_ins_ops(arch, name, ops);
+
+	return ops;
+}
+
+static int sparc__annotate_init(struct arch *arch, char *cpuid __maybe_unused)
+{
+	if (!arch->initialized) {
+		arch->initialized = true;
+		arch->associate_instruction_ops = sparc__associate_instruction_ops;
+		arch->objdump.comment_char = '#';
+	}
+
+	return 0;
+}
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 830481b8db26..93d679eaf1f4 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -283,12 +283,11 @@ out_put:
 	return ret;
 }
 
-static int process_feature_event(struct perf_tool *tool,
-				 union perf_event *event,
-				 struct perf_session *session)
+static int process_feature_event(struct perf_session *session,
+				 union perf_event *event)
 {
 	if (event->feat.feat_id < HEADER_LAST_FEATURE)
-		return perf_event__process_feature(tool, event, session);
+		return perf_event__process_feature(session, event);
 	return 0;
 }
 
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index a3b346359ba0..eda41673c4f3 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -86,12 +86,10 @@ static int perf_event__drop_oe(struct perf_tool *tool __maybe_unused,
 }
 #endif
 
-static int perf_event__repipe_op2_synth(struct perf_tool *tool,
-					union perf_event *event,
-					struct perf_session *session
-					__maybe_unused)
+static int perf_event__repipe_op2_synth(struct perf_session *session,
+					union perf_event *event)
 {
-	return perf_event__repipe_synth(tool, event);
+	return perf_event__repipe_synth(session->tool, event);
 }
 
 static int perf_event__repipe_attr(struct perf_tool *tool,
@@ -133,10 +131,10 @@ static int copy_bytes(struct perf_inject *inject, int fd, off_t size)
 	return 0;
 }
 
-static s64 perf_event__repipe_auxtrace(struct perf_tool *tool,
-				       union perf_event *event,
-				       struct perf_session *session)
+static s64 perf_event__repipe_auxtrace(struct perf_session *session,
+				       union perf_event *event)
 {
+	struct perf_tool *tool = session->tool;
 	struct perf_inject *inject = container_of(tool, struct perf_inject,
 						  tool);
 	int ret;
@@ -174,9 +172,8 @@ static s64 perf_event__repipe_auxtrace(struct perf_tool *tool,
 #else
 
 static s64
-perf_event__repipe_auxtrace(struct perf_tool *tool __maybe_unused,
-			    union perf_event *event __maybe_unused,
-			    struct perf_session *session __maybe_unused)
+perf_event__repipe_auxtrace(struct perf_session *session __maybe_unused,
+			    union perf_event *event __maybe_unused)
 {
 	pr_err("AUX area tracing not supported\n");
 	return -EINVAL;
@@ -362,26 +359,24 @@ static int perf_event__repipe_exit(struct perf_tool *tool,
 	return err;
 }
 
-static int perf_event__repipe_tracing_data(struct perf_tool *tool,
-					   union perf_event *event,
-					   struct perf_session *session)
+static int perf_event__repipe_tracing_data(struct perf_session *session,
+					   union perf_event *event)
 {
 	int err;
 
-	perf_event__repipe_synth(tool, event);
-	err = perf_event__process_tracing_data(tool, event, session);
+	perf_event__repipe_synth(session->tool, event);
+	err = perf_event__process_tracing_data(session, event);
 
 	return err;
 }
 
-static int perf_event__repipe_id_index(struct perf_tool *tool,
-				       union perf_event *event,
-				       struct perf_session *session)
+static int perf_event__repipe_id_index(struct perf_session *session,
+				       union perf_event *event)
 {
 	int err;
 
-	perf_event__repipe_synth(tool, event);
-	err = perf_event__process_id_index(tool, event, session);
+	perf_event__repipe_synth(session->tool, event);
+	err = perf_event__process_id_index(session, event);
 
 	return err;
 }
@@ -803,7 +798,8 @@ int cmd_inject(int argc, const char **argv)
 			   "kallsyms pathname"),
 		OPT_BOOLEAN('f', "force", &data.force, "don't complain, do it"),
 		OPT_CALLBACK_OPTARG(0, "itrace", &inject.itrace_synth_opts,
-				    NULL, "opts", "Instruction Tracing options",
+				    NULL, "opts", "Instruction Tracing options\n"
+				    ITRACE_HELP,
 				    itrace_parse_synth_opts),
 		OPT_BOOLEAN(0, "strip", &inject.strip,
 			    "strip non-synthesized events (use with --itrace)"),
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 22ebeb92ac51..488779bc4c8d 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -106,9 +106,12 @@ static bool switch_output_time(struct record *rec)
 	       trigger_is_ready(&switch_output_trigger);
 }
 
-static int record__write(struct record *rec, void *bf, size_t size)
+static int record__write(struct record *rec, struct perf_mmap *map __maybe_unused,
+			 void *bf, size_t size)
 {
-	if (perf_data__write(rec->session->data, bf, size) < 0) {
+	struct perf_data_file *file = &rec->session->data->file;
+
+	if (perf_data_file__write(file, bf, size) < 0) {
 		pr_err("failed to write perf data, error: %m\n");
 		return -1;
 	}
@@ -127,15 +130,15 @@ static int process_synthesized_event(struct perf_tool *tool,
 				     struct machine *machine __maybe_unused)
 {
 	struct record *rec = container_of(tool, struct record, tool);
-	return record__write(rec, event, event->header.size);
+	return record__write(rec, NULL, event, event->header.size);
 }
 
-static int record__pushfn(void *to, void *bf, size_t size)
+static int record__pushfn(struct perf_mmap *map, void *to, void *bf, size_t size)
 {
 	struct record *rec = to;
 
 	rec->samples++;
-	return record__write(rec, bf, size);
+	return record__write(rec, map, bf, size);
 }
 
 static volatile int done;
@@ -170,6 +173,7 @@ static void record__sig_exit(void)
 #ifdef HAVE_AUXTRACE_SUPPORT
 
 static int record__process_auxtrace(struct perf_tool *tool,
+				    struct perf_mmap *map,
 				    union perf_event *event, void *data1,
 				    size_t len1, void *data2, size_t len2)
 {
@@ -197,21 +201,21 @@ static int record__process_auxtrace(struct perf_tool *tool,
 	if (padding)
 		padding = 8 - padding;
 
-	record__write(rec, event, event->header.size);
-	record__write(rec, data1, len1);
+	record__write(rec, map, event, event->header.size);
+	record__write(rec, map, data1, len1);
 	if (len2)
-		record__write(rec, data2, len2);
-	record__write(rec, &pad, padding);
+		record__write(rec, map, data2, len2);
+	record__write(rec, map, &pad, padding);
 
 	return 0;
 }
 
 static int record__auxtrace_mmap_read(struct record *rec,
-				      struct auxtrace_mmap *mm)
+				      struct perf_mmap *map)
 {
 	int ret;
 
-	ret = auxtrace_mmap__read(mm, rec->itr, &rec->tool,
+	ret = auxtrace_mmap__read(map, rec->itr, &rec->tool,
 				  record__process_auxtrace);
 	if (ret < 0)
 		return ret;
@@ -223,11 +227,11 @@ static int record__auxtrace_mmap_read(struct record *rec,
 }
 
 static int record__auxtrace_mmap_read_snapshot(struct record *rec,
-					       struct auxtrace_mmap *mm)
+					       struct perf_mmap *map)
 {
 	int ret;
 
-	ret = auxtrace_mmap__read_snapshot(mm, rec->itr, &rec->tool,
+	ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool,
 					   record__process_auxtrace,
 					   rec->opts.auxtrace_snapshot_size);
 	if (ret < 0)
@@ -245,13 +249,12 @@ static int record__auxtrace_read_snapshot_all(struct record *rec)
 	int rc = 0;
 
 	for (i = 0; i < rec->evlist->nr_mmaps; i++) {
-		struct auxtrace_mmap *mm =
-				&rec->evlist->mmap[i].auxtrace_mmap;
+		struct perf_mmap *map = &rec->evlist->mmap[i];
 
-		if (!mm->base)
+		if (!map->auxtrace_mmap.base)
 			continue;
 
-		if (record__auxtrace_mmap_read_snapshot(rec, mm) != 0) {
+		if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) {
 			rc = -1;
 			goto out;
 		}
@@ -295,7 +298,7 @@ static int record__auxtrace_init(struct record *rec)
 
 static inline
 int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
-			       struct auxtrace_mmap *mm __maybe_unused)
+			       struct perf_mmap *map __maybe_unused)
 {
 	return 0;
 }
@@ -388,7 +391,12 @@ try_again:
 					ui__warning("%s\n", msg);
 				goto try_again;
 			}
-
+			if ((errno == EINVAL || errno == EBADF) &&
+			    pos->leader != pos &&
+			    pos->weak_group) {
+			        pos = perf_evlist__reset_weak_group(evlist, pos);
+				goto try_again;
+			}
 			rc = -errno;
 			perf_evsel__open_strerror(pos, &opts->target,
 						  errno, msg, sizeof(msg));
@@ -529,17 +537,17 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli
 		return 0;
 
 	for (i = 0; i < evlist->nr_mmaps; i++) {
-		struct auxtrace_mmap *mm = &maps[i].auxtrace_mmap;
+		struct perf_mmap *map = &maps[i];
 
-		if (maps[i].base) {
-			if (perf_mmap__push(&maps[i], rec, record__pushfn) != 0) {
+		if (map->base) {
+			if (perf_mmap__push(map, rec, record__pushfn) != 0) {
 				rc = -1;
 				goto out;
 			}
 		}
 
-		if (mm->base && !rec->opts.auxtrace_snapshot_mode &&
-		    record__auxtrace_mmap_read(rec, mm) != 0) {
+		if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
+		    record__auxtrace_mmap_read(rec, map) != 0) {
 			rc = -1;
 			goto out;
 		}
@@ -550,7 +558,7 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli
 	 * at least one event.
 	 */
 	if (bytes_written != rec->bytes_written)
-		rc = record__write(rec, &finished_round_event, sizeof(finished_round_event));
+		rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event));
 
 	if (overwrite)
 		perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
@@ -589,6 +597,9 @@ static void record__init_features(struct record *rec)
 	if (!rec->opts.full_auxtrace)
 		perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
 
+	if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
+		perf_header__clear_feat(&session->header, HEADER_CLOCKID);
+
 	perf_header__clear_feat(&session->header, HEADER_STAT);
 }
 
@@ -758,7 +769,7 @@ static int record__synthesize(struct record *rec, bool tail)
 		 * We need to synthesize events first, because some
 		 * features works on top of them (on report side).
 		 */
-		err = perf_event__synthesize_attrs(tool, session,
+		err = perf_event__synthesize_attrs(tool, rec->evlist,
 						   process_synthesized_event);
 		if (err < 0) {
 			pr_err("Couldn't synthesize attrs.\n");
@@ -894,6 +905,9 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 
 	record__init_features(rec);
 
+	if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
+		session->header.env.clockid_res_ns = rec->opts.clockid_res_ns;
+
 	if (forks) {
 		err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
 						    argv, data->is_pipe,
@@ -1334,6 +1348,19 @@ static const struct clockid_map clockids[] = {
 	CLOCKID_END,
 };
 
+static int get_clockid_res(clockid_t clk_id, u64 *res_ns)
+{
+	struct timespec res;
+
+	*res_ns = 0;
+	if (!clock_getres(clk_id, &res))
+		*res_ns = res.tv_nsec + res.tv_sec * NSEC_PER_SEC;
+	else
+		pr_warning("WARNING: Failed to determine specified clock resolution.\n");
+
+	return 0;
+}
+
 static int parse_clockid(const struct option *opt, const char *str, int unset)
 {
 	struct record_opts *opts = (struct record_opts *)opt->value;
@@ -1357,7 +1384,7 @@ static int parse_clockid(const struct option *opt, const char *str, int unset)
 
 	/* if its a number, we're done */
 	if (sscanf(str, "%d", &opts->clockid) == 1)
-		return 0;
+		return get_clockid_res(opts->clockid, &opts->clockid_res_ns);
 
 	/* allow a "CLOCK_" prefix to the name */
 	if (!strncasecmp(str, "CLOCK_", 6))
@@ -1366,7 +1393,8 @@ static int parse_clockid(const struct option *opt, const char *str, int unset)
 	for (cm = clockids; cm->name; cm++) {
 		if (!strcasecmp(str, cm->name)) {
 			opts->clockid = cm->clockid;
-			return 0;
+			return get_clockid_res(opts->clockid,
+					       &opts->clockid_res_ns);
 		}
 	}
 
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index b2188e623e22..257c9c18cb7e 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -201,14 +201,13 @@ static void setup_forced_leader(struct report *report,
 		perf_evlist__force_leader(evlist);
 }
 
-static int process_feature_event(struct perf_tool *tool,
-				 union perf_event *event,
-				 struct perf_session *session __maybe_unused)
+static int process_feature_event(struct perf_session *session,
+				 union perf_event *event)
 {
-	struct report *rep = container_of(tool, struct report, tool);
+	struct report *rep = container_of(session->tool, struct report, tool);
 
 	if (event->feat.feat_id < HEADER_LAST_FEATURE)
-		return perf_event__process_feature(tool, event, session);
+		return perf_event__process_feature(session, event);
 
 	if (event->feat.feat_id != HEADER_LAST_FEATURE) {
 		pr_err("failed: wrong feature ID: %" PRIu64 "\n",
@@ -1106,7 +1105,7 @@ int cmd_report(int argc, const char **argv)
 	OPT_CALLBACK(0, "percentage", NULL, "relative|absolute",
 		     "how to display percentage of filtered entries", parse_filter_percentage),
 	OPT_CALLBACK_OPTARG(0, "itrace", &itrace_synth_opts, NULL, "opts",
-			    "Instruction Tracing options",
+			    "Instruction Tracing options\n" ITRACE_HELP,
 			    itrace_parse_synth_opts),
 	OPT_BOOLEAN(0, "full-source-path", &srcline_full_filename,
 			"Show full source file name path for source lines"),
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index ba481d73f910..b5bc85bd0bbe 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -44,6 +44,7 @@
 #include <sys/stat.h>
 #include <fcntl.h>
 #include <unistd.h>
+#include <subcmd/pager.h>
 
 #include "sane_ctype.h"
 
@@ -406,9 +407,10 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,
 					PERF_OUTPUT_WEIGHT))
 		return -EINVAL;
 
-	if (PRINT_FIELD(SYM) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR)) {
+	if (PRINT_FIELD(SYM) &&
+		!(evsel->attr.sample_type & (PERF_SAMPLE_IP|PERF_SAMPLE_ADDR))) {
 		pr_err("Display of symbols requested but neither sample IP nor "
-			   "sample address\nis selected. Hence, no addresses to convert "
+			   "sample address\navailable. Hence, no addresses to convert "
 		       "to symbols.\n");
 		return -EINVAL;
 	}
@@ -417,10 +419,9 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,
 		       "selected.\n");
 		return -EINVAL;
 	}
-	if (PRINT_FIELD(DSO) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR) &&
-	    !PRINT_FIELD(BRSTACK) && !PRINT_FIELD(BRSTACKSYM) && !PRINT_FIELD(BRSTACKOFF)) {
-		pr_err("Display of DSO requested but no address to convert.  Select\n"
-		       "sample IP, sample address, brstack, brstacksym, or brstackoff.\n");
+	if (PRINT_FIELD(DSO) &&
+		!(evsel->attr.sample_type & (PERF_SAMPLE_IP|PERF_SAMPLE_ADDR))) {
+		pr_err("Display of DSO requested but no address to convert.\n");
 		return -EINVAL;
 	}
 	if (PRINT_FIELD(SRCLINE) && !PRINT_FIELD(IP)) {
@@ -912,7 +913,7 @@ static int grab_bb(u8 *buffer, u64 start, u64 end,
 
 static int ip__fprintf_jump(uint64_t ip, struct branch_entry *en,
 			    struct perf_insn *x, u8 *inbuf, int len,
-			    int insn, FILE *fp)
+			    int insn, FILE *fp, int *total_cycles)
 {
 	int printed = fprintf(fp, "\t%016" PRIx64 "\t%-30s\t#%s%s%s%s", ip,
 			      dump_insn(x, ip, inbuf, len, NULL),
@@ -921,7 +922,8 @@ static int ip__fprintf_jump(uint64_t ip, struct branch_entry *en,
 			      en->flags.in_tx ? " INTX" : "",
 			      en->flags.abort ? " ABORT" : "");
 	if (en->flags.cycles) {
-		printed += fprintf(fp, " %d cycles", en->flags.cycles);
+		*total_cycles += en->flags.cycles;
+		printed += fprintf(fp, " %d cycles [%d]", en->flags.cycles, *total_cycles);
 		if (insn)
 			printed += fprintf(fp, " %.2f IPC", (float)insn / en->flags.cycles);
 	}
@@ -978,6 +980,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
 	u8 buffer[MAXBB];
 	unsigned off;
 	struct symbol *lastsym = NULL;
+	int total_cycles = 0;
 
 	if (!(br && br->nr))
 		return 0;
@@ -998,7 +1001,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
 		printed += ip__fprintf_sym(br->entries[nr - 1].from, thread,
 					   x.cpumode, x.cpu, &lastsym, attr, fp);
 		printed += ip__fprintf_jump(br->entries[nr - 1].from, &br->entries[nr - 1],
-					    &x, buffer, len, 0, fp);
+					    &x, buffer, len, 0, fp, &total_cycles);
 	}
 
 	/* Print all blocks */
@@ -1026,7 +1029,8 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
 
 			printed += ip__fprintf_sym(ip, thread, x.cpumode, x.cpu, &lastsym, attr, fp);
 			if (ip == end) {
-				printed += ip__fprintf_jump(ip, &br->entries[i], &x, buffer + off, len - off, insn, fp);
+				printed += ip__fprintf_jump(ip, &br->entries[i], &x, buffer + off, len - off, insn, fp,
+							    &total_cycles);
 				break;
 			} else {
 				printed += fprintf(fp, "\t%016" PRIx64 "\t%s\n", ip,
@@ -1104,6 +1108,35 @@ out:
 	return printed;
 }
 
+static const char *resolve_branch_sym(struct perf_sample *sample,
+				      struct perf_evsel *evsel,
+				      struct thread *thread,
+				      struct addr_location *al,
+				      u64 *ip)
+{
+	struct addr_location addr_al;
+	struct perf_event_attr *attr = &evsel->attr;
+	const char *name = NULL;
+
+	if (sample->flags & (PERF_IP_FLAG_CALL | PERF_IP_FLAG_TRACE_BEGIN)) {
+		if (sample_addr_correlates_sym(attr)) {
+			thread__resolve(thread, &addr_al, sample);
+			if (addr_al.sym)
+				name = addr_al.sym->name;
+			else
+				*ip = sample->addr;
+		} else {
+			*ip = sample->addr;
+		}
+	} else if (sample->flags & (PERF_IP_FLAG_RETURN | PERF_IP_FLAG_TRACE_END)) {
+		if (al->sym)
+			name = al->sym->name;
+		else
+			*ip = sample->ip;
+	}
+	return name;
+}
+
 static int perf_sample__fprintf_callindent(struct perf_sample *sample,
 					   struct perf_evsel *evsel,
 					   struct thread *thread,
@@ -1111,10 +1144,10 @@ static int perf_sample__fprintf_callindent(struct perf_sample *sample,
 {
 	struct perf_event_attr *attr = &evsel->attr;
 	size_t depth = thread_stack__depth(thread);
-	struct addr_location addr_al;
 	const char *name = NULL;
 	static int spacing;
 	int len = 0;
+	int dlen = 0;
 	u64 ip = 0;
 
 	/*
@@ -1124,21 +1157,12 @@ static int perf_sample__fprintf_callindent(struct perf_sample *sample,
 	if (thread->ts && sample->flags & PERF_IP_FLAG_RETURN)
 		depth += 1;
 
-	if (sample->flags & (PERF_IP_FLAG_CALL | PERF_IP_FLAG_TRACE_BEGIN)) {
-		if (sample_addr_correlates_sym(attr)) {
-			thread__resolve(thread, &addr_al, sample);
-			if (addr_al.sym)
-				name = addr_al.sym->name;
-			else
-				ip = sample->addr;
-		} else {
-			ip = sample->addr;
-		}
-	} else if (sample->flags & (PERF_IP_FLAG_RETURN | PERF_IP_FLAG_TRACE_END)) {
-		if (al->sym)
-			name = al->sym->name;
-		else
-			ip = sample->ip;
+	name = resolve_branch_sym(sample, evsel, thread, al, &ip);
+
+	if (PRINT_FIELD(DSO) && !(PRINT_FIELD(IP) || PRINT_FIELD(ADDR))) {
+		dlen += fprintf(fp, "(");
+		dlen += map__fprintf_dsoname(al->map, fp);
+		dlen += fprintf(fp, ")\t");
 	}
 
 	if (name)
@@ -1159,7 +1183,7 @@ static int perf_sample__fprintf_callindent(struct perf_sample *sample,
 	if (len < spacing)
 		len += fprintf(fp, "%*s", spacing - len, "");
 
-	return len;
+	return len + dlen;
 }
 
 static int perf_sample__fprintf_insn(struct perf_sample *sample,
@@ -1255,6 +1279,18 @@ static struct {
 	{0, NULL}
 };
 
+static const char *sample_flags_to_name(u32 flags)
+{
+	int i;
+
+	for (i = 0; sample_flags[i].name ; i++) {
+		if (sample_flags[i].flags == flags)
+			return sample_flags[i].name;
+	}
+
+	return NULL;
+}
+
 static int perf_sample__fprintf_flags(u32 flags, FILE *fp)
 {
 	const char *chars = PERF_IP_FLAG_CHARS;
@@ -1264,11 +1300,20 @@ static int perf_sample__fprintf_flags(u32 flags, FILE *fp)
 	char str[33];
 	int i, pos = 0;
 
-	for (i = 0; sample_flags[i].name ; i++) {
-		if (sample_flags[i].flags == (flags & ~PERF_IP_FLAG_IN_TX)) {
-			name = sample_flags[i].name;
-			break;
-		}
+	name = sample_flags_to_name(flags & ~PERF_IP_FLAG_IN_TX);
+	if (name)
+		return fprintf(fp, "  %-15s%4s ", name, in_tx ? "(x)" : "");
+
+	if (flags & PERF_IP_FLAG_TRACE_BEGIN) {
+		name = sample_flags_to_name(flags & ~(PERF_IP_FLAG_IN_TX | PERF_IP_FLAG_TRACE_BEGIN));
+		if (name)
+			return fprintf(fp, "  tr strt %-7s%4s ", name, in_tx ? "(x)" : "");
+	}
+
+	if (flags & PERF_IP_FLAG_TRACE_END) {
+		name = sample_flags_to_name(flags & ~(PERF_IP_FLAG_IN_TX | PERF_IP_FLAG_TRACE_END));
+		if (name)
+			return fprintf(fp, "  tr end  %-7s%4s ", name, in_tx ? "(x)" : "");
 	}
 
 	for (i = 0; i < n; i++, flags >>= 1) {
@@ -1281,10 +1326,7 @@ static int perf_sample__fprintf_flags(u32 flags, FILE *fp)
 	}
 	str[pos] = 0;
 
-	if (name)
-		return fprintf(fp, "  %-7s%4s ", name, in_tx ? "(x)" : "");
-
-	return fprintf(fp, "  %-11s ", str);
+	return fprintf(fp, "  %-19s ", str);
 }
 
 struct printer_data {
@@ -1544,7 +1586,8 @@ struct metric_ctx {
 	FILE 			*fp;
 };
 
-static void script_print_metric(void *ctx, const char *color,
+static void script_print_metric(struct perf_stat_config *config __maybe_unused,
+				void *ctx, const char *color,
 			        const char *fmt,
 			        const char *unit, double val)
 {
@@ -1562,7 +1605,8 @@ static void script_print_metric(void *ctx, const char *color,
 	fprintf(mctx->fp, " %s\n", unit);
 }
 
-static void script_new_line(void *ctx)
+static void script_new_line(struct perf_stat_config *config __maybe_unused,
+			    void *ctx)
 {
 	struct metric_ctx *mctx = ctx;
 
@@ -1608,7 +1652,7 @@ static void perf_sample__fprint_metric(struct perf_script *script,
 	evsel_script(evsel)->val = val;
 	if (evsel_script(evsel->leader)->gnum == evsel->leader->nr_members) {
 		for_each_group_member (ev2, evsel->leader) {
-			perf_stat__print_shadow_stats(ev2,
+			perf_stat__print_shadow_stats(&stat_config, ev2,
 						      evsel_script(ev2)->val,
 						      sample->cpu,
 						      &ctx,
@@ -1619,6 +1663,47 @@ static void perf_sample__fprint_metric(struct perf_script *script,
 	}
 }
 
+static bool show_event(struct perf_sample *sample,
+		       struct perf_evsel *evsel,
+		       struct thread *thread,
+		       struct addr_location *al)
+{
+	int depth = thread_stack__depth(thread);
+
+	if (!symbol_conf.graph_function)
+		return true;
+
+	if (thread->filter) {
+		if (depth <= thread->filter_entry_depth) {
+			thread->filter = false;
+			return false;
+		}
+		return true;
+	} else {
+		const char *s = symbol_conf.graph_function;
+		u64 ip;
+		const char *name = resolve_branch_sym(sample, evsel, thread, al,
+				&ip);
+		unsigned nlen;
+
+		if (!name)
+			return false;
+		nlen = strlen(name);
+		while (*s) {
+			unsigned len = strcspn(s, ",");
+			if (nlen == len && !strncmp(name, s, len)) {
+				thread->filter = true;
+				thread->filter_entry_depth = depth;
+				return true;
+			}
+			s += len;
+			if (*s == ',')
+				s++;
+		}
+		return false;
+	}
+}
+
 static void process_event(struct perf_script *script,
 			  struct perf_sample *sample, struct perf_evsel *evsel,
 			  struct addr_location *al,
@@ -1633,6 +1718,9 @@ static void process_event(struct perf_script *script,
 	if (output[type].fields == 0)
 		return;
 
+	if (!show_event(sample, evsel, thread, al))
+		return;
+
 	++es->samples;
 
 	perf_sample__fprintf_start(sample, thread, evsel,
@@ -1710,6 +1798,9 @@ static void process_event(struct perf_script *script,
 
 	if (PRINT_FIELD(METRIC))
 		perf_sample__fprint_metric(script, thread, evsel, sample, fp);
+
+	if (verbose)
+		fflush(fp);
 }
 
 static struct scripting_ops	*scripting_ops;
@@ -2489,6 +2580,8 @@ parse:
 						output[j].fields &= ~all_output_options[i].field;
 					else
 						output[j].fields |= all_output_options[i].field;
+					output[j].user_set = true;
+					output[j].wildcard_set = true;
 				}
 			}
 		} else {
@@ -2499,7 +2592,8 @@ parse:
 				rc = -EINVAL;
 				goto out;
 			}
-			output[type].fields |= all_output_options[i].field;
+			output[type].user_set = true;
+			output[type].wildcard_set = true;
 		}
 	}
 
@@ -2963,9 +3057,8 @@ static void script__setup_sample_type(struct perf_script *script)
 	}
 }
 
-static int process_stat_round_event(struct perf_tool *tool __maybe_unused,
-				    union perf_event *event,
-				    struct perf_session *session)
+static int process_stat_round_event(struct perf_session *session,
+				    union perf_event *event)
 {
 	struct stat_round_event *round = &event->stat_round;
 	struct perf_evsel *counter;
@@ -2979,9 +3072,8 @@ static int process_stat_round_event(struct perf_tool *tool __maybe_unused,
 	return 0;
 }
 
-static int process_stat_config_event(struct perf_tool *tool __maybe_unused,
-				     union perf_event *event,
-				     struct perf_session *session __maybe_unused)
+static int process_stat_config_event(struct perf_session *session __maybe_unused,
+				     union perf_event *event)
 {
 	perf_event__read_stat_config(&stat_config, &event->stat_config);
 	return 0;
@@ -3007,10 +3099,10 @@ static int set_maps(struct perf_script *script)
 }
 
 static
-int process_thread_map_event(struct perf_tool *tool,
-			     union perf_event *event,
-			     struct perf_session *session __maybe_unused)
+int process_thread_map_event(struct perf_session *session,
+			     union perf_event *event)
 {
+	struct perf_tool *tool = session->tool;
 	struct perf_script *script = container_of(tool, struct perf_script, tool);
 
 	if (script->threads) {
@@ -3026,10 +3118,10 @@ int process_thread_map_event(struct perf_tool *tool,
 }
 
 static
-int process_cpu_map_event(struct perf_tool *tool __maybe_unused,
-			  union perf_event *event,
-			  struct perf_session *session __maybe_unused)
+int process_cpu_map_event(struct perf_session *session,
+			  union perf_event *event)
 {
+	struct perf_tool *tool = session->tool;
 	struct perf_script *script = container_of(tool, struct perf_script, tool);
 
 	if (script->cpus) {
@@ -3044,21 +3136,21 @@ int process_cpu_map_event(struct perf_tool *tool __maybe_unused,
 	return set_maps(script);
 }
 
-static int process_feature_event(struct perf_tool *tool,
-				 union perf_event *event,
-				 struct perf_session *session)
+static int process_feature_event(struct perf_session *session,
+				 union perf_event *event)
 {
 	if (event->feat.feat_id < HEADER_LAST_FEATURE)
-		return perf_event__process_feature(tool, event, session);
+		return perf_event__process_feature(session, event);
 	return 0;
 }
 
 #ifdef HAVE_AUXTRACE_SUPPORT
-static int perf_script__process_auxtrace_info(struct perf_tool *tool,
-					      union perf_event *event,
-					      struct perf_session *session)
+static int perf_script__process_auxtrace_info(struct perf_session *session,
+					      union perf_event *event)
 {
-	int ret = perf_event__process_auxtrace_info(tool, event, session);
+	struct perf_tool *tool = session->tool;
+
+	int ret = perf_event__process_auxtrace_info(session, event);
 
 	if (ret == 0) {
 		struct perf_script *script = container_of(tool, struct perf_script, tool);
@@ -3072,6 +3164,44 @@ static int perf_script__process_auxtrace_info(struct perf_tool *tool,
 #define perf_script__process_auxtrace_info 0
 #endif
 
+static int parse_insn_trace(const struct option *opt __maybe_unused,
+			    const char *str __maybe_unused,
+			    int unset __maybe_unused)
+{
+	parse_output_fields(NULL, "+insn,-event,-period", 0);
+	itrace_parse_synth_opts(opt, "i0ns", 0);
+	nanosecs = true;
+	return 0;
+}
+
+static int parse_xed(const struct option *opt __maybe_unused,
+		     const char *str __maybe_unused,
+		     int unset __maybe_unused)
+{
+	force_pager("xed -F insn: -A -64 | less");
+	return 0;
+}
+
+static int parse_call_trace(const struct option *opt __maybe_unused,
+			    const char *str __maybe_unused,
+			    int unset __maybe_unused)
+{
+	parse_output_fields(NULL, "-ip,-addr,-event,-period,+callindent", 0);
+	itrace_parse_synth_opts(opt, "cewp", 0);
+	nanosecs = true;
+	return 0;
+}
+
+static int parse_callret_trace(const struct option *opt __maybe_unused,
+			    const char *str __maybe_unused,
+			    int unset __maybe_unused)
+{
+	parse_output_fields(NULL, "-ip,-addr,-event,-period,+callindent,+flags", 0);
+	itrace_parse_synth_opts(opt, "crewp", 0);
+	nanosecs = true;
+	return 0;
+}
+
 int cmd_script(int argc, const char **argv)
 {
 	bool show_full_info = false;
@@ -3081,7 +3211,10 @@ int cmd_script(int argc, const char **argv)
 	char *rec_script_path = NULL;
 	char *rep_script_path = NULL;
 	struct perf_session *session;
-	struct itrace_synth_opts itrace_synth_opts = { .set = false, };
+	struct itrace_synth_opts itrace_synth_opts = {
+		.set = false,
+		.default_no_sample = true,
+	};
 	char *script_path = NULL;
 	const char **__argv;
 	int i, j, err = 0;
@@ -3156,6 +3289,16 @@ int cmd_script(int argc, const char **argv)
 		    "system-wide collection from all CPUs"),
 	OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
 		   "only consider these symbols"),
+	OPT_CALLBACK_OPTARG(0, "insn-trace", &itrace_synth_opts, NULL, NULL,
+			"Decode instructions from itrace", parse_insn_trace),
+	OPT_CALLBACK_OPTARG(0, "xed", NULL, NULL, NULL,
+			"Run xed disassembler on output", parse_xed),
+	OPT_CALLBACK_OPTARG(0, "call-trace", &itrace_synth_opts, NULL, NULL,
+			"Decode calls from from itrace", parse_call_trace),
+	OPT_CALLBACK_OPTARG(0, "call-ret-trace", &itrace_synth_opts, NULL, NULL,
+			"Decode calls and returns from itrace", parse_callret_trace),
+	OPT_STRING(0, "graph-function", &symbol_conf.graph_function, "symbol[,symbol...]",
+			"Only print symbols and callees with --call-trace/--call-ret-trace"),
 	OPT_STRING(0, "stop-bt", &symbol_conf.bt_stop_list_str, "symbol[,symbol...]",
 		   "Stop display of callgraph at these symbols"),
 	OPT_STRING('C', "cpu", &cpu_list, "cpu", "list of cpus to profile"),
@@ -3193,7 +3336,7 @@ int cmd_script(int argc, const char **argv)
 	OPT_BOOLEAN(0, "ns", &nanosecs,
 		    "Use 9 decimal places when displaying time"),
 	OPT_CALLBACK_OPTARG(0, "itrace", &itrace_synth_opts, NULL, "opts",
-			    "Instruction Tracing options",
+			    "Instruction Tracing options\n" ITRACE_HELP,
 			    itrace_parse_synth_opts),
 	OPT_BOOLEAN(0, "full-source-path", &srcline_full_filename,
 			"Show full source file name path for source lines"),
@@ -3389,8 +3532,10 @@ int cmd_script(int argc, const char **argv)
 		exit(-1);
 	}
 
-	if (!script_name)
+	if (!script_name) {
 		setup_pager();
+		use_browser = 0;
+	}
 
 	session = perf_session__new(&data, false, &script.tool);
 	if (session == NULL)
@@ -3411,7 +3556,8 @@ int cmd_script(int argc, const char **argv)
 	script.session = session;
 	script__setup_sample_type(&script);
 
-	if (output[PERF_TYPE_HARDWARE].fields & PERF_OUTPUT_CALLINDENT)
+	if ((output[PERF_TYPE_HARDWARE].fields & PERF_OUTPUT_CALLINDENT) ||
+	    symbol_conf.graph_function)
 		itrace_synth_opts.thread_stack = true;
 
 	session->itrace_synth_opts = &itrace_synth_opts;
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index d097b5b47eb8..a635abfa77b6 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -88,8 +88,6 @@
 #include "sane_ctype.h"
 
 #define DEFAULT_SEPARATOR	" "
-#define CNTR_NOT_SUPPORTED	"<not supported>"
-#define CNTR_NOT_COUNTED	"<not counted>"
 #define FREEZE_ON_SMI_PATH	"devices/cpu/freeze_on_smi"
 
 static void print_counters(struct timespec *ts, int argc, const char **argv);
@@ -137,54 +135,30 @@ static const char *smi_cost_attrs = {
 
 static struct perf_evlist	*evsel_list;
 
-static struct rblist		 metric_events;
-
 static struct target target = {
 	.uid	= UINT_MAX,
 };
 
-typedef int (*aggr_get_id_t)(struct cpu_map *m, int cpu);
-
 #define METRIC_ONLY_LEN 20
 
-static int			run_count			=  1;
-static bool			no_inherit			= false;
 static volatile pid_t		child_pid			= -1;
-static bool			null_run			=  false;
 static int			detailed_run			=  0;
 static bool			transaction_run;
 static bool			topdown_run			= false;
 static bool			smi_cost			= false;
 static bool			smi_reset			= false;
-static bool			big_num				=  true;
 static int			big_num_opt			=  -1;
-static const char		*csv_sep			= NULL;
-static bool			csv_output			= false;
 static bool			group				= false;
 static const char		*pre_cmd			= NULL;
 static const char		*post_cmd			= NULL;
 static bool			sync_run			= false;
-static unsigned int		initial_delay			= 0;
-static unsigned int		unit_width			= 4; /* strlen("unit") */
 static bool			forever				= false;
-static bool			metric_only			= false;
 static bool			force_metric_only		= false;
-static bool			no_merge			= false;
-static bool			walltime_run_table		= false;
 static struct timespec		ref_time;
-static struct cpu_map		*aggr_map;
-static aggr_get_id_t		aggr_get_id;
 static bool			append_file;
 static bool			interval_count;
-static bool			interval_clear;
 static const char		*output_name;
 static int			output_fd;
-static int			print_free_counters_hint;
-static int			print_mixed_hw_group_error;
-static u64			*walltime_run;
-static bool			ru_display			= false;
-static struct rusage		ru_data;
-static unsigned int		metric_only_len			= METRIC_ONLY_LEN;
 
 struct perf_stat {
 	bool			 record;
@@ -204,15 +178,15 @@ static struct perf_stat		perf_stat;
 static volatile int done = 0;
 
 static struct perf_stat_config stat_config = {
-	.aggr_mode	= AGGR_GLOBAL,
-	.scale		= true,
+	.aggr_mode		= AGGR_GLOBAL,
+	.scale			= true,
+	.unit_width		= 4, /* strlen("unit") */
+	.run_count		= 1,
+	.metric_only_len	= METRIC_ONLY_LEN,
+	.walltime_nsecs_stats	= &walltime_nsecs_stats,
+	.big_num		= true,
 };
 
-static bool is_duration_time(struct perf_evsel *evsel)
-{
-	return !strcmp(evsel->name, "duration_time");
-}
-
 static inline void diff_timespec(struct timespec *r, struct timespec *a,
 				 struct timespec *b)
 {
@@ -236,66 +210,6 @@ static void perf_stat__reset_stats(void)
 		perf_stat__reset_shadow_per_stat(&stat_config.stats[i]);
 }
 
-static int create_perf_stat_counter(struct perf_evsel *evsel)
-{
-	struct perf_event_attr *attr = &evsel->attr;
-	struct perf_evsel *leader = evsel->leader;
-
-	if (stat_config.scale) {
-		attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
-				    PERF_FORMAT_TOTAL_TIME_RUNNING;
-	}
-
-	/*
-	 * The event is part of non trivial group, let's enable
-	 * the group read (for leader) and ID retrieval for all
-	 * members.
-	 */
-	if (leader->nr_members > 1)
-		attr->read_format |= PERF_FORMAT_ID|PERF_FORMAT_GROUP;
-
-	attr->inherit = !no_inherit;
-
-	/*
-	 * Some events get initialized with sample_(period/type) set,
-	 * like tracepoints. Clear it up for counting.
-	 */
-	attr->sample_period = 0;
-
-	/*
-	 * But set sample_type to PERF_SAMPLE_IDENTIFIER, which should be harmless
-	 * while avoiding that older tools show confusing messages.
-	 *
-	 * However for pipe sessions we need to keep it zero,
-	 * because script's perf_evsel__check_attr is triggered
-	 * by attr->sample_type != 0, and we can't run it on
-	 * stat sessions.
-	 */
-	if (!(STAT_RECORD && perf_stat.data.is_pipe))
-		attr->sample_type = PERF_SAMPLE_IDENTIFIER;
-
-	/*
-	 * Disabling all counters initially, they will be enabled
-	 * either manually by us or by kernel via enable_on_exec
-	 * set later.
-	 */
-	if (perf_evsel__is_group_leader(evsel)) {
-		attr->disabled = 1;
-
-		/*
-		 * In case of initial_delay we enable tracee
-		 * events manually.
-		 */
-		if (target__none(&target) && !initial_delay)
-			attr->enable_on_exec = 1;
-	}
-
-	if (target__has_cpu(&target) && !target__has_per_thread(&target))
-		return perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel));
-
-	return perf_evsel__open_per_thread(evsel, evsel_list->threads);
-}
-
 static int process_synthesized_event(struct perf_tool *tool __maybe_unused,
 				     union perf_event *event,
 				     struct perf_sample *sample __maybe_unused,
@@ -428,15 +342,15 @@ static void process_interval(void)
 
 static void enable_counters(void)
 {
-	if (initial_delay)
-		usleep(initial_delay * USEC_PER_MSEC);
+	if (stat_config.initial_delay)
+		usleep(stat_config.initial_delay * USEC_PER_MSEC);
 
 	/*
 	 * We need to enable counters only if:
 	 * - we don't have tracee (attaching to task or cpu)
 	 * - we have initial delay configured
 	 */
-	if (!target__none(&target) || initial_delay)
+	if (!target__none(&target) || stat_config.initial_delay)
 		perf_evlist__enable(evsel_list);
 }
 
@@ -464,109 +378,31 @@ static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *inf
 	workload_exec_errno = info->si_value.sival_int;
 }
 
-static int perf_stat_synthesize_config(bool is_pipe)
-{
-	int err;
-
-	if (is_pipe) {
-		err = perf_event__synthesize_attrs(NULL, perf_stat.session,
-						   process_synthesized_event);
-		if (err < 0) {
-			pr_err("Couldn't synthesize attrs.\n");
-			return err;
-		}
-	}
-
-	err = perf_event__synthesize_extra_attr(NULL,
-						evsel_list,
-						process_synthesized_event,
-						is_pipe);
-
-	err = perf_event__synthesize_thread_map2(NULL, evsel_list->threads,
-						process_synthesized_event,
-						NULL);
-	if (err < 0) {
-		pr_err("Couldn't synthesize thread map.\n");
-		return err;
-	}
-
-	err = perf_event__synthesize_cpu_map(NULL, evsel_list->cpus,
-					     process_synthesized_event, NULL);
-	if (err < 0) {
-		pr_err("Couldn't synthesize thread map.\n");
-		return err;
-	}
-
-	err = perf_event__synthesize_stat_config(NULL, &stat_config,
-						 process_synthesized_event, NULL);
-	if (err < 0) {
-		pr_err("Couldn't synthesize config.\n");
-		return err;
-	}
-
-	return 0;
-}
-
-#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
-
-static int __store_counter_ids(struct perf_evsel *counter)
-{
-	int cpu, thread;
-
-	for (cpu = 0; cpu < xyarray__max_x(counter->fd); cpu++) {
-		for (thread = 0; thread < xyarray__max_y(counter->fd);
-		     thread++) {
-			int fd = FD(counter, cpu, thread);
-
-			if (perf_evlist__id_add_fd(evsel_list, counter,
-						   cpu, thread, fd) < 0)
-				return -1;
-		}
-	}
-
-	return 0;
-}
-
-static int store_counter_ids(struct perf_evsel *counter)
-{
-	struct cpu_map *cpus = counter->cpus;
-	struct thread_map *threads = counter->threads;
-
-	if (perf_evsel__alloc_id(counter, cpus->nr, threads->nr))
-		return -ENOMEM;
-
-	return __store_counter_ids(counter);
-}
-
 static bool perf_evsel__should_store_id(struct perf_evsel *counter)
 {
 	return STAT_RECORD || counter->attr.read_format & PERF_FORMAT_ID;
 }
 
-static struct perf_evsel *perf_evsel__reset_weak_group(struct perf_evsel *evsel)
+static bool is_target_alive(struct target *_target,
+			    struct thread_map *threads)
 {
-	struct perf_evsel *c2, *leader;
-	bool is_open = true;
+	struct stat st;
+	int i;
 
-	leader = evsel->leader;
-	pr_debug("Weak group for %s/%d failed\n",
-			leader->name, leader->nr_members);
+	if (!target__has_task(_target))
+		return true;
 
-	/*
-	 * for_each_group_member doesn't work here because it doesn't
-	 * include the first entry.
-	 */
-	evlist__for_each_entry(evsel_list, c2) {
-		if (c2 == evsel)
-			is_open = false;
-		if (c2->leader == leader) {
-			if (is_open)
-				perf_evsel__close(c2);
-			c2->leader = c2;
-			c2->nr_members = 0;
-		}
+	for (i = 0; i < threads->nr; i++) {
+		char path[PATH_MAX];
+
+		scnprintf(path, PATH_MAX, "%s/%d", procfs__mountpoint(),
+			  threads->map[i].pid);
+
+		if (!stat(path, &st))
+			return true;
 	}
-	return leader;
+
+	return false;
 }
 
 static int __run_perf_stat(int argc, const char **argv, int run_idx)
@@ -609,13 +445,13 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
 
 	evlist__for_each_entry(evsel_list, counter) {
 try_again:
-		if (create_perf_stat_counter(counter) < 0) {
+		if (create_perf_stat_counter(counter, &stat_config, &target) < 0) {
 
 			/* Weak group failed. Reset the group. */
 			if ((errno == EINVAL || errno == EBADF) &&
 			    counter->leader != counter &&
 			    counter->weak_group) {
-				counter = perf_evsel__reset_weak_group(counter);
+				counter = perf_evlist__reset_weak_group(evsel_list, counter);
 				goto try_again;
 			}
 
@@ -664,11 +500,11 @@ try_again:
 		counter->supported = true;
 
 		l = strlen(counter->unit);
-		if (l > unit_width)
-			unit_width = l;
+		if (l > stat_config.unit_width)
+			stat_config.unit_width = l;
 
 		if (perf_evsel__should_store_id(counter) &&
-		    store_counter_ids(counter))
+		    perf_evsel__store_ids(counter, evsel_list))
 			return -1;
 	}
 
@@ -699,7 +535,8 @@ try_again:
 		if (err < 0)
 			return err;
 
-		err = perf_stat_synthesize_config(is_pipe);
+		err = perf_stat_synthesize_config(&stat_config, NULL, evsel_list,
+						  process_synthesized_event, is_pipe);
 		if (err < 0)
 			return err;
 	}
@@ -724,7 +561,7 @@ try_again:
 					break;
 			}
 		}
-		wait4(child_pid, &status, 0, &ru_data);
+		wait4(child_pid, &status, 0, &stat_config.ru_data);
 
 		if (workload_exec_errno) {
 			const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
@@ -738,6 +575,8 @@ try_again:
 		enable_counters();
 		while (!done) {
 			nanosleep(&ts, NULL);
+			if (!is_target_alive(&target, evsel_list->threads))
+				break;
 			if (timeout)
 				break;
 			if (interval) {
@@ -752,8 +591,8 @@ try_again:
 
 	t1 = rdclock();
 
-	if (walltime_run_table)
-		walltime_run[run_idx] = t1 - t0;
+	if (stat_config.walltime_run_table)
+		stat_config.walltime_run[run_idx] = t1 - t0;
 
 	update_stats(&walltime_nsecs_stats, t1 - t0);
 
@@ -795,1105 +634,14 @@ static int run_perf_stat(int argc, const char **argv, int run_idx)
 	return ret;
 }
 
-static void print_running(u64 run, u64 ena)
-{
-	if (csv_output) {
-		fprintf(stat_config.output, "%s%" PRIu64 "%s%.2f",
-					csv_sep,
-					run,
-					csv_sep,
-					ena ? 100.0 * run / ena : 100.0);
-	} else if (run != ena) {
-		fprintf(stat_config.output, "  (%.2f%%)", 100.0 * run / ena);
-	}
-}
-
-static void print_noise_pct(double total, double avg)
-{
-	double pct = rel_stddev_stats(total, avg);
-
-	if (csv_output)
-		fprintf(stat_config.output, "%s%.2f%%", csv_sep, pct);
-	else if (pct)
-		fprintf(stat_config.output, "  ( +-%6.2f%% )", pct);
-}
-
-static void print_noise(struct perf_evsel *evsel, double avg)
-{
-	struct perf_stat_evsel *ps;
-
-	if (run_count == 1)
-		return;
-
-	ps = evsel->stats;
-	print_noise_pct(stddev_stats(&ps->res_stats[0]), avg);
-}
-
-static void aggr_printout(struct perf_evsel *evsel, int id, int nr)
-{
-	switch (stat_config.aggr_mode) {
-	case AGGR_CORE:
-		fprintf(stat_config.output, "S%d-C%*d%s%*d%s",
-			cpu_map__id_to_socket(id),
-			csv_output ? 0 : -8,
-			cpu_map__id_to_cpu(id),
-			csv_sep,
-			csv_output ? 0 : 4,
-			nr,
-			csv_sep);
-		break;
-	case AGGR_SOCKET:
-		fprintf(stat_config.output, "S%*d%s%*d%s",
-			csv_output ? 0 : -5,
-			id,
-			csv_sep,
-			csv_output ? 0 : 4,
-			nr,
-			csv_sep);
-			break;
-	case AGGR_NONE:
-		fprintf(stat_config.output, "CPU%*d%s",
-			csv_output ? 0 : -4,
-			perf_evsel__cpus(evsel)->map[id], csv_sep);
-		break;
-	case AGGR_THREAD:
-		fprintf(stat_config.output, "%*s-%*d%s",
-			csv_output ? 0 : 16,
-			thread_map__comm(evsel->threads, id),
-			csv_output ? 0 : -8,
-			thread_map__pid(evsel->threads, id),
-			csv_sep);
-		break;
-	case AGGR_GLOBAL:
-	case AGGR_UNSET:
-	default:
-		break;
-	}
-}
-
-struct outstate {
-	FILE *fh;
-	bool newline;
-	const char *prefix;
-	int  nfields;
-	int  id, nr;
-	struct perf_evsel *evsel;
-};
-
-#define METRIC_LEN  35
-
-static void new_line_std(void *ctx)
-{
-	struct outstate *os = ctx;
-
-	os->newline = true;
-}
-
-static void do_new_line_std(struct outstate *os)
-{
-	fputc('\n', os->fh);
-	fputs(os->prefix, os->fh);
-	aggr_printout(os->evsel, os->id, os->nr);
-	if (stat_config.aggr_mode == AGGR_NONE)
-		fprintf(os->fh, "        ");
-	fprintf(os->fh, "                                                 ");
-}
-
-static void print_metric_std(void *ctx, const char *color, const char *fmt,
-			     const char *unit, double val)
-{
-	struct outstate *os = ctx;
-	FILE *out = os->fh;
-	int n;
-	bool newline = os->newline;
-
-	os->newline = false;
-
-	if (unit == NULL || fmt == NULL) {
-		fprintf(out, "%-*s", METRIC_LEN, "");
-		return;
-	}
-
-	if (newline)
-		do_new_line_std(os);
-
-	n = fprintf(out, " # ");
-	if (color)
-		n += color_fprintf(out, color, fmt, val);
-	else
-		n += fprintf(out, fmt, val);
-	fprintf(out, " %-*s", METRIC_LEN - n - 1, unit);
-}
-
-static void new_line_csv(void *ctx)
-{
-	struct outstate *os = ctx;
-	int i;
-
-	fputc('\n', os->fh);
-	if (os->prefix)
-		fprintf(os->fh, "%s%s", os->prefix, csv_sep);
-	aggr_printout(os->evsel, os->id, os->nr);
-	for (i = 0; i < os->nfields; i++)
-		fputs(csv_sep, os->fh);
-}
-
-static void print_metric_csv(void *ctx,
-			     const char *color __maybe_unused,
-			     const char *fmt, const char *unit, double val)
-{
-	struct outstate *os = ctx;
-	FILE *out = os->fh;
-	char buf[64], *vals, *ends;
-
-	if (unit == NULL || fmt == NULL) {
-		fprintf(out, "%s%s", csv_sep, csv_sep);
-		return;
-	}
-	snprintf(buf, sizeof(buf), fmt, val);
-	ends = vals = ltrim(buf);
-	while (isdigit(*ends) || *ends == '.')
-		ends++;
-	*ends = 0;
-	while (isspace(*unit))
-		unit++;
-	fprintf(out, "%s%s%s%s", csv_sep, vals, csv_sep, unit);
-}
-
-/* Filter out some columns that don't work well in metrics only mode */
-
-static bool valid_only_metric(const char *unit)
-{
-	if (!unit)
-		return false;
-	if (strstr(unit, "/sec") ||
-	    strstr(unit, "hz") ||
-	    strstr(unit, "Hz") ||
-	    strstr(unit, "CPUs utilized"))
-		return false;
-	return true;
-}
-
-static const char *fixunit(char *buf, struct perf_evsel *evsel,
-			   const char *unit)
-{
-	if (!strncmp(unit, "of all", 6)) {
-		snprintf(buf, 1024, "%s %s", perf_evsel__name(evsel),
-			 unit);
-		return buf;
-	}
-	return unit;
-}
-
-static void print_metric_only(void *ctx, const char *color, const char *fmt,
-			      const char *unit, double val)
-{
-	struct outstate *os = ctx;
-	FILE *out = os->fh;
-	char buf[1024], str[1024];
-	unsigned mlen = metric_only_len;
-
-	if (!valid_only_metric(unit))
-		return;
-	unit = fixunit(buf, os->evsel, unit);
-	if (mlen < strlen(unit))
-		mlen = strlen(unit) + 1;
-
-	if (color)
-		mlen += strlen(color) + sizeof(PERF_COLOR_RESET) - 1;
-
-	color_snprintf(str, sizeof(str), color ?: "", fmt, val);
-	fprintf(out, "%*s ", mlen, str);
-}
-
-static void print_metric_only_csv(void *ctx, const char *color __maybe_unused,
-				  const char *fmt,
-				  const char *unit, double val)
-{
-	struct outstate *os = ctx;
-	FILE *out = os->fh;
-	char buf[64], *vals, *ends;
-	char tbuf[1024];
-
-	if (!valid_only_metric(unit))
-		return;
-	unit = fixunit(tbuf, os->evsel, unit);
-	snprintf(buf, sizeof buf, fmt, val);
-	ends = vals = ltrim(buf);
-	while (isdigit(*ends) || *ends == '.')
-		ends++;
-	*ends = 0;
-	fprintf(out, "%s%s", vals, csv_sep);
-}
-
-static void new_line_metric(void *ctx __maybe_unused)
-{
-}
-
-static void print_metric_header(void *ctx, const char *color __maybe_unused,
-				const char *fmt __maybe_unused,
-				const char *unit, double val __maybe_unused)
-{
-	struct outstate *os = ctx;
-	char tbuf[1024];
-
-	if (!valid_only_metric(unit))
-		return;
-	unit = fixunit(tbuf, os->evsel, unit);
-	if (csv_output)
-		fprintf(os->fh, "%s%s", unit, csv_sep);
-	else
-		fprintf(os->fh, "%*s ", metric_only_len, unit);
-}
-
-static int first_shadow_cpu(struct perf_evsel *evsel, int id)
-{
-	int i;
-
-	if (!aggr_get_id)
-		return 0;
-
-	if (stat_config.aggr_mode == AGGR_NONE)
-		return id;
-
-	if (stat_config.aggr_mode == AGGR_GLOBAL)
-		return 0;
-
-	for (i = 0; i < perf_evsel__nr_cpus(evsel); i++) {
-		int cpu2 = perf_evsel__cpus(evsel)->map[i];
-
-		if (aggr_get_id(evsel_list->cpus, cpu2) == id)
-			return cpu2;
-	}
-	return 0;
-}
-
-static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
-{
-	FILE *output = stat_config.output;
-	double sc =  evsel->scale;
-	const char *fmt;
-
-	if (csv_output) {
-		fmt = floor(sc) != sc ?  "%.2f%s" : "%.0f%s";
-	} else {
-		if (big_num)
-			fmt = floor(sc) != sc ? "%'18.2f%s" : "%'18.0f%s";
-		else
-			fmt = floor(sc) != sc ? "%18.2f%s" : "%18.0f%s";
-	}
-
-	aggr_printout(evsel, id, nr);
-
-	fprintf(output, fmt, avg, csv_sep);
-
-	if (evsel->unit)
-		fprintf(output, "%-*s%s",
-			csv_output ? 0 : unit_width,
-			evsel->unit, csv_sep);
-
-	fprintf(output, "%-*s", csv_output ? 0 : 25, perf_evsel__name(evsel));
-
-	if (evsel->cgrp)
-		fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
-}
-
-static bool is_mixed_hw_group(struct perf_evsel *counter)
-{
-	struct perf_evlist *evlist = counter->evlist;
-	u32 pmu_type = counter->attr.type;
-	struct perf_evsel *pos;
-
-	if (counter->nr_members < 2)
-		return false;
-
-	evlist__for_each_entry(evlist, pos) {
-		/* software events can be part of any hardware group */
-		if (pos->attr.type == PERF_TYPE_SOFTWARE)
-			continue;
-		if (pmu_type == PERF_TYPE_SOFTWARE) {
-			pmu_type = pos->attr.type;
-			continue;
-		}
-		if (pmu_type != pos->attr.type)
-			return true;
-	}
-
-	return false;
-}
-
-static void printout(int id, int nr, struct perf_evsel *counter, double uval,
-		     char *prefix, u64 run, u64 ena, double noise,
-		     struct runtime_stat *st)
-{
-	struct perf_stat_output_ctx out;
-	struct outstate os = {
-		.fh = stat_config.output,
-		.prefix = prefix ? prefix : "",
-		.id = id,
-		.nr = nr,
-		.evsel = counter,
-	};
-	print_metric_t pm = print_metric_std;
-	void (*nl)(void *);
-
-	if (metric_only) {
-		nl = new_line_metric;
-		if (csv_output)
-			pm = print_metric_only_csv;
-		else
-			pm = print_metric_only;
-	} else
-		nl = new_line_std;
-
-	if (csv_output && !metric_only) {
-		static int aggr_fields[] = {
-			[AGGR_GLOBAL] = 0,
-			[AGGR_THREAD] = 1,
-			[AGGR_NONE] = 1,
-			[AGGR_SOCKET] = 2,
-			[AGGR_CORE] = 2,
-		};
-
-		pm = print_metric_csv;
-		nl = new_line_csv;
-		os.nfields = 3;
-		os.nfields += aggr_fields[stat_config.aggr_mode];
-		if (counter->cgrp)
-			os.nfields++;
-	}
-	if (run == 0 || ena == 0 || counter->counts->scaled == -1) {
-		if (metric_only) {
-			pm(&os, NULL, "", "", 0);
-			return;
-		}
-		aggr_printout(counter, id, nr);
-
-		fprintf(stat_config.output, "%*s%s",
-			csv_output ? 0 : 18,
-			counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
-			csv_sep);
-
-		if (counter->supported) {
-			print_free_counters_hint = 1;
-			if (is_mixed_hw_group(counter))
-				print_mixed_hw_group_error = 1;
-		}
-
-		fprintf(stat_config.output, "%-*s%s",
-			csv_output ? 0 : unit_width,
-			counter->unit, csv_sep);
-
-		fprintf(stat_config.output, "%*s",
-			csv_output ? 0 : -25,
-			perf_evsel__name(counter));
-
-		if (counter->cgrp)
-			fprintf(stat_config.output, "%s%s",
-				csv_sep, counter->cgrp->name);
-
-		if (!csv_output)
-			pm(&os, NULL, NULL, "", 0);
-		print_noise(counter, noise);
-		print_running(run, ena);
-		if (csv_output)
-			pm(&os, NULL, NULL, "", 0);
-		return;
-	}
-
-	if (!metric_only)
-		abs_printout(id, nr, counter, uval);
-
-	out.print_metric = pm;
-	out.new_line = nl;
-	out.ctx = &os;
-	out.force_header = false;
-
-	if (csv_output && !metric_only) {
-		print_noise(counter, noise);
-		print_running(run, ena);
-	}
-
-	perf_stat__print_shadow_stats(counter, uval,
-				first_shadow_cpu(counter, id),
-				&out, &metric_events, st);
-	if (!csv_output && !metric_only) {
-		print_noise(counter, noise);
-		print_running(run, ena);
-	}
-}
-
-static void aggr_update_shadow(void)
-{
-	int cpu, s2, id, s;
-	u64 val;
-	struct perf_evsel *counter;
-
-	for (s = 0; s < aggr_map->nr; s++) {
-		id = aggr_map->map[s];
-		evlist__for_each_entry(evsel_list, counter) {
-			val = 0;
-			for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
-				s2 = aggr_get_id(evsel_list->cpus, cpu);
-				if (s2 != id)
-					continue;
-				val += perf_counts(counter->counts, cpu, 0)->val;
-			}
-			perf_stat__update_shadow_stats(counter, val,
-					first_shadow_cpu(counter, id),
-					&rt_stat);
-		}
-	}
-}
-
-static void uniquify_event_name(struct perf_evsel *counter)
-{
-	char *new_name;
-	char *config;
-
-	if (counter->uniquified_name ||
-	    !counter->pmu_name || !strncmp(counter->name, counter->pmu_name,
-					   strlen(counter->pmu_name)))
-		return;
-
-	config = strchr(counter->name, '/');
-	if (config) {
-		if (asprintf(&new_name,
-			     "%s%s", counter->pmu_name, config) > 0) {
-			free(counter->name);
-			counter->name = new_name;
-		}
-	} else {
-		if (asprintf(&new_name,
-			     "%s [%s]", counter->name, counter->pmu_name) > 0) {
-			free(counter->name);
-			counter->name = new_name;
-		}
-	}
-
-	counter->uniquified_name = true;
-}
-
-static void collect_all_aliases(struct perf_evsel *counter,
-			    void (*cb)(struct perf_evsel *counter, void *data,
-				       bool first),
-			    void *data)
-{
-	struct perf_evsel *alias;
-
-	alias = list_prepare_entry(counter, &(evsel_list->entries), node);
-	list_for_each_entry_continue (alias, &evsel_list->entries, node) {
-		if (strcmp(perf_evsel__name(alias), perf_evsel__name(counter)) ||
-		    alias->scale != counter->scale ||
-		    alias->cgrp != counter->cgrp ||
-		    strcmp(alias->unit, counter->unit) ||
-		    perf_evsel__is_clock(alias) != perf_evsel__is_clock(counter))
-			break;
-		alias->merged_stat = true;
-		cb(alias, data, false);
-	}
-}
-
-static bool collect_data(struct perf_evsel *counter,
-			    void (*cb)(struct perf_evsel *counter, void *data,
-				       bool first),
-			    void *data)
-{
-	if (counter->merged_stat)
-		return false;
-	cb(counter, data, true);
-	if (no_merge)
-		uniquify_event_name(counter);
-	else if (counter->auto_merge_stats)
-		collect_all_aliases(counter, cb, data);
-	return true;
-}
-
-struct aggr_data {
-	u64 ena, run, val;
-	int id;
-	int nr;
-	int cpu;
-};
-
-static void aggr_cb(struct perf_evsel *counter, void *data, bool first)
-{
-	struct aggr_data *ad = data;
-	int cpu, s2;
-
-	for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
-		struct perf_counts_values *counts;
-
-		s2 = aggr_get_id(perf_evsel__cpus(counter), cpu);
-		if (s2 != ad->id)
-			continue;
-		if (first)
-			ad->nr++;
-		counts = perf_counts(counter->counts, cpu, 0);
-		/*
-		 * When any result is bad, make them all to give
-		 * consistent output in interval mode.
-		 */
-		if (counts->ena == 0 || counts->run == 0 ||
-		    counter->counts->scaled == -1) {
-			ad->ena = 0;
-			ad->run = 0;
-			break;
-		}
-		ad->val += counts->val;
-		ad->ena += counts->ena;
-		ad->run += counts->run;
-	}
-}
-
-static void print_aggr(char *prefix)
-{
-	FILE *output = stat_config.output;
-	struct perf_evsel *counter;
-	int s, id, nr;
-	double uval;
-	u64 ena, run, val;
-	bool first;
-
-	if (!(aggr_map || aggr_get_id))
-		return;
-
-	aggr_update_shadow();
-
-	/*
-	 * With metric_only everything is on a single line.
-	 * Without each counter has its own line.
-	 */
-	for (s = 0; s < aggr_map->nr; s++) {
-		struct aggr_data ad;
-		if (prefix && metric_only)
-			fprintf(output, "%s", prefix);
-
-		ad.id = id = aggr_map->map[s];
-		first = true;
-		evlist__for_each_entry(evsel_list, counter) {
-			if (is_duration_time(counter))
-				continue;
-
-			ad.val = ad.ena = ad.run = 0;
-			ad.nr = 0;
-			if (!collect_data(counter, aggr_cb, &ad))
-				continue;
-			nr = ad.nr;
-			ena = ad.ena;
-			run = ad.run;
-			val = ad.val;
-			if (first && metric_only) {
-				first = false;
-				aggr_printout(counter, id, nr);
-			}
-			if (prefix && !metric_only)
-				fprintf(output, "%s", prefix);
-
-			uval = val * counter->scale;
-			printout(id, nr, counter, uval, prefix, run, ena, 1.0,
-				 &rt_stat);
-			if (!metric_only)
-				fputc('\n', output);
-		}
-		if (metric_only)
-			fputc('\n', output);
-	}
-}
-
-static int cmp_val(const void *a, const void *b)
-{
-	return ((struct perf_aggr_thread_value *)b)->val -
-		((struct perf_aggr_thread_value *)a)->val;
-}
-
-static struct perf_aggr_thread_value *sort_aggr_thread(
-					struct perf_evsel *counter,
-					int nthreads, int ncpus,
-					int *ret)
-{
-	int cpu, thread, i = 0;
-	double uval;
-	struct perf_aggr_thread_value *buf;
-
-	buf = calloc(nthreads, sizeof(struct perf_aggr_thread_value));
-	if (!buf)
-		return NULL;
-
-	for (thread = 0; thread < nthreads; thread++) {
-		u64 ena = 0, run = 0, val = 0;
-
-		for (cpu = 0; cpu < ncpus; cpu++) {
-			val += perf_counts(counter->counts, cpu, thread)->val;
-			ena += perf_counts(counter->counts, cpu, thread)->ena;
-			run += perf_counts(counter->counts, cpu, thread)->run;
-		}
-
-		uval = val * counter->scale;
-
-		/*
-		 * Skip value 0 when enabling --per-thread globally,
-		 * otherwise too many 0 output.
-		 */
-		if (uval == 0.0 && target__has_per_thread(&target))
-			continue;
-
-		buf[i].counter = counter;
-		buf[i].id = thread;
-		buf[i].uval = uval;
-		buf[i].val = val;
-		buf[i].run = run;
-		buf[i].ena = ena;
-		i++;
-	}
-
-	qsort(buf, i, sizeof(struct perf_aggr_thread_value), cmp_val);
-
-	if (ret)
-		*ret = i;
-
-	return buf;
-}
-
-static void print_aggr_thread(struct perf_evsel *counter, char *prefix)
-{
-	FILE *output = stat_config.output;
-	int nthreads = thread_map__nr(counter->threads);
-	int ncpus = cpu_map__nr(counter->cpus);
-	int thread, sorted_threads, id;
-	struct perf_aggr_thread_value *buf;
-
-	buf = sort_aggr_thread(counter, nthreads, ncpus, &sorted_threads);
-	if (!buf) {
-		perror("cannot sort aggr thread");
-		return;
-	}
-
-	for (thread = 0; thread < sorted_threads; thread++) {
-		if (prefix)
-			fprintf(output, "%s", prefix);
-
-		id = buf[thread].id;
-		if (stat_config.stats)
-			printout(id, 0, buf[thread].counter, buf[thread].uval,
-				 prefix, buf[thread].run, buf[thread].ena, 1.0,
-				 &stat_config.stats[id]);
-		else
-			printout(id, 0, buf[thread].counter, buf[thread].uval,
-				 prefix, buf[thread].run, buf[thread].ena, 1.0,
-				 &rt_stat);
-		fputc('\n', output);
-	}
-
-	free(buf);
-}
-
-struct caggr_data {
-	double avg, avg_enabled, avg_running;
-};
-
-static void counter_aggr_cb(struct perf_evsel *counter, void *data,
-			    bool first __maybe_unused)
-{
-	struct caggr_data *cd = data;
-	struct perf_stat_evsel *ps = counter->stats;
-
-	cd->avg += avg_stats(&ps->res_stats[0]);
-	cd->avg_enabled += avg_stats(&ps->res_stats[1]);
-	cd->avg_running += avg_stats(&ps->res_stats[2]);
-}
-
-/*
- * Print out the results of a single counter:
- * aggregated counts in system-wide mode
- */
-static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
-{
-	FILE *output = stat_config.output;
-	double uval;
-	struct caggr_data cd = { .avg = 0.0 };
-
-	if (!collect_data(counter, counter_aggr_cb, &cd))
-		return;
-
-	if (prefix && !metric_only)
-		fprintf(output, "%s", prefix);
-
-	uval = cd.avg * counter->scale;
-	printout(-1, 0, counter, uval, prefix, cd.avg_running, cd.avg_enabled,
-		 cd.avg, &rt_stat);
-	if (!metric_only)
-		fprintf(output, "\n");
-}
-
-static void counter_cb(struct perf_evsel *counter, void *data,
-		       bool first __maybe_unused)
-{
-	struct aggr_data *ad = data;
-
-	ad->val += perf_counts(counter->counts, ad->cpu, 0)->val;
-	ad->ena += perf_counts(counter->counts, ad->cpu, 0)->ena;
-	ad->run += perf_counts(counter->counts, ad->cpu, 0)->run;
-}
-
-/*
- * Print out the results of a single counter:
- * does not use aggregated count in system-wide
- */
-static void print_counter(struct perf_evsel *counter, char *prefix)
-{
-	FILE *output = stat_config.output;
-	u64 ena, run, val;
-	double uval;
-	int cpu;
-
-	for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
-		struct aggr_data ad = { .cpu = cpu };
-
-		if (!collect_data(counter, counter_cb, &ad))
-			return;
-		val = ad.val;
-		ena = ad.ena;
-		run = ad.run;
-
-		if (prefix)
-			fprintf(output, "%s", prefix);
-
-		uval = val * counter->scale;
-		printout(cpu, 0, counter, uval, prefix, run, ena, 1.0,
-			 &rt_stat);
-
-		fputc('\n', output);
-	}
-}
-
-static void print_no_aggr_metric(char *prefix)
-{
-	int cpu;
-	int nrcpus = 0;
-	struct perf_evsel *counter;
-	u64 ena, run, val;
-	double uval;
-
-	nrcpus = evsel_list->cpus->nr;
-	for (cpu = 0; cpu < nrcpus; cpu++) {
-		bool first = true;
-
-		if (prefix)
-			fputs(prefix, stat_config.output);
-		evlist__for_each_entry(evsel_list, counter) {
-			if (is_duration_time(counter))
-				continue;
-			if (first) {
-				aggr_printout(counter, cpu, 0);
-				first = false;
-			}
-			val = perf_counts(counter->counts, cpu, 0)->val;
-			ena = perf_counts(counter->counts, cpu, 0)->ena;
-			run = perf_counts(counter->counts, cpu, 0)->run;
-
-			uval = val * counter->scale;
-			printout(cpu, 0, counter, uval, prefix, run, ena, 1.0,
-				 &rt_stat);
-		}
-		fputc('\n', stat_config.output);
-	}
-}
-
-static int aggr_header_lens[] = {
-	[AGGR_CORE] = 18,
-	[AGGR_SOCKET] = 12,
-	[AGGR_NONE] = 6,
-	[AGGR_THREAD] = 24,
-	[AGGR_GLOBAL] = 0,
-};
-
-static const char *aggr_header_csv[] = {
-	[AGGR_CORE] 	= 	"core,cpus,",
-	[AGGR_SOCKET] 	= 	"socket,cpus",
-	[AGGR_NONE] 	= 	"cpu,",
-	[AGGR_THREAD] 	= 	"comm-pid,",
-	[AGGR_GLOBAL] 	=	""
-};
-
-static void print_metric_headers(const char *prefix, bool no_indent)
-{
-	struct perf_stat_output_ctx out;
-	struct perf_evsel *counter;
-	struct outstate os = {
-		.fh = stat_config.output
-	};
-
-	if (prefix)
-		fprintf(stat_config.output, "%s", prefix);
-
-	if (!csv_output && !no_indent)
-		fprintf(stat_config.output, "%*s",
-			aggr_header_lens[stat_config.aggr_mode], "");
-	if (csv_output) {
-		if (stat_config.interval)
-			fputs("time,", stat_config.output);
-		fputs(aggr_header_csv[stat_config.aggr_mode],
-			stat_config.output);
-	}
-
-	/* Print metrics headers only */
-	evlist__for_each_entry(evsel_list, counter) {
-		if (is_duration_time(counter))
-			continue;
-		os.evsel = counter;
-		out.ctx = &os;
-		out.print_metric = print_metric_header;
-		out.new_line = new_line_metric;
-		out.force_header = true;
-		os.evsel = counter;
-		perf_stat__print_shadow_stats(counter, 0,
-					      0,
-					      &out,
-					      &metric_events,
-					      &rt_stat);
-	}
-	fputc('\n', stat_config.output);
-}
-
-static void print_interval(char *prefix, struct timespec *ts)
-{
-	FILE *output = stat_config.output;
-	static int num_print_interval;
-
-	if (interval_clear)
-		puts(CONSOLE_CLEAR);
-
-	sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep);
-
-	if ((num_print_interval == 0 && !csv_output) || interval_clear) {
-		switch (stat_config.aggr_mode) {
-		case AGGR_SOCKET:
-			fprintf(output, "#           time socket cpus");
-			if (!metric_only)
-				fprintf(output, "             counts %*s events\n", unit_width, "unit");
-			break;
-		case AGGR_CORE:
-			fprintf(output, "#           time core         cpus");
-			if (!metric_only)
-				fprintf(output, "             counts %*s events\n", unit_width, "unit");
-			break;
-		case AGGR_NONE:
-			fprintf(output, "#           time CPU    ");
-			if (!metric_only)
-				fprintf(output, "                counts %*s events\n", unit_width, "unit");
-			break;
-		case AGGR_THREAD:
-			fprintf(output, "#           time             comm-pid");
-			if (!metric_only)
-				fprintf(output, "                  counts %*s events\n", unit_width, "unit");
-			break;
-		case AGGR_GLOBAL:
-		default:
-			fprintf(output, "#           time");
-			if (!metric_only)
-				fprintf(output, "             counts %*s events\n", unit_width, "unit");
-		case AGGR_UNSET:
-			break;
-		}
-	}
-
-	if ((num_print_interval == 0 || interval_clear) && metric_only)
-		print_metric_headers(" ", true);
-	if (++num_print_interval == 25)
-		num_print_interval = 0;
-}
-
-static void print_header(int argc, const char **argv)
-{
-	FILE *output = stat_config.output;
-	int i;
-
-	fflush(stdout);
-
-	if (!csv_output) {
-		fprintf(output, "\n");
-		fprintf(output, " Performance counter stats for ");
-		if (target.system_wide)
-			fprintf(output, "\'system wide");
-		else if (target.cpu_list)
-			fprintf(output, "\'CPU(s) %s", target.cpu_list);
-		else if (!target__has_task(&target)) {
-			fprintf(output, "\'%s", argv ? argv[0] : "pipe");
-			for (i = 1; argv && (i < argc); i++)
-				fprintf(output, " %s", argv[i]);
-		} else if (target.pid)
-			fprintf(output, "process id \'%s", target.pid);
-		else
-			fprintf(output, "thread id \'%s", target.tid);
-
-		fprintf(output, "\'");
-		if (run_count > 1)
-			fprintf(output, " (%d runs)", run_count);
-		fprintf(output, ":\n\n");
-	}
-}
-
-static int get_precision(double num)
-{
-	if (num > 1)
-		return 0;
-
-	return lround(ceil(-log10(num)));
-}
-
-static void print_table(FILE *output, int precision, double avg)
-{
-	char tmp[64];
-	int idx, indent = 0;
-
-	scnprintf(tmp, 64, " %17.*f", precision, avg);
-	while (tmp[indent] == ' ')
-		indent++;
-
-	fprintf(output, "%*s# Table of individual measurements:\n", indent, "");
-
-	for (idx = 0; idx < run_count; idx++) {
-		double run = (double) walltime_run[idx] / NSEC_PER_SEC;
-		int h, n = 1 + abs((int) (100.0 * (run - avg)/run) / 5);
-
-		fprintf(output, " %17.*f (%+.*f) ",
-			precision, run, precision, run - avg);
-
-		for (h = 0; h < n; h++)
-			fprintf(output, "#");
-
-		fprintf(output, "\n");
-	}
-
-	fprintf(output, "\n%*s# Final result:\n", indent, "");
-}
-
-static double timeval2double(struct timeval *t)
-{
-	return t->tv_sec + (double) t->tv_usec/USEC_PER_SEC;
-}
-
-static void print_footer(void)
-{
-	double avg = avg_stats(&walltime_nsecs_stats) / NSEC_PER_SEC;
-	FILE *output = stat_config.output;
-	int n;
-
-	if (!null_run)
-		fprintf(output, "\n");
-
-	if (run_count == 1) {
-		fprintf(output, " %17.9f seconds time elapsed", avg);
-
-		if (ru_display) {
-			double ru_utime = timeval2double(&ru_data.ru_utime);
-			double ru_stime = timeval2double(&ru_data.ru_stime);
-
-			fprintf(output, "\n\n");
-			fprintf(output, " %17.9f seconds user\n", ru_utime);
-			fprintf(output, " %17.9f seconds sys\n", ru_stime);
-		}
-	} else {
-		double sd = stddev_stats(&walltime_nsecs_stats) / NSEC_PER_SEC;
-		/*
-		 * Display at most 2 more significant
-		 * digits than the stddev inaccuracy.
-		 */
-		int precision = get_precision(sd) + 2;
-
-		if (walltime_run_table)
-			print_table(output, precision, avg);
-
-		fprintf(output, " %17.*f +- %.*f seconds time elapsed",
-			precision, avg, precision, sd);
-
-		print_noise_pct(sd, avg);
-	}
-	fprintf(output, "\n\n");
-
-	if (print_free_counters_hint &&
-	    sysctl__read_int("kernel/nmi_watchdog", &n) >= 0 &&
-	    n > 0)
-		fprintf(output,
-"Some events weren't counted. Try disabling the NMI watchdog:\n"
-"	echo 0 > /proc/sys/kernel/nmi_watchdog\n"
-"	perf stat ...\n"
-"	echo 1 > /proc/sys/kernel/nmi_watchdog\n");
-
-	if (print_mixed_hw_group_error)
-		fprintf(output,
-			"The events in group usually have to be from "
-			"the same PMU. Try reorganizing the group.\n");
-}
-
 static void print_counters(struct timespec *ts, int argc, const char **argv)
 {
-	int interval = stat_config.interval;
-	struct perf_evsel *counter;
-	char buf[64], *prefix = NULL;
-
 	/* Do not print anything if we record to the pipe. */
 	if (STAT_RECORD && perf_stat.data.is_pipe)
 		return;
 
-	if (interval)
-		print_interval(prefix = buf, ts);
-	else
-		print_header(argc, argv);
-
-	if (metric_only) {
-		static int num_print_iv;
-
-		if (num_print_iv == 0 && !interval)
-			print_metric_headers(prefix, false);
-		if (num_print_iv++ == 25)
-			num_print_iv = 0;
-		if (stat_config.aggr_mode == AGGR_GLOBAL && prefix)
-			fprintf(stat_config.output, "%s", prefix);
-	}
-
-	switch (stat_config.aggr_mode) {
-	case AGGR_CORE:
-	case AGGR_SOCKET:
-		print_aggr(prefix);
-		break;
-	case AGGR_THREAD:
-		evlist__for_each_entry(evsel_list, counter) {
-			if (is_duration_time(counter))
-				continue;
-			print_aggr_thread(counter, prefix);
-		}
-		break;
-	case AGGR_GLOBAL:
-		evlist__for_each_entry(evsel_list, counter) {
-			if (is_duration_time(counter))
-				continue;
-			print_counter_aggr(counter, prefix);
-		}
-		if (metric_only)
-			fputc('\n', stat_config.output);
-		break;
-	case AGGR_NONE:
-		if (metric_only)
-			print_no_aggr_metric(prefix);
-		else {
-			evlist__for_each_entry(evsel_list, counter) {
-				if (is_duration_time(counter))
-					continue;
-				print_counter(counter, prefix);
-			}
-		}
-		break;
-	case AGGR_UNSET:
-	default:
-		break;
-	}
-
-	if (!interval && !csv_output)
-		print_footer();
-
-	fflush(stat_config.output);
+	perf_evlist__print_counters(evsel_list, &stat_config, &target,
+				    ts, argc, argv);
 }
 
 static volatile int signr = -1;
@@ -1950,7 +698,7 @@ static int enable_metric_only(const struct option *opt __maybe_unused,
 			      const char *s __maybe_unused, int unset)
 {
 	force_metric_only = true;
-	metric_only = !unset;
+	stat_config.metric_only = !unset;
 	return 0;
 }
 
@@ -1958,7 +706,7 @@ static int parse_metric_groups(const struct option *opt,
 			       const char *str,
 			       int unset __maybe_unused)
 {
-	return metricgroup__parse_groups(opt, str, &metric_events);
+	return metricgroup__parse_groups(opt, str, &stat_config.metric_events);
 }
 
 static const struct option stat_options[] = {
@@ -1969,7 +717,7 @@ static const struct option stat_options[] = {
 		     parse_events_option),
 	OPT_CALLBACK(0, "filter", &evsel_list, "filter",
 		     "event filter", parse_filter),
-	OPT_BOOLEAN('i', "no-inherit", &no_inherit,
+	OPT_BOOLEAN('i', "no-inherit", &stat_config.no_inherit,
 		    "child tasks do not inherit counters"),
 	OPT_STRING('p', "pid", &target.pid, "pid",
 		   "stat events on existing process id"),
@@ -1982,11 +730,11 @@ static const struct option stat_options[] = {
 	OPT_BOOLEAN('c', "scale", &stat_config.scale, "scale/normalize counters"),
 	OPT_INCR('v', "verbose", &verbose,
 		    "be more verbose (show counter open errors, etc)"),
-	OPT_INTEGER('r', "repeat", &run_count,
+	OPT_INTEGER('r', "repeat", &stat_config.run_count,
 		    "repeat command and print average + stddev (max: 100, forever: 0)"),
-	OPT_BOOLEAN(0, "table", &walltime_run_table,
+	OPT_BOOLEAN(0, "table", &stat_config.walltime_run_table,
 		    "display details about each run (only with -r option)"),
-	OPT_BOOLEAN('n', "null", &null_run,
+	OPT_BOOLEAN('n', "null", &stat_config.null_run,
 		    "null run - dont start any counters"),
 	OPT_INCR('d', "detailed", &detailed_run,
 		    "detailed run - start a lot of events"),
@@ -1999,8 +747,8 @@ static const struct option stat_options[] = {
 		    "list of cpus to monitor in system-wide"),
 	OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode,
 		    "disable CPU count aggregation", AGGR_NONE),
-	OPT_BOOLEAN(0, "no-merge", &no_merge, "Do not merge identical named events"),
-	OPT_STRING('x', "field-separator", &csv_sep, "separator",
+	OPT_BOOLEAN(0, "no-merge", &stat_config.no_merge, "Do not merge identical named events"),
+	OPT_STRING('x', "field-separator", &stat_config.csv_sep, "separator",
 		   "print counts with custom separator"),
 	OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
 		     "monitor event in cgroup name only", parse_cgroups),
@@ -2017,7 +765,7 @@ static const struct option stat_options[] = {
 		    "(overhead is possible for values <= 100ms)"),
 	OPT_INTEGER(0, "interval-count", &stat_config.times,
 		    "print counts for fixed number of times"),
-	OPT_BOOLEAN(0, "interval-clear", &interval_clear,
+	OPT_BOOLEAN(0, "interval-clear", &stat_config.interval_clear,
 		    "clear screen in between new interval"),
 	OPT_UINTEGER(0, "timeout", &stat_config.timeout,
 		    "stop workload and print counts after a timeout period in ms (>= 10ms)"),
@@ -2027,9 +775,9 @@ static const struct option stat_options[] = {
 		     "aggregate counts per physical processor core", AGGR_CORE),
 	OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode,
 		     "aggregate counts per thread", AGGR_THREAD),
-	OPT_UINTEGER('D', "delay", &initial_delay,
+	OPT_UINTEGER('D', "delay", &stat_config.initial_delay,
 		     "ms to wait before starting measurement after program start"),
-	OPT_CALLBACK_NOOPT(0, "metric-only", &metric_only, NULL,
+	OPT_CALLBACK_NOOPT(0, "metric-only", &stat_config.metric_only, NULL,
 			"Only print computed metrics. No raw values", enable_metric_only),
 	OPT_BOOLEAN(0, "topdown", &topdown_run,
 			"measure topdown level 1 statistics"),
@@ -2041,12 +789,14 @@ static const struct option stat_options[] = {
 	OPT_END()
 };
 
-static int perf_stat__get_socket(struct cpu_map *map, int cpu)
+static int perf_stat__get_socket(struct perf_stat_config *config __maybe_unused,
+				 struct cpu_map *map, int cpu)
 {
 	return cpu_map__get_socket(map, cpu, NULL);
 }
 
-static int perf_stat__get_core(struct cpu_map *map, int cpu)
+static int perf_stat__get_core(struct perf_stat_config *config __maybe_unused,
+			       struct cpu_map *map, int cpu)
 {
 	return cpu_map__get_core(map, cpu, NULL);
 }
@@ -2063,9 +813,8 @@ static int cpu_map__get_max(struct cpu_map *map)
 	return max;
 }
 
-static struct cpu_map *cpus_aggr_map;
-
-static int perf_stat__get_aggr(aggr_get_id_t get_id, struct cpu_map *map, int idx)
+static int perf_stat__get_aggr(struct perf_stat_config *config,
+			       aggr_get_id_t get_id, struct cpu_map *map, int idx)
 {
 	int cpu;
 
@@ -2074,20 +823,22 @@ static int perf_stat__get_aggr(aggr_get_id_t get_id, struct cpu_map *map, int id
 
 	cpu = map->map[idx];
 
-	if (cpus_aggr_map->map[cpu] == -1)
-		cpus_aggr_map->map[cpu] = get_id(map, idx);
+	if (config->cpus_aggr_map->map[cpu] == -1)
+		config->cpus_aggr_map->map[cpu] = get_id(config, map, idx);
 
-	return cpus_aggr_map->map[cpu];
+	return config->cpus_aggr_map->map[cpu];
 }
 
-static int perf_stat__get_socket_cached(struct cpu_map *map, int idx)
+static int perf_stat__get_socket_cached(struct perf_stat_config *config,
+					struct cpu_map *map, int idx)
 {
-	return perf_stat__get_aggr(perf_stat__get_socket, map, idx);
+	return perf_stat__get_aggr(config, perf_stat__get_socket, map, idx);
 }
 
-static int perf_stat__get_core_cached(struct cpu_map *map, int idx)
+static int perf_stat__get_core_cached(struct perf_stat_config *config,
+				      struct cpu_map *map, int idx)
 {
-	return perf_stat__get_aggr(perf_stat__get_core, map, idx);
+	return perf_stat__get_aggr(config, perf_stat__get_core, map, idx);
 }
 
 static int perf_stat_init_aggr_mode(void)
@@ -2096,18 +847,18 @@ static int perf_stat_init_aggr_mode(void)
 
 	switch (stat_config.aggr_mode) {
 	case AGGR_SOCKET:
-		if (cpu_map__build_socket_map(evsel_list->cpus, &aggr_map)) {
+		if (cpu_map__build_socket_map(evsel_list->cpus, &stat_config.aggr_map)) {
 			perror("cannot build socket map");
 			return -1;
 		}
-		aggr_get_id = perf_stat__get_socket_cached;
+		stat_config.aggr_get_id = perf_stat__get_socket_cached;
 		break;
 	case AGGR_CORE:
-		if (cpu_map__build_core_map(evsel_list->cpus, &aggr_map)) {
+		if (cpu_map__build_core_map(evsel_list->cpus, &stat_config.aggr_map)) {
 			perror("cannot build core map");
 			return -1;
 		}
-		aggr_get_id = perf_stat__get_core_cached;
+		stat_config.aggr_get_id = perf_stat__get_core_cached;
 		break;
 	case AGGR_NONE:
 	case AGGR_GLOBAL:
@@ -2123,16 +874,16 @@ static int perf_stat_init_aggr_mode(void)
 	 * the aggregation translate cpumap.
 	 */
 	nr = cpu_map__get_max(evsel_list->cpus);
-	cpus_aggr_map = cpu_map__empty_new(nr + 1);
-	return cpus_aggr_map ? 0 : -ENOMEM;
+	stat_config.cpus_aggr_map = cpu_map__empty_new(nr + 1);
+	return stat_config.cpus_aggr_map ? 0 : -ENOMEM;
 }
 
 static void perf_stat__exit_aggr_mode(void)
 {
-	cpu_map__put(aggr_map);
-	cpu_map__put(cpus_aggr_map);
-	aggr_map = NULL;
-	cpus_aggr_map = NULL;
+	cpu_map__put(stat_config.aggr_map);
+	cpu_map__put(stat_config.cpus_aggr_map);
+	stat_config.aggr_map = NULL;
+	stat_config.cpus_aggr_map = NULL;
 }
 
 static inline int perf_env__get_cpu(struct perf_env *env, struct cpu_map *map, int idx)
@@ -2190,12 +941,14 @@ static int perf_env__build_core_map(struct perf_env *env, struct cpu_map *cpus,
 	return cpu_map__build_map(cpus, corep, perf_env__get_core, env);
 }
 
-static int perf_stat__get_socket_file(struct cpu_map *map, int idx)
+static int perf_stat__get_socket_file(struct perf_stat_config *config __maybe_unused,
+				      struct cpu_map *map, int idx)
 {
 	return perf_env__get_socket(map, idx, &perf_stat.session->header.env);
 }
 
-static int perf_stat__get_core_file(struct cpu_map *map, int idx)
+static int perf_stat__get_core_file(struct perf_stat_config *config __maybe_unused,
+				    struct cpu_map *map, int idx)
 {
 	return perf_env__get_core(map, idx, &perf_stat.session->header.env);
 }
@@ -2206,18 +959,18 @@ static int perf_stat_init_aggr_mode_file(struct perf_stat *st)
 
 	switch (stat_config.aggr_mode) {
 	case AGGR_SOCKET:
-		if (perf_env__build_socket_map(env, evsel_list->cpus, &aggr_map)) {
+		if (perf_env__build_socket_map(env, evsel_list->cpus, &stat_config.aggr_map)) {
 			perror("cannot build socket map");
 			return -1;
 		}
-		aggr_get_id = perf_stat__get_socket_file;
+		stat_config.aggr_get_id = perf_stat__get_socket_file;
 		break;
 	case AGGR_CORE:
-		if (perf_env__build_core_map(env, evsel_list->cpus, &aggr_map)) {
+		if (perf_env__build_core_map(env, evsel_list->cpus, &stat_config.aggr_map)) {
 			perror("cannot build core map");
 			return -1;
 		}
-		aggr_get_id = perf_stat__get_core_file;
+		stat_config.aggr_get_id = perf_stat__get_core_file;
 		break;
 	case AGGR_NONE:
 	case AGGR_GLOBAL:
@@ -2401,7 +1154,7 @@ static int add_default_attributes(void)
 	struct parse_events_error errinfo;
 
 	/* Set attrs if no event is selected and !null_run: */
-	if (null_run)
+	if (stat_config.null_run)
 		return 0;
 
 	if (transaction_run) {
@@ -2414,7 +1167,7 @@ static int add_default_attributes(void)
 			struct option opt = { .value = &evsel_list };
 
 			return metricgroup__parse_groups(&opt, "transaction",
-							 &metric_events);
+							 &stat_config.metric_events);
 		}
 
 		if (pmu_have_event("cpu", "cycles-ct") &&
@@ -2452,7 +1205,7 @@ static int add_default_attributes(void)
 		if (pmu_have_event("msr", "aperf") &&
 		    pmu_have_event("msr", "smi")) {
 			if (!force_metric_only)
-				metric_only = true;
+				stat_config.metric_only = true;
 			err = parse_events(evsel_list, smi_cost_attrs, &errinfo);
 		} else {
 			fprintf(stderr, "To measure SMI cost, it needs "
@@ -2483,7 +1236,7 @@ static int add_default_attributes(void)
 		}
 
 		if (!force_metric_only)
-			metric_only = true;
+			stat_config.metric_only = true;
 		if (topdown_filter_events(topdown_attrs, &str,
 				arch_topdown_check_group(&warn)) < 0) {
 			pr_err("Out of memory\n");
@@ -2580,7 +1333,7 @@ static int __cmd_record(int argc, const char **argv)
 	if (output_name)
 		data->file.path = output_name;
 
-	if (run_count != 1 || forever) {
+	if (stat_config.run_count != 1 || forever) {
 		pr_err("Cannot use -r option with perf stat record.\n");
 		return -1;
 	}
@@ -2599,9 +1352,8 @@ static int __cmd_record(int argc, const char **argv)
 	return argc;
 }
 
-static int process_stat_round_event(struct perf_tool *tool __maybe_unused,
-				    union perf_event *event,
-				    struct perf_session *session)
+static int process_stat_round_event(struct perf_session *session,
+				    union perf_event *event)
 {
 	struct stat_round_event *stat_round = &event->stat_round;
 	struct perf_evsel *counter;
@@ -2626,10 +1378,10 @@ static int process_stat_round_event(struct perf_tool *tool __maybe_unused,
 }
 
 static
-int process_stat_config_event(struct perf_tool *tool,
-			      union perf_event *event,
-			      struct perf_session *session __maybe_unused)
+int process_stat_config_event(struct perf_session *session,
+			      union perf_event *event)
 {
+	struct perf_tool *tool = session->tool;
 	struct perf_stat *st = container_of(tool, struct perf_stat, tool);
 
 	perf_event__read_stat_config(&stat_config, &event->stat_config);
@@ -2669,10 +1421,10 @@ static int set_maps(struct perf_stat *st)
 }
 
 static
-int process_thread_map_event(struct perf_tool *tool,
-			     union perf_event *event,
-			     struct perf_session *session __maybe_unused)
+int process_thread_map_event(struct perf_session *session,
+			     union perf_event *event)
 {
+	struct perf_tool *tool = session->tool;
 	struct perf_stat *st = container_of(tool, struct perf_stat, tool);
 
 	if (st->threads) {
@@ -2688,10 +1440,10 @@ int process_thread_map_event(struct perf_tool *tool,
 }
 
 static
-int process_cpu_map_event(struct perf_tool *tool,
-			  union perf_event *event,
-			  struct perf_session *session __maybe_unused)
+int process_cpu_map_event(struct perf_session *session,
+			  union perf_event *event)
 {
+	struct perf_tool *tool = session->tool;
 	struct perf_stat *st = container_of(tool, struct perf_stat, tool);
 	struct cpu_map *cpus;
 
@@ -2853,12 +1605,12 @@ int cmd_stat(int argc, const char **argv)
 	perf_stat__collect_metric_expr(evsel_list);
 	perf_stat__init_shadow_stats();
 
-	if (csv_sep) {
-		csv_output = true;
-		if (!strcmp(csv_sep, "\\t"))
-			csv_sep = "\t";
+	if (stat_config.csv_sep) {
+		stat_config.csv_output = true;
+		if (!strcmp(stat_config.csv_sep, "\\t"))
+			stat_config.csv_sep = "\t";
 	} else
-		csv_sep = DEFAULT_SEPARATOR;
+		stat_config.csv_sep = DEFAULT_SEPARATOR;
 
 	if (argc && !strncmp(argv[0], "rec", 3)) {
 		argc = __cmd_record(argc, argv);
@@ -2883,17 +1635,17 @@ int cmd_stat(int argc, const char **argv)
 		goto out;
 	}
 
-	if (metric_only && stat_config.aggr_mode == AGGR_THREAD) {
+	if (stat_config.metric_only && stat_config.aggr_mode == AGGR_THREAD) {
 		fprintf(stderr, "--metric-only is not supported with --per-thread\n");
 		goto out;
 	}
 
-	if (metric_only && run_count > 1) {
+	if (stat_config.metric_only && stat_config.run_count > 1) {
 		fprintf(stderr, "--metric-only is not supported with -r\n");
 		goto out;
 	}
 
-	if (walltime_run_table && run_count <= 1) {
+	if (stat_config.walltime_run_table && stat_config.run_count <= 1) {
 		fprintf(stderr, "--table is only supported with -r\n");
 		parse_options_usage(stat_usage, stat_options, "r", 1);
 		parse_options_usage(NULL, stat_options, "table", 0);
@@ -2931,7 +1683,7 @@ int cmd_stat(int argc, const char **argv)
 	/*
 	 * let the spreadsheet do the pretty-printing
 	 */
-	if (csv_output) {
+	if (stat_config.csv_output) {
 		/* User explicitly passed -B? */
 		if (big_num_opt == 1) {
 			fprintf(stderr, "-B option not supported with -x\n");
@@ -2939,9 +1691,9 @@ int cmd_stat(int argc, const char **argv)
 			parse_options_usage(NULL, stat_options, "x", 1);
 			goto out;
 		} else /* Nope, so disable big number formatting */
-			big_num = false;
+			stat_config.big_num = false;
 	} else if (big_num_opt == 0) /* User passed --no-big-num */
-		big_num = false;
+		stat_config.big_num = false;
 
 	setup_system_wide(argc);
 
@@ -2949,21 +1701,21 @@ int cmd_stat(int argc, const char **argv)
 	 * Display user/system times only for single
 	 * run and when there's specified tracee.
 	 */
-	if ((run_count == 1) && target__none(&target))
-		ru_display = true;
+	if ((stat_config.run_count == 1) && target__none(&target))
+		stat_config.ru_display = true;
 
-	if (run_count < 0) {
+	if (stat_config.run_count < 0) {
 		pr_err("Run count must be a positive number\n");
 		parse_options_usage(stat_usage, stat_options, "r", 1);
 		goto out;
-	} else if (run_count == 0) {
+	} else if (stat_config.run_count == 0) {
 		forever = true;
-		run_count = 1;
+		stat_config.run_count = 1;
 	}
 
-	if (walltime_run_table) {
-		walltime_run = zalloc(run_count * sizeof(walltime_run[0]));
-		if (!walltime_run) {
+	if (stat_config.walltime_run_table) {
+		stat_config.walltime_run = zalloc(stat_config.run_count * sizeof(stat_config.walltime_run[0]));
+		if (!stat_config.walltime_run) {
 			pr_err("failed to setup -r option");
 			goto out;
 		}
@@ -3066,6 +1818,17 @@ int cmd_stat(int argc, const char **argv)
 		goto out;
 
 	/*
+	 * Set sample_type to PERF_SAMPLE_IDENTIFIER, which should be harmless
+	 * while avoiding that older tools show confusing messages.
+	 *
+	 * However for pipe sessions we need to keep it zero,
+	 * because script's perf_evsel__check_attr is triggered
+	 * by attr->sample_type != 0, and we can't run it on
+	 * stat sessions.
+	 */
+	stat_config.identifier = !(STAT_RECORD && perf_stat.data.is_pipe);
+
+	/*
 	 * We dont want to block the signals - that would cause
 	 * child tasks to inherit that and Ctrl-C would not work.
 	 * What we want is for Ctrl-C to work in the exec()-ed
@@ -3079,8 +1842,8 @@ int cmd_stat(int argc, const char **argv)
 	signal(SIGABRT, skip_signal);
 
 	status = 0;
-	for (run_idx = 0; forever || run_idx < run_count; run_idx++) {
-		if (run_count != 1 && verbose > 0)
+	for (run_idx = 0; forever || run_idx < stat_config.run_count; run_idx++) {
+		if (stat_config.run_count != 1 && verbose > 0)
 			fprintf(output, "[ perf stat: executing run #%d ... ]\n",
 				run_idx + 1);
 
@@ -3132,7 +1895,7 @@ int cmd_stat(int argc, const char **argv)
 	perf_stat__exit_aggr_mode();
 	perf_evlist__free_stats(evsel_list);
 out:
-	free(walltime_run);
+	free(stat_config.walltime_run);
 
 	if (smi_cost && smi_reset)
 		sysfs__write_int(FREEZE_ON_SMI_PATH, 0);
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index d21d8751e749..aa0c73e57924 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -1134,11 +1134,6 @@ static int __cmd_top(struct perf_top *top)
         if (!target__none(&opts->target))
                 perf_evlist__enable(top->evlist);
 
-	/* Wait for a minimal set of events before starting the snapshot */
-	perf_evlist__poll(top->evlist, 100);
-
-	perf_top__mmap_read(top);
-
 	ret = -1;
 	if (pthread_create(&thread, NULL, (use_browser > 0 ? display_thread_tui :
 							    display_thread), top)) {
@@ -1156,6 +1151,11 @@ static int __cmd_top(struct perf_top *top)
 		}
 	}
 
+	/* Wait for a minimal set of events before starting the snapshot */
+	perf_evlist__poll(top->evlist, 100);
+
+	perf_top__mmap_read(top);
+
 	while (!done) {
 		u64 hits = top->samples;
 
@@ -1257,7 +1257,14 @@ int cmd_top(int argc, const char **argv)
 				.uses_mmap   = true,
 			},
 			.proc_map_timeout    = 500,
-			.overwrite	= 1,
+			/*
+			 * FIXME: This will lose PERF_RECORD_MMAP and other metadata
+			 * when we pause, fix that and reenable. Probably using a
+			 * separate evlist with a dummy event, i.e. a non-overwrite
+			 * ring buffer just for metadata events, while PERF_RECORD_SAMPLE
+			 * stays in overwrite mode. -acme
+			 * */
+			.overwrite	= 0,
 		},
 		.max_stack	     = sysctl__max_stack(),
 		.annotation_opts     = annotation__default_options,
@@ -1372,6 +1379,8 @@ int cmd_top(int argc, const char **argv)
 		    "Show raw trace event output (do not use print fmt or plugins)"),
 	OPT_BOOLEAN(0, "hierarchy", &symbol_conf.report_hierarchy,
 		    "Show entries in a hierarchy"),
+	OPT_BOOLEAN(0, "overwrite", &top.record_opts.overwrite,
+		    "Use a backward ring buffer, default: no"),
 	OPT_BOOLEAN(0, "force", &symbol_conf.force, "don't complain, do it"),
 	OPT_UINTEGER(0, "num-thread-synthesize", &top.nr_threads_synthesize,
 			"number of thread to run event synthesize"),
@@ -1420,6 +1429,9 @@ int cmd_top(int argc, const char **argv)
 		}
 	}
 
+	if (opts->branch_stack && callchain_param.enabled)
+		symbol_conf.show_branchflag_count = true;
+
 	sort__mode = SORT_MODE__TOP;
 	/* display thread wants entries to be collapsed in a different tree */
 	perf_hpp_list.need_collapse = 1;
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 22ab8e67c760..835619476370 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -89,6 +89,8 @@ struct trace {
 	u64			base_time;
 	FILE			*output;
 	unsigned long		nr_events;
+	unsigned long		nr_events_printed;
+	unsigned long		max_events;
 	struct strlist		*ev_qualifier;
 	struct {
 		size_t		nr;
@@ -106,6 +108,7 @@ struct trace {
 	} stats;
 	unsigned int		max_stack;
 	unsigned int		min_stack;
+	bool			raw_augmented_syscalls;
 	bool			not_ev_qualifier;
 	bool			live;
 	bool			full_time;
@@ -181,7 +184,7 @@ static int __tp_field__init_uint(struct tp_field *field, int size, int offset, b
 	return 0;
 }
 
-static int tp_field__init_uint(struct tp_field *field, struct format_field *format_field, bool needs_swap)
+static int tp_field__init_uint(struct tp_field *field, struct tep_format_field *format_field, bool needs_swap)
 {
 	return __tp_field__init_uint(field, format_field->size, format_field->offset, needs_swap);
 }
@@ -198,7 +201,7 @@ static int __tp_field__init_ptr(struct tp_field *field, int offset)
 	return 0;
 }
 
-static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
+static int tp_field__init_ptr(struct tp_field *field, struct tep_format_field *format_field)
 {
 	return __tp_field__init_ptr(field, format_field->offset);
 }
@@ -214,7 +217,7 @@ static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
 					  struct tp_field *field,
 					  const char *name)
 {
-	struct format_field *format_field = perf_evsel__field(evsel, name);
+	struct tep_format_field *format_field = perf_evsel__field(evsel, name);
 
 	if (format_field == NULL)
 		return -1;
@@ -230,7 +233,7 @@ static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
 					 struct tp_field *field,
 					 const char *name)
 {
-	struct format_field *format_field = perf_evsel__field(evsel, name);
+	struct tep_format_field *format_field = perf_evsel__field(evsel, name);
 
 	if (format_field == NULL)
 		return -1;
@@ -288,6 +291,13 @@ static int perf_evsel__init_augmented_syscall_tp_args(struct perf_evsel *evsel)
 	return __tp_field__init_ptr(&sc->args, sc->id.offset + sizeof(u64));
 }
 
+static int perf_evsel__init_augmented_syscall_tp_ret(struct perf_evsel *evsel)
+{
+	struct syscall_tp *sc = evsel->priv;
+
+	return __tp_field__init_uint(&sc->ret, sizeof(u64), sc->id.offset + sizeof(u64), evsel->needs_swap);
+}
+
 static int perf_evsel__init_raw_syscall_tp(struct perf_evsel *evsel, void *handler)
 {
 	evsel->priv = malloc(sizeof(struct syscall_tp));
@@ -498,16 +508,6 @@ static const char *clockid[] = {
 };
 static DEFINE_STRARRAY(clockid);
 
-static const char *socket_families[] = {
-	"UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
-	"BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
-	"SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
-	"RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
-	"BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
-	"ALG", "NFC", "VSOCK",
-};
-static DEFINE_STRARRAY(socket_families);
-
 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
 						 struct syscall_arg *arg)
 {
@@ -615,6 +615,7 @@ static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
 
 struct syscall_arg_fmt {
 	size_t	   (*scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
+	unsigned long (*mask_val)(struct syscall_arg *arg, unsigned long val);
 	void	   *parm;
 	const char *name;
 	bool	   show_zero;
@@ -631,6 +632,8 @@ static struct syscall_fmt {
 } syscall_fmts[] = {
 	{ .name	    = "access",
 	  .arg = { [1] = { .scnprintf = SCA_ACCMODE,  /* mode */ }, }, },
+	{ .name	    = "bind",
+	  .arg = { [1] = { .scnprintf = SCA_SOCKADDR, /* umyaddr */ }, }, },
 	{ .name	    = "bpf",
 	  .arg = { [0] = STRARRAY(cmd, bpf_cmd), }, },
 	{ .name	    = "brk",	    .hexret = true,
@@ -645,6 +648,8 @@ static struct syscall_fmt {
 		   [4] = { .name = "tls",	    .scnprintf = SCA_HEX, }, }, },
 	{ .name	    = "close",
 	  .arg = { [0] = { .scnprintf = SCA_CLOSE_FD, /* fd */ }, }, },
+	{ .name	    = "connect",
+	  .arg = { [1] = { .scnprintf = SCA_SOCKADDR, /* servaddr */ }, }, },
 	{ .name	    = "epoll_ctl",
 	  .arg = { [1] = STRARRAY(op, epoll_ctl_ops), }, },
 	{ .name	    = "eventfd2",
@@ -722,6 +727,10 @@ static struct syscall_fmt {
 	  .arg = { [0] = { .scnprintf = SCA_HEX,	/* addr */ },
 		   [2] = { .scnprintf = SCA_MMAP_PROT,	/* prot */ },
 		   [3] = { .scnprintf = SCA_MMAP_FLAGS,	/* flags */ }, }, },
+	{ .name	    = "mount",
+	  .arg = { [0] = { .scnprintf = SCA_FILENAME, /* dev_name */ },
+		   [3] = { .scnprintf = SCA_MOUNT_FLAGS, /* flags */
+			   .mask_val  = SCAMV_MOUNT_FLAGS, /* flags */ }, }, },
 	{ .name	    = "mprotect",
 	  .arg = { [0] = { .scnprintf = SCA_HEX,	/* start */ },
 		   [2] = { .scnprintf = SCA_MMAP_PROT,	/* prot */ }, }, },
@@ -801,7 +810,8 @@ static struct syscall_fmt {
 	{ .name	    = "sendmsg",
 	  .arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
 	{ .name	    = "sendto",
-	  .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
+	  .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ },
+		   [4] = { .scnprintf = SCA_SOCKADDR, /* addr */ }, }, },
 	{ .name	    = "set_tid_address", .errpid = true, },
 	{ .name	    = "setitimer",
 	  .arg = { [0] = STRARRAY(which, itimers), }, },
@@ -830,6 +840,8 @@ static struct syscall_fmt {
 	  .arg = { [2] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
 	{ .name	    = "tkill",
 	  .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
+	{ .name     = "umount2", .alias = "umount",
+	  .arg = { [0] = { .scnprintf = SCA_FILENAME, /* name */ }, }, },
 	{ .name	    = "uname", .alias = "newuname", },
 	{ .name	    = "unlinkat",
 	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
@@ -853,16 +865,30 @@ static struct syscall_fmt *syscall_fmt__find(const char *name)
 	return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
 }
 
+static struct syscall_fmt *syscall_fmt__find_by_alias(const char *alias)
+{
+	int i, nmemb = ARRAY_SIZE(syscall_fmts);
+
+	for (i = 0; i < nmemb; ++i) {
+		if (syscall_fmts[i].alias && strcmp(syscall_fmts[i].alias, alias) == 0)
+			return &syscall_fmts[i];
+	}
+
+	return NULL;
+}
+
 /*
  * is_exit: is this "exit" or "exit_group"?
  * is_open: is this "open" or "openat"? To associate the fd returned in sys_exit with the pathname in sys_enter.
+ * args_size: sum of the sizes of the syscall arguments, anything after that is augmented stuff: pathname for openat, etc.
  */
 struct syscall {
-	struct event_format *tp_format;
+	struct tep_event_format *tp_format;
 	int		    nr_args;
+	int		    args_size;
 	bool		    is_exit;
 	bool		    is_open;
-	struct format_field *args;
+	struct tep_format_field *args;
 	const char	    *name;
 	struct syscall_fmt  *fmt;
 	struct syscall_arg_fmt *arg_fmt;
@@ -1095,11 +1121,21 @@ static void thread__set_filename_pos(struct thread *thread, const char *bf,
 	ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
 }
 
+static size_t syscall_arg__scnprintf_augmented_string(struct syscall_arg *arg, char *bf, size_t size)
+{
+	struct augmented_arg *augmented_arg = arg->augmented.args;
+
+	return scnprintf(bf, size, "%.*s", augmented_arg->size, augmented_arg->value);
+}
+
 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
 					      struct syscall_arg *arg)
 {
 	unsigned long ptr = arg->val;
 
+	if (arg->augmented.args)
+		return syscall_arg__scnprintf_augmented_string(arg, bf, size);
+
 	if (!arg->trace->vfs_getname)
 		return scnprintf(bf, size, "%#x", ptr);
 
@@ -1142,11 +1178,9 @@ static void sig_handler(int sig)
 	interrupted = sig == SIGINT;
 }
 
-static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
-					u64 duration, bool duration_calculated, u64 tstamp, FILE *fp)
+static size_t trace__fprintf_comm_tid(struct trace *trace, struct thread *thread, FILE *fp)
 {
-	size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
-	printed += fprintf_duration(duration, duration_calculated, fp);
+	size_t printed = 0;
 
 	if (trace->multiple_threads) {
 		if (trace->show_comm)
@@ -1157,6 +1191,14 @@ static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thre
 	return printed;
 }
 
+static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
+					u64 duration, bool duration_calculated, u64 tstamp, FILE *fp)
+{
+	size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
+	printed += fprintf_duration(duration, duration_calculated, fp);
+	return printed + trace__fprintf_comm_tid(trace, thread, fp);
+}
+
 static int trace__process_event(struct trace *trace, struct machine *machine,
 				union perf_event *event, struct perf_sample *sample)
 {
@@ -1258,10 +1300,12 @@ static int syscall__alloc_arg_fmts(struct syscall *sc, int nr_args)
 
 static int syscall__set_arg_fmts(struct syscall *sc)
 {
-	struct format_field *field;
+	struct tep_format_field *field, *last_field = NULL;
 	int idx = 0, len;
 
 	for (field = sc->args; field; field = field->next, ++idx) {
+		last_field = field;
+
 		if (sc->fmt && sc->fmt->arg[idx].scnprintf)
 			continue;
 
@@ -1270,7 +1314,7 @@ static int syscall__set_arg_fmts(struct syscall *sc)
 			  strcmp(field->name, "path") == 0 ||
 			  strcmp(field->name, "pathname") == 0))
 			sc->arg_fmt[idx].scnprintf = SCA_FILENAME;
-		else if (field->flags & FIELD_IS_POINTER)
+		else if (field->flags & TEP_FIELD_IS_POINTER)
 			sc->arg_fmt[idx].scnprintf = syscall_arg__scnprintf_hex;
 		else if (strcmp(field->type, "pid_t") == 0)
 			sc->arg_fmt[idx].scnprintf = SCA_PID;
@@ -1292,6 +1336,9 @@ static int syscall__set_arg_fmts(struct syscall *sc)
 		}
 	}
 
+	if (last_field)
+		sc->args_size = last_field->offset + last_field->size;
+
 	return 0;
 }
 
@@ -1459,6 +1506,19 @@ static size_t syscall__scnprintf_name(struct syscall *sc, char *bf, size_t size,
 	return scnprintf(bf, size, "arg%d: ", arg->idx);
 }
 
+/*
+ * Check if the value is in fact zero, i.e. mask whatever needs masking, such
+ * as mount 'flags' argument that needs ignoring some magic flag, see comment
+ * in tools/perf/trace/beauty/mount_flags.c
+ */
+static unsigned long syscall__mask_val(struct syscall *sc, struct syscall_arg *arg, unsigned long val)
+{
+	if (sc->arg_fmt && sc->arg_fmt[arg->idx].mask_val)
+		return sc->arg_fmt[arg->idx].mask_val(arg, val);
+
+	return val;
+}
+
 static size_t syscall__scnprintf_val(struct syscall *sc, char *bf, size_t size,
 				     struct syscall_arg *arg, unsigned long val)
 {
@@ -1472,14 +1532,18 @@ static size_t syscall__scnprintf_val(struct syscall *sc, char *bf, size_t size,
 }
 
 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
-				      unsigned char *args, struct trace *trace,
-				      struct thread *thread)
+				      unsigned char *args, void *augmented_args, int augmented_args_size,
+				      struct trace *trace, struct thread *thread)
 {
 	size_t printed = 0;
 	unsigned long val;
 	u8 bit = 1;
 	struct syscall_arg arg = {
 		.args	= args,
+		.augmented = {
+			.size = augmented_args_size,
+			.args = augmented_args,
+		},
 		.idx	= 0,
 		.mask	= 0,
 		.trace  = trace,
@@ -1495,7 +1559,7 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
 	ttrace->ret_scnprintf = NULL;
 
 	if (sc->args != NULL) {
-		struct format_field *field;
+		struct tep_format_field *field;
 
 		for (field = sc->args; field;
 		     field = field->next, ++arg.idx, bit <<= 1) {
@@ -1503,6 +1567,11 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
 				continue;
 
 			val = syscall_arg__val(&arg, arg.idx);
+			/*
+			 * Some syscall args need some mask, most don't and
+			 * return val untouched.
+			 */
+			val = syscall__mask_val(sc, &arg, val);
 
 			/*
  			 * Suppress this argument if its value is zero and
@@ -1634,6 +1703,8 @@ static int trace__printf_interrupted_entry(struct trace *trace)
 	printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
 	ttrace->entry_pending = false;
 
+	++trace->nr_events_printed;
+
 	return printed;
 }
 
@@ -1654,6 +1725,32 @@ static int trace__fprintf_sample(struct trace *trace, struct perf_evsel *evsel,
 	return printed;
 }
 
+static void *syscall__augmented_args(struct syscall *sc, struct perf_sample *sample, int *augmented_args_size, bool raw_augmented)
+{
+	void *augmented_args = NULL;
+	/*
+	 * For now with BPF raw_augmented we hook into raw_syscalls:sys_enter
+	 * and there we get all 6 syscall args plus the tracepoint common
+	 * fields (sizeof(long)) and the syscall_nr (another long). So we check
+	 * if that is the case and if so don't look after the sc->args_size,
+	 * but always after the full raw_syscalls:sys_enter payload, which is
+	 * fixed.
+	 *
+	 * We'll revisit this later to pass s->args_size to the BPF augmenter
+	 * (now tools/perf/examples/bpf/augmented_raw_syscalls.c, so that it
+	 * copies only what we need for each syscall, like what happens when we
+	 * use syscalls:sys_enter_NAME, so that we reduce the kernel/userspace
+	 * traffic to just what is needed for each syscall.
+	 */
+	int args_size = raw_augmented ? (8 * (int)sizeof(long)) : sc->args_size;
+
+	*augmented_args_size = sample->raw_size - args_size;
+	if (*augmented_args_size > 0)
+		augmented_args = sample->raw_data + args_size;
+
+	return augmented_args;
+}
+
 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
 			    union perf_event *event __maybe_unused,
 			    struct perf_sample *sample)
@@ -1663,6 +1760,8 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
 	size_t printed = 0;
 	struct thread *thread;
 	int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
+	int augmented_args_size = 0;
+	void *augmented_args = NULL;
 	struct syscall *sc = trace__syscall_info(trace, evsel, id);
 	struct thread_trace *ttrace;
 
@@ -1686,13 +1785,24 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
 
 	if (!(trace->duration_filter || trace->summary_only || trace->min_stack))
 		trace__printf_interrupted_entry(trace);
-
+	/*
+	 * If this is raw_syscalls.sys_enter, then it always comes with the 6 possible
+	 * arguments, even if the syscall being handled, say "openat", uses only 4 arguments
+	 * this breaks syscall__augmented_args() check for augmented args, as we calculate
+	 * syscall->args_size using each syscalls:sys_enter_NAME tracefs format file,
+	 * so when handling, say the openat syscall, we end up getting 6 args for the
+	 * raw_syscalls:sys_enter event, when we expected just 4, we end up mistakenly
+	 * thinking that the extra 2 u64 args are the augmented filename, so just check
+	 * here and avoid using augmented syscalls when the evsel is the raw_syscalls one.
+	 */
+	if (evsel != trace->syscalls.events.sys_enter)
+		augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size, trace->raw_augmented_syscalls);
 	ttrace->entry_time = sample->time;
 	msg = ttrace->entry_str;
 	printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
 
 	printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
-					   args, trace, thread);
+					   args, augmented_args, augmented_args_size, trace, thread);
 
 	if (sc->is_exit) {
 		if (!(trace->duration_filter || trace->summary_only || trace->failure_only || trace->min_stack)) {
@@ -1723,7 +1833,8 @@ static int trace__fprintf_sys_enter(struct trace *trace, struct perf_evsel *evse
 	int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
 	struct syscall *sc = trace__syscall_info(trace, evsel, id);
 	char msg[1024];
-	void *args;
+	void *args, *augmented_args = NULL;
+	int augmented_args_size;
 
 	if (sc == NULL)
 		return -1;
@@ -1738,7 +1849,8 @@ static int trace__fprintf_sys_enter(struct trace *trace, struct perf_evsel *evse
 		goto out_put;
 
 	args = perf_evsel__sc_tp_ptr(evsel, args, sample);
-	syscall__scnprintf_args(sc, msg, sizeof(msg), args, trace, thread);
+	augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size, trace->raw_augmented_syscalls);
+	syscall__scnprintf_args(sc, msg, sizeof(msg), args, augmented_args, augmented_args_size, trace, thread);
 	fprintf(trace->output, "%s", msg);
 	err = 0;
 out_put:
@@ -1754,12 +1866,14 @@ static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evse
 	int max_stack = evsel->attr.sample_max_stack ?
 			evsel->attr.sample_max_stack :
 			trace->max_stack;
+	int err;
 
-	if (machine__resolve(trace->host, &al, sample) < 0 ||
-	    thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, max_stack))
+	if (machine__resolve(trace->host, &al, sample) < 0)
 		return -1;
 
-	return 0;
+	err = thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, max_stack);
+	addr_location__put(&al);
+	return err;
 }
 
 static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample)
@@ -1884,6 +1998,13 @@ errno_print: {
 
 	fputc('\n', trace->output);
 
+	/*
+	 * We only consider an 'event' for the sake of --max-events a non-filtered
+	 * sys_enter + sys_exit and other tracepoint events.
+	 */
+	if (++trace->nr_events_printed == trace->max_events && trace->max_events != ULONG_MAX)
+		interrupted = true;
+
 	if (callchain_ret > 0)
 		trace__fprintf_callchain(trace, sample);
 	else if (callchain_ret < 0)
@@ -2016,13 +2137,25 @@ static void bpf_output__fprintf(struct trace *trace,
 {
 	binary__fprintf(sample->raw_data, sample->raw_size, 8,
 			bpf_output__printer, NULL, trace->output);
+	++trace->nr_events_printed;
 }
 
 static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
 				union perf_event *event __maybe_unused,
 				struct perf_sample *sample)
 {
+	struct thread *thread;
 	int callchain_ret = 0;
+	/*
+	 * Check if we called perf_evsel__disable(evsel) due to, for instance,
+	 * this event's max_events having been hit and this is an entry coming
+	 * from the ring buffer that we should discard, since the max events
+	 * have already been considered/printed.
+	 */
+	if (evsel->disabled)
+		return 0;
+
+	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
 
 	if (sample->callchain) {
 		callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
@@ -2039,22 +2172,47 @@ static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
 	if (trace->trace_syscalls)
 		fprintf(trace->output, "(         ): ");
 
+	if (thread)
+		trace__fprintf_comm_tid(trace, thread, trace->output);
+
+	if (evsel == trace->syscalls.events.augmented) {
+		int id = perf_evsel__sc_tp_uint(evsel, id, sample);
+		struct syscall *sc = trace__syscall_info(trace, evsel, id);
+
+		if (sc) {
+			fprintf(trace->output, "%s(", sc->name);
+			trace__fprintf_sys_enter(trace, evsel, sample);
+			fputc(')', trace->output);
+			goto newline;
+		}
+
+		/*
+		 * XXX: Not having the associated syscall info or not finding/adding
+		 * 	the thread should never happen, but if it does...
+		 * 	fall thru and print it as a bpf_output event.
+		 */
+	}
+
 	fprintf(trace->output, "%s:", evsel->name);
 
 	if (perf_evsel__is_bpf_output(evsel)) {
-		if (evsel == trace->syscalls.events.augmented)
-			trace__fprintf_sys_enter(trace, evsel, sample);
-		else
-			bpf_output__fprintf(trace, sample);
+		bpf_output__fprintf(trace, sample);
 	} else if (evsel->tp_format) {
 		if (strncmp(evsel->tp_format->name, "sys_enter_", 10) ||
 		    trace__fprintf_sys_enter(trace, evsel, sample)) {
 			event_format__fprintf(evsel->tp_format, sample->cpu,
 					      sample->raw_data, sample->raw_size,
 					      trace->output);
+			++trace->nr_events_printed;
+
+			if (evsel->max_events != ULONG_MAX && ++evsel->nr_events_printed == evsel->max_events) {
+				perf_evsel__disable(evsel);
+				perf_evsel__close(evsel);
+			}
 		}
 	}
 
+newline:
 	fprintf(trace->output, "\n");
 
 	if (callchain_ret > 0)
@@ -2062,6 +2220,7 @@ static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
 	else if (callchain_ret < 0)
 		pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
 out:
+	thread__put(thread);
 	return 0;
 }
 
@@ -2148,6 +2307,8 @@ static int trace__pgfault(struct trace *trace,
 		trace__fprintf_callchain(trace, sample);
 	else if (callchain_ret < 0)
 		pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
+
+	++trace->nr_events_printed;
 out:
 	err = 0;
 out_put:
@@ -2325,6 +2486,9 @@ static void trace__handle_event(struct trace *trace, union perf_event *event, st
 		tracepoint_handler handler = evsel->handler;
 		handler(trace, evsel, event, sample);
 	}
+
+	if (trace->nr_events_printed >= trace->max_events && trace->max_events != ULONG_MAX)
+		interrupted = true;
 }
 
 static int trace__add_syscall_newtp(struct trace *trace)
@@ -2629,7 +2793,7 @@ next_event:
 		int timeout = done ? 100 : -1;
 
 		if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
-			if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
+			if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP | POLLNVAL) == 0)
 				draining = true;
 
 			goto again;
@@ -3061,6 +3225,7 @@ static int trace__parse_events_option(const struct option *opt, const char *str,
 	int len = strlen(str) + 1, err = -1, list, idx;
 	char *strace_groups_dir = system_path(STRACE_GROUPS_DIR);
 	char group_name[PATH_MAX];
+	struct syscall_fmt *fmt;
 
 	if (strace_groups_dir == NULL)
 		return -1;
@@ -3078,12 +3243,19 @@ static int trace__parse_events_option(const struct option *opt, const char *str,
 		if (syscalltbl__id(trace->sctbl, s) >= 0 ||
 		    syscalltbl__strglobmatch_first(trace->sctbl, s, &idx) >= 0) {
 			list = 1;
+			goto do_concat;
+		}
+
+		fmt = syscall_fmt__find_by_alias(s);
+		if (fmt != NULL) {
+			list = 1;
+			s = fmt->name;
 		} else {
 			path__join(group_name, sizeof(group_name), strace_groups_dir, s);
 			if (access(group_name, R_OK) == 0)
 				list = 1;
 		}
-
+do_concat:
 		if (lists[list]) {
 			sprintf(lists[list] + strlen(lists[list]), ",%s", s);
 		} else {
@@ -3172,6 +3344,7 @@ int cmd_trace(int argc, const char **argv)
 		.trace_syscalls = false,
 		.kernel_syscallchains = false,
 		.max_stack = UINT_MAX,
+		.max_events = ULONG_MAX,
 	};
 	const char *output_name = NULL;
 	const struct option trace_options[] = {
@@ -3224,6 +3397,8 @@ int cmd_trace(int argc, const char **argv)
 		     &record_parse_callchain_opt),
 	OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains,
 		    "Show the kernel callchains on the syscall exit path"),
+	OPT_ULONG(0, "max-events", &trace.max_events,
+		"Set the maximum number of events to print, exit after that is reached. "),
 	OPT_UINTEGER(0, "min-stack", &trace.min_stack,
 		     "Set the minimum stack depth when parsing the callchain, "
 		     "anything below the specified depth will be ignored."),
@@ -3276,12 +3451,8 @@ int cmd_trace(int argc, const char **argv)
 		goto out;
 	}
 
-	if (evsel) {
-		if (perf_evsel__init_augmented_syscall_tp(evsel) ||
-		    perf_evsel__init_augmented_syscall_tp_args(evsel))
-			goto out;
+	if (evsel)
 		trace.syscalls.events.augmented = evsel;
-	}
 
 	err = bpf__setup_stdout(trace.evlist);
 	if (err) {
@@ -3326,6 +3497,42 @@ int cmd_trace(int argc, const char **argv)
 		}
 	}
 
+	/*
+	 * If we are augmenting syscalls, then combine what we put in the
+	 * __augmented_syscalls__ BPF map with what is in the
+	 * syscalls:sys_exit_FOO tracepoints, i.e. just like we do without BPF,
+	 * combining raw_syscalls:sys_enter with raw_syscalls:sys_exit.
+	 *
+	 * We'll switch to look at two BPF maps, one for sys_enter and the
+	 * other for sys_exit when we start augmenting the sys_exit paths with
+	 * buffers that are being copied from kernel to userspace, think 'read'
+	 * syscall.
+	 */
+	if (trace.syscalls.events.augmented) {
+		evsel = trace.syscalls.events.augmented;
+
+		if (perf_evsel__init_augmented_syscall_tp(evsel) ||
+		    perf_evsel__init_augmented_syscall_tp_args(evsel))
+			goto out;
+		evsel->handler = trace__sys_enter;
+
+		evlist__for_each_entry(trace.evlist, evsel) {
+			bool raw_syscalls_sys_exit = strcmp(perf_evsel__name(evsel), "raw_syscalls:sys_exit") == 0;
+
+			if (raw_syscalls_sys_exit) {
+				trace.raw_augmented_syscalls = true;
+				goto init_augmented_syscall_tp;
+			}
+
+			if (strstarts(perf_evsel__name(evsel), "syscalls:sys_exit_")) {
+init_augmented_syscall_tp:
+				perf_evsel__init_augmented_syscall_tp(evsel);
+				perf_evsel__init_augmented_syscall_tp_ret(evsel);
+				evsel->handler = trace__sys_exit;
+			}
+		}
+	}
+
 	if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
 		return trace__record(&trace, argc-1, &argv[1]);
 
diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh
index 466540ee8ea7..9531f7bd7d9b 100755
--- a/tools/perf/check-headers.sh
+++ b/tools/perf/check-headers.sh
@@ -5,6 +5,7 @@ HEADERS='
 include/uapi/drm/drm.h
 include/uapi/drm/i915_drm.h
 include/uapi/linux/fcntl.h
+include/uapi/linux/fs.h
 include/uapi/linux/kcmp.h
 include/uapi/linux/kvm.h
 include/uapi/linux/in.h
@@ -14,6 +15,7 @@ include/uapi/linux/sched.h
 include/uapi/linux/stat.h
 include/uapi/linux/vhost.h
 include/uapi/sound/asound.h
+include/linux/bits.h
 include/linux/hash.h
 include/uapi/linux/hw_breakpoint.h
 arch/x86/include/asm/disabled-features.h
diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt
index 2d0caf20ff3a..bc6c585f74fc 100644
--- a/tools/perf/command-list.txt
+++ b/tools/perf/command-list.txt
@@ -30,3 +30,4 @@ perf-test			mainporcelain common
 perf-timechart			mainporcelain common
 perf-top			mainporcelain common
 perf-trace			mainporcelain audit
+perf-version			mainporcelain common
diff --git a/tools/perf/examples/bpf/augmented_raw_syscalls.c b/tools/perf/examples/bpf/augmented_raw_syscalls.c
new file mode 100644
index 000000000000..90a19336310b
--- /dev/null
+++ b/tools/perf/examples/bpf/augmented_raw_syscalls.c
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Augment the raw_syscalls tracepoints with the contents of the pointer arguments.
+ *
+ * Test it with:
+ *
+ * perf trace -e tools/perf/examples/bpf/augmented_raw_syscalls.c cat /etc/passwd > /dev/null
+ *
+ * This exactly matches what is marshalled into the raw_syscall:sys_enter
+ * payload expected by the 'perf trace' beautifiers.
+ *
+ * For now it just uses the existing tracepoint augmentation code in 'perf
+ * trace', in the next csets we'll hook up these with the sys_enter/sys_exit
+ * code that will combine entry/exit in a strace like way.
+ */
+
+#include <stdio.h>
+#include <linux/socket.h>
+
+/* bpf-output associated map */
+struct bpf_map SEC("maps") __augmented_syscalls__ = {
+	.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
+	.key_size = sizeof(int),
+	.value_size = sizeof(u32),
+	.max_entries = __NR_CPUS__,
+};
+
+struct syscall_enter_args {
+	unsigned long long common_tp_fields;
+	long		   syscall_nr;
+	unsigned long	   args[6];
+};
+
+struct syscall_exit_args {
+	unsigned long long common_tp_fields;
+	long		   syscall_nr;
+	long		   ret;
+};
+
+struct augmented_filename {
+	unsigned int	size;
+	int		reserved;
+	char		value[256];
+};
+
+#define SYS_OPEN 2
+#define SYS_OPENAT 257
+
+SEC("raw_syscalls:sys_enter")
+int sys_enter(struct syscall_enter_args *args)
+{
+	struct {
+		struct syscall_enter_args args;
+		struct augmented_filename filename;
+	} augmented_args;
+	unsigned int len = sizeof(augmented_args);
+	const void *filename_arg = NULL;
+
+	probe_read(&augmented_args.args, sizeof(augmented_args.args), args);
+	/*
+	 * Yonghong and Edward Cree sayz:
+	 *
+	 * https://www.spinics.net/lists/netdev/msg531645.html
+	 *
+	 * >>   R0=inv(id=0) R1=inv2 R6=ctx(id=0,off=0,imm=0) R7=inv64 R10=fp0,call_-1
+	 * >> 10: (bf) r1 = r6
+	 * >> 11: (07) r1 += 16
+	 * >> 12: (05) goto pc+2
+	 * >> 15: (79) r3 = *(u64 *)(r1 +0)
+	 * >> dereference of modified ctx ptr R1 off=16 disallowed
+	 * > Aha, we at least got a different error message this time.
+	 * > And indeed llvm has done that optimisation, rather than the more obvious
+	 * > 11: r3 = *(u64 *)(r1 +16)
+	 * > because it wants to have lots of reads share a single insn.  You may be able
+	 * > to defeat that optimisation by adding compiler barriers, idk.  Maybe someone
+	 * > with llvm knowledge can figure out how to stop it (ideally, llvm would know
+	 * > when it's generating for bpf backend and not do that).  -O0?  ¯\_(ツ)_/¯
+	 *
+	 * The optimization mostly likes below:
+	 *
+	 *	br1:
+	 * 	...
+	 *	r1 += 16
+	 *	goto merge
+	 *	br2:
+	 *	...
+	 *	r1 += 20
+	 *	goto merge
+	 *	merge:
+	 *	*(u64 *)(r1 + 0)
+	 *
+	 * The compiler tries to merge common loads. There is no easy way to
+	 * stop this compiler optimization without turning off a lot of other
+	 * optimizations. The easiest way is to add barriers:
+	 *
+	 * 	 __asm__ __volatile__("": : :"memory")
+	 *
+	 * 	 after the ctx memory access to prevent their down stream merging.
+	 */
+	switch (augmented_args.args.syscall_nr) {
+	case SYS_OPEN:	 filename_arg = (const void *)args->args[0];
+			__asm__ __volatile__("": : :"memory");
+			 break;
+	case SYS_OPENAT: filename_arg = (const void *)args->args[1];
+			 break;
+	}
+
+	if (filename_arg != NULL) {
+		augmented_args.filename.reserved = 0;
+		augmented_args.filename.size = probe_read_str(&augmented_args.filename.value,
+							      sizeof(augmented_args.filename.value),
+							      filename_arg);
+		if (augmented_args.filename.size < sizeof(augmented_args.filename.value)) {
+			len -= sizeof(augmented_args.filename.value) - augmented_args.filename.size;
+			len &= sizeof(augmented_args.filename.value) - 1;
+		}
+	} else {
+		len = sizeof(augmented_args.args);
+	}
+
+	perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, &augmented_args, len);
+	return 0;
+}
+
+SEC("raw_syscalls:sys_exit")
+int sys_exit(struct syscall_exit_args *args)
+{
+	return 1; /* 0 as soon as we start copying data returned by the kernel, e.g. 'read' */
+}
+
+license(GPL);
diff --git a/tools/perf/examples/bpf/augmented_syscalls.c b/tools/perf/examples/bpf/augmented_syscalls.c
index 69a31386d8cd..2ae44813ef2d 100644
--- a/tools/perf/examples/bpf/augmented_syscalls.c
+++ b/tools/perf/examples/bpf/augmented_syscalls.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
- * Augment the openat syscall with the contents of the filename pointer argument.
+ * Augment syscalls with the contents of the pointer arguments.
  *
  * Test it with:
  *
@@ -10,15 +10,14 @@
  * the last one should be the one for '/etc/passwd'.
  *
  * This matches what is marshalled into the raw_syscall:sys_enter payload
- * expected by the 'perf trace' beautifiers, and can be used by them unmodified,
- * which will be done as that feature is implemented in the next csets, for now
- * it will appear in a dump done by the default tracepoint handler in 'perf trace',
- * that uses bpf_output__fprintf() to just dump those contents, as done with
- * the bpf-output event associated with the __bpf_output__ map declared in
- * tools/perf/include/bpf/stdio.h.
+ * expected by the 'perf trace' beautifiers, and can be used by them, that will
+ * check if perf_sample->raw_data is more than what is expected for each
+ * syscalls:sys_{enter,exit}_SYSCALL tracepoint, uing the extra data as the
+ * contents of pointer arguments.
  */
 
 #include <stdio.h>
+#include <linux/socket.h>
 
 struct bpf_map SEC("maps") __augmented_syscalls__ = {
        .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
@@ -27,6 +26,44 @@ struct bpf_map SEC("maps") __augmented_syscalls__ = {
        .max_entries = __NR_CPUS__,
 };
 
+struct syscall_exit_args {
+	unsigned long long common_tp_fields;
+	long		   syscall_nr;
+	long		   ret;
+};
+
+struct augmented_filename {
+	unsigned int	size;
+	int		reserved;
+	char		value[256];
+};
+
+#define augmented_filename_syscall(syscall)							\
+struct augmented_enter_##syscall##_args {			 				\
+	struct syscall_enter_##syscall##_args	args;				 		\
+	struct augmented_filename		filename;				 	\
+};												\
+int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args)				\
+{												\
+	struct augmented_enter_##syscall##_args augmented_args = { .filename.reserved = 0, }; 	\
+	unsigned int len = sizeof(augmented_args);						\
+	probe_read(&augmented_args.args, sizeof(augmented_args.args), args);			\
+	augmented_args.filename.size = probe_read_str(&augmented_args.filename.value, 		\
+						      sizeof(augmented_args.filename.value), 	\
+						      args->filename_ptr); 			\
+	if (augmented_args.filename.size < sizeof(augmented_args.filename.value)) {		\
+		len -= sizeof(augmented_args.filename.value) - augmented_args.filename.size;	\
+		len &= sizeof(augmented_args.filename.value) - 1;				\
+	}											\
+	perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, 			\
+			  &augmented_args, len);						\
+	return 0;										\
+}												\
+int syscall_exit(syscall)(struct syscall_exit_args *args)					\
+{												\
+       return 1; /* 0 as soon as we start copying data returned by the kernel, e.g. 'read' */	\
+}
+
 struct syscall_enter_openat_args {
 	unsigned long long common_tp_fields;
 	long		   syscall_nr;
@@ -36,20 +73,101 @@ struct syscall_enter_openat_args {
 	long		   mode;
 };
 
-struct augmented_enter_openat_args {
-	struct syscall_enter_openat_args args;
-	char				 filename[64];
+augmented_filename_syscall(openat);
+
+struct syscall_enter_open_args {
+	unsigned long long common_tp_fields;
+	long		   syscall_nr;
+	char		   *filename_ptr;
+	long		   flags;
+	long		   mode;
+};
+
+augmented_filename_syscall(open);
+
+struct syscall_enter_inotify_add_watch_args {
+	unsigned long long common_tp_fields;
+	long		   syscall_nr;
+	long		   fd;
+	char		   *filename_ptr;
+	long		   mask;
+};
+
+augmented_filename_syscall(inotify_add_watch);
+
+struct statbuf;
+
+struct syscall_enter_newstat_args {
+	unsigned long long common_tp_fields;
+	long		   syscall_nr;
+	char		   *filename_ptr;
+	struct stat	   *statbuf;
 };
 
-int syscall_enter(openat)(struct syscall_enter_openat_args *args)
-{
-	struct augmented_enter_openat_args augmented_args;
+augmented_filename_syscall(newstat);
+
+#ifndef _K_SS_MAXSIZE
+#define _K_SS_MAXSIZE 128
+#endif
 
-	probe_read(&augmented_args.args, sizeof(augmented_args.args), args);
-	probe_read_str(&augmented_args.filename, sizeof(augmented_args.filename), args->filename_ptr);
-	perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU,
-			  &augmented_args, sizeof(augmented_args));
-	return 1;
+#define augmented_sockaddr_syscall(syscall)						\
+struct augmented_enter_##syscall##_args {			 				\
+	struct syscall_enter_##syscall##_args	args;				 		\
+	struct sockaddr_storage			addr;						\
+};												\
+int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args)				\
+{												\
+	struct augmented_enter_##syscall##_args augmented_args;				 	\
+	unsigned long addrlen = sizeof(augmented_args.addr);					\
+	probe_read(&augmented_args.args, sizeof(augmented_args.args), args);			\
+/* FIXME_CLANG_OPTIMIZATION_THAT_ACCESSES_USER_CONTROLLED_ADDRLEN_DESPITE_THIS_CHECK */		\
+/*	if (addrlen > augmented_args.args.addrlen)				     */		\
+/*		addrlen = augmented_args.args.addrlen;				     */		\
+/*										     */		\
+	probe_read(&augmented_args.addr, addrlen, args->addr_ptr); 				\
+	perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, 			\
+			  &augmented_args, 							\
+			  sizeof(augmented_args) - sizeof(augmented_args.addr) + addrlen);	\
+	return 0;										\
+}												\
+int syscall_exit(syscall)(struct syscall_exit_args *args)					\
+{												\
+       return 1; /* 0 as soon as we start copying data returned by the kernel, e.g. 'read' */	\
 }
 
+struct sockaddr;
+
+struct syscall_enter_bind_args {
+	unsigned long long common_tp_fields;
+	long		   syscall_nr;
+	long		   fd;
+	struct sockaddr	   *addr_ptr;
+	unsigned long	   addrlen;
+};
+
+augmented_sockaddr_syscall(bind);
+
+struct syscall_enter_connect_args {
+	unsigned long long common_tp_fields;
+	long		   syscall_nr;
+	long		   fd;
+	struct sockaddr	   *addr_ptr;
+	unsigned long	   addrlen;
+};
+
+augmented_sockaddr_syscall(connect);
+
+struct syscall_enter_sendto_args {
+	unsigned long long common_tp_fields;
+	long		   syscall_nr;
+	long		   fd;
+	void		   *buff;
+	long		   len;
+	unsigned long	   flags;
+	struct sockaddr	   *addr_ptr;
+	long		   addr_len;
+};
+
+augmented_sockaddr_syscall(sendto);
+
 license(GPL);
diff --git a/tools/perf/examples/bpf/etcsnoop.c b/tools/perf/examples/bpf/etcsnoop.c
new file mode 100644
index 000000000000..b59e8812ee8c
--- /dev/null
+++ b/tools/perf/examples/bpf/etcsnoop.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Augment the filename syscalls with the contents of the filename pointer argument
+ * filtering only those that do not start with /etc/.
+ *
+ * Test it with:
+ *
+ * perf trace -e tools/perf/examples/bpf/augmented_syscalls.c cat /etc/passwd > /dev/null
+ *
+ * It'll catch some openat syscalls related to the dynamic linked and
+ * the last one should be the one for '/etc/passwd'.
+ *
+ * This matches what is marshalled into the raw_syscall:sys_enter payload
+ * expected by the 'perf trace' beautifiers, and can be used by them unmodified,
+ * which will be done as that feature is implemented in the next csets, for now
+ * it will appear in a dump done by the default tracepoint handler in 'perf trace',
+ * that uses bpf_output__fprintf() to just dump those contents, as done with
+ * the bpf-output event associated with the __bpf_output__ map declared in
+ * tools/perf/include/bpf/stdio.h.
+ */
+
+#include <stdio.h>
+
+struct bpf_map SEC("maps") __augmented_syscalls__ = {
+       .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
+       .key_size = sizeof(int),
+       .value_size = sizeof(u32),
+       .max_entries = __NR_CPUS__,
+};
+
+struct augmented_filename {
+	int	size;
+	int	reserved;
+	char	value[64];
+};
+
+#define augmented_filename_syscall_enter(syscall) 						\
+struct augmented_enter_##syscall##_args {			 				\
+	struct syscall_enter_##syscall##_args	args;				 		\
+	struct augmented_filename		filename;				 	\
+};												\
+int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args)				\
+{												\
+	char etc[6] = "/etc/";									\
+	struct augmented_enter_##syscall##_args augmented_args = { .filename.reserved = 0, }; 	\
+	probe_read(&augmented_args.args, sizeof(augmented_args.args), args);			\
+	augmented_args.filename.size = probe_read_str(&augmented_args.filename.value, 		\
+						      sizeof(augmented_args.filename.value), 	\
+						      args->filename_ptr); 			\
+	if (__builtin_memcmp(augmented_args.filename.value, etc, 4) != 0)			\
+		return 0;									\
+	perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, 			\
+			  &augmented_args, 							\
+			  (sizeof(augmented_args) - sizeof(augmented_args.filename.value) +	\
+			   augmented_args.filename.size));					\
+	return 0;										\
+}
+
+struct syscall_enter_openat_args {
+	unsigned long long common_tp_fields;
+	long		   syscall_nr;
+	long		   dfd;
+	char		   *filename_ptr;
+	long		   flags;
+	long		   mode;
+};
+
+augmented_filename_syscall_enter(openat);
+
+struct syscall_enter_open_args {
+	unsigned long long common_tp_fields;
+	long		   syscall_nr;
+	char		   *filename_ptr;
+	long		   flags;
+	long		   mode;
+};
+
+augmented_filename_syscall_enter(open);
+
+license(GPL);
diff --git a/tools/perf/include/bpf/bpf.h b/tools/perf/include/bpf/bpf.h
index 47897d65e799..52b6d87fe822 100644
--- a/tools/perf/include/bpf/bpf.h
+++ b/tools/perf/include/bpf/bpf.h
@@ -26,6 +26,9 @@ struct bpf_map {
 #define syscall_enter(name) \
 	SEC("syscalls:sys_enter_" #name) syscall_enter_ ## name
 
+#define syscall_exit(name) \
+	SEC("syscalls:sys_exit_" #name) syscall_exit_ ## name
+
 #define license(name) \
 char _license[] SEC("license") = #name; \
 int _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/tools/perf/include/bpf/linux/socket.h b/tools/perf/include/bpf/linux/socket.h
new file mode 100644
index 000000000000..7f844568dab8
--- /dev/null
+++ b/tools/perf/include/bpf/linux/socket.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_LINUX_SOCKET_H
+#define _UAPI_LINUX_SOCKET_H
+
+/*
+ * Desired design of maximum size and alignment (see RFC2553)
+ */
+#define _K_SS_MAXSIZE	128	/* Implementation specific max size */
+#define _K_SS_ALIGNSIZE	(__alignof__ (struct sockaddr *))
+				/* Implementation specific desired alignment */
+
+typedef unsigned short __kernel_sa_family_t;
+
+struct __kernel_sockaddr_storage {
+	__kernel_sa_family_t	ss_family;		/* address family */
+	/* Following field(s) are implementation specific */
+	char		__data[_K_SS_MAXSIZE - sizeof(unsigned short)];
+				/* space to achieve desired size, */
+				/* _SS_MAXSIZE value minus size of ss_family */
+} __attribute__ ((aligned(_K_SS_ALIGNSIZE)));	/* force desired alignment */
+
+#define sockaddr_storage __kernel_sockaddr_storage
+
+#endif /* _UAPI_LINUX_SOCKET_H */
diff --git a/tools/perf/jvmti/jvmti_agent.c b/tools/perf/jvmti/jvmti_agent.c
index ac1bcdc17dae..f7eb63cbbc65 100644
--- a/tools/perf/jvmti/jvmti_agent.c
+++ b/tools/perf/jvmti/jvmti_agent.c
@@ -125,7 +125,7 @@ perf_get_timestamp(void)
 }
 
 static int
-debug_cache_init(void)
+create_jit_cache_dir(void)
 {
 	char str[32];
 	char *base, *p;
@@ -144,8 +144,13 @@ debug_cache_init(void)
 
 	strftime(str, sizeof(str), JIT_LANG"-jit-%Y%m%d", &tm);
 
-	snprintf(jit_path, PATH_MAX - 1, "%s/.debug/", base);
-
+	ret = snprintf(jit_path, PATH_MAX, "%s/.debug/", base);
+	if (ret >= PATH_MAX) {
+		warnx("jvmti: cannot generate jit cache dir because %s/.debug/"
+			" is too long, please check the cwd, JITDUMPDIR, and"
+			" HOME variables", base);
+		return -1;
+	}
 	ret = mkdir(jit_path, 0755);
 	if (ret == -1) {
 		if (errno != EEXIST) {
@@ -154,20 +159,32 @@ debug_cache_init(void)
 		}
 	}
 
-	snprintf(jit_path, PATH_MAX - 1, "%s/.debug/jit", base);
+	ret = snprintf(jit_path, PATH_MAX, "%s/.debug/jit", base);
+	if (ret >= PATH_MAX) {
+		warnx("jvmti: cannot generate jit cache dir because"
+			" %s/.debug/jit is too long, please check the cwd,"
+			" JITDUMPDIR, and HOME variables", base);
+		return -1;
+	}
 	ret = mkdir(jit_path, 0755);
 	if (ret == -1) {
 		if (errno != EEXIST) {
-			warn("cannot create jit cache dir %s", jit_path);
+			warn("jvmti: cannot create jit cache dir %s", jit_path);
 			return -1;
 		}
 	}
 
-	snprintf(jit_path, PATH_MAX - 1, "%s/.debug/jit/%s.XXXXXXXX", base, str);
-
+	ret = snprintf(jit_path, PATH_MAX, "%s/.debug/jit/%s.XXXXXXXX", base, str);
+	if (ret >= PATH_MAX) {
+		warnx("jvmti: cannot generate jit cache dir because"
+			" %s/.debug/jit/%s.XXXXXXXX is too long, please check"
+			" the cwd, JITDUMPDIR, and HOME variables",
+			base, str);
+		return -1;
+	}
 	p = mkdtemp(jit_path);
 	if (p != jit_path) {
-		warn("cannot create jit cache dir %s", jit_path);
+		warn("jvmti: cannot create jit cache dir %s", jit_path);
 		return -1;
 	}
 
@@ -228,7 +245,7 @@ void *jvmti_open(void)
 {
 	char dump_path[PATH_MAX];
 	struct jitheader header;
-	int fd;
+	int fd, ret;
 	FILE *fp;
 
 	init_arch_timestamp();
@@ -245,12 +262,22 @@ void *jvmti_open(void)
 
 	memset(&header, 0, sizeof(header));
 
-	debug_cache_init();
+	/*
+	 * jitdump file dir
+	 */
+	if (create_jit_cache_dir() < 0)
+		return NULL;
 
 	/*
 	 * jitdump file name
 	 */
-	scnprintf(dump_path, PATH_MAX, "%s/jit-%i.dump", jit_path, getpid());
+	ret = snprintf(dump_path, PATH_MAX, "%s/jit-%i.dump", jit_path, getpid());
+	if (ret >= PATH_MAX) {
+		warnx("jvmti: cannot generate jitdump file full path because"
+			" %s/jit-%i.dump is too long, please check the cwd,"
+			" JITDUMPDIR, and HOME variables", jit_path, getpid());
+		return NULL;
+	}
 
 	fd = open(dump_path, O_CREAT|O_TRUNC|O_RDWR, 0666);
 	if (fd == -1)
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index 21bf7f5a3cf5..0ed4a34c74c4 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -81,6 +81,7 @@ struct record_opts {
 	unsigned     initial_delay;
 	bool         use_clockid;
 	clockid_t    clockid;
+	u64          clockid_res_ns;
 	unsigned int proc_map_timeout;
 };
 
diff --git a/tools/perf/pmu-events/arch/arm64/ampere/emag/branch.json b/tools/perf/pmu-events/arch/arm64/ampere/emag/branch.json
new file mode 100644
index 000000000000..abc98b018446
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/ampere/emag/branch.json
@@ -0,0 +1,23 @@
+[
+    {
+        "ArchStdEvent": "BR_IMMED_SPEC",
+    },
+    {
+        "ArchStdEvent": "BR_RETURN_SPEC",
+    },
+    {
+        "ArchStdEvent": "BR_INDIRECT_SPEC",
+    },
+    {
+        "PublicDescription": "Mispredicted or not predicted branch speculatively executed",
+        "EventCode": "0x10",
+        "EventName": "BR_MIS_PRED",
+        "BriefDescription": "Branch mispredicted"
+    },
+    {
+        "PublicDescription": "Predictable branch speculatively executed",
+        "EventCode": "0x12",
+        "EventName": "BR_PRED",
+        "BriefDescription": "Predictable branch"
+    },
+]
diff --git a/tools/perf/pmu-events/arch/arm64/ampere/emag/bus.json b/tools/perf/pmu-events/arch/arm64/ampere/emag/bus.json
new file mode 100644
index 000000000000..687b2629e1d1
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/ampere/emag/bus.json
@@ -0,0 +1,26 @@
+[
+    {
+        "ArchStdEvent": "BUS_ACCESS_RD",
+    },
+    {
+        "ArchStdEvent": "BUS_ACCESS_WR",
+    },
+    {
+        "ArchStdEvent": "BUS_ACCESS_SHARED",
+    },
+    {
+        "ArchStdEvent": "BUS_ACCESS_NOT_SHARED",
+    },
+    {
+        "ArchStdEvent": "BUS_ACCESS_NORMAL",
+    },
+    {
+        "ArchStdEvent": "BUS_ACCESS_PERIPH",
+    },
+    {
+        "PublicDescription": "Bus access",
+        "EventCode": "0x19",
+        "EventName": "BUS_ACCESS",
+        "BriefDescription": "Bus access"
+    },
+]
diff --git a/tools/perf/pmu-events/arch/arm64/ampere/emag/cache.json b/tools/perf/pmu-events/arch/arm64/ampere/emag/cache.json
new file mode 100644
index 000000000000..df9201434cb6
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/ampere/emag/cache.json
@@ -0,0 +1,191 @@
+[
+    {
+        "ArchStdEvent": "L1D_CACHE_RD",
+    },
+    {
+        "ArchStdEvent": "L1D_CACHE_WR",
+    },
+    {
+        "ArchStdEvent": "L1D_CACHE_REFILL_RD",
+    },
+    {
+        "ArchStdEvent": "L1D_CACHE_INVAL",
+    },
+    {
+        "ArchStdEvent": "L1D_TLB_REFILL_RD",
+    },
+    {
+        "ArchStdEvent": "L1D_TLB_REFILL_WR",
+    },
+    {
+        "ArchStdEvent": "L2D_CACHE_RD",
+    },
+    {
+        "ArchStdEvent": "L2D_CACHE_WR",
+    },
+    {
+        "ArchStdEvent": "L2D_CACHE_REFILL_RD",
+    },
+    {
+        "ArchStdEvent": "L2D_CACHE_REFILL_WR",
+    },
+    {
+        "ArchStdEvent": "L2D_CACHE_WB_VICTIM",
+    },
+    {
+        "ArchStdEvent": "L2D_CACHE_WB_CLEAN",
+    },
+    {
+        "ArchStdEvent": "L2D_CACHE_INVAL",
+    },
+    {
+        "PublicDescription": "Level 1 instruction cache refill",
+        "EventCode": "0x01",
+        "EventName": "L1I_CACHE_REFILL",
+        "BriefDescription": "L1I cache refill"
+    },
+    {
+        "PublicDescription": "Level 1 instruction TLB refill",
+        "EventCode": "0x02",
+        "EventName": "L1I_TLB_REFILL",
+        "BriefDescription": "L1I TLB refill"
+    },
+    {
+        "PublicDescription": "Level 1 data cache refill",
+        "EventCode": "0x03",
+        "EventName": "L1D_CACHE_REFILL",
+        "BriefDescription": "L1D cache refill"
+    },
+    {
+        "PublicDescription": "Level 1 data cache access",
+        "EventCode": "0x04",
+        "EventName": "L1D_CACHE_ACCESS",
+        "BriefDescription": "L1D cache access"
+    },
+    {
+        "PublicDescription": "Level 1 data TLB refill",
+        "EventCode": "0x05",
+        "EventName": "L1D_TLB_REFILL",
+        "BriefDescription": "L1D TLB refill"
+    },
+    {
+        "PublicDescription": "Level 1 instruction cache access",
+        "EventCode": "0x14",
+        "EventName": "L1I_CACHE_ACCESS",
+        "BriefDescription": "L1I cache access"
+    },
+    {
+        "PublicDescription": "Level 2 data cache access",
+        "EventCode": "0x16",
+        "EventName": "L2D_CACHE_ACCESS",
+        "BriefDescription": "L2D cache access"
+    },
+    {
+        "PublicDescription": "Level 2 data refill",
+        "EventCode": "0x17",
+        "EventName": "L2D_CACHE_REFILL",
+        "BriefDescription": "L2D cache refill"
+    },
+    {
+        "PublicDescription": "Level 2 data cache, Write-Back",
+        "EventCode": "0x18",
+        "EventName": "L2D_CACHE_WB",
+        "BriefDescription": "L2D cache Write-Back"
+    },
+    {
+        "PublicDescription": "Level 1 data TLB access. This event counts any load or store operation which accesses the data L1 TLB",
+        "EventCode": "0x25",
+        "EventName": "L1D_TLB_ACCESS",
+        "BriefDescription": "L1D TLB access"
+    },
+    {
+        "PublicDescription": "Level 1 instruction TLB access. This event counts any instruction fetch which accesses the instruction L1 TLB",
+        "EventCode": "0x26",
+        "EventName": "L1I_TLB_ACCESS",
+        "BriefDescription": "L1I TLB access"
+    },
+    {
+        "PublicDescription": "Level 2 access to data TLB that caused a page table walk. This event counts on any data access which causes L2D_TLB_REFILL to count",
+        "EventCode": "0x34",
+        "EventName": "L2D_TLB_ACCESS",
+        "BriefDescription": "L2D TLB access"
+    },
+    {
+        "PublicDescription": "Level 2 access to instruciton TLB that caused a page table walk. This event counts on any instruciton access which causes L2I_TLB_REFILL to count",
+        "EventCode": "0x35",
+        "EventName": "L2I_TLB_ACCESS",
+        "BriefDescription": "L2D TLB access"
+    },
+    {
+        "PublicDescription": "Branch target buffer misprediction",
+        "EventCode": "0x102",
+        "EventName": "BTB_MIS_PRED",
+        "BriefDescription": "BTB misprediction"
+    },
+    {
+        "PublicDescription": "ITB miss",
+        "EventCode": "0x103",
+        "EventName": "ITB_MISS",
+        "BriefDescription": "ITB miss"
+    },
+    {
+        "PublicDescription": "DTB miss",
+        "EventCode": "0x104",
+        "EventName": "DTB_MISS",
+        "BriefDescription": "DTB miss"
+    },
+    {
+        "PublicDescription": "Level 1 data cache late miss",
+        "EventCode": "0x105",
+        "EventName": "L1D_CACHE_LATE_MISS",
+        "BriefDescription": "L1D cache late miss"
+    },
+    {
+        "PublicDescription": "Level 1 data cache prefetch request",
+        "EventCode": "0x106",
+        "EventName": "L1D_CACHE_PREFETCH",
+        "BriefDescription": "L1D cache prefetch"
+    },
+    {
+        "PublicDescription": "Level 2 data cache prefetch request",
+        "EventCode": "0x107",
+        "EventName": "L2D_CACHE_PREFETCH",
+        "BriefDescription": "L2D cache prefetch"
+    },
+    {
+        "PublicDescription": "Level 1 stage 2 TLB refill",
+        "EventCode": "0x111",
+        "EventName": "L1_STAGE2_TLB_REFILL",
+        "BriefDescription": "L1 stage 2 TLB refill"
+    },
+    {
+        "PublicDescription": "Page walk cache level-0 stage-1 hit",
+        "EventCode": "0x112",
+        "EventName": "PAGE_WALK_L0_STAGE1_HIT",
+        "BriefDescription": "Page walk, L0 stage-1 hit"
+    },
+    {
+        "PublicDescription": "Page walk cache level-1 stage-1 hit",
+        "EventCode": "0x113",
+        "EventName": "PAGE_WALK_L1_STAGE1_HIT",
+        "BriefDescription": "Page walk, L1 stage-1 hit"
+    },
+    {
+        "PublicDescription": "Page walk cache level-2 stage-1 hit",
+        "EventCode": "0x114",
+        "EventName": "PAGE_WALK_L2_STAGE1_HIT",
+        "BriefDescription": "Page walk, L2 stage-1 hit"
+    },
+    {
+        "PublicDescription": "Page walk cache level-1 stage-2 hit",
+        "EventCode": "0x115",
+        "EventName": "PAGE_WALK_L1_STAGE2_HIT",
+        "BriefDescription": "Page walk, L1 stage-2 hit"
+    },
+    {
+        "PublicDescription": "Page walk cache level-2 stage-2 hit",
+        "EventCode": "0x116",
+        "EventName": "PAGE_WALK_L2_STAGE2_HIT",
+        "BriefDescription": "Page walk, L2 stage-2 hit"
+    },
+]
diff --git a/tools/perf/pmu-events/arch/arm64/ampere/emag/clock.json b/tools/perf/pmu-events/arch/arm64/ampere/emag/clock.json
new file mode 100644
index 000000000000..38cd1f1a70dc
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/ampere/emag/clock.json
@@ -0,0 +1,20 @@
+[
+    {
+        "PublicDescription": "The number of core clock cycles",
+        "EventCode": "0x11",
+        "EventName": "CPU_CYCLES",
+        "BriefDescription": "Clock cycles"
+    },
+    {
+        "PublicDescription": "FSU clocking gated off cycle",
+        "EventCode": "0x101",
+        "EventName": "FSU_CLOCK_OFF_CYCLES",
+        "BriefDescription": "FSU clocking gated off cycle"
+    },
+    {
+        "PublicDescription": "Wait state cycle",
+        "EventCode": "0x110",
+        "EventName": "Wait_CYCLES",
+        "BriefDescription": "Wait state cycle"
+    },
+]
diff --git a/tools/perf/pmu-events/arch/arm64/ampere/emag/core-imp-def.json b/tools/perf/pmu-events/arch/arm64/ampere/emag/core-imp-def.json
deleted file mode 100644
index bc03c06c3918..000000000000
--- a/tools/perf/pmu-events/arch/arm64/ampere/emag/core-imp-def.json
+++ /dev/null
@@ -1,32 +0,0 @@
-[
-    {
-        "ArchStdEvent": "L1D_CACHE_RD",
-    },
-    {
-        "ArchStdEvent": "L1D_CACHE_WR",
-    },
-    {
-        "ArchStdEvent": "L1D_CACHE_REFILL_RD",
-    },
-    {
-        "ArchStdEvent": "L1D_CACHE_REFILL_WR",
-    },
-    {
-        "ArchStdEvent": "L1D_TLB_REFILL_RD",
-    },
-    {
-        "ArchStdEvent": "L1D_TLB_REFILL_WR",
-    },
-    {
-        "ArchStdEvent": "L1D_TLB_RD",
-    },
-    {
-        "ArchStdEvent": "L1D_TLB_WR",
-    },
-    {
-        "ArchStdEvent": "BUS_ACCESS_RD",
-   },
-   {
-        "ArchStdEvent": "BUS_ACCESS_WR",
-   }
-]
diff --git a/tools/perf/pmu-events/arch/arm64/ampere/emag/exception.json b/tools/perf/pmu-events/arch/arm64/ampere/emag/exception.json
new file mode 100644
index 000000000000..3720dc28a15f
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/ampere/emag/exception.json
@@ -0,0 +1,50 @@
+[
+    {
+        "ArchStdEvent": "EXC_UNDEF",
+    },
+    {
+        "ArchStdEvent": "EXC_SVC",
+    },
+    {
+        "ArchStdEvent": "EXC_PABORT",
+    },
+    {
+        "ArchStdEvent": "EXC_DABORT",
+    },
+    {
+        "ArchStdEvent": "EXC_IRQ",
+    },
+    {
+        "ArchStdEvent": "EXC_FIQ",
+    },
+    {
+        "ArchStdEvent": "EXC_HVC",
+    },
+    {
+        "ArchStdEvent": "EXC_TRAP_PABORT",
+    },
+    {
+        "ArchStdEvent": "EXC_TRAP_DABORT",
+    },
+    {
+        "ArchStdEvent": "EXC_TRAP_OTHER",
+    },
+    {
+        "ArchStdEvent": "EXC_TRAP_IRQ",
+    },
+    {
+        "ArchStdEvent": "EXC_TRAP_FIQ",
+    },
+    {
+        "PublicDescription": "Exception taken",
+        "EventCode": "0x09",
+        "EventName": "EXC_TAKEN",
+        "BriefDescription": "Exception taken"
+    },
+    {
+        "PublicDescription": "Instruction architecturally executed, condition check pass, exception return",
+        "EventCode": "0x0a",
+        "EventName": "EXC_RETURN",
+        "BriefDescription": "Exception return"
+    },
+]
diff --git a/tools/perf/pmu-events/arch/arm64/ampere/emag/instruction.json b/tools/perf/pmu-events/arch/arm64/ampere/emag/instruction.json
new file mode 100644
index 000000000000..82cf753e6472
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/ampere/emag/instruction.json
@@ -0,0 +1,89 @@
+[
+    {
+        "ArchStdEvent": "LD_SPEC",
+    },
+    {
+        "ArchStdEvent": "ST_SPEC",
+    },
+    {
+        "ArchStdEvent": "LDST_SPEC",
+    },
+    {
+        "ArchStdEvent": "DP_SPEC",
+    },
+    {
+        "ArchStdEvent": "ASE_SPEC",
+    },
+    {
+        "ArchStdEvent": "VFP_SPEC",
+    },
+    {
+        "ArchStdEvent": "PC_WRITE_SPEC",
+    },
+    {
+        "ArchStdEvent": "CRYPTO_SPEC",
+    },
+    {
+        "ArchStdEvent": "ISB_SPEC",
+    },
+    {
+        "ArchStdEvent": "DSB_SPEC",
+    },
+    {
+        "ArchStdEvent": "DMB_SPEC",
+    },
+    {
+        "ArchStdEvent": "RC_LD_SPEC",
+    },
+    {
+        "ArchStdEvent": "RC_ST_SPEC",
+    },
+    {
+        "PublicDescription": "Instruction architecturally executed, software increment",
+        "EventCode": "0x00",
+        "EventName": "SW_INCR",
+        "BriefDescription": "Software increment"
+    },
+    {
+        "PublicDescription": "Instruction architecturally executed",
+        "EventCode": "0x08",
+        "EventName": "INST_RETIRED",
+        "BriefDescription": "Instruction retired"
+    },
+    {
+        "PublicDescription": "Instruction architecturally executed, condition code check pass, write to CONTEXTIDR",
+        "EventCode": "0x0b",
+        "EventName": "CID_WRITE_RETIRED",
+        "BriefDescription": "Write to CONTEXTIDR"
+    },
+    {
+        "PublicDescription": "Operation speculatively executed",
+        "EventCode": "0x1b",
+        "EventName": "INST_SPEC",
+        "BriefDescription": "Speculatively executed"
+    },
+    {
+        "PublicDescription": "Instruction architecturally executed (condition check pass), write to TTBR",
+        "EventCode": "0x1c",
+        "EventName": "TTBR_WRITE_RETIRED",
+        "BriefDescription": "Instruction executed, TTBR write"
+    },
+    {
+        "PublicDescription": "Instruction architecturally executed, branch. This event counts all branches, taken or not. This excludes exception entries, debug entries and CCFAIL branches",
+        "EventCode": "0x21",
+        "EventName": "BR_RETIRED",
+        "BriefDescription": "Branch retired"
+    },
+    {
+        "PublicDescription": "Instruction architecturally executed, mispredicted branch. This event counts any branch counted by BR_RETIRED which is not correctly predicted and causes a pipeline flush",
+        "EventCode": "0x22",
+        "EventName": "BR_MISPRED_RETIRED",
+        "BriefDescription": "Mispredicted branch retired"
+    },
+    {
+        "PublicDescription": "Operation speculatively executed, NOP",
+        "EventCode": "0x100",
+        "EventName": "NOP_SPEC",
+        "BriefDescription": "Speculatively executed, NOP"
+    },
+]
diff --git a/tools/perf/pmu-events/arch/arm64/ampere/emag/intrinsic.json b/tools/perf/pmu-events/arch/arm64/ampere/emag/intrinsic.json
new file mode 100644
index 000000000000..2aecc5c2347d
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/ampere/emag/intrinsic.json
@@ -0,0 +1,14 @@
+[
+    {
+        "ArchStdEvent": "LDREX_SPEC",
+    },
+    {
+        "ArchStdEvent": "STREX_PASS_SPEC",
+    },
+    {
+        "ArchStdEvent": "STREX_FAIL_SPEC",
+    },
+    {
+        "ArchStdEvent": "STREX_SPEC",
+    },
+]
diff --git a/tools/perf/pmu-events/arch/arm64/ampere/emag/memory.json b/tools/perf/pmu-events/arch/arm64/ampere/emag/memory.json
new file mode 100644
index 000000000000..08508697b318
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/ampere/emag/memory.json
@@ -0,0 +1,29 @@
+[
+    {
+        "ArchStdEvent": "MEM_ACCESS_RD",
+    },
+    {
+        "ArchStdEvent": "MEM_ACCESS_WR",
+    },
+    {
+        "ArchStdEvent": "UNALIGNED_LD_SPEC",
+    },
+    {
+        "ArchStdEvent": "UNALIGNED_ST_SPEC",
+    },
+    {
+        "ArchStdEvent": "UNALIGNED_LDST_SPEC",
+    },
+    {
+        "PublicDescription": "Data memory access",
+        "EventCode": "0x13",
+        "EventName": "MEM_ACCESS",
+        "BriefDescription": "Memory access"
+    },
+    {
+        "PublicDescription": "Local memory error. This event counts any correctable or uncorrectable memory error (ECC or parity) in the protected core RAMs",
+        "EventCode": "0x1a",
+        "EventName": "MEM_ERROR",
+        "BriefDescription": "Memory error"
+    },
+]
diff --git a/tools/perf/pmu-events/arch/arm64/ampere/emag/pipeline.json b/tools/perf/pmu-events/arch/arm64/ampere/emag/pipeline.json
new file mode 100644
index 000000000000..e2087de586bf
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/ampere/emag/pipeline.json
@@ -0,0 +1,50 @@
+[
+    {
+        "PublicDescription": "Decode starved for instruction cycle",
+        "EventCode": "0x108",
+        "EventName": "DECODE_STALL",
+        "BriefDescription": "Decode starved"
+    },
+    {
+        "PublicDescription": "Op dispatch stalled cycle",
+        "EventCode": "0x109",
+        "EventName": "DISPATCH_STALL",
+        "BriefDescription": "Dispatch stalled"
+    },
+    {
+        "PublicDescription": "IXA Op non-issue",
+        "EventCode": "0x10a",
+        "EventName": "IXA_STALL",
+        "BriefDescription": "IXA stalled"
+    },
+    {
+        "PublicDescription": "IXB Op non-issue",
+        "EventCode": "0x10b",
+        "EventName": "IXB_STALL",
+        "BriefDescription": "IXB stalled"
+    },
+    {
+        "PublicDescription": "BX Op non-issue",
+        "EventCode": "0x10c",
+        "EventName": "BX_STALL",
+        "BriefDescription": "BX stalled"
+    },
+    {
+        "PublicDescription": "LX Op non-issue",
+        "EventCode": "0x10d",
+        "EventName": "LX_STALL",
+        "BriefDescription": "LX stalled"
+    },
+    {
+        "PublicDescription": "SX Op non-issue",
+        "EventCode": "0x10e",
+        "EventName": "SX_STALL",
+        "BriefDescription": "SX stalled"
+    },
+    {
+        "PublicDescription": "FX Op non-issue",
+        "EventCode": "0x10f",
+        "EventName": "FX_STALL",
+        "BriefDescription": "FX stalled"
+    },
+]
diff --git a/tools/perf/scripts/python/call-graph-from-sql.py b/tools/perf/scripts/python/call-graph-from-sql.py
deleted file mode 100644
index b494a67a1c67..000000000000
--- a/tools/perf/scripts/python/call-graph-from-sql.py
+++ /dev/null
@@ -1,339 +0,0 @@
-#!/usr/bin/python2
-# call-graph-from-sql.py: create call-graph from sql database
-# Copyright (c) 2014-2017, Intel Corporation.
-#
-# This program is free software; you can redistribute it and/or modify it
-# under the terms and conditions of the GNU General Public License,
-# version 2, as published by the Free Software Foundation.
-#
-# This program is distributed in the hope it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-# more details.
-
-# To use this script you will need to have exported data using either the
-# export-to-sqlite.py or the export-to-postgresql.py script.  Refer to those
-# scripts for details.
-#
-# Following on from the example in the export scripts, a
-# call-graph can be displayed for the pt_example database like this:
-#
-#	python tools/perf/scripts/python/call-graph-from-sql.py pt_example
-#
-# Note that for PostgreSQL, this script supports connecting to remote databases
-# by setting hostname, port, username, password, and dbname e.g.
-#
-#	python tools/perf/scripts/python/call-graph-from-sql.py "hostname=myhost username=myuser password=mypassword dbname=pt_example"
-#
-# The result is a GUI window with a tree representing a context-sensitive
-# call-graph.  Expanding a couple of levels of the tree and adjusting column
-# widths to suit will display something like:
-#
-#                                         Call Graph: pt_example
-# Call Path                          Object      Count   Time(ns)  Time(%)  Branch Count   Branch Count(%)
-# v- ls
-#     v- 2638:2638
-#         v- _start                  ld-2.19.so    1     10074071   100.0         211135            100.0
-#           |- unknown               unknown       1        13198     0.1              1              0.0
-#           >- _dl_start             ld-2.19.so    1      1400980    13.9          19637              9.3
-#           >- _d_linit_internal     ld-2.19.so    1       448152     4.4          11094              5.3
-#           v-__libc_start_main@plt  ls            1      8211741    81.5         180397             85.4
-#              >- _dl_fixup          ld-2.19.so    1         7607     0.1            108              0.1
-#              >- __cxa_atexit       libc-2.19.so  1        11737     0.1             10              0.0
-#              >- __libc_csu_init    ls            1        10354     0.1             10              0.0
-#              |- _setjmp            libc-2.19.so  1            0     0.0              4              0.0
-#              v- main               ls            1      8182043    99.6         180254             99.9
-#
-# Points to note:
-#	The top level is a command name (comm)
-#	The next level is a thread (pid:tid)
-#	Subsequent levels are functions
-#	'Count' is the number of calls
-#	'Time' is the elapsed time until the function returns
-#	Percentages are relative to the level above
-#	'Branch Count' is the total number of branches for that function and all
-#       functions that it calls
-
-import sys
-from PySide.QtCore import *
-from PySide.QtGui import *
-from PySide.QtSql import *
-from decimal import *
-
-class TreeItem():
-
-	def __init__(self, db, row, parent_item):
-		self.db = db
-		self.row = row
-		self.parent_item = parent_item
-		self.query_done = False;
-		self.child_count = 0
-		self.child_items = []
-		self.data = ["", "", "", "", "", "", ""]
-		self.comm_id = 0
-		self.thread_id = 0
-		self.call_path_id = 1
-		self.branch_count = 0
-		self.time = 0
-		if not parent_item:
-			self.setUpRoot()
-
-	def setUpRoot(self):
-		self.query_done = True
-		query = QSqlQuery(self.db)
-		ret = query.exec_('SELECT id, comm FROM comms')
-		if not ret:
-			raise Exception("Query failed: " + query.lastError().text())
-		while query.next():
-			if not query.value(0):
-				continue
-			child_item = TreeItem(self.db, self.child_count, self)
-			self.child_items.append(child_item)
-			self.child_count += 1
-			child_item.setUpLevel1(query.value(0), query.value(1))
-
-	def setUpLevel1(self, comm_id, comm):
-		self.query_done = True;
-		self.comm_id = comm_id
-		self.data[0] = comm
-		self.child_items = []
-		self.child_count = 0
-		query = QSqlQuery(self.db)
-		ret = query.exec_('SELECT thread_id, ( SELECT pid FROM threads WHERE id = thread_id ), ( SELECT tid FROM threads WHERE id = thread_id ) FROM comm_threads WHERE comm_id = ' + str(comm_id))
-		if not ret:
-			raise Exception("Query failed: " + query.lastError().text())
-		while query.next():
-			child_item = TreeItem(self.db, self.child_count, self)
-			self.child_items.append(child_item)
-			self.child_count += 1
-			child_item.setUpLevel2(comm_id, query.value(0), query.value(1), query.value(2))
-
-	def setUpLevel2(self, comm_id, thread_id, pid, tid):
-		self.comm_id = comm_id
-		self.thread_id = thread_id
-		self.data[0] = str(pid) + ":" + str(tid)
-
-	def getChildItem(self, row):
-		return self.child_items[row]
-
-	def getParentItem(self):
-		return self.parent_item
-
-	def getRow(self):
-		return self.row
-
-	def timePercent(self, b):
-		if not self.time:
-			return "0.0"
-		x = (b * Decimal(100)) / self.time
-		return str(x.quantize(Decimal('.1'), rounding=ROUND_HALF_UP))
-
-	def branchPercent(self, b):
-		if not self.branch_count:
-			return "0.0"
-		x = (b * Decimal(100)) / self.branch_count
-		return str(x.quantize(Decimal('.1'), rounding=ROUND_HALF_UP))
-
-	def addChild(self, call_path_id, name, dso, count, time, branch_count):
-		child_item = TreeItem(self.db, self.child_count, self)
-		child_item.comm_id = self.comm_id
-		child_item.thread_id = self.thread_id
-		child_item.call_path_id = call_path_id
-		child_item.branch_count = branch_count
-		child_item.time = time
-		child_item.data[0] = name
-		if dso == "[kernel.kallsyms]":
-			dso = "[kernel]"
-		child_item.data[1] = dso
-		child_item.data[2] = str(count)
-		child_item.data[3] = str(time)
-		child_item.data[4] = self.timePercent(time)
-		child_item.data[5] = str(branch_count)
-		child_item.data[6] = self.branchPercent(branch_count)
-		self.child_items.append(child_item)
-		self.child_count += 1
-
-	def selectCalls(self):
-		self.query_done = True;
-		query = QSqlQuery(self.db)
-		ret = query.exec_('SELECT id, call_path_id, branch_count, call_time, return_time, '
-				  '( SELECT name FROM symbols WHERE id = ( SELECT symbol_id FROM call_paths WHERE id = call_path_id ) ), '
-				  '( SELECT short_name FROM dsos WHERE id = ( SELECT dso_id FROM symbols WHERE id = ( SELECT symbol_id FROM call_paths WHERE id = call_path_id ) ) ), '
-				  '( SELECT ip FROM call_paths where id = call_path_id ) '
-				  'FROM calls WHERE parent_call_path_id = ' + str(self.call_path_id) + ' AND comm_id = ' + str(self.comm_id) + ' AND thread_id = ' + str(self.thread_id) +
-				  ' ORDER BY call_path_id')
-		if not ret:
-			raise Exception("Query failed: " + query.lastError().text())
-		last_call_path_id = 0
-		name = ""
-		dso = ""
-		count = 0
-		branch_count = 0
-		total_branch_count = 0
-		time = 0
-		total_time = 0
-		while query.next():
-			if query.value(1) == last_call_path_id:
-				count += 1
-				branch_count += query.value(2)
-				time += query.value(4) - query.value(3)
-			else:
-				if count:
-					self.addChild(last_call_path_id, name, dso, count, time, branch_count)
-				last_call_path_id = query.value(1)
-				name = query.value(5)
-				dso = query.value(6)
-				count = 1
-				total_branch_count += branch_count
-				total_time += time
-				branch_count = query.value(2)
-				time = query.value(4) - query.value(3)
-		if count:
-			self.addChild(last_call_path_id, name, dso, count, time, branch_count)
-		total_branch_count += branch_count
-		total_time += time
-		# Top level does not have time or branch count, so fix that here
-		if total_branch_count > self.branch_count:
-			self.branch_count = total_branch_count
-			if self.branch_count:
-				for child_item in self.child_items:
-					child_item.data[6] = self.branchPercent(child_item.branch_count)
-		if total_time > self.time:
-			self.time = total_time
-			if self.time:
-				for child_item in self.child_items:
-					child_item.data[4] = self.timePercent(child_item.time)
-
-	def childCount(self):
-		if not self.query_done:
-			self.selectCalls()
-		return self.child_count
-
-	def columnCount(self):
-		return 7
-
-	def columnHeader(self, column):
-		headers = ["Call Path", "Object", "Count ", "Time (ns) ", "Time (%) ", "Branch Count ", "Branch Count (%) "]
-		return headers[column]
-
-	def getData(self, column):
-		return self.data[column]
-
-class TreeModel(QAbstractItemModel):
-
-	def __init__(self, db, parent=None):
-		super(TreeModel, self).__init__(parent)
-		self.db = db
-		self.root = TreeItem(db, 0, None)
-
-	def columnCount(self, parent):
-		return self.root.columnCount()
-
-	def rowCount(self, parent):
-		if parent.isValid():
-			parent_item = parent.internalPointer()
-		else:
-			parent_item = self.root
-		return parent_item.childCount()
-
-	def headerData(self, section, orientation, role):
-		if role == Qt.TextAlignmentRole:
-			if section > 1:
-				return Qt.AlignRight
-		if role != Qt.DisplayRole:
-			return None
-		if orientation != Qt.Horizontal:
-			return None
-		return self.root.columnHeader(section)
-
-	def parent(self, child):
-		child_item = child.internalPointer()
-		if child_item is self.root:
-			return QModelIndex()
-		parent_item = child_item.getParentItem()
-		return self.createIndex(parent_item.getRow(), 0, parent_item)
-
-	def index(self, row, column, parent):
-		if parent.isValid():
-			parent_item = parent.internalPointer()
-		else:
-			parent_item = self.root
-		child_item = parent_item.getChildItem(row)
-		return self.createIndex(row, column, child_item)
-
-	def data(self, index, role):
-		if role == Qt.TextAlignmentRole:
-			if index.column() > 1:
-				return Qt.AlignRight
-		if role != Qt.DisplayRole:
-			return None
-		index_item = index.internalPointer()
-		return index_item.getData(index.column())
-
-class MainWindow(QMainWindow):
-
-	def __init__(self, db, dbname, parent=None):
-		super(MainWindow, self).__init__(parent)
-
-		self.setObjectName("MainWindow")
-		self.setWindowTitle("Call Graph: " + dbname)
-		self.move(100, 100)
-		self.resize(800, 600)
-		style = self.style()
-		icon = style.standardIcon(QStyle.SP_MessageBoxInformation)
-		self.setWindowIcon(icon);
-
-		self.model = TreeModel(db)
-
-		self.view = QTreeView()
-		self.view.setModel(self.model)
-
-		self.setCentralWidget(self.view)
-
-if __name__ == '__main__':
-	if (len(sys.argv) < 2):
-		print >> sys.stderr, "Usage is: call-graph-from-sql.py <database name>"
-		raise Exception("Too few arguments")
-
-	dbname = sys.argv[1]
-
-	is_sqlite3 = False
-	try:
-		f = open(dbname)
-		if f.read(15) == "SQLite format 3":
-			is_sqlite3 = True
-		f.close()
-	except:
-		pass
-
-	if is_sqlite3:
-		db = QSqlDatabase.addDatabase('QSQLITE')
-	else:
-		db = QSqlDatabase.addDatabase('QPSQL')
-		opts = dbname.split()
-		for opt in opts:
-			if '=' in opt:
-				opt = opt.split('=')
-				if opt[0] == 'hostname':
-					db.setHostName(opt[1])
-				elif opt[0] == 'port':
-					db.setPort(int(opt[1]))
-				elif opt[0] == 'username':
-					db.setUserName(opt[1])
-				elif opt[0] == 'password':
-					db.setPassword(opt[1])
-				elif opt[0] == 'dbname':
-					dbname = opt[1]
-			else:
-				dbname = opt
-
-	db.setDatabaseName(dbname)
-	if not db.open():
-		raise Exception("Failed to open database " + dbname + " error: " + db.lastError().text())
-
-	app = QApplication(sys.argv)
-	window = MainWindow(db, dbname)
-	window.show()
-	err = app.exec_()
-	db.close()
-	sys.exit(err)
diff --git a/tools/perf/scripts/python/export-to-postgresql.py b/tools/perf/scripts/python/export-to-postgresql.py
index e46f51b17513..0564dd7377f2 100644
--- a/tools/perf/scripts/python/export-to-postgresql.py
+++ b/tools/perf/scripts/python/export-to-postgresql.py
@@ -59,7 +59,7 @@ import datetime
 #	pt_example=# \q
 #
 # An example of using the database is provided by the script
-# call-graph-from-sql.py.  Refer to that script for details.
+# exported-sql-viewer.py.  Refer to that script for details.
 #
 # Tables:
 #
diff --git a/tools/perf/scripts/python/export-to-sqlite.py b/tools/perf/scripts/python/export-to-sqlite.py
index e4bb82c8aba9..245caf2643ed 100644
--- a/tools/perf/scripts/python/export-to-sqlite.py
+++ b/tools/perf/scripts/python/export-to-sqlite.py
@@ -40,7 +40,7 @@ import datetime
 #	sqlite> .quit
 #
 # An example of using the database is provided by the script
-# call-graph-from-sql.py.  Refer to that script for details.
+# exported-sql-viewer.py.  Refer to that script for details.
 #
 # The database structure is practically the same as created by the script
 # export-to-postgresql.py. Refer to that script for details.  A notable
diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py
new file mode 100755
index 000000000000..f278ce5ebab7
--- /dev/null
+++ b/tools/perf/scripts/python/exported-sql-viewer.py
@@ -0,0 +1,2615 @@
+#!/usr/bin/python2
+# SPDX-License-Identifier: GPL-2.0
+# exported-sql-viewer.py: view data from sql database
+# Copyright (c) 2014-2018, Intel Corporation.
+
+# To use this script you will need to have exported data using either the
+# export-to-sqlite.py or the export-to-postgresql.py script.  Refer to those
+# scripts for details.
+#
+# Following on from the example in the export scripts, a
+# call-graph can be displayed for the pt_example database like this:
+#
+#	python tools/perf/scripts/python/exported-sql-viewer.py pt_example
+#
+# Note that for PostgreSQL, this script supports connecting to remote databases
+# by setting hostname, port, username, password, and dbname e.g.
+#
+#	python tools/perf/scripts/python/exported-sql-viewer.py "hostname=myhost username=myuser password=mypassword dbname=pt_example"
+#
+# The result is a GUI window with a tree representing a context-sensitive
+# call-graph.  Expanding a couple of levels of the tree and adjusting column
+# widths to suit will display something like:
+#
+#                                         Call Graph: pt_example
+# Call Path                          Object      Count   Time(ns)  Time(%)  Branch Count   Branch Count(%)
+# v- ls
+#     v- 2638:2638
+#         v- _start                  ld-2.19.so    1     10074071   100.0         211135            100.0
+#           |- unknown               unknown       1        13198     0.1              1              0.0
+#           >- _dl_start             ld-2.19.so    1      1400980    13.9          19637              9.3
+#           >- _d_linit_internal     ld-2.19.so    1       448152     4.4          11094              5.3
+#           v-__libc_start_main@plt  ls            1      8211741    81.5         180397             85.4
+#              >- _dl_fixup          ld-2.19.so    1         7607     0.1            108              0.1
+#              >- __cxa_atexit       libc-2.19.so  1        11737     0.1             10              0.0
+#              >- __libc_csu_init    ls            1        10354     0.1             10              0.0
+#              |- _setjmp            libc-2.19.so  1            0     0.0              4              0.0
+#              v- main               ls            1      8182043    99.6         180254             99.9
+#
+# Points to note:
+#	The top level is a command name (comm)
+#	The next level is a thread (pid:tid)
+#	Subsequent levels are functions
+#	'Count' is the number of calls
+#	'Time' is the elapsed time until the function returns
+#	Percentages are relative to the level above
+#	'Branch Count' is the total number of branches for that function and all
+#       functions that it calls
+
+# There is also a "All branches" report, which displays branches and
+# possibly disassembly.  However, presently, the only supported disassembler is
+# Intel XED, and additionally the object code must be present in perf build ID
+# cache. To use Intel XED, libxed.so must be present. To build and install
+# libxed.so:
+#            git clone https://github.com/intelxed/mbuild.git mbuild
+#            git clone https://github.com/intelxed/xed
+#            cd xed
+#            ./mfile.py --share
+#            sudo ./mfile.py --prefix=/usr/local install
+#            sudo ldconfig
+#
+# Example report:
+#
+# Time           CPU  Command  PID    TID    Branch Type            In Tx  Branch
+# 8107675239590  2    ls       22011  22011  return from interrupt  No     ffffffff86a00a67 native_irq_return_iret ([kernel]) -> 7fab593ea260 _start (ld-2.19.so)
+#                                                                              7fab593ea260 48 89 e7                                        mov %rsp, %rdi
+# 8107675239899  2    ls       22011  22011  hardware interrupt     No         7fab593ea260 _start (ld-2.19.so) -> ffffffff86a012e0 page_fault ([kernel])
+# 8107675241900  2    ls       22011  22011  return from interrupt  No     ffffffff86a00a67 native_irq_return_iret ([kernel]) -> 7fab593ea260 _start (ld-2.19.so)
+#                                                                              7fab593ea260 48 89 e7                                        mov %rsp, %rdi
+#                                                                              7fab593ea263 e8 c8 06 00 00                                  callq  0x7fab593ea930
+# 8107675241900  2    ls       22011  22011  call                   No         7fab593ea263 _start+0x3 (ld-2.19.so) -> 7fab593ea930 _dl_start (ld-2.19.so)
+#                                                                              7fab593ea930 55                                              pushq  %rbp
+#                                                                              7fab593ea931 48 89 e5                                        mov %rsp, %rbp
+#                                                                              7fab593ea934 41 57                                           pushq  %r15
+#                                                                              7fab593ea936 41 56                                           pushq  %r14
+#                                                                              7fab593ea938 41 55                                           pushq  %r13
+#                                                                              7fab593ea93a 41 54                                           pushq  %r12
+#                                                                              7fab593ea93c 53                                              pushq  %rbx
+#                                                                              7fab593ea93d 48 89 fb                                        mov %rdi, %rbx
+#                                                                              7fab593ea940 48 83 ec 68                                     sub $0x68, %rsp
+#                                                                              7fab593ea944 0f 31                                           rdtsc
+#                                                                              7fab593ea946 48 c1 e2 20                                     shl $0x20, %rdx
+#                                                                              7fab593ea94a 89 c0                                           mov %eax, %eax
+#                                                                              7fab593ea94c 48 09 c2                                        or %rax, %rdx
+#                                                                              7fab593ea94f 48 8b 05 1a 15 22 00                            movq  0x22151a(%rip), %rax
+# 8107675242232  2    ls       22011  22011  hardware interrupt     No         7fab593ea94f _dl_start+0x1f (ld-2.19.so) -> ffffffff86a012e0 page_fault ([kernel])
+# 8107675242900  2    ls       22011  22011  return from interrupt  No     ffffffff86a00a67 native_irq_return_iret ([kernel]) -> 7fab593ea94f _dl_start+0x1f (ld-2.19.so)
+#                                                                              7fab593ea94f 48 8b 05 1a 15 22 00                            movq  0x22151a(%rip), %rax
+#                                                                              7fab593ea956 48 89 15 3b 13 22 00                            movq  %rdx, 0x22133b(%rip)
+# 8107675243232  2    ls       22011  22011  hardware interrupt     No         7fab593ea956 _dl_start+0x26 (ld-2.19.so) -> ffffffff86a012e0 page_fault ([kernel])
+
+import sys
+import weakref
+import threading
+import string
+import cPickle
+import re
+import os
+from PySide.QtCore import *
+from PySide.QtGui import *
+from PySide.QtSql import *
+from decimal import *
+from ctypes import *
+from multiprocessing import Process, Array, Value, Event
+
+# Data formatting helpers
+
+def tohex(ip):
+	if ip < 0:
+		ip += 1 << 64
+	return "%x" % ip
+
+def offstr(offset):
+	if offset:
+		return "+0x%x" % offset
+	return ""
+
+def dsoname(name):
+	if name == "[kernel.kallsyms]":
+		return "[kernel]"
+	return name
+
+def findnth(s, sub, n, offs=0):
+	pos = s.find(sub)
+	if pos < 0:
+		return pos
+	if n <= 1:
+		return offs + pos
+	return findnth(s[pos + 1:], sub, n - 1, offs + pos + 1)
+
+# Percent to one decimal place
+
+def PercentToOneDP(n, d):
+	if not d:
+		return "0.0"
+	x = (n * Decimal(100)) / d
+	return str(x.quantize(Decimal(".1"), rounding=ROUND_HALF_UP))
+
+# Helper for queries that must not fail
+
+def QueryExec(query, stmt):
+	ret = query.exec_(stmt)
+	if not ret:
+		raise Exception("Query failed: " + query.lastError().text())
+
+# Background thread
+
+class Thread(QThread):
+
+	done = Signal(object)
+
+	def __init__(self, task, param=None, parent=None):
+		super(Thread, self).__init__(parent)
+		self.task = task
+		self.param = param
+
+	def run(self):
+		while True:
+			if self.param is None:
+				done, result = self.task()
+			else:
+				done, result = self.task(self.param)
+			self.done.emit(result)
+			if done:
+				break
+
+# Tree data model
+
+class TreeModel(QAbstractItemModel):
+
+	def __init__(self, root, parent=None):
+		super(TreeModel, self).__init__(parent)
+		self.root = root
+		self.last_row_read = 0
+
+	def Item(self, parent):
+		if parent.isValid():
+			return parent.internalPointer()
+		else:
+			return self.root
+
+	def rowCount(self, parent):
+		result = self.Item(parent).childCount()
+		if result < 0:
+			result = 0
+			self.dataChanged.emit(parent, parent)
+		return result
+
+	def hasChildren(self, parent):
+		return self.Item(parent).hasChildren()
+
+	def headerData(self, section, orientation, role):
+		if role == Qt.TextAlignmentRole:
+			return self.columnAlignment(section)
+		if role != Qt.DisplayRole:
+			return None
+		if orientation != Qt.Horizontal:
+			return None
+		return self.columnHeader(section)
+
+	def parent(self, child):
+		child_item = child.internalPointer()
+		if child_item is self.root:
+			return QModelIndex()
+		parent_item = child_item.getParentItem()
+		return self.createIndex(parent_item.getRow(), 0, parent_item)
+
+	def index(self, row, column, parent):
+		child_item = self.Item(parent).getChildItem(row)
+		return self.createIndex(row, column, child_item)
+
+	def DisplayData(self, item, index):
+		return item.getData(index.column())
+
+	def FetchIfNeeded(self, row):
+		if row > self.last_row_read:
+			self.last_row_read = row
+			if row + 10 >= self.root.child_count:
+				self.fetcher.Fetch(glb_chunk_sz)
+
+	def columnAlignment(self, column):
+		return Qt.AlignLeft
+
+	def columnFont(self, column):
+		return None
+
+	def data(self, index, role):
+		if role == Qt.TextAlignmentRole:
+			return self.columnAlignment(index.column())
+		if role == Qt.FontRole:
+			return self.columnFont(index.column())
+		if role != Qt.DisplayRole:
+			return None
+		item = index.internalPointer()
+		return self.DisplayData(item, index)
+
+# Table data model
+
+class TableModel(QAbstractTableModel):
+
+	def __init__(self, parent=None):
+		super(TableModel, self).__init__(parent)
+		self.child_count = 0
+		self.child_items = []
+		self.last_row_read = 0
+
+	def Item(self, parent):
+		if parent.isValid():
+			return parent.internalPointer()
+		else:
+			return self
+
+	def rowCount(self, parent):
+		return self.child_count
+
+	def headerData(self, section, orientation, role):
+		if role == Qt.TextAlignmentRole:
+			return self.columnAlignment(section)
+		if role != Qt.DisplayRole:
+			return None
+		if orientation != Qt.Horizontal:
+			return None
+		return self.columnHeader(section)
+
+	def index(self, row, column, parent):
+		return self.createIndex(row, column, self.child_items[row])
+
+	def DisplayData(self, item, index):
+		return item.getData(index.column())
+
+	def FetchIfNeeded(self, row):
+		if row > self.last_row_read:
+			self.last_row_read = row
+			if row + 10 >= self.child_count:
+				self.fetcher.Fetch(glb_chunk_sz)
+
+	def columnAlignment(self, column):
+		return Qt.AlignLeft
+
+	def columnFont(self, column):
+		return None
+
+	def data(self, index, role):
+		if role == Qt.TextAlignmentRole:
+			return self.columnAlignment(index.column())
+		if role == Qt.FontRole:
+			return self.columnFont(index.column())
+		if role != Qt.DisplayRole:
+			return None
+		item = index.internalPointer()
+		return self.DisplayData(item, index)
+
+# Model cache
+
+model_cache = weakref.WeakValueDictionary()
+model_cache_lock = threading.Lock()
+
+def LookupCreateModel(model_name, create_fn):
+	model_cache_lock.acquire()
+	try:
+		model = model_cache[model_name]
+	except:
+		model = None
+	if model is None:
+		model = create_fn()
+		model_cache[model_name] = model
+	model_cache_lock.release()
+	return model
+
+# Find bar
+
+class FindBar():
+
+	def __init__(self, parent, finder, is_reg_expr=False):
+		self.finder = finder
+		self.context = []
+		self.last_value = None
+		self.last_pattern = None
+
+		label = QLabel("Find:")
+		label.setSizePolicy(QSizePolicy.Fixed, QSizePolicy.Fixed)
+
+		self.textbox = QComboBox()
+		self.textbox.setEditable(True)
+		self.textbox.currentIndexChanged.connect(self.ValueChanged)
+
+		self.progress = QProgressBar()
+		self.progress.setRange(0, 0)
+		self.progress.hide()
+
+		if is_reg_expr:
+			self.pattern = QCheckBox("Regular Expression")
+		else:
+			self.pattern = QCheckBox("Pattern")
+		self.pattern.setSizePolicy(QSizePolicy.Fixed, QSizePolicy.Fixed)
+
+		self.next_button = QToolButton()
+		self.next_button.setIcon(parent.style().standardIcon(QStyle.SP_ArrowDown))
+		self.next_button.released.connect(lambda: self.NextPrev(1))
+
+		self.prev_button = QToolButton()
+		self.prev_button.setIcon(parent.style().standardIcon(QStyle.SP_ArrowUp))
+		self.prev_button.released.connect(lambda: self.NextPrev(-1))
+
+		self.close_button = QToolButton()
+		self.close_button.setIcon(parent.style().standardIcon(QStyle.SP_DockWidgetCloseButton))
+		self.close_button.released.connect(self.Deactivate)
+
+		self.hbox = QHBoxLayout()
+		self.hbox.setContentsMargins(0, 0, 0, 0)
+
+		self.hbox.addWidget(label)
+		self.hbox.addWidget(self.textbox)
+		self.hbox.addWidget(self.progress)
+		self.hbox.addWidget(self.pattern)
+		self.hbox.addWidget(self.next_button)
+		self.hbox.addWidget(self.prev_button)
+		self.hbox.addWidget(self.close_button)
+
+		self.bar = QWidget()
+		self.bar.setLayout(self.hbox);
+		self.bar.hide()
+
+	def Widget(self):
+		return self.bar
+
+	def Activate(self):
+		self.bar.show()
+		self.textbox.setFocus()
+
+	def Deactivate(self):
+		self.bar.hide()
+
+	def Busy(self):
+		self.textbox.setEnabled(False)
+		self.pattern.hide()
+		self.next_button.hide()
+		self.prev_button.hide()
+		self.progress.show()
+
+	def Idle(self):
+		self.textbox.setEnabled(True)
+		self.progress.hide()
+		self.pattern.show()
+		self.next_button.show()
+		self.prev_button.show()
+
+	def Find(self, direction):
+		value = self.textbox.currentText()
+		pattern = self.pattern.isChecked()
+		self.last_value = value
+		self.last_pattern = pattern
+		self.finder.Find(value, direction, pattern, self.context)
+
+	def ValueChanged(self):
+		value = self.textbox.currentText()
+		pattern = self.pattern.isChecked()
+		index = self.textbox.currentIndex()
+		data = self.textbox.itemData(index)
+		# Store the pattern in the combo box to keep it with the text value
+		if data == None:
+			self.textbox.setItemData(index, pattern)
+		else:
+			self.pattern.setChecked(data)
+		self.Find(0)
+
+	def NextPrev(self, direction):
+		value = self.textbox.currentText()
+		pattern = self.pattern.isChecked()
+		if value != self.last_value:
+			index = self.textbox.findText(value)
+			# Allow for a button press before the value has been added to the combo box
+			if index < 0:
+				index = self.textbox.count()
+				self.textbox.addItem(value, pattern)
+				self.textbox.setCurrentIndex(index)
+				return
+			else:
+				self.textbox.setItemData(index, pattern)
+		elif pattern != self.last_pattern:
+			# Keep the pattern recorded in the combo box up to date
+			index = self.textbox.currentIndex()
+			self.textbox.setItemData(index, pattern)
+		self.Find(direction)
+
+	def NotFound(self):
+		QMessageBox.information(self.bar, "Find", "'" + self.textbox.currentText() + "' not found")
+
+# Context-sensitive call graph data model item base
+
+class CallGraphLevelItemBase(object):
+
+	def __init__(self, glb, row, parent_item):
+		self.glb = glb
+		self.row = row
+		self.parent_item = parent_item
+		self.query_done = False;
+		self.child_count = 0
+		self.child_items = []
+
+	def getChildItem(self, row):
+		return self.child_items[row]
+
+	def getParentItem(self):
+		return self.parent_item
+
+	def getRow(self):
+		return self.row
+
+	def childCount(self):
+		if not self.query_done:
+			self.Select()
+			if not self.child_count:
+				return -1
+		return self.child_count
+
+	def hasChildren(self):
+		if not self.query_done:
+			return True
+		return self.child_count > 0
+
+	def getData(self, column):
+		return self.data[column]
+
+# Context-sensitive call graph data model level 2+ item base
+
+class CallGraphLevelTwoPlusItemBase(CallGraphLevelItemBase):
+
+	def __init__(self, glb, row, comm_id, thread_id, call_path_id, time, branch_count, parent_item):
+		super(CallGraphLevelTwoPlusItemBase, self).__init__(glb, row, parent_item)
+		self.comm_id = comm_id
+		self.thread_id = thread_id
+		self.call_path_id = call_path_id
+		self.branch_count = branch_count
+		self.time = time
+
+	def Select(self):
+		self.query_done = True;
+		query = QSqlQuery(self.glb.db)
+		QueryExec(query, "SELECT call_path_id, name, short_name, COUNT(calls.id), SUM(return_time - call_time), SUM(branch_count)"
+					" FROM calls"
+					" INNER JOIN call_paths ON calls.call_path_id = call_paths.id"
+					" INNER JOIN symbols ON call_paths.symbol_id = symbols.id"
+					" INNER JOIN dsos ON symbols.dso_id = dsos.id"
+					" WHERE parent_call_path_id = " + str(self.call_path_id) +
+					" AND comm_id = " + str(self.comm_id) +
+					" AND thread_id = " + str(self.thread_id) +
+					" GROUP BY call_path_id, name, short_name"
+					" ORDER BY call_path_id")
+		while query.next():
+			child_item = CallGraphLevelThreeItem(self.glb, self.child_count, self.comm_id, self.thread_id, query.value(0), query.value(1), query.value(2), query.value(3), int(query.value(4)), int(query.value(5)), self)
+			self.child_items.append(child_item)
+			self.child_count += 1
+
+# Context-sensitive call graph data model level three item
+
+class CallGraphLevelThreeItem(CallGraphLevelTwoPlusItemBase):
+
+	def __init__(self, glb, row, comm_id, thread_id, call_path_id, name, dso, count, time, branch_count, parent_item):
+		super(CallGraphLevelThreeItem, self).__init__(glb, row, comm_id, thread_id, call_path_id, time, branch_count, parent_item)
+		dso = dsoname(dso)
+		self.data = [ name, dso, str(count), str(time), PercentToOneDP(time, parent_item.time), str(branch_count), PercentToOneDP(branch_count, parent_item.branch_count) ]
+		self.dbid = call_path_id
+
+# Context-sensitive call graph data model level two item
+
+class CallGraphLevelTwoItem(CallGraphLevelTwoPlusItemBase):
+
+	def __init__(self, glb, row, comm_id, thread_id, pid, tid, parent_item):
+		super(CallGraphLevelTwoItem, self).__init__(glb, row, comm_id, thread_id, 1, 0, 0, parent_item)
+		self.data = [str(pid) + ":" + str(tid), "", "", "", "", "", ""]
+		self.dbid = thread_id
+
+	def Select(self):
+		super(CallGraphLevelTwoItem, self).Select()
+		for child_item in self.child_items:
+			self.time += child_item.time
+			self.branch_count += child_item.branch_count
+		for child_item in self.child_items:
+			child_item.data[4] = PercentToOneDP(child_item.time, self.time)
+			child_item.data[6] = PercentToOneDP(child_item.branch_count, self.branch_count)
+
+# Context-sensitive call graph data model level one item
+
+class CallGraphLevelOneItem(CallGraphLevelItemBase):
+
+	def __init__(self, glb, row, comm_id, comm, parent_item):
+		super(CallGraphLevelOneItem, self).__init__(glb, row, parent_item)
+		self.data = [comm, "", "", "", "", "", ""]
+		self.dbid = comm_id
+
+	def Select(self):
+		self.query_done = True;
+		query = QSqlQuery(self.glb.db)
+		QueryExec(query, "SELECT thread_id, pid, tid"
+					" FROM comm_threads"
+					" INNER JOIN threads ON thread_id = threads.id"
+					" WHERE comm_id = " + str(self.dbid))
+		while query.next():
+			child_item = CallGraphLevelTwoItem(self.glb, self.child_count, self.dbid, query.value(0), query.value(1), query.value(2), self)
+			self.child_items.append(child_item)
+			self.child_count += 1
+
+# Context-sensitive call graph data model root item
+
+class CallGraphRootItem(CallGraphLevelItemBase):
+
+	def __init__(self, glb):
+		super(CallGraphRootItem, self).__init__(glb, 0, None)
+		self.dbid = 0
+		self.query_done = True;
+		query = QSqlQuery(glb.db)
+		QueryExec(query, "SELECT id, comm FROM comms")
+		while query.next():
+			if not query.value(0):
+				continue
+			child_item = CallGraphLevelOneItem(glb, self.child_count, query.value(0), query.value(1), self)
+			self.child_items.append(child_item)
+			self.child_count += 1
+
+# Context-sensitive call graph data model
+
+class CallGraphModel(TreeModel):
+
+	def __init__(self, glb, parent=None):
+		super(CallGraphModel, self).__init__(CallGraphRootItem(glb), parent)
+		self.glb = glb
+
+	def columnCount(self, parent=None):
+		return 7
+
+	def columnHeader(self, column):
+		headers = ["Call Path", "Object", "Count ", "Time (ns) ", "Time (%) ", "Branch Count ", "Branch Count (%) "]
+		return headers[column]
+
+	def columnAlignment(self, column):
+		alignment = [ Qt.AlignLeft, Qt.AlignLeft, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight ]
+		return alignment[column]
+
+	def FindSelect(self, value, pattern, query):
+		if pattern:
+			# postgresql and sqlite pattern patching differences:
+			#   postgresql LIKE is case sensitive but sqlite LIKE is not
+			#   postgresql LIKE allows % and _ to be escaped with \ but sqlite LIKE does not
+			#   postgresql supports ILIKE which is case insensitive
+			#   sqlite supports GLOB (text only) which uses * and ? and is case sensitive
+			if not self.glb.dbref.is_sqlite3:
+				# Escape % and _
+				s = value.replace("%", "\%")
+				s = s.replace("_", "\_")
+				# Translate * and ? into SQL LIKE pattern characters % and _
+				trans = string.maketrans("*?", "%_")
+				match = " LIKE '" + str(s).translate(trans) + "'"
+			else:
+				match = " GLOB '" + str(value) + "'"
+		else:
+			match = " = '" + str(value) + "'"
+		QueryExec(query, "SELECT call_path_id, comm_id, thread_id"
+						" FROM calls"
+						" INNER JOIN call_paths ON calls.call_path_id = call_paths.id"
+						" INNER JOIN symbols ON call_paths.symbol_id = symbols.id"
+						" WHERE symbols.name" + match +
+						" GROUP BY comm_id, thread_id, call_path_id"
+						" ORDER BY comm_id, thread_id, call_path_id")
+
+	def FindPath(self, query):
+		# Turn the query result into a list of ids that the tree view can walk
+		# to open the tree at the right place.
+		ids = []
+		parent_id = query.value(0)
+		while parent_id:
+			ids.insert(0, parent_id)
+			q2 = QSqlQuery(self.glb.db)
+			QueryExec(q2, "SELECT parent_id"
+					" FROM call_paths"
+					" WHERE id = " + str(parent_id))
+			if not q2.next():
+				break
+			parent_id = q2.value(0)
+		# The call path root is not used
+		if ids[0] == 1:
+			del ids[0]
+		ids.insert(0, query.value(2))
+		ids.insert(0, query.value(1))
+		return ids
+
+	def Found(self, query, found):
+		if found:
+			return self.FindPath(query)
+		return []
+
+	def FindValue(self, value, pattern, query, last_value, last_pattern):
+		if last_value == value and pattern == last_pattern:
+			found = query.first()
+		else:
+			self.FindSelect(value, pattern, query)
+			found = query.next()
+		return self.Found(query, found)
+
+	def FindNext(self, query):
+		found = query.next()
+		if not found:
+			found = query.first()
+		return self.Found(query, found)
+
+	def FindPrev(self, query):
+		found = query.previous()
+		if not found:
+			found = query.last()
+		return self.Found(query, found)
+
+	def FindThread(self, c):
+		if c.direction == 0 or c.value != c.last_value or c.pattern != c.last_pattern:
+			ids = self.FindValue(c.value, c.pattern, c.query, c.last_value, c.last_pattern)
+		elif c.direction > 0:
+			ids = self.FindNext(c.query)
+		else:
+			ids = self.FindPrev(c.query)
+		return (True, ids)
+
+	def Find(self, value, direction, pattern, context, callback):
+		class Context():
+			def __init__(self, *x):
+				self.value, self.direction, self.pattern, self.query, self.last_value, self.last_pattern = x
+			def Update(self, *x):
+				self.value, self.direction, self.pattern, self.last_value, self.last_pattern = x + (self.value, self.pattern)
+		if len(context):
+			context[0].Update(value, direction, pattern)
+		else:
+			context.append(Context(value, direction, pattern, QSqlQuery(self.glb.db), None, None))
+		# Use a thread so the UI is not blocked during the SELECT
+		thread = Thread(self.FindThread, context[0])
+		thread.done.connect(lambda ids, t=thread, c=callback: self.FindDone(t, c, ids), Qt.QueuedConnection)
+		thread.start()
+
+	def FindDone(self, thread, callback, ids):
+		callback(ids)
+
+# Vertical widget layout
+
+class VBox():
+
+	def __init__(self, w1, w2, w3=None):
+		self.vbox = QWidget()
+		self.vbox.setLayout(QVBoxLayout());
+
+		self.vbox.layout().setContentsMargins(0, 0, 0, 0)
+
+		self.vbox.layout().addWidget(w1)
+		self.vbox.layout().addWidget(w2)
+		if w3:
+			self.vbox.layout().addWidget(w3)
+
+	def Widget(self):
+		return self.vbox
+
+# Context-sensitive call graph window
+
+class CallGraphWindow(QMdiSubWindow):
+
+	def __init__(self, glb, parent=None):
+		super(CallGraphWindow, self).__init__(parent)
+
+		self.model = LookupCreateModel("Context-Sensitive Call Graph", lambda x=glb: CallGraphModel(x))
+
+		self.view = QTreeView()
+		self.view.setModel(self.model)
+
+		for c, w in ((0, 250), (1, 100), (2, 60), (3, 70), (4, 70), (5, 100)):
+			self.view.setColumnWidth(c, w)
+
+		self.find_bar = FindBar(self, self)
+
+		self.vbox = VBox(self.view, self.find_bar.Widget())
+
+		self.setWidget(self.vbox.Widget())
+
+		AddSubWindow(glb.mainwindow.mdi_area, self, "Context-Sensitive Call Graph")
+
+	def DisplayFound(self, ids):
+		if not len(ids):
+			return False
+		parent = QModelIndex()
+		for dbid in ids:
+			found = False
+			n = self.model.rowCount(parent)
+			for row in xrange(n):
+				child = self.model.index(row, 0, parent)
+				if child.internalPointer().dbid == dbid:
+					found = True
+					self.view.setCurrentIndex(child)
+					parent = child
+					break
+			if not found:
+				break
+		return found
+
+	def Find(self, value, direction, pattern, context):
+		self.view.setFocus()
+		self.find_bar.Busy()
+		self.model.Find(value, direction, pattern, context, self.FindDone)
+
+	def FindDone(self, ids):
+		found = True
+		if not self.DisplayFound(ids):
+			found = False
+		self.find_bar.Idle()
+		if not found:
+			self.find_bar.NotFound()
+
+# Child data item  finder
+
+class ChildDataItemFinder():
+
+	def __init__(self, root):
+		self.root = root
+		self.value, self.direction, self.pattern, self.last_value, self.last_pattern = (None,) * 5
+		self.rows = []
+		self.pos = 0
+
+	def FindSelect(self):
+		self.rows = []
+		if self.pattern:
+			pattern = re.compile(self.value)
+			for child in self.root.child_items:
+				for column_data in child.data:
+					if re.search(pattern, str(column_data)) is not None:
+						self.rows.append(child.row)
+						break
+		else:
+			for child in self.root.child_items:
+				for column_data in child.data:
+					if self.value in str(column_data):
+						self.rows.append(child.row)
+						break
+
+	def FindValue(self):
+		self.pos = 0
+		if self.last_value != self.value or self.pattern != self.last_pattern:
+			self.FindSelect()
+		if not len(self.rows):
+			return -1
+		return self.rows[self.pos]
+
+	def FindThread(self):
+		if self.direction == 0 or self.value != self.last_value or self.pattern != self.last_pattern:
+			row = self.FindValue()
+		elif len(self.rows):
+			if self.direction > 0:
+				self.pos += 1
+				if self.pos >= len(self.rows):
+					self.pos = 0
+			else:
+				self.pos -= 1
+				if self.pos < 0:
+					self.pos = len(self.rows) - 1
+			row = self.rows[self.pos]
+		else:
+			row = -1
+		return (True, row)
+
+	def Find(self, value, direction, pattern, context, callback):
+		self.value, self.direction, self.pattern, self.last_value, self.last_pattern = (value, direction,pattern, self.value, self.pattern)
+		# Use a thread so the UI is not blocked
+		thread = Thread(self.FindThread)
+		thread.done.connect(lambda row, t=thread, c=callback: self.FindDone(t, c, row), Qt.QueuedConnection)
+		thread.start()
+
+	def FindDone(self, thread, callback, row):
+		callback(row)
+
+# Number of database records to fetch in one go
+
+glb_chunk_sz = 10000
+
+# size of pickled integer big enough for record size
+
+glb_nsz = 8
+
+# Background process for SQL data fetcher
+
+class SQLFetcherProcess():
+
+	def __init__(self, dbref, sql, buffer, head, tail, fetch_count, fetching_done, process_target, wait_event, fetched_event, prep):
+		# Need a unique connection name
+		conn_name = "SQLFetcher" + str(os.getpid())
+		self.db, dbname = dbref.Open(conn_name)
+		self.sql = sql
+		self.buffer = buffer
+		self.head = head
+		self.tail = tail
+		self.fetch_count = fetch_count
+		self.fetching_done = fetching_done
+		self.process_target = process_target
+		self.wait_event = wait_event
+		self.fetched_event = fetched_event
+		self.prep = prep
+		self.query = QSqlQuery(self.db)
+		self.query_limit = 0 if "$$last_id$$" in sql else 2
+		self.last_id = -1
+		self.fetched = 0
+		self.more = True
+		self.local_head = self.head.value
+		self.local_tail = self.tail.value
+
+	def Select(self):
+		if self.query_limit:
+			if self.query_limit == 1:
+				return
+			self.query_limit -= 1
+		stmt = self.sql.replace("$$last_id$$", str(self.last_id))
+		QueryExec(self.query, stmt)
+
+	def Next(self):
+		if not self.query.next():
+			self.Select()
+			if not self.query.next():
+				return None
+		self.last_id = self.query.value(0)
+		return self.prep(self.query)
+
+	def WaitForTarget(self):
+		while True:
+			self.wait_event.clear()
+			target = self.process_target.value
+			if target > self.fetched or target < 0:
+				break
+			self.wait_event.wait()
+		return target
+
+	def HasSpace(self, sz):
+		if self.local_tail <= self.local_head:
+			space = len(self.buffer) - self.local_head
+			if space > sz:
+				return True
+			if space >= glb_nsz:
+				# Use 0 (or space < glb_nsz) to mean there is no more at the top of the buffer
+				nd = cPickle.dumps(0, cPickle.HIGHEST_PROTOCOL)
+				self.buffer[self.local_head : self.local_head + len(nd)] = nd
+			self.local_head = 0
+		if self.local_tail - self.local_head > sz:
+			return True
+		return False
+
+	def WaitForSpace(self, sz):
+		if self.HasSpace(sz):
+			return
+		while True:
+			self.wait_event.clear()
+			self.local_tail = self.tail.value
+			if self.HasSpace(sz):
+				return
+			self.wait_event.wait()
+
+	def AddToBuffer(self, obj):
+		d = cPickle.dumps(obj, cPickle.HIGHEST_PROTOCOL)
+		n = len(d)
+		nd = cPickle.dumps(n, cPickle.HIGHEST_PROTOCOL)
+		sz = n + glb_nsz
+		self.WaitForSpace(sz)
+		pos = self.local_head
+		self.buffer[pos : pos + len(nd)] = nd
+		self.buffer[pos + glb_nsz : pos + sz] = d
+		self.local_head += sz
+
+	def FetchBatch(self, batch_size):
+		fetched = 0
+		while batch_size > fetched:
+			obj = self.Next()
+			if obj is None:
+				self.more = False
+				break
+			self.AddToBuffer(obj)
+			fetched += 1
+		if fetched:
+			self.fetched += fetched
+			with self.fetch_count.get_lock():
+				self.fetch_count.value += fetched
+			self.head.value = self.local_head
+			self.fetched_event.set()
+
+	def Run(self):
+		while self.more:
+			target = self.WaitForTarget()
+			if target < 0:
+				break
+			batch_size = min(glb_chunk_sz, target - self.fetched)
+			self.FetchBatch(batch_size)
+		self.fetching_done.value = True
+		self.fetched_event.set()
+
+def SQLFetcherFn(*x):
+	process = SQLFetcherProcess(*x)
+	process.Run()
+
+# SQL data fetcher
+
+class SQLFetcher(QObject):
+
+	done = Signal(object)
+
+	def __init__(self, glb, sql, prep, process_data, parent=None):
+		super(SQLFetcher, self).__init__(parent)
+		self.process_data = process_data
+		self.more = True
+		self.target = 0
+		self.last_target = 0
+		self.fetched = 0
+		self.buffer_size = 16 * 1024 * 1024
+		self.buffer = Array(c_char, self.buffer_size, lock=False)
+		self.head = Value(c_longlong)
+		self.tail = Value(c_longlong)
+		self.local_tail = 0
+		self.fetch_count = Value(c_longlong)
+		self.fetching_done = Value(c_bool)
+		self.last_count = 0
+		self.process_target = Value(c_longlong)
+		self.wait_event = Event()
+		self.fetched_event = Event()
+		glb.AddInstanceToShutdownOnExit(self)
+		self.process = Process(target=SQLFetcherFn, args=(glb.dbref, sql, self.buffer, self.head, self.tail, self.fetch_count, self.fetching_done, self.process_target, self.wait_event, self.fetched_event, prep))
+		self.process.start()
+		self.thread = Thread(self.Thread)
+		self.thread.done.connect(self.ProcessData, Qt.QueuedConnection)
+		self.thread.start()
+
+	def Shutdown(self):
+		# Tell the thread and process to exit
+		self.process_target.value = -1
+		self.wait_event.set()
+		self.more = False
+		self.fetching_done.value = True
+		self.fetched_event.set()
+
+	def Thread(self):
+		if not self.more:
+			return True, 0
+		while True:
+			self.fetched_event.clear()
+			fetch_count = self.fetch_count.value
+			if fetch_count != self.last_count:
+				break
+			if self.fetching_done.value:
+				self.more = False
+				return True, 0
+			self.fetched_event.wait()
+		count = fetch_count - self.last_count
+		self.last_count = fetch_count
+		self.fetched += count
+		return False, count
+
+	def Fetch(self, nr):
+		if not self.more:
+			# -1 inidcates there are no more
+			return -1
+		result = self.fetched
+		extra = result + nr - self.target
+		if extra > 0:
+			self.target += extra
+			# process_target < 0 indicates shutting down
+			if self.process_target.value >= 0:
+				self.process_target.value = self.target
+			self.wait_event.set()
+		return result
+
+	def RemoveFromBuffer(self):
+		pos = self.local_tail
+		if len(self.buffer) - pos < glb_nsz:
+			pos = 0
+		n = cPickle.loads(self.buffer[pos : pos + glb_nsz])
+		if n == 0:
+			pos = 0
+			n = cPickle.loads(self.buffer[0 : glb_nsz])
+		pos += glb_nsz
+		obj = cPickle.loads(self.buffer[pos : pos + n])
+		self.local_tail = pos + n
+		return obj
+
+	def ProcessData(self, count):
+		for i in xrange(count):
+			obj = self.RemoveFromBuffer()
+			self.process_data(obj)
+		self.tail.value = self.local_tail
+		self.wait_event.set()
+		self.done.emit(count)
+
+# Fetch more records bar
+
+class FetchMoreRecordsBar():
+
+	def __init__(self, model, parent):
+		self.model = model
+
+		self.label = QLabel("Number of records (x " + "{:,}".format(glb_chunk_sz) + ") to fetch:")
+		self.label.setSizePolicy(QSizePolicy.Fixed, QSizePolicy.Fixed)
+
+		self.fetch_count = QSpinBox()
+		self.fetch_count.setRange(1, 1000000)
+		self.fetch_count.setValue(10)
+		self.fetch_count.setSizePolicy(QSizePolicy.Fixed, QSizePolicy.Fixed)
+
+		self.fetch = QPushButton("Go!")
+		self.fetch.setSizePolicy(QSizePolicy.Fixed, QSizePolicy.Fixed)
+		self.fetch.released.connect(self.FetchMoreRecords)
+
+		self.progress = QProgressBar()
+		self.progress.setRange(0, 100)
+		self.progress.hide()
+
+		self.done_label = QLabel("All records fetched")
+		self.done_label.hide()
+
+		self.spacer = QLabel("")
+
+		self.close_button = QToolButton()
+		self.close_button.setIcon(parent.style().standardIcon(QStyle.SP_DockWidgetCloseButton))
+		self.close_button.released.connect(self.Deactivate)
+
+		self.hbox = QHBoxLayout()
+		self.hbox.setContentsMargins(0, 0, 0, 0)
+
+		self.hbox.addWidget(self.label)
+		self.hbox.addWidget(self.fetch_count)
+		self.hbox.addWidget(self.fetch)
+		self.hbox.addWidget(self.spacer)
+		self.hbox.addWidget(self.progress)
+		self.hbox.addWidget(self.done_label)
+		self.hbox.addWidget(self.close_button)
+
+		self.bar = QWidget()
+		self.bar.setLayout(self.hbox);
+		self.bar.show()
+
+		self.in_progress = False
+		self.model.progress.connect(self.Progress)
+
+		self.done = False
+
+		if not model.HasMoreRecords():
+			self.Done()
+
+	def Widget(self):
+		return self.bar
+
+	def Activate(self):
+		self.bar.show()
+		self.fetch.setFocus()
+
+	def Deactivate(self):
+		self.bar.hide()
+
+	def Enable(self, enable):
+		self.fetch.setEnabled(enable)
+		self.fetch_count.setEnabled(enable)
+
+	def Busy(self):
+		self.Enable(False)
+		self.fetch.hide()
+		self.spacer.hide()
+		self.progress.show()
+
+	def Idle(self):
+		self.in_progress = False
+		self.Enable(True)
+		self.progress.hide()
+		self.fetch.show()
+		self.spacer.show()
+
+	def Target(self):
+		return self.fetch_count.value() * glb_chunk_sz
+
+	def Done(self):
+		self.done = True
+		self.Idle()
+		self.label.hide()
+		self.fetch_count.hide()
+		self.fetch.hide()
+		self.spacer.hide()
+		self.done_label.show()
+
+	def Progress(self, count):
+		if self.in_progress:
+			if count:
+				percent = ((count - self.start) * 100) / self.Target()
+				if percent >= 100:
+					self.Idle()
+				else:
+					self.progress.setValue(percent)
+		if not count:
+			# Count value of zero means no more records
+			self.Done()
+
+	def FetchMoreRecords(self):
+		if self.done:
+			return
+		self.progress.setValue(0)
+		self.Busy()
+		self.in_progress = True
+		self.start = self.model.FetchMoreRecords(self.Target())
+
+# Brance data model level two item
+
+class BranchLevelTwoItem():
+
+	def __init__(self, row, text, parent_item):
+		self.row = row
+		self.parent_item = parent_item
+		self.data = [""] * 8
+		self.data[7] = text
+		self.level = 2
+
+	def getParentItem(self):
+		return self.parent_item
+
+	def getRow(self):
+		return self.row
+
+	def childCount(self):
+		return 0
+
+	def hasChildren(self):
+		return False
+
+	def getData(self, column):
+		return self.data[column]
+
+# Brance data model level one item
+
+class BranchLevelOneItem():
+
+	def __init__(self, glb, row, data, parent_item):
+		self.glb = glb
+		self.row = row
+		self.parent_item = parent_item
+		self.child_count = 0
+		self.child_items = []
+		self.data = data[1:]
+		self.dbid = data[0]
+		self.level = 1
+		self.query_done = False
+
+	def getChildItem(self, row):
+		return self.child_items[row]
+
+	def getParentItem(self):
+		return self.parent_item
+
+	def getRow(self):
+		return self.row
+
+	def Select(self):
+		self.query_done = True
+
+		if not self.glb.have_disassembler:
+			return
+
+		query = QSqlQuery(self.glb.db)
+
+		QueryExec(query, "SELECT cpu, to_dso_id, to_symbol_id, to_sym_offset, short_name, long_name, build_id, sym_start, to_ip"
+				  " FROM samples"
+				  " INNER JOIN dsos ON samples.to_dso_id = dsos.id"
+				  " INNER JOIN symbols ON samples.to_symbol_id = symbols.id"
+				  " WHERE samples.id = " + str(self.dbid))
+		if not query.next():
+			return
+		cpu = query.value(0)
+		dso = query.value(1)
+		sym = query.value(2)
+		if dso == 0 or sym == 0:
+			return
+		off = query.value(3)
+		short_name = query.value(4)
+		long_name = query.value(5)
+		build_id = query.value(6)
+		sym_start = query.value(7)
+		ip = query.value(8)
+
+		QueryExec(query, "SELECT samples.dso_id, symbol_id, sym_offset, sym_start"
+				  " FROM samples"
+				  " INNER JOIN symbols ON samples.symbol_id = symbols.id"
+				  " WHERE samples.id > " + str(self.dbid) + " AND cpu = " + str(cpu) +
+				  " ORDER BY samples.id"
+				  " LIMIT 1")
+		if not query.next():
+			return
+		if query.value(0) != dso:
+			# Cannot disassemble from one dso to another
+			return
+		bsym = query.value(1)
+		boff = query.value(2)
+		bsym_start = query.value(3)
+		if bsym == 0:
+			return
+		tot = bsym_start + boff + 1 - sym_start - off
+		if tot <= 0 or tot > 16384:
+			return
+
+		inst = self.glb.disassembler.Instruction()
+		f = self.glb.FileFromNamesAndBuildId(short_name, long_name, build_id)
+		if not f:
+			return
+		mode = 0 if Is64Bit(f) else 1
+		self.glb.disassembler.SetMode(inst, mode)
+
+		buf_sz = tot + 16
+		buf = create_string_buffer(tot + 16)
+		f.seek(sym_start + off)
+		buf.value = f.read(buf_sz)
+		buf_ptr = addressof(buf)
+		i = 0
+		while tot > 0:
+			cnt, text = self.glb.disassembler.DisassembleOne(inst, buf_ptr, buf_sz, ip)
+			if cnt:
+				byte_str = tohex(ip).rjust(16)
+				for k in xrange(cnt):
+					byte_str += " %02x" % ord(buf[i])
+					i += 1
+				while k < 15:
+					byte_str += "   "
+					k += 1
+				self.child_items.append(BranchLevelTwoItem(0, byte_str + " " + text, self))
+				self.child_count += 1
+			else:
+				return
+			buf_ptr += cnt
+			tot -= cnt
+			buf_sz -= cnt
+			ip += cnt
+
+	def childCount(self):
+		if not self.query_done:
+			self.Select()
+			if not self.child_count:
+				return -1
+		return self.child_count
+
+	def hasChildren(self):
+		if not self.query_done:
+			return True
+		return self.child_count > 0
+
+	def getData(self, column):
+		return self.data[column]
+
+# Brance data model root item
+
+class BranchRootItem():
+
+	def __init__(self):
+		self.child_count = 0
+		self.child_items = []
+		self.level = 0
+
+	def getChildItem(self, row):
+		return self.child_items[row]
+
+	def getParentItem(self):
+		return None
+
+	def getRow(self):
+		return 0
+
+	def childCount(self):
+		return self.child_count
+
+	def hasChildren(self):
+		return self.child_count > 0
+
+	def getData(self, column):
+		return ""
+
+# Branch data preparation
+
+def BranchDataPrep(query):
+	data = []
+	for i in xrange(0, 8):
+		data.append(query.value(i))
+	data.append(tohex(query.value(8)).rjust(16) + " " + query.value(9) + offstr(query.value(10)) +
+			" (" + dsoname(query.value(11)) + ")" + " -> " +
+			tohex(query.value(12)) + " " + query.value(13) + offstr(query.value(14)) +
+			" (" + dsoname(query.value(15)) + ")")
+	return data
+
+# Branch data model
+
+class BranchModel(TreeModel):
+
+	progress = Signal(object)
+
+	def __init__(self, glb, event_id, where_clause, parent=None):
+		super(BranchModel, self).__init__(BranchRootItem(), parent)
+		self.glb = glb
+		self.event_id = event_id
+		self.more = True
+		self.populated = 0
+		sql = ("SELECT samples.id, time, cpu, comm, pid, tid, branch_types.name,"
+			" CASE WHEN in_tx = '0' THEN 'No' ELSE 'Yes' END,"
+			" ip, symbols.name, sym_offset, dsos.short_name,"
+			" to_ip, to_symbols.name, to_sym_offset, to_dsos.short_name"
+			" FROM samples"
+			" INNER JOIN comms ON comm_id = comms.id"
+			" INNER JOIN threads ON thread_id = threads.id"
+			" INNER JOIN branch_types ON branch_type = branch_types.id"
+			" INNER JOIN symbols ON symbol_id = symbols.id"
+			" INNER JOIN symbols to_symbols ON to_symbol_id = to_symbols.id"
+			" INNER JOIN dsos ON samples.dso_id = dsos.id"
+			" INNER JOIN dsos AS to_dsos ON samples.to_dso_id = to_dsos.id"
+			" WHERE samples.id > $$last_id$$" + where_clause +
+			" AND evsel_id = " + str(self.event_id) +
+			" ORDER BY samples.id"
+			" LIMIT " + str(glb_chunk_sz))
+		self.fetcher = SQLFetcher(glb, sql, BranchDataPrep, self.AddSample)
+		self.fetcher.done.connect(self.Update)
+		self.fetcher.Fetch(glb_chunk_sz)
+
+	def columnCount(self, parent=None):
+		return 8
+
+	def columnHeader(self, column):
+		return ("Time", "CPU", "Command", "PID", "TID", "Branch Type", "In Tx", "Branch")[column]
+
+	def columnFont(self, column):
+		if column != 7:
+			return None
+		return QFont("Monospace")
+
+	def DisplayData(self, item, index):
+		if item.level == 1:
+			self.FetchIfNeeded(item.row)
+		return item.getData(index.column())
+
+	def AddSample(self, data):
+		child = BranchLevelOneItem(self.glb, self.populated, data, self.root)
+		self.root.child_items.append(child)
+		self.populated += 1
+
+	def Update(self, fetched):
+		if not fetched:
+			self.more = False
+			self.progress.emit(0)
+		child_count = self.root.child_count
+		count = self.populated - child_count
+		if count > 0:
+			parent = QModelIndex()
+			self.beginInsertRows(parent, child_count, child_count + count - 1)
+			self.insertRows(child_count, count, parent)
+			self.root.child_count += count
+			self.endInsertRows()
+			self.progress.emit(self.root.child_count)
+
+	def FetchMoreRecords(self, count):
+		current = self.root.child_count
+		if self.more:
+			self.fetcher.Fetch(count)
+		else:
+			self.progress.emit(0)
+		return current
+
+	def HasMoreRecords(self):
+		return self.more
+
+# Branch window
+
+class BranchWindow(QMdiSubWindow):
+
+	def __init__(self, glb, event_id, name, where_clause, parent=None):
+		super(BranchWindow, self).__init__(parent)
+
+		model_name = "Branch Events " + str(event_id)
+		if len(where_clause):
+			model_name = where_clause + " " + model_name
+
+		self.model = LookupCreateModel(model_name, lambda: BranchModel(glb, event_id, where_clause))
+
+		self.view = QTreeView()
+		self.view.setUniformRowHeights(True)
+		self.view.setModel(self.model)
+
+		self.ResizeColumnsToContents()
+
+		self.find_bar = FindBar(self, self, True)
+
+		self.finder = ChildDataItemFinder(self.model.root)
+
+		self.fetch_bar = FetchMoreRecordsBar(self.model, self)
+
+		self.vbox = VBox(self.view, self.find_bar.Widget(), self.fetch_bar.Widget())
+
+		self.setWidget(self.vbox.Widget())
+
+		AddSubWindow(glb.mainwindow.mdi_area, self, name + " Branch Events")
+
+	def ResizeColumnToContents(self, column, n):
+		# Using the view's resizeColumnToContents() here is extrememly slow
+		# so implement a crude alternative
+		mm = "MM" if column else "MMMM"
+		font = self.view.font()
+		metrics = QFontMetrics(font)
+		max = 0
+		for row in xrange(n):
+			val = self.model.root.child_items[row].data[column]
+			len = metrics.width(str(val) + mm)
+			max = len if len > max else max
+		val = self.model.columnHeader(column)
+		len = metrics.width(str(val) + mm)
+		max = len if len > max else max
+		self.view.setColumnWidth(column, max)
+
+	def ResizeColumnsToContents(self):
+		n = min(self.model.root.child_count, 100)
+		if n < 1:
+			# No data yet, so connect a signal to notify when there is
+			self.model.rowsInserted.connect(self.UpdateColumnWidths)
+			return
+		columns = self.model.columnCount()
+		for i in xrange(columns):
+			self.ResizeColumnToContents(i, n)
+
+	def UpdateColumnWidths(self, *x):
+		# This only needs to be done once, so disconnect the signal now
+		self.model.rowsInserted.disconnect(self.UpdateColumnWidths)
+		self.ResizeColumnsToContents()
+
+	def Find(self, value, direction, pattern, context):
+		self.view.setFocus()
+		self.find_bar.Busy()
+		self.finder.Find(value, direction, pattern, context, self.FindDone)
+
+	def FindDone(self, row):
+		self.find_bar.Idle()
+		if row >= 0:
+			self.view.setCurrentIndex(self.model.index(row, 0, QModelIndex()))
+		else:
+			self.find_bar.NotFound()
+
+# Dialog data item converted and validated using a SQL table
+
+class SQLTableDialogDataItem():
+
+	def __init__(self, glb, label, placeholder_text, table_name, match_column, column_name1, column_name2, parent):
+		self.glb = glb
+		self.label = label
+		self.placeholder_text = placeholder_text
+		self.table_name = table_name
+		self.match_column = match_column
+		self.column_name1 = column_name1
+		self.column_name2 = column_name2
+		self.parent = parent
+
+		self.value = ""
+
+		self.widget = QLineEdit()
+		self.widget.editingFinished.connect(self.Validate)
+		self.widget.textChanged.connect(self.Invalidate)
+		self.red = False
+		self.error = ""
+		self.validated = True
+
+		self.last_id = 0
+		self.first_time = 0
+		self.last_time = 2 ** 64
+		if self.table_name == "<timeranges>":
+			query = QSqlQuery(self.glb.db)
+			QueryExec(query, "SELECT id, time FROM samples ORDER BY id DESC LIMIT 1")
+			if query.next():
+				self.last_id = int(query.value(0))
+				self.last_time = int(query.value(1))
+			QueryExec(query, "SELECT time FROM samples WHERE time != 0 ORDER BY id LIMIT 1")
+			if query.next():
+				self.first_time = int(query.value(0))
+			if placeholder_text:
+				placeholder_text += ", between " + str(self.first_time) + " and " + str(self.last_time)
+
+		if placeholder_text:
+			self.widget.setPlaceholderText(placeholder_text)
+
+	def ValueToIds(self, value):
+		ids = []
+		query = QSqlQuery(self.glb.db)
+		stmt = "SELECT id FROM " + self.table_name + " WHERE " + self.match_column + " = '" + value + "'"
+		ret = query.exec_(stmt)
+		if ret:
+			while query.next():
+				ids.append(str(query.value(0)))
+		return ids
+
+	def IdBetween(self, query, lower_id, higher_id, order):
+		QueryExec(query, "SELECT id FROM samples WHERE id > " + str(lower_id) + " AND id < " + str(higher_id) + " ORDER BY id " + order + " LIMIT 1")
+		if query.next():
+			return True, int(query.value(0))
+		else:
+			return False, 0
+
+	def BinarySearchTime(self, lower_id, higher_id, target_time, get_floor):
+		query = QSqlQuery(self.glb.db)
+		while True:
+			next_id = int((lower_id + higher_id) / 2)
+			QueryExec(query, "SELECT time FROM samples WHERE id = " + str(next_id))
+			if not query.next():
+				ok, dbid = self.IdBetween(query, lower_id, next_id, "DESC")
+				if not ok:
+					ok, dbid = self.IdBetween(query, next_id, higher_id, "")
+					if not ok:
+						return str(higher_id)
+				next_id = dbid
+				QueryExec(query, "SELECT time FROM samples WHERE id = " + str(next_id))
+			next_time = int(query.value(0))
+			if get_floor:
+				if target_time > next_time:
+					lower_id = next_id
+				else:
+					higher_id = next_id
+				if higher_id <= lower_id + 1:
+					return str(higher_id)
+			else:
+				if target_time >= next_time:
+					lower_id = next_id
+				else:
+					higher_id = next_id
+				if higher_id <= lower_id + 1:
+					return str(lower_id)
+
+	def ConvertRelativeTime(self, val):
+		print "val ", val
+		mult = 1
+		suffix = val[-2:]
+		if suffix == "ms":
+			mult = 1000000
+		elif suffix == "us":
+			mult = 1000
+		elif suffix == "ns":
+			mult = 1
+		else:
+			return val
+		val = val[:-2].strip()
+		if not self.IsNumber(val):
+			return val
+		val = int(val) * mult
+		if val >= 0:
+			val += self.first_time
+		else:
+			val += self.last_time
+		return str(val)
+
+	def ConvertTimeRange(self, vrange):
+		print "vrange ", vrange
+		if vrange[0] == "":
+			vrange[0] = str(self.first_time)
+		if vrange[1] == "":
+			vrange[1] = str(self.last_time)
+		vrange[0] = self.ConvertRelativeTime(vrange[0])
+		vrange[1] = self.ConvertRelativeTime(vrange[1])
+		print "vrange2 ", vrange
+		if not self.IsNumber(vrange[0]) or not self.IsNumber(vrange[1]):
+			return False
+		print "ok1"
+		beg_range = max(int(vrange[0]), self.first_time)
+		end_range = min(int(vrange[1]), self.last_time)
+		if beg_range > self.last_time or end_range < self.first_time:
+			return False
+		print "ok2"
+		vrange[0] = self.BinarySearchTime(0, self.last_id, beg_range, True)
+		vrange[1] = self.BinarySearchTime(1, self.last_id + 1, end_range, False)
+		print "vrange3 ", vrange
+		return True
+
+	def AddTimeRange(self, value, ranges):
+		print "value ", value
+		n = value.count("-")
+		if n == 1:
+			pass
+		elif n == 2:
+			if value.split("-")[1].strip() == "":
+				n = 1
+		elif n == 3:
+			n = 2
+		else:
+			return False
+		pos = findnth(value, "-", n)
+		vrange = [value[:pos].strip() ,value[pos+1:].strip()]
+		if self.ConvertTimeRange(vrange):
+			ranges.append(vrange)
+			return True
+		return False
+
+	def InvalidValue(self, value):
+		self.value = ""
+		palette = QPalette()
+		palette.setColor(QPalette.Text,Qt.red)
+		self.widget.setPalette(palette)
+		self.red = True
+		self.error = self.label + " invalid value '" + value + "'"
+		self.parent.ShowMessage(self.error)
+
+	def IsNumber(self, value):
+		try:
+			x = int(value)
+		except:
+			x = 0
+		return str(x) == value
+
+	def Invalidate(self):
+		self.validated = False
+
+	def Validate(self):
+		input_string = self.widget.text()
+		self.validated = True
+		if self.red:
+			palette = QPalette()
+			self.widget.setPalette(palette)
+			self.red = False
+		if not len(input_string.strip()):
+			self.error = ""
+			self.value = ""
+			return
+		if self.table_name == "<timeranges>":
+			ranges = []
+			for value in [x.strip() for x in input_string.split(",")]:
+				if not self.AddTimeRange(value, ranges):
+					return self.InvalidValue(value)
+			ranges = [("(" + self.column_name1 + " >= " + r[0] + " AND " + self.column_name1 + " <= " + r[1] + ")") for r in ranges]
+			self.value = " OR ".join(ranges)
+		elif self.table_name == "<ranges>":
+			singles = []
+			ranges = []
+			for value in [x.strip() for x in input_string.split(",")]:
+				if "-" in value:
+					vrange = value.split("-")
+					if len(vrange) != 2 or not self.IsNumber(vrange[0]) or not self.IsNumber(vrange[1]):
+						return self.InvalidValue(value)
+					ranges.append(vrange)
+				else:
+					if not self.IsNumber(value):
+						return self.InvalidValue(value)
+					singles.append(value)
+			ranges = [("(" + self.column_name1 + " >= " + r[0] + " AND " + self.column_name1 + " <= " + r[1] + ")") for r in ranges]
+			if len(singles):
+				ranges.append(self.column_name1 + " IN (" + ",".join(singles) + ")")
+			self.value = " OR ".join(ranges)
+		elif self.table_name:
+			all_ids = []
+			for value in [x.strip() for x in input_string.split(",")]:
+				ids = self.ValueToIds(value)
+				if len(ids):
+					all_ids.extend(ids)
+				else:
+					return self.InvalidValue(value)
+			self.value = self.column_name1 + " IN (" + ",".join(all_ids) + ")"
+			if self.column_name2:
+				self.value = "( " + self.value + " OR " + self.column_name2 + " IN (" + ",".join(all_ids) + ") )"
+		else:
+			self.value = input_string.strip()
+		self.error = ""
+		self.parent.ClearMessage()
+
+	def IsValid(self):
+		if not self.validated:
+			self.Validate()
+		if len(self.error):
+			self.parent.ShowMessage(self.error)
+			return False
+		return True
+
+# Selected branch report creation dialog
+
+class SelectedBranchDialog(QDialog):
+
+	def __init__(self, glb, parent=None):
+		super(SelectedBranchDialog, self).__init__(parent)
+
+		self.glb = glb
+
+		self.name = ""
+		self.where_clause = ""
+
+		self.setWindowTitle("Selected Branches")
+		self.setMinimumWidth(600)
+
+		items = (
+			("Report name:", "Enter a name to appear in the window title bar", "", "", "", ""),
+			("Time ranges:", "Enter time ranges", "<timeranges>", "", "samples.id", ""),
+			("CPUs:", "Enter CPUs or ranges e.g. 0,5-6", "<ranges>", "", "cpu", ""),
+			("Commands:", "Only branches with these commands will be included", "comms", "comm", "comm_id", ""),
+			("PIDs:", "Only branches with these process IDs will be included", "threads", "pid", "thread_id", ""),
+			("TIDs:", "Only branches with these thread IDs will be included", "threads", "tid", "thread_id", ""),
+			("DSOs:", "Only branches with these DSOs will be included", "dsos", "short_name", "samples.dso_id", "to_dso_id"),
+			("Symbols:", "Only branches with these symbols will be included", "symbols", "name", "symbol_id", "to_symbol_id"),
+			("Raw SQL clause: ", "Enter a raw SQL WHERE clause", "", "", "", ""),
+			)
+		self.data_items = [SQLTableDialogDataItem(glb, *x, parent=self) for x in items]
+
+		self.grid = QGridLayout()
+
+		for row in xrange(len(self.data_items)):
+			self.grid.addWidget(QLabel(self.data_items[row].label), row, 0)
+			self.grid.addWidget(self.data_items[row].widget, row, 1)
+
+		self.status = QLabel()
+
+		self.ok_button = QPushButton("Ok", self)
+		self.ok_button.setDefault(True)
+		self.ok_button.released.connect(self.Ok)
+		self.ok_button.setSizePolicy(QSizePolicy.Fixed, QSizePolicy.Fixed)
+
+		self.cancel_button = QPushButton("Cancel", self)
+		self.cancel_button.released.connect(self.reject)
+		self.cancel_button.setSizePolicy(QSizePolicy.Fixed, QSizePolicy.Fixed)
+
+		self.hbox = QHBoxLayout()
+		#self.hbox.addStretch()
+		self.hbox.addWidget(self.status)
+		self.hbox.addWidget(self.ok_button)
+		self.hbox.addWidget(self.cancel_button)
+
+		self.vbox = QVBoxLayout()
+		self.vbox.addLayout(self.grid)
+		self.vbox.addLayout(self.hbox)
+
+		self.setLayout(self.vbox);
+
+	def Ok(self):
+		self.name = self.data_items[0].value
+		if not self.name:
+			self.ShowMessage("Report name is required")
+			return
+		for d in self.data_items:
+			if not d.IsValid():
+				return
+		for d in self.data_items[1:]:
+			if len(d.value):
+				if len(self.where_clause):
+					self.where_clause += " AND "
+				self.where_clause += d.value
+		if len(self.where_clause):
+			self.where_clause = " AND ( " + self.where_clause + " ) "
+		else:
+			self.ShowMessage("No selection")
+			return
+		self.accept()
+
+	def ShowMessage(self, msg):
+		self.status.setText("<font color=#FF0000>" + msg)
+
+	def ClearMessage(self):
+		self.status.setText("")
+
+# Event list
+
+def GetEventList(db):
+	events = []
+	query = QSqlQuery(db)
+	QueryExec(query, "SELECT name FROM selected_events WHERE id > 0 ORDER BY id")
+	while query.next():
+		events.append(query.value(0))
+	return events
+
+# SQL data preparation
+
+def SQLTableDataPrep(query, count):
+	data = []
+	for i in xrange(count):
+		data.append(query.value(i))
+	return data
+
+# SQL table data model item
+
+class SQLTableItem():
+
+	def __init__(self, row, data):
+		self.row = row
+		self.data = data
+
+	def getData(self, column):
+		return self.data[column]
+
+# SQL table data model
+
+class SQLTableModel(TableModel):
+
+	progress = Signal(object)
+
+	def __init__(self, glb, sql, column_count, parent=None):
+		super(SQLTableModel, self).__init__(parent)
+		self.glb = glb
+		self.more = True
+		self.populated = 0
+		self.fetcher = SQLFetcher(glb, sql, lambda x, y=column_count: SQLTableDataPrep(x, y), self.AddSample)
+		self.fetcher.done.connect(self.Update)
+		self.fetcher.Fetch(glb_chunk_sz)
+
+	def DisplayData(self, item, index):
+		self.FetchIfNeeded(item.row)
+		return item.getData(index.column())
+
+	def AddSample(self, data):
+		child = SQLTableItem(self.populated, data)
+		self.child_items.append(child)
+		self.populated += 1
+
+	def Update(self, fetched):
+		if not fetched:
+			self.more = False
+			self.progress.emit(0)
+		child_count = self.child_count
+		count = self.populated - child_count
+		if count > 0:
+			parent = QModelIndex()
+			self.beginInsertRows(parent, child_count, child_count + count - 1)
+			self.insertRows(child_count, count, parent)
+			self.child_count += count
+			self.endInsertRows()
+			self.progress.emit(self.child_count)
+
+	def FetchMoreRecords(self, count):
+		current = self.child_count
+		if self.more:
+			self.fetcher.Fetch(count)
+		else:
+			self.progress.emit(0)
+		return current
+
+	def HasMoreRecords(self):
+		return self.more
+
+# SQL automatic table data model
+
+class SQLAutoTableModel(SQLTableModel):
+
+	def __init__(self, glb, table_name, parent=None):
+		sql = "SELECT * FROM " + table_name + " WHERE id > $$last_id$$ ORDER BY id LIMIT " + str(glb_chunk_sz)
+		if table_name == "comm_threads_view":
+			# For now, comm_threads_view has no id column
+			sql = "SELECT * FROM " + table_name + " WHERE comm_id > $$last_id$$ ORDER BY comm_id LIMIT " + str(glb_chunk_sz)
+		self.column_headers = []
+		query = QSqlQuery(glb.db)
+		if glb.dbref.is_sqlite3:
+			QueryExec(query, "PRAGMA table_info(" + table_name + ")")
+			while query.next():
+				self.column_headers.append(query.value(1))
+			if table_name == "sqlite_master":
+				sql = "SELECT * FROM " + table_name
+		else:
+			if table_name[:19] == "information_schema.":
+				sql = "SELECT * FROM " + table_name
+				select_table_name = table_name[19:]
+				schema = "information_schema"
+			else:
+				select_table_name = table_name
+				schema = "public"
+			QueryExec(query, "SELECT column_name FROM information_schema.columns WHERE table_schema = '" + schema + "' and table_name = '" + select_table_name + "'")
+			while query.next():
+				self.column_headers.append(query.value(0))
+		super(SQLAutoTableModel, self).__init__(glb, sql, len(self.column_headers), parent)
+
+	def columnCount(self, parent=None):
+		return len(self.column_headers)
+
+	def columnHeader(self, column):
+		return self.column_headers[column]
+
+# Base class for custom ResizeColumnsToContents
+
+class ResizeColumnsToContentsBase(QObject):
+
+	def __init__(self, parent=None):
+		super(ResizeColumnsToContentsBase, self).__init__(parent)
+
+	def ResizeColumnToContents(self, column, n):
+		# Using the view's resizeColumnToContents() here is extrememly slow
+		# so implement a crude alternative
+		font = self.view.font()
+		metrics = QFontMetrics(font)
+		max = 0
+		for row in xrange(n):
+			val = self.data_model.child_items[row].data[column]
+			len = metrics.width(str(val) + "MM")
+			max = len if len > max else max
+		val = self.data_model.columnHeader(column)
+		len = metrics.width(str(val) + "MM")
+		max = len if len > max else max
+		self.view.setColumnWidth(column, max)
+
+	def ResizeColumnsToContents(self):
+		n = min(self.data_model.child_count, 100)
+		if n < 1:
+			# No data yet, so connect a signal to notify when there is
+			self.data_model.rowsInserted.connect(self.UpdateColumnWidths)
+			return
+		columns = self.data_model.columnCount()
+		for i in xrange(columns):
+			self.ResizeColumnToContents(i, n)
+
+	def UpdateColumnWidths(self, *x):
+		# This only needs to be done once, so disconnect the signal now
+		self.data_model.rowsInserted.disconnect(self.UpdateColumnWidths)
+		self.ResizeColumnsToContents()
+
+# Table window
+
+class TableWindow(QMdiSubWindow, ResizeColumnsToContentsBase):
+
+	def __init__(self, glb, table_name, parent=None):
+		super(TableWindow, self).__init__(parent)
+
+		self.data_model = LookupCreateModel(table_name + " Table", lambda: SQLAutoTableModel(glb, table_name))
+
+		self.model = QSortFilterProxyModel()
+		self.model.setSourceModel(self.data_model)
+
+		self.view = QTableView()
+		self.view.setModel(self.model)
+		self.view.setEditTriggers(QAbstractItemView.NoEditTriggers)
+		self.view.verticalHeader().setVisible(False)
+		self.view.sortByColumn(-1, Qt.AscendingOrder)
+		self.view.setSortingEnabled(True)
+
+		self.ResizeColumnsToContents()
+
+		self.find_bar = FindBar(self, self, True)
+
+		self.finder = ChildDataItemFinder(self.data_model)
+
+		self.fetch_bar = FetchMoreRecordsBar(self.data_model, self)
+
+		self.vbox = VBox(self.view, self.find_bar.Widget(), self.fetch_bar.Widget())
+
+		self.setWidget(self.vbox.Widget())
+
+		AddSubWindow(glb.mainwindow.mdi_area, self, table_name + " Table")
+
+	def Find(self, value, direction, pattern, context):
+		self.view.setFocus()
+		self.find_bar.Busy()
+		self.finder.Find(value, direction, pattern, context, self.FindDone)
+
+	def FindDone(self, row):
+		self.find_bar.Idle()
+		if row >= 0:
+			self.view.setCurrentIndex(self.model.mapFromSource(self.data_model.index(row, 0, QModelIndex())))
+		else:
+			self.find_bar.NotFound()
+
+# Table list
+
+def GetTableList(glb):
+	tables = []
+	query = QSqlQuery(glb.db)
+	if glb.dbref.is_sqlite3:
+		QueryExec(query, "SELECT name FROM sqlite_master WHERE type IN ( 'table' , 'view' ) ORDER BY name")
+	else:
+		QueryExec(query, "SELECT table_name FROM information_schema.tables WHERE table_schema = 'public' AND table_type IN ( 'BASE TABLE' , 'VIEW' ) ORDER BY table_name")
+	while query.next():
+		tables.append(query.value(0))
+	if glb.dbref.is_sqlite3:
+		tables.append("sqlite_master")
+	else:
+		tables.append("information_schema.tables")
+		tables.append("information_schema.views")
+		tables.append("information_schema.columns")
+	return tables
+
+# Action Definition
+
+def CreateAction(label, tip, callback, parent=None, shortcut=None):
+	action = QAction(label, parent)
+	if shortcut != None:
+		action.setShortcuts(shortcut)
+	action.setStatusTip(tip)
+	action.triggered.connect(callback)
+	return action
+
+# Typical application actions
+
+def CreateExitAction(app, parent=None):
+	return CreateAction("&Quit", "Exit the application", app.closeAllWindows, parent, QKeySequence.Quit)
+
+# Typical MDI actions
+
+def CreateCloseActiveWindowAction(mdi_area):
+	return CreateAction("Cl&ose", "Close the active window", mdi_area.closeActiveSubWindow, mdi_area)
+
+def CreateCloseAllWindowsAction(mdi_area):
+	return CreateAction("Close &All", "Close all the windows", mdi_area.closeAllSubWindows, mdi_area)
+
+def CreateTileWindowsAction(mdi_area):
+	return CreateAction("&Tile", "Tile the windows", mdi_area.tileSubWindows, mdi_area)
+
+def CreateCascadeWindowsAction(mdi_area):
+	return CreateAction("&Cascade", "Cascade the windows", mdi_area.cascadeSubWindows, mdi_area)
+
+def CreateNextWindowAction(mdi_area):
+	return CreateAction("Ne&xt", "Move the focus to the next window", mdi_area.activateNextSubWindow, mdi_area, QKeySequence.NextChild)
+
+def CreatePreviousWindowAction(mdi_area):
+	return CreateAction("Pre&vious", "Move the focus to the previous window", mdi_area.activatePreviousSubWindow, mdi_area, QKeySequence.PreviousChild)
+
+# Typical MDI window menu
+
+class WindowMenu():
+
+	def __init__(self, mdi_area, menu):
+		self.mdi_area = mdi_area
+		self.window_menu = menu.addMenu("&Windows")
+		self.close_active_window = CreateCloseActiveWindowAction(mdi_area)
+		self.close_all_windows = CreateCloseAllWindowsAction(mdi_area)
+		self.tile_windows = CreateTileWindowsAction(mdi_area)
+		self.cascade_windows = CreateCascadeWindowsAction(mdi_area)
+		self.next_window = CreateNextWindowAction(mdi_area)
+		self.previous_window = CreatePreviousWindowAction(mdi_area)
+		self.window_menu.aboutToShow.connect(self.Update)
+
+	def Update(self):
+		self.window_menu.clear()
+		sub_window_count = len(self.mdi_area.subWindowList())
+		have_sub_windows = sub_window_count != 0
+		self.close_active_window.setEnabled(have_sub_windows)
+		self.close_all_windows.setEnabled(have_sub_windows)
+		self.tile_windows.setEnabled(have_sub_windows)
+		self.cascade_windows.setEnabled(have_sub_windows)
+		self.next_window.setEnabled(have_sub_windows)
+		self.previous_window.setEnabled(have_sub_windows)
+		self.window_menu.addAction(self.close_active_window)
+		self.window_menu.addAction(self.close_all_windows)
+		self.window_menu.addSeparator()
+		self.window_menu.addAction(self.tile_windows)
+		self.window_menu.addAction(self.cascade_windows)
+		self.window_menu.addSeparator()
+		self.window_menu.addAction(self.next_window)
+		self.window_menu.addAction(self.previous_window)
+		if sub_window_count == 0:
+			return
+		self.window_menu.addSeparator()
+		nr = 1
+		for sub_window in self.mdi_area.subWindowList():
+			label = str(nr) + " " + sub_window.name
+			if nr < 10:
+				label = "&" + label
+			action = self.window_menu.addAction(label)
+			action.setCheckable(True)
+			action.setChecked(sub_window == self.mdi_area.activeSubWindow())
+			action.triggered.connect(lambda x=nr: self.setActiveSubWindow(x))
+			self.window_menu.addAction(action)
+			nr += 1
+
+	def setActiveSubWindow(self, nr):
+		self.mdi_area.setActiveSubWindow(self.mdi_area.subWindowList()[nr - 1])
+
+# Help text
+
+glb_help_text = """
+<h1>Contents</h1>
+<style>
+p.c1 {
+    text-indent: 40px;
+}
+p.c2 {
+    text-indent: 80px;
+}
+}
+</style>
+<p class=c1><a href=#reports>1. Reports</a></p>
+<p class=c2><a href=#callgraph>1.1 Context-Sensitive Call Graph</a></p>
+<p class=c2><a href=#allbranches>1.2 All branches</a></p>
+<p class=c2><a href=#selectedbranches>1.3 Selected branches</a></p>
+<p class=c1><a href=#tables>2. Tables</a></p>
+<h1 id=reports>1. Reports</h1>
+<h2 id=callgraph>1.1 Context-Sensitive Call Graph</h2>
+The result is a GUI window with a tree representing a context-sensitive
+call-graph. Expanding a couple of levels of the tree and adjusting column
+widths to suit will display something like:
+<pre>
+                                         Call Graph: pt_example
+Call Path                          Object      Count   Time(ns)  Time(%)  Branch Count   Branch Count(%)
+v- ls
+    v- 2638:2638
+        v- _start                  ld-2.19.so    1     10074071   100.0         211135            100.0
+          |- unknown               unknown       1        13198     0.1              1              0.0
+          >- _dl_start             ld-2.19.so    1      1400980    13.9          19637              9.3
+          >- _d_linit_internal     ld-2.19.so    1       448152     4.4          11094              5.3
+          v-__libc_start_main@plt  ls            1      8211741    81.5         180397             85.4
+             >- _dl_fixup          ld-2.19.so    1         7607     0.1            108              0.1
+             >- __cxa_atexit       libc-2.19.so  1        11737     0.1             10              0.0
+             >- __libc_csu_init    ls            1        10354     0.1             10              0.0
+             |- _setjmp            libc-2.19.so  1            0     0.0              4              0.0
+             v- main               ls            1      8182043    99.6         180254             99.9
+</pre>
+<h3>Points to note:</h3>
+<ul>
+<li>The top level is a command name (comm)</li>
+<li>The next level is a thread (pid:tid)</li>
+<li>Subsequent levels are functions</li>
+<li>'Count' is the number of calls</li>
+<li>'Time' is the elapsed time until the function returns</li>
+<li>Percentages are relative to the level above</li>
+<li>'Branch Count' is the total number of branches for that function and all functions that it calls
+</ul>
+<h3>Find</h3>
+Ctrl-F displays a Find bar which finds function names by either an exact match or a pattern match.
+The pattern matching symbols are ? for any character and * for zero or more characters.
+<h2 id=allbranches>1.2 All branches</h2>
+The All branches report displays all branches in chronological order.
+Not all data is fetched immediately. More records can be fetched using the Fetch bar provided.
+<h3>Disassembly</h3>
+Open a branch to display disassembly. This only works if:
+<ol>
+<li>The disassembler is available. Currently, only Intel XED is supported - see <a href=#xed>Intel XED Setup</a></li>
+<li>The object code is available. Currently, only the perf build ID cache is searched for object code.
+The default directory ~/.debug can be overridden by setting environment variable PERF_BUILDID_DIR.
+One exception is kcore where the DSO long name is used (refer dsos_view on the Tables menu),
+or alternatively, set environment variable PERF_KCORE to the kcore file name.</li>
+</ol>
+<h4 id=xed>Intel XED Setup</h4>
+To use Intel XED, libxed.so must be present.  To build and install libxed.so:
+<pre>
+git clone https://github.com/intelxed/mbuild.git mbuild
+git clone https://github.com/intelxed/xed
+cd xed
+./mfile.py --share
+sudo ./mfile.py --prefix=/usr/local install
+sudo ldconfig
+</pre>
+<h3>Find</h3>
+Ctrl-F displays a Find bar which finds substrings by either an exact match or a regular expression match.
+Refer to Python documentation for the regular expression syntax.
+All columns are searched, but only currently fetched rows are searched.
+<h2 id=selectedbranches>1.3 Selected branches</h2>
+This is the same as the <a href=#allbranches>All branches</a> report but with the data reduced
+by various selection criteria. A dialog box displays available criteria which are AND'ed together.
+<h3>1.3.1 Time ranges</h3>
+The time ranges hint text shows the total time range. Relative time ranges can also be entered in
+ms, us or ns. Also, negative values are relative to the end of trace.  Examples:
+<pre>
+	81073085947329-81073085958238	From 81073085947329 to 81073085958238
+	100us-200us		From 100us to 200us
+	10ms-			From 10ms to the end
+	-100ns			The first 100ns
+	-10ms-			The last 10ms
+</pre>
+N.B. Due to the granularity of timestamps, there could be no branches in any given time range.
+<h1 id=tables>2. Tables</h1>
+The Tables menu shows all tables and views in the database. Most tables have an associated view
+which displays the information in a more friendly way. Not all data for large tables is fetched
+immediately. More records can be fetched using the Fetch bar provided. Columns can be sorted,
+but that can be slow for large tables.
+<p>There are also tables of database meta-information.
+For SQLite3 databases, the sqlite_master table is included.
+For PostgreSQL databases, information_schema.tables/views/columns are included.
+<h3>Find</h3>
+Ctrl-F displays a Find bar which finds substrings by either an exact match or a regular expression match.
+Refer to Python documentation for the regular expression syntax.
+All columns are searched, but only currently fetched rows are searched.
+<p>N.B. Results are found in id order, so if the table is re-ordered, find-next and find-previous
+will go to the next/previous result in id order, instead of display order.
+"""
+
+# Help window
+
+class HelpWindow(QMdiSubWindow):
+
+	def __init__(self, glb, parent=None):
+		super(HelpWindow, self).__init__(parent)
+
+		self.text = QTextBrowser()
+		self.text.setHtml(glb_help_text)
+		self.text.setReadOnly(True)
+		self.text.setOpenExternalLinks(True)
+
+		self.setWidget(self.text)
+
+		AddSubWindow(glb.mainwindow.mdi_area, self, "Exported SQL Viewer Help")
+
+# Main window that only displays the help text
+
+class HelpOnlyWindow(QMainWindow):
+
+	def __init__(self, parent=None):
+		super(HelpOnlyWindow, self).__init__(parent)
+
+		self.setMinimumSize(200, 100)
+		self.resize(800, 600)
+		self.setWindowTitle("Exported SQL Viewer Help")
+		self.setWindowIcon(self.style().standardIcon(QStyle.SP_MessageBoxInformation))
+
+		self.text = QTextBrowser()
+		self.text.setHtml(glb_help_text)
+		self.text.setReadOnly(True)
+		self.text.setOpenExternalLinks(True)
+
+		self.setCentralWidget(self.text)
+
+# Font resize
+
+def ResizeFont(widget, diff):
+	font = widget.font()
+	sz = font.pointSize()
+	font.setPointSize(sz + diff)
+	widget.setFont(font)
+
+def ShrinkFont(widget):
+	ResizeFont(widget, -1)
+
+def EnlargeFont(widget):
+	ResizeFont(widget, 1)
+
+# Unique name for sub-windows
+
+def NumberedWindowName(name, nr):
+	if nr > 1:
+		name += " <" + str(nr) + ">"
+	return name
+
+def UniqueSubWindowName(mdi_area, name):
+	nr = 1
+	while True:
+		unique_name = NumberedWindowName(name, nr)
+		ok = True
+		for sub_window in mdi_area.subWindowList():
+			if sub_window.name == unique_name:
+				ok = False
+				break
+		if ok:
+			return unique_name
+		nr += 1
+
+# Add a sub-window
+
+def AddSubWindow(mdi_area, sub_window, name):
+	unique_name = UniqueSubWindowName(mdi_area, name)
+	sub_window.setMinimumSize(200, 100)
+	sub_window.resize(800, 600)
+	sub_window.setWindowTitle(unique_name)
+	sub_window.setAttribute(Qt.WA_DeleteOnClose)
+	sub_window.setWindowIcon(sub_window.style().standardIcon(QStyle.SP_FileIcon))
+	sub_window.name = unique_name
+	mdi_area.addSubWindow(sub_window)
+	sub_window.show()
+
+# Main window
+
+class MainWindow(QMainWindow):
+
+	def __init__(self, glb, parent=None):
+		super(MainWindow, self).__init__(parent)
+
+		self.glb = glb
+
+		self.setWindowTitle("Exported SQL Viewer: " + glb.dbname)
+		self.setWindowIcon(self.style().standardIcon(QStyle.SP_ComputerIcon))
+		self.setMinimumSize(200, 100)
+
+		self.mdi_area = QMdiArea()
+		self.mdi_area.setHorizontalScrollBarPolicy(Qt.ScrollBarAsNeeded)
+		self.mdi_area.setVerticalScrollBarPolicy(Qt.ScrollBarAsNeeded)
+
+		self.setCentralWidget(self.mdi_area)
+
+		menu = self.menuBar()
+
+		file_menu = menu.addMenu("&File")
+		file_menu.addAction(CreateExitAction(glb.app, self))
+
+		edit_menu = menu.addMenu("&Edit")
+		edit_menu.addAction(CreateAction("&Find...", "Find items", self.Find, self, QKeySequence.Find))
+		edit_menu.addAction(CreateAction("Fetch &more records...", "Fetch more records", self.FetchMoreRecords, self, [QKeySequence(Qt.Key_F8)]))
+		edit_menu.addAction(CreateAction("&Shrink Font", "Make text smaller", self.ShrinkFont, self, [QKeySequence("Ctrl+-")]))
+		edit_menu.addAction(CreateAction("&Enlarge Font", "Make text bigger", self.EnlargeFont, self, [QKeySequence("Ctrl++")]))
+
+		reports_menu = menu.addMenu("&Reports")
+		reports_menu.addAction(CreateAction("Context-Sensitive Call &Graph", "Create a new window containing a context-sensitive call graph", self.NewCallGraph, self))
+
+		self.EventMenu(GetEventList(glb.db), reports_menu)
+
+		self.TableMenu(GetTableList(glb), menu)
+
+		self.window_menu = WindowMenu(self.mdi_area, menu)
+
+		help_menu = menu.addMenu("&Help")
+		help_menu.addAction(CreateAction("&Exported SQL Viewer Help", "Helpful information", self.Help, self, QKeySequence.HelpContents))
+
+	def Find(self):
+		win = self.mdi_area.activeSubWindow()
+		if win:
+			try:
+				win.find_bar.Activate()
+			except:
+				pass
+
+	def FetchMoreRecords(self):
+		win = self.mdi_area.activeSubWindow()
+		if win:
+			try:
+				win.fetch_bar.Activate()
+			except:
+				pass
+
+	def ShrinkFont(self):
+		win = self.mdi_area.activeSubWindow()
+		ShrinkFont(win.view)
+
+	def EnlargeFont(self):
+		win = self.mdi_area.activeSubWindow()
+		EnlargeFont(win.view)
+
+	def EventMenu(self, events, reports_menu):
+		branches_events = 0
+		for event in events:
+			event = event.split(":")[0]
+			if event == "branches":
+				branches_events += 1
+		dbid = 0
+		for event in events:
+			dbid += 1
+			event = event.split(":")[0]
+			if event == "branches":
+				label = "All branches" if branches_events == 1 else "All branches " + "(id=" + dbid + ")"
+				reports_menu.addAction(CreateAction(label, "Create a new window displaying branch events", lambda x=dbid: self.NewBranchView(x), self))
+				label = "Selected branches" if branches_events == 1 else "Selected branches " + "(id=" + dbid + ")"
+				reports_menu.addAction(CreateAction(label, "Create a new window displaying branch events", lambda x=dbid: self.NewSelectedBranchView(x), self))
+
+	def TableMenu(self, tables, menu):
+		table_menu = menu.addMenu("&Tables")
+		for table in tables:
+			table_menu.addAction(CreateAction(table, "Create a new window containing a table view", lambda t=table: self.NewTableView(t), self))
+
+	def NewCallGraph(self):
+		CallGraphWindow(self.glb, self)
+
+	def NewBranchView(self, event_id):
+		BranchWindow(self.glb, event_id, "", "", self)
+
+	def NewSelectedBranchView(self, event_id):
+		dialog = SelectedBranchDialog(self.glb, self)
+		ret = dialog.exec_()
+		if ret:
+			BranchWindow(self.glb, event_id, dialog.name, dialog.where_clause, self)
+
+	def NewTableView(self, table_name):
+		TableWindow(self.glb, table_name, self)
+
+	def Help(self):
+		HelpWindow(self.glb, self)
+
+# XED Disassembler
+
+class xed_state_t(Structure):
+
+	_fields_ = [
+		("mode", c_int),
+		("width", c_int)
+	]
+
+class XEDInstruction():
+
+	def __init__(self, libxed):
+		# Current xed_decoded_inst_t structure is 192 bytes. Use 512 to allow for future expansion
+		xedd_t = c_byte * 512
+		self.xedd = xedd_t()
+		self.xedp = addressof(self.xedd)
+		libxed.xed_decoded_inst_zero(self.xedp)
+		self.state = xed_state_t()
+		self.statep = addressof(self.state)
+		# Buffer for disassembled instruction text
+		self.buffer = create_string_buffer(256)
+		self.bufferp = addressof(self.buffer)
+
+class LibXED():
+
+	def __init__(self):
+		try:
+			self.libxed = CDLL("libxed.so")
+		except:
+			self.libxed = None
+		if not self.libxed:
+			self.libxed = CDLL("/usr/local/lib/libxed.so")
+
+		self.xed_tables_init = self.libxed.xed_tables_init
+		self.xed_tables_init.restype = None
+		self.xed_tables_init.argtypes = []
+
+		self.xed_decoded_inst_zero = self.libxed.xed_decoded_inst_zero
+		self.xed_decoded_inst_zero.restype = None
+		self.xed_decoded_inst_zero.argtypes = [ c_void_p ]
+
+		self.xed_operand_values_set_mode = self.libxed.xed_operand_values_set_mode
+		self.xed_operand_values_set_mode.restype = None
+		self.xed_operand_values_set_mode.argtypes = [ c_void_p, c_void_p ]
+
+		self.xed_decoded_inst_zero_keep_mode = self.libxed.xed_decoded_inst_zero_keep_mode
+		self.xed_decoded_inst_zero_keep_mode.restype = None
+		self.xed_decoded_inst_zero_keep_mode.argtypes = [ c_void_p ]
+
+		self.xed_decode = self.libxed.xed_decode
+		self.xed_decode.restype = c_int
+		self.xed_decode.argtypes = [ c_void_p, c_void_p, c_uint ]
+
+		self.xed_format_context = self.libxed.xed_format_context
+		self.xed_format_context.restype = c_uint
+		self.xed_format_context.argtypes = [ c_int, c_void_p, c_void_p, c_int, c_ulonglong, c_void_p, c_void_p ]
+
+		self.xed_tables_init()
+
+	def Instruction(self):
+		return XEDInstruction(self)
+
+	def SetMode(self, inst, mode):
+		if mode:
+			inst.state.mode = 4 # 32-bit
+			inst.state.width = 4 # 4 bytes
+		else:
+			inst.state.mode = 1 # 64-bit
+			inst.state.width = 8 # 8 bytes
+		self.xed_operand_values_set_mode(inst.xedp, inst.statep)
+
+	def DisassembleOne(self, inst, bytes_ptr, bytes_cnt, ip):
+		self.xed_decoded_inst_zero_keep_mode(inst.xedp)
+		err = self.xed_decode(inst.xedp, bytes_ptr, bytes_cnt)
+		if err:
+			return 0, ""
+		# Use AT&T mode (2), alternative is Intel (3)
+		ok = self.xed_format_context(2, inst.xedp, inst.bufferp, sizeof(inst.buffer), ip, 0, 0)
+		if not ok:
+			return 0, ""
+		# Return instruction length and the disassembled instruction text
+		# For now, assume the length is in byte 166
+		return inst.xedd[166], inst.buffer.value
+
+def TryOpen(file_name):
+	try:
+		return open(file_name, "rb")
+	except:
+		return None
+
+def Is64Bit(f):
+	result = sizeof(c_void_p)
+	# ELF support only
+	pos = f.tell()
+	f.seek(0)
+	header = f.read(7)
+	f.seek(pos)
+	magic = header[0:4]
+	eclass = ord(header[4])
+	encoding = ord(header[5])
+	version = ord(header[6])
+	if magic == chr(127) + "ELF" and eclass > 0 and eclass < 3 and encoding > 0 and encoding < 3 and version == 1:
+		result = True if eclass == 2 else False
+	return result
+
+# Global data
+
+class Glb():
+
+	def __init__(self, dbref, db, dbname):
+		self.dbref = dbref
+		self.db = db
+		self.dbname = dbname
+		self.home_dir = os.path.expanduser("~")
+		self.buildid_dir = os.getenv("PERF_BUILDID_DIR")
+		if self.buildid_dir:
+			self.buildid_dir += "/.build-id/"
+		else:
+			self.buildid_dir = self.home_dir + "/.debug/.build-id/"
+		self.app = None
+		self.mainwindow = None
+		self.instances_to_shutdown_on_exit = weakref.WeakSet()
+		try:
+			self.disassembler = LibXED()
+			self.have_disassembler = True
+		except:
+			self.have_disassembler = False
+
+	def FileFromBuildId(self, build_id):
+		file_name = self.buildid_dir + build_id[0:2] + "/" + build_id[2:] + "/elf"
+		return TryOpen(file_name)
+
+	def FileFromNamesAndBuildId(self, short_name, long_name, build_id):
+		# Assume current machine i.e. no support for virtualization
+		if short_name[0:7] == "[kernel" and os.path.basename(long_name) == "kcore":
+			file_name = os.getenv("PERF_KCORE")
+			f = TryOpen(file_name) if file_name else None
+			if f:
+				return f
+			# For now, no special handling if long_name is /proc/kcore
+			f = TryOpen(long_name)
+			if f:
+				return f
+		f = self.FileFromBuildId(build_id)
+		if f:
+			return f
+		return None
+
+	def AddInstanceToShutdownOnExit(self, instance):
+		self.instances_to_shutdown_on_exit.add(instance)
+
+	# Shutdown any background processes or threads
+	def ShutdownInstances(self):
+		for x in self.instances_to_shutdown_on_exit:
+			try:
+				x.Shutdown()
+			except:
+				pass
+
+# Database reference
+
+class DBRef():
+
+	def __init__(self, is_sqlite3, dbname):
+		self.is_sqlite3 = is_sqlite3
+		self.dbname = dbname
+
+	def Open(self, connection_name):
+		dbname = self.dbname
+		if self.is_sqlite3:
+			db = QSqlDatabase.addDatabase("QSQLITE", connection_name)
+		else:
+			db = QSqlDatabase.addDatabase("QPSQL", connection_name)
+			opts = dbname.split()
+			for opt in opts:
+				if "=" in opt:
+					opt = opt.split("=")
+					if opt[0] == "hostname":
+						db.setHostName(opt[1])
+					elif opt[0] == "port":
+						db.setPort(int(opt[1]))
+					elif opt[0] == "username":
+						db.setUserName(opt[1])
+					elif opt[0] == "password":
+						db.setPassword(opt[1])
+					elif opt[0] == "dbname":
+						dbname = opt[1]
+				else:
+					dbname = opt
+
+		db.setDatabaseName(dbname)
+		if not db.open():
+			raise Exception("Failed to open database " + dbname + " error: " + db.lastError().text())
+		return db, dbname
+
+# Main
+
+def Main():
+	if (len(sys.argv) < 2):
+		print >> sys.stderr, "Usage is: exported-sql-viewer.py {<database name> | --help-only}"
+		raise Exception("Too few arguments")
+
+	dbname = sys.argv[1]
+	if dbname == "--help-only":
+		app = QApplication(sys.argv)
+		mainwindow = HelpOnlyWindow()
+		mainwindow.show()
+		err = app.exec_()
+		sys.exit(err)
+
+	is_sqlite3 = False
+	try:
+		f = open(dbname)
+		if f.read(15) == "SQLite format 3":
+			is_sqlite3 = True
+		f.close()
+	except:
+		pass
+
+	dbref = DBRef(is_sqlite3, dbname)
+	db, dbname = dbref.Open("main")
+	glb = Glb(dbref, db, dbname)
+	app = QApplication(sys.argv)
+	glb.app = app
+	mainwindow = MainWindow(glb)
+	glb.mainwindow = mainwindow
+	mainwindow.show()
+	err = app.exec_()
+	glb.ShutdownInstances()
+	db.close()
+	sys.exit(err)
+
+if __name__ == "__main__":
+	Main()
diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build
index 6c108fa79ae3..0b2b8305c965 100644
--- a/tools/perf/tests/Build
+++ b/tools/perf/tests/Build
@@ -21,6 +21,7 @@ perf-y += python-use.o
 perf-y += bp_signal.o
 perf-y += bp_signal_overflow.o
 perf-y += bp_account.o
+perf-y += wp.o
 perf-y += task-exit.o
 perf-y += sw-clock.o
 perf-y += mmap-thread-lookup.o
diff --git a/tools/perf/tests/attr/test-record-group-sampling b/tools/perf/tests/attr/test-record-group-sampling
index 8a33ca4f9e1f..f0729c454f16 100644
--- a/tools/perf/tests/attr/test-record-group-sampling
+++ b/tools/perf/tests/attr/test-record-group-sampling
@@ -37,4 +37,3 @@ sample_freq=0
 sample_period=0
 freq=0
 write_backward=0
-sample_id_all=0
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index d7a5e1b9aa6f..12c09e0ece71 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -121,6 +121,16 @@ static struct test generic_tests[] = {
 		.is_supported = test__bp_signal_is_supported,
 	},
 	{
+		.desc = "Watchpoint",
+		.func = test__wp,
+		.is_supported = test__wp_is_supported,
+		.subtest = {
+			.skip_if_fail	= false,
+			.get_nr		= test__wp_subtest_get_nr,
+			.get_desc	= test__wp_subtest_get_desc,
+		},
+	},
+	{
 		.desc = "Number of exit events of a simple workload",
 		.func = test__task_exit,
 	},
diff --git a/tools/perf/tests/evsel-tp-sched.c b/tools/perf/tests/evsel-tp-sched.c
index 699561fa512c..5f8501c68da4 100644
--- a/tools/perf/tests/evsel-tp-sched.c
+++ b/tools/perf/tests/evsel-tp-sched.c
@@ -8,7 +8,7 @@
 static int perf_evsel__test_field(struct perf_evsel *evsel, const char *name,
 				  int size, bool should_be_signed)
 {
-	struct format_field *field = perf_evsel__field(evsel, name);
+	struct tep_format_field *field = perf_evsel__field(evsel, name);
 	int is_signed;
 	int ret = 0;
 
@@ -17,7 +17,7 @@ static int perf_evsel__test_field(struct perf_evsel *evsel, const char *name,
 		return -1;
 	}
 
-	is_signed = !!(field->flags | FIELD_IS_SIGNED);
+	is_signed = !!(field->flags | TEP_FIELD_IS_SIGNED);
 	if (should_be_signed && !is_signed) {
 		pr_debug("%s: \"%s\" signedness(%d) is wrong, should be %d\n",
 			 evsel->name, name, is_signed, should_be_signed);
diff --git a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh
index 3013ac8f83d0..cab7b0aea6ea 100755
--- a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh
+++ b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh
@@ -48,7 +48,7 @@ trace_libc_inet_pton_backtrace() {
 	*)
 		eventattr='max-stack=3'
 		echo "getaddrinfo\+0x[[:xdigit:]]+[[:space:]]\($libc\)$" >> $expected
-		echo ".*\+0x[[:xdigit:]]+[[:space:]]\(.*/bin/ping.*\)$" >> $expected
+		echo ".*(\+0x[[:xdigit:]]+|\[unknown\])[[:space:]]\(.*/bin/ping.*\)$" >> $expected
 		;;
 	esac
 
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index a9760e790563..b82f55fcc294 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -59,6 +59,9 @@ int test__python_use(struct test *test, int subtest);
 int test__bp_signal(struct test *test, int subtest);
 int test__bp_signal_overflow(struct test *test, int subtest);
 int test__bp_accounting(struct test *test, int subtest);
+int test__wp(struct test *test, int subtest);
+const char *test__wp_subtest_get_desc(int subtest);
+int test__wp_subtest_get_nr(void);
 int test__task_exit(struct test *test, int subtest);
 int test__mem(struct test *test, int subtest);
 int test__sw_clock_freq(struct test *test, int subtest);
@@ -106,6 +109,7 @@ int test__unit_number__scnprint(struct test *test, int subtest);
 int test__mem2node(struct test *t, int subtest);
 
 bool test__bp_signal_is_supported(void);
+bool test__wp_is_supported(void);
 
 #if defined(__arm__) || defined(__aarch64__)
 #ifdef HAVE_DWARF_UNWIND_SUPPORT
diff --git a/tools/perf/tests/wp.c b/tools/perf/tests/wp.c
new file mode 100644
index 000000000000..f89e6806557b
--- /dev/null
+++ b/tools/perf/tests/wp.c
@@ -0,0 +1,241 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdlib.h>
+#include <sys/ioctl.h>
+#include <linux/hw_breakpoint.h>
+#include "tests.h"
+#include "debug.h"
+#include "cloexec.h"
+
+#define WP_TEST_ASSERT_VAL(fd, text, val)       \
+do {                                            \
+	long long count;                        \
+	wp_read(fd, &count, sizeof(long long)); \
+	TEST_ASSERT_VAL(text, count == val);    \
+} while (0)
+
+volatile u64 data1;
+volatile u8 data2[3];
+
+static int wp_read(int fd, long long *count, int size)
+{
+	int ret = read(fd, count, size);
+
+	if (ret != size) {
+		pr_debug("failed to read: %d\n", ret);
+		return -1;
+	}
+	return 0;
+}
+
+static void get__perf_event_attr(struct perf_event_attr *attr, int wp_type,
+				 void *wp_addr, unsigned long wp_len)
+{
+	memset(attr, 0, sizeof(struct perf_event_attr));
+	attr->type           = PERF_TYPE_BREAKPOINT;
+	attr->size           = sizeof(struct perf_event_attr);
+	attr->config         = 0;
+	attr->bp_type        = wp_type;
+	attr->bp_addr        = (unsigned long)wp_addr;
+	attr->bp_len         = wp_len;
+	attr->sample_period  = 1;
+	attr->sample_type    = PERF_SAMPLE_IP;
+	attr->exclude_kernel = 1;
+	attr->exclude_hv     = 1;
+}
+
+static int __event(int wp_type, void *wp_addr, unsigned long wp_len)
+{
+	int fd;
+	struct perf_event_attr attr;
+
+	get__perf_event_attr(&attr, wp_type, wp_addr, wp_len);
+	fd = sys_perf_event_open(&attr, 0, -1, -1,
+				 perf_event_open_cloexec_flag());
+	if (fd < 0)
+		pr_debug("failed opening event %x\n", attr.bp_type);
+
+	return fd;
+}
+
+static int wp_ro_test(void)
+{
+	int fd;
+	unsigned long tmp, tmp1 = rand();
+
+	fd = __event(HW_BREAKPOINT_R, (void *)&data1, sizeof(data1));
+	if (fd < 0)
+		return -1;
+
+	tmp = data1;
+	WP_TEST_ASSERT_VAL(fd, "RO watchpoint", 1);
+
+	data1 = tmp1 + tmp;
+	WP_TEST_ASSERT_VAL(fd, "RO watchpoint", 1);
+
+	close(fd);
+	return 0;
+}
+
+static int wp_wo_test(void)
+{
+	int fd;
+	unsigned long tmp, tmp1 = rand();
+
+	fd = __event(HW_BREAKPOINT_W, (void *)&data1, sizeof(data1));
+	if (fd < 0)
+		return -1;
+
+	tmp = data1;
+	WP_TEST_ASSERT_VAL(fd, "WO watchpoint", 0);
+
+	data1 = tmp1 + tmp;
+	WP_TEST_ASSERT_VAL(fd, "WO watchpoint", 1);
+
+	close(fd);
+	return 0;
+}
+
+static int wp_rw_test(void)
+{
+	int fd;
+	unsigned long tmp, tmp1 = rand();
+
+	fd = __event(HW_BREAKPOINT_R | HW_BREAKPOINT_W, (void *)&data1,
+		     sizeof(data1));
+	if (fd < 0)
+		return -1;
+
+	tmp = data1;
+	WP_TEST_ASSERT_VAL(fd, "RW watchpoint", 1);
+
+	data1 = tmp1 + tmp;
+	WP_TEST_ASSERT_VAL(fd, "RW watchpoint", 2);
+
+	close(fd);
+	return 0;
+}
+
+static int wp_modify_test(void)
+{
+	int fd, ret;
+	unsigned long tmp = rand();
+	struct perf_event_attr new_attr;
+
+	fd = __event(HW_BREAKPOINT_W, (void *)&data1, sizeof(data1));
+	if (fd < 0)
+		return -1;
+
+	data1 = tmp;
+	WP_TEST_ASSERT_VAL(fd, "Modify watchpoint", 1);
+
+	/* Modify watchpoint with disabled = 1 */
+	get__perf_event_attr(&new_attr, HW_BREAKPOINT_W, (void *)&data2[0],
+			     sizeof(u8) * 2);
+	new_attr.disabled = 1;
+	ret = ioctl(fd, PERF_EVENT_IOC_MODIFY_ATTRIBUTES, &new_attr);
+	if (ret < 0) {
+		pr_debug("ioctl(PERF_EVENT_IOC_MODIFY_ATTRIBUTES) failed\n");
+		close(fd);
+		return ret;
+	}
+
+	data2[1] = tmp; /* Not Counted */
+	WP_TEST_ASSERT_VAL(fd, "Modify watchpoint", 1);
+
+	/* Enable the event */
+	ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
+	if (ret < 0) {
+		pr_debug("Failed to enable event\n");
+		close(fd);
+		return ret;
+	}
+
+	data2[1] = tmp; /* Counted */
+	WP_TEST_ASSERT_VAL(fd, "Modify watchpoint", 2);
+
+	data2[2] = tmp; /* Not Counted */
+	WP_TEST_ASSERT_VAL(fd, "Modify watchpoint", 2);
+
+	close(fd);
+	return 0;
+}
+
+static bool wp_ro_supported(void)
+{
+#if defined (__x86_64__) || defined (__i386__)
+	return false;
+#else
+	return true;
+#endif
+}
+
+static void wp_ro_skip_msg(void)
+{
+#if defined (__x86_64__) || defined (__i386__)
+	pr_debug("Hardware does not support read only watchpoints.\n");
+#endif
+}
+
+static struct {
+	const char *desc;
+	int (*target_func)(void);
+	bool (*is_supported)(void);
+	void (*skip_msg)(void);
+} wp_testcase_table[] = {
+	{
+		.desc = "Read Only Watchpoint",
+		.target_func = &wp_ro_test,
+		.is_supported = &wp_ro_supported,
+		.skip_msg = &wp_ro_skip_msg,
+	},
+	{
+		.desc = "Write Only Watchpoint",
+		.target_func = &wp_wo_test,
+	},
+	{
+		.desc = "Read / Write Watchpoint",
+		.target_func = &wp_rw_test,
+	},
+	{
+		.desc = "Modify Watchpoint",
+		.target_func = &wp_modify_test,
+	},
+};
+
+int test__wp_subtest_get_nr(void)
+{
+	return (int)ARRAY_SIZE(wp_testcase_table);
+}
+
+const char *test__wp_subtest_get_desc(int i)
+{
+	if (i < 0 || i >= (int)ARRAY_SIZE(wp_testcase_table))
+		return NULL;
+	return wp_testcase_table[i].desc;
+}
+
+int test__wp(struct test *test __maybe_unused, int i)
+{
+	if (i < 0 || i >= (int)ARRAY_SIZE(wp_testcase_table))
+		return TEST_FAIL;
+
+	if (wp_testcase_table[i].is_supported &&
+	    !wp_testcase_table[i].is_supported()) {
+		wp_testcase_table[i].skip_msg();
+		return TEST_SKIP;
+	}
+
+	return !wp_testcase_table[i].target_func() ? TEST_OK : TEST_FAIL;
+}
+
+/* The s390 so far does not have support for
+ * instruction breakpoint using the perf_event_open() system call.
+ */
+bool test__wp_is_supported(void)
+{
+#if defined(__s390x__)
+	return false;
+#else
+	return true;
+#endif
+}
diff --git a/tools/perf/trace/beauty/Build b/tools/perf/trace/beauty/Build
index f528ba35e140..304313073242 100644
--- a/tools/perf/trace/beauty/Build
+++ b/tools/perf/trace/beauty/Build
@@ -5,7 +5,9 @@ ifeq ($(SRCARCH),$(filter $(SRCARCH),x86))
 libperf-y += ioctl.o
 endif
 libperf-y += kcmp.o
+libperf-y += mount_flags.o
 libperf-y += pkey_alloc.o
 libperf-y += prctl.o
+libperf-y += sockaddr.o
 libperf-y += socket.o
 libperf-y += statx.o
diff --git a/tools/perf/trace/beauty/beauty.h b/tools/perf/trace/beauty/beauty.h
index 9615af5d412b..039c29039b2c 100644
--- a/tools/perf/trace/beauty/beauty.h
+++ b/tools/perf/trace/beauty/beauty.h
@@ -24,15 +24,43 @@ struct strarray {
 }
 
 size_t strarray__scnprintf(struct strarray *sa, char *bf, size_t size, const char *intfmt, int val);
+size_t strarray__scnprintf_flags(struct strarray *sa, char *bf, size_t size, unsigned long flags);
 
 struct trace;
 struct thread;
 
 size_t pid__scnprintf_fd(struct trace *trace, pid_t pid, int fd, char *bf, size_t size);
 
+extern struct strarray strarray__socket_families;
+
+/**
+ * augmented_arg: extra payload for syscall pointer arguments
+ 
+ * If perf_sample->raw_size is more than what a syscall sys_enter_FOO puts,
+ * then its the arguments contents, so that we can show more than just a
+ * pointer. This will be done initially with eBPF, the start of that is at the
+ * tools/perf/examples/bpf/augmented_syscalls.c example for the openat, but
+ * will eventually be done automagically caching the running kernel tracefs
+ * events data into an eBPF C script, that then gets compiled and its .o file
+ * cached for subsequent use. For char pointers like the ones for 'open' like
+ * syscalls its easy, for the rest we should use DWARF or better, BTF, much
+ * more compact.
+ *
+ * @size: 8 if all we need is an integer, otherwise all of the augmented arg.
+ * @int_arg: will be used for integer like pointer contents, like 'accept's 'upeer_addrlen'
+ * @value: u64 aligned, for structs, pathnames
+ */
+struct augmented_arg {
+	int  size;
+	int  int_arg;
+	u64  value[];
+};
+
 /**
  * @val: value of syscall argument being formatted
  * @args: All the args, use syscall_args__val(arg, nth) to access one
+ * @augmented_args: Extra data that can be collected, for instance, with eBPF for expanding the pathname for open, etc
+ * @augmented_args_size: augmented_args total payload size
  * @thread: tid state (maps, pid, tid, etc)
  * @trace: 'perf trace' internals: all threads, etc
  * @parm: private area, may be an strarray, for instance
@@ -43,6 +71,10 @@ size_t pid__scnprintf_fd(struct trace *trace, pid_t pid, int fd, char *bf, size_
 struct syscall_arg {
 	unsigned long val;
 	unsigned char *args;
+	struct {
+		struct augmented_arg *args;
+		int		     size;
+	} augmented;
 	struct thread *thread;
 	struct trace  *trace;
 	void	      *parm;
@@ -91,6 +123,12 @@ size_t syscall_arg__scnprintf_kcmp_type(char *bf, size_t size, struct syscall_ar
 size_t syscall_arg__scnprintf_kcmp_idx(char *bf, size_t size, struct syscall_arg *arg);
 #define SCA_KCMP_IDX syscall_arg__scnprintf_kcmp_idx
 
+unsigned long syscall_arg__mask_val_mount_flags(struct syscall_arg *arg, unsigned long flags);
+#define SCAMV_MOUNT_FLAGS syscall_arg__mask_val_mount_flags
+
+size_t syscall_arg__scnprintf_mount_flags(char *bf, size_t size, struct syscall_arg *arg);
+#define SCA_MOUNT_FLAGS syscall_arg__scnprintf_mount_flags
+
 size_t syscall_arg__scnprintf_pkey_alloc_access_rights(char *bf, size_t size, struct syscall_arg *arg);
 #define SCA_PKEY_ALLOC_ACCESS_RIGHTS syscall_arg__scnprintf_pkey_alloc_access_rights
 
@@ -106,6 +144,9 @@ size_t syscall_arg__scnprintf_prctl_arg2(char *bf, size_t size, struct syscall_a
 size_t syscall_arg__scnprintf_prctl_arg3(char *bf, size_t size, struct syscall_arg *arg);
 #define SCA_PRCTL_ARG3 syscall_arg__scnprintf_prctl_arg3
 
+size_t syscall_arg__scnprintf_sockaddr(char *bf, size_t size, struct syscall_arg *arg);
+#define SCA_SOCKADDR syscall_arg__scnprintf_sockaddr
+
 size_t syscall_arg__scnprintf_socket_protocol(char *bf, size_t size, struct syscall_arg *arg);
 #define SCA_SK_PROTO syscall_arg__scnprintf_socket_protocol
 
diff --git a/tools/perf/trace/beauty/clone.c b/tools/perf/trace/beauty/clone.c
index d64d049ab991..010406500c30 100644
--- a/tools/perf/trace/beauty/clone.c
+++ b/tools/perf/trace/beauty/clone.c
@@ -1,9 +1,8 @@
+// SPDX-License-Identifier: LGPL-2.1
 /*
  * trace/beauty/cone.c
  *
  *  Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
- *
- * Released under the GPL v2. (and only v2, not any later version)
  */
 
 #include "trace/beauty/beauty.h"
diff --git a/tools/perf/trace/beauty/drm_ioctl.sh b/tools/perf/trace/beauty/drm_ioctl.sh
index 9d3816815e60..9aa94fd523a9 100755
--- a/tools/perf/trace/beauty/drm_ioctl.sh
+++ b/tools/perf/trace/beauty/drm_ioctl.sh
@@ -1,4 +1,5 @@
 #!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
 
 [ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/drm/
 
diff --git a/tools/perf/trace/beauty/eventfd.c b/tools/perf/trace/beauty/eventfd.c
index 5d6a477a6400..db5b9b492113 100644
--- a/tools/perf/trace/beauty/eventfd.c
+++ b/tools/perf/trace/beauty/eventfd.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
 #ifndef EFD_SEMAPHORE
 #define EFD_SEMAPHORE		1
 #endif
diff --git a/tools/perf/trace/beauty/fcntl.c b/tools/perf/trace/beauty/fcntl.c
index 9e8900c13cb1..e6de31674e24 100644
--- a/tools/perf/trace/beauty/fcntl.c
+++ b/tools/perf/trace/beauty/fcntl.c
@@ -1,9 +1,8 @@
+// SPDX-License-Identifier: LGPL-2.1
 /*
  * trace/beauty/fcntl.c
  *
  *  Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
- *
- * Released under the GPL v2. (and only v2, not any later version)
  */
 
 #include "trace/beauty/beauty.h"
diff --git a/tools/perf/trace/beauty/flock.c b/tools/perf/trace/beauty/flock.c
index c4ff6ad30b06..cf02ae5f0ba6 100644
--- a/tools/perf/trace/beauty/flock.c
+++ b/tools/perf/trace/beauty/flock.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
 
 #include "trace/beauty/beauty.h"
 #include <linux/kernel.h>
diff --git a/tools/perf/trace/beauty/futex_op.c b/tools/perf/trace/beauty/futex_op.c
index 61850fbc85ff..1136bde56406 100644
--- a/tools/perf/trace/beauty/futex_op.c
+++ b/tools/perf/trace/beauty/futex_op.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
 #include <linux/futex.h>
 
 #ifndef FUTEX_WAIT_BITSET
diff --git a/tools/perf/trace/beauty/futex_val3.c b/tools/perf/trace/beauty/futex_val3.c
index 26f6b3253511..138b7d588a70 100644
--- a/tools/perf/trace/beauty/futex_val3.c
+++ b/tools/perf/trace/beauty/futex_val3.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
 #include <linux/futex.h>
 
 #ifndef FUTEX_BITSET_MATCH_ANY
diff --git a/tools/perf/trace/beauty/ioctl.c b/tools/perf/trace/beauty/ioctl.c
index 1be3b4cf0827..5d2a7fd8d407 100644
--- a/tools/perf/trace/beauty/ioctl.c
+++ b/tools/perf/trace/beauty/ioctl.c
@@ -1,9 +1,8 @@
+// SPDX-License-Identifier: LGPL-2.1
 /*
  * trace/beauty/ioctl.c
  *
  *  Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
- *
- * Released under the GPL v2. (and only v2, not any later version)
  */
 
 #include "trace/beauty/beauty.h"
diff --git a/tools/perf/trace/beauty/kcmp.c b/tools/perf/trace/beauty/kcmp.c
index f62040eb9d5c..b276a274f203 100644
--- a/tools/perf/trace/beauty/kcmp.c
+++ b/tools/perf/trace/beauty/kcmp.c
@@ -1,9 +1,8 @@
+// SPDX-License-Identifier: LGPL-2.1
 /*
  * trace/beauty/kcmp.c
  *
  *  Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
- *
- * Released under the GPL v2. (and only v2, not any later version)
  */
 
 #include "trace/beauty/beauty.h"
diff --git a/tools/perf/trace/beauty/kcmp_type.sh b/tools/perf/trace/beauty/kcmp_type.sh
index a3c304caa336..df8b17486d57 100755
--- a/tools/perf/trace/beauty/kcmp_type.sh
+++ b/tools/perf/trace/beauty/kcmp_type.sh
@@ -1,4 +1,5 @@
 #!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
 
 [ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/
 
diff --git a/tools/perf/trace/beauty/kvm_ioctl.sh b/tools/perf/trace/beauty/kvm_ioctl.sh
index c4699fd46bb6..4ce54f5bf756 100755
--- a/tools/perf/trace/beauty/kvm_ioctl.sh
+++ b/tools/perf/trace/beauty/kvm_ioctl.sh
@@ -1,4 +1,5 @@
 #!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
 
 [ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/
 
diff --git a/tools/perf/trace/beauty/madvise_behavior.sh b/tools/perf/trace/beauty/madvise_behavior.sh
index 431639eb4d29..4527d290cdfc 100755
--- a/tools/perf/trace/beauty/madvise_behavior.sh
+++ b/tools/perf/trace/beauty/madvise_behavior.sh
@@ -1,4 +1,5 @@
 #!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
 
 [ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/asm-generic/
 
diff --git a/tools/perf/trace/beauty/mmap.c b/tools/perf/trace/beauty/mmap.c
index 9f68077b241b..c534bd96ef5c 100644
--- a/tools/perf/trace/beauty/mmap.c
+++ b/tools/perf/trace/beauty/mmap.c
@@ -1,5 +1,6 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
 #include <uapi/linux/mman.h>
+#include <linux/log2.h>
 
 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
 					       struct syscall_arg *arg)
@@ -30,50 +31,23 @@ static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
 
 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
 
+static size_t mmap__scnprintf_flags(unsigned long flags, char *bf, size_t size)
+{
+#include "trace/beauty/generated/mmap_flags_array.c"
+       static DEFINE_STRARRAY(mmap_flags);
+
+       return strarray__scnprintf_flags(&strarray__mmap_flags, bf, size, flags);
+}
+
 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
 						struct syscall_arg *arg)
 {
-	int printed = 0, flags = arg->val;
+	unsigned long flags = arg->val;
 
 	if (flags & MAP_ANONYMOUS)
 		arg->mask |= (1 << 4) | (1 << 5); /* Mask 4th ('fd') and 5th ('offset') args, ignored */
 
-#define	P_MMAP_FLAG(n) \
-	if (flags & MAP_##n) { \
-		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
-		flags &= ~MAP_##n; \
-	}
-
-	P_MMAP_FLAG(SHARED);
-	P_MMAP_FLAG(PRIVATE);
-#ifdef MAP_32BIT
-	P_MMAP_FLAG(32BIT);
-#endif
-	P_MMAP_FLAG(ANONYMOUS);
-	P_MMAP_FLAG(DENYWRITE);
-	P_MMAP_FLAG(EXECUTABLE);
-	P_MMAP_FLAG(FILE);
-	P_MMAP_FLAG(FIXED);
-#ifdef MAP_FIXED_NOREPLACE
-	P_MMAP_FLAG(FIXED_NOREPLACE);
-#endif
-	P_MMAP_FLAG(GROWSDOWN);
-	P_MMAP_FLAG(HUGETLB);
-	P_MMAP_FLAG(LOCKED);
-	P_MMAP_FLAG(NONBLOCK);
-	P_MMAP_FLAG(NORESERVE);
-	P_MMAP_FLAG(POPULATE);
-	P_MMAP_FLAG(STACK);
-	P_MMAP_FLAG(UNINITIALIZED);
-#ifdef MAP_SYNC
-	P_MMAP_FLAG(SYNC);
-#endif
-#undef P_MMAP_FLAG
-
-	if (flags)
-		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
-
-	return printed;
+	return mmap__scnprintf_flags(flags, bf, size);
 }
 
 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
diff --git a/tools/perf/trace/beauty/mmap_flags.sh b/tools/perf/trace/beauty/mmap_flags.sh
new file mode 100755
index 000000000000..22c3fdca8975
--- /dev/null
+++ b/tools/perf/trace/beauty/mmap_flags.sh
@@ -0,0 +1,32 @@
+#!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
+
+if [ $# -ne 2 ] ; then
+	[ $# -eq 1 ] && hostarch=$1 || hostarch=`uname -m | sed -e s/i.86/x86/ -e s/x86_64/x86/`
+	header_dir=tools/include/uapi/asm-generic
+	arch_header_dir=tools/arch/${hostarch}/include/uapi/asm
+else
+	header_dir=$1
+	arch_header_dir=$2
+fi
+
+arch_mman=${arch_header_dir}/mman.h
+
+# those in egrep -vw are flags, we want just the bits
+
+printf "static const char *mmap_flags[] = {\n"
+regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MAP_([[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*'
+egrep -q $regex ${arch_mman} && \
+(egrep $regex ${arch_mman} | \
+	sed -r "s/$regex/\2 \1/g"	| \
+	xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n")
+egrep -q '#[[:space:]]*include[[:space:]]+<uapi/asm-generic/mman.*' ${arch_mman} &&
+(egrep $regex ${header_dir}/mman-common.h | \
+	egrep -vw 'MAP_(UNINITIALIZED|TYPE|SHARED_VALIDATE)' | \
+	sed -r "s/$regex/\2 \1/g"	| \
+	xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n")
+egrep -q '#[[:space:]]*include[[:space:]]+<uapi/asm-generic/mman.h>.*' ${arch_mman} &&
+(egrep $regex ${header_dir}/mman.h | \
+	sed -r "s/$regex/\2 \1/g"	| \
+	xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n")
+printf "};\n"
diff --git a/tools/perf/trace/beauty/mode_t.c b/tools/perf/trace/beauty/mode_t.c
index d929ad7dd97b..6879d36d3004 100644
--- a/tools/perf/trace/beauty/mode_t.c
+++ b/tools/perf/trace/beauty/mode_t.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <unistd.h>
diff --git a/tools/perf/trace/beauty/mount_flags.c b/tools/perf/trace/beauty/mount_flags.c
new file mode 100644
index 000000000000..712935c6620a
--- /dev/null
+++ b/tools/perf/trace/beauty/mount_flags.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * trace/beauty/mount_flags.c
+ *
+ *  Copyright (C) 2018, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ */
+
+#include "trace/beauty/beauty.h"
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <linux/log2.h>
+#include <sys/mount.h>
+
+static size_t mount__scnprintf_flags(unsigned long flags, char *bf, size_t size)
+{
+#include "trace/beauty/generated/mount_flags_array.c"
+	static DEFINE_STRARRAY(mount_flags);
+
+	return strarray__scnprintf_flags(&strarray__mount_flags, bf, size, flags);
+}
+
+unsigned long syscall_arg__mask_val_mount_flags(struct syscall_arg *arg __maybe_unused, unsigned long flags)
+{
+	// do_mount in fs/namespace.c:
+	/*
+	 * Pre-0.97 versions of mount() didn't have a flags word.  When the
+	 * flags word was introduced its top half was required to have the
+	 * magic value 0xC0ED, and this remained so until 2.4.0-test9.
+	 * Therefore, if this magic number is present, it carries no
+	 * information and must be discarded.
+	 */
+	if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
+		flags &= ~MS_MGC_MSK;
+
+	return flags;
+}
+
+size_t syscall_arg__scnprintf_mount_flags(char *bf, size_t size, struct syscall_arg *arg)
+{
+	unsigned long flags = arg->val;
+
+	return mount__scnprintf_flags(flags, bf, size);
+}
diff --git a/tools/perf/trace/beauty/mount_flags.sh b/tools/perf/trace/beauty/mount_flags.sh
new file mode 100755
index 000000000000..45547573a1db
--- /dev/null
+++ b/tools/perf/trace/beauty/mount_flags.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
+
+[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/
+
+printf "static const char *mount_flags[] = {\n"
+regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MS_([[:alnum:]_]+)[[:space:]]+([[:digit:]]+)[[:space:]]*.*'
+egrep $regex ${header_dir}/fs.h | egrep -v '(MSK|VERBOSE|MGC_VAL)\>' | \
+	sed -r "s/$regex/\2 \2 \1/g" | sort -n | \
+	xargs printf "\t[%s ? (ilog2(%s) + 1) : 0] = \"%s\",\n"
+regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MS_([[:alnum:]_]+)[[:space:]]+\(1<<([[:digit:]]+)\)[[:space:]]*.*'
+egrep $regex ${header_dir}/fs.h | \
+	sed -r "s/$regex/\2 \1/g" | \
+	xargs printf "\t[%s + 1] = \"%s\",\n"
+printf "};\n"
diff --git a/tools/perf/trace/beauty/msg_flags.c b/tools/perf/trace/beauty/msg_flags.c
index c064d6aae659..1b9d6306d274 100644
--- a/tools/perf/trace/beauty/msg_flags.c
+++ b/tools/perf/trace/beauty/msg_flags.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
 #include <sys/types.h>
 #include <sys/socket.h>
 
diff --git a/tools/perf/trace/beauty/open_flags.c b/tools/perf/trace/beauty/open_flags.c
index 6aec6178a99d..cc673fec9184 100644
--- a/tools/perf/trace/beauty/open_flags.c
+++ b/tools/perf/trace/beauty/open_flags.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <fcntl.h>
diff --git a/tools/perf/trace/beauty/perf_event_open.c b/tools/perf/trace/beauty/perf_event_open.c
index 2bafd7c995ff..981185c1974b 100644
--- a/tools/perf/trace/beauty/perf_event_open.c
+++ b/tools/perf/trace/beauty/perf_event_open.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
 #ifndef PERF_FLAG_FD_NO_GROUP
 # define PERF_FLAG_FD_NO_GROUP		(1UL << 0)
 #endif
diff --git a/tools/perf/trace/beauty/perf_ioctl.sh b/tools/perf/trace/beauty/perf_ioctl.sh
index 6492c74df928..9aabd9743ef6 100755
--- a/tools/perf/trace/beauty/perf_ioctl.sh
+++ b/tools/perf/trace/beauty/perf_ioctl.sh
@@ -1,4 +1,5 @@
 #!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
 
 [ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/
 
diff --git a/tools/perf/trace/beauty/pid.c b/tools/perf/trace/beauty/pid.c
index 0313df342830..1a6acc46807b 100644
--- a/tools/perf/trace/beauty/pid.c
+++ b/tools/perf/trace/beauty/pid.c
@@ -1,4 +1,5 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
+
 size_t syscall_arg__scnprintf_pid(char *bf, size_t size, struct syscall_arg *arg)
 {
 	int pid = arg->val;
diff --git a/tools/perf/trace/beauty/pkey_alloc.c b/tools/perf/trace/beauty/pkey_alloc.c
index 2ba784a3734a..1b8ed4cac815 100644
--- a/tools/perf/trace/beauty/pkey_alloc.c
+++ b/tools/perf/trace/beauty/pkey_alloc.c
@@ -1,40 +1,36 @@
+// SPDX-License-Identifier: LGPL-2.1
 /*
  * trace/beauty/pkey_alloc.c
  *
  *  Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
- *
- * Released under the GPL v2. (and only v2, not any later version)
  */
 
 #include "trace/beauty/beauty.h"
 #include <linux/kernel.h>
 #include <linux/log2.h>
 
-static size_t pkey_alloc__scnprintf_access_rights(int access_rights, char *bf, size_t size)
+size_t strarray__scnprintf_flags(struct strarray *sa, char *bf, size_t size, unsigned long flags)
 {
 	int i, printed = 0;
 
-#include "trace/beauty/generated/pkey_alloc_access_rights_array.c"
-	static DEFINE_STRARRAY(pkey_alloc_access_rights);
-
-	if (access_rights == 0) {
-		const char *s = strarray__pkey_alloc_access_rights.entries[0];
+	if (flags == 0) {
+		const char *s = sa->entries[0];
 		if (s)
 			return scnprintf(bf, size, "%s", s);
 		return scnprintf(bf, size, "%d", 0);
 	}
 
-	for (i = 1; i < strarray__pkey_alloc_access_rights.nr_entries; ++i) {
-		int bit = 1 << (i - 1);
+	for (i = 1; i < sa->nr_entries; ++i) {
+		unsigned long bit = 1UL << (i - 1);
 
-		if (!(access_rights & bit))
+		if (!(flags & bit))
 			continue;
 
 		if (printed != 0)
 			printed += scnprintf(bf + printed, size - printed, "|");
 
-		if (strarray__pkey_alloc_access_rights.entries[i] != NULL)
-			printed += scnprintf(bf + printed, size - printed, "%s", strarray__pkey_alloc_access_rights.entries[i]);
+		if (sa->entries[i] != NULL)
+			printed += scnprintf(bf + printed, size - printed, "%s", sa->entries[i]);
 		else
 			printed += scnprintf(bf + printed, size - printed, "0x%#", bit);
 	}
@@ -42,6 +38,14 @@ static size_t pkey_alloc__scnprintf_access_rights(int access_rights, char *bf, s
 	return printed;
 }
 
+static size_t pkey_alloc__scnprintf_access_rights(int access_rights, char *bf, size_t size)
+{
+#include "trace/beauty/generated/pkey_alloc_access_rights_array.c"
+	static DEFINE_STRARRAY(pkey_alloc_access_rights);
+
+	return strarray__scnprintf_flags(&strarray__pkey_alloc_access_rights, bf, size, access_rights);
+}
+
 size_t syscall_arg__scnprintf_pkey_alloc_access_rights(char *bf, size_t size, struct syscall_arg *arg)
 {
 	unsigned long cmd = arg->val;
diff --git a/tools/perf/trace/beauty/pkey_alloc_access_rights.sh b/tools/perf/trace/beauty/pkey_alloc_access_rights.sh
index e0a51aeb20b2..f8f1b560cf8a 100755
--- a/tools/perf/trace/beauty/pkey_alloc_access_rights.sh
+++ b/tools/perf/trace/beauty/pkey_alloc_access_rights.sh
@@ -1,4 +1,5 @@
 #!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
 
 [ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/asm-generic/
 
diff --git a/tools/perf/trace/beauty/prctl.c b/tools/perf/trace/beauty/prctl.c
index 246130dad6c4..be7a5d395975 100644
--- a/tools/perf/trace/beauty/prctl.c
+++ b/tools/perf/trace/beauty/prctl.c
@@ -1,9 +1,8 @@
+// SPDX-License-Identifier: LGPL-2.1
 /*
  * trace/beauty/prctl.c
  *
  *  Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
- *
- * Released under the GPL v2. (and only v2, not any later version)
  */
 
 #include "trace/beauty/beauty.h"
diff --git a/tools/perf/trace/beauty/prctl_option.sh b/tools/perf/trace/beauty/prctl_option.sh
index f24722146ebe..d32f8f1124af 100755
--- a/tools/perf/trace/beauty/prctl_option.sh
+++ b/tools/perf/trace/beauty/prctl_option.sh
@@ -1,4 +1,5 @@
 #!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
 
 [ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/
 
diff --git a/tools/perf/trace/beauty/sched_policy.c b/tools/perf/trace/beauty/sched_policy.c
index ba5096ae76b6..48f2b5c9aa3e 100644
--- a/tools/perf/trace/beauty/sched_policy.c
+++ b/tools/perf/trace/beauty/sched_policy.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
 #include <sched.h>
 
 /*
diff --git a/tools/perf/trace/beauty/seccomp.c b/tools/perf/trace/beauty/seccomp.c
index b7097fd5fed9..e36156b19c70 100644
--- a/tools/perf/trace/beauty/seccomp.c
+++ b/tools/perf/trace/beauty/seccomp.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
 #ifndef SECCOMP_SET_MODE_STRICT
 #define SECCOMP_SET_MODE_STRICT 0
 #endif
diff --git a/tools/perf/trace/beauty/signum.c b/tools/perf/trace/beauty/signum.c
index bde18a53f090..587fec545b8a 100644
--- a/tools/perf/trace/beauty/signum.c
+++ b/tools/perf/trace/beauty/signum.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
 #include <signal.h>
 
 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
diff --git a/tools/perf/trace/beauty/sndrv_ctl_ioctl.sh b/tools/perf/trace/beauty/sndrv_ctl_ioctl.sh
index eb511bb5fbd3..e0803b957593 100755
--- a/tools/perf/trace/beauty/sndrv_ctl_ioctl.sh
+++ b/tools/perf/trace/beauty/sndrv_ctl_ioctl.sh
@@ -1,4 +1,5 @@
 #!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
 
 [ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/sound/
 
diff --git a/tools/perf/trace/beauty/sndrv_pcm_ioctl.sh b/tools/perf/trace/beauty/sndrv_pcm_ioctl.sh
index 6818392968b2..7a464a7bf913 100755
--- a/tools/perf/trace/beauty/sndrv_pcm_ioctl.sh
+++ b/tools/perf/trace/beauty/sndrv_pcm_ioctl.sh
@@ -1,4 +1,5 @@
 #!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
 
 [ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/sound/
 
diff --git a/tools/perf/trace/beauty/sockaddr.c b/tools/perf/trace/beauty/sockaddr.c
new file mode 100644
index 000000000000..9410ad230f10
--- /dev/null
+++ b/tools/perf/trace/beauty/sockaddr.c
@@ -0,0 +1,76 @@
+// SPDX-License-Identifier: LGPL-2.1
+// Copyright (C) 2018, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+
+#include "trace/beauty/beauty.h"
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/un.h>
+#include <arpa/inet.h>
+
+static const char *socket_families[] = {
+	"UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
+	"BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
+	"SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
+	"RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
+	"BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
+	"ALG", "NFC", "VSOCK",
+};
+DEFINE_STRARRAY(socket_families);
+
+static size_t af_inet__scnprintf(struct sockaddr *sa, char *bf, size_t size)
+{
+	struct sockaddr_in *sin = (struct sockaddr_in *)sa;
+	char tmp[16];
+	return scnprintf(bf, size, ", port: %d, addr: %s", ntohs(sin->sin_port),
+			 inet_ntop(sin->sin_family, &sin->sin_addr, tmp, sizeof(tmp)));
+}
+
+static size_t af_inet6__scnprintf(struct sockaddr *sa, char *bf, size_t size)
+{
+	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa;
+	u32 flowinfo = ntohl(sin6->sin6_flowinfo);
+	char tmp[512];
+	size_t printed = scnprintf(bf, size, ", port: %d, addr: %s", ntohs(sin6->sin6_port),
+				   inet_ntop(sin6->sin6_family, &sin6->sin6_addr, tmp, sizeof(tmp)));
+	if (flowinfo != 0)
+		printed += scnprintf(bf + printed, size - printed, ", flowinfo: %lu", flowinfo);
+	if (sin6->sin6_scope_id != 0)
+		printed += scnprintf(bf + printed, size - printed, ", scope_id: %lu", sin6->sin6_scope_id);
+
+	return printed;
+}
+
+static size_t af_local__scnprintf(struct sockaddr *sa, char *bf, size_t size)
+{
+	struct sockaddr_un *sun = (struct sockaddr_un *)sa;
+	return scnprintf(bf, size, ", path: %s", sun->sun_path);
+}
+
+static size_t (*af_scnprintfs[])(struct sockaddr *sa, char *bf, size_t size) = {
+	[AF_LOCAL] = af_local__scnprintf,
+	[AF_INET]  = af_inet__scnprintf,
+	[AF_INET6] = af_inet6__scnprintf,
+};
+
+static size_t syscall_arg__scnprintf_augmented_sockaddr(struct syscall_arg *arg, char *bf, size_t size)
+{
+	struct sockaddr *sa = (struct sockaddr *)arg->augmented.args;
+	char family[32];
+	size_t printed;
+
+	strarray__scnprintf(&strarray__socket_families, family, sizeof(family), "%d", sa->sa_family);
+	printed = scnprintf(bf, size, "{ .family: %s", family);
+
+	if (sa->sa_family < ARRAY_SIZE(af_scnprintfs) && af_scnprintfs[sa->sa_family])
+		printed += af_scnprintfs[sa->sa_family](sa, bf + printed, size - printed);
+
+	return printed + scnprintf(bf + printed, size - printed, " }");
+}
+
+size_t syscall_arg__scnprintf_sockaddr(char *bf, size_t size, struct syscall_arg *arg)
+{
+	if (arg->augmented.args)
+		return syscall_arg__scnprintf_augmented_sockaddr(arg, bf, size);
+
+	return scnprintf(bf, size, "%#x", arg->val);
+}
diff --git a/tools/perf/trace/beauty/socket.c b/tools/perf/trace/beauty/socket.c
index 65227269384b..d971a2596417 100644
--- a/tools/perf/trace/beauty/socket.c
+++ b/tools/perf/trace/beauty/socket.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
 /*
  * trace/beauty/socket.c
  *
diff --git a/tools/perf/trace/beauty/socket_ipproto.sh b/tools/perf/trace/beauty/socket_ipproto.sh
index a3cc24633bec..de0f2f29017f 100755
--- a/tools/perf/trace/beauty/socket_ipproto.sh
+++ b/tools/perf/trace/beauty/socket_ipproto.sh
@@ -1,4 +1,5 @@
 #!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
 
 [ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/
 
diff --git a/tools/perf/trace/beauty/socket_type.c b/tools/perf/trace/beauty/socket_type.c
index bca26aef4a77..a63a9a332aa0 100644
--- a/tools/perf/trace/beauty/socket_type.c
+++ b/tools/perf/trace/beauty/socket_type.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
 #include <sys/types.h>
 #include <sys/socket.h>
 
diff --git a/tools/perf/trace/beauty/statx.c b/tools/perf/trace/beauty/statx.c
index 5643b692af4c..630f2760dd66 100644
--- a/tools/perf/trace/beauty/statx.c
+++ b/tools/perf/trace/beauty/statx.c
@@ -1,9 +1,8 @@
+// SPDX-License-Identifier: LGPL-2.1
 /*
  * trace/beauty/statx.c
  *
  *  Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
- *
- * Released under the GPL v2. (and only v2, not any later version)
  */
 
 #include "trace/beauty/beauty.h"
diff --git a/tools/perf/trace/beauty/vhost_virtio_ioctl.sh b/tools/perf/trace/beauty/vhost_virtio_ioctl.sh
index 0f6a5197d0be..439773daaf77 100755
--- a/tools/perf/trace/beauty/vhost_virtio_ioctl.sh
+++ b/tools/perf/trace/beauty/vhost_virtio_ioctl.sh
@@ -1,4 +1,5 @@
 #!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
 
 [ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/
 
diff --git a/tools/perf/trace/beauty/waitid_options.c b/tools/perf/trace/beauty/waitid_options.c
index 8465281a093d..42ff58ad613b 100644
--- a/tools/perf/trace/beauty/waitid_options.c
+++ b/tools/perf/trace/beauty/waitid_options.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
 #include <sys/types.h>
 #include <sys/wait.h>
 
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 7efe15b9618d..ecd9f9ceda77 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -73,6 +73,7 @@ libperf-y += vdso.o
 libperf-y += counts.o
 libperf-y += stat.o
 libperf-y += stat-shadow.o
+libperf-y += stat-display.o
 libperf-y += record.o
 libperf-y += srcline.o
 libperf-y += data.o
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 28cd6a17491b..6936daf89ddd 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -139,6 +139,7 @@ static int arch__associate_ins_ops(struct arch* arch, const char *name, struct i
 #include "arch/x86/annotate/instructions.c"
 #include "arch/powerpc/annotate/instructions.c"
 #include "arch/s390/annotate/instructions.c"
+#include "arch/sparc/annotate/instructions.c"
 
 static struct arch architectures[] = {
 	{
@@ -170,6 +171,13 @@ static struct arch architectures[] = {
 			.comment_char = '#',
 		},
 	},
+	{
+		.name = "sparc",
+		.init = sparc__annotate_init,
+		.objdump = {
+			.comment_char = '#',
+		},
+	},
 };
 
 static void ins__delete(struct ins_operands *ops)
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index db1511359c5e..72d5ba2479bf 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -906,9 +906,8 @@ out_free:
 	return err;
 }
 
-int perf_event__process_auxtrace_info(struct perf_tool *tool __maybe_unused,
-				      union perf_event *event,
-				      struct perf_session *session)
+int perf_event__process_auxtrace_info(struct perf_session *session,
+				      union perf_event *event)
 {
 	enum auxtrace_type type = event->auxtrace_info.type;
 
@@ -932,9 +931,8 @@ int perf_event__process_auxtrace_info(struct perf_tool *tool __maybe_unused,
 	}
 }
 
-s64 perf_event__process_auxtrace(struct perf_tool *tool,
-				 union perf_event *event,
-				 struct perf_session *session)
+s64 perf_event__process_auxtrace(struct perf_session *session,
+				 union perf_event *event)
 {
 	s64 err;
 
@@ -950,7 +948,7 @@ s64 perf_event__process_auxtrace(struct perf_tool *tool,
 	if (!session->auxtrace || event->header.type != PERF_RECORD_AUXTRACE)
 		return -EINVAL;
 
-	err = session->auxtrace->process_auxtrace_event(session, event, tool);
+	err = session->auxtrace->process_auxtrace_event(session, event, session->tool);
 	if (err < 0)
 		return err;
 
@@ -964,16 +962,23 @@ s64 perf_event__process_auxtrace(struct perf_tool *tool,
 #define PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ	64
 #define PERF_ITRACE_MAX_LAST_BRANCH_SZ		1024
 
-void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts)
+void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts,
+				    bool no_sample)
 {
-	synth_opts->instructions = true;
 	synth_opts->branches = true;
 	synth_opts->transactions = true;
 	synth_opts->ptwrites = true;
 	synth_opts->pwr_events = true;
 	synth_opts->errors = true;
-	synth_opts->period_type = PERF_ITRACE_DEFAULT_PERIOD_TYPE;
-	synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD;
+	if (no_sample) {
+		synth_opts->period_type = PERF_ITRACE_PERIOD_INSTRUCTIONS;
+		synth_opts->period = 1;
+		synth_opts->calls = true;
+	} else {
+		synth_opts->instructions = true;
+		synth_opts->period_type = PERF_ITRACE_DEFAULT_PERIOD_TYPE;
+		synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD;
+	}
 	synth_opts->callchain_sz = PERF_ITRACE_DEFAULT_CALLCHAIN_SZ;
 	synth_opts->last_branch_sz = PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ;
 	synth_opts->initial_skip = 0;
@@ -1001,7 +1006,7 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
 	}
 
 	if (!str) {
-		itrace_synth_opts__set_default(synth_opts);
+		itrace_synth_opts__set_default(synth_opts, false);
 		return 0;
 	}
 
@@ -1185,9 +1190,8 @@ void events_stats__auxtrace_error_warn(const struct events_stats *stats)
 	}
 }
 
-int perf_event__process_auxtrace_error(struct perf_tool *tool __maybe_unused,
-				       union perf_event *event,
-				       struct perf_session *session)
+int perf_event__process_auxtrace_error(struct perf_session *session,
+				       union perf_event *event)
 {
 	if (auxtrace__dont_decode(session))
 		return 0;
@@ -1196,11 +1200,12 @@ int perf_event__process_auxtrace_error(struct perf_tool *tool __maybe_unused,
 	return 0;
 }
 
-static int __auxtrace_mmap__read(struct auxtrace_mmap *mm,
+static int __auxtrace_mmap__read(struct perf_mmap *map,
 				 struct auxtrace_record *itr,
 				 struct perf_tool *tool, process_auxtrace_t fn,
 				 bool snapshot, size_t snapshot_size)
 {
+	struct auxtrace_mmap *mm = &map->auxtrace_mmap;
 	u64 head, old = mm->prev, offset, ref;
 	unsigned char *data = mm->base;
 	size_t size, head_off, old_off, len1, len2, padding;
@@ -1287,7 +1292,7 @@ static int __auxtrace_mmap__read(struct auxtrace_mmap *mm,
 	ev.auxtrace.tid = mm->tid;
 	ev.auxtrace.cpu = mm->cpu;
 
-	if (fn(tool, &ev, data1, len1, data2, len2))
+	if (fn(tool, map, &ev, data1, len1, data2, len2))
 		return -1;
 
 	mm->prev = head;
@@ -1306,18 +1311,18 @@ static int __auxtrace_mmap__read(struct auxtrace_mmap *mm,
 	return 1;
 }
 
-int auxtrace_mmap__read(struct auxtrace_mmap *mm, struct auxtrace_record *itr,
+int auxtrace_mmap__read(struct perf_mmap *map, struct auxtrace_record *itr,
 			struct perf_tool *tool, process_auxtrace_t fn)
 {
-	return __auxtrace_mmap__read(mm, itr, tool, fn, false, 0);
+	return __auxtrace_mmap__read(map, itr, tool, fn, false, 0);
 }
 
-int auxtrace_mmap__read_snapshot(struct auxtrace_mmap *mm,
+int auxtrace_mmap__read_snapshot(struct perf_mmap *map,
 				 struct auxtrace_record *itr,
 				 struct perf_tool *tool, process_auxtrace_t fn,
 				 size_t snapshot_size)
 {
-	return __auxtrace_mmap__read(mm, itr, tool, fn, true, snapshot_size);
+	return __auxtrace_mmap__read(map, itr, tool, fn, true, snapshot_size);
 }
 
 /**
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index 71fc3bd74299..8e50f96d4b23 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -23,6 +23,7 @@
 #include <linux/list.h>
 #include <linux/perf_event.h>
 #include <linux/types.h>
+#include <asm/bitsperlong.h>
 
 #include "../perf.h"
 #include "event.h"
@@ -33,6 +34,7 @@ union perf_event;
 struct perf_session;
 struct perf_evlist;
 struct perf_tool;
+struct perf_mmap;
 struct option;
 struct record_opts;
 struct auxtrace_info_event;
@@ -56,6 +58,7 @@ enum itrace_period_type {
 /**
  * struct itrace_synth_opts - AUX area tracing synthesis options.
  * @set: indicates whether or not options have been set
+ * @default_no_sample: Default to no sampling.
  * @inject: indicates the event (not just the sample) must be fully synthesized
  *          because 'perf inject' will write it out
  * @instructions: whether to synthesize 'instructions' events
@@ -80,6 +83,7 @@ enum itrace_period_type {
  */
 struct itrace_synth_opts {
 	bool			set;
+	bool			default_no_sample;
 	bool			inject;
 	bool			instructions;
 	bool			branches;
@@ -434,13 +438,14 @@ void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp,
 				   bool per_cpu);
 
 typedef int (*process_auxtrace_t)(struct perf_tool *tool,
+				  struct perf_mmap *map,
 				  union perf_event *event, void *data1,
 				  size_t len1, void *data2, size_t len2);
 
-int auxtrace_mmap__read(struct auxtrace_mmap *mm, struct auxtrace_record *itr,
+int auxtrace_mmap__read(struct perf_mmap *map, struct auxtrace_record *itr,
 			struct perf_tool *tool, process_auxtrace_t fn);
 
-int auxtrace_mmap__read_snapshot(struct auxtrace_mmap *mm,
+int auxtrace_mmap__read_snapshot(struct perf_mmap *map,
 				 struct auxtrace_record *itr,
 				 struct perf_tool *tool, process_auxtrace_t fn,
 				 size_t snapshot_size);
@@ -517,18 +522,16 @@ int perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr,
 					 struct perf_tool *tool,
 					 struct perf_session *session,
 					 perf_event__handler_t process);
-int perf_event__process_auxtrace_info(struct perf_tool *tool,
-				      union perf_event *event,
-				      struct perf_session *session);
-s64 perf_event__process_auxtrace(struct perf_tool *tool,
-				 union perf_event *event,
-				 struct perf_session *session);
-int perf_event__process_auxtrace_error(struct perf_tool *tool,
-				       union perf_event *event,
-				       struct perf_session *session);
+int perf_event__process_auxtrace_info(struct perf_session *session,
+				      union perf_event *event);
+s64 perf_event__process_auxtrace(struct perf_session *session,
+				 union perf_event *event);
+int perf_event__process_auxtrace_error(struct perf_session *session,
+				       union perf_event *event);
 int itrace_parse_synth_opts(const struct option *opt, const char *str,
 			    int unset);
-void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts);
+void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts,
+				    bool no_sample);
 
 size_t perf_event__fprintf_auxtrace_error(union perf_event *event, FILE *fp);
 void perf_session__auxtrace_error_inc(struct perf_session *session,
@@ -577,6 +580,23 @@ static inline void auxtrace__free(struct perf_session *session)
 	return session->auxtrace->free(session);
 }
 
+#define ITRACE_HELP \
+"				i:	    		synthesize instructions events\n"		\
+"				b:	    		synthesize branches events\n"		\
+"				c:	    		synthesize branches events (calls only)\n"	\
+"				r:	    		synthesize branches events (returns only)\n" \
+"				x:	    		synthesize transactions events\n"		\
+"				w:	    		synthesize ptwrite events\n"		\
+"				p:	    		synthesize power events\n"			\
+"				e:	    		synthesize error events\n"			\
+"				d:	    		create a debug log\n"			\
+"				g[len]:     		synthesize a call chain (use with i or x)\n" \
+"				l[len]:     		synthesize last branch entries (use with i or x)\n" \
+"				sNUMBER:    		skip initial number of events\n"		\
+"				PERIOD[ns|us|ms|i|t]:   specify period to sample stream\n" \
+"				concatenate multiple options. Default is ibxwpe or cewp\n"
+
+
 #else
 
 static inline struct auxtrace_record *
@@ -717,6 +737,8 @@ void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp,
 				   struct perf_evlist *evlist, int idx,
 				   bool per_cpu);
 
+#define ITRACE_HELP ""
+
 #endif
 
 #endif
diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
index 47aac41349a2..f9ae1a993806 100644
--- a/tools/perf/util/bpf-loader.c
+++ b/tools/perf/util/bpf-loader.c
@@ -1615,7 +1615,7 @@ struct perf_evsel *bpf__setup_output_event(struct perf_evlist *evlist, const cha
 int bpf__setup_stdout(struct perf_evlist *evlist)
 {
 	struct perf_evsel *evsel = bpf__setup_output_event(evlist, "__bpf_stdout__");
-	return IS_ERR(evsel) ? PTR_ERR(evsel) : 0;
+	return PTR_ERR_OR_ZERO(evsel);
 }
 
 #define ERRNO_OFFSET(e)		((e) - __BPF_LOADER_ERRNO__START)
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index 2ae640257fdb..73430b73570d 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -244,6 +244,27 @@ static void cs_etm__free(struct perf_session *session)
 	zfree(&aux);
 }
 
+static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address)
+{
+	struct machine *machine;
+
+	machine = etmq->etm->machine;
+
+	if (address >= etmq->etm->kernel_start) {
+		if (machine__is_host(machine))
+			return PERF_RECORD_MISC_KERNEL;
+		else
+			return PERF_RECORD_MISC_GUEST_KERNEL;
+	} else {
+		if (machine__is_host(machine))
+			return PERF_RECORD_MISC_USER;
+		else if (perf_guest)
+			return PERF_RECORD_MISC_GUEST_USER;
+		else
+			return PERF_RECORD_MISC_HYPERVISOR;
+	}
+}
+
 static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address,
 			      size_t size, u8 *buffer)
 {
@@ -258,10 +279,7 @@ static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address,
 		return -1;
 
 	machine = etmq->etm->machine;
-	if (address >= etmq->etm->kernel_start)
-		cpumode = PERF_RECORD_MISC_KERNEL;
-	else
-		cpumode = PERF_RECORD_MISC_USER;
+	cpumode = cs_etm__cpu_mode(etmq, address);
 
 	thread = etmq->thread;
 	if (!thread) {
@@ -653,7 +671,7 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
 	struct perf_sample sample = {.ip = 0,};
 
 	event->sample.header.type = PERF_RECORD_SAMPLE;
-	event->sample.header.misc = PERF_RECORD_MISC_USER;
+	event->sample.header.misc = cs_etm__cpu_mode(etmq, addr);
 	event->sample.header.size = sizeof(struct perf_event_header);
 
 	sample.ip = addr;
@@ -665,7 +683,7 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
 	sample.cpu = etmq->packet->cpu;
 	sample.flags = 0;
 	sample.insn_len = 1;
-	sample.cpumode = event->header.misc;
+	sample.cpumode = event->sample.header.misc;
 
 	if (etm->synth_opts.last_branch) {
 		cs_etm__copy_last_branch_rb(etmq);
@@ -706,12 +724,15 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq)
 		u64			nr;
 		struct branch_entry	entries;
 	} dummy_bs;
+	u64 ip;
+
+	ip = cs_etm__last_executed_instr(etmq->prev_packet);
 
 	event->sample.header.type = PERF_RECORD_SAMPLE;
-	event->sample.header.misc = PERF_RECORD_MISC_USER;
+	event->sample.header.misc = cs_etm__cpu_mode(etmq, ip);
 	event->sample.header.size = sizeof(struct perf_event_header);
 
-	sample.ip = cs_etm__last_executed_instr(etmq->prev_packet);
+	sample.ip = ip;
 	sample.pid = etmq->pid;
 	sample.tid = etmq->tid;
 	sample.addr = cs_etm__first_executed_instr(etmq->packet);
@@ -720,7 +741,7 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq)
 	sample.period = 1;
 	sample.cpu = etmq->packet->cpu;
 	sample.flags = 0;
-	sample.cpumode = PERF_RECORD_MISC_USER;
+	sample.cpumode = event->sample.header.misc;
 
 	/*
 	 * perf report cannot handle events without a branch stack
@@ -1432,7 +1453,8 @@ int cs_etm__process_auxtrace_info(union perf_event *event,
 	if (session->itrace_synth_opts && session->itrace_synth_opts->set) {
 		etm->synth_opts = *session->itrace_synth_opts;
 	} else {
-		itrace_synth_opts__set_default(&etm->synth_opts);
+		itrace_synth_opts__set_default(&etm->synth_opts,
+				session->itrace_synth_opts->default_no_sample);
 		etm->synth_opts.callchain = false;
 	}
 
diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c
index abd38abf1d91..2a36fab76994 100644
--- a/tools/perf/util/data-convert-bt.c
+++ b/tools/perf/util/data-convert-bt.c
@@ -182,20 +182,20 @@ err_put_field:
 }
 
 static struct bt_ctf_field_type*
-get_tracepoint_field_type(struct ctf_writer *cw, struct format_field *field)
+get_tracepoint_field_type(struct ctf_writer *cw, struct tep_format_field *field)
 {
 	unsigned long flags = field->flags;
 
-	if (flags & FIELD_IS_STRING)
+	if (flags & TEP_FIELD_IS_STRING)
 		return cw->data.string;
 
-	if (!(flags & FIELD_IS_SIGNED)) {
+	if (!(flags & TEP_FIELD_IS_SIGNED)) {
 		/* unsigned long are mostly pointers */
-		if (flags & FIELD_IS_LONG || flags & FIELD_IS_POINTER)
+		if (flags & TEP_FIELD_IS_LONG || flags & TEP_FIELD_IS_POINTER)
 			return cw->data.u64_hex;
 	}
 
-	if (flags & FIELD_IS_SIGNED) {
+	if (flags & TEP_FIELD_IS_SIGNED) {
 		if (field->size == 8)
 			return cw->data.s64;
 		else
@@ -287,7 +287,7 @@ static int add_tracepoint_field_value(struct ctf_writer *cw,
 				      struct bt_ctf_event_class *event_class,
 				      struct bt_ctf_event *event,
 				      struct perf_sample *sample,
-				      struct format_field *fmtf)
+				      struct tep_format_field *fmtf)
 {
 	struct bt_ctf_field_type *type;
 	struct bt_ctf_field *array_field;
@@ -304,10 +304,10 @@ static int add_tracepoint_field_value(struct ctf_writer *cw,
 	name = fmtf->alias;
 	offset = fmtf->offset;
 	len = fmtf->size;
-	if (flags & FIELD_IS_STRING)
-		flags &= ~FIELD_IS_ARRAY;
+	if (flags & TEP_FIELD_IS_STRING)
+		flags &= ~TEP_FIELD_IS_ARRAY;
 
-	if (flags & FIELD_IS_DYNAMIC) {
+	if (flags & TEP_FIELD_IS_DYNAMIC) {
 		unsigned long long tmp_val;
 
 		tmp_val = tep_read_number(fmtf->event->pevent,
@@ -317,7 +317,7 @@ static int add_tracepoint_field_value(struct ctf_writer *cw,
 		offset &= 0xffff;
 	}
 
-	if (flags & FIELD_IS_ARRAY) {
+	if (flags & TEP_FIELD_IS_ARRAY) {
 
 		type = bt_ctf_event_class_get_field_by_name(
 				event_class, name);
@@ -338,7 +338,7 @@ static int add_tracepoint_field_value(struct ctf_writer *cw,
 	type = get_tracepoint_field_type(cw, fmtf);
 
 	for (i = 0; i < n_items; i++) {
-		if (flags & FIELD_IS_ARRAY)
+		if (flags & TEP_FIELD_IS_ARRAY)
 			field = bt_ctf_field_array_get_field(array_field, i);
 		else
 			field = bt_ctf_field_create(type);
@@ -348,7 +348,7 @@ static int add_tracepoint_field_value(struct ctf_writer *cw,
 			return -1;
 		}
 
-		if (flags & FIELD_IS_STRING)
+		if (flags & TEP_FIELD_IS_STRING)
 			ret = string_set_value(field, data + offset + i * len);
 		else {
 			unsigned long long value_int;
@@ -357,7 +357,7 @@ static int add_tracepoint_field_value(struct ctf_writer *cw,
 					fmtf->event->pevent,
 					data + offset + i * len, len);
 
-			if (!(flags & FIELD_IS_SIGNED))
+			if (!(flags & TEP_FIELD_IS_SIGNED))
 				ret = bt_ctf_field_unsigned_integer_set_value(
 						field, value_int);
 			else
@@ -369,7 +369,7 @@ static int add_tracepoint_field_value(struct ctf_writer *cw,
 			pr_err("failed to set file value %s\n", name);
 			goto err_put_field;
 		}
-		if (!(flags & FIELD_IS_ARRAY)) {
+		if (!(flags & TEP_FIELD_IS_ARRAY)) {
 			ret = bt_ctf_event_set_payload(event, name, field);
 			if (ret) {
 				pr_err("failed to set payload %s\n", name);
@@ -378,7 +378,7 @@ static int add_tracepoint_field_value(struct ctf_writer *cw,
 		}
 		bt_ctf_field_put(field);
 	}
-	if (flags & FIELD_IS_ARRAY) {
+	if (flags & TEP_FIELD_IS_ARRAY) {
 		ret = bt_ctf_event_set_payload(event, name, array_field);
 		if (ret) {
 			pr_err("Failed add payload array %s\n", name);
@@ -396,10 +396,10 @@ err_put_field:
 static int add_tracepoint_fields_values(struct ctf_writer *cw,
 					struct bt_ctf_event_class *event_class,
 					struct bt_ctf_event *event,
-					struct format_field *fields,
+					struct tep_format_field *fields,
 					struct perf_sample *sample)
 {
-	struct format_field *field;
+	struct tep_format_field *field;
 	int ret;
 
 	for (field = fields; field; field = field->next) {
@@ -417,8 +417,8 @@ static int add_tracepoint_values(struct ctf_writer *cw,
 				 struct perf_evsel *evsel,
 				 struct perf_sample *sample)
 {
-	struct format_field *common_fields = evsel->tp_format->format.common_fields;
-	struct format_field *fields        = evsel->tp_format->format.fields;
+	struct tep_format_field *common_fields = evsel->tp_format->format.common_fields;
+	struct tep_format_field *fields        = evsel->tp_format->format.fields;
 	int ret;
 
 	ret = add_tracepoint_fields_values(cw, event_class, event,
@@ -970,7 +970,7 @@ out:
 
 static int event_class_add_field(struct bt_ctf_event_class *event_class,
 		struct bt_ctf_field_type *type,
-		struct format_field *field)
+		struct tep_format_field *field)
 {
 	struct bt_ctf_field_type *t = NULL;
 	char *name;
@@ -1009,10 +1009,10 @@ static int event_class_add_field(struct bt_ctf_event_class *event_class,
 }
 
 static int add_tracepoint_fields_types(struct ctf_writer *cw,
-				       struct format_field *fields,
+				       struct tep_format_field *fields,
 				       struct bt_ctf_event_class *event_class)
 {
-	struct format_field *field;
+	struct tep_format_field *field;
 	int ret;
 
 	for (field = fields; field; field = field->next) {
@@ -1030,15 +1030,15 @@ static int add_tracepoint_fields_types(struct ctf_writer *cw,
 		 * type and don't care that it is an array. What we don't
 		 * support is an array of strings.
 		 */
-		if (flags & FIELD_IS_STRING)
-			flags &= ~FIELD_IS_ARRAY;
+		if (flags & TEP_FIELD_IS_STRING)
+			flags &= ~TEP_FIELD_IS_ARRAY;
 
-		if (flags & FIELD_IS_ARRAY)
+		if (flags & TEP_FIELD_IS_ARRAY)
 			type = bt_ctf_field_type_array_create(type, field->arraylen);
 
 		ret = event_class_add_field(event_class, type, field);
 
-		if (flags & FIELD_IS_ARRAY)
+		if (flags & TEP_FIELD_IS_ARRAY)
 			bt_ctf_field_type_put(type);
 
 		if (ret) {
@@ -1055,8 +1055,8 @@ static int add_tracepoint_types(struct ctf_writer *cw,
 				struct perf_evsel *evsel,
 				struct bt_ctf_event_class *class)
 {
-	struct format_field *common_fields = evsel->tp_format->format.common_fields;
-	struct format_field *fields        = evsel->tp_format->format.fields;
+	struct tep_format_field *common_fields = evsel->tp_format->format.common_fields;
+	struct tep_format_field *fields        = evsel->tp_format->format.fields;
 	int ret;
 
 	ret = add_tracepoint_fields_types(cw, common_fields, class);
@@ -1578,7 +1578,7 @@ int bt_convert__perf2ctf(const char *input, const char *path,
 {
 	struct perf_session *session;
 	struct perf_data data = {
-		.file.path = input,
+		.file      = { .path = input, .fd = -1 },
 		.mode      = PERF_DATA_MODE_READ,
 		.force     = opts->force,
 	};
diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c
index 7123746edcf4..69fbb0a72d0c 100644
--- a/tools/perf/util/db-export.c
+++ b/tools/perf/util/db-export.c
@@ -463,6 +463,28 @@ int db_export__branch_types(struct db_export *dbe)
 		if (err)
 			break;
 	}
+
+	/* Add trace begin / end variants */
+	for (i = 0; branch_types[i].name ; i++) {
+		const char *name = branch_types[i].name;
+		u32 type = branch_types[i].branch_type;
+		char buf[64];
+
+		if (type == PERF_IP_FLAG_BRANCH ||
+		    (type & (PERF_IP_FLAG_TRACE_BEGIN | PERF_IP_FLAG_TRACE_END)))
+			continue;
+
+		snprintf(buf, sizeof(buf), "trace begin / %s", name);
+		err = db_export__branch_type(dbe, type | PERF_IP_FLAG_TRACE_BEGIN, buf);
+		if (err)
+			break;
+
+		snprintf(buf, sizeof(buf), "%s / trace end", name);
+		err = db_export__branch_type(dbe, type | PERF_IP_FLAG_TRACE_END, buf);
+		if (err)
+			break;
+	}
+
 	return err;
 }
 
diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
index 1f3ccc368530..d01b8355f4ca 100644
--- a/tools/perf/util/env.h
+++ b/tools/perf/util/env.h
@@ -63,6 +63,7 @@ struct perf_env {
 	struct numa_node	*numa_nodes;
 	struct memory_node	*memory_nodes;
 	unsigned long long	 memory_bsize;
+	u64                     clockid_res_ns;
 };
 
 extern struct perf_env perf_env;
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index bc646185f8d9..e9c108a6b1c3 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -308,6 +308,7 @@ static int perf_event__synthesize_fork(struct perf_tool *tool,
 	event->fork.pid  = tgid;
 	event->fork.tid  = pid;
 	event->fork.header.type = PERF_RECORD_FORK;
+	event->fork.header.misc = PERF_RECORD_MISC_FORK_EXEC;
 
 	event->fork.header.size = (sizeof(event->fork) + machine->id_hdr_size);
 
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index be440df29615..668d2a9ef0f4 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -358,7 +358,7 @@ void perf_evlist__disable(struct perf_evlist *evlist)
 	struct perf_evsel *pos;
 
 	evlist__for_each_entry(evlist, pos) {
-		if (!perf_evsel__is_group_leader(pos) || !pos->fd)
+		if (pos->disabled || !perf_evsel__is_group_leader(pos) || !pos->fd)
 			continue;
 		perf_evsel__disable(pos);
 	}
@@ -1810,3 +1810,30 @@ void perf_evlist__force_leader(struct perf_evlist *evlist)
 		leader->forced_leader = true;
 	}
 }
+
+struct perf_evsel *perf_evlist__reset_weak_group(struct perf_evlist *evsel_list,
+						 struct perf_evsel *evsel)
+{
+	struct perf_evsel *c2, *leader;
+	bool is_open = true;
+
+	leader = evsel->leader;
+	pr_debug("Weak group for %s/%d failed\n",
+			leader->name, leader->nr_members);
+
+	/*
+	 * for_each_group_member doesn't work here because it doesn't
+	 * include the first entry.
+	 */
+	evlist__for_each_entry(evsel_list, c2) {
+		if (c2 == evsel)
+			is_open = false;
+		if (c2->leader == leader) {
+			if (is_open)
+				perf_evsel__close(c2);
+			c2->leader = c2;
+			c2->nr_members = 0;
+		}
+	}
+	return leader;
+}
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index dc66436add98..9919eed6d15b 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -312,4 +312,7 @@ bool perf_evlist__exclude_kernel(struct perf_evlist *evlist);
 
 void perf_evlist__force_leader(struct perf_evlist *evlist);
 
+struct perf_evsel *perf_evlist__reset_weak_group(struct perf_evlist *evlist,
+						 struct perf_evsel *evsel);
+
 #endif /* __PERF_EVLIST_H */
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index e596ae358c4d..d37bb1566cd9 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -232,6 +232,7 @@ void perf_evsel__init(struct perf_evsel *evsel,
 	evsel->leader	   = evsel;
 	evsel->unit	   = "";
 	evsel->scale	   = 1.0;
+	evsel->max_events  = ULONG_MAX;
 	evsel->evlist	   = NULL;
 	evsel->bpf_fd	   = -1;
 	INIT_LIST_HEAD(&evsel->node);
@@ -793,6 +794,9 @@ static void apply_config_terms(struct perf_evsel *evsel,
 		case PERF_EVSEL__CONFIG_TERM_MAX_STACK:
 			max_stack = term->val.max_stack;
 			break;
+		case PERF_EVSEL__CONFIG_TERM_MAX_EVENTS:
+			evsel->max_events = term->val.max_events;
+			break;
 		case PERF_EVSEL__CONFIG_TERM_INHERIT:
 			/*
 			 * attr->inherit should has already been set by
@@ -952,7 +956,6 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts,
 		attr->sample_freq    = 0;
 		attr->sample_period  = 0;
 		attr->write_backward = 0;
-		attr->sample_id_all  = 0;
 	}
 
 	if (opts->no_samples)
@@ -1203,16 +1206,27 @@ int perf_evsel__append_addr_filter(struct perf_evsel *evsel, const char *filter)
 
 int perf_evsel__enable(struct perf_evsel *evsel)
 {
-	return perf_evsel__run_ioctl(evsel,
-				     PERF_EVENT_IOC_ENABLE,
-				     0);
+	int err = perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_ENABLE, 0);
+
+	if (!err)
+		evsel->disabled = false;
+
+	return err;
 }
 
 int perf_evsel__disable(struct perf_evsel *evsel)
 {
-	return perf_evsel__run_ioctl(evsel,
-				     PERF_EVENT_IOC_DISABLE,
-				     0);
+	int err = perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_DISABLE, 0);
+	/*
+	 * We mark it disabled here so that tools that disable a event can
+	 * ignore events after they disable it. I.e. the ring buffer may have
+	 * already a few more events queued up before the kernel got the stop
+	 * request.
+	 */
+	if (!err)
+		evsel->disabled = true;
+
+	return err;
 }
 
 int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads)
@@ -2685,7 +2699,7 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type,
 	return 0;
 }
 
-struct format_field *perf_evsel__field(struct perf_evsel *evsel, const char *name)
+struct tep_format_field *perf_evsel__field(struct perf_evsel *evsel, const char *name)
 {
 	return tep_find_field(evsel->tp_format, name);
 }
@@ -2693,7 +2707,7 @@ struct format_field *perf_evsel__field(struct perf_evsel *evsel, const char *nam
 void *perf_evsel__rawptr(struct perf_evsel *evsel, struct perf_sample *sample,
 			 const char *name)
 {
-	struct format_field *field = perf_evsel__field(evsel, name);
+	struct tep_format_field *field = perf_evsel__field(evsel, name);
 	int offset;
 
 	if (!field)
@@ -2701,7 +2715,7 @@ void *perf_evsel__rawptr(struct perf_evsel *evsel, struct perf_sample *sample,
 
 	offset = field->offset;
 
-	if (field->flags & FIELD_IS_DYNAMIC) {
+	if (field->flags & TEP_FIELD_IS_DYNAMIC) {
 		offset = *(int *)(sample->raw_data + field->offset);
 		offset &= 0xffff;
 	}
@@ -2709,7 +2723,7 @@ void *perf_evsel__rawptr(struct perf_evsel *evsel, struct perf_sample *sample,
 	return sample->raw_data + offset;
 }
 
-u64 format_field__intval(struct format_field *field, struct perf_sample *sample,
+u64 format_field__intval(struct tep_format_field *field, struct perf_sample *sample,
 			 bool needs_swap)
 {
 	u64 value;
@@ -2751,7 +2765,7 @@ u64 format_field__intval(struct format_field *field, struct perf_sample *sample,
 u64 perf_evsel__intval(struct perf_evsel *evsel, struct perf_sample *sample,
 		       const char *name)
 {
-	struct format_field *field = perf_evsel__field(evsel, name);
+	struct tep_format_field *field = perf_evsel__field(evsel, name);
 
 	if (!field)
 		return 0;
@@ -2943,3 +2957,32 @@ struct perf_env *perf_evsel__env(struct perf_evsel *evsel)
 		return evsel->evlist->env;
 	return NULL;
 }
+
+static int store_evsel_ids(struct perf_evsel *evsel, struct perf_evlist *evlist)
+{
+	int cpu, thread;
+
+	for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++) {
+		for (thread = 0; thread < xyarray__max_y(evsel->fd);
+		     thread++) {
+			int fd = FD(evsel, cpu, thread);
+
+			if (perf_evlist__id_add_fd(evlist, evsel,
+						   cpu, thread, fd) < 0)
+				return -1;
+		}
+	}
+
+	return 0;
+}
+
+int perf_evsel__store_ids(struct perf_evsel *evsel, struct perf_evlist *evlist)
+{
+	struct cpu_map *cpus = evsel->cpus;
+	struct thread_map *threads = evsel->threads;
+
+	if (perf_evsel__alloc_id(evsel, cpus->nr, threads->nr))
+		return -ENOMEM;
+
+	return store_evsel_ids(evsel, evlist);
+}
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 163c960614d3..3147ca76c6fc 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -46,6 +46,7 @@ enum term_type {
 	PERF_EVSEL__CONFIG_TERM_STACK_USER,
 	PERF_EVSEL__CONFIG_TERM_INHERIT,
 	PERF_EVSEL__CONFIG_TERM_MAX_STACK,
+	PERF_EVSEL__CONFIG_TERM_MAX_EVENTS,
 	PERF_EVSEL__CONFIG_TERM_OVERWRITE,
 	PERF_EVSEL__CONFIG_TERM_DRV_CFG,
 	PERF_EVSEL__CONFIG_TERM_BRANCH,
@@ -65,6 +66,7 @@ struct perf_evsel_config_term {
 		bool	inherit;
 		bool	overwrite;
 		char	*branch;
+		unsigned long max_events;
 	} val;
 	bool weak;
 };
@@ -99,10 +101,12 @@ struct perf_evsel {
 	struct perf_counts	*prev_raw_counts;
 	int			idx;
 	u32			ids;
+	unsigned long		max_events;
+	unsigned long		nr_events_printed;
 	char			*name;
 	double			scale;
 	const char		*unit;
-	struct event_format	*tp_format;
+	struct tep_event_format	*tp_format;
 	off_t			id_offset;
 	struct perf_stat_evsel  *stats;
 	void			*priv;
@@ -119,6 +123,7 @@ struct perf_evsel {
 	bool			snapshot;
 	bool 			supported;
 	bool 			needs_swap;
+	bool 			disabled;
 	bool			no_aux_samples;
 	bool			immediate;
 	bool			system_wide;
@@ -211,7 +216,7 @@ static inline struct perf_evsel *perf_evsel__newtp(const char *sys, const char *
 
 struct perf_evsel *perf_evsel__new_cycles(bool precise);
 
-struct event_format *event_format__new(const char *sys, const char *name);
+struct tep_event_format *event_format__new(const char *sys, const char *name);
 
 void perf_evsel__init(struct perf_evsel *evsel,
 		      struct perf_event_attr *attr, int idx);
@@ -296,11 +301,11 @@ static inline char *perf_evsel__strval(struct perf_evsel *evsel,
 	return perf_evsel__rawptr(evsel, sample, name);
 }
 
-struct format_field;
+struct tep_format_field;
 
-u64 format_field__intval(struct format_field *field, struct perf_sample *sample, bool needs_swap);
+u64 format_field__intval(struct tep_format_field *field, struct perf_sample *sample, bool needs_swap);
 
-struct format_field *perf_evsel__field(struct perf_evsel *evsel, const char *name);
+struct tep_format_field *perf_evsel__field(struct perf_evsel *evsel, const char *name);
 
 #define perf_evsel__match(evsel, t, c)		\
 	(evsel->attr.type == PERF_TYPE_##t &&	\
@@ -481,4 +486,5 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
 
 struct perf_env *perf_evsel__env(struct perf_evsel *evsel);
 
+int perf_evsel__store_ids(struct perf_evsel *evsel, struct perf_evlist *evlist);
 #endif /* __PERF_EVSEL_H */
diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c
index 06dfb027879d..0d0a4c6f368b 100644
--- a/tools/perf/util/evsel_fprintf.c
+++ b/tools/perf/util/evsel_fprintf.c
@@ -73,7 +73,7 @@ int perf_evsel__fprintf(struct perf_evsel *evsel,
 	}
 
 	if (details->trace_fields) {
-		struct format_field *field;
+		struct tep_format_field *field;
 
 		if (evsel->attr.type != PERF_TYPE_TRACEPOINT) {
 			printed += comma_fprintf(fp, &first, " (not a tracepoint)");
diff --git a/tools/perf/util/genelf.h b/tools/perf/util/genelf.h
index de322d51c7fe..b72440bf9a79 100644
--- a/tools/perf/util/genelf.h
+++ b/tools/perf/util/genelf.h
@@ -29,6 +29,12 @@ int jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_ent
 #elif defined(__powerpc__)
 #define GEN_ELF_ARCH	EM_PPC
 #define GEN_ELF_CLASS	ELFCLASS32
+#elif defined(__sparc__) && defined(__arch64__)
+#define GEN_ELF_ARCH	EM_SPARCV9
+#define GEN_ELF_CLASS	ELFCLASS64
+#elif defined(__sparc__)
+#define GEN_ELF_ARCH	EM_SPARC
+#define GEN_ELF_CLASS	ELFCLASS32
 #else
 #error "unsupported architecture"
 #endif
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 3cadc252dd89..4fd45be95a43 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -1034,6 +1034,13 @@ static int write_auxtrace(struct feat_fd *ff,
 	return err;
 }
 
+static int write_clockid(struct feat_fd *ff,
+			 struct perf_evlist *evlist __maybe_unused)
+{
+	return do_write(ff, &ff->ph->env.clockid_res_ns,
+			sizeof(ff->ph->env.clockid_res_ns));
+}
+
 static int cpu_cache_level__sort(const void *a, const void *b)
 {
 	struct cpu_cache_level *cache_a = (struct cpu_cache_level *)a;
@@ -1508,6 +1515,12 @@ static void print_cpu_topology(struct feat_fd *ff, FILE *fp)
 		fprintf(fp, "# Core ID and Socket ID information is not available\n");
 }
 
+static void print_clockid(struct feat_fd *ff, FILE *fp)
+{
+	fprintf(fp, "# clockid frequency: %"PRIu64" MHz\n",
+		ff->ph->env.clockid_res_ns * 1000);
+}
+
 static void free_event_desc(struct perf_evsel *events)
 {
 	struct perf_evsel *evsel;
@@ -2531,6 +2544,15 @@ out:
 	return ret;
 }
 
+static int process_clockid(struct feat_fd *ff,
+			   void *data __maybe_unused)
+{
+	if (do_read_u64(ff, &ff->ph->env.clockid_res_ns))
+		return -1;
+
+	return 0;
+}
+
 struct feature_ops {
 	int (*write)(struct feat_fd *ff, struct perf_evlist *evlist);
 	void (*print)(struct feat_fd *ff, FILE *fp);
@@ -2590,6 +2612,7 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = {
 	FEAT_OPN(CACHE,		cache,		true),
 	FEAT_OPR(SAMPLE_TIME,	sample_time,	false),
 	FEAT_OPR(MEM_TOPOLOGY,	mem_topology,	true),
+	FEAT_OPR(CLOCKID,       clockid,        false)
 };
 
 struct header_print_data {
@@ -3206,7 +3229,7 @@ static int read_attr(int fd, struct perf_header *ph,
 static int perf_evsel__prepare_tracepoint_event(struct perf_evsel *evsel,
 						struct tep_handle *pevent)
 {
-	struct event_format *event;
+	struct tep_event_format *event;
 	char bf[128];
 
 	/* already prepared */
@@ -3448,10 +3471,10 @@ int perf_event__synthesize_features(struct perf_tool *tool,
 	return ret;
 }
 
-int perf_event__process_feature(struct perf_tool *tool,
-				union perf_event *event,
-				struct perf_session *session __maybe_unused)
+int perf_event__process_feature(struct perf_session *session,
+				union perf_event *event)
 {
+	struct perf_tool *tool = session->tool;
 	struct feat_fd ff = { .fd = 0 };
 	struct feature_event *fe = (struct feature_event *)event;
 	int type = fe->header.type;
@@ -3637,13 +3660,13 @@ size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp)
 }
 
 int perf_event__synthesize_attrs(struct perf_tool *tool,
-				   struct perf_session *session,
-				   perf_event__handler_t process)
+				 struct perf_evlist *evlist,
+				 perf_event__handler_t process)
 {
 	struct perf_evsel *evsel;
 	int err = 0;
 
-	evlist__for_each_entry(session->evlist, evsel) {
+	evlist__for_each_entry(evlist, evsel) {
 		err = perf_event__synthesize_attr(tool, &evsel->attr, evsel->ids,
 						  evsel->id, process);
 		if (err) {
@@ -3856,9 +3879,8 @@ int perf_event__synthesize_tracing_data(struct perf_tool *tool, int fd,
 	return aligned_size;
 }
 
-int perf_event__process_tracing_data(struct perf_tool *tool __maybe_unused,
-				     union perf_event *event,
-				     struct perf_session *session)
+int perf_event__process_tracing_data(struct perf_session *session,
+				     union perf_event *event)
 {
 	ssize_t size_read, padding, size = event->tracing_data.size;
 	int fd = perf_data__fd(session->data);
@@ -3924,9 +3946,8 @@ int perf_event__synthesize_build_id(struct perf_tool *tool,
 	return err;
 }
 
-int perf_event__process_build_id(struct perf_tool *tool __maybe_unused,
-				 union perf_event *event,
-				 struct perf_session *session)
+int perf_event__process_build_id(struct perf_session *session,
+				 union perf_event *event)
 {
 	__event_process_build_id(&event->build_id,
 				 event->build_id.filename,
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index 6d7fe44aadc0..0d553ddca0a3 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -38,6 +38,7 @@ enum {
 	HEADER_CACHE,
 	HEADER_SAMPLE_TIME,
 	HEADER_MEM_TOPOLOGY,
+	HEADER_CLOCKID,
 	HEADER_LAST_FEATURE,
 	HEADER_FEAT_BITS	= 256,
 };
@@ -116,15 +117,14 @@ int perf_event__synthesize_extra_attr(struct perf_tool *tool,
 				      perf_event__handler_t process,
 				      bool is_pipe);
 
-int perf_event__process_feature(struct perf_tool *tool,
-				union perf_event *event,
-				struct perf_session *session);
+int perf_event__process_feature(struct perf_session *session,
+				union perf_event *event);
 
 int perf_event__synthesize_attr(struct perf_tool *tool,
 				struct perf_event_attr *attr, u32 ids, u64 *id,
 				perf_event__handler_t process);
 int perf_event__synthesize_attrs(struct perf_tool *tool,
-				 struct perf_session *session,
+				 struct perf_evlist *evlist,
 				 perf_event__handler_t process);
 int perf_event__synthesize_event_update_unit(struct perf_tool *tool,
 					     struct perf_evsel *evsel,
@@ -148,17 +148,15 @@ size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp);
 int perf_event__synthesize_tracing_data(struct perf_tool *tool,
 					int fd, struct perf_evlist *evlist,
 					perf_event__handler_t process);
-int perf_event__process_tracing_data(struct perf_tool *tool,
-				     union perf_event *event,
-				     struct perf_session *session);
+int perf_event__process_tracing_data(struct perf_session *session,
+				     union perf_event *event);
 
 int perf_event__synthesize_build_id(struct perf_tool *tool,
 				    struct dso *pos, u16 misc,
 				    perf_event__handler_t process,
 				    struct machine *machine);
-int perf_event__process_build_id(struct perf_tool *tool,
-				 union perf_event *event,
-				 struct perf_session *session);
+int perf_event__process_build_id(struct perf_session *session,
+				 union perf_event *event);
 bool is_perf_magic(u64 magic);
 
 #define NAME_ALIGN 64
diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c
index 7f0c83b6332b..7b27d77306c2 100644
--- a/tools/perf/util/intel-bts.c
+++ b/tools/perf/util/intel-bts.c
@@ -269,6 +269,13 @@ static int intel_bts_do_fix_overlap(struct auxtrace_queue *queue,
 	return 0;
 }
 
+static inline u8 intel_bts_cpumode(struct intel_bts *bts, uint64_t ip)
+{
+	return machine__kernel_ip(bts->machine, ip) ?
+	       PERF_RECORD_MISC_KERNEL :
+	       PERF_RECORD_MISC_USER;
+}
+
 static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq,
 					 struct branch *branch)
 {
@@ -281,12 +288,8 @@ static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq,
 	    bts->num_events++ <= bts->synth_opts.initial_skip)
 		return 0;
 
-	event.sample.header.type = PERF_RECORD_SAMPLE;
-	event.sample.header.misc = PERF_RECORD_MISC_USER;
-	event.sample.header.size = sizeof(struct perf_event_header);
-
-	sample.cpumode = PERF_RECORD_MISC_USER;
 	sample.ip = le64_to_cpu(branch->from);
+	sample.cpumode = intel_bts_cpumode(bts, sample.ip);
 	sample.pid = btsq->pid;
 	sample.tid = btsq->tid;
 	sample.addr = le64_to_cpu(branch->to);
@@ -298,6 +301,10 @@ static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq,
 	sample.insn_len = btsq->intel_pt_insn.length;
 	memcpy(sample.insn, btsq->intel_pt_insn.buf, INTEL_PT_INSN_BUF_SZ);
 
+	event.sample.header.type = PERF_RECORD_SAMPLE;
+	event.sample.header.misc = sample.cpumode;
+	event.sample.header.size = sizeof(struct perf_event_header);
+
 	if (bts->synth_opts.inject) {
 		event.sample.header.size = bts->branches_event_size;
 		ret = perf_event__synthesize_sample(&event,
@@ -910,7 +917,8 @@ int intel_bts_process_auxtrace_info(union perf_event *event,
 	if (session->itrace_synth_opts && session->itrace_synth_opts->set) {
 		bts->synth_opts = *session->itrace_synth_opts;
 	} else {
-		itrace_synth_opts__set_default(&bts->synth_opts);
+		itrace_synth_opts__set_default(&bts->synth_opts,
+				session->itrace_synth_opts->default_no_sample);
 		if (session->itrace_synth_opts)
 			bts->synth_opts.thread_stack =
 				session->itrace_synth_opts->thread_stack;
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
index d404bed7003a..4503f3ca45ab 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
@@ -1165,7 +1165,7 @@ static int intel_pt_walk_tip(struct intel_pt_decoder *decoder)
 		decoder->pge = false;
 		decoder->continuous_period = false;
 		decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
-		decoder->state.to_ip = 0;
+		decoder->state.type |= INTEL_PT_TRACE_END;
 		return 0;
 	}
 	if (err == INTEL_PT_RETURN)
@@ -1179,9 +1179,13 @@ static int intel_pt_walk_tip(struct intel_pt_decoder *decoder)
 			decoder->continuous_period = false;
 			decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
 			decoder->state.from_ip = decoder->ip;
-			decoder->state.to_ip = 0;
-			if (decoder->packet.count != 0)
+			if (decoder->packet.count == 0) {
+				decoder->state.to_ip = 0;
+			} else {
+				decoder->state.to_ip = decoder->last_ip;
 				decoder->ip = decoder->last_ip;
+			}
+			decoder->state.type |= INTEL_PT_TRACE_END;
 		} else {
 			decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
 			decoder->state.from_ip = decoder->ip;
@@ -1208,7 +1212,8 @@ static int intel_pt_walk_tip(struct intel_pt_decoder *decoder)
 			decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
 			decoder->ip = to_ip;
 			decoder->state.from_ip = decoder->ip;
-			decoder->state.to_ip = 0;
+			decoder->state.to_ip = to_ip;
+			decoder->state.type |= INTEL_PT_TRACE_END;
 			return 0;
 		}
 		intel_pt_log_at("ERROR: Conditional branch when expecting indirect branch",
@@ -1469,6 +1474,8 @@ static void intel_pt_calc_mtc_timestamp(struct intel_pt_decoder *decoder)
 		decoder->have_calc_cyc_to_tsc = false;
 		intel_pt_calc_cyc_to_tsc(decoder, true);
 	}
+
+	intel_pt_log_to("Setting timestamp", decoder->timestamp);
 }
 
 static void intel_pt_calc_cbr(struct intel_pt_decoder *decoder)
@@ -1509,6 +1516,8 @@ static void intel_pt_calc_cyc_timestamp(struct intel_pt_decoder *decoder)
 		decoder->timestamp = timestamp;
 
 	decoder->timestamp_insn_cnt = 0;
+
+	intel_pt_log_to("Setting timestamp", decoder->timestamp);
 }
 
 /* Walk PSB+ packets when already in sync. */
@@ -1640,14 +1649,15 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder)
 
 		case INTEL_PT_TIP_PGD:
 			decoder->state.from_ip = decoder->ip;
-			decoder->state.to_ip = 0;
-			if (decoder->packet.count != 0) {
+			if (decoder->packet.count == 0) {
+				decoder->state.to_ip = 0;
+			} else {
 				intel_pt_set_ip(decoder);
-				intel_pt_log("Omitting PGD ip " x64_fmt "\n",
-					     decoder->ip);
+				decoder->state.to_ip = decoder->ip;
 			}
 			decoder->pge = false;
 			decoder->continuous_period = false;
+			decoder->state.type |= INTEL_PT_TRACE_END;
 			return 0;
 
 		case INTEL_PT_TIP_PGE:
@@ -1661,6 +1671,7 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder)
 				intel_pt_set_ip(decoder);
 				decoder->state.to_ip = decoder->ip;
 			}
+			decoder->state.type |= INTEL_PT_TRACE_BEGIN;
 			return 0;
 
 		case INTEL_PT_TIP:
@@ -1739,6 +1750,7 @@ next:
 			intel_pt_set_ip(decoder);
 			decoder->state.from_ip = 0;
 			decoder->state.to_ip = decoder->ip;
+			decoder->state.type |= INTEL_PT_TRACE_BEGIN;
 			return 0;
 		}
 
@@ -2077,9 +2089,13 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder)
 			decoder->pge = decoder->packet.type != INTEL_PT_TIP_PGD;
 			if (intel_pt_have_ip(decoder))
 				intel_pt_set_ip(decoder);
-			if (decoder->ip)
-				return 0;
-			break;
+			if (!decoder->ip)
+				break;
+			if (decoder->packet.type == INTEL_PT_TIP_PGE)
+				decoder->state.type |= INTEL_PT_TRACE_BEGIN;
+			if (decoder->packet.type == INTEL_PT_TIP_PGD)
+				decoder->state.type |= INTEL_PT_TRACE_END;
+			return 0;
 
 		case INTEL_PT_FUP:
 			if (intel_pt_have_ip(decoder))
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
index 51c18d67f4ca..ed088d4726ba 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
@@ -37,6 +37,8 @@ enum intel_pt_sample_type {
 	INTEL_PT_EX_STOP	= 1 << 6,
 	INTEL_PT_PWR_EXIT	= 1 << 7,
 	INTEL_PT_CBR_CHG	= 1 << 8,
+	INTEL_PT_TRACE_BEGIN	= 1 << 9,
+	INTEL_PT_TRACE_END	= 1 << 10,
 };
 
 enum intel_pt_period_type {
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-log.c b/tools/perf/util/intel-pt-decoder/intel-pt-log.c
index e02bc7b166a0..5e64da270f97 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-log.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-log.c
@@ -31,6 +31,11 @@ static FILE *f;
 static char log_name[MAX_LOG_NAME];
 bool intel_pt_enable_logging;
 
+void *intel_pt_log_fp(void)
+{
+	return f;
+}
+
 void intel_pt_log_enable(void)
 {
 	intel_pt_enable_logging = true;
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-log.h b/tools/perf/util/intel-pt-decoder/intel-pt-log.h
index 45b64f93f358..cc084937f701 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-log.h
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-log.h
@@ -22,6 +22,7 @@
 
 struct intel_pt_pkt;
 
+void *intel_pt_log_fp(void);
 void intel_pt_log_enable(void);
 void intel_pt_log_disable(void);
 void intel_pt_log_set_name(const char *name);
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index aec68908d604..149ff361ca78 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -206,6 +206,16 @@ static void intel_pt_dump_event(struct intel_pt *pt, unsigned char *buf,
 	intel_pt_dump(pt, buf, len);
 }
 
+static void intel_pt_log_event(union perf_event *event)
+{
+	FILE *f = intel_pt_log_fp();
+
+	if (!intel_pt_enable_logging || !f)
+		return;
+
+	perf_event__fprintf(event, f);
+}
+
 static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer *a,
 				   struct auxtrace_buffer *b)
 {
@@ -407,6 +417,13 @@ intel_pt_cache_lookup(struct dso *dso, struct machine *machine, u64 offset)
 	return auxtrace_cache__lookup(dso->auxtrace_cache, offset);
 }
 
+static inline u8 intel_pt_cpumode(struct intel_pt *pt, uint64_t ip)
+{
+	return ip >= pt->kernel_start ?
+	       PERF_RECORD_MISC_KERNEL :
+	       PERF_RECORD_MISC_USER;
+}
+
 static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
 				   uint64_t *insn_cnt_ptr, uint64_t *ip,
 				   uint64_t to_ip, uint64_t max_insn_cnt,
@@ -429,10 +446,7 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
 	if (to_ip && *ip == to_ip)
 		goto out_no_cache;
 
-	if (*ip >= ptq->pt->kernel_start)
-		cpumode = PERF_RECORD_MISC_KERNEL;
-	else
-		cpumode = PERF_RECORD_MISC_USER;
+	cpumode = intel_pt_cpumode(ptq->pt, *ip);
 
 	thread = ptq->thread;
 	if (!thread) {
@@ -759,7 +773,8 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
 	if (pt->synth_opts.callchain) {
 		size_t sz = sizeof(struct ip_callchain);
 
-		sz += pt->synth_opts.callchain_sz * sizeof(u64);
+		/* Add 1 to callchain_sz for callchain context */
+		sz += (pt->synth_opts.callchain_sz + 1) * sizeof(u64);
 		ptq->chain = zalloc(sz);
 		if (!ptq->chain)
 			goto out_free;
@@ -908,6 +923,11 @@ static void intel_pt_sample_flags(struct intel_pt_queue *ptq)
 		ptq->insn_len = ptq->state->insn_len;
 		memcpy(ptq->insn, ptq->state->insn, INTEL_PT_INSN_BUF_SZ);
 	}
+
+	if (ptq->state->type & INTEL_PT_TRACE_BEGIN)
+		ptq->flags |= PERF_IP_FLAG_TRACE_BEGIN;
+	if (ptq->state->type & INTEL_PT_TRACE_END)
+		ptq->flags |= PERF_IP_FLAG_TRACE_END;
 }
 
 static int intel_pt_setup_queue(struct intel_pt *pt,
@@ -1053,15 +1073,11 @@ static void intel_pt_prep_b_sample(struct intel_pt *pt,
 				   union perf_event *event,
 				   struct perf_sample *sample)
 {
-	event->sample.header.type = PERF_RECORD_SAMPLE;
-	event->sample.header.misc = PERF_RECORD_MISC_USER;
-	event->sample.header.size = sizeof(struct perf_event_header);
-
 	if (!pt->timeless_decoding)
 		sample->time = tsc_to_perf_time(ptq->timestamp, &pt->tc);
 
-	sample->cpumode = PERF_RECORD_MISC_USER;
 	sample->ip = ptq->state->from_ip;
+	sample->cpumode = intel_pt_cpumode(pt, sample->ip);
 	sample->pid = ptq->pid;
 	sample->tid = ptq->tid;
 	sample->addr = ptq->state->to_ip;
@@ -1070,6 +1086,10 @@ static void intel_pt_prep_b_sample(struct intel_pt *pt,
 	sample->flags = ptq->flags;
 	sample->insn_len = ptq->insn_len;
 	memcpy(sample->insn, ptq->insn, INTEL_PT_INSN_BUF_SZ);
+
+	event->sample.header.type = PERF_RECORD_SAMPLE;
+	event->sample.header.misc = sample->cpumode;
+	event->sample.header.size = sizeof(struct perf_event_header);
 }
 
 static int intel_pt_inject_event(union perf_event *event,
@@ -1155,7 +1175,8 @@ static void intel_pt_prep_sample(struct intel_pt *pt,
 
 	if (pt->synth_opts.callchain) {
 		thread_stack__sample(ptq->thread, ptq->chain,
-				     pt->synth_opts.callchain_sz, sample->ip);
+				     pt->synth_opts.callchain_sz + 1,
+				     sample->ip, pt->kernel_start);
 		sample->callchain = ptq->chain;
 	}
 
@@ -1999,9 +2020,9 @@ static int intel_pt_process_event(struct perf_session *session,
 		 event->header.type == PERF_RECORD_SWITCH_CPU_WIDE)
 		err = intel_pt_context_switch(pt, event, sample);
 
-	intel_pt_log("event %s (%u): cpu %d time %"PRIu64" tsc %#"PRIx64"\n",
-		     perf_event__name(event->header.type), event->header.type,
-		     sample->cpu, sample->time, timestamp);
+	intel_pt_log("event %u: cpu %d time %"PRIu64" tsc %#"PRIx64" ",
+		     event->header.type, sample->cpu, sample->time, timestamp);
+	intel_pt_log_event(event);
 
 	return err;
 }
@@ -2554,7 +2575,8 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
 	if (session->itrace_synth_opts && session->itrace_synth_opts->set) {
 		pt->synth_opts = *session->itrace_synth_opts;
 	} else {
-		itrace_synth_opts__set_default(&pt->synth_opts);
+		itrace_synth_opts__set_default(&pt->synth_opts,
+				session->itrace_synth_opts->default_no_sample);
 		if (use_browser != -1) {
 			pt->synth_opts.branches = false;
 			pt->synth_opts.callchain = true;
diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c
index 19262f98cd4e..5b0b60f00275 100644
--- a/tools/perf/util/llvm-utils.c
+++ b/tools/perf/util/llvm-utils.c
@@ -19,7 +19,7 @@
 #define CLANG_BPF_CMD_DEFAULT_TEMPLATE				\
 		"$CLANG_EXEC -D__KERNEL__ -D__NR_CPUS__=$NR_CPUS "\
 		"-DLINUX_VERSION_CODE=$LINUX_VERSION_CODE "	\
-		"$CLANG_OPTIONS $KERNEL_INC_OPTIONS $PERF_BPF_INC_OPTIONS " \
+		"$CLANG_OPTIONS $PERF_BPF_INC_OPTIONS $KERNEL_INC_OPTIONS " \
 		"-Wno-unused-value -Wno-pointer-sign "		\
 		"-working-directory $WORKING_DIR "		\
 		"-c \"$CLANG_SOURCE\" -target bpf $CLANG_EMIT_LLVM -O2 -o - $LLVM_OPTIONS_PIPE"
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 111ae858cbcb..8f36ce813bc5 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -1708,6 +1708,7 @@ int machine__process_fork_event(struct machine *machine, union perf_event *event
 	struct thread *parent = machine__findnew_thread(machine,
 							event->fork.ppid,
 							event->fork.ptid);
+	bool do_maps_clone = true;
 	int err = 0;
 
 	if (dump_trace)
@@ -1736,9 +1737,25 @@ int machine__process_fork_event(struct machine *machine, union perf_event *event
 
 	thread = machine__findnew_thread(machine, event->fork.pid,
 					 event->fork.tid);
+	/*
+	 * When synthesizing FORK events, we are trying to create thread
+	 * objects for the already running tasks on the machine.
+	 *
+	 * Normally, for a kernel FORK event, we want to clone the parent's
+	 * maps because that is what the kernel just did.
+	 *
+	 * But when synthesizing, this should not be done.  If we do, we end up
+	 * with overlapping maps as we process the sythesized MMAP2 events that
+	 * get delivered shortly thereafter.
+	 *
+	 * Use the FORK event misc flags in an internal way to signal this
+	 * situation, so we can elide the map clone when appropriate.
+	 */
+	if (event->fork.header.misc & PERF_RECORD_MISC_FORK_EXEC)
+		do_maps_clone = false;
 
 	if (thread == NULL || parent == NULL ||
-	    thread__fork(thread, parent, sample->time) < 0) {
+	    thread__fork(thread, parent, sample->time, do_maps_clone) < 0) {
 		dump_printf("problem processing PERF_RECORD_FORK, skipping event.\n");
 		err = -1;
 	}
@@ -2140,6 +2157,27 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
 	return 0;
 }
 
+static int find_prev_cpumode(struct ip_callchain *chain, struct thread *thread,
+			     struct callchain_cursor *cursor,
+			     struct symbol **parent,
+			     struct addr_location *root_al,
+			     u8 *cpumode, int ent)
+{
+	int err = 0;
+
+	while (--ent >= 0) {
+		u64 ip = chain->ips[ent];
+
+		if (ip >= PERF_CONTEXT_MAX) {
+			err = add_callchain_ip(thread, cursor, parent,
+					       root_al, cpumode, ip,
+					       false, NULL, NULL, 0);
+			break;
+		}
+	}
+	return err;
+}
+
 static int thread__resolve_callchain_sample(struct thread *thread,
 					    struct callchain_cursor *cursor,
 					    struct perf_evsel *evsel,
@@ -2246,6 +2284,12 @@ static int thread__resolve_callchain_sample(struct thread *thread,
 	}
 
 check_calls:
+	if (callchain_param.order != ORDER_CALLEE) {
+		err = find_prev_cpumode(chain, thread, cursor, parent, root_al,
+					&cpumode, chain->nr - first_call);
+		if (err)
+			return (err < 0) ? err : 0;
+	}
 	for (i = first_call, nr_entries = 0;
 	     i < chain_nr && nr_entries < max_stack; i++) {
 		u64 ip;
@@ -2260,9 +2304,15 @@ check_calls:
 			continue;
 #endif
 		ip = chain->ips[j];
-
 		if (ip < PERF_CONTEXT_MAX)
                        ++nr_entries;
+		else if (callchain_param.order != ORDER_CALLEE) {
+			err = find_prev_cpumode(chain, thread, cursor, parent,
+						root_al, &cpumode, j);
+			if (err)
+				return (err < 0) ? err : 0;
+			continue;
+		}
 
 		err = add_callchain_ip(thread, cursor, parent,
 				       root_al, &cpumode, ip,
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index 6a6929f208b4..354e54550d2b 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -320,12 +320,11 @@ int map__load(struct map *map)
 			build_id__sprintf(map->dso->build_id,
 					  sizeof(map->dso->build_id),
 					  sbuild_id);
-			pr_warning("%s with build id %s not found",
-				   name, sbuild_id);
+			pr_debug("%s with build id %s not found", name, sbuild_id);
 		} else
-			pr_warning("Failed to open %s", name);
+			pr_debug("Failed to open %s", name);
 
-		pr_warning(", continuing without symbols\n");
+		pr_debug(", continuing without symbols\n");
 		return -1;
 	} else if (nr == 0) {
 #ifdef HAVE_LIBELF_SUPPORT
@@ -334,12 +333,11 @@ int map__load(struct map *map)
 
 		if (len > sizeof(DSO__DELETED) &&
 		    strcmp(name + real_len + 1, DSO__DELETED) == 0) {
-			pr_warning("%.*s was updated (is prelink enabled?). "
+			pr_debug("%.*s was updated (is prelink enabled?). "
 				"Restart the long running apps that use it!\n",
 				   (int)real_len, name);
 		} else {
-			pr_warning("no symbols found in %s, maybe install "
-				   "a debug package?\n", name);
+			pr_debug("no symbols found in %s, maybe install a debug package?\n", name);
 		}
 #endif
 		return -1;
@@ -712,8 +710,7 @@ static int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp
 		if (verbose >= 2) {
 
 			if (use_browser) {
-				pr_warning("overlapping maps in %s "
-					   "(disable tui for more info)\n",
+				pr_debug("overlapping maps in %s (disable tui for more info)\n",
 					   map->dso->name);
 			} else {
 				fputs("overlapping maps:\n", fp);
diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index 215f69f41672..cdb95b3a1213 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -281,7 +281,7 @@ int perf_mmap__read_init(struct perf_mmap *map)
 }
 
 int perf_mmap__push(struct perf_mmap *md, void *to,
-		    int push(void *to, void *buf, size_t size))
+		    int push(struct perf_mmap *map, void *to, void *buf, size_t size))
 {
 	u64 head = perf_mmap__read_head(md);
 	unsigned char *data = md->base + page_size;
@@ -300,7 +300,7 @@ int perf_mmap__push(struct perf_mmap *md, void *to,
 		size = md->mask + 1 - (md->start & md->mask);
 		md->start += size;
 
-		if (push(to, buf, size) < 0) {
+		if (push(md, to, buf, size) < 0) {
 			rc = -1;
 			goto out;
 		}
@@ -310,7 +310,7 @@ int perf_mmap__push(struct perf_mmap *md, void *to,
 	size = md->end - md->start;
 	md->start += size;
 
-	if (push(to, buf, size) < 0) {
+	if (push(md, to, buf, size) < 0) {
 		rc = -1;
 		goto out;
 	}
diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h
index 05a6d47c7956..cc5e2d6d17a9 100644
--- a/tools/perf/util/mmap.h
+++ b/tools/perf/util/mmap.h
@@ -4,7 +4,7 @@
 #include <linux/compiler.h>
 #include <linux/refcount.h>
 #include <linux/types.h>
-#include <asm/barrier.h>
+#include <linux/ring_buffer.h>
 #include <stdbool.h>
 #include "auxtrace.h"
 #include "event.h"
@@ -71,21 +71,12 @@ void perf_mmap__consume(struct perf_mmap *map);
 
 static inline u64 perf_mmap__read_head(struct perf_mmap *mm)
 {
-	struct perf_event_mmap_page *pc = mm->base;
-	u64 head = READ_ONCE(pc->data_head);
-	rmb();
-	return head;
+	return ring_buffer_read_head(mm->base);
 }
 
 static inline void perf_mmap__write_tail(struct perf_mmap *md, u64 tail)
 {
-	struct perf_event_mmap_page *pc = md->base;
-
-	/*
-	 * ensure all reads are done before we write the tail out.
-	 */
-	mb();
-	pc->data_tail = tail;
+	ring_buffer_write_tail(md->base, tail);
 }
 
 union perf_event *perf_mmap__read_forward(struct perf_mmap *map);
@@ -93,7 +84,7 @@ union perf_event *perf_mmap__read_forward(struct perf_mmap *map);
 union perf_event *perf_mmap__read_event(struct perf_mmap *map);
 
 int perf_mmap__push(struct perf_mmap *md, void *to,
-		    int push(void *to, void *buf, size_t size));
+		    int push(struct perf_mmap *map, void *to, void *buf, size_t size));
 
 size_t perf_mmap__mmap_len(struct perf_mmap *map);
 
diff --git a/tools/perf/util/ordered-events.c b/tools/perf/util/ordered-events.c
index bad9e0296e9a..1904e7f6ec84 100644
--- a/tools/perf/util/ordered-events.c
+++ b/tools/perf/util/ordered-events.c
@@ -80,14 +80,20 @@ static union perf_event *dup_event(struct ordered_events *oe,
 	return oe->copy_on_queue ? __dup_event(oe, event) : event;
 }
 
-static void free_dup_event(struct ordered_events *oe, union perf_event *event)
+static void __free_dup_event(struct ordered_events *oe, union perf_event *event)
 {
-	if (event && oe->copy_on_queue) {
+	if (event) {
 		oe->cur_alloc_size -= event->header.size;
 		free(event);
 	}
 }
 
+static void free_dup_event(struct ordered_events *oe, union perf_event *event)
+{
+	if (oe->copy_on_queue)
+		__free_dup_event(oe, event);
+}
+
 #define MAX_SAMPLE_BUFFER	(64 * 1024 / sizeof(struct ordered_event))
 static struct ordered_event *alloc_event(struct ordered_events *oe,
 					 union perf_event *event)
@@ -95,21 +101,49 @@ static struct ordered_event *alloc_event(struct ordered_events *oe,
 	struct list_head *cache = &oe->cache;
 	struct ordered_event *new = NULL;
 	union perf_event *new_event;
+	size_t size;
 
 	new_event = dup_event(oe, event);
 	if (!new_event)
 		return NULL;
 
+	/*
+	 * We maintain the following scheme of buffers for ordered
+	 * event allocation:
+	 *
+	 *   to_free list -> buffer1 (64K)
+	 *                   buffer2 (64K)
+	 *                   ...
+	 *
+	 * Each buffer keeps an array of ordered events objects:
+	 *    buffer -> event[0]
+	 *              event[1]
+	 *              ...
+	 *
+	 * Each allocated ordered event is linked to one of
+	 * following lists:
+	 *   - time ordered list 'events'
+	 *   - list of currently removed events 'cache'
+	 *
+	 * Allocation of the ordered event uses the following order
+	 * to get the memory:
+	 *   - use recently removed object from 'cache' list
+	 *   - use available object in current allocation buffer
+	 *   - allocate new buffer if the current buffer is full
+	 *
+	 * Removal of ordered event object moves it from events to
+	 * the cache list.
+	 */
+	size = sizeof(*oe->buffer) + MAX_SAMPLE_BUFFER * sizeof(*new);
+
 	if (!list_empty(cache)) {
 		new = list_entry(cache->next, struct ordered_event, list);
 		list_del(&new->list);
 	} else if (oe->buffer) {
-		new = oe->buffer + oe->buffer_idx;
+		new = &oe->buffer->event[oe->buffer_idx];
 		if (++oe->buffer_idx == MAX_SAMPLE_BUFFER)
 			oe->buffer = NULL;
-	} else if (oe->cur_alloc_size < oe->max_alloc_size) {
-		size_t size = MAX_SAMPLE_BUFFER * sizeof(*new);
-
+	} else if ((oe->cur_alloc_size + size) < oe->max_alloc_size) {
 		oe->buffer = malloc(size);
 		if (!oe->buffer) {
 			free_dup_event(oe, new_event);
@@ -122,11 +156,11 @@ static struct ordered_event *alloc_event(struct ordered_events *oe,
 		oe->cur_alloc_size += size;
 		list_add(&oe->buffer->list, &oe->to_free);
 
-		/* First entry is abused to maintain the to_free list. */
-		oe->buffer_idx = 2;
-		new = oe->buffer + 1;
+		oe->buffer_idx = 1;
+		new = &oe->buffer->event[0];
 	} else {
 		pr("allocation limit reached %" PRIu64 "B\n", oe->max_alloc_size);
+		return NULL;
 	}
 
 	new->event = new_event;
@@ -300,15 +334,38 @@ void ordered_events__init(struct ordered_events *oe, ordered_events__deliver_t d
 	oe->deliver	   = deliver;
 }
 
+static void
+ordered_events_buffer__free(struct ordered_events_buffer *buffer,
+			    unsigned int max, struct ordered_events *oe)
+{
+	if (oe->copy_on_queue) {
+		unsigned int i;
+
+		for (i = 0; i < max; i++)
+			__free_dup_event(oe, buffer->event[i].event);
+	}
+
+	free(buffer);
+}
+
 void ordered_events__free(struct ordered_events *oe)
 {
-	while (!list_empty(&oe->to_free)) {
-		struct ordered_event *event;
+	struct ordered_events_buffer *buffer, *tmp;
 
-		event = list_entry(oe->to_free.next, struct ordered_event, list);
-		list_del(&event->list);
-		free_dup_event(oe, event->event);
-		free(event);
+	if (list_empty(&oe->to_free))
+		return;
+
+	/*
+	 * Current buffer might not have all the events allocated
+	 * yet, we need to free only allocated ones ...
+	 */
+	list_del(&oe->buffer->list);
+	ordered_events_buffer__free(oe->buffer, oe->buffer_idx, oe);
+
+	/* ... and continue with the rest */
+	list_for_each_entry_safe(buffer, tmp, &oe->to_free, list) {
+		list_del(&buffer->list);
+		ordered_events_buffer__free(buffer, MAX_SAMPLE_BUFFER, oe);
 	}
 }
 
diff --git a/tools/perf/util/ordered-events.h b/tools/perf/util/ordered-events.h
index 8c7a2948593e..1338d5c345dc 100644
--- a/tools/perf/util/ordered-events.h
+++ b/tools/perf/util/ordered-events.h
@@ -25,23 +25,28 @@ struct ordered_events;
 typedef int (*ordered_events__deliver_t)(struct ordered_events *oe,
 					 struct ordered_event *event);
 
+struct ordered_events_buffer {
+	struct list_head	list;
+	struct ordered_event	event[0];
+};
+
 struct ordered_events {
-	u64			last_flush;
-	u64			next_flush;
-	u64			max_timestamp;
-	u64			max_alloc_size;
-	u64			cur_alloc_size;
-	struct list_head	events;
-	struct list_head	cache;
-	struct list_head	to_free;
-	struct ordered_event	*buffer;
-	struct ordered_event	*last;
-	ordered_events__deliver_t deliver;
-	int			buffer_idx;
-	unsigned int		nr_events;
-	enum oe_flush		last_flush_type;
-	u32			nr_unordered_events;
-	bool                    copy_on_queue;
+	u64				 last_flush;
+	u64				 next_flush;
+	u64				 max_timestamp;
+	u64				 max_alloc_size;
+	u64				 cur_alloc_size;
+	struct list_head		 events;
+	struct list_head		 cache;
+	struct list_head		 to_free;
+	struct ordered_events_buffer	*buffer;
+	struct ordered_event		*last;
+	ordered_events__deliver_t	 deliver;
+	int				 buffer_idx;
+	unsigned int			 nr_events;
+	enum oe_flush			 last_flush_type;
+	u32				 nr_unordered_events;
+	bool				 copy_on_queue;
 };
 
 int ordered_events__queue(struct ordered_events *oe, union perf_event *event,
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index f8cd3e7c9186..59be3466d64d 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -926,6 +926,7 @@ static const char *config_term_names[__PARSE_EVENTS__TERM_TYPE_NR] = {
 	[PARSE_EVENTS__TERM_TYPE_NOINHERIT]		= "no-inherit",
 	[PARSE_EVENTS__TERM_TYPE_INHERIT]		= "inherit",
 	[PARSE_EVENTS__TERM_TYPE_MAX_STACK]		= "max-stack",
+	[PARSE_EVENTS__TERM_TYPE_MAX_EVENTS]		= "nr",
 	[PARSE_EVENTS__TERM_TYPE_OVERWRITE]		= "overwrite",
 	[PARSE_EVENTS__TERM_TYPE_NOOVERWRITE]		= "no-overwrite",
 	[PARSE_EVENTS__TERM_TYPE_DRV_CFG]		= "driver-config",
@@ -1037,6 +1038,9 @@ do {									   \
 	case PARSE_EVENTS__TERM_TYPE_MAX_STACK:
 		CHECK_TYPE_VAL(NUM);
 		break;
+	case PARSE_EVENTS__TERM_TYPE_MAX_EVENTS:
+		CHECK_TYPE_VAL(NUM);
+		break;
 	default:
 		err->str = strdup("unknown term");
 		err->idx = term->err_term;
@@ -1084,6 +1088,7 @@ static int config_term_tracepoint(struct perf_event_attr *attr,
 	case PARSE_EVENTS__TERM_TYPE_INHERIT:
 	case PARSE_EVENTS__TERM_TYPE_NOINHERIT:
 	case PARSE_EVENTS__TERM_TYPE_MAX_STACK:
+	case PARSE_EVENTS__TERM_TYPE_MAX_EVENTS:
 	case PARSE_EVENTS__TERM_TYPE_OVERWRITE:
 	case PARSE_EVENTS__TERM_TYPE_NOOVERWRITE:
 		return config_term_common(attr, term, err);
@@ -1162,6 +1167,9 @@ do {								\
 		case PARSE_EVENTS__TERM_TYPE_MAX_STACK:
 			ADD_CONFIG_TERM(MAX_STACK, max_stack, term->val.num);
 			break;
+		case PARSE_EVENTS__TERM_TYPE_MAX_EVENTS:
+			ADD_CONFIG_TERM(MAX_EVENTS, max_events, term->val.num);
+			break;
 		case PARSE_EVENTS__TERM_TYPE_OVERWRITE:
 			ADD_CONFIG_TERM(OVERWRITE, overwrite, term->val.num ? 1 : 0);
 			break;
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 4473dac27aee..5ed035cbcbb7 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -71,6 +71,7 @@ enum {
 	PARSE_EVENTS__TERM_TYPE_NOINHERIT,
 	PARSE_EVENTS__TERM_TYPE_INHERIT,
 	PARSE_EVENTS__TERM_TYPE_MAX_STACK,
+	PARSE_EVENTS__TERM_TYPE_MAX_EVENTS,
 	PARSE_EVENTS__TERM_TYPE_NOOVERWRITE,
 	PARSE_EVENTS__TERM_TYPE_OVERWRITE,
 	PARSE_EVENTS__TERM_TYPE_DRV_CFG,
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index 5f761f3ed0f3..7805c71aaae2 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -269,6 +269,7 @@ time			{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_TIME); }
 call-graph		{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CALLGRAPH); }
 stack-size		{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_STACKSIZE); }
 max-stack		{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_MAX_STACK); }
+nr			{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_MAX_EVENTS); }
 inherit			{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_INHERIT); }
 no-inherit		{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NOINHERIT); }
 overwrite		{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_OVERWRITE); }
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 7799788f662f..7e49baad304d 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -773,7 +773,7 @@ static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu)
 
 		if (!is_arm_pmu_core(name)) {
 			pname = pe->pmu ? pe->pmu : "cpu";
-			if (strncmp(pname, name, strlen(pname)))
+			if (strcmp(pname, name))
 				continue;
 		}
 
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index f119eb628dbb..e86f8be89157 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -1819,6 +1819,12 @@ int parse_probe_trace_command(const char *cmd, struct probe_trace_event *tev)
 			tp->offset = strtoul(fmt2_str, NULL, 10);
 	}
 
+	if (tev->uprobes) {
+		fmt2_str = strchr(p, '(');
+		if (fmt2_str)
+			tp->ref_ctr_offset = strtoul(fmt2_str + 1, NULL, 0);
+	}
+
 	tev->nargs = argc - 2;
 	tev->args = zalloc(sizeof(struct probe_trace_arg) * tev->nargs);
 	if (tev->args == NULL) {
@@ -2012,6 +2018,22 @@ static int synthesize_probe_trace_arg(struct probe_trace_arg *arg,
 	return err;
 }
 
+static int
+synthesize_uprobe_trace_def(struct probe_trace_event *tev, struct strbuf *buf)
+{
+	struct probe_trace_point *tp = &tev->point;
+	int err;
+
+	err = strbuf_addf(buf, "%s:0x%lx", tp->module, tp->address);
+
+	if (err >= 0 && tp->ref_ctr_offset) {
+		if (!uprobe_ref_ctr_is_supported())
+			return -1;
+		err = strbuf_addf(buf, "(0x%lx)", tp->ref_ctr_offset);
+	}
+	return err >= 0 ? 0 : -1;
+}
+
 char *synthesize_probe_trace_command(struct probe_trace_event *tev)
 {
 	struct probe_trace_point *tp = &tev->point;
@@ -2041,15 +2063,17 @@ char *synthesize_probe_trace_command(struct probe_trace_event *tev)
 	}
 
 	/* Use the tp->address for uprobes */
-	if (tev->uprobes)
-		err = strbuf_addf(&buf, "%s:0x%lx", tp->module, tp->address);
-	else if (!strncmp(tp->symbol, "0x", 2))
+	if (tev->uprobes) {
+		err = synthesize_uprobe_trace_def(tev, &buf);
+	} else if (!strncmp(tp->symbol, "0x", 2)) {
 		/* Absolute address. See try_to_find_absolute_address() */
 		err = strbuf_addf(&buf, "%s%s0x%lx", tp->module ?: "",
 				  tp->module ? ":" : "", tp->address);
-	else
+	} else {
 		err = strbuf_addf(&buf, "%s%s%s+%lu", tp->module ?: "",
 				tp->module ? ":" : "", tp->symbol, tp->offset);
+	}
+
 	if (err)
 		goto error;
 
@@ -2633,6 +2657,13 @@ static void warn_uprobe_event_compat(struct probe_trace_event *tev)
 {
 	int i;
 	char *buf = synthesize_probe_trace_command(tev);
+	struct probe_trace_point *tp = &tev->point;
+
+	if (tp->ref_ctr_offset && !uprobe_ref_ctr_is_supported()) {
+		pr_warning("A semaphore is associated with %s:%s and "
+			   "seems your kernel doesn't support it.\n",
+			   tev->group, tev->event);
+	}
 
 	/* Old uprobe event doesn't support memory dereference */
 	if (!tev->uprobes || tev->nargs == 0 || !buf)
diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h
index 45b14f020558..15a98c3a2a2f 100644
--- a/tools/perf/util/probe-event.h
+++ b/tools/perf/util/probe-event.h
@@ -27,6 +27,7 @@ struct probe_trace_point {
 	char		*symbol;	/* Base symbol */
 	char		*module;	/* Module name */
 	unsigned long	offset;		/* Offset from symbol */
+	unsigned long	ref_ctr_offset;	/* SDT reference counter offset */
 	unsigned long	address;	/* Actual address of the trace point */
 	bool		retprobe;	/* Return probe flag */
 };
diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c
index b76088fadf3d..aac7817d9e14 100644
--- a/tools/perf/util/probe-file.c
+++ b/tools/perf/util/probe-file.c
@@ -696,8 +696,16 @@ out_err:
 #ifdef HAVE_GELF_GETNOTE_SUPPORT
 static unsigned long long sdt_note__get_addr(struct sdt_note *note)
 {
-	return note->bit32 ? (unsigned long long)note->addr.a32[0]
-		 : (unsigned long long)note->addr.a64[0];
+	return note->bit32 ?
+		(unsigned long long)note->addr.a32[SDT_NOTE_IDX_LOC] :
+		(unsigned long long)note->addr.a64[SDT_NOTE_IDX_LOC];
+}
+
+static unsigned long long sdt_note__get_ref_ctr_offset(struct sdt_note *note)
+{
+	return note->bit32 ?
+		(unsigned long long)note->addr.a32[SDT_NOTE_IDX_REFCTR] :
+		(unsigned long long)note->addr.a64[SDT_NOTE_IDX_REFCTR];
 }
 
 static const char * const type_to_suffix[] = {
@@ -775,14 +783,21 @@ static char *synthesize_sdt_probe_command(struct sdt_note *note,
 {
 	struct strbuf buf;
 	char *ret = NULL, **args;
-	int i, args_count;
+	int i, args_count, err;
+	unsigned long long ref_ctr_offset;
 
 	if (strbuf_init(&buf, 32) < 0)
 		return NULL;
 
-	if (strbuf_addf(&buf, "p:%s/%s %s:0x%llx",
-				sdtgrp, note->name, pathname,
-				sdt_note__get_addr(note)) < 0)
+	err = strbuf_addf(&buf, "p:%s/%s %s:0x%llx",
+			sdtgrp, note->name, pathname,
+			sdt_note__get_addr(note));
+
+	ref_ctr_offset = sdt_note__get_ref_ctr_offset(note);
+	if (ref_ctr_offset && err >= 0)
+		err = strbuf_addf(&buf, "(0x%llx)", ref_ctr_offset);
+
+	if (err < 0)
 		goto error;
 
 	if (!note->args)
@@ -998,6 +1013,7 @@ int probe_cache__show_all_caches(struct strfilter *filter)
 enum ftrace_readme {
 	FTRACE_README_PROBE_TYPE_X = 0,
 	FTRACE_README_KRETPROBE_OFFSET,
+	FTRACE_README_UPROBE_REF_CTR,
 	FTRACE_README_END,
 };
 
@@ -1009,6 +1025,7 @@ static struct {
 	[idx] = {.pattern = pat, .avail = false}
 	DEFINE_TYPE(FTRACE_README_PROBE_TYPE_X, "*type: * x8/16/32/64,*"),
 	DEFINE_TYPE(FTRACE_README_KRETPROBE_OFFSET, "*place (kretprobe): *"),
+	DEFINE_TYPE(FTRACE_README_UPROBE_REF_CTR, "*ref_ctr_offset*"),
 };
 
 static bool scan_ftrace_readme(enum ftrace_readme type)
@@ -1064,3 +1081,8 @@ bool kretprobe_offset_is_supported(void)
 {
 	return scan_ftrace_readme(FTRACE_README_KRETPROBE_OFFSET);
 }
+
+bool uprobe_ref_ctr_is_supported(void)
+{
+	return scan_ftrace_readme(FTRACE_README_UPROBE_REF_CTR);
+}
diff --git a/tools/perf/util/probe-file.h b/tools/perf/util/probe-file.h
index 63f29b1d22c1..2a249182f2a6 100644
--- a/tools/perf/util/probe-file.h
+++ b/tools/perf/util/probe-file.h
@@ -69,6 +69,7 @@ struct probe_cache_entry *probe_cache__find_by_name(struct probe_cache *pcache,
 int probe_cache__show_all_caches(struct strfilter *filter);
 bool probe_type_is_available(enum probe_type type);
 bool kretprobe_offset_is_supported(void);
+bool uprobe_ref_ctr_is_supported(void);
 #else	/* ! HAVE_LIBELF_SUPPORT */
 static inline struct probe_cache *probe_cache__new(const char *tgt __maybe_unused, struct nsinfo *nsi __maybe_unused)
 {
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
index ce501ba14b08..50150dfc0cdf 100644
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -340,7 +340,7 @@ static bool is_tracepoint(struct pyrf_event *pevent)
 }
 
 static PyObject*
-tracepoint_field(struct pyrf_event *pe, struct format_field *field)
+tracepoint_field(struct pyrf_event *pe, struct tep_format_field *field)
 {
 	struct tep_handle *pevent = field->event->pevent;
 	void *data = pe->sample.raw_data;
@@ -348,28 +348,28 @@ tracepoint_field(struct pyrf_event *pe, struct format_field *field)
 	unsigned long long val;
 	unsigned int offset, len;
 
-	if (field->flags & FIELD_IS_ARRAY) {
+	if (field->flags & TEP_FIELD_IS_ARRAY) {
 		offset = field->offset;
 		len    = field->size;
-		if (field->flags & FIELD_IS_DYNAMIC) {
+		if (field->flags & TEP_FIELD_IS_DYNAMIC) {
 			val     = tep_read_number(pevent, data + offset, len);
 			offset  = val;
 			len     = offset >> 16;
 			offset &= 0xffff;
 		}
-		if (field->flags & FIELD_IS_STRING &&
+		if (field->flags & TEP_FIELD_IS_STRING &&
 		    is_printable_array(data + offset, len)) {
 			ret = _PyUnicode_FromString((char *)data + offset);
 		} else {
 			ret = PyByteArray_FromStringAndSize((const char *) data + offset, len);
-			field->flags &= ~FIELD_IS_STRING;
+			field->flags &= ~TEP_FIELD_IS_STRING;
 		}
 	} else {
 		val = tep_read_number(pevent, data + field->offset,
 				      field->size);
-		if (field->flags & FIELD_IS_POINTER)
+		if (field->flags & TEP_FIELD_IS_POINTER)
 			ret = PyLong_FromUnsignedLong((unsigned long) val);
-		else if (field->flags & FIELD_IS_SIGNED)
+		else if (field->flags & TEP_FIELD_IS_SIGNED)
 			ret = PyLong_FromLong((long) val);
 		else
 			ret = PyLong_FromUnsignedLong((unsigned long) val);
@@ -383,10 +383,10 @@ get_tracepoint_field(struct pyrf_event *pevent, PyObject *attr_name)
 {
 	const char *str = _PyUnicode_AsString(PyObject_Str(attr_name));
 	struct perf_evsel *evsel = pevent->evsel;
-	struct format_field *field;
+	struct tep_format_field *field;
 
 	if (!evsel->tp_format) {
-		struct event_format *tp_format;
+		struct tep_event_format *tp_format;
 
 		tp_format = trace_event__tp_format_id(evsel->attr.config);
 		if (!tp_format)
@@ -1240,7 +1240,7 @@ static struct {
 static PyObject *pyrf__tracepoint(struct pyrf_evsel *pevsel,
 				  PyObject *args, PyObject *kwargs)
 {
-	struct event_format *tp_format;
+	struct tep_event_format *tp_format;
 	static char *kwlist[] = { "sys", "name", NULL };
 	char *sys  = NULL;
 	char *name = NULL;
diff --git a/tools/perf/util/s390-cpumsf.c b/tools/perf/util/s390-cpumsf.c
index d2c78ffd9fee..a2eeebbfb25f 100644
--- a/tools/perf/util/s390-cpumsf.c
+++ b/tools/perf/util/s390-cpumsf.c
@@ -147,6 +147,9 @@
 #include <linux/bitops.h>
 #include <linux/log2.h>
 
+#include <sys/stat.h>
+#include <sys/types.h>
+
 #include "cpumap.h"
 #include "color.h"
 #include "evsel.h"
@@ -159,6 +162,7 @@
 #include "auxtrace.h"
 #include "s390-cpumsf.h"
 #include "s390-cpumsf-kernel.h"
+#include "config.h"
 
 struct s390_cpumsf {
 	struct auxtrace		auxtrace;
@@ -170,6 +174,8 @@ struct s390_cpumsf {
 	u32			pmu_type;
 	u16			machine_type;
 	bool			data_queued;
+	bool			use_logfile;
+	char			*logdir;
 };
 
 struct s390_cpumsf_queue {
@@ -177,6 +183,7 @@ struct s390_cpumsf_queue {
 	unsigned int		queue_nr;
 	struct auxtrace_buffer	*buffer;
 	int			cpu;
+	FILE			*logfile;
 };
 
 /* Display s390 CPU measurement facility basic-sampling data entry */
@@ -595,6 +602,12 @@ static int s390_cpumsf_run_decoder(struct s390_cpumsf_queue *sfq,
 			buffer->use_size = buffer->size;
 			buffer->use_data = buffer->data;
 		}
+		if (sfq->logfile) {	/* Write into log file */
+			size_t rc = fwrite(buffer->data, buffer->size, 1,
+					   sfq->logfile);
+			if (rc != 1)
+				pr_err("Failed to write auxiliary data\n");
+		}
 	} else
 		buffer = sfq->buffer;
 
@@ -606,6 +619,13 @@ static int s390_cpumsf_run_decoder(struct s390_cpumsf_queue *sfq,
 			return -ENOMEM;
 		buffer->use_size = buffer->size;
 		buffer->use_data = buffer->data;
+
+		if (sfq->logfile) {	/* Write into log file */
+			size_t rc = fwrite(buffer->data, buffer->size, 1,
+					   sfq->logfile);
+			if (rc != 1)
+				pr_err("Failed to write auxiliary data\n");
+		}
 	}
 	pr_debug4("%s queue_nr:%d buffer:%" PRId64 " offset:%#" PRIx64 " size:%#zx rest:%#zx\n",
 		  __func__, sfq->queue_nr, buffer->buffer_nr, buffer->offset,
@@ -640,6 +660,23 @@ s390_cpumsf_alloc_queue(struct s390_cpumsf *sf, unsigned int queue_nr)
 	sfq->sf = sf;
 	sfq->queue_nr = queue_nr;
 	sfq->cpu = -1;
+	if (sf->use_logfile) {
+		char *name;
+		int rc;
+
+		rc = (sf->logdir)
+			? asprintf(&name, "%s/aux.smp.%02x",
+				 sf->logdir, queue_nr)
+			: asprintf(&name, "aux.smp.%02x", queue_nr);
+		if (rc > 0)
+			sfq->logfile = fopen(name, "w");
+		if (sfq->logfile == NULL) {
+			pr_err("Failed to open auxiliary log file %s,"
+			       "continue...\n", name);
+			sf->use_logfile = false;
+		}
+		free(name);
+	}
 	return sfq;
 }
 
@@ -850,8 +887,16 @@ static void s390_cpumsf_free_queues(struct perf_session *session)
 	struct auxtrace_queues *queues = &sf->queues;
 	unsigned int i;
 
-	for (i = 0; i < queues->nr_queues; i++)
+	for (i = 0; i < queues->nr_queues; i++) {
+		struct s390_cpumsf_queue *sfq = (struct s390_cpumsf_queue *)
+						queues->queue_array[i].priv;
+
+		if (sfq != NULL && sfq->logfile) {
+			fclose(sfq->logfile);
+			sfq->logfile = NULL;
+		}
 		zfree(&queues->queue_array[i].priv);
+	}
 	auxtrace_queues__free(queues);
 }
 
@@ -864,6 +909,7 @@ static void s390_cpumsf_free(struct perf_session *session)
 	auxtrace_heap__free(&sf->heap);
 	s390_cpumsf_free_queues(session);
 	session->auxtrace = NULL;
+	free(sf->logdir);
 	free(sf);
 }
 
@@ -877,17 +923,55 @@ static int s390_cpumsf_get_type(const char *cpuid)
 
 /* Check itrace options set on perf report command.
  * Return true, if none are set or all options specified can be
- * handled on s390.
+ * handled on s390 (currently only option 'd' for logging.
  * Return false otherwise.
  */
 static bool check_auxtrace_itrace(struct itrace_synth_opts *itops)
 {
+	bool ison = false;
+
 	if (!itops || !itops->set)
 		return true;
-	pr_err("No --itrace options supported\n");
+	ison = itops->inject || itops->instructions || itops->branches ||
+		itops->transactions || itops->ptwrites ||
+		itops->pwr_events || itops->errors ||
+		itops->dont_decode || itops->calls || itops->returns ||
+		itops->callchain || itops->thread_stack ||
+		itops->last_branch;
+	if (!ison)
+		return true;
+	pr_err("Unsupported --itrace options specified\n");
 	return false;
 }
 
+/* Check for AUXTRACE dump directory if it is needed.
+ * On failure print an error message but continue.
+ * Return 0 on wrong keyword in config file and 1 otherwise.
+ */
+static int s390_cpumsf__config(const char *var, const char *value, void *cb)
+{
+	struct s390_cpumsf *sf = cb;
+	struct stat stbuf;
+	int rc;
+
+	if (strcmp(var, "auxtrace.dumpdir"))
+		return 0;
+	sf->logdir = strdup(value);
+	if (sf->logdir == NULL) {
+		pr_err("Failed to find auxtrace log directory %s,"
+		       " continue with current directory...\n", value);
+		return 1;
+	}
+	rc = stat(sf->logdir, &stbuf);
+	if (rc == -1 || !S_ISDIR(stbuf.st_mode)) {
+		pr_err("Missing auxtrace log directory %s,"
+		       " continue with current directory...\n", value);
+		free(sf->logdir);
+		sf->logdir = NULL;
+	}
+	return 1;
+}
+
 int s390_cpumsf_process_auxtrace_info(union perf_event *event,
 				      struct perf_session *session)
 {
@@ -906,6 +990,9 @@ int s390_cpumsf_process_auxtrace_info(union perf_event *event,
 		err = -EINVAL;
 		goto err_free;
 	}
+	sf->use_logfile = session->itrace_synth_opts->log;
+	if (sf->use_logfile)
+		perf_config(s390_cpumsf__config, sf);
 
 	err = auxtrace_queues__init(&sf->queues);
 	if (err)
@@ -940,6 +1027,7 @@ err_free_queues:
 	auxtrace_queues__free(&sf->queues);
 	session->auxtrace = NULL;
 err_free:
+	free(sf->logdir);
 	free(sf);
 	return err;
 }
diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c
index 45484f0f7292..89cb887648f9 100644
--- a/tools/perf/util/scripting-engines/trace-event-perl.c
+++ b/tools/perf/util/scripting-engines/trace-event-perl.c
@@ -99,7 +99,7 @@ static void define_symbolic_value(const char *ev_name,
 	LEAVE;
 }
 
-static void define_symbolic_values(struct print_flag_sym *field,
+static void define_symbolic_values(struct tep_print_flag_sym *field,
 				   const char *ev_name,
 				   const char *field_name)
 {
@@ -157,7 +157,7 @@ static void define_flag_value(const char *ev_name,
 	LEAVE;
 }
 
-static void define_flag_values(struct print_flag_sym *field,
+static void define_flag_values(struct tep_print_flag_sym *field,
 			       const char *ev_name,
 			       const char *field_name)
 {
@@ -189,62 +189,62 @@ static void define_flag_field(const char *ev_name,
 	LEAVE;
 }
 
-static void define_event_symbols(struct event_format *event,
+static void define_event_symbols(struct tep_event_format *event,
 				 const char *ev_name,
-				 struct print_arg *args)
+				 struct tep_print_arg *args)
 {
 	if (args == NULL)
 		return;
 
 	switch (args->type) {
-	case PRINT_NULL:
+	case TEP_PRINT_NULL:
 		break;
-	case PRINT_ATOM:
+	case TEP_PRINT_ATOM:
 		define_flag_value(ev_name, cur_field_name, "0",
 				  args->atom.atom);
 		zero_flag_atom = 0;
 		break;
-	case PRINT_FIELD:
+	case TEP_PRINT_FIELD:
 		free(cur_field_name);
 		cur_field_name = strdup(args->field.name);
 		break;
-	case PRINT_FLAGS:
+	case TEP_PRINT_FLAGS:
 		define_event_symbols(event, ev_name, args->flags.field);
 		define_flag_field(ev_name, cur_field_name, args->flags.delim);
 		define_flag_values(args->flags.flags, ev_name, cur_field_name);
 		break;
-	case PRINT_SYMBOL:
+	case TEP_PRINT_SYMBOL:
 		define_event_symbols(event, ev_name, args->symbol.field);
 		define_symbolic_field(ev_name, cur_field_name);
 		define_symbolic_values(args->symbol.symbols, ev_name,
 				       cur_field_name);
 		break;
-	case PRINT_HEX:
-	case PRINT_HEX_STR:
+	case TEP_PRINT_HEX:
+	case TEP_PRINT_HEX_STR:
 		define_event_symbols(event, ev_name, args->hex.field);
 		define_event_symbols(event, ev_name, args->hex.size);
 		break;
-	case PRINT_INT_ARRAY:
+	case TEP_PRINT_INT_ARRAY:
 		define_event_symbols(event, ev_name, args->int_array.field);
 		define_event_symbols(event, ev_name, args->int_array.count);
 		define_event_symbols(event, ev_name, args->int_array.el_size);
 		break;
-	case PRINT_BSTRING:
-	case PRINT_DYNAMIC_ARRAY:
-	case PRINT_DYNAMIC_ARRAY_LEN:
-	case PRINT_STRING:
-	case PRINT_BITMASK:
+	case TEP_PRINT_BSTRING:
+	case TEP_PRINT_DYNAMIC_ARRAY:
+	case TEP_PRINT_DYNAMIC_ARRAY_LEN:
+	case TEP_PRINT_STRING:
+	case TEP_PRINT_BITMASK:
 		break;
-	case PRINT_TYPE:
+	case TEP_PRINT_TYPE:
 		define_event_symbols(event, ev_name, args->typecast.item);
 		break;
-	case PRINT_OP:
+	case TEP_PRINT_OP:
 		if (strcmp(args->op.op, ":") == 0)
 			zero_flag_atom = 1;
 		define_event_symbols(event, ev_name, args->op.left);
 		define_event_symbols(event, ev_name, args->op.right);
 		break;
-	case PRINT_FUNC:
+	case TEP_PRINT_FUNC:
 	default:
 		pr_err("Unsupported print arg type\n");
 		/* we should warn... */
@@ -338,8 +338,8 @@ static void perl_process_tracepoint(struct perf_sample *sample,
 				    struct addr_location *al)
 {
 	struct thread *thread = al->thread;
-	struct event_format *event = evsel->tp_format;
-	struct format_field *field;
+	struct tep_event_format *event = evsel->tp_format;
+	struct tep_format_field *field;
 	static char handler[256];
 	unsigned long long val;
 	unsigned long s, ns;
@@ -388,9 +388,9 @@ static void perl_process_tracepoint(struct perf_sample *sample,
 	/* common fields other than pid can be accessed via xsub fns */
 
 	for (field = event->format.fields; field; field = field->next) {
-		if (field->flags & FIELD_IS_STRING) {
+		if (field->flags & TEP_FIELD_IS_STRING) {
 			int offset;
-			if (field->flags & FIELD_IS_DYNAMIC) {
+			if (field->flags & TEP_FIELD_IS_DYNAMIC) {
 				offset = *(int *)(data + field->offset);
 				offset &= 0xffff;
 			} else
@@ -399,7 +399,7 @@ static void perl_process_tracepoint(struct perf_sample *sample,
 		} else { /* FIELD_IS_NUMERIC */
 			val = read_size(event, data + field->offset,
 					field->size);
-			if (field->flags & FIELD_IS_SIGNED) {
+			if (field->flags & TEP_FIELD_IS_SIGNED) {
 				XPUSHs(sv_2mortal(newSViv(val)));
 			} else {
 				XPUSHs(sv_2mortal(newSVuv(val)));
@@ -537,8 +537,8 @@ static int perl_stop_script(void)
 
 static int perl_generate_script(struct tep_handle *pevent, const char *outfile)
 {
-	struct event_format *event = NULL;
-	struct format_field *f;
+	struct tep_event_format *event = NULL;
+	struct tep_format_field *f;
 	char fname[PATH_MAX];
 	int not_first, count;
 	FILE *ofp;
@@ -646,11 +646,11 @@ sub print_backtrace\n\
 			count++;
 
 			fprintf(ofp, "%s=", f->name);
-			if (f->flags & FIELD_IS_STRING ||
-			    f->flags & FIELD_IS_FLAG ||
-			    f->flags & FIELD_IS_SYMBOLIC)
+			if (f->flags & TEP_FIELD_IS_STRING ||
+			    f->flags & TEP_FIELD_IS_FLAG ||
+			    f->flags & TEP_FIELD_IS_SYMBOLIC)
 				fprintf(ofp, "%%s");
-			else if (f->flags & FIELD_IS_SIGNED)
+			else if (f->flags & TEP_FIELD_IS_SIGNED)
 				fprintf(ofp, "%%d");
 			else
 				fprintf(ofp, "%%u");
@@ -668,7 +668,7 @@ sub print_backtrace\n\
 			if (++count % 5 == 0)
 				fprintf(ofp, "\n\t       ");
 
-			if (f->flags & FIELD_IS_FLAG) {
+			if (f->flags & TEP_FIELD_IS_FLAG) {
 				if ((count - 1) % 5 != 0) {
 					fprintf(ofp, "\n\t       ");
 					count = 4;
@@ -678,7 +678,7 @@ sub print_backtrace\n\
 					event->name);
 				fprintf(ofp, "\"%s\", $%s)", f->name,
 					f->name);
-			} else if (f->flags & FIELD_IS_SYMBOLIC) {
+			} else if (f->flags & TEP_FIELD_IS_SYMBOLIC) {
 				if ((count - 1) % 5 != 0) {
 					fprintf(ofp, "\n\t       ");
 					count = 4;
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index dfc6093f118c..69aa93d4ee99 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -193,7 +193,7 @@ static void try_call_object(const char *handler_name, PyObject *args)
 		call_object(handler, args, handler_name);
 }
 
-static void define_value(enum print_arg_type field_type,
+static void define_value(enum tep_print_arg_type field_type,
 			 const char *ev_name,
 			 const char *field_name,
 			 const char *field_value,
@@ -204,7 +204,7 @@ static void define_value(enum print_arg_type field_type,
 	unsigned long long value;
 	unsigned n = 0;
 
-	if (field_type == PRINT_SYMBOL)
+	if (field_type == TEP_PRINT_SYMBOL)
 		handler_name = "define_symbolic_value";
 
 	t = PyTuple_New(4);
@@ -223,8 +223,8 @@ static void define_value(enum print_arg_type field_type,
 	Py_DECREF(t);
 }
 
-static void define_values(enum print_arg_type field_type,
-			  struct print_flag_sym *field,
+static void define_values(enum tep_print_arg_type field_type,
+			  struct tep_print_flag_sym *field,
 			  const char *ev_name,
 			  const char *field_name)
 {
@@ -235,7 +235,7 @@ static void define_values(enum print_arg_type field_type,
 		define_values(field_type, field->next, ev_name, field_name);
 }
 
-static void define_field(enum print_arg_type field_type,
+static void define_field(enum tep_print_arg_type field_type,
 			 const char *ev_name,
 			 const char *field_name,
 			 const char *delim)
@@ -244,10 +244,10 @@ static void define_field(enum print_arg_type field_type,
 	PyObject *t;
 	unsigned n = 0;
 
-	if (field_type == PRINT_SYMBOL)
+	if (field_type == TEP_PRINT_SYMBOL)
 		handler_name = "define_symbolic_field";
 
-	if (field_type == PRINT_FLAGS)
+	if (field_type == TEP_PRINT_FLAGS)
 		t = PyTuple_New(3);
 	else
 		t = PyTuple_New(2);
@@ -256,7 +256,7 @@ static void define_field(enum print_arg_type field_type,
 
 	PyTuple_SetItem(t, n++, _PyUnicode_FromString(ev_name));
 	PyTuple_SetItem(t, n++, _PyUnicode_FromString(field_name));
-	if (field_type == PRINT_FLAGS)
+	if (field_type == TEP_PRINT_FLAGS)
 		PyTuple_SetItem(t, n++, _PyUnicode_FromString(delim));
 
 	try_call_object(handler_name, t);
@@ -264,54 +264,54 @@ static void define_field(enum print_arg_type field_type,
 	Py_DECREF(t);
 }
 
-static void define_event_symbols(struct event_format *event,
+static void define_event_symbols(struct tep_event_format *event,
 				 const char *ev_name,
-				 struct print_arg *args)
+				 struct tep_print_arg *args)
 {
 	if (args == NULL)
 		return;
 
 	switch (args->type) {
-	case PRINT_NULL:
+	case TEP_PRINT_NULL:
 		break;
-	case PRINT_ATOM:
-		define_value(PRINT_FLAGS, ev_name, cur_field_name, "0",
+	case TEP_PRINT_ATOM:
+		define_value(TEP_PRINT_FLAGS, ev_name, cur_field_name, "0",
 			     args->atom.atom);
 		zero_flag_atom = 0;
 		break;
-	case PRINT_FIELD:
+	case TEP_PRINT_FIELD:
 		free(cur_field_name);
 		cur_field_name = strdup(args->field.name);
 		break;
-	case PRINT_FLAGS:
+	case TEP_PRINT_FLAGS:
 		define_event_symbols(event, ev_name, args->flags.field);
-		define_field(PRINT_FLAGS, ev_name, cur_field_name,
+		define_field(TEP_PRINT_FLAGS, ev_name, cur_field_name,
 			     args->flags.delim);
-		define_values(PRINT_FLAGS, args->flags.flags, ev_name,
+		define_values(TEP_PRINT_FLAGS, args->flags.flags, ev_name,
 			      cur_field_name);
 		break;
-	case PRINT_SYMBOL:
+	case TEP_PRINT_SYMBOL:
 		define_event_symbols(event, ev_name, args->symbol.field);
-		define_field(PRINT_SYMBOL, ev_name, cur_field_name, NULL);
-		define_values(PRINT_SYMBOL, args->symbol.symbols, ev_name,
+		define_field(TEP_PRINT_SYMBOL, ev_name, cur_field_name, NULL);
+		define_values(TEP_PRINT_SYMBOL, args->symbol.symbols, ev_name,
 			      cur_field_name);
 		break;
-	case PRINT_HEX:
-	case PRINT_HEX_STR:
+	case TEP_PRINT_HEX:
+	case TEP_PRINT_HEX_STR:
 		define_event_symbols(event, ev_name, args->hex.field);
 		define_event_symbols(event, ev_name, args->hex.size);
 		break;
-	case PRINT_INT_ARRAY:
+	case TEP_PRINT_INT_ARRAY:
 		define_event_symbols(event, ev_name, args->int_array.field);
 		define_event_symbols(event, ev_name, args->int_array.count);
 		define_event_symbols(event, ev_name, args->int_array.el_size);
 		break;
-	case PRINT_STRING:
+	case TEP_PRINT_STRING:
 		break;
-	case PRINT_TYPE:
+	case TEP_PRINT_TYPE:
 		define_event_symbols(event, ev_name, args->typecast.item);
 		break;
-	case PRINT_OP:
+	case TEP_PRINT_OP:
 		if (strcmp(args->op.op, ":") == 0)
 			zero_flag_atom = 1;
 		define_event_symbols(event, ev_name, args->op.left);
@@ -319,11 +319,11 @@ static void define_event_symbols(struct event_format *event,
 		break;
 	default:
 		/* gcc warns for these? */
-	case PRINT_BSTRING:
-	case PRINT_DYNAMIC_ARRAY:
-	case PRINT_DYNAMIC_ARRAY_LEN:
-	case PRINT_FUNC:
-	case PRINT_BITMASK:
+	case TEP_PRINT_BSTRING:
+	case TEP_PRINT_DYNAMIC_ARRAY:
+	case TEP_PRINT_DYNAMIC_ARRAY_LEN:
+	case TEP_PRINT_FUNC:
+	case TEP_PRINT_BITMASK:
 		/* we should warn... */
 		return;
 	}
@@ -332,10 +332,10 @@ static void define_event_symbols(struct event_format *event,
 		define_event_symbols(event, ev_name, args->next);
 }
 
-static PyObject *get_field_numeric_entry(struct event_format *event,
-		struct format_field *field, void *data)
+static PyObject *get_field_numeric_entry(struct tep_event_format *event,
+		struct tep_format_field *field, void *data)
 {
-	bool is_array = field->flags & FIELD_IS_ARRAY;
+	bool is_array = field->flags & TEP_FIELD_IS_ARRAY;
 	PyObject *obj = NULL, *list = NULL;
 	unsigned long long val;
 	unsigned int item_size, n_items, i;
@@ -353,7 +353,7 @@ static PyObject *get_field_numeric_entry(struct event_format *event,
 
 		val = read_size(event, data + field->offset + i * item_size,
 				item_size);
-		if (field->flags & FIELD_IS_SIGNED) {
+		if (field->flags & TEP_FIELD_IS_SIGNED) {
 			if ((long long)val >= LONG_MIN &&
 					(long long)val <= LONG_MAX)
 				obj = _PyLong_FromLong(val);
@@ -790,11 +790,11 @@ static void python_process_tracepoint(struct perf_sample *sample,
 				      struct perf_evsel *evsel,
 				      struct addr_location *al)
 {
-	struct event_format *event = evsel->tp_format;
+	struct tep_event_format *event = evsel->tp_format;
 	PyObject *handler, *context, *t, *obj = NULL, *callchain;
 	PyObject *dict = NULL, *all_entries_dict = NULL;
 	static char handler_name[256];
-	struct format_field *field;
+	struct tep_format_field *field;
 	unsigned long s, ns;
 	unsigned n = 0;
 	int pid;
@@ -867,22 +867,22 @@ static void python_process_tracepoint(struct perf_sample *sample,
 		unsigned int offset, len;
 		unsigned long long val;
 
-		if (field->flags & FIELD_IS_ARRAY) {
+		if (field->flags & TEP_FIELD_IS_ARRAY) {
 			offset = field->offset;
 			len    = field->size;
-			if (field->flags & FIELD_IS_DYNAMIC) {
+			if (field->flags & TEP_FIELD_IS_DYNAMIC) {
 				val     = tep_read_number(scripting_context->pevent,
 							  data + offset, len);
 				offset  = val;
 				len     = offset >> 16;
 				offset &= 0xffff;
 			}
-			if (field->flags & FIELD_IS_STRING &&
+			if (field->flags & TEP_FIELD_IS_STRING &&
 			    is_printable_array(data + offset, len)) {
 				obj = _PyUnicode_FromString((char *) data + offset);
 			} else {
 				obj = PyByteArray_FromStringAndSize((const char *) data + offset, len);
-				field->flags &= ~FIELD_IS_STRING;
+				field->flags &= ~TEP_FIELD_IS_STRING;
 			}
 		} else { /* FIELD_IS_NUMERIC */
 			obj = get_field_numeric_entry(event, field, data);
@@ -1590,8 +1590,8 @@ static int python_stop_script(void)
 
 static int python_generate_script(struct tep_handle *pevent, const char *outfile)
 {
-	struct event_format *event = NULL;
-	struct format_field *f;
+	struct tep_event_format *event = NULL;
+	struct tep_format_field *f;
 	char fname[PATH_MAX];
 	int not_first, count;
 	FILE *ofp;
@@ -1686,12 +1686,12 @@ static int python_generate_script(struct tep_handle *pevent, const char *outfile
 			count++;
 
 			fprintf(ofp, "%s=", f->name);
-			if (f->flags & FIELD_IS_STRING ||
-			    f->flags & FIELD_IS_FLAG ||
-			    f->flags & FIELD_IS_ARRAY ||
-			    f->flags & FIELD_IS_SYMBOLIC)
+			if (f->flags & TEP_FIELD_IS_STRING ||
+			    f->flags & TEP_FIELD_IS_FLAG ||
+			    f->flags & TEP_FIELD_IS_ARRAY ||
+			    f->flags & TEP_FIELD_IS_SYMBOLIC)
 				fprintf(ofp, "%%s");
-			else if (f->flags & FIELD_IS_SIGNED)
+			else if (f->flags & TEP_FIELD_IS_SIGNED)
 				fprintf(ofp, "%%d");
 			else
 				fprintf(ofp, "%%u");
@@ -1709,7 +1709,7 @@ static int python_generate_script(struct tep_handle *pevent, const char *outfile
 			if (++count % 5 == 0)
 				fprintf(ofp, "\n\t\t");
 
-			if (f->flags & FIELD_IS_FLAG) {
+			if (f->flags & TEP_FIELD_IS_FLAG) {
 				if ((count - 1) % 5 != 0) {
 					fprintf(ofp, "\n\t\t");
 					count = 4;
@@ -1719,7 +1719,7 @@ static int python_generate_script(struct tep_handle *pevent, const char *outfile
 					event->name);
 				fprintf(ofp, "\"%s\", %s)", f->name,
 					f->name);
-			} else if (f->flags & FIELD_IS_SYMBOLIC) {
+			} else if (f->flags & TEP_FIELD_IS_SYMBOLIC) {
 				if ((count - 1) % 5 != 0) {
 					fprintf(ofp, "\n\t\t");
 					count = 4;
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 8b9369303561..7d2c8ce6cfad 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -199,12 +199,10 @@ void perf_session__delete(struct perf_session *session)
 	free(session);
 }
 
-static int process_event_synth_tracing_data_stub(struct perf_tool *tool
+static int process_event_synth_tracing_data_stub(struct perf_session *session
 						 __maybe_unused,
 						 union perf_event *event
-						 __maybe_unused,
-						 struct perf_session *session
-						__maybe_unused)
+						 __maybe_unused)
 {
 	dump_printf(": unhandled!\n");
 	return 0;
@@ -277,10 +275,8 @@ static int skipn(int fd, off_t n)
 	return 0;
 }
 
-static s64 process_event_auxtrace_stub(struct perf_tool *tool __maybe_unused,
-				       union perf_event *event,
-				       struct perf_session *session
-				       __maybe_unused)
+static s64 process_event_auxtrace_stub(struct perf_session *session __maybe_unused,
+				       union perf_event *event)
 {
 	dump_printf(": unhandled!\n");
 	if (perf_data__is_pipe(session->data))
@@ -288,9 +284,8 @@ static s64 process_event_auxtrace_stub(struct perf_tool *tool __maybe_unused,
 	return event->auxtrace.size;
 }
 
-static int process_event_op2_stub(struct perf_tool *tool __maybe_unused,
-				  union perf_event *event __maybe_unused,
-				  struct perf_session *session __maybe_unused)
+static int process_event_op2_stub(struct perf_session *session __maybe_unused,
+				  union perf_event *event __maybe_unused)
 {
 	dump_printf(": unhandled!\n");
 	return 0;
@@ -298,9 +293,8 @@ static int process_event_op2_stub(struct perf_tool *tool __maybe_unused,
 
 
 static
-int process_event_thread_map_stub(struct perf_tool *tool __maybe_unused,
-				  union perf_event *event __maybe_unused,
-				  struct perf_session *session __maybe_unused)
+int process_event_thread_map_stub(struct perf_session *session __maybe_unused,
+				  union perf_event *event __maybe_unused)
 {
 	if (dump_trace)
 		perf_event__fprintf_thread_map(event, stdout);
@@ -310,9 +304,8 @@ int process_event_thread_map_stub(struct perf_tool *tool __maybe_unused,
 }
 
 static
-int process_event_cpu_map_stub(struct perf_tool *tool __maybe_unused,
-			       union perf_event *event __maybe_unused,
-			       struct perf_session *session __maybe_unused)
+int process_event_cpu_map_stub(struct perf_session *session __maybe_unused,
+			       union perf_event *event __maybe_unused)
 {
 	if (dump_trace)
 		perf_event__fprintf_cpu_map(event, stdout);
@@ -322,9 +315,8 @@ int process_event_cpu_map_stub(struct perf_tool *tool __maybe_unused,
 }
 
 static
-int process_event_stat_config_stub(struct perf_tool *tool __maybe_unused,
-				   union perf_event *event __maybe_unused,
-				   struct perf_session *session __maybe_unused)
+int process_event_stat_config_stub(struct perf_session *session __maybe_unused,
+				   union perf_event *event __maybe_unused)
 {
 	if (dump_trace)
 		perf_event__fprintf_stat_config(event, stdout);
@@ -333,10 +325,8 @@ int process_event_stat_config_stub(struct perf_tool *tool __maybe_unused,
 	return 0;
 }
 
-static int process_stat_stub(struct perf_tool *tool __maybe_unused,
-			     union perf_event *event __maybe_unused,
-			     struct perf_session *perf_session
-			     __maybe_unused)
+static int process_stat_stub(struct perf_session *perf_session __maybe_unused,
+			     union perf_event *event)
 {
 	if (dump_trace)
 		perf_event__fprintf_stat(event, stdout);
@@ -345,10 +335,8 @@ static int process_stat_stub(struct perf_tool *tool __maybe_unused,
 	return 0;
 }
 
-static int process_stat_round_stub(struct perf_tool *tool __maybe_unused,
-				   union perf_event *event __maybe_unused,
-				   struct perf_session *perf_session
-				   __maybe_unused)
+static int process_stat_round_stub(struct perf_session *perf_session __maybe_unused,
+				   union perf_event *event)
 {
 	if (dump_trace)
 		perf_event__fprintf_stat_round(event, stdout);
@@ -1374,37 +1362,37 @@ static s64 perf_session__process_user_event(struct perf_session *session,
 	case PERF_RECORD_HEADER_TRACING_DATA:
 		/* setup for reading amidst mmap */
 		lseek(fd, file_offset, SEEK_SET);
-		return tool->tracing_data(tool, event, session);
+		return tool->tracing_data(session, event);
 	case PERF_RECORD_HEADER_BUILD_ID:
-		return tool->build_id(tool, event, session);
+		return tool->build_id(session, event);
 	case PERF_RECORD_FINISHED_ROUND:
 		return tool->finished_round(tool, event, oe);
 	case PERF_RECORD_ID_INDEX:
-		return tool->id_index(tool, event, session);
+		return tool->id_index(session, event);
 	case PERF_RECORD_AUXTRACE_INFO:
-		return tool->auxtrace_info(tool, event, session);
+		return tool->auxtrace_info(session, event);
 	case PERF_RECORD_AUXTRACE:
 		/* setup for reading amidst mmap */
 		lseek(fd, file_offset + event->header.size, SEEK_SET);
-		return tool->auxtrace(tool, event, session);
+		return tool->auxtrace(session, event);
 	case PERF_RECORD_AUXTRACE_ERROR:
 		perf_session__auxtrace_error_inc(session, event);
-		return tool->auxtrace_error(tool, event, session);
+		return tool->auxtrace_error(session, event);
 	case PERF_RECORD_THREAD_MAP:
-		return tool->thread_map(tool, event, session);
+		return tool->thread_map(session, event);
 	case PERF_RECORD_CPU_MAP:
-		return tool->cpu_map(tool, event, session);
+		return tool->cpu_map(session, event);
 	case PERF_RECORD_STAT_CONFIG:
-		return tool->stat_config(tool, event, session);
+		return tool->stat_config(session, event);
 	case PERF_RECORD_STAT:
-		return tool->stat(tool, event, session);
+		return tool->stat(session, event);
 	case PERF_RECORD_STAT_ROUND:
-		return tool->stat_round(tool, event, session);
+		return tool->stat_round(session, event);
 	case PERF_RECORD_TIME_CONV:
 		session->time_conv = event->time_conv;
-		return tool->time_conv(tool, event, session);
+		return tool->time_conv(session, event);
 	case PERF_RECORD_HEADER_FEATURE:
-		return tool->feature(tool, event, session);
+		return tool->feature(session, event);
 	default:
 		return -EINVAL;
 	}
@@ -2133,9 +2121,8 @@ out:
 	return err;
 }
 
-int perf_event__process_id_index(struct perf_tool *tool __maybe_unused,
-				 union perf_event *event,
-				 struct perf_session *session)
+int perf_event__process_id_index(struct perf_session *session,
+				 union perf_event *event)
 {
 	struct perf_evlist *evlist = session->evlist;
 	struct id_index_event *ie = &event->id_index;
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index da40b4b380ca..d96eccd7d27f 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -120,9 +120,8 @@ int perf_session__deliver_synth_event(struct perf_session *session,
 				      union perf_event *event,
 				      struct perf_sample *sample);
 
-int perf_event__process_id_index(struct perf_tool *tool,
-				 union perf_event *event,
-				 struct perf_session *session);
+int perf_event__process_id_index(struct perf_session *session,
+				 union perf_event *event);
 
 int perf_event__synthesize_id_index(struct perf_tool *tool,
 				    perf_event__handler_t process,
diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py
index 1942f6dd24f6..63f758c655d5 100644
--- a/tools/perf/util/setup.py
+++ b/tools/perf/util/setup.py
@@ -5,16 +5,18 @@ from subprocess import Popen, PIPE
 from re import sub
 
 def clang_has_option(option):
-    return [o for o in Popen(['clang', option], stderr=PIPE).stderr.readlines() if "unknown argument" in o] == [ ]
+    return [o for o in Popen(['clang', option], stderr=PIPE).stderr.readlines() if b"unknown argument" in o] == [ ]
 
 cc = getenv("CC")
 if cc == "clang":
-    from _sysconfigdata import build_time_vars
-    build_time_vars["CFLAGS"] = sub("-specs=[^ ]+", "", build_time_vars["CFLAGS"])
-    if not clang_has_option("-mcet"):
-        build_time_vars["CFLAGS"] = sub("-mcet", "", build_time_vars["CFLAGS"])
-    if not clang_has_option("-fcf-protection"):
-        build_time_vars["CFLAGS"] = sub("-fcf-protection", "", build_time_vars["CFLAGS"])
+    from distutils.sysconfig import get_config_vars
+    vars = get_config_vars()
+    for var in ('CFLAGS', 'OPT'):
+        vars[var] = sub("-specs=[^ ]+", "", vars[var])
+        if not clang_has_option("-mcet"):
+            vars[var] = sub("-mcet", "", vars[var])
+        if not clang_has_option("-fcf-protection"):
+            vars[var] = sub("-fcf-protection", "", vars[var])
 
 from distutils.core import setup, Extension
 
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index b284276ec963..f96c005b3c41 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -1884,7 +1884,7 @@ static int __sort_dimension__add_hpp_output(struct sort_dimension *sd,
 struct hpp_dynamic_entry {
 	struct perf_hpp_fmt hpp;
 	struct perf_evsel *evsel;
-	struct format_field *field;
+	struct tep_format_field *field;
 	unsigned dynamic_len;
 	bool raw_trace;
 };
@@ -1899,7 +1899,7 @@ static int hde_width(struct hpp_dynamic_entry *hde)
 		if (namelen > len)
 			len = namelen;
 
-		if (!(hde->field->flags & FIELD_IS_STRING)) {
+		if (!(hde->field->flags & TEP_FIELD_IS_STRING)) {
 			/* length for print hex numbers */
 			fieldlen = hde->field->size * 2 + 2;
 		}
@@ -1915,7 +1915,7 @@ static void update_dynamic_len(struct hpp_dynamic_entry *hde,
 			       struct hist_entry *he)
 {
 	char *str, *pos;
-	struct format_field *field = hde->field;
+	struct tep_format_field *field = hde->field;
 	size_t namelen;
 	bool last = false;
 
@@ -2000,7 +2000,7 @@ static int __sort__hde_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
 	struct hpp_dynamic_entry *hde;
 	size_t len = fmt->user_len;
 	char *str, *pos;
-	struct format_field *field;
+	struct tep_format_field *field;
 	size_t namelen;
 	bool last = false;
 	int ret;
@@ -2060,7 +2060,7 @@ static int64_t __sort__hde_cmp(struct perf_hpp_fmt *fmt,
 			       struct hist_entry *a, struct hist_entry *b)
 {
 	struct hpp_dynamic_entry *hde;
-	struct format_field *field;
+	struct tep_format_field *field;
 	unsigned offset, size;
 
 	hde = container_of(fmt, struct hpp_dynamic_entry, hpp);
@@ -2071,7 +2071,7 @@ static int64_t __sort__hde_cmp(struct perf_hpp_fmt *fmt,
 	}
 
 	field = hde->field;
-	if (field->flags & FIELD_IS_DYNAMIC) {
+	if (field->flags & TEP_FIELD_IS_DYNAMIC) {
 		unsigned long long dyn;
 
 		tep_read_number_field(field, a->raw_data, &dyn);
@@ -2117,7 +2117,7 @@ static void hde_free(struct perf_hpp_fmt *fmt)
 }
 
 static struct hpp_dynamic_entry *
-__alloc_dynamic_entry(struct perf_evsel *evsel, struct format_field *field,
+__alloc_dynamic_entry(struct perf_evsel *evsel, struct tep_format_field *field,
 		      int level)
 {
 	struct hpp_dynamic_entry *hde;
@@ -2252,7 +2252,7 @@ static struct perf_evsel *find_evsel(struct perf_evlist *evlist, char *event_nam
 }
 
 static int __dynamic_dimension__add(struct perf_evsel *evsel,
-				    struct format_field *field,
+				    struct tep_format_field *field,
 				    bool raw_trace, int level)
 {
 	struct hpp_dynamic_entry *hde;
@@ -2270,7 +2270,7 @@ static int __dynamic_dimension__add(struct perf_evsel *evsel,
 static int add_evsel_fields(struct perf_evsel *evsel, bool raw_trace, int level)
 {
 	int ret;
-	struct format_field *field;
+	struct tep_format_field *field;
 
 	field = evsel->tp_format->format.fields;
 	while (field) {
@@ -2305,7 +2305,7 @@ static int add_all_matching_fields(struct perf_evlist *evlist,
 {
 	int ret = -ESRCH;
 	struct perf_evsel *evsel;
-	struct format_field *field;
+	struct tep_format_field *field;
 
 	evlist__for_each_entry(evlist, evsel) {
 		if (evsel->attr.type != PERF_TYPE_TRACEPOINT)
@@ -2327,7 +2327,7 @@ static int add_dynamic_entry(struct perf_evlist *evlist, const char *tok,
 {
 	char *str, *event_name, *field_name, *opt_name;
 	struct perf_evsel *evsel;
-	struct format_field *field;
+	struct tep_format_field *field;
 	bool raw_trace = symbol_conf.raw_trace;
 	int ret = 0;
 
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
new file mode 100644
index 000000000000..e7b4c44ebb62
--- /dev/null
+++ b/tools/perf/util/stat-display.c
@@ -0,0 +1,1166 @@
+#include <stdio.h>
+#include <inttypes.h>
+#include <linux/time64.h>
+#include <math.h>
+#include "evlist.h"
+#include "evsel.h"
+#include "stat.h"
+#include "top.h"
+#include "thread_map.h"
+#include "cpumap.h"
+#include "string2.h"
+#include "sane_ctype.h"
+#include "cgroup.h"
+#include <math.h>
+#include <api/fs/fs.h>
+
+#define CNTR_NOT_SUPPORTED	"<not supported>"
+#define CNTR_NOT_COUNTED	"<not counted>"
+
+static bool is_duration_time(struct perf_evsel *evsel)
+{
+	return !strcmp(evsel->name, "duration_time");
+}
+
+static void print_running(struct perf_stat_config *config,
+			  u64 run, u64 ena)
+{
+	if (config->csv_output) {
+		fprintf(config->output, "%s%" PRIu64 "%s%.2f",
+					config->csv_sep,
+					run,
+					config->csv_sep,
+					ena ? 100.0 * run / ena : 100.0);
+	} else if (run != ena) {
+		fprintf(config->output, "  (%.2f%%)", 100.0 * run / ena);
+	}
+}
+
+static void print_noise_pct(struct perf_stat_config *config,
+			    double total, double avg)
+{
+	double pct = rel_stddev_stats(total, avg);
+
+	if (config->csv_output)
+		fprintf(config->output, "%s%.2f%%", config->csv_sep, pct);
+	else if (pct)
+		fprintf(config->output, "  ( +-%6.2f%% )", pct);
+}
+
+static void print_noise(struct perf_stat_config *config,
+			struct perf_evsel *evsel, double avg)
+{
+	struct perf_stat_evsel *ps;
+
+	if (config->run_count == 1)
+		return;
+
+	ps = evsel->stats;
+	print_noise_pct(config, stddev_stats(&ps->res_stats[0]), avg);
+}
+
+static void aggr_printout(struct perf_stat_config *config,
+			  struct perf_evsel *evsel, int id, int nr)
+{
+	switch (config->aggr_mode) {
+	case AGGR_CORE:
+		fprintf(config->output, "S%d-C%*d%s%*d%s",
+			cpu_map__id_to_socket(id),
+			config->csv_output ? 0 : -8,
+			cpu_map__id_to_cpu(id),
+			config->csv_sep,
+			config->csv_output ? 0 : 4,
+			nr,
+			config->csv_sep);
+		break;
+	case AGGR_SOCKET:
+		fprintf(config->output, "S%*d%s%*d%s",
+			config->csv_output ? 0 : -5,
+			id,
+			config->csv_sep,
+			config->csv_output ? 0 : 4,
+			nr,
+			config->csv_sep);
+			break;
+	case AGGR_NONE:
+		fprintf(config->output, "CPU%*d%s",
+			config->csv_output ? 0 : -4,
+			perf_evsel__cpus(evsel)->map[id], config->csv_sep);
+		break;
+	case AGGR_THREAD:
+		fprintf(config->output, "%*s-%*d%s",
+			config->csv_output ? 0 : 16,
+			thread_map__comm(evsel->threads, id),
+			config->csv_output ? 0 : -8,
+			thread_map__pid(evsel->threads, id),
+			config->csv_sep);
+		break;
+	case AGGR_GLOBAL:
+	case AGGR_UNSET:
+	default:
+		break;
+	}
+}
+
+struct outstate {
+	FILE *fh;
+	bool newline;
+	const char *prefix;
+	int  nfields;
+	int  id, nr;
+	struct perf_evsel *evsel;
+};
+
+#define METRIC_LEN  35
+
+static void new_line_std(struct perf_stat_config *config __maybe_unused,
+			 void *ctx)
+{
+	struct outstate *os = ctx;
+
+	os->newline = true;
+}
+
+static void do_new_line_std(struct perf_stat_config *config,
+			    struct outstate *os)
+{
+	fputc('\n', os->fh);
+	fputs(os->prefix, os->fh);
+	aggr_printout(config, os->evsel, os->id, os->nr);
+	if (config->aggr_mode == AGGR_NONE)
+		fprintf(os->fh, "        ");
+	fprintf(os->fh, "                                                 ");
+}
+
+static void print_metric_std(struct perf_stat_config *config,
+			     void *ctx, const char *color, const char *fmt,
+			     const char *unit, double val)
+{
+	struct outstate *os = ctx;
+	FILE *out = os->fh;
+	int n;
+	bool newline = os->newline;
+
+	os->newline = false;
+
+	if (unit == NULL || fmt == NULL) {
+		fprintf(out, "%-*s", METRIC_LEN, "");
+		return;
+	}
+
+	if (newline)
+		do_new_line_std(config, os);
+
+	n = fprintf(out, " # ");
+	if (color)
+		n += color_fprintf(out, color, fmt, val);
+	else
+		n += fprintf(out, fmt, val);
+	fprintf(out, " %-*s", METRIC_LEN - n - 1, unit);
+}
+
+static void new_line_csv(struct perf_stat_config *config, void *ctx)
+{
+	struct outstate *os = ctx;
+	int i;
+
+	fputc('\n', os->fh);
+	if (os->prefix)
+		fprintf(os->fh, "%s%s", os->prefix, config->csv_sep);
+	aggr_printout(config, os->evsel, os->id, os->nr);
+	for (i = 0; i < os->nfields; i++)
+		fputs(config->csv_sep, os->fh);
+}
+
+static void print_metric_csv(struct perf_stat_config *config __maybe_unused,
+			     void *ctx,
+			     const char *color __maybe_unused,
+			     const char *fmt, const char *unit, double val)
+{
+	struct outstate *os = ctx;
+	FILE *out = os->fh;
+	char buf[64], *vals, *ends;
+
+	if (unit == NULL || fmt == NULL) {
+		fprintf(out, "%s%s", config->csv_sep, config->csv_sep);
+		return;
+	}
+	snprintf(buf, sizeof(buf), fmt, val);
+	ends = vals = ltrim(buf);
+	while (isdigit(*ends) || *ends == '.')
+		ends++;
+	*ends = 0;
+	while (isspace(*unit))
+		unit++;
+	fprintf(out, "%s%s%s%s", config->csv_sep, vals, config->csv_sep, unit);
+}
+
+/* Filter out some columns that don't work well in metrics only mode */
+
+static bool valid_only_metric(const char *unit)
+{
+	if (!unit)
+		return false;
+	if (strstr(unit, "/sec") ||
+	    strstr(unit, "hz") ||
+	    strstr(unit, "Hz") ||
+	    strstr(unit, "CPUs utilized"))
+		return false;
+	return true;
+}
+
+static const char *fixunit(char *buf, struct perf_evsel *evsel,
+			   const char *unit)
+{
+	if (!strncmp(unit, "of all", 6)) {
+		snprintf(buf, 1024, "%s %s", perf_evsel__name(evsel),
+			 unit);
+		return buf;
+	}
+	return unit;
+}
+
+static void print_metric_only(struct perf_stat_config *config,
+			      void *ctx, const char *color, const char *fmt,
+			      const char *unit, double val)
+{
+	struct outstate *os = ctx;
+	FILE *out = os->fh;
+	char buf[1024], str[1024];
+	unsigned mlen = config->metric_only_len;
+
+	if (!valid_only_metric(unit))
+		return;
+	unit = fixunit(buf, os->evsel, unit);
+	if (mlen < strlen(unit))
+		mlen = strlen(unit) + 1;
+
+	if (color)
+		mlen += strlen(color) + sizeof(PERF_COLOR_RESET) - 1;
+
+	color_snprintf(str, sizeof(str), color ?: "", fmt, val);
+	fprintf(out, "%*s ", mlen, str);
+}
+
+static void print_metric_only_csv(struct perf_stat_config *config __maybe_unused,
+				  void *ctx, const char *color __maybe_unused,
+				  const char *fmt,
+				  const char *unit, double val)
+{
+	struct outstate *os = ctx;
+	FILE *out = os->fh;
+	char buf[64], *vals, *ends;
+	char tbuf[1024];
+
+	if (!valid_only_metric(unit))
+		return;
+	unit = fixunit(tbuf, os->evsel, unit);
+	snprintf(buf, sizeof buf, fmt, val);
+	ends = vals = ltrim(buf);
+	while (isdigit(*ends) || *ends == '.')
+		ends++;
+	*ends = 0;
+	fprintf(out, "%s%s", vals, config->csv_sep);
+}
+
+static void new_line_metric(struct perf_stat_config *config __maybe_unused,
+			    void *ctx __maybe_unused)
+{
+}
+
+static void print_metric_header(struct perf_stat_config *config,
+				void *ctx, const char *color __maybe_unused,
+				const char *fmt __maybe_unused,
+				const char *unit, double val __maybe_unused)
+{
+	struct outstate *os = ctx;
+	char tbuf[1024];
+
+	if (!valid_only_metric(unit))
+		return;
+	unit = fixunit(tbuf, os->evsel, unit);
+	if (config->csv_output)
+		fprintf(os->fh, "%s%s", unit, config->csv_sep);
+	else
+		fprintf(os->fh, "%*s ", config->metric_only_len, unit);
+}
+
+static int first_shadow_cpu(struct perf_stat_config *config,
+			    struct perf_evsel *evsel, int id)
+{
+	struct perf_evlist *evlist = evsel->evlist;
+	int i;
+
+	if (!config->aggr_get_id)
+		return 0;
+
+	if (config->aggr_mode == AGGR_NONE)
+		return id;
+
+	if (config->aggr_mode == AGGR_GLOBAL)
+		return 0;
+
+	for (i = 0; i < perf_evsel__nr_cpus(evsel); i++) {
+		int cpu2 = perf_evsel__cpus(evsel)->map[i];
+
+		if (config->aggr_get_id(config, evlist->cpus, cpu2) == id)
+			return cpu2;
+	}
+	return 0;
+}
+
+static void abs_printout(struct perf_stat_config *config,
+			 int id, int nr, struct perf_evsel *evsel, double avg)
+{
+	FILE *output = config->output;
+	double sc =  evsel->scale;
+	const char *fmt;
+
+	if (config->csv_output) {
+		fmt = floor(sc) != sc ?  "%.2f%s" : "%.0f%s";
+	} else {
+		if (config->big_num)
+			fmt = floor(sc) != sc ? "%'18.2f%s" : "%'18.0f%s";
+		else
+			fmt = floor(sc) != sc ? "%18.2f%s" : "%18.0f%s";
+	}
+
+	aggr_printout(config, evsel, id, nr);
+
+	fprintf(output, fmt, avg, config->csv_sep);
+
+	if (evsel->unit)
+		fprintf(output, "%-*s%s",
+			config->csv_output ? 0 : config->unit_width,
+			evsel->unit, config->csv_sep);
+
+	fprintf(output, "%-*s", config->csv_output ? 0 : 25, perf_evsel__name(evsel));
+
+	if (evsel->cgrp)
+		fprintf(output, "%s%s", config->csv_sep, evsel->cgrp->name);
+}
+
+static bool is_mixed_hw_group(struct perf_evsel *counter)
+{
+	struct perf_evlist *evlist = counter->evlist;
+	u32 pmu_type = counter->attr.type;
+	struct perf_evsel *pos;
+
+	if (counter->nr_members < 2)
+		return false;
+
+	evlist__for_each_entry(evlist, pos) {
+		/* software events can be part of any hardware group */
+		if (pos->attr.type == PERF_TYPE_SOFTWARE)
+			continue;
+		if (pmu_type == PERF_TYPE_SOFTWARE) {
+			pmu_type = pos->attr.type;
+			continue;
+		}
+		if (pmu_type != pos->attr.type)
+			return true;
+	}
+
+	return false;
+}
+
+static void printout(struct perf_stat_config *config, int id, int nr,
+		     struct perf_evsel *counter, double uval,
+		     char *prefix, u64 run, u64 ena, double noise,
+		     struct runtime_stat *st)
+{
+	struct perf_stat_output_ctx out;
+	struct outstate os = {
+		.fh = config->output,
+		.prefix = prefix ? prefix : "",
+		.id = id,
+		.nr = nr,
+		.evsel = counter,
+	};
+	print_metric_t pm = print_metric_std;
+	new_line_t nl;
+
+	if (config->metric_only) {
+		nl = new_line_metric;
+		if (config->csv_output)
+			pm = print_metric_only_csv;
+		else
+			pm = print_metric_only;
+	} else
+		nl = new_line_std;
+
+	if (config->csv_output && !config->metric_only) {
+		static int aggr_fields[] = {
+			[AGGR_GLOBAL] = 0,
+			[AGGR_THREAD] = 1,
+			[AGGR_NONE] = 1,
+			[AGGR_SOCKET] = 2,
+			[AGGR_CORE] = 2,
+		};
+
+		pm = print_metric_csv;
+		nl = new_line_csv;
+		os.nfields = 3;
+		os.nfields += aggr_fields[config->aggr_mode];
+		if (counter->cgrp)
+			os.nfields++;
+	}
+	if (run == 0 || ena == 0 || counter->counts->scaled == -1) {
+		if (config->metric_only) {
+			pm(config, &os, NULL, "", "", 0);
+			return;
+		}
+		aggr_printout(config, counter, id, nr);
+
+		fprintf(config->output, "%*s%s",
+			config->csv_output ? 0 : 18,
+			counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
+			config->csv_sep);
+
+		if (counter->supported) {
+			config->print_free_counters_hint = 1;
+			if (is_mixed_hw_group(counter))
+				config->print_mixed_hw_group_error = 1;
+		}
+
+		fprintf(config->output, "%-*s%s",
+			config->csv_output ? 0 : config->unit_width,
+			counter->unit, config->csv_sep);
+
+		fprintf(config->output, "%*s",
+			config->csv_output ? 0 : -25,
+			perf_evsel__name(counter));
+
+		if (counter->cgrp)
+			fprintf(config->output, "%s%s",
+				config->csv_sep, counter->cgrp->name);
+
+		if (!config->csv_output)
+			pm(config, &os, NULL, NULL, "", 0);
+		print_noise(config, counter, noise);
+		print_running(config, run, ena);
+		if (config->csv_output)
+			pm(config, &os, NULL, NULL, "", 0);
+		return;
+	}
+
+	if (!config->metric_only)
+		abs_printout(config, id, nr, counter, uval);
+
+	out.print_metric = pm;
+	out.new_line = nl;
+	out.ctx = &os;
+	out.force_header = false;
+
+	if (config->csv_output && !config->metric_only) {
+		print_noise(config, counter, noise);
+		print_running(config, run, ena);
+	}
+
+	perf_stat__print_shadow_stats(config, counter, uval,
+				first_shadow_cpu(config, counter, id),
+				&out, &config->metric_events, st);
+	if (!config->csv_output && !config->metric_only) {
+		print_noise(config, counter, noise);
+		print_running(config, run, ena);
+	}
+}
+
+static void aggr_update_shadow(struct perf_stat_config *config,
+			       struct perf_evlist *evlist)
+{
+	int cpu, s2, id, s;
+	u64 val;
+	struct perf_evsel *counter;
+
+	for (s = 0; s < config->aggr_map->nr; s++) {
+		id = config->aggr_map->map[s];
+		evlist__for_each_entry(evlist, counter) {
+			val = 0;
+			for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
+				s2 = config->aggr_get_id(config, evlist->cpus, cpu);
+				if (s2 != id)
+					continue;
+				val += perf_counts(counter->counts, cpu, 0)->val;
+			}
+			perf_stat__update_shadow_stats(counter, val,
+					first_shadow_cpu(config, counter, id),
+					&rt_stat);
+		}
+	}
+}
+
+static void uniquify_event_name(struct perf_evsel *counter)
+{
+	char *new_name;
+	char *config;
+
+	if (counter->uniquified_name ||
+	    !counter->pmu_name || !strncmp(counter->name, counter->pmu_name,
+					   strlen(counter->pmu_name)))
+		return;
+
+	config = strchr(counter->name, '/');
+	if (config) {
+		if (asprintf(&new_name,
+			     "%s%s", counter->pmu_name, config) > 0) {
+			free(counter->name);
+			counter->name = new_name;
+		}
+	} else {
+		if (asprintf(&new_name,
+			     "%s [%s]", counter->name, counter->pmu_name) > 0) {
+			free(counter->name);
+			counter->name = new_name;
+		}
+	}
+
+	counter->uniquified_name = true;
+}
+
+static void collect_all_aliases(struct perf_stat_config *config, struct perf_evsel *counter,
+			    void (*cb)(struct perf_stat_config *config, struct perf_evsel *counter, void *data,
+				       bool first),
+			    void *data)
+{
+	struct perf_evlist *evlist = counter->evlist;
+	struct perf_evsel *alias;
+
+	alias = list_prepare_entry(counter, &(evlist->entries), node);
+	list_for_each_entry_continue (alias, &evlist->entries, node) {
+		if (strcmp(perf_evsel__name(alias), perf_evsel__name(counter)) ||
+		    alias->scale != counter->scale ||
+		    alias->cgrp != counter->cgrp ||
+		    strcmp(alias->unit, counter->unit) ||
+		    perf_evsel__is_clock(alias) != perf_evsel__is_clock(counter))
+			break;
+		alias->merged_stat = true;
+		cb(config, alias, data, false);
+	}
+}
+
+static bool collect_data(struct perf_stat_config *config, struct perf_evsel *counter,
+			    void (*cb)(struct perf_stat_config *config, struct perf_evsel *counter, void *data,
+				       bool first),
+			    void *data)
+{
+	if (counter->merged_stat)
+		return false;
+	cb(config, counter, data, true);
+	if (config->no_merge)
+		uniquify_event_name(counter);
+	else if (counter->auto_merge_stats)
+		collect_all_aliases(config, counter, cb, data);
+	return true;
+}
+
+struct aggr_data {
+	u64 ena, run, val;
+	int id;
+	int nr;
+	int cpu;
+};
+
+static void aggr_cb(struct perf_stat_config *config,
+		    struct perf_evsel *counter, void *data, bool first)
+{
+	struct aggr_data *ad = data;
+	int cpu, s2;
+
+	for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
+		struct perf_counts_values *counts;
+
+		s2 = config->aggr_get_id(config, perf_evsel__cpus(counter), cpu);
+		if (s2 != ad->id)
+			continue;
+		if (first)
+			ad->nr++;
+		counts = perf_counts(counter->counts, cpu, 0);
+		/*
+		 * When any result is bad, make them all to give
+		 * consistent output in interval mode.
+		 */
+		if (counts->ena == 0 || counts->run == 0 ||
+		    counter->counts->scaled == -1) {
+			ad->ena = 0;
+			ad->run = 0;
+			break;
+		}
+		ad->val += counts->val;
+		ad->ena += counts->ena;
+		ad->run += counts->run;
+	}
+}
+
+static void print_aggr(struct perf_stat_config *config,
+		       struct perf_evlist *evlist,
+		       char *prefix)
+{
+	bool metric_only = config->metric_only;
+	FILE *output = config->output;
+	struct perf_evsel *counter;
+	int s, id, nr;
+	double uval;
+	u64 ena, run, val;
+	bool first;
+
+	if (!(config->aggr_map || config->aggr_get_id))
+		return;
+
+	aggr_update_shadow(config, evlist);
+
+	/*
+	 * With metric_only everything is on a single line.
+	 * Without each counter has its own line.
+	 */
+	for (s = 0; s < config->aggr_map->nr; s++) {
+		struct aggr_data ad;
+		if (prefix && metric_only)
+			fprintf(output, "%s", prefix);
+
+		ad.id = id = config->aggr_map->map[s];
+		first = true;
+		evlist__for_each_entry(evlist, counter) {
+			if (is_duration_time(counter))
+				continue;
+
+			ad.val = ad.ena = ad.run = 0;
+			ad.nr = 0;
+			if (!collect_data(config, counter, aggr_cb, &ad))
+				continue;
+			nr = ad.nr;
+			ena = ad.ena;
+			run = ad.run;
+			val = ad.val;
+			if (first && metric_only) {
+				first = false;
+				aggr_printout(config, counter, id, nr);
+			}
+			if (prefix && !metric_only)
+				fprintf(output, "%s", prefix);
+
+			uval = val * counter->scale;
+			printout(config, id, nr, counter, uval, prefix,
+				 run, ena, 1.0, &rt_stat);
+			if (!metric_only)
+				fputc('\n', output);
+		}
+		if (metric_only)
+			fputc('\n', output);
+	}
+}
+
+static int cmp_val(const void *a, const void *b)
+{
+	return ((struct perf_aggr_thread_value *)b)->val -
+		((struct perf_aggr_thread_value *)a)->val;
+}
+
+static struct perf_aggr_thread_value *sort_aggr_thread(
+					struct perf_evsel *counter,
+					int nthreads, int ncpus,
+					int *ret,
+					struct target *_target)
+{
+	int cpu, thread, i = 0;
+	double uval;
+	struct perf_aggr_thread_value *buf;
+
+	buf = calloc(nthreads, sizeof(struct perf_aggr_thread_value));
+	if (!buf)
+		return NULL;
+
+	for (thread = 0; thread < nthreads; thread++) {
+		u64 ena = 0, run = 0, val = 0;
+
+		for (cpu = 0; cpu < ncpus; cpu++) {
+			val += perf_counts(counter->counts, cpu, thread)->val;
+			ena += perf_counts(counter->counts, cpu, thread)->ena;
+			run += perf_counts(counter->counts, cpu, thread)->run;
+		}
+
+		uval = val * counter->scale;
+
+		/*
+		 * Skip value 0 when enabling --per-thread globally,
+		 * otherwise too many 0 output.
+		 */
+		if (uval == 0.0 && target__has_per_thread(_target))
+			continue;
+
+		buf[i].counter = counter;
+		buf[i].id = thread;
+		buf[i].uval = uval;
+		buf[i].val = val;
+		buf[i].run = run;
+		buf[i].ena = ena;
+		i++;
+	}
+
+	qsort(buf, i, sizeof(struct perf_aggr_thread_value), cmp_val);
+
+	if (ret)
+		*ret = i;
+
+	return buf;
+}
+
+static void print_aggr_thread(struct perf_stat_config *config,
+			      struct target *_target,
+			      struct perf_evsel *counter, char *prefix)
+{
+	FILE *output = config->output;
+	int nthreads = thread_map__nr(counter->threads);
+	int ncpus = cpu_map__nr(counter->cpus);
+	int thread, sorted_threads, id;
+	struct perf_aggr_thread_value *buf;
+
+	buf = sort_aggr_thread(counter, nthreads, ncpus, &sorted_threads, _target);
+	if (!buf) {
+		perror("cannot sort aggr thread");
+		return;
+	}
+
+	for (thread = 0; thread < sorted_threads; thread++) {
+		if (prefix)
+			fprintf(output, "%s", prefix);
+
+		id = buf[thread].id;
+		if (config->stats)
+			printout(config, id, 0, buf[thread].counter, buf[thread].uval,
+				 prefix, buf[thread].run, buf[thread].ena, 1.0,
+				 &config->stats[id]);
+		else
+			printout(config, id, 0, buf[thread].counter, buf[thread].uval,
+				 prefix, buf[thread].run, buf[thread].ena, 1.0,
+				 &rt_stat);
+		fputc('\n', output);
+	}
+
+	free(buf);
+}
+
+struct caggr_data {
+	double avg, avg_enabled, avg_running;
+};
+
+static void counter_aggr_cb(struct perf_stat_config *config __maybe_unused,
+			    struct perf_evsel *counter, void *data,
+			    bool first __maybe_unused)
+{
+	struct caggr_data *cd = data;
+	struct perf_stat_evsel *ps = counter->stats;
+
+	cd->avg += avg_stats(&ps->res_stats[0]);
+	cd->avg_enabled += avg_stats(&ps->res_stats[1]);
+	cd->avg_running += avg_stats(&ps->res_stats[2]);
+}
+
+/*
+ * Print out the results of a single counter:
+ * aggregated counts in system-wide mode
+ */
+static void print_counter_aggr(struct perf_stat_config *config,
+			       struct perf_evsel *counter, char *prefix)
+{
+	bool metric_only = config->metric_only;
+	FILE *output = config->output;
+	double uval;
+	struct caggr_data cd = { .avg = 0.0 };
+
+	if (!collect_data(config, counter, counter_aggr_cb, &cd))
+		return;
+
+	if (prefix && !metric_only)
+		fprintf(output, "%s", prefix);
+
+	uval = cd.avg * counter->scale;
+	printout(config, -1, 0, counter, uval, prefix, cd.avg_running, cd.avg_enabled,
+		 cd.avg, &rt_stat);
+	if (!metric_only)
+		fprintf(output, "\n");
+}
+
+static void counter_cb(struct perf_stat_config *config __maybe_unused,
+		       struct perf_evsel *counter, void *data,
+		       bool first __maybe_unused)
+{
+	struct aggr_data *ad = data;
+
+	ad->val += perf_counts(counter->counts, ad->cpu, 0)->val;
+	ad->ena += perf_counts(counter->counts, ad->cpu, 0)->ena;
+	ad->run += perf_counts(counter->counts, ad->cpu, 0)->run;
+}
+
+/*
+ * Print out the results of a single counter:
+ * does not use aggregated count in system-wide
+ */
+static void print_counter(struct perf_stat_config *config,
+			  struct perf_evsel *counter, char *prefix)
+{
+	FILE *output = config->output;
+	u64 ena, run, val;
+	double uval;
+	int cpu;
+
+	for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
+		struct aggr_data ad = { .cpu = cpu };
+
+		if (!collect_data(config, counter, counter_cb, &ad))
+			return;
+		val = ad.val;
+		ena = ad.ena;
+		run = ad.run;
+
+		if (prefix)
+			fprintf(output, "%s", prefix);
+
+		uval = val * counter->scale;
+		printout(config, cpu, 0, counter, uval, prefix, run, ena, 1.0,
+			 &rt_stat);
+
+		fputc('\n', output);
+	}
+}
+
+static void print_no_aggr_metric(struct perf_stat_config *config,
+				 struct perf_evlist *evlist,
+				 char *prefix)
+{
+	int cpu;
+	int nrcpus = 0;
+	struct perf_evsel *counter;
+	u64 ena, run, val;
+	double uval;
+
+	nrcpus = evlist->cpus->nr;
+	for (cpu = 0; cpu < nrcpus; cpu++) {
+		bool first = true;
+
+		if (prefix)
+			fputs(prefix, config->output);
+		evlist__for_each_entry(evlist, counter) {
+			if (is_duration_time(counter))
+				continue;
+			if (first) {
+				aggr_printout(config, counter, cpu, 0);
+				first = false;
+			}
+			val = perf_counts(counter->counts, cpu, 0)->val;
+			ena = perf_counts(counter->counts, cpu, 0)->ena;
+			run = perf_counts(counter->counts, cpu, 0)->run;
+
+			uval = val * counter->scale;
+			printout(config, cpu, 0, counter, uval, prefix, run, ena, 1.0,
+				 &rt_stat);
+		}
+		fputc('\n', config->output);
+	}
+}
+
+static int aggr_header_lens[] = {
+	[AGGR_CORE] = 18,
+	[AGGR_SOCKET] = 12,
+	[AGGR_NONE] = 6,
+	[AGGR_THREAD] = 24,
+	[AGGR_GLOBAL] = 0,
+};
+
+static const char *aggr_header_csv[] = {
+	[AGGR_CORE] 	= 	"core,cpus,",
+	[AGGR_SOCKET] 	= 	"socket,cpus",
+	[AGGR_NONE] 	= 	"cpu,",
+	[AGGR_THREAD] 	= 	"comm-pid,",
+	[AGGR_GLOBAL] 	=	""
+};
+
+static void print_metric_headers(struct perf_stat_config *config,
+				 struct perf_evlist *evlist,
+				 const char *prefix, bool no_indent)
+{
+	struct perf_stat_output_ctx out;
+	struct perf_evsel *counter;
+	struct outstate os = {
+		.fh = config->output
+	};
+
+	if (prefix)
+		fprintf(config->output, "%s", prefix);
+
+	if (!config->csv_output && !no_indent)
+		fprintf(config->output, "%*s",
+			aggr_header_lens[config->aggr_mode], "");
+	if (config->csv_output) {
+		if (config->interval)
+			fputs("time,", config->output);
+		fputs(aggr_header_csv[config->aggr_mode], config->output);
+	}
+
+	/* Print metrics headers only */
+	evlist__for_each_entry(evlist, counter) {
+		if (is_duration_time(counter))
+			continue;
+		os.evsel = counter;
+		out.ctx = &os;
+		out.print_metric = print_metric_header;
+		out.new_line = new_line_metric;
+		out.force_header = true;
+		os.evsel = counter;
+		perf_stat__print_shadow_stats(config, counter, 0,
+					      0,
+					      &out,
+					      &config->metric_events,
+					      &rt_stat);
+	}
+	fputc('\n', config->output);
+}
+
+static void print_interval(struct perf_stat_config *config,
+			   struct perf_evlist *evlist,
+			   char *prefix, struct timespec *ts)
+{
+	bool metric_only = config->metric_only;
+	unsigned int unit_width = config->unit_width;
+	FILE *output = config->output;
+	static int num_print_interval;
+
+	if (config->interval_clear)
+		puts(CONSOLE_CLEAR);
+
+	sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, config->csv_sep);
+
+	if ((num_print_interval == 0 && !config->csv_output) || config->interval_clear) {
+		switch (config->aggr_mode) {
+		case AGGR_SOCKET:
+			fprintf(output, "#           time socket cpus");
+			if (!metric_only)
+				fprintf(output, "             counts %*s events\n", unit_width, "unit");
+			break;
+		case AGGR_CORE:
+			fprintf(output, "#           time core         cpus");
+			if (!metric_only)
+				fprintf(output, "             counts %*s events\n", unit_width, "unit");
+			break;
+		case AGGR_NONE:
+			fprintf(output, "#           time CPU    ");
+			if (!metric_only)
+				fprintf(output, "                counts %*s events\n", unit_width, "unit");
+			break;
+		case AGGR_THREAD:
+			fprintf(output, "#           time             comm-pid");
+			if (!metric_only)
+				fprintf(output, "                  counts %*s events\n", unit_width, "unit");
+			break;
+		case AGGR_GLOBAL:
+		default:
+			fprintf(output, "#           time");
+			if (!metric_only)
+				fprintf(output, "             counts %*s events\n", unit_width, "unit");
+		case AGGR_UNSET:
+			break;
+		}
+	}
+
+	if ((num_print_interval == 0 || config->interval_clear) && metric_only)
+		print_metric_headers(config, evlist, " ", true);
+	if (++num_print_interval == 25)
+		num_print_interval = 0;
+}
+
+static void print_header(struct perf_stat_config *config,
+			 struct target *_target,
+			 int argc, const char **argv)
+{
+	FILE *output = config->output;
+	int i;
+
+	fflush(stdout);
+
+	if (!config->csv_output) {
+		fprintf(output, "\n");
+		fprintf(output, " Performance counter stats for ");
+		if (_target->system_wide)
+			fprintf(output, "\'system wide");
+		else if (_target->cpu_list)
+			fprintf(output, "\'CPU(s) %s", _target->cpu_list);
+		else if (!target__has_task(_target)) {
+			fprintf(output, "\'%s", argv ? argv[0] : "pipe");
+			for (i = 1; argv && (i < argc); i++)
+				fprintf(output, " %s", argv[i]);
+		} else if (_target->pid)
+			fprintf(output, "process id \'%s", _target->pid);
+		else
+			fprintf(output, "thread id \'%s", _target->tid);
+
+		fprintf(output, "\'");
+		if (config->run_count > 1)
+			fprintf(output, " (%d runs)", config->run_count);
+		fprintf(output, ":\n\n");
+	}
+}
+
+static int get_precision(double num)
+{
+	if (num > 1)
+		return 0;
+
+	return lround(ceil(-log10(num)));
+}
+
+static void print_table(struct perf_stat_config *config,
+			FILE *output, int precision, double avg)
+{
+	char tmp[64];
+	int idx, indent = 0;
+
+	scnprintf(tmp, 64, " %17.*f", precision, avg);
+	while (tmp[indent] == ' ')
+		indent++;
+
+	fprintf(output, "%*s# Table of individual measurements:\n", indent, "");
+
+	for (idx = 0; idx < config->run_count; idx++) {
+		double run = (double) config->walltime_run[idx] / NSEC_PER_SEC;
+		int h, n = 1 + abs((int) (100.0 * (run - avg)/run) / 5);
+
+		fprintf(output, " %17.*f (%+.*f) ",
+			precision, run, precision, run - avg);
+
+		for (h = 0; h < n; h++)
+			fprintf(output, "#");
+
+		fprintf(output, "\n");
+	}
+
+	fprintf(output, "\n%*s# Final result:\n", indent, "");
+}
+
+static double timeval2double(struct timeval *t)
+{
+	return t->tv_sec + (double) t->tv_usec/USEC_PER_SEC;
+}
+
+static void print_footer(struct perf_stat_config *config)
+{
+	double avg = avg_stats(config->walltime_nsecs_stats) / NSEC_PER_SEC;
+	FILE *output = config->output;
+	int n;
+
+	if (!config->null_run)
+		fprintf(output, "\n");
+
+	if (config->run_count == 1) {
+		fprintf(output, " %17.9f seconds time elapsed", avg);
+
+		if (config->ru_display) {
+			double ru_utime = timeval2double(&config->ru_data.ru_utime);
+			double ru_stime = timeval2double(&config->ru_data.ru_stime);
+
+			fprintf(output, "\n\n");
+			fprintf(output, " %17.9f seconds user\n", ru_utime);
+			fprintf(output, " %17.9f seconds sys\n", ru_stime);
+		}
+	} else {
+		double sd = stddev_stats(config->walltime_nsecs_stats) / NSEC_PER_SEC;
+		/*
+		 * Display at most 2 more significant
+		 * digits than the stddev inaccuracy.
+		 */
+		int precision = get_precision(sd) + 2;
+
+		if (config->walltime_run_table)
+			print_table(config, output, precision, avg);
+
+		fprintf(output, " %17.*f +- %.*f seconds time elapsed",
+			precision, avg, precision, sd);
+
+		print_noise_pct(config, sd, avg);
+	}
+	fprintf(output, "\n\n");
+
+	if (config->print_free_counters_hint &&
+	    sysctl__read_int("kernel/nmi_watchdog", &n) >= 0 &&
+	    n > 0)
+		fprintf(output,
+"Some events weren't counted. Try disabling the NMI watchdog:\n"
+"	echo 0 > /proc/sys/kernel/nmi_watchdog\n"
+"	perf stat ...\n"
+"	echo 1 > /proc/sys/kernel/nmi_watchdog\n");
+
+	if (config->print_mixed_hw_group_error)
+		fprintf(output,
+			"The events in group usually have to be from "
+			"the same PMU. Try reorganizing the group.\n");
+}
+
+void
+perf_evlist__print_counters(struct perf_evlist *evlist,
+			    struct perf_stat_config *config,
+			    struct target *_target,
+			    struct timespec *ts,
+			    int argc, const char **argv)
+{
+	bool metric_only = config->metric_only;
+	int interval = config->interval;
+	struct perf_evsel *counter;
+	char buf[64], *prefix = NULL;
+
+	if (interval)
+		print_interval(config, evlist, prefix = buf, ts);
+	else
+		print_header(config, _target, argc, argv);
+
+	if (metric_only) {
+		static int num_print_iv;
+
+		if (num_print_iv == 0 && !interval)
+			print_metric_headers(config, evlist, prefix, false);
+		if (num_print_iv++ == 25)
+			num_print_iv = 0;
+		if (config->aggr_mode == AGGR_GLOBAL && prefix)
+			fprintf(config->output, "%s", prefix);
+	}
+
+	switch (config->aggr_mode) {
+	case AGGR_CORE:
+	case AGGR_SOCKET:
+		print_aggr(config, evlist, prefix);
+		break;
+	case AGGR_THREAD:
+		evlist__for_each_entry(evlist, counter) {
+			if (is_duration_time(counter))
+				continue;
+			print_aggr_thread(config, _target, counter, prefix);
+		}
+		break;
+	case AGGR_GLOBAL:
+		evlist__for_each_entry(evlist, counter) {
+			if (is_duration_time(counter))
+				continue;
+			print_counter_aggr(config, counter, prefix);
+		}
+		if (metric_only)
+			fputc('\n', config->output);
+		break;
+	case AGGR_NONE:
+		if (metric_only)
+			print_no_aggr_metric(config, evlist, prefix);
+		else {
+			evlist__for_each_entry(evlist, counter) {
+				if (is_duration_time(counter))
+					continue;
+				print_counter(config, counter, prefix);
+			}
+		}
+		break;
+	case AGGR_UNSET:
+	default:
+		break;
+	}
+
+	if (!interval && !config->csv_output)
+		print_footer(config);
+
+	fflush(config->output);
+}
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index 99990f5f2512..8ad32763cfff 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -410,7 +410,8 @@ static double runtime_stat_n(struct runtime_stat *st,
 	return v->stats.n;
 }
 
-static void print_stalled_cycles_frontend(int cpu,
+static void print_stalled_cycles_frontend(struct perf_stat_config *config,
+					  int cpu,
 					  struct perf_evsel *evsel, double avg,
 					  struct perf_stat_output_ctx *out,
 					  struct runtime_stat *st)
@@ -427,13 +428,14 @@ static void print_stalled_cycles_frontend(int cpu,
 	color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);
 
 	if (ratio)
-		out->print_metric(out->ctx, color, "%7.2f%%", "frontend cycles idle",
+		out->print_metric(config, out->ctx, color, "%7.2f%%", "frontend cycles idle",
 				  ratio);
 	else
-		out->print_metric(out->ctx, NULL, NULL, "frontend cycles idle", 0);
+		out->print_metric(config, out->ctx, NULL, NULL, "frontend cycles idle", 0);
 }
 
-static void print_stalled_cycles_backend(int cpu,
+static void print_stalled_cycles_backend(struct perf_stat_config *config,
+					 int cpu,
 					 struct perf_evsel *evsel, double avg,
 					 struct perf_stat_output_ctx *out,
 					 struct runtime_stat *st)
@@ -449,10 +451,11 @@ static void print_stalled_cycles_backend(int cpu,
 
 	color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);
 
-	out->print_metric(out->ctx, color, "%7.2f%%", "backend cycles idle", ratio);
+	out->print_metric(config, out->ctx, color, "%7.2f%%", "backend cycles idle", ratio);
 }
 
-static void print_branch_misses(int cpu,
+static void print_branch_misses(struct perf_stat_config *config,
+				int cpu,
 				struct perf_evsel *evsel,
 				double avg,
 				struct perf_stat_output_ctx *out,
@@ -469,10 +472,11 @@ static void print_branch_misses(int cpu,
 
 	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
 
-	out->print_metric(out->ctx, color, "%7.2f%%", "of all branches", ratio);
+	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all branches", ratio);
 }
 
-static void print_l1_dcache_misses(int cpu,
+static void print_l1_dcache_misses(struct perf_stat_config *config,
+				   int cpu,
 				   struct perf_evsel *evsel,
 				   double avg,
 				   struct perf_stat_output_ctx *out,
@@ -490,10 +494,11 @@ static void print_l1_dcache_misses(int cpu,
 
 	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
 
-	out->print_metric(out->ctx, color, "%7.2f%%", "of all L1-dcache hits", ratio);
+	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-dcache hits", ratio);
 }
 
-static void print_l1_icache_misses(int cpu,
+static void print_l1_icache_misses(struct perf_stat_config *config,
+				   int cpu,
 				   struct perf_evsel *evsel,
 				   double avg,
 				   struct perf_stat_output_ctx *out,
@@ -510,10 +515,11 @@ static void print_l1_icache_misses(int cpu,
 		ratio = avg / total * 100.0;
 
 	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
-	out->print_metric(out->ctx, color, "%7.2f%%", "of all L1-icache hits", ratio);
+	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-icache hits", ratio);
 }
 
-static void print_dtlb_cache_misses(int cpu,
+static void print_dtlb_cache_misses(struct perf_stat_config *config,
+				    int cpu,
 				    struct perf_evsel *evsel,
 				    double avg,
 				    struct perf_stat_output_ctx *out,
@@ -529,10 +535,11 @@ static void print_dtlb_cache_misses(int cpu,
 		ratio = avg / total * 100.0;
 
 	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
-	out->print_metric(out->ctx, color, "%7.2f%%", "of all dTLB cache hits", ratio);
+	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all dTLB cache hits", ratio);
 }
 
-static void print_itlb_cache_misses(int cpu,
+static void print_itlb_cache_misses(struct perf_stat_config *config,
+				    int cpu,
 				    struct perf_evsel *evsel,
 				    double avg,
 				    struct perf_stat_output_ctx *out,
@@ -548,10 +555,11 @@ static void print_itlb_cache_misses(int cpu,
 		ratio = avg / total * 100.0;
 
 	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
-	out->print_metric(out->ctx, color, "%7.2f%%", "of all iTLB cache hits", ratio);
+	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all iTLB cache hits", ratio);
 }
 
-static void print_ll_cache_misses(int cpu,
+static void print_ll_cache_misses(struct perf_stat_config *config,
+				  int cpu,
 				  struct perf_evsel *evsel,
 				  double avg,
 				  struct perf_stat_output_ctx *out,
@@ -567,7 +575,7 @@ static void print_ll_cache_misses(int cpu,
 		ratio = avg / total * 100.0;
 
 	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
-	out->print_metric(out->ctx, color, "%7.2f%%", "of all LL-cache hits", ratio);
+	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all LL-cache hits", ratio);
 }
 
 /*
@@ -674,7 +682,8 @@ static double td_be_bound(int ctx, int cpu, struct runtime_stat *st)
 	return sanitize_val(1.0 - sum);
 }
 
-static void print_smi_cost(int cpu, struct perf_evsel *evsel,
+static void print_smi_cost(struct perf_stat_config *config,
+			   int cpu, struct perf_evsel *evsel,
 			   struct perf_stat_output_ctx *out,
 			   struct runtime_stat *st)
 {
@@ -694,11 +703,12 @@ static void print_smi_cost(int cpu, struct perf_evsel *evsel,
 
 	if (cost > 10)
 		color = PERF_COLOR_RED;
-	out->print_metric(out->ctx, color, "%8.1f%%", "SMI cycles%", cost);
-	out->print_metric(out->ctx, NULL, "%4.0f", "SMI#", smi_num);
+	out->print_metric(config, out->ctx, color, "%8.1f%%", "SMI cycles%", cost);
+	out->print_metric(config, out->ctx, NULL, "%4.0f", "SMI#", smi_num);
 }
 
-static void generic_metric(const char *metric_expr,
+static void generic_metric(struct perf_stat_config *config,
+			   const char *metric_expr,
 			   struct perf_evsel **metric_events,
 			   char *name,
 			   const char *metric_name,
@@ -737,20 +747,21 @@ static void generic_metric(const char *metric_expr,
 		const char *p = metric_expr;
 
 		if (expr__parse(&ratio, &pctx, &p) == 0)
-			print_metric(ctxp, NULL, "%8.1f",
+			print_metric(config, ctxp, NULL, "%8.1f",
 				metric_name ?
 				metric_name :
 				out->force_header ?  name : "",
 				ratio);
 		else
-			print_metric(ctxp, NULL, NULL,
+			print_metric(config, ctxp, NULL, NULL,
 				     out->force_header ?
 				     (metric_name ? metric_name : name) : "", 0);
 	} else
-		print_metric(ctxp, NULL, NULL, "", 0);
+		print_metric(config, ctxp, NULL, NULL, "", 0);
 }
 
-void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
+void perf_stat__print_shadow_stats(struct perf_stat_config *config,
+				   struct perf_evsel *evsel,
 				   double avg, int cpu,
 				   struct perf_stat_output_ctx *out,
 				   struct rblist *metric_events,
@@ -769,10 +780,10 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
 
 		if (total) {
 			ratio = avg / total;
-			print_metric(ctxp, NULL, "%7.2f ",
+			print_metric(config, ctxp, NULL, "%7.2f ",
 					"insn per cycle", ratio);
 		} else {
-			print_metric(ctxp, NULL, NULL, "insn per cycle", 0);
+			print_metric(config, ctxp, NULL, NULL, "insn per cycle", 0);
 		}
 
 		total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT,
@@ -783,20 +794,20 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
 						    ctx, cpu));
 
 		if (total && avg) {
-			out->new_line(ctxp);
+			out->new_line(config, ctxp);
 			ratio = total / avg;
-			print_metric(ctxp, NULL, "%7.2f ",
+			print_metric(config, ctxp, NULL, "%7.2f ",
 					"stalled cycles per insn",
 					ratio);
 		} else if (have_frontend_stalled) {
-			print_metric(ctxp, NULL, NULL,
+			print_metric(config, ctxp, NULL, NULL,
 				     "stalled cycles per insn", 0);
 		}
 	} else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) {
 		if (runtime_stat_n(st, STAT_BRANCHES, ctx, cpu) != 0)
-			print_branch_misses(cpu, evsel, avg, out, st);
+			print_branch_misses(config, cpu, evsel, avg, out, st);
 		else
-			print_metric(ctxp, NULL, NULL, "of all branches", 0);
+			print_metric(config, ctxp, NULL, NULL, "of all branches", 0);
 	} else if (
 		evsel->attr.type == PERF_TYPE_HW_CACHE &&
 		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1D |
@@ -804,9 +815,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
 					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
 
 		if (runtime_stat_n(st, STAT_L1_DCACHE, ctx, cpu) != 0)
-			print_l1_dcache_misses(cpu, evsel, avg, out, st);
+			print_l1_dcache_misses(config, cpu, evsel, avg, out, st);
 		else
-			print_metric(ctxp, NULL, NULL, "of all L1-dcache hits", 0);
+			print_metric(config, ctxp, NULL, NULL, "of all L1-dcache hits", 0);
 	} else if (
 		evsel->attr.type == PERF_TYPE_HW_CACHE &&
 		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1I |
@@ -814,9 +825,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
 					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
 
 		if (runtime_stat_n(st, STAT_L1_ICACHE, ctx, cpu) != 0)
-			print_l1_icache_misses(cpu, evsel, avg, out, st);
+			print_l1_icache_misses(config, cpu, evsel, avg, out, st);
 		else
-			print_metric(ctxp, NULL, NULL, "of all L1-icache hits", 0);
+			print_metric(config, ctxp, NULL, NULL, "of all L1-icache hits", 0);
 	} else if (
 		evsel->attr.type == PERF_TYPE_HW_CACHE &&
 		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_DTLB |
@@ -824,9 +835,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
 					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
 
 		if (runtime_stat_n(st, STAT_DTLB_CACHE, ctx, cpu) != 0)
-			print_dtlb_cache_misses(cpu, evsel, avg, out, st);
+			print_dtlb_cache_misses(config, cpu, evsel, avg, out, st);
 		else
-			print_metric(ctxp, NULL, NULL, "of all dTLB cache hits", 0);
+			print_metric(config, ctxp, NULL, NULL, "of all dTLB cache hits", 0);
 	} else if (
 		evsel->attr.type == PERF_TYPE_HW_CACHE &&
 		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_ITLB |
@@ -834,9 +845,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
 					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
 
 		if (runtime_stat_n(st, STAT_ITLB_CACHE, ctx, cpu) != 0)
-			print_itlb_cache_misses(cpu, evsel, avg, out, st);
+			print_itlb_cache_misses(config, cpu, evsel, avg, out, st);
 		else
-			print_metric(ctxp, NULL, NULL, "of all iTLB cache hits", 0);
+			print_metric(config, ctxp, NULL, NULL, "of all iTLB cache hits", 0);
 	} else if (
 		evsel->attr.type == PERF_TYPE_HW_CACHE &&
 		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_LL |
@@ -844,9 +855,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
 					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
 
 		if (runtime_stat_n(st, STAT_LL_CACHE, ctx, cpu) != 0)
-			print_ll_cache_misses(cpu, evsel, avg, out, st);
+			print_ll_cache_misses(config, cpu, evsel, avg, out, st);
 		else
-			print_metric(ctxp, NULL, NULL, "of all LL-cache hits", 0);
+			print_metric(config, ctxp, NULL, NULL, "of all LL-cache hits", 0);
 	} else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) {
 		total = runtime_stat_avg(st, STAT_CACHEREFS, ctx, cpu);
 
@@ -854,32 +865,32 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
 			ratio = avg * 100 / total;
 
 		if (runtime_stat_n(st, STAT_CACHEREFS, ctx, cpu) != 0)
-			print_metric(ctxp, NULL, "%8.3f %%",
+			print_metric(config, ctxp, NULL, "%8.3f %%",
 				     "of all cache refs", ratio);
 		else
-			print_metric(ctxp, NULL, NULL, "of all cache refs", 0);
+			print_metric(config, ctxp, NULL, NULL, "of all cache refs", 0);
 	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
-		print_stalled_cycles_frontend(cpu, evsel, avg, out, st);
+		print_stalled_cycles_frontend(config, cpu, evsel, avg, out, st);
 	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
-		print_stalled_cycles_backend(cpu, evsel, avg, out, st);
+		print_stalled_cycles_backend(config, cpu, evsel, avg, out, st);
 	} else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
 		total = runtime_stat_avg(st, STAT_NSECS, 0, cpu);
 
 		if (total) {
 			ratio = avg / total;
-			print_metric(ctxp, NULL, "%8.3f", "GHz", ratio);
+			print_metric(config, ctxp, NULL, "%8.3f", "GHz", ratio);
 		} else {
-			print_metric(ctxp, NULL, NULL, "Ghz", 0);
+			print_metric(config, ctxp, NULL, NULL, "Ghz", 0);
 		}
 	} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) {
 		total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
 
 		if (total)
-			print_metric(ctxp, NULL,
+			print_metric(config, ctxp, NULL,
 					"%7.2f%%", "transactional cycles",
 					100.0 * (avg / total));
 		else
-			print_metric(ctxp, NULL, NULL, "transactional cycles",
+			print_metric(config, ctxp, NULL, NULL, "transactional cycles",
 				     0);
 	} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) {
 		total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
@@ -888,10 +899,10 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
 		if (total2 < avg)
 			total2 = avg;
 		if (total)
-			print_metric(ctxp, NULL, "%7.2f%%", "aborted cycles",
+			print_metric(config, ctxp, NULL, "%7.2f%%", "aborted cycles",
 				100.0 * ((total2-avg) / total));
 		else
-			print_metric(ctxp, NULL, NULL, "aborted cycles", 0);
+			print_metric(config, ctxp, NULL, NULL, "aborted cycles", 0);
 	} else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) {
 		total = runtime_stat_avg(st, STAT_CYCLES_IN_TX,
 					 ctx, cpu);
@@ -900,10 +911,10 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
 			ratio = total / avg;
 
 		if (runtime_stat_n(st, STAT_CYCLES_IN_TX, ctx, cpu) != 0)
-			print_metric(ctxp, NULL, "%8.0f",
+			print_metric(config, ctxp, NULL, "%8.0f",
 				     "cycles / transaction", ratio);
 		else
-			print_metric(ctxp, NULL, NULL, "cycles / transaction",
+			print_metric(config, ctxp, NULL, NULL, "cycles / transaction",
 				      0);
 	} else if (perf_stat_evsel__is(evsel, ELISION_START)) {
 		total = runtime_stat_avg(st, STAT_CYCLES_IN_TX,
@@ -912,33 +923,33 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
 		if (avg)
 			ratio = total / avg;
 
-		print_metric(ctxp, NULL, "%8.0f", "cycles / elision", ratio);
+		print_metric(config, ctxp, NULL, "%8.0f", "cycles / elision", ratio);
 	} else if (perf_evsel__is_clock(evsel)) {
 		if ((ratio = avg_stats(&walltime_nsecs_stats)) != 0)
-			print_metric(ctxp, NULL, "%8.3f", "CPUs utilized",
+			print_metric(config, ctxp, NULL, "%8.3f", "CPUs utilized",
 				     avg / (ratio * evsel->scale));
 		else
-			print_metric(ctxp, NULL, NULL, "CPUs utilized", 0);
+			print_metric(config, ctxp, NULL, NULL, "CPUs utilized", 0);
 	} else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) {
 		double fe_bound = td_fe_bound(ctx, cpu, st);
 
 		if (fe_bound > 0.2)
 			color = PERF_COLOR_RED;
-		print_metric(ctxp, color, "%8.1f%%", "frontend bound",
+		print_metric(config, ctxp, color, "%8.1f%%", "frontend bound",
 				fe_bound * 100.);
 	} else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) {
 		double retiring = td_retiring(ctx, cpu, st);
 
 		if (retiring > 0.7)
 			color = PERF_COLOR_GREEN;
-		print_metric(ctxp, color, "%8.1f%%", "retiring",
+		print_metric(config, ctxp, color, "%8.1f%%", "retiring",
 				retiring * 100.);
 	} else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) {
 		double bad_spec = td_bad_spec(ctx, cpu, st);
 
 		if (bad_spec > 0.1)
 			color = PERF_COLOR_RED;
-		print_metric(ctxp, color, "%8.1f%%", "bad speculation",
+		print_metric(config, ctxp, color, "%8.1f%%", "bad speculation",
 				bad_spec * 100.);
 	} else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) {
 		double be_bound = td_be_bound(ctx, cpu, st);
@@ -955,12 +966,12 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
 		if (be_bound > 0.2)
 			color = PERF_COLOR_RED;
 		if (td_total_slots(ctx, cpu, st) > 0)
-			print_metric(ctxp, color, "%8.1f%%", name,
+			print_metric(config, ctxp, color, "%8.1f%%", name,
 					be_bound * 100.);
 		else
-			print_metric(ctxp, NULL, NULL, name, 0);
+			print_metric(config, ctxp, NULL, NULL, name, 0);
 	} else if (evsel->metric_expr) {
-		generic_metric(evsel->metric_expr, evsel->metric_events, evsel->name,
+		generic_metric(config, evsel->metric_expr, evsel->metric_events, evsel->name,
 				evsel->metric_name, avg, cpu, out, st);
 	} else if (runtime_stat_n(st, STAT_NSECS, 0, cpu) != 0) {
 		char unit = 'M';
@@ -975,9 +986,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
 			unit = 'K';
 		}
 		snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
-		print_metric(ctxp, NULL, "%8.3f", unit_buf, ratio);
+		print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio);
 	} else if (perf_stat_evsel__is(evsel, SMI_NUM)) {
-		print_smi_cost(cpu, evsel, out, st);
+		print_smi_cost(config, cpu, evsel, out, st);
 	} else {
 		num = 0;
 	}
@@ -987,12 +998,12 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
 
 		list_for_each_entry (mexp, &me->head, nd) {
 			if (num++ > 0)
-				out->new_line(ctxp);
-			generic_metric(mexp->metric_expr, mexp->metric_events,
+				out->new_line(config, ctxp);
+			generic_metric(config, mexp->metric_expr, mexp->metric_events,
 					evsel->name, mexp->metric_name,
 					avg, cpu, out, st);
 		}
 	}
 	if (num == 0)
-		print_metric(ctxp, NULL, NULL, NULL, 0);
+		print_metric(config, ctxp, NULL, NULL, NULL, 0);
 }
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index a0061e0b0fad..4d40515307b8 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -374,9 +374,8 @@ int perf_stat_process_counter(struct perf_stat_config *config,
 	return 0;
 }
 
-int perf_event__process_stat_event(struct perf_tool *tool __maybe_unused,
-				   union perf_event *event,
-				   struct perf_session *session)
+int perf_event__process_stat_event(struct perf_session *session,
+				   union perf_event *event)
 {
 	struct perf_counts_values count;
 	struct stat_event *st = &event->stat;
@@ -435,3 +434,98 @@ size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp)
 
 	return ret;
 }
+
+int create_perf_stat_counter(struct perf_evsel *evsel,
+			     struct perf_stat_config *config,
+			     struct target *target)
+{
+	struct perf_event_attr *attr = &evsel->attr;
+	struct perf_evsel *leader = evsel->leader;
+
+	if (config->scale) {
+		attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
+				    PERF_FORMAT_TOTAL_TIME_RUNNING;
+	}
+
+	/*
+	 * The event is part of non trivial group, let's enable
+	 * the group read (for leader) and ID retrieval for all
+	 * members.
+	 */
+	if (leader->nr_members > 1)
+		attr->read_format |= PERF_FORMAT_ID|PERF_FORMAT_GROUP;
+
+	attr->inherit = !config->no_inherit;
+
+	/*
+	 * Some events get initialized with sample_(period/type) set,
+	 * like tracepoints. Clear it up for counting.
+	 */
+	attr->sample_period = 0;
+
+	if (config->identifier)
+		attr->sample_type = PERF_SAMPLE_IDENTIFIER;
+
+	/*
+	 * Disabling all counters initially, they will be enabled
+	 * either manually by us or by kernel via enable_on_exec
+	 * set later.
+	 */
+	if (perf_evsel__is_group_leader(evsel)) {
+		attr->disabled = 1;
+
+		/*
+		 * In case of initial_delay we enable tracee
+		 * events manually.
+		 */
+		if (target__none(target) && !config->initial_delay)
+			attr->enable_on_exec = 1;
+	}
+
+	if (target__has_cpu(target) && !target__has_per_thread(target))
+		return perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel));
+
+	return perf_evsel__open_per_thread(evsel, evsel->threads);
+}
+
+int perf_stat_synthesize_config(struct perf_stat_config *config,
+				struct perf_tool *tool,
+				struct perf_evlist *evlist,
+				perf_event__handler_t process,
+				bool attrs)
+{
+	int err;
+
+	if (attrs) {
+		err = perf_event__synthesize_attrs(tool, evlist, process);
+		if (err < 0) {
+			pr_err("Couldn't synthesize attrs.\n");
+			return err;
+		}
+	}
+
+	err = perf_event__synthesize_extra_attr(tool, evlist, process,
+						attrs);
+
+	err = perf_event__synthesize_thread_map2(tool, evlist->threads,
+						 process, NULL);
+	if (err < 0) {
+		pr_err("Couldn't synthesize thread map.\n");
+		return err;
+	}
+
+	err = perf_event__synthesize_cpu_map(tool, evlist->cpus,
+					     process, NULL);
+	if (err < 0) {
+		pr_err("Couldn't synthesize thread map.\n");
+		return err;
+	}
+
+	err = perf_event__synthesize_stat_config(tool, config, process, NULL);
+	if (err < 0) {
+		pr_err("Couldn't synthesize config.\n");
+		return err;
+	}
+
+	return 0;
+}
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 36efb986f7fc..2f9c9159a364 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -4,8 +4,14 @@
 
 #include <linux/types.h>
 #include <stdio.h>
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/wait.h>
 #include "xyarray.h"
 #include "rblist.h"
+#include "perf.h"
+#include "event.h"
 
 struct stats {
 	double n, mean, M2;
@@ -84,15 +90,42 @@ struct runtime_stat {
 	struct rblist value_list;
 };
 
+typedef int (*aggr_get_id_t)(struct perf_stat_config *config,
+			     struct cpu_map *m, int cpu);
+
 struct perf_stat_config {
-	enum aggr_mode	aggr_mode;
-	bool		scale;
-	FILE		*output;
-	unsigned int	interval;
-	unsigned int	timeout;
-	int		times;
-	struct runtime_stat *stats;
-	int		stats_num;
+	enum aggr_mode		 aggr_mode;
+	bool			 scale;
+	bool			 no_inherit;
+	bool			 identifier;
+	bool			 csv_output;
+	bool			 interval_clear;
+	bool			 metric_only;
+	bool			 null_run;
+	bool			 ru_display;
+	bool			 big_num;
+	bool			 no_merge;
+	bool			 walltime_run_table;
+	FILE			*output;
+	unsigned int		 interval;
+	unsigned int		 timeout;
+	unsigned int		 initial_delay;
+	unsigned int		 unit_width;
+	unsigned int		 metric_only_len;
+	int			 times;
+	int			 run_count;
+	int			 print_free_counters_hint;
+	int			 print_mixed_hw_group_error;
+	struct runtime_stat	*stats;
+	int			 stats_num;
+	const char		*csv_sep;
+	struct stats		*walltime_nsecs_stats;
+	struct rusage		 ru_data;
+	struct cpu_map		*aggr_map;
+	aggr_get_id_t		 aggr_get_id;
+	struct cpu_map		*cpus_aggr_map;
+	u64			*walltime_run;
+	struct rblist		 metric_events;
 };
 
 void update_stats(struct stats *stats, u64 val);
@@ -130,9 +163,10 @@ bool __perf_evsel_stat__is(struct perf_evsel *evsel,
 extern struct runtime_stat rt_stat;
 extern struct stats walltime_nsecs_stats;
 
-typedef void (*print_metric_t)(void *ctx, const char *color, const char *unit,
+typedef void (*print_metric_t)(struct perf_stat_config *config,
+			       void *ctx, const char *color, const char *unit,
 			       const char *fmt, double val);
-typedef void (*new_line_t )(void *ctx);
+typedef void (*new_line_t)(struct perf_stat_config *config, void *ctx);
 
 void runtime_stat__init(struct runtime_stat *st);
 void runtime_stat__exit(struct runtime_stat *st);
@@ -148,7 +182,8 @@ struct perf_stat_output_ctx {
 	bool force_header;
 };
 
-void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
+void perf_stat__print_shadow_stats(struct perf_stat_config *config,
+				   struct perf_evsel *evsel,
 				   double avg, int cpu,
 				   struct perf_stat_output_ctx *out,
 				   struct rblist *metric_events,
@@ -164,11 +199,25 @@ int perf_stat_process_counter(struct perf_stat_config *config,
 struct perf_tool;
 union perf_event;
 struct perf_session;
-int perf_event__process_stat_event(struct perf_tool *tool,
-				   union perf_event *event,
-				   struct perf_session *session);
+int perf_event__process_stat_event(struct perf_session *session,
+				   union perf_event *event);
 
 size_t perf_event__fprintf_stat(union perf_event *event, FILE *fp);
 size_t perf_event__fprintf_stat_round(union perf_event *event, FILE *fp);
 size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp);
+
+int create_perf_stat_counter(struct perf_evsel *evsel,
+			     struct perf_stat_config *config,
+			     struct target *target);
+int perf_stat_synthesize_config(struct perf_stat_config *config,
+				struct perf_tool *tool,
+				struct perf_evlist *evlist,
+				perf_event__handler_t process,
+				bool attrs);
+void
+perf_evlist__print_counters(struct perf_evlist *evlist,
+			    struct perf_stat_config *config,
+			    struct target *_target,
+			    struct timespec *ts,
+			    int argc, const char **argv);
 #endif
diff --git a/tools/perf/util/strbuf.c b/tools/perf/util/strbuf.c
index 3d1cf5bf7f18..9005fbe0780e 100644
--- a/tools/perf/util/strbuf.c
+++ b/tools/perf/util/strbuf.c
@@ -98,19 +98,25 @@ static int strbuf_addv(struct strbuf *sb, const char *fmt, va_list ap)
 
 	va_copy(ap_saved, ap);
 	len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap);
-	if (len < 0)
+	if (len < 0) {
+		va_end(ap_saved);
 		return len;
+	}
 	if (len > strbuf_avail(sb)) {
 		ret = strbuf_grow(sb, len);
-		if (ret)
+		if (ret) {
+			va_end(ap_saved);
 			return ret;
+		}
 		len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap_saved);
 		va_end(ap_saved);
 		if (len > strbuf_avail(sb)) {
 			pr_debug("this should not happen, your vsnprintf is broken");
+			va_end(ap_saved);
 			return -EINVAL;
 		}
 	}
+	va_end(ap_saved);
 	return strbuf_setlen(sb, sb->len + len);
 }
 
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index 29770ea61768..66a84d5846c8 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -324,7 +324,17 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss)
 			plt_entry_size = 16;
 			break;
 
-		default: /* FIXME: s390/alpha/mips/parisc/poperpc/sh/sparc/xtensa need to be checked */
+		case EM_SPARC:
+			plt_header_size = 48;
+			plt_entry_size = 12;
+			break;
+
+		case EM_SPARCV9:
+			plt_header_size = 128;
+			plt_entry_size = 32;
+			break;
+
+		default: /* FIXME: s390/alpha/mips/parisc/poperpc/sh/xtensa need to be checked */
 			plt_header_size = shdr_plt.sh_entsize;
 			plt_entry_size = shdr_plt.sh_entsize;
 			break;
@@ -1947,6 +1957,34 @@ void kcore_extract__delete(struct kcore_extract *kce)
 }
 
 #ifdef HAVE_GELF_GETNOTE_SUPPORT
+
+static void sdt_adjust_loc(struct sdt_note *tmp, GElf_Addr base_off)
+{
+	if (!base_off)
+		return;
+
+	if (tmp->bit32)
+		tmp->addr.a32[SDT_NOTE_IDX_LOC] =
+			tmp->addr.a32[SDT_NOTE_IDX_LOC] + base_off -
+			tmp->addr.a32[SDT_NOTE_IDX_BASE];
+	else
+		tmp->addr.a64[SDT_NOTE_IDX_LOC] =
+			tmp->addr.a64[SDT_NOTE_IDX_LOC] + base_off -
+			tmp->addr.a64[SDT_NOTE_IDX_BASE];
+}
+
+static void sdt_adjust_refctr(struct sdt_note *tmp, GElf_Addr base_addr,
+			      GElf_Addr base_off)
+{
+	if (!base_off)
+		return;
+
+	if (tmp->bit32 && tmp->addr.a32[SDT_NOTE_IDX_REFCTR])
+		tmp->addr.a32[SDT_NOTE_IDX_REFCTR] -= (base_addr - base_off);
+	else if (tmp->addr.a64[SDT_NOTE_IDX_REFCTR])
+		tmp->addr.a64[SDT_NOTE_IDX_REFCTR] -= (base_addr - base_off);
+}
+
 /**
  * populate_sdt_note : Parse raw data and identify SDT note
  * @elf: elf of the opened file
@@ -1964,7 +2002,6 @@ static int populate_sdt_note(Elf **elf, const char *data, size_t len,
 	const char *provider, *name, *args;
 	struct sdt_note *tmp = NULL;
 	GElf_Ehdr ehdr;
-	GElf_Addr base_off = 0;
 	GElf_Shdr shdr;
 	int ret = -EINVAL;
 
@@ -2060,17 +2097,12 @@ static int populate_sdt_note(Elf **elf, const char *data, size_t len,
 	 * base address in the description of the SDT note. If its different,
 	 * then accordingly, adjust the note location.
 	 */
-	if (elf_section_by_name(*elf, &ehdr, &shdr, SDT_BASE_SCN, NULL)) {
-		base_off = shdr.sh_offset;
-		if (base_off) {
-			if (tmp->bit32)
-				tmp->addr.a32[0] = tmp->addr.a32[0] + base_off -
-					tmp->addr.a32[1];
-			else
-				tmp->addr.a64[0] = tmp->addr.a64[0] + base_off -
-					tmp->addr.a64[1];
-		}
-	}
+	if (elf_section_by_name(*elf, &ehdr, &shdr, SDT_BASE_SCN, NULL))
+		sdt_adjust_loc(tmp, shdr.sh_offset);
+
+	/* Adjust reference counter offset */
+	if (elf_section_by_name(*elf, &ehdr, &shdr, SDT_PROBES_SCN, NULL))
+		sdt_adjust_refctr(tmp, shdr.sh_addr, shdr.sh_offset);
 
 	list_add_tail(&tmp->note_list, sdt_notes);
 	return 0;
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index f25fae4b5743..d026d215bdc6 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -123,7 +123,8 @@ struct symbol_conf {
 	const char	*vmlinux_name,
 			*kallsyms_name,
 			*source_prefix,
-			*field_sep;
+			*field_sep,
+			*graph_function;
 	const char	*default_guest_vmlinux_name,
 			*default_guest_kallsyms,
 			*default_guest_modules;
@@ -379,12 +380,19 @@ int get_sdt_note_list(struct list_head *head, const char *target);
 int cleanup_sdt_note_list(struct list_head *sdt_notes);
 int sdt_notes__get_count(struct list_head *start);
 
+#define SDT_PROBES_SCN ".probes"
 #define SDT_BASE_SCN ".stapsdt.base"
 #define SDT_NOTE_SCN  ".note.stapsdt"
 #define SDT_NOTE_TYPE 3
 #define SDT_NOTE_NAME "stapsdt"
 #define NR_ADDR 3
 
+enum {
+	SDT_NOTE_IDX_LOC = 0,
+	SDT_NOTE_IDX_BASE,
+	SDT_NOTE_IDX_REFCTR,
+};
+
 struct mem_info *mem_info__new(void);
 struct mem_info *mem_info__get(struct mem_info *mi);
 void   mem_info__put(struct mem_info *mi);
diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c
index dd17d6a38d3a..61a4286a74dc 100644
--- a/tools/perf/util/thread-stack.c
+++ b/tools/perf/util/thread-stack.c
@@ -36,6 +36,7 @@
  * @branch_count: the branch count when the entry was created
  * @cp: call path
  * @no_call: a 'call' was not seen
+ * @trace_end: a 'call' but trace ended
  */
 struct thread_stack_entry {
 	u64 ret_addr;
@@ -44,6 +45,7 @@ struct thread_stack_entry {
 	u64 branch_count;
 	struct call_path *cp;
 	bool no_call;
+	bool trace_end;
 };
 
 /**
@@ -112,7 +114,8 @@ static struct thread_stack *thread_stack__new(struct thread *thread,
 	return ts;
 }
 
-static int thread_stack__push(struct thread_stack *ts, u64 ret_addr)
+static int thread_stack__push(struct thread_stack *ts, u64 ret_addr,
+			      bool trace_end)
 {
 	int err = 0;
 
@@ -124,6 +127,7 @@ static int thread_stack__push(struct thread_stack *ts, u64 ret_addr)
 		}
 	}
 
+	ts->stack[ts->cnt].trace_end = trace_end;
 	ts->stack[ts->cnt++].ret_addr = ret_addr;
 
 	return err;
@@ -150,6 +154,18 @@ static void thread_stack__pop(struct thread_stack *ts, u64 ret_addr)
 	}
 }
 
+static void thread_stack__pop_trace_end(struct thread_stack *ts)
+{
+	size_t i;
+
+	for (i = ts->cnt; i; ) {
+		if (ts->stack[--i].trace_end)
+			ts->cnt = i;
+		else
+			return;
+	}
+}
+
 static bool thread_stack__in_kernel(struct thread_stack *ts)
 {
 	if (!ts->cnt)
@@ -254,10 +270,19 @@ int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
 		ret_addr = from_ip + insn_len;
 		if (ret_addr == to_ip)
 			return 0; /* Zero-length calls are excluded */
-		return thread_stack__push(thread->ts, ret_addr);
-	} else if (flags & PERF_IP_FLAG_RETURN) {
-		if (!from_ip)
-			return 0;
+		return thread_stack__push(thread->ts, ret_addr,
+					  flags & PERF_IP_FLAG_TRACE_END);
+	} else if (flags & PERF_IP_FLAG_TRACE_BEGIN) {
+		/*
+		 * If the caller did not change the trace number (which would
+		 * have flushed the stack) then try to make sense of the stack.
+		 * Possibly, tracing began after returning to the current
+		 * address, so try to pop that. Also, do not expect a call made
+		 * when the trace ended, to return, so pop that.
+		 */
+		thread_stack__pop(thread->ts, to_ip);
+		thread_stack__pop_trace_end(thread->ts);
+	} else if ((flags & PERF_IP_FLAG_RETURN) && from_ip) {
 		thread_stack__pop(thread->ts, to_ip);
 	}
 
@@ -285,20 +310,46 @@ void thread_stack__free(struct thread *thread)
 	}
 }
 
+static inline u64 callchain_context(u64 ip, u64 kernel_start)
+{
+	return ip < kernel_start ? PERF_CONTEXT_USER : PERF_CONTEXT_KERNEL;
+}
+
 void thread_stack__sample(struct thread *thread, struct ip_callchain *chain,
-			  size_t sz, u64 ip)
+			  size_t sz, u64 ip, u64 kernel_start)
 {
-	size_t i;
+	u64 context = callchain_context(ip, kernel_start);
+	u64 last_context;
+	size_t i, j;
 
-	if (!thread || !thread->ts)
-		chain->nr = 1;
-	else
-		chain->nr = min(sz, thread->ts->cnt + 1);
+	if (sz < 2) {
+		chain->nr = 0;
+		return;
+	}
+
+	chain->ips[0] = context;
+	chain->ips[1] = ip;
 
-	chain->ips[0] = ip;
+	if (!thread || !thread->ts) {
+		chain->nr = 2;
+		return;
+	}
+
+	last_context = context;
+
+	for (i = 2, j = 1; i < sz && j <= thread->ts->cnt; i++, j++) {
+		ip = thread->ts->stack[thread->ts->cnt - j].ret_addr;
+		context = callchain_context(ip, kernel_start);
+		if (context != last_context) {
+			if (i >= sz - 1)
+				break;
+			chain->ips[i++] = context;
+			last_context = context;
+		}
+		chain->ips[i] = ip;
+	}
 
-	for (i = 1; i < chain->nr; i++)
-		chain->ips[i] = thread->ts->stack[thread->ts->cnt - i].ret_addr;
+	chain->nr = i;
 }
 
 struct call_return_processor *
@@ -332,7 +383,7 @@ void call_return_processor__free(struct call_return_processor *crp)
 
 static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr,
 				 u64 timestamp, u64 ref, struct call_path *cp,
-				 bool no_call)
+				 bool no_call, bool trace_end)
 {
 	struct thread_stack_entry *tse;
 	int err;
@@ -350,6 +401,7 @@ static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr,
 	tse->branch_count = ts->branch_count;
 	tse->cp = cp;
 	tse->no_call = no_call;
+	tse->trace_end = trace_end;
 
 	return 0;
 }
@@ -423,7 +475,7 @@ static int thread_stack__bottom(struct thread *thread, struct thread_stack *ts,
 		return -ENOMEM;
 
 	return thread_stack__push_cp(thread->ts, ip, sample->time, ref, cp,
-				     true);
+				     true, false);
 }
 
 static int thread_stack__no_call_return(struct thread *thread,
@@ -455,7 +507,7 @@ static int thread_stack__no_call_return(struct thread *thread,
 			if (!cp)
 				return -ENOMEM;
 			return thread_stack__push_cp(ts, 0, sample->time, ref,
-						     cp, true);
+						     cp, true, false);
 		}
 	} else if (thread_stack__in_kernel(ts) && sample->ip < ks) {
 		/* Return to userspace, so pop all kernel addresses */
@@ -480,7 +532,7 @@ static int thread_stack__no_call_return(struct thread *thread,
 		return -ENOMEM;
 
 	err = thread_stack__push_cp(ts, sample->addr, sample->time, ref, cp,
-				    true);
+				    true, false);
 	if (err)
 		return err;
 
@@ -500,7 +552,7 @@ static int thread_stack__trace_begin(struct thread *thread,
 
 	/* Pop trace end */
 	tse = &ts->stack[ts->cnt - 1];
-	if (tse->cp->sym == NULL && tse->cp->ip == 0) {
+	if (tse->trace_end) {
 		err = thread_stack__call_return(thread, ts, --ts->cnt,
 						timestamp, ref, false);
 		if (err)
@@ -529,7 +581,7 @@ static int thread_stack__trace_end(struct thread_stack *ts,
 	ret_addr = sample->ip + sample->insn_len;
 
 	return thread_stack__push_cp(ts, ret_addr, sample->time, ref, cp,
-				     false);
+				     false, true);
 }
 
 int thread_stack__process(struct thread *thread, struct comm *comm,
@@ -579,6 +631,7 @@ int thread_stack__process(struct thread *thread, struct comm *comm,
 	ts->last_time = sample->time;
 
 	if (sample->flags & PERF_IP_FLAG_CALL) {
+		bool trace_end = sample->flags & PERF_IP_FLAG_TRACE_END;
 		struct call_path_root *cpr = ts->crp->cpr;
 		struct call_path *cp;
 		u64 ret_addr;
@@ -596,7 +649,7 @@ int thread_stack__process(struct thread *thread, struct comm *comm,
 		if (!cp)
 			return -ENOMEM;
 		err = thread_stack__push_cp(ts, ret_addr, sample->time, ref,
-					    cp, false);
+					    cp, false, trace_end);
 	} else if (sample->flags & PERF_IP_FLAG_RETURN) {
 		if (!sample->ip || !sample->addr)
 			return 0;
diff --git a/tools/perf/util/thread-stack.h b/tools/perf/util/thread-stack.h
index b7e41c4ebfdd..f97c00a8c251 100644
--- a/tools/perf/util/thread-stack.h
+++ b/tools/perf/util/thread-stack.h
@@ -84,7 +84,7 @@ int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
 			u64 to_ip, u16 insn_len, u64 trace_nr);
 void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr);
 void thread_stack__sample(struct thread *thread, struct ip_callchain *chain,
-			  size_t sz, u64 ip);
+			  size_t sz, u64 ip, u64 kernel_start);
 int thread_stack__flush(struct thread *thread);
 void thread_stack__free(struct thread *thread);
 size_t thread_stack__depth(struct thread *thread);
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index 2048d393ece6..3d9ed7d0e281 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -330,7 +330,8 @@ static int thread__prepare_access(struct thread *thread)
 }
 
 static int thread__clone_map_groups(struct thread *thread,
-				    struct thread *parent)
+				    struct thread *parent,
+				    bool do_maps_clone)
 {
 	/* This is new thread, we share map groups for process. */
 	if (thread->pid_ == parent->pid_)
@@ -341,15 +342,11 @@ static int thread__clone_map_groups(struct thread *thread,
 			 thread->pid_, thread->tid, parent->pid_, parent->tid);
 		return 0;
 	}
-
 	/* But this one is new process, copy maps. */
-	if (map_groups__clone(thread, parent->mg) < 0)
-		return -ENOMEM;
-
-	return 0;
+	return do_maps_clone ? map_groups__clone(thread, parent->mg) : 0;
 }
 
-int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp)
+int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp, bool do_maps_clone)
 {
 	if (parent->comm_set) {
 		const char *comm = thread__comm_str(parent);
@@ -362,7 +359,7 @@ int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp)
 	}
 
 	thread->ppid = parent->tid;
-	return thread__clone_map_groups(thread, parent);
+	return thread__clone_map_groups(thread, parent, do_maps_clone);
 }
 
 void thread__find_cpumode_addr_location(struct thread *thread, u64 addr,
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index 07606aa6998d..30e2b4c165fe 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -42,6 +42,8 @@ struct thread {
 	void				*addr_space;
 	struct unwind_libunwind_ops	*unwind_libunwind_ops;
 #endif
+	bool			filter;
+	int			filter_entry_depth;
 };
 
 struct machine;
@@ -87,7 +89,7 @@ struct comm *thread__comm(const struct thread *thread);
 struct comm *thread__exec_comm(const struct thread *thread);
 const char *thread__comm_str(const struct thread *thread);
 int thread__insert_map(struct thread *thread, struct map *map);
-int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp);
+int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp, bool do_maps_clone);
 size_t thread__fprintf(struct thread *thread, FILE *fp);
 
 struct thread *thread__main_thread(struct machine *machine, struct thread *thread);
diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h
index 183c91453522..56e4ca54020a 100644
--- a/tools/perf/util/tool.h
+++ b/tools/perf/util/tool.h
@@ -26,15 +26,12 @@ typedef int (*event_attr_op)(struct perf_tool *tool,
 			     union perf_event *event,
 			     struct perf_evlist **pevlist);
 
-typedef int (*event_op2)(struct perf_tool *tool, union perf_event *event,
-			 struct perf_session *session);
+typedef int (*event_op2)(struct perf_session *session, union perf_event *event);
+typedef s64 (*event_op3)(struct perf_session *session, union perf_event *event);
 
 typedef int (*event_oe)(struct perf_tool *tool, union perf_event *event,
 			struct ordered_events *oe);
 
-typedef s64 (*event_op3)(struct perf_tool *tool, union perf_event *event,
-			 struct perf_session *session);
-
 enum show_feature_header {
 	SHOW_FEAT_NO_HEADER = 0,
 	SHOW_FEAT_HEADER,
diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c
index 7b0ca7cbb7de..8ad8e755127b 100644
--- a/tools/perf/util/trace-event-info.c
+++ b/tools/perf/util/trace-event-info.c
@@ -531,12 +531,14 @@ struct tracing_data *tracing_data_get(struct list_head *pattrs,
 			 "/tmp/perf-XXXXXX");
 		if (!mkstemp(tdata->temp_file)) {
 			pr_debug("Can't make temp file");
+			free(tdata);
 			return NULL;
 		}
 
 		temp_fd = open(tdata->temp_file, O_RDWR);
 		if (temp_fd < 0) {
 			pr_debug("Can't read '%s'", tdata->temp_file);
+			free(tdata);
 			return NULL;
 		}
 
diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c
index e76214f8d596..32e558a65af3 100644
--- a/tools/perf/util/trace-event-parse.c
+++ b/tools/perf/util/trace-event-parse.c
@@ -33,14 +33,15 @@ static int get_common_field(struct scripting_context *context,
 			    int *offset, int *size, const char *type)
 {
 	struct tep_handle *pevent = context->pevent;
-	struct event_format *event;
-	struct format_field *field;
+	struct tep_event_format *event;
+	struct tep_format_field *field;
 
 	if (!*size) {
-		if (!pevent->events)
+
+		event = tep_get_first_event(pevent);
+		if (!event)
 			return 0;
 
-		event = pevent->events[0];
 		field = tep_find_common_field(event, type);
 		if (!field)
 			return 0;
@@ -94,9 +95,9 @@ int common_pc(struct scripting_context *context)
 }
 
 unsigned long long
-raw_field_value(struct event_format *event, const char *name, void *data)
+raw_field_value(struct tep_event_format *event, const char *name, void *data)
 {
-	struct format_field *field;
+	struct tep_format_field *field;
 	unsigned long long val;
 
 	field = tep_find_any_field(event, name);
@@ -108,12 +109,12 @@ raw_field_value(struct event_format *event, const char *name, void *data)
 	return val;
 }
 
-unsigned long long read_size(struct event_format *event, void *ptr, int size)
+unsigned long long read_size(struct tep_event_format *event, void *ptr, int size)
 {
 	return tep_read_number(event->pevent, ptr, size);
 }
 
-void event_format__fprintf(struct event_format *event,
+void event_format__fprintf(struct tep_event_format *event,
 			   int cpu, void *data, int size, FILE *fp)
 {
 	struct tep_record record;
@@ -130,7 +131,7 @@ void event_format__fprintf(struct event_format *event,
 	trace_seq_destroy(&s);
 }
 
-void event_format__print(struct event_format *event,
+void event_format__print(struct tep_event_format *event,
 			 int cpu, void *data, int size)
 {
 	return event_format__fprintf(event, cpu, data, size, stdout);
@@ -158,6 +159,7 @@ void parse_ftrace_printk(struct tep_handle *pevent,
 		printk = strdup(fmt+1);
 		line = strtok_r(NULL, "\n", &next);
 		tep_register_print_string(pevent, printk, addr);
+		free(printk);
 	}
 }
 
@@ -188,29 +190,33 @@ int parse_event_file(struct tep_handle *pevent,
 	return tep_parse_event(pevent, buf, size, sys);
 }
 
-struct event_format *trace_find_next_event(struct tep_handle *pevent,
-					   struct event_format *event)
+struct tep_event_format *trace_find_next_event(struct tep_handle *pevent,
+					       struct tep_event_format *event)
 {
 	static int idx;
+	int events_count;
+	struct tep_event_format *all_events;
 
-	if (!pevent || !pevent->events)
+	all_events = tep_get_first_event(pevent);
+	events_count = tep_get_events_count(pevent);
+	if (!pevent || !all_events || events_count < 1)
 		return NULL;
 
 	if (!event) {
 		idx = 0;
-		return pevent->events[0];
+		return all_events;
 	}
 
-	if (idx < pevent->nr_events && event == pevent->events[idx]) {
+	if (idx < events_count && event == (all_events + idx)) {
 		idx++;
-		if (idx == pevent->nr_events)
+		if (idx == events_count)
 			return NULL;
-		return pevent->events[idx];
+		return (all_events + idx);
 	}
 
-	for (idx = 1; idx < pevent->nr_events; idx++) {
-		if (event == pevent->events[idx - 1])
-			return pevent->events[idx];
+	for (idx = 1; idx < events_count; idx++) {
+		if (event == (all_events + (idx - 1)))
+			return (all_events + idx);
 	}
 	return NULL;
 }
diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c
index 3dfc1db6b25b..76f12c705ef9 100644
--- a/tools/perf/util/trace-event-read.c
+++ b/tools/perf/util/trace-event-read.c
@@ -102,7 +102,7 @@ static unsigned int read4(struct tep_handle *pevent)
 
 	if (do_read(&data, 4) < 0)
 		return 0;
-	return __data2host4(pevent, data);
+	return __tep_data2host4(pevent, data);
 }
 
 static unsigned long long read8(struct tep_handle *pevent)
@@ -111,7 +111,7 @@ static unsigned long long read8(struct tep_handle *pevent)
 
 	if (do_read(&data, 8) < 0)
 		return 0;
-	return __data2host8(pevent, data);
+	return __tep_data2host8(pevent, data);
 }
 
 static char *read_string(void)
@@ -241,7 +241,7 @@ static int read_header_files(struct tep_handle *pevent)
 		 * The commit field in the page is of type long,
 		 * use that instead, since it represents the kernel.
 		 */
-		tep_set_long_size(pevent, pevent->header_page_size_size);
+		tep_set_long_size(pevent, tep_get_header_page_size(pevent));
 	}
 	free(header_page);
 
@@ -297,10 +297,8 @@ static int read_event_file(struct tep_handle *pevent, char *sys,
 	}
 
 	ret = do_read(buf, size);
-	if (ret < 0) {
-		free(buf);
+	if (ret < 0)
 		goto out;
-	}
 
 	ret = parse_event_file(pevent, buf, size, sys);
 	if (ret < 0)
@@ -349,9 +347,12 @@ static int read_event_files(struct tep_handle *pevent)
 		for (x=0; x < count; x++) {
 			size = read8(pevent);
 			ret = read_event_file(pevent, sys, size);
-			if (ret)
+			if (ret) {
+				free(sys);
 				return ret;
+			}
 		}
+		free(sys);
 	}
 	return 0;
 }
diff --git a/tools/perf/util/trace-event.c b/tools/perf/util/trace-event.c
index 58bb72f266f3..95664b2f771e 100644
--- a/tools/perf/util/trace-event.c
+++ b/tools/perf/util/trace-event.c
@@ -72,12 +72,12 @@ void trace_event__cleanup(struct trace_event *t)
 /*
  * Returns pointer with encoded error via <linux/err.h> interface.
  */
-static struct event_format*
+static struct tep_event_format*
 tp_format(const char *sys, const char *name)
 {
 	char *tp_dir = get_events_file(sys);
 	struct tep_handle *pevent = tevent.pevent;
-	struct event_format *event = NULL;
+	struct tep_event_format *event = NULL;
 	char path[PATH_MAX];
 	size_t size;
 	char *data;
@@ -102,7 +102,7 @@ tp_format(const char *sys, const char *name)
 /*
  * Returns pointer with encoded error via <linux/err.h> interface.
  */
-struct event_format*
+struct tep_event_format*
 trace_event__tp_format(const char *sys, const char *name)
 {
 	if (!tevent_initialized && trace_event__init2())
@@ -111,7 +111,7 @@ trace_event__tp_format(const char *sys, const char *name)
 	return tp_format(sys, name);
 }
 
-struct event_format *trace_event__tp_format_id(int id)
+struct tep_event_format *trace_event__tp_format_id(int id)
 {
 	if (!tevent_initialized && trace_event__init2())
 		return ERR_PTR(-ENOMEM);
diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h
index 40204ec3a7a2..f024d73bfc40 100644
--- a/tools/perf/util/trace-event.h
+++ b/tools/perf/util/trace-event.h
@@ -3,6 +3,7 @@
 #define _PERF_UTIL_TRACE_EVENT_H
 
 #include <traceevent/event-parse.h>
+#include <traceevent/trace-seq.h>
 #include "parse-events.h"
 
 struct machine;
@@ -10,28 +11,28 @@ struct perf_sample;
 union perf_event;
 struct perf_tool;
 struct thread;
-struct plugin_list;
+struct tep_plugin_list;
 
 struct trace_event {
 	struct tep_handle	*pevent;
-	struct plugin_list	*plugin_list;
+	struct tep_plugin_list	*plugin_list;
 };
 
 int trace_event__init(struct trace_event *t);
 void trace_event__cleanup(struct trace_event *t);
 int trace_event__register_resolver(struct machine *machine,
 				   tep_func_resolver_t *func);
-struct event_format*
+struct tep_event_format*
 trace_event__tp_format(const char *sys, const char *name);
 
-struct event_format *trace_event__tp_format_id(int id);
+struct tep_event_format *trace_event__tp_format_id(int id);
 
 int bigendian(void);
 
-void event_format__fprintf(struct event_format *event,
+void event_format__fprintf(struct tep_event_format *event,
 			   int cpu, void *data, int size, FILE *fp);
 
-void event_format__print(struct event_format *event,
+void event_format__print(struct tep_event_format *event,
 			 int cpu, void *data, int size);
 
 int parse_ftrace_file(struct tep_handle *pevent, char *buf, unsigned long size);
@@ -39,7 +40,7 @@ int parse_event_file(struct tep_handle *pevent,
 		     char *buf, unsigned long size, char *sys);
 
 unsigned long long
-raw_field_value(struct event_format *event, const char *name, void *data);
+raw_field_value(struct tep_event_format *event, const char *name, void *data);
 
 void parse_proc_kallsyms(struct tep_handle *pevent, char *file, unsigned int size);
 void parse_ftrace_printk(struct tep_handle *pevent, char *file, unsigned int size);
@@ -47,9 +48,9 @@ void parse_saved_cmdline(struct tep_handle *pevent, char *file, unsigned int siz
 
 ssize_t trace_report(int fd, struct trace_event *tevent, bool repipe);
 
-struct event_format *trace_find_next_event(struct tep_handle *pevent,
-					   struct event_format *event);
-unsigned long long read_size(struct event_format *event, void *ptr, int size);
+struct tep_event_format *trace_find_next_event(struct tep_handle *pevent,
+					       struct tep_event_format *event);
+unsigned long long read_size(struct tep_event_format *event, void *ptr, int size);
 unsigned long long eval_flag(const char *flag);
 
 int read_tracing_data(int fd, struct list_head *pattrs);
diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c
index 6f318b15950e..5eff9bfc5758 100644
--- a/tools/perf/util/unwind-libdw.c
+++ b/tools/perf/util/unwind-libdw.c
@@ -45,13 +45,13 @@ static int __report_module(struct addr_location *al, u64 ip,
 		Dwarf_Addr s;
 
 		dwfl_module_info(mod, NULL, &s, NULL, NULL, NULL, NULL, NULL);
-		if (s != al->map->start)
+		if (s != al->map->start - al->map->pgoff)
 			mod = 0;
 	}
 
 	if (!mod)
 		mod = dwfl_report_elf(ui->dwfl, dso->short_name,
-				      (dso->symsrc_filename ? dso->symsrc_filename : dso->long_name), -1, al->map->start,
+				      (dso->symsrc_filename ? dso->symsrc_filename : dso->long_name), -1, al->map->start - al->map->pgoff,
 				      false);
 
 	return mod && dwfl_addrmodule(ui->dwfl, ip) == mod ? 0 : -1;
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index eac5b858a371..093352e93d50 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -221,7 +221,7 @@ out:
 	return err;
 }
 
-static int copyfile_offset(int ifd, loff_t off_in, int ofd, loff_t off_out, u64 size)
+int copyfile_offset(int ifd, loff_t off_in, int ofd, loff_t off_out, u64 size)
 {
 	void *ptr;
 	loff_t pgoff;
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index dc58254a2b69..14508ee7707a 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -6,6 +6,7 @@
 /* glibc 2.20 deprecates _BSD_SOURCE in favour of _DEFAULT_SOURCE */
 #define _DEFAULT_SOURCE 1
 
+#include <fcntl.h>
 #include <stdbool.h>
 #include <stddef.h>
 #include <stdlib.h>
@@ -35,6 +36,7 @@ bool lsdir_no_dot_filter(const char *name, struct dirent *d);
 int copyfile(const char *from, const char *to);
 int copyfile_mode(const char *from, const char *to, mode_t mode);
 int copyfile_ns(const char *from, const char *to, struct nsinfo *nsi);
+int copyfile_offset(int ifd, loff_t off_in, int ofd, loff_t off_out, u64 size);
 
 ssize_t readn(int fd, void *buf, size_t n);
 ssize_t writen(int fd, const void *buf, size_t n);
diff --git a/tools/power/cpupower/bench/parse.c b/tools/power/cpupower/bench/parse.c
index 9ba8a44ad2a7..84caee38418f 100644
--- a/tools/power/cpupower/bench/parse.c
+++ b/tools/power/cpupower/bench/parse.c
@@ -145,7 +145,7 @@ struct config *prepare_default_config()
 	config->cpu = 0;
 	config->prio = SCHED_HIGH;
 	config->verbose = 0;
-	strncpy(config->governor, "ondemand", 8);
+	strncpy(config->governor, "ondemand", sizeof(config->governor));
 
 	config->output = stdout;
 
diff --git a/tools/power/cpupower/utils/cpufreq-info.c b/tools/power/cpupower/utils/cpufreq-info.c
index df43cd45d810..c3f39d5128ee 100644
--- a/tools/power/cpupower/utils/cpufreq-info.c
+++ b/tools/power/cpupower/utils/cpufreq-info.c
@@ -170,6 +170,7 @@ static int get_boost_mode(unsigned int cpu)
 	unsigned long pstates[MAX_HW_PSTATES] = {0,};
 
 	if (cpupower_cpu_info.vendor != X86_VENDOR_AMD &&
+	    cpupower_cpu_info.vendor != X86_VENDOR_HYGON &&
 	    cpupower_cpu_info.vendor != X86_VENDOR_INTEL)
 		return 0;
 
@@ -190,8 +191,9 @@ static int get_boost_mode(unsigned int cpu)
 	printf(_("    Supported: %s\n"), support ? _("yes") : _("no"));
 	printf(_("    Active: %s\n"), active ? _("yes") : _("no"));
 
-	if (cpupower_cpu_info.vendor == X86_VENDOR_AMD &&
-	    cpupower_cpu_info.family >= 0x10) {
+	if ((cpupower_cpu_info.vendor == X86_VENDOR_AMD &&
+	     cpupower_cpu_info.family >= 0x10) ||
+	     cpupower_cpu_info.vendor == X86_VENDOR_HYGON) {
 		ret = decode_pstates(cpu, cpupower_cpu_info.family, b_states,
 				     pstates, &pstate_no);
 		if (ret)
@@ -200,6 +202,8 @@ static int get_boost_mode(unsigned int cpu)
 		printf(_("    Boost States: %d\n"), b_states);
 		printf(_("    Total States: %d\n"), pstate_no);
 		for (i = 0; i < pstate_no; i++) {
+			if (!pstates[i])
+				continue;
 			if (i < b_states)
 				printf(_("    Pstate-Pb%d: %luMHz (boost state)"
 					 "\n"), i, pstates[i]);
diff --git a/tools/power/cpupower/utils/helpers/amd.c b/tools/power/cpupower/utils/helpers/amd.c
index bb41cdd0df6b..7c4f83a8c973 100644
--- a/tools/power/cpupower/utils/helpers/amd.c
+++ b/tools/power/cpupower/utils/helpers/amd.c
@@ -33,7 +33,7 @@ union msr_pstate {
 		unsigned vid:8;
 		unsigned iddval:8;
 		unsigned idddiv:2;
-		unsigned res1:30;
+		unsigned res1:31;
 		unsigned en:1;
 	} fam17h_bits;
 	unsigned long long val;
@@ -45,7 +45,7 @@ static int get_did(int family, union msr_pstate pstate)
 
 	if (family == 0x12)
 		t = pstate.val & 0xf;
-	else if (family == 0x17)
+	else if (family == 0x17 || family == 0x18)
 		t = pstate.fam17h_bits.did;
 	else
 		t = pstate.bits.did;
@@ -59,7 +59,7 @@ static int get_cof(int family, union msr_pstate pstate)
 	int fid, did, cof;
 
 	did = get_did(family, pstate);
-	if (family == 0x17) {
+	if (family == 0x17 || family == 0x18) {
 		fid = pstate.fam17h_bits.fid;
 		cof = 200 * fid / did;
 	} else {
@@ -119,6 +119,11 @@ int decode_pstates(unsigned int cpu, unsigned int cpu_family,
 		}
 		if (read_msr(cpu, MSR_AMD_PSTATE + i, &pstate.val))
 			return -1;
+		if ((cpu_family == 0x17) && (!pstate.fam17h_bits.en))
+			continue;
+		else if (!pstate.bits.en)
+			continue;
+
 		pstates[i] = get_cof(cpu_family, pstate);
 	}
 	*no = i;
diff --git a/tools/power/cpupower/utils/helpers/cpuid.c b/tools/power/cpupower/utils/helpers/cpuid.c
index 732b0b41ba26..5cc39d4e23ed 100644
--- a/tools/power/cpupower/utils/helpers/cpuid.c
+++ b/tools/power/cpupower/utils/helpers/cpuid.c
@@ -8,7 +8,7 @@
 #include "helpers/helpers.h"
 
 static const char *cpu_vendor_table[X86_VENDOR_MAX] = {
-	"Unknown", "GenuineIntel", "AuthenticAMD",
+	"Unknown", "GenuineIntel", "AuthenticAMD", "HygonGenuine",
 };
 
 #if defined(__i386__) || defined(__x86_64__)
@@ -109,6 +109,7 @@ out:
 	fclose(fp);
 	/* Get some useful CPU capabilities from cpuid */
 	if (cpu_info->vendor != X86_VENDOR_AMD &&
+	    cpu_info->vendor != X86_VENDOR_HYGON &&
 	    cpu_info->vendor != X86_VENDOR_INTEL)
 		return ret;
 
@@ -124,8 +125,9 @@ out:
 	if (cpuid_level >= 6 && (cpuid_ecx(6) & 0x1))
 		cpu_info->caps |= CPUPOWER_CAP_APERF;
 
-	/* AMD Boost state enable/disable register */
-	if (cpu_info->vendor == X86_VENDOR_AMD) {
+	/* AMD or Hygon Boost state enable/disable register */
+	if (cpu_info->vendor == X86_VENDOR_AMD ||
+	    cpu_info->vendor == X86_VENDOR_HYGON) {
 		if (ext_cpuid_level >= 0x80000007 &&
 		    (cpuid_edx(0x80000007) & (1 << 9)))
 			cpu_info->caps |= CPUPOWER_CAP_AMD_CBP;
diff --git a/tools/power/cpupower/utils/helpers/helpers.h b/tools/power/cpupower/utils/helpers/helpers.h
index 41da392be448..902139689315 100644
--- a/tools/power/cpupower/utils/helpers/helpers.h
+++ b/tools/power/cpupower/utils/helpers/helpers.h
@@ -61,7 +61,7 @@ extern int be_verbose;
 
 /* cpuid and cpuinfo helpers  **************************/
 enum cpupower_cpu_vendor {X86_VENDOR_UNKNOWN = 0, X86_VENDOR_INTEL,
-			  X86_VENDOR_AMD, X86_VENDOR_MAX};
+			  X86_VENDOR_AMD, X86_VENDOR_HYGON, X86_VENDOR_MAX};
 
 #define CPUPOWER_CAP_INV_TSC		0x00000001
 #define CPUPOWER_CAP_APERF		0x00000002
diff --git a/tools/power/cpupower/utils/helpers/misc.c b/tools/power/cpupower/utils/helpers/misc.c
index 80fdf55f414d..f406adc40bad 100644
--- a/tools/power/cpupower/utils/helpers/misc.c
+++ b/tools/power/cpupower/utils/helpers/misc.c
@@ -26,7 +26,7 @@ int cpufreq_has_boost_support(unsigned int cpu, int *support, int *active,
 		 * has Hardware determined variable increments instead.
 		 */
 
-		if (cpu_info.family == 0x17) {
+		if (cpu_info.family == 0x17 || cpu_info.family == 0x18) {
 			if (!read_msr(cpu, MSR_AMD_HWCR, &val)) {
 				if (!(val & CPUPOWER_AMD_CPBDIS))
 					*active = 1;
diff --git a/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c b/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c
index d7c2a6d13dea..f2a7e9cfd577 100644
--- a/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c
+++ b/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c
@@ -241,7 +241,8 @@ static int init_maxfreq_mode(void)
 	if (!(cpupower_cpu_info.caps & CPUPOWER_CAP_INV_TSC))
 		goto use_sysfs;
 
-	if (cpupower_cpu_info.vendor == X86_VENDOR_AMD) {
+	if (cpupower_cpu_info.vendor == X86_VENDOR_AMD ||
+	    cpupower_cpu_info.vendor == X86_VENDOR_HYGON) {
 		/* MSR_AMD_HWCR tells us whether TSC runs at P0/mperf
 		 * freq.
 		 * A test whether hwcr is accessable/available would be:
diff --git a/tools/power/pm-graph/Makefile b/tools/power/pm-graph/Makefile
index c1899cd72c80..845541544570 100644
--- a/tools/power/pm-graph/Makefile
+++ b/tools/power/pm-graph/Makefile
@@ -23,8 +23,8 @@ install : uninstall
 	install -m 644 config/suspend-x2-proc.cfg $(DESTDIR)$(PREFIX)/lib/pm-graph/config
 
 	install -d  $(DESTDIR)$(PREFIX)/bin
-	ln -s $(DESTDIR)$(PREFIX)/lib/pm-graph/bootgraph.py $(DESTDIR)$(PREFIX)/bin/bootgraph
-	ln -s $(DESTDIR)$(PREFIX)/lib/pm-graph/sleepgraph.py $(DESTDIR)$(PREFIX)/bin/sleepgraph
+	ln -s ../lib/pm-graph/bootgraph.py $(DESTDIR)$(PREFIX)/bin/bootgraph
+	ln -s ../lib/pm-graph/sleepgraph.py $(DESTDIR)$(PREFIX)/bin/sleepgraph
 
 	install -d  $(DESTDIR)$(PREFIX)/share/man/man8
 	install bootgraph.8 $(DESTDIR)$(PREFIX)/share/man/man8
diff --git a/tools/power/pm-graph/bootgraph.py b/tools/power/pm-graph/bootgraph.py
index 8ee626c0f6a5..6dae57041537 100755
--- a/tools/power/pm-graph/bootgraph.py
+++ b/tools/power/pm-graph/bootgraph.py
@@ -34,6 +34,10 @@ from datetime import datetime, timedelta
 from subprocess import call, Popen, PIPE
 import sleepgraph as aslib
 
+def pprint(msg):
+	print(msg)
+	sys.stdout.flush()
+
 # ----------------- CLASSES --------------------
 
 # Class: SystemValues
@@ -157,11 +161,11 @@ class SystemValues(aslib.SystemValues):
 		return cmdline
 	def manualRebootRequired(self):
 		cmdline = self.kernelParams()
-		print 'To generate a new timeline manually, follow these steps:\n'
-		print '1. Add the CMDLINE string to your kernel command line.'
-		print '2. Reboot the system.'
-		print '3. After reboot, re-run this tool with the same arguments but no command (w/o -reboot or -manual).\n'
-		print 'CMDLINE="%s"' % cmdline
+		pprint('To generate a new timeline manually, follow these steps:\n\n'\
+		'1. Add the CMDLINE string to your kernel command line.\n'\
+		'2. Reboot the system.\n'\
+		'3. After reboot, re-run this tool with the same arguments but no command (w/o -reboot or -manual).\n\n'\
+		'CMDLINE="%s"' % cmdline)
 		sys.exit()
 	def blGrub(self):
 		blcmd = ''
@@ -431,7 +435,7 @@ def parseTraceLog(data):
 			if len(cg.list) < 1 or cg.invalid or (cg.end - cg.start == 0):
 				continue
 			if(not cg.postProcess()):
-				print('Sanity check failed for %s-%d' % (proc, pid))
+				pprint('Sanity check failed for %s-%d' % (proc, pid))
 				continue
 			# match cg data to devices
 			devname = data.deviceMatch(pid, cg)
@@ -442,8 +446,8 @@ def parseTraceLog(data):
 				sysvals.vprint('%s callgraph found for %s %s-%d [%f - %f]' %\
 					(kind, cg.name, proc, pid, cg.start, cg.end))
 			elif len(cg.list) > 1000000:
-				print 'WARNING: the callgraph found for %s is massive! (%d lines)' %\
-					(devname, len(cg.list))
+				pprint('WARNING: the callgraph found for %s is massive! (%d lines)' %\
+					(devname, len(cg.list)))
 
 # Function: retrieveLogs
 # Description:
@@ -528,7 +532,7 @@ def createBootGraph(data):
 	tMax = data.end
 	tTotal = tMax - t0
 	if(tTotal == 0):
-		print('ERROR: No timeline data')
+		pprint('ERROR: No timeline data')
 		return False
 	user_mode = '%.0f'%(data.tUserMode*1000)
 	last_init = '%.0f'%(tTotal*1000)
@@ -734,7 +738,7 @@ def updateCron(restore=False):
 		op.close()
 		res = call([cmd, cronfile])
 	except Exception, e:
-		print 'Exception: %s' % str(e)
+		pprint('Exception: %s' % str(e))
 		shutil.move(backfile, cronfile)
 		res = -1
 	if res != 0:
@@ -750,7 +754,7 @@ def updateGrub(restore=False):
 			call(sysvals.blexec, stderr=PIPE, stdout=PIPE,
 				env={'PATH': '.:/sbin:/usr/sbin:/usr/bin:/sbin:/bin'})
 		except Exception, e:
-			print 'Exception: %s\n' % str(e)
+			pprint('Exception: %s\n' % str(e))
 		return
 	# extract the option and create a grub config without it
 	sysvals.rootUser(True)
@@ -797,7 +801,7 @@ def updateGrub(restore=False):
 		res = call(sysvals.blexec)
 		os.remove(grubfile)
 	except Exception, e:
-		print 'Exception: %s' % str(e)
+		pprint('Exception: %s' % str(e))
 		res = -1
 	# cleanup
 	shutil.move(tempfile, grubfile)
@@ -821,7 +825,7 @@ def updateKernelParams(restore=False):
 def doError(msg, help=False):
 	if help == True:
 		printHelp()
-	print 'ERROR: %s\n' % msg
+	pprint('ERROR: %s\n' % msg)
 	sysvals.outputResult({'error':msg})
 	sys.exit()
 
@@ -829,52 +833,51 @@ def doError(msg, help=False):
 # Description:
 #	 print out the help text
 def printHelp():
-	print('')
-	print('%s v%s' % (sysvals.title, sysvals.version))
-	print('Usage: bootgraph <options> <command>')
-	print('')
-	print('Description:')
-	print('  This tool reads in a dmesg log of linux kernel boot and')
-	print('  creates an html representation of the boot timeline up to')
-	print('  the start of the init process.')
-	print('')
-	print('  If no specific command is given the tool reads the current dmesg')
-	print('  and/or ftrace log and creates a timeline')
-	print('')
-	print('  Generates output files in subdirectory: boot-yymmdd-HHMMSS')
-	print('   HTML output:                    <hostname>_boot.html')
-	print('   raw dmesg output:               <hostname>_boot_dmesg.txt')
-	print('   raw ftrace output:              <hostname>_boot_ftrace.txt')
-	print('')
-	print('Options:')
-	print('  -h            Print this help text')
-	print('  -v            Print the current tool version')
-	print('  -verbose      Print extra information during execution and analysis')
-	print('  -addlogs      Add the dmesg log to the html output')
-	print('  -result fn    Export a results table to a text file for parsing.')
-	print('  -o name       Overrides the output subdirectory name when running a new test')
-	print('                default: boot-{date}-{time}')
-	print(' [advanced]')
-	print('  -fstat        Use ftrace to add function detail and statistics (default: disabled)')
-	print('  -f/-callgraph Add callgraph detail, can be very large (default: disabled)')
-	print('  -maxdepth N   limit the callgraph data to N call levels (default: 2)')
-	print('  -mincg ms     Discard all callgraphs shorter than ms milliseconds (e.g. 0.001 for us)')
-	print('  -timeprec N   Number of significant digits in timestamps (0:S, 3:ms, [6:us])')
-	print('  -expandcg     pre-expand the callgraph data in the html output (default: disabled)')
-	print('  -func list    Limit ftrace to comma-delimited list of functions (default: do_one_initcall)')
-	print('  -cgfilter S   Filter the callgraph output in the timeline')
-	print('  -cgskip file  Callgraph functions to skip, off to disable (default: cgskip.txt)')
-	print('  -bl name      Use the following boot loader for kernel params (default: grub)')
-	print('  -reboot       Reboot the machine automatically and generate a new timeline')
-	print('  -manual       Show the steps to generate a new timeline manually (used with -reboot)')
-	print('')
-	print('Other commands:')
-	print('  -flistall     Print all functions capable of being captured in ftrace')
-	print('  -sysinfo      Print out system info extracted from BIOS')
-	print(' [redo]')
-	print('  -dmesg file   Create HTML output using dmesg input (used with -ftrace)')
-	print('  -ftrace file  Create HTML output using ftrace input (used with -dmesg)')
-	print('')
+	pprint('\n%s v%s\n'\
+	'Usage: bootgraph <options> <command>\n'\
+	'\n'\
+	'Description:\n'\
+	'  This tool reads in a dmesg log of linux kernel boot and\n'\
+	'  creates an html representation of the boot timeline up to\n'\
+	'  the start of the init process.\n'\
+	'\n'\
+	'  If no specific command is given the tool reads the current dmesg\n'\
+	'  and/or ftrace log and creates a timeline\n'\
+	'\n'\
+	'  Generates output files in subdirectory: boot-yymmdd-HHMMSS\n'\
+	'   HTML output:                    <hostname>_boot.html\n'\
+	'   raw dmesg output:               <hostname>_boot_dmesg.txt\n'\
+	'   raw ftrace output:              <hostname>_boot_ftrace.txt\n'\
+	'\n'\
+	'Options:\n'\
+	'  -h            Print this help text\n'\
+	'  -v            Print the current tool version\n'\
+	'  -verbose      Print extra information during execution and analysis\n'\
+	'  -addlogs      Add the dmesg log to the html output\n'\
+	'  -result fn    Export a results table to a text file for parsing.\n'\
+	'  -o name       Overrides the output subdirectory name when running a new test\n'\
+	'                default: boot-{date}-{time}\n'\
+	' [advanced]\n'\
+	'  -fstat        Use ftrace to add function detail and statistics (default: disabled)\n'\
+	'  -f/-callgraph Add callgraph detail, can be very large (default: disabled)\n'\
+	'  -maxdepth N   limit the callgraph data to N call levels (default: 2)\n'\
+	'  -mincg ms     Discard all callgraphs shorter than ms milliseconds (e.g. 0.001 for us)\n'\
+	'  -timeprec N   Number of significant digits in timestamps (0:S, 3:ms, [6:us])\n'\
+	'  -expandcg     pre-expand the callgraph data in the html output (default: disabled)\n'\
+	'  -func list    Limit ftrace to comma-delimited list of functions (default: do_one_initcall)\n'\
+	'  -cgfilter S   Filter the callgraph output in the timeline\n'\
+	'  -cgskip file  Callgraph functions to skip, off to disable (default: cgskip.txt)\n'\
+	'  -bl name      Use the following boot loader for kernel params (default: grub)\n'\
+	'  -reboot       Reboot the machine automatically and generate a new timeline\n'\
+	'  -manual       Show the steps to generate a new timeline manually (used with -reboot)\n'\
+	'\n'\
+	'Other commands:\n'\
+	'  -flistall     Print all functions capable of being captured in ftrace\n'\
+	'  -sysinfo      Print out system info extracted from BIOS\n'\
+	' [redo]\n'\
+	'  -dmesg file   Create HTML output using dmesg input (used with -ftrace)\n'\
+	'  -ftrace file  Create HTML output using ftrace input (used with -dmesg)\n'\
+	'' % (sysvals.title, sysvals.version))
 	return True
 
 # ----------------- MAIN --------------------
@@ -895,7 +898,7 @@ if __name__ == '__main__':
 			printHelp()
 			sys.exit()
 		elif(arg == '-v'):
-			print("Version %s" % sysvals.version)
+			pprint("Version %s" % sysvals.version)
 			sys.exit()
 		elif(arg == '-verbose'):
 			sysvals.verbose = True
@@ -1016,7 +1019,7 @@ if __name__ == '__main__':
 				print f
 		elif cmd == 'checkbl':
 			sysvals.getBootLoader()
-			print 'Boot Loader: %s\n%s' % (sysvals.bootloader, sysvals.blexec)
+			pprint('Boot Loader: %s\n%s' % (sysvals.bootloader, sysvals.blexec))
 		elif(cmd == 'sysinfo'):
 			sysvals.printSystemInfo(True)
 		sys.exit()
diff --git a/tools/power/pm-graph/config/cgskip.txt b/tools/power/pm-graph/config/cgskip.txt
index e48d588fbfb4..9ff88e7e2300 100644
--- a/tools/power/pm-graph/config/cgskip.txt
+++ b/tools/power/pm-graph/config/cgskip.txt
@@ -27,6 +27,7 @@ ktime_get
 # console calls
 printk
 dev_printk
+__dev_printk
 console_unlock
 
 # memory handling
diff --git a/tools/power/pm-graph/config/custom-timeline-functions.cfg b/tools/power/pm-graph/config/custom-timeline-functions.cfg
index f8fcb06fd68b..4f80ad7d7275 100644
--- a/tools/power/pm-graph/config/custom-timeline-functions.cfg
+++ b/tools/power/pm-graph/config/custom-timeline-functions.cfg
@@ -105,7 +105,7 @@ override-dev-timeline-functions: true
 #       example: [color=#CC00CC]
 #
 #   arglist: A list of arguments from registers/stack addresses. See URL:
-#            https://www.kernel.org/doc/Documentation/trace/kprobetrace.rst
+#            https://www.kernel.org/doc/Documentation/trace/kprobetrace.txt
 #
 #       example: cpu=%di:s32
 #
@@ -170,7 +170,7 @@ pm_restore_console:
 #       example: [color=#CC00CC]
 #
 #   arglist: A list of arguments from registers/stack addresses. See URL:
-#            https://www.kernel.org/doc/Documentation/trace/kprobetrace.rst
+#            https://www.kernel.org/doc/Documentation/trace/kprobetrace.txt
 #
 #       example: port=+36(%di):s32
 #
diff --git a/tools/power/pm-graph/sleepgraph.8 b/tools/power/pm-graph/sleepgraph.8
index 070be2cf7f74..24a2e7d0ae63 100644
--- a/tools/power/pm-graph/sleepgraph.8
+++ b/tools/power/pm-graph/sleepgraph.8
@@ -65,9 +65,9 @@ During test, enable/disable runtime suspend for all devices. The test is delayed
 by 5 seconds to allow runtime suspend changes to occur. The settings are restored
 after the test is complete.
 .TP
-\fB-display \fIon/off\fR
-Turn the display on or off for the test using the xset command. This helps
-maintain the consistecy of test data for better comparison.
+\fB-display \fIon/off/standby/suspend\fR
+Switch the display to the requested mode for the test using the xset command.
+This helps maintain the consistency of test data for better comparison.
 .TP
 \fB-skiphtml\fR
 Run the test and capture the trace logs, but skip the timeline generation.
@@ -183,6 +183,13 @@ Print out the contents of the ACPI Firmware Performance Data Table.
 \fB-battery\fR
 Print out battery status and current charge.
 .TP
+\fB-xon/-xoff/-xstandby/-xsuspend\fR
+Test xset by attempting to switch the display to the given mode. This
+is the same command which will be issued by \fB-display \fImode\fR.
+.TP
+\fB-xstat\fR
+Get the current DPMS display mode.
+.TP
 \fB-sysinfo\fR
 Print out system info extracted from BIOS. Reads /dev/mem directly instead of going through dmidecode.
 .TP
diff --git a/tools/power/pm-graph/sleepgraph.py b/tools/power/pm-graph/sleepgraph.py
index 0c760478f7d7..52618f3444d4 100755
--- a/tools/power/pm-graph/sleepgraph.py
+++ b/tools/power/pm-graph/sleepgraph.py
@@ -54,6 +54,7 @@ import os
 import string
 import re
 import platform
+import signal
 from datetime import datetime
 import struct
 import ConfigParser
@@ -61,6 +62,10 @@ import gzip
 from threading import Thread
 from subprocess import call, Popen, PIPE
 
+def pprint(msg):
+	print(msg)
+	sys.stdout.flush()
+
 # ----------------- CLASSES --------------------
 
 # Class: SystemValues
@@ -69,10 +74,10 @@ from subprocess import call, Popen, PIPE
 #	 store system values and test parameters
 class SystemValues:
 	title = 'SleepGraph'
-	version = '5.1'
+	version = '5.2'
 	ansi = False
 	rs = 0
-	display = 0
+	display = ''
 	gzip = False
 	sync = False
 	verbose = False
@@ -99,6 +104,7 @@ class SystemValues:
 	tpath = '/sys/kernel/debug/tracing/'
 	fpdtpath = '/sys/firmware/acpi/tables/FPDT'
 	epath = '/sys/kernel/debug/tracing/events/power/'
+	pmdpath = '/sys/power/pm_debug_messages'
 	traceevents = [
 		'suspend_resume',
 		'device_pm_callback_end',
@@ -109,8 +115,10 @@ class SystemValues:
 	mempath = '/dev/mem'
 	powerfile = '/sys/power/state'
 	mempowerfile = '/sys/power/mem_sleep'
+	diskpowerfile = '/sys/power/disk'
 	suspendmode = 'mem'
 	memmode = ''
+	diskmode = ''
 	hostname = 'localhost'
 	prefix = 'test'
 	teststamp = ''
@@ -137,16 +145,15 @@ class SystemValues:
 	useprocmon = False
 	notestrun = False
 	cgdump = False
+	devdump = False
 	mixedphaseheight = True
 	devprops = dict()
 	predelay = 0
 	postdelay = 0
-	procexecfmt = 'ps - (?P<ps>.*)$'
-	devpropfmt = '# Device Properties: .*'
-	tracertypefmt = '# tracer: (?P<t>.*)'
-	firmwarefmt = '# fwsuspend (?P<s>[0-9]*) fwresume (?P<r>[0-9]*)$'
+	pmdebug = ''
 	tracefuncs = {
 		'sys_sync': {},
+		'ksys_sync': {},
 		'__pm_notifier_call_chain': {},
 		'pm_prepare_console': {},
 		'pm_notifier_call_chain': {},
@@ -187,7 +194,6 @@ class SystemValues:
 	dev_tracefuncs = {
 		# general wait/delay/sleep
 		'msleep': { 'args_x86_64': {'time':'%di:s32'}, 'ub': 1 },
-		'schedule_timeout_uninterruptible': { 'args_x86_64': {'timeout':'%di:s32'}, 'ub': 1 },
 		'schedule_timeout': { 'args_x86_64': {'timeout':'%di:s32'}, 'ub': 1 },
 		'udelay': { 'func':'__const_udelay', 'args_x86_64': {'loops':'%di:s32'}, 'ub': 1 },
 		'usleep_range': { 'args_x86_64': {'min':'%di:s32', 'max':'%si:s32'}, 'ub': 1 },
@@ -199,6 +205,9 @@ class SystemValues:
 		# filesystem
 		'ext4_sync_fs': {},
 		# 80211
+		'ath10k_bmi_read_memory': { 'args_x86_64': {'length':'%cx:s32'} },
+		'ath10k_bmi_write_memory': { 'args_x86_64': {'length':'%cx:s32'} },
+		'ath10k_bmi_fast_download': { 'args_x86_64': {'length':'%cx:s32'} },
 		'iwlagn_mac_start': {},
 		'iwlagn_alloc_bcast_station': {},
 		'iwl_trans_pcie_start_hw': {},
@@ -241,6 +250,7 @@ class SystemValues:
 	timeformat = '%.3f'
 	cmdline = '%s %s' % \
 			(os.path.basename(sys.argv[0]), ' '.join(sys.argv[1:]))
+	sudouser = ''
 	def __init__(self):
 		self.archargs = 'args_'+platform.machine()
 		self.hostname = platform.node()
@@ -256,27 +266,49 @@ class SystemValues:
 		if (hasattr(sys.stdout, 'isatty') and sys.stdout.isatty()):
 			self.ansi = True
 		self.testdir = datetime.now().strftime('suspend-%y%m%d-%H%M%S')
+		if os.getuid() == 0 and 'SUDO_USER' in os.environ and \
+			os.environ['SUDO_USER']:
+			self.sudouser = os.environ['SUDO_USER']
 	def vprint(self, msg):
 		self.logmsg += msg+'\n'
-		if(self.verbose):
-			print(msg)
+		if self.verbose or msg.startswith('WARNING:'):
+			pprint(msg)
+	def signalHandler(self, signum, frame):
+		if not self.result:
+			return
+		signame = self.signames[signum] if signum in self.signames else 'UNKNOWN'
+		msg = 'Signal %s caused a tool exit, line %d' % (signame, frame.f_lineno)
+		sysvals.outputResult({'error':msg})
+		sys.exit(3)
+	def signalHandlerInit(self):
+		capture = ['BUS', 'SYS', 'XCPU', 'XFSZ', 'PWR', 'HUP', 'INT', 'QUIT',
+			'ILL', 'ABRT', 'FPE', 'SEGV', 'TERM', 'TSTP']
+		self.signames = dict()
+		for i in capture:
+			s = 'SIG'+i
+			try:
+				signum = getattr(signal, s)
+				signal.signal(signum, self.signalHandler)
+			except:
+				continue
+			self.signames[signum] = s
 	def rootCheck(self, fatal=True):
 		if(os.access(self.powerfile, os.W_OK)):
 			return True
 		if fatal:
 			msg = 'This command requires sysfs mount and root access'
-			print('ERROR: %s\n') % msg
+			pprint('ERROR: %s\n' % msg)
 			self.outputResult({'error':msg})
-			sys.exit()
+			sys.exit(1)
 		return False
 	def rootUser(self, fatal=False):
 		if 'USER' in os.environ and os.environ['USER'] == 'root':
 			return True
 		if fatal:
 			msg = 'This command must be run as root'
-			print('ERROR: %s\n') % msg
+			pprint('ERROR: %s\n' % msg)
 			self.outputResult({'error':msg})
-			sys.exit()
+			sys.exit(1)
 		return False
 	def getExec(self, cmd):
 		dirlist = ['/sbin', '/bin', '/usr/sbin', '/usr/bin',
@@ -406,8 +438,8 @@ class SystemValues:
 				ktime = m.group('ktime')
 		fp.close()
 		self.dmesgstart = float(ktime)
-	def getdmesg(self, fwdata=[]):
-		op = self.writeDatafileHeader(sysvals.dmesgfile, fwdata)
+	def getdmesg(self, testdata):
+		op = self.writeDatafileHeader(sysvals.dmesgfile, testdata)
 		# store all new dmesg lines since initdmesg was called
 		fp = Popen('dmesg', stdout=PIPE).stdout
 		for line in fp:
@@ -535,7 +567,7 @@ class SystemValues:
 		if len(self.kprobes) < 1:
 			return
 		if output:
-			print('    kprobe functions in this kernel:')
+			pprint('    kprobe functions in this kernel:')
 		# first test each kprobe
 		rejects = []
 		# sort kprobes: trace, ub-dev, custom, dev
@@ -557,7 +589,7 @@ class SystemValues:
 				else:
 					kpl[2].append(name)
 			if output:
-				print('         %s: %s' % (name, res))
+				pprint('         %s: %s' % (name, res))
 		kplist = kpl[0] + kpl[1] + kpl[2] + kpl[3]
 		# remove all failed ones from the list
 		for name in rejects:
@@ -571,7 +603,7 @@ class SystemValues:
 		if output:
 			check = self.fgetVal('kprobe_events')
 			linesack = (len(check.split('\n')) - 1) / 2
-			print('    kprobe functions enabled: %d/%d' % (linesack, linesout))
+			pprint('    kprobe functions enabled: %d/%d' % (linesack, linesout))
 		self.fsetVal('1', 'events/kprobes/enable')
 	def testKprobe(self, kname, kprobe):
 		self.fsetVal('0', 'events/kprobes/enable')
@@ -619,6 +651,8 @@ class SystemValues:
 			self.fsetVal('0', 'events/kprobes/enable')
 			self.fsetVal('', 'kprobe_events')
 			self.fsetVal('1024', 'buffer_size_kb')
+		if self.pmdebug:
+			self.setVal(self.pmdebug, self.pmdpath)
 	def setupAllKprobes(self):
 		for name in self.tracefuncs:
 			self.defaultKprobe(name, self.tracefuncs[name])
@@ -637,10 +671,15 @@ class SystemValues:
 		return False
 	def initFtrace(self):
 		self.printSystemInfo(False)
-		print('INITIALIZING FTRACE...')
+		pprint('INITIALIZING FTRACE...')
 		# turn trace off
 		self.fsetVal('0', 'tracing_on')
 		self.cleanupFtrace()
+		# pm debug messages
+		pv = self.getVal(self.pmdpath)
+		if pv != '1':
+			self.setVal('1', self.pmdpath)
+			self.pmdebug = pv
 		# set the trace clock to global
 		self.fsetVal('global', 'trace_clock')
 		self.fsetVal('nop', 'current_tracer')
@@ -649,7 +688,8 @@ class SystemValues:
 		if self.bufsize > 0:
 			tgtsize = self.bufsize
 		elif self.usecallgraph or self.usedevsrc:
-			tgtsize = min(self.memfree, 3*1024*1024)
+			bmax = (1*1024*1024) if self.suspendmode == 'disk' else (3*1024*1024)
+			tgtsize = min(self.memfree, bmax)
 		else:
 			tgtsize = 65536
 		while not self.fsetVal('%d' % (tgtsize / cpus), 'buffer_size_kb'):
@@ -658,7 +698,7 @@ class SystemValues:
 			if tgtsize < 65536:
 				tgtsize = int(self.fgetVal('buffer_size_kb')) * cpus
 				break
-		print 'Setting trace buffers to %d kB (%d kB per cpu)' % (tgtsize, tgtsize/cpus)
+		pprint('Setting trace buffers to %d kB (%d kB per cpu)' % (tgtsize, tgtsize/cpus))
 		# initialize the callgraph trace
 		if(self.usecallgraph):
 			# set trace type
@@ -691,7 +731,7 @@ class SystemValues:
 			if self.usedevsrc:
 				for name in self.dev_tracefuncs:
 					self.defaultKprobe(name, self.dev_tracefuncs[name])
-			print('INITIALIZING KPROBES...')
+			pprint('INITIALIZING KPROBES...')
 			self.addKprobes(self.verbose)
 		if(self.usetraceevents):
 			# turn trace events on
@@ -728,19 +768,24 @@ class SystemValues:
 		if not self.ansi:
 			return str
 		return '\x1B[%d;40m%s\x1B[m' % (color, str)
-	def writeDatafileHeader(self, filename, fwdata=[]):
+	def writeDatafileHeader(self, filename, testdata):
 		fp = self.openlog(filename, 'w')
 		fp.write('%s\n%s\n# command | %s\n' % (self.teststamp, self.sysstamp, self.cmdline))
-		if(self.suspendmode == 'mem' or self.suspendmode == 'command'):
-			for fw in fwdata:
+		for test in testdata:
+			if 'fw' in test:
+				fw = test['fw']
 				if(fw):
 					fp.write('# fwsuspend %u fwresume %u\n' % (fw[0], fw[1]))
+			if 'bat' in test:
+				(a1, c1), (a2, c2) = test['bat']
+				fp.write('# battery %s %d %s %d\n' % (a1, c1, a2, c2))
+			if test['error'] or len(testdata) > 1:
+				fp.write('# enter_sleep_error %s\n' % test['error'])
 		return fp
-	def sudouser(self, dir):
-		if os.path.exists(dir) and os.getuid() == 0 and \
-			'SUDO_USER' in os.environ:
+	def sudoUserchown(self, dir):
+		if os.path.exists(dir) and self.sudouser:
 			cmd = 'chown -R {0}:{0} {1} > /dev/null 2>&1'
-			call(cmd.format(os.environ['SUDO_USER'], dir), shell=True)
+			call(cmd.format(self.sudouser, dir), shell=True)
 	def outputResult(self, testdata, num=0):
 		if not self.result:
 			return
@@ -762,7 +807,7 @@ class SystemValues:
 		if 'bugurl' in testdata:
 			fp.write('url%s: %s\n' % (n, testdata['bugurl']))
 		fp.close()
-		self.sudouser(self.result)
+		self.sudoUserchown(self.result)
 	def configFile(self, file):
 		dir = os.path.dirname(os.path.realpath(__file__))
 		if os.path.exists(file):
@@ -800,15 +845,16 @@ suspendmodename = {
 #	 Simple class which holds property values collected
 #	 for all the devices used in the timeline.
 class DevProps:
-	syspath = ''
-	altname = ''
-	async = True
-	xtraclass = ''
-	xtrainfo = ''
+	def __init__(self):
+		self.syspath = ''
+		self.altname = ''
+		self.async = True
+		self.xtraclass = ''
+		self.xtrainfo = ''
 	def out(self, dev):
 		return '%s,%s,%d;' % (dev, self.altname, self.async)
 	def debug(self, dev):
-		print '%s:\n\taltname = %s\n\t  async = %s' % (dev, self.altname, self.async)
+		pprint('%s:\n\taltname = %s\n\t  async = %s' % (dev, self.altname, self.async))
 	def altName(self, dev):
 		if not self.altname or self.altname == dev:
 			return dev
@@ -831,9 +877,6 @@ class DevProps:
 #	 A container used to create a device hierachy, with a single root node
 #	 and a tree of child nodes. Used by Data.deviceTopology()
 class DeviceNode:
-	name = ''
-	children = 0
-	depth = 0
 	def __init__(self, nodename, nodedepth):
 		self.name = nodename
 		self.children = []
@@ -861,71 +904,78 @@ class DeviceNode:
 #	}
 #
 class Data:
-	dmesg = {}  # root data structure
-	phases = [] # ordered list of phases
-	start = 0.0 # test start
-	end = 0.0   # test end
-	tSuspended = 0.0 # low-level suspend start
-	tResumed = 0.0   # low-level resume start
-	tKernSus = 0.0   # kernel level suspend start
-	tKernRes = 0.0   # kernel level resume end
-	tLow = 0.0       # time spent in low-level suspend (standby/freeze)
-	fwValid = False  # is firmware data available
-	fwSuspend = 0    # time spent in firmware suspend
-	fwResume = 0     # time spent in firmware resume
-	dmesgtext = []   # dmesg text file in memory
-	pstl = 0         # process timeline
-	testnumber = 0
-	idstr = ''
-	html_device_id = 0
-	stamp = 0
-	outfile = ''
-	devpids = []
-	kerror = False
+	phasedef = {
+		'suspend_prepare': {'order': 0, 'color': '#CCFFCC'},
+		        'suspend': {'order': 1, 'color': '#88FF88'},
+		   'suspend_late': {'order': 2, 'color': '#00AA00'},
+		  'suspend_noirq': {'order': 3, 'color': '#008888'},
+		'suspend_machine': {'order': 4, 'color': '#0000FF'},
+		 'resume_machine': {'order': 5, 'color': '#FF0000'},
+		   'resume_noirq': {'order': 6, 'color': '#FF9900'},
+		   'resume_early': {'order': 7, 'color': '#FFCC00'},
+		         'resume': {'order': 8, 'color': '#FFFF88'},
+		'resume_complete': {'order': 9, 'color': '#FFFFCC'},
+	}
+	errlist = {
+		'HWERROR' : '.*\[ *Hardware Error *\].*',
+		'FWBUG'   : '.*\[ *Firmware Bug *\].*',
+		'BUG'     : '.*BUG.*',
+		'ERROR'   : '.*ERROR.*',
+		'WARNING' : '.*WARNING.*',
+		'IRQ'     : '.*genirq: .*',
+		'TASKFAIL': '.*Freezing of tasks failed.*',
+	}
 	def __init__(self, num):
 		idchar = 'abcdefghij'
-		self.pstl = dict()
+		self.start = 0.0 # test start
+		self.end = 0.0   # test end
+		self.tSuspended = 0.0 # low-level suspend start
+		self.tResumed = 0.0   # low-level resume start
+		self.tKernSus = 0.0   # kernel level suspend start
+		self.tKernRes = 0.0   # kernel level resume end
+		self.fwValid = False  # is firmware data available
+		self.fwSuspend = 0    # time spent in firmware suspend
+		self.fwResume = 0     # time spent in firmware resume
+		self.html_device_id = 0
+		self.stamp = 0
+		self.outfile = ''
+		self.kerror = False
+		self.battery = 0
+		self.enterfail = ''
+		self.currphase = ''
+		self.pstl = dict()    # process timeline
 		self.testnumber = num
 		self.idstr = idchar[num]
-		self.dmesgtext = []
-		self.phases = []
-		self.dmesg = { # fixed list of 10 phases
-			'suspend_prepare': {'list': dict(), 'start': -1.0, 'end': -1.0,
-								'row': 0, 'color': '#CCFFCC', 'order': 0},
-			        'suspend': {'list': dict(), 'start': -1.0, 'end': -1.0,
-								'row': 0, 'color': '#88FF88', 'order': 1},
-			   'suspend_late': {'list': dict(), 'start': -1.0, 'end': -1.0,
-								'row': 0, 'color': '#00AA00', 'order': 2},
-			  'suspend_noirq': {'list': dict(), 'start': -1.0, 'end': -1.0,
-								'row': 0, 'color': '#008888', 'order': 3},
-		    'suspend_machine': {'list': dict(), 'start': -1.0, 'end': -1.0,
-								'row': 0, 'color': '#0000FF', 'order': 4},
-			 'resume_machine': {'list': dict(), 'start': -1.0, 'end': -1.0,
-								'row': 0, 'color': '#FF0000', 'order': 5},
-			   'resume_noirq': {'list': dict(), 'start': -1.0, 'end': -1.0,
-								'row': 0, 'color': '#FF9900', 'order': 6},
-			   'resume_early': {'list': dict(), 'start': -1.0, 'end': -1.0,
-								'row': 0, 'color': '#FFCC00', 'order': 7},
-			         'resume': {'list': dict(), 'start': -1.0, 'end': -1.0,
-								'row': 0, 'color': '#FFFF88', 'order': 8},
-			'resume_complete': {'list': dict(), 'start': -1.0, 'end': -1.0,
-								'row': 0, 'color': '#FFFFCC', 'order': 9}
-		}
-		self.phases = self.sortedPhases()
+		self.dmesgtext = []   # dmesg text file in memory
+		self.dmesg = dict()   # root data structure
+		self.errorinfo = {'suspend':[],'resume':[]}
+		self.tLow = []        # time spent in low-level suspends (standby/freeze)
+		self.devpids = []
+		self.devicegroups = 0
+	def sortedPhases(self):
+		return sorted(self.dmesg, key=lambda k:self.dmesg[k]['order'])
+	def initDevicegroups(self):
+		# called when phases are all finished being added
+		for phase in self.dmesg.keys():
+			if '*' in phase:
+				p = phase.split('*')
+				pnew = '%s%d' % (p[0], len(p))
+				self.dmesg[pnew] = self.dmesg.pop(phase)
 		self.devicegroups = []
-		for phase in self.phases:
+		for phase in self.sortedPhases():
 			self.devicegroups.append([phase])
-		self.errorinfo = {'suspend':[],'resume':[]}
+	def nextPhase(self, phase, offset):
+		order = self.dmesg[phase]['order'] + offset
+		for p in self.dmesg:
+			if self.dmesg[p]['order'] == order:
+				return p
+		return ''
+	def lastPhase(self):
+		plist = self.sortedPhases()
+		if len(plist) < 1:
+			return ''
+		return plist[-1]
 	def extractErrorInfo(self):
-		elist = {
-			'HWERROR' : '.*\[ *Hardware Error *\].*',
-			'FWBUG'   : '.*\[ *Firmware Bug *\].*',
-			'BUG'     : '.*BUG.*',
-			'ERROR'   : '.*ERROR.*',
-			'WARNING' : '.*WARNING.*',
-			'IRQ'     : '.*genirq: .*',
-			'TASKFAIL': '.*Freezing of tasks failed.*',
-		}
 		lf = sysvals.openlog(sysvals.dmesgfile, 'r')
 		i = 0
 		list = []
@@ -939,8 +989,8 @@ class Data:
 				continue
 			dir = 'suspend' if t < self.tSuspended else 'resume'
 			msg = m.group('msg')
-			for err in elist:
-				if re.match(elist[err], msg):
+			for err in self.errlist:
+				if re.match(self.errlist[err], msg):
 					list.append((err, dir, t, i, i))
 					self.kerror = True
 					break
@@ -956,7 +1006,7 @@ class Data:
 	def setEnd(self, time):
 		self.end = time
 	def isTraceEventOutsideDeviceCalls(self, pid, time):
-		for phase in self.phases:
+		for phase in self.sortedPhases():
 			list = self.dmesg[phase]['list']
 			for dev in list:
 				d = list[dev]
@@ -964,16 +1014,10 @@ class Data:
 					time < d['end']):
 					return False
 		return True
-	def phaseCollision(self, phase, isbegin, line):
-		key = 'end'
-		if isbegin:
-			key = 'start'
-		if self.dmesg[phase][key] >= 0:
-			sysvals.vprint('IGNORE: %s' % line.strip())
-			return True
-		return False
 	def sourcePhase(self, start):
-		for phase in self.phases:
+		for phase in self.sortedPhases():
+			if 'machine' in phase:
+				continue
 			pend = self.dmesg[phase]['end']
 			if start <= pend:
 				return phase
@@ -1004,14 +1048,15 @@ class Data:
 		return tgtdev
 	def addDeviceFunctionCall(self, displayname, kprobename, proc, pid, start, end, cdata, rdata):
 		# try to place the call in a device
-		tgtdev = self.sourceDevice(self.phases, start, end, pid, 'device')
+		phases = self.sortedPhases()
+		tgtdev = self.sourceDevice(phases, start, end, pid, 'device')
 		# calls with device pids that occur outside device bounds are dropped
 		# TODO: include these somehow
 		if not tgtdev and pid in self.devpids:
 			return False
 		# try to place the call in a thread
 		if not tgtdev:
-			tgtdev = self.sourceDevice(self.phases, start, end, pid, 'thread')
+			tgtdev = self.sourceDevice(phases, start, end, pid, 'thread')
 		# create new thread blocks, expand as new calls are found
 		if not tgtdev:
 			if proc == '<...>':
@@ -1053,7 +1098,7 @@ class Data:
 	def overflowDevices(self):
 		# get a list of devices that extend beyond the end of this test run
 		devlist = []
-		for phase in self.phases:
+		for phase in self.sortedPhases():
 			list = self.dmesg[phase]['list']
 			for devname in list:
 				dev = list[devname]
@@ -1064,7 +1109,7 @@ class Data:
 		# merge any devices that overlap devlist
 		for dev in devlist:
 			devname = dev['name']
-			for phase in self.phases:
+			for phase in self.sortedPhases():
 				list = self.dmesg[phase]['list']
 				if devname not in list:
 					continue
@@ -1079,7 +1124,7 @@ class Data:
 				del list[devname]
 	def usurpTouchingThread(self, name, dev):
 		# the caller test has priority of this thread, give it to him
-		for phase in self.phases:
+		for phase in self.sortedPhases():
 			list = self.dmesg[phase]['list']
 			if name in list:
 				tdev = list[name]
@@ -1093,7 +1138,7 @@ class Data:
 				break
 	def stitchTouchingThreads(self, testlist):
 		# merge any threads between tests that touch
-		for phase in self.phases:
+		for phase in self.sortedPhases():
 			list = self.dmesg[phase]['list']
 			for devname in list:
 				dev = list[devname]
@@ -1103,7 +1148,7 @@ class Data:
 					data.usurpTouchingThread(devname, dev)
 	def optimizeDevSrc(self):
 		# merge any src call loops to reduce timeline size
-		for phase in self.phases:
+		for phase in self.sortedPhases():
 			list = self.dmesg[phase]['list']
 			for dev in list:
 				if 'src' not in list[dev]:
@@ -1141,7 +1186,7 @@ class Data:
 		self.tKernSus = self.trimTimeVal(self.tKernSus, t0, dT, left)
 		self.tKernRes = self.trimTimeVal(self.tKernRes, t0, dT, left)
 		self.end = self.trimTimeVal(self.end, t0, dT, left)
-		for phase in self.phases:
+		for phase in self.sortedPhases():
 			p = self.dmesg[phase]
 			p['start'] = self.trimTimeVal(p['start'], t0, dT, left)
 			p['end'] = self.trimTimeVal(p['end'], t0, dT, left)
@@ -1150,6 +1195,7 @@ class Data:
 				d = list[name]
 				d['start'] = self.trimTimeVal(d['start'], t0, dT, left)
 				d['end'] = self.trimTimeVal(d['end'], t0, dT, left)
+				d['length'] = d['end'] - d['start']
 				if('ftrace' in d):
 					cg = d['ftrace']
 					cg.start = self.trimTimeVal(cg.start, t0, dT, left)
@@ -1166,30 +1212,51 @@ class Data:
 				tm = self.trimTimeVal(tm, t0, dT, left)
 				list.append((type, tm, idx1, idx2))
 			self.errorinfo[dir] = list
-	def normalizeTime(self, tZero):
+	def trimFreezeTime(self, tZero):
 		# trim out any standby or freeze clock time
-		if(self.tSuspended != self.tResumed):
-			if(self.tResumed > tZero):
-				self.trimTime(self.tSuspended, \
-					self.tResumed-self.tSuspended, True)
-			else:
-				self.trimTime(self.tSuspended, \
-					self.tResumed-self.tSuspended, False)
+		lp = ''
+		for phase in self.sortedPhases():
+			if 'resume_machine' in phase and 'suspend_machine' in lp:
+				tS, tR = self.dmesg[lp]['end'], self.dmesg[phase]['start']
+				tL = tR - tS
+				if tL > 0:
+					left = True if tR > tZero else False
+					self.trimTime(tS, tL, left)
+					self.tLow.append('%.0f'%(tL*1000))
+			lp = phase
 	def getTimeValues(self):
-		sktime = (self.dmesg['suspend_machine']['end'] - \
-			self.tKernSus) * 1000
-		rktime = (self.dmesg['resume_complete']['end'] - \
-			self.dmesg['resume_machine']['start']) * 1000
+		sktime = (self.tSuspended - self.tKernSus) * 1000
+		rktime = (self.tKernRes - self.tResumed) * 1000
 		return (sktime, rktime)
-	def setPhase(self, phase, ktime, isbegin):
+	def setPhase(self, phase, ktime, isbegin, order=-1):
 		if(isbegin):
+			# phase start over current phase
+			if self.currphase:
+				if 'resume_machine' not in self.currphase:
+					sysvals.vprint('WARNING: phase %s failed to end' % self.currphase)
+				self.dmesg[self.currphase]['end'] = ktime
+			phases = self.dmesg.keys()
+			color = self.phasedef[phase]['color']
+			count = len(phases) if order < 0 else order
+			# create unique name for every new phase
+			while phase in phases:
+				phase += '*'
+			self.dmesg[phase] = {'list': dict(), 'start': -1.0, 'end': -1.0,
+				'row': 0, 'color': color, 'order': count}
 			self.dmesg[phase]['start'] = ktime
+			self.currphase = phase
 		else:
+			# phase end without a start
+			if phase not in self.currphase:
+				if self.currphase:
+					sysvals.vprint('WARNING: %s ended instead of %s, ftrace corruption?' % (phase, self.currphase))
+				else:
+					sysvals.vprint('WARNING: %s ended without a start, ftrace corruption?' % phase)
+					return phase
+			phase = self.currphase
 			self.dmesg[phase]['end'] = ktime
-	def dmesgSortVal(self, phase):
-		return self.dmesg[phase]['order']
-	def sortedPhases(self):
-		return sorted(self.dmesg, key=self.dmesgSortVal)
+			self.currphase = ''
+		return phase
 	def sortedDevices(self, phase):
 		list = self.dmesg[phase]['list']
 		slist = []
@@ -1208,13 +1275,13 @@ class Data:
 		for devname in phaselist:
 			dev = phaselist[devname]
 			if(dev['end'] < 0):
-				for p in self.phases:
+				for p in self.sortedPhases():
 					if self.dmesg[p]['end'] > dev['start']:
 						dev['end'] = self.dmesg[p]['end']
 						break
 				sysvals.vprint('%s (%s): callback didnt return' % (devname, phase))
 	def deviceFilter(self, devicefilter):
-		for phase in self.phases:
+		for phase in self.sortedPhases():
 			list = self.dmesg[phase]['list']
 			rmlist = []
 			for name in list:
@@ -1229,7 +1296,7 @@ class Data:
 				del list[name]
 	def fixupInitcallsThatDidntReturn(self):
 		# if any calls never returned, clip them at system resume end
-		for phase in self.phases:
+		for phase in self.sortedPhases():
 			self.fixupInitcalls(phase)
 	def phaseOverlap(self, phases):
 		rmgroups = []
@@ -1248,17 +1315,18 @@ class Data:
 		self.devicegroups.append(newgroup)
 	def newActionGlobal(self, name, start, end, pid=-1, color=''):
 		# which phase is this device callback or action in
+		phases = self.sortedPhases()
 		targetphase = 'none'
 		htmlclass = ''
 		overlap = 0.0
-		phases = []
-		for phase in self.phases:
+		myphases = []
+		for phase in phases:
 			pstart = self.dmesg[phase]['start']
 			pend = self.dmesg[phase]['end']
 			# see if the action overlaps this phase
 			o = max(0, min(end, pend) - max(start, pstart))
 			if o > 0:
-				phases.append(phase)
+				myphases.append(phase)
 			# set the target phase to the one that overlaps most
 			if o > overlap:
 				if overlap > 0 and phase == 'post_resume':
@@ -1267,19 +1335,19 @@ class Data:
 				overlap = o
 		# if no target phase was found, pin it to the edge
 		if targetphase == 'none':
-			p0start = self.dmesg[self.phases[0]]['start']
+			p0start = self.dmesg[phases[0]]['start']
 			if start <= p0start:
-				targetphase = self.phases[0]
+				targetphase = phases[0]
 			else:
-				targetphase = self.phases[-1]
+				targetphase = phases[-1]
 		if pid == -2:
 			htmlclass = ' bg'
 		elif pid == -3:
 			htmlclass = ' ps'
-		if len(phases) > 1:
+		if len(myphases) > 1:
 			htmlclass = ' bg'
-			self.phaseOverlap(phases)
-		if targetphase in self.phases:
+			self.phaseOverlap(myphases)
+		if targetphase in phases:
 			newname = self.newAction(targetphase, name, pid, '', start, end, '', htmlclass, color)
 			return (targetphase, newname)
 		return False
@@ -1311,19 +1379,43 @@ class Data:
 			if(list[child]['par'] == devname):
 				devlist.append(child)
 		return devlist
+	def maxDeviceNameSize(self, phase):
+		size = 0
+		for name in self.dmesg[phase]['list']:
+			if len(name) > size:
+				size = len(name)
+		return size
 	def printDetails(self):
 		sysvals.vprint('Timeline Details:')
 		sysvals.vprint('          test start: %f' % self.start)
 		sysvals.vprint('kernel suspend start: %f' % self.tKernSus)
-		for phase in self.phases:
-			dc = len(self.dmesg[phase]['list'])
-			sysvals.vprint('    %16s: %f - %f (%d devices)' % (phase, \
-				self.dmesg[phase]['start'], self.dmesg[phase]['end'], dc))
+		tS = tR = False
+		for phase in self.sortedPhases():
+			devlist = self.dmesg[phase]['list']
+			dc, ps, pe = len(devlist), self.dmesg[phase]['start'], self.dmesg[phase]['end']
+			if not tS and ps >= self.tSuspended:
+				sysvals.vprint('   machine suspended: %f' % self.tSuspended)
+				tS = True
+			if not tR and ps >= self.tResumed:
+				sysvals.vprint('     machine resumed: %f' % self.tResumed)
+				tR = True
+			sysvals.vprint('%20s: %f - %f (%d devices)' % (phase, ps, pe, dc))
+			if sysvals.devdump:
+				sysvals.vprint(''.join('-' for i in range(80)))
+				maxname = '%d' % self.maxDeviceNameSize(phase)
+				fmt = '%3d) %'+maxname+'s - %f - %f'
+				c = 1
+				for name in devlist:
+					s = devlist[name]['start']
+					e = devlist[name]['end']
+					sysvals.vprint(fmt % (c, name, s, e))
+					c += 1
+				sysvals.vprint(''.join('-' for i in range(80)))
 		sysvals.vprint('   kernel resume end: %f' % self.tKernRes)
 		sysvals.vprint('            test end: %f' % self.end)
 	def deviceChildrenAllPhases(self, devname):
 		devlist = []
-		for phase in self.phases:
+		for phase in self.sortedPhases():
 			list = self.deviceChildren(devname, phase)
 			for dev in list:
 				if dev not in devlist:
@@ -1344,7 +1436,7 @@ class Data:
 		if node.name:
 			info = ''
 			drv = ''
-			for phase in self.phases:
+			for phase in self.sortedPhases():
 				list = self.dmesg[phase]['list']
 				if node.name in list:
 					s = list[node.name]['start']
@@ -1478,8 +1570,29 @@ class Data:
 			c = self.addProcessUsageEvent(ps, tres)
 			if c > 0:
 				sysvals.vprint('%25s (res): %d' % (ps, c))
+	def handleEndMarker(self, time):
+		dm = self.dmesg
+		self.setEnd(time)
+		self.initDevicegroups()
+		# give suspend_prepare an end if needed
+		if 'suspend_prepare' in dm and dm['suspend_prepare']['end'] < 0:
+			dm['suspend_prepare']['end'] = time
+		# assume resume machine ends at next phase start
+		if 'resume_machine' in dm and dm['resume_machine']['end'] < 0:
+			np = self.nextPhase('resume_machine', 1)
+			if np:
+				dm['resume_machine']['end'] = dm[np]['start']
+		# if kernel resume end not found, assume its the end marker
+		if self.tKernRes == 0.0:
+			self.tKernRes = time
+		# if kernel suspend start not found, assume its the end marker
+		if self.tKernSus == 0.0:
+			self.tKernSus = time
+		# set resume complete to end at end marker
+		if 'resume_complete' in dm:
+			dm['resume_complete']['end'] = time
 	def debugPrint(self):
-		for p in self.phases:
+		for p in self.sortedPhases():
 			list = self.dmesg[p]['list']
 			for devname in list:
 				dev = list[devname]
@@ -1490,9 +1603,9 @@ class Data:
 # Description:
 #	 A container for kprobe function data we want in the dev timeline
 class DevFunction:
-	row = 0
-	count = 1
 	def __init__(self, name, args, caller, ret, start, end, u, proc, pid, color):
+		self.row = 0
+		self.count = 1
 		self.name = name
 		self.args = args
 		self.caller = caller
@@ -1546,16 +1659,15 @@ class DevFunction:
 #			 suspend_resume: phase or custom exec block data
 #			 device_pm_callback: device callback info
 class FTraceLine:
-	time = 0.0
-	length = 0.0
-	fcall = False
-	freturn = False
-	fevent = False
-	fkprobe = False
-	depth = 0
-	name = ''
-	type = ''
 	def __init__(self, t, m='', d=''):
+		self.length = 0.0
+		self.fcall = False
+		self.freturn = False
+		self.fevent = False
+		self.fkprobe = False
+		self.depth = 0
+		self.name = ''
+		self.type = ''
 		self.time = float(t)
 		if not m and not d:
 			return
@@ -1633,13 +1745,13 @@ class FTraceLine:
 		return len(str)/2
 	def debugPrint(self, info=''):
 		if self.isLeaf():
-			print(' -- %12.6f (depth=%02d): %s(); (%.3f us) %s' % (self.time, \
+			pprint(' -- %12.6f (depth=%02d): %s(); (%.3f us) %s' % (self.time, \
 				self.depth, self.name, self.length*1000000, info))
 		elif self.freturn:
-			print(' -- %12.6f (depth=%02d): %s} (%.3f us) %s' % (self.time, \
+			pprint(' -- %12.6f (depth=%02d): %s} (%.3f us) %s' % (self.time, \
 				self.depth, self.name, self.length*1000000, info))
 		else:
-			print(' -- %12.6f (depth=%02d): %s() { (%.3f us) %s' % (self.time, \
+			pprint(' -- %12.6f (depth=%02d): %s() { (%.3f us) %s' % (self.time, \
 				self.depth, self.name, self.length*1000000, info))
 	def startMarker(self):
 		# Is this the starting line of a suspend?
@@ -1675,19 +1787,13 @@ class FTraceLine:
 #	 Each instance is tied to a single device in a single phase, and is
 #	 comprised of an ordered list of FTraceLine objects
 class FTraceCallGraph:
-	id = ''
-	start = -1.0
-	end = -1.0
-	list = []
-	invalid = False
-	depth = 0
-	pid = 0
-	name = ''
-	partial = False
 	vfname = 'missing_function_name'
-	ignore = False
-	sv = 0
 	def __init__(self, pid, sv):
+		self.id = ''
+		self.invalid = False
+		self.name = ''
+		self.partial = False
+		self.ignore = False
 		self.start = -1.0
 		self.end = -1.0
 		self.list = []
@@ -1786,7 +1892,7 @@ class FTraceCallGraph:
 			if warning and ('[make leaf]', line) not in info:
 				info.append(('', line))
 		if warning:
-			print 'WARNING: ftrace data missing, corrections made:'
+			pprint('WARNING: ftrace data missing, corrections made:')
 			for i in info:
 				t, obj = i
 				if obj:
@@ -1846,10 +1952,10 @@ class FTraceCallGraph:
 		id = 'task %s' % (self.pid)
 		window = '(%f - %f)' % (self.start, line.time)
 		if(self.depth < 0):
-			print('Data misalignment for '+id+\
+			pprint('Data misalignment for '+id+\
 				' (buffer overflow), ignoring this callback')
 		else:
-			print('Too much data for '+id+\
+			pprint('Too much data for '+id+\
 				' '+window+', ignoring this callback')
 	def slice(self, dev):
 		minicg = FTraceCallGraph(dev['pid'], self.sv)
@@ -1902,7 +2008,7 @@ class FTraceCallGraph:
 			elif l.isReturn():
 				if(l.depth not in stack):
 					if self.sv.verbose:
-						print 'Post Process Error: Depth missing'
+						pprint('Post Process Error: Depth missing')
 						l.debugPrint()
 					return False
 				# calculate call length from call/return lines
@@ -1919,7 +2025,7 @@ class FTraceCallGraph:
 			return True
 		elif(cnt < 0):
 			if self.sv.verbose:
-				print 'Post Process Error: Depth is less than 0'
+				pprint('Post Process Error: Depth is less than 0')
 			return False
 		# trace ended before call tree finished
 		return self.repair(cnt)
@@ -1943,7 +2049,7 @@ class FTraceCallGraph:
 						dev['ftrace'] = cg
 					found = devname
 			return found
-		for p in data.phases:
+		for p in data.sortedPhases():
 			if(data.dmesg[p]['start'] <= self.start and
 				self.start <= data.dmesg[p]['end']):
 				list = data.dmesg[p]['list']
@@ -1966,7 +2072,7 @@ class FTraceCallGraph:
 		if fs < data.start or fe > data.end:
 			return
 		phase = ''
-		for p in data.phases:
+		for p in data.sortedPhases():
 			if(data.dmesg[p]['start'] <= self.start and
 				self.start < data.dmesg[p]['end']):
 				phase = p
@@ -1978,20 +2084,20 @@ class FTraceCallGraph:
 			phase, myname = out
 			data.dmesg[phase]['list'][myname]['ftrace'] = self
 	def debugPrint(self, info=''):
-		print('%s pid=%d [%f - %f] %.3f us') % \
+		pprint('%s pid=%d [%f - %f] %.3f us' % \
 			(self.name, self.pid, self.start, self.end,
-			(self.end - self.start)*1000000)
+			(self.end - self.start)*1000000))
 		for l in self.list:
 			if l.isLeaf():
-				print('%f (%02d): %s(); (%.3f us)%s' % (l.time, \
+				pprint('%f (%02d): %s(); (%.3f us)%s' % (l.time, \
 					l.depth, l.name, l.length*1000000, info))
 			elif l.freturn:
-				print('%f (%02d): %s} (%.3f us)%s' % (l.time, \
+				pprint('%f (%02d): %s} (%.3f us)%s' % (l.time, \
 					l.depth, l.name, l.length*1000000, info))
 			else:
-				print('%f (%02d): %s() { (%.3f us)%s' % (l.time, \
+				pprint('%f (%02d): %s() { (%.3f us)%s' % (l.time, \
 					l.depth, l.name, l.length*1000000, info))
-		print(' ')
+		pprint(' ')
 
 class DevItem:
 	def __init__(self, test, phase, dev):
@@ -2008,23 +2114,20 @@ class DevItem:
 #	 A container for a device timeline which calculates
 #	 all the html properties to display it correctly
 class Timeline:
-	html = ''
-	height = 0	# total timeline height
-	scaleH = 20	# timescale (top) row height
-	rowH = 30	# device row height
-	bodyH = 0	# body height
-	rows = 0	# total timeline rows
-	rowlines = dict()
-	rowheight = dict()
 	html_tblock = '<div id="block{0}" class="tblock" style="left:{1}%;width:{2}%;"><div class="tback" style="height:{3}px"></div>\n'
 	html_device = '<div id="{0}" title="{1}" class="thread{7}" style="left:{2}%;top:{3}px;height:{4}px;width:{5}%;{8}">{6}</div>\n'
 	html_phase = '<div class="phase" style="left:{0}%;width:{1}%;top:{2}px;height:{3}px;background:{4}">{5}</div>\n'
 	html_phaselet = '<div id="{0}" class="phaselet" style="left:{1}%;width:{2}%;background:{3}"></div>\n'
 	html_legend = '<div id="p{3}" class="square" style="left:{0}%;background:{1}">&nbsp;{2}</div>\n'
 	def __init__(self, rowheight, scaleheight):
-		self.rowH = rowheight
-		self.scaleH = scaleheight
 		self.html = ''
+		self.height = 0  # total timeline height
+		self.scaleH = scaleheight # timescale (top) row height
+		self.rowH = rowheight     # device row height
+		self.bodyH = 0   # body height
+		self.rows = 0    # total timeline rows
+		self.rowlines = dict()
+		self.rowheight = dict()
 	def createHeader(self, sv, stamp):
 		if(not stamp['time']):
 			return
@@ -2251,18 +2354,18 @@ class Timeline:
 # Description:
 #	 A list of values describing the properties of these test runs
 class TestProps:
-	stamp = ''
-	sysinfo = ''
-	cmdline = ''
-	kparams = ''
-	S0i3 = False
-	fwdata = []
 	stampfmt = '# [a-z]*-(?P<m>[0-9]{2})(?P<d>[0-9]{2})(?P<y>[0-9]{2})-'+\
 				'(?P<H>[0-9]{2})(?P<M>[0-9]{2})(?P<S>[0-9]{2})'+\
 				' (?P<host>.*) (?P<mode>.*) (?P<kernel>.*)$'
+	batteryfmt = '^# battery (?P<a1>\w*) (?P<c1>\d*) (?P<a2>\w*) (?P<c2>\d*)'
+	testerrfmt = '^# enter_sleep_error (?P<e>.*)'
 	sysinfofmt = '^# sysinfo .*'
 	cmdlinefmt = '^# command \| (?P<cmd>.*)'
 	kparamsfmt = '^# kparams \| (?P<kp>.*)'
+	devpropfmt = '# Device Properties: .*'
+	tracertypefmt = '# tracer: (?P<t>.*)'
+	firmwarefmt = '# fwsuspend (?P<s>[0-9]*) fwresume (?P<r>[0-9]*)$'
+	procexecfmt = 'ps - (?P<ps>.*)$'
 	ftrace_line_fmt_fg = \
 		'^ *(?P<time>[0-9\.]*) *\| *(?P<cpu>[0-9]*)\)'+\
 		' *(?P<proc>.*)-(?P<pid>[0-9]*) *\|'+\
@@ -2271,11 +2374,17 @@ class TestProps:
 		' *(?P<proc>.*)-(?P<pid>[0-9]*) *\[(?P<cpu>[0-9]*)\] *'+\
 		'(?P<flags>.{4}) *(?P<time>[0-9\.]*): *'+\
 		'(?P<msg>.*)'
-	ftrace_line_fmt = ftrace_line_fmt_nop
-	cgformat = False
-	data = 0
-	ktemp = dict()
 	def __init__(self):
+		self.stamp = ''
+		self.sysinfo = ''
+		self.cmdline = ''
+		self.kparams = ''
+		self.testerror = []
+		self.battery = []
+		self.fwdata = []
+		self.ftrace_line_fmt = self.ftrace_line_fmt_nop
+		self.cgformat = False
+		self.data = 0
 		self.ktemp = dict()
 	def setTracerType(self, tracer):
 		if(tracer == 'function_graph'):
@@ -2286,6 +2395,7 @@ class TestProps:
 		else:
 			doError('Invalid tracer format: [%s]' % tracer)
 	def parseStamp(self, data, sv):
+		# global test data
 		m = re.match(self.stampfmt, self.stamp)
 		data.stamp = {'time': '', 'host': '', 'mode': ''}
 		dt = datetime(int(m.group('y'))+2000, int(m.group('m')),
@@ -2324,23 +2434,36 @@ class TestProps:
 				sv.kparams = m.group('kp')
 		if not sv.stamp:
 			sv.stamp = data.stamp
+		# firmware data
+		if sv.suspendmode == 'mem' and len(self.fwdata) > data.testnumber:
+			data.fwSuspend, data.fwResume = self.fwdata[data.testnumber]
+			if(data.fwSuspend > 0 or data.fwResume > 0):
+				data.fwValid = True
+		# battery data
+		if len(self.battery) > data.testnumber:
+			m = re.match(self.batteryfmt, self.battery[data.testnumber])
+			if m:
+				data.battery = m.groups()
+		# sleep mode enter errors
+		if len(self.testerror) > data.testnumber:
+			m = re.match(self.testerrfmt, self.testerror[data.testnumber])
+			if m:
+				data.enterfail = m.group('e')
 
 # Class: TestRun
 # Description:
 #	 A container for a suspend/resume test run. This is necessary as
 #	 there could be more than one, and they need to be separate.
 class TestRun:
-	ftemp = dict()
-	ttemp = dict()
-	data = 0
 	def __init__(self, dataobj):
 		self.data = dataobj
 		self.ftemp = dict()
 		self.ttemp = dict()
 
 class ProcessMonitor:
-	proclist = dict()
-	running = False
+	def __init__(self):
+		self.proclist = dict()
+		self.running = False
 	def procstat(self):
 		c = ['cat /proc/[1-9]*/stat 2>/dev/null']
 		process = Popen(c, shell=True, stdout=PIPE)
@@ -2391,8 +2514,8 @@ class ProcessMonitor:
 #	 markers, and/or kprobes required for primary parsing.
 def doesTraceLogHaveTraceEvents():
 	kpcheck = ['_cal: (', '_cpu_down()']
-	techeck = ['suspend_resume']
-	tmcheck = ['SUSPEND START', 'RESUME COMPLETE']
+	techeck = ['suspend_resume', 'device_pm_callback']
+	tmcheck = ['tracing_mark_write']
 	sysvals.usekprobes = False
 	fp = sysvals.openlog(sysvals.ftracefile, 'r')
 	for line in fp:
@@ -2414,23 +2537,14 @@ def doesTraceLogHaveTraceEvents():
 				check.remove(i)
 		tmcheck = check
 	fp.close()
-	if len(techeck) == 0:
-		sysvals.usetraceevents = True
-	else:
-		sysvals.usetraceevents = False
-	if len(tmcheck) == 0:
-		sysvals.usetracemarkers = True
-	else:
-		sysvals.usetracemarkers = False
+	sysvals.usetraceevents = True if len(techeck) < 2 else False
+	sysvals.usetracemarkers = True if len(tmcheck) == 0 else False
 
 # Function: appendIncompleteTraceLog
 # Description:
 #	 [deprecated for kernel 3.15 or newer]
-#	 Legacy support of ftrace outputs that lack the device_pm_callback
-#	 and/or suspend_resume trace events. The primary data should be
-#	 taken from dmesg, and this ftrace is used only for callgraph data
-#	 or custom actions in the timeline. The data is appended to the Data
-#	 objects provided.
+#	 Adds callgraph data which lacks trace event data. This is only
+#	 for timelines generated from 3.15 or older
 # Arguments:
 #	 testruns: the array of Data objects obtained from parseKernelLog
 def appendIncompleteTraceLog(testruns):
@@ -2460,13 +2574,19 @@ def appendIncompleteTraceLog(testruns):
 		elif re.match(tp.cmdlinefmt, line):
 			tp.cmdline = line
 			continue
+		elif re.match(tp.batteryfmt, line):
+			tp.battery.append(line)
+			continue
+		elif re.match(tp.testerrfmt, line):
+			tp.testerror.append(line)
+			continue
 		# determine the trace data type (required for further parsing)
-		m = re.match(sysvals.tracertypefmt, line)
+		m = re.match(tp.tracertypefmt, line)
 		if(m):
 			tp.setTracerType(m.group('t'))
 			continue
 		# device properties line
-		if(re.match(sysvals.devpropfmt, line)):
+		if(re.match(tp.devpropfmt, line)):
 			devProps(line)
 			continue
 		# parse only valid lines, if this is not one move on
@@ -2506,87 +2626,7 @@ def appendIncompleteTraceLog(testruns):
 			continue
 		# trace event processing
 		if(t.fevent):
-			# general trace events have two types, begin and end
-			if(re.match('(?P<name>.*) begin$', t.name)):
-				isbegin = True
-			elif(re.match('(?P<name>.*) end$', t.name)):
-				isbegin = False
-			else:
-				continue
-			m = re.match('(?P<name>.*)\[(?P<val>[0-9]*)\] .*', t.name)
-			if(m):
-				val = m.group('val')
-				if val == '0':
-					name = m.group('name')
-				else:
-					name = m.group('name')+'['+val+']'
-			else:
-				m = re.match('(?P<name>.*) .*', t.name)
-				name = m.group('name')
-			# special processing for trace events
-			if re.match('dpm_prepare\[.*', name):
-				continue
-			elif re.match('machine_suspend.*', name):
-				continue
-			elif re.match('suspend_enter\[.*', name):
-				if(not isbegin):
-					data.dmesg['suspend_prepare']['end'] = t.time
-				continue
-			elif re.match('dpm_suspend\[.*', name):
-				if(not isbegin):
-					data.dmesg['suspend']['end'] = t.time
-				continue
-			elif re.match('dpm_suspend_late\[.*', name):
-				if(isbegin):
-					data.dmesg['suspend_late']['start'] = t.time
-				else:
-					data.dmesg['suspend_late']['end'] = t.time
-				continue
-			elif re.match('dpm_suspend_noirq\[.*', name):
-				if(isbegin):
-					data.dmesg['suspend_noirq']['start'] = t.time
-				else:
-					data.dmesg['suspend_noirq']['end'] = t.time
-				continue
-			elif re.match('dpm_resume_noirq\[.*', name):
-				if(isbegin):
-					data.dmesg['resume_machine']['end'] = t.time
-					data.dmesg['resume_noirq']['start'] = t.time
-				else:
-					data.dmesg['resume_noirq']['end'] = t.time
-				continue
-			elif re.match('dpm_resume_early\[.*', name):
-				if(isbegin):
-					data.dmesg['resume_early']['start'] = t.time
-				else:
-					data.dmesg['resume_early']['end'] = t.time
-				continue
-			elif re.match('dpm_resume\[.*', name):
-				if(isbegin):
-					data.dmesg['resume']['start'] = t.time
-				else:
-					data.dmesg['resume']['end'] = t.time
-				continue
-			elif re.match('dpm_complete\[.*', name):
-				if(isbegin):
-					data.dmesg['resume_complete']['start'] = t.time
-				else:
-					data.dmesg['resume_complete']['end'] = t.time
-				continue
-			# skip trace events inside devices calls
-			if(not data.isTraceEventOutsideDeviceCalls(pid, t.time)):
-				continue
-			# global events (outside device calls) are simply graphed
-			if(isbegin):
-				# store each trace event in ttemp
-				if(name not in testrun[testidx].ttemp):
-					testrun[testidx].ttemp[name] = []
-				testrun[testidx].ttemp[name].append(\
-					{'begin': t.time, 'end': t.time})
-			else:
-				# finish off matching trace event in ttemp
-				if(name in testrun[testidx].ttemp):
-					testrun[testidx].ttemp[name][-1]['end'] = t.time
+			continue
 		# call/return processing
 		elif sysvals.usecallgraph:
 			# create a callgraph object for the data
@@ -2603,12 +2643,6 @@ def appendIncompleteTraceLog(testruns):
 	tf.close()
 
 	for test in testrun:
-		# add the traceevent data to the device hierarchy
-		if(sysvals.usetraceevents):
-			for name in test.ttemp:
-				for event in test.ttemp[name]:
-					test.data.newActionGlobal(name, event['begin'], event['end'])
-
 		# add the callgraph data to the device hierarchy
 		for pid in test.ftemp:
 			for cg in test.ftemp[pid]:
@@ -2621,7 +2655,7 @@ def appendIncompleteTraceLog(testruns):
 					continue
 				callstart = cg.start
 				callend = cg.end
-				for p in test.data.phases:
+				for p in test.data.sortedPhases():
 					if(test.data.dmesg[p]['start'] <= callstart and
 						callstart <= test.data.dmesg[p]['end']):
 						list = test.data.dmesg[p]['list']
@@ -2648,10 +2682,13 @@ def parseTraceLog(live=False):
 		doError('%s does not exist' % sysvals.ftracefile)
 	if not live:
 		sysvals.setupAllKprobes()
+	ksuscalls = ['pm_prepare_console']
+	krescalls = ['pm_restore_console']
 	tracewatch = []
 	if sysvals.usekprobes:
 		tracewatch += ['sync_filesystems', 'freeze_processes', 'syscore_suspend',
-			'syscore_resume', 'resume_console', 'thaw_processes', 'CPU_ON', 'CPU_OFF']
+			'syscore_resume', 'resume_console', 'thaw_processes', 'CPU_ON',
+			'CPU_OFF', 'timekeeping_freeze', 'acpi_suspend']
 
 	# extract the callgraph and traceevent data
 	tp = TestProps()
@@ -2674,18 +2711,24 @@ def parseTraceLog(live=False):
 		elif re.match(tp.cmdlinefmt, line):
 			tp.cmdline = line
 			continue
+		elif re.match(tp.batteryfmt, line):
+			tp.battery.append(line)
+			continue
+		elif re.match(tp.testerrfmt, line):
+			tp.testerror.append(line)
+			continue
 		# firmware line: pull out any firmware data
-		m = re.match(sysvals.firmwarefmt, line)
+		m = re.match(tp.firmwarefmt, line)
 		if(m):
 			tp.fwdata.append((int(m.group('s')), int(m.group('r'))))
 			continue
 		# tracer type line: determine the trace data type
-		m = re.match(sysvals.tracertypefmt, line)
+		m = re.match(tp.tracertypefmt, line)
 		if(m):
 			tp.setTracerType(m.group('t'))
 			continue
 		# device properties line
-		if(re.match(sysvals.devpropfmt, line)):
+		if(re.match(tp.devpropfmt, line)):
 			devProps(line)
 			continue
 		# ignore all other commented lines
@@ -2714,20 +2757,20 @@ def parseTraceLog(live=False):
 			continue
 		# find the start of suspend
 		if(t.startMarker()):
-			phase = 'suspend_prepare'
 			data = Data(len(testdata))
 			testdata.append(data)
 			testrun = TestRun(data)
 			testruns.append(testrun)
 			tp.parseStamp(data, sysvals)
 			data.setStart(t.time)
-			data.tKernSus = t.time
+			data.first_suspend_prepare = True
+			phase = data.setPhase('suspend_prepare', t.time, True)
 			continue
 		if(not data):
 			continue
 		# process cpu exec line
 		if t.type == 'tracing_mark_write':
-			m = re.match(sysvals.procexecfmt, t.name)
+			m = re.match(tp.procexecfmt, t.name)
 			if(m):
 				proclist = dict()
 				for ps in m.group('ps').split(','):
@@ -2740,28 +2783,17 @@ def parseTraceLog(live=False):
 				continue
 		# find the end of resume
 		if(t.endMarker()):
-			data.setEnd(t.time)
-			if data.tKernRes == 0.0:
-				data.tKernRes = t.time
-			if data.dmesg['resume_complete']['end'] < 0:
-				data.dmesg['resume_complete']['end'] = t.time
-			if sysvals.suspendmode == 'mem' and len(tp.fwdata) > data.testnumber:
-				data.fwSuspend, data.fwResume = tp.fwdata[data.testnumber]
-				if(data.tSuspended != 0 and data.tResumed != 0 and \
-					(data.fwSuspend > 0 or data.fwResume > 0)):
-					data.fwValid = True
+			data.handleEndMarker(t.time)
 			if(not sysvals.usetracemarkers):
 				# no trace markers? then quit and be sure to finish recording
 				# the event we used to trigger resume end
-				if(len(testrun.ttemp['thaw_processes']) > 0):
+				if('thaw_processes' in testrun.ttemp and len(testrun.ttemp['thaw_processes']) > 0):
 					# if an entry exists, assume this is its end
 					testrun.ttemp['thaw_processes'][-1]['end'] = t.time
 				break
 			continue
 		# trace event processing
 		if(t.fevent):
-			if(phase == 'post_resume'):
-				data.setEnd(t.time)
 			if(t.type == 'suspend_resume'):
 				# suspend_resume trace events have two types, begin and end
 				if(re.match('(?P<name>.*) begin$', t.name)):
@@ -2786,86 +2818,62 @@ def parseTraceLog(live=False):
 				# -- phase changes --
 				# start of kernel suspend
 				if(re.match('suspend_enter\[.*', t.name)):
-					if(isbegin and data.start == data.tKernSus):
-						data.dmesg[phase]['start'] = t.time
+					if(isbegin):
 						data.tKernSus = t.time
 					continue
 				# suspend_prepare start
 				elif(re.match('dpm_prepare\[.*', t.name)):
-					phase = 'suspend_prepare'
-					if(not isbegin):
-						data.dmesg[phase]['end'] = t.time
-						if data.dmesg[phase]['start'] < 0:
-							data.dmesg[phase]['start'] = data.start
+					if isbegin and data.first_suspend_prepare:
+						data.first_suspend_prepare = False
+						if data.tKernSus == 0:
+							data.tKernSus = t.time
+						continue
+					phase = data.setPhase('suspend_prepare', t.time, isbegin)
 					continue
 				# suspend start
 				elif(re.match('dpm_suspend\[.*', t.name)):
-					phase = 'suspend'
-					data.setPhase(phase, t.time, isbegin)
+					phase = data.setPhase('suspend', t.time, isbegin)
 					continue
 				# suspend_late start
 				elif(re.match('dpm_suspend_late\[.*', t.name)):
-					phase = 'suspend_late'
-					data.setPhase(phase, t.time, isbegin)
+					phase = data.setPhase('suspend_late', t.time, isbegin)
 					continue
 				# suspend_noirq start
 				elif(re.match('dpm_suspend_noirq\[.*', t.name)):
-					if data.phaseCollision('suspend_noirq', isbegin, line):
-						continue
-					phase = 'suspend_noirq'
-					data.setPhase(phase, t.time, isbegin)
-					if(not isbegin):
-						phase = 'suspend_machine'
-						data.dmesg[phase]['start'] = t.time
+					phase = data.setPhase('suspend_noirq', t.time, isbegin)
 					continue
 				# suspend_machine/resume_machine
 				elif(re.match('machine_suspend\[.*', t.name)):
 					if(isbegin):
-						phase = 'suspend_machine'
-						data.dmesg[phase]['end'] = t.time
-						data.tSuspended = t.time
+						lp = data.lastPhase()
+						phase = data.setPhase('suspend_machine', data.dmesg[lp]['end'], True)
+						data.setPhase(phase, t.time, False)
+						if data.tSuspended == 0:
+							data.tSuspended = t.time
 					else:
-						if(sysvals.suspendmode in ['mem', 'disk'] and not tp.S0i3):
-							data.dmesg['suspend_machine']['end'] = t.time
+						phase = data.setPhase('resume_machine', t.time, True)
+						if(sysvals.suspendmode in ['mem', 'disk']):
+							susp = phase.replace('resume', 'suspend')
+							if susp in data.dmesg:
+								data.dmesg[susp]['end'] = t.time
 							data.tSuspended = t.time
-						phase = 'resume_machine'
-						data.dmesg[phase]['start'] = t.time
 						data.tResumed = t.time
-						data.tLow = data.tResumed - data.tSuspended
-					continue
-				# acpi_suspend
-				elif(re.match('acpi_suspend\[.*', t.name)):
-					# acpi_suspend[0] S0i3
-					if(re.match('acpi_suspend\[0\] begin', t.name)):
-						if(sysvals.suspendmode == 'mem'):
-							tp.S0i3 = True
-							data.dmesg['suspend_machine']['end'] = t.time
-							data.tSuspended = t.time
 					continue
 				# resume_noirq start
 				elif(re.match('dpm_resume_noirq\[.*', t.name)):
-					if data.phaseCollision('resume_noirq', isbegin, line):
-						continue
-					phase = 'resume_noirq'
-					data.setPhase(phase, t.time, isbegin)
-					if(isbegin):
-						data.dmesg['resume_machine']['end'] = t.time
+					phase = data.setPhase('resume_noirq', t.time, isbegin)
 					continue
 				# resume_early start
 				elif(re.match('dpm_resume_early\[.*', t.name)):
-					phase = 'resume_early'
-					data.setPhase(phase, t.time, isbegin)
+					phase = data.setPhase('resume_early', t.time, isbegin)
 					continue
 				# resume start
 				elif(re.match('dpm_resume\[.*', t.name)):
-					phase = 'resume'
-					data.setPhase(phase, t.time, isbegin)
+					phase = data.setPhase('resume', t.time, isbegin)
 					continue
 				# resume complete start
 				elif(re.match('dpm_complete\[.*', t.name)):
-					phase = 'resume_complete'
-					if(isbegin):
-						data.dmesg[phase]['start'] = t.time
+					phase = data.setPhase('resume_complete', t.time, isbegin)
 					continue
 				# skip trace events inside devices calls
 				if(not data.isTraceEventOutsideDeviceCalls(pid, t.time)):
@@ -2881,13 +2889,10 @@ def parseTraceLog(live=False):
 					if(len(testrun.ttemp[name]) > 0):
 						# if an entry exists, assume this is its end
 						testrun.ttemp[name][-1]['end'] = t.time
-					elif(phase == 'post_resume'):
-						# post resume events can just have ends
-						testrun.ttemp[name].append({
-							'begin': data.dmesg[phase]['start'],
-							'end': t.time})
 			# device callback start
 			elif(t.type == 'device_pm_callback_start'):
+				if phase not in data.dmesg:
+					continue
 				m = re.match('(?P<drv>.*) (?P<d>.*), parent: *(?P<p>.*), .*',\
 					t.name);
 				if(not m):
@@ -2901,6 +2906,8 @@ def parseTraceLog(live=False):
 						data.devpids.append(pid)
 			# device callback finish
 			elif(t.type == 'device_pm_callback_end'):
+				if phase not in data.dmesg:
+					continue
 				m = re.match('(?P<drv>.*) (?P<d>.*), err.*', t.name);
 				if(not m):
 					continue
@@ -2931,6 +2938,9 @@ def parseTraceLog(live=False):
 					'cdata': kprobedata,
 					'proc': m_proc,
 				})
+				# start of kernel resume
+				if(phase == 'suspend_prepare' and kprobename in ksuscalls):
+					data.tKernSus = t.time
 			elif(t.freturn):
 				if(key not in tp.ktemp) or len(tp.ktemp[key]) < 1:
 					continue
@@ -2941,9 +2951,9 @@ def parseTraceLog(live=False):
 					e['end'] = t.time
 					e['rdata'] = kprobedata
 				# end of kernel resume
-				if(kprobename == 'pm_notifier_call_chain' or \
-					kprobename == 'pm_restore_console'):
-					data.dmesg[phase]['end'] = t.time
+				if(phase != 'suspend_prepare' and kprobename in krescalls):
+					if phase in data.dmesg:
+						data.dmesg[phase]['end'] = t.time
 					data.tKernRes = t.time
 
 		# callgraph processing
@@ -2961,10 +2971,13 @@ def parseTraceLog(live=False):
 			if(res == -1):
 				testrun.ftemp[key][-1].addLine(t)
 	tf.close()
+	if data and not data.devicegroups:
+		sysvals.vprint('WARNING: end marker is missing')
+		data.handleEndMarker(t.time)
 
 	if sysvals.suspendmode == 'command':
 		for test in testruns:
-			for p in test.data.phases:
+			for p in test.data.sortedPhases():
 				if p == 'suspend_prepare':
 					test.data.dmesg[p]['start'] = test.data.start
 					test.data.dmesg[p]['end'] = test.data.end
@@ -2973,13 +2986,20 @@ def parseTraceLog(live=False):
 					test.data.dmesg[p]['end'] = test.data.end
 			test.data.tSuspended = test.data.end
 			test.data.tResumed = test.data.end
-			test.data.tLow = 0
 			test.data.fwValid = False
 
 	# dev source and procmon events can be unreadable with mixed phase height
 	if sysvals.usedevsrc or sysvals.useprocmon:
 		sysvals.mixedphaseheight = False
 
+	# expand phase boundaries so there are no gaps
+	for data in testdata:
+		lp = data.sortedPhases()[0]
+		for p in data.sortedPhases():
+			if(p != lp and not ('machine' in p and 'machine' in lp)):
+				data.dmesg[lp]['end'] = data.dmesg[p]['start']
+			lp = p
+
 	for i in range(len(testruns)):
 		test = testruns[i]
 		data = test.data
@@ -3040,8 +3060,8 @@ def parseTraceLog(live=False):
 						sortkey = '%f%f%d' % (cg.start, cg.end, pid)
 						sortlist[sortkey] = cg
 					elif len(cg.list) > 1000000:
-						print 'WARNING: the callgraph for %s is massive (%d lines)' %\
-							(devname, len(cg.list))
+						sysvals.vprint('WARNING: the callgraph for %s is massive (%d lines)' %\
+							(devname, len(cg.list)))
 			# create blocks for orphan cg data
 			for sortkey in sorted(sortlist):
 				cg = sortlist[sortkey]
@@ -3057,25 +3077,29 @@ def parseTraceLog(live=False):
 	for data in testdata:
 		tn = '' if len(testdata) == 1 else ('%d' % (data.testnumber + 1))
 		terr = ''
-		lp = data.phases[0]
-		for p in data.phases:
-			if(data.dmesg[p]['start'] < 0 and data.dmesg[p]['end'] < 0):
+		phasedef = data.phasedef
+		lp = 'suspend_prepare'
+		for p in sorted(phasedef, key=lambda k:phasedef[k]['order']):
+			if p not in data.dmesg:
 				if not terr:
-					print 'TEST%s FAILED: %s failed in %s phase' % (tn, sysvals.suspendmode, lp)
+					pprint('TEST%s FAILED: %s failed in %s phase' % (tn, sysvals.suspendmode, lp))
 					terr = '%s%s failed in %s phase' % (sysvals.suspendmode, tn, lp)
 					error.append(terr)
+					if data.tSuspended == 0:
+						data.tSuspended = data.dmesg[lp]['end']
+					if data.tResumed == 0:
+						data.tResumed = data.dmesg[lp]['end']
+					data.fwValid = False
 				sysvals.vprint('WARNING: phase "%s" is missing!' % p)
-			if(data.dmesg[p]['start'] < 0):
-				data.dmesg[p]['start'] = data.dmesg[lp]['end']
-				if(p == 'resume_machine'):
-					data.tSuspended = data.dmesg[lp]['end']
-					data.tResumed = data.dmesg[lp]['end']
-					data.tLow = 0
-			if(data.dmesg[p]['end'] < 0):
-				data.dmesg[p]['end'] = data.dmesg[p]['start']
-			if(p != lp and not ('machine' in p and 'machine' in lp)):
-				data.dmesg[lp]['end'] = data.dmesg[p]['start']
 			lp = p
+		if not terr and data.enterfail:
+			pprint('test%s FAILED: enter %s failed with %s' % (tn, sysvals.suspendmode, data.enterfail))
+			terr = 'test%s failed to enter %s mode' % (tn, sysvals.suspendmode)
+			error.append(terr)
+		if data.tSuspended == 0:
+			data.tSuspended = data.tKernRes
+		if data.tResumed == 0:
+			data.tResumed = data.tSuspended
 
 		if(len(sysvals.devicefilter) > 0):
 			data.deviceFilter(sysvals.devicefilter)
@@ -3127,7 +3151,13 @@ def loadKernelLog():
 		elif re.match(tp.cmdlinefmt, line):
 			tp.cmdline = line
 			continue
-		m = re.match(sysvals.firmwarefmt, line)
+		elif re.match(tp.batteryfmt, line):
+			tp.battery.append(line)
+			continue
+		elif re.match(tp.testerrfmt, line):
+			tp.testerror.append(line)
+			continue
+		m = re.match(tp.firmwarefmt, line)
 		if(m):
 			tp.fwdata.append((int(m.group('s')), int(m.group('r'))))
 			continue
@@ -3140,10 +3170,6 @@ def loadKernelLog():
 				testruns.append(data)
 			data = Data(len(testruns))
 			tp.parseStamp(data, sysvals)
-			if len(tp.fwdata) > data.testnumber:
-				data.fwSuspend, data.fwResume = tp.fwdata[data.testnumber]
-				if(data.fwSuspend > 0 or data.fwResume > 0):
-					data.fwValid = True
 		if(not data):
 			continue
 		m = re.match('.* *(?P<k>[0-9]\.[0-9]{2}\.[0-9]-.*) .*', msg)
@@ -3158,7 +3184,7 @@ def loadKernelLog():
 	if data:
 		testruns.append(data)
 	if len(testruns) < 1:
-		print('ERROR: dmesg log has no suspend/resume data: %s' \
+		pprint('ERROR: dmesg log has no suspend/resume data: %s' \
 			% sysvals.dmesgfile)
 
 	# fix lines with same timestamp/function with the call and return swapped
@@ -3199,30 +3225,30 @@ def parseKernelLog(data):
 
 	# dmesg phase match table
 	dm = {
-		'suspend_prepare': 'PM: Syncing filesystems.*',
-		        'suspend': 'PM: Entering [a-z]* sleep.*',
-		   'suspend_late': 'PM: suspend of devices complete after.*',
-		  'suspend_noirq': 'PM: late suspend of devices complete after.*',
-		'suspend_machine': 'PM: noirq suspend of devices complete after.*',
-		 'resume_machine': 'ACPI: Low-level resume complete.*',
-		   'resume_noirq': 'ACPI: Waking up from system sleep state.*',
-		   'resume_early': 'PM: noirq resume of devices complete after.*',
-		         'resume': 'PM: early resume of devices complete after.*',
-		'resume_complete': 'PM: resume of devices complete after.*',
-		    'post_resume': '.*Restarting tasks \.\.\..*',
+		'suspend_prepare': ['PM: Syncing filesystems.*'],
+		        'suspend': ['PM: Entering [a-z]* sleep.*', 'Suspending console.*'],
+		   'suspend_late': ['PM: suspend of devices complete after.*'],
+		  'suspend_noirq': ['PM: late suspend of devices complete after.*'],
+		'suspend_machine': ['PM: noirq suspend of devices complete after.*'],
+		 'resume_machine': ['ACPI: Low-level resume complete.*'],
+		   'resume_noirq': ['ACPI: Waking up from system sleep state.*'],
+		   'resume_early': ['PM: noirq resume of devices complete after.*'],
+		         'resume': ['PM: early resume of devices complete after.*'],
+		'resume_complete': ['PM: resume of devices complete after.*'],
+		    'post_resume': ['.*Restarting tasks \.\.\..*'],
 	}
 	if(sysvals.suspendmode == 'standby'):
-		dm['resume_machine'] = 'PM: Restoring platform NVS memory'
+		dm['resume_machine'] = ['PM: Restoring platform NVS memory']
 	elif(sysvals.suspendmode == 'disk'):
-		dm['suspend_late'] = 'PM: freeze of devices complete after.*'
-		dm['suspend_noirq'] = 'PM: late freeze of devices complete after.*'
-		dm['suspend_machine'] = 'PM: noirq freeze of devices complete after.*'
-		dm['resume_machine'] = 'PM: Restoring platform NVS memory'
-		dm['resume_early'] = 'PM: noirq restore of devices complete after.*'
-		dm['resume'] = 'PM: early restore of devices complete after.*'
-		dm['resume_complete'] = 'PM: restore of devices complete after.*'
+		dm['suspend_late'] = ['PM: freeze of devices complete after.*']
+		dm['suspend_noirq'] = ['PM: late freeze of devices complete after.*']
+		dm['suspend_machine'] = ['PM: noirq freeze of devices complete after.*']
+		dm['resume_machine'] = ['PM: Restoring platform NVS memory']
+		dm['resume_early'] = ['PM: noirq restore of devices complete after.*']
+		dm['resume'] = ['PM: early restore of devices complete after.*']
+		dm['resume_complete'] = ['PM: restore of devices complete after.*']
 	elif(sysvals.suspendmode == 'freeze'):
-		dm['resume_machine'] = 'ACPI: resume from mwait'
+		dm['resume_machine'] = ['ACPI: resume from mwait']
 
 	# action table (expected events that occur and show up in dmesg)
 	at = {
@@ -3264,81 +3290,89 @@ def parseKernelLog(data):
 		else:
 			continue
 
+		# check for a phase change line
+		phasechange = False
+		for p in dm:
+			for s in dm[p]:
+				if(re.match(s, msg)):
+					phasechange, phase = True, p
+					break
+
 		# hack for determining resume_machine end for freeze
 		if(not sysvals.usetraceevents and sysvals.suspendmode == 'freeze' \
 			and phase == 'resume_machine' and \
 			re.match('calling  (?P<f>.*)\+ @ .*, parent: .*', msg)):
-			data.dmesg['resume_machine']['end'] = ktime
-			phase = 'resume_noirq'
-			data.dmesg[phase]['start'] = ktime
-
-		# suspend start
-		if(re.match(dm['suspend_prepare'], msg)):
-			phase = 'suspend_prepare'
-			data.dmesg[phase]['start'] = ktime
-			data.setStart(ktime)
-			data.tKernSus = ktime
-		# suspend start
-		elif(re.match(dm['suspend'], msg)):
-			data.dmesg['suspend_prepare']['end'] = ktime
-			phase = 'suspend'
-			data.dmesg[phase]['start'] = ktime
-		# suspend_late start
-		elif(re.match(dm['suspend_late'], msg)):
-			data.dmesg['suspend']['end'] = ktime
-			phase = 'suspend_late'
-			data.dmesg[phase]['start'] = ktime
-		# suspend_noirq start
-		elif(re.match(dm['suspend_noirq'], msg)):
-			data.dmesg['suspend_late']['end'] = ktime
-			phase = 'suspend_noirq'
-			data.dmesg[phase]['start'] = ktime
-		# suspend_machine start
-		elif(re.match(dm['suspend_machine'], msg)):
-			data.dmesg['suspend_noirq']['end'] = ktime
-			phase = 'suspend_machine'
-			data.dmesg[phase]['start'] = ktime
-		# resume_machine start
-		elif(re.match(dm['resume_machine'], msg)):
-			if(sysvals.suspendmode in ['freeze', 'standby']):
-				data.tSuspended = prevktime
-				data.dmesg['suspend_machine']['end'] = prevktime
-			else:
-				data.tSuspended = ktime
-				data.dmesg['suspend_machine']['end'] = ktime
-			phase = 'resume_machine'
-			data.tResumed = ktime
-			data.tLow = data.tResumed - data.tSuspended
-			data.dmesg[phase]['start'] = ktime
-		# resume_noirq start
-		elif(re.match(dm['resume_noirq'], msg)):
-			data.dmesg['resume_machine']['end'] = ktime
+			data.setPhase(phase, ktime, False)
 			phase = 'resume_noirq'
-			data.dmesg[phase]['start'] = ktime
-		# resume_early start
-		elif(re.match(dm['resume_early'], msg)):
-			data.dmesg['resume_noirq']['end'] = ktime
-			phase = 'resume_early'
-			data.dmesg[phase]['start'] = ktime
-		# resume start
-		elif(re.match(dm['resume'], msg)):
-			data.dmesg['resume_early']['end'] = ktime
-			phase = 'resume'
-			data.dmesg[phase]['start'] = ktime
-		# resume complete start
-		elif(re.match(dm['resume_complete'], msg)):
-			data.dmesg['resume']['end'] = ktime
-			phase = 'resume_complete'
-			data.dmesg[phase]['start'] = ktime
-		# post resume start
-		elif(re.match(dm['post_resume'], msg)):
-			data.dmesg['resume_complete']['end'] = ktime
-			data.setEnd(ktime)
-			data.tKernRes = ktime
-			break
+			data.setPhase(phase, ktime, True)
+
+		if phasechange:
+			if phase == 'suspend_prepare':
+				data.setPhase(phase, ktime, True)
+				data.setStart(ktime)
+				data.tKernSus = ktime
+			elif phase == 'suspend':
+				lp = data.lastPhase()
+				if lp:
+					data.setPhase(lp, ktime, False)
+				data.setPhase(phase, ktime, True)
+			elif phase == 'suspend_late':
+				lp = data.lastPhase()
+				if lp:
+					data.setPhase(lp, ktime, False)
+				data.setPhase(phase, ktime, True)
+			elif phase == 'suspend_noirq':
+				lp = data.lastPhase()
+				if lp:
+					data.setPhase(lp, ktime, False)
+				data.setPhase(phase, ktime, True)
+			elif phase == 'suspend_machine':
+				lp = data.lastPhase()
+				if lp:
+					data.setPhase(lp, ktime, False)
+				data.setPhase(phase, ktime, True)
+			elif phase == 'resume_machine':
+				lp = data.lastPhase()
+				if(sysvals.suspendmode in ['freeze', 'standby']):
+					data.tSuspended = prevktime
+					if lp:
+						data.setPhase(lp, prevktime, False)
+				else:
+					data.tSuspended = ktime
+					if lp:
+						data.setPhase(lp, prevktime, False)
+				data.tResumed = ktime
+				data.setPhase(phase, ktime, True)
+			elif phase == 'resume_noirq':
+				lp = data.lastPhase()
+				if lp:
+					data.setPhase(lp, ktime, False)
+				data.setPhase(phase, ktime, True)
+			elif phase == 'resume_early':
+				lp = data.lastPhase()
+				if lp:
+					data.setPhase(lp, ktime, False)
+				data.setPhase(phase, ktime, True)
+			elif phase == 'resume':
+				lp = data.lastPhase()
+				if lp:
+					data.setPhase(lp, ktime, False)
+				data.setPhase(phase, ktime, True)
+			elif phase == 'resume_complete':
+				lp = data.lastPhase()
+				if lp:
+					data.setPhase(lp, ktime, False)
+				data.setPhase(phase, ktime, True)
+			elif phase == 'post_resume':
+				lp = data.lastPhase()
+				if lp:
+					data.setPhase(lp, ktime, False)
+				data.setEnd(ktime)
+				data.tKernRes = ktime
+				break
 
 		# -- device callbacks --
-		if(phase in data.phases):
+		if(phase in data.sortedPhases()):
 			# device init call
 			if(re.match('calling  (?P<f>.*)\+ @ .*, parent: .*', msg)):
 				sm = re.match('calling  (?P<f>.*)\+ @ '+\
@@ -3396,24 +3430,31 @@ def parseKernelLog(data):
 				actions[cpu].append({'begin': cpu_start, 'end': ktime})
 				cpu_start = ktime
 		prevktime = ktime
+	data.initDevicegroups()
 
 	# fill in any missing phases
-	lp = data.phases[0]
-	for p in data.phases:
-		if(data.dmesg[p]['start'] < 0 and data.dmesg[p]['end'] < 0):
-			print('WARNING: phase "%s" is missing, something went wrong!' % p)
-			print('    In %s, this dmesg line denotes the start of %s:' % \
-				(sysvals.suspendmode, p))
-			print('        "%s"' % dm[p])
-		if(data.dmesg[p]['start'] < 0):
-			data.dmesg[p]['start'] = data.dmesg[lp]['end']
-			if(p == 'resume_machine'):
-				data.tSuspended = data.dmesg[lp]['end']
-				data.tResumed = data.dmesg[lp]['end']
-				data.tLow = 0
-		if(data.dmesg[p]['end'] < 0):
-			data.dmesg[p]['end'] = data.dmesg[p]['start']
+	phasedef = data.phasedef
+	terr, lp = '', 'suspend_prepare'
+	for p in sorted(phasedef, key=lambda k:phasedef[k]['order']):
+		if p not in data.dmesg:
+			if not terr:
+				pprint('TEST FAILED: %s failed in %s phase' % (sysvals.suspendmode, lp))
+				terr = '%s failed in %s phase' % (sysvals.suspendmode, lp)
+				if data.tSuspended == 0:
+					data.tSuspended = data.dmesg[lp]['end']
+				if data.tResumed == 0:
+					data.tResumed = data.dmesg[lp]['end']
+			sysvals.vprint('WARNING: phase "%s" is missing!' % p)
 		lp = p
+	lp = data.sortedPhases()[0]
+	for p in data.sortedPhases():
+		if(p != lp and not ('machine' in p and 'machine' in lp)):
+			data.dmesg[lp]['end'] = data.dmesg[p]['start']
+		lp = p
+	if data.tSuspended == 0:
+		data.tSuspended = data.tKernRes
+	if data.tResumed == 0:
+		data.tResumed = data.tSuspended
 
 	# fill in any actions we've found
 	for name in actions:
@@ -3462,7 +3503,7 @@ def addCallgraphs(sv, hf, data):
 	hf.write('<section id="callgraphs" class="callgraph">\n')
 	# write out the ftrace data converted to html
 	num = 0
-	for p in data.phases:
+	for p in data.sortedPhases():
 		if sv.cgphase and p != sv.cgphase:
 			continue
 		list = data.dmesg[p]['list']
@@ -3495,7 +3536,7 @@ def addCallgraphs(sv, hf, data):
 #	 Create summary html file for a series of tests
 # Arguments:
 #	 testruns: array of Data objects from parseTraceLog
-def createHTMLSummarySimple(testruns, htmlfile, folder):
+def createHTMLSummarySimple(testruns, htmlfile, title):
 	# write the html header first (html head, css code, up to body start)
 	html = '<!DOCTYPE html>\n<html>\n<head>\n\
 	<meta http-equiv="content-type" content="text/html; charset=UTF-8">\n\
@@ -3505,7 +3546,7 @@ def createHTMLSummarySimple(testruns, htmlfile, folder):
 		table {width:100%;border-collapse: collapse;}\n\
 		.summary {border:1px solid;}\n\
 		th {border: 1px solid black;background:#222;color:white;}\n\
-		td {font: 16px "Times New Roman";text-align: center;}\n\
+		td {font: 14px "Times New Roman";text-align: center;}\n\
 		tr.head td {border: 1px solid black;background:#aaa;}\n\
 		tr.alt {background-color:#ddd;}\n\
 		tr.notice {color:red;}\n\
@@ -3521,7 +3562,7 @@ def createHTMLSummarySimple(testruns, htmlfile, folder):
 	iMin, iMed, iMax = [0, 0], [0, 0], [0, 0]
 	num = 0
 	lastmode = ''
-	cnt = {'pass':0, 'fail':0, 'hang':0}
+	cnt = dict()
 	for data in sorted(testruns, key=lambda v:(v['mode'], v['host'], v['kernel'], v['time'])):
 		mode = data['mode']
 		if mode not in list:
@@ -3541,10 +3582,14 @@ def createHTMLSummarySimple(testruns, htmlfile, folder):
 		tVal = [float(data['suspend']), float(data['resume'])]
 		list[mode]['data'].append([data['host'], data['kernel'],
 			data['time'], tVal[0], tVal[1], data['url'], data['result'],
-			data['issues']])
+			data['issues'], data['sus_worst'], data['sus_worsttime'],
+			data['res_worst'], data['res_worsttime']])
 		idx = len(list[mode]['data']) - 1
+		if data['result'] not in cnt:
+			cnt[data['result']] = 1
+		else:
+			cnt[data['result']] += 1
 		if data['result'] == 'pass':
-			cnt['pass'] += 1
 			for i in range(2):
 				tMed[i].append(tVal[i])
 				tAvg[i] += tVal[i]
@@ -3555,10 +3600,6 @@ def createHTMLSummarySimple(testruns, htmlfile, folder):
 					iMax[i] = idx
 					tMax[i] = tVal[i]
 			num += 1
-		elif data['result'] == 'hang':
-			cnt['hang'] += 1
-		elif data['result'] == 'fail':
-			cnt['fail'] += 1
 		lastmode = mode
 	if lastmode and num > 0:
 		for i in range(2):
@@ -3575,7 +3616,7 @@ def createHTMLSummarySimple(testruns, htmlfile, folder):
 	for ilk in sorted(cnt, reverse=True):
 		if cnt[ilk] > 0:
 			desc.append('%d %s' % (cnt[ilk], ilk))
-	html += '<div class="stamp">%s (%d tests: %s)</div>\n' % (folder, len(testruns), ', '.join(desc))
+	html += '<div class="stamp">%s (%d tests: %s)</div>\n' % (title, len(testruns), ', '.join(desc))
 	th = '\t<th>{0}</th>\n'
 	td = '\t<td>{0}</td>\n'
 	tdh = '\t<td{1}>{0}</td>\n'
@@ -3585,11 +3626,14 @@ def createHTMLSummarySimple(testruns, htmlfile, folder):
 	html += '<table class="summary">\n<tr>\n' + th.format('#') +\
 		th.format('Mode') + th.format('Host') + th.format('Kernel') +\
 		th.format('Test Time') + th.format('Result') + th.format('Issues') +\
-		th.format('Suspend') + th.format('Resume') + th.format('Detail') + '</tr>\n'
+		th.format('Suspend') + th.format('Resume') +\
+		th.format('Worst Suspend Device') + th.format('SD Time') +\
+		th.format('Worst Resume Device') + th.format('RD Time') +\
+		th.format('Detail') + '</tr>\n'
 
 	# export list into html
 	head = '<tr class="head"><td>{0}</td><td>{1}</td>'+\
-		'<td colspan=8 class="sus">Suspend Avg={2} '+\
+		'<td colspan=12 class="sus">Suspend Avg={2} '+\
 		'<span class=minval><a href="#s{10}min">Min={3}</a></span> '+\
 		'<span class=medval><a href="#s{10}med">Med={4}</a></span> '+\
 		'<span class=maxval><a href="#s{10}max">Max={5}</a></span> '+\
@@ -3598,7 +3642,7 @@ def createHTMLSummarySimple(testruns, htmlfile, folder):
 		'<span class=medval><a href="#r{10}med">Med={8}</a></span> '+\
 		'<span class=maxval><a href="#r{10}max">Max={9}</a></span></td>'+\
 		'</tr>\n'
-	headnone = '<tr class="head"><td>{0}</td><td>{1}</td><td colspan=8></td></tr>\n'
+	headnone = '<tr class="head"><td>{0}</td><td>{1}</td><td colspan=12></td></tr>\n'
 	for mode in list:
 		# header line for each suspend mode
 		num = 0
@@ -3641,6 +3685,10 @@ def createHTMLSummarySimple(testruns, htmlfile, folder):
 			html += td.format(d[7])										# issues
 			html += tdh.format('%.3f ms' % d[3], tHigh[0]) if d[3] else td.format('')	# suspend
 			html += tdh.format('%.3f ms' % d[4], tHigh[1]) if d[4] else td.format('')	# resume
+			html += td.format(d[8])										# sus_worst
+			html += td.format('%.3f ms' % d[9])	if d[9] else td.format('')		# sus_worst time
+			html += td.format(d[10])									# res_worst
+			html += td.format('%.3f ms' % d[11]) if d[11] else td.format('')	# res_worst time
 			html += tdlink.format(d[5]) if d[5] else td.format('')		# url
 			html += '</tr>\n'
 			num += 1
@@ -3670,14 +3718,15 @@ def ordinal(value):
 #	 True if the html file was created, false if it failed
 def createHTML(testruns, testfail):
 	if len(testruns) < 1:
-		print('ERROR: Not enough test data to build a timeline')
+		pprint('ERROR: Not enough test data to build a timeline')
 		return
 
 	kerror = False
 	for data in testruns:
 		if data.kerror:
 			kerror = True
-		data.normalizeTime(testruns[-1].tSuspended)
+		if(sysvals.suspendmode in ['freeze', 'standby']):
+			data.trimFreezeTime(testruns[-1].tSuspended)
 
 	# html function templates
 	html_error = '<div id="{1}" title="kernel error/warning" class="err" style="right:{0}%">{2}&rarr;</div>\n'
@@ -3721,8 +3770,8 @@ def createHTML(testruns, testfail):
 		sktime, rktime = data.getTimeValues()
 		if(tTotal == 0):
 			doError('No timeline data')
-		if(data.tLow > 0):
-			low_time = '%.0f'%(data.tLow*1000)
+		if(len(data.tLow) > 0):
+			low_time = '|'.join(data.tLow)
 		if sysvals.suspendmode == 'command':
 			run_time = '%.0f'%((data.end-data.start)*1000)
 			if sysvals.testcommand:
@@ -3743,7 +3792,7 @@ def createHTML(testruns, testfail):
 			if(len(testruns) > 1):
 				testdesc1 = testdesc2 = ordinal(data.testnumber+1)
 				testdesc2 += ' '
-			if(data.tLow == 0):
+			if(len(data.tLow) == 0):
 				thtml = html_timetotal.format(suspend_time, \
 					resume_time, testdesc1, stitle, rtitle)
 			else:
@@ -3762,7 +3811,7 @@ def createHTML(testruns, testfail):
 			rtitle = 'time from firmware mode to return from kernel enter_state(%s) [kernel time only]' % sysvals.suspendmode
 			if(len(testruns) > 1):
 				testdesc = ordinal(data.testnumber+1)+' '+testdesc
-			if(data.tLow == 0):
+			if(len(data.tLow) == 0):
 				thtml = html_timetotal.format(suspend_time, \
 					resume_time, testdesc, stitle, rtitle)
 			else:
@@ -3820,15 +3869,14 @@ def createHTML(testruns, testfail):
 
 	# draw the full timeline
 	devtl.createZoomBox(sysvals.suspendmode, len(testruns))
-	phases = {'suspend':[],'resume':[]}
-	for phase in data.dmesg:
-		if 'resume' in phase:
-			phases['resume'].append(phase)
-		else:
-			phases['suspend'].append(phase)
-
-	# draw each test run chronologically
 	for data in testruns:
+		# draw each test run and block chronologically
+		phases = {'suspend':[],'resume':[]}
+		for phase in data.sortedPhases():
+			if data.dmesg[phase]['start'] >= data.tSuspended:
+				phases['resume'].append(phase)
+			else:
+				phases['suspend'].append(phase)
 		# now draw the actual timeline blocks
 		for dir in phases:
 			# draw suspend and resume blocks separately
@@ -3850,7 +3898,7 @@ def createHTML(testruns, testfail):
 				continue
 			width = '%f' % (((mTotal*100.0)-sysvals.srgap/2)/tTotal)
 			devtl.html += devtl.html_tblock.format(bname, left, width, devtl.scaleH)
-			for b in sorted(phases[dir]):
+			for b in phases[dir]:
 				# draw the phase color background
 				phase = data.dmesg[b]
 				length = phase['end']-phase['start']
@@ -3865,7 +3913,7 @@ def createHTML(testruns, testfail):
 				id = '%d_%d' % (idx1, idx2)
 				right = '%f' % (((mMax-t)*100.0)/mTotal)
 				devtl.html += html_error.format(right, id, type)
-			for b in sorted(phases[dir]):
+			for b in phases[dir]:
 				# draw the devices for this phase
 				phaselist = data.dmesg[b]['list']
 				for d in data.tdevlist[b]:
@@ -3942,19 +3990,17 @@ def createHTML(testruns, testfail):
 
 	# draw a legend which describes the phases by color
 	if sysvals.suspendmode != 'command':
-		data = testruns[-1]
+		phasedef = testruns[-1].phasedef
 		devtl.html += '<div class="legend">\n'
-		pdelta = 100.0/len(data.phases)
+		pdelta = 100.0/len(phasedef.keys())
 		pmargin = pdelta / 4.0
-		for phase in data.phases:
-			tmp = phase.split('_')
-			id = tmp[0][0]
-			if(len(tmp) > 1):
-				id += tmp[1][0]
-			order = '%.2f' % ((data.dmesg[phase]['order'] * pdelta) + pmargin)
+		for phase in sorted(phasedef, key=lambda k:phasedef[k]['order']):
+			id, p = '', phasedef[phase]
+			for word in phase.split('_'):
+				id += word[0]
+			order = '%.2f' % ((p['order'] * pdelta) + pmargin)
 			name = string.replace(phase, '_', ' &nbsp;')
-			devtl.html += devtl.html_legend.format(order, \
-				data.dmesg[phase]['color'], name, id)
+			devtl.html += devtl.html_legend.format(order, p['color'], name, id)
 		devtl.html += '</div>\n'
 
 	hf = open(sysvals.htmlfile, 'w')
@@ -3970,7 +4016,7 @@ def createHTML(testruns, testfail):
 		pscolor = 'linear-gradient(to top left, #ccc, #eee)'
 		hf.write(devtl.html_phaselet.format('pre_suspend_process', \
 			'0', '0', pscolor))
-		for b in data.phases:
+		for b in data.sortedPhases():
 			phase = data.dmesg[b]
 			length = phase['end']-phase['start']
 			left = '%.3f' % (((phase['start']-t0)*100.0)/tTotal)
@@ -4522,18 +4568,18 @@ def setRuntimeSuspend(before=True):
 			sv.rstgt, sv.rsval, sv.rsdir = 'on', 'auto', 'enabled'
 		else:
 			sv.rstgt, sv.rsval, sv.rsdir = 'auto', 'on', 'disabled'
-		print('CONFIGURING RUNTIME SUSPEND...')
+		pprint('CONFIGURING RUNTIME SUSPEND...')
 		sv.rslist = deviceInfo(sv.rstgt)
 		for i in sv.rslist:
 			sv.setVal(sv.rsval, i)
-		print('runtime suspend %s on all devices (%d changed)' % (sv.rsdir, len(sv.rslist)))
-		print('waiting 5 seconds...')
+		pprint('runtime suspend %s on all devices (%d changed)' % (sv.rsdir, len(sv.rslist)))
+		pprint('waiting 5 seconds...')
 		time.sleep(5)
 	else:
 		# runtime suspend re-enable or re-disable
 		for i in sv.rslist:
 			sv.setVal(sv.rstgt, i)
-		print('runtime suspend settings restored on %d devices' % len(sv.rslist))
+		pprint('runtime suspend settings restored on %d devices' % len(sv.rslist))
 
 # Function: executeSuspend
 # Description:
@@ -4542,25 +4588,21 @@ def setRuntimeSuspend(before=True):
 def executeSuspend():
 	pm = ProcessMonitor()
 	tp = sysvals.tpath
-	fwdata = []
+	testdata = []
+	battery = True if getBattery() else False
 	# run these commands to prepare the system for suspend
 	if sysvals.display:
-		if sysvals.display > 0:
-			print('TURN DISPLAY ON')
-			call('xset -d :0.0 dpms force suspend', shell=True)
-			call('xset -d :0.0 dpms force on', shell=True)
-		else:
-			print('TURN DISPLAY OFF')
-			call('xset -d :0.0 dpms force suspend', shell=True)
+		pprint('SET DISPLAY TO %s' % sysvals.display.upper())
+		displayControl(sysvals.display)
 		time.sleep(1)
 	if sysvals.sync:
-		print('SYNCING FILESYSTEMS')
+		pprint('SYNCING FILESYSTEMS')
 		call('sync', shell=True)
 	# mark the start point in the kernel ring buffer just as we start
 	sysvals.initdmesg()
 	# start ftrace
 	if(sysvals.usecallgraph or sysvals.usetraceevents):
-		print('START TRACING')
+		pprint('START TRACING')
 		sysvals.fsetVal('1', 'tracing_on')
 		if sysvals.useprocmon:
 			pm.start()
@@ -4573,15 +4615,16 @@ def executeSuspend():
 			sysvals.fsetVal('WAIT END', 'trace_marker')
 		# start message
 		if sysvals.testcommand != '':
-			print('COMMAND START')
+			pprint('COMMAND START')
 		else:
 			if(sysvals.rtcwake):
-				print('SUSPEND START')
+				pprint('SUSPEND START')
 			else:
-				print('SUSPEND START (press a key to resume)')
+				pprint('SUSPEND START (press a key to resume)')
+		bat1 = getBattery() if battery else False
 		# set rtcwake
 		if(sysvals.rtcwake):
-			print('will issue an rtcwake in %d seconds' % sysvals.rtcwaketime)
+			pprint('will issue an rtcwake in %d seconds' % sysvals.rtcwaketime)
 			sysvals.rtcWakeAlarmOn()
 		# start of suspend trace marker
 		if(sysvals.usecallgraph or sysvals.usetraceevents):
@@ -4592,8 +4635,11 @@ def executeSuspend():
 			time.sleep(sysvals.predelay/1000.0)
 			sysvals.fsetVal('WAIT END', 'trace_marker')
 		# initiate suspend or command
+		tdata = {'error': ''}
 		if sysvals.testcommand != '':
-			call(sysvals.testcommand+' 2>&1', shell=True);
+			res = call(sysvals.testcommand+' 2>&1', shell=True);
+			if res != 0:
+				tdata['error'] = 'cmd returned %d' % res
 		else:
 			mode = sysvals.suspendmode
 			if sysvals.memmode and os.path.exists(sysvals.mempowerfile):
@@ -4601,13 +4647,18 @@ def executeSuspend():
 				pf = open(sysvals.mempowerfile, 'w')
 				pf.write(sysvals.memmode)
 				pf.close()
+			if sysvals.diskmode and os.path.exists(sysvals.diskpowerfile):
+				mode = 'disk'
+				pf = open(sysvals.diskpowerfile, 'w')
+				pf.write(sysvals.diskmode)
+				pf.close()
 			pf = open(sysvals.powerfile, 'w')
 			pf.write(mode)
 			# execution will pause here
 			try:
 				pf.close()
-			except:
-				pass
+			except Exception as e:
+				tdata['error'] = str(e)
 		if(sysvals.rtcwake):
 			sysvals.rtcWakeAlarmOff()
 		# postdelay delay
@@ -4616,27 +4667,33 @@ def executeSuspend():
 			time.sleep(sysvals.postdelay/1000.0)
 			sysvals.fsetVal('WAIT END', 'trace_marker')
 		# return from suspend
-		print('RESUME COMPLETE')
+		pprint('RESUME COMPLETE')
 		if(sysvals.usecallgraph or sysvals.usetraceevents):
 			sysvals.fsetVal('RESUME COMPLETE', 'trace_marker')
 		if(sysvals.suspendmode == 'mem' or sysvals.suspendmode == 'command'):
-			fwdata.append(getFPDT(False))
+			tdata['fw'] = getFPDT(False)
+		bat2 = getBattery() if battery else False
+		if battery and bat1 and bat2:
+			tdata['bat'] = (bat1, bat2)
+		testdata.append(tdata)
 	# stop ftrace
 	if(sysvals.usecallgraph or sysvals.usetraceevents):
 		if sysvals.useprocmon:
 			pm.stop()
 		sysvals.fsetVal('0', 'tracing_on')
-		print('CAPTURING TRACE')
-		op = sysvals.writeDatafileHeader(sysvals.ftracefile, fwdata)
+	# grab a copy of the dmesg output
+	pprint('CAPTURING DMESG')
+	sysvals.getdmesg(testdata)
+	# grab a copy of the ftrace output
+	if(sysvals.usecallgraph or sysvals.usetraceevents):
+		pprint('CAPTURING TRACE')
+		op = sysvals.writeDatafileHeader(sysvals.ftracefile, testdata)
 		fp = open(tp+'trace', 'r')
 		for line in fp:
 			op.write(line)
 		op.close()
 		sysvals.fsetVal('', 'trace')
 		devProps()
-	# grab a copy of the dmesg output
-	print('CAPTURING DMESG')
-	sysvals.getdmesg(fwdata)
 
 def readFile(file):
 	if os.path.islink(file):
@@ -4673,15 +4730,15 @@ def yesno(val):
 #	 a list of USB device names to sysvals for better timeline readability
 def deviceInfo(output=''):
 	if not output:
-		print('LEGEND')
-		print('---------------------------------------------------------------------------------------------')
-		print('  A = async/sync PM queue (A/S)               C = runtime active children')
-		print('  R = runtime suspend enabled/disabled (E/D)  rACTIVE = runtime active (min/sec)')
-		print('  S = runtime status active/suspended (A/S)   rSUSPEND = runtime suspend (min/sec)')
-		print('  U = runtime usage count')
-		print('---------------------------------------------------------------------------------------------')
-		print('DEVICE                     NAME                       A R S U C    rACTIVE   rSUSPEND')
-		print('---------------------------------------------------------------------------------------------')
+		pprint('LEGEND\n'\
+		'---------------------------------------------------------------------------------------------\n'\
+		'  A = async/sync PM queue (A/S)               C = runtime active children\n'\
+		'  R = runtime suspend enabled/disabled (E/D)  rACTIVE = runtime active (min/sec)\n'\
+		'  S = runtime status active/suspended (A/S)   rSUSPEND = runtime suspend (min/sec)\n'\
+		'  U = runtime usage count\n'\
+		'---------------------------------------------------------------------------------------------\n'\
+		'DEVICE                     NAME                       A R S U C    rACTIVE   rSUSPEND\n'\
+		'---------------------------------------------------------------------------------------------')
 
 	res = []
 	tgtval = 'runtime_status'
@@ -4766,7 +4823,7 @@ def devProps(data=0):
 			alreadystamped = True
 			continue
 		# determine the trace data type (required for further parsing)
-		m = re.match(sysvals.tracertypefmt, line)
+		m = re.match(tp.tracertypefmt, line)
 		if(m):
 			tp.setTracerType(m.group('t'))
 			continue
@@ -4870,6 +4927,11 @@ def getModes():
 		fp.close()
 		if 'mem' in modes and not deep:
 			modes.remove('mem')
+	if('disk' in modes and os.path.exists(sysvals.diskpowerfile)):
+		fp = open(sysvals.diskpowerfile, 'r')
+		for m in string.split(fp.read()):
+			modes.append('disk-%s' % m.strip('[]'))
+		fp.close()
 	return modes
 
 # Function: dmidecode
@@ -4994,8 +5056,9 @@ def dmidecode(mempath, fatal=False):
 	return out
 
 def getBattery():
-	p = '/sys/class/power_supply'
-	bat = dict()
+	p, charge, bat = '/sys/class/power_supply', 0, {}
+	if not os.path.exists(p):
+		return False
 	for d in os.listdir(p):
 		type = sysvals.getVal(os.path.join(p, d, 'type')).strip().lower()
 		if type != 'battery':
@@ -5003,15 +5066,47 @@ def getBattery():
 		for v in ['status', 'energy_now', 'capacity_now']:
 			bat[v] = sysvals.getVal(os.path.join(p, d, v)).strip().lower()
 		break
-	ac = True
-	if 'status' in bat and 'discharging' in bat['status']:
-		ac = False
-	charge = 0
+	if 'status' not in bat:
+		return False
+	ac = False if 'discharging' in bat['status'] else True
 	for v in ['energy_now', 'capacity_now']:
 		if v in bat and bat[v]:
 			charge = int(bat[v])
 	return (ac, charge)
 
+def displayControl(cmd):
+	xset, ret = 'xset -d :0.0 {0}', 0
+	if sysvals.sudouser:
+		xset = 'sudo -u %s %s' % (sysvals.sudouser, xset)
+	if cmd == 'init':
+		ret = call(xset.format('dpms 0 0 0'), shell=True)
+		if not ret:
+			ret = call(xset.format('s off'), shell=True)
+	elif cmd == 'reset':
+		ret = call(xset.format('s reset'), shell=True)
+	elif cmd in ['on', 'off', 'standby', 'suspend']:
+		b4 = displayControl('stat')
+		ret = call(xset.format('dpms force %s' % cmd), shell=True)
+		if not ret:
+			curr = displayControl('stat')
+			sysvals.vprint('Display Switched: %s -> %s' % (b4, curr))
+			if curr != cmd:
+				sysvals.vprint('WARNING: Display failed to change to %s' % cmd)
+		if ret:
+			sysvals.vprint('WARNING: Display failed to change to %s with xset' % cmd)
+			return ret
+	elif cmd == 'stat':
+		fp = Popen(xset.format('q').split(' '), stdout=PIPE).stdout
+		ret = 'unknown'
+		for line in fp:
+			m = re.match('[\s]*Monitor is (?P<m>.*)', line)
+			if(m and len(m.group('m')) >= 2):
+				out = m.group('m').lower()
+				ret = out[3:] if out[0:2] == 'in' else out
+				break
+		fp.close()
+	return ret
+
 # Function: getFPDT
 # Description:
 #	 Read the acpi bios tables and pull out FPDT, the firmware data
@@ -5055,18 +5150,19 @@ def getFPDT(output):
 
 	table = struct.unpack('4sIBB6s8sI4sI', buf[0:36])
 	if(output):
-		print('')
-		print('Firmware Performance Data Table (%s)' % table[0])
-		print('                  Signature : %s' % table[0])
-		print('               Table Length : %u' % table[1])
-		print('                   Revision : %u' % table[2])
-		print('                   Checksum : 0x%x' % table[3])
-		print('                     OEM ID : %s' % table[4])
-		print('               OEM Table ID : %s' % table[5])
-		print('               OEM Revision : %u' % table[6])
-		print('                 Creator ID : %s' % table[7])
-		print('           Creator Revision : 0x%x' % table[8])
-		print('')
+		pprint('\n'\
+		'Firmware Performance Data Table (%s)\n'\
+		'                  Signature : %s\n'\
+		'               Table Length : %u\n'\
+		'                   Revision : %u\n'\
+		'                   Checksum : 0x%x\n'\
+		'                     OEM ID : %s\n'\
+		'               OEM Table ID : %s\n'\
+		'               OEM Revision : %u\n'\
+		'                 Creator ID : %s\n'\
+		'           Creator Revision : 0x%x\n'\
+		'' % (table[0], table[0], table[1], table[2], table[3],
+			table[4], table[5], table[6], table[7], table[8]))
 
 	if(table[0] != 'FPDT'):
 		if(output):
@@ -5092,22 +5188,24 @@ def getFPDT(output):
 			first = fp.read(8)
 		except:
 			if(output):
-				print('Bad address 0x%x in %s' % (addr, sysvals.mempath))
+				pprint('Bad address 0x%x in %s' % (addr, sysvals.mempath))
 			return [0, 0]
 		rechead = struct.unpack('4sI', first)
 		recdata = fp.read(rechead[1]-8)
 		if(rechead[0] == 'FBPT'):
 			record = struct.unpack('HBBIQQQQQ', recdata)
 			if(output):
-				print('%s (%s)' % (rectype[header[0]], rechead[0]))
-				print('                  Reset END : %u ns' % record[4])
-				print('  OS Loader LoadImage Start : %u ns' % record[5])
-				print(' OS Loader StartImage Start : %u ns' % record[6])
-				print('     ExitBootServices Entry : %u ns' % record[7])
-				print('      ExitBootServices Exit : %u ns' % record[8])
+				pprint('%s (%s)\n'\
+				'                  Reset END : %u ns\n'\
+				'  OS Loader LoadImage Start : %u ns\n'\
+				' OS Loader StartImage Start : %u ns\n'\
+				'     ExitBootServices Entry : %u ns\n'\
+				'      ExitBootServices Exit : %u ns'\
+				'' % (rectype[header[0]], rechead[0], record[4], record[5],
+					record[6], record[7], record[8]))
 		elif(rechead[0] == 'S3PT'):
 			if(output):
-				print('%s (%s)' % (rectype[header[0]], rechead[0]))
+				pprint('%s (%s)' % (rectype[header[0]], rechead[0]))
 			j = 0
 			while(j < len(recdata)):
 				prechead = struct.unpack('HBB', recdata[j:j+4])
@@ -5117,27 +5215,26 @@ def getFPDT(output):
 					record = struct.unpack('IIQQ', recdata[j:j+prechead[1]])
 					fwData[1] = record[2]
 					if(output):
-						print('    %s' % prectype[prechead[0]])
-						print('               Resume Count : %u' % \
-							record[1])
-						print('                 FullResume : %u ns' % \
-							record[2])
-						print('              AverageResume : %u ns' % \
-							record[3])
+						pprint('    %s\n'\
+						'               Resume Count : %u\n'\
+						'                 FullResume : %u ns\n'\
+						'              AverageResume : %u ns'\
+						'' % (prectype[prechead[0]], record[1],
+								record[2], record[3]))
 				elif(prechead[0] == 1):
 					record = struct.unpack('QQ', recdata[j+4:j+prechead[1]])
 					fwData[0] = record[1] - record[0]
 					if(output):
-						print('    %s' % prectype[prechead[0]])
-						print('               SuspendStart : %u ns' % \
-							record[0])
-						print('                 SuspendEnd : %u ns' % \
-							record[1])
-						print('                SuspendTime : %u ns' % \
-							fwData[0])
+						pprint('    %s\n'\
+						'               SuspendStart : %u ns\n'\
+						'                 SuspendEnd : %u ns\n'\
+						'                SuspendTime : %u ns'\
+						'' % (prectype[prechead[0]], record[0],
+								record[1], fwData[0]))
+
 				j += prechead[1]
 		if(output):
-			print('')
+			pprint('')
 		i += header[1]
 	fp.close()
 	return fwData
@@ -5149,26 +5246,26 @@ def getFPDT(output):
 # Output:
 #	 True if the test will work, False if not
 def statusCheck(probecheck=False):
-	status = True
+	status = ''
 
-	print('Checking this system (%s)...' % platform.node())
+	pprint('Checking this system (%s)...' % platform.node())
 
 	# check we have root access
 	res = sysvals.colorText('NO (No features of this tool will work!)')
 	if(sysvals.rootCheck(False)):
 		res = 'YES'
-	print('    have root access: %s' % res)
+	pprint('    have root access: %s' % res)
 	if(res != 'YES'):
-		print('    Try running this script with sudo')
-		return False
+		pprint('    Try running this script with sudo')
+		return 'missing root access'
 
 	# check sysfs is mounted
 	res = sysvals.colorText('NO (No features of this tool will work!)')
 	if(os.path.exists(sysvals.powerfile)):
 		res = 'YES'
-	print('    is sysfs mounted: %s' % res)
+	pprint('    is sysfs mounted: %s' % res)
 	if(res != 'YES'):
-		return False
+		return 'sysfs is missing'
 
 	# check target mode is a valid mode
 	if sysvals.suspendmode != 'command':
@@ -5177,11 +5274,11 @@ def statusCheck(probecheck=False):
 		if(sysvals.suspendmode in modes):
 			res = 'YES'
 		else:
-			status = False
-		print('    is "%s" a valid power mode: %s' % (sysvals.suspendmode, res))
+			status = '%s mode is not supported' % sysvals.suspendmode
+		pprint('    is "%s" a valid power mode: %s' % (sysvals.suspendmode, res))
 		if(res == 'NO'):
-			print('      valid power modes are: %s' % modes)
-			print('      please choose one with -m')
+			pprint('      valid power modes are: %s' % modes)
+			pprint('      please choose one with -m')
 
 	# check if ftrace is available
 	res = sysvals.colorText('NO')
@@ -5189,8 +5286,8 @@ def statusCheck(probecheck=False):
 	if(ftgood):
 		res = 'YES'
 	elif(sysvals.usecallgraph):
-		status = False
-	print('    is ftrace supported: %s' % res)
+		status = 'ftrace is not properly supported'
+	pprint('    is ftrace supported: %s' % res)
 
 	# check if kprobes are available
 	res = sysvals.colorText('NO')
@@ -5199,7 +5296,7 @@ def statusCheck(probecheck=False):
 		res = 'YES'
 	else:
 		sysvals.usedevsrc = False
-	print('    are kprobes supported: %s' % res)
+	pprint('    are kprobes supported: %s' % res)
 
 	# what data source are we using
 	res = 'DMESG'
@@ -5210,15 +5307,15 @@ def statusCheck(probecheck=False):
 				sysvals.usetraceevents = False
 		if(sysvals.usetraceevents):
 			res = 'FTRACE (all trace events found)'
-	print('    timeline data source: %s' % res)
+	pprint('    timeline data source: %s' % res)
 
 	# check if rtcwake
 	res = sysvals.colorText('NO')
 	if(sysvals.rtcpath != ''):
 		res = 'YES'
 	elif(sysvals.rtcwake):
-		status = False
-	print('    is rtcwake supported: %s' % res)
+		status = 'rtcwake is not properly supported'
+	pprint('    is rtcwake supported: %s' % res)
 
 	if not probecheck:
 		return status
@@ -5243,9 +5340,9 @@ def statusCheck(probecheck=False):
 def doError(msg, help=False):
 	if(help == True):
 		printHelp()
-	print('ERROR: %s\n') % msg
+	pprint('ERROR: %s\n' % msg)
 	sysvals.outputResult({'error':msg})
-	sys.exit()
+	sys.exit(1)
 
 # Function: getArgInt
 # Description:
@@ -5286,7 +5383,7 @@ def getArgFloat(name, args, min, max, main=True):
 	return val
 
 def processData(live=False):
-	print('PROCESSING DATA')
+	pprint('PROCESSING DATA')
 	error = ''
 	if(sysvals.usetraceevents):
 		testruns, error = parseTraceLog(live)
@@ -5301,16 +5398,22 @@ def processData(live=False):
 			appendIncompleteTraceLog(testruns)
 	sysvals.vprint('Command:\n    %s' % sysvals.cmdline)
 	for data in testruns:
+		if data.battery:
+			a1, c1, a2, c2 = data.battery
+			s = 'Battery:\n    Before - AC: %s, Charge: %d\n     After - AC: %s, Charge: %d' % \
+				(a1, int(c1), a2, int(c2))
+			sysvals.vprint(s)
 		data.printDetails()
 	if sysvals.cgdump:
 		for data in testruns:
 			data.debugPrint()
-		sys.exit()
+		sys.exit(0)
 	if len(testruns) < 1:
+		pprint('ERROR: Not enough test data to build a timeline')
 		return (testruns, {'error': 'timeline generation failed'})
 	sysvals.vprint('Creating the html timeline (%s)...' % sysvals.htmlfile)
 	createHTML(testruns, error)
-	print('DONE')
+	pprint('DONE')
 	data = testruns[0]
 	stamp = data.stamp
 	stamp['suspend'], stamp['resume'] = data.getTimeValues()
@@ -5335,6 +5438,7 @@ def rerunTest():
 		elif not os.access(sysvals.htmlfile, os.W_OK):
 			doError('missing permission to write to %s' % sysvals.htmlfile)
 	testruns, stamp = processData(False)
+	sysvals.logmsg = ''
 	return stamp
 
 # Function: runTest
@@ -5349,13 +5453,16 @@ def runTest(n=0):
 	executeSuspend()
 	sysvals.cleanupFtrace()
 	if sysvals.skiphtml:
-		sysvals.sudouser(sysvals.testdir)
+		sysvals.sudoUserchown(sysvals.testdir)
 		return
 	testruns, stamp = processData(True)
 	for data in testruns:
 		del data
-	sysvals.sudouser(sysvals.testdir)
+	sysvals.sudoUserchown(sysvals.testdir)
 	sysvals.outputResult(stamp, n)
+	if 'error' in stamp:
+		return 2
+	return 0
 
 def find_in_html(html, start, end, firstonly=True):
 	n, out = 0, []
@@ -5380,15 +5487,87 @@ def find_in_html(html, start, end, firstonly=True):
 		return ''
 	return out
 
+def data_from_html(file, outpath, devlist=False):
+	html = open(file, 'r').read()
+	suspend = find_in_html(html, 'Kernel Suspend', 'ms')
+	resume = find_in_html(html, 'Kernel Resume', 'ms')
+	line = find_in_html(html, '<div class="stamp">', '</div>')
+	stmp = line.split()
+	if not suspend or not resume or len(stmp) != 8:
+		return False
+	try:
+		dt = datetime.strptime(' '.join(stmp[3:]), '%B %d %Y, %I:%M:%S %p')
+	except:
+		return False
+	tstr = dt.strftime('%Y/%m/%d %H:%M:%S')
+	error = find_in_html(html, '<table class="testfail"><tr><td>', '</td>')
+	if error:
+		m = re.match('[a-z]* failed in (?P<p>[a-z0-9_]*) phase', error)
+		if m:
+			result = 'fail in %s' % m.group('p')
+		else:
+			result = 'fail'
+	else:
+		result = 'pass'
+	ilist = []
+	e = find_in_html(html, 'class="err"[\w=":;\.%\- ]*>', '&rarr;</div>', False)
+	for i in list(set(e)):
+		ilist.append('%sx%d' % (i, e.count(i)) if e.count(i) > 1 else i)
+	low = find_in_html(html, 'freeze time: <b>', ' ms</b>')
+	if low and '|' in low:
+		ilist.append('FREEZEx%d' % len(low.split('|')))
+	devices = dict()
+	for line in html.split('\n'):
+		m = re.match(' *<div id=\"[a,0-9]*\" *title=\"(?P<title>.*)\" class=\"thread.*', line)
+		if not m or 'thread kth' in line or 'thread sec' in line:
+			continue
+		m = re.match('(?P<n>.*) \((?P<t>[0-9,\.]*) ms\) (?P<p>.*)', m.group('title'))
+		if not m:
+			continue
+		name, time, phase = m.group('n'), m.group('t'), m.group('p')
+		if ' async' in name or ' sync' in name:
+			name = ' '.join(name.split(' ')[:-1])
+		d = phase.split('_')[0]
+		if d not in devices:
+			devices[d] = dict()
+		if name not in devices[d]:
+			devices[d][name] = 0.0
+		devices[d][name] += float(time)
+	worst  = {'suspend': {'name':'', 'time': 0.0},
+		'resume': {'name':'', 'time': 0.0}}
+	for d in devices:
+		if d not in worst:
+			worst[d] = dict()
+		dev = devices[d]
+		if len(dev.keys()) > 0:
+			n = sorted(dev, key=dev.get, reverse=True)[0]
+			worst[d]['name'], worst[d]['time'] = n, dev[n]
+	data = {
+		'mode': stmp[2],
+		'host': stmp[0],
+		'kernel': stmp[1],
+		'time': tstr,
+		'result': result,
+		'issues': ' '.join(ilist),
+		'suspend': suspend,
+		'resume': resume,
+		'sus_worst': worst['suspend']['name'],
+		'sus_worsttime': worst['suspend']['time'],
+		'res_worst': worst['resume']['name'],
+		'res_worsttime': worst['resume']['time'],
+		'url': os.path.relpath(file, outpath),
+	}
+	if devlist:
+		data['devlist'] = devices
+	return data
+
 # Function: runSummary
 # Description:
 #	 create a summary of tests in a sub-directory
 def runSummary(subdir, local=True, genhtml=False):
 	inpath = os.path.abspath(subdir)
-	outpath = inpath
-	if local:
-		outpath = os.path.abspath('.')
-	print('Generating a summary of folder "%s"' % inpath)
+	outpath = os.path.abspath('.') if local else inpath
+	pprint('Generating a summary of folder "%s"' % inpath)
 	if genhtml:
 		for dirname, dirnames, filenames in os.walk(subdir):
 			sysvals.dmesgfile = sysvals.ftracefile = sysvals.htmlfile = ''
@@ -5400,49 +5579,30 @@ def runSummary(subdir, local=True, genhtml=False):
 			sysvals.setOutputFile()
 			if sysvals.ftracefile and sysvals.htmlfile and \
 				not os.path.exists(sysvals.htmlfile):
-				print('FTRACE: %s' % sysvals.ftracefile)
+				pprint('FTRACE: %s' % sysvals.ftracefile)
 				if sysvals.dmesgfile:
-					print('DMESG : %s' % sysvals.dmesgfile)
+					pprint('DMESG : %s' % sysvals.dmesgfile)
 				rerunTest()
 	testruns = []
+	desc = {'host':[],'mode':[],'kernel':[]}
 	for dirname, dirnames, filenames in os.walk(subdir):
 		for filename in filenames:
 			if(not re.match('.*.html', filename)):
 				continue
-			file = os.path.join(dirname, filename)
-			html = open(file, 'r').read()
-			suspend = find_in_html(html, 'Kernel Suspend', 'ms')
-			resume = find_in_html(html, 'Kernel Resume', 'ms')
-			line = find_in_html(html, '<div class="stamp">', '</div>')
-			stmp = line.split()
-			if not suspend or not resume or len(stmp) != 8:
+			data = data_from_html(os.path.join(dirname, filename), outpath)
+			if(not data):
 				continue
-			try:
-				dt = datetime.strptime(' '.join(stmp[3:]), '%B %d %Y, %I:%M:%S %p')
-			except:
-				continue
-			tstr = dt.strftime('%Y/%m/%d %H:%M:%S')
-			error = find_in_html(html, '<table class="testfail"><tr><td>', '</td>')
-			result = 'fail' if error else 'pass'
-			ilist = []
-			e = find_in_html(html, 'class="err"[\w=":;\.%\- ]*>', '&rarr;</div>', False)
-			for i in list(set(e)):
-				ilist.append('%sx%d' % (i, e.count(i)) if e.count(i) > 1 else i)
-			data = {
-				'mode': stmp[2],
-				'host': stmp[0],
-				'kernel': stmp[1],
-				'time': tstr,
-				'result': result,
-				'issues': ','.join(ilist),
-				'suspend': suspend,
-				'resume': resume,
-				'url': os.path.relpath(file, outpath),
-			}
 			testruns.append(data)
+			for key in desc:
+				if data[key] not in desc[key]:
+					desc[key].append(data[key])
 	outfile = os.path.join(outpath, 'summary.html')
-	print('Summary file: %s' % outfile)
-	createHTMLSummarySimple(testruns, outfile, inpath)
+	pprint('Summary file: %s' % outfile)
+	if len(desc['host']) == len(desc['mode']) == len(desc['kernel']) == 1:
+		title = '%s %s %s' % (desc['host'][0], desc['kernel'][0], desc['mode'][0])
+	else:
+		title = inpath
+	createHTMLSummarySimple(testruns, outfile, title)
 
 # Function: checkArgBool
 # Description:
@@ -5499,13 +5659,10 @@ def configFromFile(file):
 				else:
 					doError('invalid value --> (%s: %s), use "enable/disable"' % (option, value), True)
 			elif(option == 'display'):
-				if value in switchvalues:
-					if value in switchoff:
-						sysvals.display = -1
-					else:
-						sysvals.display = 1
-				else:
-					doError('invalid value --> (%s: %s), use "on/off"' % (option, value), True)
+				disopt = ['on', 'off', 'standby', 'suspend']
+				if value not in disopt:
+					doError('invalid value --> (%s: %s), use %s' % (option, value, disopt), True)
+				sysvals.display = value
 			elif(option == 'gzip'):
 				sysvals.gzip = checkArgBool(option, value)
 			elif(option == 'cgfilter'):
@@ -5521,9 +5678,9 @@ def configFromFile(file):
 				sysvals.cgtest = getArgInt('cgtest', value, 0, 1, False)
 			elif(option == 'cgphase'):
 				d = Data(0)
-				if value not in d.phases:
+				if value not in d.sortedPhases():
 					doError('invalid phase --> (%s: %s), valid phases are %s'\
-						% (option, value, d.phases), True)
+						% (option, value, d.sortedPhases()), True)
 				sysvals.cgphase = value
 			elif(option == 'fadd'):
 				file = sysvals.configFile(value)
@@ -5660,84 +5817,86 @@ def configFromFile(file):
 # Description:
 #	 print out the help text
 def printHelp():
-	print('')
-	print('%s v%s' % (sysvals.title, sysvals.version))
-	print('Usage: sudo sleepgraph <options> <commands>')
-	print('')
-	print('Description:')
-	print('  This tool is designed to assist kernel and OS developers in optimizing')
-	print('  their linux stack\'s suspend/resume time. Using a kernel image built')
-	print('  with a few extra options enabled, the tool will execute a suspend and')
-	print('  capture dmesg and ftrace data until resume is complete. This data is')
-	print('  transformed into a device timeline and an optional callgraph to give')
-	print('  a detailed view of which devices/subsystems are taking the most')
-	print('  time in suspend/resume.')
-	print('')
-	print('  If no specific command is given, the default behavior is to initiate')
-	print('  a suspend/resume and capture the dmesg/ftrace output as an html timeline.')
-	print('')
-	print('  Generates output files in subdirectory: suspend-yymmdd-HHMMSS')
-	print('   HTML output:                    <hostname>_<mode>.html')
-	print('   raw dmesg output:               <hostname>_<mode>_dmesg.txt')
-	print('   raw ftrace output:              <hostname>_<mode>_ftrace.txt')
-	print('')
-	print('Options:')
-	print('   -h           Print this help text')
-	print('   -v           Print the current tool version')
-	print('   -config fn   Pull arguments and config options from file fn')
-	print('   -verbose     Print extra information during execution and analysis')
-	print('   -m mode      Mode to initiate for suspend (default: %s)') % (sysvals.suspendmode)
-	print('   -o name      Overrides the output subdirectory name when running a new test')
-	print('                default: suspend-{date}-{time}')
-	print('   -rtcwake t   Wakeup t seconds after suspend, set t to "off" to disable (default: 15)')
-	print('   -addlogs     Add the dmesg and ftrace logs to the html output')
-	print('   -srgap       Add a visible gap in the timeline between sus/res (default: disabled)')
-	print('   -skiphtml    Run the test and capture the trace logs, but skip the timeline (default: disabled)')
-	print('   -result fn   Export a results table to a text file for parsing.')
-	print('  [testprep]')
-	print('   -sync        Sync the filesystems before starting the test')
-	print('   -rs on/off   Enable/disable runtime suspend for all devices, restore all after test')
-	print('   -display on/off  Turn the display on or off for the test')
-	print('  [advanced]')
-	print('   -gzip        Gzip the trace and dmesg logs to save space')
-	print('   -cmd {s}     Run the timeline over a custom command, e.g. "sync -d"')
-	print('   -proc        Add usermode process info into the timeline (default: disabled)')
-	print('   -dev         Add kernel function calls and threads to the timeline (default: disabled)')
-	print('   -x2          Run two suspend/resumes back to back (default: disabled)')
-	print('   -x2delay t   Include t ms delay between multiple test runs (default: 0 ms)')
-	print('   -predelay t  Include t ms delay before 1st suspend (default: 0 ms)')
-	print('   -postdelay t Include t ms delay after last resume (default: 0 ms)')
-	print('   -mindev ms   Discard all device blocks shorter than ms milliseconds (e.g. 0.001 for us)')
-	print('   -multi n d   Execute <n> consecutive tests at <d> seconds intervals. The outputs will')
-	print('                be created in a new subdirectory with a summary page.')
-	print('  [debug]')
-	print('   -f           Use ftrace to create device callgraphs (default: disabled)')
-	print('   -maxdepth N  limit the callgraph data to N call levels (default: 0=all)')
-	print('   -expandcg    pre-expand the callgraph data in the html output (default: disabled)')
-	print('   -fadd file   Add functions to be graphed in the timeline from a list in a text file')
-	print('   -filter "d1,d2,..." Filter out all but this comma-delimited list of device names')
-	print('   -mincg  ms   Discard all callgraphs shorter than ms milliseconds (e.g. 0.001 for us)')
-	print('   -cgphase P   Only show callgraph data for phase P (e.g. suspend_late)')
-	print('   -cgtest N    Only show callgraph data for test N (e.g. 0 or 1 in an x2 run)')
-	print('   -timeprec N  Number of significant digits in timestamps (0:S, [3:ms], 6:us)')
-	print('   -cgfilter S  Filter the callgraph output in the timeline')
-	print('   -cgskip file Callgraph functions to skip, off to disable (default: cgskip.txt)')
-	print('   -bufsize N   Set trace buffer size to N kilo-bytes (default: all of free memory)')
-	print('')
-	print('Other commands:')
-	print('   -modes       List available suspend modes')
-	print('   -status      Test to see if the system is enabled to run this tool')
-	print('   -fpdt        Print out the contents of the ACPI Firmware Performance Data Table')
-	print('   -battery     Print out battery info (if available)')
-	print('   -sysinfo     Print out system info extracted from BIOS')
-	print('   -devinfo     Print out the pm settings of all devices which support runtime suspend')
-	print('   -flist       Print the list of functions currently being captured in ftrace')
-	print('   -flistall    Print all functions capable of being captured in ftrace')
-	print('   -summary dir Create a summary of tests in this dir [-genhtml builds missing html]')
-	print('  [redo]')
-	print('   -ftrace ftracefile  Create HTML output using ftrace input (used with -dmesg)')
-	print('   -dmesg dmesgfile    Create HTML output using dmesg (used with -ftrace)')
-	print('')
+	pprint('\n%s v%s\n'\
+	'Usage: sudo sleepgraph <options> <commands>\n'\
+	'\n'\
+	'Description:\n'\
+	'  This tool is designed to assist kernel and OS developers in optimizing\n'\
+	'  their linux stack\'s suspend/resume time. Using a kernel image built\n'\
+	'  with a few extra options enabled, the tool will execute a suspend and\n'\
+	'  capture dmesg and ftrace data until resume is complete. This data is\n'\
+	'  transformed into a device timeline and an optional callgraph to give\n'\
+	'  a detailed view of which devices/subsystems are taking the most\n'\
+	'  time in suspend/resume.\n'\
+	'\n'\
+	'  If no specific command is given, the default behavior is to initiate\n'\
+	'  a suspend/resume and capture the dmesg/ftrace output as an html timeline.\n'\
+	'\n'\
+	'  Generates output files in subdirectory: suspend-yymmdd-HHMMSS\n'\
+	'   HTML output:                    <hostname>_<mode>.html\n'\
+	'   raw dmesg output:               <hostname>_<mode>_dmesg.txt\n'\
+	'   raw ftrace output:              <hostname>_<mode>_ftrace.txt\n'\
+	'\n'\
+	'Options:\n'\
+	'   -h           Print this help text\n'\
+	'   -v           Print the current tool version\n'\
+	'   -config fn   Pull arguments and config options from file fn\n'\
+	'   -verbose     Print extra information during execution and analysis\n'\
+	'   -m mode      Mode to initiate for suspend (default: %s)\n'\
+	'   -o name      Overrides the output subdirectory name when running a new test\n'\
+	'                default: suspend-{date}-{time}\n'\
+	'   -rtcwake t   Wakeup t seconds after suspend, set t to "off" to disable (default: 15)\n'\
+	'   -addlogs     Add the dmesg and ftrace logs to the html output\n'\
+	'   -srgap       Add a visible gap in the timeline between sus/res (default: disabled)\n'\
+	'   -skiphtml    Run the test and capture the trace logs, but skip the timeline (default: disabled)\n'\
+	'   -result fn   Export a results table to a text file for parsing.\n'\
+	'  [testprep]\n'\
+	'   -sync        Sync the filesystems before starting the test\n'\
+	'   -rs on/off   Enable/disable runtime suspend for all devices, restore all after test\n'\
+	'   -display m   Change the display mode to m for the test (on/off/standby/suspend)\n'\
+	'  [advanced]\n'\
+	'   -gzip        Gzip the trace and dmesg logs to save space\n'\
+	'   -cmd {s}     Run the timeline over a custom command, e.g. "sync -d"\n'\
+	'   -proc        Add usermode process info into the timeline (default: disabled)\n'\
+	'   -dev         Add kernel function calls and threads to the timeline (default: disabled)\n'\
+	'   -x2          Run two suspend/resumes back to back (default: disabled)\n'\
+	'   -x2delay t   Include t ms delay between multiple test runs (default: 0 ms)\n'\
+	'   -predelay t  Include t ms delay before 1st suspend (default: 0 ms)\n'\
+	'   -postdelay t Include t ms delay after last resume (default: 0 ms)\n'\
+	'   -mindev ms   Discard all device blocks shorter than ms milliseconds (e.g. 0.001 for us)\n'\
+	'   -multi n d   Execute <n> consecutive tests at <d> seconds intervals. The outputs will\n'\
+	'                be created in a new subdirectory with a summary page.\n'\
+	'  [debug]\n'\
+	'   -f           Use ftrace to create device callgraphs (default: disabled)\n'\
+	'   -maxdepth N  limit the callgraph data to N call levels (default: 0=all)\n'\
+	'   -expandcg    pre-expand the callgraph data in the html output (default: disabled)\n'\
+	'   -fadd file   Add functions to be graphed in the timeline from a list in a text file\n'\
+	'   -filter "d1,d2,..." Filter out all but this comma-delimited list of device names\n'\
+	'   -mincg  ms   Discard all callgraphs shorter than ms milliseconds (e.g. 0.001 for us)\n'\
+	'   -cgphase P   Only show callgraph data for phase P (e.g. suspend_late)\n'\
+	'   -cgtest N    Only show callgraph data for test N (e.g. 0 or 1 in an x2 run)\n'\
+	'   -timeprec N  Number of significant digits in timestamps (0:S, [3:ms], 6:us)\n'\
+	'   -cgfilter S  Filter the callgraph output in the timeline\n'\
+	'   -cgskip file Callgraph functions to skip, off to disable (default: cgskip.txt)\n'\
+	'   -bufsize N   Set trace buffer size to N kilo-bytes (default: all of free memory)\n'\
+	'   -devdump     Print out all the raw device data for each phase\n'\
+	'   -cgdump      Print out all the raw callgraph data\n'\
+	'\n'\
+	'Other commands:\n'\
+	'   -modes       List available suspend modes\n'\
+	'   -status      Test to see if the system is enabled to run this tool\n'\
+	'   -fpdt        Print out the contents of the ACPI Firmware Performance Data Table\n'\
+	'   -battery     Print out battery info (if available)\n'\
+	'   -x<mode>     Test xset by toggling the given mode (on/off/standby/suspend)\n'\
+	'   -sysinfo     Print out system info extracted from BIOS\n'\
+	'   -devinfo     Print out the pm settings of all devices which support runtime suspend\n'\
+	'   -flist       Print the list of functions currently being captured in ftrace\n'\
+	'   -flistall    Print all functions capable of being captured in ftrace\n'\
+	'   -summary dir Create a summary of tests in this dir [-genhtml builds missing html]\n'\
+	'  [redo]\n'\
+	'   -ftrace ftracefile  Create HTML output using ftrace input (used with -dmesg)\n'\
+	'   -dmesg dmesgfile    Create HTML output using dmesg (used with -ftrace)\n'\
+	'' % (sysvals.title, sysvals.version, sysvals.suspendmode))
 	return True
 
 # ----------------- MAIN --------------------
@@ -5745,7 +5904,9 @@ def printHelp():
 if __name__ == '__main__':
 	genhtml = False
 	cmd = ''
-	simplecmds = ['-sysinfo', '-modes', '-fpdt', '-flist', '-flistall', '-devinfo', '-status', '-battery']
+	simplecmds = ['-sysinfo', '-modes', '-fpdt', '-flist', '-flistall',
+		'-devinfo', '-status', '-battery', '-xon', '-xoff', '-xstandby',
+		'-xsuspend', '-xinit', '-xreset', '-xstat']
 	if '-f' in sys.argv:
 		sysvals.cgskip = sysvals.configFile('cgskip.txt')
 	# loop through the command line arguments
@@ -5763,10 +5924,10 @@ if __name__ == '__main__':
 			cmd = arg[1:]
 		elif(arg == '-h'):
 			printHelp()
-			sys.exit()
+			sys.exit(0)
 		elif(arg == '-v'):
-			print("Version %s" % sysvals.version)
-			sys.exit()
+			pprint("Version %s" % sysvals.version)
+			sys.exit(0)
 		elif(arg == '-x2'):
 			sysvals.execcount = 2
 		elif(arg == '-x2delay'):
@@ -5781,10 +5942,16 @@ if __name__ == '__main__':
 			sysvals.skiphtml = True
 		elif(arg == '-cgdump'):
 			sysvals.cgdump = True
+		elif(arg == '-devdump'):
+			sysvals.devdump = True
 		elif(arg == '-genhtml'):
 			genhtml = True
 		elif(arg == '-addlogs'):
 			sysvals.dmesglog = sysvals.ftracelog = True
+		elif(arg == '-addlogdmesg'):
+			sysvals.dmesglog = True
+		elif(arg == '-addlogftrace'):
+			sysvals.ftracelog = True
 		elif(arg == '-verbose'):
 			sysvals.verbose = True
 		elif(arg == '-proc'):
@@ -5811,14 +5978,11 @@ if __name__ == '__main__':
 			try:
 				val = args.next()
 			except:
-				doError('-display requires "on" or "off"', True)
-			if val.lower() in switchvalues:
-				if val.lower() in switchoff:
-					sysvals.display = -1
-				else:
-					sysvals.display = 1
-			else:
-				doError('invalid option: %s, use "on/off"' % val, True)
+				doError('-display requires an mode value', True)
+			disopt = ['on', 'off', 'standby', 'suspend']
+			if val.lower() not in disopt:
+				doError('valid display mode values are %s' % disopt, True)
+			sysvals.display = val.lower()
 		elif(arg == '-maxdepth'):
 			sysvals.max_graph_depth = getArgInt('-maxdepth', args, 0, 1000)
 		elif(arg == '-rtcwake'):
@@ -5847,9 +6011,9 @@ if __name__ == '__main__':
 			except:
 				doError('No phase name supplied', True)
 			d = Data(0)
-			if val not in d.phases:
+			if val not in d.phasedef:
 				doError('invalid phase --> (%s: %s), valid phases are %s'\
-					% (arg, val, d.phases), True)
+					% (arg, val, d.phasedef.keys()), True)
 			sysvals.cgphase = val
 		elif(arg == '-cgfilter'):
 			try:
@@ -5951,6 +6115,7 @@ if __name__ == '__main__':
 			except:
 				doError('No result file supplied', True)
 			sysvals.result = val
+			sysvals.signalHandlerInit()
 		else:
 			doError('Invalid argument: '+arg, True)
 
@@ -5975,12 +6140,20 @@ if __name__ == '__main__':
 
 	# just run a utility command and exit
 	if(cmd != ''):
+		ret = 0
 		if(cmd == 'status'):
-			statusCheck(True)
+			if not statusCheck(True):
+				ret = 1
 		elif(cmd == 'fpdt'):
-			getFPDT(True)
+			if not getFPDT(True):
+				ret = 1
 		elif(cmd == 'battery'):
-			print 'AC Connect: %s\nCharge: %d' % getBattery()
+			out = getBattery()
+			if out:
+				pprint('AC Connect    : %s\nBattery Charge: %d' % out)
+			else:
+				pprint('no battery found')
+				ret = 1
 		elif(cmd == 'sysinfo'):
 			sysvals.printSystemInfo(True)
 		elif(cmd == 'devinfo'):
@@ -5993,25 +6166,28 @@ if __name__ == '__main__':
 			sysvals.getFtraceFilterFunctions(False)
 		elif(cmd == 'summary'):
 			runSummary(sysvals.outdir, True, genhtml)
-		sys.exit()
+		elif(cmd in ['xon', 'xoff', 'xstandby', 'xsuspend', 'xinit', 'xreset']):
+			sysvals.verbose = True
+			ret = displayControl(cmd[1:])
+		elif(cmd == 'xstat'):
+			pprint('Display Status: %s' % displayControl('stat').upper())
+		sys.exit(ret)
 
 	# if instructed, re-analyze existing data files
 	if(sysvals.notestrun):
 		stamp = rerunTest()
 		sysvals.outputResult(stamp)
-		sys.exit()
+		sys.exit(0)
 
 	# verify that we can run a test
-	if(not statusCheck()):
-		doError('Check FAILED, aborting the test run!')
+	error = statusCheck()
+	if(error):
+		doError(error)
 
-	# extract mem modes and convert
+	# extract mem/disk extra modes and convert
 	mode = sysvals.suspendmode
-	if 'mem' == mode[:3]:
-		if '-' in mode:
-			memmode = mode.split('-')[-1]
-		else:
-			memmode = 'deep'
+	if mode.startswith('mem'):
+		memmode = mode.split('-', 1)[-1] if '-' in mode else 'deep'
 		if memmode == 'shallow':
 			mode = 'standby'
 		elif memmode ==  's2idle':
@@ -6020,13 +6196,16 @@ if __name__ == '__main__':
 			mode = 'mem'
 		sysvals.memmode = memmode
 		sysvals.suspendmode = mode
+	if mode.startswith('disk-'):
+		sysvals.diskmode = mode.split('-', 1)[-1]
+		sysvals.suspendmode = 'disk'
 
 	sysvals.systemInfo(dmidecode(sysvals.mempath))
 
 	setRuntimeSuspend(True)
 	if sysvals.display:
-		call('xset -d :0.0 dpms 0 0 0', shell=True)
-		call('xset -d :0.0 s off', shell=True)
+		displayControl('init')
+	ret = 0
 	if sysvals.multitest['run']:
 		# run multiple tests in a separate subdirectory
 		if not sysvals.outdir:
@@ -6036,22 +6215,23 @@ if __name__ == '__main__':
 			os.mkdir(sysvals.outdir)
 		for i in range(sysvals.multitest['count']):
 			if(i != 0):
-				print('Waiting %d seconds...' % (sysvals.multitest['delay']))
+				pprint('Waiting %d seconds...' % (sysvals.multitest['delay']))
 				time.sleep(sysvals.multitest['delay'])
-			print('TEST (%d/%d) START' % (i+1, sysvals.multitest['count']))
+			pprint('TEST (%d/%d) START' % (i+1, sysvals.multitest['count']))
 			fmt = 'suspend-%y%m%d-%H%M%S'
 			sysvals.testdir = os.path.join(sysvals.outdir, datetime.now().strftime(fmt))
-			runTest(i+1)
-			print('TEST (%d/%d) COMPLETE' % (i+1, sysvals.multitest['count']))
+			ret = runTest(i+1)
+			pprint('TEST (%d/%d) COMPLETE' % (i+1, sysvals.multitest['count']))
 			sysvals.logmsg = ''
 		if not sysvals.skiphtml:
 			runSummary(sysvals.outdir, False, False)
-		sysvals.sudouser(sysvals.outdir)
+		sysvals.sudoUserchown(sysvals.outdir)
 	else:
 		if sysvals.outdir:
 			sysvals.testdir = sysvals.outdir
 		# run the test in the current directory
-		runTest()
+		ret = runTest()
 	if sysvals.display:
-		call('xset -d :0.0 s reset', shell=True)
+		displayControl('reset')
 	setRuntimeSuspend(False)
+	sys.exit(ret)
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 980bd9d20646..328f62e6ea02 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -2082,7 +2082,7 @@ int has_turbo_ratio_group_limits(int family, int model)
 	switch (model) {
 	case INTEL_FAM6_ATOM_GOLDMONT:
 	case INTEL_FAM6_SKYLAKE_X:
-	case INTEL_FAM6_ATOM_DENVERTON:
+	case INTEL_FAM6_ATOM_GOLDMONT_X:
 		return 1;
 	}
 	return 0;
@@ -3149,9 +3149,9 @@ int probe_nhm_msrs(unsigned int family, unsigned int model)
 		pkg_cstate_limits = skx_pkg_cstate_limits;
 		has_misc_feature_control = 1;
 		break;
-	case INTEL_FAM6_ATOM_SILVERMONT1:	/* BYT */
+	case INTEL_FAM6_ATOM_SILVERMONT:	/* BYT */
 		no_MSR_MISC_PWR_MGMT = 1;
-	case INTEL_FAM6_ATOM_SILVERMONT2:	/* AVN */
+	case INTEL_FAM6_ATOM_SILVERMONT_X:	/* AVN */
 		pkg_cstate_limits = slv_pkg_cstate_limits;
 		break;
 	case INTEL_FAM6_ATOM_AIRMONT:	/* AMT */
@@ -3163,8 +3163,8 @@ int probe_nhm_msrs(unsigned int family, unsigned int model)
 		pkg_cstate_limits = phi_pkg_cstate_limits;
 		break;
 	case INTEL_FAM6_ATOM_GOLDMONT:	/* BXT */
-	case INTEL_FAM6_ATOM_GEMINI_LAKE:
-	case INTEL_FAM6_ATOM_DENVERTON:	/* DNV */
+	case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
+	case INTEL_FAM6_ATOM_GOLDMONT_X:	/* DNV */
 		pkg_cstate_limits = bxt_pkg_cstate_limits;
 		break;
 	default:
@@ -3193,9 +3193,9 @@ int has_slv_msrs(unsigned int family, unsigned int model)
 		return 0;
 
 	switch (model) {
-	case INTEL_FAM6_ATOM_SILVERMONT1:
-	case INTEL_FAM6_ATOM_MERRIFIELD:
-	case INTEL_FAM6_ATOM_MOOREFIELD:
+	case INTEL_FAM6_ATOM_SILVERMONT:
+	case INTEL_FAM6_ATOM_SILVERMONT_MID:
+	case INTEL_FAM6_ATOM_AIRMONT_MID:
 		return 1;
 	}
 	return 0;
@@ -3207,7 +3207,7 @@ int is_dnv(unsigned int family, unsigned int model)
 		return 0;
 
 	switch (model) {
-	case INTEL_FAM6_ATOM_DENVERTON:
+	case INTEL_FAM6_ATOM_GOLDMONT_X:
 		return 1;
 	}
 	return 0;
@@ -3724,8 +3724,8 @@ double get_tdp(unsigned int model)
 			return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units;
 
 	switch (model) {
-	case INTEL_FAM6_ATOM_SILVERMONT1:
-	case INTEL_FAM6_ATOM_SILVERMONT2:
+	case INTEL_FAM6_ATOM_SILVERMONT:
+	case INTEL_FAM6_ATOM_SILVERMONT_X:
 		return 30.0;
 	default:
 		return 135.0;
@@ -3791,7 +3791,7 @@ void rapl_probe(unsigned int family, unsigned int model)
 		}
 		break;
 	case INTEL_FAM6_ATOM_GOLDMONT:	/* BXT */
-	case INTEL_FAM6_ATOM_GEMINI_LAKE:
+	case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
 		do_rapl = RAPL_PKG | RAPL_PKG_POWER_INFO;
 		if (rapl_joules)
 			BIC_PRESENT(BIC_Pkg_J);
@@ -3850,8 +3850,8 @@ void rapl_probe(unsigned int family, unsigned int model)
 			BIC_PRESENT(BIC_RAMWatt);
 		}
 		break;
-	case INTEL_FAM6_ATOM_SILVERMONT1:	/* BYT */
-	case INTEL_FAM6_ATOM_SILVERMONT2:	/* AVN */
+	case INTEL_FAM6_ATOM_SILVERMONT:	/* BYT */
+	case INTEL_FAM6_ATOM_SILVERMONT_X:	/* AVN */
 		do_rapl = RAPL_PKG | RAPL_CORES;
 		if (rapl_joules) {
 			BIC_PRESENT(BIC_Pkg_J);
@@ -3861,7 +3861,7 @@ void rapl_probe(unsigned int family, unsigned int model)
 			BIC_PRESENT(BIC_CorWatt);
 		}
 		break;
-	case INTEL_FAM6_ATOM_DENVERTON:	/* DNV */
+	case INTEL_FAM6_ATOM_GOLDMONT_X:	/* DNV */
 		do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO | RAPL_CORES_ENERGY_STATUS;
 		BIC_PRESENT(BIC_PKG__);
 		BIC_PRESENT(BIC_RAM__);
@@ -3884,7 +3884,7 @@ void rapl_probe(unsigned int family, unsigned int model)
 		return;
 
 	rapl_power_units = 1.0 / (1 << (msr & 0xF));
-	if (model == INTEL_FAM6_ATOM_SILVERMONT1)
+	if (model == INTEL_FAM6_ATOM_SILVERMONT)
 		rapl_energy_units = 1.0 * (1 << (msr >> 8 & 0x1F)) / 1000000;
 	else
 		rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F));
@@ -4141,8 +4141,8 @@ int has_snb_msrs(unsigned int family, unsigned int model)
 	case INTEL_FAM6_CANNONLAKE_MOBILE:	/* CNL */
 	case INTEL_FAM6_SKYLAKE_X:	/* SKX */
 	case INTEL_FAM6_ATOM_GOLDMONT:	/* BXT */
-	case INTEL_FAM6_ATOM_GEMINI_LAKE:
-	case INTEL_FAM6_ATOM_DENVERTON:	/* DNV */
+	case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
+	case INTEL_FAM6_ATOM_GOLDMONT_X:	/* DNV */
 		return 1;
 	}
 	return 0;
@@ -4174,7 +4174,7 @@ int has_hsw_msrs(unsigned int family, unsigned int model)
 	case INTEL_FAM6_KABYLAKE_DESKTOP:	/* KBL */
 	case INTEL_FAM6_CANNONLAKE_MOBILE:	/* CNL */
 	case INTEL_FAM6_ATOM_GOLDMONT:	/* BXT */
-	case INTEL_FAM6_ATOM_GEMINI_LAKE:
+	case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
 		return 1;
 	}
 	return 0;
@@ -4209,8 +4209,8 @@ int is_slm(unsigned int family, unsigned int model)
 	if (!genuine_intel)
 		return 0;
 	switch (model) {
-	case INTEL_FAM6_ATOM_SILVERMONT1:	/* BYT */
-	case INTEL_FAM6_ATOM_SILVERMONT2:	/* AVN */
+	case INTEL_FAM6_ATOM_SILVERMONT:	/* BYT */
+	case INTEL_FAM6_ATOM_SILVERMONT_X:	/* AVN */
 		return 1;
 	}
 	return 0;
@@ -4581,11 +4581,11 @@ void process_cpuid()
 				case INTEL_FAM6_KABYLAKE_DESKTOP:	/* KBL */
 					crystal_hz = 24000000;	/* 24.0 MHz */
 					break;
-				case INTEL_FAM6_ATOM_DENVERTON:	/* DNV */
+				case INTEL_FAM6_ATOM_GOLDMONT_X:	/* DNV */
 					crystal_hz = 25000000;	/* 25.0 MHz */
 					break;
 				case INTEL_FAM6_ATOM_GOLDMONT:	/* BXT */
-				case INTEL_FAM6_ATOM_GEMINI_LAKE:
+				case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
 					crystal_hz = 19200000;	/* 19.2 MHz */
 					break;
 				default:
diff --git a/tools/spi/spidev_test.c b/tools/spi/spidev_test.c
index 8c590cd1171a..4c12e6aea5d5 100644
--- a/tools/spi/spidev_test.c
+++ b/tools/spi/spidev_test.c
@@ -73,12 +73,12 @@ static void hex_dump(const void *src, size_t length, size_t line_size,
 				while (i++ % line_size)
 					printf("__ ");
 			}
-			printf(" | ");  /* right close */
+			printf(" |");
 			while (line < address) {
 				c = *line++;
-				printf("%c", (c < 33 || c == 255) ? 0x2E : c);
+				printf("%c", (c < 32 || c > 126) ? '.' : c);
 			}
-			printf("\n");
+			printf("|\n");
 			if (length > 0)
 				printf("%s | ", prefix);
 		}
diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild
index 0392153a0009..778ceb651000 100644
--- a/tools/testing/nvdimm/Kbuild
+++ b/tools/testing/nvdimm/Kbuild
@@ -22,6 +22,7 @@ NVDIMM_SRC := $(DRIVERS)/nvdimm
 ACPI_SRC := $(DRIVERS)/acpi/nfit
 DAX_SRC := $(DRIVERS)/dax
 ccflags-y := -I$(src)/$(NVDIMM_SRC)/
+ccflags-y += -I$(src)/$(ACPI_SRC)/
 
 obj-$(CONFIG_LIBNVDIMM) += libnvdimm.o
 obj-$(CONFIG_BLK_DEV_PMEM) += nd_pmem.o
diff --git a/tools/testing/nvdimm/acpi_nfit_test.c b/tools/testing/nvdimm/acpi_nfit_test.c
index 43521512e577..fec8fb1b7715 100644
--- a/tools/testing/nvdimm/acpi_nfit_test.c
+++ b/tools/testing/nvdimm/acpi_nfit_test.c
@@ -4,5 +4,13 @@
 #include <linux/module.h>
 #include <linux/printk.h>
 #include "watermark.h"
+#include <nfit.h>
 
 nfit_test_watermark(acpi_nfit);
+
+/* strong / override definition of nfit_intel_shutdown_status */
+void nfit_intel_shutdown_status(struct nfit_mem *nfit_mem)
+{
+	set_bit(NFIT_MEM_DIRTY_COUNT, &nfit_mem->flags);
+	nfit_mem->dirty_shutdown = 42;
+}
diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index cffc2c5a778d..9527d47a1070 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -24,6 +24,7 @@
 #include <linux/list.h>
 #include <linux/slab.h>
 #include <nd-core.h>
+#include <intel.h>
 #include <nfit.h>
 #include <nd.h>
 #include "nfit_test.h"
@@ -148,6 +149,7 @@ static const struct nd_intel_smart smart_def = {
 		| ND_INTEL_SMART_ALARM_VALID
 		| ND_INTEL_SMART_USED_VALID
 		| ND_INTEL_SMART_SHUTDOWN_VALID
+		| ND_INTEL_SMART_SHUTDOWN_COUNT_VALID
 		| ND_INTEL_SMART_MTEMP_VALID
 		| ND_INTEL_SMART_CTEMP_VALID,
 	.health = ND_INTEL_SMART_NON_CRITICAL_HEALTH,
@@ -160,8 +162,8 @@ static const struct nd_intel_smart smart_def = {
 	.ait_status = 1,
 	.life_used = 5,
 	.shutdown_state = 0,
+	.shutdown_count = 42,
 	.vendor_size = 0,
-	.shutdown_count = 100,
 };
 
 struct nfit_test_fw {
diff --git a/tools/testing/nvdimm/test/nfit_test.h b/tools/testing/nvdimm/test/nfit_test.h
index 33752e06ff8d..ade14fe3837e 100644
--- a/tools/testing/nvdimm/test/nfit_test.h
+++ b/tools/testing/nvdimm/test/nfit_test.h
@@ -117,30 +117,6 @@ struct nd_cmd_ars_err_inj_stat {
 #define ND_INTEL_SMART_INJECT_FATAL		(1 << 2)
 #define ND_INTEL_SMART_INJECT_SHUTDOWN		(1 << 3)
 
-struct nd_intel_smart {
-	__u32 status;
-	union {
-		struct {
-			__u32 flags;
-			__u8 reserved0[4];
-			__u8 health;
-			__u8 spares;
-			__u8 life_used;
-			__u8 alarm_flags;
-			__u16 media_temperature;
-			__u16 ctrl_temperature;
-			__u32 shutdown_count;
-			__u8 ait_status;
-			__u16 pmic_temperature;
-			__u8 reserved1[8];
-			__u8 shutdown_state;
-			__u32 vendor_size;
-			__u8 vendor_data[92];
-		} __packed;
-		__u8 data[128];
-	};
-} __packed;
-
 struct nd_intel_smart_threshold {
 	__u32 status;
 	union {
diff --git a/tools/testing/radix-tree/.gitignore b/tools/testing/radix-tree/.gitignore
index d4706c0ffceb..3834899b6693 100644
--- a/tools/testing/radix-tree/.gitignore
+++ b/tools/testing/radix-tree/.gitignore
@@ -4,3 +4,4 @@ idr-test
 main
 multiorder
 radix-tree.c
+xarray
diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile
index 37baecc3766f..acf1afa01c5b 100644
--- a/tools/testing/radix-tree/Makefile
+++ b/tools/testing/radix-tree/Makefile
@@ -4,8 +4,8 @@ CFLAGS += -I. -I../../include -g -Og -Wall -D_LGPL_SOURCE -fsanitize=address \
 	  -fsanitize=undefined
 LDFLAGS += -fsanitize=address -fsanitize=undefined
 LDLIBS+= -lpthread -lurcu
-TARGETS = main idr-test multiorder
-CORE_OFILES := radix-tree.o idr.o linux.o test.o find_bit.o
+TARGETS = main idr-test multiorder xarray
+CORE_OFILES := xarray.o radix-tree.o idr.o linux.o test.o find_bit.o bitmap.o
 OFILES = main.o $(CORE_OFILES) regression1.o regression2.o regression3.o \
 	 tag_check.o multiorder.o idr-test.o iteration_check.o benchmark.o
 
@@ -25,6 +25,8 @@ main:	$(OFILES)
 idr-test.o: ../../../lib/test_ida.c
 idr-test: idr-test.o $(CORE_OFILES)
 
+xarray: $(CORE_OFILES)
+
 multiorder: multiorder.o $(CORE_OFILES)
 
 clean:
@@ -35,6 +37,7 @@ vpath %.c ../../lib
 $(OFILES): Makefile *.h */*.h generated/map-shift.h \
 	../../include/linux/*.h \
 	../../include/asm/*.h \
+	../../../include/linux/xarray.h \
 	../../../include/linux/radix-tree.h \
 	../../../include/linux/idr.h
 
@@ -44,8 +47,10 @@ radix-tree.c: ../../../lib/radix-tree.c
 idr.c: ../../../lib/idr.c
 	sed -e 's/^static //' -e 's/__always_inline //' -e 's/inline //' < $< > $@
 
+xarray.o: ../../../lib/xarray.c ../../../lib/test_xarray.c
+
 generated/map-shift.h:
 	@if ! grep -qws $(SHIFT) generated/map-shift.h; then		\
-		echo "#define RADIX_TREE_MAP_SHIFT $(SHIFT)" >		\
+		echo "#define XA_CHUNK_SHIFT $(SHIFT)" >		\
 				generated/map-shift.h;			\
 	fi
diff --git a/tools/testing/radix-tree/benchmark.c b/tools/testing/radix-tree/benchmark.c
index 99c40f3ed133..7e195ed8e92d 100644
--- a/tools/testing/radix-tree/benchmark.c
+++ b/tools/testing/radix-tree/benchmark.c
@@ -17,9 +17,6 @@
 #include <time.h>
 #include "test.h"
 
-#define for_each_index(i, base, order) \
-	        for (i = base; i < base + (1 << order); i++)
-
 #define NSEC_PER_SEC	1000000000L
 
 static long long benchmark_iter(struct radix_tree_root *root, bool tagged)
@@ -61,7 +58,7 @@ again:
 }
 
 static void benchmark_insert(struct radix_tree_root *root,
-			     unsigned long size, unsigned long step, int order)
+			     unsigned long size, unsigned long step)
 {
 	struct timespec start, finish;
 	unsigned long index;
@@ -70,19 +67,19 @@ static void benchmark_insert(struct radix_tree_root *root,
 	clock_gettime(CLOCK_MONOTONIC, &start);
 
 	for (index = 0 ; index < size ; index += step)
-		item_insert_order(root, index, order);
+		item_insert(root, index);
 
 	clock_gettime(CLOCK_MONOTONIC, &finish);
 
 	nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC +
 	       (finish.tv_nsec - start.tv_nsec);
 
-	printv(2, "Size: %8ld, step: %8ld, order: %d, insertion: %15lld ns\n",
-		size, step, order, nsec);
+	printv(2, "Size: %8ld, step: %8ld, insertion: %15lld ns\n",
+		size, step, nsec);
 }
 
 static void benchmark_tagging(struct radix_tree_root *root,
-			     unsigned long size, unsigned long step, int order)
+			     unsigned long size, unsigned long step)
 {
 	struct timespec start, finish;
 	unsigned long index;
@@ -98,138 +95,53 @@ static void benchmark_tagging(struct radix_tree_root *root,
 	nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC +
 	       (finish.tv_nsec - start.tv_nsec);
 
-	printv(2, "Size: %8ld, step: %8ld, order: %d, tagging: %17lld ns\n",
-		size, step, order, nsec);
+	printv(2, "Size: %8ld, step: %8ld, tagging: %17lld ns\n",
+		size, step, nsec);
 }
 
 static void benchmark_delete(struct radix_tree_root *root,
-			     unsigned long size, unsigned long step, int order)
+			     unsigned long size, unsigned long step)
 {
 	struct timespec start, finish;
-	unsigned long index, i;
+	unsigned long index;
 	long long nsec;
 
 	clock_gettime(CLOCK_MONOTONIC, &start);
 
 	for (index = 0 ; index < size ; index += step)
-		for_each_index(i, index, order)
-			item_delete(root, i);
+		item_delete(root, index);
 
 	clock_gettime(CLOCK_MONOTONIC, &finish);
 
 	nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC +
 	       (finish.tv_nsec - start.tv_nsec);
 
-	printv(2, "Size: %8ld, step: %8ld, order: %d, deletion: %16lld ns\n",
-		size, step, order, nsec);
+	printv(2, "Size: %8ld, step: %8ld, deletion: %16lld ns\n",
+		size, step, nsec);
 }
 
-static void benchmark_size(unsigned long size, unsigned long step, int order)
+static void benchmark_size(unsigned long size, unsigned long step)
 {
 	RADIX_TREE(tree, GFP_KERNEL);
 	long long normal, tagged;
 
-	benchmark_insert(&tree, size, step, order);
-	benchmark_tagging(&tree, size, step, order);
+	benchmark_insert(&tree, size, step);
+	benchmark_tagging(&tree, size, step);
 
 	tagged = benchmark_iter(&tree, true);
 	normal = benchmark_iter(&tree, false);
 
-	printv(2, "Size: %8ld, step: %8ld, order: %d, tagged iteration: %8lld ns\n",
-		size, step, order, tagged);
-	printv(2, "Size: %8ld, step: %8ld, order: %d, normal iteration: %8lld ns\n",
-		size, step, order, normal);
+	printv(2, "Size: %8ld, step: %8ld, tagged iteration: %8lld ns\n",
+		size, step, tagged);
+	printv(2, "Size: %8ld, step: %8ld, normal iteration: %8lld ns\n",
+		size, step, normal);
 
-	benchmark_delete(&tree, size, step, order);
+	benchmark_delete(&tree, size, step);
 
 	item_kill_tree(&tree);
 	rcu_barrier();
 }
 
-static long long  __benchmark_split(unsigned long index,
-				    int old_order, int new_order)
-{
-	struct timespec start, finish;
-	long long nsec;
-	RADIX_TREE(tree, GFP_ATOMIC);
-
-	item_insert_order(&tree, index, old_order);
-
-	clock_gettime(CLOCK_MONOTONIC, &start);
-	radix_tree_split(&tree, index, new_order);
-	clock_gettime(CLOCK_MONOTONIC, &finish);
-	nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC +
-	       (finish.tv_nsec - start.tv_nsec);
-
-	item_kill_tree(&tree);
-
-	return nsec;
-
-}
-
-static void benchmark_split(unsigned long size, unsigned long step)
-{
-	int i, j, idx;
-	long long nsec = 0;
-
-
-	for (idx = 0; idx < size; idx += step) {
-		for (i = 3; i < 11; i++) {
-			for (j = 0; j < i; j++) {
-				nsec += __benchmark_split(idx, i, j);
-			}
-		}
-	}
-
-	printv(2, "Size %8ld, step %8ld, split time %10lld ns\n",
-			size, step, nsec);
-
-}
-
-static long long  __benchmark_join(unsigned long index,
-			     unsigned order1, unsigned order2)
-{
-	unsigned long loc;
-	struct timespec start, finish;
-	long long nsec;
-	void *item, *item2 = item_create(index + 1, order1);
-	RADIX_TREE(tree, GFP_KERNEL);
-
-	item_insert_order(&tree, index, order2);
-	item = radix_tree_lookup(&tree, index);
-
-	clock_gettime(CLOCK_MONOTONIC, &start);
-	radix_tree_join(&tree, index + 1, order1, item2);
-	clock_gettime(CLOCK_MONOTONIC, &finish);
-	nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC +
-		(finish.tv_nsec - start.tv_nsec);
-
-	loc = find_item(&tree, item);
-	if (loc == -1)
-		free(item);
-
-	item_kill_tree(&tree);
-
-	return nsec;
-}
-
-static void benchmark_join(unsigned long step)
-{
-	int i, j, idx;
-	long long nsec = 0;
-
-	for (idx = 0; idx < 1 << 10; idx += step) {
-		for (i = 1; i < 15; i++) {
-			for (j = 0; j < i; j++) {
-				nsec += __benchmark_join(idx, i, j);
-			}
-		}
-	}
-
-	printv(2, "Size %8d, step %8ld, join time %10lld ns\n",
-			1 << 10, step, nsec);
-}
-
 void benchmark(void)
 {
 	unsigned long size[] = {1 << 10, 1 << 20, 0};
@@ -242,16 +154,5 @@ void benchmark(void)
 
 	for (c = 0; size[c]; c++)
 		for (s = 0; step[s]; s++)
-			benchmark_size(size[c], step[s], 0);
-
-	for (c = 0; size[c]; c++)
-		for (s = 0; step[s]; s++)
-			benchmark_size(size[c], step[s] << 9, 9);
-
-	for (c = 0; size[c]; c++)
-		for (s = 0; step[s]; s++)
-			benchmark_split(size[c], step[s]);
-
-	for (s = 0; step[s]; s++)
-		benchmark_join(step[s]);
+			benchmark_size(size[c], step[s]);
 }
diff --git a/tools/testing/radix-tree/bitmap.c b/tools/testing/radix-tree/bitmap.c
new file mode 100644
index 000000000000..66ec4a24a203
--- /dev/null
+++ b/tools/testing/radix-tree/bitmap.c
@@ -0,0 +1,23 @@
+/* lib/bitmap.c pulls in at least two other files. */
+
+#include <linux/bitmap.h>
+
+void bitmap_clear(unsigned long *map, unsigned int start, int len)
+{
+	unsigned long *p = map + BIT_WORD(start);
+	const unsigned int size = start + len;
+	int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG);
+	unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start);
+
+	while (len - bits_to_clear >= 0) {
+		*p &= ~mask_to_clear;
+		len -= bits_to_clear;
+		bits_to_clear = BITS_PER_LONG;
+		mask_to_clear = ~0UL;
+		p++;
+	}
+	if (len) {
+		mask_to_clear &= BITMAP_LAST_WORD_MASK(size);
+		*p &= ~mask_to_clear;
+	}
+}
diff --git a/tools/testing/radix-tree/generated/autoconf.h b/tools/testing/radix-tree/generated/autoconf.h
index cf88dc5b8832..2218b3cc184e 100644
--- a/tools/testing/radix-tree/generated/autoconf.h
+++ b/tools/testing/radix-tree/generated/autoconf.h
@@ -1 +1 @@
-#define CONFIG_RADIX_TREE_MULTIORDER 1
+#define CONFIG_XARRAY_MULTI 1
diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c
index 321ba92c70d2..1b63bdb7688f 100644
--- a/tools/testing/radix-tree/idr-test.c
+++ b/tools/testing/radix-tree/idr-test.c
@@ -19,7 +19,7 @@
 
 #include "test.h"
 
-#define DUMMY_PTR	((void *)0x12)
+#define DUMMY_PTR	((void *)0x10)
 
 int item_idr_free(int id, void *p, void *data)
 {
@@ -227,6 +227,66 @@ void idr_u32_test(int base)
 	idr_u32_test1(&idr, 0xffffffff);
 }
 
+static void idr_align_test(struct idr *idr)
+{
+	char name[] = "Motorola 68000";
+	int i, id;
+	void *entry;
+
+	for (i = 0; i < 9; i++) {
+		BUG_ON(idr_alloc(idr, &name[i], 0, 0, GFP_KERNEL) != i);
+		idr_for_each_entry(idr, entry, id);
+	}
+	idr_destroy(idr);
+
+	for (i = 1; i < 10; i++) {
+		BUG_ON(idr_alloc(idr, &name[i], 0, 0, GFP_KERNEL) != i - 1);
+		idr_for_each_entry(idr, entry, id);
+	}
+	idr_destroy(idr);
+
+	for (i = 2; i < 11; i++) {
+		BUG_ON(idr_alloc(idr, &name[i], 0, 0, GFP_KERNEL) != i - 2);
+		idr_for_each_entry(idr, entry, id);
+	}
+	idr_destroy(idr);
+
+	for (i = 3; i < 12; i++) {
+		BUG_ON(idr_alloc(idr, &name[i], 0, 0, GFP_KERNEL) != i - 3);
+		idr_for_each_entry(idr, entry, id);
+	}
+	idr_destroy(idr);
+
+	for (i = 0; i < 8; i++) {
+		BUG_ON(idr_alloc(idr, &name[i], 0, 0, GFP_KERNEL) != 0);
+		BUG_ON(idr_alloc(idr, &name[i + 1], 0, 0, GFP_KERNEL) != 1);
+		idr_for_each_entry(idr, entry, id);
+		idr_remove(idr, 1);
+		idr_for_each_entry(idr, entry, id);
+		idr_remove(idr, 0);
+		BUG_ON(!idr_is_empty(idr));
+	}
+
+	for (i = 0; i < 8; i++) {
+		BUG_ON(idr_alloc(idr, NULL, 0, 0, GFP_KERNEL) != 0);
+		idr_for_each_entry(idr, entry, id);
+		idr_replace(idr, &name[i], 0);
+		idr_for_each_entry(idr, entry, id);
+		BUG_ON(idr_find(idr, 0) != &name[i]);
+		idr_remove(idr, 0);
+	}
+
+	for (i = 0; i < 8; i++) {
+		BUG_ON(idr_alloc(idr, &name[i], 0, 0, GFP_KERNEL) != 0);
+		BUG_ON(idr_alloc(idr, NULL, 0, 0, GFP_KERNEL) != 1);
+		idr_remove(idr, 1);
+		idr_for_each_entry(idr, entry, id);
+		idr_replace(idr, &name[i + 1], 0);
+		idr_for_each_entry(idr, entry, id);
+		idr_remove(idr, 0);
+	}
+}
+
 void idr_checks(void)
 {
 	unsigned long i;
@@ -307,6 +367,7 @@ void idr_checks(void)
 	idr_u32_test(4);
 	idr_u32_test(1);
 	idr_u32_test(0);
+	idr_align_test(&idr);
 }
 
 #define module_init(x)
@@ -344,16 +405,16 @@ void ida_check_conv_user(void)
 	DEFINE_IDA(ida);
 	unsigned long i;
 
-	radix_tree_cpu_dead(1);
 	for (i = 0; i < 1000000; i++) {
 		int id = ida_alloc(&ida, GFP_NOWAIT);
 		if (id == -ENOMEM) {
-			IDA_BUG_ON(&ida, (i % IDA_BITMAP_BITS) !=
-					BITS_PER_LONG - 2);
+			IDA_BUG_ON(&ida, ((i % IDA_BITMAP_BITS) !=
+					  BITS_PER_XA_VALUE) &&
+					 ((i % IDA_BITMAP_BITS) != 0));
 			id = ida_alloc(&ida, GFP_KERNEL);
 		} else {
 			IDA_BUG_ON(&ida, (i % IDA_BITMAP_BITS) ==
-					BITS_PER_LONG - 2);
+					BITS_PER_XA_VALUE);
 		}
 		IDA_BUG_ON(&ida, id != i);
 	}
diff --git a/tools/testing/radix-tree/iteration_check.c b/tools/testing/radix-tree/iteration_check.c
index a92bab513701..238db187aa15 100644
--- a/tools/testing/radix-tree/iteration_check.c
+++ b/tools/testing/radix-tree/iteration_check.c
@@ -1,5 +1,5 @@
 /*
- * iteration_check.c: test races having to do with radix tree iteration
+ * iteration_check.c: test races having to do with xarray iteration
  * Copyright (c) 2016 Intel Corporation
  * Author: Ross Zwisler <ross.zwisler@linux.intel.com>
  *
@@ -12,41 +12,54 @@
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  * more details.
  */
-#include <linux/radix-tree.h>
 #include <pthread.h>
 #include "test.h"
 
 #define NUM_THREADS	5
 #define MAX_IDX		100
-#define TAG		0
-#define NEW_TAG		1
+#define TAG		XA_MARK_0
+#define NEW_TAG		XA_MARK_1
 
-static pthread_mutex_t tree_lock = PTHREAD_MUTEX_INITIALIZER;
 static pthread_t threads[NUM_THREADS];
 static unsigned int seeds[3];
-static RADIX_TREE(tree, GFP_KERNEL);
+static DEFINE_XARRAY(array);
 static bool test_complete;
 static int max_order;
 
-/* relentlessly fill the tree with tagged entries */
+void my_item_insert(struct xarray *xa, unsigned long index)
+{
+	XA_STATE(xas, xa, index);
+	struct item *item = item_create(index, 0);
+	int order;
+
+retry:
+	xas_lock(&xas);
+	for (order = max_order; order >= 0; order--) {
+		xas_set_order(&xas, index, order);
+		item->order = order;
+		if (xas_find_conflict(&xas))
+			continue;
+		xas_store(&xas, item);
+		xas_set_mark(&xas, TAG);
+		break;
+	}
+	xas_unlock(&xas);
+	if (xas_nomem(&xas, GFP_KERNEL))
+		goto retry;
+	if (order < 0)
+		free(item);
+}
+
+/* relentlessly fill the array with tagged entries */
 static void *add_entries_fn(void *arg)
 {
 	rcu_register_thread();
 
 	while (!test_complete) {
 		unsigned long pgoff;
-		int order;
 
 		for (pgoff = 0; pgoff < MAX_IDX; pgoff++) {
-			pthread_mutex_lock(&tree_lock);
-			for (order = max_order; order >= 0; order--) {
-				if (item_insert_order(&tree, pgoff, order)
-						== 0) {
-					item_tag_set(&tree, pgoff, TAG);
-					break;
-				}
-			}
-			pthread_mutex_unlock(&tree_lock);
+			my_item_insert(&array, pgoff);
 		}
 	}
 
@@ -56,33 +69,25 @@ static void *add_entries_fn(void *arg)
 }
 
 /*
- * Iterate over the tagged entries, doing a radix_tree_iter_retry() as we find
- * things that have been removed and randomly resetting our iteration to the
- * next chunk with radix_tree_iter_resume().  Both radix_tree_iter_retry() and
- * radix_tree_iter_resume() cause radix_tree_next_slot() to be called with a
- * NULL 'slot' variable.
+ * Iterate over tagged entries, retrying when we find ourselves in a deleted
+ * node and randomly pausing the iteration.
  */
 static void *tagged_iteration_fn(void *arg)
 {
-	struct radix_tree_iter iter;
-	void **slot;
+	XA_STATE(xas, &array, 0);
+	void *entry;
 
 	rcu_register_thread();
 
 	while (!test_complete) {
+		xas_set(&xas, 0);
 		rcu_read_lock();
-		radix_tree_for_each_tagged(slot, &tree, &iter, 0, TAG) {
-			void *entry = radix_tree_deref_slot(slot);
-			if (unlikely(!entry))
+		xas_for_each_marked(&xas, entry, ULONG_MAX, TAG) {
+			if (xas_retry(&xas, entry))
 				continue;
 
-			if (radix_tree_deref_retry(entry)) {
-				slot = radix_tree_iter_retry(&iter);
-				continue;
-			}
-
 			if (rand_r(&seeds[0]) % 50 == 0) {
-				slot = radix_tree_iter_resume(slot, &iter);
+				xas_pause(&xas);
 				rcu_read_unlock();
 				rcu_barrier();
 				rcu_read_lock();
@@ -97,33 +102,25 @@ static void *tagged_iteration_fn(void *arg)
 }
 
 /*
- * Iterate over the entries, doing a radix_tree_iter_retry() as we find things
- * that have been removed and randomly resetting our iteration to the next
- * chunk with radix_tree_iter_resume().  Both radix_tree_iter_retry() and
- * radix_tree_iter_resume() cause radix_tree_next_slot() to be called with a
- * NULL 'slot' variable.
+ * Iterate over the entries, retrying when we find ourselves in a deleted
+ * node and randomly pausing the iteration.
  */
 static void *untagged_iteration_fn(void *arg)
 {
-	struct radix_tree_iter iter;
-	void **slot;
+	XA_STATE(xas, &array, 0);
+	void *entry;
 
 	rcu_register_thread();
 
 	while (!test_complete) {
+		xas_set(&xas, 0);
 		rcu_read_lock();
-		radix_tree_for_each_slot(slot, &tree, &iter, 0) {
-			void *entry = radix_tree_deref_slot(slot);
-			if (unlikely(!entry))
+		xas_for_each(&xas, entry, ULONG_MAX) {
+			if (xas_retry(&xas, entry))
 				continue;
 
-			if (radix_tree_deref_retry(entry)) {
-				slot = radix_tree_iter_retry(&iter);
-				continue;
-			}
-
 			if (rand_r(&seeds[1]) % 50 == 0) {
-				slot = radix_tree_iter_resume(slot, &iter);
+				xas_pause(&xas);
 				rcu_read_unlock();
 				rcu_barrier();
 				rcu_read_lock();
@@ -138,7 +135,7 @@ static void *untagged_iteration_fn(void *arg)
 }
 
 /*
- * Randomly remove entries to help induce radix_tree_iter_retry() calls in the
+ * Randomly remove entries to help induce retries in the
  * two iteration functions.
  */
 static void *remove_entries_fn(void *arg)
@@ -147,12 +144,13 @@ static void *remove_entries_fn(void *arg)
 
 	while (!test_complete) {
 		int pgoff;
+		struct item *item;
 
 		pgoff = rand_r(&seeds[2]) % MAX_IDX;
 
-		pthread_mutex_lock(&tree_lock);
-		item_delete(&tree, pgoff);
-		pthread_mutex_unlock(&tree_lock);
+		item = xa_erase(&array, pgoff);
+		if (item)
+			item_free(item, pgoff);
 	}
 
 	rcu_unregister_thread();
@@ -165,8 +163,7 @@ static void *tag_entries_fn(void *arg)
 	rcu_register_thread();
 
 	while (!test_complete) {
-		tag_tagged_items(&tree, &tree_lock, 0, MAX_IDX, 10, TAG,
-					NEW_TAG);
+		tag_tagged_items(&array, 0, MAX_IDX, 10, TAG, NEW_TAG);
 	}
 	rcu_unregister_thread();
 	return NULL;
@@ -217,5 +214,5 @@ void iteration_test(unsigned order, unsigned test_duration)
 		}
 	}
 
-	item_kill_tree(&tree);
+	item_kill_tree(&array);
 }
diff --git a/tools/testing/radix-tree/linux/bug.h b/tools/testing/radix-tree/linux/bug.h
index 23b8ed52f8c8..03dc8a57eb99 100644
--- a/tools/testing/radix-tree/linux/bug.h
+++ b/tools/testing/radix-tree/linux/bug.h
@@ -1 +1,2 @@
+#include <stdio.h>
 #include "asm/bug.h"
diff --git a/tools/testing/radix-tree/linux/kconfig.h b/tools/testing/radix-tree/linux/kconfig.h
new file mode 100644
index 000000000000..6c8675859913
--- /dev/null
+++ b/tools/testing/radix-tree/linux/kconfig.h
@@ -0,0 +1 @@
+#include "../../../../include/linux/kconfig.h"
diff --git a/tools/testing/radix-tree/linux/kernel.h b/tools/testing/radix-tree/linux/kernel.h
index 426f32f28547..4568248222ae 100644
--- a/tools/testing/radix-tree/linux/kernel.h
+++ b/tools/testing/radix-tree/linux/kernel.h
@@ -14,7 +14,12 @@
 #include "../../../include/linux/kconfig.h"
 
 #define printk printf
+#define pr_info printk
 #define pr_debug printk
 #define pr_cont printk
 
+#define __acquires(x)
+#define __releases(x)
+#define __must_hold(x)
+
 #endif /* _KERNEL_H */
diff --git a/tools/testing/radix-tree/linux/lockdep.h b/tools/testing/radix-tree/linux/lockdep.h
new file mode 100644
index 000000000000..565fccdfe6e9
--- /dev/null
+++ b/tools/testing/radix-tree/linux/lockdep.h
@@ -0,0 +1,11 @@
+#ifndef _LINUX_LOCKDEP_H
+#define _LINUX_LOCKDEP_H
+struct lock_class_key {
+	unsigned int a;
+};
+
+static inline void lockdep_set_class(spinlock_t *lock,
+					struct lock_class_key *key)
+{
+}
+#endif /* _LINUX_LOCKDEP_H */
diff --git a/tools/testing/radix-tree/linux/radix-tree.h b/tools/testing/radix-tree/linux/radix-tree.h
index 24f13d27a8da..d1635a5bef02 100644
--- a/tools/testing/radix-tree/linux/radix-tree.h
+++ b/tools/testing/radix-tree/linux/radix-tree.h
@@ -2,7 +2,6 @@
 #ifndef _TEST_RADIX_TREE_H
 #define _TEST_RADIX_TREE_H
 
-#include "generated/map-shift.h"
 #include "../../../../include/linux/radix-tree.h"
 
 extern int kmalloc_verbose;
diff --git a/tools/testing/radix-tree/linux/rcupdate.h b/tools/testing/radix-tree/linux/rcupdate.h
index 73ed33658203..fd280b070fdb 100644
--- a/tools/testing/radix-tree/linux/rcupdate.h
+++ b/tools/testing/radix-tree/linux/rcupdate.h
@@ -6,5 +6,7 @@
 
 #define rcu_dereference_raw(p) rcu_dereference(p)
 #define rcu_dereference_protected(p, cond) rcu_dereference(p)
+#define rcu_dereference_check(p, cond) rcu_dereference(p)
+#define RCU_INIT_POINTER(p, v)	(p) = (v)
 
 #endif
diff --git a/tools/testing/radix-tree/main.c b/tools/testing/radix-tree/main.c
index b741686e53d6..77a44c54998f 100644
--- a/tools/testing/radix-tree/main.c
+++ b/tools/testing/radix-tree/main.c
@@ -214,7 +214,7 @@ void copy_tag_check(void)
 	}
 
 //	printf("\ncopying tags...\n");
-	tagged = tag_tagged_items(&tree, NULL, start, end, ITEMS, 0, 1);
+	tagged = tag_tagged_items(&tree, start, end, ITEMS, XA_MARK_0, XA_MARK_1);
 
 //	printf("checking copied tags\n");
 	assert(tagged == count);
@@ -223,7 +223,7 @@ void copy_tag_check(void)
 	/* Copy tags in several rounds */
 //	printf("\ncopying tags...\n");
 	tmp = rand() % (count / 10 + 2);
-	tagged = tag_tagged_items(&tree, NULL, start, end, tmp, 0, 2);
+	tagged = tag_tagged_items(&tree, start, end, tmp, XA_MARK_0, XA_MARK_2);
 	assert(tagged == count);
 
 //	printf("%lu %lu %lu\n", tagged, tmp, count);
@@ -236,63 +236,6 @@ void copy_tag_check(void)
 	item_kill_tree(&tree);
 }
 
-static void __locate_check(struct radix_tree_root *tree, unsigned long index,
-			unsigned order)
-{
-	struct item *item;
-	unsigned long index2;
-
-	item_insert_order(tree, index, order);
-	item = item_lookup(tree, index);
-	index2 = find_item(tree, item);
-	if (index != index2) {
-		printv(2, "index %ld order %d inserted; found %ld\n",
-			index, order, index2);
-		abort();
-	}
-}
-
-static void __order_0_locate_check(void)
-{
-	RADIX_TREE(tree, GFP_KERNEL);
-	int i;
-
-	for (i = 0; i < 50; i++)
-		__locate_check(&tree, rand() % INT_MAX, 0);
-
-	item_kill_tree(&tree);
-}
-
-static void locate_check(void)
-{
-	RADIX_TREE(tree, GFP_KERNEL);
-	unsigned order;
-	unsigned long offset, index;
-
-	__order_0_locate_check();
-
-	for (order = 0; order < 20; order++) {
-		for (offset = 0; offset < (1 << (order + 3));
-		     offset += (1UL << order)) {
-			for (index = 0; index < (1UL << (order + 5));
-			     index += (1UL << order)) {
-				__locate_check(&tree, index + offset, order);
-			}
-			if (find_item(&tree, &tree) != -1)
-				abort();
-
-			item_kill_tree(&tree);
-		}
-	}
-
-	if (find_item(&tree, &tree) != -1)
-		abort();
-	__locate_check(&tree, -1, 0);
-	if (find_item(&tree, &tree) != -1)
-		abort();
-	item_kill_tree(&tree);
-}
-
 static void single_thread_tests(bool long_run)
 {
 	int i;
@@ -303,10 +246,6 @@ static void single_thread_tests(bool long_run)
 	rcu_barrier();
 	printv(2, "after multiorder_check: %d allocated, preempt %d\n",
 		nr_allocated, preempt_count);
-	locate_check();
-	rcu_barrier();
-	printv(2, "after locate_check: %d allocated, preempt %d\n",
-		nr_allocated, preempt_count);
 	tag_check();
 	rcu_barrier();
 	printv(2, "after tag_check: %d allocated, preempt %d\n",
@@ -365,6 +304,7 @@ int main(int argc, char **argv)
 	rcu_register_thread();
 	radix_tree_init();
 
+	xarray_tests();
 	regression1_test();
 	regression2_test();
 	regression3_test();
diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c
index 7bf405638b0b..ff27a74d9762 100644
--- a/tools/testing/radix-tree/multiorder.c
+++ b/tools/testing/radix-tree/multiorder.c
@@ -20,230 +20,39 @@
 
 #include "test.h"
 
-#define for_each_index(i, base, order) \
-	for (i = base; i < base + (1 << order); i++)
-
-static void __multiorder_tag_test(int index, int order)
-{
-	RADIX_TREE(tree, GFP_KERNEL);
-	int base, err, i;
-
-	/* our canonical entry */
-	base = index & ~((1 << order) - 1);
-
-	printv(2, "Multiorder tag test with index %d, canonical entry %d\n",
-			index, base);
-
-	err = item_insert_order(&tree, index, order);
-	assert(!err);
-
-	/*
-	 * Verify we get collisions for covered indices.  We try and fail to
-	 * insert an exceptional entry so we don't leak memory via
-	 * item_insert_order().
-	 */
-	for_each_index(i, base, order) {
-		err = __radix_tree_insert(&tree, i, order,
-				(void *)(0xA0 | RADIX_TREE_EXCEPTIONAL_ENTRY));
-		assert(err == -EEXIST);
-	}
-
-	for_each_index(i, base, order) {
-		assert(!radix_tree_tag_get(&tree, i, 0));
-		assert(!radix_tree_tag_get(&tree, i, 1));
-	}
-
-	assert(radix_tree_tag_set(&tree, index, 0));
-
-	for_each_index(i, base, order) {
-		assert(radix_tree_tag_get(&tree, i, 0));
-		assert(!radix_tree_tag_get(&tree, i, 1));
-	}
-
-	assert(tag_tagged_items(&tree, NULL, 0, ~0UL, 10, 0, 1) == 1);
-	assert(radix_tree_tag_clear(&tree, index, 0));
-
-	for_each_index(i, base, order) {
-		assert(!radix_tree_tag_get(&tree, i, 0));
-		assert(radix_tree_tag_get(&tree, i, 1));
-	}
-
-	assert(radix_tree_tag_clear(&tree, index, 1));
-
-	assert(!radix_tree_tagged(&tree, 0));
-	assert(!radix_tree_tagged(&tree, 1));
-
-	item_kill_tree(&tree);
-}
-
-static void __multiorder_tag_test2(unsigned order, unsigned long index2)
+static int item_insert_order(struct xarray *xa, unsigned long index,
+			unsigned order)
 {
-	RADIX_TREE(tree, GFP_KERNEL);
-	unsigned long index = (1 << order);
-	index2 += index;
-
-	assert(item_insert_order(&tree, 0, order) == 0);
-	assert(item_insert(&tree, index2) == 0);
-
-	assert(radix_tree_tag_set(&tree, 0, 0));
-	assert(radix_tree_tag_set(&tree, index2, 0));
-
-	assert(tag_tagged_items(&tree, NULL, 0, ~0UL, 10, 0, 1) == 2);
-
-	item_kill_tree(&tree);
-}
-
-static void multiorder_tag_tests(void)
-{
-	int i, j;
-
-	/* test multi-order entry for indices 0-7 with no sibling pointers */
-	__multiorder_tag_test(0, 3);
-	__multiorder_tag_test(5, 3);
-
-	/* test multi-order entry for indices 8-15 with no sibling pointers */
-	__multiorder_tag_test(8, 3);
-	__multiorder_tag_test(15, 3);
-
-	/*
-	 * Our order 5 entry covers indices 0-31 in a tree with height=2.
-	 * This is broken up as follows:
-	 * 0-7:		canonical entry
-	 * 8-15:	sibling 1
-	 * 16-23:	sibling 2
-	 * 24-31:	sibling 3
-	 */
-	__multiorder_tag_test(0, 5);
-	__multiorder_tag_test(29, 5);
-
-	/* same test, but with indices 32-63 */
-	__multiorder_tag_test(32, 5);
-	__multiorder_tag_test(44, 5);
-
-	/*
-	 * Our order 8 entry covers indices 0-255 in a tree with height=3.
-	 * This is broken up as follows:
-	 * 0-63:	canonical entry
-	 * 64-127:	sibling 1
-	 * 128-191:	sibling 2
-	 * 192-255:	sibling 3
-	 */
-	__multiorder_tag_test(0, 8);
-	__multiorder_tag_test(190, 8);
-
-	/* same test, but with indices 256-511 */
-	__multiorder_tag_test(256, 8);
-	__multiorder_tag_test(300, 8);
-
-	__multiorder_tag_test(0x12345678UL, 8);
-
-	for (i = 1; i < 10; i++)
-		for (j = 0; j < (10 << i); j++)
-			__multiorder_tag_test2(i, j);
-}
-
-static void multiorder_check(unsigned long index, int order)
-{
-	unsigned long i;
-	unsigned long min = index & ~((1UL << order) - 1);
-	unsigned long max = min + (1UL << order);
-	void **slot;
-	struct item *item2 = item_create(min, order);
-	RADIX_TREE(tree, GFP_KERNEL);
-
-	printv(2, "Multiorder index %ld, order %d\n", index, order);
-
-	assert(item_insert_order(&tree, index, order) == 0);
-
-	for (i = min; i < max; i++) {
-		struct item *item = item_lookup(&tree, i);
-		assert(item != 0);
-		assert(item->index == index);
-	}
-	for (i = 0; i < min; i++)
-		item_check_absent(&tree, i);
-	for (i = max; i < 2*max; i++)
-		item_check_absent(&tree, i);
-	for (i = min; i < max; i++)
-		assert(radix_tree_insert(&tree, i, item2) == -EEXIST);
-
-	slot = radix_tree_lookup_slot(&tree, index);
-	free(*slot);
-	radix_tree_replace_slot(&tree, slot, item2);
-	for (i = min; i < max; i++) {
-		struct item *item = item_lookup(&tree, i);
-		assert(item != 0);
-		assert(item->index == min);
-	}
-
-	assert(item_delete(&tree, min) != 0);
-
-	for (i = 0; i < 2*max; i++)
-		item_check_absent(&tree, i);
-}
-
-static void multiorder_shrink(unsigned long index, int order)
-{
-	unsigned long i;
-	unsigned long max = 1 << order;
-	RADIX_TREE(tree, GFP_KERNEL);
-	struct radix_tree_node *node;
-
-	printv(2, "Multiorder shrink index %ld, order %d\n", index, order);
+	XA_STATE_ORDER(xas, xa, index, order);
+	struct item *item = item_create(index, order);
 
-	assert(item_insert_order(&tree, 0, order) == 0);
-
-	node = tree.rnode;
-
-	assert(item_insert(&tree, index) == 0);
-	assert(node != tree.rnode);
-
-	assert(item_delete(&tree, index) != 0);
-	assert(node == tree.rnode);
-
-	for (i = 0; i < max; i++) {
-		struct item *item = item_lookup(&tree, i);
-		assert(item != 0);
-		assert(item->index == 0);
-	}
-	for (i = max; i < 2*max; i++)
-		item_check_absent(&tree, i);
-
-	if (!item_delete(&tree, 0)) {
-		printv(2, "failed to delete index %ld (order %d)\n", index, order);
-		abort();
-	}
-
-	for (i = 0; i < 2*max; i++)
-		item_check_absent(&tree, i);
-}
-
-static void multiorder_insert_bug(void)
-{
-	RADIX_TREE(tree, GFP_KERNEL);
+	do {
+		xas_lock(&xas);
+		xas_store(&xas, item);
+		xas_unlock(&xas);
+	} while (xas_nomem(&xas, GFP_KERNEL));
 
-	item_insert(&tree, 0);
-	radix_tree_tag_set(&tree, 0, 0);
-	item_insert_order(&tree, 3 << 6, 6);
+	if (!xas_error(&xas))
+		return 0;
 
-	item_kill_tree(&tree);
+	free(item);
+	return xas_error(&xas);
 }
 
-void multiorder_iteration(void)
+void multiorder_iteration(struct xarray *xa)
 {
-	RADIX_TREE(tree, GFP_KERNEL);
-	struct radix_tree_iter iter;
-	void **slot;
+	XA_STATE(xas, xa, 0);
+	struct item *item;
 	int i, j, err;
 
-	printv(1, "Multiorder iteration test\n");
-
 #define NUM_ENTRIES 11
 	int index[NUM_ENTRIES] = {0, 2, 4, 8, 16, 32, 34, 36, 64, 72, 128};
 	int order[NUM_ENTRIES] = {1, 1, 2, 3,  4,  1,  0,  1,  3,  0, 7};
 
+	printv(1, "Multiorder iteration test\n");
+
 	for (i = 0; i < NUM_ENTRIES; i++) {
-		err = item_insert_order(&tree, index[i], order[i]);
+		err = item_insert_order(xa, index[i], order[i]);
 		assert(!err);
 	}
 
@@ -252,14 +61,14 @@ void multiorder_iteration(void)
 			if (j <= (index[i] | ((1 << order[i]) - 1)))
 				break;
 
-		radix_tree_for_each_slot(slot, &tree, &iter, j) {
-			int height = order[i] / RADIX_TREE_MAP_SHIFT;
-			int shift = height * RADIX_TREE_MAP_SHIFT;
+		xas_set(&xas, j);
+		xas_for_each(&xas, item, ULONG_MAX) {
+			int height = order[i] / XA_CHUNK_SHIFT;
+			int shift = height * XA_CHUNK_SHIFT;
 			unsigned long mask = (1UL << order[i]) - 1;
-			struct item *item = *slot;
 
-			assert((iter.index | mask) == (index[i] | mask));
-			assert(iter.shift == shift);
+			assert((xas.xa_index | mask) == (index[i] | mask));
+			assert(xas.xa_node->shift == shift);
 			assert(!radix_tree_is_internal_node(item));
 			assert((item->index | mask) == (index[i] | mask));
 			assert(item->order == order[i]);
@@ -267,18 +76,15 @@ void multiorder_iteration(void)
 		}
 	}
 
-	item_kill_tree(&tree);
+	item_kill_tree(xa);
 }
 
-void multiorder_tagged_iteration(void)
+void multiorder_tagged_iteration(struct xarray *xa)
 {
-	RADIX_TREE(tree, GFP_KERNEL);
-	struct radix_tree_iter iter;
-	void **slot;
+	XA_STATE(xas, xa, 0);
+	struct item *item;
 	int i, j;
 
-	printv(1, "Multiorder tagged iteration test\n");
-
 #define MT_NUM_ENTRIES 9
 	int index[MT_NUM_ENTRIES] = {0, 2, 4, 16, 32, 40, 64, 72, 128};
 	int order[MT_NUM_ENTRIES] = {1, 0, 2, 4,  3,  1,  3,  0,   7};
@@ -286,13 +92,15 @@ void multiorder_tagged_iteration(void)
 #define TAG_ENTRIES 7
 	int tag_index[TAG_ENTRIES] = {0, 4, 16, 40, 64, 72, 128};
 
+	printv(1, "Multiorder tagged iteration test\n");
+
 	for (i = 0; i < MT_NUM_ENTRIES; i++)
-		assert(!item_insert_order(&tree, index[i], order[i]));
+		assert(!item_insert_order(xa, index[i], order[i]));
 
-	assert(!radix_tree_tagged(&tree, 1));
+	assert(!xa_marked(xa, XA_MARK_1));
 
 	for (i = 0; i < TAG_ENTRIES; i++)
-		assert(radix_tree_tag_set(&tree, tag_index[i], 1));
+		xa_set_mark(xa, tag_index[i], XA_MARK_1);
 
 	for (j = 0; j < 256; j++) {
 		int k;
@@ -304,23 +112,23 @@ void multiorder_tagged_iteration(void)
 				break;
 		}
 
-		radix_tree_for_each_tagged(slot, &tree, &iter, j, 1) {
+		xas_set(&xas, j);
+		xas_for_each_marked(&xas, item, ULONG_MAX, XA_MARK_1) {
 			unsigned long mask;
-			struct item *item = *slot;
 			for (k = i; index[k] < tag_index[i]; k++)
 				;
 			mask = (1UL << order[k]) - 1;
 
-			assert((iter.index | mask) == (tag_index[i] | mask));
-			assert(!radix_tree_is_internal_node(item));
+			assert((xas.xa_index | mask) == (tag_index[i] | mask));
+			assert(!xa_is_internal(item));
 			assert((item->index | mask) == (tag_index[i] | mask));
 			assert(item->order == order[k]);
 			i++;
 		}
 	}
 
-	assert(tag_tagged_items(&tree, NULL, 0, ~0UL, TAG_ENTRIES, 1, 2) ==
-				TAG_ENTRIES);
+	assert(tag_tagged_items(xa, 0, ULONG_MAX, TAG_ENTRIES, XA_MARK_1,
+				XA_MARK_2) == TAG_ENTRIES);
 
 	for (j = 0; j < 256; j++) {
 		int mask, k;
@@ -332,297 +140,31 @@ void multiorder_tagged_iteration(void)
 				break;
 		}
 
-		radix_tree_for_each_tagged(slot, &tree, &iter, j, 2) {
-			struct item *item = *slot;
+		xas_set(&xas, j);
+		xas_for_each_marked(&xas, item, ULONG_MAX, XA_MARK_2) {
 			for (k = i; index[k] < tag_index[i]; k++)
 				;
 			mask = (1 << order[k]) - 1;
 
-			assert((iter.index | mask) == (tag_index[i] | mask));
-			assert(!radix_tree_is_internal_node(item));
+			assert((xas.xa_index | mask) == (tag_index[i] | mask));
+			assert(!xa_is_internal(item));
 			assert((item->index | mask) == (tag_index[i] | mask));
 			assert(item->order == order[k]);
 			i++;
 		}
 	}
 
-	assert(tag_tagged_items(&tree, NULL, 1, ~0UL, MT_NUM_ENTRIES * 2, 1, 0)
-			== TAG_ENTRIES);
+	assert(tag_tagged_items(xa, 1, ULONG_MAX, MT_NUM_ENTRIES * 2, XA_MARK_1,
+				XA_MARK_0) == TAG_ENTRIES);
 	i = 0;
-	radix_tree_for_each_tagged(slot, &tree, &iter, 0, 0) {
-		assert(iter.index == tag_index[i]);
+	xas_set(&xas, 0);
+	xas_for_each_marked(&xas, item, ULONG_MAX, XA_MARK_0) {
+		assert(xas.xa_index == tag_index[i]);
 		i++;
 	}
+	assert(i == TAG_ENTRIES);
 
-	item_kill_tree(&tree);
-}
-
-/*
- * Basic join checks: make sure we can't find an entry in the tree after
- * a larger entry has replaced it
- */
-static void multiorder_join1(unsigned long index,
-				unsigned order1, unsigned order2)
-{
-	unsigned long loc;
-	void *item, *item2 = item_create(index + 1, order1);
-	RADIX_TREE(tree, GFP_KERNEL);
-
-	item_insert_order(&tree, index, order2);
-	item = radix_tree_lookup(&tree, index);
-	radix_tree_join(&tree, index + 1, order1, item2);
-	loc = find_item(&tree, item);
-	if (loc == -1)
-		free(item);
-	item = radix_tree_lookup(&tree, index + 1);
-	assert(item == item2);
-	item_kill_tree(&tree);
-}
-
-/*
- * Check that the accounting of exceptional entries is handled correctly
- * by joining an exceptional entry to a normal pointer.
- */
-static void multiorder_join2(unsigned order1, unsigned order2)
-{
-	RADIX_TREE(tree, GFP_KERNEL);
-	struct radix_tree_node *node;
-	void *item1 = item_create(0, order1);
-	void *item2;
-
-	item_insert_order(&tree, 0, order2);
-	radix_tree_insert(&tree, 1 << order2, (void *)0x12UL);
-	item2 = __radix_tree_lookup(&tree, 1 << order2, &node, NULL);
-	assert(item2 == (void *)0x12UL);
-	assert(node->exceptional == 1);
-
-	item2 = radix_tree_lookup(&tree, 0);
-	free(item2);
-
-	radix_tree_join(&tree, 0, order1, item1);
-	item2 = __radix_tree_lookup(&tree, 1 << order2, &node, NULL);
-	assert(item2 == item1);
-	assert(node->exceptional == 0);
-	item_kill_tree(&tree);
-}
-
-/*
- * This test revealed an accounting bug for exceptional entries at one point.
- * Nodes were being freed back into the pool with an elevated exception count
- * by radix_tree_join() and then radix_tree_split() was failing to zero the
- * count of exceptional entries.
- */
-static void multiorder_join3(unsigned int order)
-{
-	RADIX_TREE(tree, GFP_KERNEL);
-	struct radix_tree_node *node;
-	void **slot;
-	struct radix_tree_iter iter;
-	unsigned long i;
-
-	for (i = 0; i < (1 << order); i++) {
-		radix_tree_insert(&tree, i, (void *)0x12UL);
-	}
-
-	radix_tree_join(&tree, 0, order, (void *)0x16UL);
-	rcu_barrier();
-
-	radix_tree_split(&tree, 0, 0);
-
-	radix_tree_for_each_slot(slot, &tree, &iter, 0) {
-		radix_tree_iter_replace(&tree, &iter, slot, (void *)0x12UL);
-	}
-
-	__radix_tree_lookup(&tree, 0, &node, NULL);
-	assert(node->exceptional == node->count);
-
-	item_kill_tree(&tree);
-}
-
-static void multiorder_join(void)
-{
-	int i, j, idx;
-
-	for (idx = 0; idx < 1024; idx = idx * 2 + 3) {
-		for (i = 1; i < 15; i++) {
-			for (j = 0; j < i; j++) {
-				multiorder_join1(idx, i, j);
-			}
-		}
-	}
-
-	for (i = 1; i < 15; i++) {
-		for (j = 0; j < i; j++) {
-			multiorder_join2(i, j);
-		}
-	}
-
-	for (i = 3; i < 10; i++) {
-		multiorder_join3(i);
-	}
-}
-
-static void check_mem(unsigned old_order, unsigned new_order, unsigned alloc)
-{
-	struct radix_tree_preload *rtp = &radix_tree_preloads;
-	if (rtp->nr != 0)
-		printv(2, "split(%u %u) remaining %u\n", old_order, new_order,
-							rtp->nr);
-	/*
-	 * Can't check for equality here as some nodes may have been
-	 * RCU-freed while we ran.  But we should never finish with more
-	 * nodes allocated since they should have all been preloaded.
-	 */
-	if (nr_allocated > alloc)
-		printv(2, "split(%u %u) allocated %u %u\n", old_order, new_order,
-							alloc, nr_allocated);
-}
-
-static void __multiorder_split(int old_order, int new_order)
-{
-	RADIX_TREE(tree, GFP_ATOMIC);
-	void **slot;
-	struct radix_tree_iter iter;
-	unsigned alloc;
-	struct item *item;
-
-	radix_tree_preload(GFP_KERNEL);
-	assert(item_insert_order(&tree, 0, old_order) == 0);
-	radix_tree_preload_end();
-
-	/* Wipe out the preloaded cache or it'll confuse check_mem() */
-	radix_tree_cpu_dead(0);
-
-	item = radix_tree_tag_set(&tree, 0, 2);
-
-	radix_tree_split_preload(old_order, new_order, GFP_KERNEL);
-	alloc = nr_allocated;
-	radix_tree_split(&tree, 0, new_order);
-	check_mem(old_order, new_order, alloc);
-	radix_tree_for_each_slot(slot, &tree, &iter, 0) {
-		radix_tree_iter_replace(&tree, &iter, slot,
-					item_create(iter.index, new_order));
-	}
-	radix_tree_preload_end();
-
-	item_kill_tree(&tree);
-	free(item);
-}
-
-static void __multiorder_split2(int old_order, int new_order)
-{
-	RADIX_TREE(tree, GFP_KERNEL);
-	void **slot;
-	struct radix_tree_iter iter;
-	struct radix_tree_node *node;
-	void *item;
-
-	__radix_tree_insert(&tree, 0, old_order, (void *)0x12);
-
-	item = __radix_tree_lookup(&tree, 0, &node, NULL);
-	assert(item == (void *)0x12);
-	assert(node->exceptional > 0);
-
-	radix_tree_split(&tree, 0, new_order);
-	radix_tree_for_each_slot(slot, &tree, &iter, 0) {
-		radix_tree_iter_replace(&tree, &iter, slot,
-					item_create(iter.index, new_order));
-	}
-
-	item = __radix_tree_lookup(&tree, 0, &node, NULL);
-	assert(item != (void *)0x12);
-	assert(node->exceptional == 0);
-
-	item_kill_tree(&tree);
-}
-
-static void __multiorder_split3(int old_order, int new_order)
-{
-	RADIX_TREE(tree, GFP_KERNEL);
-	void **slot;
-	struct radix_tree_iter iter;
-	struct radix_tree_node *node;
-	void *item;
-
-	__radix_tree_insert(&tree, 0, old_order, (void *)0x12);
-
-	item = __radix_tree_lookup(&tree, 0, &node, NULL);
-	assert(item == (void *)0x12);
-	assert(node->exceptional > 0);
-
-	radix_tree_split(&tree, 0, new_order);
-	radix_tree_for_each_slot(slot, &tree, &iter, 0) {
-		radix_tree_iter_replace(&tree, &iter, slot, (void *)0x16);
-	}
-
-	item = __radix_tree_lookup(&tree, 0, &node, NULL);
-	assert(item == (void *)0x16);
-	assert(node->exceptional > 0);
-
-	item_kill_tree(&tree);
-
-	__radix_tree_insert(&tree, 0, old_order, (void *)0x12);
-
-	item = __radix_tree_lookup(&tree, 0, &node, NULL);
-	assert(item == (void *)0x12);
-	assert(node->exceptional > 0);
-
-	radix_tree_split(&tree, 0, new_order);
-	radix_tree_for_each_slot(slot, &tree, &iter, 0) {
-		if (iter.index == (1 << new_order))
-			radix_tree_iter_replace(&tree, &iter, slot,
-						(void *)0x16);
-		else
-			radix_tree_iter_replace(&tree, &iter, slot, NULL);
-	}
-
-	item = __radix_tree_lookup(&tree, 1 << new_order, &node, NULL);
-	assert(item == (void *)0x16);
-	assert(node->count == node->exceptional);
-	do {
-		node = node->parent;
-		if (!node)
-			break;
-		assert(node->count == 1);
-		assert(node->exceptional == 0);
-	} while (1);
-
-	item_kill_tree(&tree);
-}
-
-static void multiorder_split(void)
-{
-	int i, j;
-
-	for (i = 3; i < 11; i++)
-		for (j = 0; j < i; j++) {
-			__multiorder_split(i, j);
-			__multiorder_split2(i, j);
-			__multiorder_split3(i, j);
-		}
-}
-
-static void multiorder_account(void)
-{
-	RADIX_TREE(tree, GFP_KERNEL);
-	struct radix_tree_node *node;
-	void **slot;
-
-	item_insert_order(&tree, 0, 5);
-
-	__radix_tree_insert(&tree, 1 << 5, 5, (void *)0x12);
-	__radix_tree_lookup(&tree, 0, &node, NULL);
-	assert(node->count == node->exceptional * 2);
-	radix_tree_delete(&tree, 1 << 5);
-	assert(node->exceptional == 0);
-
-	__radix_tree_insert(&tree, 1 << 5, 5, (void *)0x12);
-	__radix_tree_lookup(&tree, 1 << 5, &node, &slot);
-	assert(node->count == node->exceptional * 2);
-	__radix_tree_replace(&tree, node, slot, NULL, NULL);
-	assert(node->exceptional == 0);
-
-	item_kill_tree(&tree);
+	item_kill_tree(xa);
 }
 
 bool stop_iteration = false;
@@ -645,68 +187,45 @@ static void *creator_func(void *ptr)
 
 static void *iterator_func(void *ptr)
 {
-	struct radix_tree_root *tree = ptr;
-	struct radix_tree_iter iter;
+	XA_STATE(xas, ptr, 0);
 	struct item *item;
-	void **slot;
 
 	while (!stop_iteration) {
 		rcu_read_lock();
-		radix_tree_for_each_slot(slot, tree, &iter, 0) {
-			item = radix_tree_deref_slot(slot);
-
-			if (!item)
+		xas_for_each(&xas, item, ULONG_MAX) {
+			if (xas_retry(&xas, item))
 				continue;
-			if (radix_tree_deref_retry(item)) {
-				slot = radix_tree_iter_retry(&iter);
-				continue;
-			}
 
-			item_sanity(item, iter.index);
+			item_sanity(item, xas.xa_index);
 		}
 		rcu_read_unlock();
 	}
 	return NULL;
 }
 
-static void multiorder_iteration_race(void)
+static void multiorder_iteration_race(struct xarray *xa)
 {
 	const int num_threads = sysconf(_SC_NPROCESSORS_ONLN);
 	pthread_t worker_thread[num_threads];
-	RADIX_TREE(tree, GFP_KERNEL);
 	int i;
 
-	pthread_create(&worker_thread[0], NULL, &creator_func, &tree);
+	pthread_create(&worker_thread[0], NULL, &creator_func, xa);
 	for (i = 1; i < num_threads; i++)
-		pthread_create(&worker_thread[i], NULL, &iterator_func, &tree);
+		pthread_create(&worker_thread[i], NULL, &iterator_func, xa);
 
 	for (i = 0; i < num_threads; i++)
 		pthread_join(worker_thread[i], NULL);
 
-	item_kill_tree(&tree);
+	item_kill_tree(xa);
 }
 
+static DEFINE_XARRAY(array);
+
 void multiorder_checks(void)
 {
-	int i;
-
-	for (i = 0; i < 20; i++) {
-		multiorder_check(200, i);
-		multiorder_check(0, i);
-		multiorder_check((1UL << i) + 1, i);
-	}
-
-	for (i = 0; i < 15; i++)
-		multiorder_shrink((1UL << (i + RADIX_TREE_MAP_SHIFT)), i);
-
-	multiorder_insert_bug();
-	multiorder_tag_tests();
-	multiorder_iteration();
-	multiorder_tagged_iteration();
-	multiorder_join();
-	multiorder_split();
-	multiorder_account();
-	multiorder_iteration_race();
+	multiorder_iteration(&array);
+	multiorder_tagged_iteration(&array);
+	multiorder_iteration_race(&array);
 
 	radix_tree_cpu_dead(0);
 }
diff --git a/tools/testing/radix-tree/regression1.c b/tools/testing/radix-tree/regression1.c
index 0aece092f40e..a61c7bcbc72d 100644
--- a/tools/testing/radix-tree/regression1.c
+++ b/tools/testing/radix-tree/regression1.c
@@ -44,7 +44,6 @@
 #include "regression.h"
 
 static RADIX_TREE(mt_tree, GFP_KERNEL);
-static pthread_mutex_t mt_lock = PTHREAD_MUTEX_INITIALIZER;
 
 struct page {
 	pthread_mutex_t lock;
@@ -53,12 +52,12 @@ struct page {
 	unsigned long index;
 };
 
-static struct page *page_alloc(void)
+static struct page *page_alloc(int index)
 {
 	struct page *p;
 	p = malloc(sizeof(struct page));
 	p->count = 1;
-	p->index = 1;
+	p->index = index;
 	pthread_mutex_init(&p->lock, NULL);
 
 	return p;
@@ -80,53 +79,33 @@ static void page_free(struct page *p)
 static unsigned find_get_pages(unsigned long start,
 			    unsigned int nr_pages, struct page **pages)
 {
-	unsigned int i;
-	unsigned int ret;
-	unsigned int nr_found;
+	XA_STATE(xas, &mt_tree, start);
+	struct page *page;
+	unsigned int ret = 0;
 
 	rcu_read_lock();
-restart:
-	nr_found = radix_tree_gang_lookup_slot(&mt_tree,
-				(void ***)pages, NULL, start, nr_pages);
-	ret = 0;
-	for (i = 0; i < nr_found; i++) {
-		struct page *page;
-repeat:
-		page = radix_tree_deref_slot((void **)pages[i]);
-		if (unlikely(!page))
+	xas_for_each(&xas, page, ULONG_MAX) {
+		if (xas_retry(&xas, page))
 			continue;
 
-		if (radix_tree_exception(page)) {
-			if (radix_tree_deref_retry(page)) {
-				/*
-				 * Transient condition which can only trigger
-				 * when entry at index 0 moves out of or back
-				 * to root: none yet gotten, safe to restart.
-				 */
-				assert((start | i) == 0);
-				goto restart;
-			}
-			/*
-			 * No exceptional entries are inserted in this test.
-			 */
-			assert(0);
-		}
-
 		pthread_mutex_lock(&page->lock);
-		if (!page->count) {
-			pthread_mutex_unlock(&page->lock);
-			goto repeat;
-		}
+		if (!page->count)
+			goto unlock;
+
 		/* don't actually update page refcount */
 		pthread_mutex_unlock(&page->lock);
 
 		/* Has the page moved? */
-		if (unlikely(page != *((void **)pages[i]))) {
-			goto repeat;
-		}
+		if (unlikely(page != xas_reload(&xas)))
+			goto put_page;
 
 		pages[ret] = page;
 		ret++;
+		continue;
+unlock:
+		pthread_mutex_unlock(&page->lock);
+put_page:
+		xas_reset(&xas);
 	}
 	rcu_read_unlock();
 	return ret;
@@ -145,30 +124,30 @@ static void *regression1_fn(void *arg)
 		for (j = 0; j < 1000000; j++) {
 			struct page *p;
 
-			p = page_alloc();
-			pthread_mutex_lock(&mt_lock);
+			p = page_alloc(0);
+			xa_lock(&mt_tree);
 			radix_tree_insert(&mt_tree, 0, p);
-			pthread_mutex_unlock(&mt_lock);
+			xa_unlock(&mt_tree);
 
-			p = page_alloc();
-			pthread_mutex_lock(&mt_lock);
+			p = page_alloc(1);
+			xa_lock(&mt_tree);
 			radix_tree_insert(&mt_tree, 1, p);
-			pthread_mutex_unlock(&mt_lock);
+			xa_unlock(&mt_tree);
 
-			pthread_mutex_lock(&mt_lock);
+			xa_lock(&mt_tree);
 			p = radix_tree_delete(&mt_tree, 1);
 			pthread_mutex_lock(&p->lock);
 			p->count--;
 			pthread_mutex_unlock(&p->lock);
-			pthread_mutex_unlock(&mt_lock);
+			xa_unlock(&mt_tree);
 			page_free(p);
 
-			pthread_mutex_lock(&mt_lock);
+			xa_lock(&mt_tree);
 			p = radix_tree_delete(&mt_tree, 0);
 			pthread_mutex_lock(&p->lock);
 			p->count--;
 			pthread_mutex_unlock(&p->lock);
-			pthread_mutex_unlock(&mt_lock);
+			xa_unlock(&mt_tree);
 			page_free(p);
 		}
 	} else {
diff --git a/tools/testing/radix-tree/regression2.c b/tools/testing/radix-tree/regression2.c
index 424b91c77831..f2c7e640a919 100644
--- a/tools/testing/radix-tree/regression2.c
+++ b/tools/testing/radix-tree/regression2.c
@@ -53,9 +53,9 @@
 #include "regression.h"
 #include "test.h"
 
-#define PAGECACHE_TAG_DIRTY     0
-#define PAGECACHE_TAG_WRITEBACK 1
-#define PAGECACHE_TAG_TOWRITE   2
+#define PAGECACHE_TAG_DIRTY     XA_MARK_0
+#define PAGECACHE_TAG_WRITEBACK XA_MARK_1
+#define PAGECACHE_TAG_TOWRITE   XA_MARK_2
 
 static RADIX_TREE(mt_tree, GFP_KERNEL);
 unsigned long page_count = 0;
@@ -92,7 +92,7 @@ void regression2_test(void)
 	/* 1. */
 	start = 0;
 	end = max_slots - 2;
-	tag_tagged_items(&mt_tree, NULL, start, end, 1,
+	tag_tagged_items(&mt_tree, start, end, 1,
 				PAGECACHE_TAG_DIRTY, PAGECACHE_TAG_TOWRITE);
 
 	/* 2. */
diff --git a/tools/testing/radix-tree/regression3.c b/tools/testing/radix-tree/regression3.c
index ace2543c3eda..9f9a3b280f56 100644
--- a/tools/testing/radix-tree/regression3.c
+++ b/tools/testing/radix-tree/regression3.c
@@ -69,21 +69,6 @@ void regression3_test(void)
 			continue;
 		}
 	}
-	radix_tree_delete(&root, 1);
-
-	first = true;
-	radix_tree_for_each_contig(slot, &root, &iter, 0) {
-		printv(2, "contig %ld %p\n", iter.index, *slot);
-		if (first) {
-			radix_tree_insert(&root, 1, ptr);
-			first = false;
-		}
-		if (radix_tree_deref_retry(*slot)) {
-			printv(2, "retry at %ld\n", iter.index);
-			slot = radix_tree_iter_retry(&iter);
-			continue;
-		}
-	}
 
 	radix_tree_for_each_slot(slot, &root, &iter, 0) {
 		printv(2, "slot %ld %p\n", iter.index, *slot);
@@ -93,14 +78,6 @@ void regression3_test(void)
 		}
 	}
 
-	radix_tree_for_each_contig(slot, &root, &iter, 0) {
-		printv(2, "contig %ld %p\n", iter.index, *slot);
-		if (!iter.index) {
-			printv(2, "next at %ld\n", iter.index);
-			slot = radix_tree_iter_resume(slot, &iter);
-		}
-	}
-
 	radix_tree_tag_set(&root, 0, 0);
 	radix_tree_tag_set(&root, 1, 0);
 	radix_tree_for_each_tagged(slot, &root, &iter, 0, 0) {
diff --git a/tools/testing/radix-tree/tag_check.c b/tools/testing/radix-tree/tag_check.c
index 543181e4847b..f898957b1a19 100644
--- a/tools/testing/radix-tree/tag_check.c
+++ b/tools/testing/radix-tree/tag_check.c
@@ -24,7 +24,7 @@ __simple_checks(struct radix_tree_root *tree, unsigned long index, int tag)
 	item_tag_set(tree, index, tag);
 	ret = item_tag_get(tree, index, tag);
 	assert(ret != 0);
-	ret = tag_tagged_items(tree, NULL, first, ~0UL, 10, tag, !tag);
+	ret = tag_tagged_items(tree, first, ~0UL, 10, tag, !tag);
 	assert(ret == 1);
 	ret = item_tag_get(tree, index, !tag);
 	assert(ret != 0);
@@ -321,7 +321,7 @@ static void single_check(void)
 	assert(ret == 0);
 	verify_tag_consistency(&tree, 0);
 	verify_tag_consistency(&tree, 1);
-	ret = tag_tagged_items(&tree, NULL, first, 10, 10, 0, 1);
+	ret = tag_tagged_items(&tree, first, 10, 10, XA_MARK_0, XA_MARK_1);
 	assert(ret == 1);
 	ret = radix_tree_gang_lookup_tag(&tree, (void **)items, 0, BATCH, 1);
 	assert(ret == 1);
@@ -331,34 +331,6 @@ static void single_check(void)
 	item_kill_tree(&tree);
 }
 
-void radix_tree_clear_tags_test(void)
-{
-	unsigned long index;
-	struct radix_tree_node *node;
-	struct radix_tree_iter iter;
-	void **slot;
-
-	RADIX_TREE(tree, GFP_KERNEL);
-
-	item_insert(&tree, 0);
-	item_tag_set(&tree, 0, 0);
-	__radix_tree_lookup(&tree, 0, &node, &slot);
-	radix_tree_clear_tags(&tree, node, slot);
-	assert(item_tag_get(&tree, 0, 0) == 0);
-
-	for (index = 0; index < 1000; index++) {
-		item_insert(&tree, index);
-		item_tag_set(&tree, index, 0);
-	}
-
-	radix_tree_for_each_slot(slot, &tree, &iter, 0) {
-		radix_tree_clear_tags(&tree, iter.node, slot);
-		assert(item_tag_get(&tree, iter.index, 0) == 0);
-	}
-
-	item_kill_tree(&tree);
-}
-
 void tag_check(void)
 {
 	single_check();
@@ -376,5 +348,4 @@ void tag_check(void)
 	thrash_tags();
 	rcu_barrier();
 	printv(2, "after thrash_tags: %d allocated\n", nr_allocated);
-	radix_tree_clear_tags_test();
 }
diff --git a/tools/testing/radix-tree/test.c b/tools/testing/radix-tree/test.c
index def6015570b2..a15d0512e633 100644
--- a/tools/testing/radix-tree/test.c
+++ b/tools/testing/radix-tree/test.c
@@ -25,11 +25,6 @@ int item_tag_get(struct radix_tree_root *root, unsigned long index, int tag)
 	return radix_tree_tag_get(root, index, tag);
 }
 
-int __item_insert(struct radix_tree_root *root, struct item *item)
-{
-	return __radix_tree_insert(root, item->index, item->order, item);
-}
-
 struct item *item_create(unsigned long index, unsigned int order)
 {
 	struct item *ret = malloc(sizeof(*ret));
@@ -39,21 +34,15 @@ struct item *item_create(unsigned long index, unsigned int order)
 	return ret;
 }
 
-int item_insert_order(struct radix_tree_root *root, unsigned long index,
-			unsigned order)
+int item_insert(struct radix_tree_root *root, unsigned long index)
 {
-	struct item *item = item_create(index, order);
-	int err = __item_insert(root, item);
+	struct item *item = item_create(index, 0);
+	int err = radix_tree_insert(root, item->index, item);
 	if (err)
 		free(item);
 	return err;
 }
 
-int item_insert(struct radix_tree_root *root, unsigned long index)
-{
-	return item_insert_order(root, index, 0);
-}
-
 void item_sanity(struct item *item, unsigned long index)
 {
 	unsigned long mask;
@@ -63,16 +52,21 @@ void item_sanity(struct item *item, unsigned long index)
 	assert((item->index | mask) == (index | mask));
 }
 
+void item_free(struct item *item, unsigned long index)
+{
+	item_sanity(item, index);
+	free(item);
+}
+
 int item_delete(struct radix_tree_root *root, unsigned long index)
 {
 	struct item *item = radix_tree_delete(root, index);
 
-	if (item) {
-		item_sanity(item, index);
-		free(item);
-		return 1;
-	}
-	return 0;
+	if (!item)
+		return 0;
+
+	item_free(item, index);
+	return 1;
 }
 
 static void item_free_rcu(struct rcu_head *head)
@@ -82,9 +76,9 @@ static void item_free_rcu(struct rcu_head *head)
 	free(item);
 }
 
-int item_delete_rcu(struct radix_tree_root *root, unsigned long index)
+int item_delete_rcu(struct xarray *xa, unsigned long index)
 {
-	struct item *item = radix_tree_delete(root, index);
+	struct item *item = xa_erase(xa, index);
 
 	if (item) {
 		item_sanity(item, index);
@@ -176,59 +170,30 @@ void item_full_scan(struct radix_tree_root *root, unsigned long start,
 }
 
 /* Use the same pattern as tag_pages_for_writeback() in mm/page-writeback.c */
-int tag_tagged_items(struct radix_tree_root *root, pthread_mutex_t *lock,
-			unsigned long start, unsigned long end, unsigned batch,
-			unsigned iftag, unsigned thentag)
+int tag_tagged_items(struct xarray *xa, unsigned long start, unsigned long end,
+		unsigned batch, xa_mark_t iftag, xa_mark_t thentag)
 {
-	unsigned long tagged = 0;
-	struct radix_tree_iter iter;
-	void **slot;
+	XA_STATE(xas, xa, start);
+	unsigned int tagged = 0;
+	struct item *item;
 
 	if (batch == 0)
 		batch = 1;
 
-	if (lock)
-		pthread_mutex_lock(lock);
-	radix_tree_for_each_tagged(slot, root, &iter, start, iftag) {
-		if (iter.index > end)
-			break;
-		radix_tree_iter_tag_set(root, &iter, thentag);
-		tagged++;
-		if ((tagged % batch) != 0)
+	xas_lock_irq(&xas);
+	xas_for_each_marked(&xas, item, end, iftag) {
+		xas_set_mark(&xas, thentag);
+		if (++tagged % batch)
 			continue;
-		slot = radix_tree_iter_resume(slot, &iter);
-		if (lock) {
-			pthread_mutex_unlock(lock);
-			rcu_barrier();
-			pthread_mutex_lock(lock);
-		}
-	}
-	if (lock)
-		pthread_mutex_unlock(lock);
-
-	return tagged;
-}
 
-/* Use the same pattern as find_swap_entry() in mm/shmem.c */
-unsigned long find_item(struct radix_tree_root *root, void *item)
-{
-	struct radix_tree_iter iter;
-	void **slot;
-	unsigned long found = -1;
-	unsigned long checked = 0;
-
-	radix_tree_for_each_slot(slot, root, &iter, 0) {
-		if (*slot == item) {
-			found = iter.index;
-			break;
-		}
-		checked++;
-		if ((checked % 4) != 0)
-			continue;
-		slot = radix_tree_iter_resume(slot, &iter);
+		xas_pause(&xas);
+		xas_unlock_irq(&xas);
+		rcu_barrier();
+		xas_lock_irq(&xas);
 	}
+	xas_unlock_irq(&xas);
 
-	return found;
+	return tagged;
 }
 
 static int verify_node(struct radix_tree_node *slot, unsigned int tag,
@@ -281,43 +246,31 @@ static int verify_node(struct radix_tree_node *slot, unsigned int tag,
 
 void verify_tag_consistency(struct radix_tree_root *root, unsigned int tag)
 {
-	struct radix_tree_node *node = root->rnode;
+	struct radix_tree_node *node = root->xa_head;
 	if (!radix_tree_is_internal_node(node))
 		return;
 	verify_node(node, tag, !!root_tag_get(root, tag));
 }
 
-void item_kill_tree(struct radix_tree_root *root)
+void item_kill_tree(struct xarray *xa)
 {
-	struct radix_tree_iter iter;
-	void **slot;
-	struct item *items[32];
-	int nfound;
-
-	radix_tree_for_each_slot(slot, root, &iter, 0) {
-		if (radix_tree_exceptional_entry(*slot))
-			radix_tree_delete(root, iter.index);
-	}
+	XA_STATE(xas, xa, 0);
+	void *entry;
 
-	while ((nfound = radix_tree_gang_lookup(root, (void **)items, 0, 32))) {
-		int i;
-
-		for (i = 0; i < nfound; i++) {
-			void *ret;
-
-			ret = radix_tree_delete(root, items[i]->index);
-			assert(ret == items[i]);
-			free(items[i]);
+	xas_for_each(&xas, entry, ULONG_MAX) {
+		if (!xa_is_value(entry)) {
+			item_free(entry, xas.xa_index);
 		}
+		xas_store(&xas, NULL);
 	}
-	assert(radix_tree_gang_lookup(root, (void **)items, 0, 32) == 0);
-	assert(root->rnode == NULL);
+
+	assert(xa_empty(xa));
 }
 
 void tree_verify_min_height(struct radix_tree_root *root, int maxindex)
 {
 	unsigned shift;
-	struct radix_tree_node *node = root->rnode;
+	struct radix_tree_node *node = root->xa_head;
 	if (!radix_tree_is_internal_node(node)) {
 		assert(maxindex == 0);
 		return;
diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h
index 92d901eacf49..1ee4b2c0ad10 100644
--- a/tools/testing/radix-tree/test.h
+++ b/tools/testing/radix-tree/test.h
@@ -11,13 +11,11 @@ struct item {
 };
 
 struct item *item_create(unsigned long index, unsigned int order);
-int __item_insert(struct radix_tree_root *root, struct item *item);
 int item_insert(struct radix_tree_root *root, unsigned long index);
 void item_sanity(struct item *item, unsigned long index);
-int item_insert_order(struct radix_tree_root *root, unsigned long index,
-			unsigned order);
+void item_free(struct item *item, unsigned long index);
 int item_delete(struct radix_tree_root *root, unsigned long index);
-int item_delete_rcu(struct radix_tree_root *root, unsigned long index);
+int item_delete_rcu(struct xarray *xa, unsigned long index);
 struct item *item_lookup(struct radix_tree_root *root, unsigned long index);
 
 void item_check_present(struct radix_tree_root *root, unsigned long index);
@@ -29,11 +27,10 @@ void item_full_scan(struct radix_tree_root *root, unsigned long start,
 			unsigned long nr, int chunk);
 void item_kill_tree(struct radix_tree_root *root);
 
-int tag_tagged_items(struct radix_tree_root *, pthread_mutex_t *,
-			unsigned long start, unsigned long end, unsigned batch,
-			unsigned iftag, unsigned thentag);
-unsigned long find_item(struct radix_tree_root *, void *item);
+int tag_tagged_items(struct xarray *, unsigned long start, unsigned long end,
+		unsigned batch, xa_mark_t iftag, xa_mark_t thentag);
 
+void xarray_tests(void);
 void tag_check(void);
 void multiorder_checks(void);
 void iteration_test(unsigned order, unsigned duration);
diff --git a/tools/testing/radix-tree/xarray.c b/tools/testing/radix-tree/xarray.c
new file mode 100644
index 000000000000..e61e43efe463
--- /dev/null
+++ b/tools/testing/radix-tree/xarray.c
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * xarray.c: Userspace shim for XArray test-suite
+ * Copyright (c) 2018 Matthew Wilcox <willy@infradead.org>
+ */
+
+#define XA_DEBUG
+#include "test.h"
+
+#define module_init(x)
+#define module_exit(x)
+#define MODULE_AUTHOR(x)
+#define MODULE_LICENSE(x)
+#define dump_stack()	assert(0)
+
+#include "../../../lib/xarray.c"
+#undef XA_DEBUG
+#include "../../../lib/test_xarray.c"
+
+void xarray_tests(void)
+{
+	xarray_checks();
+	xarray_exit();
+}
+
+int __weak main(void)
+{
+	radix_tree_init();
+	xarray_tests();
+	radix_tree_cpu_dead(1);
+	rcu_barrier();
+	if (nr_allocated)
+		printf("nr_allocated = %d\n", nr_allocated);
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index 49938d72cf63..1b799e30c06d 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -19,3 +19,11 @@ test_btf
 test_sockmap
 test_lirc_mode2_user
 get_cgroup_id_user
+test_skb_cgroup_id_user
+test_socket_cookie
+test_cgroup_storage
+test_select_reuseport
+test_flow_dissector
+flow_dissector_load
+test_netcnt
+test_section_names
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index fff7fb1285fc..e39dfb4e7970 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -23,7 +23,8 @@ $(TEST_CUSTOM_PROGS): $(OUTPUT)/%: %.c
 TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
 	test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \
 	test_sock test_btf test_sockmap test_lirc_mode2_user get_cgroup_id_user \
-	test_socket_cookie test_cgroup_storage test_select_reuseport
+	test_socket_cookie test_cgroup_storage test_select_reuseport test_section_names \
+	test_netcnt
 
 TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \
 	test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o     \
@@ -35,7 +36,8 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test
 	test_get_stack_rawtp.o test_sockmap_kern.o test_sockhash_kern.o \
 	test_lwt_seg6local.o sendmsg4_prog.o sendmsg6_prog.o test_lirc_mode2_kern.o \
 	get_cgroup_id_kern.o socket_cookie_prog.o test_select_reuseport_kern.o \
-	test_skb_cgroup_id_kern.o
+	test_skb_cgroup_id_kern.o bpf_flow.o netcnt_prog.o \
+	test_sk_lookup_kern.o test_xdp_vlan.o test_queue_map.o test_stack_map.o
 
 # Order correspond to 'make run_tests' order
 TEST_PROGS := test_kmod.sh \
@@ -47,10 +49,15 @@ TEST_PROGS := test_kmod.sh \
 	test_tunnel.sh \
 	test_lwt_seg6local.sh \
 	test_lirc_mode2.sh \
-	test_skb_cgroup_id.sh
+	test_skb_cgroup_id.sh \
+	test_flow_dissector.sh \
+	test_xdp_vlan.sh
+
+TEST_PROGS_EXTENDED := with_addr.sh
 
 # Compile but not part of 'make run_tests'
-TEST_GEN_PROGS_EXTENDED = test_libbpf_open test_sock_addr test_skb_cgroup_id_user
+TEST_GEN_PROGS_EXTENDED = test_libbpf_open test_sock_addr test_skb_cgroup_id_user \
+	flow_dissector_load test_flow_dissector
 
 include ../lib.mk
 
@@ -70,6 +77,7 @@ $(OUTPUT)/test_tcpbpf_user: cgroup_helpers.c
 $(OUTPUT)/test_progs: trace_helpers.c
 $(OUTPUT)/get_cgroup_id_user: cgroup_helpers.c
 $(OUTPUT)/test_cgroup_storage: cgroup_helpers.c
+$(OUTPUT)/test_netcnt: cgroup_helpers.c
 
 .PHONY: force
 
@@ -110,6 +118,9 @@ CLANG_FLAGS = -I. -I./include/uapi -I../../../include/uapi \
 $(OUTPUT)/test_l4lb_noinline.o: CLANG_FLAGS += -fno-inline
 $(OUTPUT)/test_xdp_noinline.o: CLANG_FLAGS += -fno-inline
 
+$(OUTPUT)/test_queue_map.o: test_queue_stack_map.h
+$(OUTPUT)/test_stack_map.o: test_queue_stack_map.h
+
 BTF_LLC_PROBE := $(shell $(LLC) -march=bpf -mattr=help 2>&1 | grep dwarfris)
 BTF_PAHOLE_PROBE := $(shell $(BTF_PAHOLE) --help 2>&1 | grep BTF)
 BTF_OBJCOPY_PROBE := $(shell $(LLVM_OBJCOPY) --help 2>&1 | grep -i 'usage.*llvm')
diff --git a/tools/testing/selftests/bpf/bpf_flow.c b/tools/testing/selftests/bpf/bpf_flow.c
new file mode 100644
index 000000000000..107350a7821d
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_flow.c
@@ -0,0 +1,373 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <limits.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <string.h>
+#include <linux/pkt_cls.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/icmp.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/if_packet.h>
+#include <sys/socket.h>
+#include <linux/if_tunnel.h>
+#include <linux/mpls.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+int _version SEC("version") = 1;
+#define PROG(F) SEC(#F) int bpf_func_##F
+
+/* These are the identifiers of the BPF programs that will be used in tail
+ * calls. Name is limited to 16 characters, with the terminating character and
+ * bpf_func_ above, we have only 6 to work with, anything after will be cropped.
+ */
+enum {
+	IP,
+	IPV6,
+	IPV6OP,	/* Destination/Hop-by-Hop Options IPv6 Extension header */
+	IPV6FR,	/* Fragmentation IPv6 Extension Header */
+	MPLS,
+	VLAN,
+};
+
+#define IP_MF		0x2000
+#define IP_OFFSET	0x1FFF
+#define IP6_MF		0x0001
+#define IP6_OFFSET	0xFFF8
+
+struct vlan_hdr {
+	__be16 h_vlan_TCI;
+	__be16 h_vlan_encapsulated_proto;
+};
+
+struct gre_hdr {
+	__be16 flags;
+	__be16 proto;
+};
+
+struct frag_hdr {
+	__u8 nexthdr;
+	__u8 reserved;
+	__be16 frag_off;
+	__be32 identification;
+};
+
+struct bpf_map_def SEC("maps") jmp_table = {
+	.type = BPF_MAP_TYPE_PROG_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(__u32),
+	.max_entries = 8
+};
+
+static __always_inline void *bpf_flow_dissect_get_header(struct __sk_buff *skb,
+							 __u16 hdr_size,
+							 void *buffer)
+{
+	void *data_end = (void *)(long)skb->data_end;
+	void *data = (void *)(long)skb->data;
+	__u16 nhoff = skb->flow_keys->nhoff;
+	__u8 *hdr;
+
+	/* Verifies this variable offset does not overflow */
+	if (nhoff > (USHRT_MAX - hdr_size))
+		return NULL;
+
+	hdr = data + nhoff;
+	if (hdr + hdr_size <= data_end)
+		return hdr;
+
+	if (bpf_skb_load_bytes(skb, nhoff, buffer, hdr_size))
+		return NULL;
+
+	return buffer;
+}
+
+/* Dispatches on ETHERTYPE */
+static __always_inline int parse_eth_proto(struct __sk_buff *skb, __be16 proto)
+{
+	struct bpf_flow_keys *keys = skb->flow_keys;
+
+	keys->n_proto = proto;
+	switch (proto) {
+	case bpf_htons(ETH_P_IP):
+		bpf_tail_call(skb, &jmp_table, IP);
+		break;
+	case bpf_htons(ETH_P_IPV6):
+		bpf_tail_call(skb, &jmp_table, IPV6);
+		break;
+	case bpf_htons(ETH_P_MPLS_MC):
+	case bpf_htons(ETH_P_MPLS_UC):
+		bpf_tail_call(skb, &jmp_table, MPLS);
+		break;
+	case bpf_htons(ETH_P_8021Q):
+	case bpf_htons(ETH_P_8021AD):
+		bpf_tail_call(skb, &jmp_table, VLAN);
+		break;
+	default:
+		/* Protocol not supported */
+		return BPF_DROP;
+	}
+
+	return BPF_DROP;
+}
+
+SEC("dissect")
+int _dissect(struct __sk_buff *skb)
+{
+	if (!skb->vlan_present)
+		return parse_eth_proto(skb, skb->protocol);
+	else
+		return parse_eth_proto(skb, skb->vlan_proto);
+}
+
+/* Parses on IPPROTO_* */
+static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto)
+{
+	struct bpf_flow_keys *keys = skb->flow_keys;
+	void *data_end = (void *)(long)skb->data_end;
+	struct icmphdr *icmp, _icmp;
+	struct gre_hdr *gre, _gre;
+	struct ethhdr *eth, _eth;
+	struct tcphdr *tcp, _tcp;
+	struct udphdr *udp, _udp;
+
+	keys->ip_proto = proto;
+	switch (proto) {
+	case IPPROTO_ICMP:
+		icmp = bpf_flow_dissect_get_header(skb, sizeof(*icmp), &_icmp);
+		if (!icmp)
+			return BPF_DROP;
+		return BPF_OK;
+	case IPPROTO_IPIP:
+		keys->is_encap = true;
+		return parse_eth_proto(skb, bpf_htons(ETH_P_IP));
+	case IPPROTO_IPV6:
+		keys->is_encap = true;
+		return parse_eth_proto(skb, bpf_htons(ETH_P_IPV6));
+	case IPPROTO_GRE:
+		gre = bpf_flow_dissect_get_header(skb, sizeof(*gre), &_gre);
+		if (!gre)
+			return BPF_DROP;
+
+		if (bpf_htons(gre->flags & GRE_VERSION))
+			/* Only inspect standard GRE packets with version 0 */
+			return BPF_OK;
+
+		keys->nhoff += sizeof(*gre); /* Step over GRE Flags and Proto */
+		if (GRE_IS_CSUM(gre->flags))
+			keys->nhoff += 4; /* Step over chksum and Padding */
+		if (GRE_IS_KEY(gre->flags))
+			keys->nhoff += 4; /* Step over key */
+		if (GRE_IS_SEQ(gre->flags))
+			keys->nhoff += 4; /* Step over sequence number */
+
+		keys->is_encap = true;
+
+		if (gre->proto == bpf_htons(ETH_P_TEB)) {
+			eth = bpf_flow_dissect_get_header(skb, sizeof(*eth),
+							  &_eth);
+			if (!eth)
+				return BPF_DROP;
+
+			keys->nhoff += sizeof(*eth);
+
+			return parse_eth_proto(skb, eth->h_proto);
+		} else {
+			return parse_eth_proto(skb, gre->proto);
+		}
+	case IPPROTO_TCP:
+		tcp = bpf_flow_dissect_get_header(skb, sizeof(*tcp), &_tcp);
+		if (!tcp)
+			return BPF_DROP;
+
+		if (tcp->doff < 5)
+			return BPF_DROP;
+
+		if ((__u8 *)tcp + (tcp->doff << 2) > data_end)
+			return BPF_DROP;
+
+		keys->thoff = keys->nhoff;
+		keys->sport = tcp->source;
+		keys->dport = tcp->dest;
+		return BPF_OK;
+	case IPPROTO_UDP:
+	case IPPROTO_UDPLITE:
+		udp = bpf_flow_dissect_get_header(skb, sizeof(*udp), &_udp);
+		if (!udp)
+			return BPF_DROP;
+
+		keys->thoff = keys->nhoff;
+		keys->sport = udp->source;
+		keys->dport = udp->dest;
+		return BPF_OK;
+	default:
+		return BPF_DROP;
+	}
+
+	return BPF_DROP;
+}
+
+static __always_inline int parse_ipv6_proto(struct __sk_buff *skb, __u8 nexthdr)
+{
+	struct bpf_flow_keys *keys = skb->flow_keys;
+
+	keys->ip_proto = nexthdr;
+	switch (nexthdr) {
+	case IPPROTO_HOPOPTS:
+	case IPPROTO_DSTOPTS:
+		bpf_tail_call(skb, &jmp_table, IPV6OP);
+		break;
+	case IPPROTO_FRAGMENT:
+		bpf_tail_call(skb, &jmp_table, IPV6FR);
+		break;
+	default:
+		return parse_ip_proto(skb, nexthdr);
+	}
+
+	return BPF_DROP;
+}
+
+PROG(IP)(struct __sk_buff *skb)
+{
+	void *data_end = (void *)(long)skb->data_end;
+	struct bpf_flow_keys *keys = skb->flow_keys;
+	void *data = (void *)(long)skb->data;
+	struct iphdr *iph, _iph;
+	bool done = false;
+
+	iph = bpf_flow_dissect_get_header(skb, sizeof(*iph), &_iph);
+	if (!iph)
+		return BPF_DROP;
+
+	/* IP header cannot be smaller than 20 bytes */
+	if (iph->ihl < 5)
+		return BPF_DROP;
+
+	keys->addr_proto = ETH_P_IP;
+	keys->ipv4_src = iph->saddr;
+	keys->ipv4_dst = iph->daddr;
+
+	keys->nhoff += iph->ihl << 2;
+	if (data + keys->nhoff > data_end)
+		return BPF_DROP;
+
+	if (iph->frag_off & bpf_htons(IP_MF | IP_OFFSET)) {
+		keys->is_frag = true;
+		if (iph->frag_off & bpf_htons(IP_OFFSET))
+			/* From second fragment on, packets do not have headers
+			 * we can parse.
+			 */
+			done = true;
+		else
+			keys->is_first_frag = true;
+	}
+
+	if (done)
+		return BPF_OK;
+
+	return parse_ip_proto(skb, iph->protocol);
+}
+
+PROG(IPV6)(struct __sk_buff *skb)
+{
+	struct bpf_flow_keys *keys = skb->flow_keys;
+	struct ipv6hdr *ip6h, _ip6h;
+
+	ip6h = bpf_flow_dissect_get_header(skb, sizeof(*ip6h), &_ip6h);
+	if (!ip6h)
+		return BPF_DROP;
+
+	keys->addr_proto = ETH_P_IPV6;
+	memcpy(&keys->ipv6_src, &ip6h->saddr, 2*sizeof(ip6h->saddr));
+
+	keys->nhoff += sizeof(struct ipv6hdr);
+
+	return parse_ipv6_proto(skb, ip6h->nexthdr);
+}
+
+PROG(IPV6OP)(struct __sk_buff *skb)
+{
+	struct ipv6_opt_hdr *ip6h, _ip6h;
+
+	ip6h = bpf_flow_dissect_get_header(skb, sizeof(*ip6h), &_ip6h);
+	if (!ip6h)
+		return BPF_DROP;
+
+	/* hlen is in 8-octets and does not include the first 8 bytes
+	 * of the header
+	 */
+	skb->flow_keys->nhoff += (1 + ip6h->hdrlen) << 3;
+
+	return parse_ipv6_proto(skb, ip6h->nexthdr);
+}
+
+PROG(IPV6FR)(struct __sk_buff *skb)
+{
+	struct bpf_flow_keys *keys = skb->flow_keys;
+	struct frag_hdr *fragh, _fragh;
+
+	fragh = bpf_flow_dissect_get_header(skb, sizeof(*fragh), &_fragh);
+	if (!fragh)
+		return BPF_DROP;
+
+	keys->nhoff += sizeof(*fragh);
+	keys->is_frag = true;
+	if (!(fragh->frag_off & bpf_htons(IP6_OFFSET)))
+		keys->is_first_frag = true;
+
+	return parse_ipv6_proto(skb, fragh->nexthdr);
+}
+
+PROG(MPLS)(struct __sk_buff *skb)
+{
+	struct mpls_label *mpls, _mpls;
+
+	mpls = bpf_flow_dissect_get_header(skb, sizeof(*mpls), &_mpls);
+	if (!mpls)
+		return BPF_DROP;
+
+	return BPF_OK;
+}
+
+PROG(VLAN)(struct __sk_buff *skb)
+{
+	struct bpf_flow_keys *keys = skb->flow_keys;
+	struct vlan_hdr *vlan, _vlan;
+	__be16 proto;
+
+	/* Peek back to see if single or double-tagging */
+	if (bpf_skb_load_bytes(skb, keys->nhoff - sizeof(proto), &proto,
+			       sizeof(proto)))
+		return BPF_DROP;
+
+	/* Account for double-tagging */
+	if (proto == bpf_htons(ETH_P_8021AD)) {
+		vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan);
+		if (!vlan)
+			return BPF_DROP;
+
+		if (vlan->h_vlan_encapsulated_proto != bpf_htons(ETH_P_8021Q))
+			return BPF_DROP;
+
+		keys->nhoff += sizeof(*vlan);
+	}
+
+	vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan);
+	if (!vlan)
+		return BPF_DROP;
+
+	keys->nhoff += sizeof(*vlan);
+	/* Only allow 8021AD + 8021Q double tagging and no triple tagging.*/
+	if (vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021AD) ||
+	    vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021Q))
+		return BPF_DROP;
+
+	return parse_eth_proto(skb, vlan->h_vlan_encapsulated_proto);
+}
+
+char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index e4be7730222d..686e57ce40f4 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -16,6 +16,13 @@ static int (*bpf_map_update_elem)(void *map, void *key, void *value,
 	(void *) BPF_FUNC_map_update_elem;
 static int (*bpf_map_delete_elem)(void *map, void *key) =
 	(void *) BPF_FUNC_map_delete_elem;
+static int (*bpf_map_push_elem)(void *map, void *value,
+				unsigned long long flags) =
+	(void *) BPF_FUNC_map_push_elem;
+static int (*bpf_map_pop_elem)(void *map, void *value) =
+	(void *) BPF_FUNC_map_pop_elem;
+static int (*bpf_map_peek_elem)(void *map, void *value) =
+	(void *) BPF_FUNC_map_peek_elem;
 static int (*bpf_probe_read)(void *dst, int size, void *unsafe_ptr) =
 	(void *) BPF_FUNC_probe_read;
 static unsigned long long (*bpf_ktime_get_ns)(void) =
@@ -104,6 +111,8 @@ static int (*bpf_msg_cork_bytes)(void *ctx, int len) =
 	(void *) BPF_FUNC_msg_cork_bytes;
 static int (*bpf_msg_pull_data)(void *ctx, int start, int end, int flags) =
 	(void *) BPF_FUNC_msg_pull_data;
+static int (*bpf_msg_push_data)(void *ctx, int start, int end, int flags) =
+	(void *) BPF_FUNC_msg_push_data;
 static int (*bpf_bind)(void *ctx, void *addr, int addr_len) =
 	(void *) BPF_FUNC_bind;
 static int (*bpf_xdp_adjust_tail)(void *ctx, int offset) =
@@ -143,6 +152,22 @@ static unsigned long long (*bpf_skb_cgroup_id)(void *ctx) =
 	(void *) BPF_FUNC_skb_cgroup_id;
 static unsigned long long (*bpf_skb_ancestor_cgroup_id)(void *ctx, int level) =
 	(void *) BPF_FUNC_skb_ancestor_cgroup_id;
+static struct bpf_sock *(*bpf_sk_lookup_tcp)(void *ctx,
+					     struct bpf_sock_tuple *tuple,
+					     int size, unsigned int netns_id,
+					     unsigned long long flags) =
+	(void *) BPF_FUNC_sk_lookup_tcp;
+static struct bpf_sock *(*bpf_sk_lookup_udp)(void *ctx,
+					     struct bpf_sock_tuple *tuple,
+					     int size, unsigned int netns_id,
+					     unsigned long long flags) =
+	(void *) BPF_FUNC_sk_lookup_udp;
+static int (*bpf_sk_release)(struct bpf_sock *sk) =
+	(void *) BPF_FUNC_sk_release;
+static int (*bpf_skb_vlan_push)(void *ctx, __be16 vlan_proto, __u16 vlan_tci) =
+	(void *) BPF_FUNC_skb_vlan_push;
+static int (*bpf_skb_vlan_pop)(void *ctx) =
+	(void *) BPF_FUNC_skb_vlan_pop;
 
 /* llvm builtin functions that eBPF C program may use to
  * emit BPF_LD_ABS and BPF_LD_IND instructions
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index b4994a94968b..7f90d3645af8 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -18,3 +18,7 @@ CONFIG_CRYPTO_HMAC=m
 CONFIG_CRYPTO_SHA256=m
 CONFIG_VXLAN=y
 CONFIG_GENEVE=y
+CONFIG_NET_CLS_FLOWER=m
+CONFIG_LWTUNNEL=y
+CONFIG_BPF_STREAM_PARSER=y
+CONFIG_XDP_SOCKETS=y
diff --git a/tools/testing/selftests/bpf/flow_dissector_load.c b/tools/testing/selftests/bpf/flow_dissector_load.c
new file mode 100644
index 000000000000..ae8180b11d5f
--- /dev/null
+++ b/tools/testing/selftests/bpf/flow_dissector_load.c
@@ -0,0 +1,142 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <error.h>
+#include <errno.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "bpf_rlimit.h"
+
+const char *cfg_pin_path = "/sys/fs/bpf/flow_dissector";
+const char *cfg_map_name = "jmp_table";
+bool cfg_attach = true;
+char *cfg_section_name;
+char *cfg_path_name;
+
+static void load_and_attach_program(void)
+{
+	struct bpf_program *prog, *main_prog;
+	struct bpf_map *prog_array;
+	int i, fd, prog_fd, ret;
+	struct bpf_object *obj;
+	int prog_array_fd;
+
+	ret = bpf_prog_load(cfg_path_name, BPF_PROG_TYPE_FLOW_DISSECTOR, &obj,
+			    &prog_fd);
+	if (ret)
+		error(1, 0, "bpf_prog_load %s", cfg_path_name);
+
+	main_prog = bpf_object__find_program_by_title(obj, cfg_section_name);
+	if (!main_prog)
+		error(1, 0, "bpf_object__find_program_by_title %s",
+		      cfg_section_name);
+
+	prog_fd = bpf_program__fd(main_prog);
+	if (prog_fd < 0)
+		error(1, 0, "bpf_program__fd");
+
+	prog_array = bpf_object__find_map_by_name(obj, cfg_map_name);
+	if (!prog_array)
+		error(1, 0, "bpf_object__find_map_by_name %s", cfg_map_name);
+
+	prog_array_fd = bpf_map__fd(prog_array);
+	if (prog_array_fd < 0)
+		error(1, 0, "bpf_map__fd %s", cfg_map_name);
+
+	i = 0;
+	bpf_object__for_each_program(prog, obj) {
+		fd = bpf_program__fd(prog);
+		if (fd < 0)
+			error(1, 0, "bpf_program__fd");
+
+		if (fd != prog_fd) {
+			printf("%d: %s\n", i, bpf_program__title(prog, false));
+			bpf_map_update_elem(prog_array_fd, &i, &fd, BPF_ANY);
+			++i;
+		}
+	}
+
+	ret = bpf_prog_attach(prog_fd, 0 /* Ignore */, BPF_FLOW_DISSECTOR, 0);
+	if (ret)
+		error(1, 0, "bpf_prog_attach %s", cfg_path_name);
+
+	ret = bpf_object__pin(obj, cfg_pin_path);
+	if (ret)
+		error(1, 0, "bpf_object__pin %s", cfg_pin_path);
+
+}
+
+static void detach_program(void)
+{
+	char command[64];
+	int ret;
+
+	ret = bpf_prog_detach(0, BPF_FLOW_DISSECTOR);
+	if (ret)
+		error(1, 0, "bpf_prog_detach");
+
+	/* To unpin, it is necessary and sufficient to just remove this dir */
+	sprintf(command, "rm -r %s", cfg_pin_path);
+	ret = system(command);
+	if (ret)
+		error(1, errno, command);
+}
+
+static void parse_opts(int argc, char **argv)
+{
+	bool attach = false;
+	bool detach = false;
+	int c;
+
+	while ((c = getopt(argc, argv, "adp:s:")) != -1) {
+		switch (c) {
+		case 'a':
+			if (detach)
+				error(1, 0, "attach/detach are exclusive");
+			attach = true;
+			break;
+		case 'd':
+			if (attach)
+				error(1, 0, "attach/detach are exclusive");
+			detach = true;
+			break;
+		case 'p':
+			if (cfg_path_name)
+				error(1, 0, "only one prog name can be given");
+
+			cfg_path_name = optarg;
+			break;
+		case 's':
+			if (cfg_section_name)
+				error(1, 0, "only one section can be given");
+
+			cfg_section_name = optarg;
+			break;
+		}
+	}
+
+	if (detach)
+		cfg_attach = false;
+
+	if (cfg_attach && !cfg_path_name)
+		error(1, 0, "must provide a path to the BPF program");
+
+	if (cfg_attach && !cfg_section_name)
+		error(1, 0, "must provide a section name");
+}
+
+int main(int argc, char **argv)
+{
+	parse_opts(argc, argv);
+	if (cfg_attach)
+		load_and_attach_program();
+	else
+		detach_program();
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/netcnt_common.h b/tools/testing/selftests/bpf/netcnt_common.h
new file mode 100644
index 000000000000..81084c1c2c23
--- /dev/null
+++ b/tools/testing/selftests/bpf/netcnt_common.h
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef __NETCNT_COMMON_H
+#define __NETCNT_COMMON_H
+
+#include <linux/types.h>
+
+#define MAX_PERCPU_PACKETS 32
+
+struct percpu_net_cnt {
+	__u64 packets;
+	__u64 bytes;
+
+	__u64 prev_ts;
+
+	__u64 prev_packets;
+	__u64 prev_bytes;
+};
+
+struct net_cnt {
+	__u64 packets;
+	__u64 bytes;
+};
+
+#endif
diff --git a/tools/testing/selftests/bpf/netcnt_prog.c b/tools/testing/selftests/bpf/netcnt_prog.c
new file mode 100644
index 000000000000..1198abca1360
--- /dev/null
+++ b/tools/testing/selftests/bpf/netcnt_prog.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <linux/version.h>
+
+#include "bpf_helpers.h"
+#include "netcnt_common.h"
+
+#define MAX_BPS	(3 * 1024 * 1024)
+
+#define REFRESH_TIME_NS	100000000
+#define NS_PER_SEC	1000000000
+
+struct bpf_map_def SEC("maps") percpu_netcnt = {
+	.type = BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
+	.key_size = sizeof(struct bpf_cgroup_storage_key),
+	.value_size = sizeof(struct percpu_net_cnt),
+};
+
+struct bpf_map_def SEC("maps") netcnt = {
+	.type = BPF_MAP_TYPE_CGROUP_STORAGE,
+	.key_size = sizeof(struct bpf_cgroup_storage_key),
+	.value_size = sizeof(struct net_cnt),
+};
+
+SEC("cgroup/skb")
+int bpf_nextcnt(struct __sk_buff *skb)
+{
+	struct percpu_net_cnt *percpu_cnt;
+	char fmt[] = "%d %llu %llu\n";
+	struct net_cnt *cnt;
+	__u64 ts, dt;
+	int ret;
+
+	cnt = bpf_get_local_storage(&netcnt, 0);
+	percpu_cnt = bpf_get_local_storage(&percpu_netcnt, 0);
+
+	percpu_cnt->packets++;
+	percpu_cnt->bytes += skb->len;
+
+	if (percpu_cnt->packets > MAX_PERCPU_PACKETS) {
+		__sync_fetch_and_add(&cnt->packets,
+				     percpu_cnt->packets);
+		percpu_cnt->packets = 0;
+
+		__sync_fetch_and_add(&cnt->bytes,
+				     percpu_cnt->bytes);
+		percpu_cnt->bytes = 0;
+	}
+
+	ts = bpf_ktime_get_ns();
+	dt = ts - percpu_cnt->prev_ts;
+
+	dt *= MAX_BPS;
+	dt /= NS_PER_SEC;
+
+	if (cnt->bytes + percpu_cnt->bytes - percpu_cnt->prev_bytes < dt)
+		ret = 1;
+	else
+		ret = 0;
+
+	if (dt > REFRESH_TIME_NS) {
+		percpu_cnt->prev_ts = ts;
+		percpu_cnt->prev_packets = cnt->packets;
+		percpu_cnt->prev_bytes = cnt->bytes;
+	}
+
+	return !!ret;
+}
+
+char _license[] SEC("license") = "GPL";
+__u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c
index 6b5cfeb7a9cc..f42b3396d622 100644
--- a/tools/testing/selftests/bpf/test_btf.c
+++ b/tools/testing/selftests/bpf/test_btf.c
@@ -4,6 +4,7 @@
 #include <linux/bpf.h>
 #include <linux/btf.h>
 #include <linux/err.h>
+#include <linux/kernel.h>
 #include <bpf/bpf.h>
 #include <sys/resource.h>
 #include <libelf.h>
@@ -45,7 +46,6 @@ static int count_result(int err)
 	return err;
 }
 
-#define min(a, b) ((a) < (b) ? (a) : (b))
 #define __printf(a, b)	__attribute__((format(printf, a, b)))
 
 __printf(1, 2)
@@ -130,6 +130,7 @@ struct btf_raw_test {
 	bool map_create_err;
 	bool ordered_map;
 	bool lossless_map;
+	bool percpu_map;
 	int hdr_len_delta;
 	int type_off_delta;
 	int str_off_delta;
@@ -2157,6 +2158,7 @@ static struct btf_pprint_test_meta {
 	const char *map_name;
 	bool ordered_map;
 	bool lossless_map;
+	bool percpu_map;
 } pprint_tests_meta[] = {
 {
 	.descr = "BTF pretty print array",
@@ -2164,6 +2166,7 @@ static struct btf_pprint_test_meta {
 	.map_name = "pprint_test_array",
 	.ordered_map = true,
 	.lossless_map = true,
+	.percpu_map = false,
 },
 
 {
@@ -2172,6 +2175,7 @@ static struct btf_pprint_test_meta {
 	.map_name = "pprint_test_hash",
 	.ordered_map = false,
 	.lossless_map = true,
+	.percpu_map = false,
 },
 
 {
@@ -2180,30 +2184,83 @@ static struct btf_pprint_test_meta {
 	.map_name = "pprint_test_lru_hash",
 	.ordered_map = false,
 	.lossless_map = false,
+	.percpu_map = false,
+},
+
+{
+	.descr = "BTF pretty print percpu array",
+	.map_type = BPF_MAP_TYPE_PERCPU_ARRAY,
+	.map_name = "pprint_test_percpu_array",
+	.ordered_map = true,
+	.lossless_map = true,
+	.percpu_map = true,
+},
+
+{
+	.descr = "BTF pretty print percpu hash",
+	.map_type = BPF_MAP_TYPE_PERCPU_HASH,
+	.map_name = "pprint_test_percpu_hash",
+	.ordered_map = false,
+	.lossless_map = true,
+	.percpu_map = true,
+},
+
+{
+	.descr = "BTF pretty print lru percpu hash",
+	.map_type = BPF_MAP_TYPE_LRU_PERCPU_HASH,
+	.map_name = "pprint_test_lru_percpu_hash",
+	.ordered_map = false,
+	.lossless_map = false,
+	.percpu_map = true,
 },
 
 };
 
 
-static void set_pprint_mapv(struct pprint_mapv *v, uint32_t i)
+static void set_pprint_mapv(struct pprint_mapv *v, uint32_t i,
+			    int num_cpus, int rounded_value_size)
 {
-	v->ui32 = i;
-	v->si32 = -i;
-	v->unused_bits2a = 3;
-	v->bits28 = i;
-	v->unused_bits2b = 3;
-	v->ui64 = i;
-	v->aenum = i & 0x03;
+	int cpu;
+
+	for (cpu = 0; cpu < num_cpus; cpu++) {
+		v->ui32 = i + cpu;
+		v->si32 = -i;
+		v->unused_bits2a = 3;
+		v->bits28 = i;
+		v->unused_bits2b = 3;
+		v->ui64 = i;
+		v->aenum = i & 0x03;
+		v = (void *)v + rounded_value_size;
+	}
 }
 
+static int check_line(const char *expected_line, int nexpected_line,
+		      int expected_line_len, const char *line)
+{
+	if (CHECK(nexpected_line == expected_line_len,
+		  "expected_line is too long"))
+		return -1;
+
+	if (strcmp(expected_line, line)) {
+		fprintf(stderr, "unexpected pprint output\n");
+		fprintf(stderr, "expected: %s", expected_line);
+		fprintf(stderr, "    read: %s", line);
+		return -1;
+	}
+
+	return 0;
+}
+
+
 static int do_test_pprint(void)
 {
 	const struct btf_raw_test *test = &pprint_test_template;
 	struct bpf_create_map_attr create_attr = {};
+	bool ordered_map, lossless_map, percpu_map;
+	int err, ret, num_cpus, rounded_value_size;
+	struct pprint_mapv *mapv = NULL;
 	unsigned int key, nr_read_elems;
-	bool ordered_map, lossless_map;
 	int map_fd = -1, btf_fd = -1;
-	struct pprint_mapv mapv = {};
 	unsigned int raw_btf_size;
 	char expected_line[255];
 	FILE *pin_file = NULL;
@@ -2212,7 +2269,6 @@ static int do_test_pprint(void)
 	char *line = NULL;
 	uint8_t *raw_btf;
 	ssize_t nread;
-	int err, ret;
 
 	fprintf(stderr, "%s......", test->descr);
 	raw_btf = btf_raw_create(&hdr_tmpl, test->raw_types,
@@ -2261,9 +2317,18 @@ static int do_test_pprint(void)
 	if (CHECK(err, "bpf_obj_pin(%s): errno:%d.", pin_path, errno))
 		goto done;
 
+	percpu_map = test->percpu_map;
+	num_cpus = percpu_map ? bpf_num_possible_cpus() : 1;
+	rounded_value_size = round_up(sizeof(struct pprint_mapv), 8);
+	mapv = calloc(num_cpus, rounded_value_size);
+	if (CHECK(!mapv, "mapv allocation failure")) {
+		err = -1;
+		goto done;
+	}
+
 	for (key = 0; key < test->max_entries; key++) {
-		set_pprint_mapv(&mapv, key);
-		bpf_map_update_elem(map_fd, &key, &mapv, 0);
+		set_pprint_mapv(mapv, key, num_cpus, rounded_value_size);
+		bpf_map_update_elem(map_fd, &key, mapv, 0);
 	}
 
 	pin_file = fopen(pin_path, "r");
@@ -2286,33 +2351,74 @@ static int do_test_pprint(void)
 	ordered_map = test->ordered_map;
 	lossless_map = test->lossless_map;
 	do {
+		struct pprint_mapv *cmapv;
 		ssize_t nexpected_line;
 		unsigned int next_key;
+		int cpu;
 
 		next_key = ordered_map ? nr_read_elems : atoi(line);
-		set_pprint_mapv(&mapv, next_key);
-		nexpected_line = snprintf(expected_line, sizeof(expected_line),
-					  "%u: {%u,0,%d,0x%x,0x%x,0x%x,{%lu|[%u,%u,%u,%u,%u,%u,%u,%u]},%s}\n",
-					  next_key,
-					  mapv.ui32, mapv.si32,
-					  mapv.unused_bits2a, mapv.bits28, mapv.unused_bits2b,
-					  mapv.ui64,
-					  mapv.ui8a[0], mapv.ui8a[1], mapv.ui8a[2], mapv.ui8a[3],
-					  mapv.ui8a[4], mapv.ui8a[5], mapv.ui8a[6], mapv.ui8a[7],
-					  pprint_enum_str[mapv.aenum]);
-
-		if (CHECK(nexpected_line == sizeof(expected_line),
-			  "expected_line is too long")) {
-			err = -1;
-			goto done;
+		set_pprint_mapv(mapv, next_key, num_cpus, rounded_value_size);
+		cmapv = mapv;
+
+		for (cpu = 0; cpu < num_cpus; cpu++) {
+			if (percpu_map) {
+				/* for percpu map, the format looks like:
+				 * <key>: {
+				 *	cpu0: <value_on_cpu0>
+				 *	cpu1: <value_on_cpu1>
+				 *	...
+				 *	cpun: <value_on_cpun>
+				 * }
+				 *
+				 * let us verify the line containing the key here.
+				 */
+				if (cpu == 0) {
+					nexpected_line = snprintf(expected_line,
+								  sizeof(expected_line),
+								  "%u: {\n",
+								  next_key);
+
+					err = check_line(expected_line, nexpected_line,
+							 sizeof(expected_line), line);
+					if (err == -1)
+						goto done;
+				}
+
+				/* read value@cpu */
+				nread = getline(&line, &line_len, pin_file);
+				if (nread < 0)
+					break;
+			}
+
+			nexpected_line = snprintf(expected_line, sizeof(expected_line),
+						  "%s%u: {%u,0,%d,0x%x,0x%x,0x%x,"
+						  "{%lu|[%u,%u,%u,%u,%u,%u,%u,%u]},%s}\n",
+						  percpu_map ? "\tcpu" : "",
+						  percpu_map ? cpu : next_key,
+						  cmapv->ui32, cmapv->si32,
+						  cmapv->unused_bits2a,
+						  cmapv->bits28,
+						  cmapv->unused_bits2b,
+						  cmapv->ui64,
+						  cmapv->ui8a[0], cmapv->ui8a[1],
+						  cmapv->ui8a[2], cmapv->ui8a[3],
+						  cmapv->ui8a[4], cmapv->ui8a[5],
+						  cmapv->ui8a[6], cmapv->ui8a[7],
+						  pprint_enum_str[cmapv->aenum]);
+
+			err = check_line(expected_line, nexpected_line,
+					 sizeof(expected_line), line);
+			if (err == -1)
+				goto done;
+
+			cmapv = (void *)cmapv + rounded_value_size;
 		}
 
-		if (strcmp(expected_line, line)) {
-			err = -1;
-			fprintf(stderr, "unexpected pprint output\n");
-			fprintf(stderr, "expected: %s", expected_line);
-			fprintf(stderr, "    read: %s", line);
-			goto done;
+		if (percpu_map) {
+			/* skip the last bracket for the percpu map */
+			nread = getline(&line, &line_len, pin_file);
+			if (nread < 0)
+				break;
 		}
 
 		nread = getline(&line, &line_len, pin_file);
@@ -2334,6 +2440,8 @@ static int do_test_pprint(void)
 	err = 0;
 
 done:
+	if (mapv)
+		free(mapv);
 	if (!err)
 		fprintf(stderr, "OK");
 	if (*btf_log_buf && (err || args.always_log))
@@ -2361,6 +2469,7 @@ static int test_pprint(void)
 		pprint_test_template.map_name = pprint_tests_meta[i].map_name;
 		pprint_test_template.ordered_map = pprint_tests_meta[i].ordered_map;
 		pprint_test_template.lossless_map = pprint_tests_meta[i].lossless_map;
+		pprint_test_template.percpu_map = pprint_tests_meta[i].percpu_map;
 
 		err |= count_result(do_test_pprint());
 	}
diff --git a/tools/testing/selftests/bpf/test_cgroup_storage.c b/tools/testing/selftests/bpf/test_cgroup_storage.c
index 4e196e3bfecf..f44834155f25 100644
--- a/tools/testing/selftests/bpf/test_cgroup_storage.c
+++ b/tools/testing/selftests/bpf/test_cgroup_storage.c
@@ -4,6 +4,7 @@
 #include <linux/filter.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include <sys/sysinfo.h>
 
 #include "bpf_rlimit.h"
 #include "cgroup_helpers.h"
@@ -15,6 +16,14 @@ char bpf_log_buf[BPF_LOG_BUF_SIZE];
 int main(int argc, char **argv)
 {
 	struct bpf_insn prog[] = {
+		BPF_LD_MAP_FD(BPF_REG_1, 0), /* percpu map fd */
+		BPF_MOV64_IMM(BPF_REG_2, 0), /* flags, not used */
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+			     BPF_FUNC_get_local_storage),
+		BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 0x1),
+		BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_3, 0),
+
 		BPF_LD_MAP_FD(BPF_REG_1, 0), /* map fd */
 		BPF_MOV64_IMM(BPF_REG_2, 0), /* flags, not used */
 		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
@@ -28,9 +37,18 @@ int main(int argc, char **argv)
 	};
 	size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);
 	int error = EXIT_FAILURE;
-	int map_fd, prog_fd, cgroup_fd;
+	int map_fd, percpu_map_fd, prog_fd, cgroup_fd;
 	struct bpf_cgroup_storage_key key;
 	unsigned long long value;
+	unsigned long long *percpu_value;
+	int cpu, nproc;
+
+	nproc = get_nprocs_conf();
+	percpu_value = malloc(sizeof(*percpu_value) * nproc);
+	if (!percpu_value) {
+		printf("Not enough memory for per-cpu area (%d cpus)\n", nproc);
+		goto err;
+	}
 
 	map_fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_STORAGE, sizeof(key),
 				sizeof(value), 0, 0);
@@ -39,7 +57,15 @@ int main(int argc, char **argv)
 		goto out;
 	}
 
-	prog[0].imm = map_fd;
+	percpu_map_fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
+				       sizeof(key), sizeof(value), 0, 0);
+	if (percpu_map_fd < 0) {
+		printf("Failed to create map: %s\n", strerror(errno));
+		goto out;
+	}
+
+	prog[0].imm = percpu_map_fd;
+	prog[7].imm = map_fd;
 	prog_fd = bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB,
 				   prog, insns_cnt, "GPL", 0,
 				   bpf_log_buf, BPF_LOG_BUF_SIZE);
@@ -77,7 +103,15 @@ int main(int argc, char **argv)
 	}
 
 	if (bpf_map_lookup_elem(map_fd, &key, &value)) {
-		printf("Failed to lookup cgroup storage\n");
+		printf("Failed to lookup cgroup storage 0\n");
+		goto err;
+	}
+
+	for (cpu = 0; cpu < nproc; cpu++)
+		percpu_value[cpu] = 1000;
+
+	if (bpf_map_update_elem(percpu_map_fd, &key, percpu_value, 0)) {
+		printf("Failed to update the data in the cgroup storage\n");
 		goto err;
 	}
 
@@ -120,11 +154,31 @@ int main(int argc, char **argv)
 		goto err;
 	}
 
+	/* Check the final value of the counter in the percpu local storage */
+
+	for (cpu = 0; cpu < nproc; cpu++)
+		percpu_value[cpu] = 0;
+
+	if (bpf_map_lookup_elem(percpu_map_fd, &key, percpu_value)) {
+		printf("Failed to lookup the per-cpu cgroup storage\n");
+		goto err;
+	}
+
+	value = 0;
+	for (cpu = 0; cpu < nproc; cpu++)
+		value += percpu_value[cpu];
+
+	if (value != nproc * 1000 + 6) {
+		printf("Unexpected data in the per-cpu cgroup storage\n");
+		goto err;
+	}
+
 	error = 0;
 	printf("test_cgroup_storage:PASS\n");
 
 err:
 	cleanup_cgroup_environment();
+	free(percpu_value);
 
 out:
 	return error;
diff --git a/tools/testing/selftests/bpf/test_flow_dissector.c b/tools/testing/selftests/bpf/test_flow_dissector.c
new file mode 100644
index 000000000000..12b784afba31
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_flow_dissector.c
@@ -0,0 +1,782 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Inject packets with all sorts of encapsulation into the kernel.
+ *
+ * IPv4/IPv6	outer layer 3
+ * GRE/GUE/BARE outer layer 4, where bare is IPIP/SIT/IPv4-in-IPv6/..
+ * IPv4/IPv6    inner layer 3
+ */
+
+#define _GNU_SOURCE
+
+#include <stddef.h>
+#include <arpa/inet.h>
+#include <asm/byteorder.h>
+#include <error.h>
+#include <errno.h>
+#include <linux/if_packet.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ipv6.h>
+#include <netinet/ip.h>
+#include <netinet/in.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#define CFG_PORT_INNER	8000
+
+/* Add some protocol definitions that do not exist in userspace */
+
+struct grehdr {
+	uint16_t unused;
+	uint16_t protocol;
+} __attribute__((packed));
+
+struct guehdr {
+	union {
+		struct {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+			__u8	hlen:5,
+				control:1,
+				version:2;
+#elif defined (__BIG_ENDIAN_BITFIELD)
+			__u8	version:2,
+				control:1,
+				hlen:5;
+#else
+#error  "Please fix <asm/byteorder.h>"
+#endif
+			__u8	proto_ctype;
+			__be16	flags;
+		};
+		__be32	word;
+	};
+};
+
+static uint8_t	cfg_dsfield_inner;
+static uint8_t	cfg_dsfield_outer;
+static uint8_t	cfg_encap_proto;
+static bool	cfg_expect_failure = false;
+static int	cfg_l3_extra = AF_UNSPEC;	/* optional SIT prefix */
+static int	cfg_l3_inner = AF_UNSPEC;
+static int	cfg_l3_outer = AF_UNSPEC;
+static int	cfg_num_pkt = 10;
+static int	cfg_num_secs = 0;
+static char	cfg_payload_char = 'a';
+static int	cfg_payload_len = 100;
+static int	cfg_port_gue = 6080;
+static bool	cfg_only_rx;
+static bool	cfg_only_tx;
+static int	cfg_src_port = 9;
+
+static char	buf[ETH_DATA_LEN];
+
+#define INIT_ADDR4(name, addr4, port)				\
+	static struct sockaddr_in name = {			\
+		.sin_family = AF_INET,				\
+		.sin_port = __constant_htons(port),		\
+		.sin_addr.s_addr = __constant_htonl(addr4),	\
+	};
+
+#define INIT_ADDR6(name, addr6, port)				\
+	static struct sockaddr_in6 name = {			\
+		.sin6_family = AF_INET6,			\
+		.sin6_port = __constant_htons(port),		\
+		.sin6_addr = addr6,				\
+	};
+
+INIT_ADDR4(in_daddr4, INADDR_LOOPBACK, CFG_PORT_INNER)
+INIT_ADDR4(in_saddr4, INADDR_LOOPBACK + 2, 0)
+INIT_ADDR4(out_daddr4, INADDR_LOOPBACK, 0)
+INIT_ADDR4(out_saddr4, INADDR_LOOPBACK + 1, 0)
+INIT_ADDR4(extra_daddr4, INADDR_LOOPBACK, 0)
+INIT_ADDR4(extra_saddr4, INADDR_LOOPBACK + 1, 0)
+
+INIT_ADDR6(in_daddr6, IN6ADDR_LOOPBACK_INIT, CFG_PORT_INNER)
+INIT_ADDR6(in_saddr6, IN6ADDR_LOOPBACK_INIT, 0)
+INIT_ADDR6(out_daddr6, IN6ADDR_LOOPBACK_INIT, 0)
+INIT_ADDR6(out_saddr6, IN6ADDR_LOOPBACK_INIT, 0)
+INIT_ADDR6(extra_daddr6, IN6ADDR_LOOPBACK_INIT, 0)
+INIT_ADDR6(extra_saddr6, IN6ADDR_LOOPBACK_INIT, 0)
+
+static unsigned long util_gettime(void)
+{
+	struct timeval tv;
+
+	gettimeofday(&tv, NULL);
+	return (tv.tv_sec * 1000) + (tv.tv_usec / 1000);
+}
+
+static void util_printaddr(const char *msg, struct sockaddr *addr)
+{
+	unsigned long off = 0;
+	char nbuf[INET6_ADDRSTRLEN];
+
+	switch (addr->sa_family) {
+	case PF_INET:
+		off = __builtin_offsetof(struct sockaddr_in, sin_addr);
+		break;
+	case PF_INET6:
+		off = __builtin_offsetof(struct sockaddr_in6, sin6_addr);
+		break;
+	default:
+		error(1, 0, "printaddr: unsupported family %u\n",
+		      addr->sa_family);
+	}
+
+	if (!inet_ntop(addr->sa_family, ((void *) addr) + off, nbuf,
+		       sizeof(nbuf)))
+		error(1, errno, "inet_ntop");
+
+	fprintf(stderr, "%s: %s\n", msg, nbuf);
+}
+
+static unsigned long add_csum_hword(const uint16_t *start, int num_u16)
+{
+	unsigned long sum = 0;
+	int i;
+
+	for (i = 0; i < num_u16; i++)
+		sum += start[i];
+
+	return sum;
+}
+
+static uint16_t build_ip_csum(const uint16_t *start, int num_u16,
+			      unsigned long sum)
+{
+	sum += add_csum_hword(start, num_u16);
+
+	while (sum >> 16)
+		sum = (sum & 0xffff) + (sum >> 16);
+
+	return ~sum;
+}
+
+static void build_ipv4_header(void *header, uint8_t proto,
+			      uint32_t src, uint32_t dst,
+			      int payload_len, uint8_t tos)
+{
+	struct iphdr *iph = header;
+
+	iph->ihl = 5;
+	iph->version = 4;
+	iph->tos = tos;
+	iph->ttl = 8;
+	iph->tot_len = htons(sizeof(*iph) + payload_len);
+	iph->id = htons(1337);
+	iph->protocol = proto;
+	iph->saddr = src;
+	iph->daddr = dst;
+	iph->check = build_ip_csum((void *) iph, iph->ihl << 1, 0);
+}
+
+static void ipv6_set_dsfield(struct ipv6hdr *ip6h, uint8_t dsfield)
+{
+	uint16_t val, *ptr = (uint16_t *)ip6h;
+
+	val = ntohs(*ptr);
+	val &= 0xF00F;
+	val |= ((uint16_t) dsfield) << 4;
+	*ptr = htons(val);
+}
+
+static void build_ipv6_header(void *header, uint8_t proto,
+			      struct sockaddr_in6 *src,
+			      struct sockaddr_in6 *dst,
+			      int payload_len, uint8_t dsfield)
+{
+	struct ipv6hdr *ip6h = header;
+
+	ip6h->version = 6;
+	ip6h->payload_len = htons(payload_len);
+	ip6h->nexthdr = proto;
+	ip6h->hop_limit = 8;
+	ipv6_set_dsfield(ip6h, dsfield);
+
+	memcpy(&ip6h->saddr, &src->sin6_addr, sizeof(ip6h->saddr));
+	memcpy(&ip6h->daddr, &dst->sin6_addr, sizeof(ip6h->daddr));
+}
+
+static uint16_t build_udp_v4_csum(const struct iphdr *iph,
+				  const struct udphdr *udph,
+				  int num_words)
+{
+	unsigned long pseudo_sum;
+	int num_u16 = sizeof(iph->saddr);	/* halfwords: twice byte len */
+
+	pseudo_sum = add_csum_hword((void *) &iph->saddr, num_u16);
+	pseudo_sum += htons(IPPROTO_UDP);
+	pseudo_sum += udph->len;
+	return build_ip_csum((void *) udph, num_words, pseudo_sum);
+}
+
+static uint16_t build_udp_v6_csum(const struct ipv6hdr *ip6h,
+				  const struct udphdr *udph,
+				  int num_words)
+{
+	unsigned long pseudo_sum;
+	int num_u16 = sizeof(ip6h->saddr);	/* halfwords: twice byte len */
+
+	pseudo_sum = add_csum_hword((void *) &ip6h->saddr, num_u16);
+	pseudo_sum += htons(ip6h->nexthdr);
+	pseudo_sum += ip6h->payload_len;
+	return build_ip_csum((void *) udph, num_words, pseudo_sum);
+}
+
+static void build_udp_header(void *header, int payload_len,
+			     uint16_t dport, int family)
+{
+	struct udphdr *udph = header;
+	int len = sizeof(*udph) + payload_len;
+
+	udph->source = htons(cfg_src_port);
+	udph->dest = htons(dport);
+	udph->len = htons(len);
+	udph->check = 0;
+	if (family == AF_INET)
+		udph->check = build_udp_v4_csum(header - sizeof(struct iphdr),
+						udph, len >> 1);
+	else
+		udph->check = build_udp_v6_csum(header - sizeof(struct ipv6hdr),
+						udph, len >> 1);
+}
+
+static void build_gue_header(void *header, uint8_t proto)
+{
+	struct guehdr *gueh = header;
+
+	gueh->proto_ctype = proto;
+}
+
+static void build_gre_header(void *header, uint16_t proto)
+{
+	struct grehdr *greh = header;
+
+	greh->protocol = htons(proto);
+}
+
+static int l3_length(int family)
+{
+	if (family == AF_INET)
+		return sizeof(struct iphdr);
+	else
+		return sizeof(struct ipv6hdr);
+}
+
+static int build_packet(void)
+{
+	int ol3_len = 0, ol4_len = 0, il3_len = 0, il4_len = 0;
+	int el3_len = 0;
+
+	if (cfg_l3_extra)
+		el3_len = l3_length(cfg_l3_extra);
+
+	/* calculate header offsets */
+	if (cfg_encap_proto) {
+		ol3_len = l3_length(cfg_l3_outer);
+
+		if (cfg_encap_proto == IPPROTO_GRE)
+			ol4_len = sizeof(struct grehdr);
+		else if (cfg_encap_proto == IPPROTO_UDP)
+			ol4_len = sizeof(struct udphdr) + sizeof(struct guehdr);
+	}
+
+	il3_len = l3_length(cfg_l3_inner);
+	il4_len = sizeof(struct udphdr);
+
+	if (el3_len + ol3_len + ol4_len + il3_len + il4_len + cfg_payload_len >=
+	    sizeof(buf))
+		error(1, 0, "packet too large\n");
+
+	/*
+	 * Fill packet from inside out, to calculate correct checksums.
+	 * But create ip before udp headers, as udp uses ip for pseudo-sum.
+	 */
+	memset(buf + el3_len + ol3_len + ol4_len + il3_len + il4_len,
+	       cfg_payload_char, cfg_payload_len);
+
+	/* add zero byte for udp csum padding */
+	buf[el3_len + ol3_len + ol4_len + il3_len + il4_len + cfg_payload_len] = 0;
+
+	switch (cfg_l3_inner) {
+	case PF_INET:
+		build_ipv4_header(buf + el3_len + ol3_len + ol4_len,
+				  IPPROTO_UDP,
+				  in_saddr4.sin_addr.s_addr,
+				  in_daddr4.sin_addr.s_addr,
+				  il4_len + cfg_payload_len,
+				  cfg_dsfield_inner);
+		break;
+	case PF_INET6:
+		build_ipv6_header(buf + el3_len + ol3_len + ol4_len,
+				  IPPROTO_UDP,
+				  &in_saddr6, &in_daddr6,
+				  il4_len + cfg_payload_len,
+				  cfg_dsfield_inner);
+		break;
+	}
+
+	build_udp_header(buf + el3_len + ol3_len + ol4_len + il3_len,
+			 cfg_payload_len, CFG_PORT_INNER, cfg_l3_inner);
+
+	if (!cfg_encap_proto)
+		return il3_len + il4_len + cfg_payload_len;
+
+	switch (cfg_l3_outer) {
+	case PF_INET:
+		build_ipv4_header(buf + el3_len, cfg_encap_proto,
+				  out_saddr4.sin_addr.s_addr,
+				  out_daddr4.sin_addr.s_addr,
+				  ol4_len + il3_len + il4_len + cfg_payload_len,
+				  cfg_dsfield_outer);
+		break;
+	case PF_INET6:
+		build_ipv6_header(buf + el3_len, cfg_encap_proto,
+				  &out_saddr6, &out_daddr6,
+				  ol4_len + il3_len + il4_len + cfg_payload_len,
+				  cfg_dsfield_outer);
+		break;
+	}
+
+	switch (cfg_encap_proto) {
+	case IPPROTO_UDP:
+		build_gue_header(buf + el3_len + ol3_len + ol4_len -
+				 sizeof(struct guehdr),
+				 cfg_l3_inner == PF_INET ? IPPROTO_IPIP
+							 : IPPROTO_IPV6);
+		build_udp_header(buf + el3_len + ol3_len,
+				 sizeof(struct guehdr) + il3_len + il4_len +
+				 cfg_payload_len,
+				 cfg_port_gue, cfg_l3_outer);
+		break;
+	case IPPROTO_GRE:
+		build_gre_header(buf + el3_len + ol3_len,
+				 cfg_l3_inner == PF_INET ? ETH_P_IP
+							 : ETH_P_IPV6);
+		break;
+	}
+
+	switch (cfg_l3_extra) {
+	case PF_INET:
+		build_ipv4_header(buf,
+				  cfg_l3_outer == PF_INET ? IPPROTO_IPIP
+							  : IPPROTO_IPV6,
+				  extra_saddr4.sin_addr.s_addr,
+				  extra_daddr4.sin_addr.s_addr,
+				  ol3_len + ol4_len + il3_len + il4_len +
+				  cfg_payload_len, 0);
+		break;
+	case PF_INET6:
+		build_ipv6_header(buf,
+				  cfg_l3_outer == PF_INET ? IPPROTO_IPIP
+							  : IPPROTO_IPV6,
+				  &extra_saddr6, &extra_daddr6,
+				  ol3_len + ol4_len + il3_len + il4_len +
+				  cfg_payload_len, 0);
+		break;
+	}
+
+	return el3_len + ol3_len + ol4_len + il3_len + il4_len +
+	       cfg_payload_len;
+}
+
+/* sender transmits encapsulated over RAW or unencap'd over UDP */
+static int setup_tx(void)
+{
+	int family, fd, ret;
+
+	if (cfg_l3_extra)
+		family = cfg_l3_extra;
+	else if (cfg_l3_outer)
+		family = cfg_l3_outer;
+	else
+		family = cfg_l3_inner;
+
+	fd = socket(family, SOCK_RAW, IPPROTO_RAW);
+	if (fd == -1)
+		error(1, errno, "socket tx");
+
+	if (cfg_l3_extra) {
+		if (cfg_l3_extra == PF_INET)
+			ret = connect(fd, (void *) &extra_daddr4,
+				      sizeof(extra_daddr4));
+		else
+			ret = connect(fd, (void *) &extra_daddr6,
+				      sizeof(extra_daddr6));
+		if (ret)
+			error(1, errno, "connect tx");
+	} else if (cfg_l3_outer) {
+		/* connect to destination if not encapsulated */
+		if (cfg_l3_outer == PF_INET)
+			ret = connect(fd, (void *) &out_daddr4,
+				      sizeof(out_daddr4));
+		else
+			ret = connect(fd, (void *) &out_daddr6,
+				      sizeof(out_daddr6));
+		if (ret)
+			error(1, errno, "connect tx");
+	} else {
+		/* otherwise using loopback */
+		if (cfg_l3_inner == PF_INET)
+			ret = connect(fd, (void *) &in_daddr4,
+				      sizeof(in_daddr4));
+		else
+			ret = connect(fd, (void *) &in_daddr6,
+				      sizeof(in_daddr6));
+		if (ret)
+			error(1, errno, "connect tx");
+	}
+
+	return fd;
+}
+
+/* receiver reads unencapsulated UDP */
+static int setup_rx(void)
+{
+	int fd, ret;
+
+	fd = socket(cfg_l3_inner, SOCK_DGRAM, 0);
+	if (fd == -1)
+		error(1, errno, "socket rx");
+
+	if (cfg_l3_inner == PF_INET)
+		ret = bind(fd, (void *) &in_daddr4, sizeof(in_daddr4));
+	else
+		ret = bind(fd, (void *) &in_daddr6, sizeof(in_daddr6));
+	if (ret)
+		error(1, errno, "bind rx");
+
+	return fd;
+}
+
+static int do_tx(int fd, const char *pkt, int len)
+{
+	int ret;
+
+	ret = write(fd, pkt, len);
+	if (ret == -1)
+		error(1, errno, "send");
+	if (ret != len)
+		error(1, errno, "send: len (%d < %d)\n", ret, len);
+
+	return 1;
+}
+
+static int do_poll(int fd, short events, int timeout)
+{
+	struct pollfd pfd;
+	int ret;
+
+	pfd.fd = fd;
+	pfd.events = events;
+
+	ret = poll(&pfd, 1, timeout);
+	if (ret == -1)
+		error(1, errno, "poll");
+	if (ret && !(pfd.revents & POLLIN))
+		error(1, errno, "poll: unexpected event 0x%x\n", pfd.revents);
+
+	return ret;
+}
+
+static int do_rx(int fd)
+{
+	char rbuf;
+	int ret, num = 0;
+
+	while (1) {
+		ret = recv(fd, &rbuf, 1, MSG_DONTWAIT);
+		if (ret == -1 && errno == EAGAIN)
+			break;
+		if (ret == -1)
+			error(1, errno, "recv");
+		if (rbuf != cfg_payload_char)
+			error(1, 0, "recv: payload mismatch");
+		num++;
+	};
+
+	return num;
+}
+
+static int do_main(void)
+{
+	unsigned long tstop, treport, tcur;
+	int fdt = -1, fdr = -1, len, tx = 0, rx = 0;
+
+	if (!cfg_only_tx)
+		fdr = setup_rx();
+	if (!cfg_only_rx)
+		fdt = setup_tx();
+
+	len = build_packet();
+
+	tcur = util_gettime();
+	treport = tcur + 1000;
+	tstop = tcur + (cfg_num_secs * 1000);
+
+	while (1) {
+		if (!cfg_only_rx)
+			tx += do_tx(fdt, buf, len);
+
+		if (!cfg_only_tx)
+			rx += do_rx(fdr);
+
+		if (cfg_num_secs) {
+			tcur = util_gettime();
+			if (tcur >= tstop)
+				break;
+			if (tcur >= treport) {
+				fprintf(stderr, "pkts: tx=%u rx=%u\n", tx, rx);
+				tx = 0;
+				rx = 0;
+				treport = tcur + 1000;
+			}
+		} else {
+			if (tx == cfg_num_pkt)
+				break;
+		}
+	}
+
+	/* read straggler packets, if any */
+	if (rx < tx) {
+		tstop = util_gettime() + 100;
+		while (rx < tx) {
+			tcur = util_gettime();
+			if (tcur >= tstop)
+				break;
+
+			do_poll(fdr, POLLIN, tstop - tcur);
+			rx += do_rx(fdr);
+		}
+	}
+
+	fprintf(stderr, "pkts: tx=%u rx=%u\n", tx, rx);
+
+	if (fdr != -1 && close(fdr))
+		error(1, errno, "close rx");
+	if (fdt != -1 && close(fdt))
+		error(1, errno, "close tx");
+
+	/*
+	 * success (== 0) only if received all packets
+	 * unless failure is expected, in which case none must arrive.
+	 */
+	if (cfg_expect_failure)
+		return rx != 0;
+	else
+		return rx != tx;
+}
+
+
+static void __attribute__((noreturn)) usage(const char *filepath)
+{
+	fprintf(stderr, "Usage: %s [-e gre|gue|bare|none] [-i 4|6] [-l len] "
+			"[-O 4|6] [-o 4|6] [-n num] [-t secs] [-R] [-T] "
+			"[-s <osrc> [-d <odst>] [-S <isrc>] [-D <idst>] "
+			"[-x <otos>] [-X <itos>] [-f <isport>] [-F]\n",
+		filepath);
+	exit(1);
+}
+
+static void parse_addr(int family, void *addr, const char *optarg)
+{
+	int ret;
+
+	ret = inet_pton(family, optarg, addr);
+	if (ret == -1)
+		error(1, errno, "inet_pton");
+	if (ret == 0)
+		error(1, 0, "inet_pton: bad string");
+}
+
+static void parse_addr4(struct sockaddr_in *addr, const char *optarg)
+{
+	parse_addr(AF_INET, &addr->sin_addr, optarg);
+}
+
+static void parse_addr6(struct sockaddr_in6 *addr, const char *optarg)
+{
+	parse_addr(AF_INET6, &addr->sin6_addr, optarg);
+}
+
+static int parse_protocol_family(const char *filepath, const char *optarg)
+{
+	if (!strcmp(optarg, "4"))
+		return PF_INET;
+	if (!strcmp(optarg, "6"))
+		return PF_INET6;
+
+	usage(filepath);
+}
+
+static void parse_opts(int argc, char **argv)
+{
+	int c;
+
+	while ((c = getopt(argc, argv, "d:D:e:f:Fhi:l:n:o:O:Rs:S:t:Tx:X:")) != -1) {
+		switch (c) {
+		case 'd':
+			if (cfg_l3_outer == AF_UNSPEC)
+				error(1, 0, "-d must be preceded by -o");
+			if (cfg_l3_outer == AF_INET)
+				parse_addr4(&out_daddr4, optarg);
+			else
+				parse_addr6(&out_daddr6, optarg);
+			break;
+		case 'D':
+			if (cfg_l3_inner == AF_UNSPEC)
+				error(1, 0, "-D must be preceded by -i");
+			if (cfg_l3_inner == AF_INET)
+				parse_addr4(&in_daddr4, optarg);
+			else
+				parse_addr6(&in_daddr6, optarg);
+			break;
+		case 'e':
+			if (!strcmp(optarg, "gre"))
+				cfg_encap_proto = IPPROTO_GRE;
+			else if (!strcmp(optarg, "gue"))
+				cfg_encap_proto = IPPROTO_UDP;
+			else if (!strcmp(optarg, "bare"))
+				cfg_encap_proto = IPPROTO_IPIP;
+			else if (!strcmp(optarg, "none"))
+				cfg_encap_proto = IPPROTO_IP;	/* == 0 */
+			else
+				usage(argv[0]);
+			break;
+		case 'f':
+			cfg_src_port = strtol(optarg, NULL, 0);
+			break;
+		case 'F':
+			cfg_expect_failure = true;
+			break;
+		case 'h':
+			usage(argv[0]);
+			break;
+		case 'i':
+			if (!strcmp(optarg, "4"))
+				cfg_l3_inner = PF_INET;
+			else if (!strcmp(optarg, "6"))
+				cfg_l3_inner = PF_INET6;
+			else
+				usage(argv[0]);
+			break;
+		case 'l':
+			cfg_payload_len = strtol(optarg, NULL, 0);
+			break;
+		case 'n':
+			cfg_num_pkt = strtol(optarg, NULL, 0);
+			break;
+		case 'o':
+			cfg_l3_outer = parse_protocol_family(argv[0], optarg);
+			break;
+		case 'O':
+			cfg_l3_extra = parse_protocol_family(argv[0], optarg);
+			break;
+		case 'R':
+			cfg_only_rx = true;
+			break;
+		case 's':
+			if (cfg_l3_outer == AF_INET)
+				parse_addr4(&out_saddr4, optarg);
+			else
+				parse_addr6(&out_saddr6, optarg);
+			break;
+		case 'S':
+			if (cfg_l3_inner == AF_INET)
+				parse_addr4(&in_saddr4, optarg);
+			else
+				parse_addr6(&in_saddr6, optarg);
+			break;
+		case 't':
+			cfg_num_secs = strtol(optarg, NULL, 0);
+			break;
+		case 'T':
+			cfg_only_tx = true;
+			break;
+		case 'x':
+			cfg_dsfield_outer = strtol(optarg, NULL, 0);
+			break;
+		case 'X':
+			cfg_dsfield_inner = strtol(optarg, NULL, 0);
+			break;
+		}
+	}
+
+	if (cfg_only_rx && cfg_only_tx)
+		error(1, 0, "options: cannot combine rx-only and tx-only");
+
+	if (cfg_encap_proto && cfg_l3_outer == AF_UNSPEC)
+		error(1, 0, "options: must specify outer with encap");
+	else if ((!cfg_encap_proto) && cfg_l3_outer != AF_UNSPEC)
+		error(1, 0, "options: cannot combine no-encap and outer");
+	else if ((!cfg_encap_proto) && cfg_l3_extra != AF_UNSPEC)
+		error(1, 0, "options: cannot combine no-encap and extra");
+
+	if (cfg_l3_inner == AF_UNSPEC)
+		cfg_l3_inner = AF_INET6;
+	if (cfg_l3_inner == AF_INET6 && cfg_encap_proto == IPPROTO_IPIP)
+		cfg_encap_proto = IPPROTO_IPV6;
+
+	/* RFC 6040 4.2:
+	 *   on decap, if outer encountered congestion (CE == 0x3),
+	 *   but inner cannot encode ECN (NoECT == 0x0), then drop packet.
+	 */
+	if (((cfg_dsfield_outer & 0x3) == 0x3) &&
+	    ((cfg_dsfield_inner & 0x3) == 0x0))
+		cfg_expect_failure = true;
+}
+
+static void print_opts(void)
+{
+	if (cfg_l3_inner == PF_INET6) {
+		util_printaddr("inner.dest6", (void *) &in_daddr6);
+		util_printaddr("inner.source6", (void *) &in_saddr6);
+	} else {
+		util_printaddr("inner.dest4", (void *) &in_daddr4);
+		util_printaddr("inner.source4", (void *) &in_saddr4);
+	}
+
+	if (!cfg_l3_outer)
+		return;
+
+	fprintf(stderr, "encap proto:   %u\n", cfg_encap_proto);
+
+	if (cfg_l3_outer == PF_INET6) {
+		util_printaddr("outer.dest6", (void *) &out_daddr6);
+		util_printaddr("outer.source6", (void *) &out_saddr6);
+	} else {
+		util_printaddr("outer.dest4", (void *) &out_daddr4);
+		util_printaddr("outer.source4", (void *) &out_saddr4);
+	}
+
+	if (!cfg_l3_extra)
+		return;
+
+	if (cfg_l3_outer == PF_INET6) {
+		util_printaddr("extra.dest6", (void *) &extra_daddr6);
+		util_printaddr("extra.source6", (void *) &extra_saddr6);
+	} else {
+		util_printaddr("extra.dest4", (void *) &extra_daddr4);
+		util_printaddr("extra.source4", (void *) &extra_saddr4);
+	}
+
+}
+
+int main(int argc, char **argv)
+{
+	parse_opts(argc, argv);
+	print_opts();
+	return do_main();
+}
diff --git a/tools/testing/selftests/bpf/test_flow_dissector.sh b/tools/testing/selftests/bpf/test_flow_dissector.sh
new file mode 100755
index 000000000000..c0fb073b5eab
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_flow_dissector.sh
@@ -0,0 +1,115 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Load BPF flow dissector and verify it correctly dissects traffic
+export TESTNAME=test_flow_dissector
+unmount=0
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+msg="skip all tests:"
+if [ $UID != 0 ]; then
+	echo $msg please run this as root >&2
+	exit $ksft_skip
+fi
+
+# This test needs to be run in a network namespace with in_netns.sh. Check if
+# this is the case and run it with in_netns.sh if it is being run in the root
+# namespace.
+if [[ -z $(ip netns identify $$) ]]; then
+	../net/in_netns.sh "$0" "$@"
+	exit $?
+fi
+
+# Determine selftest success via shell exit code
+exit_handler()
+{
+	if (( $? == 0 )); then
+		echo "selftests: $TESTNAME [PASS]";
+	else
+		echo "selftests: $TESTNAME [FAILED]";
+	fi
+
+	set +e
+
+	# Cleanup
+	tc filter del dev lo ingress pref 1337 2> /dev/null
+	tc qdisc del dev lo ingress 2> /dev/null
+	./flow_dissector_load -d 2> /dev/null
+	if [ $unmount -ne 0 ]; then
+		umount bpffs 2> /dev/null
+	fi
+}
+
+# Exit script immediately (well catched by trap handler) if any
+# program/thing exits with a non-zero status.
+set -e
+
+# (Use 'trap -l' to list meaning of numbers)
+trap exit_handler 0 2 3 6 9
+
+# Mount BPF file system
+if /bin/mount | grep /sys/fs/bpf > /dev/null; then
+	echo "bpffs already mounted"
+else
+	echo "bpffs not mounted. Mounting..."
+	unmount=1
+	/bin/mount bpffs /sys/fs/bpf -t bpf
+fi
+
+# Attach BPF program
+./flow_dissector_load -p bpf_flow.o -s dissect
+
+# Setup
+tc qdisc add dev lo ingress
+
+echo "Testing IPv4..."
+# Drops all IP/UDP packets coming from port 9
+tc filter add dev lo parent ffff: protocol ip pref 1337 flower ip_proto \
+	udp src_port 9 action drop
+
+# Send 10 IPv4/UDP packets from port 8. Filter should not drop any.
+./test_flow_dissector -i 4 -f 8
+# Send 10 IPv4/UDP packets from port 9. Filter should drop all.
+./test_flow_dissector -i 4 -f 9 -F
+# Send 10 IPv4/UDP packets from port 10. Filter should not drop any.
+./test_flow_dissector -i 4 -f 10
+
+echo "Testing IPIP..."
+# Send 10 IPv4/IPv4/UDP packets from port 8. Filter should not drop any.
+./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e bare -i 4 \
+	-D 192.168.0.1 -S 1.1.1.1 -f 8
+# Send 10 IPv4/IPv4/UDP packets from port 9. Filter should drop all.
+./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e bare -i 4 \
+	-D 192.168.0.1 -S 1.1.1.1 -f 9 -F
+# Send 10 IPv4/IPv4/UDP packets from port 10. Filter should not drop any.
+./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e bare -i 4 \
+	-D 192.168.0.1 -S 1.1.1.1 -f 10
+
+echo "Testing IPv4 + GRE..."
+# Send 10 IPv4/GRE/IPv4/UDP packets from port 8. Filter should not drop any.
+./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e gre -i 4 \
+	-D 192.168.0.1 -S 1.1.1.1 -f 8
+# Send 10 IPv4/GRE/IPv4/UDP packets from port 9. Filter should drop all.
+./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e gre -i 4 \
+	-D 192.168.0.1 -S 1.1.1.1 -f 9 -F
+# Send 10 IPv4/GRE/IPv4/UDP packets from port 10. Filter should not drop any.
+./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e gre -i 4 \
+	-D 192.168.0.1 -S 1.1.1.1 -f 10
+
+tc filter del dev lo ingress pref 1337
+
+echo "Testing IPv6..."
+# Drops all IPv6/UDP packets coming from port 9
+tc filter add dev lo parent ffff: protocol ipv6 pref 1337 flower ip_proto \
+	udp src_port 9 action drop
+
+# Send 10 IPv6/UDP packets from port 8. Filter should not drop any.
+./test_flow_dissector -i 6 -f 8
+# Send 10 IPv6/UDP packets from port 9. Filter should drop all.
+./test_flow_dissector -i 6 -f 9 -F
+# Send 10 IPv6/UDP packets from port 10. Filter should not drop any.
+./test_flow_dissector -i 6 -f 10
+
+exit 0
diff --git a/tools/testing/selftests/bpf/test_libbpf.sh b/tools/testing/selftests/bpf/test_libbpf.sh
index d97dc914cd49..156d89f1edcc 100755
--- a/tools/testing/selftests/bpf/test_libbpf.sh
+++ b/tools/testing/selftests/bpf/test_libbpf.sh
@@ -6,7 +6,7 @@ export TESTNAME=test_libbpf
 # Determine selftest success via shell exit code
 exit_handler()
 {
-	if (( $? == 0 )); then
+	if [ $? -eq 0 ]; then
 		echo "selftests: $TESTNAME [PASS]";
 	else
 		echo "$TESTNAME: failed at file $LAST_LOADED" 1>&2
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 9b552c0fc47d..4db2116e52be 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -15,6 +15,7 @@
 #include <string.h>
 #include <assert.h>
 #include <stdlib.h>
+#include <time.h>
 
 #include <sys/wait.h>
 #include <sys/socket.h>
@@ -471,6 +472,122 @@ static void test_devmap(int task, void *data)
 	close(fd);
 }
 
+static void test_queuemap(int task, void *data)
+{
+	const int MAP_SIZE = 32;
+	__u32 vals[MAP_SIZE + MAP_SIZE/2], val;
+	int fd, i;
+
+	/* Fill test values to be used */
+	for (i = 0; i < MAP_SIZE + MAP_SIZE/2; i++)
+		vals[i] = rand();
+
+	/* Invalid key size */
+	fd = bpf_create_map(BPF_MAP_TYPE_QUEUE, 4, sizeof(val), MAP_SIZE,
+			    map_flags);
+	assert(fd < 0 && errno == EINVAL);
+
+	fd = bpf_create_map(BPF_MAP_TYPE_QUEUE, 0, sizeof(val), MAP_SIZE,
+			    map_flags);
+	/* Queue map does not support BPF_F_NO_PREALLOC */
+	if (map_flags & BPF_F_NO_PREALLOC) {
+		assert(fd < 0 && errno == EINVAL);
+		return;
+	}
+	if (fd < 0) {
+		printf("Failed to create queuemap '%s'!\n", strerror(errno));
+		exit(1);
+	}
+
+	/* Push MAP_SIZE elements */
+	for (i = 0; i < MAP_SIZE; i++)
+		assert(bpf_map_update_elem(fd, NULL, &vals[i], 0) == 0);
+
+	/* Check that element cannot be pushed due to max_entries limit */
+	assert(bpf_map_update_elem(fd, NULL, &val, 0) == -1 &&
+	       errno == E2BIG);
+
+	/* Peek element */
+	assert(bpf_map_lookup_elem(fd, NULL, &val) == 0 && val == vals[0]);
+
+	/* Replace half elements */
+	for (i = MAP_SIZE; i < MAP_SIZE + MAP_SIZE/2; i++)
+		assert(bpf_map_update_elem(fd, NULL, &vals[i], BPF_EXIST) == 0);
+
+	/* Pop all elements */
+	for (i = MAP_SIZE/2; i < MAP_SIZE + MAP_SIZE/2; i++)
+		assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) == 0 &&
+		       val == vals[i]);
+
+	/* Check that there are not elements left */
+	assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) == -1 &&
+	       errno == ENOENT);
+
+	/* Check that non supported functions set errno to EINVAL */
+	assert(bpf_map_delete_elem(fd, NULL) == -1 && errno == EINVAL);
+	assert(bpf_map_get_next_key(fd, NULL, NULL) == -1 && errno == EINVAL);
+
+	close(fd);
+}
+
+static void test_stackmap(int task, void *data)
+{
+	const int MAP_SIZE = 32;
+	__u32 vals[MAP_SIZE + MAP_SIZE/2], val;
+	int fd, i;
+
+	/* Fill test values to be used */
+	for (i = 0; i < MAP_SIZE + MAP_SIZE/2; i++)
+		vals[i] = rand();
+
+	/* Invalid key size */
+	fd = bpf_create_map(BPF_MAP_TYPE_STACK, 4, sizeof(val), MAP_SIZE,
+			    map_flags);
+	assert(fd < 0 && errno == EINVAL);
+
+	fd = bpf_create_map(BPF_MAP_TYPE_STACK, 0, sizeof(val), MAP_SIZE,
+			    map_flags);
+	/* Stack map does not support BPF_F_NO_PREALLOC */
+	if (map_flags & BPF_F_NO_PREALLOC) {
+		assert(fd < 0 && errno == EINVAL);
+		return;
+	}
+	if (fd < 0) {
+		printf("Failed to create stackmap '%s'!\n", strerror(errno));
+		exit(1);
+	}
+
+	/* Push MAP_SIZE elements */
+	for (i = 0; i < MAP_SIZE; i++)
+		assert(bpf_map_update_elem(fd, NULL, &vals[i], 0) == 0);
+
+	/* Check that element cannot be pushed due to max_entries limit */
+	assert(bpf_map_update_elem(fd, NULL, &val, 0) == -1 &&
+	       errno == E2BIG);
+
+	/* Peek element */
+	assert(bpf_map_lookup_elem(fd, NULL, &val) == 0 && val == vals[i - 1]);
+
+	/* Replace half elements */
+	for (i = MAP_SIZE; i < MAP_SIZE + MAP_SIZE/2; i++)
+		assert(bpf_map_update_elem(fd, NULL, &vals[i], BPF_EXIST) == 0);
+
+	/* Pop all elements */
+	for (i = MAP_SIZE + MAP_SIZE/2 - 1; i >= MAP_SIZE/2; i--)
+		assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) == 0 &&
+		       val == vals[i]);
+
+	/* Check that there are not elements left */
+	assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) == -1 &&
+	       errno == ENOENT);
+
+	/* Check that non supported functions set errno to EINVAL */
+	assert(bpf_map_delete_elem(fd, NULL) == -1 && errno == EINVAL);
+	assert(bpf_map_get_next_key(fd, NULL, NULL) == -1 && errno == EINVAL);
+
+	close(fd);
+}
+
 #include <sys/socket.h>
 #include <sys/ioctl.h>
 #include <arpa/inet.h>
@@ -1434,10 +1551,15 @@ static void run_all_tests(void)
 	test_map_wronly();
 
 	test_reuseport_array();
+
+	test_queuemap(0, NULL);
+	test_stackmap(0, NULL);
 }
 
 int main(void)
 {
+	srand(time(NULL));
+
 	map_flags = 0;
 	run_all_tests();
 
diff --git a/tools/testing/selftests/bpf/test_netcnt.c b/tools/testing/selftests/bpf/test_netcnt.c
new file mode 100644
index 000000000000..7887df693399
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_netcnt.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <assert.h>
+#include <sys/sysinfo.h>
+#include <sys/time.h>
+
+#include <linux/bpf.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "cgroup_helpers.h"
+#include "bpf_rlimit.h"
+#include "netcnt_common.h"
+
+#define BPF_PROG "./netcnt_prog.o"
+#define TEST_CGROUP "/test-network-counters/"
+
+static int bpf_find_map(const char *test, struct bpf_object *obj,
+			const char *name)
+{
+	struct bpf_map *map;
+
+	map = bpf_object__find_map_by_name(obj, name);
+	if (!map) {
+		printf("%s:FAIL:map '%s' not found\n", test, name);
+		return -1;
+	}
+	return bpf_map__fd(map);
+}
+
+int main(int argc, char **argv)
+{
+	struct percpu_net_cnt *percpu_netcnt;
+	struct bpf_cgroup_storage_key key;
+	int map_fd, percpu_map_fd;
+	int error = EXIT_FAILURE;
+	struct net_cnt netcnt;
+	struct bpf_object *obj;
+	int prog_fd, cgroup_fd;
+	unsigned long packets;
+	unsigned long bytes;
+	int cpu, nproc;
+	__u32 prog_cnt;
+
+	nproc = get_nprocs_conf();
+	percpu_netcnt = malloc(sizeof(*percpu_netcnt) * nproc);
+	if (!percpu_netcnt) {
+		printf("Not enough memory for per-cpu area (%d cpus)\n", nproc);
+		goto err;
+	}
+
+	if (bpf_prog_load(BPF_PROG, BPF_PROG_TYPE_CGROUP_SKB,
+			  &obj, &prog_fd)) {
+		printf("Failed to load bpf program\n");
+		goto out;
+	}
+
+	if (setup_cgroup_environment()) {
+		printf("Failed to load bpf program\n");
+		goto err;
+	}
+
+	/* Create a cgroup, get fd, and join it */
+	cgroup_fd = create_and_get_cgroup(TEST_CGROUP);
+	if (!cgroup_fd) {
+		printf("Failed to create test cgroup\n");
+		goto err;
+	}
+
+	if (join_cgroup(TEST_CGROUP)) {
+		printf("Failed to join cgroup\n");
+		goto err;
+	}
+
+	/* Attach bpf program */
+	if (bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0)) {
+		printf("Failed to attach bpf program");
+		goto err;
+	}
+
+	assert(system("ping localhost -6 -c 10000 -f -q > /dev/null") == 0);
+
+	if (bpf_prog_query(cgroup_fd, BPF_CGROUP_INET_EGRESS, 0, NULL, NULL,
+			   &prog_cnt)) {
+		printf("Failed to query attached programs");
+		goto err;
+	}
+
+	map_fd = bpf_find_map(__func__, obj, "netcnt");
+	if (map_fd < 0) {
+		printf("Failed to find bpf map with net counters");
+		goto err;
+	}
+
+	percpu_map_fd = bpf_find_map(__func__, obj, "percpu_netcnt");
+	if (percpu_map_fd < 0) {
+		printf("Failed to find bpf map with percpu net counters");
+		goto err;
+	}
+
+	if (bpf_map_get_next_key(map_fd, NULL, &key)) {
+		printf("Failed to get key in cgroup storage\n");
+		goto err;
+	}
+
+	if (bpf_map_lookup_elem(map_fd, &key, &netcnt)) {
+		printf("Failed to lookup cgroup storage\n");
+		goto err;
+	}
+
+	if (bpf_map_lookup_elem(percpu_map_fd, &key, &percpu_netcnt[0])) {
+		printf("Failed to lookup percpu cgroup storage\n");
+		goto err;
+	}
+
+	/* Some packets can be still in per-cpu cache, but not more than
+	 * MAX_PERCPU_PACKETS.
+	 */
+	packets = netcnt.packets;
+	bytes = netcnt.bytes;
+	for (cpu = 0; cpu < nproc; cpu++) {
+		if (percpu_netcnt[cpu].packets > MAX_PERCPU_PACKETS) {
+			printf("Unexpected percpu value: %llu\n",
+			       percpu_netcnt[cpu].packets);
+			goto err;
+		}
+
+		packets += percpu_netcnt[cpu].packets;
+		bytes += percpu_netcnt[cpu].bytes;
+	}
+
+	/* No packets should be lost */
+	if (packets != 10000) {
+		printf("Unexpected packet count: %lu\n", packets);
+		goto err;
+	}
+
+	/* Let's check that bytes counter matches the number of packets
+	 * multiplied by the size of ipv6 ICMP packet.
+	 */
+	if (bytes != packets * 104) {
+		printf("Unexpected bytes count: %lu\n", bytes);
+		goto err;
+	}
+
+	error = 0;
+	printf("test_netcnt:PASS\n");
+
+err:
+	cleanup_cgroup_environment();
+	free(percpu_netcnt);
+
+out:
+	return error;
+}
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 0ef68204c84b..2d3c04f45530 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -112,13 +112,13 @@ static void test_pkt_access(void)
 
 	err = bpf_prog_test_run(prog_fd, 100000, &pkt_v4, sizeof(pkt_v4),
 				NULL, NULL, &retval, &duration);
-	CHECK(err || errno || retval, "ipv4",
+	CHECK(err || retval, "ipv4",
 	      "err %d errno %d retval %d duration %d\n",
 	      err, errno, retval, duration);
 
 	err = bpf_prog_test_run(prog_fd, 100000, &pkt_v6, sizeof(pkt_v6),
 				NULL, NULL, &retval, &duration);
-	CHECK(err || errno || retval, "ipv6",
+	CHECK(err || retval, "ipv6",
 	      "err %d errno %d retval %d duration %d\n",
 	      err, errno, retval, duration);
 	bpf_object__close(obj);
@@ -153,14 +153,14 @@ static void test_xdp(void)
 	err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
 				buf, &size, &retval, &duration);
 
-	CHECK(err || errno || retval != XDP_TX || size != 74 ||
+	CHECK(err || retval != XDP_TX || size != 74 ||
 	      iph->protocol != IPPROTO_IPIP, "ipv4",
 	      "err %d errno %d retval %d size %d\n",
 	      err, errno, retval, size);
 
 	err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6),
 				buf, &size, &retval, &duration);
-	CHECK(err || errno || retval != XDP_TX || size != 114 ||
+	CHECK(err || retval != XDP_TX || size != 114 ||
 	      iph6->nexthdr != IPPROTO_IPV6, "ipv6",
 	      "err %d errno %d retval %d size %d\n",
 	      err, errno, retval, size);
@@ -185,13 +185,13 @@ static void test_xdp_adjust_tail(void)
 	err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
 				buf, &size, &retval, &duration);
 
-	CHECK(err || errno || retval != XDP_DROP,
+	CHECK(err || retval != XDP_DROP,
 	      "ipv4", "err %d errno %d retval %d size %d\n",
 	      err, errno, retval, size);
 
 	err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6),
 				buf, &size, &retval, &duration);
-	CHECK(err || errno || retval != XDP_TX || size != 54,
+	CHECK(err || retval != XDP_TX || size != 54,
 	      "ipv6", "err %d errno %d retval %d size %d\n",
 	      err, errno, retval, size);
 	bpf_object__close(obj);
@@ -254,14 +254,14 @@ static void test_l4lb(const char *file)
 
 	err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v4, sizeof(pkt_v4),
 				buf, &size, &retval, &duration);
-	CHECK(err || errno || retval != 7/*TC_ACT_REDIRECT*/ || size != 54 ||
+	CHECK(err || retval != 7/*TC_ACT_REDIRECT*/ || size != 54 ||
 	      *magic != MAGIC_VAL, "ipv4",
 	      "err %d errno %d retval %d size %d magic %x\n",
 	      err, errno, retval, size, *magic);
 
 	err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v6, sizeof(pkt_v6),
 				buf, &size, &retval, &duration);
-	CHECK(err || errno || retval != 7/*TC_ACT_REDIRECT*/ || size != 74 ||
+	CHECK(err || retval != 7/*TC_ACT_REDIRECT*/ || size != 74 ||
 	      *magic != MAGIC_VAL, "ipv6",
 	      "err %d errno %d retval %d size %d magic %x\n",
 	      err, errno, retval, size, *magic);
@@ -343,14 +343,14 @@ static void test_xdp_noinline(void)
 
 	err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v4, sizeof(pkt_v4),
 				buf, &size, &retval, &duration);
-	CHECK(err || errno || retval != 1 || size != 54 ||
+	CHECK(err || retval != 1 || size != 54 ||
 	      *magic != MAGIC_VAL, "ipv4",
 	      "err %d errno %d retval %d size %d magic %x\n",
 	      err, errno, retval, size, *magic);
 
 	err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v6, sizeof(pkt_v6),
 				buf, &size, &retval, &duration);
-	CHECK(err || errno || retval != 1 || size != 74 ||
+	CHECK(err || retval != 1 || size != 74 ||
 	      *magic != MAGIC_VAL, "ipv6",
 	      "err %d errno %d retval %d size %d magic %x\n",
 	      err, errno, retval, size, *magic);
@@ -1698,8 +1698,142 @@ static void test_task_fd_query_tp(void)
 				   "sys_enter_read");
 }
 
+static void test_reference_tracking()
+{
+	const char *file = "./test_sk_lookup_kern.o";
+	struct bpf_object *obj;
+	struct bpf_program *prog;
+	__u32 duration;
+	int err = 0;
+
+	obj = bpf_object__open(file);
+	if (IS_ERR(obj)) {
+		error_cnt++;
+		return;
+	}
+
+	bpf_object__for_each_program(prog, obj) {
+		const char *title;
+
+		/* Ignore .text sections */
+		title = bpf_program__title(prog, false);
+		if (strstr(title, ".text") != NULL)
+			continue;
+
+		bpf_program__set_type(prog, BPF_PROG_TYPE_SCHED_CLS);
+
+		/* Expect verifier failure if test name has 'fail' */
+		if (strstr(title, "fail") != NULL) {
+			libbpf_set_print(NULL, NULL, NULL);
+			err = !bpf_program__load(prog, "GPL", 0);
+			libbpf_set_print(printf, printf, NULL);
+		} else {
+			err = bpf_program__load(prog, "GPL", 0);
+		}
+		CHECK(err, title, "\n");
+	}
+	bpf_object__close(obj);
+}
+
+enum {
+	QUEUE,
+	STACK,
+};
+
+static void test_queue_stack_map(int type)
+{
+	const int MAP_SIZE = 32;
+	__u32 vals[MAP_SIZE], duration, retval, size, val;
+	int i, err, prog_fd, map_in_fd, map_out_fd;
+	char file[32], buf[128];
+	struct bpf_object *obj;
+	struct iphdr *iph = (void *)buf + sizeof(struct ethhdr);
+
+	/* Fill test values to be used */
+	for (i = 0; i < MAP_SIZE; i++)
+		vals[i] = rand();
+
+	if (type == QUEUE)
+		strncpy(file, "./test_queue_map.o", sizeof(file));
+	else if (type == STACK)
+		strncpy(file, "./test_stack_map.o", sizeof(file));
+	else
+		return;
+
+	err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
+	if (err) {
+		error_cnt++;
+		return;
+	}
+
+	map_in_fd = bpf_find_map(__func__, obj, "map_in");
+	if (map_in_fd < 0)
+		goto out;
+
+	map_out_fd = bpf_find_map(__func__, obj, "map_out");
+	if (map_out_fd < 0)
+		goto out;
+
+	/* Push 32 elements to the input map */
+	for (i = 0; i < MAP_SIZE; i++) {
+		err = bpf_map_update_elem(map_in_fd, NULL, &vals[i], 0);
+		if (err) {
+			error_cnt++;
+			goto out;
+		}
+	}
+
+	/* The eBPF program pushes iph.saddr in the output map,
+	 * pops the input map and saves this value in iph.daddr
+	 */
+	for (i = 0; i < MAP_SIZE; i++) {
+		if (type == QUEUE) {
+			val = vals[i];
+			pkt_v4.iph.saddr = vals[i] * 5;
+		} else if (type == STACK) {
+			val = vals[MAP_SIZE - 1 - i];
+			pkt_v4.iph.saddr = vals[MAP_SIZE - 1 - i] * 5;
+		}
+
+		err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
+					buf, &size, &retval, &duration);
+		if (err || retval || size != sizeof(pkt_v4) ||
+		    iph->daddr != val)
+			break;
+	}
+
+	CHECK(err || retval || size != sizeof(pkt_v4) || iph->daddr != val,
+	      "bpf_map_pop_elem",
+	      "err %d errno %d retval %d size %d iph->daddr %u\n",
+	      err, errno, retval, size, iph->daddr);
+
+	/* Queue is empty, program should return TC_ACT_SHOT */
+	err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
+				buf, &size, &retval, &duration);
+	CHECK(err || retval != 2 /* TC_ACT_SHOT */|| size != sizeof(pkt_v4),
+	      "check-queue-stack-map-empty",
+	      "err %d errno %d retval %d size %d\n",
+	      err, errno, retval, size);
+
+	/* Check that the program pushed elements correctly */
+	for (i = 0; i < MAP_SIZE; i++) {
+		err = bpf_map_lookup_and_delete_elem(map_out_fd, NULL, &val);
+		if (err || val != vals[i] * 5)
+			break;
+	}
+
+	CHECK(i != MAP_SIZE && (err || val != vals[i] * 5),
+	      "bpf_map_push_elem", "err %d value %u\n", err, val);
+
+out:
+	pkt_v4.iph.saddr = 0;
+	bpf_object__close(obj);
+}
+
 int main(void)
 {
+	srand(time(NULL));
+
 	jit_enabled = is_jit_enabled();
 
 	test_pkt_access();
@@ -1719,6 +1853,9 @@ int main(void)
 	test_get_stack_raw_tp();
 	test_task_fd_query_rawtp();
 	test_task_fd_query_tp();
+	test_reference_tracking();
+	test_queue_stack_map(QUEUE);
+	test_queue_stack_map(STACK);
 
 	printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt);
 	return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS;
diff --git a/tools/testing/selftests/bpf/test_queue_map.c b/tools/testing/selftests/bpf/test_queue_map.c
new file mode 100644
index 000000000000..87db1f9da33d
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_queue_map.c
@@ -0,0 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Politecnico di Torino
+#define MAP_TYPE BPF_MAP_TYPE_QUEUE
+#include "test_queue_stack_map.h"
diff --git a/tools/testing/selftests/bpf/test_queue_stack_map.h b/tools/testing/selftests/bpf/test_queue_stack_map.h
new file mode 100644
index 000000000000..295b9b3bc5c7
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_queue_stack_map.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+// Copyright (c) 2018 Politecnico di Torino
+#include <stddef.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/pkt_cls.h>
+#include "bpf_helpers.h"
+
+int _version SEC("version") = 1;
+
+struct bpf_map_def __attribute__ ((section("maps"), used)) map_in = {
+	.type = MAP_TYPE,
+	.key_size = 0,
+	.value_size = sizeof(__u32),
+	.max_entries = 32,
+	.map_flags = 0,
+};
+
+struct bpf_map_def __attribute__ ((section("maps"), used)) map_out = {
+	.type = MAP_TYPE,
+	.key_size = 0,
+	.value_size = sizeof(__u32),
+	.max_entries = 32,
+	.map_flags = 0,
+};
+
+SEC("test")
+int _test(struct __sk_buff *skb)
+{
+	void *data_end = (void *)(long)skb->data_end;
+	void *data = (void *)(long)skb->data;
+	struct ethhdr *eth = (struct ethhdr *)(data);
+	__u32 value;
+	int err;
+
+	if (eth + 1 > data_end)
+		return TC_ACT_SHOT;
+
+	struct iphdr *iph = (struct iphdr *)(eth + 1);
+
+	if (iph + 1 > data_end)
+		return TC_ACT_SHOT;
+
+	err = bpf_map_pop_elem(&map_in, &value);
+	if (err)
+		return TC_ACT_SHOT;
+
+	iph->daddr = value;
+
+	err = bpf_map_push_elem(&map_out, &iph->saddr, 0);
+	if (err)
+		return TC_ACT_SHOT;
+
+	return TC_ACT_OK;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_section_names.c b/tools/testing/selftests/bpf/test_section_names.c
new file mode 100644
index 000000000000..7c4f41572b1c
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_section_names.c
@@ -0,0 +1,208 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <err.h>
+#include <bpf/libbpf.h>
+
+#include "bpf_util.h"
+
+struct sec_name_test {
+	const char sec_name[32];
+	struct {
+		int rc;
+		enum bpf_prog_type prog_type;
+		enum bpf_attach_type expected_attach_type;
+	} expected_load;
+	struct {
+		int rc;
+		enum bpf_attach_type attach_type;
+	} expected_attach;
+};
+
+static struct sec_name_test tests[] = {
+	{"InvAliD", {-EINVAL, 0, 0}, {-EINVAL, 0} },
+	{"cgroup", {-EINVAL, 0, 0}, {-EINVAL, 0} },
+	{"socket", {0, BPF_PROG_TYPE_SOCKET_FILTER, 0}, {-EINVAL, 0} },
+	{"kprobe/", {0, BPF_PROG_TYPE_KPROBE, 0}, {-EINVAL, 0} },
+	{"kretprobe/", {0, BPF_PROG_TYPE_KPROBE, 0}, {-EINVAL, 0} },
+	{"classifier", {0, BPF_PROG_TYPE_SCHED_CLS, 0}, {-EINVAL, 0} },
+	{"action", {0, BPF_PROG_TYPE_SCHED_ACT, 0}, {-EINVAL, 0} },
+	{"tracepoint/", {0, BPF_PROG_TYPE_TRACEPOINT, 0}, {-EINVAL, 0} },
+	{
+		"raw_tracepoint/",
+		{0, BPF_PROG_TYPE_RAW_TRACEPOINT, 0},
+		{-EINVAL, 0},
+	},
+	{"xdp", {0, BPF_PROG_TYPE_XDP, 0}, {-EINVAL, 0} },
+	{"perf_event", {0, BPF_PROG_TYPE_PERF_EVENT, 0}, {-EINVAL, 0} },
+	{"lwt_in", {0, BPF_PROG_TYPE_LWT_IN, 0}, {-EINVAL, 0} },
+	{"lwt_out", {0, BPF_PROG_TYPE_LWT_OUT, 0}, {-EINVAL, 0} },
+	{"lwt_xmit", {0, BPF_PROG_TYPE_LWT_XMIT, 0}, {-EINVAL, 0} },
+	{"lwt_seg6local", {0, BPF_PROG_TYPE_LWT_SEG6LOCAL, 0}, {-EINVAL, 0} },
+	{
+		"cgroup_skb/ingress",
+		{0, BPF_PROG_TYPE_CGROUP_SKB, 0},
+		{0, BPF_CGROUP_INET_INGRESS},
+	},
+	{
+		"cgroup_skb/egress",
+		{0, BPF_PROG_TYPE_CGROUP_SKB, 0},
+		{0, BPF_CGROUP_INET_EGRESS},
+	},
+	{"cgroup/skb", {0, BPF_PROG_TYPE_CGROUP_SKB, 0}, {-EINVAL, 0} },
+	{
+		"cgroup/sock",
+		{0, BPF_PROG_TYPE_CGROUP_SOCK, 0},
+		{0, BPF_CGROUP_INET_SOCK_CREATE},
+	},
+	{
+		"cgroup/post_bind4",
+		{0, BPF_PROG_TYPE_CGROUP_SOCK, BPF_CGROUP_INET4_POST_BIND},
+		{0, BPF_CGROUP_INET4_POST_BIND},
+	},
+	{
+		"cgroup/post_bind6",
+		{0, BPF_PROG_TYPE_CGROUP_SOCK, BPF_CGROUP_INET6_POST_BIND},
+		{0, BPF_CGROUP_INET6_POST_BIND},
+	},
+	{
+		"cgroup/dev",
+		{0, BPF_PROG_TYPE_CGROUP_DEVICE, 0},
+		{0, BPF_CGROUP_DEVICE},
+	},
+	{"sockops", {0, BPF_PROG_TYPE_SOCK_OPS, 0}, {0, BPF_CGROUP_SOCK_OPS} },
+	{
+		"sk_skb/stream_parser",
+		{0, BPF_PROG_TYPE_SK_SKB, 0},
+		{0, BPF_SK_SKB_STREAM_PARSER},
+	},
+	{
+		"sk_skb/stream_verdict",
+		{0, BPF_PROG_TYPE_SK_SKB, 0},
+		{0, BPF_SK_SKB_STREAM_VERDICT},
+	},
+	{"sk_skb", {0, BPF_PROG_TYPE_SK_SKB, 0}, {-EINVAL, 0} },
+	{"sk_msg", {0, BPF_PROG_TYPE_SK_MSG, 0}, {0, BPF_SK_MSG_VERDICT} },
+	{"lirc_mode2", {0, BPF_PROG_TYPE_LIRC_MODE2, 0}, {0, BPF_LIRC_MODE2} },
+	{
+		"flow_dissector",
+		{0, BPF_PROG_TYPE_FLOW_DISSECTOR, 0},
+		{0, BPF_FLOW_DISSECTOR},
+	},
+	{
+		"cgroup/bind4",
+		{0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_BIND},
+		{0, BPF_CGROUP_INET4_BIND},
+	},
+	{
+		"cgroup/bind6",
+		{0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_BIND},
+		{0, BPF_CGROUP_INET6_BIND},
+	},
+	{
+		"cgroup/connect4",
+		{0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_CONNECT},
+		{0, BPF_CGROUP_INET4_CONNECT},
+	},
+	{
+		"cgroup/connect6",
+		{0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_CONNECT},
+		{0, BPF_CGROUP_INET6_CONNECT},
+	},
+	{
+		"cgroup/sendmsg4",
+		{0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_SENDMSG},
+		{0, BPF_CGROUP_UDP4_SENDMSG},
+	},
+	{
+		"cgroup/sendmsg6",
+		{0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_SENDMSG},
+		{0, BPF_CGROUP_UDP6_SENDMSG},
+	},
+};
+
+static int test_prog_type_by_name(const struct sec_name_test *test)
+{
+	enum bpf_attach_type expected_attach_type;
+	enum bpf_prog_type prog_type;
+	int rc;
+
+	rc = libbpf_prog_type_by_name(test->sec_name, &prog_type,
+				      &expected_attach_type);
+
+	if (rc != test->expected_load.rc) {
+		warnx("prog: unexpected rc=%d for %s", rc, test->sec_name);
+		return -1;
+	}
+
+	if (rc)
+		return 0;
+
+	if (prog_type != test->expected_load.prog_type) {
+		warnx("prog: unexpected prog_type=%d for %s", prog_type,
+		      test->sec_name);
+		return -1;
+	}
+
+	if (expected_attach_type != test->expected_load.expected_attach_type) {
+		warnx("prog: unexpected expected_attach_type=%d for %s",
+		      expected_attach_type, test->sec_name);
+		return -1;
+	}
+
+	return 0;
+}
+
+static int test_attach_type_by_name(const struct sec_name_test *test)
+{
+	enum bpf_attach_type attach_type;
+	int rc;
+
+	rc = libbpf_attach_type_by_name(test->sec_name, &attach_type);
+
+	if (rc != test->expected_attach.rc) {
+		warnx("attach: unexpected rc=%d for %s", rc, test->sec_name);
+		return -1;
+	}
+
+	if (rc)
+		return 0;
+
+	if (attach_type != test->expected_attach.attach_type) {
+		warnx("attach: unexpected attach_type=%d for %s", attach_type,
+		      test->sec_name);
+		return -1;
+	}
+
+	return 0;
+}
+
+static int run_test_case(const struct sec_name_test *test)
+{
+	if (test_prog_type_by_name(test))
+		return -1;
+	if (test_attach_type_by_name(test))
+		return -1;
+	return 0;
+}
+
+static int run_tests(void)
+{
+	int passes = 0;
+	int fails = 0;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(tests); ++i) {
+		if (run_test_case(&tests[i]))
+			++fails;
+		else
+			++passes;
+	}
+	printf("Summary: %d PASSED, %d FAILED\n", passes, fails);
+	return fails ? -1 : 0;
+}
+
+int main(int argc, char **argv)
+{
+	return run_tests();
+}
diff --git a/tools/testing/selftests/bpf/test_sk_lookup_kern.c b/tools/testing/selftests/bpf/test_sk_lookup_kern.c
new file mode 100644
index 000000000000..b745bdc08c2b
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_sk_lookup_kern.c
@@ -0,0 +1,180 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+// Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
+
+#include <stddef.h>
+#include <stdbool.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/pkt_cls.h>
+#include <linux/tcp.h>
+#include <sys/socket.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+int _version SEC("version") = 1;
+char _license[] SEC("license") = "GPL";
+
+/* Fill 'tuple' with L3 info, and attempt to find L4. On fail, return NULL. */
+static struct bpf_sock_tuple *get_tuple(void *data, __u64 nh_off,
+					void *data_end, __u16 eth_proto,
+					bool *ipv4)
+{
+	struct bpf_sock_tuple *result;
+	__u8 proto = 0;
+	__u64 ihl_len;
+
+	if (eth_proto == bpf_htons(ETH_P_IP)) {
+		struct iphdr *iph = (struct iphdr *)(data + nh_off);
+
+		if (iph + 1 > data_end)
+			return NULL;
+		ihl_len = iph->ihl * 4;
+		proto = iph->protocol;
+		*ipv4 = true;
+		result = (struct bpf_sock_tuple *)&iph->saddr;
+	} else if (eth_proto == bpf_htons(ETH_P_IPV6)) {
+		struct ipv6hdr *ip6h = (struct ipv6hdr *)(data + nh_off);
+
+		if (ip6h + 1 > data_end)
+			return NULL;
+		ihl_len = sizeof(*ip6h);
+		proto = ip6h->nexthdr;
+		*ipv4 = true;
+		result = (struct bpf_sock_tuple *)&ip6h->saddr;
+	}
+
+	if (data + nh_off + ihl_len > data_end || proto != IPPROTO_TCP)
+		return NULL;
+
+	return result;
+}
+
+SEC("sk_lookup_success")
+int bpf_sk_lookup_test0(struct __sk_buff *skb)
+{
+	void *data_end = (void *)(long)skb->data_end;
+	void *data = (void *)(long)skb->data;
+	struct ethhdr *eth = (struct ethhdr *)(data);
+	struct bpf_sock_tuple *tuple;
+	struct bpf_sock *sk;
+	size_t tuple_len;
+	bool ipv4;
+
+	if (eth + 1 > data_end)
+		return TC_ACT_SHOT;
+
+	tuple = get_tuple(data, sizeof(*eth), data_end, eth->h_proto, &ipv4);
+	if (!tuple || tuple + sizeof *tuple > data_end)
+		return TC_ACT_SHOT;
+
+	tuple_len = ipv4 ? sizeof(tuple->ipv4) : sizeof(tuple->ipv6);
+	sk = bpf_sk_lookup_tcp(skb, tuple, tuple_len, 0, 0);
+	if (sk)
+		bpf_sk_release(sk);
+	return sk ? TC_ACT_OK : TC_ACT_UNSPEC;
+}
+
+SEC("sk_lookup_success_simple")
+int bpf_sk_lookup_test1(struct __sk_buff *skb)
+{
+	struct bpf_sock_tuple tuple = {};
+	struct bpf_sock *sk;
+
+	sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0);
+	if (sk)
+		bpf_sk_release(sk);
+	return 0;
+}
+
+SEC("fail_use_after_free")
+int bpf_sk_lookup_uaf(struct __sk_buff *skb)
+{
+	struct bpf_sock_tuple tuple = {};
+	struct bpf_sock *sk;
+	__u32 family = 0;
+
+	sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0);
+	if (sk) {
+		bpf_sk_release(sk);
+		family = sk->family;
+	}
+	return family;
+}
+
+SEC("fail_modify_sk_pointer")
+int bpf_sk_lookup_modptr(struct __sk_buff *skb)
+{
+	struct bpf_sock_tuple tuple = {};
+	struct bpf_sock *sk;
+	__u32 family;
+
+	sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0);
+	if (sk) {
+		sk += 1;
+		bpf_sk_release(sk);
+	}
+	return 0;
+}
+
+SEC("fail_modify_sk_or_null_pointer")
+int bpf_sk_lookup_modptr_or_null(struct __sk_buff *skb)
+{
+	struct bpf_sock_tuple tuple = {};
+	struct bpf_sock *sk;
+	__u32 family;
+
+	sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0);
+	sk += 1;
+	if (sk)
+		bpf_sk_release(sk);
+	return 0;
+}
+
+SEC("fail_no_release")
+int bpf_sk_lookup_test2(struct __sk_buff *skb)
+{
+	struct bpf_sock_tuple tuple = {};
+
+	bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0);
+	return 0;
+}
+
+SEC("fail_release_twice")
+int bpf_sk_lookup_test3(struct __sk_buff *skb)
+{
+	struct bpf_sock_tuple tuple = {};
+	struct bpf_sock *sk;
+
+	sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0);
+	bpf_sk_release(sk);
+	bpf_sk_release(sk);
+	return 0;
+}
+
+SEC("fail_release_unchecked")
+int bpf_sk_lookup_test4(struct __sk_buff *skb)
+{
+	struct bpf_sock_tuple tuple = {};
+	struct bpf_sock *sk;
+
+	sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0);
+	bpf_sk_release(sk);
+	return 0;
+}
+
+void lookup_no_release(struct __sk_buff *skb)
+{
+	struct bpf_sock_tuple tuple = {};
+	bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0);
+}
+
+SEC("fail_no_release_subcall")
+int bpf_sk_lookup_test5(struct __sk_buff *skb)
+{
+	lookup_no_release(skb);
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/test_skb_cgroup_id.sh b/tools/testing/selftests/bpf/test_skb_cgroup_id.sh
index 42544a969abc..a9bc6f82abc1 100755
--- a/tools/testing/selftests/bpf/test_skb_cgroup_id.sh
+++ b/tools/testing/selftests/bpf/test_skb_cgroup_id.sh
@@ -10,7 +10,7 @@ wait_for_ip()
 	echo -n "Wait for testing link-local IP to become available "
 	for _i in $(seq ${MAX_PING_TRIES}); do
 		echo -n "."
-		if ping -6 -q -c 1 -W 1 ff02::1%${TEST_IF} >/dev/null 2>&1; then
+		if $PING6 -c 1 -W 1 ff02::1%${TEST_IF} >/dev/null 2>&1; then
 			echo " OK"
 			return
 		fi
@@ -58,5 +58,6 @@ BPF_PROG_OBJ="${DIR}/test_skb_cgroup_id_kern.o"
 BPF_PROG_SECTION="cgroup_id_logger"
 BPF_PROG_ID=0
 PROG="${DIR}/test_skb_cgroup_id_user"
+type ping6 >/dev/null 2>&1 && PING6="ping6" || PING6="ping -6"
 
 main
diff --git a/tools/testing/selftests/bpf/test_sock_addr.sh b/tools/testing/selftests/bpf/test_sock_addr.sh
index 9832a875a828..3b9fdb8094aa 100755
--- a/tools/testing/selftests/bpf/test_sock_addr.sh
+++ b/tools/testing/selftests/bpf/test_sock_addr.sh
@@ -4,7 +4,8 @@ set -eu
 
 ping_once()
 {
-	ping -${1} -q -c 1 -W 1 ${2%%/*} >/dev/null 2>&1
+	type ping${1} >/dev/null 2>&1 && PING="ping${1}" || PING="ping -${1}"
+	$PING -q -c 1 -W 1 ${2%%/*} >/dev/null 2>&1
 }
 
 wait_for_ip()
diff --git a/tools/testing/selftests/bpf/test_socket_cookie.c b/tools/testing/selftests/bpf/test_socket_cookie.c
index 68e108e4687a..b6c2c605d8c0 100644
--- a/tools/testing/selftests/bpf/test_socket_cookie.c
+++ b/tools/testing/selftests/bpf/test_socket_cookie.c
@@ -158,11 +158,7 @@ static int run_test(int cgfd)
 	bpf_object__for_each_program(prog, pobj) {
 		prog_name = bpf_program__title(prog, /*needs_copy*/ false);
 
-		if (strcmp(prog_name, "cgroup/connect6") == 0) {
-			attach_type = BPF_CGROUP_INET6_CONNECT;
-		} else if (strcmp(prog_name, "sockops") == 0) {
-			attach_type = BPF_CGROUP_SOCK_OPS;
-		} else {
+		if (libbpf_attach_type_by_name(prog_name, &attach_type)) {
 			log_err("Unexpected prog: %s", prog_name);
 			goto err;
 		}
diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c
index 0c7d9e556b47..622ade0a0957 100644
--- a/tools/testing/selftests/bpf/test_sockmap.c
+++ b/tools/testing/selftests/bpf/test_sockmap.c
@@ -28,6 +28,7 @@
 #include <linux/sock_diag.h>
 #include <linux/bpf.h>
 #include <linux/if_link.h>
+#include <linux/tls.h>
 #include <assert.h>
 #include <libgen.h>
 
@@ -43,6 +44,13 @@
 int running;
 static void running_handler(int a);
 
+#ifndef TCP_ULP
+# define TCP_ULP 31
+#endif
+#ifndef SOL_TLS
+# define SOL_TLS 282
+#endif
+
 /* randomly selected ports for testing on lo */
 #define S1_PORT 10000
 #define S2_PORT 10001
@@ -69,8 +77,12 @@ int txmsg_apply;
 int txmsg_cork;
 int txmsg_start;
 int txmsg_end;
+int txmsg_start_push;
+int txmsg_end_push;
 int txmsg_ingress;
 int txmsg_skb;
+int ktls;
+int peek_flag;
 
 static const struct option long_options[] = {
 	{"help",	no_argument,		NULL, 'h' },
@@ -90,8 +102,12 @@ static const struct option long_options[] = {
 	{"txmsg_cork",	required_argument,	NULL, 'k'},
 	{"txmsg_start", required_argument,	NULL, 's'},
 	{"txmsg_end",	required_argument,	NULL, 'e'},
+	{"txmsg_start_push", required_argument,	NULL, 'p'},
+	{"txmsg_end_push",   required_argument,	NULL, 'q'},
 	{"txmsg_ingress", no_argument,		&txmsg_ingress, 1 },
 	{"txmsg_skb", no_argument,		&txmsg_skb, 1 },
+	{"ktls", no_argument,			&ktls, 1 },
+	{"peek", no_argument,			&peek_flag, 1 },
 	{0, 0, NULL, 0 }
 };
 
@@ -112,6 +128,71 @@ static void usage(char *argv[])
 	printf("\n");
 }
 
+char *sock_to_string(int s)
+{
+	if (s == c1)
+		return "client1";
+	else if (s == c2)
+		return "client2";
+	else if (s == s1)
+		return "server1";
+	else if (s == s2)
+		return "server2";
+	else if (s == p1)
+		return "peer1";
+	else if (s == p2)
+		return "peer2";
+	else
+		return "unknown";
+}
+
+static int sockmap_init_ktls(int verbose, int s)
+{
+	struct tls12_crypto_info_aes_gcm_128 tls_tx = {
+		.info = {
+			.version     = TLS_1_2_VERSION,
+			.cipher_type = TLS_CIPHER_AES_GCM_128,
+		},
+	};
+	struct tls12_crypto_info_aes_gcm_128 tls_rx = {
+		.info = {
+			.version     = TLS_1_2_VERSION,
+			.cipher_type = TLS_CIPHER_AES_GCM_128,
+		},
+	};
+	int so_buf = 6553500;
+	int err;
+
+	err = setsockopt(s, 6, TCP_ULP, "tls", sizeof("tls"));
+	if (err) {
+		fprintf(stderr, "setsockopt: TCP_ULP(%s) failed with error %i\n", sock_to_string(s), err);
+		return -EINVAL;
+	}
+	err = setsockopt(s, SOL_TLS, TLS_TX, (void *)&tls_tx, sizeof(tls_tx));
+	if (err) {
+		fprintf(stderr, "setsockopt: TLS_TX(%s) failed with error %i\n", sock_to_string(s), err);
+		return -EINVAL;
+	}
+	err = setsockopt(s, SOL_TLS, TLS_RX, (void *)&tls_rx, sizeof(tls_rx));
+	if (err) {
+		fprintf(stderr, "setsockopt: TLS_RX(%s) failed with error %i\n", sock_to_string(s), err);
+		return -EINVAL;
+	}
+	err = setsockopt(s, SOL_SOCKET, SO_SNDBUF, &so_buf, sizeof(so_buf));
+	if (err) {
+		fprintf(stderr, "setsockopt: (%s) failed sndbuf with error %i\n", sock_to_string(s), err);
+		return -EINVAL;
+	}
+	err = setsockopt(s, SOL_SOCKET, SO_RCVBUF, &so_buf, sizeof(so_buf));
+	if (err) {
+		fprintf(stderr, "setsockopt: (%s) failed rcvbuf with error %i\n", sock_to_string(s), err);
+		return -EINVAL;
+	}
+
+	if (verbose)
+		fprintf(stdout, "socket(%s) kTLS enabled\n", sock_to_string(s));
+	return 0;
+}
 static int sockmap_init_sockets(int verbose)
 {
 	int i, err, one = 1;
@@ -277,33 +358,40 @@ static int msg_loop_sendpage(int fd, int iov_length, int cnt,
 	return 0;
 }
 
-static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
-		    struct msg_stats *s, bool tx,
-		    struct sockmap_options *opt)
+static void msg_free_iov(struct msghdr *msg)
 {
-	struct msghdr msg = {0};
-	int err, i, flags = MSG_NOSIGNAL;
+	int i;
+
+	for (i = 0; i < msg->msg_iovlen; i++)
+		free(msg->msg_iov[i].iov_base);
+	free(msg->msg_iov);
+	msg->msg_iov = NULL;
+	msg->msg_iovlen = 0;
+}
+
+static int msg_alloc_iov(struct msghdr *msg,
+			 int iov_count, int iov_length,
+			 bool data, bool xmit)
+{
+	unsigned char k = 0;
 	struct iovec *iov;
-	unsigned char k;
-	bool data_test = opt->data_test;
-	bool drop = opt->drop_expected;
+	int i;
 
 	iov = calloc(iov_count, sizeof(struct iovec));
 	if (!iov)
 		return errno;
 
-	k = 0;
 	for (i = 0; i < iov_count; i++) {
 		unsigned char *d = calloc(iov_length, sizeof(char));
 
 		if (!d) {
 			fprintf(stderr, "iov_count %i/%i OOM\n", i, iov_count);
-			goto out_errno;
+			goto unwind_iov;
 		}
 		iov[i].iov_base = d;
 		iov[i].iov_len = iov_length;
 
-		if (data_test && tx) {
+		if (data && xmit) {
 			int j;
 
 			for (j = 0; j < iov_length; j++)
@@ -311,9 +399,60 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
 		}
 	}
 
-	msg.msg_iov = iov;
-	msg.msg_iovlen = iov_count;
-	k = 0;
+	msg->msg_iov = iov;
+	msg->msg_iovlen = iov_count;
+
+	return 0;
+unwind_iov:
+	for (i--; i >= 0 ; i--)
+		free(msg->msg_iov[i].iov_base);
+	return -ENOMEM;
+}
+
+static int msg_verify_data(struct msghdr *msg, int size, int chunk_sz)
+{
+	int i, j, bytes_cnt = 0;
+	unsigned char k = 0;
+
+	for (i = 0; i < msg->msg_iovlen; i++) {
+		unsigned char *d = msg->msg_iov[i].iov_base;
+
+		for (j = 0;
+		     j < msg->msg_iov[i].iov_len && size; j++) {
+			if (d[j] != k++) {
+				fprintf(stderr,
+					"detected data corruption @iov[%i]:%i %02x != %02x, %02x ?= %02x\n",
+					i, j, d[j], k - 1, d[j+1], k);
+				return -EIO;
+			}
+			bytes_cnt++;
+			if (bytes_cnt == chunk_sz) {
+				k = 0;
+				bytes_cnt = 0;
+			}
+			size--;
+		}
+	}
+	return 0;
+}
+
+static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
+		    struct msg_stats *s, bool tx,
+		    struct sockmap_options *opt)
+{
+	struct msghdr msg = {0}, msg_peek = {0};
+	int err, i, flags = MSG_NOSIGNAL;
+	bool drop = opt->drop_expected;
+	bool data = opt->data_test;
+
+	err = msg_alloc_iov(&msg, iov_count, iov_length, data, tx);
+	if (err)
+		goto out_errno;
+	if (peek_flag) {
+		err = msg_alloc_iov(&msg_peek, iov_count, iov_length, data, tx);
+		if (err)
+			goto out_errno;
+	}
 
 	if (tx) {
 		clock_gettime(CLOCK_MONOTONIC, &s->start);
@@ -333,19 +472,12 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
 		}
 		clock_gettime(CLOCK_MONOTONIC, &s->end);
 	} else {
-		int slct, recv, max_fd = fd;
+		int slct, recvp = 0, recv, max_fd = fd;
 		int fd_flags = O_NONBLOCK;
 		struct timeval timeout;
 		float total_bytes;
-		int bytes_cnt = 0;
-		int chunk_sz;
 		fd_set w;
 
-		if (opt->sendpage)
-			chunk_sz = iov_length * cnt;
-		else
-			chunk_sz = iov_length * iov_count;
-
 		fcntl(fd, fd_flags);
 		total_bytes = (float)iov_count * (float)iov_length * (float)cnt;
 		err = clock_gettime(CLOCK_MONOTONIC, &s->start);
@@ -377,6 +509,19 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
 				goto out_errno;
 			}
 
+			errno = 0;
+			if (peek_flag) {
+				flags |= MSG_PEEK;
+				recvp = recvmsg(fd, &msg_peek, flags);
+				if (recvp < 0) {
+					if (errno != EWOULDBLOCK) {
+						clock_gettime(CLOCK_MONOTONIC, &s->end);
+						goto out_errno;
+					}
+				}
+				flags = 0;
+			}
+
 			recv = recvmsg(fd, &msg, flags);
 			if (recv < 0) {
 				if (errno != EWOULDBLOCK) {
@@ -388,27 +533,23 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
 
 			s->bytes_recvd += recv;
 
-			if (data_test) {
-				int j;
-
-				for (i = 0; i < msg.msg_iovlen; i++) {
-					unsigned char *d = iov[i].iov_base;
-
-					for (j = 0;
-					     j < iov[i].iov_len && recv; j++) {
-						if (d[j] != k++) {
-							errno = -EIO;
-							fprintf(stderr,
-								"detected data corruption @iov[%i]:%i %02x != %02x, %02x ?= %02x\n",
-								i, j, d[j], k - 1, d[j+1], k);
-							goto out_errno;
-						}
-						bytes_cnt++;
-						if (bytes_cnt == chunk_sz) {
-							k = 0;
-							bytes_cnt = 0;
-						}
-						recv--;
+			if (data) {
+				int chunk_sz = opt->sendpage ?
+						iov_length * cnt :
+						iov_length * iov_count;
+
+				errno = msg_verify_data(&msg, recv, chunk_sz);
+				if (errno) {
+					perror("data verify msg failed\n");
+					goto out_errno;
+				}
+				if (recvp) {
+					errno = msg_verify_data(&msg_peek,
+								recvp,
+								chunk_sz);
+					if (errno) {
+						perror("data verify msg_peek failed\n");
+						goto out_errno;
 					}
 				}
 			}
@@ -416,14 +557,12 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
 		clock_gettime(CLOCK_MONOTONIC, &s->end);
 	}
 
-	for (i = 0; i < iov_count; i++)
-		free(iov[i].iov_base);
-	free(iov);
-	return 0;
+	msg_free_iov(&msg);
+	msg_free_iov(&msg_peek);
+	return err;
 out_errno:
-	for (i = 0; i < iov_count; i++)
-		free(iov[i].iov_base);
-	free(iov);
+	msg_free_iov(&msg);
+	msg_free_iov(&msg_peek);
 	return errno;
 }
 
@@ -456,6 +595,21 @@ static int sendmsg_test(struct sockmap_options *opt)
 	else
 		rx_fd = p2;
 
+	if (ktls) {
+		/* Redirecting into non-TLS socket which sends into a TLS
+		 * socket is not a valid test. So in this case lets not
+		 * enable kTLS but still run the test.
+		 */
+		if (!txmsg_redir || (txmsg_redir && txmsg_ingress)) {
+			err = sockmap_init_ktls(opt->verbose, rx_fd);
+			if (err)
+				return err;
+		}
+		err = sockmap_init_ktls(opt->verbose, c1);
+		if (err)
+			return err;
+	}
+
 	rxpid = fork();
 	if (rxpid == 0) {
 		if (opt->drop_expected)
@@ -469,17 +623,16 @@ static int sendmsg_test(struct sockmap_options *opt)
 			fprintf(stderr,
 				"msg_loop_rx: iov_count %i iov_buf %i cnt %i err %i\n",
 				iov_count, iov_buf, cnt, err);
-		shutdown(p2, SHUT_RDWR);
-		shutdown(p1, SHUT_RDWR);
 		if (s.end.tv_sec - s.start.tv_sec) {
 			sent_Bps = sentBps(s);
 			recvd_Bps = recvdBps(s);
 		}
 		if (opt->verbose)
 			fprintf(stdout,
-				"rx_sendmsg: TX: %zuB %fB/s %fGB/s RX: %zuB %fB/s %fGB/s\n",
+				"rx_sendmsg: TX: %zuB %fB/s %fGB/s RX: %zuB %fB/s %fGB/s %s\n",
 				s.bytes_sent, sent_Bps, sent_Bps/giga,
-				s.bytes_recvd, recvd_Bps, recvd_Bps/giga);
+				s.bytes_recvd, recvd_Bps, recvd_Bps/giga,
+				peek_flag ? "(peek_msg)" : "");
 		if (err && txmsg_cork)
 			err = 0;
 		exit(err ? 1 : 0);
@@ -500,7 +653,6 @@ static int sendmsg_test(struct sockmap_options *opt)
 			fprintf(stderr,
 				"msg_loop_tx: iov_count %i iov_buf %i cnt %i err %i\n",
 				iov_count, iov_buf, cnt, err);
-		shutdown(c1, SHUT_RDWR);
 		if (s.end.tv_sec - s.start.tv_sec) {
 			sent_Bps = sentBps(s);
 			recvd_Bps = recvdBps(s);
@@ -755,6 +907,30 @@ run:
 			}
 		}
 
+		if (txmsg_start_push) {
+			i = 2;
+			err = bpf_map_update_elem(map_fd[5],
+						  &i, &txmsg_start_push, BPF_ANY);
+			if (err) {
+				fprintf(stderr,
+					"ERROR: bpf_map_update_elem (txmsg_start_push):  %d (%s)\n",
+					err, strerror(errno));
+				goto out;
+			}
+		}
+
+		if (txmsg_end_push) {
+			i = 3;
+			err = bpf_map_update_elem(map_fd[5],
+						  &i, &txmsg_end_push, BPF_ANY);
+			if (err) {
+				fprintf(stderr,
+					"ERROR: bpf_map_update_elem %i@%i (txmsg_end_push):  %d (%s)\n",
+					txmsg_end_push, i, err, strerror(errno));
+				goto out;
+			}
+		}
+
 		if (txmsg_ingress) {
 			int in = BPF_F_INGRESS;
 
@@ -910,6 +1086,10 @@ static void test_options(char *options)
 		strncat(options, "ingress,", OPTSTRING);
 	if (txmsg_skb)
 		strncat(options, "skb,", OPTSTRING);
+	if (ktls)
+		strncat(options, "ktls,", OPTSTRING);
+	if (peek_flag)
+		strncat(options, "peek,", OPTSTRING);
 }
 
 static int __test_exec(int cgrp, int test, struct sockmap_options *opt)
@@ -1083,6 +1263,8 @@ static int test_mixed(int cgrp)
 	txmsg_pass = txmsg_noisy = txmsg_redir_noisy = txmsg_drop = 0;
 	txmsg_apply = txmsg_cork = 0;
 	txmsg_start = txmsg_end = 0;
+	txmsg_start_push = txmsg_end_push = 0;
+
 	/* Test small and large iov_count values with pass/redir/apply/cork */
 	txmsg_pass = 1;
 	txmsg_redir = 0;
@@ -1199,6 +1381,8 @@ static int test_start_end(int cgrp)
 	/* Test basic start/end with lots of iov_count and iov_lengths */
 	txmsg_start = 1;
 	txmsg_end = 2;
+	txmsg_start_push = 1;
+	txmsg_end_push = 2;
 	err = test_txmsg(cgrp);
 	if (err)
 		goto out;
@@ -1212,6 +1396,8 @@ static int test_start_end(int cgrp)
 	for (i = 99; i <= 1600; i += 500) {
 		txmsg_start = 0;
 		txmsg_end = i;
+		txmsg_start_push = 0;
+		txmsg_end_push = i;
 		err = test_exec(cgrp, &opt);
 		if (err)
 			goto out;
@@ -1221,6 +1407,8 @@ static int test_start_end(int cgrp)
 	for (i = 199; i <= 1600; i += 500) {
 		txmsg_start = 100;
 		txmsg_end = i;
+		txmsg_start_push = 100;
+		txmsg_end_push = i;
 		err = test_exec(cgrp, &opt);
 		if (err)
 			goto out;
@@ -1229,6 +1417,8 @@ static int test_start_end(int cgrp)
 	/* Test start/end with cork pulling last sg entry */
 	txmsg_start = 1500;
 	txmsg_end = 1600;
+	txmsg_start_push = 1500;
+	txmsg_end_push = 1600;
 	err = test_exec(cgrp, &opt);
 	if (err)
 		goto out;
@@ -1236,6 +1426,8 @@ static int test_start_end(int cgrp)
 	/* Test start/end pull of single byte in last page */
 	txmsg_start = 1111;
 	txmsg_end = 1112;
+	txmsg_start_push = 1111;
+	txmsg_end_push = 1112;
 	err = test_exec(cgrp, &opt);
 	if (err)
 		goto out;
@@ -1243,6 +1435,8 @@ static int test_start_end(int cgrp)
 	/* Test start/end with end < start */
 	txmsg_start = 1111;
 	txmsg_end = 0;
+	txmsg_start_push = 1111;
+	txmsg_end_push = 0;
 	err = test_exec(cgrp, &opt);
 	if (err)
 		goto out;
@@ -1250,6 +1444,8 @@ static int test_start_end(int cgrp)
 	/* Test start/end with end > data */
 	txmsg_start = 0;
 	txmsg_end = 1601;
+	txmsg_start_push = 0;
+	txmsg_end_push = 1601;
 	err = test_exec(cgrp, &opt);
 	if (err)
 		goto out;
@@ -1257,6 +1453,8 @@ static int test_start_end(int cgrp)
 	/* Test start/end with start > data */
 	txmsg_start = 1601;
 	txmsg_end = 1600;
+	txmsg_start_push = 1601;
+	txmsg_end_push = 1600;
 	err = test_exec(cgrp, &opt);
 
 out:
@@ -1272,7 +1470,7 @@ char *map_names[] = {
 	"sock_map_redir",
 	"sock_apply_bytes",
 	"sock_cork_bytes",
-	"sock_pull_bytes",
+	"sock_bytes",
 	"sock_redir_flags",
 	"sock_skb_opts",
 };
@@ -1348,9 +1546,9 @@ static int populate_progs(char *bpf_file)
 	return 0;
 }
 
-static int __test_suite(char *bpf_file)
+static int __test_suite(int cg_fd, char *bpf_file)
 {
-	int cg_fd, err;
+	int err, cleanup = cg_fd;
 
 	err = populate_progs(bpf_file);
 	if (err < 0) {
@@ -1358,26 +1556,28 @@ static int __test_suite(char *bpf_file)
 		return err;
 	}
 
-	if (setup_cgroup_environment()) {
-		fprintf(stderr, "ERROR: cgroup env failed\n");
-		return -EINVAL;
-	}
-
-	cg_fd = create_and_get_cgroup(CG_PATH);
 	if (cg_fd < 0) {
-		fprintf(stderr,
-			"ERROR: (%i) open cg path failed: %s\n",
-			cg_fd, optarg);
-		return cg_fd;
-	}
+		if (setup_cgroup_environment()) {
+			fprintf(stderr, "ERROR: cgroup env failed\n");
+			return -EINVAL;
+		}
 
-	if (join_cgroup(CG_PATH)) {
-		fprintf(stderr, "ERROR: failed to join cgroup\n");
-		return -EINVAL;
+		cg_fd = create_and_get_cgroup(CG_PATH);
+		if (cg_fd < 0) {
+			fprintf(stderr,
+				"ERROR: (%i) open cg path failed: %s\n",
+				cg_fd, optarg);
+			return cg_fd;
+		}
+
+		if (join_cgroup(CG_PATH)) {
+			fprintf(stderr, "ERROR: failed to join cgroup\n");
+			return -EINVAL;
+		}
 	}
 
 	/* Tests basic commands and APIs with range of iov values */
-	txmsg_start = txmsg_end = 0;
+	txmsg_start = txmsg_end = txmsg_start_push = txmsg_end_push = 0;
 	err = test_txmsg(cg_fd);
 	if (err)
 		goto out;
@@ -1394,20 +1594,24 @@ static int __test_suite(char *bpf_file)
 
 out:
 	printf("Summary: %i PASSED %i FAILED\n", passed, failed);
-	cleanup_cgroup_environment();
-	close(cg_fd);
+	if (cleanup < 0) {
+		cleanup_cgroup_environment();
+		close(cg_fd);
+	}
 	return err;
 }
 
-static int test_suite(void)
+static int test_suite(int cg_fd)
 {
 	int err;
 
-	err = __test_suite(BPF_SOCKMAP_FILENAME);
+	err = __test_suite(cg_fd, BPF_SOCKMAP_FILENAME);
 	if (err)
 		goto out;
-	err = __test_suite(BPF_SOCKHASH_FILENAME);
+	err = __test_suite(cg_fd, BPF_SOCKHASH_FILENAME);
 out:
+	if (cg_fd > -1)
+		close(cg_fd);
 	return err;
 }
 
@@ -1420,9 +1624,9 @@ int main(int argc, char **argv)
 	int test = PING_PONG;
 
 	if (argc < 2)
-		return test_suite();
+		return test_suite(-1);
 
-	while ((opt = getopt_long(argc, argv, ":dhvc:r:i:l:t:",
+	while ((opt = getopt_long(argc, argv, ":dhvc:r:i:l:t:p:q:",
 				  long_options, &longindex)) != -1) {
 		switch (opt) {
 		case 's':
@@ -1431,6 +1635,12 @@ int main(int argc, char **argv)
 		case 'e':
 			txmsg_end = atoi(optarg);
 			break;
+		case 'p':
+			txmsg_start_push = atoi(optarg);
+			break;
+		case 'q':
+			txmsg_end_push = atoi(optarg);
+			break;
 		case 'a':
 			txmsg_apply = atoi(optarg);
 			break;
@@ -1486,6 +1696,9 @@ int main(int argc, char **argv)
 		}
 	}
 
+	if (argc <= 3 && cg_fd)
+		return test_suite(cg_fd);
+
 	if (!cg_fd) {
 		fprintf(stderr, "%s requires cgroup option: --cgroup <path>\n",
 			argv[0]);
diff --git a/tools/testing/selftests/bpf/test_sockmap_kern.h b/tools/testing/selftests/bpf/test_sockmap_kern.h
index 8e8e41780bb9..14b8bbac004f 100644
--- a/tools/testing/selftests/bpf/test_sockmap_kern.h
+++ b/tools/testing/selftests/bpf/test_sockmap_kern.h
@@ -70,11 +70,11 @@ struct bpf_map_def SEC("maps") sock_cork_bytes = {
 	.max_entries = 1
 };
 
-struct bpf_map_def SEC("maps") sock_pull_bytes = {
+struct bpf_map_def SEC("maps") sock_bytes = {
 	.type = BPF_MAP_TYPE_ARRAY,
 	.key_size = sizeof(int),
 	.value_size = sizeof(int),
-	.max_entries = 2
+	.max_entries = 4
 };
 
 struct bpf_map_def SEC("maps") sock_redir_flags = {
@@ -181,8 +181,8 @@ int bpf_sockmap(struct bpf_sock_ops *skops)
 SEC("sk_msg1")
 int bpf_prog4(struct sk_msg_md *msg)
 {
-	int *bytes, zero = 0, one = 1;
-	int *start, *end;
+	int *bytes, zero = 0, one = 1, two = 2, three = 3;
+	int *start, *end, *start_push, *end_push;
 
 	bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
 	if (bytes)
@@ -190,18 +190,24 @@ int bpf_prog4(struct sk_msg_md *msg)
 	bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
 	if (bytes)
 		bpf_msg_cork_bytes(msg, *bytes);
-	start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
-	end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
+	start = bpf_map_lookup_elem(&sock_bytes, &zero);
+	end = bpf_map_lookup_elem(&sock_bytes, &one);
 	if (start && end)
 		bpf_msg_pull_data(msg, *start, *end, 0);
+	start_push = bpf_map_lookup_elem(&sock_bytes, &two);
+	end_push = bpf_map_lookup_elem(&sock_bytes, &three);
+	if (start_push && end_push)
+		bpf_msg_push_data(msg, *start_push, *end_push, 0);
 	return SK_PASS;
 }
 
 SEC("sk_msg2")
 int bpf_prog5(struct sk_msg_md *msg)
 {
-	int err1 = -1, err2 = -1, zero = 0, one = 1;
-	int *bytes, *start, *end, len1, len2;
+	int zero = 0, one = 1, two = 2, three = 3;
+	int *start, *end, *start_push, *end_push;
+	int *bytes, len1, len2 = 0, len3;
+	int err1 = -1, err2 = -1;
 
 	bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
 	if (bytes)
@@ -210,8 +216,8 @@ int bpf_prog5(struct sk_msg_md *msg)
 	if (bytes)
 		err2 = bpf_msg_cork_bytes(msg, *bytes);
 	len1 = (__u64)msg->data_end - (__u64)msg->data;
-	start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
-	end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
+	start = bpf_map_lookup_elem(&sock_bytes, &zero);
+	end = bpf_map_lookup_elem(&sock_bytes, &one);
 	if (start && end) {
 		int err;
 
@@ -225,6 +231,23 @@ int bpf_prog5(struct sk_msg_md *msg)
 		bpf_printk("sk_msg2: length update %i->%i\n",
 			   len1, len2);
 	}
+
+	start_push = bpf_map_lookup_elem(&sock_bytes, &two);
+	end_push = bpf_map_lookup_elem(&sock_bytes, &three);
+	if (start_push && end_push) {
+		int err;
+
+		bpf_printk("sk_msg2: push(%i:%i)\n",
+			   start_push ? *start_push : 0,
+			   end_push ? *end_push : 0);
+		err = bpf_msg_push_data(msg, *start_push, *end_push, 0);
+		if (err)
+			bpf_printk("sk_msg2: push_data err %i\n", err);
+		len3 = (__u64)msg->data_end - (__u64)msg->data;
+		bpf_printk("sk_msg2: length push_update %i->%i\n",
+			   len2 ? len2 : len1, len3);
+	}
+
 	bpf_printk("sk_msg2: data length %i err1 %i err2 %i\n",
 		   len1, err1, err2);
 	return SK_PASS;
@@ -233,8 +256,8 @@ int bpf_prog5(struct sk_msg_md *msg)
 SEC("sk_msg3")
 int bpf_prog6(struct sk_msg_md *msg)
 {
-	int *bytes, zero = 0, one = 1, key = 0;
-	int *start, *end, *f;
+	int *bytes, *start, *end, *start_push, *end_push, *f;
+	int zero = 0, one = 1, two = 2, three = 3, key = 0;
 	__u64 flags = 0;
 
 	bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
@@ -243,10 +266,17 @@ int bpf_prog6(struct sk_msg_md *msg)
 	bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
 	if (bytes)
 		bpf_msg_cork_bytes(msg, *bytes);
-	start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
-	end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
+
+	start = bpf_map_lookup_elem(&sock_bytes, &zero);
+	end = bpf_map_lookup_elem(&sock_bytes, &one);
 	if (start && end)
 		bpf_msg_pull_data(msg, *start, *end, 0);
+
+	start_push = bpf_map_lookup_elem(&sock_bytes, &two);
+	end_push = bpf_map_lookup_elem(&sock_bytes, &three);
+	if (start_push && end_push)
+		bpf_msg_push_data(msg, *start_push, *end_push, 0);
+
 	f = bpf_map_lookup_elem(&sock_redir_flags, &zero);
 	if (f && *f) {
 		key = 2;
@@ -262,8 +292,9 @@ int bpf_prog6(struct sk_msg_md *msg)
 SEC("sk_msg4")
 int bpf_prog7(struct sk_msg_md *msg)
 {
-	int err1 = 0, err2 = 0, zero = 0, one = 1, key = 0;
-	int *f, *bytes, *start, *end, len1, len2;
+	int zero = 0, one = 1, two = 2, three = 3, len1, len2 = 0, len3;
+	int *bytes, *start, *end, *start_push, *end_push, *f;
+	int err1 = 0, err2 = 0, key = 0;
 	__u64 flags = 0;
 
 		int err;
@@ -274,10 +305,10 @@ int bpf_prog7(struct sk_msg_md *msg)
 	if (bytes)
 		err2 = bpf_msg_cork_bytes(msg, *bytes);
 	len1 = (__u64)msg->data_end - (__u64)msg->data;
-	start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
-	end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
-	if (start && end) {
 
+	start = bpf_map_lookup_elem(&sock_bytes, &zero);
+	end = bpf_map_lookup_elem(&sock_bytes, &one);
+	if (start && end) {
 		bpf_printk("sk_msg2: pull(%i:%i)\n",
 			   start ? *start : 0, end ? *end : 0);
 		err = bpf_msg_pull_data(msg, *start, *end, 0);
@@ -288,6 +319,22 @@ int bpf_prog7(struct sk_msg_md *msg)
 		bpf_printk("sk_msg2: length update %i->%i\n",
 			   len1, len2);
 	}
+
+	start_push = bpf_map_lookup_elem(&sock_bytes, &two);
+	end_push = bpf_map_lookup_elem(&sock_bytes, &three);
+	if (start_push && end_push) {
+		bpf_printk("sk_msg4: push(%i:%i)\n",
+			   start_push ? *start_push : 0,
+			   end_push ? *end_push : 0);
+		err = bpf_msg_push_data(msg, *start_push, *end_push, 0);
+		if (err)
+			bpf_printk("sk_msg4: push_data err %i\n",
+				   err);
+		len3 = (__u64)msg->data_end - (__u64)msg->data;
+		bpf_printk("sk_msg4: length push_update %i->%i\n",
+			   len2 ? len2 : len1, len3);
+	}
+
 	f = bpf_map_lookup_elem(&sock_redir_flags, &zero);
 	if (f && *f) {
 		key = 2;
@@ -342,8 +389,8 @@ int bpf_prog9(struct sk_msg_md *msg)
 SEC("sk_msg7")
 int bpf_prog10(struct sk_msg_md *msg)
 {
-	int *bytes, zero = 0, one = 1;
-	int *start, *end;
+	int *bytes, *start, *end, *start_push, *end_push;
+	int zero = 0, one = 1, two = 2, three = 3;
 
 	bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
 	if (bytes)
@@ -351,10 +398,14 @@ int bpf_prog10(struct sk_msg_md *msg)
 	bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
 	if (bytes)
 		bpf_msg_cork_bytes(msg, *bytes);
-	start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
-	end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
+	start = bpf_map_lookup_elem(&sock_bytes, &zero);
+	end = bpf_map_lookup_elem(&sock_bytes, &one);
 	if (start && end)
 		bpf_msg_pull_data(msg, *start, *end, 0);
+	start_push = bpf_map_lookup_elem(&sock_bytes, &two);
+	end_push = bpf_map_lookup_elem(&sock_bytes, &three);
+	if (start_push && end_push)
+		bpf_msg_push_data(msg, *start_push, *end_push, 0);
 
 	return SK_DROP;
 }
diff --git a/tools/testing/selftests/bpf/test_stack_map.c b/tools/testing/selftests/bpf/test_stack_map.c
new file mode 100644
index 000000000000..31c3880e6da0
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_stack_map.c
@@ -0,0 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Politecnico di Torino
+#define MAP_TYPE BPF_MAP_TYPE_STACK
+#include "test_queue_stack_map.h"
diff --git a/tools/testing/selftests/bpf/test_tcpbpf_kern.c b/tools/testing/selftests/bpf/test_tcpbpf_kern.c
index 4b7fd540cea9..74f73b33a7b0 100644
--- a/tools/testing/selftests/bpf/test_tcpbpf_kern.c
+++ b/tools/testing/selftests/bpf/test_tcpbpf_kern.c
@@ -5,6 +5,7 @@
 #include <linux/if_ether.h>
 #include <linux/if_packet.h>
 #include <linux/ip.h>
+#include <linux/ipv6.h>
 #include <linux/types.h>
 #include <linux/socket.h>
 #include <linux/tcp.h>
@@ -17,6 +18,13 @@ struct bpf_map_def SEC("maps") global_map = {
 	.type = BPF_MAP_TYPE_ARRAY,
 	.key_size = sizeof(__u32),
 	.value_size = sizeof(struct tcpbpf_globals),
+	.max_entries = 4,
+};
+
+struct bpf_map_def SEC("maps") sockopt_results = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(int),
 	.max_entries = 2,
 };
 
@@ -45,11 +53,14 @@ int _version SEC("version") = 1;
 SEC("sockops")
 int bpf_testcb(struct bpf_sock_ops *skops)
 {
-	int rv = -1;
-	int bad_call_rv = 0;
+	char header[sizeof(struct ipv6hdr) + sizeof(struct tcphdr)];
+	struct tcphdr *thdr;
 	int good_call_rv = 0;
-	int op;
+	int bad_call_rv = 0;
+	int save_syn = 1;
+	int rv = -1;
 	int v = 0;
+	int op;
 
 	op = (int) skops->op;
 
@@ -82,6 +93,21 @@ int bpf_testcb(struct bpf_sock_ops *skops)
 		v = 0xff;
 		rv = bpf_setsockopt(skops, SOL_IPV6, IPV6_TCLASS, &v,
 				    sizeof(v));
+		if (skops->family == AF_INET6) {
+			v = bpf_getsockopt(skops, IPPROTO_TCP, TCP_SAVED_SYN,
+					   header, (sizeof(struct ipv6hdr) +
+						    sizeof(struct tcphdr)));
+			if (!v) {
+				int offset = sizeof(struct ipv6hdr);
+
+				thdr = (struct tcphdr *)(header + offset);
+				v = thdr->syn;
+				__u32 key = 1;
+
+				bpf_map_update_elem(&sockopt_results, &key, &v,
+						    BPF_ANY);
+			}
+		}
 		break;
 	case BPF_SOCK_OPS_RTO_CB:
 		break;
@@ -111,6 +137,12 @@ int bpf_testcb(struct bpf_sock_ops *skops)
 		break;
 	case BPF_SOCK_OPS_TCP_LISTEN_CB:
 		bpf_sock_ops_cb_flags_set(skops, BPF_SOCK_OPS_STATE_CB_FLAG);
+		v = bpf_setsockopt(skops, IPPROTO_TCP, TCP_SAVE_SYN,
+				   &save_syn, sizeof(save_syn));
+		/* Update global map w/ result of setsock opt */
+		__u32 key = 0;
+
+		bpf_map_update_elem(&sockopt_results, &key, &v, BPF_ANY);
 		break;
 	default:
 		rv = -1;
diff --git a/tools/testing/selftests/bpf/test_tcpbpf_user.c b/tools/testing/selftests/bpf/test_tcpbpf_user.c
index a275c2971376..e6eebda7d112 100644
--- a/tools/testing/selftests/bpf/test_tcpbpf_user.c
+++ b/tools/testing/selftests/bpf/test_tcpbpf_user.c
@@ -54,6 +54,26 @@ err:
 	return -1;
 }
 
+int verify_sockopt_result(int sock_map_fd)
+{
+	__u32 key = 0;
+	int res;
+	int rv;
+
+	/* check setsockopt for SAVE_SYN */
+	rv = bpf_map_lookup_elem(sock_map_fd, &key, &res);
+	EXPECT_EQ(0, rv, "d");
+	EXPECT_EQ(0, res, "d");
+	key = 1;
+	/* check getsockopt for SAVED_SYN */
+	rv = bpf_map_lookup_elem(sock_map_fd, &key, &res);
+	EXPECT_EQ(0, rv, "d");
+	EXPECT_EQ(1, res, "d");
+	return 0;
+err:
+	return -1;
+}
+
 static int bpf_find_map(const char *test, struct bpf_object *obj,
 			const char *name)
 {
@@ -70,11 +90,11 @@ static int bpf_find_map(const char *test, struct bpf_object *obj,
 int main(int argc, char **argv)
 {
 	const char *file = "test_tcpbpf_kern.o";
+	int prog_fd, map_fd, sock_map_fd;
 	struct tcpbpf_globals g = {0};
 	const char *cg_path = "/foo";
 	int error = EXIT_FAILURE;
 	struct bpf_object *obj;
-	int prog_fd, map_fd;
 	int cg_fd = -1;
 	__u32 key = 0;
 	int rv;
@@ -110,6 +130,10 @@ int main(int argc, char **argv)
 	if (map_fd < 0)
 		goto err;
 
+	sock_map_fd = bpf_find_map(__func__, obj, "sockopt_results");
+	if (sock_map_fd < 0)
+		goto err;
+
 	rv = bpf_map_lookup_elem(map_fd, &key, &g);
 	if (rv != 0) {
 		printf("FAILED: bpf_map_lookup_elem returns %d\n", rv);
@@ -121,6 +145,11 @@ int main(int argc, char **argv)
 		goto err;
 	}
 
+	if (verify_sockopt_result(sock_map_fd)) {
+		printf("FAILED: Wrong sockopt stats\n");
+		goto err;
+	}
+
 	printf("PASSED!\n");
 	error = 0;
 err:
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 67c412d19c09..6f61df62f690 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -3,6 +3,7 @@
  *
  * Copyright (c) 2014 PLUMgrid, http://plumgrid.com
  * Copyright (c) 2017 Facebook
+ * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of version 2 of the GNU General Public
@@ -47,7 +48,7 @@
 
 #define MAX_INSNS	BPF_MAXINSNS
 #define MAX_FIXUPS	8
-#define MAX_NR_MAPS	8
+#define MAX_NR_MAPS	13
 #define POINTER_VALUE	0xcafe4all
 #define TEST_DATA_LEN	64
 
@@ -60,17 +61,22 @@ static bool unpriv_disabled = false;
 struct bpf_test {
 	const char *descr;
 	struct bpf_insn	insns[MAX_INSNS];
-	int fixup_map1[MAX_FIXUPS];
-	int fixup_map2[MAX_FIXUPS];
-	int fixup_map3[MAX_FIXUPS];
-	int fixup_map4[MAX_FIXUPS];
+	int fixup_map_hash_8b[MAX_FIXUPS];
+	int fixup_map_hash_48b[MAX_FIXUPS];
+	int fixup_map_hash_16b[MAX_FIXUPS];
+	int fixup_map_array_48b[MAX_FIXUPS];
+	int fixup_map_sockmap[MAX_FIXUPS];
+	int fixup_map_sockhash[MAX_FIXUPS];
+	int fixup_map_xskmap[MAX_FIXUPS];
+	int fixup_map_stacktrace[MAX_FIXUPS];
 	int fixup_prog1[MAX_FIXUPS];
 	int fixup_prog2[MAX_FIXUPS];
 	int fixup_map_in_map[MAX_FIXUPS];
 	int fixup_cgroup_storage[MAX_FIXUPS];
+	int fixup_percpu_cgroup_storage[MAX_FIXUPS];
 	const char *errstr;
 	const char *errstr_unpriv;
-	uint32_t retval;
+	uint32_t retval, retval_unpriv;
 	enum {
 		UNDEF,
 		ACCEPT,
@@ -177,6 +183,24 @@ static void bpf_fill_rand_ld_dw(struct bpf_test *self)
 	self->retval = (uint32_t)res;
 }
 
+/* BPF_SK_LOOKUP contains 13 instructions, if you need to fix up maps */
+#define BPF_SK_LOOKUP							\
+	/* struct bpf_sock_tuple tuple = {} */				\
+	BPF_MOV64_IMM(BPF_REG_2, 0),					\
+	BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -8),			\
+	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -16),		\
+	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -24),		\
+	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -32),		\
+	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -40),		\
+	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -48),		\
+	/* sk = sk_lookup_tcp(ctx, &tuple, sizeof tuple, 0, 0) */	\
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),				\
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -48),				\
+	BPF_MOV64_IMM(BPF_REG_3, sizeof(struct bpf_sock_tuple)),	\
+	BPF_MOV64_IMM(BPF_REG_4, 0),					\
+	BPF_MOV64_IMM(BPF_REG_5, 0),					\
+	BPF_EMIT_CALL(BPF_FUNC_sk_lookup_tcp)
+
 static struct bpf_test tests[] = {
 	{
 		"add+sub+mul",
@@ -856,7 +880,7 @@ static struct bpf_test tests[] = {
 				     BPF_FUNC_map_lookup_elem),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 2 },
+		.fixup_map_hash_8b = { 2 },
 		.errstr = "invalid indirect read from stack",
 		.result = REJECT,
 	},
@@ -1090,7 +1114,7 @@ static struct bpf_test tests[] = {
 			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "R0 invalid mem access 'map_value_or_null'",
 		.result = REJECT,
 	},
@@ -1107,7 +1131,7 @@ static struct bpf_test tests[] = {
 			BPF_ST_MEM(BPF_DW, BPF_REG_0, 4, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "misaligned value access",
 		.result = REJECT,
 		.flags = F_LOAD_WITH_STRICT_ALIGNMENT,
@@ -1127,7 +1151,7 @@ static struct bpf_test tests[] = {
 			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 1),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "R0 invalid mem access",
 		.errstr_unpriv = "R0 leaks addr",
 		.result = REJECT,
@@ -1217,7 +1241,7 @@ static struct bpf_test tests[] = {
 				     BPF_FUNC_map_delete_elem),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 24 },
+		.fixup_map_hash_8b = { 24 },
 		.errstr_unpriv = "R1 pointer comparison",
 		.result_unpriv = REJECT,
 		.result = ACCEPT,
@@ -1371,7 +1395,7 @@ static struct bpf_test tests[] = {
 				    offsetof(struct __sk_buff, pkt_type)),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 4 },
+		.fixup_map_hash_8b = { 4 },
 		.errstr = "different pointers",
 		.errstr_unpriv = "R1 pointer comparison",
 		.result = REJECT,
@@ -1394,7 +1418,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
 			BPF_JMP_IMM(BPF_JA, 0, 0, -12),
 		},
-		.fixup_map1 = { 6 },
+		.fixup_map_hash_8b = { 6 },
 		.errstr = "different pointers",
 		.errstr_unpriv = "R1 pointer comparison",
 		.result = REJECT,
@@ -1418,7 +1442,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
 			BPF_JMP_IMM(BPF_JA, 0, 0, -13),
 		},
-		.fixup_map1 = { 7 },
+		.fixup_map_hash_8b = { 7 },
 		.errstr = "different pointers",
 		.errstr_unpriv = "R1 pointer comparison",
 		.result = REJECT,
@@ -2555,7 +2579,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr_unpriv = "R4 leaks addr",
 		.result_unpriv = REJECT,
 		.result = ACCEPT,
@@ -2572,7 +2596,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "invalid indirect read from stack off -8+0 size 8",
 		.result = REJECT,
 	},
@@ -2707,6 +2731,137 @@ static struct bpf_test tests[] = {
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	},
 	{
+		"unpriv: spill/fill of different pointers stx - ctx and sock",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_8, BPF_REG_1),
+			/* struct bpf_sock *sock = bpf_sock_lookup(...); */
+			BPF_SK_LOOKUP,
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+			/* u64 foo; */
+			/* void *target = &foo; */
+			BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_8),
+			/* if (skb == NULL) *target = sock; */
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
+				BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_2, 0),
+			/* else *target = skb; */
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1),
+				BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
+			/* struct __sk_buff *skb = *target; */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0),
+			/* skb->mark = 42; */
+			BPF_MOV64_IMM(BPF_REG_3, 42),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3,
+				    offsetof(struct __sk_buff, mark)),
+			/* if (sk) bpf_sk_release(sk) */
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
+				BPF_EMIT_CALL(BPF_FUNC_sk_release),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "type=ctx expected=sock",
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"unpriv: spill/fill of different pointers stx - leak sock",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_8, BPF_REG_1),
+			/* struct bpf_sock *sock = bpf_sock_lookup(...); */
+			BPF_SK_LOOKUP,
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+			/* u64 foo; */
+			/* void *target = &foo; */
+			BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_8),
+			/* if (skb == NULL) *target = sock; */
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
+				BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_2, 0),
+			/* else *target = skb; */
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1),
+				BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
+			/* struct __sk_buff *skb = *target; */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0),
+			/* skb->mark = 42; */
+			BPF_MOV64_IMM(BPF_REG_3, 42),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3,
+				    offsetof(struct __sk_buff, mark)),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		//.errstr = "same insn cannot be used with different pointers",
+		.errstr = "Unreleased reference",
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"unpriv: spill/fill of different pointers stx - sock and ctx (read)",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_8, BPF_REG_1),
+			/* struct bpf_sock *sock = bpf_sock_lookup(...); */
+			BPF_SK_LOOKUP,
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+			/* u64 foo; */
+			/* void *target = &foo; */
+			BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_8),
+			/* if (skb) *target = skb */
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
+				BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
+			/* else *target = sock */
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1),
+				BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_2, 0),
+			/* struct bpf_sock *sk = *target; */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0),
+			/* if (sk) u32 foo = sk->mark; bpf_sk_release(sk); */
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 2),
+				BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+					    offsetof(struct bpf_sock, mark)),
+				BPF_EMIT_CALL(BPF_FUNC_sk_release),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "same insn cannot be used with different pointers",
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"unpriv: spill/fill of different pointers stx - sock and ctx (write)",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_8, BPF_REG_1),
+			/* struct bpf_sock *sock = bpf_sock_lookup(...); */
+			BPF_SK_LOOKUP,
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+			/* u64 foo; */
+			/* void *target = &foo; */
+			BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_8),
+			/* if (skb) *target = skb */
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
+				BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
+			/* else *target = sock */
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1),
+				BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_2, 0),
+			/* struct bpf_sock *sk = *target; */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0),
+			/* if (sk) sk->mark = 42; bpf_sk_release(sk); */
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 3),
+				BPF_MOV64_IMM(BPF_REG_3, 42),
+				BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3,
+					    offsetof(struct bpf_sock, mark)),
+				BPF_EMIT_CALL(BPF_FUNC_sk_release),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		//.errstr = "same insn cannot be used with different pointers",
+		.errstr = "cannot write into socket",
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
 		"unpriv: spill/fill of different pointers ldx",
 		.insns = {
 			BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
@@ -2743,7 +2898,7 @@ static struct bpf_test tests[] = {
 			BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr_unpriv = "R0 leaks addr",
 		.result_unpriv = REJECT,
 		.result = ACCEPT,
@@ -2783,7 +2938,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 1 },
+		.fixup_map_hash_8b = { 1 },
 		.errstr_unpriv = "R1 pointer comparison",
 		.result_unpriv = REJECT,
 		.result = ACCEPT,
@@ -2929,6 +3084,8 @@ static struct bpf_test tests[] = {
 		.fixup_prog1 = { 2 },
 		.result = ACCEPT,
 		.retval = 42,
+		/* Verifier rewrite for unpriv skips tail call here. */
+		.retval_unpriv = 2,
 	},
 	{
 		"stack pointer arithmetic",
@@ -3275,7 +3432,7 @@ static struct bpf_test tests[] = {
 			BPF_ST_MEM(BPF_DW, BPF_REG_1, offsetof(struct __sk_buff, mark), 0),
 			BPF_EXIT_INSN(),
 		},
-		.errstr = "BPF_ST stores into R1 context is not allowed",
+		.errstr = "BPF_ST stores into R1 ctx is not allowed",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	},
@@ -3287,7 +3444,7 @@ static struct bpf_test tests[] = {
 				     BPF_REG_0, offsetof(struct __sk_buff, mark), 0),
 			BPF_EXIT_INSN(),
 		},
-		.errstr = "BPF_XADD stores into R1 context is not allowed",
+		.errstr = "BPF_XADD stores into R1 ctx is not allowed",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	},
@@ -3637,7 +3794,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.errstr = "R3 pointer arithmetic on PTR_TO_PACKET_END",
+		.errstr = "R3 pointer arithmetic on pkt_end",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	},
@@ -3922,7 +4079,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 5 },
+		.fixup_map_hash_8b = { 5 },
 		.result_unpriv = ACCEPT,
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_XDP,
@@ -3938,7 +4095,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 1 },
+		.fixup_map_hash_8b = { 1 },
 		.result = REJECT,
 		.errstr = "invalid access to packet",
 		.prog_type = BPF_PROG_TYPE_XDP,
@@ -3966,7 +4123,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 11 },
+		.fixup_map_hash_8b = { 11 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_XDP,
 	},
@@ -3988,7 +4145,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 7 },
+		.fixup_map_hash_8b = { 7 },
 		.result = REJECT,
 		.errstr = "invalid access to packet",
 		.prog_type = BPF_PROG_TYPE_XDP,
@@ -4010,7 +4167,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 6 },
+		.fixup_map_hash_8b = { 6 },
 		.result = REJECT,
 		.errstr = "invalid access to packet",
 		.prog_type = BPF_PROG_TYPE_XDP,
@@ -4033,7 +4190,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 5 },
+		.fixup_map_hash_8b = { 5 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	},
@@ -4048,7 +4205,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 1 },
+		.fixup_map_hash_8b = { 1 },
 		.result = REJECT,
 		.errstr = "invalid access to packet",
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
@@ -4076,7 +4233,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 11 },
+		.fixup_map_hash_8b = { 11 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	},
@@ -4098,7 +4255,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 7 },
+		.fixup_map_hash_8b = { 7 },
 		.result = REJECT,
 		.errstr = "invalid access to packet",
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
@@ -4120,7 +4277,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 6 },
+		.fixup_map_hash_8b = { 6 },
 		.result = REJECT,
 		.errstr = "invalid access to packet",
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
@@ -4391,6 +4548,85 @@ static struct bpf_test tests[] = {
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	},
 	{
+		"prevent map lookup in sockmap",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map_sockmap = { 3 },
+		.result = REJECT,
+		.errstr = "cannot pass map_type 15 into func bpf_map_lookup_elem",
+		.prog_type = BPF_PROG_TYPE_SOCK_OPS,
+	},
+	{
+		"prevent map lookup in sockhash",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map_sockhash = { 3 },
+		.result = REJECT,
+		.errstr = "cannot pass map_type 18 into func bpf_map_lookup_elem",
+		.prog_type = BPF_PROG_TYPE_SOCK_OPS,
+	},
+	{
+		"prevent map lookup in xskmap",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map_xskmap = { 3 },
+		.result = REJECT,
+		.errstr = "cannot pass map_type 17 into func bpf_map_lookup_elem",
+		.prog_type = BPF_PROG_TYPE_XDP,
+	},
+	{
+		"prevent map lookup in stack trace",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map_stacktrace = { 3 },
+		.result = REJECT,
+		.errstr = "cannot pass map_type 7 into func bpf_map_lookup_elem",
+		.prog_type = BPF_PROG_TYPE_PERF_EVENT,
+	},
+	{
+		"prevent map lookup in prog array",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_prog2 = { 3 },
+		.result = REJECT,
+		.errstr = "cannot pass map_type 3 into func bpf_map_lookup_elem",
+	},
+	{
 		"valid map access into an array with a constant",
 		.insns = {
 			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
@@ -4404,7 +4640,7 @@ static struct bpf_test tests[] = {
 				   offsetof(struct test_val, foo)),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr_unpriv = "R0 leaks addr",
 		.result_unpriv = REJECT,
 		.result = ACCEPT,
@@ -4426,7 +4662,7 @@ static struct bpf_test tests[] = {
 				   offsetof(struct test_val, foo)),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr_unpriv = "R0 leaks addr",
 		.result_unpriv = REJECT,
 		.result = ACCEPT,
@@ -4450,7 +4686,7 @@ static struct bpf_test tests[] = {
 				   offsetof(struct test_val, foo)),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr_unpriv = "R0 leaks addr",
 		.result_unpriv = REJECT,
 		.result = ACCEPT,
@@ -4478,7 +4714,7 @@ static struct bpf_test tests[] = {
 				   offsetof(struct test_val, foo)),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr_unpriv = "R0 leaks addr",
 		.result_unpriv = REJECT,
 		.result = ACCEPT,
@@ -4498,7 +4734,7 @@ static struct bpf_test tests[] = {
 				   offsetof(struct test_val, foo)),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "invalid access to map value, value_size=48 off=48 size=8",
 		.result = REJECT,
 	},
@@ -4519,7 +4755,7 @@ static struct bpf_test tests[] = {
 				   offsetof(struct test_val, foo)),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "R0 min value is outside of the array range",
 		.result = REJECT,
 		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
@@ -4541,7 +4777,7 @@ static struct bpf_test tests[] = {
 				   offsetof(struct test_val, foo)),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "R0 unbounded memory access, make sure to bounds check any array access into a map",
 		.result = REJECT,
 		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
@@ -4566,7 +4802,7 @@ static struct bpf_test tests[] = {
 				   offsetof(struct test_val, foo)),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr_unpriv = "R0 leaks addr",
 		.errstr = "R0 unbounded memory access",
 		.result_unpriv = REJECT,
@@ -4593,7 +4829,7 @@ static struct bpf_test tests[] = {
 				   offsetof(struct test_val, foo)),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr_unpriv = "R0 leaks addr",
 		.errstr = "invalid access to map value, value_size=48 off=44 size=8",
 		.result_unpriv = REJECT,
@@ -4623,12 +4859,185 @@ static struct bpf_test tests[] = {
 				    offsetof(struct test_val, foo)),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3, 11 },
+		.fixup_map_hash_48b = { 3, 11 },
 		.errstr = "R0 pointer += pointer",
 		.result = REJECT,
 		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 	},
 	{
+		"direct packet read test#1 for CGROUP_SKB",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
+				    offsetof(struct __sk_buff, len)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
+				    offsetof(struct __sk_buff, pkt_type)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct __sk_buff, mark)),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_6,
+				    offsetof(struct __sk_buff, mark)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+				    offsetof(struct __sk_buff, queue_mapping)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1,
+				    offsetof(struct __sk_buff, protocol)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1,
+				    offsetof(struct __sk_buff, vlan_present)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.result_unpriv = REJECT,
+		.errstr_unpriv = "invalid bpf_context access off=76 size=4",
+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	},
+	{
+		"direct packet read test#2 for CGROUP_SKB",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
+				    offsetof(struct __sk_buff, vlan_tci)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
+				    offsetof(struct __sk_buff, vlan_proto)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct __sk_buff, priority)),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_6,
+				    offsetof(struct __sk_buff, priority)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+				    offsetof(struct __sk_buff,
+					     ingress_ifindex)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1,
+				    offsetof(struct __sk_buff, tc_index)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1,
+				    offsetof(struct __sk_buff, hash)),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	},
+	{
+		"direct packet read test#3 for CGROUP_SKB",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[0])),
+			BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[1])),
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[2])),
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[3])),
+			BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[4])),
+			BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1,
+				    offsetof(struct __sk_buff, napi_id)),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_4,
+				    offsetof(struct __sk_buff, cb[0])),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_5,
+				    offsetof(struct __sk_buff, cb[1])),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_6,
+				    offsetof(struct __sk_buff, cb[2])),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_7,
+				    offsetof(struct __sk_buff, cb[3])),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_8,
+				    offsetof(struct __sk_buff, cb[4])),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	},
+	{
+		"direct packet read test#4 for CGROUP_SKB",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, family)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, remote_ip4)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
+				    offsetof(struct __sk_buff, local_ip4)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
+				    offsetof(struct __sk_buff, remote_ip6[0])),
+			BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
+				    offsetof(struct __sk_buff, remote_ip6[1])),
+			BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
+				    offsetof(struct __sk_buff, remote_ip6[2])),
+			BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
+				    offsetof(struct __sk_buff, remote_ip6[3])),
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct __sk_buff, local_ip6[0])),
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct __sk_buff, local_ip6[1])),
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct __sk_buff, local_ip6[2])),
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct __sk_buff, local_ip6[3])),
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+				    offsetof(struct __sk_buff, remote_port)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1,
+				    offsetof(struct __sk_buff, local_port)),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	},
+	{
+		"invalid access of tc_classid for CGROUP_SKB",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, tc_classid)),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "invalid bpf_context access",
+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	},
+	{
+		"invalid access of data_meta for CGROUP_SKB",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_meta)),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "invalid bpf_context access",
+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	},
+	{
+		"invalid access of flow_keys for CGROUP_SKB",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, flow_keys)),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "invalid bpf_context access",
+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	},
+	{
+		"invalid write access to napi_id for CGROUP_SKB",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1,
+				    offsetof(struct __sk_buff, napi_id)),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_9,
+				    offsetof(struct __sk_buff, napi_id)),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "invalid bpf_context access",
+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	},
+	{
 		"valid cgroup storage access",
 		.insns = {
 			BPF_MOV64_IMM(BPF_REG_2, 0),
@@ -4656,7 +5065,7 @@ static struct bpf_test tests[] = {
 			BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 1 },
+		.fixup_map_hash_8b = { 1 },
 		.result = REJECT,
 		.errstr = "cannot pass map_type 1 into func bpf_get_local_storage",
 		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
@@ -4676,7 +5085,7 @@ static struct bpf_test tests[] = {
 		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
 	},
 	{
-		"invalid per-cgroup storage access 3",
+		"invalid cgroup storage access 3",
 		.insns = {
 			BPF_MOV64_IMM(BPF_REG_2, 0),
 			BPF_LD_MAP_FD(BPF_REG_1, 0),
@@ -4741,6 +5150,123 @@ static struct bpf_test tests[] = {
 		.fixup_cgroup_storage = { 1 },
 		.result = REJECT,
 		.errstr = "get_local_storage() doesn't support non-zero flags",
+		.errstr_unpriv = "R2 leaks addr into helper function",
+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	},
+	{
+		"valid per-cpu cgroup storage access",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_get_local_storage),
+			BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_percpu_cgroup_storage = { 1 },
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	},
+	{
+		"invalid per-cpu cgroup storage access 1",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_get_local_storage),
+			BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map_hash_8b = { 1 },
+		.result = REJECT,
+		.errstr = "cannot pass map_type 1 into func bpf_get_local_storage",
+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	},
+	{
+		"invalid per-cpu cgroup storage access 2",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 1),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_get_local_storage),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "fd 1 is not pointing to valid bpf_map",
+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	},
+	{
+		"invalid per-cpu cgroup storage access 3",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_get_local_storage),
+			BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 256),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_percpu_cgroup_storage = { 1 },
+		.result = REJECT,
+		.errstr = "invalid access to map value, value_size=64 off=256 size=4",
+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	},
+	{
+		"invalid per-cpu cgroup storage access 4",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_get_local_storage),
+			BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, -2),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_cgroup_storage = { 1 },
+		.result = REJECT,
+		.errstr = "invalid access to map value, value_size=64 off=-2 size=4",
+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	},
+	{
+		"invalid per-cpu cgroup storage access 5",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_2, 7),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_get_local_storage),
+			BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_percpu_cgroup_storage = { 1 },
+		.result = REJECT,
+		.errstr = "get_local_storage() doesn't support non-zero flags",
+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	},
+	{
+		"invalid per-cpu cgroup storage access 6",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_1),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_get_local_storage),
+			BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_percpu_cgroup_storage = { 1 },
+		.result = REJECT,
+		.errstr = "get_local_storage() doesn't support non-zero flags",
+		.errstr_unpriv = "R2 leaks addr into helper function",
 		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
 	},
 	{
@@ -4758,7 +5284,7 @@ static struct bpf_test tests[] = {
 			BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 4 },
+		.fixup_map_hash_8b = { 4 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS
 	},
@@ -4779,8 +5305,8 @@ static struct bpf_test tests[] = {
 			BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 4 },
-		.errstr = "R4 pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL",
+		.fixup_map_hash_8b = { 4 },
+		.errstr = "R4 pointer arithmetic on map_value_or_null",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS
 	},
@@ -4800,8 +5326,8 @@ static struct bpf_test tests[] = {
 			BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 4 },
-		.errstr = "R4 pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL",
+		.fixup_map_hash_8b = { 4 },
+		.errstr = "R4 pointer arithmetic on map_value_or_null",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS
 	},
@@ -4821,8 +5347,8 @@ static struct bpf_test tests[] = {
 			BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 4 },
-		.errstr = "R4 pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL",
+		.fixup_map_hash_8b = { 4 },
+		.errstr = "R4 pointer arithmetic on map_value_or_null",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS
 	},
@@ -4847,7 +5373,7 @@ static struct bpf_test tests[] = {
 			BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 4 },
+		.fixup_map_hash_8b = { 4 },
 		.result = REJECT,
 		.errstr = "R4 !read_ok",
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS
@@ -4875,7 +5401,7 @@ static struct bpf_test tests[] = {
 			BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 4 },
+		.fixup_map_hash_8b = { 4 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS
 	},
@@ -4896,7 +5422,7 @@ static struct bpf_test tests[] = {
 			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "R0 unbounded memory access",
 		.result = REJECT,
 		.errstr_unpriv = "R0 leaks addr",
@@ -5146,11 +5672,11 @@ static struct bpf_test tests[] = {
 				      offsetof(struct __sk_buff, cb[0])),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 2 },
+		.fixup_map_hash_8b = { 2 },
 		.errstr_unpriv = "R2 leaks addr into mem",
 		.result_unpriv = REJECT,
 		.result = REJECT,
-		.errstr = "BPF_XADD stores into R1 context is not allowed",
+		.errstr = "BPF_XADD stores into R1 ctx is not allowed",
 	},
 	{
 		"leak pointer into ctx 2",
@@ -5165,7 +5691,7 @@ static struct bpf_test tests[] = {
 		.errstr_unpriv = "R10 leaks addr into mem",
 		.result_unpriv = REJECT,
 		.result = REJECT,
-		.errstr = "BPF_XADD stores into R1 context is not allowed",
+		.errstr = "BPF_XADD stores into R1 ctx is not allowed",
 	},
 	{
 		"leak pointer into ctx 3",
@@ -5176,7 +5702,7 @@ static struct bpf_test tests[] = {
 				      offsetof(struct __sk_buff, cb[0])),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 1 },
+		.fixup_map_hash_8b = { 1 },
 		.errstr_unpriv = "R2 leaks addr into ctx",
 		.result_unpriv = REJECT,
 		.result = ACCEPT,
@@ -5198,7 +5724,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 4 },
+		.fixup_map_hash_8b = { 4 },
 		.errstr_unpriv = "R6 leaks addr into mem",
 		.result_unpriv = REJECT,
 		.result = ACCEPT,
@@ -5218,7 +5744,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_probe_read),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -5237,7 +5763,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_probe_read),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -5255,7 +5781,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_trace_printk),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "invalid access to map value, value_size=48 off=0 size=0",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -5275,7 +5801,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_probe_read),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "invalid access to map value, value_size=48 off=0 size=56",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -5295,7 +5821,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_probe_read),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "R2 min value is negative",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -5319,7 +5845,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_probe_read),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -5340,7 +5866,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_probe_read),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -5360,7 +5886,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_trace_printk),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "invalid access to map value, value_size=48 off=4 size=0",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -5384,7 +5910,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_probe_read),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "invalid access to map value, value_size=48 off=4 size=52",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -5406,7 +5932,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_probe_read),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "R2 min value is negative",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -5428,7 +5954,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_probe_read),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "R2 min value is negative",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -5453,7 +5979,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_probe_read),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -5475,7 +6001,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_probe_read),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -5495,7 +6021,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_trace_printk),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "R1 min value is outside of the array range",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -5520,7 +6046,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_probe_read),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "invalid access to map value, value_size=48 off=4 size=52",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -5543,7 +6069,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_probe_read),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "R2 min value is negative",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -5566,7 +6092,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_probe_read),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "R2 min value is negative",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -5592,7 +6118,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_probe_read),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -5615,7 +6141,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_probe_read),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -5637,7 +6163,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_trace_printk),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "R1 min value is outside of the array range",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -5659,7 +6185,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_probe_read),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "R1 unbounded memory access",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -5685,7 +6211,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_probe_read),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "invalid access to map value, value_size=48 off=4 size=45",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -5709,7 +6235,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -5732,7 +6258,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.result = REJECT,
 		.errstr = "R1 unbounded memory access",
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -5756,7 +6282,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -5779,7 +6305,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.result = REJECT,
 		.errstr = "R1 unbounded memory access",
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -5804,7 +6330,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -5828,7 +6354,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -5852,7 +6378,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.result = REJECT,
 		.errstr = "R1 min value is negative",
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -5877,7 +6403,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -5901,7 +6427,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -5925,12 +6451,262 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.result = REJECT,
 		.errstr = "R1 min value is negative",
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
 	{
+		"map access: known scalar += value_ptr",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+			BPF_MOV64_IMM(BPF_REG_1, 4),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map_array_48b = { 3 },
+		.result = ACCEPT,
+		.retval = 1,
+	},
+	{
+		"map access: value_ptr += known scalar",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+			BPF_MOV64_IMM(BPF_REG_1, 4),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map_array_48b = { 3 },
+		.result = ACCEPT,
+		.retval = 1,
+	},
+	{
+		"map access: unknown scalar += value_ptr",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+			BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xf),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map_array_48b = { 3 },
+		.result = ACCEPT,
+		.retval = 1,
+	},
+	{
+		"map access: value_ptr += unknown scalar",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+			BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xf),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map_array_48b = { 3 },
+		.result = ACCEPT,
+		.retval = 1,
+	},
+	{
+		"map access: value_ptr += value_ptr",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_0),
+			BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map_array_48b = { 3 },
+		.result = REJECT,
+		.errstr = "R0 pointer += pointer prohibited",
+	},
+	{
+		"map access: known scalar -= value_ptr",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+			BPF_MOV64_IMM(BPF_REG_1, 4),
+			BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map_array_48b = { 3 },
+		.result = REJECT,
+		.errstr = "R1 tried to subtract pointer from scalar",
+	},
+	{
+		"map access: value_ptr -= known scalar",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+			BPF_MOV64_IMM(BPF_REG_1, 4),
+			BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map_array_48b = { 3 },
+		.result = REJECT,
+		.errstr = "R0 min value is outside of the array range",
+	},
+	{
+		"map access: value_ptr -= known scalar, 2",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+			BPF_MOV64_IMM(BPF_REG_1, 6),
+			BPF_MOV64_IMM(BPF_REG_2, 4),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+			BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_2),
+			BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map_array_48b = { 3 },
+		.result = ACCEPT,
+		.retval = 1,
+	},
+	{
+		"map access: unknown scalar -= value_ptr",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+			BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xf),
+			BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map_array_48b = { 3 },
+		.result = REJECT,
+		.errstr = "R1 tried to subtract pointer from scalar",
+	},
+	{
+		"map access: value_ptr -= unknown scalar",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+			BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xf),
+			BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map_array_48b = { 3 },
+		.result = REJECT,
+		.errstr = "R0 min value is negative",
+	},
+	{
+		"map access: value_ptr -= unknown scalar, 2",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
+			BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xf),
+			BPF_ALU64_IMM(BPF_OR, BPF_REG_1, 0x7),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x7),
+			BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map_array_48b = { 3 },
+		.result = ACCEPT,
+		.retval = 1,
+	},
+	{
+		"map access: value_ptr -= value_ptr",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+			BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_0),
+			BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map_array_48b = { 3 },
+		.result = REJECT,
+		.errstr = "R0 invalid mem access 'inv'",
+		.errstr_unpriv = "R0 pointer -= pointer prohibited",
+	},
+	{
 		"map lookup helper access to map",
 		.insns = {
 			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
@@ -5944,7 +6720,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map3 = { 3, 8 },
+		.fixup_map_hash_16b = { 3, 8 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -5964,7 +6740,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_map_update_elem),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map3 = { 3, 10 },
+		.fixup_map_hash_16b = { 3, 10 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -5984,8 +6760,8 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_map_update_elem),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
-		.fixup_map3 = { 10 },
+		.fixup_map_hash_8b = { 3 },
+		.fixup_map_hash_16b = { 10 },
 		.result = REJECT,
 		.errstr = "invalid access to map value, value_size=8 off=0 size=16",
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -6006,7 +6782,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map3 = { 3, 9 },
+		.fixup_map_hash_16b = { 3, 9 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -6026,7 +6802,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map3 = { 3, 9 },
+		.fixup_map_hash_16b = { 3, 9 },
 		.result = REJECT,
 		.errstr = "invalid access to map value, value_size=16 off=12 size=8",
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -6046,7 +6822,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map3 = { 3, 9 },
+		.fixup_map_hash_16b = { 3, 9 },
 		.result = REJECT,
 		.errstr = "invalid access to map value, value_size=16 off=-4 size=8",
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -6068,7 +6844,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map3 = { 3, 10 },
+		.fixup_map_hash_16b = { 3, 10 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -6089,7 +6865,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map3 = { 3, 10 },
+		.fixup_map_hash_16b = { 3, 10 },
 		.result = REJECT,
 		.errstr = "invalid access to map value, value_size=16 off=12 size=8",
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -6110,7 +6886,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map3 = { 3, 10 },
+		.fixup_map_hash_16b = { 3, 10 },
 		.result = REJECT,
 		.errstr = "invalid access to map value, value_size=16 off=-4 size=8",
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -6133,7 +6909,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map3 = { 3, 11 },
+		.fixup_map_hash_16b = { 3, 11 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -6153,7 +6929,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map3 = { 3, 10 },
+		.fixup_map_hash_16b = { 3, 10 },
 		.result = REJECT,
 		.errstr = "R2 unbounded memory access, make sure to bounds check any array access into a map",
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -6176,7 +6952,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map3 = { 3, 11 },
+		.fixup_map_hash_16b = { 3, 11 },
 		.result = REJECT,
 		.errstr = "invalid access to map value, value_size=16 off=9 size=8",
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -6198,7 +6974,7 @@ static struct bpf_test tests[] = {
 			BPF_ST_MEM(BPF_DW, BPF_REG_3, 0, 42),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr_unpriv = "R0 leaks addr",
 		.result = ACCEPT,
 		.result_unpriv = REJECT,
@@ -6219,7 +6995,7 @@ static struct bpf_test tests[] = {
 			BPF_ST_MEM(BPF_DW, BPF_REG_3, 0, 42),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr_unpriv = "R0 leaks addr",
 		.result = ACCEPT,
 		.result_unpriv = REJECT,
@@ -6236,7 +7012,7 @@ static struct bpf_test tests[] = {
 			BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr_unpriv = "R1 !read_ok",
 		.errstr = "R1 !read_ok",
 		.result = REJECT,
@@ -6270,7 +7046,7 @@ static struct bpf_test tests[] = {
 			BPF_ST_MEM(BPF_DW, BPF_REG_7, -4, 24),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr_unpriv = "R0 leaks addr",
 		.result = ACCEPT,
 		.result_unpriv = REJECT,
@@ -6298,7 +7074,7 @@ static struct bpf_test tests[] = {
 			BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 4),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr_unpriv = "R0 leaks addr",
 		.result = ACCEPT,
 		.result_unpriv = REJECT,
@@ -6317,7 +7093,7 @@ static struct bpf_test tests[] = {
 			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "R0 bitwise operator &= on pointer",
 		.result = REJECT,
 	},
@@ -6334,7 +7110,7 @@ static struct bpf_test tests[] = {
 			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "R0 32-bit pointer arithmetic prohibited",
 		.result = REJECT,
 	},
@@ -6351,7 +7127,7 @@ static struct bpf_test tests[] = {
 			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "R0 pointer arithmetic with /= operator",
 		.result = REJECT,
 	},
@@ -6368,7 +7144,7 @@ static struct bpf_test tests[] = {
 			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr_unpriv = "R0 pointer arithmetic prohibited",
 		.errstr = "invalid mem access 'inv'",
 		.result = REJECT,
@@ -6392,7 +7168,7 @@ static struct bpf_test tests[] = {
 			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "R0 invalid mem access 'inv'",
 		.result = REJECT,
 	},
@@ -6415,7 +7191,7 @@ static struct bpf_test tests[] = {
 			BPF_ST_MEM(BPF_DW, BPF_REG_3, 0, 42),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr_unpriv = "R0 leaks addr",
 		.result = ACCEPT,
 		.result_unpriv = REJECT,
@@ -6661,7 +7437,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -6687,7 +7463,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "invalid access to map value, value_size=48 off=0 size=49",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -6715,7 +7491,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -6742,7 +7518,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "R1 min value is outside of the array range",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -6814,7 +7590,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_csum_diff),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	},
@@ -6839,7 +7615,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_csum_diff),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	},
@@ -6862,7 +7638,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_csum_diff),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	},
@@ -6943,7 +7719,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_probe_read),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -6964,7 +7740,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_probe_read),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -6984,7 +7760,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_probe_read),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -7059,7 +7835,7 @@ static struct bpf_test tests[] = {
 				   offsetof(struct test_val, foo)),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "R0 max value is outside of the array range",
 		.result = REJECT,
 		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
@@ -7089,7 +7865,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_REG(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr = "R0 max value is outside of the array range",
 		.result = REJECT,
 		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
@@ -7137,7 +7913,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.fixup_map_in_map = { 3 },
-		.errstr = "R1 pointer arithmetic on CONST_PTR_TO_MAP prohibited",
+		.errstr = "R1 pointer arithmetic on map_ptr prohibited",
 		.result = REJECT,
 	},
 	{
@@ -7442,7 +8218,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "unbounded min value",
 		.result = REJECT,
 	},
@@ -7466,7 +8242,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "unbounded min value",
 		.result = REJECT,
 	},
@@ -7492,7 +8268,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "unbounded min value",
 		.result = REJECT,
 	},
@@ -7517,7 +8293,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "unbounded min value",
 		.result = REJECT,
 	},
@@ -7541,7 +8317,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.result = ACCEPT,
 	},
 	{
@@ -7565,7 +8341,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "unbounded min value",
 		.result = REJECT,
 	},
@@ -7611,7 +8387,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.result = ACCEPT,
 	},
 	{
@@ -7636,7 +8412,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "unbounded min value",
 		.result = REJECT,
 	},
@@ -7662,7 +8438,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.result = ACCEPT,
 	},
 	{
@@ -7687,7 +8463,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "unbounded min value",
 		.result = REJECT,
 	},
@@ -7714,7 +8490,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "unbounded min value",
 		.result = REJECT,
 	},
@@ -7740,7 +8516,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "unbounded min value",
 		.result = REJECT,
 	},
@@ -7769,7 +8545,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "unbounded min value",
 		.result = REJECT,
 	},
@@ -7799,7 +8575,7 @@ static struct bpf_test tests[] = {
 			BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, -3),
 			BPF_JMP_IMM(BPF_JA, 0, 0, -7),
 		},
-		.fixup_map1 = { 4 },
+		.fixup_map_hash_8b = { 4 },
 		.errstr = "R0 invalid mem access 'inv'",
 		.result = REJECT,
 	},
@@ -7827,7 +8603,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "unbounded min value",
 		.result = REJECT,
 		.result_unpriv = REJECT,
@@ -7854,7 +8630,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "R0 max value is outside of the array range",
 		.result = REJECT,
 	},
@@ -7879,7 +8655,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.",
 		.result = REJECT,
 	},
@@ -7905,7 +8681,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.result = ACCEPT
 	},
 	{
@@ -7930,7 +8706,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "map_value pointer and 4294967295",
 		.result = REJECT
 	},
@@ -7956,7 +8732,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "R0 min value is outside of the array range",
 		.result = REJECT
 	},
@@ -7980,7 +8756,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 4 },
+		.fixup_map_hash_8b = { 4 },
 		.errstr = "value_size=8 off=1073741825",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
@@ -8005,7 +8781,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 4 },
+		.fixup_map_hash_8b = { 4 },
 		.errstr = "value 1073741823",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
@@ -8041,7 +8817,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.result = ACCEPT
 	},
 	{
@@ -8080,7 +8856,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		/* not actually fully unbounded, but the bound is very high */
 		.errstr = "R0 unbounded memory access",
 		.result = REJECT
@@ -8123,7 +8899,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		/* not actually fully unbounded, but the bound is very high */
 		.errstr = "R0 unbounded memory access",
 		.result = REJECT
@@ -8152,7 +8928,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.result = ACCEPT
 	},
 	{
@@ -8179,7 +8955,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "R0 max value is outside of the array range",
 		.result = REJECT
 	},
@@ -8209,7 +8985,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "R0 unbounded memory access",
 		.result = REJECT
 	},
@@ -8229,7 +9005,7 @@ static struct bpf_test tests[] = {
 			BPF_JMP_A(0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "map_value pointer and 2147483646",
 		.result = REJECT
 	},
@@ -8251,7 +9027,7 @@ static struct bpf_test tests[] = {
 			BPF_JMP_A(0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "pointer offset 1073741822",
 		.result = REJECT
 	},
@@ -8272,7 +9048,7 @@ static struct bpf_test tests[] = {
 			BPF_JMP_A(0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "pointer offset -1073741822",
 		.result = REJECT
 	},
@@ -8294,7 +9070,7 @@ static struct bpf_test tests[] = {
 			BPF_JMP_A(0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "map_value pointer and 1000000000000",
 		.result = REJECT
 	},
@@ -8314,7 +9090,7 @@ static struct bpf_test tests[] = {
 			BPF_JMP_A(0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.result = ACCEPT,
 		.retval = POINTER_VALUE,
 		.result_unpriv = REJECT,
@@ -8335,7 +9111,7 @@ static struct bpf_test tests[] = {
 			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.result = ACCEPT,
 		.retval = POINTER_VALUE,
 		.result_unpriv = REJECT,
@@ -8403,7 +9179,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 5 },
+		.fixup_map_hash_8b = { 5 },
 		.errstr = "variable stack read R2",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_LWT_IN,
@@ -8484,7 +9260,7 @@ static struct bpf_test tests[] = {
 				   offsetof(struct test_val, foo)),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 3 },
+		.fixup_map_hash_48b = { 3 },
 		.errstr_unpriv = "R0 leaks addr",
 		.errstr = "R0 unbounded memory access",
 		.result_unpriv = REJECT,
@@ -8811,7 +9587,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.errstr = "R3 pointer arithmetic on PTR_TO_PACKET_END",
+		.errstr = "R3 pointer arithmetic on pkt_end",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_XDP,
 	},
@@ -8830,7 +9606,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.errstr = "R3 pointer arithmetic on PTR_TO_PACKET_END",
+		.errstr = "R3 pointer arithmetic on pkt_end",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_XDP,
 	},
@@ -10018,7 +10794,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
-		.fixup_map1 = { 16 },
+		.fixup_map_hash_8b = { 16 },
 		.result = REJECT,
 		.errstr = "R0 min value is outside of the array range",
 	},
@@ -10969,7 +11745,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(), /* return 0 */
 		},
 		.prog_type = BPF_PROG_TYPE_XDP,
-		.fixup_map1 = { 23 },
+		.fixup_map_hash_8b = { 23 },
 		.result = ACCEPT,
 	},
 	{
@@ -11024,7 +11800,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(), /* return 1 */
 		},
 		.prog_type = BPF_PROG_TYPE_XDP,
-		.fixup_map1 = { 23 },
+		.fixup_map_hash_8b = { 23 },
 		.result = ACCEPT,
 	},
 	{
@@ -11079,7 +11855,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(), /* return 1 */
 		},
 		.prog_type = BPF_PROG_TYPE_XDP,
-		.fixup_map1 = { 23 },
+		.fixup_map_hash_8b = { 23 },
 		.result = REJECT,
 		.errstr = "invalid read from stack off -16+0 size 8",
 	},
@@ -11151,7 +11927,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
-		.fixup_map1 = { 12, 22 },
+		.fixup_map_hash_8b = { 12, 22 },
 		.result = REJECT,
 		.errstr = "invalid access to map value, value_size=8 off=2 size=8",
 	},
@@ -11223,7 +11999,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
-		.fixup_map1 = { 12, 22 },
+		.fixup_map_hash_8b = { 12, 22 },
 		.result = ACCEPT,
 	},
 	{
@@ -11294,7 +12070,7 @@ static struct bpf_test tests[] = {
 			BPF_JMP_IMM(BPF_JA, 0, 0, -8),
 		},
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
-		.fixup_map1 = { 12, 22 },
+		.fixup_map_hash_8b = { 12, 22 },
 		.result = REJECT,
 		.errstr = "invalid access to map value, value_size=8 off=2 size=8",
 	},
@@ -11366,7 +12142,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
-		.fixup_map1 = { 12, 22 },
+		.fixup_map_hash_8b = { 12, 22 },
 		.result = ACCEPT,
 	},
 	{
@@ -11437,7 +12213,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
-		.fixup_map1 = { 12, 22 },
+		.fixup_map_hash_8b = { 12, 22 },
 		.result = REJECT,
 		.errstr = "R0 invalid mem access 'inv'",
 	},
@@ -11782,7 +12558,7 @@ static struct bpf_test tests[] = {
 			BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 13 },
+		.fixup_map_hash_8b = { 13 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_XDP,
 	},
@@ -11809,7 +12585,7 @@ static struct bpf_test tests[] = {
 				     BPF_FUNC_map_lookup_elem),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 6 },
+		.fixup_map_hash_48b = { 6 },
 		.errstr = "invalid indirect read from stack off -8+0 size 8",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_XDP,
@@ -11841,8 +12617,8 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
-		.fixup_map2 = { 13 },
-		.fixup_map4 = { 16 },
+		.fixup_map_hash_48b = { 13 },
+		.fixup_map_array_48b = { 16 },
 		.result = ACCEPT,
 		.retval = 1,
 	},
@@ -11874,7 +12650,7 @@ static struct bpf_test tests[] = {
 		},
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 		.fixup_map_in_map = { 16 },
-		.fixup_map4 = { 13 },
+		.fixup_map_array_48b = { 13 },
 		.result = REJECT,
 		.errstr = "R0 invalid mem access 'map_ptr'",
 	},
@@ -11942,7 +12718,7 @@ static struct bpf_test tests[] = {
 			BPF_ST_MEM(BPF_DW, BPF_REG_6, 0, 0xdead),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "R6 invalid mem access 'inv'",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -11966,7 +12742,7 @@ static struct bpf_test tests[] = {
 			BPF_LDX_MEM(BPF_DW, BPF_REG_5, BPF_REG_10, -16),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.errstr = "invalid read from stack off -16+0 size 8",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -12088,7 +12864,7 @@ static struct bpf_test tests[] = {
 			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 3),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map1 = { 3 },
+		.fixup_map_hash_8b = { 3 },
 		.result = REJECT,
 		.errstr = "misaligned value access off",
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
@@ -12114,7 +12890,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.result = REJECT,
-		.errstr = "BPF_XADD stores into R2 packet",
+		.errstr = "BPF_XADD stores into R2 pkt is not allowed",
 		.prog_type = BPF_PROG_TYPE_XDP,
 	},
 	{
@@ -12198,7 +12974,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_get_stack),
 			BPF_EXIT_INSN(),
 		},
-		.fixup_map2 = { 4 },
+		.fixup_map_hash_48b = { 4 },
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -12442,6 +13218,214 @@ static struct bpf_test tests[] = {
 		.result = ACCEPT,
 	},
 	{
+		"reference tracking: leak potential reference",
+		.insns = {
+			BPF_SK_LOOKUP,
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), /* leak reference */
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.errstr = "Unreleased reference",
+		.result = REJECT,
+	},
+	{
+		"reference tracking: leak potential reference on stack",
+		.insns = {
+			BPF_SK_LOOKUP,
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+			BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.errstr = "Unreleased reference",
+		.result = REJECT,
+	},
+	{
+		"reference tracking: leak potential reference on stack 2",
+		.insns = {
+			BPF_SK_LOOKUP,
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+			BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.errstr = "Unreleased reference",
+		.result = REJECT,
+	},
+	{
+		"reference tracking: zero potential reference",
+		.insns = {
+			BPF_SK_LOOKUP,
+			BPF_MOV64_IMM(BPF_REG_0, 0), /* leak reference */
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.errstr = "Unreleased reference",
+		.result = REJECT,
+	},
+	{
+		"reference tracking: copy and zero potential references",
+		.insns = {
+			BPF_SK_LOOKUP,
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_7, 0), /* leak reference */
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.errstr = "Unreleased reference",
+		.result = REJECT,
+	},
+	{
+		"reference tracking: release reference without check",
+		.insns = {
+			BPF_SK_LOOKUP,
+			/* reference in r0 may be NULL */
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_EMIT_CALL(BPF_FUNC_sk_release),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.errstr = "type=sock_or_null expected=sock",
+		.result = REJECT,
+	},
+	{
+		"reference tracking: release reference",
+		.insns = {
+			BPF_SK_LOOKUP,
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+			BPF_EMIT_CALL(BPF_FUNC_sk_release),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+	},
+	{
+		"reference tracking: release reference 2",
+		.insns = {
+			BPF_SK_LOOKUP,
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+			BPF_EXIT_INSN(),
+			BPF_EMIT_CALL(BPF_FUNC_sk_release),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+	},
+	{
+		"reference tracking: release reference twice",
+		.insns = {
+			BPF_SK_LOOKUP,
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+			BPF_EMIT_CALL(BPF_FUNC_sk_release),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_EMIT_CALL(BPF_FUNC_sk_release),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.errstr = "type=inv expected=sock",
+		.result = REJECT,
+	},
+	{
+		"reference tracking: release reference twice inside branch",
+		.insns = {
+			BPF_SK_LOOKUP,
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), /* goto end */
+			BPF_EMIT_CALL(BPF_FUNC_sk_release),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_EMIT_CALL(BPF_FUNC_sk_release),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.errstr = "type=inv expected=sock",
+		.result = REJECT,
+	},
+	{
+		"reference tracking: alloc, check, free in one subbranch",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 16),
+			/* if (offsetof(skb, mark) > data_len) exit; */
+			BPF_JMP_REG(BPF_JLE, BPF_REG_0, BPF_REG_3, 1),
+			BPF_EXIT_INSN(),
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_2,
+				    offsetof(struct __sk_buff, mark)),
+			BPF_SK_LOOKUP,
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 1), /* mark == 0? */
+			/* Leak reference in R0 */
+			BPF_EXIT_INSN(),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), /* sk NULL? */
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_EMIT_CALL(BPF_FUNC_sk_release),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.errstr = "Unreleased reference",
+		.result = REJECT,
+	},
+	{
+		"reference tracking: alloc, check, free in both subbranches",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 16),
+			/* if (offsetof(skb, mark) > data_len) exit; */
+			BPF_JMP_REG(BPF_JLE, BPF_REG_0, BPF_REG_3, 1),
+			BPF_EXIT_INSN(),
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_2,
+				    offsetof(struct __sk_buff, mark)),
+			BPF_SK_LOOKUP,
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 4), /* mark == 0? */
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), /* sk NULL? */
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_EMIT_CALL(BPF_FUNC_sk_release),
+			BPF_EXIT_INSN(),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), /* sk NULL? */
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_EMIT_CALL(BPF_FUNC_sk_release),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+	},
+	{
+		"reference tracking in call: free reference in subprog",
+		.insns = {
+			BPF_SK_LOOKUP,
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), /* unchecked reference */
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+
+			/* subprog 1 */
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_1),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_2, 0, 1),
+			BPF_EMIT_CALL(BPF_FUNC_sk_release),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+	},
+	{
 		"pass modified ctx pointer to helper, 1",
 		.insns = {
 			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -612),
@@ -12511,6 +13495,407 @@ static struct bpf_test tests[] = {
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 		.result = ACCEPT,
 	},
+	{
+		"reference tracking in call: free reference in subprog and outside",
+		.insns = {
+			BPF_SK_LOOKUP,
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), /* unchecked reference */
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_EMIT_CALL(BPF_FUNC_sk_release),
+			BPF_EXIT_INSN(),
+
+			/* subprog 1 */
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_1),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_2, 0, 1),
+			BPF_EMIT_CALL(BPF_FUNC_sk_release),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.errstr = "type=inv expected=sock",
+		.result = REJECT,
+	},
+	{
+		"reference tracking in call: alloc & leak reference in subprog",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+
+			/* subprog 1 */
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_4),
+			BPF_SK_LOOKUP,
+			/* spill unchecked sk_ptr into stack of caller */
+			BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.errstr = "Unreleased reference",
+		.result = REJECT,
+	},
+	{
+		"reference tracking in call: alloc in subprog, release outside",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+			BPF_EMIT_CALL(BPF_FUNC_sk_release),
+			BPF_EXIT_INSN(),
+
+			/* subprog 1 */
+			BPF_SK_LOOKUP,
+			BPF_EXIT_INSN(), /* return sk */
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.retval = POINTER_VALUE,
+		.result = ACCEPT,
+	},
+	{
+		"reference tracking in call: sk_ptr leak into caller stack",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+
+			/* subprog 1 */
+			BPF_MOV64_REG(BPF_REG_5, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, -8),
+			BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_4, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 5),
+			/* spill unchecked sk_ptr into stack of caller */
+			BPF_MOV64_REG(BPF_REG_5, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, -8),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_5, 0),
+			BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+
+			/* subprog 2 */
+			BPF_SK_LOOKUP,
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.errstr = "Unreleased reference",
+		.result = REJECT,
+	},
+	{
+		"reference tracking in call: sk_ptr spill into caller stack",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+
+			/* subprog 1 */
+			BPF_MOV64_REG(BPF_REG_5, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, -8),
+			BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_4, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 8),
+			/* spill unchecked sk_ptr into stack of caller */
+			BPF_MOV64_REG(BPF_REG_5, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, -8),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_5, 0),
+			BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+			/* now the sk_ptr is verified, free the reference */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_4, 0),
+			BPF_EMIT_CALL(BPF_FUNC_sk_release),
+			BPF_EXIT_INSN(),
+
+			/* subprog 2 */
+			BPF_SK_LOOKUP,
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+	},
+	{
+		"reference tracking: allow LD_ABS",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_SK_LOOKUP,
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+			BPF_EMIT_CALL(BPF_FUNC_sk_release),
+			BPF_LD_ABS(BPF_B, 0),
+			BPF_LD_ABS(BPF_H, 0),
+			BPF_LD_ABS(BPF_W, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+	},
+	{
+		"reference tracking: forbid LD_ABS while holding reference",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_SK_LOOKUP,
+			BPF_LD_ABS(BPF_B, 0),
+			BPF_LD_ABS(BPF_H, 0),
+			BPF_LD_ABS(BPF_W, 0),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+			BPF_EMIT_CALL(BPF_FUNC_sk_release),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.errstr = "BPF_LD_[ABS|IND] cannot be mixed with socket references",
+		.result = REJECT,
+	},
+	{
+		"reference tracking: allow LD_IND",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_SK_LOOKUP,
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+			BPF_EMIT_CALL(BPF_FUNC_sk_release),
+			BPF_MOV64_IMM(BPF_REG_7, 1),
+			BPF_LD_IND(BPF_W, BPF_REG_7, -0x200000),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_7),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.retval = 1,
+	},
+	{
+		"reference tracking: forbid LD_IND while holding reference",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_SK_LOOKUP,
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
+			BPF_MOV64_IMM(BPF_REG_7, 1),
+			BPF_LD_IND(BPF_W, BPF_REG_7, -0x200000),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_7),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_4),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
+			BPF_EMIT_CALL(BPF_FUNC_sk_release),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.errstr = "BPF_LD_[ABS|IND] cannot be mixed with socket references",
+		.result = REJECT,
+	},
+	{
+		"reference tracking: check reference or tail call",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_1),
+			BPF_SK_LOOKUP,
+			/* if (sk) bpf_sk_release() */
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 7),
+			/* bpf_tail_call() */
+			BPF_MOV64_IMM(BPF_REG_3, 2),
+			BPF_LD_MAP_FD(BPF_REG_2, 0),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_tail_call),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			BPF_EMIT_CALL(BPF_FUNC_sk_release),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_prog1 = { 17 },
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+	},
+	{
+		"reference tracking: release reference then tail call",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_1),
+			BPF_SK_LOOKUP,
+			/* if (sk) bpf_sk_release() */
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
+			BPF_EMIT_CALL(BPF_FUNC_sk_release),
+			/* bpf_tail_call() */
+			BPF_MOV64_IMM(BPF_REG_3, 2),
+			BPF_LD_MAP_FD(BPF_REG_2, 0),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_tail_call),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_prog1 = { 18 },
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+	},
+	{
+		"reference tracking: leak possible reference over tail call",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_1),
+			/* Look up socket and store in REG_6 */
+			BPF_SK_LOOKUP,
+			/* bpf_tail_call() */
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+			BPF_MOV64_IMM(BPF_REG_3, 2),
+			BPF_LD_MAP_FD(BPF_REG_2, 0),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_tail_call),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			/* if (sk) bpf_sk_release() */
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
+			BPF_EMIT_CALL(BPF_FUNC_sk_release),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_prog1 = { 16 },
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.errstr = "tail_call would lead to reference leak",
+		.result = REJECT,
+	},
+	{
+		"reference tracking: leak checked reference over tail call",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_1),
+			/* Look up socket and store in REG_6 */
+			BPF_SK_LOOKUP,
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+			/* if (!sk) goto end */
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+			/* bpf_tail_call() */
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_LD_MAP_FD(BPF_REG_2, 0),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_tail_call),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_EMIT_CALL(BPF_FUNC_sk_release),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_prog1 = { 17 },
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.errstr = "tail_call would lead to reference leak",
+		.result = REJECT,
+	},
+	{
+		"reference tracking: mangle and release sock_or_null",
+		.insns = {
+			BPF_SK_LOOKUP,
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 5),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+			BPF_EMIT_CALL(BPF_FUNC_sk_release),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.errstr = "R1 pointer arithmetic on sock_or_null prohibited",
+		.result = REJECT,
+	},
+	{
+		"reference tracking: mangle and release sock",
+		.insns = {
+			BPF_SK_LOOKUP,
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 5),
+			BPF_EMIT_CALL(BPF_FUNC_sk_release),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.errstr = "R1 pointer arithmetic on sock prohibited",
+		.result = REJECT,
+	},
+	{
+		"reference tracking: access member",
+		.insns = {
+			BPF_SK_LOOKUP,
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_0, 4),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_EMIT_CALL(BPF_FUNC_sk_release),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+	},
+	{
+		"reference tracking: write to member",
+		.insns = {
+			BPF_SK_LOOKUP,
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_LD_IMM64(BPF_REG_2, 42),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_2,
+				    offsetof(struct bpf_sock, mark)),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_EMIT_CALL(BPF_FUNC_sk_release),
+			BPF_LD_IMM64(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.errstr = "cannot write into socket",
+		.result = REJECT,
+	},
+	{
+		"reference tracking: invalid 64-bit access of member",
+		.insns = {
+			BPF_SK_LOOKUP,
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_EMIT_CALL(BPF_FUNC_sk_release),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.errstr = "invalid bpf_sock access off=0 size=8",
+		.result = REJECT,
+	},
+	{
+		"reference tracking: access after release",
+		.insns = {
+			BPF_SK_LOOKUP,
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+			BPF_EMIT_CALL(BPF_FUNC_sk_release),
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.errstr = "!read_ok",
+		.result = REJECT,
+	},
+	{
+		"reference tracking: direct access for lookup",
+		.insns = {
+			/* Check that the packet is at least 64B long */
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 64),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 9),
+			/* sk = sk_lookup_tcp(ctx, skb->data, ...) */
+			BPF_MOV64_IMM(BPF_REG_3, sizeof(struct bpf_sock_tuple)),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_MOV64_IMM(BPF_REG_5, 0),
+			BPF_EMIT_CALL(BPF_FUNC_sk_lookup_tcp),
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_0, 4),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_EMIT_CALL(BPF_FUNC_sk_release),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+	},
 };
 
 static int probe_filter_length(const struct bpf_insn *fp)
@@ -12536,18 +13921,18 @@ static int create_map(uint32_t type, uint32_t size_key,
 	return fd;
 }
 
-static int create_prog_dummy1(void)
+static int create_prog_dummy1(enum bpf_map_type prog_type)
 {
 	struct bpf_insn prog[] = {
 		BPF_MOV64_IMM(BPF_REG_0, 42),
 		BPF_EXIT_INSN(),
 	};
 
-	return bpf_load_program(BPF_PROG_TYPE_SOCKET_FILTER, prog,
+	return bpf_load_program(prog_type, prog,
 				ARRAY_SIZE(prog), "GPL", 0, NULL, 0);
 }
 
-static int create_prog_dummy2(int mfd, int idx)
+static int create_prog_dummy2(enum bpf_map_type prog_type, int mfd, int idx)
 {
 	struct bpf_insn prog[] = {
 		BPF_MOV64_IMM(BPF_REG_3, idx),
@@ -12558,11 +13943,12 @@ static int create_prog_dummy2(int mfd, int idx)
 		BPF_EXIT_INSN(),
 	};
 
-	return bpf_load_program(BPF_PROG_TYPE_SOCKET_FILTER, prog,
+	return bpf_load_program(prog_type, prog,
 				ARRAY_SIZE(prog), "GPL", 0, NULL, 0);
 }
 
-static int create_prog_array(uint32_t max_elem, int p1key)
+static int create_prog_array(enum bpf_map_type prog_type, uint32_t max_elem,
+			     int p1key)
 {
 	int p2key = 1;
 	int mfd, p1fd, p2fd;
@@ -12574,8 +13960,8 @@ static int create_prog_array(uint32_t max_elem, int p1key)
 		return -1;
 	}
 
-	p1fd = create_prog_dummy1();
-	p2fd = create_prog_dummy2(mfd, p2key);
+	p1fd = create_prog_dummy1(prog_type);
+	p2fd = create_prog_dummy2(prog_type, mfd, p2key);
 	if (p1fd < 0 || p2fd < 0)
 		goto out;
 	if (bpf_map_update_elem(mfd, &p1key, &p1fd, BPF_ANY) < 0)
@@ -12615,32 +14001,39 @@ static int create_map_in_map(void)
 	return outer_map_fd;
 }
 
-static int create_cgroup_storage(void)
+static int create_cgroup_storage(bool percpu)
 {
+	enum bpf_map_type type = percpu ? BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE :
+		BPF_MAP_TYPE_CGROUP_STORAGE;
 	int fd;
 
-	fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_STORAGE,
-			    sizeof(struct bpf_cgroup_storage_key),
+	fd = bpf_create_map(type, sizeof(struct bpf_cgroup_storage_key),
 			    TEST_DATA_LEN, 0, 0);
 	if (fd < 0)
-		printf("Failed to create array '%s'!\n", strerror(errno));
+		printf("Failed to create cgroup storage '%s'!\n",
+		       strerror(errno));
 
 	return fd;
 }
 
 static char bpf_vlog[UINT_MAX >> 8];
 
-static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog,
-			  int *map_fds)
+static void do_test_fixup(struct bpf_test *test, enum bpf_map_type prog_type,
+			  struct bpf_insn *prog, int *map_fds)
 {
-	int *fixup_map1 = test->fixup_map1;
-	int *fixup_map2 = test->fixup_map2;
-	int *fixup_map3 = test->fixup_map3;
-	int *fixup_map4 = test->fixup_map4;
+	int *fixup_map_hash_8b = test->fixup_map_hash_8b;
+	int *fixup_map_hash_48b = test->fixup_map_hash_48b;
+	int *fixup_map_hash_16b = test->fixup_map_hash_16b;
+	int *fixup_map_array_48b = test->fixup_map_array_48b;
+	int *fixup_map_sockmap = test->fixup_map_sockmap;
+	int *fixup_map_sockhash = test->fixup_map_sockhash;
+	int *fixup_map_xskmap = test->fixup_map_xskmap;
+	int *fixup_map_stacktrace = test->fixup_map_stacktrace;
 	int *fixup_prog1 = test->fixup_prog1;
 	int *fixup_prog2 = test->fixup_prog2;
 	int *fixup_map_in_map = test->fixup_map_in_map;
 	int *fixup_cgroup_storage = test->fixup_cgroup_storage;
+	int *fixup_percpu_cgroup_storage = test->fixup_percpu_cgroup_storage;
 
 	if (test->fill_helper)
 		test->fill_helper(test);
@@ -12649,44 +14042,44 @@ static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog,
 	 * for verifier and not do a runtime lookup, so the only thing
 	 * that really matters is value size in this case.
 	 */
-	if (*fixup_map1) {
+	if (*fixup_map_hash_8b) {
 		map_fds[0] = create_map(BPF_MAP_TYPE_HASH, sizeof(long long),
 					sizeof(long long), 1);
 		do {
-			prog[*fixup_map1].imm = map_fds[0];
-			fixup_map1++;
-		} while (*fixup_map1);
+			prog[*fixup_map_hash_8b].imm = map_fds[0];
+			fixup_map_hash_8b++;
+		} while (*fixup_map_hash_8b);
 	}
 
-	if (*fixup_map2) {
+	if (*fixup_map_hash_48b) {
 		map_fds[1] = create_map(BPF_MAP_TYPE_HASH, sizeof(long long),
 					sizeof(struct test_val), 1);
 		do {
-			prog[*fixup_map2].imm = map_fds[1];
-			fixup_map2++;
-		} while (*fixup_map2);
+			prog[*fixup_map_hash_48b].imm = map_fds[1];
+			fixup_map_hash_48b++;
+		} while (*fixup_map_hash_48b);
 	}
 
-	if (*fixup_map3) {
+	if (*fixup_map_hash_16b) {
 		map_fds[2] = create_map(BPF_MAP_TYPE_HASH, sizeof(long long),
 					sizeof(struct other_val), 1);
 		do {
-			prog[*fixup_map3].imm = map_fds[2];
-			fixup_map3++;
-		} while (*fixup_map3);
+			prog[*fixup_map_hash_16b].imm = map_fds[2];
+			fixup_map_hash_16b++;
+		} while (*fixup_map_hash_16b);
 	}
 
-	if (*fixup_map4) {
+	if (*fixup_map_array_48b) {
 		map_fds[3] = create_map(BPF_MAP_TYPE_ARRAY, sizeof(int),
 					sizeof(struct test_val), 1);
 		do {
-			prog[*fixup_map4].imm = map_fds[3];
-			fixup_map4++;
-		} while (*fixup_map4);
+			prog[*fixup_map_array_48b].imm = map_fds[3];
+			fixup_map_array_48b++;
+		} while (*fixup_map_array_48b);
 	}
 
 	if (*fixup_prog1) {
-		map_fds[4] = create_prog_array(4, 0);
+		map_fds[4] = create_prog_array(prog_type, 4, 0);
 		do {
 			prog[*fixup_prog1].imm = map_fds[4];
 			fixup_prog1++;
@@ -12694,7 +14087,7 @@ static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog,
 	}
 
 	if (*fixup_prog2) {
-		map_fds[5] = create_prog_array(8, 7);
+		map_fds[5] = create_prog_array(prog_type, 8, 7);
 		do {
 			prog[*fixup_prog2].imm = map_fds[5];
 			fixup_prog2++;
@@ -12710,12 +14103,79 @@ static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog,
 	}
 
 	if (*fixup_cgroup_storage) {
-		map_fds[7] = create_cgroup_storage();
+		map_fds[7] = create_cgroup_storage(false);
 		do {
 			prog[*fixup_cgroup_storage].imm = map_fds[7];
 			fixup_cgroup_storage++;
 		} while (*fixup_cgroup_storage);
 	}
+
+	if (*fixup_percpu_cgroup_storage) {
+		map_fds[8] = create_cgroup_storage(true);
+		do {
+			prog[*fixup_percpu_cgroup_storage].imm = map_fds[8];
+			fixup_percpu_cgroup_storage++;
+		} while (*fixup_percpu_cgroup_storage);
+	}
+	if (*fixup_map_sockmap) {
+		map_fds[9] = create_map(BPF_MAP_TYPE_SOCKMAP, sizeof(int),
+					sizeof(int), 1);
+		do {
+			prog[*fixup_map_sockmap].imm = map_fds[9];
+			fixup_map_sockmap++;
+		} while (*fixup_map_sockmap);
+	}
+	if (*fixup_map_sockhash) {
+		map_fds[10] = create_map(BPF_MAP_TYPE_SOCKHASH, sizeof(int),
+					sizeof(int), 1);
+		do {
+			prog[*fixup_map_sockhash].imm = map_fds[10];
+			fixup_map_sockhash++;
+		} while (*fixup_map_sockhash);
+	}
+	if (*fixup_map_xskmap) {
+		map_fds[11] = create_map(BPF_MAP_TYPE_XSKMAP, sizeof(int),
+					sizeof(int), 1);
+		do {
+			prog[*fixup_map_xskmap].imm = map_fds[11];
+			fixup_map_xskmap++;
+		} while (*fixup_map_xskmap);
+	}
+	if (*fixup_map_stacktrace) {
+		map_fds[12] = create_map(BPF_MAP_TYPE_STACK_TRACE, sizeof(u32),
+					 sizeof(u64), 1);
+		do {
+			prog[*fixup_map_stacktrace].imm = map_fds[12];
+			fixup_map_stacktrace++;
+		} while (fixup_map_stacktrace);
+	}
+}
+
+static int set_admin(bool admin)
+{
+	cap_t caps;
+	const cap_value_t cap_val = CAP_SYS_ADMIN;
+	int ret = -1;
+
+	caps = cap_get_proc();
+	if (!caps) {
+		perror("cap_get_proc");
+		return -1;
+	}
+	if (cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap_val,
+				admin ? CAP_SET : CAP_CLEAR)) {
+		perror("cap_set_flag");
+		goto out;
+	}
+	if (cap_set_proc(caps)) {
+		perror("cap_set_proc");
+		goto out;
+	}
+	ret = 0;
+out:
+	if (cap_free(caps))
+		perror("cap_free");
+	return ret;
 }
 
 static void do_test_single(struct bpf_test *test, bool unpriv,
@@ -12726,23 +14186,28 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
 	struct bpf_insn *prog = test->insns;
 	int map_fds[MAX_NR_MAPS];
 	const char *expected_err;
+	uint32_t expected_val;
 	uint32_t retval;
 	int i, err;
 
 	for (i = 0; i < MAX_NR_MAPS; i++)
 		map_fds[i] = -1;
 
-	do_test_fixup(test, prog, map_fds);
+	if (!prog_type)
+		prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
+	do_test_fixup(test, prog_type, prog, map_fds);
 	prog_len = probe_filter_length(prog);
 
-	fd_prog = bpf_verify_program(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER,
-				     prog, prog_len, test->flags & F_LOAD_WITH_STRICT_ALIGNMENT,
+	fd_prog = bpf_verify_program(prog_type, prog, prog_len,
+				     test->flags & F_LOAD_WITH_STRICT_ALIGNMENT,
 				     "GPL", 0, bpf_vlog, sizeof(bpf_vlog), 1);
 
 	expected_ret = unpriv && test->result_unpriv != UNDEF ?
 		       test->result_unpriv : test->result;
 	expected_err = unpriv && test->errstr_unpriv ?
 		       test->errstr_unpriv : test->errstr;
+	expected_val = unpriv && test->retval_unpriv ?
+		       test->retval_unpriv : test->retval;
 
 	reject_from_alignment = fd_prog < 0 &&
 				(test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS) &&
@@ -12776,16 +14241,20 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
 		__u8 tmp[TEST_DATA_LEN << 2];
 		__u32 size_tmp = sizeof(tmp);
 
+		if (unpriv)
+			set_admin(true);
 		err = bpf_prog_test_run(fd_prog, 1, test->data,
 					sizeof(test->data), tmp, &size_tmp,
 					&retval, NULL);
+		if (unpriv)
+			set_admin(false);
 		if (err && errno != 524/*ENOTSUPP*/ && errno != EPERM) {
 			printf("Unexpected bpf_prog_test_run error\n");
 			goto fail_log;
 		}
-		if (!err && retval != test->retval &&
-		    test->retval != POINTER_VALUE) {
-			printf("FAIL retval %d != %d\n", retval, test->retval);
+		if (!err && retval != expected_val &&
+		    expected_val != POINTER_VALUE) {
+			printf("FAIL retval %d != %d\n", retval, expected_val);
 			goto fail_log;
 		}
 	}
@@ -12828,33 +14297,6 @@ static bool is_admin(void)
 	return (sysadmin == CAP_SET);
 }
 
-static int set_admin(bool admin)
-{
-	cap_t caps;
-	const cap_value_t cap_val = CAP_SYS_ADMIN;
-	int ret = -1;
-
-	caps = cap_get_proc();
-	if (!caps) {
-		perror("cap_get_proc");
-		return -1;
-	}
-	if (cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap_val,
-				admin ? CAP_SET : CAP_CLEAR)) {
-		perror("cap_set_flag");
-		goto out;
-	}
-	if (cap_set_proc(caps)) {
-		perror("cap_set_proc");
-		goto out;
-	}
-	ret = 0;
-out:
-	if (cap_free(caps))
-		perror("cap_free");
-	return ret;
-}
-
 static void get_unpriv_disabled()
 {
 	char buf[2];
@@ -12871,6 +14313,13 @@ static void get_unpriv_disabled()
 	fclose(fd);
 }
 
+static bool test_as_unpriv(struct bpf_test *test)
+{
+	return !test->prog_type ||
+	       test->prog_type == BPF_PROG_TYPE_SOCKET_FILTER ||
+	       test->prog_type == BPF_PROG_TYPE_CGROUP_SKB;
+}
+
 static int do_test(bool unpriv, unsigned int from, unsigned int to)
 {
 	int i, passes = 0, errors = 0, skips = 0;
@@ -12881,10 +14330,10 @@ static int do_test(bool unpriv, unsigned int from, unsigned int to)
 		/* Program types that are not supported by non-root we
 		 * skip right away.
 		 */
-		if (!test->prog_type && unpriv_disabled) {
+		if (test_as_unpriv(test) && unpriv_disabled) {
 			printf("#%d/u %s SKIP\n", i, test->descr);
 			skips++;
-		} else if (!test->prog_type) {
+		} else if (test_as_unpriv(test)) {
 			if (!unpriv)
 				set_admin(false);
 			printf("#%d/u %s ", i, test->descr);
diff --git a/tools/testing/selftests/bpf/test_xdp_vlan.c b/tools/testing/selftests/bpf/test_xdp_vlan.c
new file mode 100644
index 000000000000..365a7d2d9f5c
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_xdp_vlan.c
@@ -0,0 +1,292 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *  Copyright(c) 2018 Jesper Dangaard Brouer.
+ *
+ * XDP/TC VLAN manipulation example
+ *
+ * GOTCHA: Remember to disable NIC hardware offloading of VLANs,
+ * else the VLAN tags are NOT inlined in the packet payload:
+ *
+ *  # ethtool -K ixgbe2 rxvlan off
+ *
+ * Verify setting:
+ *  # ethtool -k ixgbe2 | grep rx-vlan-offload
+ *  rx-vlan-offload: off
+ *
+ */
+#include <stddef.h>
+#include <stdbool.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+#include <linux/in.h>
+#include <linux/pkt_cls.h>
+
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+/* linux/if_vlan.h have not exposed this as UAPI, thus mirror some here
+ *
+ *	struct vlan_hdr - vlan header
+ *	@h_vlan_TCI: priority and VLAN ID
+ *	@h_vlan_encapsulated_proto: packet type ID or len
+ */
+struct _vlan_hdr {
+	__be16 h_vlan_TCI;
+	__be16 h_vlan_encapsulated_proto;
+};
+#define VLAN_PRIO_MASK		0xe000 /* Priority Code Point */
+#define VLAN_PRIO_SHIFT		13
+#define VLAN_CFI_MASK		0x1000 /* Canonical Format Indicator */
+#define VLAN_TAG_PRESENT	VLAN_CFI_MASK
+#define VLAN_VID_MASK		0x0fff /* VLAN Identifier */
+#define VLAN_N_VID		4096
+
+struct parse_pkt {
+	__u16 l3_proto;
+	__u16 l3_offset;
+	__u16 vlan_outer;
+	__u16 vlan_inner;
+	__u8  vlan_outer_offset;
+	__u8  vlan_inner_offset;
+};
+
+char _license[] SEC("license") = "GPL";
+
+static __always_inline
+bool parse_eth_frame(struct ethhdr *eth, void *data_end, struct parse_pkt *pkt)
+{
+	__u16 eth_type;
+	__u8 offset;
+
+	offset = sizeof(*eth);
+	/* Make sure packet is large enough for parsing eth + 2 VLAN headers */
+	if ((void *)eth + offset + (2*sizeof(struct _vlan_hdr)) > data_end)
+		return false;
+
+	eth_type = eth->h_proto;
+
+	/* Handle outer VLAN tag */
+	if (eth_type == bpf_htons(ETH_P_8021Q)
+	    || eth_type == bpf_htons(ETH_P_8021AD)) {
+		struct _vlan_hdr *vlan_hdr;
+
+		vlan_hdr = (void *)eth + offset;
+		pkt->vlan_outer_offset = offset;
+		pkt->vlan_outer = bpf_ntohs(vlan_hdr->h_vlan_TCI)
+				& VLAN_VID_MASK;
+		eth_type        = vlan_hdr->h_vlan_encapsulated_proto;
+		offset += sizeof(*vlan_hdr);
+	}
+
+	/* Handle inner (double) VLAN tag */
+	if (eth_type == bpf_htons(ETH_P_8021Q)
+	    || eth_type == bpf_htons(ETH_P_8021AD)) {
+		struct _vlan_hdr *vlan_hdr;
+
+		vlan_hdr = (void *)eth + offset;
+		pkt->vlan_inner_offset = offset;
+		pkt->vlan_inner = bpf_ntohs(vlan_hdr->h_vlan_TCI)
+				& VLAN_VID_MASK;
+		eth_type        = vlan_hdr->h_vlan_encapsulated_proto;
+		offset += sizeof(*vlan_hdr);
+	}
+
+	pkt->l3_proto = bpf_ntohs(eth_type); /* Convert to host-byte-order */
+	pkt->l3_offset = offset;
+
+	return true;
+}
+
+/* Hint, VLANs are choosen to hit network-byte-order issues */
+#define TESTVLAN 4011 /* 0xFAB */
+// #define TO_VLAN  4000 /* 0xFA0 (hint 0xOA0 = 160) */
+
+SEC("xdp_drop_vlan_4011")
+int  xdp_prognum0(struct xdp_md *ctx)
+{
+	void *data_end = (void *)(long)ctx->data_end;
+	void *data     = (void *)(long)ctx->data;
+	struct parse_pkt pkt = { 0 };
+
+	if (!parse_eth_frame(data, data_end, &pkt))
+		return XDP_ABORTED;
+
+	/* Drop specific VLAN ID example */
+	if (pkt.vlan_outer == TESTVLAN)
+		return XDP_ABORTED;
+	/*
+	 * Using XDP_ABORTED makes it possible to record this event,
+	 * via tracepoint xdp:xdp_exception like:
+	 *  # perf record -a -e xdp:xdp_exception
+	 *  # perf script
+	 */
+	return XDP_PASS;
+}
+/*
+Commands to setup VLAN on Linux to test packets gets dropped:
+
+ export ROOTDEV=ixgbe2
+ export VLANID=4011
+ ip link add link $ROOTDEV name $ROOTDEV.$VLANID type vlan id $VLANID
+ ip link set dev  $ROOTDEV.$VLANID up
+
+ ip link set dev $ROOTDEV mtu 1508
+ ip addr add 100.64.40.11/24 dev $ROOTDEV.$VLANID
+
+Load prog with ip tool:
+
+ ip link set $ROOTDEV xdp off
+ ip link set $ROOTDEV xdp object xdp_vlan01_kern.o section xdp_drop_vlan_4011
+
+*/
+
+/* Changing VLAN to zero, have same practical effect as removing the VLAN. */
+#define TO_VLAN	0
+
+SEC("xdp_vlan_change")
+int  xdp_prognum1(struct xdp_md *ctx)
+{
+	void *data_end = (void *)(long)ctx->data_end;
+	void *data     = (void *)(long)ctx->data;
+	struct parse_pkt pkt = { 0 };
+
+	if (!parse_eth_frame(data, data_end, &pkt))
+		return XDP_ABORTED;
+
+	/* Change specific VLAN ID */
+	if (pkt.vlan_outer == TESTVLAN) {
+		struct _vlan_hdr *vlan_hdr = data + pkt.vlan_outer_offset;
+
+		/* Modifying VLAN, preserve top 4 bits */
+		vlan_hdr->h_vlan_TCI =
+			bpf_htons((bpf_ntohs(vlan_hdr->h_vlan_TCI) & 0xf000)
+				  | TO_VLAN);
+	}
+
+	return XDP_PASS;
+}
+
+/*
+ * Show XDP+TC can cooperate, on creating a VLAN rewriter.
+ * 1. Create a XDP prog that can "pop"/remove a VLAN header.
+ * 2. Create a TC-bpf prog that egress can add a VLAN header.
+ */
+
+#ifndef ETH_ALEN /* Ethernet MAC address length */
+#define ETH_ALEN	6	/* bytes */
+#endif
+#define VLAN_HDR_SZ	4	/* bytes */
+
+SEC("xdp_vlan_remove_outer")
+int  xdp_prognum2(struct xdp_md *ctx)
+{
+	void *data_end = (void *)(long)ctx->data_end;
+	void *data     = (void *)(long)ctx->data;
+	struct parse_pkt pkt = { 0 };
+	char *dest;
+
+	if (!parse_eth_frame(data, data_end, &pkt))
+		return XDP_ABORTED;
+
+	/* Skip packet if no outer VLAN was detected */
+	if (pkt.vlan_outer_offset == 0)
+		return XDP_PASS;
+
+	/* Moving Ethernet header, dest overlap with src, memmove handle this */
+	dest = data;
+	dest+= VLAN_HDR_SZ;
+	/*
+	 * Notice: Taking over vlan_hdr->h_vlan_encapsulated_proto, by
+	 * only moving two MAC addrs (12 bytes), not overwriting last 2 bytes
+	 */
+	__builtin_memmove(dest, data, ETH_ALEN * 2);
+	/* Note: LLVM built-in memmove inlining require size to be constant */
+
+	/* Move start of packet header seen by Linux kernel stack */
+	bpf_xdp_adjust_head(ctx, VLAN_HDR_SZ);
+
+	return XDP_PASS;
+}
+
+static __always_inline
+void shift_mac_4bytes_16bit(void *data)
+{
+	__u16 *p = data;
+
+	p[7] = p[5]; /* delete p[7] was vlan_hdr->h_vlan_TCI */
+	p[6] = p[4]; /* delete p[6] was ethhdr->h_proto */
+	p[5] = p[3];
+	p[4] = p[2];
+	p[3] = p[1];
+	p[2] = p[0];
+}
+
+static __always_inline
+void shift_mac_4bytes_32bit(void *data)
+{
+	__u32 *p = data;
+
+	/* Assuming VLAN hdr present. The 4 bytes in p[3] that gets
+	 * overwritten, is ethhdr->h_proto and vlan_hdr->h_vlan_TCI.
+	 * The vlan_hdr->h_vlan_encapsulated_proto take over role as
+	 * ethhdr->h_proto.
+	 */
+	p[3] = p[2];
+	p[2] = p[1];
+	p[1] = p[0];
+}
+
+SEC("xdp_vlan_remove_outer2")
+int  xdp_prognum3(struct xdp_md *ctx)
+{
+	void *data_end = (void *)(long)ctx->data_end;
+	void *data     = (void *)(long)ctx->data;
+	struct ethhdr *orig_eth = data;
+	struct parse_pkt pkt = { 0 };
+
+	if (!parse_eth_frame(orig_eth, data_end, &pkt))
+		return XDP_ABORTED;
+
+	/* Skip packet if no outer VLAN was detected */
+	if (pkt.vlan_outer_offset == 0)
+		return XDP_PASS;
+
+	/* Simply shift down MAC addrs 4 bytes, overwrite h_proto + TCI */
+	shift_mac_4bytes_32bit(data);
+
+	/* Move start of packet header seen by Linux kernel stack */
+	bpf_xdp_adjust_head(ctx, VLAN_HDR_SZ);
+
+	return XDP_PASS;
+}
+
+/*=====================================
+ *  BELOW: TC-hook based ebpf programs
+ * ====================================
+ * The TC-clsact eBPF programs (currently) need to be attach via TC commands
+ */
+
+SEC("tc_vlan_push")
+int _tc_progA(struct __sk_buff *ctx)
+{
+	bpf_skb_vlan_push(ctx, bpf_htons(ETH_P_8021Q), TESTVLAN);
+
+	return TC_ACT_OK;
+}
+/*
+Commands to setup TC to use above bpf prog:
+
+export ROOTDEV=ixgbe2
+export FILE=xdp_vlan01_kern.o
+
+# Re-attach clsact to clear/flush existing role
+tc qdisc del dev $ROOTDEV clsact 2> /dev/null ;\
+tc qdisc add dev $ROOTDEV clsact
+
+# Attach BPF prog EGRESS
+tc filter add dev $ROOTDEV egress \
+  prio 1 handle 1 bpf da obj $FILE sec tc_vlan_push
+
+tc filter show dev $ROOTDEV egress
+*/
diff --git a/tools/testing/selftests/bpf/test_xdp_vlan.sh b/tools/testing/selftests/bpf/test_xdp_vlan.sh
new file mode 100755
index 000000000000..51a3a31d1aac
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_xdp_vlan.sh
@@ -0,0 +1,195 @@
+#!/bin/bash
+
+TESTNAME=xdp_vlan
+
+usage() {
+  echo "Testing XDP + TC eBPF VLAN manipulations: $TESTNAME"
+  echo ""
+  echo "Usage: $0 [-vfh]"
+  echo "  -v | --verbose : Verbose"
+  echo "  --flush        : Flush before starting (e.g. after --interactive)"
+  echo "  --interactive  : Keep netns setup running after test-run"
+  echo ""
+}
+
+cleanup()
+{
+	local status=$?
+
+	if [ "$status" = "0" ]; then
+		echo "selftests: $TESTNAME [PASS]";
+	else
+		echo "selftests: $TESTNAME [FAILED]";
+	fi
+
+	if [ -n "$INTERACTIVE" ]; then
+		echo "Namespace setup still active explore with:"
+		echo " ip netns exec ns1 bash"
+		echo " ip netns exec ns2 bash"
+		exit $status
+	fi
+
+	set +e
+	ip link del veth1 2> /dev/null
+	ip netns del ns1 2> /dev/null
+	ip netns del ns2 2> /dev/null
+}
+
+# Using external program "getopt" to get --long-options
+OPTIONS=$(getopt -o hvfi: \
+    --long verbose,flush,help,interactive,debug -- "$@")
+if (( $? != 0 )); then
+    usage
+    echo "selftests: $TESTNAME [FAILED] Error calling getopt, unknown option?"
+    exit 2
+fi
+eval set -- "$OPTIONS"
+
+##  --- Parse command line arguments / parameters ---
+while true; do
+	case "$1" in
+	    -v | --verbose)
+		export VERBOSE=yes
+		shift
+		;;
+	    -i | --interactive | --debug )
+		INTERACTIVE=yes
+		shift
+		;;
+	    -f | --flush )
+		cleanup
+		shift
+		;;
+	    -- )
+		shift
+		break
+		;;
+	    -h | --help )
+		usage;
+		echo "selftests: $TESTNAME [SKIP] usage help info requested"
+		exit 0
+		;;
+	    * )
+		shift
+		break
+		;;
+	esac
+done
+
+if [ "$EUID" -ne 0 ]; then
+	echo "selftests: $TESTNAME [FAILED] need root privileges"
+	exit 1
+fi
+
+ip link set dev lo xdp off 2>/dev/null > /dev/null
+if [ $? -ne 0 ];then
+	echo "selftests: $TESTNAME [SKIP] need ip xdp support"
+	exit 0
+fi
+
+# Interactive mode likely require us to cleanup netns
+if [ -n "$INTERACTIVE" ]; then
+	ip link del veth1 2> /dev/null
+	ip netns del ns1 2> /dev/null
+	ip netns del ns2 2> /dev/null
+fi
+
+# Exit on failure
+set -e
+
+# Some shell-tools dependencies
+which ip > /dev/null
+which tc > /dev/null
+which ethtool > /dev/null
+
+# Make rest of shell verbose, showing comments as doc/info
+if [ -n "$VERBOSE" ]; then
+    set -v
+fi
+
+# Create two namespaces
+ip netns add ns1
+ip netns add ns2
+
+# Run cleanup if failing or on kill
+trap cleanup 0 2 3 6 9
+
+# Create veth pair
+ip link add veth1 type veth peer name veth2
+
+# Move veth1 and veth2 into the respective namespaces
+ip link set veth1 netns ns1
+ip link set veth2 netns ns2
+
+# NOTICE: XDP require VLAN header inside packet payload
+#  - Thus, disable VLAN offloading driver features
+#  - For veth REMEMBER TX side VLAN-offload
+#
+# Disable rx-vlan-offload (mostly needed on ns1)
+ip netns exec ns1 ethtool -K veth1 rxvlan off
+ip netns exec ns2 ethtool -K veth2 rxvlan off
+#
+# Disable tx-vlan-offload (mostly needed on ns2)
+ip netns exec ns2 ethtool -K veth2 txvlan off
+ip netns exec ns1 ethtool -K veth1 txvlan off
+
+export IPADDR1=100.64.41.1
+export IPADDR2=100.64.41.2
+
+# In ns1/veth1 add IP-addr on plain net_device
+ip netns exec ns1 ip addr add ${IPADDR1}/24 dev veth1
+ip netns exec ns1 ip link set veth1 up
+
+# In ns2/veth2 create VLAN device
+export VLAN=4011
+export DEVNS2=veth2
+ip netns exec ns2 ip link add link $DEVNS2 name $DEVNS2.$VLAN type vlan id $VLAN
+ip netns exec ns2 ip addr add ${IPADDR2}/24 dev $DEVNS2.$VLAN
+ip netns exec ns2 ip link set $DEVNS2 up
+ip netns exec ns2 ip link set $DEVNS2.$VLAN up
+
+# Bringup lo in netns (to avoids confusing people using --interactive)
+ip netns exec ns1 ip link set lo up
+ip netns exec ns2 ip link set lo up
+
+# At this point, the hosts cannot reach each-other,
+# because ns2 are using VLAN tags on the packets.
+
+ip netns exec ns2 sh -c 'ping -W 1 -c 1 100.64.41.1 || echo "Okay ping fails"'
+
+
+# Now we can use the test_xdp_vlan.c program to pop/push these VLAN tags
+# ----------------------------------------------------------------------
+# In ns1: ingress use XDP to remove VLAN tags
+export DEVNS1=veth1
+export FILE=test_xdp_vlan.o
+
+# First test: Remove VLAN by setting VLAN ID 0, using "xdp_vlan_change"
+export XDP_PROG=xdp_vlan_change
+ip netns exec ns1 ip link set $DEVNS1 xdp object $FILE section $XDP_PROG
+
+# In ns1: egress use TC to add back VLAN tag 4011
+#  (del cmd)
+#  tc qdisc del dev $DEVNS1 clsact 2> /dev/null
+#
+ip netns exec ns1 tc qdisc add dev $DEVNS1 clsact
+ip netns exec ns1 tc filter add dev $DEVNS1 egress \
+  prio 1 handle 1 bpf da obj $FILE sec tc_vlan_push
+
+# Now the namespaces can reach each-other, test with ping:
+ip netns exec ns2 ping -W 2 -c 3 $IPADDR1
+ip netns exec ns1 ping -W 2 -c 3 $IPADDR2
+
+# Second test: Replace xdp prog, that fully remove vlan header
+#
+# Catch kernel bug for generic-XDP, that does didn't allow us to
+# remove a VLAN header, because skb->protocol still contain VLAN
+# ETH_P_8021Q indication, and this cause overwriting of our changes.
+#
+export XDP_PROG=xdp_vlan_remove_outer2
+ip netns exec ns1 ip link set $DEVNS1 xdp off
+ip netns exec ns1 ip link set $DEVNS1 xdp object $FILE section $XDP_PROG
+
+# Now the namespaces should still be able reach each-other, test with ping:
+ip netns exec ns2 ping -W 2 -c 3 $IPADDR1
+ip netns exec ns1 ping -W 2 -c 3 $IPADDR2
diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c
index cabe2a3a3b30..4cdb63bf0521 100644
--- a/tools/testing/selftests/bpf/trace_helpers.c
+++ b/tools/testing/selftests/bpf/trace_helpers.c
@@ -41,6 +41,7 @@ int load_kallsyms(void)
 		syms[i].name = strdup(func);
 		i++;
 	}
+	fclose(f);
 	sym_cnt = i;
 	qsort(syms, sym_cnt, sizeof(struct ksym), ksym_cmp);
 	return 0;
@@ -124,10 +125,11 @@ struct perf_event_sample {
 	char data[];
 };
 
-static enum bpf_perf_event_ret bpf_perf_event_print(void *event, void *priv)
+static enum bpf_perf_event_ret
+bpf_perf_event_print(struct perf_event_header *hdr, void *private_data)
 {
-	struct perf_event_sample *e = event;
-	perf_event_print_fn fn = priv;
+	struct perf_event_sample *e = (struct perf_event_sample *)hdr;
+	perf_event_print_fn fn = private_data;
 	int ret;
 
 	if (e->header.type == PERF_RECORD_SAMPLE) {
diff --git a/tools/testing/selftests/bpf/with_addr.sh b/tools/testing/selftests/bpf/with_addr.sh
new file mode 100755
index 000000000000..ffcd3953f94c
--- /dev/null
+++ b/tools/testing/selftests/bpf/with_addr.sh
@@ -0,0 +1,54 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# add private ipv4 and ipv6 addresses to loopback
+
+readonly V6_INNER='100::a/128'
+readonly V4_INNER='192.168.0.1/32'
+
+if getopts ":s" opt; then
+  readonly SIT_DEV_NAME='sixtofourtest0'
+  readonly V6_SIT='2::/64'
+  readonly V4_SIT='172.17.0.1/32'
+  shift
+fi
+
+fail() {
+  echo "error: $*" 1>&2
+  exit 1
+}
+
+setup() {
+  ip -6 addr add "${V6_INNER}" dev lo || fail 'failed to setup v6 address'
+  ip -4 addr add "${V4_INNER}" dev lo || fail 'failed to setup v4 address'
+
+  if [[ -n "${V6_SIT}" ]]; then
+    ip link add "${SIT_DEV_NAME}" type sit remote any local any \
+	    || fail 'failed to add sit'
+    ip link set dev "${SIT_DEV_NAME}" up \
+	    || fail 'failed to bring sit device up'
+    ip -6 addr add "${V6_SIT}" dev "${SIT_DEV_NAME}" \
+	    || fail 'failed to setup v6 SIT address'
+    ip -4 addr add "${V4_SIT}" dev "${SIT_DEV_NAME}" \
+	    || fail 'failed to setup v4 SIT address'
+  fi
+
+  sleep 2	# avoid race causing bind to fail
+}
+
+cleanup() {
+  if [[ -n "${V6_SIT}" ]]; then
+    ip -4 addr del "${V4_SIT}" dev "${SIT_DEV_NAME}"
+    ip -6 addr del "${V6_SIT}" dev "${SIT_DEV_NAME}"
+    ip link del "${SIT_DEV_NAME}"
+  fi
+
+  ip -4 addr del "${V4_INNER}" dev lo
+  ip -6 addr del "${V6_INNER}" dev lo
+}
+
+trap cleanup EXIT
+
+setup
+"$@"
+exit "$?"
diff --git a/tools/testing/selftests/bpf/with_tunnels.sh b/tools/testing/selftests/bpf/with_tunnels.sh
new file mode 100755
index 000000000000..e24949ed3a20
--- /dev/null
+++ b/tools/testing/selftests/bpf/with_tunnels.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# setup tunnels for flow dissection test
+
+readonly SUFFIX="test_$(mktemp -u XXXX)"
+CONFIG="remote 127.0.0.2 local 127.0.0.1 dev lo"
+
+setup() {
+  ip link add "ipip_${SUFFIX}" type ipip ${CONFIG}
+  ip link add "gre_${SUFFIX}" type gre ${CONFIG}
+  ip link add "sit_${SUFFIX}" type sit ${CONFIG}
+
+  echo "tunnels before test:"
+  ip tunnel show
+
+  ip link set "ipip_${SUFFIX}" up
+  ip link set "gre_${SUFFIX}" up
+  ip link set "sit_${SUFFIX}" up
+}
+
+
+cleanup() {
+  ip tunnel del "ipip_${SUFFIX}"
+  ip tunnel del "gre_${SUFFIX}"
+  ip tunnel del "sit_${SUFFIX}"
+
+  echo "tunnels after test:"
+  ip tunnel show
+}
+
+trap cleanup EXIT
+
+setup
+"$@"
+exit "$?"
diff --git a/tools/testing/selftests/drivers/dma-buf/Makefile b/tools/testing/selftests/drivers/dma-buf/Makefile
new file mode 100644
index 000000000000..4154c3d7aa58
--- /dev/null
+++ b/tools/testing/selftests/drivers/dma-buf/Makefile
@@ -0,0 +1,5 @@
+CFLAGS += -I../../../../../usr/include/
+
+TEST_GEN_PROGS := udmabuf
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/drivers/dma-buf/udmabuf.c b/tools/testing/selftests/drivers/dma-buf/udmabuf.c
new file mode 100644
index 000000000000..376b1d6730bd
--- /dev/null
+++ b/tools/testing/selftests/drivers/dma-buf/udmabuf.c
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <malloc.h>
+
+#include <sys/ioctl.h>
+#include <sys/syscall.h>
+#include <linux/memfd.h>
+#include <linux/udmabuf.h>
+
+#define TEST_PREFIX	"drivers/dma-buf/udmabuf"
+#define NUM_PAGES       4
+
+static int memfd_create(const char *name, unsigned int flags)
+{
+	return syscall(__NR_memfd_create, name, flags);
+}
+
+int main(int argc, char *argv[])
+{
+	struct udmabuf_create create;
+	int devfd, memfd, buf, ret;
+	off_t size;
+	void *mem;
+
+	devfd = open("/dev/udmabuf", O_RDWR);
+	if (devfd < 0) {
+		printf("%s: [skip,no-udmabuf]\n", TEST_PREFIX);
+		exit(77);
+	}
+
+	memfd = memfd_create("udmabuf-test", MFD_CLOEXEC);
+	if (memfd < 0) {
+		printf("%s: [skip,no-memfd]\n", TEST_PREFIX);
+		exit(77);
+	}
+
+	size = getpagesize() * NUM_PAGES;
+	ret = ftruncate(memfd, size);
+	if (ret == -1) {
+		printf("%s: [FAIL,memfd-truncate]\n", TEST_PREFIX);
+		exit(1);
+	}
+
+	memset(&create, 0, sizeof(create));
+
+	/* should fail (offset not page aligned) */
+	create.memfd  = memfd;
+	create.offset = getpagesize()/2;
+	create.size   = getpagesize();
+	buf = ioctl(devfd, UDMABUF_CREATE, &create);
+	if (buf >= 0) {
+		printf("%s: [FAIL,test-1]\n", TEST_PREFIX);
+		exit(1);
+	}
+
+	/* should fail (size not multiple of page) */
+	create.memfd  = memfd;
+	create.offset = 0;
+	create.size   = getpagesize()/2;
+	buf = ioctl(devfd, UDMABUF_CREATE, &create);
+	if (buf >= 0) {
+		printf("%s: [FAIL,test-2]\n", TEST_PREFIX);
+		exit(1);
+	}
+
+	/* should fail (not memfd) */
+	create.memfd  = 0; /* stdin */
+	create.offset = 0;
+	create.size   = size;
+	buf = ioctl(devfd, UDMABUF_CREATE, &create);
+	if (buf >= 0) {
+		printf("%s: [FAIL,test-3]\n", TEST_PREFIX);
+		exit(1);
+	}
+
+	/* should work */
+	create.memfd  = memfd;
+	create.offset = 0;
+	create.size   = size;
+	buf = ioctl(devfd, UDMABUF_CREATE, &create);
+	if (buf < 0) {
+		printf("%s: [FAIL,test-4]\n", TEST_PREFIX);
+		exit(1);
+	}
+
+	fprintf(stderr, "%s: ok\n", TEST_PREFIX);
+	close(buf);
+	close(memfd);
+	close(devfd);
+	return 0;
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh
new file mode 100755
index 000000000000..117f6f35d72f
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh
@@ -0,0 +1,394 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# A test for switch behavior under MC overload. An issue in Spectrum chips
+# causes throughput of UC traffic to drop severely when a switch is under heavy
+# MC load. This issue can be overcome by putting the switch to MC-aware mode.
+# This test verifies that UC performance stays intact even as the switch is
+# under MC flood, and therefore that the MC-aware mode is enabled and correctly
+# configured.
+#
+# Because mlxsw throttles CPU port, the traffic can't actually reach userspace
+# at full speed. That makes it impossible to use iperf3 to simply measure the
+# throughput, because many packets (that reach $h3) don't get to the kernel at
+# all even in UDP mode (the situation is even worse in TCP mode, where one can't
+# hope to see more than a couple Mbps).
+#
+# So instead we send traffic with mausezahn and use RX ethtool counters at $h3.
+# Multicast traffic is untagged, unicast traffic is tagged with PCP 1. Therefore
+# each gets a different priority and we can use per-prio ethtool counters to
+# measure the throughput. In order to avoid prioritizing unicast traffic, prio
+# qdisc is installed on $swp3 and maps all priorities to the same band #7 (and
+# thus TC 0).
+#
+# Mausezahn can't actually saturate the links unless it's using large frames.
+# Thus we set MTU to 10K on all involved interfaces. Then both unicast and
+# multicast traffic uses 8K frames.
+#
+# +---------------------------+            +----------------------------------+
+# | H1                        |            |                               H2 |
+# |                           |            |  unicast --> + $h2.111           |
+# |                 multicast |            |  traffic     | 192.0.2.129/28    |
+# |                 traffic   |            |              | e-qos-map 0:1     |
+# |           $h1 + <-----    |            |              |                   |
+# | 192.0.2.65/28 |           |            |              + $h2               |
+# +---------------|-----------+            +--------------|-------------------+
+#                 |                                       |
+# +---------------|---------------------------------------|-------------------+
+# |         $swp1 +                                       + $swp2             |
+# |        >1Gbps |                                       | >1Gbps            |
+# | +-------------|------+                     +----------|----------------+  |
+# | |     $swp1.1 +      |                     |          + $swp2.111      |  |
+# | |                BR1 |             SW      | BR111                     |  |
+# | |     $swp3.1 +      |                     |          + $swp3.111      |  |
+# | +-------------|------+                     +----------|----------------+  |
+# |               \_______________________________________/                   |
+# |                                    |                                      |
+# |                                    + $swp3                                |
+# |                                    | 1Gbps bottleneck                     |
+# |                                    | prio qdisc: {0..7} -> 7              |
+# +------------------------------------|--------------------------------------+
+#                                      |
+#                                   +--|-----------------+
+#                                   |  + $h3          H3 |
+#                                   |  | 192.0.2.66/28   |
+#                                   |  |                 |
+#                                   |  + $h3.111         |
+#                                   |    192.0.2.130/28  |
+#                                   +--------------------+
+
+ALL_TESTS="
+	ping_ipv4
+	test_mc_aware
+	test_uc_aware
+"
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+NUM_NETIFS=6
+source $lib_dir/lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.65/28
+	mtu_set $h1 10000
+}
+
+h1_destroy()
+{
+	mtu_restore $h1
+	simple_if_fini $h1 192.0.2.65/28
+}
+
+h2_create()
+{
+	simple_if_init $h2
+	mtu_set $h2 10000
+
+	vlan_create $h2 111 v$h2 192.0.2.129/28
+	ip link set dev $h2.111 type vlan egress-qos-map 0:1
+}
+
+h2_destroy()
+{
+	vlan_destroy $h2 111
+
+	mtu_restore $h2
+	simple_if_fini $h2
+}
+
+h3_create()
+{
+	simple_if_init $h3 192.0.2.66/28
+	mtu_set $h3 10000
+
+	vlan_create $h3 111 v$h3 192.0.2.130/28
+}
+
+h3_destroy()
+{
+	vlan_destroy $h3 111
+
+	mtu_restore $h3
+	simple_if_fini $h3 192.0.2.66/28
+}
+
+switch_create()
+{
+	ip link set dev $swp1 up
+	mtu_set $swp1 10000
+
+	ip link set dev $swp2 up
+	mtu_set $swp2 10000
+
+	ip link set dev $swp3 up
+	mtu_set $swp3 10000
+
+	vlan_create $swp2 111
+	vlan_create $swp3 111
+
+	ethtool -s $swp3 speed 1000 autoneg off
+	tc qdisc replace dev $swp3 root handle 3: \
+	   prio bands 8 priomap 7 7 7 7 7 7 7 7
+
+	ip link add name br1 type bridge vlan_filtering 0
+	ip link set dev br1 up
+	ip link set dev $swp1 master br1
+	ip link set dev $swp3 master br1
+
+	ip link add name br111 type bridge vlan_filtering 0
+	ip link set dev br111 up
+	ip link set dev $swp2.111 master br111
+	ip link set dev $swp3.111 master br111
+}
+
+switch_destroy()
+{
+	ip link del dev br111
+	ip link del dev br1
+
+	tc qdisc del dev $swp3 root handle 3:
+	ethtool -s $swp3 autoneg on
+
+	vlan_destroy $swp3 111
+	vlan_destroy $swp2 111
+
+	mtu_restore $swp3
+	ip link set dev $swp3 down
+
+	mtu_restore $swp2
+	ip link set dev $swp2 down
+
+	mtu_restore $swp1
+	ip link set dev $swp1 down
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}
+	h3=${NETIFS[p6]}
+
+	h3mac=$(mac_get $h3)
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+	h3_create
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+	h3_destroy
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+ping_ipv4()
+{
+	ping_test $h2 192.0.2.130
+}
+
+humanize()
+{
+	local speed=$1; shift
+
+	for unit in bps Kbps Mbps Gbps; do
+		if (($(echo "$speed < 1024" | bc))); then
+			break
+		fi
+
+		speed=$(echo "scale=1; $speed / 1024" | bc)
+	done
+
+	echo "$speed${unit}"
+}
+
+rate()
+{
+	local t0=$1; shift
+	local t1=$1; shift
+	local interval=$1; shift
+
+	echo $((8 * (t1 - t0) / interval))
+}
+
+check_rate()
+{
+	local rate=$1; shift
+	local min=$1; shift
+	local what=$1; shift
+
+	if ((rate > min)); then
+		return 0
+	fi
+
+	echo "$what $(humanize $ir) < $(humanize $min_ingress)" > /dev/stderr
+	return 1
+}
+
+measure_uc_rate()
+{
+	local what=$1; shift
+
+	local interval=10
+	local i
+	local ret=0
+
+	# Dips in performance might cause momentary ingress rate to drop below
+	# 1Gbps. That wouldn't saturate egress and MC would thus get through,
+	# seemingly winning bandwidth on account of UC. Demand at least 2Gbps
+	# average ingress rate to somewhat mitigate this.
+	local min_ingress=2147483648
+
+	$MZ $h2.111 -p 8000 -A 192.0.2.129 -B 192.0.2.130 -c 0 \
+		-a own -b $h3mac -t udp -q &
+	sleep 1
+
+	for i in {5..0}; do
+		local t0=$(ethtool_stats_get $h3 rx_octets_prio_1)
+		local u0=$(ethtool_stats_get $swp2 rx_octets_prio_1)
+		sleep $interval
+		local t1=$(ethtool_stats_get $h3 rx_octets_prio_1)
+		local u1=$(ethtool_stats_get $swp2 rx_octets_prio_1)
+
+		local ir=$(rate $u0 $u1 $interval)
+		local er=$(rate $t0 $t1 $interval)
+
+		if check_rate $ir $min_ingress "$what ingress rate"; then
+			break
+		fi
+
+		# Fail the test if we can't get the throughput.
+		if ((i == 0)); then
+			ret=1
+		fi
+	done
+
+	# Suppress noise from killing mausezahn.
+	{ kill %% && wait; } 2>/dev/null
+
+	echo $ir $er
+	exit $ret
+}
+
+test_mc_aware()
+{
+	RET=0
+
+	local -a uc_rate
+	uc_rate=($(measure_uc_rate "UC-only"))
+	check_err $? "Could not get high enough UC-only ingress rate"
+	local ucth1=${uc_rate[1]}
+
+	$MZ $h1 -p 8000 -c 0 -a own -b bc -t udp -q &
+
+	local d0=$(date +%s)
+	local t0=$(ethtool_stats_get $h3 rx_octets_prio_0)
+	local u0=$(ethtool_stats_get $swp1 rx_octets_prio_0)
+
+	local -a uc_rate_2
+	uc_rate_2=($(measure_uc_rate "UC+MC"))
+	check_err $? "Could not get high enough UC+MC ingress rate"
+	local ucth2=${uc_rate_2[1]}
+
+	local d1=$(date +%s)
+	local t1=$(ethtool_stats_get $h3 rx_octets_prio_0)
+	local u1=$(ethtool_stats_get $swp1 rx_octets_prio_0)
+
+	local deg=$(bc <<< "
+			scale=2
+			ret = 100 * ($ucth1 - $ucth2) / $ucth1
+			if (ret > 0) { ret } else { 0 }
+		    ")
+	check_err $(bc <<< "$deg > 25")
+
+	local interval=$((d1 - d0))
+	local mc_ir=$(rate $u0 $u1 $interval)
+	local mc_er=$(rate $t0 $t1 $interval)
+
+	# Suppress noise from killing mausezahn.
+	{ kill %% && wait; } 2>/dev/null
+
+	log_test "UC performace under MC overload"
+
+	echo "UC-only throughput  $(humanize $ucth1)"
+	echo "UC+MC throughput    $(humanize $ucth2)"
+	echo "Degradation         $deg %"
+	echo
+	echo "Full report:"
+	echo "  UC only:"
+	echo "    ingress UC throughput $(humanize ${uc_rate[0]})"
+	echo "    egress UC throughput  $(humanize ${uc_rate[1]})"
+	echo "  UC+MC:"
+	echo "    ingress UC throughput $(humanize ${uc_rate_2[0]})"
+	echo "    egress UC throughput  $(humanize ${uc_rate_2[1]})"
+	echo "    ingress MC throughput $(humanize $mc_ir)"
+	echo "    egress MC throughput  $(humanize $mc_er)"
+	echo
+}
+
+test_uc_aware()
+{
+	RET=0
+
+	$MZ $h2.111 -p 8000 -A 192.0.2.129 -B 192.0.2.130 -c 0 \
+		-a own -b $h3mac -t udp -q &
+
+	local d0=$(date +%s)
+	local t0=$(ethtool_stats_get $h3 rx_octets_prio_1)
+	local u0=$(ethtool_stats_get $swp2 rx_octets_prio_1)
+	sleep 1
+
+	local attempts=50
+	local passes=0
+	local i
+
+	for ((i = 0; i < attempts; ++i)); do
+		if $ARPING -c 1 -I $h1 -b 192.0.2.66 -q -w 0.1; then
+			((passes++))
+		fi
+
+		sleep 0.1
+	done
+
+	local d1=$(date +%s)
+	local t1=$(ethtool_stats_get $h3 rx_octets_prio_1)
+	local u1=$(ethtool_stats_get $swp2 rx_octets_prio_1)
+
+	local interval=$((d1 - d0))
+	local uc_ir=$(rate $u0 $u1 $interval)
+	local uc_er=$(rate $t0 $t1 $interval)
+
+	((attempts == passes))
+	check_err $?
+
+	# Suppress noise from killing mausezahn.
+	{ kill %% && wait; } 2>/dev/null
+
+	log_test "MC performace under UC overload"
+	echo "    ingress UC throughput $(humanize ${uc_ir})"
+	echo "    egress UC throughput  $(humanize ${uc_er})"
+	echo "    sent $attempts BC ARPs, got $passes responses"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/ftrace/config b/tools/testing/selftests/ftrace/config
index 07db5ab09cc7..c2c8de4fafff 100644
--- a/tools/testing/selftests/ftrace/config
+++ b/tools/testing/selftests/ftrace/config
@@ -4,6 +4,12 @@ CONFIG_FUNCTION_PROFILER=y
 CONFIG_TRACER_SNAPSHOT=y
 CONFIG_STACK_TRACER=y
 CONFIG_HIST_TRIGGERS=y
+CONFIG_SCHED_TRACER=y
 CONFIG_PREEMPT_TRACER=y
 CONFIG_IRQSOFF_TRACER=y
 CONFIG_PREEMPTIRQ_DELAY_TEST=m
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_SAMPLES=y
+CONFIG_SAMPLE_TRACE_PRINTK=m
+CONFIG_KALLSYMS_ALL=y
diff --git a/tools/testing/selftests/ftrace/ftracetest b/tools/testing/selftests/ftrace/ftracetest
index f9a9d424c980..75244db70331 100755
--- a/tools/testing/selftests/ftrace/ftracetest
+++ b/tools/testing/selftests/ftrace/ftracetest
@@ -60,15 +60,29 @@ parse_opts() { # opts
       shift 1
     ;;
     --verbose|-v|-vv|-vvv)
+      if [ $VERBOSE -eq -1 ]; then
+	usage "--console can not use with --verbose"
+      fi
       VERBOSE=$((VERBOSE + 1))
       [ $1 = '-vv' ] && VERBOSE=$((VERBOSE + 1))
       [ $1 = '-vvv' ] && VERBOSE=$((VERBOSE + 2))
       shift 1
     ;;
+    --console)
+      if [ $VERBOSE -ne 0 ]; then
+	usage "--console can not use with --verbose"
+      fi
+      VERBOSE=-1
+      shift 1
+    ;;
     --debug|-d)
       DEBUG=1
       shift 1
     ;;
+    --stop-fail)
+      STOP_FAILURE=1
+      shift 1
+    ;;
     --fail-unsupported)
       UNSUPPORTED_RESULT=1
       shift 1
@@ -117,6 +131,7 @@ KEEP_LOG=0
 DEBUG=0
 VERBOSE=0
 UNSUPPORTED_RESULT=0
+STOP_FAILURE=0
 # Parse command-line options
 parse_opts $*
 
@@ -137,11 +152,33 @@ else
   date > $LOG_FILE
 fi
 
+# Define text colors
+# Check available colors on the terminal, if any
+ncolors=`tput colors 2>/dev/null`
+color_reset=
+color_red=
+color_green=
+color_blue=
+# If stdout exists and number of colors is eight or more, use them
+if [ -t 1 -a "$ncolors" -a "$ncolors" -ge 8 ]; then
+  color_reset="\e[0m"
+  color_red="\e[31m"
+  color_green="\e[32m"
+  color_blue="\e[34m"
+fi
+
+strip_esc() {
+  # busybox sed implementation doesn't accept "\x1B", so use [:cntrl:] instead.
+  sed -E "s/[[:cntrl:]]\[([0-9]{1,2}(;[0-9]{1,2})?)?[m|K]//g"
+}
+
 prlog() { # messages
-  [ -z "$LOG_FILE" ] && echo "$@" || echo "$@" | tee -a $LOG_FILE
+  echo -e "$@"
+  [ "$LOG_FILE" ] && echo -e "$@" | strip_esc >> $LOG_FILE
 }
 catlog() { #file
-  [ -z "$LOG_FILE" ] && cat $1 || cat $1 | tee -a $LOG_FILE
+  cat $1
+  [ "$LOG_FILE" ] && cat $1 | strip_esc >> $LOG_FILE
 }
 prlog "=== Ftrace unit tests ==="
 
@@ -180,37 +217,37 @@ test_on_instance() { # testfile
 eval_result() { # sigval
   case $1 in
     $PASS)
-      prlog "	[PASS]"
+      prlog "	[${color_green}PASS${color_reset}]"
       PASSED_CASES="$PASSED_CASES $CASENO"
       return 0
     ;;
     $FAIL)
-      prlog "	[FAIL]"
+      prlog "	[${color_red}FAIL${color_reset}]"
       FAILED_CASES="$FAILED_CASES $CASENO"
       return 1 # this is a bug.
     ;;
     $UNRESOLVED)
-      prlog "	[UNRESOLVED]"
+      prlog "	[${color_blue}UNRESOLVED${color_reset}]"
       UNRESOLVED_CASES="$UNRESOLVED_CASES $CASENO"
       return 1 # this is a kind of bug.. something happened.
     ;;
     $UNTESTED)
-      prlog "	[UNTESTED]"
+      prlog "	[${color_blue}UNTESTED${color_reset}]"
       UNTESTED_CASES="$UNTESTED_CASES $CASENO"
       return 0
     ;;
     $UNSUPPORTED)
-      prlog "	[UNSUPPORTED]"
+      prlog "	[${color_blue}UNSUPPORTED${color_reset}]"
       UNSUPPORTED_CASES="$UNSUPPORTED_CASES $CASENO"
       return $UNSUPPORTED_RESULT # depends on use case
     ;;
     $XFAIL)
-      prlog "	[XFAIL]"
+      prlog "	[${color_red}XFAIL${color_reset}]"
       XFAILED_CASES="$XFAILED_CASES $CASENO"
       return 0
     ;;
     *)
-      prlog "	[UNDEFINED]"
+      prlog "	[${color_blue}UNDEFINED${color_reset}]"
       UNDEFINED_CASES="$UNDEFINED_CASES $CASENO"
       return 1 # this must be a test bug
     ;;
@@ -269,16 +306,18 @@ __run_test() { # testfile
 # Run one test case
 run_test() { # testfile
   local testname=`basename $1`
+  testcase $1
   if [ ! -z "$LOG_FILE" ] ; then
-    local testlog=`mktemp $LOG_DIR/${testname}-log.XXXXXX`
+    local testlog=`mktemp $LOG_DIR/${CASENO}-${testname}-log.XXXXXX`
   else
     local testlog=/proc/self/fd/1
   fi
   export TMPDIR=`mktemp -d /tmp/ftracetest-dir.XXXXXX`
-  testcase $1
   echo "execute$INSTANCE: "$1 > $testlog
   SIG_RESULT=0
-  if [ -z "$LOG_FILE" ]; then
+  if [ $VERBOSE -eq -1 ]; then
+    __run_test $1
+  elif [ -z "$LOG_FILE" ]; then
     __run_test $1 2>&1
   elif [ $VERBOSE -ge 3 ]; then
     __run_test $1 | tee -a $testlog 2>&1
@@ -304,6 +343,10 @@ run_test() { # testfile
 # Main loop
 for t in $TEST_CASES; do
   run_test $t
+  if [ $STOP_FAILURE -ne 0 -a $TOTAL_RESULT -ne 0 ]; then
+    echo "A failure detected. Stop test."
+    exit 1
+  fi
 done
 
 # Test on instance loop
@@ -315,7 +358,12 @@ for t in $TEST_CASES; do
   run_test $t
   rmdir $TRACING_DIR
   TRACING_DIR=$SAVED_TRACING_DIR
+  if [ $STOP_FAILURE -ne 0 -a $TOTAL_RESULT -ne 0 ]; then
+    echo "A failure detected. Stop test."
+    exit 1
+  fi
 done
+(cd $TRACING_DIR; initialize_ftrace) # for cleanup
 
 prlog ""
 prlog "# of passed: " `echo $PASSED_CASES | wc -w`
diff --git a/tools/testing/selftests/ftrace/test.d/00basic/ringbuffer_size.tc b/tools/testing/selftests/ftrace/test.d/00basic/ringbuffer_size.tc
new file mode 100644
index 000000000000..ab70f0077c35
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/00basic/ringbuffer_size.tc
@@ -0,0 +1,22 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Change the ringbuffer size
+# flags: instance
+
+rb_size_test() {
+ORIG=`cat buffer_size_kb`
+
+expr $ORIG / 2 > buffer_size_kb
+
+expr $ORIG \* 2 > buffer_size_kb
+
+echo $ORIG > buffer_size_kb
+}
+
+rb_size_test
+
+: "If per-cpu buffer is supported, imbalance it"
+if [ -d per_cpu/cpu0 ]; then
+  cd per_cpu/cpu0
+  rb_size_test
+fi
diff --git a/tools/testing/selftests/ftrace/test.d/00basic/trace_pipe.tc b/tools/testing/selftests/ftrace/test.d/00basic/trace_pipe.tc
new file mode 100644
index 000000000000..5058fbcfd90f
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/00basic/trace_pipe.tc
@@ -0,0 +1,16 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: trace_pipe and trace_marker
+# flags: instance
+
+[ ! -f trace_marker ] && exit_unsupported
+
+echo "test input 1" > trace_marker
+
+: "trace interface never consume the ring buffer"
+grep -q "test input 1" trace
+grep -q "test input 1" trace
+
+: "trace interface never consume the ring buffer"
+head -n 1 trace_pipe | grep -q "test input 1"
+! grep -q "test input 1" trace
diff --git a/tools/testing/selftests/ftrace/test.d/event/event-enable.tc b/tools/testing/selftests/ftrace/test.d/event/event-enable.tc
index 9daf034186f5..dfb0d5122f7b 100644
--- a/tools/testing/selftests/ftrace/test.d/event/event-enable.tc
+++ b/tools/testing/selftests/ftrace/test.d/event/event-enable.tc
@@ -9,23 +9,15 @@ do_reset() {
 }
 
 fail() { #msg
-    do_reset
     echo $1
     exit_fail
 }
 
-yield() {
-    ping localhost -c 1 || sleep .001 || usleep 1 || sleep 1
-}
-
 if [ ! -f set_event -o ! -d events/sched ]; then
     echo "event tracing is not supported"
     exit_unsupported
 fi
 
-reset_tracer
-do_reset
-
 echo 'sched:sched_switch' > set_event
 
 yield
@@ -57,6 +49,4 @@ if [ $count -ne 0 ]; then
     fail "sched_switch events should not be recorded"
 fi
 
-do_reset
-
 exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/event/event-pid.tc b/tools/testing/selftests/ftrace/test.d/event/event-pid.tc
index 132478b305c2..f9cb214220b1 100644
--- a/tools/testing/selftests/ftrace/test.d/event/event-pid.tc
+++ b/tools/testing/selftests/ftrace/test.d/event/event-pid.tc
@@ -16,10 +16,6 @@ fail() { #msg
     exit_fail
 }
 
-yield() {
-    ping localhost -c 1 || sleep .001 || usleep 1 || sleep 1
-}
-
 if [ ! -f set_event -o ! -d events/sched ]; then
     echo "event tracing is not supported"
     exit_unsupported
@@ -30,8 +26,7 @@ if [ ! -f set_event_pid ]; then
     exit_unsupported
 fi
 
-reset_tracer
-do_reset
+echo 0 > options/event-fork
 
 echo 1 > events/sched/sched_switch/enable
 
@@ -47,6 +42,7 @@ do_reset
 read mypid rest < /proc/self/stat
 
 echo $mypid > set_event_pid
+grep -q $mypid set_event_pid
 echo 'sched:sched_switch' > set_event
 
 yield
diff --git a/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc b/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc
index 6a37a8642ee6..83a8c571e93a 100644
--- a/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc
+++ b/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc
@@ -9,23 +9,15 @@ do_reset() {
 }
 
 fail() { #msg
-    do_reset
     echo $1
     exit_fail
 }
 
-yield() {
-    ping localhost -c 1 || sleep .001 || usleep 1 || sleep 1
-}
-
 if [ ! -f set_event -o ! -d events/sched ]; then
     echo "event tracing is not supported"
     exit_unsupported
 fi
 
-reset_tracer
-do_reset
-
 echo 'sched:*' > set_event
 
 yield
@@ -57,6 +49,4 @@ if [ $count -ne 0 ]; then
     fail "any of scheduler events should not be recorded"
 fi
 
-do_reset
-
 exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/event/toplevel-enable.tc b/tools/testing/selftests/ftrace/test.d/event/toplevel-enable.tc
index 4e9b6e2c0219..84d7bda08d2a 100644
--- a/tools/testing/selftests/ftrace/test.d/event/toplevel-enable.tc
+++ b/tools/testing/selftests/ftrace/test.d/event/toplevel-enable.tc
@@ -8,23 +8,15 @@ do_reset() {
 }
 
 fail() { #msg
-    do_reset
     echo $1
     exit_fail
 }
 
-yield() {
-    ping localhost -c 1 || sleep .001 || usleep 1 || sleep 1
-}
-
 if [ ! -f available_events -o ! -f set_event -o ! -d events ]; then
     echo "event tracing is not supported"
     exit_unsupported
 fi
 
-reset_tracer
-do_reset
-
 echo '*:*' > set_event
 
 yield
@@ -60,6 +52,4 @@ if [ $count -ne 0 ]; then
     fail "any of events should not be recorded"
 fi
 
-do_reset
-
 exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/event/trace_printk.tc b/tools/testing/selftests/ftrace/test.d/event/trace_printk.tc
new file mode 100644
index 000000000000..b02550b42be9
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/event/trace_printk.tc
@@ -0,0 +1,27 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Test trace_printk from module
+
+rmmod trace-printk ||:
+if ! modprobe trace-printk ; then
+  echo "No trace-printk sample module - please make CONFIG_SAMPLE_TRACE_PRINTK=m"
+  exit_unresolved;
+fi
+
+echo "Waiting for irq work"
+sleep 1
+
+grep -q ": This .* trace_bputs" trace
+grep -q ": This .* trace_puts" trace
+grep -q ": This .* trace_bprintk" trace
+grep -q ": This .* trace_printk" trace
+
+grep -q ": (irq) .* trace_bputs" trace
+grep -q ": (irq) .* trace_puts" trace
+grep -q ": (irq) .* trace_bprintk" trace
+grep -q ": (irq) .* trace_printk" trace
+
+grep -q "This is a %s that will use trace_bprintk" printk_formats
+grep -q "(irq) This is a static string that will use trace_bputs" printk_formats
+
+rmmod trace-printk ||:
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter-stack.tc b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter-stack.tc
index 1aec99d108eb..aefab0c66d54 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter-stack.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter-stack.tc
@@ -16,13 +16,9 @@ if [ ! -f set_ftrace_filter ]; then
 fi
 
 do_reset() {
-    reset_tracer
     if [ -e /proc/sys/kernel/stack_tracer_enabled ]; then
 	    echo 0 > /proc/sys/kernel/stack_tracer_enabled
     fi
-    enable_tracing
-    clear_trace
-    echo > set_ftrace_filter
 }
 
 fail() { # msg
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter.tc b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter.tc
index 9f8d27ca39cf..c8a5209f2119 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter.tc
@@ -9,14 +9,7 @@ if ! grep -q function_graph available_tracers; then
     exit_unsupported
 fi
 
-do_reset() {
-    reset_tracer
-    enable_tracing
-    clear_trace
-}
-
 fail() { # msg
-    do_reset
     echo $1
     exit_fail
 }
@@ -48,6 +41,4 @@ if [ $count -eq 0 ]; then
     fail "No schedule traces found?"
 fi
 
-do_reset
-
 exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc
index 524ce24b3c22..64cfcc75e3c1 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc
@@ -35,12 +35,6 @@ if [ $do_function_fork -eq 1 ]; then
 fi
 
 do_reset() {
-    reset_tracer
-    clear_trace
-    enable_tracing
-    echo > set_ftrace_filter
-    echo > set_ftrace_pid
-
     if [ $do_function_fork -eq 0 ]; then
 	return
     fi
@@ -54,10 +48,6 @@ fail() { # msg
     exit_fail
 }
 
-yield() {
-    ping localhost -c 1 || sleep .001 || usleep 1 || sleep 1
-}
-
 do_test() {
     disable_tracing
 
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc
new file mode 100644
index 000000000000..bf72e783d014
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc
@@ -0,0 +1,12 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL2.0
+# description: ftrace - stacktrace filter command
+# flags: instance
+
+echo _do_fork:stacktrace >> set_ftrace_filter
+
+grep -q "_do_fork:stacktrace:unlimited" set_ftrace_filter
+
+(echo "forked"; sleep 1)
+
+grep -q "<stack trace>" trace
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_cpumask.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_cpumask.tc
new file mode 100644
index 000000000000..0e6810743576
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_cpumask.tc
@@ -0,0 +1,42 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL2.0
+# description: ftrace - function trace with cpumask
+
+if ! which nproc ; then
+  nproc() {
+    ls -d /sys/devices/system/cpu/cpu[0-9]* | wc -l
+  }
+fi
+
+NP=`nproc`
+
+if [ $NP -eq 1 ] ;then
+  echo "We can not test cpumask on UP environment"
+  exit_unresolved
+fi
+
+ORIG_CPUMASK=`cat tracing_cpumask`
+
+do_reset() {
+  echo $ORIG_CPUMASK > tracing_cpumask
+}
+
+echo 0 > tracing_on
+echo > trace
+: "Bitmask only record on CPU1"
+echo 2 > tracing_cpumask
+MASK=0x`cat tracing_cpumask`
+test `printf "%d" $MASK` -eq 2 || do_reset
+
+echo function > current_tracer
+echo 1 > tracing_on
+(echo "forked")
+echo 0 > tracing_on
+
+: "Check CPU1 events are recorded"
+grep -q -e "\[001\]" trace || do_reset
+
+: "There should be No other cpu events"
+! grep -qv -e "\[001\]" -e "^#" trace || do_reset
+
+do_reset
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc
index 6fed4cf2db81..ca2ffd7957f9 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc
@@ -25,15 +25,12 @@ do_reset() {
 }
 
 fail() { # mesg
-    do_reset
     echo $1
     exit_fail
 }
 
 SLEEP_TIME=".1"
 
-do_reset
-
 echo "Testing function probes with events:"
 
 EVENT="sched:sched_switch"
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_mod_trace.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_mod_trace.tc
new file mode 100644
index 000000000000..9330c873f9fe
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_mod_trace.tc
@@ -0,0 +1,24 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: ftrace - function trace on module
+
+[ ! -f set_ftrace_filter ] && exit_unsupported
+
+: "mod: allows to filter a non exist function"
+echo 'non_exist_func:mod:non_exist_module' > set_ftrace_filter
+grep -q "non_exist_func" set_ftrace_filter
+
+: "mod: on exist module"
+echo '*:mod:trace_printk' > set_ftrace_filter
+if ! modprobe trace-printk ; then
+  echo "No trace-printk sample module - please make CONFIG_SAMPLE_TRACE_PRINTK=
+m"
+  exit_unresolved;
+fi
+
+: "Wildcard should be resolved after loading module"
+grep -q "trace_printk_irq_work" set_ftrace_filter
+
+: "After removing the filter becomes empty"
+rmmod trace_printk
+test `cat set_ftrace_filter | wc -l` -eq 0
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_profile_stat.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_profile_stat.tc
new file mode 100644
index 000000000000..0d501058aa75
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_profile_stat.tc
@@ -0,0 +1,22 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: ftrace - function profiling
+
+[ ! -f function_profile_enabled ] && exit_unsupported
+
+: "Enable function profile"
+echo 1 > function_profile_enabled
+
+: "Profile must be updated"
+cp trace_stat/function0 $TMPDIR/
+( echo "forked"; sleep 1 )
+: "diff returns 0 if there is no difference"
+! diff trace_stat/function0 $TMPDIR/function0
+
+echo 0 > function_profile_enabled
+
+: "Profile must NOT be updated"
+cp trace_stat/function0 $TMPDIR/
+( echo "forked"; sleep 1 )
+: "diff returns 0 if there is no difference"
+diff trace_stat/function0 $TMPDIR/function0
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_profiler.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_profiler.tc
index b2d5a8febfe8..dfbae637c60c 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/func_profiler.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_profiler.tc
@@ -29,8 +29,6 @@ if [ ! -f function_profile_enabled ]; then
 fi
 
 fail() { # mesg
-    reset_tracer
-    echo > set_ftrace_filter
     echo $1
     exit_fail
 }
@@ -76,6 +74,4 @@ if ! grep -v -e '^#' -e 'schedule' trace > /dev/null; then
 	fail "no other functions besides schedule was found"
 fi
 
-reset_tracer
-
 exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc
index 68e7a48f5828..51f6e6146bd9 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc
@@ -15,22 +15,11 @@ if [ ! -f set_ftrace_filter ]; then
     exit_unsupported
 fi
 
-do_reset() {
-    reset_tracer
-    reset_ftrace_filter
-    disable_events
-    clear_trace
-    enable_tracing
-}
-
 fail() { # mesg
-    do_reset
     echo $1
     exit_fail
 }
 
-do_reset
-
 FILTER=set_ftrace_filter
 FUNC1="schedule"
 FUNC2="do_softirq"
@@ -165,6 +154,4 @@ test_actual
 rm $TMPDIR/expected
 rm $TMPDIR/actual
 
-do_reset
-
 exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_stack_tracer.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_stack_tracer.tc
new file mode 100644
index 000000000000..b414f0e3c646
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_stack_tracer.tc
@@ -0,0 +1,39 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: ftrace - Max stack tracer
+# Test the basic function of max-stack usage tracing
+
+if [ ! -f stack_trace ]; then
+  echo "Max stack tracer is not supported - please make CONFIG_STACK_TRACER=y"
+  exit_unsupported
+fi
+
+echo > stack_trace_filter
+echo 0 > stack_max_size
+echo 1 > /proc/sys/kernel/stack_tracer_enabled
+
+: "Fork and wait for the first entry become !lock"
+timeout=10
+while [ $timeout -ne 0 ]; do
+  ( echo "forked" )
+  FL=`grep " 0)" stack_trace`
+  echo $FL | grep -q "lock" || break;
+  timeout=$((timeout - 1))
+done
+echo 0 > /proc/sys/kernel/stack_tracer_enabled
+
+echo '*lock*' > stack_trace_filter
+test `cat stack_trace_filter | wc -l` -eq `grep lock stack_trace_filter | wc -l`
+
+echo 0 > stack_max_size
+echo 1 > /proc/sys/kernel/stack_tracer_enabled
+
+: "Fork and always the first entry including lock"
+timeout=10
+while [ $timeout -ne 0 ]; do
+  ( echo "forked" )
+  FL=`grep " 0)" stack_trace`
+  echo $FL | grep -q "lock"
+  timeout=$((timeout - 1))
+done
+echo 0 > /proc/sys/kernel/stack_tracer_enabled
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_traceonoff_triggers.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_traceonoff_triggers.tc
index f6d9ac73268a..0c04282d33dd 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/func_traceonoff_triggers.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_traceonoff_triggers.tc
@@ -16,24 +16,13 @@ if [ ! -f set_ftrace_filter ]; then
     exit_unsupported
 fi
 
-do_reset() {
-    reset_ftrace_filter
-    reset_tracer
-    disable_events
-    clear_trace
-    enable_tracing
-}
-
 fail() { # mesg
-    do_reset
     echo $1
     exit_fail
 }
 
 SLEEP_TIME=".1"
 
-do_reset
-
 echo "Testing function probes with enabling disabling tracing:"
 
 cnt_trace() {
diff --git a/tools/testing/selftests/ftrace/test.d/functions b/tools/testing/selftests/ftrace/test.d/functions
index e4645d5e3126..7b96e80e6b8a 100644
--- a/tools/testing/selftests/ftrace/test.d/functions
+++ b/tools/testing/selftests/ftrace/test.d/functions
@@ -89,12 +89,23 @@ initialize_ftrace() { # Reset ftrace to initial-state
     reset_tracer
     reset_trigger
     reset_events_filter
+    reset_ftrace_filter
     disable_events
     echo > set_event_pid	# event tracer is always on
+    echo > set_ftrace_pid
     [ -f set_ftrace_filter ] && echo | tee set_ftrace_*
     [ -f set_graph_function ] && echo | tee set_graph_*
     [ -f stack_trace_filter ] && echo > stack_trace_filter
     [ -f kprobe_events ] && echo > kprobe_events
     [ -f uprobe_events ] && echo > uprobe_events
+    [ -f synthetic_events ] && echo > synthetic_events
+    [ -f snapshot ] && echo 0 > snapshot
+    clear_trace
     enable_tracing
 }
+
+LOCALHOST=127.0.0.1
+
+yield() {
+    ping $LOCALHOST -c 1 || sleep .001 || usleep 1 || sleep 1
+}
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc b/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc
index 4604d2103c89..bb1eb5a7c64e 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc
@@ -4,10 +4,7 @@
 
 [ -f kprobe_events ] || exit_unsupported # this is configurable
 
-echo 0 > events/enable
-echo > kprobe_events
 echo p:myevent _do_fork > kprobe_events
 grep myevent kprobe_events
 test -d events/kprobes/myevent
 echo > kprobe_events
-clear_trace
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc b/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc
index bbc443a9190c..442c1a8c5edf 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc
@@ -4,12 +4,9 @@
 
 [ -f kprobe_events ] || exit_unsupported
 
-echo 0 > events/enable
-echo > kprobe_events
 echo p:myevent _do_fork > kprobe_events
 test -d events/kprobes/myevent
 echo 1 > events/kprobes/myevent/enable
 echo > kprobe_events && exit_fail # this must fail
 echo 0 > events/kprobes/myevent/enable
 echo > kprobe_events # this must succeed
-clear_trace
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc
index 8b43c6804fc3..bcdecf80a8f1 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc
@@ -4,13 +4,15 @@
 
 [ -f kprobe_events ] || exit_unsupported # this is configurable
 
-echo 0 > events/enable
-echo > kprobe_events
 echo 'p:testprobe _do_fork $stack $stack0 +0($stack)' > kprobe_events
-grep testprobe kprobe_events
+grep testprobe kprobe_events | grep -q 'arg1=\$stack arg2=\$stack0 arg3=+0(\$stack)'
 test -d events/kprobes/testprobe
+
 echo 1 > events/kprobes/testprobe/enable
 ( echo "forked")
+grep testprobe trace | grep '_do_fork' | \
+  grep -q 'arg1=0x[[:xdigit:]]* arg2=0x[[:xdigit:]]* arg3=0x[[:xdigit:]]*$'
+
 echo 0 > events/kprobes/testprobe/enable
 echo "-:testprobe" >> kprobe_events
 clear_trace
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc
new file mode 100644
index 000000000000..15c1f70fcaf9
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc
@@ -0,0 +1,17 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Kprobe event with comm arguments
+
+[ -f kprobe_events ] || exit_unsupported # this is configurable
+
+grep -A1 "fetcharg:" README | grep -q "\$comm" || exit_unsupported # this is too old
+
+echo 'p:testprobe _do_fork comm=$comm ' > kprobe_events
+grep testprobe kprobe_events | grep -q 'comm=$comm'
+test -d events/kprobes/testprobe
+
+echo 1 > events/kprobes/testprobe/enable
+( echo "forked")
+grep testprobe trace | grep -q 'comm=".*"'
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc
index 1ad70cdaf442..46e7744f8358 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc
@@ -4,9 +4,6 @@
 
 [ -f kprobe_events ] || exit_unsupported # this is configurable
 
-echo 0 > events/enable
-echo > kprobe_events
-
 case `uname -m` in
 x86_64)
   ARG1=%di
@@ -44,5 +41,3 @@ echo 1 > events/kprobes/testprobe/enable
 echo "p:test _do_fork" >> kprobe_events
 grep -qe "testprobe.* arg1=\"test\" arg2=\"test\"" trace
 
-echo 0 > events/enable
-echo > kprobe_events
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc
new file mode 100644
index 000000000000..2b6dd33f9076
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc
@@ -0,0 +1,39 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Kprobe event symbol argument
+
+[ -f kprobe_events ] || exit_unsupported # this is configurable
+
+SYMBOL="linux_proc_banner"
+
+if [ ! -f /proc/kallsyms ]; then
+  echo "Can not check the target symbol - please enable CONFIG_KALLSYMS"
+  exit_unresolved
+elif ! grep "$SYMBOL\$" /proc/kallsyms; then
+  echo "Linux banner is not exported - please enable CONFIG_KALLSYMS_ALL"
+  exit_unresolved
+fi
+
+: "Test get basic types symbol argument"
+echo "p:testprobe_u _do_fork arg1=@linux_proc_banner:u64 arg2=@linux_proc_banner:u32 arg3=@linux_proc_banner:u16 arg4=@linux_proc_banner:u8" > kprobe_events
+echo "p:testprobe_s _do_fork arg1=@linux_proc_banner:s64 arg2=@linux_proc_banner:s32 arg3=@linux_proc_banner:s16 arg4=@linux_proc_banner:s8" >> kprobe_events
+if grep -q "x8/16/32/64" README; then
+  echo "p:testprobe_x _do_fork arg1=@linux_proc_banner:x64 arg2=@linux_proc_banner:x32 arg3=@linux_proc_banner:x16 arg4=@linux_proc_banner:x8" >> kprobe_events
+fi
+echo "p:testprobe_bf _do_fork arg1=@linux_proc_banner:b8@4/32" >> kprobe_events
+echo 1 > events/kprobes/enable
+(echo "forked")
+echo 0 > events/kprobes/enable
+grep "testprobe_[usx]:.* arg1=.* arg2=.* arg3=.* arg4=.*" trace
+grep "testprobe_bf:.* arg1=.*" trace
+
+: "Test get string symbol argument"
+echo "p:testprobe_str _do_fork arg1=@linux_proc_banner:string" > kprobe_events
+echo 1 > events/kprobes/enable
+(echo "forked")
+echo 0 > events/kprobes/enable
+RESULT=`grep "testprobe_str" trace | sed -e 's/.* arg1=\(.*\)/\1/'`
+
+RESULT=`echo $RESULT | sed -e 's/.* \((.*)\) \((.*)\) .*/\1 \2/'`
+ORIG=`cat /proc/version | sed -e 's/.* \((.*)\) \((.*)\) .*/\1 \2/'`
+test "$RESULT" = "$ORIG"
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc
index d026ff4e562f..6f0f19953193 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc
@@ -6,9 +6,6 @@
 
 grep "x8/16/32/64" README > /dev/null || exit_unsupported # version issue
 
-echo 0 > events/enable
-echo > kprobe_events
-
 PROBEFUNC="vfs_read"
 GOODREG=
 BADREG=
@@ -78,8 +75,11 @@ test_badarg "\$stackp" "\$stack0+10" "\$stack1-10"
 echo "r ${PROBEFUNC} \$retval" > kprobe_events
 ! echo "p ${PROBEFUNC} \$retval" > kprobe_events
 
+# $comm was introduced in 4.8, older kernels reject it.
+if grep -A1 "fetcharg:" README | grep -q '\$comm' ; then
 : "Comm access"
 test_goodarg "\$comm"
+fi
 
 : "Indirect memory access"
 test_goodarg "+0(${GOODREG})" "-0(${GOODREG})" "+10(\$stack)" \
@@ -100,5 +100,3 @@ test_badarg "${GOODREG}::${GOODTYPE}" "${GOODREG}:${BADTYPE}" \
 
 test_goodarg "\$comm:string" "+0(\$stack):string"
 test_badarg "\$comm:x64" "\$stack:string" "${GOODREG}:string"
-
-echo > kprobe_events
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc
index 2a1755bfc290..1bcb67dcae26 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc
@@ -6,33 +6,45 @@
 
 grep "x8/16/32/64" README > /dev/null || exit_unsupported # version issue
 
-echo 0 > events/enable
-echo > kprobe_events
-enable_tracing
-
-echo 'p:testprobe _do_fork $stack0:s32 $stack0:u32 $stack0:x32 $stack0:b8@4/32' > kprobe_events
-grep testprobe kprobe_events
-test -d events/kprobes/testprobe
-
-echo 1 > events/kprobes/testprobe/enable
-( echo "forked")
-echo 0 > events/kprobes/testprobe/enable
-ARGS=`tail -n 1 trace | sed -e 's/.* arg1=\(.*\) arg2=\(.*\) arg3=\(.*\) arg4=\(.*\)/\1 \2 \3 \4/'`
+gen_event() { # Bitsize
+  echo "p:testprobe _do_fork \$stack0:s$1 \$stack0:u$1 \$stack0:x$1 \$stack0:b4@4/$1"
+}
 
-check_types() {
-  X1=`printf "%x" $1 | tail -c 8`
+check_types() { # s-type u-type x-type bf-type width
+  test $# -eq 5
+  CW=$5
+  CW=$((CW / 4))
+  X1=`printf "%x" $1 | tail -c ${CW}`
   X2=`printf "%x" $2`
   X3=`printf "%x" $3`
   test $X1 = $X2
   test $X2 = $X3
   test 0x$X3 = $3
 
-  B4=`printf "%02x" $4`
-  B3=`echo -n $X3 | tail -c 3 | head -c 2`
+  B4=`printf "%1x" $4`
+  B3=`printf "%03x" 0x$X3 | tail -c 2 | head -c 1`
   test $B3 = $B4
 }
-check_types $ARGS
 
-echo "-:testprobe" >> kprobe_events
-clear_trace
-test -d events/kprobes/testprobe && exit_fail || exit_pass
+for width in 64 32 16 8; do
+  : "Add new event with basic types"
+  gen_event $width > kprobe_events
+  grep testprobe kprobe_events
+  test -d events/kprobes/testprobe
+
+  : "Trace the event"
+  echo 1 > events/kprobes/testprobe/enable
+  ( echo "forked")
+  echo 0 > events/kprobes/testprobe/enable
+
+  : "Confirm the arguments is recorded in given types correctly"
+  ARGS=`grep "testprobe" trace | sed -e 's/.* arg1=\(.*\) arg2=\(.*\) arg3=\(.*\) arg4=\(.*\)/\1 \2 \3 \4/'`
+  check_types $ARGS $width
+
+  : "Clear event for next loop"
+  echo "-:testprobe" >> kprobe_events
+  clear_trace
+
+done
+
+exit_pass
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc
index 2724a1068cb1..3fb70e01b1fe 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc
@@ -4,9 +4,6 @@
 
 [ -f kprobe_events ] || exit_unsupported # this is configurable
 
-disable_events
-echo > kprobe_events
-
 :;: "Add an event on function without name" ;:
 
 FUNC=`grep " [tT] .*vfs_read$" /proc/kallsyms | tail -n 1 | cut -f 3 -d " "`
@@ -33,5 +30,3 @@ echo "p $FUNC" > kprobe_events
 EVENT=`grep $FUNC kprobe_events | cut -f 1 -d " " | cut -f 2 -d:`
 [ "x" != "x$EVENT" ] || exit_failure
 test -d events/$EVENT || exit_failure
-
-echo > kprobe_events
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc
index cc4cac0e60f2..492426e95e09 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc
@@ -8,8 +8,6 @@ grep function available_tracers || exit_unsupported # this is configurable
 # prepare
 echo nop > current_tracer
 echo _do_fork > set_ftrace_filter
-echo 0 > events/enable
-echo > kprobe_events
 echo 'p:testprobe _do_fork' > kprobe_events
 
 # kprobe on / ftrace off
@@ -47,10 +45,3 @@ echo > trace
 ( echo "forked")
 grep testprobe trace
 ! grep '_do_fork <-' trace
-
-# cleanup
-echo nop > current_tracer
-echo > set_ftrace_filter
-echo 0 > events/kprobes/testprobe/enable
-echo > kprobe_events
-echo > trace
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_module.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_module.tc
index 1e9f75f7a30f..d861bd776c5e 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_module.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_module.tc
@@ -4,14 +4,18 @@
 
 [ -f kprobe_events ] || exit_unsupported # this is configurable
 
-disable_events
-echo > kprobe_events
+rmmod trace-printk ||:
+if ! modprobe trace-printk ; then
+  echo "No trace-printk sample module - please make CONFIG_SAMPLE_TRACE_PRINTK=
+m"
+  exit_unresolved;
+fi
+
+MOD=trace_printk
+FUNC=trace_printk_irq_work
 
 :;: "Add an event on a module function without specifying event name" ;:
 
-MOD=`lsmod | head -n 2 | tail -n 1 | cut -f1 -d" "`
-FUNC=`grep -m 1 ".* t .*\\[$MOD\\]" /proc/kallsyms | xargs | cut -f3 -d" "`
-[ "x" != "x$MOD" -a "y" != "y$FUNC" ] || exit_unresolved
 echo "p $MOD:$FUNC" > kprobe_events
 PROBE_NAME=`echo $MOD:$FUNC | tr ".:" "_"`
 test -d events/kprobes/p_${PROBE_NAME}_0 || exit_failure
@@ -26,4 +30,24 @@ test -d events/kprobes/event1 || exit_failure
 echo "p:kprobes1/event1 $MOD:$FUNC" > kprobe_events
 test -d events/kprobes1/event1 || exit_failure
 
-echo > kprobe_events
+:;: "Remove target module, but event still be there" ;:
+if ! rmmod trace-printk ; then
+  echo "Failed to unload module - please enable CONFIG_MODULE_UNLOAD"
+  exit_unresolved;
+fi
+test -d events/kprobes1/event1
+
+:;: "Check posibility to defining events on unloaded module";:
+echo "p:event2 $MOD:$FUNC" >> kprobe_events
+
+:;: "Target is gone, but we can prepare for next time";:
+echo 1 > events/kprobes1/event1/enable
+
+:;: "Load module again, which means the event1 should be recorded";:
+modprobe trace-printk
+grep "event1:" trace
+
+:;: "Remove the module again and check the event is not locked"
+rmmod trace-printk
+echo 0 > events/kprobes1/event1/enable
+echo "-:kprobes1/event1" >> kprobe_events
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc
index 321954683aaa..ac9ab4a12e53 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc
@@ -4,13 +4,16 @@
 
 [ -f kprobe_events ] || exit_unsupported # this is configurable
 
-echo 0 > events/enable
-echo > kprobe_events
+# Add new kretprobe event
 echo 'r:testprobe2 _do_fork $retval' > kprobe_events
-grep testprobe2 kprobe_events
+grep testprobe2 kprobe_events | grep -q 'arg1=\$retval'
 test -d events/kprobes/testprobe2
+
 echo 1 > events/kprobes/testprobe2/enable
 ( echo "forked")
+
+cat trace | grep testprobe2 | grep -q '<- _do_fork'
+
 echo 0 > events/kprobes/testprobe2/enable
 echo '-:testprobe2' >> kprobe_events
 clear_trace
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_maxactive.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_maxactive.tc
index 7c0290684c43..8e05b178519a 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_maxactive.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_maxactive.tc
@@ -5,8 +5,6 @@
 [ -f kprobe_events ] || exit_unsupported # this is configurable
 grep -q 'r\[maxactive\]' README || exit_unsupported # this is older version
 
-echo > kprobe_events
-
 # Test if we successfully reject unknown messages
 if echo 'a:myprobeaccept inet_csk_accept' > kprobe_events; then false; else true; fi
 
@@ -37,5 +35,3 @@ echo > kprobe_events
 echo 'r10 inet_csk_accept' > kprobe_events
 grep inet_csk_accept kprobe_events
 echo > kprobe_events
-
-clear_trace
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc b/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc
index ce361b9d62cf..5862eee91e1d 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc
@@ -12,11 +12,6 @@ case `uname -m` in
   *) OFFS=0;;
 esac
 
-if [ -d events/kprobes ]; then
-  echo 0 > events/kprobes/enable
-  echo > kprobe_events
-fi
-
 N=0
 echo "Setup up kprobes on first available 256 text symbols"
 grep -i " t " /proc/kallsyms | cut -f3 -d" " | grep -v .*\\..* | \
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/probepoint.tc b/tools/testing/selftests/ftrace/test.d/kprobe/probepoint.tc
index 519d2763f5d2..a902aa0aaabc 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/probepoint.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/probepoint.tc
@@ -30,8 +30,6 @@ if [ `printf "%x" -1 | wc -c` != 9 ]; then
   UINT_TEST=yes
 fi
 
-echo 0 > events/enable
-echo > kprobe_events
 echo "p:testprobe ${TARGET_FUNC}" > kprobe_events
 echo "p:testprobe ${TARGET}" > kprobe_events
 echo "p:testprobe ${TARGET_FUNC}${NEXT}" > kprobe_events
@@ -39,5 +37,3 @@ echo "p:testprobe ${TARGET_FUNC}${NEXT}" > kprobe_events
 if [ "${UINT_TEST}" = yes ]; then
 ! echo "p:testprobe ${TARGET_FUNC}${OVERFLOW}" > kprobe_events
 fi
-echo > kprobe_events
-clear_trace
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc b/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc
new file mode 100644
index 000000000000..0384b525cdee
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc
@@ -0,0 +1,15 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Kprobe dynamic event - adding and removing
+
+[ -f kprobe_events ] || exit_unsupported # this is configurable
+
+! grep -q 'myevent' kprobe_profile
+echo p:myevent _do_fork > kprobe_events
+grep -q 'myevent[[:space:]]*0[[:space:]]*0$' kprobe_profile
+echo 1 > events/kprobes/myevent/enable
+( echo "forked" )
+grep -q 'myevent[[:space:]]*[[:digit:]]*[[:space:]]*0$' kprobe_profile
+echo 0 > events/kprobes/myevent/enable
+echo > kprobe_events
+! grep -q 'myevent' kprobe_profile
diff --git a/tools/testing/selftests/ftrace/test.d/template b/tools/testing/selftests/ftrace/test.d/template
index 5c39ceb18a0d..799da7e0b3c9 100644
--- a/tools/testing/selftests/ftrace/test.d/template
+++ b/tools/testing/selftests/ftrace/test.d/template
@@ -1,4 +1,5 @@
 #!/bin/sh
+# SPDX-License-Identifier: GPL2.0
 # description: %HERE DESCRIBE WHAT THIS DOES%
 # you have to add ".tc" extention for your testcase file
 # Note that all tests are run with "errexit" option.
diff --git a/tools/testing/selftests/ftrace/test.d/tracer/wakeup.tc b/tools/testing/selftests/ftrace/test.d/tracer/wakeup.tc
new file mode 100644
index 000000000000..e3005fa785f0
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/tracer/wakeup.tc
@@ -0,0 +1,25 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL2.0
+# description: Test wakeup tracer
+
+if ! which chrt ; then
+  echo "chrt is not found. This test requires nice command."
+  exit_unresolved
+fi
+
+if ! grep -wq "wakeup" available_tracers ; then
+  echo "wakeup tracer is not supported"
+  exit_unsupported
+fi
+
+echo wakeup > current_tracer
+echo 1 > tracing_on
+echo 0 > tracing_max_latency
+
+: "Wakeup higher priority task"
+chrt -f 5 sleep 1
+
+echo 0 > tracing_on
+grep '+ \[[[:digit:]]*\]' trace
+grep '==> \[[[:digit:]]*\]' trace
+
diff --git a/tools/testing/selftests/ftrace/test.d/tracer/wakeup_rt.tc b/tools/testing/selftests/ftrace/test.d/tracer/wakeup_rt.tc
new file mode 100644
index 000000000000..f99b5178e00a
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/tracer/wakeup_rt.tc
@@ -0,0 +1,25 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL2.0
+# description: Test wakeup RT tracer
+
+if ! which chrt ; then
+  echo "chrt is not found. This test requires chrt command."
+  exit_unresolved
+fi
+
+if ! grep -wq "wakeup_rt" available_tracers ; then
+  echo "wakeup_rt tracer is not supported"
+  exit_unsupported
+fi
+
+echo wakeup_rt > current_tracer
+echo 1 > tracing_on
+echo 0 > tracing_max_latency
+
+: "Wakeup a realtime task"
+chrt -f 5 sleep 1
+
+echo 0 > tracing_on
+grep "+ \[[[:digit:]]*\]" trace
+grep "==> \[[[:digit:]]*\]" trace
+
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc
index 2aabab363cfb..401104344593 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc
@@ -2,14 +2,7 @@
 # description: event trigger - test extended error support
 
 
-do_reset() {
-    reset_trigger
-    echo > set_event
-    clear_trace
-}
-
 fail() { #msg
-    do_reset
     echo $1
     exit_fail
 }
@@ -24,9 +17,6 @@ if [ ! -f synthetic_events ]; then
     exit_unsupported
 fi
 
-reset_tracer
-do_reset
-
 echo "Test extended error support"
 echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_wakeup/trigger
 ! echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' >> events/sched/sched_wakeup/trigger 2> /dev/null
@@ -34,6 +24,4 @@ if ! grep -q "ERROR:" events/sched/sched_wakeup/hist; then
     fail "Failed to generate extended error in histogram"
 fi
 
-do_reset
-
 exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc
index 7fd5b4a8f060..f59b2a9a1f22 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc
@@ -1,14 +1,7 @@
 #!/bin/sh
 # description: event trigger - test field variable support
 
-do_reset() {
-    reset_trigger
-    echo > set_event
-    clear_trace
-}
-
 fail() { #msg
-    do_reset
     echo $1
     exit_fail
 }
@@ -23,10 +16,6 @@ if [ ! -f synthetic_events ]; then
     exit_unsupported
 fi
 
-clear_synthetic_events
-reset_tracer
-do_reset
-
 echo "Test field variable support"
 
 echo 'wakeup_latency u64 lat; pid_t pid; int prio; char comm[16]' > synthetic_events
@@ -34,7 +23,7 @@ echo 'hist:keys=comm:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/
 echo 'hist:keys=next_comm:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_waking).wakeup_latency($wakeup_lat,next_pid,sched.sched_waking.prio,next_comm) if next_comm=="ping"' > events/sched/sched_switch/trigger
 echo 'hist:keys=pid,prio,comm:vals=lat:sort=pid,prio' > events/synthetic/wakeup_latency/trigger
 
-ping localhost -c 3
+ping $LOCALHOST -c 3
 if ! grep -q "ping" events/synthetic/wakeup_latency/hist; then
     fail "Failed to create inter-event histogram"
 fi
@@ -49,6 +38,4 @@ if grep -q "synthetic_prio=prio" events/sched/sched_waking/hist; then
     fail "Failed to remove histogram with field variable"
 fi
 
-do_reset
-
 exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc
index c93dbe38b5df..524d9ce361e2 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc
@@ -1,14 +1,7 @@
 #!/bin/sh
 # description: event trigger - test inter-event combined histogram trigger
 
-do_reset() {
-    reset_trigger
-    echo > set_event
-    clear_trace
-}
-
 fail() { #msg
-    do_reset
     echo $1
     exit_fail
 }
@@ -23,10 +16,6 @@ if [ ! -f synthetic_events ]; then
     exit_unsupported
 fi
 
-reset_tracer
-do_reset
-clear_synthetic_events
-
 echo "Test create synthetic event"
 
 echo 'waking_latency  u64 lat pid_t pid' > synthetic_events
@@ -48,11 +37,9 @@ echo 'waking+wakeup_latency u64 lat; pid_t pid' >> synthetic_events
 echo 'hist:keys=pid,lat:sort=pid,lat:ww_lat=$waking_lat+$wakeup_lat:onmatch(synthetic.wakeup_latency).waking+wakeup_latency($ww_lat,pid)' >> events/synthetic/wakeup_latency/trigger
 echo 'hist:keys=pid,lat:sort=pid,lat' >> events/synthetic/waking+wakeup_latency/trigger
 
-ping localhost -c 3
+ping $LOCALHOST -c 3
 if ! grep -q "pid:" events/synthetic/waking+wakeup_latency/hist; then
     fail "Failed to create combined histogram"
 fi
 
-do_reset
-
 exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-multi-actions-accept.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-multi-actions-accept.tc
index c193dce611a2..4ddc546771b5 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-multi-actions-accept.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-multi-actions-accept.tc
@@ -1,15 +1,7 @@
 #!/bin/sh
 # description: event trigger - test multiple actions on hist trigger
 
-
-do_reset() {
-    reset_trigger
-    echo > set_event
-    clear_trace
-}
-
 fail() { #msg
-    do_reset
     echo $1
     exit_fail
 }
@@ -24,10 +16,6 @@ if [ ! -f synthetic_events ]; then
     exit_unsupported
 fi
 
-clear_synthetic_events
-reset_tracer
-do_reset
-
 echo "Test multiple actions on hist trigger"
 echo 'wakeup_latency u64 lat; pid_t pid' >> synthetic_events
 TRIGGER1=events/sched/sched_wakeup/trigger
@@ -39,6 +27,4 @@ echo 'hist:keys=next_pid:onmatch(sched.sched_wakeup).wakeup_latency(sched.sched_
 echo 'hist:keys=next_pid:onmatch(sched.sched_wakeup).wakeup_latency(sched.sched_switch.$wakeup_lat,prev_pid) if next_comm=="cyclictest"' >> $TRIGGER2
 echo 'hist:keys=next_pid if next_comm=="cyclictest"' >> $TRIGGER2
 
-do_reset
-
 exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc
index e84e7d048566..39fb65b0cd9f 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc
@@ -1,14 +1,7 @@
 #!/bin/sh
 # description: event trigger - test inter-event histogram trigger onmatch action
 
-do_reset() {
-    reset_trigger
-    echo > set_event
-    clear_trace
-}
-
 fail() { #msg
-    do_reset
     echo $1
     exit_fail
 }
@@ -23,10 +16,6 @@ if [ ! -f synthetic_events ]; then
     exit_unsupported
 fi
 
-clear_synthetic_events
-reset_tracer
-do_reset
-
 echo "Test create synthetic event"
 
 echo 'wakeup_latency  u64 lat pid_t pid char comm[16]' > synthetic_events
@@ -40,11 +29,10 @@ echo "Test histogram variables,simple expression support and onmatch action"
 echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_wakeup/trigger
 echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_wakeup).wakeup_latency($wakeup_lat,next_pid,next_comm) if next_comm=="ping"' > events/sched/sched_switch/trigger
 echo 'hist:keys=comm,pid,lat:wakeup_lat=lat:sort=lat' > events/synthetic/wakeup_latency/trigger
-ping localhost -c 5
+
+ping $LOCALHOST -c 5
 if ! grep -q "ping" events/synthetic/wakeup_latency/hist; then
     fail "Failed to create onmatch action inter-event histogram"
 fi
 
-do_reset
-
 exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc
index 7907d8aacde3..81ab3939c96a 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc
@@ -1,14 +1,7 @@
 #!/bin/sh
 # description: event trigger - test inter-event histogram trigger onmatch-onmax action
 
-do_reset() {
-    reset_trigger
-    echo > set_event
-    clear_trace
-}
-
 fail() { #msg
-    do_reset
     echo $1
     exit_fail
 }
@@ -23,10 +16,6 @@ if [ ! -f synthetic_events ]; then
     exit_unsupported
 fi
 
-clear_synthetic_events
-reset_tracer
-do_reset
-
 echo "Test create synthetic event"
 
 echo 'wakeup_latency  u64 lat pid_t pid char comm[16]' > synthetic_events
@@ -40,11 +29,10 @@ echo "Test histogram variables,simple expression support and onmatch-onmax actio
 echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_wakeup/trigger
 echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_wakeup).wakeup_latency($wakeup_lat,next_pid,next_comm):onmax($wakeup_lat).save(next_comm,prev_pid,prev_prio,prev_comm) if next_comm=="ping"' >> events/sched/sched_switch/trigger
 echo 'hist:keys=comm,pid,lat:wakeup_lat=lat:sort=lat' > events/synthetic/wakeup_latency/trigger
-ping localhost -c 5
+
+ping $LOCALHOST -c 5
 if [ ! grep -q "ping" events/synthetic/wakeup_latency/hist -o ! grep -q "max:" events/sched/sched_switch/hist]; then
     fail "Failed to create onmatch-onmax action inter-event histogram"
 fi
 
-do_reset
-
 exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc
index 38b7ed6242b2..1180ab5f0845 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc
@@ -1,14 +1,7 @@
 #!/bin/sh
 # description: event trigger - test inter-event histogram trigger onmax action
 
-do_reset() {
-    reset_trigger
-    echo > set_event
-    clear_trace
-}
-
 fail() { #msg
-    do_reset
     echo $1
     exit_fail
 }
@@ -23,10 +16,6 @@ if [ ! -f synthetic_events ]; then
     exit_unsupported
 fi
 
-clear_synthetic_events
-reset_tracer
-do_reset
-
 echo "Test create synthetic event"
 
 echo 'wakeup_latency  u64 lat pid_t pid char comm[16]' > synthetic_events
@@ -38,11 +27,10 @@ echo "Test onmax action"
 
 echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' >> events/sched/sched_waking/trigger
 echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:onmax($wakeup_lat).save(next_comm,prev_pid,prev_prio,prev_comm) if next_comm=="ping"' >> events/sched/sched_switch/trigger
-ping localhost -c 3
+
+ping $LOCALHOST -c 3
 if ! grep -q "max:" events/sched/sched_switch/hist; then
     fail "Failed to create onmax action inter-event histogram"
 fi
 
-do_reset
-
 exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc
index cef11377dcbd..41128219231a 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc
@@ -1,13 +1,7 @@
 #!/bin/sh
 # description: event trigger - test synthetic event create remove
-do_reset() {
-    reset_trigger
-    echo > set_event
-    clear_trace
-}
 
 fail() { #msg
-    do_reset
     echo $1
     exit_fail
 }
@@ -22,10 +16,6 @@ if [ ! -f synthetic_events ]; then
     exit_unsupported
 fi
 
-clear_synthetic_events
-reset_tracer
-do_reset
-
 echo "Test create synthetic event"
 
 echo 'wakeup_latency  u64 lat pid_t pid char comm[16]' > synthetic_events
@@ -35,20 +25,18 @@ fi
 
 reset_trigger
 
-echo "Test create synthetic event with an error"
-echo 'wakeup_latency  u64 lat pid_t pid char' > synthetic_events > /dev/null
+echo "Test remove synthetic event"
+echo '!wakeup_latency  u64 lat pid_t pid char comm[16]' >> synthetic_events
 if [ -d events/synthetic/wakeup_latency ]; then
-    fail "Created wakeup_latency synthetic event with an invalid format"
+    fail "Failed to delete wakeup_latency synthetic event"
 fi
 
 reset_trigger
 
-echo "Test remove synthetic event"
-echo '!wakeup_latency  u64 lat pid_t pid char comm[16]' > synthetic_events
+echo "Test create synthetic event with an error"
+echo 'wakeup_latency  u64 lat pid_t pid char' > synthetic_events > /dev/null
 if [ -d events/synthetic/wakeup_latency ]; then
-    fail "Failed to delete wakeup_latency synthetic event"
+    fail "Created wakeup_latency synthetic event with an invalid format"
 fi
 
-do_reset
-
 exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-eventonoff.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-eventonoff.tc
index 28cc355a3a7b..eddb51e1fbf7 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-eventonoff.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-eventonoff.tc
@@ -3,14 +3,7 @@
 # description: event trigger - test event enable/disable trigger
 # flags: instance
 
-do_reset() {
-    reset_trigger
-    echo > set_event
-    clear_trace
-}
-
 fail() { #msg
-    do_reset
     echo $1
     exit_fail
 }
@@ -25,9 +18,6 @@ if [ ! -f events/sched/sched_process_fork/trigger ]; then
     exit_unsupported
 fi
 
-reset_tracer
-do_reset
-
 FEATURE=`grep enable_event events/sched/sched_process_fork/trigger`
 if [ -z "$FEATURE" ]; then
     echo "event enable/disable trigger is not supported"
@@ -61,6 +51,4 @@ echo 'enable_event:sched:sched_switch' > events/sched/sched_process_fork/trigger
 ! echo 'enable_event:sched:sched_switch' > events/sched/sched_process_fork/trigger
 ! echo 'disable_event:sched:sched_switch' > events/sched/sched_process_fork/trigger
 
-do_reset
-
 exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-filter.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-filter.tc
index a48e23eb8a8b..2dcc2296ebdd 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-filter.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-filter.tc
@@ -3,14 +3,7 @@
 # description: event trigger - test trigger filter
 # flags: instance
 
-do_reset() {
-    reset_trigger
-    echo > set_event
-    clear_trace
-}
-
 fail() { #msg
-    do_reset
     echo $1
     exit_fail
 }
@@ -25,9 +18,6 @@ if [ ! -f events/sched/sched_process_fork/trigger ]; then
     exit_unsupported
 fi
 
-reset_tracer
-do_reset
-
 echo "Test trigger filter"
 echo 1 > tracing_on
 echo 'traceoff if child_pid == 0' > events/sched/sched_process_fork/trigger
@@ -54,8 +44,4 @@ echo '!traceoff' > events/sched/sched_process_fork/trigger
 echo 'traceoff if parent_pid >= 0 || child_pid >= 0' > events/sched/sched_process_fork/trigger
 echo '!traceoff' > events/sched/sched_process_fork/trigger
 
-
-
-do_reset
-
 exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc
index 8da80efc44d8..fab4431639d3 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc
@@ -3,14 +3,7 @@
 # description: event trigger - test histogram modifiers
 # flags: instance
 
-do_reset() {
-    reset_trigger
-    echo > set_event
-    clear_trace
-}
-
 fail() { #msg
-    do_reset
     echo $1
     exit_fail
 }
@@ -30,9 +23,6 @@ if [ ! -f events/sched/sched_process_fork/hist ]; then
     exit_unsupported
 fi
 
-reset_tracer
-do_reset
-
 echo "Test histogram with execname modifier"
 
 echo 'hist:keys=common_pid.execname' > events/sched/sched_process_fork/trigger
@@ -71,6 +61,4 @@ for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done
 grep 'bytes_req: ~ 2^[0-9]*' events/kmem/kmalloc/hist > /dev/null || \
     fail "log2 modifier on kmem/kmalloc did not work"
 
-do_reset
-
 exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc
index 449fe9ff91a2..177e8d4c4744 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc
@@ -3,14 +3,7 @@
 # description: event trigger - test histogram trigger
 # flags: instance
 
-do_reset() {
-    reset_trigger
-    echo > set_event
-    clear_trace
-}
-
 fail() { #msg
-    do_reset
     echo $1
     exit_fail
 }
@@ -30,9 +23,6 @@ if [ ! -f events/sched/sched_process_fork/hist ]; then
     exit_unsupported
 fi
 
-reset_tracer
-do_reset
-
 echo "Test histogram basic tigger"
 
 echo 'hist:keys=parent_pid:vals=child_pid' > events/sched/sched_process_fork/trigger
@@ -79,6 +69,4 @@ check_inc `grep -o "child_pid:[[:space:]]*[[:digit:]]*" \
     events/sched/sched_process_fork/hist | cut -d: -f2 ` ||
     fail "sort param on sched_process_fork did not work"
 
-do_reset
-
 exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc
index c5ef8b9d02b3..18fdaab9f570 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc
@@ -3,14 +3,7 @@
 # description: event trigger - test multiple histogram triggers
 # flags: instance
 
-do_reset() {
-    reset_trigger
-    echo > set_event
-    clear_trace
-}
-
 fail() { #msg
-    do_reset
     echo $1
     exit_fail
 }
@@ -30,11 +23,6 @@ if [ ! -f events/sched/sched_process_fork/hist ]; then
     exit_unsupported
 fi
 
-reset_tracer
-do_reset
-
-reset_trigger
-
 echo "Test histogram multiple tiggers"
 
 echo 'hist:keys=parent_pid:vals=child_pid' > events/sched/sched_process_fork/trigger
@@ -67,8 +55,4 @@ grep test_hist events/sched/sched_process_exit/hist > /dev/null || \
 diffs=`diff events/sched/sched_process_exit/hist events/sched/sched_process_fork/hist | wc -l`
 test $diffs -eq 0 || fail "Same name histograms are not same"
 
-reset_trigger
-
-do_reset
-
 exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-snapshot.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-snapshot.tc
index ed38f0050d77..7717c0a09686 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-snapshot.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-snapshot.tc
@@ -2,14 +2,7 @@
 # SPDX-License-Identifier: GPL-2.0
 # description: event trigger - test snapshot-trigger
 
-do_reset() {
-    reset_trigger
-    echo > set_event
-    clear_trace
-}
-
 fail() { #msg
-    do_reset
     echo $1
     exit_fail
 }
@@ -29,9 +22,6 @@ if [ ! -f snapshot ]; then
     exit_unsupported
 fi
 
-reset_tracer
-do_reset
-
 FEATURE=`grep snapshot events/sched/sched_process_fork/trigger`
 if [ -z "$FEATURE" ]; then
     echo "snapshot trigger is not supported"
@@ -57,6 +47,4 @@ echo "Test snapshot semantic errors"
 echo "snapshot" > events/sched/sched_process_fork/trigger
 ! echo "snapshot" > events/sched/sched_process_fork/trigger
 
-do_reset
-
 exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-stacktrace.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-stacktrace.tc
index 3121d795a868..398c05c4d2a7 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-stacktrace.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-stacktrace.tc
@@ -2,14 +2,7 @@
 # SPDX-License-Identifier: GPL-2.0
 # description: event trigger - test stacktrace-trigger
 
-do_reset() {
-    reset_trigger
-    echo > set_event
-    clear_trace
-}
-
 fail() { #msg
-    do_reset
     echo $1
     exit_fail
 }
@@ -24,9 +17,6 @@ if [ ! -f events/sched/sched_process_fork/trigger ]; then
     exit_unsupported
 fi
 
-reset_tracer
-do_reset
-
 FEATURE=`grep stacktrace events/sched/sched_process_fork/trigger`
 if [ -z "$FEATURE" ]; then
     echo "stacktrace trigger is not supported"
@@ -49,6 +39,4 @@ echo "Test stacktrace semantic errors"
 echo "stacktrace" > events/sched/sched_process_fork/trigger
 ! echo "stacktrace" > events/sched/sched_process_fork/trigger
 
-do_reset
-
 exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-hist.tc
index 2acbfe2c0c0c..ab6bedb25736 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-hist.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-hist.tc
@@ -3,14 +3,7 @@
 # description: trace_marker trigger - test histogram trigger
 # flags: instance
 
-do_reset() {
-    reset_trigger
-    echo > set_event
-    clear_trace
-}
-
 fail() { #msg
-    do_reset
     echo $1
     exit_fail
 }
@@ -35,8 +28,6 @@ if [ ! -f events/ftrace/print/hist ]; then
     exit_unsupported
 fi
 
-do_reset
-
 echo "Test histogram trace_marker tigger"
 
 echo 'hist:keys=common_pid' > events/ftrace/print/trigger
@@ -44,6 +35,4 @@ for i in `seq 1 10` ; do echo "hello" > trace_marker; done
 grep 'hitcount: *10$' events/ftrace/print/hist > /dev/null || \
     fail "hist trigger did not trigger correct times on trace_marker"
 
-do_reset
-
 exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-snapshot.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-snapshot.tc
index 6748e8cb42d0..df246e505af7 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-snapshot.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-snapshot.tc
@@ -3,15 +3,7 @@
 # description: trace_marker trigger - test snapshot trigger
 # flags: instance
 
-do_reset() {
-    reset_trigger
-    echo > set_event
-    echo 0 > snapshot
-    clear_trace
-}
-
 fail() { #msg
-    do_reset
     echo $1
     exit_fail
 }
@@ -47,15 +39,13 @@ test_trace() {
 	fi
 	echo "testing $line for >$x<"
 	match=`echo $line | sed -e "s/>$x<//"`
-	if [ "$line" == "$match" ]; then
+	if [ "$line" = "$match" ]; then
 	    fail "$line does not have >$x< in it"
 	fi
-	let x=$x+2
+	x=$((x+2))
     done
 }
 
-do_reset
-
 echo "Test snapshot trace_marker tigger"
 
 echo 'snapshot' > events/ftrace/print/trigger
@@ -69,6 +59,4 @@ for i in `seq 1 10` ; do echo "hello >$i<" > trace_marker; done
 test_trace trace 1
 test_trace snapshot 2
 
-do_reset
-
 exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic-kernel.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic-kernel.tc
index 0a69c5d1cda8..18b4d1c2807e 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic-kernel.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic-kernel.tc
@@ -3,15 +3,7 @@
 # description: trace_marker trigger - test histogram with synthetic event against kernel event
 # flags:
 
-do_reset() {
-    reset_trigger
-    echo > set_event
-    echo > synthetic_events
-    clear_trace
-}
-
 fail() { #msg
-    do_reset
     echo $1
     exit_fail
 }
@@ -46,8 +38,6 @@ if [ ! -f events/ftrace/print/hist ]; then
     exit_unsupported
 fi
 
-do_reset
-
 echo "Test histogram kernel event to trace_marker latency histogram trigger"
 
 echo 'latency u64 lat' > synthetic_events
@@ -63,6 +53,4 @@ grep 'hitcount: *1$' events/ftrace/print/hist > /dev/null || \
 grep 'hitcount: *1$' events/synthetic/latency/hist > /dev/null || \
     fail "hist trigger did not trigger "
 
-do_reset
-
 exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic.tc
index 3666dd6ab02a..dd262d6d0db6 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic.tc
@@ -3,15 +3,7 @@
 # description: trace_marker trigger - test histogram with synthetic event
 # flags:
 
-do_reset() {
-    reset_trigger
-    echo > set_event
-    echo > synthetic_events
-    clear_trace
-}
-
 fail() { #msg
-    do_reset
     echo $1
     exit_fail
 }
@@ -41,8 +33,6 @@ if [ ! -f events/ftrace/print/hist ]; then
     exit_unsupported
 fi
 
-do_reset
-
 echo "Test histogram trace_marker to trace_marker latency histogram trigger"
 
 echo 'latency u64 lat' > synthetic_events
@@ -61,6 +51,4 @@ fi
 grep 'hitcount: *1$' events/synthetic/latency/hist > /dev/null || \
     fail "hist trigger did not trigger "
 
-do_reset
-
 exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-traceonoff.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-traceonoff.tc
index c59d9eb546da..d5d2dcbc9cab 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-traceonoff.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-traceonoff.tc
@@ -2,14 +2,7 @@
 # SPDX-License-Identifier: GPL-2.0
 # description: event trigger - test traceon/off trigger
 
-do_reset() {
-    reset_trigger
-    echo > set_event
-    clear_trace
-}
-
 fail() { #msg
-    do_reset
     echo $1
     exit_fail
 }
@@ -24,9 +17,6 @@ if [ ! -f events/sched/sched_process_fork/trigger ]; then
     exit_unsupported
 fi
 
-reset_tracer
-do_reset
-
 echo "Test traceoff trigger"
 echo 1 > tracing_on
 echo 'traceoff' > events/sched/sched_process_fork/trigger
@@ -54,6 +44,4 @@ echo 'traceon' > events/sched/sched_process_fork/trigger
 ! echo 'traceon' > events/sched/sched_process_fork/trigger
 ! echo 'traceoff' > events/sched/sched_process_fork/trigger
 
-do_reset
-
 exit 0
diff --git a/tools/testing/selftests/gpio/Makefile b/tools/testing/selftests/gpio/Makefile
index 4665cdbf1a8d..46648427d537 100644
--- a/tools/testing/selftests/gpio/Makefile
+++ b/tools/testing/selftests/gpio/Makefile
@@ -1,28 +1,26 @@
 # SPDX-License-Identifier: GPL-2.0
 
+CFLAGS += -O2 -g -std=gnu99 -Wall -I../../../../usr/include/
+LDLIBS += -lmount -I/usr/include/libmount
+
 TEST_PROGS := gpio-mockup.sh
-TEST_FILES := gpio-mockup-sysfs.sh $(BINARIES)
-BINARIES := gpio-mockup-chardev
-EXTRA_PROGS := ../gpiogpio-event-mon ../gpiogpio-hammer ../gpiolsgpio
-EXTRA_DIRS := ../gpioinclude/
-EXTRA_OBJS := ../gpiogpio-event-mon-in.o ../gpiogpio-event-mon.o
-EXTRA_OBJS += ../gpiogpio-hammer-in.o ../gpiogpio-utils.o ../gpiolsgpio-in.o
-EXTRA_OBJS += ../gpiolsgpio.o
+TEST_FILES := gpio-mockup-sysfs.sh
+TEST_PROGS_EXTENDED := gpio-mockup-chardev
+
+GPIODIR := $(realpath ../../../gpio)
+GPIOOBJ := gpio-utils.o
 
 include ../lib.mk
 
-all: $(BINARIES)
+all: $(TEST_PROGS_EXTENDED)
 
 override define CLEAN
-	$(RM) $(BINARIES) $(EXTRA_PROGS) $(EXTRA_OBJS)
-	$(RM) -r $(EXTRA_DIRS)
+	$(RM) $(TEST_PROGS_EXTENDED)
+	$(MAKE) -C $(GPIODIR) OUTPUT=$(GPIODIR)/ clean
 endef
 
-CFLAGS += -O2 -g -std=gnu99 -Wall -I../../../../usr/include/
-LDLIBS += -lmount -I/usr/include/libmount
-
-$(BINARIES):| khdr
-$(BINARIES): ../../../gpio/gpio-utils.o
+$(TEST_PROGS_EXTENDED):| khdr
+$(TEST_PROGS_EXTENDED): $(GPIODIR)/$(GPIOOBJ)
 
-../../../gpio/gpio-utils.o:
-	make ARCH=$(ARCH) CROSS_COMPILE=$(CROSS_COMPILE) -C ../../../gpio
+$(GPIODIR)/$(GPIOOBJ):
+	$(MAKE) OUTPUT=$(GPIODIR)/ -C $(GPIODIR)
diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index 5c34752e1cff..6210ba41c29e 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -1,6 +1,8 @@
-cr4_cpuid_sync_test
-platform_info_test
-set_sregs_test
-sync_regs_test
-vmx_tsc_adjust_test
-state_test
+/x86_64/cr4_cpuid_sync_test
+/x86_64/evmcs_test
+/x86_64/platform_info_test
+/x86_64/set_sregs_test
+/x86_64/sync_regs_test
+/x86_64/vmx_tsc_adjust_test
+/x86_64/state_test
+/dirty_log_test
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index ec32dad3c3f0..01a219229238 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -1,26 +1,30 @@
 all:
 
-top_srcdir = ../../../../
+top_srcdir = ../../../..
 UNAME_M := $(shell uname -m)
 
-LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c
-LIBKVM_x86_64 = lib/x86.c lib/vmx.c
-
-TEST_GEN_PROGS_x86_64 = platform_info_test
-TEST_GEN_PROGS_x86_64 += set_sregs_test
-TEST_GEN_PROGS_x86_64 += sync_regs_test
-TEST_GEN_PROGS_x86_64 += vmx_tsc_adjust_test
-TEST_GEN_PROGS_x86_64 += cr4_cpuid_sync_test
-TEST_GEN_PROGS_x86_64 += state_test
+LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/ucall.c lib/sparsebit.c
+LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c
+LIBKVM_aarch64 = lib/aarch64/processor.c
+
+TEST_GEN_PROGS_x86_64 = x86_64/platform_info_test
+TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test
+TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test
+TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test
+TEST_GEN_PROGS_x86_64 += x86_64/cr4_cpuid_sync_test
+TEST_GEN_PROGS_x86_64 += x86_64/state_test
+TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test
 TEST_GEN_PROGS_x86_64 += dirty_log_test
 
+TEST_GEN_PROGS_aarch64 += dirty_log_test
+
 TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M))
 LIBKVM += $(LIBKVM_$(UNAME_M))
 
 INSTALL_HDR_PATH = $(top_srcdir)/usr
 LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/
-LINUX_TOOL_INCLUDE = $(top_srcdir)tools/include
-CFLAGS += -O2 -g -std=gnu99 -I$(LINUX_TOOL_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude -I$(<D) -I..
+LINUX_TOOL_INCLUDE = $(top_srcdir)/tools/include
+CFLAGS += -O2 -g -std=gnu99 -I$(LINUX_TOOL_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude -I$(<D) -Iinclude/$(UNAME_M) -I..
 LDFLAGS += -pthread
 
 # After inclusion, $(OUTPUT) is defined and
@@ -29,7 +33,7 @@ include ../lib.mk
 
 STATIC_LIBS := $(OUTPUT)/libkvm.a
 LIBKVM_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM))
-EXTRA_CLEAN += $(LIBKVM_OBJ) $(STATIC_LIBS)
+EXTRA_CLEAN += $(LIBKVM_OBJ) $(STATIC_LIBS) cscope.*
 
 x := $(shell mkdir -p $(sort $(dir $(LIBKVM_OBJ))))
 $(LIBKVM_OBJ): $(OUTPUT)/%.o: %.c
@@ -41,3 +45,12 @@ $(OUTPUT)/libkvm.a: $(LIBKVM_OBJ)
 all: $(STATIC_LIBS)
 $(TEST_GEN_PROGS): $(STATIC_LIBS)
 $(STATIC_LIBS):| khdr
+
+cscope: include_paths = $(LINUX_TOOL_INCLUDE) $(LINUX_HDR_PATH) include lib ..
+cscope:
+	$(RM) cscope.*
+	(find $(include_paths) -name '*.h' \
+		-exec realpath --relative-base=$(PWD) {} \;; \
+	find . -name '*.c' \
+		-exec realpath --relative-base=$(PWD) {} \;) | sort -u > cscope.files
+	cscope -b
diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c
index 0c2cdc105f96..aeff95a91b15 100644
--- a/tools/testing/selftests/kvm/dirty_log_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_test.c
@@ -5,6 +5,8 @@
  * Copyright (C) 2018, Red Hat, Inc.
  */
 
+#define _GNU_SOURCE /* for program_invocation_name */
+
 #include <stdio.h>
 #include <stdlib.h>
 #include <unistd.h>
@@ -15,76 +17,78 @@
 
 #include "test_util.h"
 #include "kvm_util.h"
+#include "processor.h"
+
+#define DEBUG printf
 
-#define  DEBUG                 printf
+#define VCPU_ID				1
 
-#define  VCPU_ID                        1
 /* The memory slot index to track dirty pages */
-#define  TEST_MEM_SLOT_INDEX            1
-/*
- * GPA offset of the testing memory slot. Must be bigger than the
- * default vm mem slot, which is DEFAULT_GUEST_PHY_PAGES.
- */
-#define  TEST_MEM_OFFSET                (1ULL << 30) /* 1G */
-/* Size of the testing memory slot */
-#define  TEST_MEM_PAGES                 (1ULL << 18) /* 1G for 4K pages */
+#define TEST_MEM_SLOT_INDEX		1
+
+/* Default guest test memory offset, 1G */
+#define DEFAULT_GUEST_TEST_MEM		0x40000000
+
 /* How many pages to dirty for each guest loop */
-#define  TEST_PAGES_PER_LOOP            1024
+#define TEST_PAGES_PER_LOOP		1024
+
 /* How many host loops to run (one KVM_GET_DIRTY_LOG for each loop) */
-#define  TEST_HOST_LOOP_N               32
+#define TEST_HOST_LOOP_N		32UL
+
 /* Interval for each host loop (ms) */
-#define  TEST_HOST_LOOP_INTERVAL        10
+#define TEST_HOST_LOOP_INTERVAL		10UL
+
+/*
+ * Guest/Host shared variables. Ensure addr_gva2hva() and/or
+ * sync_global_to/from_guest() are used when accessing from
+ * the host. READ/WRITE_ONCE() should also be used with anything
+ * that may change.
+ */
+static uint64_t host_page_size;
+static uint64_t guest_page_size;
+static uint64_t guest_num_pages;
+static uint64_t random_array[TEST_PAGES_PER_LOOP];
+static uint64_t iteration;
 
 /*
- * Guest variables.  We use these variables to share data between host
- * and guest.  There are two copies of the variables, one in host memory
- * (which is unused) and one in guest memory.  When the host wants to
- * access these variables, it needs to call addr_gva2hva() to access the
- * guest copy.
+ * GPA offset of the testing memory slot. Must be bigger than
+ * DEFAULT_GUEST_PHY_PAGES.
  */
-uint64_t guest_random_array[TEST_PAGES_PER_LOOP];
-uint64_t guest_iteration;
-uint64_t guest_page_size;
+static uint64_t guest_test_mem = DEFAULT_GUEST_TEST_MEM;
 
 /*
- * Writes to the first byte of a random page within the testing memory
- * region continuously.
+ * Continuously write to the first 8 bytes of a random pages within
+ * the testing memory region.
  */
-void guest_code(void)
+static void guest_code(void)
 {
-	int i = 0;
-	uint64_t volatile *array = guest_random_array;
-	uint64_t volatile *guest_addr;
+	int i;
 
 	while (true) {
 		for (i = 0; i < TEST_PAGES_PER_LOOP; i++) {
-			/*
-			 * Write to the first 8 bytes of a random page
-			 * on the testing memory region.
-			 */
-			guest_addr = (uint64_t *)
-			    (TEST_MEM_OFFSET +
-			     (array[i] % TEST_MEM_PAGES) * guest_page_size);
-			*guest_addr = guest_iteration;
+			uint64_t addr = guest_test_mem;
+			addr += (READ_ONCE(random_array[i]) % guest_num_pages)
+				* guest_page_size;
+			addr &= ~(host_page_size - 1);
+			*(uint64_t *)addr = READ_ONCE(iteration);
 		}
+
 		/* Tell the host that we need more random numbers */
 		GUEST_SYNC(1);
 	}
 }
 
-/*
- * Host variables.  These variables should only be used by the host
- * rather than the guest.
- */
-bool host_quit;
+/* Host variables */
+static bool host_quit;
 
 /* Points to the test VM memory region on which we track dirty logs */
-void *host_test_mem;
+static void *host_test_mem;
+static uint64_t host_num_pages;
 
 /* For statistics only */
-uint64_t host_dirty_count;
-uint64_t host_clear_count;
-uint64_t host_track_next_count;
+static uint64_t host_dirty_count;
+static uint64_t host_clear_count;
+static uint64_t host_track_next_count;
 
 /*
  * We use this bitmap to track some pages that should have its dirty
@@ -93,40 +97,34 @@ uint64_t host_track_next_count;
  * page bit is cleared in the latest bitmap, then the system must
  * report that write in the next get dirty log call.
  */
-unsigned long *host_bmap_track;
+static unsigned long *host_bmap_track;
 
-void generate_random_array(uint64_t *guest_array, uint64_t size)
+static void generate_random_array(uint64_t *guest_array, uint64_t size)
 {
 	uint64_t i;
 
-	for (i = 0; i < size; i++) {
+	for (i = 0; i < size; i++)
 		guest_array[i] = random();
-	}
 }
 
-void *vcpu_worker(void *data)
+static void *vcpu_worker(void *data)
 {
 	int ret;
-	uint64_t loops, *guest_array, pages_count = 0;
 	struct kvm_vm *vm = data;
+	uint64_t *guest_array;
+	uint64_t pages_count = 0;
 	struct kvm_run *run;
-	struct guest_args args;
+	struct ucall uc;
 
 	run = vcpu_state(vm, VCPU_ID);
 
-	/* Retrieve the guest random array pointer and cache it */
-	guest_array = addr_gva2hva(vm, (vm_vaddr_t)guest_random_array);
-
-	DEBUG("VCPU starts\n");
-
+	guest_array = addr_gva2hva(vm, (vm_vaddr_t)random_array);
 	generate_random_array(guest_array, TEST_PAGES_PER_LOOP);
 
 	while (!READ_ONCE(host_quit)) {
-		/* Let the guest to dirty these random pages */
+		/* Let the guest dirty the random pages */
 		ret = _vcpu_run(vm, VCPU_ID);
-		guest_args_read(vm, VCPU_ID, &args);
-		if (run->exit_reason == KVM_EXIT_IO &&
-		    args.port == GUEST_PORT_SYNC) {
+		if (get_ucall(vm, VCPU_ID, &uc) == UCALL_SYNC) {
 			pages_count += TEST_PAGES_PER_LOOP;
 			generate_random_array(guest_array, TEST_PAGES_PER_LOOP);
 		} else {
@@ -137,18 +135,20 @@ void *vcpu_worker(void *data)
 		}
 	}
 
-	DEBUG("VCPU exits, dirtied %"PRIu64" pages\n", pages_count);
+	DEBUG("Dirtied %"PRIu64" pages\n", pages_count);
 
 	return NULL;
 }
 
-void vm_dirty_log_verify(unsigned long *bmap, uint64_t iteration)
+static void vm_dirty_log_verify(unsigned long *bmap)
 {
 	uint64_t page;
-	uint64_t volatile *value_ptr;
+	uint64_t *value_ptr;
+	uint64_t step = host_page_size >= guest_page_size ? 1 :
+				guest_page_size / host_page_size;
 
-	for (page = 0; page < TEST_MEM_PAGES; page++) {
-		value_ptr = host_test_mem + page * getpagesize();
+	for (page = 0; page < host_num_pages; page += step) {
+		value_ptr = host_test_mem + page * host_page_size;
 
 		/* If this is a special page that we were tracking... */
 		if (test_and_clear_bit(page, host_bmap_track)) {
@@ -208,88 +208,117 @@ void vm_dirty_log_verify(unsigned long *bmap, uint64_t iteration)
 	}
 }
 
-void help(char *name)
+static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid,
+				uint64_t extra_mem_pages, void *guest_code)
 {
-	puts("");
-	printf("usage: %s [-i iterations] [-I interval] [-h]\n", name);
-	puts("");
-	printf(" -i: specify iteration counts (default: %"PRIu64")\n",
-	       TEST_HOST_LOOP_N);
-	printf(" -I: specify interval in ms (default: %"PRIu64" ms)\n",
-	       TEST_HOST_LOOP_INTERVAL);
-	puts("");
-	exit(0);
+	struct kvm_vm *vm;
+	uint64_t extra_pg_pages = extra_mem_pages / 512 * 2;
+
+	vm = vm_create(mode, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR);
+	kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
+#ifdef __x86_64__
+	vm_create_irqchip(vm);
+#endif
+	vm_vcpu_add_default(vm, vcpuid, guest_code);
+	return vm;
 }
 
-int main(int argc, char *argv[])
+static void run_test(enum vm_guest_mode mode, unsigned long iterations,
+		     unsigned long interval, bool top_offset)
 {
+	unsigned int guest_pa_bits, guest_page_shift;
 	pthread_t vcpu_thread;
 	struct kvm_vm *vm;
-	uint64_t volatile *psize, *iteration;
-	unsigned long *bmap, iterations = TEST_HOST_LOOP_N,
-	    interval = TEST_HOST_LOOP_INTERVAL;
-	int opt;
-
-	while ((opt = getopt(argc, argv, "hi:I:")) != -1) {
-		switch (opt) {
-		case 'i':
-			iterations = strtol(optarg, NULL, 10);
-			break;
-		case 'I':
-			interval = strtol(optarg, NULL, 10);
-			break;
-		case 'h':
-		default:
-			help(argv[0]);
-			break;
-		}
+	uint64_t max_gfn;
+	unsigned long *bmap;
+
+	switch (mode) {
+	case VM_MODE_P52V48_4K:
+		guest_pa_bits = 52;
+		guest_page_shift = 12;
+		break;
+	case VM_MODE_P52V48_64K:
+		guest_pa_bits = 52;
+		guest_page_shift = 16;
+		break;
+	case VM_MODE_P40V48_4K:
+		guest_pa_bits = 40;
+		guest_page_shift = 12;
+		break;
+	case VM_MODE_P40V48_64K:
+		guest_pa_bits = 40;
+		guest_page_shift = 16;
+		break;
+	default:
+		TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", mode);
 	}
 
-	TEST_ASSERT(iterations > 2, "Iteration must be bigger than zero\n");
-	TEST_ASSERT(interval > 0, "Interval must be bigger than zero");
+	DEBUG("Testing guest mode: %s\n", vm_guest_mode_string(mode));
 
-	DEBUG("Test iterations: %"PRIu64", interval: %"PRIu64" (ms)\n",
-	      iterations, interval);
+	max_gfn = (1ul << (guest_pa_bits - guest_page_shift)) - 1;
+	guest_page_size = (1ul << guest_page_shift);
+	/* 1G of guest page sized pages */
+	guest_num_pages = (1ul << (30 - guest_page_shift));
+	host_page_size = getpagesize();
+	host_num_pages = (guest_num_pages * guest_page_size) / host_page_size +
+			 !!((guest_num_pages * guest_page_size) % host_page_size);
 
-	srandom(time(0));
+	if (top_offset) {
+		guest_test_mem = (max_gfn - guest_num_pages) * guest_page_size;
+		guest_test_mem &= ~(host_page_size - 1);
+	}
 
-	bmap = bitmap_alloc(TEST_MEM_PAGES);
-	host_bmap_track = bitmap_alloc(TEST_MEM_PAGES);
+	DEBUG("guest test mem offset: 0x%lx\n", guest_test_mem);
 
-	vm = vm_create_default(VCPU_ID, TEST_MEM_PAGES, guest_code);
+	bmap = bitmap_alloc(host_num_pages);
+	host_bmap_track = bitmap_alloc(host_num_pages);
+
+	vm = create_vm(mode, VCPU_ID, guest_num_pages, guest_code);
 
 	/* Add an extra memory slot for testing dirty logging */
 	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
-				    TEST_MEM_OFFSET,
+				    guest_test_mem,
 				    TEST_MEM_SLOT_INDEX,
-				    TEST_MEM_PAGES,
+				    guest_num_pages,
 				    KVM_MEM_LOG_DIRTY_PAGES);
-	/* Cache the HVA pointer of the region */
-	host_test_mem = addr_gpa2hva(vm, (vm_paddr_t)TEST_MEM_OFFSET);
 
 	/* Do 1:1 mapping for the dirty track memory slot */
-	virt_map(vm, TEST_MEM_OFFSET, TEST_MEM_OFFSET,
-		 TEST_MEM_PAGES * getpagesize(), 0);
+	virt_map(vm, guest_test_mem, guest_test_mem,
+		 guest_num_pages * guest_page_size, 0);
+
+	/* Cache the HVA pointer of the region */
+	host_test_mem = addr_gpa2hva(vm, (vm_paddr_t)guest_test_mem);
 
+#ifdef __x86_64__
 	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+#endif
+#ifdef __aarch64__
+	ucall_init(vm, UCALL_MMIO, NULL);
+#endif
 
-	/* Tell the guest about the page size on the system */
-	psize = addr_gva2hva(vm, (vm_vaddr_t)&guest_page_size);
-	*psize = getpagesize();
+	/* Export the shared variables to the guest */
+	sync_global_to_guest(vm, host_page_size);
+	sync_global_to_guest(vm, guest_page_size);
+	sync_global_to_guest(vm, guest_test_mem);
+	sync_global_to_guest(vm, guest_num_pages);
 
 	/* Start the iterations */
-	iteration = addr_gva2hva(vm, (vm_vaddr_t)&guest_iteration);
-	*iteration = 1;
+	iteration = 1;
+	sync_global_to_guest(vm, iteration);
+	host_quit = false;
+	host_dirty_count = 0;
+	host_clear_count = 0;
+	host_track_next_count = 0;
 
-	/* Start dirtying pages */
 	pthread_create(&vcpu_thread, NULL, vcpu_worker, vm);
 
-	while (*iteration < iterations) {
+	while (iteration < iterations) {
 		/* Give the vcpu thread some time to dirty some pages */
 		usleep(interval * 1000);
 		kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap);
-		vm_dirty_log_verify(bmap, *iteration);
-		(*iteration)++;
+		vm_dirty_log_verify(bmap);
+		iteration++;
+		sync_global_to_guest(vm, iteration);
 	}
 
 	/* Tell the vcpu thread to quit */
@@ -302,7 +331,118 @@ int main(int argc, char *argv[])
 
 	free(bmap);
 	free(host_bmap_track);
+	ucall_uninit(vm);
 	kvm_vm_free(vm);
+}
+
+static struct vm_guest_modes {
+	enum vm_guest_mode mode;
+	bool supported;
+	bool enabled;
+} vm_guest_modes[NUM_VM_MODES] = {
+#if defined(__x86_64__)
+	{ VM_MODE_P52V48_4K,	1, 1, },
+	{ VM_MODE_P52V48_64K,	0, 0, },
+	{ VM_MODE_P40V48_4K,	0, 0, },
+	{ VM_MODE_P40V48_64K,	0, 0, },
+#elif defined(__aarch64__)
+	{ VM_MODE_P52V48_4K,	0, 0, },
+	{ VM_MODE_P52V48_64K,	0, 0, },
+	{ VM_MODE_P40V48_4K,	1, 1, },
+	{ VM_MODE_P40V48_64K,	1, 1, },
+#endif
+};
+
+static void help(char *name)
+{
+	int i;
+
+	puts("");
+	printf("usage: %s [-h] [-i iterations] [-I interval] "
+	       "[-o offset] [-t] [-m mode]\n", name);
+	puts("");
+	printf(" -i: specify iteration counts (default: %"PRIu64")\n",
+	       TEST_HOST_LOOP_N);
+	printf(" -I: specify interval in ms (default: %"PRIu64" ms)\n",
+	       TEST_HOST_LOOP_INTERVAL);
+	printf(" -o: guest test memory offset (default: 0x%lx)\n",
+	       DEFAULT_GUEST_TEST_MEM);
+	printf(" -t: map guest test memory at the top of the allowed "
+	       "physical address range\n");
+	printf(" -m: specify the guest mode ID to test "
+	       "(default: test all supported modes)\n"
+	       "     This option may be used multiple times.\n"
+	       "     Guest mode IDs:\n");
+	for (i = 0; i < NUM_VM_MODES; ++i) {
+		printf("         %d:    %s%s\n",
+		       vm_guest_modes[i].mode,
+		       vm_guest_mode_string(vm_guest_modes[i].mode),
+		       vm_guest_modes[i].supported ? " (supported)" : "");
+	}
+	puts("");
+	exit(0);
+}
+
+int main(int argc, char *argv[])
+{
+	unsigned long iterations = TEST_HOST_LOOP_N;
+	unsigned long interval = TEST_HOST_LOOP_INTERVAL;
+	bool mode_selected = false;
+	bool top_offset = false;
+	unsigned int mode;
+	int opt, i;
+
+	while ((opt = getopt(argc, argv, "hi:I:o:tm:")) != -1) {
+		switch (opt) {
+		case 'i':
+			iterations = strtol(optarg, NULL, 10);
+			break;
+		case 'I':
+			interval = strtol(optarg, NULL, 10);
+			break;
+		case 'o':
+			guest_test_mem = strtoull(optarg, NULL, 0);
+			break;
+		case 't':
+			top_offset = true;
+			break;
+		case 'm':
+			if (!mode_selected) {
+				for (i = 0; i < NUM_VM_MODES; ++i)
+					vm_guest_modes[i].enabled = 0;
+				mode_selected = true;
+			}
+			mode = strtoul(optarg, NULL, 10);
+			TEST_ASSERT(mode < NUM_VM_MODES,
+				    "Guest mode ID %d too big", mode);
+			vm_guest_modes[mode].enabled = 1;
+			break;
+		case 'h':
+		default:
+			help(argv[0]);
+			break;
+		}
+	}
+
+	TEST_ASSERT(iterations > 2, "Iterations must be greater than two");
+	TEST_ASSERT(interval > 0, "Interval must be greater than zero");
+	TEST_ASSERT(!top_offset || guest_test_mem == DEFAULT_GUEST_TEST_MEM,
+		    "Cannot use both -o [offset] and -t at the same time");
+
+	DEBUG("Test iterations: %"PRIu64", interval: %"PRIu64" (ms)\n",
+	      iterations, interval);
+
+	srandom(time(0));
+
+	for (i = 0; i < NUM_VM_MODES; ++i) {
+		if (!vm_guest_modes[i].enabled)
+			continue;
+		TEST_ASSERT(vm_guest_modes[i].supported,
+			    "Guest mode ID %d (%s) not supported.",
+			    vm_guest_modes[i].mode,
+			    vm_guest_mode_string(vm_guest_modes[i].mode));
+		run_test(vm_guest_modes[i].mode, iterations, interval, top_offset);
+	}
 
 	return 0;
 }
diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/aarch64/processor.h
new file mode 100644
index 000000000000..9ef2ab1a0c08
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/aarch64/processor.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * AArch64 processor specific defines
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ */
+#ifndef SELFTEST_KVM_PROCESSOR_H
+#define SELFTEST_KVM_PROCESSOR_H
+
+#include "kvm_util.h"
+
+
+#define ARM64_CORE_REG(x) (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \
+			   KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))
+
+#define CPACR_EL1	3, 0,  1, 0, 2
+#define TCR_EL1		3, 0,  2, 0, 2
+#define MAIR_EL1	3, 0, 10, 2, 0
+#define TTBR0_EL1	3, 0,  2, 0, 0
+#define SCTLR_EL1	3, 0,  1, 0, 0
+
+/*
+ * Default MAIR
+ *                  index   attribute
+ * DEVICE_nGnRnE      0     0000:0000
+ * DEVICE_nGnRE       1     0000:0100
+ * DEVICE_GRE         2     0000:1100
+ * NORMAL_NC          3     0100:0100
+ * NORMAL             4     1111:1111
+ * NORMAL_WT          5     1011:1011
+ */
+#define DEFAULT_MAIR_EL1 ((0x00ul << (0 * 8)) | \
+			  (0x04ul << (1 * 8)) | \
+			  (0x0cul << (2 * 8)) | \
+			  (0x44ul << (3 * 8)) | \
+			  (0xfful << (4 * 8)) | \
+			  (0xbbul << (5 * 8)))
+
+static inline void get_reg(struct kvm_vm *vm, uint32_t vcpuid, uint64_t id, uint64_t *addr)
+{
+	struct kvm_one_reg reg;
+	reg.id = id;
+	reg.addr = (uint64_t)addr;
+	vcpu_ioctl(vm, vcpuid, KVM_GET_ONE_REG, &reg);
+}
+
+static inline void set_reg(struct kvm_vm *vm, uint32_t vcpuid, uint64_t id, uint64_t val)
+{
+	struct kvm_one_reg reg;
+	reg.id = id;
+	reg.addr = (uint64_t)&val;
+	vcpu_ioctl(vm, vcpuid, KVM_SET_ONE_REG, &reg);
+}
+
+#endif /* SELFTEST_KVM_PROCESSOR_H */
diff --git a/tools/testing/selftests/kvm/include/evmcs.h b/tools/testing/selftests/kvm/include/evmcs.h
new file mode 100644
index 000000000000..4059014d93ea
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/evmcs.h
@@ -0,0 +1,1098 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * tools/testing/selftests/kvm/include/vmx.h
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ *
+ */
+
+#ifndef SELFTEST_KVM_EVMCS_H
+#define SELFTEST_KVM_EVMCS_H
+
+#include <stdint.h>
+#include "vmx.h"
+
+#define u16 uint16_t
+#define u32 uint32_t
+#define u64 uint64_t
+
+extern bool enable_evmcs;
+
+struct hv_vp_assist_page {
+	__u32 apic_assist;
+	__u32 reserved;
+	__u64 vtl_control[2];
+	__u64 nested_enlightenments_control[2];
+	__u32 enlighten_vmentry;
+	__u64 current_nested_vmcs;
+};
+
+struct hv_enlightened_vmcs {
+	u32 revision_id;
+	u32 abort;
+
+	u16 host_es_selector;
+	u16 host_cs_selector;
+	u16 host_ss_selector;
+	u16 host_ds_selector;
+	u16 host_fs_selector;
+	u16 host_gs_selector;
+	u16 host_tr_selector;
+
+	u64 host_ia32_pat;
+	u64 host_ia32_efer;
+
+	u64 host_cr0;
+	u64 host_cr3;
+	u64 host_cr4;
+
+	u64 host_ia32_sysenter_esp;
+	u64 host_ia32_sysenter_eip;
+	u64 host_rip;
+	u32 host_ia32_sysenter_cs;
+
+	u32 pin_based_vm_exec_control;
+	u32 vm_exit_controls;
+	u32 secondary_vm_exec_control;
+
+	u64 io_bitmap_a;
+	u64 io_bitmap_b;
+	u64 msr_bitmap;
+
+	u16 guest_es_selector;
+	u16 guest_cs_selector;
+	u16 guest_ss_selector;
+	u16 guest_ds_selector;
+	u16 guest_fs_selector;
+	u16 guest_gs_selector;
+	u16 guest_ldtr_selector;
+	u16 guest_tr_selector;
+
+	u32 guest_es_limit;
+	u32 guest_cs_limit;
+	u32 guest_ss_limit;
+	u32 guest_ds_limit;
+	u32 guest_fs_limit;
+	u32 guest_gs_limit;
+	u32 guest_ldtr_limit;
+	u32 guest_tr_limit;
+	u32 guest_gdtr_limit;
+	u32 guest_idtr_limit;
+
+	u32 guest_es_ar_bytes;
+	u32 guest_cs_ar_bytes;
+	u32 guest_ss_ar_bytes;
+	u32 guest_ds_ar_bytes;
+	u32 guest_fs_ar_bytes;
+	u32 guest_gs_ar_bytes;
+	u32 guest_ldtr_ar_bytes;
+	u32 guest_tr_ar_bytes;
+
+	u64 guest_es_base;
+	u64 guest_cs_base;
+	u64 guest_ss_base;
+	u64 guest_ds_base;
+	u64 guest_fs_base;
+	u64 guest_gs_base;
+	u64 guest_ldtr_base;
+	u64 guest_tr_base;
+	u64 guest_gdtr_base;
+	u64 guest_idtr_base;
+
+	u64 padding64_1[3];
+
+	u64 vm_exit_msr_store_addr;
+	u64 vm_exit_msr_load_addr;
+	u64 vm_entry_msr_load_addr;
+
+	u64 cr3_target_value0;
+	u64 cr3_target_value1;
+	u64 cr3_target_value2;
+	u64 cr3_target_value3;
+
+	u32 page_fault_error_code_mask;
+	u32 page_fault_error_code_match;
+
+	u32 cr3_target_count;
+	u32 vm_exit_msr_store_count;
+	u32 vm_exit_msr_load_count;
+	u32 vm_entry_msr_load_count;
+
+	u64 tsc_offset;
+	u64 virtual_apic_page_addr;
+	u64 vmcs_link_pointer;
+
+	u64 guest_ia32_debugctl;
+	u64 guest_ia32_pat;
+	u64 guest_ia32_efer;
+
+	u64 guest_pdptr0;
+	u64 guest_pdptr1;
+	u64 guest_pdptr2;
+	u64 guest_pdptr3;
+
+	u64 guest_pending_dbg_exceptions;
+	u64 guest_sysenter_esp;
+	u64 guest_sysenter_eip;
+
+	u32 guest_activity_state;
+	u32 guest_sysenter_cs;
+
+	u64 cr0_guest_host_mask;
+	u64 cr4_guest_host_mask;
+	u64 cr0_read_shadow;
+	u64 cr4_read_shadow;
+	u64 guest_cr0;
+	u64 guest_cr3;
+	u64 guest_cr4;
+	u64 guest_dr7;
+
+	u64 host_fs_base;
+	u64 host_gs_base;
+	u64 host_tr_base;
+	u64 host_gdtr_base;
+	u64 host_idtr_base;
+	u64 host_rsp;
+
+	u64 ept_pointer;
+
+	u16 virtual_processor_id;
+	u16 padding16[3];
+
+	u64 padding64_2[5];
+	u64 guest_physical_address;
+
+	u32 vm_instruction_error;
+	u32 vm_exit_reason;
+	u32 vm_exit_intr_info;
+	u32 vm_exit_intr_error_code;
+	u32 idt_vectoring_info_field;
+	u32 idt_vectoring_error_code;
+	u32 vm_exit_instruction_len;
+	u32 vmx_instruction_info;
+
+	u64 exit_qualification;
+	u64 exit_io_instruction_ecx;
+	u64 exit_io_instruction_esi;
+	u64 exit_io_instruction_edi;
+	u64 exit_io_instruction_eip;
+
+	u64 guest_linear_address;
+	u64 guest_rsp;
+	u64 guest_rflags;
+
+	u32 guest_interruptibility_info;
+	u32 cpu_based_vm_exec_control;
+	u32 exception_bitmap;
+	u32 vm_entry_controls;
+	u32 vm_entry_intr_info_field;
+	u32 vm_entry_exception_error_code;
+	u32 vm_entry_instruction_len;
+	u32 tpr_threshold;
+
+	u64 guest_rip;
+
+	u32 hv_clean_fields;
+	u32 hv_padding_32;
+	u32 hv_synthetic_controls;
+	struct {
+		u32 nested_flush_hypercall:1;
+		u32 msr_bitmap:1;
+		u32 reserved:30;
+	} hv_enlightenments_control;
+	u32 hv_vp_id;
+
+	u64 hv_vm_id;
+	u64 partition_assist_page;
+	u64 padding64_4[4];
+	u64 guest_bndcfgs;
+	u64 padding64_5[7];
+	u64 xss_exit_bitmap;
+	u64 padding64_6[7];
+};
+
+#define HV_X64_MSR_VP_ASSIST_PAGE		0x40000073
+#define HV_X64_MSR_VP_ASSIST_PAGE_ENABLE	0x00000001
+#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT	12
+#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK	\
+		(~((1ull << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
+
+struct hv_enlightened_vmcs *current_evmcs;
+struct hv_vp_assist_page *current_vp_assist;
+
+static inline int enable_vp_assist(uint64_t vp_assist_pa, void *vp_assist)
+{
+	u64 val = (vp_assist_pa & HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK) |
+		HV_X64_MSR_VP_ASSIST_PAGE_ENABLE;
+
+	wrmsr(HV_X64_MSR_VP_ASSIST_PAGE, val);
+
+	current_vp_assist = vp_assist;
+
+	enable_evmcs = true;
+
+	return 0;
+}
+
+static inline int evmcs_vmptrld(uint64_t vmcs_pa, void *vmcs)
+{
+	current_vp_assist->current_nested_vmcs = vmcs_pa;
+	current_vp_assist->enlighten_vmentry = 1;
+
+	current_evmcs = vmcs;
+
+	return 0;
+}
+
+static inline int evmcs_vmptrst(uint64_t *value)
+{
+	*value = current_vp_assist->current_nested_vmcs &
+		~HV_X64_MSR_VP_ASSIST_PAGE_ENABLE;
+
+	return 0;
+}
+
+static inline int evmcs_vmread(uint64_t encoding, uint64_t *value)
+{
+	switch (encoding) {
+	case GUEST_RIP:
+		*value = current_evmcs->guest_rip;
+		break;
+	case GUEST_RSP:
+		*value = current_evmcs->guest_rsp;
+		break;
+	case GUEST_RFLAGS:
+		*value = current_evmcs->guest_rflags;
+		break;
+	case HOST_IA32_PAT:
+		*value = current_evmcs->host_ia32_pat;
+		break;
+	case HOST_IA32_EFER:
+		*value = current_evmcs->host_ia32_efer;
+		break;
+	case HOST_CR0:
+		*value = current_evmcs->host_cr0;
+		break;
+	case HOST_CR3:
+		*value = current_evmcs->host_cr3;
+		break;
+	case HOST_CR4:
+		*value = current_evmcs->host_cr4;
+		break;
+	case HOST_IA32_SYSENTER_ESP:
+		*value = current_evmcs->host_ia32_sysenter_esp;
+		break;
+	case HOST_IA32_SYSENTER_EIP:
+		*value = current_evmcs->host_ia32_sysenter_eip;
+		break;
+	case HOST_RIP:
+		*value = current_evmcs->host_rip;
+		break;
+	case IO_BITMAP_A:
+		*value = current_evmcs->io_bitmap_a;
+		break;
+	case IO_BITMAP_B:
+		*value = current_evmcs->io_bitmap_b;
+		break;
+	case MSR_BITMAP:
+		*value = current_evmcs->msr_bitmap;
+		break;
+	case GUEST_ES_BASE:
+		*value = current_evmcs->guest_es_base;
+		break;
+	case GUEST_CS_BASE:
+		*value = current_evmcs->guest_cs_base;
+		break;
+	case GUEST_SS_BASE:
+		*value = current_evmcs->guest_ss_base;
+		break;
+	case GUEST_DS_BASE:
+		*value = current_evmcs->guest_ds_base;
+		break;
+	case GUEST_FS_BASE:
+		*value = current_evmcs->guest_fs_base;
+		break;
+	case GUEST_GS_BASE:
+		*value = current_evmcs->guest_gs_base;
+		break;
+	case GUEST_LDTR_BASE:
+		*value = current_evmcs->guest_ldtr_base;
+		break;
+	case GUEST_TR_BASE:
+		*value = current_evmcs->guest_tr_base;
+		break;
+	case GUEST_GDTR_BASE:
+		*value = current_evmcs->guest_gdtr_base;
+		break;
+	case GUEST_IDTR_BASE:
+		*value = current_evmcs->guest_idtr_base;
+		break;
+	case TSC_OFFSET:
+		*value = current_evmcs->tsc_offset;
+		break;
+	case VIRTUAL_APIC_PAGE_ADDR:
+		*value = current_evmcs->virtual_apic_page_addr;
+		break;
+	case VMCS_LINK_POINTER:
+		*value = current_evmcs->vmcs_link_pointer;
+		break;
+	case GUEST_IA32_DEBUGCTL:
+		*value = current_evmcs->guest_ia32_debugctl;
+		break;
+	case GUEST_IA32_PAT:
+		*value = current_evmcs->guest_ia32_pat;
+		break;
+	case GUEST_IA32_EFER:
+		*value = current_evmcs->guest_ia32_efer;
+		break;
+	case GUEST_PDPTR0:
+		*value = current_evmcs->guest_pdptr0;
+		break;
+	case GUEST_PDPTR1:
+		*value = current_evmcs->guest_pdptr1;
+		break;
+	case GUEST_PDPTR2:
+		*value = current_evmcs->guest_pdptr2;
+		break;
+	case GUEST_PDPTR3:
+		*value = current_evmcs->guest_pdptr3;
+		break;
+	case GUEST_PENDING_DBG_EXCEPTIONS:
+		*value = current_evmcs->guest_pending_dbg_exceptions;
+		break;
+	case GUEST_SYSENTER_ESP:
+		*value = current_evmcs->guest_sysenter_esp;
+		break;
+	case GUEST_SYSENTER_EIP:
+		*value = current_evmcs->guest_sysenter_eip;
+		break;
+	case CR0_GUEST_HOST_MASK:
+		*value = current_evmcs->cr0_guest_host_mask;
+		break;
+	case CR4_GUEST_HOST_MASK:
+		*value = current_evmcs->cr4_guest_host_mask;
+		break;
+	case CR0_READ_SHADOW:
+		*value = current_evmcs->cr0_read_shadow;
+		break;
+	case CR4_READ_SHADOW:
+		*value = current_evmcs->cr4_read_shadow;
+		break;
+	case GUEST_CR0:
+		*value = current_evmcs->guest_cr0;
+		break;
+	case GUEST_CR3:
+		*value = current_evmcs->guest_cr3;
+		break;
+	case GUEST_CR4:
+		*value = current_evmcs->guest_cr4;
+		break;
+	case GUEST_DR7:
+		*value = current_evmcs->guest_dr7;
+		break;
+	case HOST_FS_BASE:
+		*value = current_evmcs->host_fs_base;
+		break;
+	case HOST_GS_BASE:
+		*value = current_evmcs->host_gs_base;
+		break;
+	case HOST_TR_BASE:
+		*value = current_evmcs->host_tr_base;
+		break;
+	case HOST_GDTR_BASE:
+		*value = current_evmcs->host_gdtr_base;
+		break;
+	case HOST_IDTR_BASE:
+		*value = current_evmcs->host_idtr_base;
+		break;
+	case HOST_RSP:
+		*value = current_evmcs->host_rsp;
+		break;
+	case EPT_POINTER:
+		*value = current_evmcs->ept_pointer;
+		break;
+	case GUEST_BNDCFGS:
+		*value = current_evmcs->guest_bndcfgs;
+		break;
+	case XSS_EXIT_BITMAP:
+		*value = current_evmcs->xss_exit_bitmap;
+		break;
+	case GUEST_PHYSICAL_ADDRESS:
+		*value = current_evmcs->guest_physical_address;
+		break;
+	case EXIT_QUALIFICATION:
+		*value = current_evmcs->exit_qualification;
+		break;
+	case GUEST_LINEAR_ADDRESS:
+		*value = current_evmcs->guest_linear_address;
+		break;
+	case VM_EXIT_MSR_STORE_ADDR:
+		*value = current_evmcs->vm_exit_msr_store_addr;
+		break;
+	case VM_EXIT_MSR_LOAD_ADDR:
+		*value = current_evmcs->vm_exit_msr_load_addr;
+		break;
+	case VM_ENTRY_MSR_LOAD_ADDR:
+		*value = current_evmcs->vm_entry_msr_load_addr;
+		break;
+	case CR3_TARGET_VALUE0:
+		*value = current_evmcs->cr3_target_value0;
+		break;
+	case CR3_TARGET_VALUE1:
+		*value = current_evmcs->cr3_target_value1;
+		break;
+	case CR3_TARGET_VALUE2:
+		*value = current_evmcs->cr3_target_value2;
+		break;
+	case CR3_TARGET_VALUE3:
+		*value = current_evmcs->cr3_target_value3;
+		break;
+	case TPR_THRESHOLD:
+		*value = current_evmcs->tpr_threshold;
+		break;
+	case GUEST_INTERRUPTIBILITY_INFO:
+		*value = current_evmcs->guest_interruptibility_info;
+		break;
+	case CPU_BASED_VM_EXEC_CONTROL:
+		*value = current_evmcs->cpu_based_vm_exec_control;
+		break;
+	case EXCEPTION_BITMAP:
+		*value = current_evmcs->exception_bitmap;
+		break;
+	case VM_ENTRY_CONTROLS:
+		*value = current_evmcs->vm_entry_controls;
+		break;
+	case VM_ENTRY_INTR_INFO_FIELD:
+		*value = current_evmcs->vm_entry_intr_info_field;
+		break;
+	case VM_ENTRY_EXCEPTION_ERROR_CODE:
+		*value = current_evmcs->vm_entry_exception_error_code;
+		break;
+	case VM_ENTRY_INSTRUCTION_LEN:
+		*value = current_evmcs->vm_entry_instruction_len;
+		break;
+	case HOST_IA32_SYSENTER_CS:
+		*value = current_evmcs->host_ia32_sysenter_cs;
+		break;
+	case PIN_BASED_VM_EXEC_CONTROL:
+		*value = current_evmcs->pin_based_vm_exec_control;
+		break;
+	case VM_EXIT_CONTROLS:
+		*value = current_evmcs->vm_exit_controls;
+		break;
+	case SECONDARY_VM_EXEC_CONTROL:
+		*value = current_evmcs->secondary_vm_exec_control;
+		break;
+	case GUEST_ES_LIMIT:
+		*value = current_evmcs->guest_es_limit;
+		break;
+	case GUEST_CS_LIMIT:
+		*value = current_evmcs->guest_cs_limit;
+		break;
+	case GUEST_SS_LIMIT:
+		*value = current_evmcs->guest_ss_limit;
+		break;
+	case GUEST_DS_LIMIT:
+		*value = current_evmcs->guest_ds_limit;
+		break;
+	case GUEST_FS_LIMIT:
+		*value = current_evmcs->guest_fs_limit;
+		break;
+	case GUEST_GS_LIMIT:
+		*value = current_evmcs->guest_gs_limit;
+		break;
+	case GUEST_LDTR_LIMIT:
+		*value = current_evmcs->guest_ldtr_limit;
+		break;
+	case GUEST_TR_LIMIT:
+		*value = current_evmcs->guest_tr_limit;
+		break;
+	case GUEST_GDTR_LIMIT:
+		*value = current_evmcs->guest_gdtr_limit;
+		break;
+	case GUEST_IDTR_LIMIT:
+		*value = current_evmcs->guest_idtr_limit;
+		break;
+	case GUEST_ES_AR_BYTES:
+		*value = current_evmcs->guest_es_ar_bytes;
+		break;
+	case GUEST_CS_AR_BYTES:
+		*value = current_evmcs->guest_cs_ar_bytes;
+		break;
+	case GUEST_SS_AR_BYTES:
+		*value = current_evmcs->guest_ss_ar_bytes;
+		break;
+	case GUEST_DS_AR_BYTES:
+		*value = current_evmcs->guest_ds_ar_bytes;
+		break;
+	case GUEST_FS_AR_BYTES:
+		*value = current_evmcs->guest_fs_ar_bytes;
+		break;
+	case GUEST_GS_AR_BYTES:
+		*value = current_evmcs->guest_gs_ar_bytes;
+		break;
+	case GUEST_LDTR_AR_BYTES:
+		*value = current_evmcs->guest_ldtr_ar_bytes;
+		break;
+	case GUEST_TR_AR_BYTES:
+		*value = current_evmcs->guest_tr_ar_bytes;
+		break;
+	case GUEST_ACTIVITY_STATE:
+		*value = current_evmcs->guest_activity_state;
+		break;
+	case GUEST_SYSENTER_CS:
+		*value = current_evmcs->guest_sysenter_cs;
+		break;
+	case VM_INSTRUCTION_ERROR:
+		*value = current_evmcs->vm_instruction_error;
+		break;
+	case VM_EXIT_REASON:
+		*value = current_evmcs->vm_exit_reason;
+		break;
+	case VM_EXIT_INTR_INFO:
+		*value = current_evmcs->vm_exit_intr_info;
+		break;
+	case VM_EXIT_INTR_ERROR_CODE:
+		*value = current_evmcs->vm_exit_intr_error_code;
+		break;
+	case IDT_VECTORING_INFO_FIELD:
+		*value = current_evmcs->idt_vectoring_info_field;
+		break;
+	case IDT_VECTORING_ERROR_CODE:
+		*value = current_evmcs->idt_vectoring_error_code;
+		break;
+	case VM_EXIT_INSTRUCTION_LEN:
+		*value = current_evmcs->vm_exit_instruction_len;
+		break;
+	case VMX_INSTRUCTION_INFO:
+		*value = current_evmcs->vmx_instruction_info;
+		break;
+	case PAGE_FAULT_ERROR_CODE_MASK:
+		*value = current_evmcs->page_fault_error_code_mask;
+		break;
+	case PAGE_FAULT_ERROR_CODE_MATCH:
+		*value = current_evmcs->page_fault_error_code_match;
+		break;
+	case CR3_TARGET_COUNT:
+		*value = current_evmcs->cr3_target_count;
+		break;
+	case VM_EXIT_MSR_STORE_COUNT:
+		*value = current_evmcs->vm_exit_msr_store_count;
+		break;
+	case VM_EXIT_MSR_LOAD_COUNT:
+		*value = current_evmcs->vm_exit_msr_load_count;
+		break;
+	case VM_ENTRY_MSR_LOAD_COUNT:
+		*value = current_evmcs->vm_entry_msr_load_count;
+		break;
+	case HOST_ES_SELECTOR:
+		*value = current_evmcs->host_es_selector;
+		break;
+	case HOST_CS_SELECTOR:
+		*value = current_evmcs->host_cs_selector;
+		break;
+	case HOST_SS_SELECTOR:
+		*value = current_evmcs->host_ss_selector;
+		break;
+	case HOST_DS_SELECTOR:
+		*value = current_evmcs->host_ds_selector;
+		break;
+	case HOST_FS_SELECTOR:
+		*value = current_evmcs->host_fs_selector;
+		break;
+	case HOST_GS_SELECTOR:
+		*value = current_evmcs->host_gs_selector;
+		break;
+	case HOST_TR_SELECTOR:
+		*value = current_evmcs->host_tr_selector;
+		break;
+	case GUEST_ES_SELECTOR:
+		*value = current_evmcs->guest_es_selector;
+		break;
+	case GUEST_CS_SELECTOR:
+		*value = current_evmcs->guest_cs_selector;
+		break;
+	case GUEST_SS_SELECTOR:
+		*value = current_evmcs->guest_ss_selector;
+		break;
+	case GUEST_DS_SELECTOR:
+		*value = current_evmcs->guest_ds_selector;
+		break;
+	case GUEST_FS_SELECTOR:
+		*value = current_evmcs->guest_fs_selector;
+		break;
+	case GUEST_GS_SELECTOR:
+		*value = current_evmcs->guest_gs_selector;
+		break;
+	case GUEST_LDTR_SELECTOR:
+		*value = current_evmcs->guest_ldtr_selector;
+		break;
+	case GUEST_TR_SELECTOR:
+		*value = current_evmcs->guest_tr_selector;
+		break;
+	case VIRTUAL_PROCESSOR_ID:
+		*value = current_evmcs->virtual_processor_id;
+		break;
+	default: return 1;
+	}
+
+	return 0;
+}
+
+static inline int evmcs_vmwrite(uint64_t encoding, uint64_t value)
+{
+	switch (encoding) {
+	case GUEST_RIP:
+		current_evmcs->guest_rip = value;
+		break;
+	case GUEST_RSP:
+		current_evmcs->guest_rsp = value;
+		break;
+	case GUEST_RFLAGS:
+		current_evmcs->guest_rflags = value;
+		break;
+	case HOST_IA32_PAT:
+		current_evmcs->host_ia32_pat = value;
+		break;
+	case HOST_IA32_EFER:
+		current_evmcs->host_ia32_efer = value;
+		break;
+	case HOST_CR0:
+		current_evmcs->host_cr0 = value;
+		break;
+	case HOST_CR3:
+		current_evmcs->host_cr3 = value;
+		break;
+	case HOST_CR4:
+		current_evmcs->host_cr4 = value;
+		break;
+	case HOST_IA32_SYSENTER_ESP:
+		current_evmcs->host_ia32_sysenter_esp = value;
+		break;
+	case HOST_IA32_SYSENTER_EIP:
+		current_evmcs->host_ia32_sysenter_eip = value;
+		break;
+	case HOST_RIP:
+		current_evmcs->host_rip = value;
+		break;
+	case IO_BITMAP_A:
+		current_evmcs->io_bitmap_a = value;
+		break;
+	case IO_BITMAP_B:
+		current_evmcs->io_bitmap_b = value;
+		break;
+	case MSR_BITMAP:
+		current_evmcs->msr_bitmap = value;
+		break;
+	case GUEST_ES_BASE:
+		current_evmcs->guest_es_base = value;
+		break;
+	case GUEST_CS_BASE:
+		current_evmcs->guest_cs_base = value;
+		break;
+	case GUEST_SS_BASE:
+		current_evmcs->guest_ss_base = value;
+		break;
+	case GUEST_DS_BASE:
+		current_evmcs->guest_ds_base = value;
+		break;
+	case GUEST_FS_BASE:
+		current_evmcs->guest_fs_base = value;
+		break;
+	case GUEST_GS_BASE:
+		current_evmcs->guest_gs_base = value;
+		break;
+	case GUEST_LDTR_BASE:
+		current_evmcs->guest_ldtr_base = value;
+		break;
+	case GUEST_TR_BASE:
+		current_evmcs->guest_tr_base = value;
+		break;
+	case GUEST_GDTR_BASE:
+		current_evmcs->guest_gdtr_base = value;
+		break;
+	case GUEST_IDTR_BASE:
+		current_evmcs->guest_idtr_base = value;
+		break;
+	case TSC_OFFSET:
+		current_evmcs->tsc_offset = value;
+		break;
+	case VIRTUAL_APIC_PAGE_ADDR:
+		current_evmcs->virtual_apic_page_addr = value;
+		break;
+	case VMCS_LINK_POINTER:
+		current_evmcs->vmcs_link_pointer = value;
+		break;
+	case GUEST_IA32_DEBUGCTL:
+		current_evmcs->guest_ia32_debugctl = value;
+		break;
+	case GUEST_IA32_PAT:
+		current_evmcs->guest_ia32_pat = value;
+		break;
+	case GUEST_IA32_EFER:
+		current_evmcs->guest_ia32_efer = value;
+		break;
+	case GUEST_PDPTR0:
+		current_evmcs->guest_pdptr0 = value;
+		break;
+	case GUEST_PDPTR1:
+		current_evmcs->guest_pdptr1 = value;
+		break;
+	case GUEST_PDPTR2:
+		current_evmcs->guest_pdptr2 = value;
+		break;
+	case GUEST_PDPTR3:
+		current_evmcs->guest_pdptr3 = value;
+		break;
+	case GUEST_PENDING_DBG_EXCEPTIONS:
+		current_evmcs->guest_pending_dbg_exceptions = value;
+		break;
+	case GUEST_SYSENTER_ESP:
+		current_evmcs->guest_sysenter_esp = value;
+		break;
+	case GUEST_SYSENTER_EIP:
+		current_evmcs->guest_sysenter_eip = value;
+		break;
+	case CR0_GUEST_HOST_MASK:
+		current_evmcs->cr0_guest_host_mask = value;
+		break;
+	case CR4_GUEST_HOST_MASK:
+		current_evmcs->cr4_guest_host_mask = value;
+		break;
+	case CR0_READ_SHADOW:
+		current_evmcs->cr0_read_shadow = value;
+		break;
+	case CR4_READ_SHADOW:
+		current_evmcs->cr4_read_shadow = value;
+		break;
+	case GUEST_CR0:
+		current_evmcs->guest_cr0 = value;
+		break;
+	case GUEST_CR3:
+		current_evmcs->guest_cr3 = value;
+		break;
+	case GUEST_CR4:
+		current_evmcs->guest_cr4 = value;
+		break;
+	case GUEST_DR7:
+		current_evmcs->guest_dr7 = value;
+		break;
+	case HOST_FS_BASE:
+		current_evmcs->host_fs_base = value;
+		break;
+	case HOST_GS_BASE:
+		current_evmcs->host_gs_base = value;
+		break;
+	case HOST_TR_BASE:
+		current_evmcs->host_tr_base = value;
+		break;
+	case HOST_GDTR_BASE:
+		current_evmcs->host_gdtr_base = value;
+		break;
+	case HOST_IDTR_BASE:
+		current_evmcs->host_idtr_base = value;
+		break;
+	case HOST_RSP:
+		current_evmcs->host_rsp = value;
+		break;
+	case EPT_POINTER:
+		current_evmcs->ept_pointer = value;
+		break;
+	case GUEST_BNDCFGS:
+		current_evmcs->guest_bndcfgs = value;
+		break;
+	case XSS_EXIT_BITMAP:
+		current_evmcs->xss_exit_bitmap = value;
+		break;
+	case GUEST_PHYSICAL_ADDRESS:
+		current_evmcs->guest_physical_address = value;
+		break;
+	case EXIT_QUALIFICATION:
+		current_evmcs->exit_qualification = value;
+		break;
+	case GUEST_LINEAR_ADDRESS:
+		current_evmcs->guest_linear_address = value;
+		break;
+	case VM_EXIT_MSR_STORE_ADDR:
+		current_evmcs->vm_exit_msr_store_addr = value;
+		break;
+	case VM_EXIT_MSR_LOAD_ADDR:
+		current_evmcs->vm_exit_msr_load_addr = value;
+		break;
+	case VM_ENTRY_MSR_LOAD_ADDR:
+		current_evmcs->vm_entry_msr_load_addr = value;
+		break;
+	case CR3_TARGET_VALUE0:
+		current_evmcs->cr3_target_value0 = value;
+		break;
+	case CR3_TARGET_VALUE1:
+		current_evmcs->cr3_target_value1 = value;
+		break;
+	case CR3_TARGET_VALUE2:
+		current_evmcs->cr3_target_value2 = value;
+		break;
+	case CR3_TARGET_VALUE3:
+		current_evmcs->cr3_target_value3 = value;
+		break;
+	case TPR_THRESHOLD:
+		current_evmcs->tpr_threshold = value;
+		break;
+	case GUEST_INTERRUPTIBILITY_INFO:
+		current_evmcs->guest_interruptibility_info = value;
+		break;
+	case CPU_BASED_VM_EXEC_CONTROL:
+		current_evmcs->cpu_based_vm_exec_control = value;
+		break;
+	case EXCEPTION_BITMAP:
+		current_evmcs->exception_bitmap = value;
+		break;
+	case VM_ENTRY_CONTROLS:
+		current_evmcs->vm_entry_controls = value;
+		break;
+	case VM_ENTRY_INTR_INFO_FIELD:
+		current_evmcs->vm_entry_intr_info_field = value;
+		break;
+	case VM_ENTRY_EXCEPTION_ERROR_CODE:
+		current_evmcs->vm_entry_exception_error_code = value;
+		break;
+	case VM_ENTRY_INSTRUCTION_LEN:
+		current_evmcs->vm_entry_instruction_len = value;
+		break;
+	case HOST_IA32_SYSENTER_CS:
+		current_evmcs->host_ia32_sysenter_cs = value;
+		break;
+	case PIN_BASED_VM_EXEC_CONTROL:
+		current_evmcs->pin_based_vm_exec_control = value;
+		break;
+	case VM_EXIT_CONTROLS:
+		current_evmcs->vm_exit_controls = value;
+		break;
+	case SECONDARY_VM_EXEC_CONTROL:
+		current_evmcs->secondary_vm_exec_control = value;
+		break;
+	case GUEST_ES_LIMIT:
+		current_evmcs->guest_es_limit = value;
+		break;
+	case GUEST_CS_LIMIT:
+		current_evmcs->guest_cs_limit = value;
+		break;
+	case GUEST_SS_LIMIT:
+		current_evmcs->guest_ss_limit = value;
+		break;
+	case GUEST_DS_LIMIT:
+		current_evmcs->guest_ds_limit = value;
+		break;
+	case GUEST_FS_LIMIT:
+		current_evmcs->guest_fs_limit = value;
+		break;
+	case GUEST_GS_LIMIT:
+		current_evmcs->guest_gs_limit = value;
+		break;
+	case GUEST_LDTR_LIMIT:
+		current_evmcs->guest_ldtr_limit = value;
+		break;
+	case GUEST_TR_LIMIT:
+		current_evmcs->guest_tr_limit = value;
+		break;
+	case GUEST_GDTR_LIMIT:
+		current_evmcs->guest_gdtr_limit = value;
+		break;
+	case GUEST_IDTR_LIMIT:
+		current_evmcs->guest_idtr_limit = value;
+		break;
+	case GUEST_ES_AR_BYTES:
+		current_evmcs->guest_es_ar_bytes = value;
+		break;
+	case GUEST_CS_AR_BYTES:
+		current_evmcs->guest_cs_ar_bytes = value;
+		break;
+	case GUEST_SS_AR_BYTES:
+		current_evmcs->guest_ss_ar_bytes = value;
+		break;
+	case GUEST_DS_AR_BYTES:
+		current_evmcs->guest_ds_ar_bytes = value;
+		break;
+	case GUEST_FS_AR_BYTES:
+		current_evmcs->guest_fs_ar_bytes = value;
+		break;
+	case GUEST_GS_AR_BYTES:
+		current_evmcs->guest_gs_ar_bytes = value;
+		break;
+	case GUEST_LDTR_AR_BYTES:
+		current_evmcs->guest_ldtr_ar_bytes = value;
+		break;
+	case GUEST_TR_AR_BYTES:
+		current_evmcs->guest_tr_ar_bytes = value;
+		break;
+	case GUEST_ACTIVITY_STATE:
+		current_evmcs->guest_activity_state = value;
+		break;
+	case GUEST_SYSENTER_CS:
+		current_evmcs->guest_sysenter_cs = value;
+		break;
+	case VM_INSTRUCTION_ERROR:
+		current_evmcs->vm_instruction_error = value;
+		break;
+	case VM_EXIT_REASON:
+		current_evmcs->vm_exit_reason = value;
+		break;
+	case VM_EXIT_INTR_INFO:
+		current_evmcs->vm_exit_intr_info = value;
+		break;
+	case VM_EXIT_INTR_ERROR_CODE:
+		current_evmcs->vm_exit_intr_error_code = value;
+		break;
+	case IDT_VECTORING_INFO_FIELD:
+		current_evmcs->idt_vectoring_info_field = value;
+		break;
+	case IDT_VECTORING_ERROR_CODE:
+		current_evmcs->idt_vectoring_error_code = value;
+		break;
+	case VM_EXIT_INSTRUCTION_LEN:
+		current_evmcs->vm_exit_instruction_len = value;
+		break;
+	case VMX_INSTRUCTION_INFO:
+		current_evmcs->vmx_instruction_info = value;
+		break;
+	case PAGE_FAULT_ERROR_CODE_MASK:
+		current_evmcs->page_fault_error_code_mask = value;
+		break;
+	case PAGE_FAULT_ERROR_CODE_MATCH:
+		current_evmcs->page_fault_error_code_match = value;
+		break;
+	case CR3_TARGET_COUNT:
+		current_evmcs->cr3_target_count = value;
+		break;
+	case VM_EXIT_MSR_STORE_COUNT:
+		current_evmcs->vm_exit_msr_store_count = value;
+		break;
+	case VM_EXIT_MSR_LOAD_COUNT:
+		current_evmcs->vm_exit_msr_load_count = value;
+		break;
+	case VM_ENTRY_MSR_LOAD_COUNT:
+		current_evmcs->vm_entry_msr_load_count = value;
+		break;
+	case HOST_ES_SELECTOR:
+		current_evmcs->host_es_selector = value;
+		break;
+	case HOST_CS_SELECTOR:
+		current_evmcs->host_cs_selector = value;
+		break;
+	case HOST_SS_SELECTOR:
+		current_evmcs->host_ss_selector = value;
+		break;
+	case HOST_DS_SELECTOR:
+		current_evmcs->host_ds_selector = value;
+		break;
+	case HOST_FS_SELECTOR:
+		current_evmcs->host_fs_selector = value;
+		break;
+	case HOST_GS_SELECTOR:
+		current_evmcs->host_gs_selector = value;
+		break;
+	case HOST_TR_SELECTOR:
+		current_evmcs->host_tr_selector = value;
+		break;
+	case GUEST_ES_SELECTOR:
+		current_evmcs->guest_es_selector = value;
+		break;
+	case GUEST_CS_SELECTOR:
+		current_evmcs->guest_cs_selector = value;
+		break;
+	case GUEST_SS_SELECTOR:
+		current_evmcs->guest_ss_selector = value;
+		break;
+	case GUEST_DS_SELECTOR:
+		current_evmcs->guest_ds_selector = value;
+		break;
+	case GUEST_FS_SELECTOR:
+		current_evmcs->guest_fs_selector = value;
+		break;
+	case GUEST_GS_SELECTOR:
+		current_evmcs->guest_gs_selector = value;
+		break;
+	case GUEST_LDTR_SELECTOR:
+		current_evmcs->guest_ldtr_selector = value;
+		break;
+	case GUEST_TR_SELECTOR:
+		current_evmcs->guest_tr_selector = value;
+		break;
+	case VIRTUAL_PROCESSOR_ID:
+		current_evmcs->virtual_processor_id = value;
+		break;
+	default: return 1;
+	}
+
+	return 0;
+}
+
+static inline int evmcs_vmlaunch(void)
+{
+	int ret;
+
+	current_evmcs->hv_clean_fields = 0;
+
+	__asm__ __volatile__("push %%rbp;"
+			     "push %%rcx;"
+			     "push %%rdx;"
+			     "push %%rsi;"
+			     "push %%rdi;"
+			     "push $0;"
+			     "mov %%rsp, (%[host_rsp]);"
+			     "lea 1f(%%rip), %%rax;"
+			     "mov %%rax, (%[host_rip]);"
+			     "vmlaunch;"
+			     "incq (%%rsp);"
+			     "1: pop %%rax;"
+			     "pop %%rdi;"
+			     "pop %%rsi;"
+			     "pop %%rdx;"
+			     "pop %%rcx;"
+			     "pop %%rbp;"
+			     : [ret]"=&a"(ret)
+			     : [host_rsp]"r"
+			       ((uint64_t)&current_evmcs->host_rsp),
+			       [host_rip]"r"
+			       ((uint64_t)&current_evmcs->host_rip)
+			     : "memory", "cc", "rbx", "r8", "r9", "r10",
+			       "r11", "r12", "r13", "r14", "r15");
+	return ret;
+}
+
+/*
+ * No guest state (e.g. GPRs) is established by this vmresume.
+ */
+static inline int evmcs_vmresume(void)
+{
+	int ret;
+
+	current_evmcs->hv_clean_fields = 0;
+
+	__asm__ __volatile__("push %%rbp;"
+			     "push %%rcx;"
+			     "push %%rdx;"
+			     "push %%rsi;"
+			     "push %%rdi;"
+			     "push $0;"
+			     "mov %%rsp, (%[host_rsp]);"
+			     "lea 1f(%%rip), %%rax;"
+			     "mov %%rax, (%[host_rip]);"
+			     "vmresume;"
+			     "incq (%%rsp);"
+			     "1: pop %%rax;"
+			     "pop %%rdi;"
+			     "pop %%rsi;"
+			     "pop %%rdx;"
+			     "pop %%rcx;"
+			     "pop %%rbp;"
+			     : [ret]"=&a"(ret)
+			     : [host_rsp]"r"
+			       ((uint64_t)&current_evmcs->host_rsp),
+			       [host_rip]"r"
+			       ((uint64_t)&current_evmcs->host_rip)
+			     : "memory", "cc", "rbx", "r8", "r9", "r10",
+			       "r11", "r12", "r13", "r14", "r15");
+	return ret;
+}
+
+#endif /* !SELFTEST_KVM_EVMCS_H */
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index 3acf9a91704c..a4e59e3b4826 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -7,7 +7,7 @@
  *
  */
 #ifndef SELFTEST_KVM_UTIL_H
-#define SELFTEST_KVM_UTIL_H 1
+#define SELFTEST_KVM_UTIL_H
 
 #include "test_util.h"
 
@@ -17,12 +17,6 @@
 
 #include "sparsebit.h"
 
-/*
- * Memslots can't cover the gfn starting at this gpa otherwise vCPUs can't be
- * created. Only applies to VMs using EPT.
- */
-#define KVM_DEFAULT_IDENTITY_MAP_ADDRESS 0xfffbc000ul
-
 
 /* Callers of kvm_util only have an incomplete/opaque description of the
  * structure kvm_util is using to maintain the state of a VM.
@@ -33,16 +27,23 @@ typedef uint64_t vm_paddr_t; /* Virtual Machine (Guest) physical address */
 typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */
 
 /* Minimum allocated guest virtual and physical addresses */
-#define KVM_UTIL_MIN_VADDR 0x2000
+#define KVM_UTIL_MIN_VADDR		0x2000
 
 #define DEFAULT_GUEST_PHY_PAGES		512
 #define DEFAULT_GUEST_STACK_VADDR_MIN	0xab6000
-#define DEFAULT_STACK_PGS               5
+#define DEFAULT_STACK_PGS		5
 
 enum vm_guest_mode {
-	VM_MODE_FLAT48PG,
+	VM_MODE_P52V48_4K,
+	VM_MODE_P52V48_64K,
+	VM_MODE_P40V48_4K,
+	VM_MODE_P40V48_64K,
+	NUM_VM_MODES,
 };
 
+#define vm_guest_mode_string(m) vm_guest_mode_string[m]
+extern const char * const vm_guest_mode_string[];
+
 enum vm_mem_backing_src_type {
 	VM_MEM_SRC_ANONYMOUS,
 	VM_MEM_SRC_ANONYMOUS_THP,
@@ -58,15 +59,15 @@ void kvm_vm_restart(struct kvm_vm *vmp, int perm);
 void kvm_vm_release(struct kvm_vm *vmp);
 void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log);
 
-int kvm_memcmp_hva_gva(void *hva,
-	struct kvm_vm *vm, const vm_vaddr_t gva, size_t len);
+int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, const vm_vaddr_t gva,
+		       size_t len);
 
 void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename,
-	uint32_t data_memslot, uint32_t pgd_memslot);
+		     uint32_t data_memslot, uint32_t pgd_memslot);
 
 void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent);
-void vcpu_dump(FILE *stream, struct kvm_vm *vm,
-	uint32_t vcpuid, uint8_t indent);
+void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid,
+	       uint8_t indent);
 
 void vm_create_irqchip(struct kvm_vm *vm);
 
@@ -75,13 +76,14 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
 	uint64_t guest_paddr, uint32_t slot, uint64_t npages,
 	uint32_t flags);
 
-void vcpu_ioctl(struct kvm_vm *vm,
-	uint32_t vcpuid, unsigned long ioctl, void *arg);
+void vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl,
+		void *arg);
 void vm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
 void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
-void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid, int pgd_memslot, int gdt_memslot);
+void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid, int pgd_memslot,
+		 int gdt_memslot);
 vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
-	uint32_t data_memslot, uint32_t pgd_memslot);
+			  uint32_t data_memslot, uint32_t pgd_memslot);
 void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
 	      size_t size, uint32_t pgd_memslot);
 void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa);
@@ -93,56 +95,35 @@ struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid);
 void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid);
 int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid);
 void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid,
-	struct kvm_mp_state *mp_state);
-void vcpu_regs_get(struct kvm_vm *vm,
-	uint32_t vcpuid, struct kvm_regs *regs);
-void vcpu_regs_set(struct kvm_vm *vm,
-	uint32_t vcpuid, struct kvm_regs *regs);
+		       struct kvm_mp_state *mp_state);
+void vcpu_regs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs);
+void vcpu_regs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs);
 void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...);
-void vcpu_sregs_get(struct kvm_vm *vm,
-	uint32_t vcpuid, struct kvm_sregs *sregs);
-void vcpu_sregs_set(struct kvm_vm *vm,
-	uint32_t vcpuid, struct kvm_sregs *sregs);
-int _vcpu_sregs_set(struct kvm_vm *vm,
-	uint32_t vcpuid, struct kvm_sregs *sregs);
+void vcpu_sregs_get(struct kvm_vm *vm, uint32_t vcpuid,
+		    struct kvm_sregs *sregs);
+void vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid,
+		    struct kvm_sregs *sregs);
+int _vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid,
+		    struct kvm_sregs *sregs);
 void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid,
-			  struct kvm_vcpu_events *events);
+		     struct kvm_vcpu_events *events);
 void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid,
-			  struct kvm_vcpu_events *events);
-uint64_t vcpu_get_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index);
-void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
-	uint64_t msr_value);
+		     struct kvm_vcpu_events *events);
 
 const char *exit_reason_str(unsigned int exit_reason);
 
 void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot);
 void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
-	uint32_t pgd_memslot);
-vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm,
-	vm_paddr_t paddr_min, uint32_t memslot);
-
-struct kvm_cpuid2 *kvm_get_supported_cpuid(void);
-void vcpu_set_cpuid(
-	struct kvm_vm *vm, uint32_t vcpuid, struct kvm_cpuid2 *cpuid);
-
-struct kvm_cpuid_entry2 *
-kvm_get_supported_cpuid_index(uint32_t function, uint32_t index);
-
-static inline struct kvm_cpuid_entry2 *
-kvm_get_supported_cpuid_entry(uint32_t function)
-{
-	return kvm_get_supported_cpuid_index(function, 0);
-}
+		 uint32_t pgd_memslot);
+vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min,
+			     uint32_t memslot);
+vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
+			      vm_paddr_t paddr_min, uint32_t memslot);
 
 struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_size,
 				 void *guest_code);
 void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code);
 
-typedef void (*vmx_guest_code_t)(vm_vaddr_t vmxon_vaddr,
-				 vm_paddr_t vmxon_paddr,
-				 vm_vaddr_t vmcs_vaddr,
-				 vm_paddr_t vmcs_paddr);
-
 struct kvm_userspace_memory_region *
 kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
 				 uint64_t end);
@@ -152,43 +133,49 @@ allocate_kvm_dirty_log(struct kvm_userspace_memory_region *region);
 
 int vm_create_device(struct kvm_vm *vm, struct kvm_create_device *cd);
 
-#define GUEST_PORT_SYNC         0x1000
-#define GUEST_PORT_ABORT        0x1001
-#define GUEST_PORT_DONE         0x1002
-
-static inline void __exit_to_l0(uint16_t port, uint64_t arg0, uint64_t arg1)
-{
-	__asm__ __volatile__("in %[port], %%al"
-			     :
-			     : [port]"d"(port), "D"(arg0), "S"(arg1)
-			     : "rax");
-}
-
-/*
- * Allows to pass three arguments to the host: port is 16bit wide,
- * arg0 & arg1 are 64bit wide
- */
-#define GUEST_SYNC_ARGS(_port, _arg0, _arg1) \
-	__exit_to_l0(_port, (uint64_t) (_arg0), (uint64_t) (_arg1))
-
-#define GUEST_ASSERT(_condition) do {				\
-		if (!(_condition))				\
-			GUEST_SYNC_ARGS(GUEST_PORT_ABORT,	\
-					"Failed guest assert: "	\
-					#_condition, __LINE__);	\
-	} while (0)
-
-#define GUEST_SYNC(stage)  GUEST_SYNC_ARGS(GUEST_PORT_SYNC, "hello", stage)
+#define sync_global_to_guest(vm, g) ({				\
+	typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g));	\
+	memcpy(_p, &(g), sizeof(g));				\
+})
+
+#define sync_global_from_guest(vm, g) ({			\
+	typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g));	\
+	memcpy(&(g), _p, sizeof(g));				\
+})
+
+/* ucall implementation types */
+typedef enum {
+	UCALL_PIO,
+	UCALL_MMIO,
+} ucall_type_t;
+
+/* Common ucalls */
+enum {
+	UCALL_NONE,
+	UCALL_SYNC,
+	UCALL_ABORT,
+	UCALL_DONE,
+};
 
-#define GUEST_DONE()  GUEST_SYNC_ARGS(GUEST_PORT_DONE, 0, 0)
+#define UCALL_MAX_ARGS 6
 
-struct guest_args {
-	uint64_t arg0;
-	uint64_t arg1;
-	uint16_t port;
-} __attribute__ ((packed));
+struct ucall {
+	uint64_t cmd;
+	uint64_t args[UCALL_MAX_ARGS];
+};
 
-void guest_args_read(struct kvm_vm *vm, uint32_t vcpu_id,
-		     struct guest_args *args);
+void ucall_init(struct kvm_vm *vm, ucall_type_t type, void *arg);
+void ucall_uninit(struct kvm_vm *vm);
+void ucall(uint64_t cmd, int nargs, ...);
+uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc);
+
+#define GUEST_SYNC(stage)	ucall(UCALL_SYNC, 2, "hello", stage)
+#define GUEST_DONE()		ucall(UCALL_DONE, 0)
+#define GUEST_ASSERT(_condition) do {			\
+	if (!(_condition))				\
+		ucall(UCALL_ABORT, 2,			\
+			"Failed guest assert: "		\
+			#_condition, __LINE__);		\
+} while (0)
 
 #endif /* SELFTEST_KVM_UTIL_H */
diff --git a/tools/testing/selftests/kvm/include/sparsebit.h b/tools/testing/selftests/kvm/include/sparsebit.h
index 54cfeb6568d3..31e030915c1f 100644
--- a/tools/testing/selftests/kvm/include/sparsebit.h
+++ b/tools/testing/selftests/kvm/include/sparsebit.h
@@ -15,8 +15,8 @@
  * even in the case where most bits are set.
  */
 
-#ifndef _TEST_SPARSEBIT_H_
-#define _TEST_SPARSEBIT_H_
+#ifndef SELFTEST_KVM_SPARSEBIT_H
+#define SELFTEST_KVM_SPARSEBIT_H
 
 #include <stdbool.h>
 #include <stdint.h>
@@ -72,4 +72,4 @@ void sparsebit_validate_internal(struct sparsebit *sbit);
 }
 #endif
 
-#endif /* _TEST_SPARSEBIT_H_ */
+#endif /* SELFTEST_KVM_SPARSEBIT_H */
diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h
index 73c3933436ec..c7dafe8bd02c 100644
--- a/tools/testing/selftests/kvm/include/test_util.h
+++ b/tools/testing/selftests/kvm/include/test_util.h
@@ -7,8 +7,8 @@
  *
  */
 
-#ifndef TEST_UTIL_H
-#define TEST_UTIL_H 1
+#ifndef SELFTEST_KVM_TEST_UTIL_H
+#define SELFTEST_KVM_TEST_UTIL_H
 
 #include <stdlib.h>
 #include <stdarg.h>
@@ -41,4 +41,4 @@ void test_assert(bool exp, const char *exp_str,
 		    #a, #b, #a, (unsigned long) __a, #b, (unsigned long) __b); \
 } while (0)
 
-#endif /* TEST_UTIL_H */
+#endif /* SELFTEST_KVM_TEST_UTIL_H */
diff --git a/tools/testing/selftests/kvm/include/x86.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 42c3596815b8..e2884c2b81ff 100644
--- a/tools/testing/selftests/kvm/include/x86.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -1,5 +1,5 @@
 /*
- * tools/testing/selftests/kvm/include/x86.h
+ * tools/testing/selftests/kvm/include/x86_64/processor.h
  *
  * Copyright (C) 2018, Google LLC.
  *
@@ -7,8 +7,8 @@
  *
  */
 
-#ifndef SELFTEST_KVM_X86_H
-#define SELFTEST_KVM_X86_H
+#ifndef SELFTEST_KVM_PROCESSOR_H
+#define SELFTEST_KVM_PROCESSOR_H
 
 #include <assert.h>
 #include <stdint.h>
@@ -305,7 +305,25 @@ static inline unsigned long get_xmm(int n)
 
 struct kvm_x86_state;
 struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid);
-void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *state);
+void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid,
+		     struct kvm_x86_state *state);
+
+struct kvm_cpuid2 *kvm_get_supported_cpuid(void);
+void vcpu_set_cpuid(struct kvm_vm *vm, uint32_t vcpuid,
+		    struct kvm_cpuid2 *cpuid);
+
+struct kvm_cpuid_entry2 *
+kvm_get_supported_cpuid_index(uint32_t function, uint32_t index);
+
+static inline struct kvm_cpuid_entry2 *
+kvm_get_supported_cpuid_entry(uint32_t function)
+{
+	return kvm_get_supported_cpuid_index(function, 0);
+}
+
+uint64_t vcpu_get_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index);
+void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
+	  	  uint64_t msr_value);
 
 /*
  * Basic CPU control in CR0
@@ -1044,4 +1062,4 @@ void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *s
 #define MSR_VM_IGNNE                    0xc0010115
 #define MSR_VM_HSAVE_PA                 0xc0010117
 
-#endif /* !SELFTEST_KVM_X86_H */
+#endif /* SELFTEST_KVM_PROCESSOR_H */
diff --git a/tools/testing/selftests/kvm/include/vmx.h b/tools/testing/selftests/kvm/include/x86_64/vmx.h
index b9ffe1024d3a..c9bd935b939c 100644
--- a/tools/testing/selftests/kvm/include/vmx.h
+++ b/tools/testing/selftests/kvm/include/x86_64/vmx.h
@@ -1,5 +1,5 @@
 /*
- * tools/testing/selftests/kvm/include/vmx.h
+ * tools/testing/selftests/kvm/include/x86_64/vmx.h
  *
  * Copyright (C) 2018, Google LLC.
  *
@@ -11,7 +11,7 @@
 #define SELFTEST_KVM_VMX_H
 
 #include <stdint.h>
-#include "x86.h"
+#include "processor.h"
 
 #define CPUID_VMX_BIT				5
 
@@ -339,6 +339,8 @@ struct vmx_msr_entry {
 	uint64_t value;
 } __attribute__ ((aligned(16)));
 
+#include "evmcs.h"
+
 static inline int vmxon(uint64_t phys)
 {
 	uint8_t ret;
@@ -372,6 +374,9 @@ static inline int vmptrld(uint64_t vmcs_pa)
 {
 	uint8_t ret;
 
+	if (enable_evmcs)
+		return -1;
+
 	__asm__ __volatile__ ("vmptrld %[pa]; setna %[ret]"
 		: [ret]"=rm"(ret)
 		: [pa]"m"(vmcs_pa)
@@ -385,6 +390,9 @@ static inline int vmptrst(uint64_t *value)
 	uint64_t tmp;
 	uint8_t ret;
 
+	if (enable_evmcs)
+		return evmcs_vmptrst(value);
+
 	__asm__ __volatile__("vmptrst %[value]; setna %[ret]"
 		: [value]"=m"(tmp), [ret]"=rm"(ret)
 		: : "cc", "memory");
@@ -411,6 +419,9 @@ static inline int vmlaunch(void)
 {
 	int ret;
 
+	if (enable_evmcs)
+		return evmcs_vmlaunch();
+
 	__asm__ __volatile__("push %%rbp;"
 			     "push %%rcx;"
 			     "push %%rdx;"
@@ -443,6 +454,9 @@ static inline int vmresume(void)
 {
 	int ret;
 
+	if (enable_evmcs)
+		return evmcs_vmresume();
+
 	__asm__ __volatile__("push %%rbp;"
 			     "push %%rcx;"
 			     "push %%rdx;"
@@ -482,6 +496,9 @@ static inline int vmread(uint64_t encoding, uint64_t *value)
 	uint64_t tmp;
 	uint8_t ret;
 
+	if (enable_evmcs)
+		return evmcs_vmread(encoding, value);
+
 	__asm__ __volatile__("vmread %[encoding], %[value]; setna %[ret]"
 		: [value]"=rm"(tmp), [ret]"=rm"(ret)
 		: [encoding]"r"(encoding)
@@ -506,6 +523,9 @@ static inline int vmwrite(uint64_t encoding, uint64_t value)
 {
 	uint8_t ret;
 
+	if (enable_evmcs)
+		return evmcs_vmwrite(encoding, value);
+
 	__asm__ __volatile__ ("vmwrite %[value], %[encoding]; setna %[ret]"
 		: [ret]"=rm"(ret)
 		: [value]"rm"(value), [encoding]"r"(encoding)
@@ -543,10 +563,19 @@ struct vmx_pages {
 	void *vmwrite_hva;
 	uint64_t vmwrite_gpa;
 	void *vmwrite;
+
+	void *vp_assist_hva;
+	uint64_t vp_assist_gpa;
+	void *vp_assist;
+
+	void *enlightened_vmcs_hva;
+	uint64_t enlightened_vmcs_gpa;
+	void *enlightened_vmcs;
 };
 
 struct vmx_pages *vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva);
 bool prepare_for_vmx_operation(struct vmx_pages *vmx);
 void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp);
+bool load_vmcs(struct vmx_pages *vmx);
 
-#endif /* !SELFTEST_KVM_VMX_H */
+#endif /* SELFTEST_KVM_VMX_H */
diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c
new file mode 100644
index 000000000000..b6022e2f116e
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c
@@ -0,0 +1,311 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * AArch64 code
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_name */
+
+#include "kvm_util.h"
+#include "../kvm_util_internal.h"
+#include "processor.h"
+
+#define KVM_GUEST_PAGE_TABLE_MIN_PADDR		0x180000
+#define DEFAULT_ARM64_GUEST_STACK_VADDR_MIN	0xac0000
+
+static uint64_t page_align(struct kvm_vm *vm, uint64_t v)
+{
+	return (v + vm->page_size) & ~(vm->page_size - 1);
+}
+
+static uint64_t pgd_index(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+	unsigned int shift = (vm->pgtable_levels - 1) * (vm->page_shift - 3) + vm->page_shift;
+	uint64_t mask = (1UL << (vm->va_bits - shift)) - 1;
+
+	return (gva >> shift) & mask;
+}
+
+static uint64_t pud_index(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+	unsigned int shift = 2 * (vm->page_shift - 3) + vm->page_shift;
+	uint64_t mask = (1UL << (vm->page_shift - 3)) - 1;
+
+	TEST_ASSERT(vm->pgtable_levels == 4,
+		"Mode %d does not have 4 page table levels", vm->mode);
+
+	return (gva >> shift) & mask;
+}
+
+static uint64_t pmd_index(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+	unsigned int shift = (vm->page_shift - 3) + vm->page_shift;
+	uint64_t mask = (1UL << (vm->page_shift - 3)) - 1;
+
+	TEST_ASSERT(vm->pgtable_levels >= 3,
+		"Mode %d does not have >= 3 page table levels", vm->mode);
+
+	return (gva >> shift) & mask;
+}
+
+static uint64_t pte_index(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+	uint64_t mask = (1UL << (vm->page_shift - 3)) - 1;
+	return (gva >> vm->page_shift) & mask;
+}
+
+static uint64_t pte_addr(struct kvm_vm *vm, uint64_t entry)
+{
+	uint64_t mask = ((1UL << (vm->va_bits - vm->page_shift)) - 1) << vm->page_shift;
+	return entry & mask;
+}
+
+static uint64_t ptrs_per_pgd(struct kvm_vm *vm)
+{
+	unsigned int shift = (vm->pgtable_levels - 1) * (vm->page_shift - 3) + vm->page_shift;
+	return 1 << (vm->va_bits - shift);
+}
+
+static uint64_t ptrs_per_pte(struct kvm_vm *vm)
+{
+	return 1 << (vm->page_shift - 3);
+}
+
+void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot)
+{
+	int rc;
+
+	if (!vm->pgd_created) {
+		vm_paddr_t paddr = vm_phy_pages_alloc(vm,
+			page_align(vm, ptrs_per_pgd(vm) * 8) / vm->page_size,
+			KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
+		vm->pgd = paddr;
+		vm->pgd_created = true;
+	}
+}
+
+void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+		  uint32_t pgd_memslot, uint64_t flags)
+{
+	uint8_t attr_idx = flags & 7;
+	uint64_t *ptep;
+
+	TEST_ASSERT((vaddr % vm->page_size) == 0,
+		"Virtual address not on page boundary,\n"
+		"  vaddr: 0x%lx vm->page_size: 0x%x", vaddr, vm->page_size);
+	TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
+		(vaddr >> vm->page_shift)),
+		"Invalid virtual address, vaddr: 0x%lx", vaddr);
+	TEST_ASSERT((paddr % vm->page_size) == 0,
+		"Physical address not on page boundary,\n"
+		"  paddr: 0x%lx vm->page_size: 0x%x", paddr, vm->page_size);
+	TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
+		"Physical address beyond beyond maximum supported,\n"
+		"  paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
+		paddr, vm->max_gfn, vm->page_size);
+
+	ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, vaddr) * 8;
+	if (!*ptep) {
+		*ptep = vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
+		*ptep |= 3;
+	}
+
+	switch (vm->pgtable_levels) {
+	case 4:
+		ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, vaddr) * 8;
+		if (!*ptep) {
+			*ptep = vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
+			*ptep |= 3;
+		}
+		/* fall through */
+	case 3:
+		ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, vaddr) * 8;
+		if (!*ptep) {
+			*ptep = vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
+			*ptep |= 3;
+		}
+		/* fall through */
+	case 2:
+		ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, vaddr) * 8;
+		break;
+	default:
+		TEST_ASSERT(false, "Page table levels must be 2, 3, or 4");
+	}
+
+	*ptep = paddr | 3;
+	*ptep |= (attr_idx << 2) | (1 << 10) /* Access Flag */;
+}
+
+void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+		 uint32_t pgd_memslot)
+{
+	uint64_t attr_idx = 4; /* NORMAL (See DEFAULT_MAIR_EL1) */
+
+	_virt_pg_map(vm, vaddr, paddr, pgd_memslot, attr_idx);
+}
+
+vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+	uint64_t *ptep;
+
+	if (!vm->pgd_created)
+		goto unmapped_gva;
+
+	ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, gva) * 8;
+	if (!ptep)
+		goto unmapped_gva;
+
+	switch (vm->pgtable_levels) {
+	case 4:
+		ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, gva) * 8;
+		if (!ptep)
+			goto unmapped_gva;
+		/* fall through */
+	case 3:
+		ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, gva) * 8;
+		if (!ptep)
+			goto unmapped_gva;
+		/* fall through */
+	case 2:
+		ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, gva) * 8;
+		if (!ptep)
+			goto unmapped_gva;
+		break;
+	default:
+		TEST_ASSERT(false, "Page table levels must be 2, 3, or 4");
+	}
+
+	return pte_addr(vm, *ptep) + (gva & (vm->page_size - 1));
+
+unmapped_gva:
+	TEST_ASSERT(false, "No mapping for vm virtual address, "
+		    "gva: 0x%lx", gva);
+}
+
+static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, uint64_t page, int level)
+{
+#ifdef DEBUG_VM
+	static const char * const type[] = { "", "pud", "pmd", "pte" };
+	uint64_t pte, *ptep;
+
+	if (level == 4)
+		return;
+
+	for (pte = page; pte < page + ptrs_per_pte(vm) * 8; pte += 8) {
+		ptep = addr_gpa2hva(vm, pte);
+		if (!*ptep)
+			continue;
+		printf("%*s%s: %lx: %lx at %p\n", indent, "", type[level], pte, *ptep, ptep);
+		pte_dump(stream, vm, indent + 1, pte_addr(vm, *ptep), level + 1);
+	}
+#endif
+}
+
+void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
+{
+	int level = 4 - (vm->pgtable_levels - 1);
+	uint64_t pgd, *ptep;
+
+	if (!vm->pgd_created)
+		return;
+
+	for (pgd = vm->pgd; pgd < vm->pgd + ptrs_per_pgd(vm) * 8; pgd += 8) {
+		ptep = addr_gpa2hva(vm, pgd);
+		if (!*ptep)
+			continue;
+		printf("%*spgd: %lx: %lx at %p\n", indent, "", pgd, *ptep, ptep);
+		pte_dump(stream, vm, indent + 1, pte_addr(vm, *ptep), level);
+	}
+}
+
+struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
+				 void *guest_code)
+{
+	uint64_t ptrs_per_4k_pte = 512;
+	uint64_t extra_pg_pages = (extra_mem_pages / ptrs_per_4k_pte) * 2;
+	struct kvm_vm *vm;
+
+	vm = vm_create(VM_MODE_P52V48_4K, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR);
+
+	kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
+	vm_vcpu_add_default(vm, vcpuid, guest_code);
+
+	return vm;
+}
+
+void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
+{
+	size_t stack_size = vm->page_size == 4096 ?
+					DEFAULT_STACK_PGS * vm->page_size :
+					vm->page_size;
+	uint64_t stack_vaddr = vm_vaddr_alloc(vm, stack_size,
+					DEFAULT_ARM64_GUEST_STACK_VADDR_MIN, 0, 0);
+
+	vm_vcpu_add(vm, vcpuid, 0, 0);
+
+	set_reg(vm, vcpuid, ARM64_CORE_REG(sp_el1), stack_vaddr + stack_size);
+	set_reg(vm, vcpuid, ARM64_CORE_REG(regs.pc), (uint64_t)guest_code);
+}
+
+void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_memslot)
+{
+	struct kvm_vcpu_init init;
+	uint64_t sctlr_el1, tcr_el1;
+
+	memset(&init, 0, sizeof(init));
+	init.target = KVM_ARM_TARGET_GENERIC_V8;
+	vcpu_ioctl(vm, vcpuid, KVM_ARM_VCPU_INIT, &init);
+
+	/*
+	 * Enable FP/ASIMD to avoid trapping when accessing Q0-Q15
+	 * registers, which the variable argument list macros do.
+	 */
+	set_reg(vm, vcpuid, ARM64_SYS_REG(CPACR_EL1), 3 << 20);
+
+	get_reg(vm, vcpuid, ARM64_SYS_REG(SCTLR_EL1), &sctlr_el1);
+	get_reg(vm, vcpuid, ARM64_SYS_REG(TCR_EL1), &tcr_el1);
+
+	switch (vm->mode) {
+	case VM_MODE_P52V48_4K:
+		tcr_el1 |= 0ul << 14; /* TG0 = 4KB */
+		tcr_el1 |= 6ul << 32; /* IPS = 52 bits */
+		break;
+	case VM_MODE_P52V48_64K:
+		tcr_el1 |= 1ul << 14; /* TG0 = 64KB */
+		tcr_el1 |= 6ul << 32; /* IPS = 52 bits */
+		break;
+	case VM_MODE_P40V48_4K:
+		tcr_el1 |= 0ul << 14; /* TG0 = 4KB */
+		tcr_el1 |= 2ul << 32; /* IPS = 40 bits */
+		break;
+	case VM_MODE_P40V48_64K:
+		tcr_el1 |= 1ul << 14; /* TG0 = 64KB */
+		tcr_el1 |= 2ul << 32; /* IPS = 40 bits */
+		break;
+	default:
+		TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", vm->mode);
+	}
+
+	sctlr_el1 |= (1 << 0) | (1 << 2) | (1 << 12) /* M | C | I */;
+	/* TCR_EL1 |= IRGN0:WBWA | ORGN0:WBWA | SH0:Inner-Shareable */;
+	tcr_el1 |= (1 << 8) | (1 << 10) | (3 << 12);
+	tcr_el1 |= (64 - vm->va_bits) /* T0SZ */;
+
+	set_reg(vm, vcpuid, ARM64_SYS_REG(SCTLR_EL1), sctlr_el1);
+	set_reg(vm, vcpuid, ARM64_SYS_REG(TCR_EL1), tcr_el1);
+	set_reg(vm, vcpuid, ARM64_SYS_REG(MAIR_EL1), DEFAULT_MAIR_EL1);
+	set_reg(vm, vcpuid, ARM64_SYS_REG(TTBR0_EL1), vm->pgd);
+}
+
+void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent)
+{
+	uint64_t pstate, pc;
+
+	get_reg(vm, vcpuid, ARM64_CORE_REG(regs.pstate), &pstate);
+	get_reg(vm, vcpuid, ARM64_CORE_REG(regs.pc), &pc);
+
+        fprintf(stream, "%*spstate: 0x%.16llx pc: 0x%.16llx\n",
+                indent, "", pstate, pc);
+
+}
diff --git a/tools/testing/selftests/kvm/lib/assert.c b/tools/testing/selftests/kvm/lib/assert.c
index cd01144d27c8..6398efe67885 100644
--- a/tools/testing/selftests/kvm/lib/assert.c
+++ b/tools/testing/selftests/kvm/lib/assert.c
@@ -13,7 +13,7 @@
 #include <execinfo.h>
 #include <sys/syscall.h>
 
-#include "../../kselftest.h"
+#include "kselftest.h"
 
 /* Dumps the current stack trace to stderr. */
 static void __attribute__((noinline)) test_dump_stack(void);
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 6fd8c089cafc..1b41e71283d5 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -16,10 +16,8 @@
 #include <sys/stat.h>
 #include <linux/kernel.h>
 
-#define KVM_DEV_PATH "/dev/kvm"
-
 #define KVM_UTIL_PGS_PER_HUGEPG 512
-#define KVM_UTIL_MIN_PADDR      0x2000
+#define KVM_UTIL_MIN_PFN	2
 
 /* Aligns x up to the next multiple of size. Size must be a power of 2. */
 static void *align(void *x, size_t size)
@@ -30,7 +28,8 @@ static void *align(void *x, size_t size)
 	return (void *) (((size_t) x + mask) & ~mask);
 }
 
-/* Capability
+/*
+ * Capability
  *
  * Input Args:
  *   cap - Capability
@@ -92,16 +91,23 @@ static void vm_open(struct kvm_vm *vm, int perm)
 	if (vm->kvm_fd < 0)
 		exit(KSFT_SKIP);
 
-	/* Create VM. */
 	vm->fd = ioctl(vm->kvm_fd, KVM_CREATE_VM, NULL);
 	TEST_ASSERT(vm->fd >= 0, "KVM_CREATE_VM ioctl failed, "
 		"rc: %i errno: %i", vm->fd, errno);
 }
 
-/* VM Create
+const char * const vm_guest_mode_string[] = {
+	"PA-bits:52, VA-bits:48, 4K pages",
+	"PA-bits:52, VA-bits:48, 64K pages",
+	"PA-bits:40, VA-bits:48, 4K pages",
+	"PA-bits:40, VA-bits:48, 64K pages",
+};
+
+/*
+ * VM Create
  *
  * Input Args:
- *   mode - VM Mode (e.g. VM_MODE_FLAT48PG)
+ *   mode - VM Mode (e.g. VM_MODE_P52V48_4K)
  *   phy_pages - Physical memory pages
  *   perm - permission
  *
@@ -110,7 +116,7 @@ static void vm_open(struct kvm_vm *vm, int perm)
  * Return:
  *   Pointer to opaque structure that describes the created VM.
  *
- * Creates a VM with the mode specified by mode (e.g. VM_MODE_FLAT48PG).
+ * Creates a VM with the mode specified by mode (e.g. VM_MODE_P52V48_4K).
  * When phy_pages is non-zero, a memory region of phy_pages physical pages
  * is created and mapped starting at guest physical address 0.  The file
  * descriptor to control the created VM is created with the permissions
@@ -121,35 +127,56 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
 	struct kvm_vm *vm;
 	int kvm_fd;
 
-	/* Allocate memory. */
 	vm = calloc(1, sizeof(*vm));
-	TEST_ASSERT(vm != NULL, "Insufficent Memory");
+	TEST_ASSERT(vm != NULL, "Insufficient Memory");
 
 	vm->mode = mode;
 	vm_open(vm, perm);
 
 	/* Setup mode specific traits. */
 	switch (vm->mode) {
-	case VM_MODE_FLAT48PG:
+	case VM_MODE_P52V48_4K:
+		vm->pgtable_levels = 4;
 		vm->page_size = 0x1000;
 		vm->page_shift = 12;
-
-		/* Limit to 48-bit canonical virtual addresses. */
-		vm->vpages_valid = sparsebit_alloc();
-		sparsebit_set_num(vm->vpages_valid,
-			0, (1ULL << (48 - 1)) >> vm->page_shift);
-		sparsebit_set_num(vm->vpages_valid,
-			(~((1ULL << (48 - 1)) - 1)) >> vm->page_shift,
-			(1ULL << (48 - 1)) >> vm->page_shift);
-
-		/* Limit physical addresses to 52-bits. */
-		vm->max_gfn = ((1ULL << 52) >> vm->page_shift) - 1;
+		vm->va_bits = 48;
+		break;
+	case VM_MODE_P52V48_64K:
+		vm->pgtable_levels = 3;
+		vm->pa_bits = 52;
+		vm->page_size = 0x10000;
+		vm->page_shift = 16;
+		vm->va_bits = 48;
+		break;
+	case VM_MODE_P40V48_4K:
+		vm->pgtable_levels = 4;
+		vm->pa_bits = 40;
+		vm->va_bits = 48;
+		vm->page_size = 0x1000;
+		vm->page_shift = 12;
+		break;
+	case VM_MODE_P40V48_64K:
+		vm->pgtable_levels = 3;
+		vm->pa_bits = 40;
+		vm->va_bits = 48;
+		vm->page_size = 0x10000;
+		vm->page_shift = 16;
 		break;
-
 	default:
 		TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", mode);
 	}
 
+	/* Limit to VA-bit canonical virtual addresses. */
+	vm->vpages_valid = sparsebit_alloc();
+	sparsebit_set_num(vm->vpages_valid,
+		0, (1ULL << (vm->va_bits - 1)) >> vm->page_shift);
+	sparsebit_set_num(vm->vpages_valid,
+		(~((1ULL << (vm->va_bits - 1)) - 1)) >> vm->page_shift,
+		(1ULL << (vm->va_bits - 1)) >> vm->page_shift);
+
+	/* Limit physical addresses to PA-bits. */
+	vm->max_gfn = ((1ULL << vm->pa_bits) >> vm->page_shift) - 1;
+
 	/* Allocate and setup memory for guest. */
 	vm->vpages_mapped = sparsebit_alloc();
 	if (phy_pages != 0)
@@ -159,7 +186,8 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
 	return vm;
 }
 
-/* VM Restart
+/*
+ * VM Restart
  *
  * Input Args:
  *   vm - VM that has been released before
@@ -186,7 +214,8 @@ void kvm_vm_restart(struct kvm_vm *vmp, int perm)
 			    "  rc: %i errno: %i\n"
 			    "  slot: %u flags: 0x%x\n"
 			    "  guest_phys_addr: 0x%lx size: 0x%lx",
-			    ret, errno, region->region.slot, region->region.flags,
+			    ret, errno, region->region.slot,
+			    region->region.flags,
 			    region->region.guest_phys_addr,
 			    region->region.memory_size);
 	}
@@ -202,7 +231,8 @@ void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log)
 		    strerror(-ret));
 }
 
-/* Userspace Memory Region Find
+/*
+ * Userspace Memory Region Find
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -220,8 +250,8 @@ void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log)
  * of the regions is returned.  Null is returned only when no overlapping
  * region exists.
  */
-static struct userspace_mem_region *userspace_mem_region_find(
-	struct kvm_vm *vm, uint64_t start, uint64_t end)
+static struct userspace_mem_region *
+userspace_mem_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end)
 {
 	struct userspace_mem_region *region;
 
@@ -237,7 +267,8 @@ static struct userspace_mem_region *userspace_mem_region_find(
 	return NULL;
 }
 
-/* KVM Userspace Memory Region Find
+/*
+ * KVM Userspace Memory Region Find
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -265,7 +296,8 @@ kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
 	return &region->region;
 }
 
-/* VCPU Find
+/*
+ * VCPU Find
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -280,8 +312,7 @@ kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
  * returns a pointer to it.  Returns NULL if the VM doesn't contain a VCPU
  * for the specified vcpuid.
  */
-struct vcpu *vcpu_find(struct kvm_vm *vm,
-	uint32_t vcpuid)
+struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid)
 {
 	struct vcpu *vcpup;
 
@@ -293,7 +324,8 @@ struct vcpu *vcpu_find(struct kvm_vm *vm,
 	return NULL;
 }
 
-/* VM VCPU Remove
+/*
+ * VM VCPU Remove
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -330,11 +362,9 @@ void kvm_vm_release(struct kvm_vm *vmp)
 {
 	int ret;
 
-	/* Free VCPUs. */
 	while (vmp->vcpu_head)
 		vm_vcpu_rm(vmp, vmp->vcpu_head->id);
 
-	/* Close file descriptor for the VM. */
 	ret = close(vmp->fd);
 	TEST_ASSERT(ret == 0, "Close of vm fd failed,\n"
 		"  vmp->fd: %i rc: %i errno: %i", vmp->fd, ret, errno);
@@ -344,7 +374,8 @@ void kvm_vm_release(struct kvm_vm *vmp)
 		"  vmp->kvm_fd: %i rc: %i errno: %i", vmp->kvm_fd, ret, errno);
 }
 
-/* Destroys and frees the VM pointed to by vmp.
+/*
+ * Destroys and frees the VM pointed to by vmp.
  */
 void kvm_vm_free(struct kvm_vm *vmp)
 {
@@ -383,7 +414,8 @@ void kvm_vm_free(struct kvm_vm *vmp)
 	free(vmp);
 }
 
-/* Memory Compare, host virtual to guest virtual
+/*
+ * Memory Compare, host virtual to guest virtual
  *
  * Input Args:
  *   hva - Starting host virtual address
@@ -405,23 +437,25 @@ void kvm_vm_free(struct kvm_vm *vmp)
  * a length of len, to the guest bytes starting at the guest virtual
  * address given by gva.
  */
-int kvm_memcmp_hva_gva(void *hva,
-	struct kvm_vm *vm, vm_vaddr_t gva, size_t len)
+int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, vm_vaddr_t gva, size_t len)
 {
 	size_t amt;
 
-	/* Compare a batch of bytes until either a match is found
+	/*
+	 * Compare a batch of bytes until either a match is found
 	 * or all the bytes have been compared.
 	 */
 	for (uintptr_t offset = 0; offset < len; offset += amt) {
 		uintptr_t ptr1 = (uintptr_t)hva + offset;
 
-		/* Determine host address for guest virtual address
+		/*
+		 * Determine host address for guest virtual address
 		 * at offset.
 		 */
 		uintptr_t ptr2 = (uintptr_t)addr_gva2hva(vm, gva + offset);
 
-		/* Determine amount to compare on this pass.
+		/*
+		 * Determine amount to compare on this pass.
 		 * Don't allow the comparsion to cross a page boundary.
 		 */
 		amt = len - offset;
@@ -433,7 +467,8 @@ int kvm_memcmp_hva_gva(void *hva,
 		assert((ptr1 >> vm->page_shift) == ((ptr1 + amt - 1) >> vm->page_shift));
 		assert((ptr2 >> vm->page_shift) == ((ptr2 + amt - 1) >> vm->page_shift));
 
-		/* Perform the comparison.  If there is a difference
+		/*
+		 * Perform the comparison.  If there is a difference
 		 * return that result to the caller, otherwise need
 		 * to continue on looking for a mismatch.
 		 */
@@ -442,109 +477,15 @@ int kvm_memcmp_hva_gva(void *hva,
 			return ret;
 	}
 
-	/* No mismatch found.  Let the caller know the two memory
+	/*
+	 * No mismatch found.  Let the caller know the two memory
 	 * areas are equal.
 	 */
 	return 0;
 }
 
-/* Allocate an instance of struct kvm_cpuid2
- *
- * Input Args: None
- *
- * Output Args: None
- *
- * Return: A pointer to the allocated struct. The caller is responsible
- * for freeing this struct.
- *
- * Since kvm_cpuid2 uses a 0-length array to allow a the size of the
- * array to be decided at allocation time, allocation is slightly
- * complicated. This function uses a reasonable default length for
- * the array and performs the appropriate allocation.
- */
-static struct kvm_cpuid2 *allocate_kvm_cpuid2(void)
-{
-	struct kvm_cpuid2 *cpuid;
-	int nent = 100;
-	size_t size;
-
-	size = sizeof(*cpuid);
-	size += nent * sizeof(struct kvm_cpuid_entry2);
-	cpuid = malloc(size);
-	if (!cpuid) {
-		perror("malloc");
-		abort();
-	}
-
-	cpuid->nent = nent;
-
-	return cpuid;
-}
-
-/* KVM Supported CPUID Get
- *
- * Input Args: None
- *
- * Output Args:
- *
- * Return: The supported KVM CPUID
- *
- * Get the guest CPUID supported by KVM.
- */
-struct kvm_cpuid2 *kvm_get_supported_cpuid(void)
-{
-	static struct kvm_cpuid2 *cpuid;
-	int ret;
-	int kvm_fd;
-
-	if (cpuid)
-		return cpuid;
-
-	cpuid = allocate_kvm_cpuid2();
-	kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
-	if (kvm_fd < 0)
-		exit(KSFT_SKIP);
-
-	ret = ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, cpuid);
-	TEST_ASSERT(ret == 0, "KVM_GET_SUPPORTED_CPUID failed %d %d\n",
-		    ret, errno);
-
-	close(kvm_fd);
-	return cpuid;
-}
-
-/* Locate a cpuid entry.
- *
- * Input Args:
- *   cpuid: The cpuid.
- *   function: The function of the cpuid entry to find.
- *
- * Output Args: None
- *
- * Return: A pointer to the cpuid entry. Never returns NULL.
- */
-struct kvm_cpuid_entry2 *
-kvm_get_supported_cpuid_index(uint32_t function, uint32_t index)
-{
-	struct kvm_cpuid2 *cpuid;
-	struct kvm_cpuid_entry2 *entry = NULL;
-	int i;
-
-	cpuid = kvm_get_supported_cpuid();
-	for (i = 0; i < cpuid->nent; i++) {
-		if (cpuid->entries[i].function == function &&
-		    cpuid->entries[i].index == index) {
-			entry = &cpuid->entries[i];
-			break;
-		}
-	}
-
-	TEST_ASSERT(entry, "Guest CPUID entry not found: (EAX=%x, ECX=%x).",
-		    function, index);
-	return entry;
-}
-
-/* VM Userspace Memory Region Add
+/*
+ * VM Userspace Memory Region Add
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -586,7 +527,8 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
 		"  vm->max_gfn: 0x%lx vm->page_size: 0x%x",
 		guest_paddr, npages, vm->max_gfn, vm->page_size);
 
-	/* Confirm a mem region with an overlapping address doesn't
+	/*
+	 * Confirm a mem region with an overlapping address doesn't
 	 * already exist.
 	 */
 	region = (struct userspace_mem_region *) userspace_mem_region_find(
@@ -677,7 +619,8 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
 	vm->userspace_mem_region_head = region;
 }
 
-/* Memslot to region
+/*
+ * Memslot to region
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -691,8 +634,8 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
  *   on error (e.g. currently no memory region using memslot as a KVM
  *   memory slot ID).
  */
-static struct userspace_mem_region *memslot2region(struct kvm_vm *vm,
-	uint32_t memslot)
+static struct userspace_mem_region *
+memslot2region(struct kvm_vm *vm, uint32_t memslot)
 {
 	struct userspace_mem_region *region;
 
@@ -712,7 +655,8 @@ static struct userspace_mem_region *memslot2region(struct kvm_vm *vm,
 	return region;
 }
 
-/* VM Memory Region Flags Set
+/*
+ * VM Memory Region Flags Set
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -730,7 +674,6 @@ void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags)
 	int ret;
 	struct userspace_mem_region *region;
 
-	/* Locate memory region. */
 	region = memslot2region(vm, slot);
 
 	region->region.flags = flags;
@@ -742,7 +685,8 @@ void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags)
 		ret, errno, slot, flags);
 }
 
-/* VCPU mmap Size
+/*
+ * VCPU mmap Size
  *
  * Input Args: None
  *
@@ -772,7 +716,8 @@ static int vcpu_mmap_sz(void)
 	return ret;
 }
 
-/* VM VCPU Add
+/*
+ * VM VCPU Add
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -785,7 +730,8 @@ static int vcpu_mmap_sz(void)
  * Creates and adds to the VM specified by vm and virtual CPU with
  * the ID given by vcpuid.
  */
-void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid, int pgd_memslot, int gdt_memslot)
+void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid, int pgd_memslot,
+		 int gdt_memslot)
 {
 	struct vcpu *vcpu;
 
@@ -823,7 +769,8 @@ void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid, int pgd_memslot, int gdt_me
 	vcpu_setup(vm, vcpuid, pgd_memslot, gdt_memslot);
 }
 
-/* VM Virtual Address Unused Gap
+/*
+ * VM Virtual Address Unused Gap
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -843,14 +790,14 @@ void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid, int pgd_memslot, int gdt_me
  * sz unallocated bytes >= vaddr_min is available.
  */
 static vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz,
-	vm_vaddr_t vaddr_min)
+				      vm_vaddr_t vaddr_min)
 {
 	uint64_t pages = (sz + vm->page_size - 1) >> vm->page_shift;
 
 	/* Determine lowest permitted virtual page index. */
 	uint64_t pgidx_start = (vaddr_min + vm->page_size - 1) >> vm->page_shift;
 	if ((pgidx_start * vm->page_size) < vaddr_min)
-			goto no_va_found;
+		goto no_va_found;
 
 	/* Loop over section with enough valid virtual page indexes. */
 	if (!sparsebit_is_set_num(vm->vpages_valid,
@@ -909,7 +856,8 @@ va_found:
 	return pgidx_start * vm->page_size;
 }
 
-/* VM Virtual Address Allocate
+/*
+ * VM Virtual Address Allocate
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -930,13 +878,14 @@ va_found:
  * a page.
  */
 vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
-	uint32_t data_memslot, uint32_t pgd_memslot)
+			  uint32_t data_memslot, uint32_t pgd_memslot)
 {
 	uint64_t pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0);
 
 	virt_pgd_alloc(vm, pgd_memslot);
 
-	/* Find an unused range of virtual page addresses of at least
+	/*
+	 * Find an unused range of virtual page addresses of at least
 	 * pages in length.
 	 */
 	vm_vaddr_t vaddr_start = vm_vaddr_unused_gap(vm, sz, vaddr_min);
@@ -946,7 +895,8 @@ vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
 		pages--, vaddr += vm->page_size) {
 		vm_paddr_t paddr;
 
-		paddr = vm_phy_page_alloc(vm, KVM_UTIL_MIN_PADDR, data_memslot);
+		paddr = vm_phy_page_alloc(vm,
+				KVM_UTIL_MIN_PFN * vm->page_size, data_memslot);
 
 		virt_pg_map(vm, vaddr, paddr, pgd_memslot);
 
@@ -990,7 +940,8 @@ void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
 	}
 }
 
-/* Address VM Physical to Host Virtual
+/*
+ * Address VM Physical to Host Virtual
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -1022,7 +973,8 @@ void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa)
 	return NULL;
 }
 
-/* Address Host Virtual to VM Physical
+/*
+ * Address Host Virtual to VM Physical
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -1056,7 +1008,8 @@ vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva)
 	return -1;
 }
 
-/* VM Create IRQ Chip
+/*
+ * VM Create IRQ Chip
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -1078,7 +1031,8 @@ void vm_create_irqchip(struct kvm_vm *vm)
 	vm->has_irqchip = true;
 }
 
-/* VM VCPU State
+/*
+ * VM VCPU State
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -1100,7 +1054,8 @@ struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid)
 	return vcpu->state;
 }
 
-/* VM VCPU Run
+/*
+ * VM VCPU Run
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -1126,13 +1081,14 @@ int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid)
 	int rc;
 
 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
-        do {
+	do {
 		rc = ioctl(vcpu->fd, KVM_RUN, NULL);
 	} while (rc == -1 && errno == EINTR);
 	return rc;
 }
 
-/* VM VCPU Set MP State
+/*
+ * VM VCPU Set MP State
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -1147,7 +1103,7 @@ int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid)
  * by mp_state.
  */
 void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid,
-	struct kvm_mp_state *mp_state)
+		       struct kvm_mp_state *mp_state)
 {
 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
 	int ret;
@@ -1159,7 +1115,8 @@ void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid,
 		"rc: %i errno: %i", ret, errno);
 }
 
-/* VM VCPU Regs Get
+/*
+ * VM VCPU Regs Get
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -1173,21 +1130,20 @@ void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid,
  * Obtains the current register state for the VCPU specified by vcpuid
  * and stores it at the location given by regs.
  */
-void vcpu_regs_get(struct kvm_vm *vm,
-	uint32_t vcpuid, struct kvm_regs *regs)
+void vcpu_regs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs)
 {
 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
 	int ret;
 
 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
 
-	/* Get the regs. */
 	ret = ioctl(vcpu->fd, KVM_GET_REGS, regs);
 	TEST_ASSERT(ret == 0, "KVM_GET_REGS failed, rc: %i errno: %i",
 		ret, errno);
 }
 
-/* VM VCPU Regs Set
+/*
+ * VM VCPU Regs Set
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -1201,165 +1157,46 @@ void vcpu_regs_get(struct kvm_vm *vm,
  * Sets the regs of the VCPU specified by vcpuid to the values
  * given by regs.
  */
-void vcpu_regs_set(struct kvm_vm *vm,
-	uint32_t vcpuid, struct kvm_regs *regs)
+void vcpu_regs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs)
 {
 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
 	int ret;
 
 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
 
-	/* Set the regs. */
 	ret = ioctl(vcpu->fd, KVM_SET_REGS, regs);
 	TEST_ASSERT(ret == 0, "KVM_SET_REGS failed, rc: %i errno: %i",
 		ret, errno);
 }
 
 void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid,
-			  struct kvm_vcpu_events *events)
+		     struct kvm_vcpu_events *events)
 {
 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
 	int ret;
 
 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
 
-	/* Get the regs. */
 	ret = ioctl(vcpu->fd, KVM_GET_VCPU_EVENTS, events);
 	TEST_ASSERT(ret == 0, "KVM_GET_VCPU_EVENTS, failed, rc: %i errno: %i",
 		ret, errno);
 }
 
 void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid,
-			  struct kvm_vcpu_events *events)
+		     struct kvm_vcpu_events *events)
 {
 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
 	int ret;
 
 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
 
-	/* Set the regs. */
 	ret = ioctl(vcpu->fd, KVM_SET_VCPU_EVENTS, events);
 	TEST_ASSERT(ret == 0, "KVM_SET_VCPU_EVENTS, failed, rc: %i errno: %i",
 		ret, errno);
 }
 
-/* VCPU Get MSR
- *
- * Input Args:
- *   vm - Virtual Machine
- *   vcpuid - VCPU ID
- *   msr_index - Index of MSR
- *
- * Output Args: None
- *
- * Return: On success, value of the MSR. On failure a TEST_ASSERT is produced.
- *
- * Get value of MSR for VCPU.
- */
-uint64_t vcpu_get_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index)
-{
-	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
-	struct {
-		struct kvm_msrs header;
-		struct kvm_msr_entry entry;
-	} buffer = {};
-	int r;
-
-	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
-	buffer.header.nmsrs = 1;
-	buffer.entry.index = msr_index;
-	r = ioctl(vcpu->fd, KVM_GET_MSRS, &buffer.header);
-	TEST_ASSERT(r == 1, "KVM_GET_MSRS IOCTL failed,\n"
-		"  rc: %i errno: %i", r, errno);
-
-	return buffer.entry.data;
-}
-
-/* VCPU Set MSR
- *
- * Input Args:
- *   vm - Virtual Machine
- *   vcpuid - VCPU ID
- *   msr_index - Index of MSR
- *   msr_value - New value of MSR
- *
- * Output Args: None
- *
- * Return: On success, nothing. On failure a TEST_ASSERT is produced.
- *
- * Set value of MSR for VCPU.
- */
-void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
-	uint64_t msr_value)
-{
-	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
-	struct {
-		struct kvm_msrs header;
-		struct kvm_msr_entry entry;
-	} buffer = {};
-	int r;
-
-	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
-	memset(&buffer, 0, sizeof(buffer));
-	buffer.header.nmsrs = 1;
-	buffer.entry.index = msr_index;
-	buffer.entry.data = msr_value;
-	r = ioctl(vcpu->fd, KVM_SET_MSRS, &buffer.header);
-	TEST_ASSERT(r == 1, "KVM_SET_MSRS IOCTL failed,\n"
-		"  rc: %i errno: %i", r, errno);
-}
-
-/* VM VCPU Args Set
- *
- * Input Args:
- *   vm - Virtual Machine
- *   vcpuid - VCPU ID
- *   num - number of arguments
- *   ... - arguments, each of type uint64_t
- *
- * Output Args: None
- *
- * Return: None
- *
- * Sets the first num function input arguments to the values
- * given as variable args.  Each of the variable args is expected to
- * be of type uint64_t.
- */
-void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
-{
-	va_list ap;
-	struct kvm_regs regs;
-
-	TEST_ASSERT(num >= 1 && num <= 6, "Unsupported number of args,\n"
-		    "  num: %u\n",
-		    num);
-
-	va_start(ap, num);
-	vcpu_regs_get(vm, vcpuid, &regs);
-
-	if (num >= 1)
-		regs.rdi = va_arg(ap, uint64_t);
-
-	if (num >= 2)
-		regs.rsi = va_arg(ap, uint64_t);
-
-	if (num >= 3)
-		regs.rdx = va_arg(ap, uint64_t);
-
-	if (num >= 4)
-		regs.rcx = va_arg(ap, uint64_t);
-
-	if (num >= 5)
-		regs.r8 = va_arg(ap, uint64_t);
-
-	if (num >= 6)
-		regs.r9 = va_arg(ap, uint64_t);
-
-	vcpu_regs_set(vm, vcpuid, &regs);
-	va_end(ap);
-}
-
-/* VM VCPU System Regs Get
+/*
+ * VM VCPU System Regs Get
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -1373,22 +1210,20 @@ void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
  * Obtains the current system register state for the VCPU specified by
  * vcpuid and stores it at the location given by sregs.
  */
-void vcpu_sregs_get(struct kvm_vm *vm,
-	uint32_t vcpuid, struct kvm_sregs *sregs)
+void vcpu_sregs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs)
 {
 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
 	int ret;
 
 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
 
-	/* Get the regs. */
-	/* Get the regs. */
 	ret = ioctl(vcpu->fd, KVM_GET_SREGS, sregs);
 	TEST_ASSERT(ret == 0, "KVM_GET_SREGS failed, rc: %i errno: %i",
 		ret, errno);
 }
 
-/* VM VCPU System Regs Set
+/*
+ * VM VCPU System Regs Set
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -1402,27 +1237,25 @@ void vcpu_sregs_get(struct kvm_vm *vm,
  * Sets the system regs of the VCPU specified by vcpuid to the values
  * given by sregs.
  */
-void vcpu_sregs_set(struct kvm_vm *vm,
-	uint32_t vcpuid, struct kvm_sregs *sregs)
+void vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs)
 {
 	int ret = _vcpu_sregs_set(vm, vcpuid, sregs);
 	TEST_ASSERT(ret == 0, "KVM_RUN IOCTL failed, "
 		"rc: %i errno: %i", ret, errno);
 }
 
-int _vcpu_sregs_set(struct kvm_vm *vm,
-	uint32_t vcpuid, struct kvm_sregs *sregs)
+int _vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs)
 {
 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
 	int ret;
 
 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
 
-	/* Get the regs. */
 	return ioctl(vcpu->fd, KVM_SET_SREGS, sregs);
 }
 
-/* VCPU Ioctl
+/*
+ * VCPU Ioctl
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -1434,8 +1267,8 @@ int _vcpu_sregs_set(struct kvm_vm *vm,
  *
  * Issues an arbitrary ioctl on a VCPU fd.
  */
-void vcpu_ioctl(struct kvm_vm *vm,
-	uint32_t vcpuid, unsigned long cmd, void *arg)
+void vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid,
+		unsigned long cmd, void *arg)
 {
 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
 	int ret;
@@ -1447,7 +1280,8 @@ void vcpu_ioctl(struct kvm_vm *vm,
 		cmd, ret, errno, strerror(errno));
 }
 
-/* VM Ioctl
+/*
+ * VM Ioctl
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -1467,7 +1301,8 @@ void vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg)
 		cmd, ret, errno, strerror(errno));
 }
 
-/* VM Dump
+/*
+ * VM Dump
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -1514,38 +1349,6 @@ void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
 		vcpu_dump(stream, vm, vcpu->id, indent + 2);
 }
 
-/* VM VCPU Dump
- *
- * Input Args:
- *   vm - Virtual Machine
- *   vcpuid - VCPU ID
- *   indent - Left margin indent amount
- *
- * Output Args:
- *   stream - Output FILE stream
- *
- * Return: None
- *
- * Dumps the current state of the VCPU specified by vcpuid, within the VM
- * given by vm, to the FILE stream given by stream.
- */
-void vcpu_dump(FILE *stream, struct kvm_vm *vm,
-	uint32_t vcpuid, uint8_t indent)
-{
-		struct kvm_regs regs;
-		struct kvm_sregs sregs;
-
-		fprintf(stream, "%*scpuid: %u\n", indent, "", vcpuid);
-
-		fprintf(stream, "%*sregs:\n", indent + 2, "");
-		vcpu_regs_get(vm, vcpuid, &regs);
-		regs_dump(stream, &regs, indent + 4);
-
-		fprintf(stream, "%*ssregs:\n", indent + 2, "");
-		vcpu_sregs_get(vm, vcpuid, &sregs);
-		sregs_dump(stream, &sregs, indent + 4);
-}
-
 /* Known KVM exit reasons */
 static struct exit_reason {
 	unsigned int reason;
@@ -1576,7 +1379,8 @@ static struct exit_reason {
 #endif
 };
 
-/* Exit Reason String
+/*
+ * Exit Reason String
  *
  * Input Args:
  *   exit_reason - Exit reason
@@ -1602,10 +1406,12 @@ const char *exit_reason_str(unsigned int exit_reason)
 	return "Unknown";
 }
 
-/* Physical Page Allocate
+/*
+ * Physical Contiguous Page Allocator
  *
  * Input Args:
  *   vm - Virtual Machine
+ *   num - number of pages
  *   paddr_min - Physical address minimum
  *   memslot - Memory region to allocate page from
  *
@@ -1614,47 +1420,59 @@ const char *exit_reason_str(unsigned int exit_reason)
  * Return:
  *   Starting physical address
  *
- * Within the VM specified by vm, locates an available physical page
- * at or above paddr_min.  If found, the page is marked as in use
- * and its address is returned.  A TEST_ASSERT failure occurs if no
- * page is available at or above paddr_min.
+ * Within the VM specified by vm, locates a range of available physical
+ * pages at or above paddr_min. If found, the pages are marked as in use
+ * and thier base address is returned. A TEST_ASSERT failure occurs if
+ * not enough pages are available at or above paddr_min.
  */
-vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm,
-	vm_paddr_t paddr_min, uint32_t memslot)
+vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
+			      vm_paddr_t paddr_min, uint32_t memslot)
 {
 	struct userspace_mem_region *region;
-	sparsebit_idx_t pg;
+	sparsebit_idx_t pg, base;
+
+	TEST_ASSERT(num > 0, "Must allocate at least one page");
 
 	TEST_ASSERT((paddr_min % vm->page_size) == 0, "Min physical address "
 		"not divisible by page size.\n"
 		"  paddr_min: 0x%lx page_size: 0x%x",
 		paddr_min, vm->page_size);
 
-	/* Locate memory region. */
 	region = memslot2region(vm, memslot);
+	base = pg = paddr_min >> vm->page_shift;
 
-	/* Locate next available physical page at or above paddr_min. */
-	pg = paddr_min >> vm->page_shift;
-
-	if (!sparsebit_is_set(region->unused_phy_pages, pg)) {
-		pg = sparsebit_next_set(region->unused_phy_pages, pg);
-		if (pg == 0) {
-			fprintf(stderr, "No guest physical page available, "
-				"paddr_min: 0x%lx page_size: 0x%x memslot: %u",
-				paddr_min, vm->page_size, memslot);
-			fputs("---- vm dump ----\n", stderr);
-			vm_dump(stderr, vm, 2);
-			abort();
+	do {
+		for (; pg < base + num; ++pg) {
+			if (!sparsebit_is_set(region->unused_phy_pages, pg)) {
+				base = pg = sparsebit_next_set(region->unused_phy_pages, pg);
+				break;
+			}
 		}
+	} while (pg && pg != base + num);
+
+	if (pg == 0) {
+		fprintf(stderr, "No guest physical page available, "
+			"paddr_min: 0x%lx page_size: 0x%x memslot: %u\n",
+			paddr_min, vm->page_size, memslot);
+		fputs("---- vm dump ----\n", stderr);
+		vm_dump(stderr, vm, 2);
+		abort();
 	}
 
-	/* Specify page as in use and return its address. */
-	sparsebit_clear(region->unused_phy_pages, pg);
+	for (pg = base; pg < base + num; ++pg)
+		sparsebit_clear(region->unused_phy_pages, pg);
+
+	return base * vm->page_size;
+}
 
-	return pg * vm->page_size;
+vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min,
+			     uint32_t memslot)
+{
+	return vm_phy_pages_alloc(vm, 1, paddr_min, memslot);
 }
 
-/* Address Guest Virtual to Host Virtual
+/*
+ * Address Guest Virtual to Host Virtual
  *
  * Input Args:
  *   vm - Virtual Machine
@@ -1669,17 +1487,3 @@ void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva)
 {
 	return addr_gpa2hva(vm, addr_gva2gpa(vm, gva));
 }
-
-void guest_args_read(struct kvm_vm *vm, uint32_t vcpu_id,
-		     struct guest_args *args)
-{
-	struct kvm_run *run = vcpu_state(vm, vcpu_id);
-	struct kvm_regs regs;
-
-	memset(&regs, 0, sizeof(regs));
-	vcpu_regs_get(vm, vcpu_id, &regs);
-
-	args->port = run->io.port;
-	args->arg0 = regs.rdi;
-	args->arg1 = regs.rsi;
-}
diff --git a/tools/testing/selftests/kvm/lib/kvm_util_internal.h b/tools/testing/selftests/kvm/lib/kvm_util_internal.h
index 542ed606b338..52701db0f253 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util_internal.h
+++ b/tools/testing/selftests/kvm/lib/kvm_util_internal.h
@@ -1,28 +1,29 @@
 /*
- * tools/testing/selftests/kvm/lib/kvm_util.c
+ * tools/testing/selftests/kvm/lib/kvm_util_internal.h
  *
  * Copyright (C) 2018, Google LLC.
  *
  * This work is licensed under the terms of the GNU GPL, version 2.
  */
 
-#ifndef KVM_UTIL_INTERNAL_H
-#define KVM_UTIL_INTERNAL_H 1
+#ifndef SELFTEST_KVM_UTIL_INTERNAL_H
+#define SELFTEST_KVM_UTIL_INTERNAL_H
 
 #include "sparsebit.h"
 
+#define KVM_DEV_PATH		"/dev/kvm"
+
 #ifndef BITS_PER_BYTE
-#define BITS_PER_BYTE           8
+#define BITS_PER_BYTE		8
 #endif
 
 #ifndef BITS_PER_LONG
-#define BITS_PER_LONG (BITS_PER_BYTE * sizeof(long))
+#define BITS_PER_LONG		(BITS_PER_BYTE * sizeof(long))
 #endif
 
 #define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))
-#define BITS_TO_LONGS(nr)       DIV_ROUND_UP(nr, BITS_PER_LONG)
+#define BITS_TO_LONGS(nr)	DIV_ROUND_UP(nr, BITS_PER_LONG)
 
-/* Concrete definition of struct kvm_vm. */
 struct userspace_mem_region {
 	struct userspace_mem_region *next, *prev;
 	struct kvm_userspace_memory_region region;
@@ -45,14 +46,16 @@ struct kvm_vm {
 	int mode;
 	int kvm_fd;
 	int fd;
+	unsigned int pgtable_levels;
 	unsigned int page_size;
 	unsigned int page_shift;
+	unsigned int pa_bits;
+	unsigned int va_bits;
 	uint64_t max_gfn;
 	struct vcpu *vcpu_head;
 	struct userspace_mem_region *userspace_mem_region_head;
 	struct sparsebit *vpages_valid;
 	struct sparsebit *vpages_mapped;
-
 	bool has_irqchip;
 	bool pgd_created;
 	vm_paddr_t pgd;
@@ -60,13 +63,11 @@ struct kvm_vm {
 	vm_vaddr_t tss;
 };
 
-struct vcpu *vcpu_find(struct kvm_vm *vm,
-	uint32_t vcpuid);
-void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_memslot);
+struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid);
+void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot,
+		int gdt_memslot);
 void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent);
-void regs_dump(FILE *stream, struct kvm_regs *regs,
-	uint8_t indent);
-void sregs_dump(FILE *stream, struct kvm_sregs *sregs,
-	uint8_t indent);
+void regs_dump(FILE *stream, struct kvm_regs *regs, uint8_t indent);
+void sregs_dump(FILE *stream, struct kvm_sregs *sregs, uint8_t indent);
 
-#endif
+#endif /* SELFTEST_KVM_UTIL_INTERNAL_H */
diff --git a/tools/testing/selftests/kvm/lib/ucall.c b/tools/testing/selftests/kvm/lib/ucall.c
new file mode 100644
index 000000000000..4777f9bb5194
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/ucall.c
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ucall support. A ucall is a "hypercall to userspace".
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ */
+#include "kvm_util.h"
+#include "kvm_util_internal.h"
+
+#define UCALL_PIO_PORT ((uint16_t)0x1000)
+
+static ucall_type_t ucall_type;
+static vm_vaddr_t *ucall_exit_mmio_addr;
+
+static bool ucall_mmio_init(struct kvm_vm *vm, vm_paddr_t gpa)
+{
+	if (kvm_userspace_memory_region_find(vm, gpa, gpa + 1))
+		return false;
+
+	virt_pg_map(vm, gpa, gpa, 0);
+
+	ucall_exit_mmio_addr = (vm_vaddr_t *)gpa;
+	sync_global_to_guest(vm, ucall_exit_mmio_addr);
+
+	return true;
+}
+
+void ucall_init(struct kvm_vm *vm, ucall_type_t type, void *arg)
+{
+	ucall_type = type;
+	sync_global_to_guest(vm, ucall_type);
+
+	if (type == UCALL_PIO)
+		return;
+
+	if (type == UCALL_MMIO) {
+		vm_paddr_t gpa, start, end, step;
+		bool ret;
+
+		if (arg) {
+			gpa = (vm_paddr_t)arg;
+			ret = ucall_mmio_init(vm, gpa);
+			TEST_ASSERT(ret, "Can't set ucall mmio address to %lx", gpa);
+			return;
+		}
+
+		/*
+		 * Find an address within the allowed virtual address space,
+		 * that does _not_ have a KVM memory region associated with it.
+		 * Identity mapping an address like this allows the guest to
+		 * access it, but as KVM doesn't know what to do with it, it
+		 * will assume it's something userspace handles and exit with
+		 * KVM_EXIT_MMIO. Well, at least that's how it works for AArch64.
+		 * Here we start with a guess that the addresses around two
+		 * thirds of the VA space are unmapped and then work both down
+		 * and up from there in 1/6 VA space sized steps.
+		 */
+		start = 1ul << (vm->va_bits * 2 / 3);
+		end = 1ul << vm->va_bits;
+		step = 1ul << (vm->va_bits / 6);
+		for (gpa = start; gpa >= 0; gpa -= step) {
+			if (ucall_mmio_init(vm, gpa & ~(vm->page_size - 1)))
+				return;
+		}
+		for (gpa = start + step; gpa < end; gpa += step) {
+			if (ucall_mmio_init(vm, gpa & ~(vm->page_size - 1)))
+				return;
+		}
+		TEST_ASSERT(false, "Can't find a ucall mmio address");
+	}
+}
+
+void ucall_uninit(struct kvm_vm *vm)
+{
+	ucall_type = 0;
+	sync_global_to_guest(vm, ucall_type);
+	ucall_exit_mmio_addr = 0;
+	sync_global_to_guest(vm, ucall_exit_mmio_addr);
+}
+
+static void ucall_pio_exit(struct ucall *uc)
+{
+#ifdef __x86_64__
+	asm volatile("in %[port], %%al"
+		: : [port] "d" (UCALL_PIO_PORT), "D" (uc) : "rax");
+#endif
+}
+
+static void ucall_mmio_exit(struct ucall *uc)
+{
+	*ucall_exit_mmio_addr = (vm_vaddr_t)uc;
+}
+
+void ucall(uint64_t cmd, int nargs, ...)
+{
+	struct ucall uc = {
+		.cmd = cmd,
+	};
+	va_list va;
+	int i;
+
+	nargs = nargs <= UCALL_MAX_ARGS ? nargs : UCALL_MAX_ARGS;
+
+	va_start(va, nargs);
+	for (i = 0; i < nargs; ++i)
+		uc.args[i] = va_arg(va, uint64_t);
+	va_end(va);
+
+	switch (ucall_type) {
+	case UCALL_PIO:
+		ucall_pio_exit(&uc);
+		break;
+	case UCALL_MMIO:
+		ucall_mmio_exit(&uc);
+		break;
+	};
+}
+
+uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc)
+{
+	struct kvm_run *run = vcpu_state(vm, vcpu_id);
+
+	memset(uc, 0, sizeof(*uc));
+
+#ifdef __x86_64__
+	if (ucall_type == UCALL_PIO && run->exit_reason == KVM_EXIT_IO &&
+	    run->io.port == UCALL_PIO_PORT) {
+		struct kvm_regs regs;
+		vcpu_regs_get(vm, vcpu_id, &regs);
+		memcpy(uc, addr_gva2hva(vm, (vm_vaddr_t)regs.rdi), sizeof(*uc));
+		return uc->cmd;
+	}
+#endif
+	if (ucall_type == UCALL_MMIO && run->exit_reason == KVM_EXIT_MMIO &&
+	    run->mmio.phys_addr == (uint64_t)ucall_exit_mmio_addr) {
+		vm_vaddr_t gva;
+		TEST_ASSERT(run->mmio.is_write && run->mmio.len == 8,
+			    "Unexpected ucall exit mmio address access");
+		gva = *(vm_vaddr_t *)run->mmio.data;
+		memcpy(uc, addr_gva2hva(vm, gva), sizeof(*uc));
+	}
+
+	return uc->cmd;
+}
diff --git a/tools/testing/selftests/kvm/lib/x86.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index a3122f1949a8..f28127f4a3af 100644
--- a/tools/testing/selftests/kvm/lib/x86.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -1,5 +1,5 @@
 /*
- * tools/testing/selftests/kvm/lib/x86.c
+ * tools/testing/selftests/kvm/lib/x86_64/processor.c
  *
  * Copyright (C) 2018, Google LLC.
  *
@@ -10,8 +10,8 @@
 
 #include "test_util.h"
 #include "kvm_util.h"
-#include "kvm_util_internal.h"
-#include "x86.h"
+#include "../kvm_util_internal.h"
+#include "processor.h"
 
 /* Minimum physical address used for virtual translation tables. */
 #define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000
@@ -231,7 +231,7 @@ void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot)
 {
 	int rc;
 
-	TEST_ASSERT(vm->mode == VM_MODE_FLAT48PG, "Attempt to use "
+	TEST_ASSERT(vm->mode == VM_MODE_P52V48_4K, "Attempt to use "
 		"unknown or unsupported guest mode, mode: 0x%x", vm->mode);
 
 	/* If needed, create page map l4 table. */
@@ -264,7 +264,7 @@ void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
 	uint16_t index[4];
 	struct pageMapL4Entry *pml4e;
 
-	TEST_ASSERT(vm->mode == VM_MODE_FLAT48PG, "Attempt to use "
+	TEST_ASSERT(vm->mode == VM_MODE_P52V48_4K, "Attempt to use "
 		"unknown or unsupported guest mode, mode: 0x%x", vm->mode);
 
 	TEST_ASSERT((vaddr % vm->page_size) == 0,
@@ -551,7 +551,7 @@ vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
 	struct pageTableEntry *pte;
 	void *hva;
 
-	TEST_ASSERT(vm->mode == VM_MODE_FLAT48PG, "Attempt to use "
+	TEST_ASSERT(vm->mode == VM_MODE_P52V48_4K, "Attempt to use "
 		"unknown or unsupported guest mode, mode: 0x%x", vm->mode);
 
 	index[0] = (gva >> 12) & 0x1ffu;
@@ -624,9 +624,9 @@ void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_memslot)
 	kvm_setup_gdt(vm, &sregs.gdt, gdt_memslot, pgd_memslot);
 
 	switch (vm->mode) {
-	case VM_MODE_FLAT48PG:
+	case VM_MODE_P52V48_4K:
 		sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG;
-		sregs.cr4 |= X86_CR4_PAE;
+		sregs.cr4 |= X86_CR4_PAE | X86_CR4_OSFXSR;
 		sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX);
 
 		kvm_seg_set_unusable(&sregs.ldt);
@@ -672,6 +672,102 @@ void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
 	vcpu_set_mp_state(vm, vcpuid, &mp_state);
 }
 
+/* Allocate an instance of struct kvm_cpuid2
+ *
+ * Input Args: None
+ *
+ * Output Args: None
+ *
+ * Return: A pointer to the allocated struct. The caller is responsible
+ * for freeing this struct.
+ *
+ * Since kvm_cpuid2 uses a 0-length array to allow a the size of the
+ * array to be decided at allocation time, allocation is slightly
+ * complicated. This function uses a reasonable default length for
+ * the array and performs the appropriate allocation.
+ */
+static struct kvm_cpuid2 *allocate_kvm_cpuid2(void)
+{
+	struct kvm_cpuid2 *cpuid;
+	int nent = 100;
+	size_t size;
+
+	size = sizeof(*cpuid);
+	size += nent * sizeof(struct kvm_cpuid_entry2);
+	cpuid = malloc(size);
+	if (!cpuid) {
+		perror("malloc");
+		abort();
+	}
+
+	cpuid->nent = nent;
+
+	return cpuid;
+}
+
+/* KVM Supported CPUID Get
+ *
+ * Input Args: None
+ *
+ * Output Args:
+ *
+ * Return: The supported KVM CPUID
+ *
+ * Get the guest CPUID supported by KVM.
+ */
+struct kvm_cpuid2 *kvm_get_supported_cpuid(void)
+{
+	static struct kvm_cpuid2 *cpuid;
+	int ret;
+	int kvm_fd;
+
+	if (cpuid)
+		return cpuid;
+
+	cpuid = allocate_kvm_cpuid2();
+	kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
+	if (kvm_fd < 0)
+		exit(KSFT_SKIP);
+
+	ret = ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, cpuid);
+	TEST_ASSERT(ret == 0, "KVM_GET_SUPPORTED_CPUID failed %d %d\n",
+		    ret, errno);
+
+	close(kvm_fd);
+	return cpuid;
+}
+
+/* Locate a cpuid entry.
+ *
+ * Input Args:
+ *   cpuid: The cpuid.
+ *   function: The function of the cpuid entry to find.
+ *
+ * Output Args: None
+ *
+ * Return: A pointer to the cpuid entry. Never returns NULL.
+ */
+struct kvm_cpuid_entry2 *
+kvm_get_supported_cpuid_index(uint32_t function, uint32_t index)
+{
+	struct kvm_cpuid2 *cpuid;
+	struct kvm_cpuid_entry2 *entry = NULL;
+	int i;
+
+	cpuid = kvm_get_supported_cpuid();
+	for (i = 0; i < cpuid->nent; i++) {
+		if (cpuid->entries[i].function == function &&
+		    cpuid->entries[i].index == index) {
+			entry = &cpuid->entries[i];
+			break;
+		}
+	}
+
+	TEST_ASSERT(entry, "Guest CPUID entry not found: (EAX=%x, ECX=%x).",
+		    function, index);
+	return entry;
+}
+
 /* VM VCPU CPUID Set
  *
  * Input Args:
@@ -698,6 +794,7 @@ void vcpu_set_cpuid(struct kvm_vm *vm,
 		    rc, errno);
 
 }
+
 /* Create a VM with reasonable defaults
  *
  * Input Args:
@@ -726,7 +823,7 @@ struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
 	uint64_t extra_pg_pages = extra_mem_pages / 512 * 2;
 
 	/* Create VM */
-	vm = vm_create(VM_MODE_FLAT48PG,
+	vm = vm_create(VM_MODE_P52V48_4K,
 		       DEFAULT_GUEST_PHY_PAGES + extra_pg_pages,
 		       O_RDWR);
 
@@ -742,6 +839,154 @@ struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
 	return vm;
 }
 
+/* VCPU Get MSR
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vcpuid - VCPU ID
+ *   msr_index - Index of MSR
+ *
+ * Output Args: None
+ *
+ * Return: On success, value of the MSR. On failure a TEST_ASSERT is produced.
+ *
+ * Get value of MSR for VCPU.
+ */
+uint64_t vcpu_get_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index)
+{
+	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+	struct {
+		struct kvm_msrs header;
+		struct kvm_msr_entry entry;
+	} buffer = {};
+	int r;
+
+	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+	buffer.header.nmsrs = 1;
+	buffer.entry.index = msr_index;
+	r = ioctl(vcpu->fd, KVM_GET_MSRS, &buffer.header);
+	TEST_ASSERT(r == 1, "KVM_GET_MSRS IOCTL failed,\n"
+		"  rc: %i errno: %i", r, errno);
+
+	return buffer.entry.data;
+}
+
+/* VCPU Set MSR
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vcpuid - VCPU ID
+ *   msr_index - Index of MSR
+ *   msr_value - New value of MSR
+ *
+ * Output Args: None
+ *
+ * Return: On success, nothing. On failure a TEST_ASSERT is produced.
+ *
+ * Set value of MSR for VCPU.
+ */
+void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
+	uint64_t msr_value)
+{
+	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+	struct {
+		struct kvm_msrs header;
+		struct kvm_msr_entry entry;
+	} buffer = {};
+	int r;
+
+	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+	memset(&buffer, 0, sizeof(buffer));
+	buffer.header.nmsrs = 1;
+	buffer.entry.index = msr_index;
+	buffer.entry.data = msr_value;
+	r = ioctl(vcpu->fd, KVM_SET_MSRS, &buffer.header);
+	TEST_ASSERT(r == 1, "KVM_SET_MSRS IOCTL failed,\n"
+		"  rc: %i errno: %i", r, errno);
+}
+
+/* VM VCPU Args Set
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vcpuid - VCPU ID
+ *   num - number of arguments
+ *   ... - arguments, each of type uint64_t
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Sets the first num function input arguments to the values
+ * given as variable args.  Each of the variable args is expected to
+ * be of type uint64_t.
+ */
+void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
+{
+	va_list ap;
+	struct kvm_regs regs;
+
+	TEST_ASSERT(num >= 1 && num <= 6, "Unsupported number of args,\n"
+		    "  num: %u\n",
+		    num);
+
+	va_start(ap, num);
+	vcpu_regs_get(vm, vcpuid, &regs);
+
+	if (num >= 1)
+		regs.rdi = va_arg(ap, uint64_t);
+
+	if (num >= 2)
+		regs.rsi = va_arg(ap, uint64_t);
+
+	if (num >= 3)
+		regs.rdx = va_arg(ap, uint64_t);
+
+	if (num >= 4)
+		regs.rcx = va_arg(ap, uint64_t);
+
+	if (num >= 5)
+		regs.r8 = va_arg(ap, uint64_t);
+
+	if (num >= 6)
+		regs.r9 = va_arg(ap, uint64_t);
+
+	vcpu_regs_set(vm, vcpuid, &regs);
+	va_end(ap);
+}
+
+/*
+ * VM VCPU Dump
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vcpuid - VCPU ID
+ *   indent - Left margin indent amount
+ *
+ * Output Args:
+ *   stream - Output FILE stream
+ *
+ * Return: None
+ *
+ * Dumps the current state of the VCPU specified by vcpuid, within the VM
+ * given by vm, to the FILE stream given by stream.
+ */
+void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent)
+{
+	struct kvm_regs regs;
+	struct kvm_sregs sregs;
+
+	fprintf(stream, "%*scpuid: %u\n", indent, "", vcpuid);
+
+	fprintf(stream, "%*sregs:\n", indent + 2, "");
+	vcpu_regs_get(vm, vcpuid, &regs);
+	regs_dump(stream, &regs, indent + 4);
+
+	fprintf(stream, "%*ssregs:\n", indent + 2, "");
+	vcpu_sregs_get(vm, vcpuid, &sregs);
+	sregs_dump(stream, &sregs, indent + 4);
+}
+
 struct kvm_x86_state {
 	struct kvm_vcpu_events events;
 	struct kvm_mp_state mp_state;
diff --git a/tools/testing/selftests/kvm/lib/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
index b987c3c970eb..771ba6bf751c 100644
--- a/tools/testing/selftests/kvm/lib/vmx.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
@@ -1,5 +1,5 @@
 /*
- * tools/testing/selftests/kvm/lib/x86.c
+ * tools/testing/selftests/kvm/lib/x86_64/vmx.c
  *
  * Copyright (C) 2018, Google LLC.
  *
@@ -10,9 +10,11 @@
 
 #include "test_util.h"
 #include "kvm_util.h"
-#include "x86.h"
+#include "processor.h"
 #include "vmx.h"
 
+bool enable_evmcs;
+
 /* Allocate memory regions for nested VMX tests.
  *
  * Input Args:
@@ -62,6 +64,20 @@ vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva)
 	vmx->vmwrite_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmwrite);
 	memset(vmx->vmwrite_hva, 0, getpagesize());
 
+	/* Setup of a region of guest memory for the VP Assist page. */
+	vmx->vp_assist = (void *)vm_vaddr_alloc(vm, getpagesize(),
+						0x10000, 0, 0);
+	vmx->vp_assist_hva = addr_gva2hva(vm, (uintptr_t)vmx->vp_assist);
+	vmx->vp_assist_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vp_assist);
+
+	/* Setup of a region of guest memory for the enlightened VMCS. */
+	vmx->enlightened_vmcs = (void *)vm_vaddr_alloc(vm, getpagesize(),
+						       0x10000, 0, 0);
+	vmx->enlightened_vmcs_hva =
+		addr_gva2hva(vm, (uintptr_t)vmx->enlightened_vmcs);
+	vmx->enlightened_vmcs_gpa =
+		addr_gva2gpa(vm, (uintptr_t)vmx->enlightened_vmcs);
+
 	*p_vmx_gva = vmx_gva;
 	return vmx;
 }
@@ -107,18 +123,31 @@ bool prepare_for_vmx_operation(struct vmx_pages *vmx)
 	if (vmxon(vmx->vmxon_gpa))
 		return false;
 
-	/* Load a VMCS. */
-	*(uint32_t *)(vmx->vmcs) = vmcs_revision();
-	if (vmclear(vmx->vmcs_gpa))
-		return false;
-
-	if (vmptrld(vmx->vmcs_gpa))
-		return false;
+	return true;
+}
 
-	/* Setup shadow VMCS, do not load it yet. */
-	*(uint32_t *)(vmx->shadow_vmcs) = vmcs_revision() | 0x80000000ul;
-	if (vmclear(vmx->shadow_vmcs_gpa))
-		return false;
+bool load_vmcs(struct vmx_pages *vmx)
+{
+	if (!enable_evmcs) {
+		/* Load a VMCS. */
+		*(uint32_t *)(vmx->vmcs) = vmcs_revision();
+		if (vmclear(vmx->vmcs_gpa))
+			return false;
+
+		if (vmptrld(vmx->vmcs_gpa))
+			return false;
+
+		/* Setup shadow VMCS, do not load it yet. */
+		*(uint32_t *)(vmx->shadow_vmcs) =
+			vmcs_revision() | 0x80000000ul;
+		if (vmclear(vmx->shadow_vmcs_gpa))
+			return false;
+	} else {
+		if (evmcs_vmptrld(vmx->enlightened_vmcs_gpa,
+				  vmx->enlightened_vmcs))
+			return false;
+		current_evmcs->revision_id = vmcs_revision();
+	}
 
 	return true;
 }
diff --git a/tools/testing/selftests/kvm/cr4_cpuid_sync_test.c b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c
index 11ec358bf969..d503a51fad30 100644
--- a/tools/testing/selftests/kvm/cr4_cpuid_sync_test.c
+++ b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c
@@ -17,7 +17,7 @@
 #include "test_util.h"
 
 #include "kvm_util.h"
-#include "x86.h"
+#include "processor.h"
 
 #define X86_FEATURE_XSAVE	(1<<26)
 #define X86_FEATURE_OSXSAVE	(1<<27)
@@ -67,6 +67,7 @@ int main(int argc, char *argv[])
 	struct kvm_vm *vm;
 	struct kvm_sregs sregs;
 	struct kvm_cpuid_entry2 *entry;
+	struct ucall uc;
 	int rc;
 
 	entry = kvm_get_supported_cpuid_entry(1);
@@ -87,21 +88,20 @@ int main(int argc, char *argv[])
 		rc = _vcpu_run(vm, VCPU_ID);
 
 		if (run->exit_reason == KVM_EXIT_IO) {
-			switch (run->io.port) {
-			case GUEST_PORT_SYNC:
+			switch (get_ucall(vm, VCPU_ID, &uc)) {
+			case UCALL_SYNC:
 				/* emulate hypervisor clearing CR4.OSXSAVE */
 				vcpu_sregs_get(vm, VCPU_ID, &sregs);
 				sregs.cr4 &= ~X86_CR4_OSXSAVE;
 				vcpu_sregs_set(vm, VCPU_ID, &sregs);
 				break;
-			case GUEST_PORT_ABORT:
+			case UCALL_ABORT:
 				TEST_ASSERT(false, "Guest CR4 bit (OSXSAVE) unsynchronized with CPUID bit.");
 				break;
-			case GUEST_PORT_DONE:
+			case UCALL_DONE:
 				goto done;
 			default:
-				TEST_ASSERT(false, "Unknown port 0x%x.",
-					    run->io.port);
+				TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd);
 			}
 		}
 	}
diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
new file mode 100644
index 000000000000..92c2cfd1b182
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
@@ -0,0 +1,160 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2018, Red Hat, Inc.
+ *
+ * Tests for Enlightened VMCS, including nested guest state.
+ */
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+
+#include "vmx.h"
+
+#define VCPU_ID		5
+
+static bool have_nested_state;
+
+void l2_guest_code(void)
+{
+	GUEST_SYNC(6);
+
+	GUEST_SYNC(7);
+
+	/* Done, exit to L1 and never come back.  */
+	vmcall();
+}
+
+void l1_guest_code(struct vmx_pages *vmx_pages)
+{
+#define L2_GUEST_STACK_SIZE 64
+	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+	enable_vp_assist(vmx_pages->vp_assist_gpa, vmx_pages->vp_assist);
+
+	GUEST_ASSERT(vmx_pages->vmcs_gpa);
+	GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+	GUEST_SYNC(3);
+	GUEST_ASSERT(load_vmcs(vmx_pages));
+	GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa);
+
+	GUEST_SYNC(4);
+	GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa);
+
+	prepare_vmcs(vmx_pages, l2_guest_code,
+		     &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+	GUEST_SYNC(5);
+	GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa);
+	GUEST_ASSERT(!vmlaunch());
+	GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa);
+	GUEST_SYNC(8);
+	GUEST_ASSERT(!vmresume());
+	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+	GUEST_SYNC(9);
+}
+
+void guest_code(struct vmx_pages *vmx_pages)
+{
+	GUEST_SYNC(1);
+	GUEST_SYNC(2);
+
+	if (vmx_pages)
+		l1_guest_code(vmx_pages);
+
+	GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+	struct vmx_pages *vmx_pages = NULL;
+	vm_vaddr_t vmx_pages_gva = 0;
+
+	struct kvm_regs regs1, regs2;
+	struct kvm_vm *vm;
+	struct kvm_run *run;
+	struct kvm_x86_state *state;
+	struct ucall uc;
+	int stage;
+	uint16_t evmcs_ver;
+	struct kvm_enable_cap enable_evmcs_cap = {
+		.cap = KVM_CAP_HYPERV_ENLIGHTENED_VMCS,
+		 .args[0] = (unsigned long)&evmcs_ver
+	};
+
+	struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1);
+
+	/* Create VM */
+	vm = vm_create_default(VCPU_ID, 0, guest_code);
+
+	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+
+	if (!kvm_check_cap(KVM_CAP_NESTED_STATE) ||
+	    !kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) {
+		printf("capabilities not available, skipping test\n");
+		exit(KSFT_SKIP);
+	}
+
+	vcpu_ioctl(vm, VCPU_ID, KVM_ENABLE_CAP, &enable_evmcs_cap);
+
+	run = vcpu_state(vm, VCPU_ID);
+
+	vcpu_regs_get(vm, VCPU_ID, &regs1);
+
+	vmx_pages = vcpu_alloc_vmx(vm, &vmx_pages_gva);
+	vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
+
+	for (stage = 1;; stage++) {
+		_vcpu_run(vm, VCPU_ID);
+		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+			    "Unexpected exit reason: %u (%s),\n",
+			    run->exit_reason,
+			    exit_reason_str(run->exit_reason));
+
+		memset(&regs1, 0, sizeof(regs1));
+		vcpu_regs_get(vm, VCPU_ID, &regs1);
+		switch (get_ucall(vm, VCPU_ID, &uc)) {
+		case UCALL_ABORT:
+			TEST_ASSERT(false, "%s at %s:%d", (const char *)uc.args[0],
+				    __FILE__, uc.args[1]);
+			/* NOT REACHED */
+		case UCALL_SYNC:
+			break;
+		case UCALL_DONE:
+			goto done;
+		default:
+			TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd);
+		}
+
+		/* UCALL_SYNC is handled here.  */
+		TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
+			    uc.args[1] == stage, "Unexpected register values vmexit #%lx, got %lx",
+			    stage, (ulong)uc.args[1]);
+
+		state = vcpu_save_state(vm, VCPU_ID);
+		kvm_vm_release(vm);
+
+		/* Restore state in a new VM.  */
+		kvm_vm_restart(vm, O_RDWR);
+		vm_vcpu_add(vm, VCPU_ID, 0, 0);
+		vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+		vcpu_load_state(vm, VCPU_ID, state);
+		run = vcpu_state(vm, VCPU_ID);
+		free(state);
+
+		memset(&regs2, 0, sizeof(regs2));
+		vcpu_regs_get(vm, VCPU_ID, &regs2);
+		TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
+			    "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
+			    (ulong) regs2.rdi, (ulong) regs2.rsi);
+	}
+
+done:
+	kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/platform_info_test.c b/tools/testing/selftests/kvm/x86_64/platform_info_test.c
index 3764e7121265..eb3e7a838cb4 100644
--- a/tools/testing/selftests/kvm/platform_info_test.c
+++ b/tools/testing/selftests/kvm/x86_64/platform_info_test.c
@@ -19,7 +19,7 @@
 
 #include "test_util.h"
 #include "kvm_util.h"
-#include "x86.h"
+#include "processor.h"
 
 #define VCPU_ID 0
 #define MSR_PLATFORM_INFO_MAX_TURBO_RATIO 0xff00
@@ -48,7 +48,7 @@ static void set_msr_platform_info_enabled(struct kvm_vm *vm, bool enable)
 static void test_msr_platform_info_enabled(struct kvm_vm *vm)
 {
 	struct kvm_run *run = vcpu_state(vm, VCPU_ID);
-	struct guest_args args;
+	struct ucall uc;
 
 	set_msr_platform_info_enabled(vm, true);
 	vcpu_run(vm, VCPU_ID);
@@ -56,11 +56,11 @@ static void test_msr_platform_info_enabled(struct kvm_vm *vm)
 			"Exit_reason other than KVM_EXIT_IO: %u (%s),\n",
 			run->exit_reason,
 			exit_reason_str(run->exit_reason));
-	guest_args_read(vm, VCPU_ID, &args);
-	TEST_ASSERT(args.port == GUEST_PORT_SYNC,
-			"Received IO from port other than PORT_HOST_SYNC: %u\n",
-			run->io.port);
-	TEST_ASSERT((args.arg1 & MSR_PLATFORM_INFO_MAX_TURBO_RATIO) ==
+	get_ucall(vm, VCPU_ID, &uc);
+	TEST_ASSERT(uc.cmd == UCALL_SYNC,
+			"Received ucall other than UCALL_SYNC: %u\n",
+			ucall);
+	TEST_ASSERT((uc.args[1] & MSR_PLATFORM_INFO_MAX_TURBO_RATIO) ==
 		MSR_PLATFORM_INFO_MAX_TURBO_RATIO,
 		"Expected MSR_PLATFORM_INFO to have max turbo ratio mask: %i.",
 		MSR_PLATFORM_INFO_MAX_TURBO_RATIO);
diff --git a/tools/testing/selftests/kvm/set_sregs_test.c b/tools/testing/selftests/kvm/x86_64/set_sregs_test.c
index 881419d5746e..35640e8e95bc 100644
--- a/tools/testing/selftests/kvm/set_sregs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/set_sregs_test.c
@@ -22,7 +22,7 @@
 #include "test_util.h"
 
 #include "kvm_util.h"
-#include "x86.h"
+#include "processor.h"
 
 #define VCPU_ID                  5
 
diff --git a/tools/testing/selftests/kvm/state_test.c b/tools/testing/selftests/kvm/x86_64/state_test.c
index 900e3e9dfb9f..03da41f0f736 100644
--- a/tools/testing/selftests/kvm/state_test.c
+++ b/tools/testing/selftests/kvm/x86_64/state_test.c
@@ -17,7 +17,7 @@
 #include "test_util.h"
 
 #include "kvm_util.h"
-#include "x86.h"
+#include "processor.h"
 #include "vmx.h"
 
 #define VCPU_ID		5
@@ -26,20 +26,20 @@ static bool have_nested_state;
 
 void l2_guest_code(void)
 {
-	GUEST_SYNC(5);
+	GUEST_SYNC(6);
 
         /* Exit to L1 */
 	vmcall();
 
 	/* L1 has now set up a shadow VMCS for us.  */
 	GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee);
-	GUEST_SYNC(9);
+	GUEST_SYNC(10);
 	GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee);
 	GUEST_ASSERT(!vmwrite(GUEST_RIP, 0xc0fffee));
-	GUEST_SYNC(10);
+	GUEST_SYNC(11);
 	GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0fffee);
 	GUEST_ASSERT(!vmwrite(GUEST_RIP, 0xc0ffffee));
-	GUEST_SYNC(11);
+	GUEST_SYNC(12);
 
 	/* Done, exit to L1 and never come back.  */
 	vmcall();
@@ -52,15 +52,17 @@ void l1_guest_code(struct vmx_pages *vmx_pages)
 
 	GUEST_ASSERT(vmx_pages->vmcs_gpa);
 	GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+	GUEST_SYNC(3);
+	GUEST_ASSERT(load_vmcs(vmx_pages));
 	GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
 
-	GUEST_SYNC(3);
+	GUEST_SYNC(4);
 	GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
 
 	prepare_vmcs(vmx_pages, l2_guest_code,
 		     &l2_guest_stack[L2_GUEST_STACK_SIZE]);
 
-	GUEST_SYNC(4);
+	GUEST_SYNC(5);
 	GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
 	GUEST_ASSERT(!vmlaunch());
 	GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
@@ -72,7 +74,7 @@ void l1_guest_code(struct vmx_pages *vmx_pages)
 	GUEST_ASSERT(!vmresume());
 	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
 
-	GUEST_SYNC(6);
+	GUEST_SYNC(7);
 	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
 
 	GUEST_ASSERT(!vmresume());
@@ -85,12 +87,12 @@ void l1_guest_code(struct vmx_pages *vmx_pages)
 
 	GUEST_ASSERT(!vmptrld(vmx_pages->shadow_vmcs_gpa));
 	GUEST_ASSERT(vmlaunch());
-	GUEST_SYNC(7);
+	GUEST_SYNC(8);
 	GUEST_ASSERT(vmlaunch());
 	GUEST_ASSERT(vmresume());
 
 	vmwrite(GUEST_RIP, 0xc0ffee);
-	GUEST_SYNC(8);
+	GUEST_SYNC(9);
 	GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee);
 
 	GUEST_ASSERT(!vmptrld(vmx_pages->vmcs_gpa));
@@ -101,7 +103,7 @@ void l1_guest_code(struct vmx_pages *vmx_pages)
 	GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffffee);
 	GUEST_ASSERT(vmlaunch());
 	GUEST_ASSERT(vmresume());
-	GUEST_SYNC(12);
+	GUEST_SYNC(13);
 	GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffffee);
 	GUEST_ASSERT(vmlaunch());
 	GUEST_ASSERT(vmresume());
@@ -127,6 +129,7 @@ int main(int argc, char *argv[])
 	struct kvm_vm *vm;
 	struct kvm_run *run;
 	struct kvm_x86_state *state;
+	struct ucall uc;
 	int stage;
 
 	struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1);
@@ -155,23 +158,23 @@ int main(int argc, char *argv[])
 
 		memset(&regs1, 0, sizeof(regs1));
 		vcpu_regs_get(vm, VCPU_ID, &regs1);
-		switch (run->io.port) {
-		case GUEST_PORT_ABORT:
-			TEST_ASSERT(false, "%s at %s:%d", (const char *) regs1.rdi,
-				    __FILE__, regs1.rsi);
+		switch (get_ucall(vm, VCPU_ID, &uc)) {
+		case UCALL_ABORT:
+			TEST_ASSERT(false, "%s at %s:%d", (const char *)uc.args[0],
+				    __FILE__, uc.args[1]);
 			/* NOT REACHED */
-		case GUEST_PORT_SYNC:
+		case UCALL_SYNC:
 			break;
-		case GUEST_PORT_DONE:
+		case UCALL_DONE:
 			goto done;
 		default:
-			TEST_ASSERT(false, "Unknown port 0x%x.", run->io.port);
+			TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd);
 		}
 
-		/* PORT_SYNC is handled here.  */
-		TEST_ASSERT(!strcmp((const char *)regs1.rdi, "hello") &&
-			    regs1.rsi == stage, "Unexpected register values vmexit #%lx, got %lx",
-			    stage, (ulong) regs1.rsi);
+		/* UCALL_SYNC is handled here.  */
+		TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
+			    uc.args[1] == stage, "Unexpected register values vmexit #%lx, got %lx",
+			    stage, (ulong)uc.args[1]);
 
 		state = vcpu_save_state(vm, VCPU_ID);
 		kvm_vm_release(vm);
diff --git a/tools/testing/selftests/kvm/sync_regs_test.c b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c
index 213343e5dff9..c8478ce9ea77 100644
--- a/tools/testing/selftests/kvm/sync_regs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c
@@ -19,7 +19,7 @@
 
 #include "test_util.h"
 #include "kvm_util.h"
-#include "x86.h"
+#include "processor.h"
 
 #define VCPU_ID 5
 
diff --git a/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c
index 49bcc68b0235..18fa64db0d7a 100644
--- a/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c
@@ -1,5 +1,5 @@
 /*
- * gtests/tests/vmx_tsc_adjust_test.c
+ * vmx_tsc_adjust_test
  *
  * Copyright (C) 2018, Google LLC.
  *
@@ -22,13 +22,13 @@
 
 #include "test_util.h"
 #include "kvm_util.h"
-#include "x86.h"
+#include "processor.h"
 #include "vmx.h"
 
 #include <string.h>
 #include <sys/ioctl.h>
 
-#include "../kselftest.h"
+#include "kselftest.h"
 
 #ifndef MSR_IA32_TSC_ADJUST
 #define MSR_IA32_TSC_ADJUST 0x3b
@@ -94,6 +94,7 @@ static void l1_guest_code(struct vmx_pages *vmx_pages)
 	check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE);
 
 	GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+	GUEST_ASSERT(load_vmcs(vmx_pages));
 
 	/* Prepare the VMCS for L2 execution. */
 	prepare_vmcs(vmx_pages, l2_guest_code,
@@ -146,26 +147,25 @@ int main(int argc, char *argv[])
 
 	for (;;) {
 		volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
-		struct guest_args args;
+		struct ucall uc;
 
 		vcpu_run(vm, VCPU_ID);
-		guest_args_read(vm, VCPU_ID, &args);
 		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
 			    "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
 			    run->exit_reason,
 			    exit_reason_str(run->exit_reason));
 
-		switch (args.port) {
-		case GUEST_PORT_ABORT:
-			TEST_ASSERT(false, "%s", (const char *) args.arg0);
+		switch (get_ucall(vm, VCPU_ID, &uc)) {
+		case UCALL_ABORT:
+			TEST_ASSERT(false, "%s", (const char *)uc.args[0]);
 			/* NOT REACHED */
-		case GUEST_PORT_SYNC:
-			report(args.arg1);
+		case UCALL_SYNC:
+			report(uc.args[1]);
 			break;
-		case GUEST_PORT_DONE:
+		case UCALL_DONE:
 			goto done;
 		default:
-			TEST_ASSERT(false, "Unknown port 0x%x.", args.port);
+			TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd);
 		}
 	}
 
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index 78b24cf76f40..8cf22b3c2563 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -14,3 +14,4 @@ udpgso_bench_rx
 udpgso_bench_tx
 tcp_inq
 tls
+ip_defrag
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 919aa2ac00af..256d82d5fa87 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -5,13 +5,13 @@ CFLAGS =  -Wall -Wl,--no-as-needed -O2 -g
 CFLAGS += -I../../../../usr/include/
 
 TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh
-TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh udpgso.sh
+TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh udpgso.sh ip_defrag.sh
 TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh
 TEST_PROGS_EXTENDED := in_netns.sh
 TEST_GEN_FILES =  socket
 TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy
 TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd
-TEST_GEN_FILES += udpgso udpgso_bench_tx udpgso_bench_rx
+TEST_GEN_FILES += udpgso udpgso_bench_tx udpgso_bench_rx ip_defrag
 TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
 TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls
 
diff --git a/tools/testing/selftests/net/fib-onlink-tests.sh b/tools/testing/selftests/net/fib-onlink-tests.sh
index 3991ad1a368d..864f865eee55 100755
--- a/tools/testing/selftests/net/fib-onlink-tests.sh
+++ b/tools/testing/selftests/net/fib-onlink-tests.sh
@@ -167,8 +167,8 @@ setup()
 	# add vrf table
 	ip li add ${VRF} type vrf table ${VRF_TABLE}
 	ip li set ${VRF} up
-	ip ro add table ${VRF_TABLE} unreachable default
-	ip -6 ro add table ${VRF_TABLE} unreachable default
+	ip ro add table ${VRF_TABLE} unreachable default metric 8192
+	ip -6 ro add table ${VRF_TABLE} unreachable default metric 8192
 
 	# create test interfaces
 	ip li add ${NETIFS[p1]} type veth peer name ${NETIFS[p2]}
@@ -185,20 +185,20 @@ setup()
 	for n in 1 3 5 7; do
 		ip li set ${NETIFS[p${n}]} up
 		ip addr add ${V4ADDRS[p${n}]}/24 dev ${NETIFS[p${n}]}
-		ip addr add ${V6ADDRS[p${n}]}/64 dev ${NETIFS[p${n}]}
+		ip addr add ${V6ADDRS[p${n}]}/64 dev ${NETIFS[p${n}]} nodad
 	done
 
 	# move peer interfaces to namespace and add addresses
 	for n in 2 4 6 8; do
 		ip li set ${NETIFS[p${n}]} netns ${PEER_NS} up
 		ip -netns ${PEER_NS} addr add ${V4ADDRS[p${n}]}/24 dev ${NETIFS[p${n}]}
-		ip -netns ${PEER_NS} addr add ${V6ADDRS[p${n}]}/64 dev ${NETIFS[p${n}]}
+		ip -netns ${PEER_NS} addr add ${V6ADDRS[p${n}]}/64 dev ${NETIFS[p${n}]} nodad
 	done
 
-	set +e
+	ip -6 ro add default via ${V6ADDRS[p3]/::[0-9]/::64}
+	ip -6 ro add table ${VRF_TABLE} default via ${V6ADDRS[p7]/::[0-9]/::64}
 
-	# let DAD complete - assume default of 1 probe
-	sleep 1
+	set +e
 }
 
 cleanup()
diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index 0f45633bd634..802b4af18729 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -9,11 +9,11 @@ ret=0
 ksft_skip=4
 
 # all tests in this script. Can be overridden with -t option
-TESTS="unregister down carrier nexthop ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric"
+TESTS="unregister down carrier nexthop ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics ipv4_route_metrics"
 VERBOSE=0
 PAUSE_ON_FAIL=no
 PAUSE=no
-IP="ip -netns testns"
+IP="ip -netns ns1"
 
 log_test()
 {
@@ -47,8 +47,10 @@ log_test()
 setup()
 {
 	set -e
-	ip netns add testns
+	ip netns add ns1
 	$IP link set dev lo up
+	ip netns exec ns1 sysctl -qw net.ipv4.ip_forward=1
+	ip netns exec ns1 sysctl -qw net.ipv6.conf.all.forwarding=1
 
 	$IP link add dummy0 type dummy
 	$IP link set dev dummy0 up
@@ -61,7 +63,8 @@ setup()
 cleanup()
 {
 	$IP link del dev dummy0 &> /dev/null
-	ip netns del testns
+	ip netns del ns1
+	ip netns del ns2 &> /dev/null
 }
 
 get_linklocal()
@@ -639,11 +642,14 @@ add_initial_route6()
 
 check_route6()
 {
-	local pfx="2001:db8:104::/64"
+	local pfx
 	local expected="$1"
 	local out
 	local rc=0
 
+	set -- $expected
+	pfx=$1
+
 	out=$($IP -6 ro ls match ${pfx} | sed -e 's/ pref medium//')
 	[ "${out}" = "${expected}" ] && return 0
 
@@ -690,28 +696,33 @@ route_setup()
 	[ "${VERBOSE}" = "1" ] && set -x
 	set -e
 
-	$IP li add red up type vrf table 101
+	ip netns add ns2
+	ip -netns ns2 link set dev lo up
+	ip netns exec ns2 sysctl -qw net.ipv4.ip_forward=1
+	ip netns exec ns2 sysctl -qw net.ipv6.conf.all.forwarding=1
+
 	$IP li add veth1 type veth peer name veth2
 	$IP li add veth3 type veth peer name veth4
 
 	$IP li set veth1 up
 	$IP li set veth3 up
-	$IP li set veth2 vrf red up
-	$IP li set veth4 vrf red up
-	$IP li add dummy1 type dummy
-	$IP li set dummy1 vrf red up
-
-	$IP -6 addr add 2001:db8:101::1/64 dev veth1
-	$IP -6 addr add 2001:db8:101::2/64 dev veth2
-	$IP -6 addr add 2001:db8:103::1/64 dev veth3
-	$IP -6 addr add 2001:db8:103::2/64 dev veth4
-	$IP -6 addr add 2001:db8:104::1/64 dev dummy1
+	$IP li set veth2 netns ns2 up
+	$IP li set veth4 netns ns2 up
+	ip -netns ns2 li add dummy1 type dummy
+	ip -netns ns2 li set dummy1 up
 
+	$IP -6 addr add 2001:db8:101::1/64 dev veth1 nodad
+	$IP -6 addr add 2001:db8:103::1/64 dev veth3 nodad
 	$IP addr add 172.16.101.1/24 dev veth1
-	$IP addr add 172.16.101.2/24 dev veth2
 	$IP addr add 172.16.103.1/24 dev veth3
-	$IP addr add 172.16.103.2/24 dev veth4
-	$IP addr add 172.16.104.1/24 dev dummy1
+
+	ip -netns ns2 -6 addr add 2001:db8:101::2/64 dev veth2 nodad
+	ip -netns ns2 -6 addr add 2001:db8:103::2/64 dev veth4 nodad
+	ip -netns ns2 -6 addr add 2001:db8:104::1/64 dev dummy1 nodad
+
+	ip -netns ns2 addr add 172.16.101.2/24 dev veth2
+	ip -netns ns2 addr add 172.16.103.2/24 dev veth4
+	ip -netns ns2 addr add 172.16.104.1/24 dev dummy1
 
 	set +ex
 }
@@ -944,7 +955,7 @@ ipv6_addr_metric_test()
 	log_test $rc 0 "Modify metric of address"
 
 	# verify prefix route removed on down
-	run_cmd "ip netns exec testns sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1"
+	run_cmd "ip netns exec ns1 sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1"
 	run_cmd "$IP li set dev dummy2 down"
 	rc=$?
 	if [ $rc -eq 0 ]; then
@@ -967,6 +978,77 @@ ipv6_addr_metric_test()
 	cleanup
 }
 
+ipv6_route_metrics_test()
+{
+	local rc
+
+	echo
+	echo "IPv6 routes with metrics"
+
+	route_setup
+
+	#
+	# single path with metrics
+	#
+	run_cmd "$IP -6 ro add 2001:db8:111::/64 via 2001:db8:101::2 mtu 1400"
+	rc=$?
+	if [ $rc -eq 0 ]; then
+		check_route6  "2001:db8:111::/64 via 2001:db8:101::2 dev veth1 metric 1024 mtu 1400"
+		rc=$?
+	fi
+	log_test $rc 0 "Single path route with mtu metric"
+
+
+	#
+	# multipath via separate routes with metrics
+	#
+	run_cmd "$IP -6 ro add 2001:db8:112::/64 via 2001:db8:101::2 mtu 1400"
+	run_cmd "$IP -6 ro append 2001:db8:112::/64 via 2001:db8:103::2"
+	rc=$?
+	if [ $rc -eq 0 ]; then
+		check_route6 "2001:db8:112::/64 metric 1024 mtu 1400 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1"
+		rc=$?
+	fi
+	log_test $rc 0 "Multipath route via 2 single routes with mtu metric on first"
+
+	# second route is coalesced to first to make a multipath route.
+	# MTU of the second path is hidden from display!
+	run_cmd "$IP -6 ro add 2001:db8:113::/64 via 2001:db8:101::2"
+	run_cmd "$IP -6 ro append 2001:db8:113::/64 via 2001:db8:103::2 mtu 1400"
+	rc=$?
+	if [ $rc -eq 0 ]; then
+		check_route6 "2001:db8:113::/64 metric 1024 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1"
+		rc=$?
+	fi
+	log_test $rc 0 "Multipath route via 2 single routes with mtu metric on 2nd"
+
+	run_cmd "$IP -6 ro del 2001:db8:113::/64 via 2001:db8:101::2"
+	if [ $? -eq 0 ]; then
+		check_route6 "2001:db8:113::/64 via 2001:db8:103::2 dev veth3 metric 1024 mtu 1400"
+		log_test $? 0 "    MTU of second leg"
+	fi
+
+	#
+	# multipath with metrics
+	#
+	run_cmd "$IP -6 ro add 2001:db8:115::/64 mtu 1400 nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+	rc=$?
+	if [ $rc -eq 0 ]; then
+		check_route6  "2001:db8:115::/64 metric 1024 mtu 1400 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1"
+		rc=$?
+	fi
+	log_test $rc 0 "Multipath route with mtu metric"
+
+	$IP -6 ro add 2001:db8:104::/64 via 2001:db8:101::2 mtu 1300
+	run_cmd "ip netns exec ns1 ping6 -w1 -c1 -s 1500 2001:db8:104::1"
+	log_test $? 0 "Using route with mtu metric"
+
+	run_cmd "$IP -6 ro add 2001:db8:114::/64 via  2001:db8:101::2  congctl lock foo"
+	log_test $? 2 "Invalid metric (fails metric_convert)"
+
+	route_cleanup
+}
+
 # add route for a prefix, flushing any existing routes first
 # expected to be the first step of a test
 add_route()
@@ -1005,11 +1087,15 @@ add_initial_route()
 
 check_route()
 {
-	local pfx="172.16.104.0/24"
+	local pfx
 	local expected="$1"
 	local out
 	local rc=0
 
+	set -- $expected
+	pfx=$1
+	[ "${pfx}" = "unreachable" ] && pfx=$2
+
 	out=$($IP ro ls match ${pfx})
 	[ "${out}" = "${expected}" ] && return 0
 
@@ -1319,6 +1405,43 @@ ipv4_addr_metric_test()
 	cleanup
 }
 
+ipv4_route_metrics_test()
+{
+	local rc
+
+	echo
+	echo "IPv4 route add / append tests"
+
+	route_setup
+
+	run_cmd "$IP ro add 172.16.111.0/24 via 172.16.101.2 mtu 1400"
+	rc=$?
+	if [ $rc -eq 0 ]; then
+		check_route "172.16.111.0/24 via 172.16.101.2 dev veth1 mtu 1400"
+		rc=$?
+	fi
+	log_test $rc 0 "Single path route with mtu metric"
+
+
+	run_cmd "$IP ro add 172.16.112.0/24 mtu 1400 nexthop via 172.16.101.2 nexthop via 172.16.103.2"
+	rc=$?
+	if [ $rc -eq 0 ]; then
+		check_route "172.16.112.0/24 mtu 1400 nexthop via 172.16.101.2 dev veth1 weight 1 nexthop via 172.16.103.2 dev veth3 weight 1"
+		rc=$?
+	fi
+	log_test $rc 0 "Multipath route with mtu metric"
+
+	$IP ro add 172.16.104.0/24 via 172.16.101.2 mtu 1300
+	run_cmd "ip netns exec ns1 ping -w1 -c1 -s 1500 172.16.104.1"
+	log_test $? 0 "Using route with mtu metric"
+
+	run_cmd "$IP ro add 172.16.111.0/24 via 172.16.101.2 congctl lock foo"
+	log_test $? 2 "Invalid metric (fails metric_convert)"
+
+	route_cleanup
+}
+
+
 ################################################################################
 # usage
 
@@ -1385,6 +1508,8 @@ do
 	ipv4_route_test|ipv4_rt)	ipv4_route_test;;
 	ipv6_addr_metric)		ipv6_addr_metric_test;;
 	ipv4_addr_metric)		ipv4_addr_metric_test;;
+	ipv6_route_metrics)		ipv6_route_metrics_test;;
+	ipv4_route_metrics)		ipv4_route_metrics_test;;
 
 	help) echo "Test names: $TESTS"; exit 0;;
 	esac
diff --git a/tools/testing/selftests/net/forwarding/bridge_sticky_fdb.sh b/tools/testing/selftests/net/forwarding/bridge_sticky_fdb.sh
new file mode 100755
index 000000000000..1f8ef0eff862
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/bridge_sticky_fdb.sh
@@ -0,0 +1,69 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="sticky"
+NUM_NETIFS=4
+TEST_MAC=de:ad:be:ef:13:37
+source lib.sh
+
+switch_create()
+{
+	ip link add dev br0 type bridge
+
+	ip link set dev $swp1 master br0
+	ip link set dev $swp2 master br0
+
+	ip link set dev br0 up
+	ip link set dev $h1 up
+	ip link set dev $swp1 up
+	ip link set dev $h2 up
+	ip link set dev $swp2 up
+}
+
+switch_destroy()
+{
+	ip link set dev $swp2 down
+	ip link set dev $h2 down
+	ip link set dev $swp1 down
+	ip link set dev $h1 down
+
+	ip link del dev br0
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+	h2=${NETIFS[p3]}
+	swp2=${NETIFS[p4]}
+
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+	switch_destroy
+}
+
+sticky()
+{
+	bridge fdb add $TEST_MAC dev $swp1 master static sticky
+	check_err $? "Could not add fdb entry"
+	bridge fdb del $TEST_MAC dev $swp1 vlan 1 master static sticky
+	$MZ $h2 -c 1 -a $TEST_MAC -t arp "request" -q
+	bridge -j fdb show br br0 brport $swp1\
+		| jq -e ".[] | select(.mac == \"$TEST_MAC\")" &> /dev/null
+	check_err $? "Did not find FDB record when should"
+
+	log_test "Sticky fdb entry"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index ca53b539aa2d..85d253546684 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -251,7 +251,7 @@ lldpad_app_wait_set()
 {
 	local dev=$1; shift
 
-	while lldptool -t -i $dev -V APP -c app | grep -q pending; do
+	while lldptool -t -i $dev -V APP -c app | grep -Eq "pending|unknown"; do
 		echo "$dev: waiting for lldpad to push pending APP updates"
 		sleep 5
 	done
@@ -494,6 +494,14 @@ tc_rule_stats_get()
 	    | jq '.[1].options.actions[].stats.packets'
 }
 
+ethtool_stats_get()
+{
+	local dev=$1; shift
+	local stat=$1; shift
+
+	ethtool -S $dev | grep "^ *$stat:" | head -n 1 | cut -d: -f2
+}
+
 mac_get()
 {
 	local if_name=$1
@@ -541,6 +549,23 @@ forwarding_restore()
 	sysctl_restore net.ipv4.conf.all.forwarding
 }
 
+declare -A MTU_ORIG
+mtu_set()
+{
+	local dev=$1; shift
+	local mtu=$1; shift
+
+	MTU_ORIG["$dev"]=$(ip -j link show dev $dev | jq -e '.[].mtu')
+	ip link set dev $dev mtu $mtu
+}
+
+mtu_restore()
+{
+	local dev=$1; shift
+
+	ip link set dev $dev mtu ${MTU_ORIG["$dev"]}
+}
+
 tc_offload_check()
 {
 	local num_netifs=${1:-$NUM_NETIFS}
diff --git a/tools/testing/selftests/net/ip_defrag.c b/tools/testing/selftests/net/ip_defrag.c
new file mode 100644
index 000000000000..61ae2782388e
--- /dev/null
+++ b/tools/testing/selftests/net/ip_defrag.c
@@ -0,0 +1,393 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <linux/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/udp.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+
+static bool		cfg_do_ipv4;
+static bool		cfg_do_ipv6;
+static bool		cfg_verbose;
+static bool		cfg_overlap;
+static unsigned short	cfg_port = 9000;
+
+const struct in_addr addr4 = { .s_addr = __constant_htonl(INADDR_LOOPBACK + 2) };
+const struct in6_addr addr6 = IN6ADDR_LOOPBACK_INIT;
+
+#define IP4_HLEN	(sizeof(struct iphdr))
+#define IP6_HLEN	(sizeof(struct ip6_hdr))
+#define UDP_HLEN	(sizeof(struct udphdr))
+
+/* IPv6 fragment header lenth. */
+#define FRAG_HLEN	8
+
+static int payload_len;
+static int max_frag_len;
+
+#define MSG_LEN_MAX	60000	/* Max UDP payload length. */
+
+#define IP4_MF		(1u << 13)  /* IPv4 MF flag. */
+#define IP6_MF		(1)  /* IPv6 MF flag. */
+
+#define CSUM_MANGLED_0 (0xffff)
+
+static uint8_t udp_payload[MSG_LEN_MAX];
+static uint8_t ip_frame[IP_MAXPACKET];
+static uint32_t ip_id = 0xabcd;
+static int msg_counter;
+static int frag_counter;
+static unsigned int seed;
+
+/* Receive a UDP packet. Validate it matches udp_payload. */
+static void recv_validate_udp(int fd_udp)
+{
+	ssize_t ret;
+	static uint8_t recv_buff[MSG_LEN_MAX];
+
+	ret = recv(fd_udp, recv_buff, payload_len, 0);
+	msg_counter++;
+
+	if (cfg_overlap) {
+		if (ret != -1)
+			error(1, 0, "recv: expected timeout; got %d",
+				(int)ret);
+		if (errno != ETIMEDOUT && errno != EAGAIN)
+			error(1, errno, "recv: expected timeout: %d",
+				 errno);
+		return;  /* OK */
+	}
+
+	if (ret == -1)
+		error(1, errno, "recv: payload_len = %d max_frag_len = %d",
+			payload_len, max_frag_len);
+	if (ret != payload_len)
+		error(1, 0, "recv: wrong size: %d vs %d", (int)ret, payload_len);
+	if (memcmp(udp_payload, recv_buff, payload_len))
+		error(1, 0, "recv: wrong data");
+}
+
+static uint32_t raw_checksum(uint8_t *buf, int len, uint32_t sum)
+{
+	int i;
+
+	for (i = 0; i < (len & ~1U); i += 2) {
+		sum += (u_int16_t)ntohs(*((u_int16_t *)(buf + i)));
+		if (sum > 0xffff)
+			sum -= 0xffff;
+	}
+
+	if (i < len) {
+		sum += buf[i] << 8;
+		if (sum > 0xffff)
+			sum -= 0xffff;
+	}
+
+	return sum;
+}
+
+static uint16_t udp_checksum(struct ip *iphdr, struct udphdr *udphdr)
+{
+	uint32_t sum = 0;
+	uint16_t res;
+
+	sum = raw_checksum((uint8_t *)&iphdr->ip_src, 2 * sizeof(iphdr->ip_src),
+				IPPROTO_UDP + (uint32_t)(UDP_HLEN + payload_len));
+	sum = raw_checksum((uint8_t *)udphdr, UDP_HLEN, sum);
+	sum = raw_checksum((uint8_t *)udp_payload, payload_len, sum);
+	res = 0xffff & ~sum;
+	if (res)
+		return htons(res);
+	else
+		return CSUM_MANGLED_0;
+}
+
+static uint16_t udp6_checksum(struct ip6_hdr *iphdr, struct udphdr *udphdr)
+{
+	uint32_t sum = 0;
+	uint16_t res;
+
+	sum = raw_checksum((uint8_t *)&iphdr->ip6_src, 2 * sizeof(iphdr->ip6_src),
+				IPPROTO_UDP);
+	sum = raw_checksum((uint8_t *)&udphdr->len, sizeof(udphdr->len), sum);
+	sum = raw_checksum((uint8_t *)udphdr, UDP_HLEN, sum);
+	sum = raw_checksum((uint8_t *)udp_payload, payload_len, sum);
+	res = 0xffff & ~sum;
+	if (res)
+		return htons(res);
+	else
+		return CSUM_MANGLED_0;
+}
+
+static void send_fragment(int fd_raw, struct sockaddr *addr, socklen_t alen,
+				int offset, bool ipv6)
+{
+	int frag_len;
+	int res;
+	int payload_offset = offset > 0 ? offset - UDP_HLEN : 0;
+	uint8_t *frag_start = ipv6 ? ip_frame + IP6_HLEN + FRAG_HLEN :
+					ip_frame + IP4_HLEN;
+
+	if (offset == 0) {
+		struct udphdr udphdr;
+		udphdr.source = htons(cfg_port + 1);
+		udphdr.dest = htons(cfg_port);
+		udphdr.len = htons(UDP_HLEN + payload_len);
+		udphdr.check = 0;
+		if (ipv6)
+			udphdr.check = udp6_checksum((struct ip6_hdr *)ip_frame, &udphdr);
+		else
+			udphdr.check = udp_checksum((struct ip *)ip_frame, &udphdr);
+		memcpy(frag_start, &udphdr, UDP_HLEN);
+	}
+
+	if (ipv6) {
+		struct ip6_hdr *ip6hdr = (struct ip6_hdr *)ip_frame;
+		struct ip6_frag *fraghdr = (struct ip6_frag *)(ip_frame + IP6_HLEN);
+		if (payload_len - payload_offset <= max_frag_len && offset > 0) {
+			/* This is the last fragment. */
+			frag_len = FRAG_HLEN + payload_len - payload_offset;
+			fraghdr->ip6f_offlg = htons(offset);
+		} else {
+			frag_len = FRAG_HLEN + max_frag_len;
+			fraghdr->ip6f_offlg = htons(offset | IP6_MF);
+		}
+		ip6hdr->ip6_plen = htons(frag_len);
+		if (offset == 0)
+			memcpy(frag_start + UDP_HLEN, udp_payload,
+				frag_len - FRAG_HLEN - UDP_HLEN);
+		else
+			memcpy(frag_start, udp_payload + payload_offset,
+				frag_len - FRAG_HLEN);
+		frag_len += IP6_HLEN;
+	} else {
+		struct ip *iphdr = (struct ip *)ip_frame;
+		if (payload_len - payload_offset <= max_frag_len && offset > 0) {
+			/* This is the last fragment. */
+			frag_len = IP4_HLEN + payload_len - payload_offset;
+			iphdr->ip_off = htons(offset / 8);
+		} else {
+			frag_len = IP4_HLEN + max_frag_len;
+			iphdr->ip_off = htons(offset / 8 | IP4_MF);
+		}
+		iphdr->ip_len = htons(frag_len);
+		if (offset == 0)
+			memcpy(frag_start + UDP_HLEN, udp_payload,
+				frag_len - IP4_HLEN - UDP_HLEN);
+		else
+			memcpy(frag_start, udp_payload + payload_offset,
+				frag_len - IP4_HLEN);
+	}
+
+	res = sendto(fd_raw, ip_frame, frag_len, 0, addr, alen);
+	if (res < 0)
+		error(1, errno, "send_fragment");
+	if (res != frag_len)
+		error(1, 0, "send_fragment: %d vs %d", res, frag_len);
+
+	frag_counter++;
+}
+
+static void send_udp_frags(int fd_raw, struct sockaddr *addr,
+				socklen_t alen, bool ipv6)
+{
+	struct ip *iphdr = (struct ip *)ip_frame;
+	struct ip6_hdr *ip6hdr = (struct ip6_hdr *)ip_frame;
+	int res;
+	int offset;
+	int frag_len;
+
+	/* Send the UDP datagram using raw IP fragments: the 0th fragment
+	 * has the UDP header; other fragments are pieces of udp_payload
+	 * split in chunks of frag_len size.
+	 *
+	 * Odd fragments (1st, 3rd, 5th, etc.) are sent out first, then
+	 * even fragments (0th, 2nd, etc.) are sent out.
+	 */
+	if (ipv6) {
+		struct ip6_frag *fraghdr = (struct ip6_frag *)(ip_frame + IP6_HLEN);
+		((struct sockaddr_in6 *)addr)->sin6_port = 0;
+		memset(ip6hdr, 0, sizeof(*ip6hdr));
+		ip6hdr->ip6_flow = htonl(6<<28);  /* Version. */
+		ip6hdr->ip6_nxt = IPPROTO_FRAGMENT;
+		ip6hdr->ip6_hops = 255;
+		ip6hdr->ip6_src = addr6;
+		ip6hdr->ip6_dst = addr6;
+		fraghdr->ip6f_nxt = IPPROTO_UDP;
+		fraghdr->ip6f_reserved = 0;
+		fraghdr->ip6f_ident = htonl(ip_id++);
+	} else {
+		memset(iphdr, 0, sizeof(*iphdr));
+		iphdr->ip_hl = 5;
+		iphdr->ip_v = 4;
+		iphdr->ip_tos = 0;
+		iphdr->ip_id = htons(ip_id++);
+		iphdr->ip_ttl = 0x40;
+		iphdr->ip_p = IPPROTO_UDP;
+		iphdr->ip_src.s_addr = htonl(INADDR_LOOPBACK);
+		iphdr->ip_dst = addr4;
+		iphdr->ip_sum = 0;
+	}
+
+	/* Odd fragments. */
+	offset = max_frag_len;
+	while (offset < (UDP_HLEN + payload_len)) {
+		send_fragment(fd_raw, addr, alen, offset, ipv6);
+		offset += 2 * max_frag_len;
+	}
+
+	if (cfg_overlap) {
+		/* Send an extra random fragment. */
+		offset = rand() % (UDP_HLEN + payload_len - 1);
+		/* sendto() returns EINVAL if offset + frag_len is too small. */
+		if (ipv6) {
+			struct ip6_frag *fraghdr = (struct ip6_frag *)(ip_frame + IP6_HLEN);
+			frag_len = max_frag_len + rand() % 256;
+			/* In IPv6 if !!(frag_len % 8), the fragment is dropped. */
+			frag_len &= ~0x7;
+			fraghdr->ip6f_offlg = htons(offset / 8 | IP6_MF);
+			ip6hdr->ip6_plen = htons(frag_len);
+			frag_len += IP6_HLEN;
+		} else {
+			frag_len = IP4_HLEN + UDP_HLEN + rand() % 256;
+			iphdr->ip_off = htons(offset / 8 | IP4_MF);
+			iphdr->ip_len = htons(frag_len);
+		}
+		res = sendto(fd_raw, ip_frame, frag_len, 0, addr, alen);
+		if (res < 0)
+			error(1, errno, "sendto overlap");
+		if (res != frag_len)
+			error(1, 0, "sendto overlap: %d vs %d", (int)res, frag_len);
+		frag_counter++;
+	}
+
+	/* Event fragments. */
+	offset = 0;
+	while (offset < (UDP_HLEN + payload_len)) {
+		send_fragment(fd_raw, addr, alen, offset, ipv6);
+		offset += 2 * max_frag_len;
+	}
+}
+
+static void run_test(struct sockaddr *addr, socklen_t alen, bool ipv6)
+{
+	int fd_tx_raw, fd_rx_udp;
+	struct timeval tv = { .tv_sec = 0, .tv_usec = 10 * 1000 };
+	int idx;
+	int min_frag_len = ipv6 ? 1280 : 8;
+
+	/* Initialize the payload. */
+	for (idx = 0; idx < MSG_LEN_MAX; ++idx)
+		udp_payload[idx] = idx % 256;
+
+	/* Open sockets. */
+	fd_tx_raw = socket(addr->sa_family, SOCK_RAW, IPPROTO_RAW);
+	if (fd_tx_raw == -1)
+		error(1, errno, "socket tx_raw");
+
+	fd_rx_udp = socket(addr->sa_family, SOCK_DGRAM, 0);
+	if (fd_rx_udp == -1)
+		error(1, errno, "socket rx_udp");
+	if (bind(fd_rx_udp, addr, alen))
+		error(1, errno, "bind");
+	/* Fail fast. */
+	if (setsockopt(fd_rx_udp, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)))
+		error(1, errno, "setsockopt rcv timeout");
+
+	for (payload_len = min_frag_len; payload_len < MSG_LEN_MAX;
+			payload_len += (rand() % 4096)) {
+		if (cfg_verbose)
+			printf("payload_len: %d\n", payload_len);
+		max_frag_len = min_frag_len;
+		do {
+			send_udp_frags(fd_tx_raw, addr, alen, ipv6);
+			recv_validate_udp(fd_rx_udp);
+			max_frag_len += 8 * (rand() % 8);
+		} while (max_frag_len < (1500 - FRAG_HLEN) && max_frag_len <= payload_len);
+	}
+
+	/* Cleanup. */
+	if (close(fd_tx_raw))
+		error(1, errno, "close tx_raw");
+	if (close(fd_rx_udp))
+		error(1, errno, "close rx_udp");
+
+	if (cfg_verbose)
+		printf("processed %d messages, %d fragments\n",
+			msg_counter, frag_counter);
+
+	fprintf(stderr, "PASS\n");
+}
+
+
+static void run_test_v4(void)
+{
+	struct sockaddr_in addr = {0};
+
+	addr.sin_family = AF_INET;
+	addr.sin_port = htons(cfg_port);
+	addr.sin_addr = addr4;
+
+	run_test((void *)&addr, sizeof(addr), false /* !ipv6 */);
+}
+
+static void run_test_v6(void)
+{
+	struct sockaddr_in6 addr = {0};
+
+	addr.sin6_family = AF_INET6;
+	addr.sin6_port = htons(cfg_port);
+	addr.sin6_addr = addr6;
+
+	run_test((void *)&addr, sizeof(addr), true /* ipv6 */);
+}
+
+static void parse_opts(int argc, char **argv)
+{
+	int c;
+
+	while ((c = getopt(argc, argv, "46ov")) != -1) {
+		switch (c) {
+		case '4':
+			cfg_do_ipv4 = true;
+			break;
+		case '6':
+			cfg_do_ipv6 = true;
+			break;
+		case 'o':
+			cfg_overlap = true;
+			break;
+		case 'v':
+			cfg_verbose = true;
+			break;
+		default:
+			error(1, 0, "%s: parse error", argv[0]);
+		}
+	}
+}
+
+int main(int argc, char **argv)
+{
+	parse_opts(argc, argv);
+	seed = time(NULL);
+	srand(seed);
+	/* Print the seed to track/reproduce potential failures. */
+	printf("seed = %d\n", seed);
+
+	if (cfg_do_ipv4)
+		run_test_v4();
+	if (cfg_do_ipv6)
+		run_test_v6();
+
+	return 0;
+}
diff --git a/tools/testing/selftests/net/ip_defrag.sh b/tools/testing/selftests/net/ip_defrag.sh
new file mode 100755
index 000000000000..f34672796044
--- /dev/null
+++ b/tools/testing/selftests/net/ip_defrag.sh
@@ -0,0 +1,39 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Run a couple of IP defragmentation tests.
+
+set +x
+set -e
+
+readonly NETNS="ns-$(mktemp -u XXXXXX)"
+
+setup() {
+	ip netns add "${NETNS}"
+	ip -netns "${NETNS}" link set lo up
+	ip netns exec "${NETNS}" sysctl -w net.ipv4.ipfrag_high_thresh=9000000 >/dev/null 2>&1
+	ip netns exec "${NETNS}" sysctl -w net.ipv4.ipfrag_low_thresh=7000000 >/dev/null 2>&1
+	ip netns exec "${NETNS}" sysctl -w net.ipv6.ip6frag_high_thresh=9000000 >/dev/null 2>&1
+	ip netns exec "${NETNS}" sysctl -w net.ipv6.ip6frag_low_thresh=7000000 >/dev/null 2>&1
+}
+
+cleanup() {
+	ip netns del "${NETNS}"
+}
+
+trap cleanup EXIT
+setup
+
+echo "ipv4 defrag"
+ip netns exec "${NETNS}" ./ip_defrag -4
+
+
+echo "ipv4 defrag with overlaps"
+ip netns exec "${NETNS}" ./ip_defrag -4o
+
+echo "ipv6 defrag"
+ip netns exec "${NETNS}" ./ip_defrag -6
+
+echo "ipv6 defrag with overlaps"
+ip netns exec "${NETNS}" ./ip_defrag -6o
+
diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index 0ab9423d009f..a369d616b390 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -6,6 +6,26 @@
 #
 # Tests currently implemented:
 #
+# - pmtu_ipv4
+#	Set up two namespaces, A and B, with two paths between them over routers
+#	R1 and R2 (also implemented with namespaces), with different MTUs:
+#
+#	  segment a_r1    segment b_r1		a_r1: 2000
+#	.--------------R1--------------.	a_r2: 1500
+#	A                               B	a_r3: 2000
+#	'--------------R2--------------'	a_r4: 1400
+#	  segment a_r2    segment b_r2
+#
+#	Check that PMTU exceptions with the correct PMTU are created. Then
+#	decrease and increase the MTU of the local link for one of the paths,
+#	A to R1, checking that route exception PMTU changes accordingly over
+#	this path. Also check that locked exceptions are created when an ICMP
+#	message advertising a PMTU smaller than net.ipv4.route.min_pmtu is
+#	received
+#
+# - pmtu_ipv6
+#	Same as pmtu_ipv4, except for locked PMTU tests, using IPv6
+#
 # - pmtu_vti4_exception
 #	Set up vti tunnel on top of veth, with xfrm states and policies, in two
 #	namespaces with matching endpoints. Check that route exception is not
@@ -50,6 +70,8 @@ ksft_skip=4
 which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping)
 
 tests="
+	pmtu_ipv4_exception		ipv4: PMTU exceptions
+	pmtu_ipv6_exception		ipv6: PMTU exceptions
 	pmtu_vti6_exception		vti6: PMTU exceptions
 	pmtu_vti4_exception		vti4: PMTU exceptions
 	pmtu_vti4_default_mtu		vti4: default MTU assignment
@@ -60,8 +82,45 @@ tests="
 
 NS_A="ns-$(mktemp -u XXXXXX)"
 NS_B="ns-$(mktemp -u XXXXXX)"
+NS_R1="ns-$(mktemp -u XXXXXX)"
+NS_R2="ns-$(mktemp -u XXXXXX)"
 ns_a="ip netns exec ${NS_A}"
 ns_b="ip netns exec ${NS_B}"
+ns_r1="ip netns exec ${NS_R1}"
+ns_r2="ip netns exec ${NS_R2}"
+
+# Addressing and routing for tests with routers: four network segments, with
+# index SEGMENT between 1 and 4, a common prefix (PREFIX4 or PREFIX6) and an
+# identifier ID, which is 1 for hosts (A and B), 2 for routers (R1 and R2).
+# Addresses are:
+# - IPv4: PREFIX4.SEGMENT.ID (/24)
+# - IPv6: PREFIX6:SEGMENT::ID (/64)
+prefix4="192.168"
+prefix6="fd00"
+a_r1=1
+a_r2=2
+b_r1=3
+b_r2=4
+#	ns	peer	segment
+routing_addrs="
+	A	R1	${a_r1}
+	A	R2	${a_r2}
+	B	R1	${b_r1}
+	B	R2	${b_r2}
+"
+# Traffic from A to B goes through R1 by default, and through R2, if destined to
+# B's address on the b_r2 segment.
+# Traffic from B to A goes through R1.
+#	ns	destination		gateway
+routes="
+	A	default			${prefix4}.${a_r1}.2
+	A	${prefix4}.${b_r2}.1	${prefix4}.${a_r2}.2
+	B	default			${prefix4}.${b_r1}.2
+
+	A	default			${prefix6}:${a_r1}::2
+	A	${prefix6}:${b_r2}::1	${prefix6}:${a_r2}::2
+	B	default			${prefix6}:${b_r1}::2
+"
 
 veth4_a_addr="192.168.1.1"
 veth4_b_addr="192.168.1.2"
@@ -83,6 +142,7 @@ dummy6_mask="64"
 
 cleanup_done=1
 err_buf=
+tcpdump_pids=
 
 err() {
 	err_buf="${err_buf}${1}
@@ -94,9 +154,15 @@ err_flush() {
 	err_buf=
 }
 
+# Find the auto-generated name for this namespace
+nsname() {
+	eval echo \$NS_$1
+}
+
 setup_namespaces() {
-	ip netns add ${NS_A} || return 1
-	ip netns add ${NS_B}
+	for n in ${NS_A} ${NS_B} ${NS_R1} ${NS_R2}; do
+		ip netns add ${n} || return 1
+	done
 }
 
 setup_veth() {
@@ -167,6 +233,49 @@ setup_xfrm6() {
 	setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr}
 }
 
+setup_routing() {
+	for i in ${NS_R1} ${NS_R2}; do
+		ip netns exec ${i} sysctl -q net/ipv4/ip_forward=1
+		ip netns exec ${i} sysctl -q net/ipv6/conf/all/forwarding=1
+	done
+
+	for i in ${routing_addrs}; do
+		[ "${ns}" = "" ]	&& ns="${i}"		&& continue
+		[ "${peer}" = "" ]	&& peer="${i}"		&& continue
+		[ "${segment}" = "" ]	&& segment="${i}"
+
+		ns_name="$(nsname ${ns})"
+		peer_name="$(nsname ${peer})"
+		if="veth_${ns}-${peer}"
+		ifpeer="veth_${peer}-${ns}"
+
+		# Create veth links
+		ip link add ${if} up netns ${ns_name} type veth peer name ${ifpeer} netns ${peer_name} || return 1
+		ip -n ${peer_name} link set dev ${ifpeer} up
+
+		# Add addresses
+		ip -n ${ns_name}   addr add ${prefix4}.${segment}.1/24  dev ${if}
+		ip -n ${ns_name}   addr add ${prefix6}:${segment}::1/64 dev ${if}
+
+		ip -n ${peer_name} addr add ${prefix4}.${segment}.2/24  dev ${ifpeer}
+		ip -n ${peer_name} addr add ${prefix6}:${segment}::2/64 dev ${ifpeer}
+
+		ns=""; peer=""; segment=""
+	done
+
+	for i in ${routes}; do
+		[ "${ns}" = "" ]	&& ns="${i}"		&& continue
+		[ "${addr}" = "" ]	&& addr="${i}"		&& continue
+		[ "${gw}" = "" ]	&& gw="${i}"
+
+		ns_name="$(nsname ${ns})"
+
+		ip -n ${ns_name} route add ${addr} via ${gw}
+
+		ns=""; addr=""; gw=""
+	done
+}
+
 setup() {
 	[ "$(id -u)" -ne 0 ] && echo "  need to run as root" && return $ksft_skip
 
@@ -176,10 +285,28 @@ setup() {
 	done
 }
 
+trace() {
+	[ $tracing -eq 0 ] && return
+
+	for arg do
+		[ "${ns_cmd}" = "" ] && ns_cmd="${arg}" && continue
+		${ns_cmd} tcpdump -s 0 -i "${arg}" -w "${name}_${arg}.pcap" 2> /dev/null &
+		tcpdump_pids="${tcpdump_pids} $!"
+		ns_cmd=
+	done
+	sleep 1
+}
+
 cleanup() {
+	for pid in ${tcpdump_pids}; do
+		kill ${pid}
+	done
+	tcpdump_pids=
+
 	[ ${cleanup_done} -eq 1 ] && return
-	ip netns del ${NS_A} 2> /dev/null
-	ip netns del ${NS_B} 2> /dev/null
+	for n in ${NS_A} ${NS_B} ${NS_R1} ${NS_R2}; do
+		ip netns del ${n} 2> /dev/null
+	done
 	cleanup_done=1
 }
 
@@ -196,7 +323,9 @@ mtu_parse() {
 
 	next=0
 	for i in ${input}; do
+		[ ${next} -eq 1 -a "${i}" = "lock" ] && next=2 && continue
 		[ ${next} -eq 1 ] && echo "${i}" && return
+		[ ${next} -eq 2 ] && echo "lock ${i}" && return
 		[ "${i}" = "mtu" ] && next=1
 	done
 }
@@ -229,8 +358,117 @@ route_get_dst_pmtu_from_exception() {
 	mtu_parse "$(route_get_dst_exception "${ns_cmd}" ${dst})"
 }
 
+check_pmtu_value() {
+	expected="${1}"
+	value="${2}"
+	event="${3}"
+
+	[ "${expected}" = "any" ] && [ -n "${value}" ] && return 0
+	[ "${value}" = "${expected}" ] && return 0
+	[ -z "${value}" ] &&    err "  PMTU exception wasn't created after ${event}" && return 1
+	[ -z "${expected}" ] && err "  PMTU exception shouldn't exist after ${event}" && return 1
+	err "  found PMTU exception with incorrect MTU ${value}, expected ${expected}, after ${event}"
+	return 1
+}
+
+test_pmtu_ipvX() {
+	family=${1}
+
+	setup namespaces routing || return 2
+	trace "${ns_a}"  veth_A-R1    "${ns_r1}" veth_R1-A \
+	      "${ns_r1}" veth_R1-B    "${ns_b}"  veth_B-R1 \
+	      "${ns_a}"  veth_A-R2    "${ns_r2}" veth_R2-A \
+	      "${ns_r2}" veth_R2-B    "${ns_b}"  veth_B-R2
+
+	if [ ${family} -eq 4 ]; then
+		ping=ping
+		dst1="${prefix4}.${b_r1}.1"
+		dst2="${prefix4}.${b_r2}.1"
+	else
+		ping=${ping6}
+		dst1="${prefix6}:${b_r1}::1"
+		dst2="${prefix6}:${b_r2}::1"
+	fi
+
+	# Set up initial MTU values
+	mtu "${ns_a}"  veth_A-R1 2000
+	mtu "${ns_r1}" veth_R1-A 2000
+	mtu "${ns_r1}" veth_R1-B 1400
+	mtu "${ns_b}"  veth_B-R1 1400
+
+	mtu "${ns_a}"  veth_A-R2 2000
+	mtu "${ns_r2}" veth_R2-A 2000
+	mtu "${ns_r2}" veth_R2-B 1500
+	mtu "${ns_b}"  veth_B-R2 1500
+
+	# Create route exceptions
+	${ns_a} ${ping} -q -M want -i 0.1 -w 2 -s 1800 ${dst1} > /dev/null
+	${ns_a} ${ping} -q -M want -i 0.1 -w 2 -s 1800 ${dst2} > /dev/null
+
+	# Check that exceptions have been created with the correct PMTU
+	pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})"
+	check_pmtu_value "1400" "${pmtu_1}" "exceeding MTU" || return 1
+	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
+	check_pmtu_value "1500" "${pmtu_2}" "exceeding MTU" || return 1
+
+	# Decrease local MTU below PMTU, check for PMTU decrease in route exception
+	mtu "${ns_a}"  veth_A-R1 1300
+	mtu "${ns_r1}" veth_R1-A 1300
+	pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})"
+	check_pmtu_value "1300" "${pmtu_1}" "decreasing local MTU" || return 1
+	# Second exception shouldn't be modified
+	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
+	check_pmtu_value "1500" "${pmtu_2}" "changing local MTU on a link not on this path" || return 1
+
+	# Increase MTU, check for PMTU increase in route exception
+	mtu "${ns_a}"  veth_A-R1 1700
+	mtu "${ns_r1}" veth_R1-A 1700
+	pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})"
+	check_pmtu_value "1700" "${pmtu_1}" "increasing local MTU" || return 1
+	# Second exception shouldn't be modified
+	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
+	check_pmtu_value "1500" "${pmtu_2}" "changing local MTU on a link not on this path" || return 1
+
+	# Skip PMTU locking tests for IPv6
+	[ $family -eq 6 ] && return 0
+
+	# Decrease remote MTU on path via R2, get new exception
+	mtu "${ns_r2}" veth_R2-B 400
+	mtu "${ns_b}"  veth_B-R2 400
+	${ns_a} ${ping} -q -M want -i 0.1 -w 2 -s 1400 ${dst2} > /dev/null
+	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
+	check_pmtu_value "lock 552" "${pmtu_2}" "exceeding MTU, with MTU < min_pmtu" || return 1
+
+	# Decrease local MTU below PMTU
+	mtu "${ns_a}"  veth_A-R2 500
+	mtu "${ns_r2}" veth_R2-A 500
+	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
+	check_pmtu_value "500" "${pmtu_2}" "decreasing local MTU" || return 1
+
+	# Increase local MTU
+	mtu "${ns_a}"  veth_A-R2 1500
+	mtu "${ns_r2}" veth_R2-A 1500
+	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
+	check_pmtu_value "1500" "${pmtu_2}" "increasing local MTU" || return 1
+
+	# Get new exception
+	${ns_a} ${ping} -q -M want -i 0.1 -w 2 -s 1400 ${dst2} > /dev/null
+	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
+	check_pmtu_value "lock 552" "${pmtu_2}" "exceeding MTU, with MTU < min_pmtu" || return 1
+}
+
+test_pmtu_ipv4_exception() {
+	test_pmtu_ipvX 4
+}
+
+test_pmtu_ipv6_exception() {
+	test_pmtu_ipvX 6
+}
+
 test_pmtu_vti4_exception() {
 	setup namespaces veth vti4 xfrm4 || return 2
+	trace "${ns_a}" veth_a    "${ns_b}" veth_b \
+	      "${ns_a}" vti4_a    "${ns_b}" vti4_b
 
 	veth_mtu=1500
 	vti_mtu=$((veth_mtu - 20))
@@ -248,28 +486,19 @@ test_pmtu_vti4_exception() {
 	# exception is created
 	${ns_a} ping -q -M want -i 0.1 -w 2 -s ${ping_payload} ${vti4_b_addr} > /dev/null
 	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti4_b_addr})"
-	if [ "${pmtu}" != "" ]; then
-		err "  unexpected exception created with PMTU ${pmtu} for IP payload length ${esp_payload_rfc4106}"
-		return 1
-	fi
+	check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})" || return 1
 
 	# Now exceed link layer MTU by one byte, check that exception is created
+	# with the right PMTU value
 	${ns_a} ping -q -M want -i 0.1 -w 2 -s $((ping_payload + 1)) ${vti4_b_addr} > /dev/null
 	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti4_b_addr})"
-	if [ "${pmtu}" = "" ]; then
-		err "  exception not created for IP payload length $((esp_payload_rfc4106 + 1))"
-		return 1
-	fi
-
-	# ...with the right PMTU value
-	if [ ${pmtu} -ne ${esp_payload_rfc4106} ]; then
-		err "  wrong PMTU ${pmtu} in exception, expected: ${esp_payload_rfc4106}"
-		return 1
-	fi
+	check_pmtu_value "${esp_payload_rfc4106}" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106 + 1)))"
 }
 
 test_pmtu_vti6_exception() {
 	setup namespaces veth vti6 xfrm6 || return 2
+	trace "${ns_a}" veth_a    "${ns_b}" veth_b \
+	      "${ns_a}" vti6_a    "${ns_b}" vti6_b
 	fail=0
 
 	# Create route exception by exceeding link layer MTU
@@ -280,25 +509,18 @@ test_pmtu_vti6_exception() {
 	${ns_a} ${ping6} -q -i 0.1 -w 2 -s 60000 ${vti6_b_addr} > /dev/null
 
 	# Check that exception was created
-	if [ "$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})" = "" ]; then
-		err "  tunnel exceeding link layer MTU didn't create route exception"
-		return 1
-	fi
+	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})"
+	check_pmtu_value any "${pmtu}" "creating tunnel exceeding link layer MTU" || return 1
 
 	# Decrease tunnel MTU, check for PMTU decrease in route exception
 	mtu "${ns_a}" vti6_a 3000
-
-	if [ "$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})" -ne 3000 ]; then
-		err "  decreasing tunnel MTU didn't decrease route exception PMTU"
-		fail=1
-	fi
+	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})"
+	check_pmtu_value "3000" "${pmtu}" "decreasing tunnel MTU" || fail=1
 
 	# Increase tunnel MTU, check for PMTU increase in route exception
 	mtu "${ns_a}" vti6_a 9000
-	if [ "$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})" -ne 9000 ]; then
-		err "  increasing tunnel MTU didn't increase route exception PMTU"
-		fail=1
-	fi
+	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})"
+	check_pmtu_value "9000" "${pmtu}" "increasing tunnel MTU" || fail=1
 
 	return ${fail}
 }
@@ -445,15 +667,56 @@ test_pmtu_vti6_link_change_mtu() {
 	return ${fail}
 }
 
-trap cleanup EXIT
+usage() {
+	echo
+	echo "$0 [OPTIONS] [TEST]..."
+	echo "If no TEST argument is given, all tests will be run."
+	echo
+	echo "Options"
+	echo "  --trace: capture traffic to TEST_INTERFACE.pcap"
+	echo
+	echo "Available tests${tests}"
+	exit 1
+}
 
 exitcode=0
 desc=0
 IFS="	
 "
+
+tracing=0
+for arg do
+	if [ "${arg}" != "${arg#--*}" ]; then
+		opt="${arg#--}"
+		if [ "${opt}" = "trace" ]; then
+			if which tcpdump > /dev/null 2>&1; then
+				tracing=1
+			else
+				echo "=== tcpdump not available, tracing disabled"
+			fi
+		else
+			usage
+		fi
+	else
+		# Check first that all requested tests are available before
+		# running any
+		command -v > /dev/null "test_${arg}" || { echo "=== Test ${arg} not found"; usage; }
+	fi
+done
+
+trap cleanup EXIT
+
 for t in ${tests}; do
 	[ $desc -eq 0 ] && name="${t}" && desc=1 && continue || desc=0
 
+	run_this=1
+	for arg do
+		[ "${arg}" != "${arg#--*}" ] && continue
+		[ "${arg}" = "${name}" ] && run_this=1 && break
+		run_this=0
+	done
+	[ $run_this -eq 0 ] && continue
+
 	(
 		unset IFS
 		eval test_${name}
diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c
index 8fdfeafaf8c0..fac68d710f35 100644
--- a/tools/testing/selftests/net/tls.c
+++ b/tools/testing/selftests/net/tls.c
@@ -121,11 +121,11 @@ TEST_F(tls, send_then_sendfile)
 	buf = (char *)malloc(st.st_size);
 
 	EXPECT_EQ(send(self->fd, test_str, to_send, 0), to_send);
-	EXPECT_EQ(recv(self->cfd, recv_buf, to_send, 0), to_send);
+	EXPECT_EQ(recv(self->cfd, recv_buf, to_send, MSG_WAITALL), to_send);
 	EXPECT_EQ(memcmp(test_str, recv_buf, to_send), 0);
 
 	EXPECT_GE(sendfile(self->fd, filefd, 0, st.st_size), 0);
-	EXPECT_EQ(recv(self->cfd, buf, st.st_size, 0), st.st_size);
+	EXPECT_EQ(recv(self->cfd, buf, st.st_size, MSG_WAITALL), st.st_size);
 }
 
 TEST_F(tls, recv_max)
@@ -160,7 +160,7 @@ TEST_F(tls, msg_more)
 	EXPECT_EQ(send(self->fd, test_str, send_len, MSG_MORE), send_len);
 	EXPECT_EQ(recv(self->cfd, buf, send_len, MSG_DONTWAIT), -1);
 	EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
-	EXPECT_EQ(recv(self->cfd, buf, send_len * 2, MSG_DONTWAIT),
+	EXPECT_EQ(recv(self->cfd, buf, send_len * 2, MSG_WAITALL),
 		  send_len * 2);
 	EXPECT_EQ(memcmp(buf, test_str, send_len), 0);
 }
@@ -180,7 +180,7 @@ TEST_F(tls, sendmsg_single)
 	msg.msg_iov = &vec;
 	msg.msg_iovlen = 1;
 	EXPECT_EQ(sendmsg(self->fd, &msg, 0), send_len);
-	EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len);
+	EXPECT_EQ(recv(self->cfd, buf, send_len, MSG_WAITALL), send_len);
 	EXPECT_EQ(memcmp(buf, test_str, send_len), 0);
 }
 
@@ -288,7 +288,7 @@ TEST_F(tls, splice_from_pipe)
 	ASSERT_GE(pipe(p), 0);
 	EXPECT_GE(write(p[1], mem_send, send_len), 0);
 	EXPECT_GE(splice(p[0], NULL, self->fd, NULL, send_len, 0), 0);
-	EXPECT_GE(recv(self->cfd, mem_recv, send_len, 0), 0);
+	EXPECT_EQ(recv(self->cfd, mem_recv, send_len, MSG_WAITALL), send_len);
 	EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0);
 }
 
@@ -306,7 +306,7 @@ TEST_F(tls, splice_from_pipe2)
 	EXPECT_GE(splice(p[0], NULL, self->fd, NULL, 8000, 0), 0);
 	EXPECT_GE(write(p2[1], mem_send + 8000, 8000), 0);
 	EXPECT_GE(splice(p2[0], NULL, self->fd, NULL, 8000, 0), 0);
-	EXPECT_GE(recv(self->cfd, mem_recv, send_len, 0), 0);
+	EXPECT_EQ(recv(self->cfd, mem_recv, send_len, MSG_WAITALL), send_len);
 	EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0);
 }
 
@@ -322,13 +322,13 @@ TEST_F(tls, send_and_splice)
 
 	ASSERT_GE(pipe(p), 0);
 	EXPECT_EQ(send(self->fd, test_str, send_len2, 0), send_len2);
-	EXPECT_NE(recv(self->cfd, buf, send_len2, 0), -1);
+	EXPECT_EQ(recv(self->cfd, buf, send_len2, MSG_WAITALL), send_len2);
 	EXPECT_EQ(memcmp(test_str, buf, send_len2), 0);
 
 	EXPECT_GE(write(p[1], mem_send, send_len), send_len);
 	EXPECT_GE(splice(p[0], NULL, self->fd, NULL, send_len, 0), send_len);
 
-	EXPECT_GE(recv(self->cfd, mem_recv, send_len, 0), 0);
+	EXPECT_EQ(recv(self->cfd, mem_recv, send_len, MSG_WAITALL), send_len);
 	EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0);
 }
 
@@ -436,7 +436,7 @@ TEST_F(tls, multiple_send_single_recv)
 	EXPECT_GE(send(self->fd, send_mem, send_len, 0), 0);
 	EXPECT_GE(send(self->fd, send_mem, send_len, 0), 0);
 	memset(recv_mem, 0, total_len);
-	EXPECT_EQ(recv(self->cfd, recv_mem, total_len, 0), total_len);
+	EXPECT_EQ(recv(self->cfd, recv_mem, total_len, MSG_WAITALL), total_len);
 
 	EXPECT_EQ(memcmp(send_mem, recv_mem, send_len), 0);
 	EXPECT_EQ(memcmp(send_mem, recv_mem + send_len, send_len), 0);
@@ -516,17 +516,17 @@ TEST_F(tls, recv_peek_multiple_records)
 	len = strlen(test_str_second) + 1;
 	EXPECT_EQ(send(self->fd, test_str_second, len, 0), len);
 
-	len = sizeof(buf);
+	len = strlen(test_str_first);
 	memset(buf, 0, len);
-	EXPECT_NE(recv(self->cfd, buf, len, MSG_PEEK), -1);
+	EXPECT_EQ(recv(self->cfd, buf, len, MSG_PEEK | MSG_WAITALL), len);
 
 	/* MSG_PEEK can only peek into the current record. */
-	len = strlen(test_str_first) + 1;
+	len = strlen(test_str_first);
 	EXPECT_EQ(memcmp(test_str_first, buf, len), 0);
 
-	len = sizeof(buf);
+	len = strlen(test_str) + 1;
 	memset(buf, 0, len);
-	EXPECT_NE(recv(self->cfd, buf, len, 0), -1);
+	EXPECT_EQ(recv(self->cfd, buf, len, MSG_WAITALL), len);
 
 	/* Non-MSG_PEEK will advance strparser (and therefore record)
 	 * however.
@@ -543,6 +543,28 @@ TEST_F(tls, recv_peek_multiple_records)
 	len = strlen(test_str_second) + 1;
 	EXPECT_EQ(send(self->fd, test_str_second, len, 0), len);
 
+	len = strlen(test_str) + 1;
+	memset(buf, 0, len);
+	EXPECT_EQ(recv(self->cfd, buf, len, MSG_PEEK | MSG_WAITALL), len);
+
+	len = strlen(test_str) + 1;
+	EXPECT_EQ(memcmp(test_str, buf, len), 0);
+}
+
+TEST_F(tls, recv_peek_large_buf_mult_recs)
+{
+	char const *test_str = "test_read_peek_mult_recs";
+	char const *test_str_first = "test_read_peek";
+	char const *test_str_second = "_mult_recs";
+	int len;
+	char buf[64];
+
+	len = strlen(test_str_first);
+	EXPECT_EQ(send(self->fd, test_str_first, len, 0), len);
+
+	len = strlen(test_str_second) + 1;
+	EXPECT_EQ(send(self->fd, test_str_second, len, 0), len);
+
 	len = sizeof(buf);
 	memset(buf, 0, len);
 	EXPECT_NE(recv(self->cfd, buf, len, MSG_PEEK), -1);
@@ -551,6 +573,7 @@ TEST_F(tls, recv_peek_multiple_records)
 	EXPECT_EQ(memcmp(test_str, buf, len), 0);
 }
 
+
 TEST_F(tls, pollin)
 {
 	char const *test_str = "test_poll";
@@ -564,7 +587,7 @@ TEST_F(tls, pollin)
 
 	EXPECT_EQ(poll(&fd, 1, 20), 1);
 	EXPECT_EQ(fd.revents & POLLIN, 1);
-	EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len);
+	EXPECT_EQ(recv(self->cfd, buf, send_len, MSG_WAITALL), send_len);
 	/* Test timing out */
 	EXPECT_EQ(poll(&fd, 1, 20), 0);
 }
@@ -582,7 +605,7 @@ TEST_F(tls, poll_wait)
 	/* Set timeout to inf. secs */
 	EXPECT_EQ(poll(&fd, 1, -1), 1);
 	EXPECT_EQ(fd.revents & POLLIN, 1);
-	EXPECT_EQ(recv(self->cfd, recv_mem, send_len, 0), send_len);
+	EXPECT_EQ(recv(self->cfd, recv_mem, send_len, MSG_WAITALL), send_len);
 }
 
 TEST_F(tls, blocking)
@@ -728,7 +751,7 @@ TEST_F(tls, control_msg)
 	EXPECT_EQ(recv(self->cfd, buf, send_len, 0), -1);
 
 	vec.iov_base = buf;
-	EXPECT_EQ(recvmsg(self->cfd, &msg, 0), send_len);
+	EXPECT_EQ(recvmsg(self->cfd, &msg, MSG_WAITALL), send_len);
 	cmsg = CMSG_FIRSTHDR(&msg);
 	EXPECT_NE(cmsg, NULL);
 	EXPECT_EQ(cmsg->cmsg_level, SOL_TLS);
diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile
index 201b598558b9..b3ad909aefbc 100644
--- a/tools/testing/selftests/powerpc/Makefile
+++ b/tools/testing/selftests/powerpc/Makefile
@@ -28,7 +28,8 @@ SUB_DIRS = alignment		\
 	   tm			\
 	   vphn         \
 	   math		\
-	   ptrace
+	   ptrace	\
+	   security
 
 endif
 
diff --git a/tools/testing/selftests/powerpc/cache_shape/Makefile b/tools/testing/selftests/powerpc/cache_shape/Makefile
index ede4d3dae750..689f6c8ebcd8 100644
--- a/tools/testing/selftests/powerpc/cache_shape/Makefile
+++ b/tools/testing/selftests/powerpc/cache_shape/Makefile
@@ -1,12 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
-TEST_PROGS := cache_shape
-
-all: $(TEST_PROGS)
-
-$(TEST_PROGS): ../harness.c ../utils.c
+TEST_GEN_PROGS := cache_shape
 
 top_srcdir = ../../../../..
 include ../../lib.mk
 
-clean:
-	rm -f $(TEST_PROGS) *.o
+$(TEST_GEN_PROGS): ../harness.c ../utils.c
diff --git a/tools/testing/selftests/powerpc/include/reg.h b/tools/testing/selftests/powerpc/include/reg.h
index 7f348c059bc2..52b4710469d2 100644
--- a/tools/testing/selftests/powerpc/include/reg.h
+++ b/tools/testing/selftests/powerpc/include/reg.h
@@ -17,6 +17,7 @@
 				    : "memory")
 
 #define mb()		asm volatile("sync" : : : "memory");
+#define barrier()	asm volatile("" : : : "memory");
 
 #define SPRN_MMCR2     769
 #define SPRN_MMCRA     770
diff --git a/tools/testing/selftests/powerpc/include/utils.h b/tools/testing/selftests/powerpc/include/utils.h
index c58c370828b4..49621822d7c3 100644
--- a/tools/testing/selftests/powerpc/include/utils.h
+++ b/tools/testing/selftests/powerpc/include/utils.h
@@ -11,6 +11,7 @@
 #include <stdint.h>
 #include <stdbool.h>
 #include <linux/auxvec.h>
+#include <linux/perf_event.h>
 #include "reg.h"
 
 /* Avoid headaches with PRI?64 - just use %ll? always */
@@ -31,6 +32,15 @@ void *get_auxv_entry(int type);
 
 int pick_online_cpu(void);
 
+int read_debugfs_file(char *debugfs_file, int *result);
+int write_debugfs_file(char *debugfs_file, int result);
+void set_dscr(unsigned long val);
+int perf_event_open_counter(unsigned int type,
+			    unsigned long config, int group_fd);
+int perf_event_enable(int fd);
+int perf_event_disable(int fd);
+int perf_event_reset(int fd);
+
 static inline bool have_hwcap(unsigned long ftr)
 {
 	return ((unsigned long)get_auxv_entry(AT_HWCAP) & ftr) == ftr;
@@ -80,4 +90,12 @@ do {								\
 #define PPC_FEATURE2_ARCH_3_00 0x00800000
 #endif
 
+#if defined(__powerpc64__)
+#define UCONTEXT_NIA(UC)	(UC)->uc_mcontext.gp_regs[PT_NIP]
+#elif defined(__powerpc__)
+#define UCONTEXT_NIA(UC)	(UC)->uc_mcontext.uc_regs->gregs[PT_NIP]
+#else
+#error implement UCONTEXT_NIA
+#endif
+
 #endif /* _SELFTESTS_POWERPC_UTILS_H */
diff --git a/tools/testing/selftests/powerpc/mm/.gitignore b/tools/testing/selftests/powerpc/mm/.gitignore
index 7d7c42ed6de9..ba919308fe30 100644
--- a/tools/testing/selftests/powerpc/mm/.gitignore
+++ b/tools/testing/selftests/powerpc/mm/.gitignore
@@ -2,4 +2,5 @@ hugetlb_vs_thp_test
 subpage_prot
 tempfile
 prot_sao
-segv_errors
-\ No newline at end of file
+segv_errors
+wild_bctr
+\ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile
index 33ced6e0ad25..43d68420e363 100644
--- a/tools/testing/selftests/powerpc/mm/Makefile
+++ b/tools/testing/selftests/powerpc/mm/Makefile
@@ -2,7 +2,7 @@
 noarg:
 	$(MAKE) -C ../
 
-TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors
+TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors wild_bctr
 TEST_GEN_FILES := tempfile
 
 top_srcdir = ../../../../..
@@ -12,6 +12,8 @@ $(TEST_GEN_PROGS): ../harness.c
 
 $(OUTPUT)/prot_sao: ../utils.c
 
+$(OUTPUT)/wild_bctr: CFLAGS += -m64
+
 $(OUTPUT)/tempfile:
 	dd if=/dev/zero of=$@ bs=64k count=1
 
diff --git a/tools/testing/selftests/powerpc/mm/wild_bctr.c b/tools/testing/selftests/powerpc/mm/wild_bctr.c
new file mode 100644
index 000000000000..1b0e9e9a2ddc
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/wild_bctr.c
@@ -0,0 +1,155 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright 2018, Michael Ellerman, IBM Corp.
+ *
+ * Test that an out-of-bounds branch to counter behaves as expected.
+ */
+
+#include <setjmp.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <ucontext.h>
+#include <unistd.h>
+
+#include "utils.h"
+
+
+#define BAD_NIP	0x788c545a18000000ull
+
+static struct pt_regs signal_regs;
+static jmp_buf setjmp_env;
+
+static void save_regs(ucontext_t *ctxt)
+{
+	struct pt_regs *regs = ctxt->uc_mcontext.regs;
+
+	memcpy(&signal_regs, regs, sizeof(signal_regs));
+}
+
+static void segv_handler(int signum, siginfo_t *info, void *ctxt_v)
+{
+	save_regs(ctxt_v);
+	longjmp(setjmp_env, 1);
+}
+
+static void usr2_handler(int signum, siginfo_t *info, void *ctxt_v)
+{
+	save_regs(ctxt_v);
+}
+
+static int ok(void)
+{
+	printf("Everything is OK in here.\n");
+	return 0;
+}
+
+#define REG_POISON	0x5a5aUL
+#define POISONED_REG(n)	((REG_POISON << 48) | ((n) << 32) | (REG_POISON << 16) | (n))
+
+static inline void poison_regs(void)
+{
+	#define POISON_REG(n)	\
+	  "lis  " __stringify(n) "," __stringify(REG_POISON) ";" \
+	  "addi " __stringify(n) "," __stringify(n) "," __stringify(n) ";" \
+	  "sldi " __stringify(n) "," __stringify(n) ", 32 ;" \
+	  "oris " __stringify(n) "," __stringify(n) "," __stringify(REG_POISON) ";" \
+	  "addi " __stringify(n) "," __stringify(n) "," __stringify(n) ";"
+
+	asm (POISON_REG(15)
+	     POISON_REG(16)
+	     POISON_REG(17)
+	     POISON_REG(18)
+	     POISON_REG(19)
+	     POISON_REG(20)
+	     POISON_REG(21)
+	     POISON_REG(22)
+	     POISON_REG(23)
+	     POISON_REG(24)
+	     POISON_REG(25)
+	     POISON_REG(26)
+	     POISON_REG(27)
+	     POISON_REG(28)
+	     POISON_REG(29)
+	     : // inputs
+	     : // outputs
+	     : "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", "25",
+	       "26", "27", "28", "29"
+	);
+	#undef POISON_REG
+}
+
+static int check_regs(void)
+{
+	unsigned long i;
+
+	for (i = 15; i <= 29; i++)
+		FAIL_IF(signal_regs.gpr[i] != POISONED_REG(i));
+
+	printf("Regs OK\n");
+	return 0;
+}
+
+static void dump_regs(void)
+{
+	for (int i = 0; i < 32; i += 4) {
+		printf("r%02d 0x%016lx  r%02d 0x%016lx  " \
+		       "r%02d 0x%016lx  r%02d 0x%016lx\n",
+		       i, signal_regs.gpr[i],
+		       i+1, signal_regs.gpr[i+1],
+		       i+2, signal_regs.gpr[i+2],
+		       i+3, signal_regs.gpr[i+3]);
+	}
+}
+
+int test_wild_bctr(void)
+{
+	int (*func_ptr)(void);
+	struct sigaction segv = {
+		.sa_sigaction = segv_handler,
+		.sa_flags = SA_SIGINFO
+	};
+	struct sigaction usr2 = {
+		.sa_sigaction = usr2_handler,
+		.sa_flags = SA_SIGINFO
+	};
+
+	FAIL_IF(sigaction(SIGSEGV, &segv, NULL));
+	FAIL_IF(sigaction(SIGUSR2, &usr2, NULL));
+
+	bzero(&signal_regs, sizeof(signal_regs));
+
+	if (setjmp(setjmp_env) == 0) {
+		func_ptr = ok;
+		func_ptr();
+
+		kill(getpid(), SIGUSR2);
+		printf("Regs before:\n");
+		dump_regs();
+		bzero(&signal_regs, sizeof(signal_regs));
+
+		poison_regs();
+
+		func_ptr = (int (*)(void))BAD_NIP;
+		func_ptr();
+
+		FAIL_IF(1); /* we didn't segv? */
+	}
+
+	FAIL_IF(signal_regs.nip != BAD_NIP);
+
+	printf("All good - took SEGV as expected branching to 0x%llx\n", BAD_NIP);
+
+	dump_regs();
+	FAIL_IF(check_regs());
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(test_wild_bctr, "wild_bctr");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/Makefile b/tools/testing/selftests/powerpc/pmu/ebb/Makefile
index bd5dfa509272..23f4caf48ffc 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/Makefile
+++ b/tools/testing/selftests/powerpc/pmu/ebb/Makefile
@@ -5,6 +5,9 @@ noarg:
 # The EBB handler is 64-bit code and everything links against it
 CFLAGS += -m64
 
+# Toolchains may build PIE by default which breaks the assembly
+LDFLAGS += -no-pie
+
 TEST_GEN_PROGS := reg_access_test event_attributes_test cycles_test	\
 	 cycles_with_freeze_test pmc56_overflow_test		\
 	 ebb_vs_cpu_event_test cpu_event_vs_ebb_test		\
diff --git a/tools/testing/selftests/powerpc/primitives/load_unaligned_zeropad.c b/tools/testing/selftests/powerpc/primitives/load_unaligned_zeropad.c
index ed3239bbfae2..ee1e9ca22f0d 100644
--- a/tools/testing/selftests/powerpc/primitives/load_unaligned_zeropad.c
+++ b/tools/testing/selftests/powerpc/primitives/load_unaligned_zeropad.c
@@ -65,14 +65,6 @@ static int unprotect_region(void)
 extern char __start___ex_table[];
 extern char __stop___ex_table[];
 
-#if defined(__powerpc64__)
-#define UCONTEXT_NIA(UC)	(UC)->uc_mcontext.gp_regs[PT_NIP]
-#elif defined(__powerpc__)
-#define UCONTEXT_NIA(UC)	(UC)->uc_mcontext.uc_regs->gregs[PT_NIP]
-#else
-#error implement UCONTEXT_NIA
-#endif
-
 struct extbl_entry {
 	int insn;
 	int fixup;
diff --git a/tools/testing/selftests/powerpc/ptrace/Makefile b/tools/testing/selftests/powerpc/ptrace/Makefile
index 923d531265f8..8d3f006c98cc 100644
--- a/tools/testing/selftests/powerpc/ptrace/Makefile
+++ b/tools/testing/selftests/powerpc/ptrace/Makefile
@@ -1,20 +1,15 @@
 # SPDX-License-Identifier: GPL-2.0
-TEST_PROGS := ptrace-gpr ptrace-tm-gpr ptrace-tm-spd-gpr \
+TEST_GEN_PROGS := ptrace-gpr ptrace-tm-gpr ptrace-tm-spd-gpr \
               ptrace-tar ptrace-tm-tar ptrace-tm-spd-tar ptrace-vsx ptrace-tm-vsx \
               ptrace-tm-spd-vsx ptrace-tm-spr ptrace-hwbreak ptrace-pkey core-pkey \
-              perf-hwbreak
+              perf-hwbreak ptrace-syscall
 
 top_srcdir = ../../../../..
 include ../../lib.mk
 
-all: $(TEST_PROGS)
-
 CFLAGS += -m64 -I../../../../../usr/include -I../tm -mhtm -fno-pie
 
-ptrace-pkey core-pkey: child.h
-ptrace-pkey core-pkey: LDLIBS += -pthread
-
-$(TEST_PROGS): ../harness.c ../utils.c ../lib/reg.S ptrace.h
+$(OUTPUT)/ptrace-pkey $(OUTPUT)/core-pkey: child.h
+$(OUTPUT)/ptrace-pkey $(OUTPUT)/core-pkey: LDLIBS += -pthread
 
-clean:
-	rm -f $(TEST_PROGS) *.o
+$(TEST_GEN_PROGS): ../harness.c ../utils.c ../lib/reg.S ptrace.h
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-syscall.c b/tools/testing/selftests/powerpc/ptrace/ptrace-syscall.c
new file mode 100644
index 000000000000..3353210dcdbd
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-syscall.c
@@ -0,0 +1,228 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * A ptrace test for testing PTRACE_SYSEMU, PTRACE_SETREGS and
+ * PTRACE_GETREG.  This test basically create a child process that executes
+ * syscalls and the parent process check if it is being traced appropriated.
+ *
+ * This test is heavily based on tools/testing/selftests/x86/ptrace_syscall.c
+ * test, and it was adapted to run on Powerpc by
+ * Breno Leitao <leitao@debian.org>
+ */
+#define _GNU_SOURCE
+
+#include <sys/ptrace.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/syscall.h>
+#include <sys/user.h>
+#include <unistd.h>
+#include <errno.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <err.h>
+#include <string.h>
+#include <sys/auxv.h>
+#include "utils.h"
+
+/* Bitness-agnostic defines for user_regs_struct fields. */
+#define user_syscall_nr	gpr[0]
+#define user_arg0		gpr[3]
+#define user_arg1		gpr[4]
+#define user_arg2		gpr[5]
+#define user_arg3		gpr[6]
+#define user_arg4		gpr[7]
+#define user_arg5		gpr[8]
+#define user_ip		nip
+
+#define PTRACE_SYSEMU		0x1d
+
+static int nerrs;
+
+static void wait_trap(pid_t chld)
+{
+	siginfo_t si;
+
+	if (waitid(P_PID, chld, &si, WEXITED|WSTOPPED) != 0)
+		err(1, "waitid");
+	if (si.si_pid != chld)
+		errx(1, "got unexpected pid in event\n");
+	if (si.si_code != CLD_TRAPPED)
+		errx(1, "got unexpected event type %d\n", si.si_code);
+}
+
+static void test_ptrace_syscall_restart(void)
+{
+	int status;
+	struct pt_regs regs;
+	pid_t chld;
+
+	printf("[RUN]\tptrace-induced syscall restart\n");
+
+	chld = fork();
+	if (chld < 0)
+		err(1, "fork");
+
+	/*
+	 * Child process is running 4 syscalls after ptrace.
+	 *
+	 * 1) getpid()
+	 * 2) gettid()
+	 * 3) tgkill() -> Send SIGSTOP
+	 * 4) gettid() -> Where the tests will happen essentially
+	 */
+	if (chld == 0) {
+		if (ptrace(PTRACE_TRACEME, 0, 0, 0) != 0)
+			err(1, "PTRACE_TRACEME");
+
+		pid_t pid = getpid(), tid = syscall(SYS_gettid);
+
+		printf("\tChild will make one syscall\n");
+		syscall(SYS_tgkill, pid, tid, SIGSTOP);
+
+		syscall(SYS_gettid, 10, 11, 12, 13, 14, 15);
+		_exit(0);
+	}
+	/* Parent process below */
+
+	/* Wait for SIGSTOP sent by tgkill above. */
+	if (waitpid(chld, &status, 0) != chld || !WIFSTOPPED(status))
+		err(1, "waitpid");
+
+	printf("[RUN]\tSYSEMU\n");
+	if (ptrace(PTRACE_SYSEMU, chld, 0, 0) != 0)
+		err(1, "PTRACE_SYSEMU");
+	wait_trap(chld);
+
+	if (ptrace(PTRACE_GETREGS, chld, 0, &regs) != 0)
+		err(1, "PTRACE_GETREGS");
+
+	/*
+	 * Ptrace trapped prior to executing the syscall, thus r3 still has
+	 * the syscall number instead of the sys_gettid() result
+	 */
+	if (regs.user_syscall_nr != SYS_gettid ||
+	    regs.user_arg0 != 10 || regs.user_arg1 != 11 ||
+	    regs.user_arg2 != 12 || regs.user_arg3 != 13 ||
+	    regs.user_arg4 != 14 || regs.user_arg5 != 15) {
+		printf("[FAIL]\tInitial args are wrong (nr=%lu, args=%lu %lu %lu %lu %lu %lu)\n",
+			(unsigned long)regs.user_syscall_nr,
+			(unsigned long)regs.user_arg0,
+			(unsigned long)regs.user_arg1,
+			(unsigned long)regs.user_arg2,
+			(unsigned long)regs.user_arg3,
+			(unsigned long)regs.user_arg4,
+			(unsigned long)regs.user_arg5);
+		 nerrs++;
+	} else {
+		printf("[OK]\tInitial nr and args are correct\n"); }
+
+	printf("[RUN]\tRestart the syscall (ip = 0x%lx)\n",
+	       (unsigned long)regs.user_ip);
+
+	/*
+	 * Rewind to retry the same syscall again. This will basically test
+	 * the rewind process together with PTRACE_SETREGS and PTRACE_GETREGS.
+	 */
+	regs.user_ip -= 4;
+	if (ptrace(PTRACE_SETREGS, chld, 0, &regs) != 0)
+		err(1, "PTRACE_SETREGS");
+
+	if (ptrace(PTRACE_SYSEMU, chld, 0, 0) != 0)
+		err(1, "PTRACE_SYSEMU");
+	wait_trap(chld);
+
+	if (ptrace(PTRACE_GETREGS, chld, 0, &regs) != 0)
+		err(1, "PTRACE_GETREGS");
+
+	if (regs.user_syscall_nr != SYS_gettid ||
+	    regs.user_arg0 != 10 || regs.user_arg1 != 11 ||
+	    regs.user_arg2 != 12 || regs.user_arg3 != 13 ||
+	    regs.user_arg4 != 14 || regs.user_arg5 != 15) {
+		printf("[FAIL]\tRestart nr or args are wrong (nr=%lu, args=%lu %lu %lu %lu %lu %lu)\n",
+			(unsigned long)regs.user_syscall_nr,
+			(unsigned long)regs.user_arg0,
+			(unsigned long)regs.user_arg1,
+			(unsigned long)regs.user_arg2,
+			(unsigned long)regs.user_arg3,
+			(unsigned long)regs.user_arg4,
+			(unsigned long)regs.user_arg5);
+		nerrs++;
+	} else {
+		printf("[OK]\tRestarted nr and args are correct\n");
+	}
+
+	printf("[RUN]\tChange nr and args and restart the syscall (ip = 0x%lx)\n",
+	       (unsigned long)regs.user_ip);
+
+	/*
+	 * Inject a new syscall (getpid) in the same place the previous
+	 * syscall (gettid), rewind and re-execute.
+	 */
+	regs.user_syscall_nr = SYS_getpid;
+	regs.user_arg0 = 20;
+	regs.user_arg1 = 21;
+	regs.user_arg2 = 22;
+	regs.user_arg3 = 23;
+	regs.user_arg4 = 24;
+	regs.user_arg5 = 25;
+	regs.user_ip -= 4;
+
+	if (ptrace(PTRACE_SETREGS, chld, 0, &regs) != 0)
+		err(1, "PTRACE_SETREGS");
+
+	if (ptrace(PTRACE_SYSEMU, chld, 0, 0) != 0)
+		err(1, "PTRACE_SYSEMU");
+	wait_trap(chld);
+
+	if (ptrace(PTRACE_GETREGS, chld, 0, &regs) != 0)
+		err(1, "PTRACE_GETREGS");
+
+	/* Check that ptrace stopped at the new syscall that was
+	 * injected, and guarantee that it haven't executed, i.e, user_args
+	 * contain the arguments and not the syscall return value, for
+	 * instance.
+	 */
+	if (regs.user_syscall_nr != SYS_getpid
+		|| regs.user_arg0 != 20 || regs.user_arg1 != 21
+		|| regs.user_arg2 != 22 || regs.user_arg3 != 23
+		|| regs.user_arg4 != 24 || regs.user_arg5 != 25) {
+
+		printf("[FAIL]\tRestart nr or args are wrong (nr=%lu, args=%lu %lu %lu %lu %lu %lu)\n",
+			(unsigned long)regs.user_syscall_nr,
+			(unsigned long)regs.user_arg0,
+			(unsigned long)regs.user_arg1,
+			(unsigned long)regs.user_arg2,
+			(unsigned long)regs.user_arg3,
+			(unsigned long)regs.user_arg4,
+			(unsigned long)regs.user_arg5);
+		nerrs++;
+	} else {
+		printf("[OK]\tReplacement nr and args are correct\n");
+	}
+
+	if (ptrace(PTRACE_CONT, chld, 0, 0) != 0)
+		err(1, "PTRACE_CONT");
+
+	if (waitpid(chld, &status, 0) != chld)
+		err(1, "waitpid");
+
+	/* Guarantee that the process executed properly, returning 0 */
+	if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
+		printf("[FAIL]\tChild failed\n");
+		nerrs++;
+	} else {
+		printf("[OK]\tChild exited cleanly\n");
+	}
+}
+
+int ptrace_syscall(void)
+{
+	test_ptrace_syscall_restart();
+
+	return nerrs;
+}
+
+int main(void)
+{
+	return test_harness(ptrace_syscall, "ptrace_syscall");
+}
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c
index 327fa943c7f3..dbdffa2e2c82 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c
@@ -67,8 +67,8 @@ trans:
 		"3: ;"
 		: [res] "=r" (result), [texasr] "=r" (texasr)
 		: [gpr_1]"i"(GPR_1), [gpr_2]"i"(GPR_2), [gpr_4]"i"(GPR_4),
-		[sprn_texasr] "i" (SPRN_TEXASR), [flt_1] "r" (&a),
-		[flt_2] "r" (&b), [flt_4] "r" (&d)
+		[sprn_texasr] "i" (SPRN_TEXASR), [flt_1] "b" (&a),
+		[flt_4] "b" (&d)
 		: "memory", "r5", "r6", "r7",
 		"r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
 		"r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
diff --git a/tools/testing/selftests/powerpc/security/Makefile b/tools/testing/selftests/powerpc/security/Makefile
new file mode 100644
index 000000000000..85861c46b445
--- /dev/null
+++ b/tools/testing/selftests/powerpc/security/Makefile
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0+
+
+TEST_GEN_PROGS := rfi_flush
+top_srcdir = ../../../../..
+
+CFLAGS += -I../../../../../usr/include
+
+include ../../lib.mk
+
+$(TEST_GEN_PROGS): ../harness.c ../utils.c
diff --git a/tools/testing/selftests/powerpc/security/rfi_flush.c b/tools/testing/selftests/powerpc/security/rfi_flush.c
new file mode 100644
index 000000000000..0a7d0afb26b8
--- /dev/null
+++ b/tools/testing/selftests/powerpc/security/rfi_flush.c
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+/*
+ * Copyright 2018 IBM Corporation.
+ */
+
+#define __SANE_USERSPACE_TYPES__
+
+#include <sys/types.h>
+#include <stdint.h>
+#include <malloc.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include "utils.h"
+
+#define CACHELINE_SIZE 128
+
+struct perf_event_read {
+	__u64 nr;
+	__u64 l1d_misses;
+};
+
+static inline __u64 load(void *addr)
+{
+	__u64 tmp;
+
+	asm volatile("ld %0,0(%1)" : "=r"(tmp) : "b"(addr));
+
+	return tmp;
+}
+
+static void syscall_loop(char *p, unsigned long iterations,
+			 unsigned long zero_size)
+{
+	for (unsigned long i = 0; i < iterations; i++) {
+		for (unsigned long j = 0; j < zero_size; j += CACHELINE_SIZE)
+			load(p + j);
+		getppid();
+	}
+}
+
+int rfi_flush_test(void)
+{
+	char *p;
+	int repetitions = 10;
+	int fd, passes = 0, iter, rc = 0;
+	struct perf_event_read v;
+	__u64 l1d_misses_total = 0;
+	unsigned long iterations = 100000, zero_size = 24 * 1024;
+	unsigned long l1d_misses_expected;
+	int rfi_flush_org, rfi_flush;
+
+	SKIP_IF(geteuid() != 0);
+
+	if (read_debugfs_file("powerpc/rfi_flush", &rfi_flush_org)) {
+		perror("Unable to read powerpc/rfi_flush debugfs file");
+		SKIP_IF(1);
+	}
+
+	rfi_flush = rfi_flush_org;
+
+	fd = perf_event_open_counter(PERF_TYPE_RAW, /* L1d miss */ 0x400f0, -1);
+	FAIL_IF(fd < 0);
+
+	p = (char *)memalign(zero_size, CACHELINE_SIZE);
+
+	FAIL_IF(perf_event_enable(fd));
+
+	set_dscr(1);
+
+	iter = repetitions;
+
+	/*
+	 * We expect to see l1d miss for each cacheline access when rfi_flush
+	 * is set. Allow a small variation on this.
+	 */
+	l1d_misses_expected = iterations * (zero_size / CACHELINE_SIZE - 2);
+
+again:
+	FAIL_IF(perf_event_reset(fd));
+
+	syscall_loop(p, iterations, zero_size);
+
+	FAIL_IF(read(fd, &v, sizeof(v)) != sizeof(v));
+
+	if (rfi_flush && v.l1d_misses >= l1d_misses_expected)
+		passes++;
+	else if (!rfi_flush && v.l1d_misses < (l1d_misses_expected / 2))
+		passes++;
+
+	l1d_misses_total += v.l1d_misses;
+
+	while (--iter)
+		goto again;
+
+	if (passes < repetitions) {
+		printf("FAIL (L1D misses with rfi_flush=%d: %llu %c %lu) [%d/%d failures]\n",
+		       rfi_flush, l1d_misses_total, rfi_flush ? '<' : '>',
+		       rfi_flush ? repetitions * l1d_misses_expected :
+		       repetitions * l1d_misses_expected / 2,
+		       repetitions - passes, repetitions);
+		rc = 1;
+	} else
+		printf("PASS (L1D misses with rfi_flush=%d: %llu %c %lu) [%d/%d pass]\n",
+		       rfi_flush, l1d_misses_total, rfi_flush ? '>' : '<',
+		       rfi_flush ? repetitions * l1d_misses_expected :
+		       repetitions * l1d_misses_expected / 2,
+		       passes, repetitions);
+
+	if (rfi_flush == rfi_flush_org) {
+		rfi_flush = !rfi_flush_org;
+		if (write_debugfs_file("powerpc/rfi_flush", rfi_flush) < 0) {
+			perror("error writing to powerpc/rfi_flush debugfs file");
+			return 1;
+		}
+		iter = repetitions;
+		l1d_misses_total = 0;
+		passes = 0;
+		goto again;
+	}
+
+	perf_event_disable(fd);
+	close(fd);
+
+	set_dscr(0);
+
+	if (write_debugfs_file("powerpc/rfi_flush", rfi_flush_org) < 0) {
+		perror("unable to restore original value of powerpc/rfi_flush debugfs file");
+		return 1;
+	}
+
+	return rc;
+}
+
+int main(int argc, char *argv[])
+{
+	return test_harness(rfi_flush_test, "rfi_flush_test");
+}
diff --git a/tools/testing/selftests/powerpc/signal/Makefile b/tools/testing/selftests/powerpc/signal/Makefile
index 1fca25c6ace0..209a958dca12 100644
--- a/tools/testing/selftests/powerpc/signal/Makefile
+++ b/tools/testing/selftests/powerpc/signal/Makefile
@@ -1,15 +1,10 @@
 # SPDX-License-Identifier: GPL-2.0
-TEST_PROGS := signal signal_tm
-
-all: $(TEST_PROGS)
-
-$(TEST_PROGS): ../harness.c ../utils.c signal.S
+TEST_GEN_PROGS := signal signal_tm
 
 CFLAGS += -maltivec
-signal_tm: CFLAGS += -mhtm
+$(OUTPUT)/signal_tm: CFLAGS += -mhtm
 
 top_srcdir = ../../../../..
 include ../../lib.mk
 
-clean:
-	rm -f $(TEST_PROGS) *.o
+$(TEST_GEN_PROGS): ../harness.c ../utils.c signal.S
diff --git a/tools/testing/selftests/powerpc/switch_endian/Makefile b/tools/testing/selftests/powerpc/switch_endian/Makefile
index fcd2dcb8972b..bdc081afedb0 100644
--- a/tools/testing/selftests/powerpc/switch_endian/Makefile
+++ b/tools/testing/selftests/powerpc/switch_endian/Makefile
@@ -8,6 +8,7 @@ EXTRA_CLEAN = $(OUTPUT)/*.o $(OUTPUT)/check-reversed.S
 top_srcdir = ../../../../..
 include ../../lib.mk
 
+$(OUTPUT)/switch_endian_test: ASFLAGS += -I $(OUTPUT)
 $(OUTPUT)/switch_endian_test: $(OUTPUT)/check-reversed.S
 
 $(OUTPUT)/check-reversed.o: $(OUTPUT)/check.o
diff --git a/tools/testing/selftests/powerpc/tm/tm-tmspr.c b/tools/testing/selftests/powerpc/tm/tm-tmspr.c
index 2bda81c7bf23..df1d7d4b1c89 100644
--- a/tools/testing/selftests/powerpc/tm/tm-tmspr.c
+++ b/tools/testing/selftests/powerpc/tm/tm-tmspr.c
@@ -98,7 +98,7 @@ void texasr(void *in)
 
 int test_tmspr()
 {
-	pthread_t 	thread;
+	pthread_t	*thread;
 	int	   	thread_num;
 	unsigned long	i;
 
@@ -107,21 +107,28 @@ int test_tmspr()
 	/* To cause some context switching */
 	thread_num = 10 * sysconf(_SC_NPROCESSORS_ONLN);
 
+	thread = malloc(thread_num * sizeof(pthread_t));
+	if (thread == NULL)
+		return EXIT_FAILURE;
+
 	/* Test TFIAR and TFHAR */
-	for (i = 0 ; i < thread_num ; i += 2){
-		if (pthread_create(&thread, NULL, (void*)tfiar_tfhar, (void *)i))
+	for (i = 0; i < thread_num; i += 2) {
+		if (pthread_create(&thread[i], NULL, (void *)tfiar_tfhar,
+				   (void *)i))
 			return EXIT_FAILURE;
 	}
-	if (pthread_join(thread, NULL) != 0)
-		return EXIT_FAILURE;
-
 	/* Test TEXASR */
-	for (i = 0 ; i < thread_num ; i++){
-		if (pthread_create(&thread, NULL, (void*)texasr, (void *)i))
+	for (i = 1; i < thread_num; i += 2) {
+		if (pthread_create(&thread[i], NULL, (void *)texasr, (void *)i))
 			return EXIT_FAILURE;
 	}
-	if (pthread_join(thread, NULL) != 0)
-		return EXIT_FAILURE;
+
+	for (i = 0; i < thread_num; i++) {
+		if (pthread_join(thread[i], NULL) != 0)
+			return EXIT_FAILURE;
+	}
+
+	free(thread);
 
 	if (passed)
 		return 0;
diff --git a/tools/testing/selftests/powerpc/tm/tm-unavailable.c b/tools/testing/selftests/powerpc/tm/tm-unavailable.c
index 156c8e750259..09894f4ff62e 100644
--- a/tools/testing/selftests/powerpc/tm/tm-unavailable.c
+++ b/tools/testing/selftests/powerpc/tm/tm-unavailable.c
@@ -236,7 +236,8 @@ void *tm_una_ping(void *input)
 	}
 
 	/* Check if we were not expecting a failure and a it occurred. */
-	if (!expecting_failure() && is_failure(cr_)) {
+	if (!expecting_failure() && is_failure(cr_) &&
+	    !failure_is_reschedule()) {
 		printf("\n\tUnexpected transaction failure 0x%02lx\n\t",
 			failure_code());
 		return (void *) -1;
@@ -244,9 +245,11 @@ void *tm_una_ping(void *input)
 
 	/*
 	 * Check if TM failed due to the cause we were expecting. 0xda is a
-	 * TM_CAUSE_FAC_UNAV cause, otherwise it's an unexpected cause.
+	 * TM_CAUSE_FAC_UNAV cause, otherwise it's an unexpected cause, unless
+	 * it was caused by a reschedule.
 	 */
-	if (is_failure(cr_) && !failure_is_unavailable()) {
+	if (is_failure(cr_) && !failure_is_unavailable() &&
+	    !failure_is_reschedule()) {
 		printf("\n\tUnexpected failure cause 0x%02lx\n\t",
 			failure_code());
 		return (void *) -1;
diff --git a/tools/testing/selftests/powerpc/tm/tm.h b/tools/testing/selftests/powerpc/tm/tm.h
index df4204247d45..5518b1d4ef8b 100644
--- a/tools/testing/selftests/powerpc/tm/tm.h
+++ b/tools/testing/selftests/powerpc/tm/tm.h
@@ -52,6 +52,15 @@ static inline bool failure_is_unavailable(void)
 	return (failure_code() & TM_CAUSE_FAC_UNAV) == TM_CAUSE_FAC_UNAV;
 }
 
+static inline bool failure_is_reschedule(void)
+{
+	if ((failure_code() & TM_CAUSE_RESCHED) == TM_CAUSE_RESCHED ||
+	    (failure_code() & TM_CAUSE_KVM_RESCHED) == TM_CAUSE_KVM_RESCHED)
+		return true;
+
+	return false;
+}
+
 static inline bool failure_is_nesting(void)
 {
 	return (__builtin_get_texasru() & 0x400000);
diff --git a/tools/testing/selftests/powerpc/utils.c b/tools/testing/selftests/powerpc/utils.c
index aa8fc1e6365b..ed62f4153d3e 100644
--- a/tools/testing/selftests/powerpc/utils.c
+++ b/tools/testing/selftests/powerpc/utils.c
@@ -10,12 +10,17 @@
 #include <fcntl.h>
 #include <link.h>
 #include <sched.h>
+#include <signal.h>
 #include <stdio.h>
+#include <stdlib.h>
 #include <string.h>
+#include <sys/ioctl.h>
 #include <sys/stat.h>
 #include <sys/types.h>
 #include <sys/utsname.h>
 #include <unistd.h>
+#include <asm/unistd.h>
+#include <linux/limits.h>
 
 #include "utils.h"
 
@@ -121,3 +126,150 @@ bool is_ppc64le(void)
 
 	return strcmp(uts.machine, "ppc64le") == 0;
 }
+
+int read_debugfs_file(char *debugfs_file, int *result)
+{
+	int rc = -1, fd;
+	char path[PATH_MAX];
+	char value[16];
+
+	strcpy(path, "/sys/kernel/debug/");
+	strncat(path, debugfs_file, PATH_MAX - strlen(path) - 1);
+
+	if ((fd = open(path, O_RDONLY)) < 0)
+		return rc;
+
+	if ((rc = read(fd, value, sizeof(value))) < 0)
+		return rc;
+
+	value[15] = 0;
+	*result = atoi(value);
+	close(fd);
+
+	return 0;
+}
+
+int write_debugfs_file(char *debugfs_file, int result)
+{
+	int rc = -1, fd;
+	char path[PATH_MAX];
+	char value[16];
+
+	strcpy(path, "/sys/kernel/debug/");
+	strncat(path, debugfs_file, PATH_MAX - strlen(path) - 1);
+
+	if ((fd = open(path, O_WRONLY)) < 0)
+		return rc;
+
+	snprintf(value, 16, "%d", result);
+
+	if ((rc = write(fd, value, strlen(value))) < 0)
+		return rc;
+
+	close(fd);
+
+	return 0;
+}
+
+static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid,
+		int cpu, int group_fd, unsigned long flags)
+{
+	return syscall(__NR_perf_event_open, hw_event, pid, cpu,
+		      group_fd, flags);
+}
+
+static void perf_event_attr_init(struct perf_event_attr *event_attr,
+					unsigned int type,
+					unsigned long config)
+{
+	memset(event_attr, 0, sizeof(*event_attr));
+
+	event_attr->type = type;
+	event_attr->size = sizeof(struct perf_event_attr);
+	event_attr->config = config;
+	event_attr->read_format = PERF_FORMAT_GROUP;
+	event_attr->disabled = 1;
+	event_attr->exclude_kernel = 1;
+	event_attr->exclude_hv = 1;
+	event_attr->exclude_guest = 1;
+}
+
+int perf_event_open_counter(unsigned int type,
+			    unsigned long config, int group_fd)
+{
+	int fd;
+	struct perf_event_attr event_attr;
+
+	perf_event_attr_init(&event_attr, type, config);
+
+	fd = perf_event_open(&event_attr, 0, -1, group_fd, 0);
+
+	if (fd < 0)
+		perror("perf_event_open() failed");
+
+	return fd;
+}
+
+int perf_event_enable(int fd)
+{
+	if (ioctl(fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP) == -1) {
+		perror("error while enabling perf events");
+		return -1;
+	}
+
+	return 0;
+}
+
+int perf_event_disable(int fd)
+{
+	if (ioctl(fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP) == -1) {
+		perror("error disabling perf events");
+		return -1;
+	}
+
+	return 0;
+}
+
+int perf_event_reset(int fd)
+{
+	if (ioctl(fd, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP) == -1) {
+		perror("error resetting perf events");
+		return -1;
+	}
+
+	return 0;
+}
+
+static void sigill_handler(int signr, siginfo_t *info, void *unused)
+{
+	static int warned = 0;
+	ucontext_t *ctx = (ucontext_t *)unused;
+	unsigned long *pc = &UCONTEXT_NIA(ctx);
+
+	/* mtspr 3,RS to check for move to DSCR below */
+	if ((*((unsigned int *)*pc) & 0xfc1fffff) == 0x7c0303a6) {
+		if (!warned++)
+			printf("WARNING: Skipping over dscr setup. Consider running 'ppc64_cpu --dscr=1' manually.\n");
+		*pc += 4;
+	} else {
+		printf("SIGILL at %p\n", pc);
+		abort();
+	}
+}
+
+void set_dscr(unsigned long val)
+{
+	static int init = 0;
+	struct sigaction sa;
+
+	if (!init) {
+		memset(&sa, 0, sizeof(sa));
+		sa.sa_sigaction = sigill_handler;
+		sa.sa_flags = SA_SIGINFO;
+		if (sigaction(SIGILL, &sa, NULL))
+			perror("sigill_handler");
+		init = 1;
+	}
+
+	asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR));
+}
diff --git a/tools/testing/selftests/proc/fd-001-lookup.c b/tools/testing/selftests/proc/fd-001-lookup.c
index a2010dfb2110..60d7948e7124 100644
--- a/tools/testing/selftests/proc/fd-001-lookup.c
+++ b/tools/testing/selftests/proc/fd-001-lookup.c
@@ -14,7 +14,7 @@
  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  */
 // Test /proc/*/fd lookup.
-#define _GNU_SOURCE
+
 #undef NDEBUG
 #include <assert.h>
 #include <dirent.h>
diff --git a/tools/testing/selftests/proc/fd-003-kthread.c b/tools/testing/selftests/proc/fd-003-kthread.c
index 1d659d55368c..dc591f97b63d 100644
--- a/tools/testing/selftests/proc/fd-003-kthread.c
+++ b/tools/testing/selftests/proc/fd-003-kthread.c
@@ -14,7 +14,7 @@
  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  */
 // Test that /proc/$KERNEL_THREAD/fd/ is empty.
-#define _GNU_SOURCE
+
 #undef NDEBUG
 #include <sys/syscall.h>
 #include <assert.h>
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
index f7247ee00514..58ca758a5786 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
@@ -120,7 +120,6 @@ then
 	parse-build.sh $resdir/Make.out $title
 else
 	# Build failed.
-	cp $builddir/Make*.out $resdir
 	cp $builddir/.config $resdir || :
 	echo Build failed, not running KVM, see $resdir.
 	if test -f $builddir.wait
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/CFLIST b/tools/testing/selftests/rcutorture/configs/rcu/CFLIST
index 6a0b9f69faad..c3c1fb5a9e1f 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/CFLIST
+++ b/tools/testing/selftests/rcutorture/configs/rcu/CFLIST
@@ -3,9 +3,7 @@ TREE02
 TREE03
 TREE04
 TREE05
-TREE06
 TREE07
-TREE08
 TREE09
 SRCU-N
 SRCU-P
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot
index 84a7d51b7481..ce48c7b82673 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot
@@ -1 +1,2 @@
 rcutorture.torture_type=srcud
+rcupdate.rcu_self_test=1
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u.boot
index 84a7d51b7481..ce48c7b82673 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u.boot
@@ -1 +1,2 @@
 rcutorture.torture_type=srcud
+rcupdate.rcu_self_test=1
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TINY02.boot b/tools/testing/selftests/rcutorture/configs/rcu/TINY02.boot
index 6c1a292a65fb..b39f1553a478 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TINY02.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TINY02.boot
@@ -1,3 +1 @@
 rcupdate.rcu_self_test=1
-rcupdate.rcu_self_test_bh=1
-rcutorture.torture_type=rcu_bh
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot
index 9f3a4d28e508..ea47da95374b 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot
@@ -1,4 +1,4 @@
-rcutorture.torture_type=rcu_bh maxcpus=8 nr_cpus=43
+maxcpus=8 nr_cpus=43
 rcutree.gp_preinit_delay=3
 rcutree.gp_init_delay=3
 rcutree.gp_cleanup_delay=3
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot
index e6071bb96c7d..5adc6756792a 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot
@@ -1 +1 @@
-rcutorture.torture_type=rcu_bh rcutree.rcu_fanout_leaf=4 nohz_full=1-7
+rcutree.rcu_fanout_leaf=4 nohz_full=1-7
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE05.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE05.boot
index c7fd050dfcd9..c419cac233ee 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE05.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE05.boot
@@ -1,5 +1,4 @@
-rcutorture.torture_type=sched
-rcupdate.rcu_self_test_sched=1
 rcutree.gp_preinit_delay=3
 rcutree.gp_init_delay=3
 rcutree.gp_cleanup_delay=3
+rcupdate.rcu_self_test=1
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE06.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE06.boot
index ad18b52a2cad..055f4aa79077 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE06.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE06.boot
@@ -1,6 +1,4 @@
 rcupdate.rcu_self_test=1
-rcupdate.rcu_self_test_bh=1
-rcupdate.rcu_self_test_sched=1
 rcutree.rcu_fanout_exact=1
 rcutree.gp_preinit_delay=3
 rcutree.gp_init_delay=3
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot
index 1bd8efc4141e..22478fd3a865 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot
@@ -1,5 +1,3 @@
-rcutorture.torture_type=sched
 rcupdate.rcu_self_test=1
-rcupdate.rcu_self_test_sched=1
 rcutree.rcu_fanout_exact=1
 rcu_nocbs=0-7
diff --git a/tools/testing/selftests/tc-testing/README b/tools/testing/selftests/tc-testing/README
index 49a6f8c3fdae..f9281e8aa313 100644
--- a/tools/testing/selftests/tc-testing/README
+++ b/tools/testing/selftests/tc-testing/README
@@ -232,6 +232,8 @@ directory:
       and the other is a test whether the command leaked memory or not.
       (This one is a preliminary version, it may not work quite right yet,
       but the overall template is there and it should only need tweaks.)
+  - buildebpfPlugin.py:
+      builds all programs in $EBPFDIR.
 
 
 ACKNOWLEDGEMENTS
diff --git a/tools/testing/selftests/tc-testing/bpf/Makefile b/tools/testing/selftests/tc-testing/bpf/Makefile
new file mode 100644
index 000000000000..dc92eb271d9a
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/bpf/Makefile
@@ -0,0 +1,29 @@
+# SPDX-License-Identifier: GPL-2.0
+
+APIDIR := ../../../../include/uapi
+TEST_GEN_FILES = action.o
+
+top_srcdir = ../../../../..
+include ../../lib.mk
+
+CLANG ?= clang
+LLC   ?= llc
+PROBE := $(shell $(LLC) -march=bpf -mcpu=probe -filetype=null /dev/null 2>&1)
+
+ifeq ($(PROBE),)
+  CPU ?= probe
+else
+  CPU ?= generic
+endif
+
+CLANG_SYS_INCLUDES := $(shell $(CLANG) -v -E - </dev/null 2>&1 \
+	| sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }')
+
+CLANG_FLAGS = -I. -I$(APIDIR) \
+	      $(CLANG_SYS_INCLUDES) \
+	      -Wno-compare-distinct-pointer-types
+
+$(OUTPUT)/%.o: %.c
+	$(CLANG) $(CLANG_FLAGS) \
+		 -O2 -target bpf -emit-llvm -c $< -o - |      \
+	$(LLC) -march=bpf -mcpu=$(CPU) $(LLC_FLAGS) -filetype=obj -o $@
diff --git a/tools/testing/selftests/tc-testing/bpf/action.c b/tools/testing/selftests/tc-testing/bpf/action.c
new file mode 100644
index 000000000000..c32b99b80e19
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/bpf/action.c
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0
+ * Copyright (c) 2018 Davide Caratti, Red Hat inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+
+#include <linux/bpf.h>
+#include <linux/pkt_cls.h>
+
+__attribute__((section("action-ok"),used)) int action_ok(struct __sk_buff *s)
+{
+	return TC_ACT_OK;
+}
+
+__attribute__((section("action-ko"),used)) int action_ko(struct __sk_buff *s)
+{
+	s->data = 0x0;
+	return TC_ACT_OK;
+}
+
+char _license[] __attribute__((section("license"),used)) = "GPL";
diff --git a/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py
new file mode 100644
index 000000000000..9f0ba10c44b4
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py
@@ -0,0 +1,66 @@
+'''
+build ebpf program
+'''
+
+import os
+import signal
+from string import Template
+import subprocess
+import time
+from TdcPlugin import TdcPlugin
+from tdc_config import *
+
+class SubPlugin(TdcPlugin):
+    def __init__(self):
+        self.sub_class = 'buildebpf/SubPlugin'
+        self.tap = ''
+        super().__init__()
+
+    def pre_suite(self, testcount, testidlist):
+        super().pre_suite(testcount, testidlist)
+
+        if self.args.buildebpf:
+            self._ebpf_makeall()
+
+    def post_suite(self, index):
+        super().post_suite(index)
+
+        self._ebpf_makeclean()
+
+    def add_args(self, parser):
+        super().add_args(parser)
+
+        self.argparser_group = self.argparser.add_argument_group(
+            'buildebpf',
+            'options for buildebpfPlugin')
+        self.argparser_group.add_argument(
+            '-B', '--buildebpf', action='store_true',
+            help='build eBPF programs')
+
+        return self.argparser
+
+    def _ebpf_makeall(self):
+        if self.args.buildebpf:
+            self._make('all')
+
+    def _ebpf_makeclean(self):
+        if self.args.buildebpf:
+            self._make('clean')
+
+    def _make(self, target):
+        command = 'make -C {} {}'.format(self.args.NAMES['EBPFDIR'], target)
+        proc = subprocess.Popen(command,
+            shell=True,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            env=ENVIR)
+        (rawout, serr) = proc.communicate()
+
+        if proc.returncode != 0 and len(serr) > 0:
+            foutput = serr.decode("utf-8")
+        else:
+            foutput = rawout.decode("utf-8")
+
+        proc.stdout.close()
+        proc.stderr.close()
+        return proc, foutput
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json
index 6f289a49e5ec..5970cee6d05f 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json
@@ -55,7 +55,6 @@
             "bpf"
         ],
         "setup": [
-            "printf '#include <linux/bpf.h>\nchar l[] __attribute__((section(\"license\"),used))=\"GPL\"; __attribute__((section(\"action\"),used)) int m(struct __sk_buff *s) { return 2; }' | clang -O2 -x c -c - -target bpf -o _b.o",
             [
                 "$TC action flush action bpf",
                 0,
@@ -63,14 +62,13 @@
                 255
             ]
         ],
-        "cmdUnderTest": "$TC action add action bpf object-file _b.o index 667",
+        "cmdUnderTest": "$TC action add action bpf object-file $EBPFDIR/action.o section action-ok index 667",
         "expExitCode": "0",
         "verifyCmd": "$TC action get action bpf index 667",
-        "matchPattern": "action order [0-9]*: bpf _b.o:\\[action\\] id [0-9]* tag 3b185187f1855c4c( jited)? default-action pipe.*index 667 ref",
+        "matchPattern": "action order [0-9]*: bpf action.o:\\[action-ok\\] id [0-9]* tag [0-9a-f]{16}( jited)? default-action pipe.*index 667 ref",
         "matchCount": "1",
         "teardown": [
-            "$TC action flush action bpf",
-            "rm -f _b.o"
+            "$TC action flush action bpf"
         ]
     },
     {
@@ -81,7 +79,6 @@
             "bpf"
         ],
         "setup": [
-            "printf '#include <linux/bpf.h>\nchar l[] __attribute__((section(\"license\"),used))=\"GPL\"; __attribute__((section(\"action\"),used)) int m(struct __sk_buff *s) { s->data = 0x0; return 2; }' | clang -O2 -x c -c - -target bpf -o _c.o",
             [
                 "$TC action flush action bpf",
                 0,
@@ -89,10 +86,10 @@
                 255
             ]
         ],
-        "cmdUnderTest": "$TC action add action bpf object-file _c.o index 667",
+        "cmdUnderTest": "$TC action add action bpf object-file $EBPFDIR/action.o section action-ko index 667",
         "expExitCode": "255",
         "verifyCmd": "$TC action get action bpf index 667",
-        "matchPattern": "action order [0-9]*: bpf _c.o:\\[action\\] id [0-9].*index 667 ref",
+        "matchPattern": "action order [0-9]*: bpf action.o:\\[action-ko\\] id [0-9].*index 667 ref",
         "matchCount": "0",
         "teardown": [
             [
@@ -100,8 +97,7 @@
                 0,
                 1,
                 255
-            ],
-            "rm -f _c.o"
+            ]
         ]
     },
     {
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json b/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json
index 68c91023cdb9..89189a03ce3d 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json
@@ -536,5 +536,29 @@
         "matchPattern": "^[ \t]+index [0-9]+ ref",
         "matchCount": "0",
         "teardown": []
+    },
+    {
+        "id": "8e47",
+        "name": "Add gact action with random determ goto chain control action",
+        "category": [
+            "actions",
+            "gact"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action gact",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action pass random determ goto chain 1 2 index 90",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action gact",
+        "matchPattern": "action order [0-9]*: gact action pass random type determ goto chain 1 val 2.*index 90 ref",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action gact"
+        ]
     }
 ]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/police.json b/tools/testing/selftests/tc-testing/tc-tests/actions/police.json
index 30f9b54bd666..4086a50a670e 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/police.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/police.json
@@ -715,5 +715,29 @@
         "teardown": [
             "$TC actions flush action police"
         ]
+    },
+    {
+        "id": "b48b",
+        "name": "Add police action with exceed goto chain control action",
+        "category": [
+            "actions",
+            "police"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action police",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action police rate 1mbit burst 1k conform-exceed pass / goto chain 42",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions ls action police",
+        "matchPattern": "action order [0-9]*:  police 0x1 rate 1Mbit burst 1Kb mtu 2Kb action pass/goto chain 42",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action police"
+        ]
     }
 ]
diff --git a/tools/testing/selftests/tc-testing/tdc_config.py b/tools/testing/selftests/tc-testing/tdc_config.py
index a023d0d62b25..d651bc1501bd 100644
--- a/tools/testing/selftests/tc-testing/tdc_config.py
+++ b/tools/testing/selftests/tc-testing/tdc_config.py
@@ -16,7 +16,9 @@ NAMES = {
           'DEV2': '',
           'BATCH_FILE': './batch.txt',
           # Name of the namespace to use
-          'NS': 'tcut'
+          'NS': 'tcut',
+          # Directory containing eBPF test programs
+          'EBPFDIR': './bpf'
         }
 
 
diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore
index af5ff83f6d7f..31b3c98b6d34 100644
--- a/tools/testing/selftests/vm/.gitignore
+++ b/tools/testing/selftests/vm/.gitignore
@@ -13,3 +13,4 @@ mlock-random-test
 virtual_address_range
 gup_benchmark
 va_128TBswitch
+map_fixed_noreplace
diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index e94b7b14bcb2..6e67e726e5a5 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -12,6 +12,7 @@ TEST_GEN_FILES += gup_benchmark
 TEST_GEN_FILES += hugepage-mmap
 TEST_GEN_FILES += hugepage-shm
 TEST_GEN_FILES += map_hugetlb
+TEST_GEN_FILES += map_fixed_noreplace
 TEST_GEN_FILES += map_populate
 TEST_GEN_FILES += mlock-random-test
 TEST_GEN_FILES += mlock2-tests
diff --git a/tools/testing/selftests/vm/gup_benchmark.c b/tools/testing/selftests/vm/gup_benchmark.c
index 36df55132036..880b96fc80d4 100644
--- a/tools/testing/selftests/vm/gup_benchmark.c
+++ b/tools/testing/selftests/vm/gup_benchmark.c
@@ -15,9 +15,12 @@
 #define PAGE_SIZE sysconf(_SC_PAGESIZE)
 
 #define GUP_FAST_BENCHMARK	_IOWR('g', 1, struct gup_benchmark)
+#define GUP_LONGTERM_BENCHMARK	_IOWR('g', 2, struct gup_benchmark)
+#define GUP_BENCHMARK		_IOWR('g', 3, struct gup_benchmark)
 
 struct gup_benchmark {
-	__u64 delta_usec;
+	__u64 get_delta_usec;
+	__u64 put_delta_usec;
 	__u64 addr;
 	__u64 size;
 	__u32 nr_pages_per_call;
@@ -28,10 +31,12 @@ int main(int argc, char **argv)
 {
 	struct gup_benchmark gup;
 	unsigned long size = 128 * MB;
-	int i, fd, opt, nr_pages = 1, thp = -1, repeats = 1, write = 0;
+	int i, fd, filed, opt, nr_pages = 1, thp = -1, repeats = 1, write = 0;
+	int cmd = GUP_FAST_BENCHMARK, flags = MAP_PRIVATE;
+	char *file = "/dev/zero";
 	char *p;
 
-	while ((opt = getopt(argc, argv, "m:r:n:tT")) != -1) {
+	while ((opt = getopt(argc, argv, "m:r:n:f:tTLUSH")) != -1) {
 		switch (opt) {
 		case 'm':
 			size = atoi(optarg) * MB;
@@ -48,13 +53,36 @@ int main(int argc, char **argv)
 		case 'T':
 			thp = 0;
 			break;
+		case 'L':
+			cmd = GUP_LONGTERM_BENCHMARK;
+			break;
+		case 'U':
+			cmd = GUP_BENCHMARK;
+			break;
 		case 'w':
 			write = 1;
+			break;
+		case 'f':
+			file = optarg;
+			break;
+		case 'S':
+			flags &= ~MAP_PRIVATE;
+			flags |= MAP_SHARED;
+			break;
+		case 'H':
+			flags |= MAP_HUGETLB;
+			break;
 		default:
 			return -1;
 		}
 	}
 
+	filed = open(file, O_RDWR|O_CREAT);
+	if (filed < 0) {
+		perror("open");
+		exit(filed);
+	}
+
 	gup.nr_pages_per_call = nr_pages;
 	gup.flags = write;
 
@@ -62,8 +90,7 @@ int main(int argc, char **argv)
 	if (fd == -1)
 		perror("open"), exit(1);
 
-	p = mmap(NULL, size, PROT_READ | PROT_WRITE,
-			MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+	p = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, filed, 0);
 	if (p == MAP_FAILED)
 		perror("mmap"), exit(1);
 	gup.addr = (unsigned long)p;
@@ -78,10 +105,11 @@ int main(int argc, char **argv)
 
 	for (i = 0; i < repeats; i++) {
 		gup.size = size;
-		if (ioctl(fd, GUP_FAST_BENCHMARK, &gup))
+		if (ioctl(fd, cmd, &gup))
 			perror("ioctl"), exit(1);
 
-		printf("Time: %lld us", gup.delta_usec);
+		printf("Time: get:%lld put:%lld us", gup.get_delta_usec,
+			gup.put_delta_usec);
 		if (gup.size != size)
 			printf(", truncated (size: %lld)", gup.size);
 		printf("\n");
diff --git a/tools/testing/selftests/vm/map_fixed_noreplace.c b/tools/testing/selftests/vm/map_fixed_noreplace.c
new file mode 100644
index 000000000000..d91bde511268
--- /dev/null
+++ b/tools/testing/selftests/vm/map_fixed_noreplace.c
@@ -0,0 +1,206 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Test that MAP_FIXED_NOREPLACE works.
+ *
+ * Copyright 2018, Jann Horn <jannh@google.com>
+ * Copyright 2018, Michael Ellerman, IBM Corporation.
+ */
+
+#include <sys/mman.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#ifndef MAP_FIXED_NOREPLACE
+#define MAP_FIXED_NOREPLACE 0x100000
+#endif
+
+#define BASE_ADDRESS	(256ul * 1024 * 1024)
+
+
+static void dump_maps(void)
+{
+	char cmd[32];
+
+	snprintf(cmd, sizeof(cmd), "cat /proc/%d/maps", getpid());
+	system(cmd);
+}
+
+int main(void)
+{
+	unsigned long flags, addr, size, page_size;
+	char *p;
+
+	page_size = sysconf(_SC_PAGE_SIZE);
+
+	flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE;
+
+	// Check we can map all the areas we need below
+	errno = 0;
+	addr = BASE_ADDRESS;
+	size = 5 * page_size;
+	p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0);
+
+	printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+
+	if (p == MAP_FAILED) {
+		dump_maps();
+		printf("Error: couldn't map the space we need for the test\n");
+		return 1;
+	}
+
+	errno = 0;
+	if (munmap((void *)addr, 5 * page_size) != 0) {
+		dump_maps();
+		printf("Error: munmap failed!?\n");
+		return 1;
+	}
+	printf("unmap() successful\n");
+
+	errno = 0;
+	addr = BASE_ADDRESS + page_size;
+	size = 3 * page_size;
+	p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0);
+	printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+
+	if (p == MAP_FAILED) {
+		dump_maps();
+		printf("Error: first mmap() failed unexpectedly\n");
+		return 1;
+	}
+
+	/*
+	 * Exact same mapping again:
+	 *   base |  free  | new
+	 *     +1 | mapped | new
+	 *     +2 | mapped | new
+	 *     +3 | mapped | new
+	 *     +4 |  free  | new
+	 */
+	errno = 0;
+	addr = BASE_ADDRESS;
+	size = 5 * page_size;
+	p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0);
+	printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+
+	if (p != MAP_FAILED) {
+		dump_maps();
+		printf("Error:1: mmap() succeeded when it shouldn't have\n");
+		return 1;
+	}
+
+	/*
+	 * Second mapping contained within first:
+	 *
+	 *   base |  free  |
+	 *     +1 | mapped |
+	 *     +2 | mapped | new
+	 *     +3 | mapped |
+	 *     +4 |  free  |
+	 */
+	errno = 0;
+	addr = BASE_ADDRESS + (2 * page_size);
+	size = page_size;
+	p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0);
+	printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+
+	if (p != MAP_FAILED) {
+		dump_maps();
+		printf("Error:2: mmap() succeeded when it shouldn't have\n");
+		return 1;
+	}
+
+	/*
+	 * Overlap end of existing mapping:
+	 *   base |  free  |
+	 *     +1 | mapped |
+	 *     +2 | mapped |
+	 *     +3 | mapped | new
+	 *     +4 |  free  | new
+	 */
+	errno = 0;
+	addr = BASE_ADDRESS + (3 * page_size);
+	size = 2 * page_size;
+	p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0);
+	printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+
+	if (p != MAP_FAILED) {
+		dump_maps();
+		printf("Error:3: mmap() succeeded when it shouldn't have\n");
+		return 1;
+	}
+
+	/*
+	 * Overlap start of existing mapping:
+	 *   base |  free  | new
+	 *     +1 | mapped | new
+	 *     +2 | mapped |
+	 *     +3 | mapped |
+	 *     +4 |  free  |
+	 */
+	errno = 0;
+	addr = BASE_ADDRESS;
+	size = 2 * page_size;
+	p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0);
+	printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+
+	if (p != MAP_FAILED) {
+		dump_maps();
+		printf("Error:4: mmap() succeeded when it shouldn't have\n");
+		return 1;
+	}
+
+	/*
+	 * Adjacent to start of existing mapping:
+	 *   base |  free  | new
+	 *     +1 | mapped |
+	 *     +2 | mapped |
+	 *     +3 | mapped |
+	 *     +4 |  free  |
+	 */
+	errno = 0;
+	addr = BASE_ADDRESS;
+	size = page_size;
+	p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0);
+	printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+
+	if (p == MAP_FAILED) {
+		dump_maps();
+		printf("Error:5: mmap() failed when it shouldn't have\n");
+		return 1;
+	}
+
+	/*
+	 * Adjacent to end of existing mapping:
+	 *   base |  free  |
+	 *     +1 | mapped |
+	 *     +2 | mapped |
+	 *     +3 | mapped |
+	 *     +4 |  free  |  new
+	 */
+	errno = 0;
+	addr = BASE_ADDRESS + (4 * page_size);
+	size = page_size;
+	p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0);
+	printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+
+	if (p == MAP_FAILED) {
+		dump_maps();
+		printf("Error:6: mmap() failed when it shouldn't have\n");
+		return 1;
+	}
+
+	addr = BASE_ADDRESS;
+	size = 5 * page_size;
+	if (munmap((void *)addr, size) != 0) {
+		dump_maps();
+		printf("Error: munmap failed!?\n");
+		return 1;
+	}
+	printf("unmap() successful\n");
+
+	printf("OK\n");
+	return 0;
+}
diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c
index 7b8171e3128a..5d1db824f73a 100644
--- a/tools/testing/selftests/vm/userfaultfd.c
+++ b/tools/testing/selftests/vm/userfaultfd.c
@@ -34,18 +34,6 @@
  * per-CPU threads 1 by triggering userfaults inside
  * pthread_mutex_lock will also verify the atomicity of the memory
  * transfer (UFFDIO_COPY).
- *
- * The program takes two parameters: the amounts of physical memory in
- * megabytes (MiB) of the area and the number of bounces to execute.
- *
- * # 100MiB 99999 bounces
- * ./userfaultfd 100 99999
- *
- * # 1GiB 99 bounces
- * ./userfaultfd 1000 99
- *
- * # 10MiB-~6GiB 999 bounces, continue forever unless an error triggers
- * while ./userfaultfd $[RANDOM % 6000 + 10] 999; do true; done
  */
 
 #define _GNU_SOURCE
@@ -115,6 +103,30 @@ pthread_attr_t attr;
 				 ~(unsigned long)(sizeof(unsigned long long) \
 						  -  1)))
 
+const char *examples =
+    "# Run anonymous memory test on 100MiB region with 99999 bounces:\n"
+    "./userfaultfd anon 100 99999\n\n"
+    "# Run share memory test on 1GiB region with 99 bounces:\n"
+    "./userfaultfd shmem 1000 99\n\n"
+    "# Run hugetlb memory test on 256MiB region with 50 bounces (using /dev/hugepages/hugefile):\n"
+    "./userfaultfd hugetlb 256 50 /dev/hugepages/hugefile\n\n"
+    "# Run the same hugetlb test but using shmem:\n"
+    "./userfaultfd hugetlb_shared 256 50 /dev/hugepages/hugefile\n\n"
+    "# 10MiB-~6GiB 999 bounces anonymous test, "
+    "continue forever unless an error triggers\n"
+    "while ./userfaultfd anon $[RANDOM % 6000 + 10] 999; do true; done\n\n";
+
+static void usage(void)
+{
+	fprintf(stderr, "\nUsage: ./userfaultfd <test type> <MiB> <bounces> "
+		"[hugetlbfs_file]\n\n");
+	fprintf(stderr, "Supported <test type>: anon, hugetlb, "
+		"hugetlb_shared, shmem\n\n");
+	fprintf(stderr, "Examples:\n\n");
+	fprintf(stderr, examples);
+	exit(1);
+}
+
 static int anon_release_pages(char *rel_area)
 {
 	int ret = 0;
@@ -439,6 +451,43 @@ static int copy_page(int ufd, unsigned long offset)
 	return __copy_page(ufd, offset, false);
 }
 
+static int uffd_read_msg(int ufd, struct uffd_msg *msg)
+{
+	int ret = read(uffd, msg, sizeof(*msg));
+
+	if (ret != sizeof(*msg)) {
+		if (ret < 0) {
+			if (errno == EAGAIN)
+				return 1;
+			else
+				perror("blocking read error"), exit(1);
+		} else {
+			fprintf(stderr, "short read\n"), exit(1);
+		}
+	}
+
+	return 0;
+}
+
+/* Return 1 if page fault handled by us; otherwise 0 */
+static int uffd_handle_page_fault(struct uffd_msg *msg)
+{
+	unsigned long offset;
+
+	if (msg->event != UFFD_EVENT_PAGEFAULT)
+		fprintf(stderr, "unexpected msg event %u\n",
+			msg->event), exit(1);
+
+	if (bounces & BOUNCE_VERIFY &&
+	    msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE)
+		fprintf(stderr, "unexpected write fault\n"), exit(1);
+
+	offset = (char *)(unsigned long)msg->arg.pagefault.address - area_dst;
+	offset &= ~(page_size-1);
+
+	return copy_page(uffd, offset);
+}
+
 static void *uffd_poll_thread(void *arg)
 {
 	unsigned long cpu = (unsigned long) arg;
@@ -446,7 +495,6 @@ static void *uffd_poll_thread(void *arg)
 	struct uffd_msg msg;
 	struct uffdio_register uffd_reg;
 	int ret;
-	unsigned long offset;
 	char tmp_chr;
 	unsigned long userfaults = 0;
 
@@ -470,25 +518,15 @@ static void *uffd_poll_thread(void *arg)
 		if (!(pollfd[0].revents & POLLIN))
 			fprintf(stderr, "pollfd[0].revents %d\n",
 				pollfd[0].revents), exit(1);
-		ret = read(uffd, &msg, sizeof(msg));
-		if (ret < 0) {
-			if (errno == EAGAIN)
-				continue;
-			perror("nonblocking read error"), exit(1);
-		}
+		if (uffd_read_msg(uffd, &msg))
+			continue;
 		switch (msg.event) {
 		default:
 			fprintf(stderr, "unexpected msg event %u\n",
 				msg.event), exit(1);
 			break;
 		case UFFD_EVENT_PAGEFAULT:
-			if (msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE)
-				fprintf(stderr, "unexpected write fault\n"), exit(1);
-			offset = (char *)(unsigned long)msg.arg.pagefault.address -
-				area_dst;
-			offset &= ~(page_size-1);
-			if (copy_page(uffd, offset))
-				userfaults++;
+			userfaults += uffd_handle_page_fault(&msg);
 			break;
 		case UFFD_EVENT_FORK:
 			close(uffd);
@@ -516,8 +554,6 @@ static void *uffd_read_thread(void *arg)
 {
 	unsigned long *this_cpu_userfaults;
 	struct uffd_msg msg;
-	unsigned long offset;
-	int ret;
 
 	this_cpu_userfaults = (unsigned long *) arg;
 	*this_cpu_userfaults = 0;
@@ -526,24 +562,9 @@ static void *uffd_read_thread(void *arg)
 	/* from here cancellation is ok */
 
 	for (;;) {
-		ret = read(uffd, &msg, sizeof(msg));
-		if (ret != sizeof(msg)) {
-			if (ret < 0)
-				perror("blocking read error"), exit(1);
-			else
-				fprintf(stderr, "short read\n"), exit(1);
-		}
-		if (msg.event != UFFD_EVENT_PAGEFAULT)
-			fprintf(stderr, "unexpected msg event %u\n",
-				msg.event), exit(1);
-		if (bounces & BOUNCE_VERIFY &&
-		    msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE)
-			fprintf(stderr, "unexpected write fault\n"), exit(1);
-		offset = (char *)(unsigned long)msg.arg.pagefault.address -
-			 area_dst;
-		offset &= ~(page_size-1);
-		if (copy_page(uffd, offset))
-			(*this_cpu_userfaults)++;
+		if (uffd_read_msg(uffd, &msg))
+			continue;
+		(*this_cpu_userfaults) += uffd_handle_page_fault(&msg);
 	}
 	return (void *)NULL;
 }
@@ -605,6 +626,12 @@ static int stress(unsigned long *userfaults)
 	if (uffd_test_ops->release_pages(area_src))
 		return 1;
 
+
+	finished = 1;
+	for (cpu = 0; cpu < nr_cpus; cpu++)
+		if (pthread_join(locking_threads[cpu], NULL))
+			return 1;
+
 	for (cpu = 0; cpu < nr_cpus; cpu++) {
 		char c;
 		if (bounces & BOUNCE_POLL) {
@@ -622,11 +649,6 @@ static int stress(unsigned long *userfaults)
 		}
 	}
 
-	finished = 1;
-	for (cpu = 0; cpu < nr_cpus; cpu++)
-		if (pthread_join(locking_threads[cpu], NULL))
-			return 1;
-
 	return 0;
 }
 
@@ -1272,8 +1294,7 @@ static void sigalrm(int sig)
 int main(int argc, char **argv)
 {
 	if (argc < 4)
-		fprintf(stderr, "Usage: <test type> <MiB> <bounces> [hugetlbfs_file]\n"),
-				exit(1);
+		usage();
 
 	if (signal(SIGALRM, sigalrm) == SIG_ERR)
 		fprintf(stderr, "failed to arm SIGALRM"), exit(1);
@@ -1286,20 +1307,19 @@ int main(int argc, char **argv)
 		nr_cpus;
 	if (!nr_pages_per_cpu) {
 		fprintf(stderr, "invalid MiB\n");
-		fprintf(stderr, "Usage: <MiB> <bounces>\n"), exit(1);
+		usage();
 	}
 
 	bounces = atoi(argv[3]);
 	if (bounces <= 0) {
 		fprintf(stderr, "invalid bounces\n");
-		fprintf(stderr, "Usage: <MiB> <bounces>\n"), exit(1);
+		usage();
 	}
 	nr_pages = nr_pages_per_cpu * nr_cpus;
 
 	if (test_type == TEST_HUGETLB) {
 		if (argc < 5)
-			fprintf(stderr, "Usage: hugetlb <MiB> <bounces> <hugetlbfs_file>\n"),
-				exit(1);
+			usage();
 		huge_fd = open(argv[4], O_CREAT | O_RDWR, 0755);
 		if (huge_fd < 0) {
 			fprintf(stderr, "Open of %s failed", argv[3]);
diff --git a/tools/testing/selftests/watchdog/watchdog-test.c b/tools/testing/selftests/watchdog/watchdog-test.c
index 6e290874b70e..c6bd9a68306b 100644
--- a/tools/testing/selftests/watchdog/watchdog-test.c
+++ b/tools/testing/selftests/watchdog/watchdog-test.c
@@ -19,7 +19,7 @@
 
 int fd;
 const char v = 'V';
-static const char sopts[] = "bdehp:t:";
+static const char sopts[] = "bdehp:t:Tn:N";
 static const struct option lopts[] = {
 	{"bootstatus",          no_argument, NULL, 'b'},
 	{"disable",             no_argument, NULL, 'd'},
@@ -27,6 +27,9 @@ static const struct option lopts[] = {
 	{"help",                no_argument, NULL, 'h'},
 	{"pingrate",      required_argument, NULL, 'p'},
 	{"timeout",       required_argument, NULL, 't'},
+	{"gettimeout",          no_argument, NULL, 'T'},
+	{"pretimeout",    required_argument, NULL, 'n'},
+	{"getpretimeout",       no_argument, NULL, 'N'},
 	{NULL,                  no_argument, NULL, 0x0}
 };
 
@@ -71,9 +74,13 @@ static void usage(char *progname)
 	printf(" -h, --help          Print the help message\n");
 	printf(" -p, --pingrate=P    Set ping rate to P seconds (default %d)\n", DEFAULT_PING_RATE);
 	printf(" -t, --timeout=T     Set timeout to T seconds\n");
+	printf(" -T, --gettimeout    Get the timeout\n");
+	printf(" -n, --pretimeout=T  Set the pretimeout to T seconds\n");
+	printf(" -N, --getpretimeout Get the pretimeout\n");
 	printf("\n");
 	printf("Parameters are parsed left-to-right in real-time.\n");
 	printf("Example: %s -d -t 10 -p 5 -e\n", progname);
+	printf("Example: %s -t 12 -T -n 7 -N\n", progname);
 }
 
 int main(int argc, char *argv[])
@@ -89,7 +96,13 @@ int main(int argc, char *argv[])
 	fd = open("/dev/watchdog", O_WRONLY);
 
 	if (fd == -1) {
-		printf("Watchdog device not enabled.\n");
+		if (errno == ENOENT)
+			printf("Watchdog device not enabled.\n");
+		else if (errno == EACCES)
+			printf("Run watchdog as root.\n");
+		else
+			printf("Watchdog device open failed %s\n",
+				strerror(errno));
 		exit(-1);
 	}
 
@@ -103,23 +116,27 @@ int main(int argc, char *argv[])
 				printf("Last boot is caused by: %s.\n", (flags != 0) ?
 					"Watchdog" : "Power-On-Reset");
 			else
-				printf("WDIOC_GETBOOTSTATUS errno '%s'\n", strerror(errno));
+				printf("WDIOC_GETBOOTSTATUS error '%s'\n", strerror(errno));
 			break;
 		case 'd':
 			flags = WDIOS_DISABLECARD;
 			ret = ioctl(fd, WDIOC_SETOPTIONS, &flags);
 			if (!ret)
 				printf("Watchdog card disabled.\n");
-			else
-				printf("WDIOS_DISABLECARD errno '%s'\n", strerror(errno));
+			else {
+				printf("WDIOS_DISABLECARD error '%s'\n", strerror(errno));
+				oneshot = 1;
+			}
 			break;
 		case 'e':
 			flags = WDIOS_ENABLECARD;
 			ret = ioctl(fd, WDIOC_SETOPTIONS, &flags);
 			if (!ret)
 				printf("Watchdog card enabled.\n");
-			else
-				printf("WDIOS_ENABLECARD errno '%s'\n", strerror(errno));
+			else {
+				printf("WDIOS_ENABLECARD error '%s'\n", strerror(errno));
+				oneshot = 1;
+			}
 			break;
 		case 'p':
 			ping_rate = strtoul(optarg, NULL, 0);
@@ -132,8 +149,36 @@ int main(int argc, char *argv[])
 			ret = ioctl(fd, WDIOC_SETTIMEOUT, &flags);
 			if (!ret)
 				printf("Watchdog timeout set to %u seconds.\n", flags);
+			else {
+				printf("WDIOC_SETTIMEOUT error '%s'\n", strerror(errno));
+				oneshot = 1;
+			}
+			break;
+		case 'T':
+			oneshot = 1;
+			ret = ioctl(fd, WDIOC_GETTIMEOUT, &flags);
+			if (!ret)
+				printf("WDIOC_GETTIMEOUT returns %u seconds.\n", flags);
+			else
+				printf("WDIOC_GETTIMEOUT error '%s'\n", strerror(errno));
+			break;
+		case 'n':
+			flags = strtoul(optarg, NULL, 0);
+			ret = ioctl(fd, WDIOC_SETPRETIMEOUT, &flags);
+			if (!ret)
+				printf("Watchdog pretimeout set to %u seconds.\n", flags);
+			else {
+				printf("WDIOC_SETPRETIMEOUT error '%s'\n", strerror(errno));
+				oneshot = 1;
+			}
+			break;
+		case 'N':
+			oneshot = 1;
+			ret = ioctl(fd, WDIOC_GETPRETIMEOUT, &flags);
+			if (!ret)
+				printf("WDIOC_GETPRETIMEOUT returns %u seconds.\n", flags);
 			else
-				printf("WDIOC_SETTIMEOUT errno '%s'\n", strerror(errno));
+				printf("WDIOC_GETPRETIMEOUT error '%s'\n", strerror(errno));
 			break;
 		default:
 			usage(argv[0]);
diff --git a/tools/usb/usbip/libsrc/usbip_host_common.c b/tools/usb/usbip/libsrc/usbip_host_common.c
index dc93fadbee96..d79c7581b175 100644
--- a/tools/usb/usbip/libsrc/usbip_host_common.c
+++ b/tools/usb/usbip/libsrc/usbip_host_common.c
@@ -43,7 +43,7 @@ static int32_t read_attr_usbip_status(struct usbip_usb_device *udev)
 	int size;
 	int fd;
 	int length;
-	char status;
+	char status[2] = { 0 };
 	int value = 0;
 
 	size = snprintf(status_attr_path, sizeof(status_attr_path),
@@ -61,14 +61,14 @@ static int32_t read_attr_usbip_status(struct usbip_usb_device *udev)
 		return -1;
 	}
 
-	length = read(fd, &status, 1);
+	length = read(fd, status, 1);
 	if (length < 0) {
 		err("error reading attribute %s", status_attr_path);
 		close(fd);
 		return -1;
 	}
 
-	value = atoi(&status);
+	value = atoi(status);
 
 	return value;
 }
diff --git a/tools/usb/usbip/libsrc/vhci_driver.c b/tools/usb/usbip/libsrc/vhci_driver.c
index 4204359c9fee..8159fd98680b 100644
--- a/tools/usb/usbip/libsrc/vhci_driver.c
+++ b/tools/usb/usbip/libsrc/vhci_driver.c
@@ -150,7 +150,7 @@ static int get_nports(struct udev_device *hc_device)
 
 static int vhci_hcd_filter(const struct dirent *dirent)
 {
-	return strcmp(dirent->d_name, "vhci_hcd") >= 0;
+	return !strncmp(dirent->d_name, "vhci_hcd.", 9);
 }
 
 static int get_ncontrollers(void)