diff options
Diffstat (limited to '')
516 files changed, 21237 insertions, 4962 deletions
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index d0ce5cfd3ac1..d5b5f2ab87a0 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -277,6 +277,7 @@ #define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC instruction */ #define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 instruction */ #define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS instructions */ +#define X86_FEATURE_XFD (10*32+ 4) /* "" eXtended Feature Disabling */ /* * Extended auxiliary flags: Linux defined - for features scattered in various @@ -298,6 +299,7 @@ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ +#define X86_FEATURE_AMX_TILE (18*32+24) /* AMX tile Support */ /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */ #define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */ diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index 2ef1f6513c68..5a776a08f78c 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -504,4 +504,8 @@ struct kvm_pmu_event_filter { #define KVM_PMU_EVENT_ALLOW 0 #define KVM_PMU_EVENT_DENY 1 +/* for KVM_{GET,SET,HAS}_DEVICE_ATTR */ +#define KVM_VCPU_TSC_CTRL 0 /* control group for the timestamp counter (TSC) */ +#define KVM_VCPU_TSC_OFFSET 0 /* attribute for the TSC offset */ + #endif /* _ASM_X86_KVM_H */ diff --git a/tools/bpf/bpftool/.gitignore b/tools/bpf/bpftool/.gitignore index 05ce4446b780..a736f64dc5dc 100644 --- a/tools/bpf/bpftool/.gitignore +++ b/tools/bpf/bpftool/.gitignore @@ -1,4 +1,4 @@ -# SPDX-License-Identifier: GPL-2.0-only +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) *.d /bootstrap/ /bpftool diff --git a/tools/bpf/bpftool/Documentation/Makefile b/tools/bpf/bpftool/Documentation/Makefile index c49487905ceb..ac8487dcff1d 100644 --- a/tools/bpf/bpftool/Documentation/Makefile +++ b/tools/bpf/bpftool/Documentation/Makefile @@ -1,6 +1,5 @@ -# SPDX-License-Identifier: GPL-2.0-only +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) include ../../../scripts/Makefile.include -include ../../../scripts/utilities.mak INSTALL ?= install RM ?= rm -f @@ -25,7 +24,7 @@ man: man8 man8: $(DOC_MAN8) RST2MAN_DEP := $(shell command -v rst2man 2>/dev/null) -RST2MAN_OPTS += --verbose +RST2MAN_OPTS += --verbose --strip-comments list_pages = $(sort $(basename $(filter-out $(1),$(MAN8_RST)))) see_also = $(subst " ",, \ diff --git a/tools/bpf/bpftool/Documentation/bpftool-btf.rst b/tools/bpf/bpftool/Documentation/bpftool-btf.rst index 88b28aa7431f..342716f74ec4 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-btf.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-btf.rst @@ -1,3 +1,5 @@ +.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) + ================ bpftool-btf ================ @@ -7,13 +9,14 @@ tool for inspection of BTF data :Manual section: 8 +.. include:: substitutions.rst + SYNOPSIS ======== **bpftool** [*OPTIONS*] **btf** *COMMAND* - *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | {**-d** | **--debug** } | - { **-B** | **--base-btf** } } + *OPTIONS* := { |COMMON_OPTIONS| | { **-B** | **--base-btf** } } *COMMANDS* := { **dump** | **help** } diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst index 3e4395eede4f..a17e9aa314fd 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst @@ -1,3 +1,5 @@ +.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) + ================ bpftool-cgroup ================ @@ -7,13 +9,14 @@ tool for inspection and simple manipulation of eBPF progs :Manual section: 8 +.. include:: substitutions.rst + SYNOPSIS ======== **bpftool** [*OPTIONS*] **cgroup** *COMMAND* - *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } | - { **-f** | **--bpffs** } } + *OPTIONS* := { |COMMON_OPTIONS| | { **-f** | **--bpffs** } } *COMMANDS* := { **show** | **list** | **tree** | **attach** | **detach** | **help** } @@ -30,9 +33,9 @@ CGROUP COMMANDS | *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* } | *ATTACH_TYPE* := { **ingress** | **egress** | **sock_create** | **sock_ops** | **device** | | **bind4** | **bind6** | **post_bind4** | **post_bind6** | **connect4** | **connect6** | -| **getpeername4** | **getpeername6** | **getsockname4** | **getsockname6** | **sendmsg4** | -| **sendmsg6** | **recvmsg4** | **recvmsg6** | **sysctl** | **getsockopt** | **setsockopt** | -| **sock_release** } +| **getpeername4** | **getpeername6** | **getsockname4** | **getsockname6** | **sendmsg4** | +| **sendmsg6** | **recvmsg4** | **recvmsg6** | **sysctl** | **getsockopt** | **setsockopt** | +| **sock_release** } | *ATTACH_FLAGS* := { **multi** | **override** } DESCRIPTION @@ -98,9 +101,9 @@ DESCRIPTION **sendmsg6** call to sendto(2), sendmsg(2), sendmmsg(2) for an unconnected udp6 socket (since 4.18); **recvmsg4** call to recvfrom(2), recvmsg(2), recvmmsg(2) for - an unconnected udp4 socket (since 5.2); + an unconnected udp4 socket (since 5.2); **recvmsg6** call to recvfrom(2), recvmsg(2), recvmmsg(2) for - an unconnected udp6 socket (since 5.2); + an unconnected udp6 socket (since 5.2); **sysctl** sysctl access (since 5.2); **getsockopt** call to getsockopt (since 5.3); **setsockopt** call to setsockopt (since 5.3); diff --git a/tools/bpf/bpftool/Documentation/bpftool-feature.rst b/tools/bpf/bpftool/Documentation/bpftool-feature.rst index ab9f57ee4c3a..4ce9a77bc1e0 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-feature.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-feature.rst @@ -1,3 +1,5 @@ +.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) + =============== bpftool-feature =============== @@ -7,12 +9,14 @@ tool for inspection of eBPF-related parameters for Linux kernel or net device :Manual section: 8 +.. include:: substitutions.rst + SYNOPSIS ======== **bpftool** [*OPTIONS*] **feature** *COMMAND* - *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } } + *OPTIONS* := { |COMMON_OPTIONS| } *COMMANDS* := { **probe** | **help** } diff --git a/tools/bpf/bpftool/Documentation/bpftool-gen.rst b/tools/bpf/bpftool/Documentation/bpftool-gen.rst index 2ef2f2df0279..bc276388f432 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-gen.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-gen.rst @@ -1,3 +1,5 @@ +.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) + ================ bpftool-gen ================ @@ -7,13 +9,14 @@ tool for BPF code-generation :Manual section: 8 +.. include:: substitutions.rst + SYNOPSIS ======== **bpftool** [*OPTIONS*] **gen** *COMMAND* - *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } | - { **-L** | **--use-loader** } } + *OPTIONS* := { |COMMON_OPTIONS| | { **-L** | **--use-loader** } } *COMMAND* := { **object** | **skeleton** | **help** } diff --git a/tools/bpf/bpftool/Documentation/bpftool-iter.rst b/tools/bpf/bpftool/Documentation/bpftool-iter.rst index 471f363a725a..84839d488621 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-iter.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-iter.rst @@ -1,3 +1,5 @@ +.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) + ============ bpftool-iter ============ @@ -7,12 +9,14 @@ tool to create BPF iterators :Manual section: 8 +.. include:: substitutions.rst + SYNOPSIS ======== **bpftool** [*OPTIONS*] **iter** *COMMAND* - *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } } + *OPTIONS* := { |COMMON_OPTIONS| } *COMMANDS* := { **pin** | **help** } diff --git a/tools/bpf/bpftool/Documentation/bpftool-link.rst b/tools/bpf/bpftool/Documentation/bpftool-link.rst index 0de90f086238..52a4eee4af54 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-link.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-link.rst @@ -1,3 +1,5 @@ +.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) + ================ bpftool-link ================ @@ -7,13 +9,14 @@ tool for inspection and simple manipulation of eBPF links :Manual section: 8 +.. include:: substitutions.rst + SYNOPSIS ======== **bpftool** [*OPTIONS*] **link** *COMMAND* - *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } | - { **-f** | **--bpffs** } | { **-n** | **--nomount** } } + *OPTIONS* := { |COMMON_OPTIONS| | { **-f** | **--bpffs** } | { **-n** | **--nomount** } } *COMMANDS* := { **show** | **list** | **pin** | **help** } diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst index d0c4abe08aba..7c188a598444 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-map.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst @@ -1,3 +1,5 @@ +.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) + ================ bpftool-map ================ @@ -7,17 +9,18 @@ tool for inspection and simple manipulation of eBPF maps :Manual section: 8 +.. include:: substitutions.rst + SYNOPSIS ======== **bpftool** [*OPTIONS*] **map** *COMMAND* - *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } | - { **-f** | **--bpffs** } | { **-n** | **--nomount** } } + *OPTIONS* := { |COMMON_OPTIONS| | { **-f** | **--bpffs** } | { **-n** | **--nomount** } } *COMMANDS* := - { **show** | **list** | **create** | **dump** | **update** | **lookup** | **getnext** - | **delete** | **pin** | **help** } + { **show** | **list** | **create** | **dump** | **update** | **lookup** | **getnext** | + **delete** | **pin** | **help** } MAP COMMANDS ============= @@ -52,7 +55,7 @@ MAP COMMANDS | | **devmap** | **devmap_hash** | **sockmap** | **cpumap** | **xskmap** | **sockhash** | | **cgroup_storage** | **reuseport_sockarray** | **percpu_cgroup_storage** | | **queue** | **stack** | **sk_storage** | **struct_ops** | **ringbuf** | **inode_storage** - | **task_storage** } +| | **task_storage** | **bloom_filter** } DESCRIPTION =========== diff --git a/tools/bpf/bpftool/Documentation/bpftool-net.rst b/tools/bpf/bpftool/Documentation/bpftool-net.rst index 1ae0375e8fea..f4e0a516335a 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-net.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-net.rst @@ -1,3 +1,5 @@ +.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) + ================ bpftool-net ================ @@ -7,12 +9,14 @@ tool for inspection of netdev/tc related bpf prog attachments :Manual section: 8 +.. include:: substitutions.rst + SYNOPSIS ======== **bpftool** [*OPTIONS*] **net** *COMMAND* - *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } } + *OPTIONS* := { |COMMON_OPTIONS| } *COMMANDS* := { **show** | **list** | **attach** | **detach** | **help** } @@ -31,44 +35,44 @@ NET COMMANDS DESCRIPTION =========== **bpftool net { show | list }** [ **dev** *NAME* ] - List bpf program attachments in the kernel networking subsystem. - - Currently, only device driver xdp attachments and tc filter - classification/action attachments are implemented, i.e., for - program types **BPF_PROG_TYPE_SCHED_CLS**, - **BPF_PROG_TYPE_SCHED_ACT** and **BPF_PROG_TYPE_XDP**. - For programs attached to a particular cgroup, e.g., - **BPF_PROG_TYPE_CGROUP_SKB**, **BPF_PROG_TYPE_CGROUP_SOCK**, - **BPF_PROG_TYPE_SOCK_OPS** and **BPF_PROG_TYPE_CGROUP_SOCK_ADDR**, - users can use **bpftool cgroup** to dump cgroup attachments. - For sk_{filter, skb, msg, reuseport} and lwt/seg6 - bpf programs, users should consult other tools, e.g., iproute2. - - The current output will start with all xdp program attachments, followed by - all tc class/qdisc bpf program attachments. Both xdp programs and - tc programs are ordered based on ifindex number. If multiple bpf - programs attached to the same networking device through **tc filter**, - the order will be first all bpf programs attached to tc classes, then - all bpf programs attached to non clsact qdiscs, and finally all - bpf programs attached to root and clsact qdisc. + List bpf program attachments in the kernel networking subsystem. + + Currently, only device driver xdp attachments and tc filter + classification/action attachments are implemented, i.e., for + program types **BPF_PROG_TYPE_SCHED_CLS**, + **BPF_PROG_TYPE_SCHED_ACT** and **BPF_PROG_TYPE_XDP**. + For programs attached to a particular cgroup, e.g., + **BPF_PROG_TYPE_CGROUP_SKB**, **BPF_PROG_TYPE_CGROUP_SOCK**, + **BPF_PROG_TYPE_SOCK_OPS** and **BPF_PROG_TYPE_CGROUP_SOCK_ADDR**, + users can use **bpftool cgroup** to dump cgroup attachments. + For sk_{filter, skb, msg, reuseport} and lwt/seg6 + bpf programs, users should consult other tools, e.g., iproute2. + + The current output will start with all xdp program attachments, followed by + all tc class/qdisc bpf program attachments. Both xdp programs and + tc programs are ordered based on ifindex number. If multiple bpf + programs attached to the same networking device through **tc filter**, + the order will be first all bpf programs attached to tc classes, then + all bpf programs attached to non clsact qdiscs, and finally all + bpf programs attached to root and clsact qdisc. **bpftool** **net attach** *ATTACH_TYPE* *PROG* **dev** *NAME* [ **overwrite** ] - Attach bpf program *PROG* to network interface *NAME* with - type specified by *ATTACH_TYPE*. Previously attached bpf program - can be replaced by the command used with **overwrite** option. - Currently, only XDP-related modes are supported for *ATTACH_TYPE*. + Attach bpf program *PROG* to network interface *NAME* with + type specified by *ATTACH_TYPE*. Previously attached bpf program + can be replaced by the command used with **overwrite** option. + Currently, only XDP-related modes are supported for *ATTACH_TYPE*. - *ATTACH_TYPE* can be of: - **xdp** - try native XDP and fallback to generic XDP if NIC driver does not support it; - **xdpgeneric** - Generic XDP. runs at generic XDP hook when packet already enters receive path as skb; - **xdpdrv** - Native XDP. runs earliest point in driver's receive path; - **xdpoffload** - Offload XDP. runs directly on NIC on each packet reception; + *ATTACH_TYPE* can be of: + **xdp** - try native XDP and fallback to generic XDP if NIC driver does not support it; + **xdpgeneric** - Generic XDP. runs at generic XDP hook when packet already enters receive path as skb; + **xdpdrv** - Native XDP. runs earliest point in driver's receive path; + **xdpoffload** - Offload XDP. runs directly on NIC on each packet reception; **bpftool** **net detach** *ATTACH_TYPE* **dev** *NAME* - Detach bpf program attached to network interface *NAME* with - type specified by *ATTACH_TYPE*. To detach bpf program, same - *ATTACH_TYPE* previously used for attach must be specified. - Currently, only XDP-related modes are supported for *ATTACH_TYPE*. + Detach bpf program attached to network interface *NAME* with + type specified by *ATTACH_TYPE*. To detach bpf program, same + *ATTACH_TYPE* previously used for attach must be specified. + Currently, only XDP-related modes are supported for *ATTACH_TYPE*. **bpftool net help** Print short help message. diff --git a/tools/bpf/bpftool/Documentation/bpftool-perf.rst b/tools/bpf/bpftool/Documentation/bpftool-perf.rst index ce52798a917d..5fea633a82f1 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-perf.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-perf.rst @@ -1,3 +1,5 @@ +.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) + ================ bpftool-perf ================ @@ -7,12 +9,14 @@ tool for inspection of perf related bpf prog attachments :Manual section: 8 +.. include:: substitutions.rst + SYNOPSIS ======== **bpftool** [*OPTIONS*] **perf** *COMMAND* - *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } } + *OPTIONS* := { |COMMON_OPTIONS| } *COMMANDS* := { **show** | **list** | **help** } diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index 91608cb7e44a..a2e9359e554c 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -1,3 +1,5 @@ +.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) + ================ bpftool-prog ================ @@ -7,18 +9,20 @@ tool for inspection and simple manipulation of eBPF progs :Manual section: 8 +.. include:: substitutions.rst + SYNOPSIS ======== **bpftool** [*OPTIONS*] **prog** *COMMAND* - *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } | - { **-f** | **--bpffs** } | { **-m** | **--mapcompat** } | { **-n** | **--nomount** } | - { **-L** | **--use-loader** } } + *OPTIONS* := { |COMMON_OPTIONS| | + { **-f** | **--bpffs** } | { **-m** | **--mapcompat** } | { **-n** | **--nomount** } | + { **-L** | **--use-loader** } } *COMMANDS* := - { **show** | **list** | **dump xlated** | **dump jited** | **pin** | **load** - | **loadall** | **help** } + { **show** | **list** | **dump xlated** | **dump jited** | **pin** | **load** | + **loadall** | **help** } PROG COMMANDS ============= diff --git a/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst b/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst index 02afc0fc14cb..ee53a122c0c7 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst @@ -1,3 +1,5 @@ +.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) + ================== bpftool-struct_ops ================== @@ -7,12 +9,14 @@ tool to register/unregister/introspect BPF struct_ops :Manual section: 8 +.. include:: substitutions.rst + SYNOPSIS ======== **bpftool** [*OPTIONS*] **struct_ops** *COMMAND* - *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } } + *OPTIONS* := { |COMMON_OPTIONS| } *COMMANDS* := { **show** | **list** | **dump** | **register** | **unregister** | **help** } diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst index bb23f55bb05a..7084dd9fa2f8 100644 --- a/tools/bpf/bpftool/Documentation/bpftool.rst +++ b/tools/bpf/bpftool/Documentation/bpftool.rst @@ -1,3 +1,5 @@ +.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) + ================ BPFTOOL ================ @@ -7,6 +9,8 @@ tool for inspection and simple manipulation of eBPF programs and maps :Manual section: 8 +.. include:: substitutions.rst + SYNOPSIS ======== @@ -18,15 +22,14 @@ SYNOPSIS *OBJECT* := { **map** | **program** | **cgroup** | **perf** | **net** | **feature** } - *OPTIONS* := { { **-V** | **--version** } | - { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } } + *OPTIONS* := { { **-V** | **--version** } | |COMMON_OPTIONS| } *MAP-COMMANDS* := { **show** | **list** | **create** | **dump** | **update** | **lookup** | **getnext** | - **delete** | **pin** | **event_pipe** | **help** } + **delete** | **pin** | **event_pipe** | **help** } *PROG-COMMANDS* := { **show** | **list** | **dump jited** | **dump xlated** | **pin** | - **load** | **attach** | **detach** | **help** } + **load** | **attach** | **detach** | **help** } *CGROUP-COMMANDS* := { **show** | **list** | **attach** | **detach** | **help** } diff --git a/tools/bpf/bpftool/Documentation/common_options.rst b/tools/bpf/bpftool/Documentation/common_options.rst index 05d06c74dcaa..908487b9c2ad 100644 --- a/tools/bpf/bpftool/Documentation/common_options.rst +++ b/tools/bpf/bpftool/Documentation/common_options.rst @@ -1,3 +1,5 @@ +.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) + -h, --help Print short help message (similar to **bpftool help**). @@ -20,3 +22,12 @@ Print all logs available, even debug-level information. This includes logs from libbpf as well as from the verifier, when attempting to load programs. + +-l, --legacy + Use legacy libbpf mode which has more relaxed BPF program + requirements. By default, bpftool has more strict requirements + about section names, changes pinning logic and doesn't support + some of the older non-BTF map declarations. + + See https://github.com/libbpf/libbpf/wiki/Libbpf:-the-road-to-v1.0 + for details. diff --git a/tools/bpf/bpftool/Documentation/substitutions.rst b/tools/bpf/bpftool/Documentation/substitutions.rst new file mode 100644 index 000000000000..ccf1ffa0686c --- /dev/null +++ b/tools/bpf/bpftool/Documentation/substitutions.rst @@ -0,0 +1,3 @@ +.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) + +.. |COMMON_OPTIONS| replace:: { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } | { **-l** | **--legacy** } diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index 7cfba11c3014..83369f55df61 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -1,6 +1,5 @@ -# SPDX-License-Identifier: GPL-2.0-only +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) include ../../scripts/Makefile.include -include ../../scripts/utilities.mak ifeq ($(srctree),) srctree := $(patsubst %/,%,$(dir $(CURDIR))) @@ -58,7 +57,7 @@ $(LIBBPF_INTERNAL_HDRS): $(LIBBPF_HDRS_DIR)/%.h: $(BPF_DIR)/%.h | $(LIBBPF_HDRS_ $(LIBBPF_BOOTSTRAP): $(wildcard $(BPF_DIR)/*.[ch] $(BPF_DIR)/Makefile) | $(LIBBPF_BOOTSTRAP_OUTPUT) $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(LIBBPF_BOOTSTRAP_OUTPUT) \ DESTDIR=$(LIBBPF_BOOTSTRAP_DESTDIR) prefix= \ - ARCH= CC=$(HOSTCC) LD=$(HOSTLD) $@ install_headers + ARCH= CROSS_COMPILE= CC=$(HOSTCC) LD=$(HOSTLD) $@ install_headers $(LIBBPF_BOOTSTRAP_INTERNAL_HDRS): $(LIBBPF_BOOTSTRAP_HDRS_DIR)/%.h: $(BPF_DIR)/%.h | $(LIBBPF_BOOTSTRAP_HDRS_DIR) $(call QUIET_INSTALL, $@) @@ -153,6 +152,9 @@ CFLAGS += -DHAVE_LIBBFD_SUPPORT SRCS += $(BFD_SRCS) endif +HOST_CFLAGS = $(subst -I$(LIBBPF_INCLUDE),-I$(LIBBPF_BOOTSTRAP_INCLUDE),\ + $(subst $(CLANG_CROSS_FLAGS),,$(CFLAGS))) + BPFTOOL_BOOTSTRAP := $(BOOTSTRAP_OUTPUT)bpftool BOOTSTRAP_OBJS = $(addprefix $(BOOTSTRAP_OUTPUT),main.o common.o json_writer.o gen.o btf.o xlated_dumper.o btf_dumper.o disasm.o) @@ -187,7 +189,8 @@ $(OUTPUT)%.bpf.o: skeleton/%.bpf.c $(OUTPUT)vmlinux.h $(LIBBPF_BOOTSTRAP) -I$(if $(OUTPUT),$(OUTPUT),.) \ -I$(srctree)/tools/include/uapi/ \ -I$(LIBBPF_BOOTSTRAP_INCLUDE) \ - -g -O2 -Wall -target bpf -c $< -o $@ && $(LLVM_STRIP) -g $@ + -g -O2 -Wall -target bpf -c $< -o $@ + $(Q)$(LLVM_STRIP) -g $@ $(OUTPUT)%.skel.h: $(OUTPUT)%.bpf.o $(BPFTOOL_BOOTSTRAP) $(QUIET_GEN)$(BPFTOOL_BOOTSTRAP) gen skeleton $< > $@ @@ -202,10 +205,10 @@ endif CFLAGS += $(if $(BUILD_BPF_SKELS),,-DBPFTOOL_WITHOUT_SKELETONS) $(BOOTSTRAP_OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c - $(QUIET_CC)$(HOSTCC) $(CFLAGS) -c -MMD -o $@ $< + $(QUIET_CC)$(HOSTCC) $(HOST_CFLAGS) -c -MMD $< -o $@ $(OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c - $(QUIET_CC)$(CC) $(CFLAGS) -c -MMD -o $@ $< + $(QUIET_CC)$(CC) $(CFLAGS) -c -MMD $< -o $@ $(OUTPUT)feature.o: ifneq ($(feature-zlib), 1) @@ -213,19 +216,16 @@ ifneq ($(feature-zlib), 1) endif $(BPFTOOL_BOOTSTRAP): $(BOOTSTRAP_OBJS) $(LIBBPF_BOOTSTRAP) - $(QUIET_LINK)$(HOSTCC) $(CFLAGS) $(LDFLAGS) -o $@ $(BOOTSTRAP_OBJS) \ - $(LIBS_BOOTSTRAP) + $(QUIET_LINK)$(HOSTCC) $(HOST_CFLAGS) $(LDFLAGS) $(BOOTSTRAP_OBJS) $(LIBS_BOOTSTRAP) -o $@ $(OUTPUT)bpftool: $(OBJS) $(LIBBPF) - $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJS) $(LIBS) + $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(OBJS) $(LIBS) -o $@ $(BOOTSTRAP_OUTPUT)%.o: %.c $(LIBBPF_BOOTSTRAP_INTERNAL_HDRS) | $(BOOTSTRAP_OUTPUT) - $(QUIET_CC)$(HOSTCC) \ - $(subst -I$(LIBBPF_INCLUDE),-I$(LIBBPF_BOOTSTRAP_INCLUDE),$(CFLAGS)) \ - -c -MMD -o $@ $< + $(QUIET_CC)$(HOSTCC) $(HOST_CFLAGS) -c -MMD $< -o $@ $(OUTPUT)%.o: %.c - $(QUIET_CC)$(CC) $(CFLAGS) -c -MMD -o $@ $< + $(QUIET_CC)$(CC) $(CFLAGS) -c -MMD $< -o $@ feature-detect-clean: $(call QUIET_CLEAN, feature-detect) diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index 88e2bcf16cca..493753a4962e 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -261,7 +261,7 @@ _bpftool() # Deal with options if [[ ${words[cword]} == -* ]]; then local c='--version --json --pretty --bpffs --mapcompat --debug \ - --use-loader --base-btf' + --use-loader --base-btf --legacy' COMPREPLY=( $( compgen -W "$c" -- "$cur" ) ) return 0 fi @@ -710,7 +710,8 @@ _bpftool() hash_of_maps devmap devmap_hash sockmap cpumap \ xskmap sockhash cgroup_storage reuseport_sockarray \ percpu_cgroup_storage queue stack sk_storage \ - struct_ops inode_storage task_storage ringbuf' + struct_ops ringbuf inode_storage task_storage \ + bloom_filter' COMPREPLY=( $( compgen -W "$BPFTOOL_MAP_CREATE_TYPES" -- "$cur" ) ) return 0 ;; diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c index 015d2758f826..59833125ac0a 100644 --- a/tools/bpf/bpftool/btf.c +++ b/tools/bpf/bpftool/btf.c @@ -39,6 +39,7 @@ static const char * const btf_kind_str[NR_BTF_KINDS] = { [BTF_KIND_DATASEC] = "DATASEC", [BTF_KIND_FLOAT] = "FLOAT", [BTF_KIND_DECL_TAG] = "DECL_TAG", + [BTF_KIND_TYPE_TAG] = "TYPE_TAG", }; struct btf_attach_point { @@ -142,6 +143,7 @@ static int dump_btf_type(const struct btf *btf, __u32 id, case BTF_KIND_VOLATILE: case BTF_KIND_RESTRICT: case BTF_KIND_TYPEDEF: + case BTF_KIND_TYPE_TAG: if (json_output) jsonw_uint_field(w, "type_id", t->type); else @@ -418,9 +420,10 @@ static int dump_btf_c(const struct btf *btf, struct btf_dump *d; int err = 0, i; - d = btf_dump__new(btf, NULL, NULL, btf_dump_printf); - if (IS_ERR(d)) - return PTR_ERR(d); + d = btf_dump__new(btf, btf_dump_printf, NULL, NULL); + err = libbpf_get_error(d); + if (err) + return err; printf("#ifndef __VMLINUX_H__\n"); printf("#define __VMLINUX_H__\n"); @@ -547,8 +550,8 @@ static int do_dump(int argc, char **argv) } btf = btf__parse_split(*argv, base ?: base_btf); - if (IS_ERR(btf)) { - err = -PTR_ERR(btf); + err = libbpf_get_error(btf); + if (err) { btf = NULL; p_err("failed to load BTF from %s: %s", *argv, strerror(err)); diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c index 9c25286a5c73..f5dddf8ef404 100644 --- a/tools/bpf/bpftool/btf_dumper.c +++ b/tools/bpf/bpftool/btf_dumper.c @@ -32,14 +32,16 @@ static int dump_prog_id_as_func_ptr(const struct btf_dumper *d, const struct btf_type *func_proto, __u32 prog_id) { - struct bpf_prog_info_linear *prog_info = NULL; const struct btf_type *func_type; + int prog_fd = -1, func_sig_len; + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); const char *prog_name = NULL; - struct bpf_func_info *finfo; struct btf *prog_btf = NULL; - struct bpf_prog_info *info; - int prog_fd, func_sig_len; + struct bpf_func_info finfo; + __u32 finfo_rec_size; char prog_str[1024]; + int err; /* Get the ptr's func_proto */ func_sig_len = btf_dump_func(d->btf, prog_str, func_proto, NULL, 0, @@ -52,25 +54,30 @@ static int dump_prog_id_as_func_ptr(const struct btf_dumper *d, /* Get the bpf_prog's name. Obtain from func_info. */ prog_fd = bpf_prog_get_fd_by_id(prog_id); - if (prog_fd == -1) + if (prog_fd < 0) goto print; - prog_info = bpf_program__get_prog_info_linear(prog_fd, - 1UL << BPF_PROG_INFO_FUNC_INFO); - close(prog_fd); - if (IS_ERR(prog_info)) { - prog_info = NULL; + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + if (err) goto print; - } - info = &prog_info->info; - if (!info->btf_id || !info->nr_func_info) + if (!info.btf_id || !info.nr_func_info) + goto print; + + finfo_rec_size = info.func_info_rec_size; + memset(&info, 0, sizeof(info)); + info.nr_func_info = 1; + info.func_info_rec_size = finfo_rec_size; + info.func_info = ptr_to_u64(&finfo); + + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + if (err) goto print; - prog_btf = btf__load_from_kernel_by_id(info->btf_id); + + prog_btf = btf__load_from_kernel_by_id(info.btf_id); if (libbpf_get_error(prog_btf)) goto print; - finfo = u64_to_ptr(info->func_info); - func_type = btf__type_by_id(prog_btf, finfo->type_id); + func_type = btf__type_by_id(prog_btf, finfo.type_id); if (!func_type || !btf_is_func(func_type)) goto print; @@ -92,7 +99,8 @@ print: prog_str[sizeof(prog_str) - 1] = '\0'; jsonw_string(d->jw, prog_str); btf__free(prog_btf); - free(prog_info); + if (prog_fd >= 0) + close(prog_fd); return 0; } diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index 511eccdbdfe6..fa8eb8134344 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -74,6 +74,7 @@ const char * const attach_type_name[__MAX_BPF_ATTACH_TYPE] = { [BPF_XDP] = "xdp", [BPF_SK_REUSEPORT_SELECT] = "sk_skb_reuseport_select", [BPF_SK_REUSEPORT_SELECT_OR_MIGRATE] = "sk_skb_reuseport_select_or_migrate", + [BPF_PERF_EVENT] = "perf_event", }; void p_err(const char *fmt, ...) diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c index ade44577688e..e999159fa28d 100644 --- a/tools/bpf/bpftool/feature.c +++ b/tools/bpf/bpftool/feature.c @@ -467,7 +467,7 @@ static bool probe_bpf_syscall(const char *define_prefix) { bool res; - bpf_load_program(BPF_PROG_TYPE_UNSPEC, NULL, 0, NULL, 0, NULL, 0); + bpf_prog_load(BPF_PROG_TYPE_UNSPEC, NULL, NULL, NULL, 0, NULL); res = (errno != ENOSYS); print_bool_feature("have_bpf_syscall", @@ -643,15 +643,111 @@ probe_helpers_for_progtype(enum bpf_prog_type prog_type, bool supported_type, } static void -probe_large_insn_limit(const char *define_prefix, __u32 ifindex) +probe_misc_feature(struct bpf_insn *insns, size_t len, + const char *define_prefix, __u32 ifindex, + const char *feat_name, const char *plain_name, + const char *define_name) { + LIBBPF_OPTS(bpf_prog_load_opts, opts, + .prog_ifindex = ifindex, + ); bool res; + int fd; - res = bpf_probe_large_insn_limit(ifindex); - print_bool_feature("have_large_insn_limit", + errno = 0; + fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", + insns, len, &opts); + res = fd >= 0 || !errno; + + if (fd >= 0) + close(fd); + + print_bool_feature(feat_name, plain_name, define_name, res, + define_prefix); +} + +/* + * Probe for availability of kernel commit (5.3): + * + * c04c0d2b968a ("bpf: increase complexity limit and maximum program size") + */ +static void probe_large_insn_limit(const char *define_prefix, __u32 ifindex) +{ + struct bpf_insn insns[BPF_MAXINSNS + 1]; + int i; + + for (i = 0; i < BPF_MAXINSNS; i++) + insns[i] = BPF_MOV64_IMM(BPF_REG_0, 1); + insns[BPF_MAXINSNS] = BPF_EXIT_INSN(); + + probe_misc_feature(insns, ARRAY_SIZE(insns), + define_prefix, ifindex, + "have_large_insn_limit", "Large program size limit", - "LARGE_INSN_LIMIT", - res, define_prefix); + "LARGE_INSN_LIMIT"); +} + +/* + * Probe for bounded loop support introduced in commit 2589726d12a1 + * ("bpf: introduce bounded loops"). + */ +static void +probe_bounded_loops(const char *define_prefix, __u32 ifindex) +{ + struct bpf_insn insns[4] = { + BPF_MOV64_IMM(BPF_REG_0, 10), + BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 1), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, -2), + BPF_EXIT_INSN() + }; + + probe_misc_feature(insns, ARRAY_SIZE(insns), + define_prefix, ifindex, + "have_bounded_loops", + "Bounded loop support", + "BOUNDED_LOOPS"); +} + +/* + * Probe for the v2 instruction set extension introduced in commit 92b31a9af73b + * ("bpf: add BPF_J{LT,LE,SLT,SLE} instructions"). + */ +static void +probe_v2_isa_extension(const char *define_prefix, __u32 ifindex) +{ + struct bpf_insn insns[4] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 0, 1), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN() + }; + + probe_misc_feature(insns, ARRAY_SIZE(insns), + define_prefix, ifindex, + "have_v2_isa_extension", + "ISA extension v2", + "V2_ISA_EXTENSION"); +} + +/* + * Probe for the v3 instruction set extension introduced in commit 092ed0968bb6 + * ("bpf: verifier support JMP32"). + */ +static void +probe_v3_isa_extension(const char *define_prefix, __u32 ifindex) +{ + struct bpf_insn insns[4] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP32_IMM(BPF_JLT, BPF_REG_0, 0, 1), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN() + }; + + probe_misc_feature(insns, ARRAY_SIZE(insns), + define_prefix, ifindex, + "have_v3_isa_extension", + "ISA extension v3", + "V3_ISA_EXTENSION"); } static void @@ -768,6 +864,9 @@ static void section_misc(const char *define_prefix, __u32 ifindex) "/*** eBPF misc features ***/", define_prefix); probe_large_insn_limit(define_prefix, ifindex); + probe_bounded_loops(define_prefix, ifindex); + probe_v2_isa_extension(define_prefix, ifindex); + probe_v3_isa_extension(define_prefix, ifindex); print_end_section(); } diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c index 5c18351290f0..b4695df2ea3d 100644 --- a/tools/bpf/bpftool/gen.c +++ b/tools/bpf/bpftool/gen.c @@ -218,9 +218,10 @@ static int codegen_datasecs(struct bpf_object *obj, const char *obj_name) char sec_ident[256], map_ident[256]; int i, err = 0; - d = btf_dump__new(btf, NULL, NULL, codegen_btf_dump_printf); - if (IS_ERR(d)) - return PTR_ERR(d); + d = btf_dump__new(btf, codegen_btf_dump_printf, NULL, NULL); + err = libbpf_get_error(d); + if (err) + return err; bpf_object__for_each_map(map, obj) { /* only generate definitions for memory-mapped internal maps */ @@ -485,7 +486,6 @@ static void codegen_destroy(struct bpf_object *obj, const char *obj_name) static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *header_guard) { - struct bpf_object_load_attr load_attr = {}; DECLARE_LIBBPF_OPTS(gen_loader_opts, opts); struct bpf_map *map; char ident[256]; @@ -495,12 +495,7 @@ static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *h if (err) return err; - load_attr.obj = obj; - if (verifier_logs) - /* log_level1 + log_level2 + stats, but not stable UAPI */ - load_attr.log_level = 1 + 2 + 4; - - err = bpf_object__load_xattr(&load_attr); + err = bpf_object__load(obj); if (err) { p_err("failed to load object file"); goto out; @@ -718,11 +713,15 @@ static int do_skeleton(int argc, char **argv) if (obj_name[0] == '\0') get_obj_name(obj_name, file); opts.object_name = obj_name; + if (verifier_logs) + /* log_level1 + log_level2 + stats, but not stable UAPI */ + opts.kernel_log_level = 1 + 2 + 4; obj = bpf_object__open_mem(obj_data, file_sz, &opts); - if (IS_ERR(obj)) { + err = libbpf_get_error(obj); + if (err) { char err_buf[256]; - libbpf_strerror(PTR_ERR(obj), err_buf, sizeof(err_buf)); + libbpf_strerror(err, err_buf, sizeof(err_buf)); p_err("failed to open BPF object file: %s", err_buf); obj = NULL; goto out; diff --git a/tools/bpf/bpftool/iter.c b/tools/bpf/bpftool/iter.c index 6c0de647b8ad..f88fdc820d23 100644 --- a/tools/bpf/bpftool/iter.c +++ b/tools/bpf/bpftool/iter.c @@ -46,7 +46,8 @@ static int do_pin(int argc, char **argv) } obj = bpf_object__open(objfile); - if (IS_ERR(obj)) { + err = libbpf_get_error(obj); + if (err) { p_err("can't open objfile %s", objfile); goto close_map_fd; } @@ -64,8 +65,8 @@ static int do_pin(int argc, char **argv) } link = bpf_program__attach_iter(prog, &iter_opts); - if (IS_ERR(link)) { - err = PTR_ERR(link); + err = libbpf_get_error(link); + if (err) { p_err("attach_iter failed for program %s", bpf_program__name(prog)); goto close_obj; diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index 28237d7cef67..020e91a542d5 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -31,6 +31,7 @@ bool block_mount; bool verifier_logs; bool relaxed_maps; bool use_loader; +bool legacy_libbpf; struct btf *base_btf; struct hashmap *refs_table; @@ -92,6 +93,7 @@ static int do_version(int argc, char **argv) jsonw_name(json_wtr, "features"); jsonw_start_object(json_wtr); /* features */ jsonw_bool_field(json_wtr, "libbfd", has_libbfd); + jsonw_bool_field(json_wtr, "libbpf_strict", !legacy_libbpf); jsonw_bool_field(json_wtr, "skeletons", has_skeletons); jsonw_end_object(json_wtr); /* features */ @@ -105,6 +107,10 @@ static int do_version(int argc, char **argv) printf(" libbfd"); nb_features++; } + if (!legacy_libbpf) { + printf("%s libbpf_strict", nb_features++ ? "," : ""); + nb_features++; + } if (has_skeletons) printf("%s skeletons", nb_features++ ? "," : ""); printf("\n"); @@ -396,10 +402,14 @@ int main(int argc, char **argv) { "debug", no_argument, NULL, 'd' }, { "use-loader", no_argument, NULL, 'L' }, { "base-btf", required_argument, NULL, 'B' }, + { "legacy", no_argument, NULL, 'l' }, { 0 } }; + bool version_requested = false; int opt, ret; + setlinebuf(stdout); + last_do_help = do_help; pretty_output = false; json_output = false; @@ -408,11 +418,12 @@ int main(int argc, char **argv) bin_name = argv[0]; opterr = 0; - while ((opt = getopt_long(argc, argv, "VhpjfLmndB:", + while ((opt = getopt_long(argc, argv, "VhpjfLmndB:l", options, NULL)) >= 0) { switch (opt) { case 'V': - return do_version(argc, argv); + version_requested = true; + break; case 'h': return do_help(argc, argv); case 'p': @@ -454,6 +465,9 @@ int main(int argc, char **argv) case 'L': use_loader = true; break; + case 'l': + legacy_libbpf = true; + break; default: p_err("unrecognized option '%s'", argv[optind - 1]); if (json_output) @@ -463,11 +477,20 @@ int main(int argc, char **argv) } } + if (!legacy_libbpf) { + ret = libbpf_set_strict_mode(LIBBPF_STRICT_ALL); + if (ret) + p_err("failed to enable libbpf strict mode: %d", ret); + } + argc -= optind; argv += optind; if (argc < 0) usage(); + if (version_requested) + return do_version(argc, argv); + ret = cmd_select(cmds, argc, argv, do_help); if (json_output) diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 383835c2604d..8d76d937a62b 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -57,7 +57,7 @@ static inline void *u64_to_ptr(__u64 ptr) #define HELP_SPEC_PROGRAM \ "PROG := { id PROG_ID | pinned FILE | tag PROG_TAG | name PROG_NAME }" #define HELP_SPEC_OPTIONS \ - "OPTIONS := { {-j|--json} [{-p|--pretty}] | {-d|--debug}" + "OPTIONS := { {-j|--json} [{-p|--pretty}] | {-d|--debug} | {-l|--legacy}" #define HELP_SPEC_MAP \ "MAP := { id MAP_ID | pinned FILE | name MAP_NAME }" #define HELP_SPEC_LINK \ @@ -90,6 +90,7 @@ extern bool block_mount; extern bool verifier_logs; extern bool relaxed_maps; extern bool use_loader; +extern bool legacy_libbpf; extern struct btf *base_btf; extern struct hashmap *refs_table; diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index cae1f1119296..cc530a229812 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -53,6 +53,7 @@ const char * const map_type_name[] = { [BPF_MAP_TYPE_RINGBUF] = "ringbuf", [BPF_MAP_TYPE_INODE_STORAGE] = "inode_storage", [BPF_MAP_TYPE_TASK_STORAGE] = "task_storage", + [BPF_MAP_TYPE_BLOOM_FILTER] = "bloom_filter", }; const size_t map_type_name_size = ARRAY_SIZE(map_type_name); @@ -811,7 +812,7 @@ static struct btf *get_map_kv_btf(const struct bpf_map_info *info) if (info->btf_vmlinux_value_type_id) { if (!btf_vmlinux) { btf_vmlinux = libbpf_find_kernel_btf(); - if (IS_ERR(btf_vmlinux)) + if (libbpf_get_error(btf_vmlinux)) p_err("failed to get kernel btf"); } return btf_vmlinux; @@ -831,13 +832,13 @@ static struct btf *get_map_kv_btf(const struct bpf_map_info *info) static void free_map_kv_btf(struct btf *btf) { - if (!IS_ERR(btf) && btf != btf_vmlinux) + if (!libbpf_get_error(btf) && btf != btf_vmlinux) btf__free(btf); } static void free_btf_vmlinux(void) { - if (!IS_ERR(btf_vmlinux)) + if (!libbpf_get_error(btf_vmlinux)) btf__free(btf_vmlinux); } @@ -862,8 +863,8 @@ map_dump(int fd, struct bpf_map_info *info, json_writer_t *wtr, if (wtr) { btf = get_map_kv_btf(info); - if (IS_ERR(btf)) { - err = PTR_ERR(btf); + err = libbpf_get_error(btf); + if (err) { goto exit_free; } @@ -1260,7 +1261,10 @@ static int do_pin(int argc, char **argv) static int do_create(int argc, char **argv) { - struct bpf_create_map_attr attr = { NULL, }; + LIBBPF_OPTS(bpf_map_create_opts, attr); + enum bpf_map_type map_type = BPF_MAP_TYPE_UNSPEC; + __u32 key_size = 0, value_size = 0, max_entries = 0; + const char *map_name = NULL; const char *pinfile; int err = -1, fd; @@ -1275,30 +1279,30 @@ static int do_create(int argc, char **argv) if (is_prefix(*argv, "type")) { NEXT_ARG(); - if (attr.map_type) { + if (map_type) { p_err("map type already specified"); goto exit; } - attr.map_type = map_type_from_str(*argv); - if ((int)attr.map_type < 0) { + map_type = map_type_from_str(*argv); + if ((int)map_type < 0) { p_err("unrecognized map type: %s", *argv); goto exit; } NEXT_ARG(); } else if (is_prefix(*argv, "name")) { NEXT_ARG(); - attr.name = GET_ARG(); + map_name = GET_ARG(); } else if (is_prefix(*argv, "key")) { - if (parse_u32_arg(&argc, &argv, &attr.key_size, + if (parse_u32_arg(&argc, &argv, &key_size, "key size")) goto exit; } else if (is_prefix(*argv, "value")) { - if (parse_u32_arg(&argc, &argv, &attr.value_size, + if (parse_u32_arg(&argc, &argv, &value_size, "value size")) goto exit; } else if (is_prefix(*argv, "entries")) { - if (parse_u32_arg(&argc, &argv, &attr.max_entries, + if (parse_u32_arg(&argc, &argv, &max_entries, "max entries")) goto exit; } else if (is_prefix(*argv, "flags")) { @@ -1339,14 +1343,14 @@ static int do_create(int argc, char **argv) } } - if (!attr.name) { + if (!map_name) { p_err("map name not specified"); goto exit; } set_max_rlimit(); - fd = bpf_create_map_xattr(&attr); + fd = bpf_map_create(map_type, map_name, key_size, value_size, max_entries, &attr); if (fd < 0) { p_err("map create failed: %s", strerror(errno)); goto exit; @@ -1477,7 +1481,7 @@ static int do_help(int argc, char **argv) " devmap | devmap_hash | sockmap | cpumap | xskmap | sockhash |\n" " cgroup_storage | reuseport_sockarray | percpu_cgroup_storage |\n" " queue | stack | sk_storage | struct_ops | ringbuf | inode_storage |\n" - " task_storage }\n" + " task_storage | bloom_filter }\n" " " HELP_SPEC_OPTIONS " |\n" " {-f|--bpffs} | {-n|--nomount} }\n" "", diff --git a/tools/bpf/bpftool/map_perf_ring.c b/tools/bpf/bpftool/map_perf_ring.c index b98ea702d284..6b0c410152de 100644 --- a/tools/bpf/bpftool/map_perf_ring.c +++ b/tools/bpf/bpftool/map_perf_ring.c @@ -124,7 +124,7 @@ int do_event_pipe(int argc, char **argv) .wakeup_events = 1, }; struct bpf_map_info map_info = {}; - struct perf_buffer_raw_opts opts = {}; + LIBBPF_OPTS(perf_buffer_raw_opts, opts); struct event_pipe_ctx ctx = { .all_cpus = true, .cpu = -1, @@ -190,14 +190,11 @@ int do_event_pipe(int argc, char **argv) ctx.idx = 0; } - opts.attr = &perf_attr; - opts.event_cb = print_bpf_output; - opts.ctx = &ctx; opts.cpu_cnt = ctx.all_cpus ? 0 : 1; opts.cpus = &ctx.cpu; opts.map_keys = &ctx.idx; - - pb = perf_buffer__new_raw(map_fd, MMAP_PAGE_CNT, &opts); + pb = perf_buffer__new_raw(map_fd, MMAP_PAGE_CNT, &perf_attr, + print_bpf_output, &ctx, &opts); err = libbpf_get_error(pb); if (err) { p_err("failed to create perf buffer: %s (%d)", diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 515d22952602..2a21d50516bc 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -100,6 +100,76 @@ static enum bpf_attach_type parse_attach_type(const char *str) return __MAX_BPF_ATTACH_TYPE; } +static int prep_prog_info(struct bpf_prog_info *const info, enum dump_mode mode, + void **info_data, size_t *const info_data_sz) +{ + struct bpf_prog_info holder = {}; + size_t needed = 0; + void *ptr; + + if (mode == DUMP_JITED) { + holder.jited_prog_len = info->jited_prog_len; + needed += info->jited_prog_len; + } else { + holder.xlated_prog_len = info->xlated_prog_len; + needed += info->xlated_prog_len; + } + + holder.nr_jited_ksyms = info->nr_jited_ksyms; + needed += info->nr_jited_ksyms * sizeof(__u64); + + holder.nr_jited_func_lens = info->nr_jited_func_lens; + needed += info->nr_jited_func_lens * sizeof(__u32); + + holder.nr_func_info = info->nr_func_info; + holder.func_info_rec_size = info->func_info_rec_size; + needed += info->nr_func_info * info->func_info_rec_size; + + holder.nr_line_info = info->nr_line_info; + holder.line_info_rec_size = info->line_info_rec_size; + needed += info->nr_line_info * info->line_info_rec_size; + + holder.nr_jited_line_info = info->nr_jited_line_info; + holder.jited_line_info_rec_size = info->jited_line_info_rec_size; + needed += info->nr_jited_line_info * info->jited_line_info_rec_size; + + if (needed > *info_data_sz) { + ptr = realloc(*info_data, needed); + if (!ptr) + return -1; + + *info_data = ptr; + *info_data_sz = needed; + } + ptr = *info_data; + + if (mode == DUMP_JITED) { + holder.jited_prog_insns = ptr_to_u64(ptr); + ptr += holder.jited_prog_len; + } else { + holder.xlated_prog_insns = ptr_to_u64(ptr); + ptr += holder.xlated_prog_len; + } + + holder.jited_ksyms = ptr_to_u64(ptr); + ptr += holder.nr_jited_ksyms * sizeof(__u64); + + holder.jited_func_lens = ptr_to_u64(ptr); + ptr += holder.nr_jited_func_lens * sizeof(__u32); + + holder.func_info = ptr_to_u64(ptr); + ptr += holder.nr_func_info * holder.func_info_rec_size; + + holder.line_info = ptr_to_u64(ptr); + ptr += holder.nr_line_info * holder.line_info_rec_size; + + holder.jited_line_info = ptr_to_u64(ptr); + ptr += holder.nr_jited_line_info * holder.jited_line_info_rec_size; + + *info = holder; + return 0; +} + static void print_boot_time(__u64 nsecs, char *buf, unsigned int size) { struct timespec real_time_ts, boot_time_ts; @@ -639,8 +709,8 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode, char func_sig[1024]; unsigned char *buf; __u32 member_len; + int fd, err = -1; ssize_t n; - int fd; if (mode == DUMP_JITED) { if (info->jited_prog_len == 0 || !info->jited_prog_insns) { @@ -679,7 +749,7 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode, if (fd < 0) { p_err("can't open file %s: %s", filepath, strerror(errno)); - return -1; + goto exit_free; } n = write(fd, buf, member_len); @@ -687,7 +757,7 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode, if (n != (ssize_t)member_len) { p_err("error writing output file: %s", n < 0 ? strerror(errno) : "short write"); - return -1; + goto exit_free; } if (json_output) @@ -701,7 +771,7 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode, info->netns_ino, &disasm_opt); if (!name) - return -1; + goto exit_free; } if (info->nr_jited_func_lens && info->jited_func_lens) { @@ -796,23 +866,28 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode, kernel_syms_destroy(&dd); } - btf__free(btf); + err = 0; - return 0; +exit_free: + btf__free(btf); + bpf_prog_linfo__free(prog_linfo); + return err; } static int do_dump(int argc, char **argv) { - struct bpf_prog_info_linear *info_linear; + struct bpf_prog_info info; + __u32 info_len = sizeof(info); + size_t info_data_sz = 0; + void *info_data = NULL; char *filepath = NULL; bool opcodes = false; bool visual = false; enum dump_mode mode; bool linum = false; - int *fds = NULL; int nb_fds, i = 0; + int *fds = NULL; int err = -1; - __u64 arrays; if (is_prefix(*argv, "jited")) { if (disasm_init()) @@ -872,43 +947,44 @@ static int do_dump(int argc, char **argv) goto exit_close; } - if (mode == DUMP_JITED) - arrays = 1UL << BPF_PROG_INFO_JITED_INSNS; - else - arrays = 1UL << BPF_PROG_INFO_XLATED_INSNS; - - arrays |= 1UL << BPF_PROG_INFO_JITED_KSYMS; - arrays |= 1UL << BPF_PROG_INFO_JITED_FUNC_LENS; - arrays |= 1UL << BPF_PROG_INFO_FUNC_INFO; - arrays |= 1UL << BPF_PROG_INFO_LINE_INFO; - arrays |= 1UL << BPF_PROG_INFO_JITED_LINE_INFO; - if (json_output && nb_fds > 1) jsonw_start_array(json_wtr); /* root array */ for (i = 0; i < nb_fds; i++) { - info_linear = bpf_program__get_prog_info_linear(fds[i], arrays); - if (IS_ERR_OR_NULL(info_linear)) { + memset(&info, 0, sizeof(info)); + + err = bpf_obj_get_info_by_fd(fds[i], &info, &info_len); + if (err) { + p_err("can't get prog info: %s", strerror(errno)); + break; + } + + err = prep_prog_info(&info, mode, &info_data, &info_data_sz); + if (err) { + p_err("can't grow prog info_data"); + break; + } + + err = bpf_obj_get_info_by_fd(fds[i], &info, &info_len); + if (err) { p_err("can't get prog info: %s", strerror(errno)); break; } if (json_output && nb_fds > 1) { jsonw_start_object(json_wtr); /* prog object */ - print_prog_header_json(&info_linear->info); + print_prog_header_json(&info); jsonw_name(json_wtr, "insns"); } else if (nb_fds > 1) { - print_prog_header_plain(&info_linear->info); + print_prog_header_plain(&info); } - err = prog_dump(&info_linear->info, mode, filepath, opcodes, - visual, linum); + err = prog_dump(&info, mode, filepath, opcodes, visual, linum); if (json_output && nb_fds > 1) jsonw_end_object(json_wtr); /* prog object */ else if (i != nb_fds - 1 && nb_fds > 1) printf("\n"); - free(info_linear); if (err) break; close(fds[i]); @@ -920,6 +996,7 @@ exit_close: for (; i < nb_fds; i++) close(fds[i]); exit_free: + free(info_data); free(fds); return err; } @@ -1387,7 +1464,6 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) DECLARE_LIBBPF_OPTS(bpf_object_open_opts, open_opts, .relaxed_maps = relaxed_maps, ); - struct bpf_object_load_attr load_attr = { 0 }; enum bpf_attach_type expected_attach_type; struct map_replace *map_replace = NULL; struct bpf_program *prog = NULL, *pos; @@ -1409,8 +1485,6 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) while (argc) { if (is_prefix(*argv, "type")) { - char *type; - NEXT_ARG(); if (common_prog_type != BPF_PROG_TYPE_UNSPEC) { @@ -1420,21 +1494,26 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) if (!REQ_ARGS(1)) goto err_free_reuse_maps; - /* Put a '/' at the end of type to appease libbpf */ - type = malloc(strlen(*argv) + 2); - if (!type) { - p_err("mem alloc failed"); - goto err_free_reuse_maps; - } - *type = 0; - strcat(type, *argv); - strcat(type, "/"); + err = libbpf_prog_type_by_name(*argv, &common_prog_type, + &expected_attach_type); + if (err < 0) { + /* Put a '/' at the end of type to appease libbpf */ + char *type = malloc(strlen(*argv) + 2); - err = get_prog_type_by_name(type, &common_prog_type, - &expected_attach_type); - free(type); - if (err < 0) - goto err_free_reuse_maps; + if (!type) { + p_err("mem alloc failed"); + goto err_free_reuse_maps; + } + *type = 0; + strcat(type, *argv); + strcat(type, "/"); + + err = get_prog_type_by_name(type, &common_prog_type, + &expected_attach_type); + free(type); + if (err < 0) + goto err_free_reuse_maps; + } NEXT_ARG(); } else if (is_prefix(*argv, "map")) { @@ -1518,6 +1597,10 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) set_max_rlimit(); + if (verifier_logs) + /* log_level1 + log_level2 + stats, but not stable UAPI */ + open_opts.kernel_log_level = 1 + 2 + 4; + obj = bpf_object__open_file(file, &open_opts); if (libbpf_get_error(obj)) { p_err("failed to open object file"); @@ -1572,7 +1655,7 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) j = 0; idx = 0; bpf_object__for_each_map(map, obj) { - if (!bpf_map__is_offload_neutral(map)) + if (bpf_map__type(map) != BPF_MAP_TYPE_PERF_EVENT_ARRAY) bpf_map__set_ifindex(map, ifindex); if (j < old_map_fds && idx == map_replace[j].idx) { @@ -1597,12 +1680,7 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) goto err_close_obj; } - load_attr.obj = obj; - if (verifier_logs) - /* log_level1 + log_level2 + stats, but not stable UAPI */ - load_attr.log_level = 1 + 2 + 4; - - err = bpf_object__load_xattr(&load_attr); + err = bpf_object__load(obj); if (err) { p_err("failed to load object file"); goto err_close_obj; @@ -1657,6 +1735,11 @@ err_unpin: else bpf_object__unpin_programs(obj, pinfile); err_close_obj: + if (!legacy_libbpf) { + p_info("Warning: bpftool is now running in libbpf strict mode and has more stringent requirements about BPF programs.\n" + "If it used to work for this object file but now doesn't, see --legacy option for more details.\n"); + } + bpf_object__close(obj); err_free_reuse_maps: for (i = 0; i < old_map_fds; i++) @@ -1689,17 +1772,19 @@ static int try_loader(struct gen_loader_opts *gen) sizeof(struct bpf_prog_desc)); int log_buf_sz = (1u << 24) - 1; int err, fds_before, fd_delta; - char *log_buf; + char *log_buf = NULL; ctx = alloca(ctx_sz); memset(ctx, 0, ctx_sz); ctx->sz = ctx_sz; - ctx->log_level = 1; - ctx->log_size = log_buf_sz; - log_buf = malloc(log_buf_sz); - if (!log_buf) - return -ENOMEM; - ctx->log_buf = (long) log_buf; + if (verifier_logs) { + ctx->log_level = 1 + 2 + 4; + ctx->log_size = log_buf_sz; + log_buf = malloc(log_buf_sz); + if (!log_buf) + return -ENOMEM; + ctx->log_buf = (long) log_buf; + } opts.ctx = ctx; opts.data = gen->data; opts.data_sz = gen->data_sz; @@ -1708,9 +1793,9 @@ static int try_loader(struct gen_loader_opts *gen) fds_before = count_open_fds(); err = bpf_load_and_run(&opts); fd_delta = count_open_fds() - fds_before; - if (err < 0) { + if (err < 0 || verifier_logs) { fprintf(stderr, "err %d\n%s\n%s", err, opts.errstr, log_buf); - if (fd_delta) + if (fd_delta && err < 0) fprintf(stderr, "loader prog leaked %d FDs\n", fd_delta); } @@ -1722,7 +1807,6 @@ static int do_loader(int argc, char **argv) { DECLARE_LIBBPF_OPTS(bpf_object_open_opts, open_opts); DECLARE_LIBBPF_OPTS(gen_loader_opts, gen); - struct bpf_object_load_attr load_attr = {}; struct bpf_object *obj; const char *file; int err = 0; @@ -1731,6 +1815,10 @@ static int do_loader(int argc, char **argv) return -1; file = GET_ARG(); + if (verifier_logs) + /* log_level1 + log_level2 + stats, but not stable UAPI */ + open_opts.kernel_log_level = 1 + 2 + 4; + obj = bpf_object__open_file(file, &open_opts); if (libbpf_get_error(obj)) { p_err("failed to open object file"); @@ -1741,12 +1829,7 @@ static int do_loader(int argc, char **argv) if (err) goto err_close_obj; - load_attr.obj = obj; - if (verifier_logs) - /* log_level1 + log_level2 + stats, but not stable UAPI */ - load_attr.log_level = 1 + 2 + 4; - - err = bpf_object__load_xattr(&load_attr); + err = bpf_object__load(obj); if (err) { p_err("failed to load object file"); goto err_close_obj; @@ -2016,41 +2099,58 @@ static void profile_print_readings(void) static char *profile_target_name(int tgt_fd) { - struct bpf_prog_info_linear *info_linear; - struct bpf_func_info *func_info; + struct bpf_func_info func_info; + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); const struct btf_type *t; + __u32 func_info_rec_size; struct btf *btf = NULL; char *name = NULL; + int err; - info_linear = bpf_program__get_prog_info_linear( - tgt_fd, 1UL << BPF_PROG_INFO_FUNC_INFO); - if (IS_ERR_OR_NULL(info_linear)) { - p_err("failed to get info_linear for prog FD %d", tgt_fd); - return NULL; + err = bpf_obj_get_info_by_fd(tgt_fd, &info, &info_len); + if (err) { + p_err("failed to bpf_obj_get_info_by_fd for prog FD %d", tgt_fd); + goto out; } - if (info_linear->info.btf_id == 0) { + if (info.btf_id == 0) { p_err("prog FD %d doesn't have valid btf", tgt_fd); goto out; } - btf = btf__load_from_kernel_by_id(info_linear->info.btf_id); + func_info_rec_size = info.func_info_rec_size; + if (info.nr_func_info == 0) { + p_err("bpf_obj_get_info_by_fd for prog FD %d found 0 func_info", tgt_fd); + goto out; + } + + memset(&info, 0, sizeof(info)); + info.nr_func_info = 1; + info.func_info_rec_size = func_info_rec_size; + info.func_info = ptr_to_u64(&func_info); + + err = bpf_obj_get_info_by_fd(tgt_fd, &info, &info_len); + if (err) { + p_err("failed to get func_info for prog FD %d", tgt_fd); + goto out; + } + + btf = btf__load_from_kernel_by_id(info.btf_id); if (libbpf_get_error(btf)) { p_err("failed to load btf for prog FD %d", tgt_fd); goto out; } - func_info = u64_to_ptr(info_linear->info.func_info); - t = btf__type_by_id(btf, func_info[0].type_id); + t = btf__type_by_id(btf, func_info.type_id); if (!t) { p_err("btf %d doesn't have type %d", - info_linear->info.btf_id, func_info[0].type_id); + info.btf_id, func_info.type_id); goto out; } name = strdup(btf__name_by_offset(btf, t->name_off)); out: btf__free(btf); - free(info_linear); return name; } diff --git a/tools/bpf/bpftool/struct_ops.c b/tools/bpf/bpftool/struct_ops.c index ab2d2290569a..2f693b082bdb 100644 --- a/tools/bpf/bpftool/struct_ops.c +++ b/tools/bpf/bpftool/struct_ops.c @@ -32,7 +32,7 @@ static const struct btf *get_btf_vmlinux(void) return btf_vmlinux; btf_vmlinux = libbpf_find_kernel_btf(); - if (IS_ERR(btf_vmlinux)) + if (libbpf_get_error(btf_vmlinux)) p_err("struct_ops requires kernel CONFIG_DEBUG_INFO_BTF=y"); return btf_vmlinux; @@ -45,7 +45,7 @@ static const char *get_kern_struct_ops_name(const struct bpf_map_info *info) const char *st_ops_name; kern_btf = get_btf_vmlinux(); - if (IS_ERR(kern_btf)) + if (libbpf_get_error(kern_btf)) return "<btf_vmlinux_not_found>"; t = btf__type_by_id(kern_btf, info->btf_vmlinux_value_type_id); @@ -63,7 +63,7 @@ static __s32 get_map_info_type_id(void) return map_info_type_id; kern_btf = get_btf_vmlinux(); - if (IS_ERR(kern_btf)) { + if (libbpf_get_error(kern_btf)) { map_info_type_id = PTR_ERR(kern_btf); return map_info_type_id; } @@ -252,7 +252,7 @@ static struct res do_one_id(const char *id_str, work_func func, void *data, } fd = bpf_map_get_fd_by_id(id); - if (fd == -1) { + if (fd < 0) { p_err("can't get map by id (%lu): %s", id, strerror(errno)); res.nr_errs++; return res; @@ -415,7 +415,7 @@ static int do_dump(int argc, char **argv) } kern_btf = get_btf_vmlinux(); - if (IS_ERR(kern_btf)) + if (libbpf_get_error(kern_btf)) return -1; if (!json_output) { @@ -479,7 +479,7 @@ static int do_unregister(int argc, char **argv) static int do_register(int argc, char **argv) { - struct bpf_object_load_attr load_attr = {}; + LIBBPF_OPTS(bpf_object_open_opts, open_opts); const struct bpf_map_def *def; struct bpf_map_info info = {}; __u32 info_len = sizeof(info); @@ -494,18 +494,17 @@ static int do_register(int argc, char **argv) file = GET_ARG(); - obj = bpf_object__open(file); - if (IS_ERR_OR_NULL(obj)) + if (verifier_logs) + /* log_level1 + log_level2 + stats, but not stable UAPI */ + open_opts.kernel_log_level = 1 + 2 + 4; + + obj = bpf_object__open_file(file, &open_opts); + if (libbpf_get_error(obj)) return -1; set_max_rlimit(); - load_attr.obj = obj; - if (verifier_logs) - /* log_level1 + log_level2 + stats, but not stable UAPI */ - load_attr.log_level = 1 + 2 + 4; - - if (bpf_object__load_xattr(&load_attr)) { + if (bpf_object__load(obj)) { bpf_object__close(obj); return -1; } @@ -516,7 +515,7 @@ static int do_register(int argc, char **argv) continue; link = bpf_map__attach_struct_ops(map); - if (IS_ERR(link)) { + if (libbpf_get_error(link)) { p_err("can't register struct_ops %s: %s", bpf_map__name(map), strerror(-PTR_ERR(link))); @@ -596,7 +595,7 @@ int do_struct_ops(int argc, char **argv) err = cmd_select(cmds, argc, argv, do_help); - if (!IS_ERR(btf_vmlinux)) + if (!libbpf_get_error(btf_vmlinux)) btf__free(btf_vmlinux); return err; diff --git a/tools/bpf/resolve_btfids/Makefile b/tools/bpf/resolve_btfids/Makefile index 751643f860b2..9ddeca947635 100644 --- a/tools/bpf/resolve_btfids/Makefile +++ b/tools/bpf/resolve_btfids/Makefile @@ -19,6 +19,7 @@ CC = $(HOSTCC) LD = $(HOSTLD) ARCH = $(HOSTARCH) RM ?= rm +CROSS_COMPILE = OUTPUT ?= $(srctree)/tools/bpf/resolve_btfids/ diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c index a59cb0ee609c..5d26f3c6f918 100644 --- a/tools/bpf/resolve_btfids/main.c +++ b/tools/bpf/resolve_btfids/main.c @@ -83,6 +83,7 @@ struct btf_id { int cnt; }; int addr_cnt; + bool is_set; Elf64_Addr addr[ADDR_CNT]; }; @@ -167,7 +168,7 @@ static struct btf_id *btf_id__find(struct rb_root *root, const char *name) return NULL; } -static struct btf_id* +static struct btf_id * btf_id__add(struct rb_root *root, char *name, bool unique) { struct rb_node **p = &root->rb_node; @@ -451,8 +452,10 @@ static int symbols_collect(struct object *obj) * in symbol's size, together with 'cnt' field hence * that - 1. */ - if (id) + if (id) { id->cnt = sym.st_size / sizeof(int) - 1; + id->is_set = true; + } } else { pr_err("FAILED unsupported prefix %s\n", prefix); return -1; @@ -568,9 +571,8 @@ static int id_patch(struct object *obj, struct btf_id *id) int *ptr = data->d_buf; int i; - if (!id->id) { + if (!id->id && !id->is_set) pr_err("WARN: resolve_btfids: unresolved symbol %s\n", id->name); - } for (i = 0; i < id->addr_cnt; i++) { unsigned long addr = id->addr[i]; @@ -730,7 +732,8 @@ int main(int argc, const char **argv) if (obj.efile.idlist_shndx == -1 || obj.efile.symbols_shndx == -1) { pr_debug("Cannot find .BTF_ids or symbols sections, nothing to do\n"); - return 0; + err = 0; + goto out; } if (symbols_collect(&obj)) diff --git a/tools/bpf/runqslower/Makefile b/tools/bpf/runqslower/Makefile index bbd1150578f7..da6de16a3dfb 100644 --- a/tools/bpf/runqslower/Makefile +++ b/tools/bpf/runqslower/Makefile @@ -12,7 +12,7 @@ BPFOBJ := $(BPFOBJ_OUTPUT)libbpf.a BPF_DESTDIR := $(BPFOBJ_OUTPUT) BPF_INCLUDE := $(BPF_DESTDIR)/include INCLUDES := -I$(OUTPUT) -I$(BPF_INCLUDE) -I$(abspath ../../include/uapi) -CFLAGS := -g -Wall +CFLAGS := -g -Wall $(CLANG_CROSS_FLAGS) # Try to detect best kernel BTF source KERNEL_REL := $(shell uname -r) @@ -88,5 +88,4 @@ $(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(BPFOBJ_OU $(DEFAULT_BPFTOOL): $(BPFOBJ) | $(BPFTOOL_OUTPUT) $(Q)$(MAKE) $(submake_extras) -C ../bpftool OUTPUT=$(BPFTOOL_OUTPUT) \ - LIBBPF_OUTPUT=$(BPFOBJ_OUTPUT) \ - LIBBPF_DESTDIR=$(BPF_DESTDIR) CC=$(HOSTCC) LD=$(HOSTLD) + ARCH= CROSS_COMPILE= CC=$(HOSTCC) LD=$(HOSTLD) diff --git a/tools/bpf/runqslower/runqslower.c b/tools/bpf/runqslower/runqslower.c index d89715844952..2414cc764461 100644 --- a/tools/bpf/runqslower/runqslower.c +++ b/tools/bpf/runqslower/runqslower.c @@ -123,7 +123,6 @@ int main(int argc, char **argv) .parser = parse_arg, .doc = argp_program_doc, }; - struct perf_buffer_opts pb_opts; struct perf_buffer *pb = NULL; struct runqslower_bpf *obj; int err; @@ -165,9 +164,8 @@ int main(int argc, char **argv) printf("Tracing run queue latency higher than %llu us\n", env.min_us); printf("%-8s %-16s %-6s %14s\n", "TIME", "COMM", "PID", "LAT(us)"); - pb_opts.sample_cb = handle_event; - pb_opts.lost_cb = handle_lost_events; - pb = perf_buffer__new(bpf_map__fd(obj->maps.events), 64, &pb_opts); + pb = perf_buffer__new(bpf_map__fd(obj->maps.events), 64, + handle_event, handle_lost_events, NULL, NULL); err = libbpf_get_error(pb); if (err) { pb = NULL; diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index 45a9a59828c3..ae61f464043a 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -48,7 +48,6 @@ FEATURE_TESTS_BASIC := \ numa_num_possible_cpus \ libperl \ libpython \ - libpython-version \ libslang \ libslang-include-subdir \ libtraceevent \ diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index 0a3244ad9673..1480910c792e 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -32,7 +32,6 @@ FILES= \ test-numa_num_possible_cpus.bin \ test-libperl.bin \ test-libpython.bin \ - test-libpython-version.bin \ test-libslang.bin \ test-libslang-include-subdir.bin \ test-libtraceevent.bin \ @@ -227,9 +226,6 @@ $(OUTPUT)test-libperl.bin: $(OUTPUT)test-libpython.bin: $(BUILD) $(FLAGS_PYTHON_EMBED) -$(OUTPUT)test-libpython-version.bin: - $(BUILD) - $(OUTPUT)test-libbfd.bin: $(BUILD) -DPACKAGE='"perf"' -lbfd -ldl diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c index 920439527291..5ffafb967b6e 100644 --- a/tools/build/feature/test-all.c +++ b/tools/build/feature/test-all.c @@ -14,10 +14,6 @@ # include "test-libpython.c" #undef main -#define main main_test_libpython_version -# include "test-libpython-version.c" -#undef main - #define main main_test_libperl # include "test-libperl.c" #undef main @@ -177,7 +173,6 @@ int main(int argc, char *argv[]) { main_test_libpython(); - main_test_libpython_version(); main_test_libperl(); main_test_hello(); main_test_libelf(); @@ -200,7 +195,6 @@ int main(int argc, char *argv[]) main_test_timerfd(); main_test_stackprotector_all(); main_test_libdw_dwarf_unwind(); - main_test_sync_compare_and_swap(argc, argv); main_test_zlib(); main_test_pthread_attr_setaffinity_np(); main_test_pthread_barrier(); diff --git a/tools/build/feature/test-bpf.c b/tools/build/feature/test-bpf.c index 82070eadfc07..727d22e34a6e 100644 --- a/tools/build/feature/test-bpf.c +++ b/tools/build/feature/test-bpf.c @@ -14,6 +14,12 @@ # define __NR_bpf 349 # elif defined(__s390__) # define __NR_bpf 351 +# elif defined(__mips__) && defined(_ABIO32) +# define __NR_bpf 4355 +# elif defined(__mips__) && defined(_ABIN32) +# define __NR_bpf 6319 +# elif defined(__mips__) && defined(_ABI64) +# define __NR_bpf 5315 # else # error __NR_bpf not defined. libbpf does not support your arch. # endif diff --git a/tools/build/feature/test-libpython-version.c b/tools/build/feature/test-libpython-version.c deleted file mode 100644 index 47714b942d4d..000000000000 --- a/tools/build/feature/test-libpython-version.c +++ /dev/null @@ -1,11 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <Python.h> - -#if PY_VERSION_HEX >= 0x03000000 - #error -#endif - -int main(void) -{ - return 0; -} diff --git a/tools/include/linux/debug_locks.h b/tools/include/linux/debug_locks.h deleted file mode 100644 index 72d595ce764a..000000000000 --- a/tools/include/linux/debug_locks.h +++ /dev/null @@ -1,14 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LIBLOCKDEP_DEBUG_LOCKS_H_ -#define _LIBLOCKDEP_DEBUG_LOCKS_H_ - -#include <stddef.h> -#include <linux/compiler.h> -#include <asm/bug.h> - -#define DEBUG_LOCKS_WARN_ON(x) WARN_ON(x) - -extern bool debug_locks; -extern bool debug_locks_silent; - -#endif diff --git a/tools/include/linux/hardirq.h b/tools/include/linux/hardirq.h deleted file mode 100644 index b25580b6a9be..000000000000 --- a/tools/include/linux/hardirq.h +++ /dev/null @@ -1,12 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LIBLOCKDEP_LINUX_HARDIRQ_H_ -#define _LIBLOCKDEP_LINUX_HARDIRQ_H_ - -#define SOFTIRQ_BITS 0UL -#define HARDIRQ_BITS 0UL -#define SOFTIRQ_SHIFT 0UL -#define HARDIRQ_SHIFT 0UL -#define hardirq_count() 0UL -#define softirq_count() 0UL - -#endif diff --git a/tools/include/linux/irqflags.h b/tools/include/linux/irqflags.h deleted file mode 100644 index 501262aee8ff..000000000000 --- a/tools/include/linux/irqflags.h +++ /dev/null @@ -1,39 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LIBLOCKDEP_LINUX_TRACE_IRQFLAGS_H_ -#define _LIBLOCKDEP_LINUX_TRACE_IRQFLAGS_H_ - -# define lockdep_hardirq_context() 0 -# define lockdep_softirq_context(p) 0 -# define lockdep_hardirqs_enabled() 0 -# define lockdep_softirqs_enabled(p) 0 -# define lockdep_hardirq_enter() do { } while (0) -# define lockdep_hardirq_exit() do { } while (0) -# define lockdep_softirq_enter() do { } while (0) -# define lockdep_softirq_exit() do { } while (0) -# define INIT_TRACE_IRQFLAGS - -# define stop_critical_timings() do { } while (0) -# define start_critical_timings() do { } while (0) - -#define raw_local_irq_disable() do { } while (0) -#define raw_local_irq_enable() do { } while (0) -#define raw_local_irq_save(flags) ((flags) = 0) -#define raw_local_irq_restore(flags) ((void)(flags)) -#define raw_local_save_flags(flags) ((flags) = 0) -#define raw_irqs_disabled_flags(flags) ((void)(flags)) -#define raw_irqs_disabled() 0 -#define raw_safe_halt() - -#define local_irq_enable() do { } while (0) -#define local_irq_disable() do { } while (0) -#define local_irq_save(flags) ((flags) = 0) -#define local_irq_restore(flags) ((void)(flags)) -#define local_save_flags(flags) ((flags) = 0) -#define irqs_disabled() (1) -#define irqs_disabled_flags(flags) ((void)(flags), 0) -#define safe_halt() do { } while (0) - -#define trace_lock_release(x, y) -#define trace_lock_acquire(a, b, c, d, e, f, g) - -#endif diff --git a/tools/include/linux/kernel.h b/tools/include/linux/kernel.h index a7e54a08fb54..9701e8307db0 100644 --- a/tools/include/linux/kernel.h +++ b/tools/include/linux/kernel.h @@ -7,6 +7,7 @@ #include <assert.h> #include <linux/build_bug.h> #include <linux/compiler.h> +#include <linux/math.h> #include <endian.h> #include <byteswap.h> @@ -14,8 +15,6 @@ #define UINT_MAX (~0U) #endif -#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) - #define PERF_ALIGN(x, a) __PERF_ALIGN_MASK(x, (typeof(x))(a)-1) #define __PERF_ALIGN_MASK(x, mask) (((x)+(mask))&~(mask)) @@ -52,15 +51,6 @@ _min1 < _min2 ? _min1 : _min2; }) #endif -#ifndef roundup -#define roundup(x, y) ( \ -{ \ - const typeof(y) __y = y; \ - (((x) + (__y - 1)) / __y) * __y; \ -} \ -) -#endif - #ifndef BUG_ON #ifdef NDEBUG #define BUG_ON(cond) do { if (cond) {} } while (0) @@ -102,17 +92,9 @@ int vscnprintf(char *buf, size_t size, const char *fmt, va_list args); int scnprintf(char * buf, size_t size, const char * fmt, ...); int scnprintf_pad(char * buf, size_t size, const char * fmt, ...); +#ifndef ARRAY_SIZE #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr)) - -/* - * This looks more complex than it should be. But we need to - * get the type for the ~ right in round_down (it needs to be - * as wide as the result!), and we want to evaluate the macro - * arguments just once each. - */ -#define __round_mask(x, y) ((__typeof__(x))((y)-1)) -#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1) -#define round_down(x, y) ((x) & ~__round_mask(x, y)) +#endif #define current_gfp_context(k) 0 #define synchronize_rcu() diff --git a/tools/include/linux/lockdep.h b/tools/include/linux/lockdep.h deleted file mode 100644 index e56997288f2b..000000000000 --- a/tools/include/linux/lockdep.h +++ /dev/null @@ -1,72 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LIBLOCKDEP_LOCKDEP_H_ -#define _LIBLOCKDEP_LOCKDEP_H_ - -#include <sys/prctl.h> -#include <sys/syscall.h> -#include <string.h> -#include <limits.h> -#include <linux/utsname.h> -#include <linux/compiler.h> -#include <linux/export.h> -#include <linux/kern_levels.h> -#include <linux/err.h> -#include <linux/rcu.h> -#include <linux/list.h> -#include <linux/hardirq.h> -#include <unistd.h> - -#define MAX_LOCK_DEPTH 63UL - -#define asmlinkage -#define __visible - -#include "../../../include/linux/lockdep.h" - -struct task_struct { - u64 curr_chain_key; - int lockdep_depth; - unsigned int lockdep_recursion; - struct held_lock held_locks[MAX_LOCK_DEPTH]; - gfp_t lockdep_reclaim_gfp; - int pid; - int state; - char comm[17]; -}; - -#define TASK_RUNNING 0 - -extern struct task_struct *__curr(void); - -#define current (__curr()) - -static inline int debug_locks_off(void) -{ - return 1; -} - -#define task_pid_nr(tsk) ((tsk)->pid) - -#define KSYM_NAME_LEN 128 -#define printk(...) dprintf(STDOUT_FILENO, __VA_ARGS__) -#define pr_err(format, ...) fprintf (stderr, format, ## __VA_ARGS__) -#define pr_warn pr_err -#define pr_cont pr_err - -#define list_del_rcu list_del - -#define atomic_t unsigned long -#define atomic_inc(x) ((*(x))++) - -#define print_tainted() "" -#define static_obj(x) 1 - -#define debug_show_all_locks() -extern void debug_check_no_locks_held(void); - -static __used bool __is_kernel_percpu_address(unsigned long addr, void *can_addr) -{ - return false; -} - -#endif diff --git a/tools/include/linux/math.h b/tools/include/linux/math.h new file mode 100644 index 000000000000..4e7af99ec9eb --- /dev/null +++ b/tools/include/linux/math.h @@ -0,0 +1,25 @@ +#ifndef _TOOLS_MATH_H +#define _TOOLS_MATH_H + +/* + * This looks more complex than it should be. But we need to + * get the type for the ~ right in round_down (it needs to be + * as wide as the result!), and we want to evaluate the macro + * arguments just once each. + */ +#define __round_mask(x, y) ((__typeof__(x))((y)-1)) +#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1) +#define round_down(x, y) ((x) & ~__round_mask(x, y)) + +#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) + +#ifndef roundup +#define roundup(x, y) ( \ +{ \ + const typeof(y) __y = y; \ + (((x) + (__y - 1)) / __y) * __y; \ +} \ +) +#endif + +#endif diff --git a/tools/include/linux/proc_fs.h b/tools/include/linux/proc_fs.h deleted file mode 100644 index 8b3b03b64fda..000000000000 --- a/tools/include/linux/proc_fs.h +++ /dev/null @@ -1,4 +0,0 @@ -#ifndef _TOOLS_INCLUDE_LINUX_PROC_FS_H -#define _TOOLS_INCLUDE_LINUX_PROC_FS_H - -#endif /* _TOOLS_INCLUDE_LINUX_PROC_FS_H */ diff --git a/tools/include/linux/spinlock.h b/tools/include/linux/spinlock.h index c934572d935c..622266b197d0 100644 --- a/tools/include/linux/spinlock.h +++ b/tools/include/linux/spinlock.h @@ -37,6 +37,4 @@ static inline bool arch_spin_is_locked(arch_spinlock_t *mutex) return true; } -#include <linux/lockdep.h> - #endif diff --git a/tools/include/linux/stacktrace.h b/tools/include/linux/stacktrace.h deleted file mode 100644 index ae343ac35bfa..000000000000 --- a/tools/include/linux/stacktrace.h +++ /dev/null @@ -1,33 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LIBLOCKDEP_LINUX_STACKTRACE_H_ -#define _LIBLOCKDEP_LINUX_STACKTRACE_H_ - -#include <execinfo.h> - -struct stack_trace { - unsigned int nr_entries, max_entries; - unsigned long *entries; - int skip; -}; - -static inline void print_stack_trace(struct stack_trace *trace, int spaces) -{ - backtrace_symbols_fd((void **)trace->entries, trace->nr_entries, 1); -} - -#define save_stack_trace(trace) \ - ((trace)->nr_entries = \ - backtrace((void **)(trace)->entries, (trace)->max_entries)) - -static inline int dump_stack(void) -{ - void *array[64]; - size_t size; - - size = backtrace(array, 64); - backtrace_symbols_fd(array, size, 1); - - return 0; -} - -#endif diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h index 3430667b0d24..c1c285fe494a 100644 --- a/tools/include/nolibc/nolibc.h +++ b/tools/include/nolibc/nolibc.h @@ -265,12 +265,17 @@ struct stat { * - arguments are in rdi, rsi, rdx, r10, r8, r9 respectively * - the system call is performed by calling the syscall instruction * - syscall return comes in rax - * - rcx and r8..r11 may be clobbered, others are preserved. + * - rcx and r11 are clobbered, others are preserved. * - the arguments are cast to long and assigned into the target registers * which are then simply passed as registers to the asm code, so that we * don't have to experience issues with register constraints. * - the syscall number is always specified last in order to allow to force * some registers before (gcc refuses a %-register at the last position). + * - see also x86-64 ABI section A.2 AMD64 Linux Kernel Conventions, A.2.1 + * Calling Conventions. + * + * Link x86-64 ABI: https://gitlab.com/x86-psABIs/x86-64-ABI/-/wikis/x86-64-psABI + * */ #define my_syscall0(num) \ @@ -280,9 +285,9 @@ struct stat { \ asm volatile ( \ "syscall\n" \ - : "=a" (_ret) \ + : "=a"(_ret) \ : "0"(_num) \ - : "rcx", "r8", "r9", "r10", "r11", "memory", "cc" \ + : "rcx", "r11", "memory", "cc" \ ); \ _ret; \ }) @@ -295,10 +300,10 @@ struct stat { \ asm volatile ( \ "syscall\n" \ - : "=a" (_ret) \ + : "=a"(_ret) \ : "r"(_arg1), \ "0"(_num) \ - : "rcx", "r8", "r9", "r10", "r11", "memory", "cc" \ + : "rcx", "r11", "memory", "cc" \ ); \ _ret; \ }) @@ -312,10 +317,10 @@ struct stat { \ asm volatile ( \ "syscall\n" \ - : "=a" (_ret) \ + : "=a"(_ret) \ : "r"(_arg1), "r"(_arg2), \ "0"(_num) \ - : "rcx", "r8", "r9", "r10", "r11", "memory", "cc" \ + : "rcx", "r11", "memory", "cc" \ ); \ _ret; \ }) @@ -330,10 +335,10 @@ struct stat { \ asm volatile ( \ "syscall\n" \ - : "=a" (_ret) \ + : "=a"(_ret) \ : "r"(_arg1), "r"(_arg2), "r"(_arg3), \ "0"(_num) \ - : "rcx", "r8", "r9", "r10", "r11", "memory", "cc" \ + : "rcx", "r11", "memory", "cc" \ ); \ _ret; \ }) @@ -349,10 +354,10 @@ struct stat { \ asm volatile ( \ "syscall\n" \ - : "=a" (_ret), "=r"(_arg4) \ + : "=a"(_ret) \ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \ "0"(_num) \ - : "rcx", "r8", "r9", "r11", "memory", "cc" \ + : "rcx", "r11", "memory", "cc" \ ); \ _ret; \ }) @@ -369,10 +374,10 @@ struct stat { \ asm volatile ( \ "syscall\n" \ - : "=a" (_ret), "=r"(_arg4), "=r"(_arg5) \ + : "=a"(_ret) \ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ "0"(_num) \ - : "rcx", "r9", "r11", "memory", "cc" \ + : "rcx", "r11", "memory", "cc" \ ); \ _ret; \ }) @@ -390,7 +395,7 @@ struct stat { \ asm volatile ( \ "syscall\n" \ - : "=a" (_ret), "=r"(_arg4), "=r"(_arg5) \ + : "=a"(_ret) \ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ "r"(_arg6), "0"(_num) \ : "rcx", "r11", "memory", "cc" \ @@ -399,17 +404,23 @@ struct stat { }) /* startup code */ +/* + * x86-64 System V ABI mandates: + * 1) %rsp must be 16-byte aligned right before the function call. + * 2) The deepest stack frame should be zero (the %rbp). + * + */ asm(".section .text\n" ".global _start\n" "_start:\n" "pop %rdi\n" // argc (first arg, %rdi) "mov %rsp, %rsi\n" // argv[] (second arg, %rsi) "lea 8(%rsi,%rdi,8),%rdx\n" // then a NULL then envp (third arg, %rdx) - "and $-16, %rsp\n" // x86 ABI : esp must be 16-byte aligned when - "sub $8, %rsp\n" // entering the callee + "xor %ebp, %ebp\n" // zero the stack frame + "and $-16, %rsp\n" // x86 ABI : esp must be 16-byte aligned before call "call main\n" // main() returns the status code, we'll exit with it. - "movzb %al, %rdi\n" // retrieve exit code from 8 lower bits - "mov $60, %rax\n" // NR_exit == 60 + "mov %eax, %edi\n" // retrieve exit code (32 bit) + "mov $60, %eax\n" // NR_exit == 60 "syscall\n" // really exit "hlt\n" // ensure it does not return ""); @@ -577,20 +588,28 @@ struct sys_stat_struct { }) /* startup code */ +/* + * i386 System V ABI mandates: + * 1) last pushed argument must be 16-byte aligned. + * 2) The deepest stack frame should be set to zero + * + */ asm(".section .text\n" ".global _start\n" "_start:\n" "pop %eax\n" // argc (first arg, %eax) "mov %esp, %ebx\n" // argv[] (second arg, %ebx) "lea 4(%ebx,%eax,4),%ecx\n" // then a NULL then envp (third arg, %ecx) - "and $-16, %esp\n" // x86 ABI : esp must be 16-byte aligned when + "xor %ebp, %ebp\n" // zero the stack frame + "and $-16, %esp\n" // x86 ABI : esp must be 16-byte aligned before + "sub $4, %esp\n" // the call instruction (args are aligned) "push %ecx\n" // push all registers on the stack so that we "push %ebx\n" // support both regparm and plain stack modes "push %eax\n" "call main\n" // main() returns the status code in %eax - "movzbl %al, %ebx\n" // retrieve exit code from lower 8 bits - "movl $1, %eax\n" // NR_exit == 1 - "int $0x80\n" // exit now + "mov %eax, %ebx\n" // retrieve exit code (32-bit int) + "movl $1, %eax\n" // NR_exit == 1 + "int $0x80\n" // exit now "hlt\n" // ensure it does not ""); @@ -774,7 +793,6 @@ asm(".section .text\n" "and %r3, %r1, $-8\n" // AAPCS : sp must be 8-byte aligned in the "mov %sp, %r3\n" // callee, an bl doesn't push (lr=pc) "bl main\n" // main() returns the status code, we'll exit with it. - "and %r0, %r0, $0xff\n" // limit exit code to 8 bits "movs r7, $1\n" // NR_exit == 1 "svc $0x00\n" ""); @@ -971,7 +989,6 @@ asm(".section .text\n" "add x2, x2, x1\n" // + argv "and sp, x1, -16\n" // sp must be 16-byte aligned in the callee "bl main\n" // main() returns the status code, we'll exit with it. - "and x0, x0, 0xff\n" // limit exit code to 8 bits "mov x8, 93\n" // NR_exit == 93 "svc #0\n" ""); @@ -1176,7 +1193,7 @@ asm(".section .text\n" "addiu $sp,$sp,-16\n" // the callee expects to save a0..a3 there! "jal main\n" // main() returns the status code, we'll exit with it. "nop\n" // delayed slot - "and $a0, $v0, 0xff\n" // limit exit code to 8 bits + "move $a0, $v0\n" // retrieve 32-bit exit code from v0 "li $v0, 4001\n" // NR_exit == 4001 "syscall\n" ".end __start\n" @@ -1374,7 +1391,6 @@ asm(".section .text\n" "add a2,a2,a1\n" // + argv "andi sp,a1,-16\n" // sp must be 16-byte aligned "call main\n" // main() returns the status code, we'll exit with it. - "andi a0, a0, 0xff\n" // limit exit code to 8 bits "li a7, 93\n" // NR_exit == 93 "ecall\n" ""); @@ -1556,6 +1572,12 @@ pid_t sys_getpid(void) } static __attribute__((unused)) +pid_t sys_gettid(void) +{ + return my_syscall0(__NR_gettid); +} + +static __attribute__((unused)) int sys_gettimeofday(struct timeval *tv, struct timezone *tz) { return my_syscall2(__NR_gettimeofday, tv, tz); @@ -2014,6 +2036,18 @@ pid_t getpid(void) } static __attribute__((unused)) +pid_t gettid(void) +{ + pid_t ret = sys_gettid(); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) int gettimeofday(struct timeval *tv, struct timezone *tz) { int ret = sys_gettimeofday(tv, tz); diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index ba5af15e25f5..b0383d371b9a 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1342,8 +1342,10 @@ union bpf_attr { /* or valid module BTF object fd or 0 to attach to vmlinux */ __u32 attach_btf_obj_fd; }; - __u32 :32; /* pad */ + __u32 core_relo_cnt; /* number of bpf_core_relo */ __aligned_u64 fd_array; /* array of FDs */ + __aligned_u64 core_relos; + __u32 core_relo_rec_size; /* sizeof(struct bpf_core_relo) */ }; struct { /* anonymous struct used by BPF_OBJ_* commands */ @@ -1744,7 +1746,7 @@ union bpf_attr { * if the maximum number of tail calls has been reached for this * chain of programs. This limit is defined in the kernel by the * macro **MAX_TAIL_CALL_CNT** (not accessible to user space), - * which is currently set to 32. + * which is currently set to 33. * Return * 0 on success, or a negative error in case of failure. * @@ -4938,6 +4940,84 @@ union bpf_attr { * **-ENOENT** if symbol is not found. * * **-EPERM** if caller does not have permission to obtain kernel address. + * + * long bpf_find_vma(struct task_struct *task, u64 addr, void *callback_fn, void *callback_ctx, u64 flags) + * Description + * Find vma of *task* that contains *addr*, call *callback_fn* + * function with *task*, *vma*, and *callback_ctx*. + * The *callback_fn* should be a static function and + * the *callback_ctx* should be a pointer to the stack. + * The *flags* is used to control certain aspects of the helper. + * Currently, the *flags* must be 0. + * + * The expected callback signature is + * + * long (\*callback_fn)(struct task_struct \*task, struct vm_area_struct \*vma, void \*callback_ctx); + * + * Return + * 0 on success. + * **-ENOENT** if *task->mm* is NULL, or no vma contains *addr*. + * **-EBUSY** if failed to try lock mmap_lock. + * **-EINVAL** for invalid **flags**. + * + * long bpf_loop(u32 nr_loops, void *callback_fn, void *callback_ctx, u64 flags) + * Description + * For **nr_loops**, call **callback_fn** function + * with **callback_ctx** as the context parameter. + * The **callback_fn** should be a static function and + * the **callback_ctx** should be a pointer to the stack. + * The **flags** is used to control certain aspects of the helper. + * Currently, the **flags** must be 0. Currently, nr_loops is + * limited to 1 << 23 (~8 million) loops. + * + * long (\*callback_fn)(u32 index, void \*ctx); + * + * where **index** is the current index in the loop. The index + * is zero-indexed. + * + * If **callback_fn** returns 0, the helper will continue to the next + * loop. If return value is 1, the helper will skip the rest of + * the loops and return. Other return values are not used now, + * and will be rejected by the verifier. + * + * Return + * The number of loops performed, **-EINVAL** for invalid **flags**, + * **-E2BIG** if **nr_loops** exceeds the maximum number of loops. + * + * long bpf_strncmp(const char *s1, u32 s1_sz, const char *s2) + * Description + * Do strncmp() between **s1** and **s2**. **s1** doesn't need + * to be null-terminated and **s1_sz** is the maximum storage + * size of **s1**. **s2** must be a read-only string. + * Return + * An integer less than, equal to, or greater than zero + * if the first **s1_sz** bytes of **s1** is found to be + * less than, to match, or be greater than **s2**. + * + * long bpf_get_func_arg(void *ctx, u32 n, u64 *value) + * Description + * Get **n**-th argument (zero based) of the traced function (for tracing programs) + * returned in **value**. + * + * Return + * 0 on success. + * **-EINVAL** if n >= arguments count of traced function. + * + * long bpf_get_func_ret(void *ctx, u64 *value) + * Description + * Get return value of the traced function (for tracing programs) + * in **value**. + * + * Return + * 0 on success. + * **-EOPNOTSUPP** for tracing programs other than BPF_TRACE_FEXIT or BPF_MODIFY_RETURN. + * + * long bpf_get_func_arg_cnt(void *ctx) + * Description + * Get number of arguments of the traced function (for tracing programs). + * + * Return + * The number of arguments of the traced function. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5120,6 +5200,12 @@ union bpf_attr { FN(trace_vprintk), \ FN(skc_to_unix_sock), \ FN(kallsyms_lookup_name), \ + FN(find_vma), \ + FN(loop), \ + FN(strncmp), \ + FN(get_func_arg), \ + FN(get_func_ret), \ + FN(get_func_arg_cnt), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper @@ -6296,6 +6382,7 @@ struct bpf_sk_lookup { __u32 local_ip4; /* Network byte order */ __u32 local_ip6[4]; /* Network byte order */ __u32 local_port; /* Host byte order */ + __u32 ingress_ifindex; /* The arriving interface. Determined by inet_iif. */ }; /* @@ -6328,4 +6415,78 @@ enum { BTF_F_ZERO = (1ULL << 3), }; +/* bpf_core_relo_kind encodes which aspect of captured field/type/enum value + * has to be adjusted by relocations. It is emitted by llvm and passed to + * libbpf and later to the kernel. + */ +enum bpf_core_relo_kind { + BPF_CORE_FIELD_BYTE_OFFSET = 0, /* field byte offset */ + BPF_CORE_FIELD_BYTE_SIZE = 1, /* field size in bytes */ + BPF_CORE_FIELD_EXISTS = 2, /* field existence in target kernel */ + BPF_CORE_FIELD_SIGNED = 3, /* field signedness (0 - unsigned, 1 - signed) */ + BPF_CORE_FIELD_LSHIFT_U64 = 4, /* bitfield-specific left bitshift */ + BPF_CORE_FIELD_RSHIFT_U64 = 5, /* bitfield-specific right bitshift */ + BPF_CORE_TYPE_ID_LOCAL = 6, /* type ID in local BPF object */ + BPF_CORE_TYPE_ID_TARGET = 7, /* type ID in target kernel */ + BPF_CORE_TYPE_EXISTS = 8, /* type existence in target kernel */ + BPF_CORE_TYPE_SIZE = 9, /* type size in bytes */ + BPF_CORE_ENUMVAL_EXISTS = 10, /* enum value existence in target kernel */ + BPF_CORE_ENUMVAL_VALUE = 11, /* enum value integer value */ +}; + +/* + * "struct bpf_core_relo" is used to pass relocation data form LLVM to libbpf + * and from libbpf to the kernel. + * + * CO-RE relocation captures the following data: + * - insn_off - instruction offset (in bytes) within a BPF program that needs + * its insn->imm field to be relocated with actual field info; + * - type_id - BTF type ID of the "root" (containing) entity of a relocatable + * type or field; + * - access_str_off - offset into corresponding .BTF string section. String + * interpretation depends on specific relocation kind: + * - for field-based relocations, string encodes an accessed field using + * a sequence of field and array indices, separated by colon (:). It's + * conceptually very close to LLVM's getelementptr ([0]) instruction's + * arguments for identifying offset to a field. + * - for type-based relocations, strings is expected to be just "0"; + * - for enum value-based relocations, string contains an index of enum + * value within its enum type; + * - kind - one of enum bpf_core_relo_kind; + * + * Example: + * struct sample { + * int a; + * struct { + * int b[10]; + * }; + * }; + * + * struct sample *s = ...; + * int *x = &s->a; // encoded as "0:0" (a is field #0) + * int *y = &s->b[5]; // encoded as "0:1:0:5" (anon struct is field #1, + * // b is field #0 inside anon struct, accessing elem #5) + * int *z = &s[10]->b; // encoded as "10:1" (ptr is used as an array) + * + * type_id for all relocs in this example will capture BTF type id of + * `struct sample`. + * + * Such relocation is emitted when using __builtin_preserve_access_index() + * Clang built-in, passing expression that captures field address, e.g.: + * + * bpf_probe_read(&dst, sizeof(dst), + * __builtin_preserve_access_index(&src->a.b.c)); + * + * In this case Clang will emit field relocation recording necessary data to + * be able to find offset of embedded `a.b.c` field within `src` struct. + * + * [0] https://llvm.org/docs/LangRef.html#getelementptr-instruction + */ +struct bpf_core_relo { + __u32 insn_off; + __u32 type_id; + __u32 access_str_off; + enum bpf_core_relo_kind kind; +}; + #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/tools/include/uapi/linux/btf.h b/tools/include/uapi/linux/btf.h index deb12f755f0f..b0d8fea1951d 100644 --- a/tools/include/uapi/linux/btf.h +++ b/tools/include/uapi/linux/btf.h @@ -43,7 +43,7 @@ struct btf_type { * "size" tells the size of the type it is describing. * * "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT, - * FUNC, FUNC_PROTO, VAR and DECL_TAG. + * FUNC, FUNC_PROTO, VAR, DECL_TAG and TYPE_TAG. * "type" is a type_id referring to another type. */ union { @@ -75,6 +75,7 @@ enum { BTF_KIND_DATASEC = 15, /* Section */ BTF_KIND_FLOAT = 16, /* Floating point */ BTF_KIND_DECL_TAG = 17, /* Decl Tag */ + BTF_KIND_TYPE_TAG = 18, /* Type Tag */ NR_BTF_KINDS, BTF_KIND_MAX = NR_BTF_KINDS - 1, diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h index b3610fdd1fee..6218f93f5c1a 100644 --- a/tools/include/uapi/linux/if_link.h +++ b/tools/include/uapi/linux/if_link.h @@ -7,24 +7,23 @@ /* This struct should be in sync with struct rtnl_link_stats64 */ struct rtnl_link_stats { - __u32 rx_packets; /* total packets received */ - __u32 tx_packets; /* total packets transmitted */ - __u32 rx_bytes; /* total bytes received */ - __u32 tx_bytes; /* total bytes transmitted */ - __u32 rx_errors; /* bad packets received */ - __u32 tx_errors; /* packet transmit problems */ - __u32 rx_dropped; /* no space in linux buffers */ - __u32 tx_dropped; /* no space available in linux */ - __u32 multicast; /* multicast packets received */ + __u32 rx_packets; + __u32 tx_packets; + __u32 rx_bytes; + __u32 tx_bytes; + __u32 rx_errors; + __u32 tx_errors; + __u32 rx_dropped; + __u32 tx_dropped; + __u32 multicast; __u32 collisions; - /* detailed rx_errors: */ __u32 rx_length_errors; - __u32 rx_over_errors; /* receiver ring buff overflow */ - __u32 rx_crc_errors; /* recved pkt with crc error */ - __u32 rx_frame_errors; /* recv'd frame alignment error */ - __u32 rx_fifo_errors; /* recv'r fifo overrun */ - __u32 rx_missed_errors; /* receiver missed packet */ + __u32 rx_over_errors; + __u32 rx_crc_errors; + __u32 rx_frame_errors; + __u32 rx_fifo_errors; + __u32 rx_missed_errors; /* detailed tx_errors */ __u32 tx_aborted_errors; @@ -37,29 +36,201 @@ struct rtnl_link_stats { __u32 rx_compressed; __u32 tx_compressed; - __u32 rx_nohandler; /* dropped, no handler found */ + __u32 rx_nohandler; }; -/* The main device statistics structure */ +/** + * struct rtnl_link_stats64 - The main device statistics structure. + * + * @rx_packets: Number of good packets received by the interface. + * For hardware interfaces counts all good packets received from the device + * by the host, including packets which host had to drop at various stages + * of processing (even in the driver). + * + * @tx_packets: Number of packets successfully transmitted. + * For hardware interfaces counts packets which host was able to successfully + * hand over to the device, which does not necessarily mean that packets + * had been successfully transmitted out of the device, only that device + * acknowledged it copied them out of host memory. + * + * @rx_bytes: Number of good received bytes, corresponding to @rx_packets. + * + * For IEEE 802.3 devices should count the length of Ethernet Frames + * excluding the FCS. + * + * @tx_bytes: Number of good transmitted bytes, corresponding to @tx_packets. + * + * For IEEE 802.3 devices should count the length of Ethernet Frames + * excluding the FCS. + * + * @rx_errors: Total number of bad packets received on this network device. + * This counter must include events counted by @rx_length_errors, + * @rx_crc_errors, @rx_frame_errors and other errors not otherwise + * counted. + * + * @tx_errors: Total number of transmit problems. + * This counter must include events counter by @tx_aborted_errors, + * @tx_carrier_errors, @tx_fifo_errors, @tx_heartbeat_errors, + * @tx_window_errors and other errors not otherwise counted. + * + * @rx_dropped: Number of packets received but not processed, + * e.g. due to lack of resources or unsupported protocol. + * For hardware interfaces this counter may include packets discarded + * due to L2 address filtering but should not include packets dropped + * by the device due to buffer exhaustion which are counted separately in + * @rx_missed_errors (since procfs folds those two counters together). + * + * @tx_dropped: Number of packets dropped on their way to transmission, + * e.g. due to lack of resources. + * + * @multicast: Multicast packets received. + * For hardware interfaces this statistic is commonly calculated + * at the device level (unlike @rx_packets) and therefore may include + * packets which did not reach the host. + * + * For IEEE 802.3 devices this counter may be equivalent to: + * + * - 30.3.1.1.21 aMulticastFramesReceivedOK + * + * @collisions: Number of collisions during packet transmissions. + * + * @rx_length_errors: Number of packets dropped due to invalid length. + * Part of aggregate "frame" errors in `/proc/net/dev`. + * + * For IEEE 802.3 devices this counter should be equivalent to a sum + * of the following attributes: + * + * - 30.3.1.1.23 aInRangeLengthErrors + * - 30.3.1.1.24 aOutOfRangeLengthField + * - 30.3.1.1.25 aFrameTooLongErrors + * + * @rx_over_errors: Receiver FIFO overflow event counter. + * + * Historically the count of overflow events. Such events may be + * reported in the receive descriptors or via interrupts, and may + * not correspond one-to-one with dropped packets. + * + * The recommended interpretation for high speed interfaces is - + * number of packets dropped because they did not fit into buffers + * provided by the host, e.g. packets larger than MTU or next buffer + * in the ring was not available for a scatter transfer. + * + * Part of aggregate "frame" errors in `/proc/net/dev`. + * + * This statistics was historically used interchangeably with + * @rx_fifo_errors. + * + * This statistic corresponds to hardware events and is not commonly used + * on software devices. + * + * @rx_crc_errors: Number of packets received with a CRC error. + * Part of aggregate "frame" errors in `/proc/net/dev`. + * + * For IEEE 802.3 devices this counter must be equivalent to: + * + * - 30.3.1.1.6 aFrameCheckSequenceErrors + * + * @rx_frame_errors: Receiver frame alignment errors. + * Part of aggregate "frame" errors in `/proc/net/dev`. + * + * For IEEE 802.3 devices this counter should be equivalent to: + * + * - 30.3.1.1.7 aAlignmentErrors + * + * @rx_fifo_errors: Receiver FIFO error counter. + * + * Historically the count of overflow events. Those events may be + * reported in the receive descriptors or via interrupts, and may + * not correspond one-to-one with dropped packets. + * + * This statistics was used interchangeably with @rx_over_errors. + * Not recommended for use in drivers for high speed interfaces. + * + * This statistic is used on software devices, e.g. to count software + * packet queue overflow (can) or sequencing errors (GRE). + * + * @rx_missed_errors: Count of packets missed by the host. + * Folded into the "drop" counter in `/proc/net/dev`. + * + * Counts number of packets dropped by the device due to lack + * of buffer space. This usually indicates that the host interface + * is slower than the network interface, or host is not keeping up + * with the receive packet rate. + * + * This statistic corresponds to hardware events and is not used + * on software devices. + * + * @tx_aborted_errors: + * Part of aggregate "carrier" errors in `/proc/net/dev`. + * For IEEE 802.3 devices capable of half-duplex operation this counter + * must be equivalent to: + * + * - 30.3.1.1.11 aFramesAbortedDueToXSColls + * + * High speed interfaces may use this counter as a general device + * discard counter. + * + * @tx_carrier_errors: Number of frame transmission errors due to loss + * of carrier during transmission. + * Part of aggregate "carrier" errors in `/proc/net/dev`. + * + * For IEEE 802.3 devices this counter must be equivalent to: + * + * - 30.3.1.1.13 aCarrierSenseErrors + * + * @tx_fifo_errors: Number of frame transmission errors due to device + * FIFO underrun / underflow. This condition occurs when the device + * begins transmission of a frame but is unable to deliver the + * entire frame to the transmitter in time for transmission. + * Part of aggregate "carrier" errors in `/proc/net/dev`. + * + * @tx_heartbeat_errors: Number of Heartbeat / SQE Test errors for + * old half-duplex Ethernet. + * Part of aggregate "carrier" errors in `/proc/net/dev`. + * + * For IEEE 802.3 devices possibly equivalent to: + * + * - 30.3.2.1.4 aSQETestErrors + * + * @tx_window_errors: Number of frame transmission errors due + * to late collisions (for Ethernet - after the first 64B of transmission). + * Part of aggregate "carrier" errors in `/proc/net/dev`. + * + * For IEEE 802.3 devices this counter must be equivalent to: + * + * - 30.3.1.1.10 aLateCollisions + * + * @rx_compressed: Number of correctly received compressed packets. + * This counters is only meaningful for interfaces which support + * packet compression (e.g. CSLIP, PPP). + * + * @tx_compressed: Number of transmitted compressed packets. + * This counters is only meaningful for interfaces which support + * packet compression (e.g. CSLIP, PPP). + * + * @rx_nohandler: Number of packets received on the interface + * but dropped by the networking stack because the device is + * not designated to receive packets (e.g. backup link in a bond). + */ struct rtnl_link_stats64 { - __u64 rx_packets; /* total packets received */ - __u64 tx_packets; /* total packets transmitted */ - __u64 rx_bytes; /* total bytes received */ - __u64 tx_bytes; /* total bytes transmitted */ - __u64 rx_errors; /* bad packets received */ - __u64 tx_errors; /* packet transmit problems */ - __u64 rx_dropped; /* no space in linux buffers */ - __u64 tx_dropped; /* no space available in linux */ - __u64 multicast; /* multicast packets received */ + __u64 rx_packets; + __u64 tx_packets; + __u64 rx_bytes; + __u64 tx_bytes; + __u64 rx_errors; + __u64 tx_errors; + __u64 rx_dropped; + __u64 tx_dropped; + __u64 multicast; __u64 collisions; /* detailed rx_errors: */ __u64 rx_length_errors; - __u64 rx_over_errors; /* receiver ring buff overflow */ - __u64 rx_crc_errors; /* recved pkt with crc error */ - __u64 rx_frame_errors; /* recv'd frame alignment error */ - __u64 rx_fifo_errors; /* recv'r fifo overrun */ - __u64 rx_missed_errors; /* receiver missed packet */ + __u64 rx_over_errors; + __u64 rx_crc_errors; + __u64 rx_frame_errors; + __u64 rx_fifo_errors; + __u64 rx_missed_errors; /* detailed tx_errors */ __u64 tx_aborted_errors; @@ -71,8 +242,7 @@ struct rtnl_link_stats64 { /* for cslip etc */ __u64 rx_compressed; __u64 tx_compressed; - - __u64 rx_nohandler; /* dropped, no handler found */ + __u64 rx_nohandler; }; /* The struct should be in sync with struct ifmap */ @@ -170,12 +340,30 @@ enum { IFLA_PROP_LIST, IFLA_ALT_IFNAME, /* Alternative ifname */ IFLA_PERM_ADDRESS, + IFLA_PROTO_DOWN_REASON, + + /* device (sysfs) name as parent, used instead + * of IFLA_LINK where there's no parent netdev + */ + IFLA_PARENT_DEV_NAME, + IFLA_PARENT_DEV_BUS_NAME, + IFLA_GRO_MAX_SIZE, + __IFLA_MAX }; #define IFLA_MAX (__IFLA_MAX - 1) +enum { + IFLA_PROTO_DOWN_REASON_UNSPEC, + IFLA_PROTO_DOWN_REASON_MASK, /* u32, mask for reason bits */ + IFLA_PROTO_DOWN_REASON_VALUE, /* u32, reason bit value */ + + __IFLA_PROTO_DOWN_REASON_CNT, + IFLA_PROTO_DOWN_REASON_MAX = __IFLA_PROTO_DOWN_REASON_CNT - 1 +}; + /* backwards compatibility for userspace */ #ifndef __KERNEL__ #define IFLA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct ifinfomsg)))) @@ -293,6 +481,7 @@ enum { IFLA_BR_MCAST_MLD_VERSION, IFLA_BR_VLAN_STATS_PER_PORT, IFLA_BR_MULTI_BOOLOPT, + IFLA_BR_MCAST_QUERIER_STATE, __IFLA_BR_MAX, }; @@ -346,6 +535,8 @@ enum { IFLA_BRPORT_BACKUP_PORT, IFLA_BRPORT_MRP_RING_OPEN, IFLA_BRPORT_MRP_IN_OPEN, + IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT, + IFLA_BRPORT_MCAST_EHT_HOSTS_CNT, __IFLA_BRPORT_MAX }; #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1) @@ -433,6 +624,7 @@ enum macvlan_macaddr_mode { }; #define MACVLAN_FLAG_NOPROMISC 1 +#define MACVLAN_FLAG_NODST 2 /* skip dst macvlan if matching src macvlan */ /* VRF section */ enum { @@ -597,6 +789,18 @@ enum ifla_geneve_df { GENEVE_DF_MAX = __GENEVE_DF_END - 1, }; +/* Bareudp section */ +enum { + IFLA_BAREUDP_UNSPEC, + IFLA_BAREUDP_PORT, + IFLA_BAREUDP_ETHERTYPE, + IFLA_BAREUDP_SRCPORT_MIN, + IFLA_BAREUDP_MULTIPROTO_MODE, + __IFLA_BAREUDP_MAX +}; + +#define IFLA_BAREUDP_MAX (__IFLA_BAREUDP_MAX - 1) + /* PPP section */ enum { IFLA_PPP_UNSPEC, @@ -655,6 +859,7 @@ enum { IFLA_BOND_TLB_DYNAMIC_LB, IFLA_BOND_PEER_NOTIF_DELAY, IFLA_BOND_AD_LACP_ACTIVE, + IFLA_BOND_MISSED_MAX, __IFLA_BOND_MAX, }; @@ -899,7 +1104,14 @@ enum { #define IFLA_IPOIB_MAX (__IFLA_IPOIB_MAX - 1) -/* HSR section */ +/* HSR/PRP section, both uses same interface */ + +/* Different redundancy protocols for hsr device */ +enum { + HSR_PROTOCOL_HSR, + HSR_PROTOCOL_PRP, + HSR_PROTOCOL_MAX, +}; enum { IFLA_HSR_UNSPEC, @@ -909,6 +1121,9 @@ enum { IFLA_HSR_SUPERVISION_ADDR, /* Supervision frame multicast addr */ IFLA_HSR_SEQ_NR, IFLA_HSR_VERSION, /* HSR version */ + IFLA_HSR_PROTOCOL, /* Indicate different protocol than + * HSR. For example PRP. + */ __IFLA_HSR_MAX, }; @@ -1033,6 +1248,8 @@ enum { #define RMNET_FLAGS_INGRESS_MAP_COMMANDS (1U << 1) #define RMNET_FLAGS_INGRESS_MAP_CKSUMV4 (1U << 2) #define RMNET_FLAGS_EGRESS_MAP_CKSUMV4 (1U << 3) +#define RMNET_FLAGS_INGRESS_MAP_CKSUMV5 (1U << 4) +#define RMNET_FLAGS_EGRESS_MAP_CKSUMV5 (1U << 5) enum { IFLA_RMNET_UNSPEC, @@ -1048,4 +1265,14 @@ struct ifla_rmnet_flags { __u32 mask; }; +/* MCTP section */ + +enum { + IFLA_MCTP_UNSPEC, + IFLA_MCTP_NET, + __IFLA_MCTP_MAX, +}; + +#define IFLA_MCTP_MAX (__IFLA_MCTP_MAX - 1) + #endif /* _UAPI_LINUX_IF_LINK_H */ diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index a067410ebea5..1daa45268de2 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -269,6 +269,7 @@ struct kvm_xen_exit { #define KVM_EXIT_AP_RESET_HOLD 32 #define KVM_EXIT_X86_BUS_LOCK 33 #define KVM_EXIT_XEN 34 +#define KVM_EXIT_RISCV_SBI 35 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -397,13 +398,23 @@ struct kvm_run { * "ndata" is correct, that new fields are enumerated in "flags", * and that each flag enumerates fields that are 64-bit aligned * and sized (so that ndata+internal.data[] is valid/accurate). + * + * Space beyond the defined fields may be used to store arbitrary + * debug information relating to the emulation failure. It is + * accounted for in "ndata" but the format is unspecified and is + * not represented in "flags". Any such information is *not* ABI! */ struct { __u32 suberror; __u32 ndata; __u64 flags; - __u8 insn_size; - __u8 insn_bytes[15]; + union { + struct { + __u8 insn_size; + __u8 insn_bytes[15]; + }; + }; + /* Arbitrary debug data may follow. */ } emulation_failure; /* KVM_EXIT_OSI */ struct { @@ -469,6 +480,13 @@ struct kvm_run { } msr; /* KVM_EXIT_XEN */ struct kvm_xen_exit xen; + /* KVM_EXIT_RISCV_SBI */ + struct { + unsigned long extension_id; + unsigned long function_id; + unsigned long args[6]; + unsigned long ret[2]; + } riscv_sbi; /* Fix the size of the union. */ char padding[256]; }; @@ -1112,6 +1130,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_BINARY_STATS_FD 203 #define KVM_CAP_EXIT_ON_EMULATION_FAILURE 204 #define KVM_CAP_ARM_MTE 205 +#define KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM 206 #ifdef KVM_CAP_IRQ_ROUTING @@ -1223,11 +1242,16 @@ struct kvm_irqfd { /* Do not use 1, KVM_CHECK_EXTENSION returned it before we had flags. */ #define KVM_CLOCK_TSC_STABLE 2 +#define KVM_CLOCK_REALTIME (1 << 2) +#define KVM_CLOCK_HOST_TSC (1 << 3) struct kvm_clock_data { __u64 clock; __u32 flags; - __u32 pad[9]; + __u32 pad0; + __u64 realtime; + __u64 host_tsc; + __u32 pad[4]; }; /* For KVM_CAP_SW_TLB */ diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index b393b5e82380..f947b61b2107 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -84,11 +84,13 @@ else endif # Append required CFLAGS +override CFLAGS += -std=gnu89 override CFLAGS += $(EXTRA_WARNINGS) -Wno-switch-enum override CFLAGS += -Werror -Wall override CFLAGS += $(INCLUDES) override CFLAGS += -fvisibility=hidden override CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 +override CFLAGS += $(CLANG_CROSS_FLAGS) # flags specific for shared library SHLIB_FLAGS := -DSHARED -fPIC @@ -161,7 +163,7 @@ $(BPF_HELPER_DEFS): $(srctree)/tools/include/uapi/linux/bpf.h $(OUTPUT)libbpf.so: $(OUTPUT)libbpf.so.$(LIBBPF_VERSION) $(OUTPUT)libbpf.so.$(LIBBPF_VERSION): $(BPF_IN_SHARED) $(VERSION_SCRIPT) - $(QUIET_LINK)$(CC) $(LDFLAGS) \ + $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) \ --shared -Wl,-soname,libbpf.so.$(LIBBPF_MAJOR_VERSION) \ -Wl,--version-script=$(VERSION_SCRIPT) $< -lelf -lz -o $@ @ln -sf $(@F) $(OUTPUT)libbpf.so diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 725701235fd8..550b4cbb6c99 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -28,6 +28,9 @@ #include <asm/unistd.h> #include <errno.h> #include <linux/bpf.h> +#include <linux/filter.h> +#include <limits.h> +#include <sys/resource.h> #include "bpf.h" #include "libbpf.h" #include "libbpf_internal.h" @@ -49,6 +52,12 @@ # define __NR_bpf 351 # elif defined(__arc__) # define __NR_bpf 280 +# elif defined(__mips__) && defined(_ABIO32) +# define __NR_bpf 4355 +# elif defined(__mips__) && defined(_ABIN32) +# define __NR_bpf 6319 +# elif defined(__mips__) && defined(_ABI64) +# define __NR_bpf 5315 # else # error __NR_bpf not defined. libbpf does not support your arch. # endif @@ -74,158 +83,208 @@ static inline int sys_bpf_fd(enum bpf_cmd cmd, union bpf_attr *attr, return ensure_good_fd(fd); } -static inline int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size) +#define PROG_LOAD_ATTEMPTS 5 + +static inline int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size, int attempts) { - int retries = 5; int fd; do { fd = sys_bpf_fd(BPF_PROG_LOAD, attr, size); - } while (fd < 0 && errno == EAGAIN && retries-- > 0); + } while (fd < 0 && errno == EAGAIN && --attempts > 0); return fd; } -int libbpf__bpf_create_map_xattr(const struct bpf_create_map_params *create_attr) +/* Probe whether kernel switched from memlock-based (RLIMIT_MEMLOCK) to + * memcg-based memory accounting for BPF maps and progs. This was done in [0]. + * We use the support for bpf_ktime_get_coarse_ns() helper, which was added in + * the same 5.11 Linux release ([1]), to detect memcg-based accounting for BPF. + * + * [0] https://lore.kernel.org/bpf/20201201215900.3569844-1-guro@fb.com/ + * [1] d05512618056 ("bpf: Add bpf_ktime_get_coarse_ns helper") + */ +int probe_memcg_account(void) +{ + const size_t prog_load_attr_sz = offsetofend(union bpf_attr, attach_btf_obj_fd); + struct bpf_insn insns[] = { + BPF_EMIT_CALL(BPF_FUNC_ktime_get_coarse_ns), + BPF_EXIT_INSN(), + }; + size_t insn_cnt = sizeof(insns) / sizeof(insns[0]); + union bpf_attr attr; + int prog_fd; + + /* attempt loading freplace trying to use custom BTF */ + memset(&attr, 0, prog_load_attr_sz); + attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; + attr.insns = ptr_to_u64(insns); + attr.insn_cnt = insn_cnt; + attr.license = ptr_to_u64("GPL"); + + prog_fd = sys_bpf_fd(BPF_PROG_LOAD, &attr, prog_load_attr_sz); + if (prog_fd >= 0) { + close(prog_fd); + return 1; + } + return 0; +} + +static bool memlock_bumped; +static rlim_t memlock_rlim = RLIM_INFINITY; + +int libbpf_set_memlock_rlim(size_t memlock_bytes) +{ + if (memlock_bumped) + return libbpf_err(-EBUSY); + + memlock_rlim = memlock_bytes; + return 0; +} + +int bump_rlimit_memlock(void) +{ + struct rlimit rlim; + + /* this the default in libbpf 1.0, but for now user has to opt-in explicitly */ + if (!(libbpf_mode & LIBBPF_STRICT_AUTO_RLIMIT_MEMLOCK)) + return 0; + + /* if kernel supports memcg-based accounting, skip bumping RLIMIT_MEMLOCK */ + if (memlock_bumped || kernel_supports(NULL, FEAT_MEMCG_ACCOUNT)) + return 0; + + memlock_bumped = true; + + /* zero memlock_rlim_max disables auto-bumping RLIMIT_MEMLOCK */ + if (memlock_rlim == 0) + return 0; + + rlim.rlim_cur = rlim.rlim_max = memlock_rlim; + if (setrlimit(RLIMIT_MEMLOCK, &rlim)) + return -errno; + + return 0; +} + +int bpf_map_create(enum bpf_map_type map_type, + const char *map_name, + __u32 key_size, + __u32 value_size, + __u32 max_entries, + const struct bpf_map_create_opts *opts) { + const size_t attr_sz = offsetofend(union bpf_attr, map_extra); union bpf_attr attr; int fd; - memset(&attr, '\0', sizeof(attr)); - - attr.map_type = create_attr->map_type; - attr.key_size = create_attr->key_size; - attr.value_size = create_attr->value_size; - attr.max_entries = create_attr->max_entries; - attr.map_flags = create_attr->map_flags; - if (create_attr->name) - memcpy(attr.map_name, create_attr->name, - min(strlen(create_attr->name), BPF_OBJ_NAME_LEN - 1)); - attr.numa_node = create_attr->numa_node; - attr.btf_fd = create_attr->btf_fd; - attr.btf_key_type_id = create_attr->btf_key_type_id; - attr.btf_value_type_id = create_attr->btf_value_type_id; - attr.map_ifindex = create_attr->map_ifindex; - if (attr.map_type == BPF_MAP_TYPE_STRUCT_OPS) - attr.btf_vmlinux_value_type_id = - create_attr->btf_vmlinux_value_type_id; - else - attr.inner_map_fd = create_attr->inner_map_fd; - attr.map_extra = create_attr->map_extra; + bump_rlimit_memlock(); + + memset(&attr, 0, attr_sz); + + if (!OPTS_VALID(opts, bpf_map_create_opts)) + return libbpf_err(-EINVAL); + + attr.map_type = map_type; + if (map_name) + libbpf_strlcpy(attr.map_name, map_name, sizeof(attr.map_name)); + attr.key_size = key_size; + attr.value_size = value_size; + attr.max_entries = max_entries; - fd = sys_bpf_fd(BPF_MAP_CREATE, &attr, sizeof(attr)); + attr.btf_fd = OPTS_GET(opts, btf_fd, 0); + attr.btf_key_type_id = OPTS_GET(opts, btf_key_type_id, 0); + attr.btf_value_type_id = OPTS_GET(opts, btf_value_type_id, 0); + attr.btf_vmlinux_value_type_id = OPTS_GET(opts, btf_vmlinux_value_type_id, 0); + + attr.inner_map_fd = OPTS_GET(opts, inner_map_fd, 0); + attr.map_flags = OPTS_GET(opts, map_flags, 0); + attr.map_extra = OPTS_GET(opts, map_extra, 0); + attr.numa_node = OPTS_GET(opts, numa_node, 0); + attr.map_ifindex = OPTS_GET(opts, map_ifindex, 0); + + fd = sys_bpf_fd(BPF_MAP_CREATE, &attr, attr_sz); return libbpf_err_errno(fd); } int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr) { - struct bpf_create_map_params p = {}; + LIBBPF_OPTS(bpf_map_create_opts, p); - p.map_type = create_attr->map_type; - p.key_size = create_attr->key_size; - p.value_size = create_attr->value_size; - p.max_entries = create_attr->max_entries; p.map_flags = create_attr->map_flags; - p.name = create_attr->name; p.numa_node = create_attr->numa_node; p.btf_fd = create_attr->btf_fd; p.btf_key_type_id = create_attr->btf_key_type_id; p.btf_value_type_id = create_attr->btf_value_type_id; p.map_ifindex = create_attr->map_ifindex; - if (p.map_type == BPF_MAP_TYPE_STRUCT_OPS) - p.btf_vmlinux_value_type_id = - create_attr->btf_vmlinux_value_type_id; + if (create_attr->map_type == BPF_MAP_TYPE_STRUCT_OPS) + p.btf_vmlinux_value_type_id = create_attr->btf_vmlinux_value_type_id; else p.inner_map_fd = create_attr->inner_map_fd; - return libbpf__bpf_create_map_xattr(&p); + return bpf_map_create(create_attr->map_type, create_attr->name, + create_attr->key_size, create_attr->value_size, + create_attr->max_entries, &p); } int bpf_create_map_node(enum bpf_map_type map_type, const char *name, int key_size, int value_size, int max_entries, __u32 map_flags, int node) { - struct bpf_create_map_attr map_attr = {}; - - map_attr.name = name; - map_attr.map_type = map_type; - map_attr.map_flags = map_flags; - map_attr.key_size = key_size; - map_attr.value_size = value_size; - map_attr.max_entries = max_entries; + LIBBPF_OPTS(bpf_map_create_opts, opts); + + opts.map_flags = map_flags; if (node >= 0) { - map_attr.numa_node = node; - map_attr.map_flags |= BPF_F_NUMA_NODE; + opts.numa_node = node; + opts.map_flags |= BPF_F_NUMA_NODE; } - return bpf_create_map_xattr(&map_attr); + return bpf_map_create(map_type, name, key_size, value_size, max_entries, &opts); } int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size, int max_entries, __u32 map_flags) { - struct bpf_create_map_attr map_attr = {}; + LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = map_flags); - map_attr.map_type = map_type; - map_attr.map_flags = map_flags; - map_attr.key_size = key_size; - map_attr.value_size = value_size; - map_attr.max_entries = max_entries; - - return bpf_create_map_xattr(&map_attr); + return bpf_map_create(map_type, NULL, key_size, value_size, max_entries, &opts); } int bpf_create_map_name(enum bpf_map_type map_type, const char *name, int key_size, int value_size, int max_entries, __u32 map_flags) { - struct bpf_create_map_attr map_attr = {}; - - map_attr.name = name; - map_attr.map_type = map_type; - map_attr.map_flags = map_flags; - map_attr.key_size = key_size; - map_attr.value_size = value_size; - map_attr.max_entries = max_entries; + LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = map_flags); - return bpf_create_map_xattr(&map_attr); + return bpf_map_create(map_type, name, key_size, value_size, max_entries, &opts); } int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name, int key_size, int inner_map_fd, int max_entries, __u32 map_flags, int node) { - union bpf_attr attr; - int fd; - - memset(&attr, '\0', sizeof(attr)); - - attr.map_type = map_type; - attr.key_size = key_size; - attr.value_size = 4; - attr.inner_map_fd = inner_map_fd; - attr.max_entries = max_entries; - attr.map_flags = map_flags; - if (name) - memcpy(attr.map_name, name, - min(strlen(name), BPF_OBJ_NAME_LEN - 1)); + LIBBPF_OPTS(bpf_map_create_opts, opts); + opts.inner_map_fd = inner_map_fd; + opts.map_flags = map_flags; if (node >= 0) { - attr.map_flags |= BPF_F_NUMA_NODE; - attr.numa_node = node; + opts.map_flags |= BPF_F_NUMA_NODE; + opts.numa_node = node; } - fd = sys_bpf_fd(BPF_MAP_CREATE, &attr, sizeof(attr)); - return libbpf_err_errno(fd); + return bpf_map_create(map_type, name, key_size, 4, max_entries, &opts); } int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name, int key_size, int inner_map_fd, int max_entries, __u32 map_flags) { - return bpf_create_map_in_map_node(map_type, name, key_size, - inner_map_fd, max_entries, map_flags, - -1); + LIBBPF_OPTS(bpf_map_create_opts, opts, + .inner_map_fd = inner_map_fd, + .map_flags = map_flags, + ); + + return bpf_map_create(map_type, name, key_size, 4, max_entries, &opts); } static void * @@ -253,58 +312,95 @@ alloc_zero_tailing_info(const void *orecord, __u32 cnt, return info; } -int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr) +DEFAULT_VERSION(bpf_prog_load_v0_6_0, bpf_prog_load, LIBBPF_0.6.0) +int bpf_prog_load_v0_6_0(enum bpf_prog_type prog_type, + const char *prog_name, const char *license, + const struct bpf_insn *insns, size_t insn_cnt, + const struct bpf_prog_load_opts *opts) { void *finfo = NULL, *linfo = NULL; + const char *func_info, *line_info; + __u32 log_size, log_level, attach_prog_fd, attach_btf_obj_fd; + __u32 func_info_rec_size, line_info_rec_size; + int fd, attempts; union bpf_attr attr; - int fd; + char *log_buf; + + bump_rlimit_memlock(); - if (!load_attr->log_buf != !load_attr->log_buf_sz) + if (!OPTS_VALID(opts, bpf_prog_load_opts)) return libbpf_err(-EINVAL); - if (load_attr->log_level > (4 | 2 | 1) || (load_attr->log_level && !load_attr->log_buf)) + attempts = OPTS_GET(opts, attempts, 0); + if (attempts < 0) return libbpf_err(-EINVAL); + if (attempts == 0) + attempts = PROG_LOAD_ATTEMPTS; memset(&attr, 0, sizeof(attr)); - attr.prog_type = load_attr->prog_type; - attr.expected_attach_type = load_attr->expected_attach_type; - if (load_attr->attach_prog_fd) - attr.attach_prog_fd = load_attr->attach_prog_fd; - else - attr.attach_btf_obj_fd = load_attr->attach_btf_obj_fd; - attr.attach_btf_id = load_attr->attach_btf_id; + attr.prog_type = prog_type; + attr.expected_attach_type = OPTS_GET(opts, expected_attach_type, 0); - attr.prog_ifindex = load_attr->prog_ifindex; - attr.kern_version = load_attr->kern_version; + attr.prog_btf_fd = OPTS_GET(opts, prog_btf_fd, 0); + attr.prog_flags = OPTS_GET(opts, prog_flags, 0); + attr.prog_ifindex = OPTS_GET(opts, prog_ifindex, 0); + attr.kern_version = OPTS_GET(opts, kern_version, 0); - attr.insn_cnt = (__u32)load_attr->insn_cnt; - attr.insns = ptr_to_u64(load_attr->insns); - attr.license = ptr_to_u64(load_attr->license); + if (prog_name) + libbpf_strlcpy(attr.prog_name, prog_name, sizeof(attr.prog_name)); + attr.license = ptr_to_u64(license); - attr.log_level = load_attr->log_level; - if (attr.log_level) { - attr.log_buf = ptr_to_u64(load_attr->log_buf); - attr.log_size = load_attr->log_buf_sz; - } + if (insn_cnt > UINT_MAX) + return libbpf_err(-E2BIG); + + attr.insns = ptr_to_u64(insns); + attr.insn_cnt = (__u32)insn_cnt; + + attach_prog_fd = OPTS_GET(opts, attach_prog_fd, 0); + attach_btf_obj_fd = OPTS_GET(opts, attach_btf_obj_fd, 0); + + if (attach_prog_fd && attach_btf_obj_fd) + return libbpf_err(-EINVAL); + + attr.attach_btf_id = OPTS_GET(opts, attach_btf_id, 0); + if (attach_prog_fd) + attr.attach_prog_fd = attach_prog_fd; + else + attr.attach_btf_obj_fd = attach_btf_obj_fd; - attr.prog_btf_fd = load_attr->prog_btf_fd; - attr.prog_flags = load_attr->prog_flags; + log_buf = OPTS_GET(opts, log_buf, NULL); + log_size = OPTS_GET(opts, log_size, 0); + log_level = OPTS_GET(opts, log_level, 0); - attr.func_info_rec_size = load_attr->func_info_rec_size; - attr.func_info_cnt = load_attr->func_info_cnt; - attr.func_info = ptr_to_u64(load_attr->func_info); + if (!!log_buf != !!log_size) + return libbpf_err(-EINVAL); + if (log_level > (4 | 2 | 1)) + return libbpf_err(-EINVAL); + if (log_level && !log_buf) + return libbpf_err(-EINVAL); - attr.line_info_rec_size = load_attr->line_info_rec_size; - attr.line_info_cnt = load_attr->line_info_cnt; - attr.line_info = ptr_to_u64(load_attr->line_info); - attr.fd_array = ptr_to_u64(load_attr->fd_array); + func_info_rec_size = OPTS_GET(opts, func_info_rec_size, 0); + func_info = OPTS_GET(opts, func_info, NULL); + attr.func_info_rec_size = func_info_rec_size; + attr.func_info = ptr_to_u64(func_info); + attr.func_info_cnt = OPTS_GET(opts, func_info_cnt, 0); - if (load_attr->name) - memcpy(attr.prog_name, load_attr->name, - min(strlen(load_attr->name), (size_t)BPF_OBJ_NAME_LEN - 1)); + line_info_rec_size = OPTS_GET(opts, line_info_rec_size, 0); + line_info = OPTS_GET(opts, line_info, NULL); + attr.line_info_rec_size = line_info_rec_size; + attr.line_info = ptr_to_u64(line_info); + attr.line_info_cnt = OPTS_GET(opts, line_info_cnt, 0); + + attr.fd_array = ptr_to_u64(OPTS_GET(opts, fd_array, NULL)); + + if (log_level) { + attr.log_buf = ptr_to_u64(log_buf); + attr.log_size = log_size; + attr.log_level = log_level; + } - fd = sys_bpf_prog_load(&attr, sizeof(attr)); + fd = sys_bpf_prog_load(&attr, sizeof(attr), attempts); if (fd >= 0) return fd; @@ -314,11 +410,11 @@ int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr) */ while (errno == E2BIG && (!finfo || !linfo)) { if (!finfo && attr.func_info_cnt && - attr.func_info_rec_size < load_attr->func_info_rec_size) { + attr.func_info_rec_size < func_info_rec_size) { /* try with corrected func info records */ - finfo = alloc_zero_tailing_info(load_attr->func_info, - load_attr->func_info_cnt, - load_attr->func_info_rec_size, + finfo = alloc_zero_tailing_info(func_info, + attr.func_info_cnt, + func_info_rec_size, attr.func_info_rec_size); if (!finfo) { errno = E2BIG; @@ -326,13 +422,12 @@ int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr) } attr.func_info = ptr_to_u64(finfo); - attr.func_info_rec_size = load_attr->func_info_rec_size; + attr.func_info_rec_size = func_info_rec_size; } else if (!linfo && attr.line_info_cnt && - attr.line_info_rec_size < - load_attr->line_info_rec_size) { - linfo = alloc_zero_tailing_info(load_attr->line_info, - load_attr->line_info_cnt, - load_attr->line_info_rec_size, + attr.line_info_rec_size < line_info_rec_size) { + linfo = alloc_zero_tailing_info(line_info, + attr.line_info_cnt, + line_info_rec_size, attr.line_info_rec_size); if (!linfo) { errno = E2BIG; @@ -340,26 +435,27 @@ int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr) } attr.line_info = ptr_to_u64(linfo); - attr.line_info_rec_size = load_attr->line_info_rec_size; + attr.line_info_rec_size = line_info_rec_size; } else { break; } - fd = sys_bpf_prog_load(&attr, sizeof(attr)); + fd = sys_bpf_prog_load(&attr, sizeof(attr), attempts); if (fd >= 0) goto done; } - if (load_attr->log_level || !load_attr->log_buf) - goto done; + if (log_level == 0 && log_buf) { + /* log_level == 0 with non-NULL log_buf requires retrying on error + * with log_level == 1 and log_buf/log_buf_size set, to get details of + * failure + */ + attr.log_buf = ptr_to_u64(log_buf); + attr.log_size = log_size; + attr.log_level = 1; - /* Try again with log */ - attr.log_buf = ptr_to_u64(load_attr->log_buf); - attr.log_size = load_attr->log_buf_sz; - attr.log_level = 1; - load_attr->log_buf[0] = 0; - - fd = sys_bpf_prog_load(&attr, sizeof(attr)); + fd = sys_bpf_prog_load(&attr, sizeof(attr), attempts); + } done: /* free() doesn't affect errno, so we don't need to restore it */ free(finfo); @@ -367,17 +463,20 @@ done: return libbpf_err_errno(fd); } +__attribute__((alias("bpf_load_program_xattr2"))) int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, - char *log_buf, size_t log_buf_sz) + char *log_buf, size_t log_buf_sz); + +static int bpf_load_program_xattr2(const struct bpf_load_program_attr *load_attr, + char *log_buf, size_t log_buf_sz) { - struct bpf_prog_load_params p = {}; + LIBBPF_OPTS(bpf_prog_load_opts, p); if (!load_attr || !log_buf != !log_buf_sz) return libbpf_err(-EINVAL); - p.prog_type = load_attr->prog_type; p.expected_attach_type = load_attr->expected_attach_type; - switch (p.prog_type) { + switch (load_attr->prog_type) { case BPF_PROG_TYPE_STRUCT_OPS: case BPF_PROG_TYPE_LSM: p.attach_btf_id = load_attr->attach_btf_id; @@ -391,12 +490,9 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, p.prog_ifindex = load_attr->prog_ifindex; p.kern_version = load_attr->kern_version; } - p.insn_cnt = load_attr->insns_cnt; - p.insns = load_attr->insns; - p.license = load_attr->license; p.log_level = load_attr->log_level; p.log_buf = log_buf; - p.log_buf_sz = log_buf_sz; + p.log_size = log_buf_sz; p.prog_btf_fd = load_attr->prog_btf_fd; p.func_info_rec_size = load_attr->func_info_rec_size; p.func_info_cnt = load_attr->func_info_cnt; @@ -404,10 +500,10 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, p.line_info_rec_size = load_attr->line_info_rec_size; p.line_info_cnt = load_attr->line_info_cnt; p.line_info = load_attr->line_info; - p.name = load_attr->name; p.prog_flags = load_attr->prog_flags; - return libbpf__bpf_prog_load(&p); + return bpf_prog_load(load_attr->prog_type, load_attr->name, load_attr->license, + load_attr->insns, load_attr->insns_cnt, &p); } int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, @@ -426,7 +522,7 @@ int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, load_attr.license = license; load_attr.kern_version = kern_version; - return bpf_load_program_xattr(&load_attr, log_buf, log_buf_sz); + return bpf_load_program_xattr2(&load_attr, log_buf, log_buf_sz); } int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns, @@ -437,6 +533,8 @@ int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns, union bpf_attr attr; int fd; + bump_rlimit_memlock(); + memset(&attr, 0, sizeof(attr)); attr.prog_type = type; attr.insn_cnt = (__u32)insns_cnt; @@ -449,7 +547,7 @@ int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns, attr.kern_version = kern_version; attr.prog_flags = prog_flags; - fd = sys_bpf_prog_load(&attr, sizeof(attr)); + fd = sys_bpf_prog_load(&attr, sizeof(attr), PROG_LOAD_ATTEMPTS); return libbpf_err_errno(fd); } @@ -593,11 +691,11 @@ static int bpf_map_batch_common(int cmd, int fd, void *in_batch, return libbpf_err_errno(ret); } -int bpf_map_delete_batch(int fd, void *keys, __u32 *count, +int bpf_map_delete_batch(int fd, const void *keys, __u32 *count, const struct bpf_map_batch_opts *opts) { return bpf_map_batch_common(BPF_MAP_DELETE_BATCH, fd, NULL, - NULL, keys, NULL, count, opts); + NULL, (void *)keys, NULL, count, opts); } int bpf_map_lookup_batch(int fd, void *in_batch, void *out_batch, void *keys, @@ -617,11 +715,11 @@ int bpf_map_lookup_and_delete_batch(int fd, void *in_batch, void *out_batch, count, opts); } -int bpf_map_update_batch(int fd, void *keys, void *values, __u32 *count, +int bpf_map_update_batch(int fd, const void *keys, const void *values, __u32 *count, const struct bpf_map_batch_opts *opts) { return bpf_map_batch_common(BPF_MAP_UPDATE_BATCH, fd, NULL, NULL, - keys, values, count, opts); + (void *)keys, (void *)values, count, opts); } int bpf_obj_pin(int fd, const char *pathname) @@ -1028,24 +1126,67 @@ int bpf_raw_tracepoint_open(const char *name, int prog_fd) return libbpf_err_errno(fd); } -int bpf_load_btf(const void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size, - bool do_log) +int bpf_btf_load(const void *btf_data, size_t btf_size, const struct bpf_btf_load_opts *opts) { - union bpf_attr attr = {}; + const size_t attr_sz = offsetofend(union bpf_attr, btf_log_level); + union bpf_attr attr; + char *log_buf; + size_t log_size; + __u32 log_level; int fd; - attr.btf = ptr_to_u64(btf); + bump_rlimit_memlock(); + + memset(&attr, 0, attr_sz); + + if (!OPTS_VALID(opts, bpf_btf_load_opts)) + return libbpf_err(-EINVAL); + + log_buf = OPTS_GET(opts, log_buf, NULL); + log_size = OPTS_GET(opts, log_size, 0); + log_level = OPTS_GET(opts, log_level, 0); + + if (log_size > UINT_MAX) + return libbpf_err(-EINVAL); + if (log_size && !log_buf) + return libbpf_err(-EINVAL); + + attr.btf = ptr_to_u64(btf_data); attr.btf_size = btf_size; + /* log_level == 0 and log_buf != NULL means "try loading without + * log_buf, but retry with log_buf and log_level=1 on error", which is + * consistent across low-level and high-level BTF and program loading + * APIs within libbpf and provides a sensible behavior in practice + */ + if (log_level) { + attr.btf_log_buf = ptr_to_u64(log_buf); + attr.btf_log_size = (__u32)log_size; + attr.btf_log_level = log_level; + } -retry: - if (do_log && log_buf && log_buf_size) { - attr.btf_log_level = 1; - attr.btf_log_size = log_buf_size; + fd = sys_bpf_fd(BPF_BTF_LOAD, &attr, attr_sz); + if (fd < 0 && log_buf && log_level == 0) { attr.btf_log_buf = ptr_to_u64(log_buf); + attr.btf_log_size = (__u32)log_size; + attr.btf_log_level = 1; + fd = sys_bpf_fd(BPF_BTF_LOAD, &attr, attr_sz); } + return libbpf_err_errno(fd); +} - fd = sys_bpf_fd(BPF_BTF_LOAD, &attr, sizeof(attr)); +int bpf_load_btf(const void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size, bool do_log) +{ + LIBBPF_OPTS(bpf_btf_load_opts, opts); + int fd; + +retry: + if (do_log && log_buf && log_buf_size) { + opts.log_buf = log_buf; + opts.log_size = log_buf_size; + opts.log_level = 1; + } + fd = bpf_btf_load(btf, btf_size, &opts); if (fd < 0 && !do_log && log_buf && log_buf_size) { do_log = true; goto retry; diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 6fffb3cdf39b..14e0d97ad2cf 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -29,11 +29,38 @@ #include <stdint.h> #include "libbpf_common.h" +#include "libbpf_legacy.h" #ifdef __cplusplus extern "C" { #endif +int libbpf_set_memlock_rlim(size_t memlock_bytes); + +struct bpf_map_create_opts { + size_t sz; /* size of this struct for forward/backward compatibility */ + + __u32 btf_fd; + __u32 btf_key_type_id; + __u32 btf_value_type_id; + __u32 btf_vmlinux_value_type_id; + + __u32 inner_map_fd; + __u32 map_flags; + __u64 map_extra; + + __u32 numa_node; + __u32 map_ifindex; +}; +#define bpf_map_create_opts__last_field map_ifindex + +LIBBPF_API int bpf_map_create(enum bpf_map_type map_type, + const char *map_name, + __u32 key_size, + __u32 value_size, + __u32 max_entries, + const struct bpf_map_create_opts *opts); + struct bpf_create_map_attr { const char *name; enum bpf_map_type map_type; @@ -52,25 +79,95 @@ struct bpf_create_map_attr { }; }; -LIBBPF_API int -bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr); +LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead") +LIBBPF_API int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr); +LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead") LIBBPF_API int bpf_create_map_node(enum bpf_map_type map_type, const char *name, int key_size, int value_size, int max_entries, __u32 map_flags, int node); +LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead") LIBBPF_API int bpf_create_map_name(enum bpf_map_type map_type, const char *name, int key_size, int value_size, int max_entries, __u32 map_flags); +LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead") LIBBPF_API int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size, int max_entries, __u32 map_flags); +LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead") LIBBPF_API int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name, int key_size, int inner_map_fd, int max_entries, __u32 map_flags, int node); +LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead") LIBBPF_API int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name, int key_size, int inner_map_fd, int max_entries, __u32 map_flags); +struct bpf_prog_load_opts { + size_t sz; /* size of this struct for forward/backward compatibility */ + + /* libbpf can retry BPF_PROG_LOAD command if bpf() syscall returns + * -EAGAIN. This field determines how many attempts libbpf has to + * make. If not specified, libbpf will use default value of 5. + */ + int attempts; + + enum bpf_attach_type expected_attach_type; + __u32 prog_btf_fd; + __u32 prog_flags; + __u32 prog_ifindex; + __u32 kern_version; + + __u32 attach_btf_id; + __u32 attach_prog_fd; + __u32 attach_btf_obj_fd; + + const int *fd_array; + + /* .BTF.ext func info data */ + const void *func_info; + __u32 func_info_cnt; + __u32 func_info_rec_size; + + /* .BTF.ext line info data */ + const void *line_info; + __u32 line_info_cnt; + __u32 line_info_rec_size; + + /* verifier log options */ + __u32 log_level; + __u32 log_size; + char *log_buf; +}; +#define bpf_prog_load_opts__last_field log_buf + +LIBBPF_API int bpf_prog_load(enum bpf_prog_type prog_type, + const char *prog_name, const char *license, + const struct bpf_insn *insns, size_t insn_cnt, + const struct bpf_prog_load_opts *opts); +/* this "specialization" should go away in libbpf 1.0 */ +LIBBPF_API int bpf_prog_load_v0_6_0(enum bpf_prog_type prog_type, + const char *prog_name, const char *license, + const struct bpf_insn *insns, size_t insn_cnt, + const struct bpf_prog_load_opts *opts); + +/* This is an elaborate way to not conflict with deprecated bpf_prog_load() + * API, defined in libbpf.h. Once we hit libbpf 1.0, all this will be gone. + * With this approach, if someone is calling bpf_prog_load() with + * 4 arguments, they will use the deprecated API, which keeps backwards + * compatibility (both source code and binary). If bpf_prog_load() is called + * with 6 arguments, though, it gets redirected to __bpf_prog_load. + * So looking forward to libbpf 1.0 when this hack will be gone and + * __bpf_prog_load() will be called just bpf_prog_load(). + */ +#ifndef bpf_prog_load +#define bpf_prog_load(...) ___libbpf_overload(___bpf_prog_load, __VA_ARGS__) +#define ___bpf_prog_load4(file, type, pobj, prog_fd) \ + bpf_prog_load_deprecated(file, type, pobj, prog_fd) +#define ___bpf_prog_load6(prog_type, prog_name, license, insns, insn_cnt, opts) \ + bpf_prog_load(prog_type, prog_name, license, insns, insn_cnt, opts) +#endif /* bpf_prog_load */ + struct bpf_load_program_attr { enum bpf_prog_type prog_type; enum bpf_attach_type expected_attach_type; @@ -100,15 +197,18 @@ struct bpf_load_program_attr { /* Flags to direct loading requirements */ #define MAPS_RELAX_COMPAT 0x01 -/* Recommend log buffer size */ +/* Recommended log buffer size */ #define BPF_LOG_BUF_SIZE (UINT32_MAX >> 8) /* verifier maximum in kernels <= 5.1 */ -LIBBPF_API int -bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, - char *log_buf, size_t log_buf_sz); + +LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_prog_load() instead") +LIBBPF_API int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, + char *log_buf, size_t log_buf_sz); +LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_prog_load() instead") LIBBPF_API int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, size_t insns_cnt, const char *license, __u32 kern_version, char *log_buf, size_t log_buf_sz); +LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_prog_load() instead") LIBBPF_API int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns, size_t insns_cnt, __u32 prog_flags, @@ -116,6 +216,23 @@ LIBBPF_API int bpf_verify_program(enum bpf_prog_type type, char *log_buf, size_t log_buf_sz, int log_level); +struct bpf_btf_load_opts { + size_t sz; /* size of this struct for forward/backward compatibility */ + + /* kernel log options */ + char *log_buf; + __u32 log_level; + __u32 log_size; +}; +#define bpf_btf_load_opts__last_field log_size + +LIBBPF_API int bpf_btf_load(const void *btf_data, size_t btf_size, + const struct bpf_btf_load_opts *opts); + +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_btf_load() instead") +LIBBPF_API int bpf_load_btf(const void *btf, __u32 btf_size, char *log_buf, + __u32 log_buf_size, bool do_log); + LIBBPF_API int bpf_map_update_elem(int fd, const void *key, const void *value, __u64 flags); @@ -137,17 +254,128 @@ struct bpf_map_batch_opts { }; #define bpf_map_batch_opts__last_field flags -LIBBPF_API int bpf_map_delete_batch(int fd, void *keys, + +/** + * @brief **bpf_map_delete_batch()** allows for batch deletion of multiple + * elements in a BPF map. + * + * @param fd BPF map file descriptor + * @param keys pointer to an array of *count* keys + * @param count input and output parameter; on input **count** represents the + * number of elements in the map to delete in batch; + * on output if a non-EFAULT error is returned, **count** represents the number of deleted + * elements if the output **count** value is not equal to the input **count** value + * If EFAULT is returned, **count** should not be trusted to be correct. + * @param opts options for configuring the way the batch deletion works + * @return 0, on success; negative error code, otherwise (errno is also set to + * the error code) + */ +LIBBPF_API int bpf_map_delete_batch(int fd, const void *keys, __u32 *count, const struct bpf_map_batch_opts *opts); + +/** + * @brief **bpf_map_lookup_batch()** allows for batch lookup of BPF map elements. + * + * The parameter *in_batch* is the address of the first element in the batch to read. + * *out_batch* is an output parameter that should be passed as *in_batch* to subsequent + * calls to **bpf_map_lookup_batch()**. NULL can be passed for *in_batch* to indicate + * that the batched lookup starts from the beginning of the map. + * + * The *keys* and *values* are output parameters which must point to memory large enough to + * hold *count* items based on the key and value size of the map *map_fd*. The *keys* + * buffer must be of *key_size* * *count*. The *values* buffer must be of + * *value_size* * *count*. + * + * @param fd BPF map file descriptor + * @param in_batch address of the first element in batch to read, can pass NULL to + * indicate that the batched lookup starts from the beginning of the map. + * @param out_batch output parameter that should be passed to next call as *in_batch* + * @param keys pointer to an array large enough for *count* keys + * @param values pointer to an array large enough for *count* values + * @param count input and output parameter; on input it's the number of elements + * in the map to read in batch; on output it's the number of elements that were + * successfully read. + * If a non-EFAULT error is returned, count will be set as the number of elements + * that were read before the error occurred. + * If EFAULT is returned, **count** should not be trusted to be correct. + * @param opts options for configuring the way the batch lookup works + * @return 0, on success; negative error code, otherwise (errno is also set to + * the error code) + */ LIBBPF_API int bpf_map_lookup_batch(int fd, void *in_batch, void *out_batch, void *keys, void *values, __u32 *count, const struct bpf_map_batch_opts *opts); + +/** + * @brief **bpf_map_lookup_and_delete_batch()** allows for batch lookup and deletion + * of BPF map elements where each element is deleted after being retrieved. + * + * @param fd BPF map file descriptor + * @param in_batch address of the first element in batch to read, can pass NULL to + * get address of the first element in *out_batch* + * @param out_batch output parameter that should be passed to next call as *in_batch* + * @param keys pointer to an array of *count* keys + * @param values pointer to an array large enough for *count* values + * @param count input and output parameter; on input it's the number of elements + * in the map to read and delete in batch; on output it represents the number of + * elements that were successfully read and deleted + * If a non-**EFAULT** error code is returned and if the output **count** value + * is not equal to the input **count** value, up to **count** elements may + * have been deleted. + * if **EFAULT** is returned up to *count* elements may have been deleted without + * being returned via the *keys* and *values* output parameters. + * @param opts options for configuring the way the batch lookup and delete works + * @return 0, on success; negative error code, otherwise (errno is also set to + * the error code) + */ LIBBPF_API int bpf_map_lookup_and_delete_batch(int fd, void *in_batch, void *out_batch, void *keys, void *values, __u32 *count, const struct bpf_map_batch_opts *opts); -LIBBPF_API int bpf_map_update_batch(int fd, void *keys, void *values, + +/** + * @brief **bpf_map_update_batch()** updates multiple elements in a map + * by specifying keys and their corresponding values. + * + * The *keys* and *values* parameters must point to memory large enough + * to hold *count* items based on the key and value size of the map. + * + * The *opts* parameter can be used to control how *bpf_map_update_batch()* + * should handle keys that either do or do not already exist in the map. + * In particular the *flags* parameter of *bpf_map_batch_opts* can be + * one of the following: + * + * Note that *count* is an input and output parameter, where on output it + * represents how many elements were successfully updated. Also note that if + * **EFAULT** then *count* should not be trusted to be correct. + * + * **BPF_ANY** + * Create new elements or update existing. + * + * **BPF_NOEXIST** + * Create new elements only if they do not exist. + * + * **BPF_EXIST** + * Update existing elements. + * + * **BPF_F_LOCK** + * Update spin_lock-ed map elements. This must be + * specified if the map value contains a spinlock. + * + * @param fd BPF map file descriptor + * @param keys pointer to an array of *count* keys + * @param values pointer to an array of *count* values + * @param count input and output parameter; on input it's the number of elements + * in the map to update in batch; on output if a non-EFAULT error is returned, + * **count** represents the number of updated elements if the output **count** + * value is not equal to the input **count** value. + * If EFAULT is returned, **count** should not be trusted to be correct. + * @param opts options for configuring the way the batch update works + * @return 0, on success; negative error code, otherwise (errno is also set to + * the error code) + */ +LIBBPF_API int bpf_map_update_batch(int fd, const void *keys, const void *values, __u32 *count, const struct bpf_map_batch_opts *opts); @@ -243,8 +471,6 @@ LIBBPF_API int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags, __u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt); LIBBPF_API int bpf_raw_tracepoint_open(const char *name, int prog_fd); -LIBBPF_API int bpf_load_btf(const void *btf, __u32 btf_size, char *log_buf, - __u32 log_buf_size, bool do_log); LIBBPF_API int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len, __u32 *prog_id, __u32 *fd_type, __u64 *probe_offset, __u64 *probe_addr); diff --git a/tools/lib/bpf/bpf_gen_internal.h b/tools/lib/bpf/bpf_gen_internal.h index d26e5472fe50..223308931d55 100644 --- a/tools/lib/bpf/bpf_gen_internal.h +++ b/tools/lib/bpf/bpf_gen_internal.h @@ -3,6 +3,8 @@ #ifndef __BPF_GEN_INTERNAL_H #define __BPF_GEN_INTERNAL_H +#include "bpf.h" + struct ksym_relo_desc { const char *name; int kind; @@ -37,6 +39,8 @@ struct bpf_gen { int error; struct ksym_relo_desc *relos; int relo_cnt; + struct bpf_core_relo *core_relos; + int core_relo_cnt; char attach_target[128]; int attach_kind; struct ksym_desc *ksyms; @@ -45,17 +49,24 @@ struct bpf_gen { int nr_fd_array; }; -void bpf_gen__init(struct bpf_gen *gen, int log_level); -int bpf_gen__finish(struct bpf_gen *gen); +void bpf_gen__init(struct bpf_gen *gen, int log_level, int nr_progs, int nr_maps); +int bpf_gen__finish(struct bpf_gen *gen, int nr_progs, int nr_maps); void bpf_gen__free(struct bpf_gen *gen); void bpf_gen__load_btf(struct bpf_gen *gen, const void *raw_data, __u32 raw_size); -void bpf_gen__map_create(struct bpf_gen *gen, struct bpf_create_map_params *map_attr, int map_idx); -struct bpf_prog_load_params; -void bpf_gen__prog_load(struct bpf_gen *gen, struct bpf_prog_load_params *load_attr, int prog_idx); +void bpf_gen__map_create(struct bpf_gen *gen, + enum bpf_map_type map_type, const char *map_name, + __u32 key_size, __u32 value_size, __u32 max_entries, + struct bpf_map_create_opts *map_attr, int map_idx); +void bpf_gen__prog_load(struct bpf_gen *gen, + enum bpf_prog_type prog_type, const char *prog_name, + const char *license, struct bpf_insn *insns, size_t insn_cnt, + struct bpf_prog_load_opts *load_attr, int prog_idx); void bpf_gen__map_update_elem(struct bpf_gen *gen, int map_idx, void *value, __u32 value_size); void bpf_gen__map_freeze(struct bpf_gen *gen, int map_idx); void bpf_gen__record_attach_target(struct bpf_gen *gen, const char *name, enum bpf_attach_type type); void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, bool is_weak, bool is_typeless, int kind, int insn_idx); +void bpf_gen__record_relo_core(struct bpf_gen *gen, const struct bpf_core_relo *core_relo); +void bpf_gen__populate_outer_map(struct bpf_gen *gen, int outer_map_idx, int key, int inner_map_idx); #endif diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h index db05a5937105..90f56b0f585f 100644 --- a/tools/lib/bpf/bpf_tracing.h +++ b/tools/lib/bpf/bpf_tracing.h @@ -66,277 +66,204 @@ #if defined(__KERNEL__) || defined(__VMLINUX_H__) -#define PT_REGS_PARM1(x) ((x)->di) -#define PT_REGS_PARM2(x) ((x)->si) -#define PT_REGS_PARM3(x) ((x)->dx) -#define PT_REGS_PARM4(x) ((x)->cx) -#define PT_REGS_PARM5(x) ((x)->r8) -#define PT_REGS_RET(x) ((x)->sp) -#define PT_REGS_FP(x) ((x)->bp) -#define PT_REGS_RC(x) ((x)->ax) -#define PT_REGS_SP(x) ((x)->sp) -#define PT_REGS_IP(x) ((x)->ip) - -#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), di) -#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), si) -#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), dx) -#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), cx) -#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), r8) -#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), sp) -#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), bp) -#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), ax) -#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), sp) -#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), ip) +#define __PT_PARM1_REG di +#define __PT_PARM2_REG si +#define __PT_PARM3_REG dx +#define __PT_PARM4_REG cx +#define __PT_PARM5_REG r8 +#define __PT_RET_REG sp +#define __PT_FP_REG bp +#define __PT_RC_REG ax +#define __PT_SP_REG sp +#define __PT_IP_REG ip #else #ifdef __i386__ -/* i386 kernel is built with -mregparm=3 */ -#define PT_REGS_PARM1(x) ((x)->eax) -#define PT_REGS_PARM2(x) ((x)->edx) -#define PT_REGS_PARM3(x) ((x)->ecx) -#define PT_REGS_PARM4(x) 0 -#define PT_REGS_PARM5(x) 0 -#define PT_REGS_RET(x) ((x)->esp) -#define PT_REGS_FP(x) ((x)->ebp) -#define PT_REGS_RC(x) ((x)->eax) -#define PT_REGS_SP(x) ((x)->esp) -#define PT_REGS_IP(x) ((x)->eip) - -#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), eax) -#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), edx) -#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), ecx) -#define PT_REGS_PARM4_CORE(x) 0 -#define PT_REGS_PARM5_CORE(x) 0 -#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), esp) -#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), ebp) -#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), eax) -#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), esp) -#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), eip) - -#else -#define PT_REGS_PARM1(x) ((x)->rdi) -#define PT_REGS_PARM2(x) ((x)->rsi) -#define PT_REGS_PARM3(x) ((x)->rdx) -#define PT_REGS_PARM4(x) ((x)->rcx) -#define PT_REGS_PARM5(x) ((x)->r8) -#define PT_REGS_RET(x) ((x)->rsp) -#define PT_REGS_FP(x) ((x)->rbp) -#define PT_REGS_RC(x) ((x)->rax) -#define PT_REGS_SP(x) ((x)->rsp) -#define PT_REGS_IP(x) ((x)->rip) - -#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), rdi) -#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), rsi) -#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), rdx) -#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), rcx) -#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), r8) -#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), rsp) -#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), rbp) -#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), rax) -#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), rsp) -#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), rip) - -#endif -#endif +#define __PT_PARM1_REG eax +#define __PT_PARM2_REG edx +#define __PT_PARM3_REG ecx +/* i386 kernel is built with -mregparm=3 */ +#define __PT_PARM4_REG __unsupported__ +#define __PT_PARM5_REG __unsupported__ +#define __PT_RET_REG esp +#define __PT_FP_REG ebp +#define __PT_RC_REG eax +#define __PT_SP_REG esp +#define __PT_IP_REG eip + +#else /* __i386__ */ + +#define __PT_PARM1_REG rdi +#define __PT_PARM2_REG rsi +#define __PT_PARM3_REG rdx +#define __PT_PARM4_REG rcx +#define __PT_PARM5_REG r8 +#define __PT_RET_REG rsp +#define __PT_FP_REG rbp +#define __PT_RC_REG rax +#define __PT_SP_REG rsp +#define __PT_IP_REG rip + +#endif /* __i386__ */ + +#endif /* __KERNEL__ || __VMLINUX_H__ */ #elif defined(bpf_target_s390) /* s390 provides user_pt_regs instead of struct pt_regs to userspace */ -struct pt_regs; -#define PT_REGS_S390 const volatile user_pt_regs -#define PT_REGS_PARM1(x) (((PT_REGS_S390 *)(x))->gprs[2]) -#define PT_REGS_PARM2(x) (((PT_REGS_S390 *)(x))->gprs[3]) -#define PT_REGS_PARM3(x) (((PT_REGS_S390 *)(x))->gprs[4]) -#define PT_REGS_PARM4(x) (((PT_REGS_S390 *)(x))->gprs[5]) -#define PT_REGS_PARM5(x) (((PT_REGS_S390 *)(x))->gprs[6]) -#define PT_REGS_RET(x) (((PT_REGS_S390 *)(x))->gprs[14]) -/* Works only with CONFIG_FRAME_POINTER */ -#define PT_REGS_FP(x) (((PT_REGS_S390 *)(x))->gprs[11]) -#define PT_REGS_RC(x) (((PT_REGS_S390 *)(x))->gprs[2]) -#define PT_REGS_SP(x) (((PT_REGS_S390 *)(x))->gprs[15]) -#define PT_REGS_IP(x) (((PT_REGS_S390 *)(x))->psw.addr) - -#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[2]) -#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[3]) -#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[4]) -#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[5]) -#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[6]) -#define PT_REGS_RET_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[14]) -#define PT_REGS_FP_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[11]) -#define PT_REGS_RC_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[2]) -#define PT_REGS_SP_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[15]) -#define PT_REGS_IP_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), psw.addr) +#define __PT_REGS_CAST(x) ((const user_pt_regs *)(x)) +#define __PT_PARM1_REG gprs[2] +#define __PT_PARM2_REG gprs[3] +#define __PT_PARM3_REG gprs[4] +#define __PT_PARM4_REG gprs[5] +#define __PT_PARM5_REG gprs[6] +#define __PT_RET_REG grps[14] +#define __PT_FP_REG gprs[11] /* Works only with CONFIG_FRAME_POINTER */ +#define __PT_RC_REG gprs[2] +#define __PT_SP_REG gprs[15] +#define __PT_IP_REG psw.addr #elif defined(bpf_target_arm) -#define PT_REGS_PARM1(x) ((x)->uregs[0]) -#define PT_REGS_PARM2(x) ((x)->uregs[1]) -#define PT_REGS_PARM3(x) ((x)->uregs[2]) -#define PT_REGS_PARM4(x) ((x)->uregs[3]) -#define PT_REGS_PARM5(x) ((x)->uregs[4]) -#define PT_REGS_RET(x) ((x)->uregs[14]) -#define PT_REGS_FP(x) ((x)->uregs[11]) /* Works only with CONFIG_FRAME_POINTER */ -#define PT_REGS_RC(x) ((x)->uregs[0]) -#define PT_REGS_SP(x) ((x)->uregs[13]) -#define PT_REGS_IP(x) ((x)->uregs[12]) - -#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), uregs[0]) -#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), uregs[1]) -#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), uregs[2]) -#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), uregs[3]) -#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), uregs[4]) -#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), uregs[14]) -#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), uregs[11]) -#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), uregs[0]) -#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), uregs[13]) -#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), uregs[12]) +#define __PT_PARM1_REG uregs[0] +#define __PT_PARM2_REG uregs[1] +#define __PT_PARM3_REG uregs[2] +#define __PT_PARM4_REG uregs[3] +#define __PT_PARM5_REG uregs[4] +#define __PT_RET_REG uregs[14] +#define __PT_FP_REG uregs[11] /* Works only with CONFIG_FRAME_POINTER */ +#define __PT_RC_REG uregs[0] +#define __PT_SP_REG uregs[13] +#define __PT_IP_REG uregs[12] #elif defined(bpf_target_arm64) /* arm64 provides struct user_pt_regs instead of struct pt_regs to userspace */ -struct pt_regs; -#define PT_REGS_ARM64 const volatile struct user_pt_regs -#define PT_REGS_PARM1(x) (((PT_REGS_ARM64 *)(x))->regs[0]) -#define PT_REGS_PARM2(x) (((PT_REGS_ARM64 *)(x))->regs[1]) -#define PT_REGS_PARM3(x) (((PT_REGS_ARM64 *)(x))->regs[2]) -#define PT_REGS_PARM4(x) (((PT_REGS_ARM64 *)(x))->regs[3]) -#define PT_REGS_PARM5(x) (((PT_REGS_ARM64 *)(x))->regs[4]) -#define PT_REGS_RET(x) (((PT_REGS_ARM64 *)(x))->regs[30]) -/* Works only with CONFIG_FRAME_POINTER */ -#define PT_REGS_FP(x) (((PT_REGS_ARM64 *)(x))->regs[29]) -#define PT_REGS_RC(x) (((PT_REGS_ARM64 *)(x))->regs[0]) -#define PT_REGS_SP(x) (((PT_REGS_ARM64 *)(x))->sp) -#define PT_REGS_IP(x) (((PT_REGS_ARM64 *)(x))->pc) - -#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[0]) -#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[1]) -#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[2]) -#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[3]) -#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[4]) -#define PT_REGS_RET_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[30]) -#define PT_REGS_FP_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[29]) -#define PT_REGS_RC_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[0]) -#define PT_REGS_SP_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), sp) -#define PT_REGS_IP_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), pc) +#define __PT_REGS_CAST(x) ((const struct user_pt_regs *)(x)) +#define __PT_PARM1_REG regs[0] +#define __PT_PARM2_REG regs[1] +#define __PT_PARM3_REG regs[2] +#define __PT_PARM4_REG regs[3] +#define __PT_PARM5_REG regs[4] +#define __PT_RET_REG regs[30] +#define __PT_FP_REG regs[29] /* Works only with CONFIG_FRAME_POINTER */ +#define __PT_RC_REG regs[0] +#define __PT_SP_REG sp +#define __PT_IP_REG pc #elif defined(bpf_target_mips) -#define PT_REGS_PARM1(x) ((x)->regs[4]) -#define PT_REGS_PARM2(x) ((x)->regs[5]) -#define PT_REGS_PARM3(x) ((x)->regs[6]) -#define PT_REGS_PARM4(x) ((x)->regs[7]) -#define PT_REGS_PARM5(x) ((x)->regs[8]) -#define PT_REGS_RET(x) ((x)->regs[31]) -#define PT_REGS_FP(x) ((x)->regs[30]) /* Works only with CONFIG_FRAME_POINTER */ -#define PT_REGS_RC(x) ((x)->regs[2]) -#define PT_REGS_SP(x) ((x)->regs[29]) -#define PT_REGS_IP(x) ((x)->cp0_epc) - -#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), regs[4]) -#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), regs[5]) -#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), regs[6]) -#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), regs[7]) -#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), regs[8]) -#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), regs[31]) -#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), regs[30]) -#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), regs[2]) -#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), regs[29]) -#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), cp0_epc) +#define __PT_PARM1_REG regs[4] +#define __PT_PARM2_REG regs[5] +#define __PT_PARM3_REG regs[6] +#define __PT_PARM4_REG regs[7] +#define __PT_PARM5_REG regs[8] +#define __PT_RET_REG regs[31] +#define __PT_FP_REG regs[30] /* Works only with CONFIG_FRAME_POINTER */ +#define __PT_RC_REG regs[2] +#define __PT_SP_REG regs[29] +#define __PT_IP_REG cp0_epc #elif defined(bpf_target_powerpc) -#define PT_REGS_PARM1(x) ((x)->gpr[3]) -#define PT_REGS_PARM2(x) ((x)->gpr[4]) -#define PT_REGS_PARM3(x) ((x)->gpr[5]) -#define PT_REGS_PARM4(x) ((x)->gpr[6]) -#define PT_REGS_PARM5(x) ((x)->gpr[7]) -#define PT_REGS_RC(x) ((x)->gpr[3]) -#define PT_REGS_SP(x) ((x)->sp) -#define PT_REGS_IP(x) ((x)->nip) - -#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), gpr[3]) -#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), gpr[4]) -#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), gpr[5]) -#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), gpr[6]) -#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), gpr[7]) -#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), gpr[3]) -#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), sp) -#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), nip) +#define __PT_PARM1_REG gpr[3] +#define __PT_PARM2_REG gpr[4] +#define __PT_PARM3_REG gpr[5] +#define __PT_PARM4_REG gpr[6] +#define __PT_PARM5_REG gpr[7] +#define __PT_RET_REG regs[31] +#define __PT_FP_REG __unsupported__ +#define __PT_RC_REG gpr[3] +#define __PT_SP_REG sp +#define __PT_IP_REG nip #elif defined(bpf_target_sparc) -#define PT_REGS_PARM1(x) ((x)->u_regs[UREG_I0]) -#define PT_REGS_PARM2(x) ((x)->u_regs[UREG_I1]) -#define PT_REGS_PARM3(x) ((x)->u_regs[UREG_I2]) -#define PT_REGS_PARM4(x) ((x)->u_regs[UREG_I3]) -#define PT_REGS_PARM5(x) ((x)->u_regs[UREG_I4]) -#define PT_REGS_RET(x) ((x)->u_regs[UREG_I7]) -#define PT_REGS_RC(x) ((x)->u_regs[UREG_I0]) -#define PT_REGS_SP(x) ((x)->u_regs[UREG_FP]) - -#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I0]) -#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I1]) -#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I2]) -#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I3]) -#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I4]) -#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I7]) -#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I0]) -#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), u_regs[UREG_FP]) - +#define __PT_PARM1_REG u_regs[UREG_I0] +#define __PT_PARM2_REG u_regs[UREG_I1] +#define __PT_PARM3_REG u_regs[UREG_I2] +#define __PT_PARM4_REG u_regs[UREG_I3] +#define __PT_PARM5_REG u_regs[UREG_I4] +#define __PT_RET_REG u_regs[UREG_I7] +#define __PT_FP_REG __unsupported__ +#define __PT_RC_REG u_regs[UREG_I0] +#define __PT_SP_REG u_regs[UREG_FP] /* Should this also be a bpf_target check for the sparc case? */ #if defined(__arch64__) -#define PT_REGS_IP(x) ((x)->tpc) -#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), tpc) +#define __PT_IP_REG tpc #else -#define PT_REGS_IP(x) ((x)->pc) -#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), pc) +#define __PT_IP_REG pc #endif #elif defined(bpf_target_riscv) +#define __PT_REGS_CAST(x) ((const struct user_regs_struct *)(x)) +#define __PT_PARM1_REG a0 +#define __PT_PARM2_REG a1 +#define __PT_PARM3_REG a2 +#define __PT_PARM4_REG a3 +#define __PT_PARM5_REG a4 +#define __PT_RET_REG ra +#define __PT_FP_REG fp +#define __PT_RC_REG a5 +#define __PT_SP_REG sp +#define __PT_IP_REG epc + +#endif + +#if defined(bpf_target_defined) + struct pt_regs; -#define PT_REGS_RV const volatile struct user_regs_struct -#define PT_REGS_PARM1(x) (((PT_REGS_RV *)(x))->a0) -#define PT_REGS_PARM2(x) (((PT_REGS_RV *)(x))->a1) -#define PT_REGS_PARM3(x) (((PT_REGS_RV *)(x))->a2) -#define PT_REGS_PARM4(x) (((PT_REGS_RV *)(x))->a3) -#define PT_REGS_PARM5(x) (((PT_REGS_RV *)(x))->a4) -#define PT_REGS_RET(x) (((PT_REGS_RV *)(x))->ra) -#define PT_REGS_FP(x) (((PT_REGS_RV *)(x))->s5) -#define PT_REGS_RC(x) (((PT_REGS_RV *)(x))->a5) -#define PT_REGS_SP(x) (((PT_REGS_RV *)(x))->sp) -#define PT_REGS_IP(x) (((PT_REGS_RV *)(x))->epc) - -#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), a0) -#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), a1) -#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), a2) -#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), a3) -#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), a4) -#define PT_REGS_RET_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), ra) -#define PT_REGS_FP_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), fp) -#define PT_REGS_RC_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), a5) -#define PT_REGS_SP_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), sp) -#define PT_REGS_IP_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), epc) +/* allow some architecutres to override `struct pt_regs` */ +#ifndef __PT_REGS_CAST +#define __PT_REGS_CAST(x) (x) #endif +#define PT_REGS_PARM1(x) (__PT_REGS_CAST(x)->__PT_PARM1_REG) +#define PT_REGS_PARM2(x) (__PT_REGS_CAST(x)->__PT_PARM2_REG) +#define PT_REGS_PARM3(x) (__PT_REGS_CAST(x)->__PT_PARM3_REG) +#define PT_REGS_PARM4(x) (__PT_REGS_CAST(x)->__PT_PARM4_REG) +#define PT_REGS_PARM5(x) (__PT_REGS_CAST(x)->__PT_PARM5_REG) +#define PT_REGS_RET(x) (__PT_REGS_CAST(x)->__PT_RET_REG) +#define PT_REGS_FP(x) (__PT_REGS_CAST(x)->__PT_FP_REG) +#define PT_REGS_RC(x) (__PT_REGS_CAST(x)->__PT_RC_REG) +#define PT_REGS_SP(x) (__PT_REGS_CAST(x)->__PT_SP_REG) +#define PT_REGS_IP(x) (__PT_REGS_CAST(x)->__PT_IP_REG) + +#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM1_REG) +#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM2_REG) +#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM3_REG) +#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM4_REG) +#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM5_REG) +#define PT_REGS_RET_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_RET_REG) +#define PT_REGS_FP_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_FP_REG) +#define PT_REGS_RC_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_RC_REG) +#define PT_REGS_SP_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_SP_REG) +#define PT_REGS_IP_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_IP_REG) + #if defined(bpf_target_powerpc) + #define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = (ctx)->link; }) #define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP + #elif defined(bpf_target_sparc) + #define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = PT_REGS_RET(ctx); }) #define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP -#elif defined(bpf_target_defined) + +#else + #define BPF_KPROBE_READ_RET_IP(ip, ctx) \ ({ bpf_probe_read_kernel(&(ip), sizeof(ip), (void *)PT_REGS_RET(ctx)); }) #define BPF_KRETPROBE_READ_RET_IP(ip, ctx) \ - ({ bpf_probe_read_kernel(&(ip), sizeof(ip), \ - (void *)(PT_REGS_FP(ctx) + sizeof(ip))); }) + ({ bpf_probe_read_kernel(&(ip), sizeof(ip), (void *)(PT_REGS_FP(ctx) + sizeof(ip))); }) + #endif -#if !defined(bpf_target_defined) +#else /* defined(bpf_target_defined) */ #define PT_REGS_PARM1(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) #define PT_REGS_PARM2(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) @@ -363,7 +290,7 @@ struct pt_regs; #define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) #define BPF_KRETPROBE_READ_RET_IP(ip, ctx) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) -#endif /* !defined(bpf_target_defined) */ +#endif /* defined(bpf_target_defined) */ #ifndef ___bpf_concat #define ___bpf_concat(a, b) a ## b @@ -375,25 +302,23 @@ struct pt_regs; #define ___bpf_nth(_, _1, _2, _3, _4, _5, _6, _7, _8, _9, _a, _b, _c, N, ...) N #endif #ifndef ___bpf_narg -#define ___bpf_narg(...) \ - ___bpf_nth(_, ##__VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) +#define ___bpf_narg(...) ___bpf_nth(_, ##__VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) #endif -#define ___bpf_ctx_cast0() ctx -#define ___bpf_ctx_cast1(x) ___bpf_ctx_cast0(), (void *)ctx[0] -#define ___bpf_ctx_cast2(x, args...) ___bpf_ctx_cast1(args), (void *)ctx[1] -#define ___bpf_ctx_cast3(x, args...) ___bpf_ctx_cast2(args), (void *)ctx[2] -#define ___bpf_ctx_cast4(x, args...) ___bpf_ctx_cast3(args), (void *)ctx[3] -#define ___bpf_ctx_cast5(x, args...) ___bpf_ctx_cast4(args), (void *)ctx[4] -#define ___bpf_ctx_cast6(x, args...) ___bpf_ctx_cast5(args), (void *)ctx[5] -#define ___bpf_ctx_cast7(x, args...) ___bpf_ctx_cast6(args), (void *)ctx[6] -#define ___bpf_ctx_cast8(x, args...) ___bpf_ctx_cast7(args), (void *)ctx[7] -#define ___bpf_ctx_cast9(x, args...) ___bpf_ctx_cast8(args), (void *)ctx[8] +#define ___bpf_ctx_cast0() ctx +#define ___bpf_ctx_cast1(x) ___bpf_ctx_cast0(), (void *)ctx[0] +#define ___bpf_ctx_cast2(x, args...) ___bpf_ctx_cast1(args), (void *)ctx[1] +#define ___bpf_ctx_cast3(x, args...) ___bpf_ctx_cast2(args), (void *)ctx[2] +#define ___bpf_ctx_cast4(x, args...) ___bpf_ctx_cast3(args), (void *)ctx[3] +#define ___bpf_ctx_cast5(x, args...) ___bpf_ctx_cast4(args), (void *)ctx[4] +#define ___bpf_ctx_cast6(x, args...) ___bpf_ctx_cast5(args), (void *)ctx[5] +#define ___bpf_ctx_cast7(x, args...) ___bpf_ctx_cast6(args), (void *)ctx[6] +#define ___bpf_ctx_cast8(x, args...) ___bpf_ctx_cast7(args), (void *)ctx[7] +#define ___bpf_ctx_cast9(x, args...) ___bpf_ctx_cast8(args), (void *)ctx[8] #define ___bpf_ctx_cast10(x, args...) ___bpf_ctx_cast9(args), (void *)ctx[9] #define ___bpf_ctx_cast11(x, args...) ___bpf_ctx_cast10(args), (void *)ctx[10] #define ___bpf_ctx_cast12(x, args...) ___bpf_ctx_cast11(args), (void *)ctx[11] -#define ___bpf_ctx_cast(args...) \ - ___bpf_apply(___bpf_ctx_cast, ___bpf_narg(args))(args) +#define ___bpf_ctx_cast(args...) ___bpf_apply(___bpf_ctx_cast, ___bpf_narg(args))(args) /* * BPF_PROG is a convenience wrapper for generic tp_btf/fentry/fexit and @@ -426,19 +351,13 @@ ____##name(unsigned long long *ctx, ##args) struct pt_regs; -#define ___bpf_kprobe_args0() ctx -#define ___bpf_kprobe_args1(x) \ - ___bpf_kprobe_args0(), (void *)PT_REGS_PARM1(ctx) -#define ___bpf_kprobe_args2(x, args...) \ - ___bpf_kprobe_args1(args), (void *)PT_REGS_PARM2(ctx) -#define ___bpf_kprobe_args3(x, args...) \ - ___bpf_kprobe_args2(args), (void *)PT_REGS_PARM3(ctx) -#define ___bpf_kprobe_args4(x, args...) \ - ___bpf_kprobe_args3(args), (void *)PT_REGS_PARM4(ctx) -#define ___bpf_kprobe_args5(x, args...) \ - ___bpf_kprobe_args4(args), (void *)PT_REGS_PARM5(ctx) -#define ___bpf_kprobe_args(args...) \ - ___bpf_apply(___bpf_kprobe_args, ___bpf_narg(args))(args) +#define ___bpf_kprobe_args0() ctx +#define ___bpf_kprobe_args1(x) ___bpf_kprobe_args0(), (void *)PT_REGS_PARM1(ctx) +#define ___bpf_kprobe_args2(x, args...) ___bpf_kprobe_args1(args), (void *)PT_REGS_PARM2(ctx) +#define ___bpf_kprobe_args3(x, args...) ___bpf_kprobe_args2(args), (void *)PT_REGS_PARM3(ctx) +#define ___bpf_kprobe_args4(x, args...) ___bpf_kprobe_args3(args), (void *)PT_REGS_PARM4(ctx) +#define ___bpf_kprobe_args5(x, args...) ___bpf_kprobe_args4(args), (void *)PT_REGS_PARM5(ctx) +#define ___bpf_kprobe_args(args...) ___bpf_apply(___bpf_kprobe_args, ___bpf_narg(args))(args) /* * BPF_KPROBE serves the same purpose for kprobes as BPF_PROG for @@ -464,11 +383,9 @@ typeof(name(0)) name(struct pt_regs *ctx) \ static __attribute__((always_inline)) typeof(name(0)) \ ____##name(struct pt_regs *ctx, ##args) -#define ___bpf_kretprobe_args0() ctx -#define ___bpf_kretprobe_args1(x) \ - ___bpf_kretprobe_args0(), (void *)PT_REGS_RC(ctx) -#define ___bpf_kretprobe_args(args...) \ - ___bpf_apply(___bpf_kretprobe_args, ___bpf_narg(args))(args) +#define ___bpf_kretprobe_args0() ctx +#define ___bpf_kretprobe_args1(x) ___bpf_kretprobe_args0(), (void *)PT_REGS_RC(ctx) +#define ___bpf_kretprobe_args(args...) ___bpf_apply(___bpf_kretprobe_args, ___bpf_narg(args))(args) /* * BPF_KRETPROBE is similar to BPF_KPROBE, except, it only provides optional diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 7e4c5586bd87..9aa19c89f758 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -299,6 +299,7 @@ static int btf_type_size(const struct btf_type *t) case BTF_KIND_TYPEDEF: case BTF_KIND_FUNC: case BTF_KIND_FLOAT: + case BTF_KIND_TYPE_TAG: return base_size; case BTF_KIND_INT: return base_size + sizeof(__u32); @@ -349,6 +350,7 @@ static int btf_bswap_type_rest(struct btf_type *t) case BTF_KIND_TYPEDEF: case BTF_KIND_FUNC: case BTF_KIND_FLOAT: + case BTF_KIND_TYPE_TAG: return 0; case BTF_KIND_INT: *(__u32 *)(t + 1) = bswap_32(*(__u32 *)(t + 1)); @@ -452,7 +454,7 @@ const struct btf *btf__base_btf(const struct btf *btf) } /* internal helper returning non-const pointer to a type */ -struct btf_type *btf_type_by_id(struct btf *btf, __u32 type_id) +struct btf_type *btf_type_by_id(const struct btf *btf, __u32 type_id) { if (type_id == 0) return &btf_void; @@ -608,6 +610,7 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id) case BTF_KIND_RESTRICT: case BTF_KIND_VAR: case BTF_KIND_DECL_TAG: + case BTF_KIND_TYPE_TAG: type_id = t->type; break; case BTF_KIND_ARRAY: @@ -649,6 +652,7 @@ int btf__align_of(const struct btf *btf, __u32 id) case BTF_KIND_VOLATILE: case BTF_KIND_CONST: case BTF_KIND_RESTRICT: + case BTF_KIND_TYPE_TAG: return btf__align_of(btf, t->type); case BTF_KIND_ARRAY: return btf__align_of(btf, btf_array(t)->type); @@ -1120,54 +1124,86 @@ struct btf *btf__parse_split(const char *path, struct btf *base_btf) static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endian); -int btf__load_into_kernel(struct btf *btf) +int btf_load_into_kernel(struct btf *btf, char *log_buf, size_t log_sz, __u32 log_level) { - __u32 log_buf_size = 0, raw_size; - char *log_buf = NULL; + LIBBPF_OPTS(bpf_btf_load_opts, opts); + __u32 buf_sz = 0, raw_size; + char *buf = NULL, *tmp; void *raw_data; int err = 0; if (btf->fd >= 0) return libbpf_err(-EEXIST); + if (log_sz && !log_buf) + return libbpf_err(-EINVAL); -retry_load: - if (log_buf_size) { - log_buf = malloc(log_buf_size); - if (!log_buf) - return libbpf_err(-ENOMEM); - - *log_buf = 0; - } - + /* cache native raw data representation */ raw_data = btf_get_raw_data(btf, &raw_size, false); if (!raw_data) { err = -ENOMEM; goto done; } - /* cache native raw data representation */ btf->raw_size = raw_size; btf->raw_data = raw_data; - btf->fd = bpf_load_btf(raw_data, raw_size, log_buf, log_buf_size, false); +retry_load: + /* if log_level is 0, we won't provide log_buf/log_size to the kernel, + * initially. Only if BTF loading fails, we bump log_level to 1 and + * retry, using either auto-allocated or custom log_buf. This way + * non-NULL custom log_buf provides a buffer just in case, but hopes + * for successful load and no need for log_buf. + */ + if (log_level) { + /* if caller didn't provide custom log_buf, we'll keep + * allocating our own progressively bigger buffers for BTF + * verification log + */ + if (!log_buf) { + buf_sz = max((__u32)BPF_LOG_BUF_SIZE, buf_sz * 2); + tmp = realloc(buf, buf_sz); + if (!tmp) { + err = -ENOMEM; + goto done; + } + buf = tmp; + buf[0] = '\0'; + } + + opts.log_buf = log_buf ? log_buf : buf; + opts.log_size = log_buf ? log_sz : buf_sz; + opts.log_level = log_level; + } + + btf->fd = bpf_btf_load(raw_data, raw_size, &opts); if (btf->fd < 0) { - if (!log_buf || errno == ENOSPC) { - log_buf_size = max((__u32)BPF_LOG_BUF_SIZE, - log_buf_size << 1); - free(log_buf); + /* time to turn on verbose mode and try again */ + if (log_level == 0) { + log_level = 1; goto retry_load; } + /* only retry if caller didn't provide custom log_buf, but + * make sure we can never overflow buf_sz + */ + if (!log_buf && errno == ENOSPC && buf_sz <= UINT_MAX / 2) + goto retry_load; err = -errno; - pr_warn("Error loading BTF: %s(%d)\n", strerror(errno), errno); - if (*log_buf) - pr_warn("%s\n", log_buf); - goto done; + pr_warn("BTF loading error: %d\n", err); + /* don't print out contents of custom log_buf */ + if (!log_buf && buf[0]) + pr_warn("-- BEGIN BTF LOAD LOG ---\n%s\n-- END BTF LOAD LOG --\n", buf); } done: - free(log_buf); + free(buf); return libbpf_err(err); } + +int btf__load_into_kernel(struct btf *btf) +{ + return btf_load_into_kernel(btf, NULL, 0, 0); +} + int btf__load(struct btf *) __attribute__((alias("btf__load_into_kernel"))); int btf__fd(const struct btf *btf) @@ -2236,6 +2272,22 @@ int btf__add_restrict(struct btf *btf, int ref_type_id) } /* + * Append new BTF_KIND_TYPE_TAG type with: + * - *value*, non-empty/non-NULL tag value; + * - *ref_type_id* - referenced type ID, it might not exist yet; + * Returns: + * - >0, type ID of newly added BTF type; + * - <0, on error. + */ +int btf__add_type_tag(struct btf *btf, const char *value, int ref_type_id) +{ + if (!value|| !value[0]) + return libbpf_err(-EINVAL); + + return btf_add_ref_kind(btf, BTF_KIND_TYPE_TAG, value, ref_type_id); +} + +/* * Append new BTF_KIND_FUNC type with: * - *name*, non-empty/non-NULL name; * - *proto_type_id* - FUNC_PROTO's type ID, it might not exist yet; @@ -2711,15 +2763,11 @@ void btf_ext__free(struct btf_ext *btf_ext) free(btf_ext); } -struct btf_ext *btf_ext__new(__u8 *data, __u32 size) +struct btf_ext *btf_ext__new(const __u8 *data, __u32 size) { struct btf_ext *btf_ext; int err; - err = btf_ext_parse_hdr(data, size); - if (err) - return libbpf_err_ptr(err); - btf_ext = calloc(1, sizeof(struct btf_ext)); if (!btf_ext) return libbpf_err_ptr(-ENOMEM); @@ -2732,6 +2780,10 @@ struct btf_ext *btf_ext__new(__u8 *data, __u32 size) } memcpy(btf_ext->data, data, size); + err = btf_ext_parse_hdr(btf_ext->data, size); + if (err) + goto done; + if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, line_info_len)) { err = -EINVAL; goto done; @@ -2846,8 +2898,7 @@ __u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext) struct btf_dedup; -static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext, - const struct btf_dedup_opts *opts); +static struct btf_dedup *btf_dedup_new(struct btf *btf, const struct btf_dedup_opts *opts); static void btf_dedup_free(struct btf_dedup *d); static int btf_dedup_prep(struct btf_dedup *d); static int btf_dedup_strings(struct btf_dedup *d); @@ -2994,12 +3045,17 @@ static int btf_dedup_remap_types(struct btf_dedup *d); * deduplicating structs/unions is described in greater details in comments for * `btf_dedup_is_equiv` function. */ -int btf__dedup(struct btf *btf, struct btf_ext *btf_ext, - const struct btf_dedup_opts *opts) + +DEFAULT_VERSION(btf__dedup_v0_6_0, btf__dedup, LIBBPF_0.6.0) +int btf__dedup_v0_6_0(struct btf *btf, const struct btf_dedup_opts *opts) { - struct btf_dedup *d = btf_dedup_new(btf, btf_ext, opts); + struct btf_dedup *d; int err; + if (!OPTS_VALID(opts, btf_dedup_opts)) + return libbpf_err(-EINVAL); + + d = btf_dedup_new(btf, opts); if (IS_ERR(d)) { pr_debug("btf_dedup_new failed: %ld", PTR_ERR(d)); return libbpf_err(-EINVAL); @@ -3051,6 +3107,19 @@ done: return libbpf_err(err); } +COMPAT_VERSION(btf__dedup_deprecated, btf__dedup, LIBBPF_0.0.2) +int btf__dedup_deprecated(struct btf *btf, struct btf_ext *btf_ext, const void *unused_opts) +{ + LIBBPF_OPTS(btf_dedup_opts, opts, .btf_ext = btf_ext); + + if (unused_opts) { + pr_warn("please use new version of btf__dedup() that supports options\n"); + return libbpf_err(-ENOTSUP); + } + + return btf__dedup(btf, &opts); +} + #define BTF_UNPROCESSED_ID ((__u32)-1) #define BTF_IN_PROGRESS_ID ((__u32)-2) @@ -3163,8 +3232,7 @@ static bool btf_dedup_equal_fn(const void *k1, const void *k2, void *ctx) return k1 == k2; } -static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext, - const struct btf_dedup_opts *opts) +static struct btf_dedup *btf_dedup_new(struct btf *btf, const struct btf_dedup_opts *opts) { struct btf_dedup *d = calloc(1, sizeof(struct btf_dedup)); hashmap_hash_fn hash_fn = btf_dedup_identity_hash_fn; @@ -3173,13 +3241,11 @@ static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext, if (!d) return ERR_PTR(-ENOMEM); - d->opts.dont_resolve_fwds = opts && opts->dont_resolve_fwds; - /* dedup_table_size is now used only to force collisions in tests */ - if (opts && opts->dedup_table_size == 1) + if (OPTS_GET(opts, force_collisions, false)) hash_fn = btf_dedup_collision_hash_fn; d->btf = btf; - d->btf_ext = btf_ext; + d->btf_ext = OPTS_GET(opts, btf_ext, NULL); d->dedup_table = hashmap__new(hash_fn, btf_dedup_equal_fn, NULL); if (IS_ERR(d->dedup_table)) { @@ -3443,8 +3509,8 @@ static long btf_hash_struct(struct btf_type *t) } /* - * Check structural compatibility of two FUNC_PROTOs, ignoring referenced type - * IDs. This check is performed during type graph equivalence check and + * Check structural compatibility of two STRUCTs/UNIONs, ignoring referenced + * type IDs. This check is performed during type graph equivalence check and * referenced types equivalence is checked separately. */ static bool btf_shallow_equal_struct(struct btf_type *t1, struct btf_type *t2) @@ -3625,6 +3691,7 @@ static int btf_dedup_prep(struct btf_dedup *d) case BTF_KIND_TYPEDEF: case BTF_KIND_FUNC: case BTF_KIND_FLOAT: + case BTF_KIND_TYPE_TAG: h = btf_hash_common(t); break; case BTF_KIND_INT: @@ -3685,6 +3752,7 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id) case BTF_KIND_VAR: case BTF_KIND_DATASEC: case BTF_KIND_DECL_TAG: + case BTF_KIND_TYPE_TAG: return 0; case BTF_KIND_INT: @@ -3708,8 +3776,6 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id) new_id = cand_id; break; } - if (d->opts.dont_resolve_fwds) - continue; if (btf_compat_enum(t, cand)) { if (btf_is_enum_fwd(t)) { /* resolve fwd to full enum */ @@ -3817,6 +3883,31 @@ static int btf_dedup_identical_arrays(struct btf_dedup *d, __u32 id1, __u32 id2) return btf_equal_array(t1, t2); } +/* Check if given two types are identical STRUCT/UNION definitions */ +static bool btf_dedup_identical_structs(struct btf_dedup *d, __u32 id1, __u32 id2) +{ + const struct btf_member *m1, *m2; + struct btf_type *t1, *t2; + int n, i; + + t1 = btf_type_by_id(d->btf, id1); + t2 = btf_type_by_id(d->btf, id2); + + if (!btf_is_composite(t1) || btf_kind(t1) != btf_kind(t2)) + return false; + + if (!btf_shallow_equal_struct(t1, t2)) + return false; + + m1 = btf_members(t1); + m2 = btf_members(t2); + for (i = 0, n = btf_vlen(t1); i < n; i++, m1++, m2++) { + if (m1->type != m2->type) + return false; + } + return true; +} + /* * Check equivalence of BTF type graph formed by candidate struct/union (we'll * call it "candidate graph" in this description for brevity) to a type graph @@ -3928,6 +4019,8 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id, hypot_type_id = d->hypot_map[canon_id]; if (hypot_type_id <= BTF_MAX_NR_TYPES) { + if (hypot_type_id == cand_id) + return 1; /* In some cases compiler will generate different DWARF types * for *identical* array type definitions and use them for * different fields within the *same* struct. This breaks type @@ -3936,8 +4029,18 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id, * types within a single CU. So work around that by explicitly * allowing identical array types here. */ - return hypot_type_id == cand_id || - btf_dedup_identical_arrays(d, hypot_type_id, cand_id); + if (btf_dedup_identical_arrays(d, hypot_type_id, cand_id)) + return 1; + /* It turns out that similar situation can happen with + * struct/union sometimes, sigh... Handle the case where + * structs/unions are exactly the same, down to the referenced + * type IDs. Anything more complicated (e.g., if referenced + * types are different, but equivalent) is *way more* + * complicated and requires a many-to-many equivalence mapping. + */ + if (btf_dedup_identical_structs(d, hypot_type_id, cand_id)) + return 1; + return 0; } if (btf_dedup_hypot_map_add(d, canon_id, cand_id)) @@ -3952,8 +4055,7 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id, return 0; /* FWD <--> STRUCT/UNION equivalence check, if enabled */ - if (!d->opts.dont_resolve_fwds - && (cand_kind == BTF_KIND_FWD || canon_kind == BTF_KIND_FWD) + if ((cand_kind == BTF_KIND_FWD || canon_kind == BTF_KIND_FWD) && cand_kind != canon_kind) { __u16 real_kind; __u16 fwd_kind; @@ -3979,10 +4081,7 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id, return btf_equal_int_tag(cand_type, canon_type); case BTF_KIND_ENUM: - if (d->opts.dont_resolve_fwds) - return btf_equal_enum(cand_type, canon_type); - else - return btf_compat_enum(cand_type, canon_type); + return btf_compat_enum(cand_type, canon_type); case BTF_KIND_FWD: case BTF_KIND_FLOAT: @@ -3994,6 +4093,7 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id, case BTF_KIND_PTR: case BTF_KIND_TYPEDEF: case BTF_KIND_FUNC: + case BTF_KIND_TYPE_TAG: if (cand_type->info != canon_type->info) return 0; return btf_dedup_is_equiv(d, cand_type->type, canon_type->type); @@ -4289,6 +4389,7 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id) case BTF_KIND_PTR: case BTF_KIND_TYPEDEF: case BTF_KIND_FUNC: + case BTF_KIND_TYPE_TAG: ref_type_id = btf_dedup_ref_type(d, t->type); if (ref_type_id < 0) return ref_type_id; @@ -4595,6 +4696,7 @@ int btf_type_visit_type_ids(struct btf_type *t, type_id_visit_fn visit, void *ct case BTF_KIND_FUNC: case BTF_KIND_VAR: case BTF_KIND_DECL_TAG: + case BTF_KIND_TYPE_TAG: return visit(&t->type, ctx); case BTF_KIND_ARRAY: { diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index bc005ba3ceec..061839f04525 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -157,7 +157,7 @@ LIBBPF_API int btf__get_map_kv_tids(const struct btf *btf, const char *map_name, __u32 expected_value_size, __u32 *key_type_id, __u32 *value_type_id); -LIBBPF_API struct btf_ext *btf_ext__new(__u8 *data, __u32 size); +LIBBPF_API struct btf_ext *btf_ext__new(const __u8 *data, __u32 size); LIBBPF_API void btf_ext__free(struct btf_ext *btf_ext); LIBBPF_API const void *btf_ext__get_raw_data(const struct btf_ext *btf_ext, __u32 *size); @@ -227,6 +227,7 @@ LIBBPF_API int btf__add_typedef(struct btf *btf, const char *name, int ref_type_ LIBBPF_API int btf__add_volatile(struct btf *btf, int ref_type_id); LIBBPF_API int btf__add_const(struct btf *btf, int ref_type_id); LIBBPF_API int btf__add_restrict(struct btf *btf, int ref_type_id); +LIBBPF_API int btf__add_type_tag(struct btf *btf, const char *value, int ref_type_id); /* func and func_proto construction APIs */ LIBBPF_API int btf__add_func(struct btf *btf, const char *name, @@ -245,25 +246,86 @@ LIBBPF_API int btf__add_decl_tag(struct btf *btf, const char *value, int ref_typ int component_idx); struct btf_dedup_opts { - unsigned int dedup_table_size; - bool dont_resolve_fwds; + size_t sz; + /* optional .BTF.ext info to dedup along the main BTF info */ + struct btf_ext *btf_ext; + /* force hash collisions (used for testing) */ + bool force_collisions; + size_t :0; }; +#define btf_dedup_opts__last_field force_collisions + +LIBBPF_API int btf__dedup(struct btf *btf, const struct btf_dedup_opts *opts); + +LIBBPF_API int btf__dedup_v0_6_0(struct btf *btf, const struct btf_dedup_opts *opts); -LIBBPF_API int btf__dedup(struct btf *btf, struct btf_ext *btf_ext, - const struct btf_dedup_opts *opts); +LIBBPF_DEPRECATED_SINCE(0, 7, "use btf__dedup() instead") +LIBBPF_API int btf__dedup_deprecated(struct btf *btf, struct btf_ext *btf_ext, const void *opts); +#define btf__dedup(...) ___libbpf_overload(___btf_dedup, __VA_ARGS__) +#define ___btf_dedup3(btf, btf_ext, opts) btf__dedup_deprecated(btf, btf_ext, opts) +#define ___btf_dedup2(btf, opts) btf__dedup(btf, opts) struct btf_dump; struct btf_dump_opts { - void *ctx; + union { + size_t sz; + void *ctx; /* DEPRECATED: will be gone in v1.0 */ + }; }; typedef void (*btf_dump_printf_fn_t)(void *ctx, const char *fmt, va_list args); LIBBPF_API struct btf_dump *btf_dump__new(const struct btf *btf, - const struct btf_ext *btf_ext, - const struct btf_dump_opts *opts, - btf_dump_printf_fn_t printf_fn); + btf_dump_printf_fn_t printf_fn, + void *ctx, + const struct btf_dump_opts *opts); + +LIBBPF_API struct btf_dump *btf_dump__new_v0_6_0(const struct btf *btf, + btf_dump_printf_fn_t printf_fn, + void *ctx, + const struct btf_dump_opts *opts); + +LIBBPF_API struct btf_dump *btf_dump__new_deprecated(const struct btf *btf, + const struct btf_ext *btf_ext, + const struct btf_dump_opts *opts, + btf_dump_printf_fn_t printf_fn); + +/* Choose either btf_dump__new() or btf_dump__new_deprecated() based on the + * type of 4th argument. If it's btf_dump's print callback, use deprecated + * API; otherwise, choose the new btf_dump__new(). ___libbpf_override() + * doesn't work here because both variants have 4 input arguments. + * + * (void *) casts are necessary to avoid compilation warnings about type + * mismatches, because even though __builtin_choose_expr() only ever evaluates + * one side the other side still has to satisfy type constraints (this is + * compiler implementation limitation which might be lifted eventually, + * according to the documentation). So passing struct btf_ext in place of + * btf_dump_printf_fn_t would be generating compilation warning. Casting to + * void * avoids this issue. + * + * Also, two type compatibility checks for a function and function pointer are + * required because passing function reference into btf_dump__new() as + * btf_dump__new(..., my_callback, ...) and as btf_dump__new(..., + * &my_callback, ...) (not explicit ampersand in the latter case) actually + * differs as far as __builtin_types_compatible_p() is concerned. Thus two + * checks are combined to detect callback argument. + * + * The rest works just like in case of ___libbpf_override() usage with symbol + * versioning. + * + * C++ compilers don't support __builtin_types_compatible_p(), so at least + * don't screw up compilation for them and let C++ users pick btf_dump__new + * vs btf_dump__new_deprecated explicitly. + */ +#ifndef __cplusplus +#define btf_dump__new(a1, a2, a3, a4) __builtin_choose_expr( \ + __builtin_types_compatible_p(typeof(a4), btf_dump_printf_fn_t) || \ + __builtin_types_compatible_p(typeof(a4), void(void *, const char *, va_list)), \ + btf_dump__new_deprecated((void *)a1, (void *)a2, (void *)a3, (void *)a4), \ + btf_dump__new((void *)a1, (void *)a2, (void *)a3, (void *)a4)) +#endif + LIBBPF_API void btf_dump__free(struct btf_dump *d); LIBBPF_API int btf_dump__dump_type(struct btf_dump *d, __u32 id); @@ -403,7 +465,8 @@ static inline bool btf_is_mod(const struct btf_type *t) return kind == BTF_KIND_VOLATILE || kind == BTF_KIND_CONST || - kind == BTF_KIND_RESTRICT; + kind == BTF_KIND_RESTRICT || + kind == BTF_KIND_TYPE_TAG; } static inline bool btf_is_func(const struct btf_type *t) @@ -436,6 +499,11 @@ static inline bool btf_is_decl_tag(const struct btf_type *t) return btf_kind(t) == BTF_KIND_DECL_TAG; } +static inline bool btf_is_type_tag(const struct btf_type *t) +{ + return btf_kind(t) == BTF_KIND_TYPE_TAG; +} + static inline __u8 btf_int_encoding(const struct btf_type *t) { return BTF_INT_ENCODING(*(__u32 *)(t + 1)); diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index 17db62b5002e..b9a3260c83cb 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -77,9 +77,8 @@ struct btf_dump_data { struct btf_dump { const struct btf *btf; - const struct btf_ext *btf_ext; btf_dump_printf_fn_t printf_fn; - struct btf_dump_opts opts; + void *cb_ctx; int ptr_sz; bool strip_mods; bool skip_anon_defs; @@ -138,29 +137,32 @@ static void btf_dump_printf(const struct btf_dump *d, const char *fmt, ...) va_list args; va_start(args, fmt); - d->printf_fn(d->opts.ctx, fmt, args); + d->printf_fn(d->cb_ctx, fmt, args); va_end(args); } static int btf_dump_mark_referenced(struct btf_dump *d); static int btf_dump_resize(struct btf_dump *d); -struct btf_dump *btf_dump__new(const struct btf *btf, - const struct btf_ext *btf_ext, - const struct btf_dump_opts *opts, - btf_dump_printf_fn_t printf_fn) +DEFAULT_VERSION(btf_dump__new_v0_6_0, btf_dump__new, LIBBPF_0.6.0) +struct btf_dump *btf_dump__new_v0_6_0(const struct btf *btf, + btf_dump_printf_fn_t printf_fn, + void *ctx, + const struct btf_dump_opts *opts) { struct btf_dump *d; int err; + if (!printf_fn) + return libbpf_err_ptr(-EINVAL); + d = calloc(1, sizeof(struct btf_dump)); if (!d) return libbpf_err_ptr(-ENOMEM); d->btf = btf; - d->btf_ext = btf_ext; d->printf_fn = printf_fn; - d->opts.ctx = opts ? opts->ctx : NULL; + d->cb_ctx = ctx; d->ptr_sz = btf__pointer_size(btf) ? : sizeof(void *); d->type_names = hashmap__new(str_hash_fn, str_equal_fn, NULL); @@ -186,6 +188,17 @@ err: return libbpf_err_ptr(err); } +COMPAT_VERSION(btf_dump__new_deprecated, btf_dump__new, LIBBPF_0.0.4) +struct btf_dump *btf_dump__new_deprecated(const struct btf *btf, + const struct btf_ext *btf_ext, + const struct btf_dump_opts *opts, + btf_dump_printf_fn_t printf_fn) +{ + if (!printf_fn) + return libbpf_err_ptr(-EINVAL); + return btf_dump__new_v0_6_0(btf, printf_fn, opts ? opts->ctx : NULL, opts); +} + static int btf_dump_resize(struct btf_dump *d) { int err, last_id = btf__type_cnt(d->btf) - 1; @@ -317,6 +330,7 @@ static int btf_dump_mark_referenced(struct btf_dump *d) case BTF_KIND_FUNC: case BTF_KIND_VAR: case BTF_KIND_DECL_TAG: + case BTF_KIND_TYPE_TAG: d->type_states[t->type].referenced = 1; break; @@ -560,6 +574,7 @@ static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr) case BTF_KIND_VOLATILE: case BTF_KIND_CONST: case BTF_KIND_RESTRICT: + case BTF_KIND_TYPE_TAG: return btf_dump_order_type(d, t->type, through_ptr); case BTF_KIND_FUNC_PROTO: { @@ -734,6 +749,7 @@ static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id) case BTF_KIND_VOLATILE: case BTF_KIND_CONST: case BTF_KIND_RESTRICT: + case BTF_KIND_TYPE_TAG: btf_dump_emit_type(d, t->type, cont_id); break; case BTF_KIND_ARRAY: @@ -1154,6 +1170,7 @@ skip_mod: case BTF_KIND_CONST: case BTF_KIND_RESTRICT: case BTF_KIND_FUNC_PROTO: + case BTF_KIND_TYPE_TAG: id = t->type; break; case BTF_KIND_ARRAY: @@ -1322,6 +1339,11 @@ static void btf_dump_emit_type_chain(struct btf_dump *d, case BTF_KIND_RESTRICT: btf_dump_printf(d, " restrict"); break; + case BTF_KIND_TYPE_TAG: + btf_dump_emit_mods(d, decls); + name = btf_name_of(d, t->name_off); + btf_dump_printf(d, " __attribute__((btf_type_tag(\"%s\")))", name); + break; case BTF_KIND_ARRAY: { const struct btf_array *a = btf_array(t); const struct btf_type *next_t; @@ -2194,7 +2216,7 @@ static int btf_dump_dump_type_data(struct btf_dump *d, __u8 bits_offset, __u8 bit_sz) { - int size, err; + int size, err = 0; size = btf_dump_type_data_check_overflow(d, t, id, data, bits_offset); if (size < 0) @@ -2299,8 +2321,8 @@ int btf_dump__dump_type_data(struct btf_dump *d, __u32 id, if (!opts->indent_str) d->typed_dump->indent_str[0] = '\t'; else - strncat(d->typed_dump->indent_str, opts->indent_str, - sizeof(d->typed_dump->indent_str) - 1); + libbpf_strlcpy(d->typed_dump->indent_str, opts->indent_str, + sizeof(d->typed_dump->indent_str)); d->typed_dump->compact = OPTS_GET(opts, compact, false); d->typed_dump->skip_names = OPTS_GET(opts, skip_names, false); diff --git a/tools/lib/bpf/gen_loader.c b/tools/lib/bpf/gen_loader.c index 502dea53a742..8ecef1088ba2 100644 --- a/tools/lib/bpf/gen_loader.c +++ b/tools/lib/bpf/gen_loader.c @@ -18,7 +18,7 @@ #define MAX_USED_MAPS 64 #define MAX_USED_PROGS 32 #define MAX_KFUNC_DESCS 256 -#define MAX_FD_ARRAY_SZ (MAX_USED_PROGS + MAX_KFUNC_DESCS) +#define MAX_FD_ARRAY_SZ (MAX_USED_MAPS + MAX_KFUNC_DESCS) /* The following structure describes the stack layout of the loader program. * In addition R6 contains the pointer to context. @@ -33,8 +33,8 @@ */ struct loader_stack { __u32 btf_fd; - __u32 prog_fd[MAX_USED_PROGS]; __u32 inner_map_fd; + __u32 prog_fd[MAX_USED_PROGS]; }; #define stack_off(field) \ @@ -42,6 +42,11 @@ struct loader_stack { #define attr_field(attr, field) (attr + offsetof(union bpf_attr, field)) +static int blob_fd_array_off(struct bpf_gen *gen, int index) +{ + return gen->fd_array + index * sizeof(int); +} + static int realloc_insn_buf(struct bpf_gen *gen, __u32 size) { size_t off = gen->insn_cur - gen->insn_start; @@ -102,11 +107,15 @@ static void emit2(struct bpf_gen *gen, struct bpf_insn insn1, struct bpf_insn in emit(gen, insn2); } -void bpf_gen__init(struct bpf_gen *gen, int log_level) +static int add_data(struct bpf_gen *gen, const void *data, __u32 size); +static void emit_sys_close_blob(struct bpf_gen *gen, int blob_off); + +void bpf_gen__init(struct bpf_gen *gen, int log_level, int nr_progs, int nr_maps) { - size_t stack_sz = sizeof(struct loader_stack); + size_t stack_sz = sizeof(struct loader_stack), nr_progs_sz; int i; + gen->fd_array = add_data(gen, NULL, MAX_FD_ARRAY_SZ * sizeof(int)); gen->log_level = log_level; /* save ctx pointer into R6 */ emit(gen, BPF_MOV64_REG(BPF_REG_6, BPF_REG_1)); @@ -118,19 +127,27 @@ void bpf_gen__init(struct bpf_gen *gen, int log_level) emit(gen, BPF_MOV64_IMM(BPF_REG_3, 0)); emit(gen, BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel)); + /* amount of stack actually used, only used to calculate iterations, not stack offset */ + nr_progs_sz = offsetof(struct loader_stack, prog_fd[nr_progs]); /* jump over cleanup code */ emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, - /* size of cleanup code below */ - (stack_sz / 4) * 3 + 2)); + /* size of cleanup code below (including map fd cleanup) */ + (nr_progs_sz / 4) * 3 + 2 + + /* 6 insns for emit_sys_close_blob, + * 6 insns for debug_regs in emit_sys_close_blob + */ + nr_maps * (6 + (gen->log_level ? 6 : 0)))); /* remember the label where all error branches will jump to */ gen->cleanup_label = gen->insn_cur - gen->insn_start; /* emit cleanup code: close all temp FDs */ - for (i = 0; i < stack_sz; i += 4) { + for (i = 0; i < nr_progs_sz; i += 4) { emit(gen, BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_10, -stack_sz + i)); emit(gen, BPF_JMP_IMM(BPF_JSLE, BPF_REG_1, 0, 1)); emit(gen, BPF_EMIT_CALL(BPF_FUNC_sys_close)); } + for (i = 0; i < nr_maps; i++) + emit_sys_close_blob(gen, blob_fd_array_off(gen, i)); /* R7 contains the error code from sys_bpf. Copy it into R0 and exit. */ emit(gen, BPF_MOV64_REG(BPF_REG_0, BPF_REG_7)); emit(gen, BPF_EXIT_INSN()); @@ -160,8 +177,6 @@ static int add_data(struct bpf_gen *gen, const void *data, __u32 size) */ static int add_map_fd(struct bpf_gen *gen) { - if (!gen->fd_array) - gen->fd_array = add_data(gen, NULL, MAX_FD_ARRAY_SZ * sizeof(int)); if (gen->nr_maps == MAX_USED_MAPS) { pr_warn("Total maps exceeds %d\n", MAX_USED_MAPS); gen->error = -E2BIG; @@ -174,8 +189,6 @@ static int add_kfunc_btf_fd(struct bpf_gen *gen) { int cur; - if (!gen->fd_array) - gen->fd_array = add_data(gen, NULL, MAX_FD_ARRAY_SZ * sizeof(int)); if (gen->nr_fd_array == MAX_KFUNC_DESCS) { cur = add_data(gen, NULL, sizeof(int)); return (cur - gen->fd_array) / sizeof(int); @@ -183,11 +196,6 @@ static int add_kfunc_btf_fd(struct bpf_gen *gen) return MAX_USED_MAPS + gen->nr_fd_array++; } -static int blob_fd_array_off(struct bpf_gen *gen, int index) -{ - return gen->fd_array + index * sizeof(int); -} - static int insn_bytes_to_bpf_size(__u32 sz) { switch (sz) { @@ -359,10 +367,16 @@ static void emit_sys_close_blob(struct bpf_gen *gen, int blob_off) __emit_sys_close(gen); } -int bpf_gen__finish(struct bpf_gen *gen) +int bpf_gen__finish(struct bpf_gen *gen, int nr_progs, int nr_maps) { int i; + if (nr_progs < gen->nr_progs || nr_maps != gen->nr_maps) { + pr_warn("nr_progs %d/%d nr_maps %d/%d mismatch\n", + nr_progs, gen->nr_progs, nr_maps, gen->nr_maps); + gen->error = -EFAULT; + return gen->error; + } emit_sys_close_stack(gen, stack_off(btf_fd)); for (i = 0; i < gen->nr_progs; i++) move_stack2ctx(gen, @@ -432,47 +446,32 @@ void bpf_gen__load_btf(struct bpf_gen *gen, const void *btf_raw_data, } void bpf_gen__map_create(struct bpf_gen *gen, - struct bpf_create_map_params *map_attr, int map_idx) + enum bpf_map_type map_type, + const char *map_name, + __u32 key_size, __u32 value_size, __u32 max_entries, + struct bpf_map_create_opts *map_attr, int map_idx) { - int attr_size = offsetofend(union bpf_attr, btf_vmlinux_value_type_id); + int attr_size = offsetofend(union bpf_attr, map_extra); bool close_inner_map_fd = false; int map_create_attr, idx; union bpf_attr attr; memset(&attr, 0, attr_size); - attr.map_type = map_attr->map_type; - attr.key_size = map_attr->key_size; - attr.value_size = map_attr->value_size; + attr.map_type = map_type; + attr.key_size = key_size; + attr.value_size = value_size; attr.map_flags = map_attr->map_flags; attr.map_extra = map_attr->map_extra; - memcpy(attr.map_name, map_attr->name, - min((unsigned)strlen(map_attr->name), BPF_OBJ_NAME_LEN - 1)); + if (map_name) + libbpf_strlcpy(attr.map_name, map_name, sizeof(attr.map_name)); attr.numa_node = map_attr->numa_node; attr.map_ifindex = map_attr->map_ifindex; - attr.max_entries = map_attr->max_entries; - switch (attr.map_type) { - case BPF_MAP_TYPE_PERF_EVENT_ARRAY: - case BPF_MAP_TYPE_CGROUP_ARRAY: - case BPF_MAP_TYPE_STACK_TRACE: - case BPF_MAP_TYPE_ARRAY_OF_MAPS: - case BPF_MAP_TYPE_HASH_OF_MAPS: - case BPF_MAP_TYPE_DEVMAP: - case BPF_MAP_TYPE_DEVMAP_HASH: - case BPF_MAP_TYPE_CPUMAP: - case BPF_MAP_TYPE_XSKMAP: - case BPF_MAP_TYPE_SOCKMAP: - case BPF_MAP_TYPE_SOCKHASH: - case BPF_MAP_TYPE_QUEUE: - case BPF_MAP_TYPE_STACK: - case BPF_MAP_TYPE_RINGBUF: - break; - default: - attr.btf_key_type_id = map_attr->btf_key_type_id; - attr.btf_value_type_id = map_attr->btf_value_type_id; - } + attr.max_entries = max_entries; + attr.btf_key_type_id = map_attr->btf_key_type_id; + attr.btf_value_type_id = map_attr->btf_value_type_id; pr_debug("gen: map_create: %s idx %d type %d value_type_id %d\n", - attr.map_name, map_idx, map_attr->map_type, attr.btf_value_type_id); + attr.map_name, map_idx, map_type, attr.btf_value_type_id); map_create_attr = add_data(gen, &attr, attr_size); if (attr.btf_value_type_id) @@ -499,7 +498,7 @@ void bpf_gen__map_create(struct bpf_gen *gen, /* emit MAP_CREATE command */ emit_sys_bpf(gen, BPF_MAP_CREATE, map_create_attr, attr_size); debug_ret(gen, "map_create %s idx %d type %d value_size %d value_btf_id %d", - attr.map_name, map_idx, map_attr->map_type, attr.value_size, + attr.map_name, map_idx, map_type, value_size, attr.btf_value_type_id); emit_check_err(gen); /* remember map_fd in the stack, if successful */ @@ -584,8 +583,9 @@ void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, bool is_weak, static struct ksym_desc *get_ksym_desc(struct bpf_gen *gen, struct ksym_relo_desc *relo) { struct ksym_desc *kdesc; + int i; - for (int i = 0; i < gen->nr_ksyms; i++) { + for (i = 0; i < gen->nr_ksyms; i++) { if (!strcmp(gen->ksyms[i].name, relo->name)) { gen->ksyms[i].ref++; return &gen->ksyms[i]; @@ -687,27 +687,29 @@ static void emit_relo_kfunc_btf(struct bpf_gen *gen, struct ksym_relo_desc *relo return; } kdesc->off = btf_fd_idx; - /* set a default value for imm */ + /* jump to success case */ + emit(gen, BPF_JMP_IMM(BPF_JSGE, BPF_REG_7, 0, 3)); + /* set value for imm, off as 0 */ emit(gen, BPF_ST_MEM(BPF_W, BPF_REG_8, offsetof(struct bpf_insn, imm), 0)); - /* skip success case store if ret < 0 */ - emit(gen, BPF_JMP_IMM(BPF_JSLT, BPF_REG_7, 0, 1)); + emit(gen, BPF_ST_MEM(BPF_H, BPF_REG_8, offsetof(struct bpf_insn, off), 0)); + /* skip success case for ret < 0 */ + emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, 10)); /* store btf_id into insn[insn_idx].imm */ emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_8, BPF_REG_7, offsetof(struct bpf_insn, imm))); + /* obtain fd in BPF_REG_9 */ + emit(gen, BPF_MOV64_REG(BPF_REG_9, BPF_REG_7)); + emit(gen, BPF_ALU64_IMM(BPF_RSH, BPF_REG_9, 32)); + /* jump to fd_array store if fd denotes module BTF */ + emit(gen, BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 0, 2)); + /* set the default value for off */ + emit(gen, BPF_ST_MEM(BPF_H, BPF_REG_8, offsetof(struct bpf_insn, off), 0)); + /* skip BTF fd store for vmlinux BTF */ + emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, 4)); /* load fd_array slot pointer */ emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_0, BPF_PSEUDO_MAP_IDX_VALUE, 0, 0, 0, blob_fd_array_off(gen, btf_fd_idx))); - /* skip store of BTF fd if ret < 0 */ - emit(gen, BPF_JMP_IMM(BPF_JSLT, BPF_REG_7, 0, 3)); /* store BTF fd in slot */ - emit(gen, BPF_MOV64_REG(BPF_REG_9, BPF_REG_7)); - emit(gen, BPF_ALU64_IMM(BPF_RSH, BPF_REG_9, 32)); emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_9, 0)); - /* set a default value for off */ - emit(gen, BPF_ST_MEM(BPF_H, BPF_REG_8, offsetof(struct bpf_insn, off), 0)); - /* skip insn->off store if ret < 0 */ - emit(gen, BPF_JMP_IMM(BPF_JSLT, BPF_REG_7, 0, 2)); - /* skip if vmlinux BTF */ - emit(gen, BPF_JMP_IMM(BPF_JEQ, BPF_REG_9, 0, 1)); /* store index into insn[insn_idx].off */ emit(gen, BPF_ST_MEM(BPF_H, BPF_REG_8, offsetof(struct bpf_insn, off), btf_fd_idx)); log: @@ -806,9 +808,8 @@ static void emit_relo_ksym_btf(struct bpf_gen *gen, struct ksym_relo_desc *relo, kdesc->insn + offsetof(struct bpf_insn, imm)); move_blob2blob(gen, insn + sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm), 4, kdesc->insn + sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm)); - emit(gen, BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_8, offsetof(struct bpf_insn, imm))); - /* jump over src_reg adjustment if imm is not 0 */ - emit(gen, BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 0, 3)); + /* jump over src_reg adjustment if imm is not 0, reuse BPF_REG_0 from move_blob2blob */ + emit(gen, BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3)); goto clear_src_reg; } /* remember insn offset, so we can copy BTF ID and FD later */ @@ -816,17 +817,20 @@ static void emit_relo_ksym_btf(struct bpf_gen *gen, struct ksym_relo_desc *relo, emit_bpf_find_by_name_kind(gen, relo); if (!relo->is_weak) emit_check_err(gen); - /* set default values as 0 */ + /* jump to success case */ + emit(gen, BPF_JMP_IMM(BPF_JSGE, BPF_REG_7, 0, 3)); + /* set values for insn[insn_idx].imm, insn[insn_idx + 1].imm as 0 */ emit(gen, BPF_ST_MEM(BPF_W, BPF_REG_8, offsetof(struct bpf_insn, imm), 0)); emit(gen, BPF_ST_MEM(BPF_W, BPF_REG_8, sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm), 0)); - /* skip success case stores if ret < 0 */ - emit(gen, BPF_JMP_IMM(BPF_JSLT, BPF_REG_7, 0, 4)); + /* skip success case for ret < 0 */ + emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, 4)); /* store btf_id into insn[insn_idx].imm */ emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_8, BPF_REG_7, offsetof(struct bpf_insn, imm))); /* store btf_obj_fd into insn[insn_idx + 1].imm */ emit(gen, BPF_ALU64_IMM(BPF_RSH, BPF_REG_7, 32)); emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_8, BPF_REG_7, sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm))); + /* skip src_reg adjustment */ emit(gen, BPF_JMP_IMM(BPF_JSGE, BPF_REG_7, 0, 3)); clear_src_reg: /* clear bpf_object__relocate_data's src_reg assignment, otherwise we get a verifier failure */ @@ -838,6 +842,22 @@ clear_src_reg: emit_ksym_relo_log(gen, relo, kdesc->ref); } +void bpf_gen__record_relo_core(struct bpf_gen *gen, + const struct bpf_core_relo *core_relo) +{ + struct bpf_core_relo *relos; + + relos = libbpf_reallocarray(gen->core_relos, gen->core_relo_cnt + 1, sizeof(*relos)); + if (!relos) { + gen->error = -ENOMEM; + return; + } + gen->core_relos = relos; + relos += gen->core_relo_cnt; + memcpy(relos, core_relo, sizeof(*relos)); + gen->core_relo_cnt++; +} + static void emit_relo(struct bpf_gen *gen, struct ksym_relo_desc *relo, int insns) { int insn; @@ -870,6 +890,15 @@ static void emit_relos(struct bpf_gen *gen, int insns) emit_relo(gen, gen->relos + i, insns); } +static void cleanup_core_relo(struct bpf_gen *gen) +{ + if (!gen->core_relo_cnt) + return; + free(gen->core_relos); + gen->core_relo_cnt = 0; + gen->core_relos = NULL; +} + static void cleanup_relos(struct bpf_gen *gen, int insns) { int i, insn; @@ -897,30 +926,32 @@ static void cleanup_relos(struct bpf_gen *gen, int insns) gen->relo_cnt = 0; gen->relos = NULL; } + cleanup_core_relo(gen); } void bpf_gen__prog_load(struct bpf_gen *gen, - struct bpf_prog_load_params *load_attr, int prog_idx) + enum bpf_prog_type prog_type, const char *prog_name, + const char *license, struct bpf_insn *insns, size_t insn_cnt, + struct bpf_prog_load_opts *load_attr, int prog_idx) { - int attr_size = offsetofend(union bpf_attr, fd_array); - int prog_load_attr, license, insns, func_info, line_info; + int prog_load_attr, license_off, insns_off, func_info, line_info, core_relos; + int attr_size = offsetofend(union bpf_attr, core_relo_rec_size); union bpf_attr attr; memset(&attr, 0, attr_size); - pr_debug("gen: prog_load: type %d insns_cnt %zd\n", - load_attr->prog_type, load_attr->insn_cnt); + pr_debug("gen: prog_load: type %d insns_cnt %zd progi_idx %d\n", + prog_type, insn_cnt, prog_idx); /* add license string to blob of bytes */ - license = add_data(gen, load_attr->license, strlen(load_attr->license) + 1); + license_off = add_data(gen, license, strlen(license) + 1); /* add insns to blob of bytes */ - insns = add_data(gen, load_attr->insns, - load_attr->insn_cnt * sizeof(struct bpf_insn)); + insns_off = add_data(gen, insns, insn_cnt * sizeof(struct bpf_insn)); - attr.prog_type = load_attr->prog_type; + attr.prog_type = prog_type; attr.expected_attach_type = load_attr->expected_attach_type; attr.attach_btf_id = load_attr->attach_btf_id; attr.prog_ifindex = load_attr->prog_ifindex; attr.kern_version = 0; - attr.insn_cnt = (__u32)load_attr->insn_cnt; + attr.insn_cnt = (__u32)insn_cnt; attr.prog_flags = load_attr->prog_flags; attr.func_info_rec_size = load_attr->func_info_rec_size; @@ -933,15 +964,19 @@ void bpf_gen__prog_load(struct bpf_gen *gen, line_info = add_data(gen, load_attr->line_info, attr.line_info_cnt * attr.line_info_rec_size); - memcpy(attr.prog_name, load_attr->name, - min((unsigned)strlen(load_attr->name), BPF_OBJ_NAME_LEN - 1)); + attr.core_relo_rec_size = sizeof(struct bpf_core_relo); + attr.core_relo_cnt = gen->core_relo_cnt; + core_relos = add_data(gen, gen->core_relos, + attr.core_relo_cnt * attr.core_relo_rec_size); + + libbpf_strlcpy(attr.prog_name, prog_name, sizeof(attr.prog_name)); prog_load_attr = add_data(gen, &attr, attr_size); /* populate union bpf_attr with a pointer to license */ - emit_rel_store(gen, attr_field(prog_load_attr, license), license); + emit_rel_store(gen, attr_field(prog_load_attr, license), license_off); /* populate union bpf_attr with a pointer to instructions */ - emit_rel_store(gen, attr_field(prog_load_attr, insns), insns); + emit_rel_store(gen, attr_field(prog_load_attr, insns), insns_off); /* populate union bpf_attr with a pointer to func_info */ emit_rel_store(gen, attr_field(prog_load_attr, func_info), func_info); @@ -949,6 +984,9 @@ void bpf_gen__prog_load(struct bpf_gen *gen, /* populate union bpf_attr with a pointer to line_info */ emit_rel_store(gen, attr_field(prog_load_attr, line_info), line_info); + /* populate union bpf_attr with a pointer to core_relos */ + emit_rel_store(gen, attr_field(prog_load_attr, core_relos), core_relos); + /* populate union bpf_attr fd_array with a pointer to data where map_fds are saved */ emit_rel_store(gen, attr_field(prog_load_attr, fd_array), gen->fd_array); @@ -973,15 +1011,17 @@ void bpf_gen__prog_load(struct bpf_gen *gen, emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_7, offsetof(union bpf_attr, attach_btf_obj_fd))); } - emit_relos(gen, insns); + emit_relos(gen, insns_off); /* emit PROG_LOAD command */ emit_sys_bpf(gen, BPF_PROG_LOAD, prog_load_attr, attr_size); debug_ret(gen, "prog_load %s insn_cnt %d", attr.prog_name, attr.insn_cnt); /* successful or not, close btf module FDs used in extern ksyms and attach_btf_obj_fd */ - cleanup_relos(gen, insns); - if (gen->attach_kind) + cleanup_relos(gen, insns_off); + if (gen->attach_kind) { emit_sys_close_blob(gen, attr_field(prog_load_attr, attach_btf_obj_fd)); + gen->attach_kind = 0; + } emit_check_err(gen); /* remember prog_fd in the stack, if successful */ emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_7, @@ -1027,6 +1067,33 @@ void bpf_gen__map_update_elem(struct bpf_gen *gen, int map_idx, void *pvalue, emit_check_err(gen); } +void bpf_gen__populate_outer_map(struct bpf_gen *gen, int outer_map_idx, int slot, + int inner_map_idx) +{ + int attr_size = offsetofend(union bpf_attr, flags); + int map_update_attr, key; + union bpf_attr attr; + + memset(&attr, 0, attr_size); + pr_debug("gen: populate_outer_map: outer %d key %d inner %d\n", + outer_map_idx, slot, inner_map_idx); + + key = add_data(gen, &slot, sizeof(slot)); + + map_update_attr = add_data(gen, &attr, attr_size); + move_blob2blob(gen, attr_field(map_update_attr, map_fd), 4, + blob_fd_array_off(gen, outer_map_idx)); + emit_rel_store(gen, attr_field(map_update_attr, key), key); + emit_rel_store(gen, attr_field(map_update_attr, value), + blob_fd_array_off(gen, inner_map_idx)); + + /* emit MAP_UPDATE_ELEM command */ + emit_sys_bpf(gen, BPF_MAP_UPDATE_ELEM, map_update_attr, attr_size); + debug_ret(gen, "populate_outer_map outer %d key %d inner %d", + outer_map_idx, slot, inner_map_idx); + emit_check_err(gen); +} + void bpf_gen__map_freeze(struct bpf_gen *gen, int map_idx) { int attr_size = offsetofend(union bpf_attr, map_fd); diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index a1bea1953df6..7f10dd501a52 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -168,39 +168,24 @@ int libbpf_set_strict_mode(enum libbpf_strict_mode mode) return 0; } -enum kern_feature_id { - /* v4.14: kernel support for program & map names. */ - FEAT_PROG_NAME, - /* v5.2: kernel support for global data sections. */ - FEAT_GLOBAL_DATA, - /* BTF support */ - FEAT_BTF, - /* BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO support */ - FEAT_BTF_FUNC, - /* BTF_KIND_VAR and BTF_KIND_DATASEC support */ - FEAT_BTF_DATASEC, - /* BTF_FUNC_GLOBAL is supported */ - FEAT_BTF_GLOBAL_FUNC, - /* BPF_F_MMAPABLE is supported for arrays */ - FEAT_ARRAY_MMAP, - /* kernel support for expected_attach_type in BPF_PROG_LOAD */ - FEAT_EXP_ATTACH_TYPE, - /* bpf_probe_read_{kernel,user}[_str] helpers */ - FEAT_PROBE_READ_KERN, - /* BPF_PROG_BIND_MAP is supported */ - FEAT_PROG_BIND_MAP, - /* Kernel support for module BTFs */ - FEAT_MODULE_BTF, - /* BTF_KIND_FLOAT support */ - FEAT_BTF_FLOAT, - /* BPF perf link support */ - FEAT_PERF_LINK, - /* BTF_KIND_DECL_TAG support */ - FEAT_BTF_DECL_TAG, - __FEAT_CNT, -}; +__u32 libbpf_major_version(void) +{ + return LIBBPF_MAJOR_VERSION; +} -static bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id); +__u32 libbpf_minor_version(void) +{ + return LIBBPF_MINOR_VERSION; +} + +const char *libbpf_version_string(void) +{ +#define __S(X) #X +#define _S(X) __S(X) + return "v" _S(LIBBPF_MAJOR_VERSION) "." _S(LIBBPF_MINOR_VERSION); +#undef _S +#undef __S +} enum reloc_type { RELO_LD64, @@ -209,19 +194,25 @@ enum reloc_type { RELO_EXTERN_VAR, RELO_EXTERN_FUNC, RELO_SUBPROG_ADDR, + RELO_CORE, }; struct reloc_desc { enum reloc_type type; int insn_idx; - int map_idx; - int sym_off; + union { + const struct bpf_core_relo *core_relo; /* used when type == RELO_CORE */ + struct { + int map_idx; + int sym_off; + }; + }; }; struct bpf_sec_def; typedef int (*init_fn_t)(struct bpf_program *prog, long cookie); -typedef int (*preload_fn_t)(struct bpf_program *prog, struct bpf_prog_load_params *attr, long cookie); +typedef int (*preload_fn_t)(struct bpf_program *prog, struct bpf_prog_load_opts *opts, long cookie); typedef struct bpf_link *(*attach_fn_t)(const struct bpf_program *prog, long cookie); /* stored as sec_def->cookie for all libbpf-supported SEC()s */ @@ -304,7 +295,11 @@ struct bpf_program { struct reloc_desc *reloc_desc; int nr_reloc; - int log_level; + + /* BPF verifier log settings */ + char *log_buf; + size_t log_size; + __u32 log_level; struct { int nr; @@ -400,6 +395,7 @@ struct bpf_map { char *pin_path; bool pinned; bool reused; + bool skipped; __u64 map_extra; }; @@ -546,6 +542,11 @@ struct bpf_object { size_t btf_module_cnt; size_t btf_module_cap; + /* optional log settings passed to BPF_BTF_LOAD and BPF_PROG_LOAD commands */ + char *log_buf; + size_t log_size; + __u32 log_level; + void *priv; bpf_object_clear_priv_t clear_priv; @@ -681,6 +682,9 @@ bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog, prog->instances.fds = NULL; prog->instances.nr = -1; + /* inherit object's log_level */ + prog->log_level = obj->log_level; + prog->sec_name = strdup(sec_name); if (!prog->sec_name) goto errout; @@ -791,11 +795,36 @@ bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data, return 0; } -static __u32 get_kernel_version(void) +__u32 get_kernel_version(void) { + /* On Ubuntu LINUX_VERSION_CODE doesn't correspond to info.release, + * but Ubuntu provides /proc/version_signature file, as described at + * https://ubuntu.com/kernel, with an example contents below, which we + * can use to get a proper LINUX_VERSION_CODE. + * + * Ubuntu 5.4.0-12.15-generic 5.4.8 + * + * In the above, 5.4.8 is what kernel is actually expecting, while + * uname() call will return 5.4.0 in info.release. + */ + const char *ubuntu_kver_file = "/proc/version_signature"; __u32 major, minor, patch; struct utsname info; + if (access(ubuntu_kver_file, R_OK) == 0) { + FILE *f; + + f = fopen(ubuntu_kver_file, "r"); + if (f) { + if (fscanf(f, "%*s %*s %d.%d.%d\n", &major, &minor, &patch) == 3) { + fclose(f); + return KERNEL_VERSION(major, minor, patch); + } + fclose(f); + } + /* something went wrong, fall back to uname() approach */ + } + uname(&info); if (sscanf(info.release, "%u.%u.%u", &major, &minor, &patch) != 3) return 0; @@ -1161,12 +1190,10 @@ static struct bpf_object *bpf_object__new(const char *path, strcpy(obj->path, path); if (obj_name) { - strncpy(obj->name, obj_name, sizeof(obj->name) - 1); - obj->name[sizeof(obj->name) - 1] = 0; + libbpf_strlcpy(obj->name, obj_name, sizeof(obj->name)); } else { /* Using basename() GNU version which doesn't modify arg. */ - strncpy(obj->name, basename((void *)path), - sizeof(obj->name) - 1); + libbpf_strlcpy(obj->name, basename((void *)path), sizeof(obj->name)); end = strchr(obj->name, '.'); if (end) *end = 0; @@ -1318,7 +1345,10 @@ static int bpf_object__check_endianness(struct bpf_object *obj) static int bpf_object__init_license(struct bpf_object *obj, void *data, size_t size) { - memcpy(obj->license, data, min(size, sizeof(obj->license) - 1)); + /* libbpf_strlcpy() only copies first N - 1 bytes, so size + 1 won't + * go over allowed ELF data section buffer + */ + libbpf_strlcpy(obj->license, data, min(size + 1, sizeof(obj->license))); pr_debug("license of %s is %s\n", obj->path, obj->license); return 0; } @@ -2076,6 +2106,7 @@ static const char *__btf_kind_str(__u16 kind) case BTF_KIND_DATASEC: return "datasec"; case BTF_KIND_FLOAT: return "float"; case BTF_KIND_DECL_TAG: return "decl_tag"; + case BTF_KIND_TYPE_TAG: return "type_tag"; default: return "unknown"; } } @@ -2255,6 +2286,9 @@ int parse_btf_map_def(const char *map_name, struct btf *btf, map_def->parts |= MAP_DEF_VALUE_SIZE | MAP_DEF_VALUE_TYPE; } else if (strcmp(name, "values") == 0) { + bool is_map_in_map = bpf_map_type__is_map_in_map(map_def->map_type); + bool is_prog_array = map_def->map_type == BPF_MAP_TYPE_PROG_ARRAY; + const char *desc = is_map_in_map ? "map-in-map inner" : "prog-array value"; char inner_map_name[128]; int err; @@ -2268,8 +2302,8 @@ int parse_btf_map_def(const char *map_name, struct btf *btf, map_name, name); return -EINVAL; } - if (!bpf_map_type__is_map_in_map(map_def->map_type)) { - pr_warn("map '%s': should be map-in-map.\n", + if (!is_map_in_map && !is_prog_array) { + pr_warn("map '%s': should be map-in-map or prog-array.\n", map_name); return -ENOTSUP; } @@ -2281,22 +2315,30 @@ int parse_btf_map_def(const char *map_name, struct btf *btf, map_def->value_size = 4; t = btf__type_by_id(btf, m->type); if (!t) { - pr_warn("map '%s': map-in-map inner type [%d] not found.\n", - map_name, m->type); + pr_warn("map '%s': %s type [%d] not found.\n", + map_name, desc, m->type); return -EINVAL; } if (!btf_is_array(t) || btf_array(t)->nelems) { - pr_warn("map '%s': map-in-map inner spec is not a zero-sized array.\n", - map_name); + pr_warn("map '%s': %s spec is not a zero-sized array.\n", + map_name, desc); return -EINVAL; } t = skip_mods_and_typedefs(btf, btf_array(t)->type, NULL); if (!btf_is_ptr(t)) { - pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n", - map_name, btf_kind_str(t)); + pr_warn("map '%s': %s def is of unexpected kind %s.\n", + map_name, desc, btf_kind_str(t)); return -EINVAL; } t = skip_mods_and_typedefs(btf, t->type, NULL); + if (is_prog_array) { + if (!btf_is_func_proto(t)) { + pr_warn("map '%s': prog-array value def is of unexpected kind %s.\n", + map_name, btf_kind_str(t)); + return -EINVAL; + } + continue; + } if (!btf_is_struct(t)) { pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n", map_name, btf_kind_str(t)); @@ -2588,8 +2630,10 @@ static bool btf_needs_sanitization(struct bpf_object *obj) bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT); bool has_func = kernel_supports(obj, FEAT_BTF_FUNC); bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG); + bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG); - return !has_func || !has_datasec || !has_func_global || !has_float || !has_decl_tag; + return !has_func || !has_datasec || !has_func_global || !has_float || + !has_decl_tag || !has_type_tag; } static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf) @@ -2599,6 +2643,7 @@ static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf) bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT); bool has_func = kernel_supports(obj, FEAT_BTF_FUNC); bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG); + bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG); struct btf_type *t; int i, j, vlen; @@ -2657,6 +2702,10 @@ static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf) */ t->name_off = 0; t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 0); + } else if (!has_type_tag && btf_is_type_tag(t)) { + /* replace TYPE_TAG with a CONST */ + t->name_off = 0; + t->info = BTF_INFO_ENC(BTF_KIND_CONST, 0, 0); } } } @@ -2752,13 +2801,12 @@ static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf, for (i = 0, vsi = btf_var_secinfos(t); i < vars; i++, vsi++) { t_var = btf__type_by_id(btf, vsi->type); - var = btf_var(t_var); - - if (!btf_is_var(t_var)) { + if (!t_var || !btf_is_var(t_var)) { pr_debug("Non-VAR type seen in section %s\n", name); return -EINVAL; } + var = btf_var(t_var); if (var->linkage == BTF_VAR_STATIC) continue; @@ -2972,7 +3020,9 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) */ btf__set_fd(kern_btf, 0); } else { - err = btf__load_into_kernel(kern_btf); + /* currently BPF_BTF_LOAD only supports log_level 1 */ + err = btf_load_into_kernel(kern_btf, obj->log_buf, obj->log_size, + obj->log_level ? 1 : 0); } if (sanitize) { if (!err) { @@ -3191,11 +3241,11 @@ static int bpf_object__elf_collect(struct bpf_object *obj) Elf_Scn *scn; Elf64_Shdr *sh; - /* ELF section indices are 1-based, so allocate +1 element to keep - * indexing simple. Also include 0th invalid section into sec_cnt for - * simpler and more traditional iteration logic. + /* ELF section indices are 0-based, but sec #0 is special "invalid" + * section. e_shnum does include sec #0, so e_shnum is the necessary + * size of an array to keep all the sections. */ - obj->efile.sec_cnt = 1 + obj->efile.ehdr->e_shnum; + obj->efile.sec_cnt = obj->efile.ehdr->e_shnum; obj->efile.secs = calloc(obj->efile.sec_cnt, sizeof(*obj->efile.secs)); if (!obj->efile.secs) return -ENOMEM; @@ -3271,8 +3321,12 @@ static int bpf_object__elf_collect(struct bpf_object *obj) } else if (strcmp(name, MAPS_ELF_SEC) == 0) { obj->efile.btf_maps_shndx = idx; } else if (strcmp(name, BTF_ELF_SEC) == 0) { + if (sh->sh_type != SHT_PROGBITS) + return -LIBBPF_ERRNO__FORMAT; btf_data = data; } else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) { + if (sh->sh_type != SHT_PROGBITS) + return -LIBBPF_ERRNO__FORMAT; btf_ext_data = data; } else if (sh->sh_type == SHT_SYMTAB) { /* already processed during the first pass above */ @@ -3303,6 +3357,10 @@ static int bpf_object__elf_collect(struct bpf_object *obj) } else if (sh->sh_type == SHT_REL) { int targ_sec_idx = sh->sh_info; /* points to other section */ + if (sh->sh_entsize != sizeof(Elf64_Rel) || + targ_sec_idx >= obj->efile.sec_cnt) + return -LIBBPF_ERRNO__FORMAT; + /* Only do relo for section with exec instructions */ if (!section_have_execinstr(obj, targ_sec_idx) && strcmp(name, ".rel" STRUCT_OPS_SEC) && @@ -3333,7 +3391,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj) /* sort BPF programs by section name and in-section instruction offset * for faster search */ - qsort(obj->programs, obj->nr_programs, sizeof(*obj->programs), cmp_progs); + if (obj->nr_programs) + qsort(obj->programs, obj->nr_programs, sizeof(*obj->programs), cmp_progs); return bpf_object__init_btf(obj, btf_data, btf_ext_data); } @@ -3555,7 +3614,7 @@ static int bpf_object__collect_externs(struct bpf_object *obj) scn = elf_sec_by_idx(obj, obj->efile.symbols_shndx); sh = elf_sec_hdr(obj, scn); - if (!sh) + if (!sh || sh->sh_entsize != sizeof(Elf64_Sym)) return -LIBBPF_ERRNO__FORMAT; dummy_var_btf_id = add_dummy_ksym_var(obj->btf); @@ -4022,7 +4081,7 @@ static int bpf_object__collect_prog_relos(struct bpf_object *obj, Elf64_Shdr *shdr, Elf_Data *data) { const char *relo_sec_name, *sec_name; - size_t sec_idx = shdr->sh_info; + size_t sec_idx = shdr->sh_info, sym_idx; struct bpf_program *prog; struct reloc_desc *relos; int err, i, nrels; @@ -4033,6 +4092,9 @@ bpf_object__collect_prog_relos(struct bpf_object *obj, Elf64_Shdr *shdr, Elf_Dat Elf64_Sym *sym; Elf64_Rel *rel; + if (sec_idx >= obj->efile.sec_cnt) + return -EINVAL; + scn = elf_sec_by_idx(obj, sec_idx); scn_data = elf_sec_data(obj, scn); @@ -4052,16 +4114,23 @@ bpf_object__collect_prog_relos(struct bpf_object *obj, Elf64_Shdr *shdr, Elf_Dat return -LIBBPF_ERRNO__FORMAT; } - sym = elf_sym_by_idx(obj, ELF64_R_SYM(rel->r_info)); + sym_idx = ELF64_R_SYM(rel->r_info); + sym = elf_sym_by_idx(obj, sym_idx); if (!sym) { - pr_warn("sec '%s': symbol 0x%zx not found for relo #%d\n", - relo_sec_name, (size_t)ELF64_R_SYM(rel->r_info), i); + pr_warn("sec '%s': symbol #%zu not found for relo #%d\n", + relo_sec_name, sym_idx, i); + return -LIBBPF_ERRNO__FORMAT; + } + + if (sym->st_shndx >= obj->efile.sec_cnt) { + pr_warn("sec '%s': corrupted symbol #%zu pointing to invalid section #%zu for relo #%d\n", + relo_sec_name, sym_idx, (size_t)sym->st_shndx, i); return -LIBBPF_ERRNO__FORMAT; } if (rel->r_offset % BPF_INSN_SZ || rel->r_offset >= scn_data->d_size) { pr_warn("sec '%s': invalid offset 0x%zx for relo #%d\n", - relo_sec_name, (size_t)ELF64_R_SYM(rel->r_info), i); + relo_sec_name, (size_t)rel->r_offset, i); return -LIBBPF_ERRNO__FORMAT; } @@ -4265,30 +4334,24 @@ int bpf_map__resize(struct bpf_map *map, __u32 max_entries) static int bpf_object__probe_loading(struct bpf_object *obj) { - struct bpf_load_program_attr attr; char *cp, errmsg[STRERR_BUFSIZE]; struct bpf_insn insns[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }; - int ret; + int ret, insn_cnt = ARRAY_SIZE(insns); if (obj->gen_loader) return 0; - /* make sure basic loading works */ - - memset(&attr, 0, sizeof(attr)); - attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; - attr.insns = insns; - attr.insns_cnt = ARRAY_SIZE(insns); - attr.license = "GPL"; + ret = bump_rlimit_memlock(); + if (ret) + pr_warn("Failed to bump RLIMIT_MEMLOCK (err = %d), you might need to do it explicitly!\n", ret); - ret = bpf_load_program_xattr(&attr, NULL, 0); - if (ret < 0) { - attr.prog_type = BPF_PROG_TYPE_TRACEPOINT; - ret = bpf_load_program_xattr(&attr, NULL, 0); - } + /* make sure basic loading works */ + ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL); + if (ret < 0) + ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, NULL); if (ret < 0) { ret = errno; cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); @@ -4312,29 +4375,19 @@ static int probe_fd(int fd) static int probe_kern_prog_name(void) { - struct bpf_load_program_attr attr; struct bpf_insn insns[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }; - int ret; + int ret, insn_cnt = ARRAY_SIZE(insns); /* make sure loading with name works */ - - memset(&attr, 0, sizeof(attr)); - attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; - attr.insns = insns; - attr.insns_cnt = ARRAY_SIZE(insns); - attr.license = "GPL"; - attr.name = "test"; - ret = bpf_load_program_xattr(&attr, NULL, 0); + ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, "test", "GPL", insns, insn_cnt, NULL); return probe_fd(ret); } static int probe_kern_global_data(void) { - struct bpf_load_program_attr prg_attr; - struct bpf_create_map_attr map_attr; char *cp, errmsg[STRERR_BUFSIZE]; struct bpf_insn insns[] = { BPF_LD_MAP_VALUE(BPF_REG_1, 0, 16), @@ -4342,15 +4395,9 @@ static int probe_kern_global_data(void) BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }; - int ret, map; - - memset(&map_attr, 0, sizeof(map_attr)); - map_attr.map_type = BPF_MAP_TYPE_ARRAY; - map_attr.key_size = sizeof(int); - map_attr.value_size = 32; - map_attr.max_entries = 1; + int ret, map, insn_cnt = ARRAY_SIZE(insns); - map = bpf_create_map_xattr(&map_attr); + map = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(int), 32, 1, NULL); if (map < 0) { ret = -errno; cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); @@ -4361,13 +4408,7 @@ static int probe_kern_global_data(void) insns[0].imm = map; - memset(&prg_attr, 0, sizeof(prg_attr)); - prg_attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; - prg_attr.insns = insns; - prg_attr.insns_cnt = ARRAY_SIZE(insns); - prg_attr.license = "GPL"; - - ret = bpf_load_program_xattr(&prg_attr, NULL, 0); + ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL); close(map); return probe_fd(ret); } @@ -4468,45 +4509,51 @@ static int probe_kern_btf_decl_tag(void) strs, sizeof(strs))); } -static int probe_kern_array_mmap(void) +static int probe_kern_btf_type_tag(void) { - struct bpf_create_map_attr attr = { - .map_type = BPF_MAP_TYPE_ARRAY, - .map_flags = BPF_F_MMAPABLE, - .key_size = sizeof(int), - .value_size = sizeof(int), - .max_entries = 1, + static const char strs[] = "\0tag"; + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* attr */ + BTF_TYPE_TYPE_TAG_ENC(1, 1), /* [2] */ + /* ptr */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2), /* [3] */ }; - return probe_fd(bpf_create_map_xattr(&attr)); + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs))); +} + +static int probe_kern_array_mmap(void) +{ + LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_MMAPABLE); + int fd; + + fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(int), sizeof(int), 1, &opts); + return probe_fd(fd); } static int probe_kern_exp_attach_type(void) { - struct bpf_load_program_attr attr; + LIBBPF_OPTS(bpf_prog_load_opts, opts, .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE); struct bpf_insn insns[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }; + int fd, insn_cnt = ARRAY_SIZE(insns); - memset(&attr, 0, sizeof(attr)); /* use any valid combination of program type and (optional) * non-zero expected attach type (i.e., not a BPF_CGROUP_INET_INGRESS) * to see if kernel supports expected_attach_type field for * BPF_PROG_LOAD command */ - attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK; - attr.expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE; - attr.insns = insns; - attr.insns_cnt = ARRAY_SIZE(insns); - attr.license = "GPL"; - - return probe_fd(bpf_load_program_xattr(&attr, NULL, 0)); + fd = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, NULL, "GPL", insns, insn_cnt, &opts); + return probe_fd(fd); } static int probe_kern_probe_read_kernel(void) { - struct bpf_load_program_attr attr; struct bpf_insn insns[] = { BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), /* r1 = r10 (fp) */ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), /* r1 += -8 */ @@ -4515,34 +4562,22 @@ static int probe_kern_probe_read_kernel(void) BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_probe_read_kernel), BPF_EXIT_INSN(), }; + int fd, insn_cnt = ARRAY_SIZE(insns); - memset(&attr, 0, sizeof(attr)); - attr.prog_type = BPF_PROG_TYPE_KPROBE; - attr.insns = insns; - attr.insns_cnt = ARRAY_SIZE(insns); - attr.license = "GPL"; - - return probe_fd(bpf_load_program_xattr(&attr, NULL, 0)); + fd = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", insns, insn_cnt, NULL); + return probe_fd(fd); } static int probe_prog_bind_map(void) { - struct bpf_load_program_attr prg_attr; - struct bpf_create_map_attr map_attr; char *cp, errmsg[STRERR_BUFSIZE]; struct bpf_insn insns[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }; - int ret, map, prog; - - memset(&map_attr, 0, sizeof(map_attr)); - map_attr.map_type = BPF_MAP_TYPE_ARRAY; - map_attr.key_size = sizeof(int); - map_attr.value_size = 32; - map_attr.max_entries = 1; + int ret, map, prog, insn_cnt = ARRAY_SIZE(insns); - map = bpf_create_map_xattr(&map_attr); + map = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(int), 32, 1, NULL); if (map < 0) { ret = -errno; cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); @@ -4551,13 +4586,7 @@ static int probe_prog_bind_map(void) return ret; } - memset(&prg_attr, 0, sizeof(prg_attr)); - prg_attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; - prg_attr.insns = insns; - prg_attr.insns_cnt = ARRAY_SIZE(insns); - prg_attr.license = "GPL"; - - prog = bpf_load_program_xattr(&prg_attr, NULL, 0); + prog = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL); if (prog < 0) { close(map); return 0; @@ -4602,19 +4631,14 @@ static int probe_module_btf(void) static int probe_perf_link(void) { - struct bpf_load_program_attr attr; struct bpf_insn insns[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }; int prog_fd, link_fd, err; - memset(&attr, 0, sizeof(attr)); - attr.prog_type = BPF_PROG_TYPE_TRACEPOINT; - attr.insns = insns; - attr.insns_cnt = ARRAY_SIZE(insns); - attr.license = "GPL"; - prog_fd = bpf_load_program_xattr(&attr, NULL, 0); + prog_fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", + insns, ARRAY_SIZE(insns), NULL); if (prog_fd < 0) return -errno; @@ -4687,14 +4711,20 @@ static struct kern_feature_desc { [FEAT_BTF_DECL_TAG] = { "BTF_KIND_DECL_TAG support", probe_kern_btf_decl_tag, }, + [FEAT_BTF_TYPE_TAG] = { + "BTF_KIND_TYPE_TAG support", probe_kern_btf_type_tag, + }, + [FEAT_MEMCG_ACCOUNT] = { + "memcg-based memory accounting", probe_memcg_account, + }, }; -static bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id) +bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id) { struct kern_feature_desc *feat = &feature_probes[feat_id]; int ret; - if (obj->gen_loader) + if (obj && obj->gen_loader) /* To generate loader program assume the latest kernel * to avoid doing extra prog_load, map_create syscalls. */ @@ -4821,19 +4851,16 @@ static void bpf_map__destroy(struct bpf_map *map); static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, bool is_inner) { - struct bpf_create_map_params create_attr; + LIBBPF_OPTS(bpf_map_create_opts, create_attr); struct bpf_map_def *def = &map->def; + const char *map_name = NULL; + __u32 max_entries; int err = 0; - memset(&create_attr, 0, sizeof(create_attr)); - if (kernel_supports(obj, FEAT_PROG_NAME)) - create_attr.name = map->name; + map_name = map->name; create_attr.map_ifindex = map->map_ifindex; - create_attr.map_type = def->type; create_attr.map_flags = def->map_flags; - create_attr.key_size = def->key_size; - create_attr.value_size = def->value_size; create_attr.numa_node = map->numa_node; create_attr.map_extra = map->map_extra; @@ -4847,18 +4874,14 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b return nr_cpus; } pr_debug("map '%s': setting size to %d\n", map->name, nr_cpus); - create_attr.max_entries = nr_cpus; + max_entries = nr_cpus; } else { - create_attr.max_entries = def->max_entries; + max_entries = def->max_entries; } if (bpf_map__is_struct_ops(map)) - create_attr.btf_vmlinux_value_type_id = - map->btf_vmlinux_value_type_id; + create_attr.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id; - create_attr.btf_fd = 0; - create_attr.btf_key_type_id = 0; - create_attr.btf_value_type_id = 0; if (obj->btf && btf__fd(obj->btf) >= 0 && !bpf_map_find_btf_info(obj, map)) { create_attr.btf_fd = btf__fd(obj->btf); create_attr.btf_key_type_id = map->btf_key_type_id; @@ -4904,13 +4927,17 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b } if (obj->gen_loader) { - bpf_gen__map_create(obj->gen_loader, &create_attr, is_inner ? -1 : map - obj->maps); + bpf_gen__map_create(obj->gen_loader, def->type, map_name, + def->key_size, def->value_size, max_entries, + &create_attr, is_inner ? -1 : map - obj->maps); /* Pretend to have valid FD to pass various fd >= 0 checks. * This fd == 0 will not be used with any syscall and will be reset to -1 eventually. */ map->fd = 0; } else { - map->fd = libbpf__bpf_create_map_xattr(&create_attr); + map->fd = bpf_map_create(def->type, map_name, + def->key_size, def->value_size, + max_entries, &create_attr); } if (map->fd < 0 && (create_attr.btf_key_type_id || create_attr.btf_value_type_id)) { @@ -4925,7 +4952,9 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b create_attr.btf_value_type_id = 0; map->btf_key_type_id = 0; map->btf_value_type_id = 0; - map->fd = libbpf__bpf_create_map_xattr(&create_attr); + map->fd = bpf_map_create(def->type, map_name, + def->key_size, def->value_size, + max_entries, &create_attr); } err = map->fd < 0 ? -errno : 0; @@ -4940,7 +4969,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b return err; } -static int init_map_slots(struct bpf_object *obj, struct bpf_map *map) +static int init_map_in_map_slots(struct bpf_object *obj, struct bpf_map *map) { const struct bpf_map *targ_map; unsigned int i; @@ -4952,18 +4981,18 @@ static int init_map_slots(struct bpf_object *obj, struct bpf_map *map) targ_map = map->init_slots[i]; fd = bpf_map__fd(targ_map); + if (obj->gen_loader) { - pr_warn("// TODO map_update_elem: idx %td key %d value==map_idx %td\n", - map - obj->maps, i, targ_map - obj->maps); - return -ENOTSUP; + bpf_gen__populate_outer_map(obj->gen_loader, + map - obj->maps, i, + targ_map - obj->maps); } else { err = bpf_map_update_elem(map->fd, &i, &fd, 0); } if (err) { err = -errno; pr_warn("map '%s': failed to initialize slot [%d] to map '%s' fd=%d: %d\n", - map->name, i, targ_map->name, - fd, err); + map->name, i, targ_map->name, fd, err); return err; } pr_debug("map '%s': slot [%d] set to map '%s' fd=%d\n", @@ -4976,6 +5005,59 @@ static int init_map_slots(struct bpf_object *obj, struct bpf_map *map) return 0; } +static int init_prog_array_slots(struct bpf_object *obj, struct bpf_map *map) +{ + const struct bpf_program *targ_prog; + unsigned int i; + int fd, err; + + if (obj->gen_loader) + return -ENOTSUP; + + for (i = 0; i < map->init_slots_sz; i++) { + if (!map->init_slots[i]) + continue; + + targ_prog = map->init_slots[i]; + fd = bpf_program__fd(targ_prog); + + err = bpf_map_update_elem(map->fd, &i, &fd, 0); + if (err) { + err = -errno; + pr_warn("map '%s': failed to initialize slot [%d] to prog '%s' fd=%d: %d\n", + map->name, i, targ_prog->name, fd, err); + return err; + } + pr_debug("map '%s': slot [%d] set to prog '%s' fd=%d\n", + map->name, i, targ_prog->name, fd); + } + + zfree(&map->init_slots); + map->init_slots_sz = 0; + + return 0; +} + +static int bpf_object_init_prog_arrays(struct bpf_object *obj) +{ + struct bpf_map *map; + int i, err; + + for (i = 0; i < obj->nr_maps; i++) { + map = &obj->maps[i]; + + if (!map->init_slots_sz || map->def.type != BPF_MAP_TYPE_PROG_ARRAY) + continue; + + err = init_prog_array_slots(obj, map); + if (err < 0) { + zclose(map->fd); + return err; + } + } + return 0; +} + static int bpf_object__create_maps(struct bpf_object *obj) { @@ -4988,6 +5070,26 @@ bpf_object__create_maps(struct bpf_object *obj) for (i = 0; i < obj->nr_maps; i++) { map = &obj->maps[i]; + /* To support old kernels, we skip creating global data maps + * (.rodata, .data, .kconfig, etc); later on, during program + * loading, if we detect that at least one of the to-be-loaded + * programs is referencing any global data map, we'll error + * out with program name and relocation index logged. + * This approach allows to accommodate Clang emitting + * unnecessary .rodata.str1.1 sections for string literals, + * but also it allows to have CO-RE applications that use + * global variables in some of BPF programs, but not others. + * If those global variable-using programs are not loaded at + * runtime due to bpf_program__set_autoload(prog, false), + * bpf_object loading will succeed just fine even on old + * kernels. + */ + if (bpf_map__is_internal(map) && + !kernel_supports(obj, FEAT_GLOBAL_DATA)) { + map->skipped = true; + continue; + } + retried = false; retry: if (map->pin_path) { @@ -5024,8 +5126,8 @@ retry: } } - if (map->init_slots_sz) { - err = init_map_slots(obj, map); + if (map->init_slots_sz && map->def.type != BPF_MAP_TYPE_PROG_ARRAY) { + err = init_map_in_map_slots(obj, map); if (err < 0) { zclose(map->fd); goto err_out; @@ -5097,15 +5199,18 @@ static int bpf_core_add_cands(struct bpf_core_cand *local_cand, struct bpf_core_cand_list *cands) { struct bpf_core_cand *new_cands, *cand; - const struct btf_type *t; - const char *targ_name; + const struct btf_type *t, *local_t; + const char *targ_name, *local_name; size_t targ_essent_len; int n, i; + local_t = btf__type_by_id(local_cand->btf, local_cand->id); + local_name = btf__str_by_offset(local_cand->btf, local_t->name_off); + n = btf__type_cnt(targ_btf); for (i = targ_start_id; i < n; i++) { t = btf__type_by_id(targ_btf, i); - if (btf_kind(t) != btf_kind(local_cand->t)) + if (btf_kind(t) != btf_kind(local_t)) continue; targ_name = btf__name_by_offset(targ_btf, t->name_off); @@ -5116,12 +5221,12 @@ static int bpf_core_add_cands(struct bpf_core_cand *local_cand, if (targ_essent_len != local_essent_len) continue; - if (strncmp(local_cand->name, targ_name, local_essent_len) != 0) + if (strncmp(local_name, targ_name, local_essent_len) != 0) continue; pr_debug("CO-RE relocating [%d] %s %s: found target candidate [%d] %s %s in [%s]\n", - local_cand->id, btf_kind_str(local_cand->t), - local_cand->name, i, btf_kind_str(t), targ_name, + local_cand->id, btf_kind_str(local_t), + local_name, i, btf_kind_str(t), targ_name, targ_btf_name); new_cands = libbpf_reallocarray(cands->cands, cands->len + 1, sizeof(*cands->cands)); @@ -5130,8 +5235,6 @@ static int bpf_core_add_cands(struct bpf_core_cand *local_cand, cand = &new_cands[cands->len]; cand->btf = targ_btf; - cand->t = t; - cand->name = targ_name; cand->id = i; cands->cands = new_cands; @@ -5238,18 +5341,21 @@ bpf_core_find_cands(struct bpf_object *obj, const struct btf *local_btf, __u32 l struct bpf_core_cand local_cand = {}; struct bpf_core_cand_list *cands; const struct btf *main_btf; + const struct btf_type *local_t; + const char *local_name; size_t local_essent_len; int err, i; local_cand.btf = local_btf; - local_cand.t = btf__type_by_id(local_btf, local_type_id); - if (!local_cand.t) + local_cand.id = local_type_id; + local_t = btf__type_by_id(local_btf, local_type_id); + if (!local_t) return ERR_PTR(-EINVAL); - local_cand.name = btf__name_by_offset(local_btf, local_cand.t->name_off); - if (str_is_empty(local_cand.name)) + local_name = btf__name_by_offset(local_btf, local_t->name_off); + if (str_is_empty(local_name)) return ERR_PTR(-EINVAL); - local_essent_len = bpf_core_essential_name_len(local_cand.name); + local_essent_len = bpf_core_essential_name_len(local_name); cands = calloc(1, sizeof(*cands)); if (!cands) @@ -5399,12 +5505,31 @@ static void *u32_as_hash_key(__u32 x) return (void *)(uintptr_t)x; } +static int record_relo_core(struct bpf_program *prog, + const struct bpf_core_relo *core_relo, int insn_idx) +{ + struct reloc_desc *relos, *relo; + + relos = libbpf_reallocarray(prog->reloc_desc, + prog->nr_reloc + 1, sizeof(*relos)); + if (!relos) + return -ENOMEM; + relo = &relos[prog->nr_reloc]; + relo->type = RELO_CORE; + relo->insn_idx = insn_idx; + relo->core_relo = core_relo; + prog->reloc_desc = relos; + prog->nr_reloc++; + return 0; +} + static int bpf_core_apply_relo(struct bpf_program *prog, const struct bpf_core_relo *relo, int relo_idx, const struct btf *local_btf, struct hashmap *cand_cache) { + struct bpf_core_spec specs_scratch[3] = {}; const void *type_key = u32_as_hash_key(relo->type_id); struct bpf_core_cand_list *cands = NULL; const char *prog_name = prog->name; @@ -5435,13 +5560,15 @@ static int bpf_core_apply_relo(struct bpf_program *prog, return -EINVAL; if (prog->obj->gen_loader) { - pr_warn("// TODO core_relo: prog %td insn[%d] %s kind %d\n", + const char *spec_str = btf__name_by_offset(local_btf, relo->access_str_off); + + pr_debug("record_relo_core: prog %td insn[%d] %s %s %s final insn_idx %d\n", prog - prog->obj->programs, relo->insn_off / 8, - local_name, relo->kind); - return -ENOTSUP; + btf_kind_str(local_type), local_name, spec_str, insn_idx); + return record_relo_core(prog, relo, insn_idx); } - if (relo->kind != BPF_TYPE_ID_LOCAL && + if (relo->kind != BPF_CORE_TYPE_ID_LOCAL && !hashmap__find(cand_cache, type_key, (void **)&cands)) { cands = bpf_core_find_cands(prog->obj, local_btf, local_id); if (IS_ERR(cands)) { @@ -5457,7 +5584,8 @@ static int bpf_core_apply_relo(struct bpf_program *prog, } } - return bpf_core_apply_relo_insn(prog_name, insn, insn_idx, relo, relo_idx, local_btf, cands); + return bpf_core_apply_relo_insn(prog_name, insn, insn_idx, relo, + relo_idx, local_btf, cands, specs_scratch); } static int @@ -5587,6 +5715,13 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog) insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE; insn[0].imm = relo->map_idx; } else { + const struct bpf_map *map = &obj->maps[relo->map_idx]; + + if (map->skipped) { + pr_warn("prog '%s': relo #%d: kernel doesn't support global data\n", + prog->name, i); + return -ENOTSUP; + } insn[0].src_reg = BPF_PSEUDO_MAP_VALUE; insn[0].imm = obj->maps[relo->map_idx].fd; } @@ -5635,6 +5770,9 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog) case RELO_CALL: /* handled already */ break; + case RELO_CORE: + /* will be handled by bpf_program_record_relos() */ + break; default: pr_warn("prog '%s': relo #%d: bad relo type %d\n", prog->name, i, relo->type); @@ -5798,6 +5936,8 @@ static int cmp_relo_by_insn_idx(const void *key, const void *elem) static struct reloc_desc *find_prog_insn_relo(const struct bpf_program *prog, size_t insn_idx) { + if (!prog->nr_reloc) + return NULL; return bsearch(&insn_idx, prog->reloc_desc, prog->nr_reloc, sizeof(*prog->reloc_desc), cmp_relo_by_insn_idx); } @@ -5813,8 +5953,9 @@ static int append_subprog_relos(struct bpf_program *main_prog, struct bpf_progra relos = libbpf_reallocarray(main_prog->reloc_desc, new_cnt, sizeof(*relos)); if (!relos) return -ENOMEM; - memcpy(relos + main_prog->nr_reloc, subprog->reloc_desc, - sizeof(*relos) * subprog->nr_reloc); + if (subprog->nr_reloc) + memcpy(relos + main_prog->nr_reloc, subprog->reloc_desc, + sizeof(*relos) * subprog->nr_reloc); for (i = main_prog->nr_reloc; i < new_cnt; i++) relos[i].insn_idx += subprog->sub_insn_off; @@ -6072,6 +6213,35 @@ bpf_object__free_relocs(struct bpf_object *obj) } } +static int cmp_relocs(const void *_a, const void *_b) +{ + const struct reloc_desc *a = _a; + const struct reloc_desc *b = _b; + + if (a->insn_idx != b->insn_idx) + return a->insn_idx < b->insn_idx ? -1 : 1; + + /* no two relocations should have the same insn_idx, but ... */ + if (a->type != b->type) + return a->type < b->type ? -1 : 1; + + return 0; +} + +static void bpf_object__sort_relos(struct bpf_object *obj) +{ + int i; + + for (i = 0; i < obj->nr_programs; i++) { + struct bpf_program *p = &obj->programs[i]; + + if (!p->nr_reloc) + continue; + + qsort(p->reloc_desc, p->nr_reloc, sizeof(*p->reloc_desc), cmp_relocs); + } +} + static int bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) { @@ -6086,6 +6256,8 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) err); return err; } + if (obj->gen_loader) + bpf_object__sort_relos(obj); } /* Before relocating calls pre-process relocations and mark @@ -6121,6 +6293,8 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) */ if (prog_is_subprog(obj, prog)) continue; + if (!prog->load) + continue; err = bpf_object__relocate_calls(obj, prog); if (err) { @@ -6134,6 +6308,8 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) prog = &obj->programs[i]; if (prog_is_subprog(obj, prog)) continue; + if (!prog->load) + continue; err = bpf_object__relocate_data(obj, prog); if (err) { pr_warn("prog '%s': failed to relocate data references: %d\n", @@ -6156,9 +6332,11 @@ static int bpf_object__collect_map_relos(struct bpf_object *obj, int i, j, nrels, new_sz; const struct btf_var_secinfo *vi = NULL; const struct btf_type *sec, *var, *def; - struct bpf_map *map = NULL, *targ_map; + struct bpf_map *map = NULL, *targ_map = NULL; + struct bpf_program *targ_prog = NULL; + bool is_prog_array, is_map_in_map; const struct btf_member *member; - const char *name, *mname; + const char *name, *mname, *type; unsigned int moff; Elf64_Sym *sym; Elf64_Rel *rel; @@ -6185,11 +6363,6 @@ static int bpf_object__collect_map_relos(struct bpf_object *obj, return -LIBBPF_ERRNO__FORMAT; } name = elf_sym_str(obj, sym->st_name) ?: "<?>"; - if (sym->st_shndx != obj->efile.btf_maps_shndx) { - pr_warn(".maps relo #%d: '%s' isn't a BTF-defined map\n", - i, name); - return -LIBBPF_ERRNO__RELOC; - } pr_debug(".maps relo #%d: for %zd value %zd rel->r_offset %zu name %d ('%s')\n", i, (ssize_t)(rel->r_info >> 32), (size_t)sym->st_value, @@ -6211,19 +6384,45 @@ static int bpf_object__collect_map_relos(struct bpf_object *obj, return -EINVAL; } - if (!bpf_map_type__is_map_in_map(map->def.type)) - return -EINVAL; - if (map->def.type == BPF_MAP_TYPE_HASH_OF_MAPS && - map->def.key_size != sizeof(int)) { - pr_warn(".maps relo #%d: hash-of-maps '%s' should have key size %zu.\n", - i, map->name, sizeof(int)); + is_map_in_map = bpf_map_type__is_map_in_map(map->def.type); + is_prog_array = map->def.type == BPF_MAP_TYPE_PROG_ARRAY; + type = is_map_in_map ? "map" : "prog"; + if (is_map_in_map) { + if (sym->st_shndx != obj->efile.btf_maps_shndx) { + pr_warn(".maps relo #%d: '%s' isn't a BTF-defined map\n", + i, name); + return -LIBBPF_ERRNO__RELOC; + } + if (map->def.type == BPF_MAP_TYPE_HASH_OF_MAPS && + map->def.key_size != sizeof(int)) { + pr_warn(".maps relo #%d: hash-of-maps '%s' should have key size %zu.\n", + i, map->name, sizeof(int)); + return -EINVAL; + } + targ_map = bpf_object__find_map_by_name(obj, name); + if (!targ_map) { + pr_warn(".maps relo #%d: '%s' isn't a valid map reference\n", + i, name); + return -ESRCH; + } + } else if (is_prog_array) { + targ_prog = bpf_object__find_program_by_name(obj, name); + if (!targ_prog) { + pr_warn(".maps relo #%d: '%s' isn't a valid program reference\n", + i, name); + return -ESRCH; + } + if (targ_prog->sec_idx != sym->st_shndx || + targ_prog->sec_insn_off * 8 != sym->st_value || + prog_is_subprog(obj, targ_prog)) { + pr_warn(".maps relo #%d: '%s' isn't an entry-point program\n", + i, name); + return -LIBBPF_ERRNO__RELOC; + } + } else { return -EINVAL; } - targ_map = bpf_object__find_map_by_name(obj, name); - if (!targ_map) - return -ESRCH; - var = btf__type_by_id(obj->btf, vi->type); def = skip_mods_and_typedefs(obj->btf, var->type, NULL); if (btf_vlen(def) == 0) @@ -6254,30 +6453,15 @@ static int bpf_object__collect_map_relos(struct bpf_object *obj, (new_sz - map->init_slots_sz) * host_ptr_sz); map->init_slots_sz = new_sz; } - map->init_slots[moff] = targ_map; + map->init_slots[moff] = is_map_in_map ? (void *)targ_map : (void *)targ_prog; - pr_debug(".maps relo #%d: map '%s' slot [%d] points to map '%s'\n", - i, map->name, moff, name); + pr_debug(".maps relo #%d: map '%s' slot [%d] points to %s '%s'\n", + i, map->name, moff, type, name); } return 0; } -static int cmp_relocs(const void *_a, const void *_b) -{ - const struct reloc_desc *a = _a; - const struct reloc_desc *b = _b; - - if (a->insn_idx != b->insn_idx) - return a->insn_idx < b->insn_idx ? -1 : 1; - - /* no two relocations should have the same insn_idx, but ... */ - if (a->type != b->type) - return a->type < b->type ? -1 : 1; - - return 0; -} - static int bpf_object__collect_relos(struct bpf_object *obj) { int i, err; @@ -6310,14 +6494,7 @@ static int bpf_object__collect_relos(struct bpf_object *obj) return err; } - for (i = 0; i < obj->nr_programs; i++) { - struct bpf_program *p = &obj->programs[i]; - - if (!p->nr_reloc) - continue; - - qsort(p->reloc_desc, p->nr_reloc, sizeof(*p->reloc_desc), cmp_relocs); - } + bpf_object__sort_relos(obj); return 0; } @@ -6374,16 +6551,16 @@ static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attac /* this is called as prog->sec_def->preload_fn for libbpf-supported sec_defs */ static int libbpf_preload_prog(struct bpf_program *prog, - struct bpf_prog_load_params *attr, long cookie) + struct bpf_prog_load_opts *opts, long cookie) { enum sec_def_flags def = cookie; /* old kernels might not support specifying expected_attach_type */ if ((def & SEC_EXP_ATTACH_OPT) && !kernel_supports(prog->obj, FEAT_EXP_ATTACH_TYPE)) - attr->expected_attach_type = 0; + opts->expected_attach_type = 0; if (def & SEC_SLEEPABLE) - attr->prog_flags |= BPF_F_SLEEPABLE; + opts->prog_flags |= BPF_F_SLEEPABLE; if ((prog->type == BPF_PROG_TYPE_TRACING || prog->type == BPF_PROG_TYPE_LSM || @@ -6402,25 +6579,28 @@ static int libbpf_preload_prog(struct bpf_program *prog, /* but by now libbpf common logic is not utilizing * prog->atach_btf_obj_fd/prog->attach_btf_id anymore because - * this callback is called after attrs were populated by - * libbpf, so this callback has to update attr explicitly here + * this callback is called after opts were populated by + * libbpf, so this callback has to update opts explicitly here */ - attr->attach_btf_obj_fd = btf_obj_fd; - attr->attach_btf_id = btf_type_id; + opts->attach_btf_obj_fd = btf_obj_fd; + opts->attach_btf_id = btf_type_id; } return 0; } -static int -load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, - char *license, __u32 kern_version, int *pfd) +static int bpf_object_load_prog_instance(struct bpf_object *obj, struct bpf_program *prog, + struct bpf_insn *insns, int insns_cnt, + const char *license, __u32 kern_version, + int *prog_fd) { - struct bpf_prog_load_params load_attr = {}; - struct bpf_object *obj = prog->obj; + LIBBPF_OPTS(bpf_prog_load_opts, load_attr); + const char *prog_name = NULL; char *cp, errmsg[STRERR_BUFSIZE]; size_t log_buf_size = 0; - char *log_buf = NULL; + char *log_buf = NULL, *tmp; int btf_fd, ret, err; + bool own_log_buf = true; + __u32 log_level = prog->log_level; if (prog->type == BPF_PROG_TYPE_UNSPEC) { /* @@ -6435,14 +6615,9 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, if (!insns || !insns_cnt) return -EINVAL; - load_attr.prog_type = prog->type; load_attr.expected_attach_type = prog->expected_attach_type; if (kernel_supports(obj, FEAT_PROG_NAME)) - load_attr.name = prog->name; - load_attr.insns = insns; - load_attr.insn_cnt = insns_cnt; - load_attr.license = license; - load_attr.attach_btf_id = prog->attach_btf_id; + prog_name = prog->name; load_attr.attach_prog_fd = prog->attach_prog_fd; load_attr.attach_btf_obj_fd = prog->attach_btf_obj_fd; load_attr.attach_btf_id = prog->attach_btf_id; @@ -6460,7 +6635,7 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, load_attr.line_info_rec_size = prog->line_info_rec_size; load_attr.line_info_cnt = prog->line_info_cnt; } - load_attr.log_level = prog->log_level; + load_attr.log_level = log_level; load_attr.prog_flags = prog->prog_flags; load_attr.fd_array = obj->fd_array; @@ -6475,27 +6650,51 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, } if (obj->gen_loader) { - bpf_gen__prog_load(obj->gen_loader, &load_attr, + bpf_gen__prog_load(obj->gen_loader, prog->type, prog->name, + license, insns, insns_cnt, &load_attr, prog - obj->programs); - *pfd = -1; + *prog_fd = -1; return 0; } -retry_load: - if (log_buf_size) { - log_buf = malloc(log_buf_size); - if (!log_buf) - return -ENOMEM; - *log_buf = 0; +retry_load: + /* if log_level is zero, we don't request logs initiallly even if + * custom log_buf is specified; if the program load fails, then we'll + * bump log_level to 1 and use either custom log_buf or we'll allocate + * our own and retry the load to get details on what failed + */ + if (log_level) { + if (prog->log_buf) { + log_buf = prog->log_buf; + log_buf_size = prog->log_size; + own_log_buf = false; + } else if (obj->log_buf) { + log_buf = obj->log_buf; + log_buf_size = obj->log_size; + own_log_buf = false; + } else { + log_buf_size = max((size_t)BPF_LOG_BUF_SIZE, log_buf_size * 2); + tmp = realloc(log_buf, log_buf_size); + if (!tmp) { + ret = -ENOMEM; + goto out; + } + log_buf = tmp; + log_buf[0] = '\0'; + own_log_buf = true; + } } load_attr.log_buf = log_buf; - load_attr.log_buf_sz = log_buf_size; - ret = libbpf__bpf_prog_load(&load_attr); + load_attr.log_size = log_buf_size; + load_attr.log_level = log_level; + ret = bpf_prog_load(prog->type, prog_name, license, insns, insns_cnt, &load_attr); if (ret >= 0) { - if (log_buf && load_attr.log_level) - pr_debug("verifier log:\n%s", log_buf); + if (log_level && own_log_buf) { + pr_debug("prog '%s': -- BEGIN PROG LOAD LOG --\n%s-- END PROG LOAD LOG --\n", + prog->name, log_buf); + } if (obj->has_rodata && kernel_supports(obj, FEAT_PROG_BIND_MAP)) { struct bpf_map *map; @@ -6508,61 +6707,53 @@ retry_load: if (bpf_prog_bind_map(ret, bpf_map__fd(map), NULL)) { cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); - pr_warn("prog '%s': failed to bind .rodata map: %s\n", - prog->name, cp); + pr_warn("prog '%s': failed to bind map '%s': %s\n", + prog->name, map->real_name, cp); /* Don't fail hard if can't bind rodata. */ } } } - *pfd = ret; + *prog_fd = ret; ret = 0; goto out; } - if (!log_buf || errno == ENOSPC) { - log_buf_size = max((size_t)BPF_LOG_BUF_SIZE, - log_buf_size << 1); - - free(log_buf); + if (log_level == 0) { + log_level = 1; goto retry_load; } - ret = errno ? -errno : -LIBBPF_ERRNO__LOAD; + /* On ENOSPC, increase log buffer size and retry, unless custom + * log_buf is specified. + * Be careful to not overflow u32, though. Kernel's log buf size limit + * isn't part of UAPI so it can always be bumped to full 4GB. So don't + * multiply by 2 unless we are sure we'll fit within 32 bits. + * Currently, we'll get -EINVAL when we reach (UINT_MAX >> 2). + */ + if (own_log_buf && errno == ENOSPC && log_buf_size <= UINT_MAX / 2) + goto retry_load; + + ret = -errno; cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); - pr_warn("load bpf program failed: %s\n", cp); + pr_warn("prog '%s': BPF program load failed: %s\n", prog->name, cp); pr_perm_msg(ret); - if (log_buf && log_buf[0] != '\0') { - ret = -LIBBPF_ERRNO__VERIFY; - pr_warn("-- BEGIN DUMP LOG ---\n"); - pr_warn("\n%s\n", log_buf); - pr_warn("-- END LOG --\n"); - } else if (load_attr.insn_cnt >= BPF_MAXINSNS) { - pr_warn("Program too large (%zu insns), at most %d insns\n", - load_attr.insn_cnt, BPF_MAXINSNS); - ret = -LIBBPF_ERRNO__PROG2BIG; - } else if (load_attr.prog_type != BPF_PROG_TYPE_KPROBE) { - /* Wrong program type? */ - int fd; - - load_attr.prog_type = BPF_PROG_TYPE_KPROBE; - load_attr.expected_attach_type = 0; - load_attr.log_buf = NULL; - load_attr.log_buf_sz = 0; - fd = libbpf__bpf_prog_load(&load_attr); - if (fd >= 0) { - close(fd); - ret = -LIBBPF_ERRNO__PROGTYPE; - goto out; - } + if (own_log_buf && log_buf && log_buf[0] != '\0') { + pr_warn("prog '%s': -- BEGIN PROG LOAD LOG --\n%s-- END PROG LOAD LOG --\n", + prog->name, log_buf); + } + if (insns_cnt >= BPF_MAXINSNS) { + pr_warn("prog '%s': program too large (%d insns), at most %d insns\n", + prog->name, insns_cnt, BPF_MAXINSNS); } out: - free(log_buf); + if (own_log_buf) + free(log_buf); return ret; } -static int bpf_program__record_externs(struct bpf_program *prog) +static int bpf_program_record_relos(struct bpf_program *prog) { struct bpf_object *obj = prog->obj; int i; @@ -6584,6 +6775,17 @@ static int bpf_program__record_externs(struct bpf_program *prog) ext->is_weak, false, BTF_KIND_FUNC, relo->insn_idx); break; + case RELO_CORE: { + struct bpf_core_relo cr = { + .insn_off = relo->insn_idx * 8, + .type_id = relo->core_relo->type_id, + .access_str_off = relo->core_relo->access_str_off, + .kind = relo->core_relo->kind, + }; + + bpf_gen__record_relo_core(obj->gen_loader, &cr); + break; + } default: continue; } @@ -6591,11 +6793,12 @@ static int bpf_program__record_externs(struct bpf_program *prog) return 0; } -int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver) +static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog, + const char *license, __u32 kern_ver) { int err = 0, fd, i; - if (prog->obj->loaded) { + if (obj->loaded) { pr_warn("prog '%s': can't load after object was loaded\n", prog->name); return libbpf_err(-EINVAL); } @@ -6621,10 +6824,11 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver) pr_warn("prog '%s': inconsistent nr(%d) != 1\n", prog->name, prog->instances.nr); } - if (prog->obj->gen_loader) - bpf_program__record_externs(prog); - err = load_program(prog, prog->insns, prog->insns_cnt, - license, kern_ver, &fd); + if (obj->gen_loader) + bpf_program_record_relos(prog); + err = bpf_object_load_prog_instance(obj, prog, + prog->insns, prog->insns_cnt, + license, kern_ver, &fd); if (!err) prog->instances.fds[0] = fd; goto out; @@ -6652,8 +6856,9 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver) continue; } - err = load_program(prog, result.new_insn_ptr, - result.new_insn_cnt, license, kern_ver, &fd); + err = bpf_object_load_prog_instance(obj, prog, + result.new_insn_ptr, result.new_insn_cnt, + license, kern_ver, &fd); if (err) { pr_warn("Loading the %dth instance of program '%s' failed\n", i, prog->name); @@ -6670,6 +6875,11 @@ out: return libbpf_err(err); } +int bpf_program__load(struct bpf_program *prog, const char *license, __u32 kern_ver) +{ + return bpf_object_load_prog(prog->obj, prog, license, kern_ver); +} + static int bpf_object__load_progs(struct bpf_object *obj, int log_level) { @@ -6693,7 +6903,7 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level) continue; } prog->log_level |= log_level; - err = bpf_program__load(prog, obj->license, obj->kern_version); + err = bpf_object_load_prog(obj, prog, obj->license, obj->kern_version); if (err) return err; } @@ -6744,14 +6954,16 @@ static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object return 0; } -static struct bpf_object * -__bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz, - const struct bpf_object_open_opts *opts) +static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, size_t obj_buf_sz, + const struct bpf_object_open_opts *opts) { const char *obj_name, *kconfig, *btf_tmp_path; struct bpf_object *obj; char tmp_name[64]; int err; + char *log_buf; + size_t log_size; + __u32 log_level; if (elf_version(EV_CURRENT) == EV_NONE) { pr_warn("failed to init libelf for %s\n", @@ -6774,10 +6986,22 @@ __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz, pr_debug("loading object '%s' from buffer\n", obj_name); } + log_buf = OPTS_GET(opts, kernel_log_buf, NULL); + log_size = OPTS_GET(opts, kernel_log_size, 0); + log_level = OPTS_GET(opts, kernel_log_level, 0); + if (log_size > UINT_MAX) + return ERR_PTR(-EINVAL); + if (log_size && !log_buf) + return ERR_PTR(-EINVAL); + obj = bpf_object__new(path, obj_buf, obj_buf_sz, obj_name); if (IS_ERR(obj)) return obj; + obj->log_buf = log_buf; + obj->log_size = log_size; + obj->log_level = log_level; + btf_tmp_path = OPTS_GET(opts, btf_custom_path, NULL); if (btf_tmp_path) { if (strlen(btf_tmp_path) >= PATH_MAX) { @@ -6831,7 +7055,7 @@ __bpf_object__open_xattr(struct bpf_object_open_attr *attr, int flags) return NULL; pr_debug("loading %s\n", attr->file); - return __bpf_object__open(attr->file, NULL, 0, &opts); + return bpf_object_open(attr->file, NULL, 0, &opts); } struct bpf_object *bpf_object__open_xattr(struct bpf_object_open_attr *attr) @@ -6857,7 +7081,7 @@ bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts) pr_debug("loading %s\n", path); - return libbpf_ptr(__bpf_object__open(path, NULL, 0, opts)); + return libbpf_ptr(bpf_object_open(path, NULL, 0, opts)); } struct bpf_object * @@ -6867,7 +7091,7 @@ bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz, if (!obj_buf || obj_buf_sz == 0) return libbpf_err_ptr(-EINVAL); - return libbpf_ptr(__bpf_object__open(NULL, obj_buf, obj_buf_sz, opts)); + return libbpf_ptr(bpf_object_open(NULL, obj_buf, obj_buf_sz, opts)); } struct bpf_object * @@ -6884,7 +7108,7 @@ bpf_object__open_buffer(const void *obj_buf, size_t obj_buf_sz, if (!obj_buf || obj_buf_sz == 0) return errno = EINVAL, NULL; - return libbpf_ptr(__bpf_object__open(NULL, obj_buf, obj_buf_sz, &opts)); + return libbpf_ptr(bpf_object_open(NULL, obj_buf, obj_buf_sz, &opts)); } static int bpf_object_unload(struct bpf_object *obj) @@ -6915,10 +7139,6 @@ static int bpf_object__sanitize_maps(struct bpf_object *obj) bpf_object__for_each_map(m, obj) { if (!bpf_map__is_internal(m)) continue; - if (!kernel_supports(obj, FEAT_GLOBAL_DATA)) { - pr_warn("kernel doesn't support global data\n"); - return -ENOTSUP; - } if (!kernel_supports(obj, FEAT_ARRAY_MMAP)) m->def.map_flags ^= BPF_F_MMAPABLE; } @@ -7241,14 +7461,10 @@ static int bpf_object__resolve_externs(struct bpf_object *obj, return 0; } -int bpf_object__load_xattr(struct bpf_object_load_attr *attr) +static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const char *target_btf_path) { - struct bpf_object *obj; int err, i; - if (!attr) - return libbpf_err(-EINVAL); - obj = attr->obj; if (!obj) return libbpf_err(-EINVAL); @@ -7258,7 +7474,7 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr) } if (obj->gen_loader) - bpf_gen__init(obj->gen_loader, attr->log_level); + bpf_gen__init(obj->gen_loader, extra_log_level, obj->nr_programs, obj->nr_maps); err = bpf_object__probe_loading(obj); err = err ? : bpf_object__load_vmlinux_btf(obj, false); @@ -7267,8 +7483,9 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr) err = err ? : bpf_object__sanitize_maps(obj); err = err ? : bpf_object__init_kern_struct_ops_maps(obj); err = err ? : bpf_object__create_maps(obj); - err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : attr->target_btf_path); - err = err ? : bpf_object__load_progs(obj, attr->log_level); + err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : target_btf_path); + err = err ? : bpf_object__load_progs(obj, extra_log_level); + err = err ? : bpf_object_init_prog_arrays(obj); if (obj->gen_loader) { /* reset FDs */ @@ -7277,7 +7494,7 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr) for (i = 0; i < obj->nr_maps; i++) obj->maps[i].fd = -1; if (!err) - err = bpf_gen__finish(obj->gen_loader); + err = bpf_gen__finish(obj->gen_loader, obj->nr_programs, obj->nr_maps); } /* clean up fd_array */ @@ -7312,13 +7529,14 @@ out: return libbpf_err(err); } -int bpf_object__load(struct bpf_object *obj) +int bpf_object__load_xattr(struct bpf_object_load_attr *attr) { - struct bpf_object_load_attr attr = { - .obj = obj, - }; + return bpf_object_load(attr->obj, attr->log_level, attr->target_btf_path); +} - return bpf_object__load_xattr(&attr); +int bpf_object__load(struct bpf_object *obj) +{ + return bpf_object_load(obj, 0, NULL); } static int make_parent_dir(const char *path) @@ -7707,6 +7925,9 @@ int bpf_object__pin_maps(struct bpf_object *obj, const char *path) char *pin_path = NULL; char buf[PATH_MAX]; + if (map->skipped) + continue; + if (path) { int len; @@ -7733,7 +7954,7 @@ int bpf_object__pin_maps(struct bpf_object *obj, const char *path) return 0; err_unpin_maps: - while ((map = bpf_map__prev(map, obj))) { + while ((map = bpf_object__prev_map(obj, map))) { if (!map->pin_path) continue; @@ -7813,7 +8034,7 @@ int bpf_object__pin_programs(struct bpf_object *obj, const char *path) return 0; err_unpin_programs: - while ((prog = bpf_program__prev(prog, obj))) { + while ((prog = bpf_object__prev_program(obj, prog))) { char buf[PATH_MAX]; int len; @@ -8154,9 +8375,11 @@ int bpf_program__set_autoload(struct bpf_program *prog, bool autoload) return 0; } +static int bpf_program_nth_fd(const struct bpf_program *prog, int n); + int bpf_program__fd(const struct bpf_program *prog) { - return bpf_program__nth_fd(prog, 0); + return bpf_program_nth_fd(prog, 0); } size_t bpf_program__size(const struct bpf_program *prog) @@ -8202,7 +8425,10 @@ int bpf_program__set_prep(struct bpf_program *prog, int nr_instances, return 0; } -int bpf_program__nth_fd(const struct bpf_program *prog, int n) +__attribute__((alias("bpf_program_nth_fd"))) +int bpf_program__nth_fd(const struct bpf_program *prog, int n); + +static int bpf_program_nth_fd(const struct bpf_program *prog, int n) { int fd; @@ -8281,6 +8507,54 @@ void bpf_program__set_expected_attach_type(struct bpf_program *prog, prog->expected_attach_type = type; } +__u32 bpf_program__flags(const struct bpf_program *prog) +{ + return prog->prog_flags; +} + +int bpf_program__set_flags(struct bpf_program *prog, __u32 flags) +{ + if (prog->obj->loaded) + return libbpf_err(-EBUSY); + + prog->prog_flags = flags; + return 0; +} + +__u32 bpf_program__log_level(const struct bpf_program *prog) +{ + return prog->log_level; +} + +int bpf_program__set_log_level(struct bpf_program *prog, __u32 log_level) +{ + if (prog->obj->loaded) + return libbpf_err(-EBUSY); + + prog->log_level = log_level; + return 0; +} + +const char *bpf_program__log_buf(const struct bpf_program *prog, size_t *log_size) +{ + *log_size = prog->log_size; + return prog->log_buf; +} + +int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log_size) +{ + if (log_size && !log_buf) + return -EINVAL; + if (prog->log_size > UINT_MAX) + return -EINVAL; + if (prog->obj->loaded) + return -EBUSY; + + prog->log_buf = log_buf; + prog->log_size = log_size; + return 0; +} + #define SEC_DEF(sec_pfx, ptype, atype, flags, ...) { \ .sec = sec_pfx, \ .prog_type = BPF_PROG_TYPE_##ptype, \ @@ -9028,7 +9302,10 @@ int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd) pr_warn("error: inner_map_fd already specified\n"); return libbpf_err(-EINVAL); } - zfree(&map->inner_map); + if (map->inner_map) { + bpf_map__destroy(map->inner_map); + zfree(&map->inner_map); + } map->inner_map_fd = fd; return 0; } @@ -9145,21 +9422,12 @@ long libbpf_get_error(const void *ptr) return -errno; } -int bpf_prog_load(const char *file, enum bpf_prog_type type, - struct bpf_object **pobj, int *prog_fd) -{ - struct bpf_prog_load_attr attr; - - memset(&attr, 0, sizeof(struct bpf_prog_load_attr)); - attr.file = file; - attr.prog_type = type; - attr.expected_attach_type = 0; - - return bpf_prog_load_xattr(&attr, pobj, prog_fd); -} - +__attribute__((alias("bpf_prog_load_xattr2"))) int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, - struct bpf_object **pobj, int *prog_fd) + struct bpf_object **pobj, int *prog_fd); + +static int bpf_prog_load_xattr2(const struct bpf_prog_load_attr *attr, + struct bpf_object **pobj, int *prog_fd) { struct bpf_object_open_attr open_attr = {}; struct bpf_program *prog, *first_prog = NULL; @@ -9230,6 +9498,20 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, return 0; } +COMPAT_VERSION(bpf_prog_load_deprecated, bpf_prog_load, LIBBPF_0.0.1) +int bpf_prog_load_deprecated(const char *file, enum bpf_prog_type type, + struct bpf_object **pobj, int *prog_fd) +{ + struct bpf_prog_load_attr attr; + + memset(&attr, 0, sizeof(struct bpf_prog_load_attr)); + attr.file = file; + attr.prog_type = type; + attr.expected_attach_type = 0; + + return bpf_prog_load_xattr2(&attr, pobj, prog_fd); +} + struct bpf_link { int (*detach)(struct bpf_link *link); void (*dealloc)(struct bpf_link *link); @@ -9634,7 +9916,10 @@ static int append_to_file(const char *file, const char *fmt, ...) static void gen_kprobe_legacy_event_name(char *buf, size_t buf_sz, const char *kfunc_name, size_t offset) { - snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx", getpid(), kfunc_name, offset); + static int index = 0; + + snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx_%d", getpid(), kfunc_name, offset, + __sync_fetch_and_add(&index, 1)); } static int add_kprobe_event_legacy(const char *probe_name, bool retprobe, @@ -9735,7 +10020,7 @@ bpf_program__attach_kprobe_opts(const struct bpf_program *prog, gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name), func_name, offset); - legacy_probe = strdup(func_name); + legacy_probe = strdup(probe_name); if (!legacy_probe) return libbpf_err_ptr(-ENOMEM); @@ -10394,10 +10679,10 @@ struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map) return link; } -enum bpf_perf_event_ret -bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, - void **copy_mem, size_t *copy_size, - bpf_perf_event_print_t fn, void *private_data) +static enum bpf_perf_event_ret +perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, + void **copy_mem, size_t *copy_size, + bpf_perf_event_print_t fn, void *private_data) { struct perf_event_mmap_page *header = mmap_mem; __u64 data_head = ring_buffer_read_head(header); @@ -10442,6 +10727,12 @@ bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, return libbpf_err(ret); } +__attribute__((alias("perf_event_read_simple"))) +enum bpf_perf_event_ret +bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, + void **copy_mem, size_t *copy_size, + bpf_perf_event_print_t fn, void *private_data); + struct perf_buffer; struct perf_buffer_params { @@ -10575,11 +10866,18 @@ error: static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, struct perf_buffer_params *p); -struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt, - const struct perf_buffer_opts *opts) +DEFAULT_VERSION(perf_buffer__new_v0_6_0, perf_buffer__new, LIBBPF_0.6.0) +struct perf_buffer *perf_buffer__new_v0_6_0(int map_fd, size_t page_cnt, + perf_buffer_sample_fn sample_cb, + perf_buffer_lost_fn lost_cb, + void *ctx, + const struct perf_buffer_opts *opts) { struct perf_buffer_params p = {}; - struct perf_event_attr attr = { 0, }; + struct perf_event_attr attr = {}; + + if (!OPTS_VALID(opts, perf_buffer_opts)) + return libbpf_err_ptr(-EINVAL); attr.config = PERF_COUNT_SW_BPF_OUTPUT; attr.type = PERF_TYPE_SOFTWARE; @@ -10588,29 +10886,62 @@ struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt, attr.wakeup_events = 1; p.attr = &attr; - p.sample_cb = opts ? opts->sample_cb : NULL; - p.lost_cb = opts ? opts->lost_cb : NULL; - p.ctx = opts ? opts->ctx : NULL; + p.sample_cb = sample_cb; + p.lost_cb = lost_cb; + p.ctx = ctx; return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p)); } -struct perf_buffer * -perf_buffer__new_raw(int map_fd, size_t page_cnt, - const struct perf_buffer_raw_opts *opts) +COMPAT_VERSION(perf_buffer__new_deprecated, perf_buffer__new, LIBBPF_0.0.4) +struct perf_buffer *perf_buffer__new_deprecated(int map_fd, size_t page_cnt, + const struct perf_buffer_opts *opts) +{ + return perf_buffer__new_v0_6_0(map_fd, page_cnt, + opts ? opts->sample_cb : NULL, + opts ? opts->lost_cb : NULL, + opts ? opts->ctx : NULL, + NULL); +} + +DEFAULT_VERSION(perf_buffer__new_raw_v0_6_0, perf_buffer__new_raw, LIBBPF_0.6.0) +struct perf_buffer *perf_buffer__new_raw_v0_6_0(int map_fd, size_t page_cnt, + struct perf_event_attr *attr, + perf_buffer_event_fn event_cb, void *ctx, + const struct perf_buffer_raw_opts *opts) { struct perf_buffer_params p = {}; - p.attr = opts->attr; - p.event_cb = opts->event_cb; - p.ctx = opts->ctx; - p.cpu_cnt = opts->cpu_cnt; - p.cpus = opts->cpus; - p.map_keys = opts->map_keys; + if (page_cnt == 0 || !attr) + return libbpf_err_ptr(-EINVAL); + + if (!OPTS_VALID(opts, perf_buffer_raw_opts)) + return libbpf_err_ptr(-EINVAL); + + p.attr = attr; + p.event_cb = event_cb; + p.ctx = ctx; + p.cpu_cnt = OPTS_GET(opts, cpu_cnt, 0); + p.cpus = OPTS_GET(opts, cpus, NULL); + p.map_keys = OPTS_GET(opts, map_keys, NULL); return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p)); } +COMPAT_VERSION(perf_buffer__new_raw_deprecated, perf_buffer__new_raw, LIBBPF_0.0.4) +struct perf_buffer *perf_buffer__new_raw_deprecated(int map_fd, size_t page_cnt, + const struct perf_buffer_raw_opts *opts) +{ + LIBBPF_OPTS(perf_buffer_raw_opts, inner_opts, + .cpu_cnt = opts->cpu_cnt, + .cpus = opts->cpus, + .map_keys = opts->map_keys, + ); + + return perf_buffer__new_raw_v0_6_0(map_fd, page_cnt, opts->attr, + opts->event_cb, opts->ctx, &inner_opts); +} + static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, struct perf_buffer_params *p) { @@ -10810,10 +11141,10 @@ static int perf_buffer__process_records(struct perf_buffer *pb, { enum bpf_perf_event_ret ret; - ret = bpf_perf_event_read_simple(cpu_buf->base, pb->mmap_size, - pb->page_size, &cpu_buf->buf, - &cpu_buf->buf_size, - perf_buffer__process_record, cpu_buf); + ret = perf_event_read_simple(cpu_buf->base, pb->mmap_size, + pb->page_size, &cpu_buf->buf, + &cpu_buf->buf_size, + perf_buffer__process_record, cpu_buf); if (ret != LIBBPF_PERF_EVENT_CONT) return ret; return 0; diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 9de0f299706b..8b9bc5e90c2b 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -24,6 +24,10 @@ extern "C" { #endif +LIBBPF_API __u32 libbpf_major_version(void); +LIBBPF_API __u32 libbpf_minor_version(void); +LIBBPF_API const char *libbpf_version_string(void); + enum libbpf_errno { __LIBBPF_ERRNO__START = 4000, @@ -104,12 +108,73 @@ struct bpf_object_open_opts { * struct_ops, etc) will need actual kernel BTF at /sys/kernel/btf/vmlinux. */ const char *btf_custom_path; + /* Pointer to a buffer for storing kernel logs for applicable BPF + * commands. Valid kernel_log_size has to be specified as well and are + * passed-through to bpf() syscall. Keep in mind that kernel might + * fail operation with -ENOSPC error if provided buffer is too small + * to contain entire log output. + * See the comment below for kernel_log_level for interaction between + * log_buf and log_level settings. + * + * If specified, this log buffer will be passed for: + * - each BPF progral load (BPF_PROG_LOAD) attempt, unless overriden + * with bpf_program__set_log() on per-program level, to get + * BPF verifier log output. + * - during BPF object's BTF load into kernel (BPF_BTF_LOAD) to get + * BTF sanity checking log. + * + * Each BPF command (BPF_BTF_LOAD or BPF_PROG_LOAD) will overwrite + * previous contents, so if you need more fine-grained control, set + * per-program buffer with bpf_program__set_log_buf() to preserve each + * individual program's verification log. Keep using kernel_log_buf + * for BTF verification log, if necessary. + */ + char *kernel_log_buf; + size_t kernel_log_size; + /* + * Log level can be set independently from log buffer. Log_level=0 + * means that libbpf will attempt loading BTF or program without any + * logging requested, but will retry with either its own or custom log + * buffer, if provided, and log_level=1 on any error. + * And vice versa, setting log_level>0 will request BTF or prog + * loading with verbose log from the first attempt (and as such also + * for successfully loaded BTF or program), and the actual log buffer + * could be either libbpf's own auto-allocated log buffer, if + * kernel_log_buffer is NULL, or user-provided custom kernel_log_buf. + * If user didn't provide custom log buffer, libbpf will emit captured + * logs through its print callback. + */ + __u32 kernel_log_level; + + size_t :0; }; -#define bpf_object_open_opts__last_field btf_custom_path +#define bpf_object_open_opts__last_field kernel_log_level LIBBPF_API struct bpf_object *bpf_object__open(const char *path); + +/** + * @brief **bpf_object__open_file()** creates a bpf_object by opening + * the BPF ELF object file pointed to by the passed path and loading it + * into memory. + * @param path BPF object file path + * @param opts options for how to load the bpf object, this parameter is + * optional and can be set to NULL + * @return pointer to the new bpf_object; or NULL is returned on error, + * error code is stored in errno + */ LIBBPF_API struct bpf_object * bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts); + +/** + * @brief **bpf_object__open_mem()** creates a bpf_object by reading + * the BPF objects raw bytes from a memory buffer containing a valid + * BPF ELF object file. + * @param obj_buf pointer to the buffer containing ELF file bytes + * @param obj_buf_sz number of bytes in the buffer + * @param opts options for how to load the bpf object + * @return pointer to the new bpf_object; or NULL is returned on error, + * error code is stored in errno + */ LIBBPF_API struct bpf_object * bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz, const struct bpf_object_open_opts *opts); @@ -149,6 +214,7 @@ struct bpf_object_load_attr { /* Load/unload object into/from kernel */ LIBBPF_API int bpf_object__load(struct bpf_object *obj); +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_object__load() instead") LIBBPF_API int bpf_object__load_xattr(struct bpf_object_load_attr *attr); LIBBPF_DEPRECATED_SINCE(0, 6, "bpf_object__unload() is deprecated, use bpf_object__close() instead") LIBBPF_API int bpf_object__unload(struct bpf_object *obj); @@ -161,6 +227,7 @@ struct btf; LIBBPF_API struct btf *bpf_object__btf(const struct bpf_object *obj); LIBBPF_API int bpf_object__btf_fd(const struct bpf_object *obj); +LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__find_program_by_name() instead") LIBBPF_API struct bpf_program * bpf_object__find_program_by_title(const struct bpf_object *obj, const char *title); @@ -262,8 +329,8 @@ LIBBPF_API const struct bpf_insn *bpf_program__insns(const struct bpf_program *p */ LIBBPF_API size_t bpf_program__insn_cnt(const struct bpf_program *prog); -LIBBPF_API int bpf_program__load(struct bpf_program *prog, char *license, - __u32 kern_version); +LIBBPF_DEPRECATED_SINCE(0, 6, "use bpf_object__load() instead") +LIBBPF_API int bpf_program__load(struct bpf_program *prog, const char *license, __u32 kern_version); LIBBPF_API int bpf_program__fd(const struct bpf_program *prog); LIBBPF_DEPRECATED_SINCE(0, 7, "multi-instance bpf_program support is deprecated") LIBBPF_API int bpf_program__pin_instance(struct bpf_program *prog, @@ -273,7 +340,31 @@ LIBBPF_DEPRECATED_SINCE(0, 7, "multi-instance bpf_program support is deprecated" LIBBPF_API int bpf_program__unpin_instance(struct bpf_program *prog, const char *path, int instance); + +/** + * @brief **bpf_program__pin()** pins the BPF program to a file + * in the BPF FS specified by a path. This increments the programs + * reference count, allowing it to stay loaded after the process + * which loaded it has exited. + * + * @param prog BPF program to pin, must already be loaded + * @param path file path in a BPF file system + * @return 0, on success; negative error code, otherwise + */ LIBBPF_API int bpf_program__pin(struct bpf_program *prog, const char *path); + +/** + * @brief **bpf_program__unpin()** unpins the BPF program from a file + * in the BPFFS specified by a path. This decrements the programs + * reference count. + * + * The file pinning the BPF program can also be unlinked by a different + * process in which case this function will return an error. + * + * @param prog BPF program to unpin + * @param path file path to the pin in a BPF file system + * @return 0, on success; negative error code, otherwise + */ LIBBPF_API int bpf_program__unpin(struct bpf_program *prog, const char *path); LIBBPF_API void bpf_program__unload(struct bpf_program *prog); @@ -344,10 +435,41 @@ struct bpf_uprobe_opts { }; #define bpf_uprobe_opts__last_field retprobe +/** + * @brief **bpf_program__attach_uprobe()** attaches a BPF program + * to the userspace function which is found by binary path and + * offset. You can optionally specify a particular proccess to attach + * to. You can also optionally attach the program to the function + * exit instead of entry. + * + * @param prog BPF program to attach + * @param retprobe Attach to function exit + * @param pid Process ID to attach the uprobe to, 0 for self (own process), + * -1 for all processes + * @param binary_path Path to binary that contains the function symbol + * @param func_offset Offset within the binary of the function symbol + * @return Reference to the newly created BPF link; or NULL is returned on error, + * error code is stored in errno + */ LIBBPF_API struct bpf_link * bpf_program__attach_uprobe(const struct bpf_program *prog, bool retprobe, pid_t pid, const char *binary_path, size_t func_offset); + +/** + * @brief **bpf_program__attach_uprobe_opts()** is just like + * bpf_program__attach_uprobe() except with a options struct + * for various configurations. + * + * @param prog BPF program to attach + * @param pid Process ID to attach the uprobe to, 0 for self (own process), + * -1 for all processes + * @param binary_path Path to binary that contains the function symbol + * @param func_offset Offset within the binary of the function symbol + * @param opts Options for altering program attachment + * @return Reference to the newly created BPF link; or NULL is returned on error, + * error code is stored in errno + */ LIBBPF_API struct bpf_link * bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, const char *binary_path, size_t func_offset, @@ -431,7 +553,6 @@ bpf_program__attach_iter(const struct bpf_program *prog, * one instance. In this case bpf_program__fd(prog) is equal to * bpf_program__nth_fd(prog, 0). */ -LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_program__insns() for getting bpf_program instructions") struct bpf_prog_prep_result { /* * If not NULL, load new instruction array. @@ -494,6 +615,18 @@ LIBBPF_API void bpf_program__set_expected_attach_type(struct bpf_program *prog, enum bpf_attach_type type); +LIBBPF_API __u32 bpf_program__flags(const struct bpf_program *prog); +LIBBPF_API int bpf_program__set_flags(struct bpf_program *prog, __u32 flags); + +/* Per-program log level and log buffer getters/setters. + * See bpf_object_open_opts comments regarding log_level and log_buf + * interactions. + */ +LIBBPF_API __u32 bpf_program__log_level(const struct bpf_program *prog); +LIBBPF_API int bpf_program__set_log_level(struct bpf_program *prog, __u32 log_level); +LIBBPF_API const char *bpf_program__log_buf(const struct bpf_program *prog, size_t *log_size); +LIBBPF_API int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log_size); + LIBBPF_API int bpf_program__set_attach_target(struct bpf_program *prog, int attach_prog_fd, const char *attach_func_name); @@ -544,7 +677,8 @@ bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name); * Get bpf_map through the offset of corresponding struct bpf_map_def * in the BPF object file. */ -LIBBPF_API struct bpf_map * +LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_object__find_map_by_name() instead") +struct bpf_map * bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset); LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__next_map() instead") @@ -611,6 +745,7 @@ LIBBPF_API void *bpf_map__priv(const struct bpf_map *map); LIBBPF_API int bpf_map__set_initial_value(struct bpf_map *map, const void *data, size_t size); LIBBPF_API const void *bpf_map__initial_value(struct bpf_map *map, size_t *psize); +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_map__type() instead") LIBBPF_API bool bpf_map__is_offload_neutral(const struct bpf_map *map); /** @@ -674,10 +809,12 @@ struct bpf_prog_load_attr { int prog_flags; }; +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_object__open() and bpf_object__load() instead") LIBBPF_API int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, struct bpf_object **pobj, int *prog_fd); -LIBBPF_API int bpf_prog_load(const char *file, enum bpf_prog_type type, - struct bpf_object **pobj, int *prog_fd); +LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__open() and bpf_object__load() instead") +LIBBPF_API int bpf_prog_load_deprecated(const char *file, enum bpf_prog_type type, + struct bpf_object **pobj, int *prog_fd); /* XDP related API */ struct xdp_link_info { @@ -775,18 +912,52 @@ typedef void (*perf_buffer_lost_fn)(void *ctx, int cpu, __u64 cnt); /* common use perf buffer options */ struct perf_buffer_opts { - /* if specified, sample_cb is called for each sample */ - perf_buffer_sample_fn sample_cb; - /* if specified, lost_cb is called for each batch of lost samples */ - perf_buffer_lost_fn lost_cb; - /* ctx is provided to sample_cb and lost_cb */ - void *ctx; + union { + size_t sz; + struct { /* DEPRECATED: will be removed in v1.0 */ + /* if specified, sample_cb is called for each sample */ + perf_buffer_sample_fn sample_cb; + /* if specified, lost_cb is called for each batch of lost samples */ + perf_buffer_lost_fn lost_cb; + /* ctx is provided to sample_cb and lost_cb */ + void *ctx; + }; + }; }; +#define perf_buffer_opts__last_field sz +/** + * @brief **perf_buffer__new()** creates BPF perfbuf manager for a specified + * BPF_PERF_EVENT_ARRAY map + * @param map_fd FD of BPF_PERF_EVENT_ARRAY BPF map that will be used by BPF + * code to send data over to user-space + * @param page_cnt number of memory pages allocated for each per-CPU buffer + * @param sample_cb function called on each received data record + * @param lost_cb function called when record loss has occurred + * @param ctx user-provided extra context passed into *sample_cb* and *lost_cb* + * @return a new instance of struct perf_buffer on success, NULL on error with + * *errno* containing an error code + */ LIBBPF_API struct perf_buffer * perf_buffer__new(int map_fd, size_t page_cnt, + perf_buffer_sample_fn sample_cb, perf_buffer_lost_fn lost_cb, void *ctx, const struct perf_buffer_opts *opts); +LIBBPF_API struct perf_buffer * +perf_buffer__new_v0_6_0(int map_fd, size_t page_cnt, + perf_buffer_sample_fn sample_cb, perf_buffer_lost_fn lost_cb, void *ctx, + const struct perf_buffer_opts *opts); + +LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "use new variant of perf_buffer__new() instead") +struct perf_buffer *perf_buffer__new_deprecated(int map_fd, size_t page_cnt, + const struct perf_buffer_opts *opts); + +#define perf_buffer__new(...) ___libbpf_overload(___perf_buffer_new, __VA_ARGS__) +#define ___perf_buffer_new6(map_fd, page_cnt, sample_cb, lost_cb, ctx, opts) \ + perf_buffer__new(map_fd, page_cnt, sample_cb, lost_cb, ctx, opts) +#define ___perf_buffer_new3(map_fd, page_cnt, opts) \ + perf_buffer__new_deprecated(map_fd, page_cnt, opts) + enum bpf_perf_event_ret { LIBBPF_PERF_EVENT_DONE = 0, LIBBPF_PERF_EVENT_ERROR = -1, @@ -800,12 +971,21 @@ typedef enum bpf_perf_event_ret /* raw perf buffer options, giving most power and control */ struct perf_buffer_raw_opts { - /* perf event attrs passed directly into perf_event_open() */ - struct perf_event_attr *attr; - /* raw event callback */ - perf_buffer_event_fn event_cb; - /* ctx is provided to event_cb */ - void *ctx; + union { + struct { + size_t sz; + long :0; + long :0; + }; + struct { /* DEPRECATED: will be removed in v1.0 */ + /* perf event attrs passed directly into perf_event_open() */ + struct perf_event_attr *attr; + /* raw event callback */ + perf_buffer_event_fn event_cb; + /* ctx is provided to event_cb */ + void *ctx; + }; + }; /* if cpu_cnt == 0, open all on all possible CPUs (up to the number of * max_entries of given PERF_EVENT_ARRAY map) */ @@ -815,11 +995,28 @@ struct perf_buffer_raw_opts { /* if cpu_cnt > 0, map_keys specify map keys to set per-CPU FDs for */ int *map_keys; }; +#define perf_buffer_raw_opts__last_field map_keys LIBBPF_API struct perf_buffer * -perf_buffer__new_raw(int map_fd, size_t page_cnt, +perf_buffer__new_raw(int map_fd, size_t page_cnt, struct perf_event_attr *attr, + perf_buffer_event_fn event_cb, void *ctx, const struct perf_buffer_raw_opts *opts); +LIBBPF_API struct perf_buffer * +perf_buffer__new_raw_v0_6_0(int map_fd, size_t page_cnt, struct perf_event_attr *attr, + perf_buffer_event_fn event_cb, void *ctx, + const struct perf_buffer_raw_opts *opts); + +LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "use new variant of perf_buffer__new_raw() instead") +struct perf_buffer *perf_buffer__new_raw_deprecated(int map_fd, size_t page_cnt, + const struct perf_buffer_raw_opts *opts); + +#define perf_buffer__new_raw(...) ___libbpf_overload(___perf_buffer_new_raw, __VA_ARGS__) +#define ___perf_buffer_new_raw6(map_fd, page_cnt, attr, event_cb, ctx, opts) \ + perf_buffer__new_raw(map_fd, page_cnt, attr, event_cb, ctx, opts) +#define ___perf_buffer_new_raw3(map_fd, page_cnt, opts) \ + perf_buffer__new_raw_deprecated(map_fd, page_cnt, opts) + LIBBPF_API void perf_buffer__free(struct perf_buffer *pb); LIBBPF_API int perf_buffer__epoll_fd(const struct perf_buffer *pb); LIBBPF_API int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms); @@ -831,6 +1028,7 @@ LIBBPF_API int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_i typedef enum bpf_perf_event_ret (*bpf_perf_event_print_t)(struct perf_event_header *hdr, void *private_data); +LIBBPF_DEPRECATED_SINCE(0, 8, "use perf_buffer__poll() or perf_buffer__consume() instead") LIBBPF_API enum bpf_perf_event_ret bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, void **copy_mem, size_t *copy_size, @@ -857,13 +1055,57 @@ bpf_prog_linfo__lfind(const struct bpf_prog_linfo *prog_linfo, * user, causing subsequent probes to fail. In this case, the caller may want * to adjust that limit with setrlimit(). */ -LIBBPF_API bool bpf_probe_prog_type(enum bpf_prog_type prog_type, - __u32 ifindex); +LIBBPF_DEPRECATED_SINCE(0, 8, "use libbpf_probe_bpf_prog_type() instead") +LIBBPF_API bool bpf_probe_prog_type(enum bpf_prog_type prog_type, __u32 ifindex); +LIBBPF_DEPRECATED_SINCE(0, 8, "use libbpf_probe_bpf_map_type() instead") LIBBPF_API bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex); -LIBBPF_API bool bpf_probe_helper(enum bpf_func_id id, - enum bpf_prog_type prog_type, __u32 ifindex); +LIBBPF_DEPRECATED_SINCE(0, 8, "use libbpf_probe_bpf_helper() instead") +LIBBPF_API bool bpf_probe_helper(enum bpf_func_id id, enum bpf_prog_type prog_type, __u32 ifindex); +LIBBPF_DEPRECATED_SINCE(0, 8, "implement your own or use bpftool for feature detection") LIBBPF_API bool bpf_probe_large_insn_limit(__u32 ifindex); +/** + * @brief **libbpf_probe_bpf_prog_type()** detects if host kernel supports + * BPF programs of a given type. + * @param prog_type BPF program type to detect kernel support for + * @param opts reserved for future extensibility, should be NULL + * @return 1, if given program type is supported; 0, if given program type is + * not supported; negative error code if feature detection failed or can't be + * performed + * + * Make sure the process has required set of CAP_* permissions (or runs as + * root) when performing feature checking. + */ +LIBBPF_API int libbpf_probe_bpf_prog_type(enum bpf_prog_type prog_type, const void *opts); +/** + * @brief **libbpf_probe_bpf_map_type()** detects if host kernel supports + * BPF maps of a given type. + * @param map_type BPF map type to detect kernel support for + * @param opts reserved for future extensibility, should be NULL + * @return 1, if given map type is supported; 0, if given map type is + * not supported; negative error code if feature detection failed or can't be + * performed + * + * Make sure the process has required set of CAP_* permissions (or runs as + * root) when performing feature checking. + */ +LIBBPF_API int libbpf_probe_bpf_map_type(enum bpf_map_type map_type, const void *opts); +/** + * @brief **libbpf_probe_bpf_helper()** detects if host kernel supports the + * use of a given BPF helper from specified BPF program type. + * @param prog_type BPF program type used to check the support of BPF helper + * @param helper_id BPF helper ID (enum bpf_func_id) to check support for + * @param opts reserved for future extensibility, should be NULL + * @return 1, if given combination of program type and helper is supported; 0, + * if the combination is not supported; negative error code if feature + * detection for provided input arguments failed or can't be performed + * + * Make sure the process has required set of CAP_* permissions (or runs as + * root) when performing feature checking. + */ +LIBBPF_API int libbpf_probe_bpf_helper(enum bpf_prog_type prog_type, + enum bpf_func_id helper_id, const void *opts); + /* * Get bpf_prog_info in continuous memory * @@ -918,12 +1160,15 @@ struct bpf_prog_info_linear { __u8 data[]; }; +LIBBPF_DEPRECATED_SINCE(0, 6, "use a custom linear prog_info wrapper") LIBBPF_API struct bpf_prog_info_linear * bpf_program__get_prog_info_linear(int fd, __u64 arrays); +LIBBPF_DEPRECATED_SINCE(0, 6, "use a custom linear prog_info wrapper") LIBBPF_API void bpf_program__bpil_addr_to_offs(struct bpf_prog_info_linear *info_linear); +LIBBPF_DEPRECATED_SINCE(0, 6, "use a custom linear prog_info wrapper") LIBBPF_API void bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear); @@ -965,11 +1210,11 @@ struct bpf_object_skeleton { struct bpf_object **obj; int map_cnt; - int map_skel_sz; /* sizeof(struct bpf_skeleton_map) */ + int map_skel_sz; /* sizeof(struct bpf_map_skeleton) */ struct bpf_map_skeleton *maps; int prog_cnt; - int prog_skel_sz; /* sizeof(struct bpf_skeleton_prog) */ + int prog_skel_sz; /* sizeof(struct bpf_prog_skeleton) */ struct bpf_prog_skeleton *progs; }; diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 43580eb47740..529783967793 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -391,14 +391,44 @@ LIBBPF_0.6.0 { global: bpf_map__map_extra; bpf_map__set_map_extra; + bpf_map_create; bpf_object__next_map; bpf_object__next_program; bpf_object__prev_map; bpf_object__prev_program; + bpf_prog_load_deprecated; + bpf_prog_load; + bpf_program__flags; bpf_program__insn_cnt; bpf_program__insns; + bpf_program__set_flags; btf__add_btf; btf__add_decl_tag; + btf__add_type_tag; + btf__dedup; + btf__dedup_deprecated; btf__raw_data; btf__type_cnt; + btf_dump__new; + btf_dump__new_deprecated; + libbpf_major_version; + libbpf_minor_version; + libbpf_version_string; + perf_buffer__new; + perf_buffer__new_deprecated; + perf_buffer__new_raw; + perf_buffer__new_raw_deprecated; } LIBBPF_0.5.0; + +LIBBPF_0.7.0 { + global: + bpf_btf_load; + bpf_program__log_buf; + bpf_program__log_level; + bpf_program__set_log_buf; + bpf_program__set_log_level; + libbpf_probe_bpf_helper; + libbpf_probe_bpf_map_type; + libbpf_probe_bpf_prog_type; + libbpf_set_memlock_rlim_max; +}; diff --git a/tools/lib/bpf/libbpf_common.h b/tools/lib/bpf/libbpf_common.h index aaa1efbf6f51..000e37798ff2 100644 --- a/tools/lib/bpf/libbpf_common.h +++ b/tools/lib/bpf/libbpf_common.h @@ -40,6 +40,23 @@ #else #define __LIBBPF_MARK_DEPRECATED_0_7(X) #endif +#if __LIBBPF_CURRENT_VERSION_GEQ(0, 8) +#define __LIBBPF_MARK_DEPRECATED_0_8(X) X +#else +#define __LIBBPF_MARK_DEPRECATED_0_8(X) +#endif + +/* This set of internal macros allows to do "function overloading" based on + * number of arguments provided by used in backwards-compatible way during the + * transition to libbpf 1.0 + * It's ugly but necessary evil that will be cleaned up when we get to 1.0. + * See bpf_prog_load() overload for example. + */ +#define ___libbpf_cat(A, B) A ## B +#define ___libbpf_select(NAME, NUM) ___libbpf_cat(NAME, NUM) +#define ___libbpf_nth(_1, _2, _3, _4, _5, _6, N, ...) N +#define ___libbpf_cnt(...) ___libbpf_nth(__VA_ARGS__, 6, 5, 4, 3, 2, 1) +#define ___libbpf_overload(NAME, ...) ___libbpf_select(NAME, ___libbpf_cnt(__VA_ARGS__))(__VA_ARGS__) /* Helper macro to declare and initialize libbpf options struct * @@ -54,7 +71,7 @@ * including any extra padding, it with memset() and then assigns initial * values provided by users in struct initializer-syntax as varargs. */ -#define DECLARE_LIBBPF_OPTS(TYPE, NAME, ...) \ +#define LIBBPF_OPTS(TYPE, NAME, ...) \ struct TYPE NAME = ({ \ memset(&NAME, 0, sizeof(struct TYPE)); \ (struct TYPE) { \ diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index aeb79e3a8ff9..1565679eb432 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -73,6 +73,8 @@ BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_FLOAT, 0, 0), sz) #define BTF_TYPE_DECL_TAG_ENC(value, type, component_idx) \ BTF_TYPE_ENC(value, BTF_INFO_ENC(BTF_KIND_DECL_TAG, 0, 0), type), (component_idx) +#define BTF_TYPE_TYPE_TAG_ENC(value, type) \ + BTF_TYPE_ENC(value, BTF_INFO_ENC(BTF_KIND_TYPE_TAG, 0, 0), type) #ifndef likely #define likely(x) __builtin_expect(!!(x), 1) @@ -167,10 +169,31 @@ static inline void *libbpf_reallocarray(void *ptr, size_t nmemb, size_t size) return realloc(ptr, total); } +/* Copy up to sz - 1 bytes from zero-terminated src string and ensure that dst + * is zero-terminated string no matter what (unless sz == 0, in which case + * it's a no-op). It's conceptually close to FreeBSD's strlcpy(), but differs + * in what is returned. Given this is internal helper, it's trivial to extend + * this, when necessary. Use this instead of strncpy inside libbpf source code. + */ +static inline void libbpf_strlcpy(char *dst, const char *src, size_t sz) +{ + size_t i; + + if (sz == 0) + return; + + sz--; + for (i = 0; i < sz && src[i]; i++) + dst[i] = src[i]; + dst[i] = '\0'; +} + +__u32 get_kernel_version(void); + struct btf; struct btf_type; -struct btf_type *btf_type_by_id(struct btf *btf, __u32 type_id); +struct btf_type *btf_type_by_id(const struct btf *btf, __u32 type_id); const char *btf_kind_str(const struct btf_type *t); const struct btf_type *skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id); @@ -270,63 +293,51 @@ static inline bool libbpf_validate_opts(const char *opts, (opts)->sz - __off); \ }) +enum kern_feature_id { + /* v4.14: kernel support for program & map names. */ + FEAT_PROG_NAME, + /* v5.2: kernel support for global data sections. */ + FEAT_GLOBAL_DATA, + /* BTF support */ + FEAT_BTF, + /* BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO support */ + FEAT_BTF_FUNC, + /* BTF_KIND_VAR and BTF_KIND_DATASEC support */ + FEAT_BTF_DATASEC, + /* BTF_FUNC_GLOBAL is supported */ + FEAT_BTF_GLOBAL_FUNC, + /* BPF_F_MMAPABLE is supported for arrays */ + FEAT_ARRAY_MMAP, + /* kernel support for expected_attach_type in BPF_PROG_LOAD */ + FEAT_EXP_ATTACH_TYPE, + /* bpf_probe_read_{kernel,user}[_str] helpers */ + FEAT_PROBE_READ_KERN, + /* BPF_PROG_BIND_MAP is supported */ + FEAT_PROG_BIND_MAP, + /* Kernel support for module BTFs */ + FEAT_MODULE_BTF, + /* BTF_KIND_FLOAT support */ + FEAT_BTF_FLOAT, + /* BPF perf link support */ + FEAT_PERF_LINK, + /* BTF_KIND_DECL_TAG support */ + FEAT_BTF_DECL_TAG, + /* BTF_KIND_TYPE_TAG support */ + FEAT_BTF_TYPE_TAG, + /* memcg-based accounting for BPF maps and progs */ + FEAT_MEMCG_ACCOUNT, + __FEAT_CNT, +}; + +int probe_memcg_account(void); +bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id); +int bump_rlimit_memlock(void); int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz); int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz); int libbpf__load_raw_btf(const char *raw_types, size_t types_len, const char *str_sec, size_t str_len); - -struct bpf_prog_load_params { - enum bpf_prog_type prog_type; - enum bpf_attach_type expected_attach_type; - const char *name; - const struct bpf_insn *insns; - size_t insn_cnt; - const char *license; - __u32 kern_version; - __u32 attach_prog_fd; - __u32 attach_btf_obj_fd; - __u32 attach_btf_id; - __u32 prog_ifindex; - __u32 prog_btf_fd; - __u32 prog_flags; - - __u32 func_info_rec_size; - const void *func_info; - __u32 func_info_cnt; - - __u32 line_info_rec_size; - const void *line_info; - __u32 line_info_cnt; - - __u32 log_level; - char *log_buf; - size_t log_buf_sz; - int *fd_array; -}; - -int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr); - -struct bpf_create_map_params { - const char *name; - enum bpf_map_type map_type; - __u32 map_flags; - __u32 key_size; - __u32 value_size; - __u32 max_entries; - __u32 numa_node; - __u32 btf_fd; - __u32 btf_key_type_id; - __u32 btf_value_type_id; - __u32 map_ifindex; - union { - __u32 inner_map_fd; - __u32 btf_vmlinux_value_type_id; - }; - __u64 map_extra; -}; - -int libbpf__bpf_create_map_xattr(const struct bpf_create_map_params *create_attr); +int btf_load_into_kernel(struct btf *btf, char *log_buf, size_t log_sz, __u32 log_level); struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf); void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type, diff --git a/tools/lib/bpf/libbpf_legacy.h b/tools/lib/bpf/libbpf_legacy.h index 5ba5c9beccfa..79131f761a27 100644 --- a/tools/lib/bpf/libbpf_legacy.h +++ b/tools/lib/bpf/libbpf_legacy.h @@ -45,7 +45,6 @@ enum libbpf_strict_mode { * (positive) error code. */ LIBBPF_STRICT_DIRECT_ERRS = 0x02, - /* * Enforce strict BPF program section (SEC()) names. * E.g., while prefiously SEC("xdp_whatever") or SEC("perf_event_blah") were @@ -63,12 +62,24 @@ enum libbpf_strict_mode { * Clients can maintain it on their own if it is valuable for them. */ LIBBPF_STRICT_NO_OBJECT_LIST = 0x08, + /* + * Automatically bump RLIMIT_MEMLOCK using setrlimit() before the + * first BPF program or map creation operation. This is done only if + * kernel is too old to support memcg-based memory accounting for BPF + * subsystem. By default, RLIMIT_MEMLOCK limit is set to RLIM_INFINITY, + * but it can be overriden with libbpf_set_memlock_rlim_max() API. + * Note that libbpf_set_memlock_rlim_max() needs to be called before + * the very first bpf_prog_load(), bpf_map_create() or bpf_object__load() + * operation. + */ + LIBBPF_STRICT_AUTO_RLIMIT_MEMLOCK = 0x10, __LIBBPF_STRICT_LAST, }; LIBBPF_API int libbpf_set_strict_mode(enum libbpf_strict_mode mode); +#define DECLARE_LIBBPF_OPTS LIBBPF_OPTS #ifdef __cplusplus } /* extern "C" */ diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c index 68f2dbf364aa..97b06cede56f 100644 --- a/tools/lib/bpf/libbpf_probes.c +++ b/tools/lib/bpf/libbpf_probes.c @@ -48,41 +48,65 @@ static int get_vendor_id(int ifindex) return strtol(buf, NULL, 0); } -static int get_kernel_version(void) +static int probe_prog_load(enum bpf_prog_type prog_type, + const struct bpf_insn *insns, size_t insns_cnt, + char *log_buf, size_t log_buf_sz, + __u32 ifindex) { - int version, subversion, patchlevel; - struct utsname utsn; - - /* Return 0 on failure, and attempt to probe with empty kversion */ - if (uname(&utsn)) - return 0; - - if (sscanf(utsn.release, "%d.%d.%d", - &version, &subversion, &patchlevel) != 3) - return 0; - - return (version << 16) + (subversion << 8) + patchlevel; -} - -static void -probe_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns, - size_t insns_cnt, char *buf, size_t buf_len, __u32 ifindex) -{ - struct bpf_load_program_attr xattr = {}; - int fd; + LIBBPF_OPTS(bpf_prog_load_opts, opts, + .log_buf = log_buf, + .log_size = log_buf_sz, + .log_level = log_buf ? 1 : 0, + .prog_ifindex = ifindex, + ); + int fd, err, exp_err = 0; + const char *exp_msg = NULL; + char buf[4096]; switch (prog_type) { case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: - xattr.expected_attach_type = BPF_CGROUP_INET4_CONNECT; + opts.expected_attach_type = BPF_CGROUP_INET4_CONNECT; break; case BPF_PROG_TYPE_CGROUP_SOCKOPT: - xattr.expected_attach_type = BPF_CGROUP_GETSOCKOPT; + opts.expected_attach_type = BPF_CGROUP_GETSOCKOPT; break; case BPF_PROG_TYPE_SK_LOOKUP: - xattr.expected_attach_type = BPF_SK_LOOKUP; + opts.expected_attach_type = BPF_SK_LOOKUP; break; case BPF_PROG_TYPE_KPROBE: - xattr.kern_version = get_kernel_version(); + opts.kern_version = get_kernel_version(); + break; + case BPF_PROG_TYPE_LIRC_MODE2: + opts.expected_attach_type = BPF_LIRC_MODE2; + break; + case BPF_PROG_TYPE_TRACING: + case BPF_PROG_TYPE_LSM: + opts.log_buf = buf; + opts.log_size = sizeof(buf); + opts.log_level = 1; + if (prog_type == BPF_PROG_TYPE_TRACING) + opts.expected_attach_type = BPF_TRACE_FENTRY; + else + opts.expected_attach_type = BPF_MODIFY_RETURN; + opts.attach_btf_id = 1; + + exp_err = -EINVAL; + exp_msg = "attach_btf_id 1 is not a function"; + break; + case BPF_PROG_TYPE_EXT: + opts.log_buf = buf; + opts.log_size = sizeof(buf); + opts.log_level = 1; + opts.attach_btf_id = 1; + + exp_err = -EINVAL; + exp_msg = "Cannot replace kernel functions"; + break; + case BPF_PROG_TYPE_SYSCALL: + opts.prog_flags = BPF_F_SLEEPABLE; + break; + case BPF_PROG_TYPE_STRUCT_OPS: + exp_err = -524; /* -ENOTSUPP */ break; case BPF_PROG_TYPE_UNSPEC: case BPF_PROG_TYPE_SOCKET_FILTER: @@ -103,27 +127,42 @@ probe_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns, case BPF_PROG_TYPE_RAW_TRACEPOINT: case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: case BPF_PROG_TYPE_LWT_SEG6LOCAL: - case BPF_PROG_TYPE_LIRC_MODE2: case BPF_PROG_TYPE_SK_REUSEPORT: case BPF_PROG_TYPE_FLOW_DISSECTOR: case BPF_PROG_TYPE_CGROUP_SYSCTL: - case BPF_PROG_TYPE_TRACING: - case BPF_PROG_TYPE_STRUCT_OPS: - case BPF_PROG_TYPE_EXT: - case BPF_PROG_TYPE_LSM: - default: break; + default: + return -EOPNOTSUPP; } - xattr.prog_type = prog_type; - xattr.insns = insns; - xattr.insns_cnt = insns_cnt; - xattr.license = "GPL"; - xattr.prog_ifindex = ifindex; - - fd = bpf_load_program_xattr(&xattr, buf, buf_len); + fd = bpf_prog_load(prog_type, NULL, "GPL", insns, insns_cnt, &opts); + err = -errno; if (fd >= 0) close(fd); + if (exp_err) { + if (fd >= 0 || err != exp_err) + return 0; + if (exp_msg && !strstr(buf, exp_msg)) + return 0; + return 1; + } + return fd >= 0 ? 1 : 0; +} + +int libbpf_probe_bpf_prog_type(enum bpf_prog_type prog_type, const void *opts) +{ + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN() + }; + const size_t insn_cnt = ARRAY_SIZE(insns); + int ret; + + if (opts) + return libbpf_err(-EINVAL); + + ret = probe_prog_load(prog_type, insns, insn_cnt, NULL, 0, 0); + return libbpf_err(ret); } bool bpf_probe_prog_type(enum bpf_prog_type prog_type, __u32 ifindex) @@ -133,12 +172,16 @@ bool bpf_probe_prog_type(enum bpf_prog_type prog_type, __u32 ifindex) BPF_EXIT_INSN() }; + /* prefer libbpf_probe_bpf_prog_type() unless offload is requested */ + if (ifindex == 0) + return libbpf_probe_bpf_prog_type(prog_type, NULL) == 1; + if (ifindex && prog_type == BPF_PROG_TYPE_SCHED_CLS) /* nfp returns -EINVAL on exit(0) with TC offload */ insns[0].imm = 2; errno = 0; - probe_load(prog_type, insns, ARRAY_SIZE(insns), NULL, 0, ifindex); + probe_prog_load(prog_type, insns, ARRAY_SIZE(insns), NULL, 0, ifindex); return errno != EINVAL && errno != EOPNOTSUPP; } @@ -166,7 +209,7 @@ int libbpf__load_raw_btf(const char *raw_types, size_t types_len, memcpy(raw_btf + hdr.hdr_len, raw_types, hdr.type_len); memcpy(raw_btf + hdr.hdr_len + hdr.type_len, str_sec, hdr.str_len); - btf_fd = bpf_load_btf(raw_btf, btf_len, NULL, 0, false); + btf_fd = bpf_btf_load(raw_btf, btf_len, NULL); free(raw_btf); return btf_fd; @@ -199,17 +242,18 @@ static int load_local_storage_btf(void) strs, sizeof(strs)); } -bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex) +static int probe_map_create(enum bpf_map_type map_type, __u32 ifindex) { - int key_size, value_size, max_entries, map_flags; + LIBBPF_OPTS(bpf_map_create_opts, opts); + int key_size, value_size, max_entries; __u32 btf_key_type_id = 0, btf_value_type_id = 0; - struct bpf_create_map_attr attr = {}; - int fd = -1, btf_fd = -1, fd_inner; + int fd = -1, btf_fd = -1, fd_inner = -1, exp_err = 0, err; + + opts.map_ifindex = ifindex; key_size = sizeof(__u32); value_size = sizeof(__u32); max_entries = 1; - map_flags = 0; switch (map_type) { case BPF_MAP_TYPE_STACK_TRACE: @@ -218,7 +262,7 @@ bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex) case BPF_MAP_TYPE_LPM_TRIE: key_size = sizeof(__u64); value_size = sizeof(__u64); - map_flags = BPF_F_NO_PREALLOC; + opts.map_flags = BPF_F_NO_PREALLOC; break; case BPF_MAP_TYPE_CGROUP_STORAGE: case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE: @@ -237,17 +281,25 @@ bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex) btf_value_type_id = 3; value_size = 8; max_entries = 0; - map_flags = BPF_F_NO_PREALLOC; + opts.map_flags = BPF_F_NO_PREALLOC; btf_fd = load_local_storage_btf(); if (btf_fd < 0) - return false; + return btf_fd; break; case BPF_MAP_TYPE_RINGBUF: key_size = 0; value_size = 0; max_entries = 4096; break; - case BPF_MAP_TYPE_UNSPEC: + case BPF_MAP_TYPE_STRUCT_OPS: + /* we'll get -ENOTSUPP for invalid BTF type ID for struct_ops */ + opts.btf_vmlinux_value_type_id = 1; + exp_err = -524; /* -ENOTSUPP */ + break; + case BPF_MAP_TYPE_BLOOM_FILTER: + key_size = 0; + max_entries = 1; + break; case BPF_MAP_TYPE_HASH: case BPF_MAP_TYPE_ARRAY: case BPF_MAP_TYPE_PROG_ARRAY: @@ -266,9 +318,10 @@ bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex) case BPF_MAP_TYPE_XSKMAP: case BPF_MAP_TYPE_SOCKHASH: case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY: - case BPF_MAP_TYPE_STRUCT_OPS: - default: break; + case BPF_MAP_TYPE_UNSPEC: + default: + return -EOPNOTSUPP; } if (map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS || @@ -277,37 +330,102 @@ bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex) * map-in-map for offload */ if (ifindex) - return false; + goto cleanup; - fd_inner = bpf_create_map(BPF_MAP_TYPE_HASH, - sizeof(__u32), sizeof(__u32), 1, 0); + fd_inner = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, + sizeof(__u32), sizeof(__u32), 1, NULL); if (fd_inner < 0) - return false; - fd = bpf_create_map_in_map(map_type, NULL, sizeof(__u32), - fd_inner, 1, 0); - close(fd_inner); - } else { - /* Note: No other restriction on map type probes for offload */ - attr.map_type = map_type; - attr.key_size = key_size; - attr.value_size = value_size; - attr.max_entries = max_entries; - attr.map_flags = map_flags; - attr.map_ifindex = ifindex; - if (btf_fd >= 0) { - attr.btf_fd = btf_fd; - attr.btf_key_type_id = btf_key_type_id; - attr.btf_value_type_id = btf_value_type_id; - } + goto cleanup; + + opts.inner_map_fd = fd_inner; + } - fd = bpf_create_map_xattr(&attr); + if (btf_fd >= 0) { + opts.btf_fd = btf_fd; + opts.btf_key_type_id = btf_key_type_id; + opts.btf_value_type_id = btf_value_type_id; } + + fd = bpf_map_create(map_type, NULL, key_size, value_size, max_entries, &opts); + err = -errno; + +cleanup: if (fd >= 0) close(fd); + if (fd_inner >= 0) + close(fd_inner); if (btf_fd >= 0) close(btf_fd); - return fd >= 0; + if (exp_err) + return fd < 0 && err == exp_err ? 1 : 0; + else + return fd >= 0 ? 1 : 0; +} + +int libbpf_probe_bpf_map_type(enum bpf_map_type map_type, const void *opts) +{ + int ret; + + if (opts) + return libbpf_err(-EINVAL); + + ret = probe_map_create(map_type, 0); + return libbpf_err(ret); +} + +bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex) +{ + return probe_map_create(map_type, ifindex) == 1; +} + +int libbpf_probe_bpf_helper(enum bpf_prog_type prog_type, enum bpf_func_id helper_id, + const void *opts) +{ + struct bpf_insn insns[] = { + BPF_EMIT_CALL((__u32)helper_id), + BPF_EXIT_INSN(), + }; + const size_t insn_cnt = ARRAY_SIZE(insns); + char buf[4096]; + int ret; + + if (opts) + return libbpf_err(-EINVAL); + + /* we can't successfully load all prog types to check for BPF helper + * support, so bail out with -EOPNOTSUPP error + */ + switch (prog_type) { + case BPF_PROG_TYPE_TRACING: + case BPF_PROG_TYPE_EXT: + case BPF_PROG_TYPE_LSM: + case BPF_PROG_TYPE_STRUCT_OPS: + return -EOPNOTSUPP; + default: + break; + } + + buf[0] = '\0'; + ret = probe_prog_load(prog_type, insns, insn_cnt, buf, sizeof(buf), 0); + if (ret < 0) + return libbpf_err(ret); + + /* If BPF verifier doesn't recognize BPF helper ID (enum bpf_func_id) + * at all, it will emit something like "invalid func unknown#181". + * If BPF verifier recognizes BPF helper but it's not supported for + * given BPF program type, it will emit "unknown func bpf_sys_bpf#166". + * In both cases, provided combination of BPF program type and BPF + * helper is not supported by the kernel. + * In all other cases, probe_prog_load() above will either succeed (e.g., + * because BPF helper happens to accept no input arguments or it + * accepts one input argument and initial PTR_TO_CTX is fine for + * that), or we'll get some more specific BPF verifier error about + * some unsatisfied conditions. + */ + if (ret == 0 && (strstr(buf, "invalid func ") || strstr(buf, "unknown func "))) + return 0; + return 1; /* assume supported */ } bool bpf_probe_helper(enum bpf_func_id id, enum bpf_prog_type prog_type, @@ -320,8 +438,7 @@ bool bpf_probe_helper(enum bpf_func_id id, enum bpf_prog_type prog_type, char buf[4096] = {}; bool res; - probe_load(prog_type, insns, ARRAY_SIZE(insns), buf, sizeof(buf), - ifindex); + probe_prog_load(prog_type, insns, ARRAY_SIZE(insns), buf, sizeof(buf), ifindex); res = !grep(buf, "invalid func ") && !grep(buf, "unknown func "); if (ifindex) { @@ -353,8 +470,8 @@ bool bpf_probe_large_insn_limit(__u32 ifindex) insns[BPF_MAXINSNS] = BPF_EXIT_INSN(); errno = 0; - probe_load(BPF_PROG_TYPE_SCHED_CLS, insns, ARRAY_SIZE(insns), NULL, 0, - ifindex); + probe_prog_load(BPF_PROG_TYPE_SCHED_CLS, insns, ARRAY_SIZE(insns), NULL, 0, + ifindex); return errno != E2BIG && errno != EINVAL; } diff --git a/tools/lib/bpf/libbpf_version.h b/tools/lib/bpf/libbpf_version.h index dd56d76f291c..0fefefc3500b 100644 --- a/tools/lib/bpf/libbpf_version.h +++ b/tools/lib/bpf/libbpf_version.h @@ -4,6 +4,6 @@ #define __LIBBPF_VERSION_H #define LIBBPF_MAJOR_VERSION 0 -#define LIBBPF_MINOR_VERSION 6 +#define LIBBPF_MINOR_VERSION 7 #endif /* __LIBBPF_VERSION_H */ diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c index f677dccdeae4..9aa016fb55aa 100644 --- a/tools/lib/bpf/linker.c +++ b/tools/lib/bpf/linker.c @@ -210,6 +210,7 @@ void bpf_linker__free(struct bpf_linker *linker) } free(linker->secs); + free(linker->glob_syms); free(linker); } @@ -1999,7 +2000,7 @@ add_sym: static int linker_append_elf_relos(struct bpf_linker *linker, struct src_obj *obj) { struct src_sec *src_symtab = &obj->secs[obj->symtab_sec_idx]; - struct dst_sec *dst_symtab = &linker->secs[linker->symtab_sec_idx]; + struct dst_sec *dst_symtab; int i, err; for (i = 1; i < obj->sec_cnt; i++) { @@ -2032,6 +2033,9 @@ static int linker_append_elf_relos(struct bpf_linker *linker, struct src_obj *ob return -1; } + /* add_dst_sec() above could have invalidated linker->secs */ + dst_symtab = &linker->secs[linker->symtab_sec_idx]; + /* shdr->sh_link points to SYMTAB */ dst_sec->shdr->sh_link = linker->symtab_sec_idx; @@ -2650,6 +2654,7 @@ static int emit_elf_data_sec(struct bpf_linker *linker, const char *sec_name, static int finalize_btf(struct bpf_linker *linker) { + LIBBPF_OPTS(btf_dedup_opts, opts); struct btf *btf = linker->btf; const void *raw_data; int i, j, id, err; @@ -2686,7 +2691,8 @@ static int finalize_btf(struct bpf_linker *linker) return err; } - err = btf__dedup(linker->btf, linker->btf_ext, NULL); + opts.btf_ext = linker->btf_ext; + err = btf__dedup(linker->btf, &opts); if (err) { pr_warn("BTF dedup failed: %d\n", err); return err; diff --git a/tools/lib/bpf/relo_core.c b/tools/lib/bpf/relo_core.c index b5b8956a1be8..910865e29edc 100644 --- a/tools/lib/bpf/relo_core.c +++ b/tools/lib/bpf/relo_core.c @@ -1,6 +1,60 @@ // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) /* Copyright (c) 2019 Facebook */ +#ifdef __KERNEL__ +#include <linux/bpf.h> +#include <linux/btf.h> +#include <linux/string.h> +#include <linux/bpf_verifier.h> +#include "relo_core.h" + +static const char *btf_kind_str(const struct btf_type *t) +{ + return btf_type_str(t); +} + +static bool is_ldimm64_insn(struct bpf_insn *insn) +{ + return insn->code == (BPF_LD | BPF_IMM | BPF_DW); +} + +static const struct btf_type * +skip_mods_and_typedefs(const struct btf *btf, u32 id, u32 *res_id) +{ + return btf_type_skip_modifiers(btf, id, res_id); +} + +static const char *btf__name_by_offset(const struct btf *btf, u32 offset) +{ + return btf_name_by_offset(btf, offset); +} + +static s64 btf__resolve_size(const struct btf *btf, u32 type_id) +{ + const struct btf_type *t; + int size; + + t = btf_type_by_id(btf, type_id); + t = btf_resolve_size(btf, t, &size); + if (IS_ERR(t)) + return PTR_ERR(t); + return size; +} + +enum libbpf_print_level { + LIBBPF_WARN, + LIBBPF_INFO, + LIBBPF_DEBUG, +}; + +#undef pr_warn +#undef pr_info +#undef pr_debug +#define pr_warn(fmt, log, ...) bpf_log((void *)log, fmt, "", ##__VA_ARGS__) +#define pr_info(fmt, log, ...) bpf_log((void *)log, fmt, "", ##__VA_ARGS__) +#define pr_debug(fmt, log, ...) bpf_log((void *)log, fmt, "", ##__VA_ARGS__) +#define libbpf_print(level, fmt, ...) bpf_log((void *)prog_name, fmt, ##__VA_ARGS__) +#else #include <stdio.h> #include <string.h> #include <errno.h> @@ -12,33 +66,7 @@ #include "btf.h" #include "str_error.h" #include "libbpf_internal.h" - -#define BPF_CORE_SPEC_MAX_LEN 64 - -/* represents BPF CO-RE field or array element accessor */ -struct bpf_core_accessor { - __u32 type_id; /* struct/union type or array element type */ - __u32 idx; /* field index or array index */ - const char *name; /* field name or NULL for array accessor */ -}; - -struct bpf_core_spec { - const struct btf *btf; - /* high-level spec: named fields and array indices only */ - struct bpf_core_accessor spec[BPF_CORE_SPEC_MAX_LEN]; - /* original unresolved (no skip_mods_or_typedefs) root type ID */ - __u32 root_type_id; - /* CO-RE relocation kind */ - enum bpf_core_relo_kind relo_kind; - /* high-level spec length */ - int len; - /* raw, low-level spec: 1-to-1 with accessor spec string */ - int raw_spec[BPF_CORE_SPEC_MAX_LEN]; - /* raw spec length */ - int raw_len; - /* field bit offset represented by spec */ - __u32 bit_offset; -}; +#endif static bool is_flex_arr(const struct btf *btf, const struct bpf_core_accessor *acc, @@ -51,25 +79,25 @@ static bool is_flex_arr(const struct btf *btf, return false; /* has to be the last member of enclosing struct */ - t = btf__type_by_id(btf, acc->type_id); + t = btf_type_by_id(btf, acc->type_id); return acc->idx == btf_vlen(t) - 1; } static const char *core_relo_kind_str(enum bpf_core_relo_kind kind) { switch (kind) { - case BPF_FIELD_BYTE_OFFSET: return "byte_off"; - case BPF_FIELD_BYTE_SIZE: return "byte_sz"; - case BPF_FIELD_EXISTS: return "field_exists"; - case BPF_FIELD_SIGNED: return "signed"; - case BPF_FIELD_LSHIFT_U64: return "lshift_u64"; - case BPF_FIELD_RSHIFT_U64: return "rshift_u64"; - case BPF_TYPE_ID_LOCAL: return "local_type_id"; - case BPF_TYPE_ID_TARGET: return "target_type_id"; - case BPF_TYPE_EXISTS: return "type_exists"; - case BPF_TYPE_SIZE: return "type_size"; - case BPF_ENUMVAL_EXISTS: return "enumval_exists"; - case BPF_ENUMVAL_VALUE: return "enumval_value"; + case BPF_CORE_FIELD_BYTE_OFFSET: return "byte_off"; + case BPF_CORE_FIELD_BYTE_SIZE: return "byte_sz"; + case BPF_CORE_FIELD_EXISTS: return "field_exists"; + case BPF_CORE_FIELD_SIGNED: return "signed"; + case BPF_CORE_FIELD_LSHIFT_U64: return "lshift_u64"; + case BPF_CORE_FIELD_RSHIFT_U64: return "rshift_u64"; + case BPF_CORE_TYPE_ID_LOCAL: return "local_type_id"; + case BPF_CORE_TYPE_ID_TARGET: return "target_type_id"; + case BPF_CORE_TYPE_EXISTS: return "type_exists"; + case BPF_CORE_TYPE_SIZE: return "type_size"; + case BPF_CORE_ENUMVAL_EXISTS: return "enumval_exists"; + case BPF_CORE_ENUMVAL_VALUE: return "enumval_value"; default: return "unknown"; } } @@ -77,12 +105,12 @@ static const char *core_relo_kind_str(enum bpf_core_relo_kind kind) static bool core_relo_is_field_based(enum bpf_core_relo_kind kind) { switch (kind) { - case BPF_FIELD_BYTE_OFFSET: - case BPF_FIELD_BYTE_SIZE: - case BPF_FIELD_EXISTS: - case BPF_FIELD_SIGNED: - case BPF_FIELD_LSHIFT_U64: - case BPF_FIELD_RSHIFT_U64: + case BPF_CORE_FIELD_BYTE_OFFSET: + case BPF_CORE_FIELD_BYTE_SIZE: + case BPF_CORE_FIELD_EXISTS: + case BPF_CORE_FIELD_SIGNED: + case BPF_CORE_FIELD_LSHIFT_U64: + case BPF_CORE_FIELD_RSHIFT_U64: return true; default: return false; @@ -92,10 +120,10 @@ static bool core_relo_is_field_based(enum bpf_core_relo_kind kind) static bool core_relo_is_type_based(enum bpf_core_relo_kind kind) { switch (kind) { - case BPF_TYPE_ID_LOCAL: - case BPF_TYPE_ID_TARGET: - case BPF_TYPE_EXISTS: - case BPF_TYPE_SIZE: + case BPF_CORE_TYPE_ID_LOCAL: + case BPF_CORE_TYPE_ID_TARGET: + case BPF_CORE_TYPE_EXISTS: + case BPF_CORE_TYPE_SIZE: return true; default: return false; @@ -105,8 +133,8 @@ static bool core_relo_is_type_based(enum bpf_core_relo_kind kind) static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind) { switch (kind) { - case BPF_ENUMVAL_EXISTS: - case BPF_ENUMVAL_VALUE: + case BPF_CORE_ENUMVAL_EXISTS: + case BPF_CORE_ENUMVAL_VALUE: return true; default: return false; @@ -150,7 +178,7 @@ static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind) * Enum value-based relocations (ENUMVAL_EXISTS/ENUMVAL_VALUE) use access * string to specify enumerator's value index that need to be relocated. */ -static int bpf_core_parse_spec(const struct btf *btf, +static int bpf_core_parse_spec(const char *prog_name, const struct btf *btf, __u32 type_id, const char *spec_str, enum bpf_core_relo_kind relo_kind, @@ -272,8 +300,8 @@ static int bpf_core_parse_spec(const struct btf *btf, return sz; spec->bit_offset += access_idx * sz * 8; } else { - pr_warn("relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %s\n", - type_id, spec_str, i, id, btf_kind_str(t)); + pr_warn("prog '%s': relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %s\n", + prog_name, type_id, spec_str, i, id, btf_kind_str(t)); return -EINVAL; } } @@ -346,8 +374,6 @@ recur: targ_id = btf_array(targ_type)->type; goto recur; default: - pr_warn("unexpected kind %d relocated, local [%d], target [%d]\n", - btf_kind(local_type), local_id, targ_id); return 0; } } @@ -388,7 +414,7 @@ static int bpf_core_match_member(const struct btf *local_btf, return 0; local_id = local_acc->type_id; - local_type = btf__type_by_id(local_btf, local_id); + local_type = btf_type_by_id(local_btf, local_id); local_member = btf_members(local_type) + local_acc->idx; local_name = btf__name_by_offset(local_btf, local_member->name_off); @@ -571,7 +597,7 @@ static int bpf_core_calc_field_relo(const char *prog_name, *field_sz = 0; - if (relo->kind == BPF_FIELD_EXISTS) { + if (relo->kind == BPF_CORE_FIELD_EXISTS) { *val = spec ? 1 : 0; return 0; } @@ -580,11 +606,11 @@ static int bpf_core_calc_field_relo(const char *prog_name, return -EUCLEAN; /* request instruction poisoning */ acc = &spec->spec[spec->len - 1]; - t = btf__type_by_id(spec->btf, acc->type_id); + t = btf_type_by_id(spec->btf, acc->type_id); /* a[n] accessor needs special handling */ if (!acc->name) { - if (relo->kind == BPF_FIELD_BYTE_OFFSET) { + if (relo->kind == BPF_CORE_FIELD_BYTE_OFFSET) { *val = spec->bit_offset / 8; /* remember field size for load/store mem size */ sz = btf__resolve_size(spec->btf, acc->type_id); @@ -592,7 +618,7 @@ static int bpf_core_calc_field_relo(const char *prog_name, return -EINVAL; *field_sz = sz; *type_id = acc->type_id; - } else if (relo->kind == BPF_FIELD_BYTE_SIZE) { + } else if (relo->kind == BPF_CORE_FIELD_BYTE_SIZE) { sz = btf__resolve_size(spec->btf, acc->type_id); if (sz < 0) return -EINVAL; @@ -644,36 +670,36 @@ static int bpf_core_calc_field_relo(const char *prog_name, *validate = !bitfield; switch (relo->kind) { - case BPF_FIELD_BYTE_OFFSET: + case BPF_CORE_FIELD_BYTE_OFFSET: *val = byte_off; if (!bitfield) { *field_sz = byte_sz; *type_id = field_type_id; } break; - case BPF_FIELD_BYTE_SIZE: + case BPF_CORE_FIELD_BYTE_SIZE: *val = byte_sz; break; - case BPF_FIELD_SIGNED: + case BPF_CORE_FIELD_SIGNED: /* enums will be assumed unsigned */ *val = btf_is_enum(mt) || (btf_int_encoding(mt) & BTF_INT_SIGNED); if (validate) *validate = true; /* signedness is never ambiguous */ break; - case BPF_FIELD_LSHIFT_U64: + case BPF_CORE_FIELD_LSHIFT_U64: #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ *val = 64 - (bit_off + bit_sz - byte_off * 8); #else *val = (8 - byte_sz) * 8 + (bit_off - byte_off * 8); #endif break; - case BPF_FIELD_RSHIFT_U64: + case BPF_CORE_FIELD_RSHIFT_U64: *val = 64 - bit_sz; if (validate) *validate = true; /* right shift is never ambiguous */ break; - case BPF_FIELD_EXISTS: + case BPF_CORE_FIELD_EXISTS: default: return -EOPNOTSUPP; } @@ -683,10 +709,14 @@ static int bpf_core_calc_field_relo(const char *prog_name, static int bpf_core_calc_type_relo(const struct bpf_core_relo *relo, const struct bpf_core_spec *spec, - __u32 *val) + __u32 *val, bool *validate) { __s64 sz; + /* by default, always check expected value in bpf_insn */ + if (validate) + *validate = true; + /* type-based relos return zero when target type is not found */ if (!spec) { *val = 0; @@ -694,20 +724,25 @@ static int bpf_core_calc_type_relo(const struct bpf_core_relo *relo, } switch (relo->kind) { - case BPF_TYPE_ID_TARGET: + case BPF_CORE_TYPE_ID_TARGET: *val = spec->root_type_id; + /* type ID, embedded in bpf_insn, might change during linking, + * so enforcing it is pointless + */ + if (validate) + *validate = false; break; - case BPF_TYPE_EXISTS: + case BPF_CORE_TYPE_EXISTS: *val = 1; break; - case BPF_TYPE_SIZE: + case BPF_CORE_TYPE_SIZE: sz = btf__resolve_size(spec->btf, spec->root_type_id); if (sz < 0) return -EINVAL; *val = sz; break; - case BPF_TYPE_ID_LOCAL: - /* BPF_TYPE_ID_LOCAL is handled specially and shouldn't get here */ + case BPF_CORE_TYPE_ID_LOCAL: + /* BPF_CORE_TYPE_ID_LOCAL is handled specially and shouldn't get here */ default: return -EOPNOTSUPP; } @@ -723,13 +758,13 @@ static int bpf_core_calc_enumval_relo(const struct bpf_core_relo *relo, const struct btf_enum *e; switch (relo->kind) { - case BPF_ENUMVAL_EXISTS: + case BPF_CORE_ENUMVAL_EXISTS: *val = spec ? 1 : 0; break; - case BPF_ENUMVAL_VALUE: + case BPF_CORE_ENUMVAL_VALUE: if (!spec) return -EUCLEAN; /* request instruction poisoning */ - t = btf__type_by_id(spec->btf, spec->spec[0].type_id); + t = btf_type_by_id(spec->btf, spec->spec[0].type_id); e = btf_enum(t) + spec->spec[0].idx; *val = e->val; break; @@ -805,8 +840,8 @@ static int bpf_core_calc_relo(const char *prog_name, if (res->orig_sz != res->new_sz) { const struct btf_type *orig_t, *new_t; - orig_t = btf__type_by_id(local_spec->btf, res->orig_type_id); - new_t = btf__type_by_id(targ_spec->btf, res->new_type_id); + orig_t = btf_type_by_id(local_spec->btf, res->orig_type_id); + new_t = btf_type_by_id(targ_spec->btf, res->new_type_id); /* There are two use cases in which it's safe to * adjust load/store's mem size: @@ -835,8 +870,8 @@ static int bpf_core_calc_relo(const char *prog_name, res->fail_memsz_adjust = true; } } else if (core_relo_is_type_based(relo->kind)) { - err = bpf_core_calc_type_relo(relo, local_spec, &res->orig_val); - err = err ?: bpf_core_calc_type_relo(relo, targ_spec, &res->new_val); + err = bpf_core_calc_type_relo(relo, local_spec, &res->orig_val, &res->validate); + err = err ?: bpf_core_calc_type_relo(relo, targ_spec, &res->new_val, NULL); } else if (core_relo_is_enumval_based(relo->kind)) { err = bpf_core_calc_enumval_relo(relo, local_spec, &res->orig_val); err = err ?: bpf_core_calc_enumval_relo(relo, targ_spec, &res->new_val); @@ -1045,7 +1080,7 @@ poison: * [<type-id>] (<type-name>) + <raw-spec> => <offset>@<spec>, * where <spec> is a C-syntax view of recorded field access, e.g.: x.a[3].b */ -static void bpf_core_dump_spec(int level, const struct bpf_core_spec *spec) +static void bpf_core_dump_spec(const char *prog_name, int level, const struct bpf_core_spec *spec) { const struct btf_type *t; const struct btf_enum *e; @@ -1054,7 +1089,7 @@ static void bpf_core_dump_spec(int level, const struct bpf_core_spec *spec) int i; type_id = spec->root_type_id; - t = btf__type_by_id(spec->btf, type_id); + t = btf_type_by_id(spec->btf, type_id); s = btf__name_by_offset(spec->btf, t->name_off); libbpf_print(level, "[%u] %s %s", type_id, btf_kind_str(t), str_is_empty(s) ? "<anon>" : s); @@ -1147,9 +1182,12 @@ int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn, const struct bpf_core_relo *relo, int relo_idx, const struct btf *local_btf, - struct bpf_core_cand_list *cands) + struct bpf_core_cand_list *cands, + struct bpf_core_spec *specs_scratch) { - struct bpf_core_spec local_spec, cand_spec, targ_spec = {}; + struct bpf_core_spec *local_spec = &specs_scratch[0]; + struct bpf_core_spec *cand_spec = &specs_scratch[1]; + struct bpf_core_spec *targ_spec = &specs_scratch[2]; struct bpf_core_relo_res cand_res, targ_res; const struct btf_type *local_type; const char *local_name; @@ -1158,10 +1196,7 @@ int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn, int i, j, err; local_id = relo->type_id; - local_type = btf__type_by_id(local_btf, local_id); - if (!local_type) - return -EINVAL; - + local_type = btf_type_by_id(local_btf, local_id); local_name = btf__name_by_offset(local_btf, local_type->name_off); if (!local_name) return -EINVAL; @@ -1170,7 +1205,8 @@ int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn, if (str_is_empty(spec_str)) return -EINVAL; - err = bpf_core_parse_spec(local_btf, local_id, spec_str, relo->kind, &local_spec); + err = bpf_core_parse_spec(prog_name, local_btf, local_id, spec_str, + relo->kind, local_spec); if (err) { pr_warn("prog '%s': relo #%d: parsing [%d] %s %s + %s failed: %d\n", prog_name, relo_idx, local_id, btf_kind_str(local_type), @@ -1181,15 +1217,17 @@ int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn, pr_debug("prog '%s': relo #%d: kind <%s> (%d), spec is ", prog_name, relo_idx, core_relo_kind_str(relo->kind), relo->kind); - bpf_core_dump_spec(LIBBPF_DEBUG, &local_spec); + bpf_core_dump_spec(prog_name, LIBBPF_DEBUG, local_spec); libbpf_print(LIBBPF_DEBUG, "\n"); /* TYPE_ID_LOCAL relo is special and doesn't need candidate search */ - if (relo->kind == BPF_TYPE_ID_LOCAL) { - targ_res.validate = true; + if (relo->kind == BPF_CORE_TYPE_ID_LOCAL) { + /* bpf_insn's imm value could get out of sync during linking */ + memset(&targ_res, 0, sizeof(targ_res)); + targ_res.validate = false; targ_res.poison = false; - targ_res.orig_val = local_spec.root_type_id; - targ_res.new_val = local_spec.root_type_id; + targ_res.orig_val = local_spec->root_type_id; + targ_res.new_val = local_spec->root_type_id; goto patch_insn; } @@ -1200,40 +1238,39 @@ int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn, return -EOPNOTSUPP; } - for (i = 0, j = 0; i < cands->len; i++) { - err = bpf_core_spec_match(&local_spec, cands->cands[i].btf, - cands->cands[i].id, &cand_spec); + err = bpf_core_spec_match(local_spec, cands->cands[i].btf, + cands->cands[i].id, cand_spec); if (err < 0) { pr_warn("prog '%s': relo #%d: error matching candidate #%d ", prog_name, relo_idx, i); - bpf_core_dump_spec(LIBBPF_WARN, &cand_spec); + bpf_core_dump_spec(prog_name, LIBBPF_WARN, cand_spec); libbpf_print(LIBBPF_WARN, ": %d\n", err); return err; } pr_debug("prog '%s': relo #%d: %s candidate #%d ", prog_name, relo_idx, err == 0 ? "non-matching" : "matching", i); - bpf_core_dump_spec(LIBBPF_DEBUG, &cand_spec); + bpf_core_dump_spec(prog_name, LIBBPF_DEBUG, cand_spec); libbpf_print(LIBBPF_DEBUG, "\n"); if (err == 0) continue; - err = bpf_core_calc_relo(prog_name, relo, relo_idx, &local_spec, &cand_spec, &cand_res); + err = bpf_core_calc_relo(prog_name, relo, relo_idx, local_spec, cand_spec, &cand_res); if (err) return err; if (j == 0) { targ_res = cand_res; - targ_spec = cand_spec; - } else if (cand_spec.bit_offset != targ_spec.bit_offset) { + *targ_spec = *cand_spec; + } else if (cand_spec->bit_offset != targ_spec->bit_offset) { /* if there are many field relo candidates, they * should all resolve to the same bit offset */ pr_warn("prog '%s': relo #%d: field offset ambiguity: %u != %u\n", - prog_name, relo_idx, cand_spec.bit_offset, - targ_spec.bit_offset); + prog_name, relo_idx, cand_spec->bit_offset, + targ_spec->bit_offset); return -EINVAL; } else if (cand_res.poison != targ_res.poison || cand_res.new_val != targ_res.new_val) { /* all candidates should result in the same relocation @@ -1251,7 +1288,7 @@ int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn, } /* - * For BPF_FIELD_EXISTS relo or when used BPF program has field + * For BPF_CORE_FIELD_EXISTS relo or when used BPF program has field * existence checks or kernel version/config checks, it's expected * that we might not find any candidates. In this case, if field * wasn't found in any candidate, the list of candidates shouldn't @@ -1277,7 +1314,7 @@ int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn, prog_name, relo_idx); /* calculate single target relo result explicitly */ - err = bpf_core_calc_relo(prog_name, relo, relo_idx, &local_spec, NULL, &targ_res); + err = bpf_core_calc_relo(prog_name, relo, relo_idx, local_spec, NULL, &targ_res); if (err) return err; } diff --git a/tools/lib/bpf/relo_core.h b/tools/lib/bpf/relo_core.h index 3b9f8f18346c..17799819ad7c 100644 --- a/tools/lib/bpf/relo_core.h +++ b/tools/lib/bpf/relo_core.h @@ -4,81 +4,10 @@ #ifndef __RELO_CORE_H #define __RELO_CORE_H -/* bpf_core_relo_kind encodes which aspect of captured field/type/enum value - * has to be adjusted by relocations. - */ -enum bpf_core_relo_kind { - BPF_FIELD_BYTE_OFFSET = 0, /* field byte offset */ - BPF_FIELD_BYTE_SIZE = 1, /* field size in bytes */ - BPF_FIELD_EXISTS = 2, /* field existence in target kernel */ - BPF_FIELD_SIGNED = 3, /* field signedness (0 - unsigned, 1 - signed) */ - BPF_FIELD_LSHIFT_U64 = 4, /* bitfield-specific left bitshift */ - BPF_FIELD_RSHIFT_U64 = 5, /* bitfield-specific right bitshift */ - BPF_TYPE_ID_LOCAL = 6, /* type ID in local BPF object */ - BPF_TYPE_ID_TARGET = 7, /* type ID in target kernel */ - BPF_TYPE_EXISTS = 8, /* type existence in target kernel */ - BPF_TYPE_SIZE = 9, /* type size in bytes */ - BPF_ENUMVAL_EXISTS = 10, /* enum value existence in target kernel */ - BPF_ENUMVAL_VALUE = 11, /* enum value integer value */ -}; - -/* The minimum bpf_core_relo checked by the loader - * - * CO-RE relocation captures the following data: - * - insn_off - instruction offset (in bytes) within a BPF program that needs - * its insn->imm field to be relocated with actual field info; - * - type_id - BTF type ID of the "root" (containing) entity of a relocatable - * type or field; - * - access_str_off - offset into corresponding .BTF string section. String - * interpretation depends on specific relocation kind: - * - for field-based relocations, string encodes an accessed field using - * a sequence of field and array indices, separated by colon (:). It's - * conceptually very close to LLVM's getelementptr ([0]) instruction's - * arguments for identifying offset to a field. - * - for type-based relocations, strings is expected to be just "0"; - * - for enum value-based relocations, string contains an index of enum - * value within its enum type; - * - * Example to provide a better feel. - * - * struct sample { - * int a; - * struct { - * int b[10]; - * }; - * }; - * - * struct sample *s = ...; - * int x = &s->a; // encoded as "0:0" (a is field #0) - * int y = &s->b[5]; // encoded as "0:1:0:5" (anon struct is field #1, - * // b is field #0 inside anon struct, accessing elem #5) - * int z = &s[10]->b; // encoded as "10:1" (ptr is used as an array) - * - * type_id for all relocs in this example will capture BTF type id of - * `struct sample`. - * - * Such relocation is emitted when using __builtin_preserve_access_index() - * Clang built-in, passing expression that captures field address, e.g.: - * - * bpf_probe_read(&dst, sizeof(dst), - * __builtin_preserve_access_index(&src->a.b.c)); - * - * In this case Clang will emit field relocation recording necessary data to - * be able to find offset of embedded `a.b.c` field within `src` struct. - * - * [0] https://llvm.org/docs/LangRef.html#getelementptr-instruction - */ -struct bpf_core_relo { - __u32 insn_off; - __u32 type_id; - __u32 access_str_off; - enum bpf_core_relo_kind kind; -}; +#include <linux/bpf.h> struct bpf_core_cand { const struct btf *btf; - const struct btf_type *t; - const char *name; __u32 id; }; @@ -88,11 +17,39 @@ struct bpf_core_cand_list { int len; }; +#define BPF_CORE_SPEC_MAX_LEN 64 + +/* represents BPF CO-RE field or array element accessor */ +struct bpf_core_accessor { + __u32 type_id; /* struct/union type or array element type */ + __u32 idx; /* field index or array index */ + const char *name; /* field name or NULL for array accessor */ +}; + +struct bpf_core_spec { + const struct btf *btf; + /* high-level spec: named fields and array indices only */ + struct bpf_core_accessor spec[BPF_CORE_SPEC_MAX_LEN]; + /* original unresolved (no skip_mods_or_typedefs) root type ID */ + __u32 root_type_id; + /* CO-RE relocation kind */ + enum bpf_core_relo_kind relo_kind; + /* high-level spec length */ + int len; + /* raw, low-level spec: 1-to-1 with accessor spec string */ + int raw_spec[BPF_CORE_SPEC_MAX_LEN]; + /* raw spec length */ + int raw_len; + /* field bit offset represented by spec */ + __u32 bit_offset; +}; + int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn, int insn_idx, const struct bpf_core_relo *relo, int relo_idx, const struct btf *local_btf, - struct bpf_core_cand_list *cands); + struct bpf_core_cand_list *cands, + struct bpf_core_spec *specs_scratch); int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id, const struct btf *targ_btf, __u32 targ_id); diff --git a/tools/lib/bpf/skel_internal.h b/tools/lib/bpf/skel_internal.h index 9cf66702fa8d..0b84d8e6b72a 100644 --- a/tools/lib/bpf/skel_internal.h +++ b/tools/lib/bpf/skel_internal.h @@ -7,6 +7,16 @@ #include <sys/syscall.h> #include <sys/mman.h> +#ifndef __NR_bpf +# if defined(__mips__) && defined(_ABIO32) +# define __NR_bpf 4355 +# elif defined(__mips__) && defined(_ABIN32) +# define __NR_bpf 6319 +# elif defined(__mips__) && defined(_ABI64) +# define __NR_bpf 5315 +# endif +#endif + /* This file is a base header for auto-generated *.lskel.h files. * Its contents will change and may become part of auto-generation in the future. * @@ -65,8 +75,7 @@ static inline int bpf_load_and_run(struct bpf_load_and_run_opts *opts) int map_fd = -1, prog_fd = -1, key = 0, err; union bpf_attr attr; - map_fd = bpf_create_map_name(BPF_MAP_TYPE_ARRAY, "__loader.map", 4, - opts->data_sz, 1, 0); + map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "__loader.map", 4, opts->data_sz, 1, NULL); if (map_fd < 0) { opts->errstr = "failed to create loader map"; err = -errno; diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index 81f8fbc85e70..edafe56664f3 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -35,6 +35,11 @@ #include "libbpf_internal.h" #include "xsk.h" +/* entire xsk.h and xsk.c is going away in libbpf 1.0, so ignore all internal + * uses of deprecated APIs + */ +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" + #ifndef SOL_XDP #define SOL_XDP 283 #endif @@ -364,8 +369,6 @@ int xsk_umem__create_v0_0_2(struct xsk_umem **umem_ptr, void *umem_area, static enum xsk_prog get_xsk_prog(void) { enum xsk_prog detected = XSK_PROG_FALLBACK; - struct bpf_load_program_attr prog_attr; - struct bpf_create_map_attr map_attr; __u32 size_out, retval, duration; char data_in = 0, data_out; struct bpf_insn insns[] = { @@ -375,27 +378,15 @@ static enum xsk_prog get_xsk_prog(void) BPF_EMIT_CALL(BPF_FUNC_redirect_map), BPF_EXIT_INSN(), }; - int prog_fd, map_fd, ret; - - memset(&map_attr, 0, sizeof(map_attr)); - map_attr.map_type = BPF_MAP_TYPE_XSKMAP; - map_attr.key_size = sizeof(int); - map_attr.value_size = sizeof(int); - map_attr.max_entries = 1; + int prog_fd, map_fd, ret, insn_cnt = ARRAY_SIZE(insns); - map_fd = bpf_create_map_xattr(&map_attr); + map_fd = bpf_map_create(BPF_MAP_TYPE_XSKMAP, NULL, sizeof(int), sizeof(int), 1, NULL); if (map_fd < 0) return detected; insns[0].imm = map_fd; - memset(&prog_attr, 0, sizeof(prog_attr)); - prog_attr.prog_type = BPF_PROG_TYPE_XDP; - prog_attr.insns = insns; - prog_attr.insns_cnt = ARRAY_SIZE(insns); - prog_attr.license = "GPL"; - - prog_fd = bpf_load_program_xattr(&prog_attr, NULL, 0); + prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "GPL", insns, insn_cnt, NULL); if (prog_fd < 0) { close(map_fd); return detected; @@ -495,10 +486,13 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk) }; struct bpf_insn *progs[] = {prog, prog_redirect_flags}; enum xsk_prog option = get_xsk_prog(); + LIBBPF_OPTS(bpf_prog_load_opts, opts, + .log_buf = log_buf, + .log_size = log_buf_size, + ); - prog_fd = bpf_load_program(BPF_PROG_TYPE_XDP, progs[option], insns_cnt[option], - "LGPL-2.1 or BSD-2-Clause", 0, log_buf, - log_buf_size); + prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "LGPL-2.1 or BSD-2-Clause", + progs[option], insns_cnt[option], &opts); if (prog_fd < 0) { pr_warn("BPF log buffer:\n%s", log_buf); return prog_fd; @@ -554,8 +548,7 @@ static int xsk_get_max_queues(struct xsk_socket *xsk) return -errno; ifr.ifr_data = (void *)&channels; - memcpy(ifr.ifr_name, ctx->ifname, IFNAMSIZ - 1); - ifr.ifr_name[IFNAMSIZ - 1] = '\0'; + libbpf_strlcpy(ifr.ifr_name, ctx->ifname, IFNAMSIZ); err = ioctl(fd, SIOCETHTOOL, &ifr); if (err && errno != EOPNOTSUPP) { ret = -errno; @@ -590,8 +583,8 @@ static int xsk_create_bpf_maps(struct xsk_socket *xsk) if (max_queues < 0) return max_queues; - fd = bpf_create_map_name(BPF_MAP_TYPE_XSKMAP, "xsks_map", - sizeof(int), sizeof(int), max_queues, 0); + fd = bpf_map_create(BPF_MAP_TYPE_XSKMAP, "xsks_map", + sizeof(int), sizeof(int), max_queues, NULL); if (fd < 0) return fd; @@ -725,14 +718,12 @@ static int xsk_link_lookup(int ifindex, __u32 *prog_id, int *link_fd) static bool xsk_probe_bpf_link(void) { - DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts, - .flags = XDP_FLAGS_SKB_MODE); - struct bpf_load_program_attr prog_attr; + LIBBPF_OPTS(bpf_link_create_opts, opts, .flags = XDP_FLAGS_SKB_MODE); struct bpf_insn insns[2] = { BPF_MOV64_IMM(BPF_REG_0, XDP_PASS), BPF_EXIT_INSN() }; - int prog_fd, link_fd = -1; + int prog_fd, link_fd = -1, insn_cnt = ARRAY_SIZE(insns); int ifindex_lo = 1; bool ret = false; int err; @@ -744,13 +735,7 @@ static bool xsk_probe_bpf_link(void) if (link_fd >= 0) return true; - memset(&prog_attr, 0, sizeof(prog_attr)); - prog_attr.prog_type = BPF_PROG_TYPE_XDP; - prog_attr.insns = insns; - prog_attr.insns_cnt = ARRAY_SIZE(insns); - prog_attr.license = "GPL"; - - prog_fd = bpf_load_program_xattr(&prog_attr, NULL, 0); + prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "GPL", insns, insn_cnt, NULL); if (prog_fd < 0) return ret; @@ -782,8 +767,7 @@ static int xsk_create_xsk_struct(int ifindex, struct xsk_socket *xsk) } ctx->ifindex = ifindex; - memcpy(ctx->ifname, ifname, IFNAMSIZ -1); - ctx->ifname[IFNAMSIZ - 1] = 0; + libbpf_strlcpy(ctx->ifname, ifname, IFNAMSIZ); xsk->ctx = ctx; xsk->ctx->has_bpf_link = xsk_probe_bpf_link(); @@ -965,8 +949,7 @@ static struct xsk_ctx *xsk_create_ctx(struct xsk_socket *xsk, ctx->refcount = 1; ctx->umem = umem; ctx->queue_id = queue_id; - memcpy(ctx->ifname, ifname, IFNAMSIZ - 1); - ctx->ifname[IFNAMSIZ - 1] = '\0'; + libbpf_strlcpy(ctx->ifname, ifname, IFNAMSIZ); ctx->fill = fill; ctx->comp = comp; diff --git a/tools/memory-model/Documentation/explanation.txt b/tools/memory-model/Documentation/explanation.txt index 5d72f3112e56..394ee57d58f2 100644 --- a/tools/memory-model/Documentation/explanation.txt +++ b/tools/memory-model/Documentation/explanation.txt @@ -1813,15 +1813,16 @@ spin_trylock() -- we can call these things lock-releases and lock-acquires -- have two properties beyond those of ordinary releases and acquires. -First, when a lock-acquire reads from a lock-release, the LKMM -requires that every instruction po-before the lock-release must -execute before any instruction po-after the lock-acquire. This would -naturally hold if the release and acquire operations were on different -CPUs, but the LKMM says it holds even when they are on the same CPU. -For example: +First, when a lock-acquire reads from or is po-after a lock-release, +the LKMM requires that every instruction po-before the lock-release +must execute before any instruction po-after the lock-acquire. This +would naturally hold if the release and acquire operations were on +different CPUs and accessed the same lock variable, but the LKMM says +it also holds when they are on the same CPU, even if they access +different lock variables. For example: int x, y; - spinlock_t s; + spinlock_t s, t; P0() { @@ -1830,9 +1831,9 @@ For example: spin_lock(&s); r1 = READ_ONCE(x); spin_unlock(&s); - spin_lock(&s); + spin_lock(&t); r2 = READ_ONCE(y); - spin_unlock(&s); + spin_unlock(&t); } P1() @@ -1842,10 +1843,10 @@ For example: WRITE_ONCE(x, 1); } -Here the second spin_lock() reads from the first spin_unlock(), and -therefore the load of x must execute before the load of y. Thus we -cannot have r1 = 1 and r2 = 0 at the end (this is an instance of the -MP pattern). +Here the second spin_lock() is po-after the first spin_unlock(), and +therefore the load of x must execute before the load of y, even though +the two locking operations use different locks. Thus we cannot have +r1 = 1 and r2 = 0 at the end (this is an instance of the MP pattern). This requirement does not apply to ordinary release and acquire fences, only to lock-related operations. For instance, suppose P0() @@ -1872,13 +1873,13 @@ instructions in the following order: and thus it could load y before x, obtaining r2 = 0 and r1 = 1. -Second, when a lock-acquire reads from a lock-release, and some other -stores W and W' occur po-before the lock-release and po-after the -lock-acquire respectively, the LKMM requires that W must propagate to -each CPU before W' does. For example, consider: +Second, when a lock-acquire reads from or is po-after a lock-release, +and some other stores W and W' occur po-before the lock-release and +po-after the lock-acquire respectively, the LKMM requires that W must +propagate to each CPU before W' does. For example, consider: int x, y; - spinlock_t x; + spinlock_t s; P0() { @@ -1908,7 +1909,12 @@ each CPU before W' does. For example, consider: If r1 = 1 at the end then the spin_lock() in P1 must have read from the spin_unlock() in P0. Hence the store to x must propagate to P2 -before the store to y does, so we cannot have r2 = 1 and r3 = 0. +before the store to y does, so we cannot have r2 = 1 and r3 = 0. But +if P1 had used a lock variable different from s, the writes could have +propagated in either order. (On the other hand, if the code in P0 and +P1 had all executed on a single CPU, as in the example before this +one, then the writes would have propagated in order even if the two +critical sections used different lock variables.) These two special requirements for lock-release and lock-acquire do not arise from the operational model. Nevertheless, kernel developers diff --git a/tools/memory-model/README b/tools/memory-model/README index 9a84c45504ab..9edd402704c4 100644 --- a/tools/memory-model/README +++ b/tools/memory-model/README @@ -195,6 +195,18 @@ litmus-tests are listed in litmus-tests/README. A great deal more litmus tests are available at https://github.com/paulmckrcu/litmus. + By "representative", it means the one in the litmus-tests + directory is: + + 1) simple, the number of threads should be relatively + small and each thread function should be relatively + simple. + 2) orthogonal, there should be no two litmus tests + describing the same aspect of the memory model. + 3) textbook, developers can easily copy-paste-modify + the litmus tests to use the patterns on their own + code. + lock.cat Provides a front-end analysis of lock acquisition and release, for example, associating a lock acquisition with the preceding diff --git a/tools/memory-model/linux-kernel.cat b/tools/memory-model/linux-kernel.cat index 2a9b4fe4a84e..d70315fddef6 100644 --- a/tools/memory-model/linux-kernel.cat +++ b/tools/memory-model/linux-kernel.cat @@ -27,7 +27,7 @@ include "lock.cat" (* Release Acquire *) let acq-po = [Acquire] ; po ; [M] let po-rel = [M] ; po ; [Release] -let po-unlock-rf-lock-po = po ; [UL] ; rf ; [LKR] ; po +let po-unlock-lock-po = po ; [UL] ; (po|rf) ; [LKR] ; po (* Fences *) let R4rmb = R \ Noreturn (* Reads for which rmb works *) @@ -70,12 +70,12 @@ let rwdep = (dep | ctrl) ; [W] let overwrite = co | fr let to-w = rwdep | (overwrite & int) | (addr ; [Plain] ; wmb) let to-r = addr | (dep ; [Marked] ; rfi) -let ppo = to-r | to-w | fence | (po-unlock-rf-lock-po & int) +let ppo = to-r | to-w | fence | (po-unlock-lock-po & int) (* Propagation: Ordering from release operations and strong fences. *) let A-cumul(r) = (rfe ; [Marked])? ; r let cumul-fence = [Marked] ; (A-cumul(strong-fence | po-rel) | wmb | - po-unlock-rf-lock-po) ; [Marked] + po-unlock-lock-po) ; [Marked] let prop = [Marked] ; (overwrite & ext)? ; cumul-fence* ; [Marked] ; rfe? ; [Marked] diff --git a/tools/memory-model/litmus-tests/LB+unlocklockonceonce+poacquireonce.litmus b/tools/memory-model/litmus-tests/LB+unlocklockonceonce+poacquireonce.litmus new file mode 100644 index 000000000000..eb34123a6ffe --- /dev/null +++ b/tools/memory-model/litmus-tests/LB+unlocklockonceonce+poacquireonce.litmus @@ -0,0 +1,35 @@ +C LB+unlocklockonceonce+poacquireonce + +(* + * Result: Never + * + * If two locked critical sections execute on the same CPU, all accesses + * in the first must execute before any accesses in the second, even if the + * critical sections are protected by different locks. Note: Even when a + * write executes before a read, their memory effects can be reordered from + * the viewpoint of another CPU (the kind of reordering allowed by TSO). + *) + +{} + +P0(spinlock_t *s, spinlock_t *t, int *x, int *y) +{ + int r1; + + spin_lock(s); + r1 = READ_ONCE(*x); + spin_unlock(s); + spin_lock(t); + WRITE_ONCE(*y, 1); + spin_unlock(t); +} + +P1(int *x, int *y) +{ + int r2; + + r2 = smp_load_acquire(y); + WRITE_ONCE(*x, 1); +} + +exists (0:r1=1 /\ 1:r2=1) diff --git a/tools/memory-model/litmus-tests/MP+unlocklockonceonce+fencermbonceonce.litmus b/tools/memory-model/litmus-tests/MP+unlocklockonceonce+fencermbonceonce.litmus new file mode 100644 index 000000000000..2feb1398be71 --- /dev/null +++ b/tools/memory-model/litmus-tests/MP+unlocklockonceonce+fencermbonceonce.litmus @@ -0,0 +1,33 @@ +C MP+unlocklockonceonce+fencermbonceonce + +(* + * Result: Never + * + * If two locked critical sections execute on the same CPU, stores in the + * first must propagate to each CPU before stores in the second do, even if + * the critical sections are protected by different locks. + *) + +{} + +P0(spinlock_t *s, spinlock_t *t, int *x, int *y) +{ + spin_lock(s); + WRITE_ONCE(*x, 1); + spin_unlock(s); + spin_lock(t); + WRITE_ONCE(*y, 1); + spin_unlock(t); +} + +P1(int *x, int *y) +{ + int r1; + int r2; + + r1 = READ_ONCE(*y); + smp_rmb(); + r2 = READ_ONCE(*x); +} + +exists (1:r1=1 /\ 1:r2=0) diff --git a/tools/memory-model/litmus-tests/README b/tools/memory-model/litmus-tests/README index 681f9067fa9e..d311a0ff1ae6 100644 --- a/tools/memory-model/litmus-tests/README +++ b/tools/memory-model/litmus-tests/README @@ -63,6 +63,10 @@ LB+poonceonces.litmus As above, but with store-release replaced with WRITE_ONCE() and load-acquire replaced with READ_ONCE(). +LB+unlocklockonceonce+poacquireonce.litmus + Does a unlock+lock pair provides ordering guarantee between a + load and a store? + MP+onceassign+derefonce.litmus As below, but with rcu_assign_pointer() and an rcu_dereference(). @@ -90,6 +94,10 @@ MP+porevlocks.litmus As below, but with the first access of the writer process and the second access of reader process protected by a lock. +MP+unlocklockonceonce+fencermbonceonce.litmus + Does a unlock+lock pair provides ordering guarantee between a + store and another store? + MP+fencewmbonceonce+fencermbonceonce.litmus Does a smp_wmb() (between the stores) and an smp_rmb() (between the loads) suffice for the message-passing litmus test, where one diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 21735829b860..a9a1f7259d62 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -849,6 +849,10 @@ static const char *uaccess_safe_builtin[] = { "__asan_report_store16_noabort", /* KCSAN */ "__kcsan_check_access", + "__kcsan_mb", + "__kcsan_wmb", + "__kcsan_rmb", + "__kcsan_release", "kcsan_found_watchpoint", "kcsan_setup_watchpoint", "kcsan_check_scoped_accesses", @@ -1068,11 +1072,11 @@ static void annotate_call_site(struct objtool_file *file, } /* - * Many compilers cannot disable KCOV with a function attribute - * so they need a little help, NOP out any KCOV calls from noinstr - * text. + * Many compilers cannot disable KCOV or sanitizer calls with a function + * attribute so they need a little help, NOP out any such calls from + * noinstr text. */ - if (insn->sec->noinstr && sym->kcov) { + if (insn->sec->noinstr && sym->profiling_func) { if (reloc) { reloc->type = R_NONE; elf_write_reloc(file->elf, reloc); @@ -1987,6 +1991,31 @@ static int read_intra_function_calls(struct objtool_file *file) return 0; } +/* + * Return true if name matches an instrumentation function, where calls to that + * function from noinstr code can safely be removed, but compilers won't do so. + */ +static bool is_profiling_func(const char *name) +{ + /* + * Many compilers cannot disable KCOV with a function attribute. + */ + if (!strncmp(name, "__sanitizer_cov_", 16)) + return true; + + /* + * Some compilers currently do not remove __tsan_func_entry/exit nor + * __tsan_atomic_signal_fence (used for barrier instrumentation) with + * the __no_sanitize_thread attribute, remove them. Once the kernel's + * minimum Clang version is 14.0, this can be removed. + */ + if (!strncmp(name, "__tsan_func_", 12) || + !strcmp(name, "__tsan_atomic_signal_fence")) + return true; + + return false; +} + static int classify_symbols(struct objtool_file *file) { struct section *sec; @@ -2007,8 +2036,8 @@ static int classify_symbols(struct objtool_file *file) if (!strcmp(func->name, "__fentry__")) func->fentry = true; - if (!strncmp(func->name, "__sanitizer_cov_", 16)) - func->kcov = true; + if (is_profiling_func(func->name)) + func->profiling_func = true; } } diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 81a4c543ff7e..4b384c907027 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -375,6 +375,7 @@ static int read_symbols(struct elf *elf) return -1; } memset(sym, 0, sizeof(*sym)); + INIT_LIST_HEAD(&sym->pv_target); sym->alias = sym; sym->idx = i; diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h index cdc739fa9a6f..d22336781401 100644 --- a/tools/objtool/include/objtool/elf.h +++ b/tools/objtool/include/objtool/elf.h @@ -58,7 +58,7 @@ struct symbol { u8 static_call_tramp : 1; u8 retpoline_thunk : 1; u8 fentry : 1; - u8 kcov : 1; + u8 profiling_func : 1; struct list_head pv_target; }; diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c index c90c7084e45a..bdf699f6552b 100644 --- a/tools/objtool/objtool.c +++ b/tools/objtool/objtool.c @@ -153,6 +153,10 @@ void objtool_pv_add(struct objtool_file *f, int idx, struct symbol *func) !strcmp(func->name, "_paravirt_ident_64")) return; + /* already added this function */ + if (!list_empty(&func->pv_target)) + return; + list_add(&func->pv_target, &f->pv_ops[idx].targets); f->pv_ops[idx].clean = false; } diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 07e65a061fd3..3df74cf5651a 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -271,8 +271,6 @@ endif FEATURE_CHECK_CFLAGS-libpython := $(PYTHON_EMBED_CCOPTS) FEATURE_CHECK_LDFLAGS-libpython := $(PYTHON_EMBED_LDOPTS) -FEATURE_CHECK_CFLAGS-libpython-version := $(PYTHON_EMBED_CCOPTS) -FEATURE_CHECK_LDFLAGS-libpython-version := $(PYTHON_EMBED_LDOPTS) FEATURE_CHECK_LDFLAGS-libaio = -lrt @@ -1010,6 +1008,9 @@ ifndef NO_AUXTRACE ifndef NO_AUXTRACE $(call detected,CONFIG_AUXTRACE) CFLAGS += -DHAVE_AUXTRACE_SUPPORT + ifeq ($(feature-reallocarray), 0) + CFLAGS += -DCOMPAT_NEED_REALLOCARRAY + endif endif endif diff --git a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl index 1ca7bc337932..e2c481fcede6 100644 --- a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl +++ b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl @@ -363,3 +363,4 @@ 446 n64 landlock_restrict_self sys_landlock_restrict_self # 447 reserved for memfd_secret 448 n64 process_mrelease sys_process_mrelease +449 n64 futex_waitv sys_futex_waitv diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl index 7bef917cc84e..15109af9d075 100644 --- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl @@ -528,3 +528,4 @@ 446 common landlock_restrict_self sys_landlock_restrict_self # 447 reserved for memfd_secret 448 common process_mrelease sys_process_mrelease +449 common futex_waitv sys_futex_waitv diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl index df5261e5cfe1..ed9c5c2eafad 100644 --- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl @@ -451,3 +451,4 @@ 446 common landlock_restrict_self sys_landlock_restrict_self sys_landlock_restrict_self # 447 reserved for memfd_secret 448 common process_mrelease sys_process_mrelease sys_process_mrelease +449 common futex_waitv sys_futex_waitv sys_futex_waitv diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index bc5259db5fd9..409b721666cb 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -755,12 +755,16 @@ static int parse_vm_time_correlation(const struct option *opt, const char *str, return inject->itrace_synth_opts.vm_tm_corr_args ? 0 : -ENOMEM; } +static int output_fd(struct perf_inject *inject) +{ + return inject->in_place_update ? -1 : perf_data__fd(&inject->output); +} + static int __cmd_inject(struct perf_inject *inject) { int ret = -EINVAL; struct perf_session *session = inject->session; - struct perf_data *data_out = &inject->output; - int fd = inject->in_place_update ? -1 : perf_data__fd(data_out); + int fd = output_fd(inject); u64 output_data_offset; signal(SIGINT, sig_handler); @@ -820,7 +824,7 @@ static int __cmd_inject(struct perf_inject *inject) inject->tool.ordered_events = true; inject->tool.ordering_requires_timestamps = true; /* Allow space in the header for new attributes */ - output_data_offset = 4096; + output_data_offset = roundup(8192 + session->header.data_offset, 4096); if (inject->strip) strip_init(inject); } @@ -1015,7 +1019,7 @@ int cmd_inject(int argc, const char **argv) } inject.session = __perf_session__new(&data, repipe, - perf_data__fd(&inject.output), + output_fd(&inject), &inject.tool); if (IS_ERR(inject.session)) { ret = PTR_ERR(inject.session); @@ -1078,7 +1082,8 @@ out_delete: zstd_fini(&(inject.session->zstd_data)); perf_session__delete(inject.session); out_close_output: - perf_data__close(&inject.output); + if (!inject.in_place_update) + perf_data__close(&inject.output); free(inject.itrace_synth_opts.vm_tm_corr_args); return ret; } diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 8167ebfe776a..8ae400429870 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -619,14 +619,17 @@ static int report__browse_hists(struct report *rep) int ret; struct perf_session *session = rep->session; struct evlist *evlist = session->evlist; - const char *help = perf_tip(system_path(TIPDIR)); + char *help = NULL, *path = NULL; - if (help == NULL) { + path = system_path(TIPDIR); + if (perf_tip(&help, path) || help == NULL) { /* fallback for people who don't install perf ;-) */ - help = perf_tip(DOCDIR); - if (help == NULL) - help = "Cannot load tips.txt file, please install perf!"; + free(path); + path = system_path(DOCDIR); + if (perf_tip(&help, path) || help == NULL) + help = strdup("Cannot load tips.txt file, please install perf!"); } + free(path); switch (use_browser) { case 1: @@ -651,7 +654,7 @@ static int report__browse_hists(struct report *rep) ret = evlist__tty_browse_hists(evlist, rep, help); break; } - + free(help); return ret; } diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 9434367af166..c82b033e8942 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -2473,7 +2473,7 @@ static int process_switch_event(struct perf_tool *tool, if (perf_event__process_switch(tool, event, sample, machine) < 0) return -1; - if (scripting_ops && scripting_ops->process_switch) + if (scripting_ops && scripting_ops->process_switch && !filter_cpu(sample)) scripting_ops->process_switch(event, sample, machine); if (!script->show_switch_events) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 0b52e08e558e..df9fc00b4cd6 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -3257,10 +3257,21 @@ static void trace__set_bpf_map_syscalls(struct trace *trace) static struct bpf_program *trace__find_bpf_program_by_title(struct trace *trace, const char *name) { + struct bpf_program *pos, *prog = NULL; + const char *sec_name; + if (trace->bpf_obj == NULL) return NULL; - return bpf_object__find_program_by_title(trace->bpf_obj, name); + bpf_object__for_each_program(pos, trace->bpf_obj) { + sec_name = bpf_program__section_name(pos); + if (sec_name && !strcmp(sec_name, name)) { + prog = pos; + break; + } + } + + return prog; } static struct bpf_program *trace__find_syscall_bpf_prog(struct trace *trace, struct syscall *sc, @@ -3925,6 +3936,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv) bool draining = false; trace->live = true; + signal(SIGCHLD, sig_handler); if (!trace->raw_augmented_syscalls) { if (trace->trace_syscalls && trace__add_syscall_newtp(trace)) @@ -4873,7 +4885,6 @@ int cmd_trace(int argc, const char **argv) signal(SIGSEGV, sighandler_dump_stack); signal(SIGFPE, sighandler_dump_stack); - signal(SIGCHLD, sig_handler); signal(SIGINT, sig_handler); trace.evlist = evlist__new(); diff --git a/tools/perf/scripts/python/intel-pt-events.py b/tools/perf/scripts/python/intel-pt-events.py index 1d3a189a9a54..66452a8ec358 100644 --- a/tools/perf/scripts/python/intel-pt-events.py +++ b/tools/perf/scripts/python/intel-pt-events.py @@ -32,8 +32,7 @@ try: except: broken_pipe_exception = IOError -glb_switch_str = None -glb_switch_printed = True +glb_switch_str = {} glb_insn = False glb_disassembler = None glb_src = False @@ -70,6 +69,7 @@ def trace_begin(): ap = argparse.ArgumentParser(usage = "", add_help = False) ap.add_argument("--insn-trace", action='store_true') ap.add_argument("--src-trace", action='store_true') + ap.add_argument("--all-switch-events", action='store_true') global glb_args global glb_insn global glb_src @@ -256,10 +256,6 @@ def print_srccode(comm, param_dict, sample, symbol, dso, with_insn): print(start_str, src_str) def do_process_event(param_dict): - global glb_switch_printed - if not glb_switch_printed: - print(glb_switch_str) - glb_switch_printed = True event_attr = param_dict["attr"] sample = param_dict["sample"] raw_buf = param_dict["raw_buf"] @@ -274,6 +270,11 @@ def do_process_event(param_dict): dso = get_optional(param_dict, "dso") symbol = get_optional(param_dict, "symbol") + cpu = sample["cpu"] + if cpu in glb_switch_str: + print(glb_switch_str[cpu]) + del glb_switch_str[cpu] + if name[0:12] == "instructions": if glb_src: print_srccode(comm, param_dict, sample, symbol, dso, True) @@ -336,8 +337,6 @@ def auxtrace_error(typ, code, cpu, pid, tid, ip, ts, msg, cpumode, *x): sys.exit(1) def context_switch(ts, cpu, pid, tid, np_pid, np_tid, machine_pid, out, out_preempt, *x): - global glb_switch_printed - global glb_switch_str if out: out_str = "Switch out " else: @@ -350,6 +349,10 @@ def context_switch(ts, cpu, pid, tid, np_pid, np_tid, machine_pid, out, out_pree machine_str = "" else: machine_str = "machine PID %d" % machine_pid - glb_switch_str = "%16s %5d/%-5d [%03u] %9u.%09u %5d/%-5d %s %s" % \ + switch_str = "%16s %5d/%-5d [%03u] %9u.%09u %5d/%-5d %s %s" % \ (out_str, pid, tid, cpu, ts / 1000000000, ts %1000000000, np_pid, np_tid, machine_str, preempt_str) - glb_switch_printed = False + if glb_args.all_switch_events: + print(switch_str); + else: + global glb_switch_str + glb_switch_str[cpu] = switch_str diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c index 329f77f592f4..573490530194 100644 --- a/tools/perf/tests/bpf.c +++ b/tools/perf/tests/bpf.c @@ -296,9 +296,13 @@ static int check_env(void) return err; } +/* temporarily disable libbpf deprecation warnings */ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" err = bpf_load_program(BPF_PROG_TYPE_KPROBE, insns, ARRAY_SIZE(insns), license, kver_int, NULL, 0); +#pragma GCC diagnostic pop if (err < 0) { pr_err("Missing basic BPF support, skip this test: %s\n", strerror(errno)); diff --git a/tools/perf/tests/event_update.c b/tools/perf/tests/event_update.c index fbb68deba59f..d01532d40acb 100644 --- a/tools/perf/tests/event_update.c +++ b/tools/perf/tests/event_update.c @@ -88,7 +88,6 @@ static int test__event_update(struct test_suite *test __maybe_unused, int subtes struct evsel *evsel; struct event_name tmp; struct evlist *evlist = evlist__new_default(); - char *unit = strdup("KRAVA"); TEST_ASSERT_VAL("failed to get evlist", evlist); @@ -99,7 +98,8 @@ static int test__event_update(struct test_suite *test __maybe_unused, int subtes perf_evlist__id_add(&evlist->core, &evsel->core, 0, 0, 123); - evsel->unit = unit; + free((char *)evsel->unit); + evsel->unit = strdup("KRAVA"); TEST_ASSERT_VAL("failed to synthesize attr update unit", !perf_event__synthesize_event_update_unit(NULL, evsel, process_event_unit)); @@ -119,7 +119,6 @@ static int test__event_update(struct test_suite *test __maybe_unused, int subtes TEST_ASSERT_VAL("failed to synthesize attr update cpus", !perf_event__synthesize_event_update_cpus(&tmp.tool, evsel, process_event_cpus)); - free(unit); evlist__delete(evlist); return 0; } diff --git a/tools/perf/tests/expr.c b/tools/perf/tests/expr.c index c895de481fe1..d54c5371c6a6 100644 --- a/tools/perf/tests/expr.c +++ b/tools/perf/tests/expr.c @@ -169,7 +169,9 @@ static int test__expr(struct test_suite *t __maybe_unused, int subtest __maybe_u TEST_ASSERT_VAL("#num_dies", expr__parse(&num_dies, ctx, "#num_dies") == 0); TEST_ASSERT_VAL("#num_cores >= #num_dies", num_cores >= num_dies); TEST_ASSERT_VAL("#num_packages", expr__parse(&num_packages, ctx, "#num_packages") == 0); - TEST_ASSERT_VAL("#num_dies >= #num_packages", num_dies >= num_packages); + + if (num_dies) // Some platforms do not have CPU die support, for example s390 + TEST_ASSERT_VAL("#num_dies >= #num_packages", num_dies >= num_packages); /* * Source count returns the number of events aggregating in a leader diff --git a/tools/perf/tests/parse-metric.c b/tools/perf/tests/parse-metric.c index 574b7e4efd3a..07b6f4ec024f 100644 --- a/tools/perf/tests/parse-metric.c +++ b/tools/perf/tests/parse-metric.c @@ -109,6 +109,7 @@ static void load_runtime_stat(struct runtime_stat *st, struct evlist *evlist, struct evsel *evsel; u64 count; + perf_stat__reset_shadow_stats(); evlist__for_each_entry(evlist, evsel) { count = find_value(evsel->name, vals); perf_stat__update_shadow_stats(evsel, count, 0, st); diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c index b669d22f2b13..07f2411b0ad4 100644 --- a/tools/perf/tests/sample-parsing.c +++ b/tools/perf/tests/sample-parsing.c @@ -36,7 +36,7 @@ * These are based on the input value (213) specified * in branch_stack variable. */ -#define BS_EXPECTED_BE 0xa00d000000000000 +#define BS_EXPECTED_BE 0xa000d00000000000 #define BS_EXPECTED_LE 0xd5000000 #define FLAG(s) s->branch_stack->entries[i].flags diff --git a/tools/perf/tests/wp.c b/tools/perf/tests/wp.c index 820d942b30c3..9d4c45184e71 100644 --- a/tools/perf/tests/wp.c +++ b/tools/perf/tests/wp.c @@ -21,6 +21,7 @@ do { \ volatile u64 data1; volatile u8 data2[3]; +#ifndef __s390x__ static int wp_read(int fd, long long *count, int size) { int ret = read(fd, count, size); @@ -48,7 +49,6 @@ static void get__perf_event_attr(struct perf_event_attr *attr, int wp_type, attr->exclude_hv = 1; } -#ifndef __s390x__ static int __event(int wp_type, void *wp_addr, unsigned long wp_len) { int fd; diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index c1f24d004852..5075ecead5f3 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -535,6 +535,18 @@ struct perf_hpp_list perf_hpp_list = { #undef __HPP_SORT_ACC_FN #undef __HPP_SORT_RAW_FN +static void fmt_free(struct perf_hpp_fmt *fmt) +{ + /* + * At this point fmt should be completely + * unhooked, if not it's a bug. + */ + BUG_ON(!list_empty(&fmt->list)); + BUG_ON(!list_empty(&fmt->sort_list)); + + if (fmt->free) + fmt->free(fmt); +} void perf_hpp__init(void) { @@ -598,9 +610,10 @@ void perf_hpp_list__prepend_sort_field(struct perf_hpp_list *list, list_add(&format->sort_list, &list->sorts); } -void perf_hpp__column_unregister(struct perf_hpp_fmt *format) +static void perf_hpp__column_unregister(struct perf_hpp_fmt *format) { list_del_init(&format->list); + fmt_free(format); } void perf_hpp__cancel_cumulate(void) @@ -672,19 +685,6 @@ next: } -static void fmt_free(struct perf_hpp_fmt *fmt) -{ - /* - * At this point fmt should be completely - * unhooked, if not it's a bug. - */ - BUG_ON(!list_empty(&fmt->list)); - BUG_ON(!list_empty(&fmt->sort_list)); - - if (fmt->free) - fmt->free(fmt); -} - void perf_hpp__reset_output_field(struct perf_hpp_list *list) { struct perf_hpp_fmt *fmt, *tmp; diff --git a/tools/perf/ui/tui/setup.c b/tools/perf/ui/tui/setup.c index e9bfe856a5de..b1be59b4e2a4 100644 --- a/tools/perf/ui/tui/setup.c +++ b/tools/perf/ui/tui/setup.c @@ -170,9 +170,11 @@ void ui__exit(bool wait_for_ok) "Press any key...", 0); SLtt_set_cursor_visibility(1); - SLsmg_refresh(); - SLsmg_reset_smg(); + if (!pthread_mutex_trylock(&ui__lock)) { + SLsmg_refresh(); + SLsmg_reset_smg(); + pthread_mutex_unlock(&ui__lock); + } SLang_reset_tty(); - perf_error__unregister(&perf_tui_eops); } diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c index 4748bcfe61de..fccac06b573a 100644 --- a/tools/perf/util/arm-spe.c +++ b/tools/perf/util/arm-spe.c @@ -51,6 +51,7 @@ struct arm_spe { u8 timeless_decoding; u8 data_queued; + u64 sample_type; u8 sample_flc; u8 sample_llc; u8 sample_tlb; @@ -287,6 +288,12 @@ static void arm_spe_prep_sample(struct arm_spe *spe, event->sample.header.size = sizeof(struct perf_event_header); } +static int arm_spe__inject_event(union perf_event *event, struct perf_sample *sample, u64 type) +{ + event->header.size = perf_event__sample_event_size(sample, type, 0); + return perf_event__synthesize_sample(event, type, 0, sample); +} + static inline int arm_spe_deliver_synth_event(struct arm_spe *spe, struct arm_spe_queue *speq __maybe_unused, @@ -295,6 +302,12 @@ arm_spe_deliver_synth_event(struct arm_spe *spe, { int ret; + if (spe->synth_opts.inject) { + ret = arm_spe__inject_event(event, sample, spe->sample_type); + if (ret) + return ret; + } + ret = perf_session__deliver_synth_event(spe->session, event, sample); if (ret) pr_err("ARM SPE: failed to deliver event, error %d\n", ret); @@ -986,6 +999,8 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session) else attr.sample_type |= PERF_SAMPLE_TIME; + spe->sample_type = attr.sample_type; + attr.exclude_user = evsel->core.attr.exclude_user; attr.exclude_kernel = evsel->core.attr.exclude_kernel; attr.exclude_hv = evsel->core.attr.exclude_hv; diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c index fbb3c4057c30..528aeb0ab79d 100644 --- a/tools/perf/util/bpf-loader.c +++ b/tools/perf/util/bpf-loader.c @@ -29,6 +29,9 @@ #include <internal/xyarray.h> +/* temporarily disable libbpf deprecation warnings */ +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" + static int libbpf_perf_print(enum libbpf_print_level level __attribute__((unused)), const char *fmt, va_list args) { diff --git a/tools/perf/util/bpf_counter.c b/tools/perf/util/bpf_counter.c index c17d4a43ce06..5a97fd7d0a71 100644 --- a/tools/perf/util/bpf_counter.c +++ b/tools/perf/util/bpf_counter.c @@ -307,6 +307,20 @@ static bool bperf_attr_map_compatible(int attr_map_fd) (map_info.value_size == sizeof(struct perf_event_attr_map_entry)); } +int __weak +bpf_map_create(enum bpf_map_type map_type, + const char *map_name __maybe_unused, + __u32 key_size, + __u32 value_size, + __u32 max_entries, + const struct bpf_map_create_opts *opts __maybe_unused) +{ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" + return bpf_create_map(map_type, key_size, value_size, max_entries, 0); +#pragma GCC diagnostic pop +} + static int bperf_lock_attr_map(struct target *target) { char path[PATH_MAX]; @@ -320,10 +334,10 @@ static int bperf_lock_attr_map(struct target *target) } if (access(path, F_OK)) { - map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, + map_fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(struct perf_event_attr), sizeof(struct perf_event_attr_map_entry), - ATTR_MAP_SIZE, 0); + ATTR_MAP_SIZE, NULL); if (map_fd < 0) return -1; diff --git a/tools/perf/util/bpf_skel/bperf.h b/tools/perf/util/bpf_skel/bperf.h deleted file mode 100644 index 186a5551ddb9..000000000000 --- a/tools/perf/util/bpf_skel/bperf.h +++ /dev/null @@ -1,14 +0,0 @@ -// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) -// Copyright (c) 2021 Facebook - -#ifndef __BPERF_STAT_H -#define __BPERF_STAT_H - -typedef struct { - __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); - __uint(key_size, sizeof(__u32)); - __uint(value_size, sizeof(struct bpf_perf_event_value)); - __uint(max_entries, 1); -} reading_map; - -#endif /* __BPERF_STAT_H */ diff --git a/tools/perf/util/bpf_skel/bperf_follower.bpf.c b/tools/perf/util/bpf_skel/bperf_follower.bpf.c index b8fa3cb2da23..f193998530d4 100644 --- a/tools/perf/util/bpf_skel/bperf_follower.bpf.c +++ b/tools/perf/util/bpf_skel/bperf_follower.bpf.c @@ -1,14 +1,23 @@ // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) // Copyright (c) 2021 Facebook -#include <linux/bpf.h> -#include <linux/perf_event.h> +#include "vmlinux.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> -#include "bperf.h" #include "bperf_u.h" -reading_map diff_readings SEC(".maps"); -reading_map accum_readings SEC(".maps"); +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(struct bpf_perf_event_value)); + __uint(max_entries, 1); +} diff_readings SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(struct bpf_perf_event_value)); + __uint(max_entries, 1); +} accum_readings SEC(".maps"); struct { __uint(type, BPF_MAP_TYPE_HASH); diff --git a/tools/perf/util/bpf_skel/bperf_leader.bpf.c b/tools/perf/util/bpf_skel/bperf_leader.bpf.c index 4f70d1459e86..e2a2d4cd7779 100644 --- a/tools/perf/util/bpf_skel/bperf_leader.bpf.c +++ b/tools/perf/util/bpf_skel/bperf_leader.bpf.c @@ -1,10 +1,8 @@ // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) // Copyright (c) 2021 Facebook -#include <linux/bpf.h> -#include <linux/perf_event.h> +#include "vmlinux.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> -#include "bperf.h" struct { __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); @@ -13,8 +11,19 @@ struct { __uint(map_flags, BPF_F_PRESERVE_ELEMS); } events SEC(".maps"); -reading_map prev_readings SEC(".maps"); -reading_map diff_readings SEC(".maps"); +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(struct bpf_perf_event_value)); + __uint(max_entries, 1); +} prev_readings SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(struct bpf_perf_event_value)); + __uint(max_entries, 1); +} diff_readings SEC(".maps"); SEC("raw_tp/sched_switch") int BPF_PROG(on_switch) diff --git a/tools/perf/util/bpf_skel/bpf_prog_profiler.bpf.c b/tools/perf/util/bpf_skel/bpf_prog_profiler.bpf.c index ab12b4c4ece2..97037d3b3d9f 100644 --- a/tools/perf/util/bpf_skel/bpf_prog_profiler.bpf.c +++ b/tools/perf/util/bpf_skel/bpf_prog_profiler.bpf.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) // Copyright (c) 2020 Facebook -#include <linux/bpf.h> +#include "vmlinux.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index c7a9fa0ffae9..2c06abf6dcd2 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -24,16 +24,6 @@ #include "util/parse-sublevel-options.h" #include <linux/ctype.h> -#include <traceevent/event-parse.h> - -#define MAKE_LIBTRACEEVENT_VERSION(a, b, c) ((a)*255*255+(b)*255+(c)) -#ifndef LIBTRACEEVENT_VERSION -/* - * If LIBTRACEEVENT_VERSION wasn't computed then set to version 1.1.0 that ships - * with the Linux kernel tools. - */ -#define LIBTRACEEVENT_VERSION MAKE_LIBTRACEEVENT_VERSION(1, 1, 0) -#endif int verbose; int debug_peo_args; @@ -238,15 +228,6 @@ int perf_debug_option(const char *str) /* Allow only verbose value in range (0, 10), otherwise set 0. */ verbose = (verbose < 0) || (verbose > 10) ? 0 : verbose; -#if MAKE_LIBTRACEEVENT_VERSION(1, 3, 0) <= LIBTRACEEVENT_VERSION - if (verbose == 1) - tep_set_loglevel(TEP_LOG_INFO); - else if (verbose == 2) - tep_set_loglevel(TEP_LOG_DEBUG); - else if (verbose >= 3) - tep_set_loglevel(TEP_LOG_ALL); -#endif - return 0; } diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 95ffed66369c..c59331eea1d9 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -44,13 +44,16 @@ struct perf_event_attr; /* perf sample has 16 bits size limit */ #define PERF_SAMPLE_MAX_SIZE (1 << 16) +/* number of register is bound by the number of bits in regs_dump::mask (64) */ +#define PERF_SAMPLE_REGS_CACHE_SIZE (8 * sizeof(u64)) + struct regs_dump { u64 abi; u64 mask; u64 *regs; /* Cached values/mask filled by first register access. */ - u64 cache_regs[PERF_REGS_MAX]; + u64 cache_regs[PERF_SAMPLE_REGS_CACHE_SIZE]; u64 cache_mask; }; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index a59fb2ecb84e..ac0127be0459 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -241,7 +241,7 @@ void evsel__init(struct evsel *evsel, { perf_evsel__init(&evsel->core, attr, idx); evsel->tracking = !idx; - evsel->unit = ""; + evsel->unit = strdup(""); evsel->scale = 1.0; evsel->max_events = ULONG_MAX; evsel->evlist = NULL; @@ -276,13 +276,8 @@ struct evsel *evsel__new_idx(struct perf_event_attr *attr, int idx) } if (evsel__is_clock(evsel)) { - /* - * The evsel->unit points to static alias->unit - * so it's ok to use static string in here. - */ - static const char *unit = "msec"; - - evsel->unit = unit; + free((char *)evsel->unit); + evsel->unit = strdup("msec"); evsel->scale = 1e-6; } @@ -420,7 +415,11 @@ struct evsel *evsel__clone(struct evsel *orig) evsel->max_events = orig->max_events; evsel->tool_event = orig->tool_event; - evsel->unit = orig->unit; + free((char *)evsel->unit); + evsel->unit = strdup(orig->unit); + if (evsel->unit == NULL) + goto out_err; + evsel->scale = orig->scale; evsel->snapshot = orig->snapshot; evsel->per_pkg = orig->per_pkg; @@ -1441,6 +1440,7 @@ void evsel__exit(struct evsel *evsel) zfree(&evsel->group_name); zfree(&evsel->name); zfree(&evsel->pmu_name); + zfree(&evsel->unit); zfree(&evsel->metric_id); evsel__zero_per_pkg(evsel); hashmap__free(evsel->per_pkg_mask); diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c index 1d532b9fed29..666b59baeb70 100644 --- a/tools/perf/util/expr.c +++ b/tools/perf/util/expr.c @@ -12,6 +12,7 @@ #include "expr-bison.h" #include "expr-flex.h" #include "smt.h" +#include <linux/err.h> #include <linux/kernel.h> #include <linux/zalloc.h> #include <ctype.h> @@ -65,7 +66,12 @@ static bool key_equal(const void *key1, const void *key2, struct hashmap *ids__new(void) { - return hashmap__new(key_hash, key_equal, NULL); + struct hashmap *hash; + + hash = hashmap__new(key_hash, key_equal, NULL); + if (IS_ERR(hash)) + return NULL; + return hash; } void ids__free(struct hashmap *ids) @@ -299,6 +305,10 @@ struct expr_parse_ctx *expr__ctx_new(void) return NULL; ctx->ids = hashmap__new(key_hash, key_equal, NULL); + if (IS_ERR(ctx->ids)) { + free(ctx); + return NULL; + } ctx->runtime = 0; return ctx; diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index fda8d14c891f..e3c1a532d059 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -2321,6 +2321,7 @@ out: #define FEAT_PROCESS_STR_FUN(__feat, __feat_env) \ static int process_##__feat(struct feat_fd *ff, void *data __maybe_unused) \ {\ + free(ff->ph->env.__feat_env); \ ff->ph->env.__feat_env = do_read_string(ff); \ return ff->ph->env.__feat_env ? 0 : -ENOMEM; \ } @@ -4124,6 +4125,7 @@ int perf_event__process_feature(struct perf_session *session, struct perf_record_header_feature *fe = (struct perf_record_header_feature *)event; int type = fe->header.type; u64 feat = fe->feat_id; + int ret = 0; if (type < 0 || type >= PERF_RECORD_HEADER_MAX) { pr_warning("invalid record type %d in pipe-mode\n", type); @@ -4141,11 +4143,13 @@ int perf_event__process_feature(struct perf_session *session, ff.size = event->header.size - sizeof(*fe); ff.ph = &session->header; - if (feat_ops[feat].process(&ff, NULL)) - return -1; + if (feat_ops[feat].process(&ff, NULL)) { + ret = -1; + goto out; + } if (!feat_ops[feat].print || !tool->show_feat_hdr) - return 0; + goto out; if (!feat_ops[feat].full_only || tool->show_feat_hdr >= SHOW_FEAT_HEADER_FULL_INFO) { @@ -4154,8 +4158,9 @@ int perf_event__process_feature(struct perf_session *session, fprintf(stdout, "# %s info available, use -I to display\n", feat_ops[feat].name); } - - return 0; +out: + free_event_desc(ff.events); + return ret; } size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp) @@ -4257,9 +4262,11 @@ int perf_event__process_event_update(struct perf_tool *tool __maybe_unused, switch (ev->type) { case PERF_EVENT_UPDATE__UNIT: + free((char *)evsel->unit); evsel->unit = strdup(ev->data); break; case PERF_EVENT_UPDATE__NAME: + free(evsel->name); evsel->name = strdup(ev->data); break; case PERF_EVENT_UPDATE__SCALE: @@ -4268,11 +4275,11 @@ int perf_event__process_event_update(struct perf_tool *tool __maybe_unused, break; case PERF_EVENT_UPDATE__CPUS: ev_cpus = (struct perf_record_event_update_cpus *)ev->data; - map = cpu_map__new_data(&ev_cpus->cpus); - if (map) + if (map) { + perf_cpu_map__put(evsel->core.own_cpus); evsel->core.own_cpus = map; - else + } else pr_err("failed to get event_update cpus\n"); default: break; diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 65fe65ba03c2..b776465e04ef 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -289,15 +289,10 @@ static long hist_time(unsigned long htime) return htime; } -static void he_stat__add_period(struct he_stat *he_stat, u64 period, - u64 weight, u64 ins_lat, u64 p_stage_cyc) +static void he_stat__add_period(struct he_stat *he_stat, u64 period) { - he_stat->period += period; - he_stat->weight += weight; he_stat->nr_events += 1; - he_stat->ins_lat += ins_lat; - he_stat->p_stage_cyc += p_stage_cyc; } static void he_stat__add_stat(struct he_stat *dest, struct he_stat *src) @@ -308,9 +303,6 @@ static void he_stat__add_stat(struct he_stat *dest, struct he_stat *src) dest->period_guest_sys += src->period_guest_sys; dest->period_guest_us += src->period_guest_us; dest->nr_events += src->nr_events; - dest->weight += src->weight; - dest->ins_lat += src->ins_lat; - dest->p_stage_cyc += src->p_stage_cyc; } static void he_stat__decay(struct he_stat *he_stat) @@ -598,9 +590,6 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists, struct hist_entry *he; int64_t cmp; u64 period = entry->stat.period; - u64 weight = entry->stat.weight; - u64 ins_lat = entry->stat.ins_lat; - u64 p_stage_cyc = entry->stat.p_stage_cyc; bool leftmost = true; p = &hists->entries_in->rb_root.rb_node; @@ -619,11 +608,11 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists, if (!cmp) { if (sample_self) { - he_stat__add_period(&he->stat, period, weight, ins_lat, p_stage_cyc); + he_stat__add_period(&he->stat, period); hist_entry__add_callchain_period(he, period); } if (symbol_conf.cumulate_callchain) - he_stat__add_period(he->stat_acc, period, weight, ins_lat, p_stage_cyc); + he_stat__add_period(he->stat_acc, period); /* * This mem info was allocated from sample__resolve_mem @@ -733,9 +722,6 @@ __hists__add_entry(struct hists *hists, .stat = { .nr_events = 1, .period = sample->period, - .weight = sample->weight, - .ins_lat = sample->ins_lat, - .p_stage_cyc = sample->p_stage_cyc, }, .parent = sym_parent, .filtered = symbol__parent_filter(sym_parent) | al->filtered, @@ -748,6 +734,9 @@ __hists__add_entry(struct hists *hists, .raw_size = sample->raw_size, .ops = ops, .time = hist_time(sample->time), + .weight = sample->weight, + .ins_lat = sample->ins_lat, + .p_stage_cyc = sample->p_stage_cyc, }, *he = hists__findnew_entry(hists, &entry, al, sample_self); if (!hists->has_callchains && he && he->callchain_size != 0) diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 5343b62476e6..621f35ae1efa 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -369,7 +369,6 @@ enum { }; void perf_hpp__init(void); -void perf_hpp__column_unregister(struct perf_hpp_fmt *format); void perf_hpp__cancel_cumulate(void); void perf_hpp__setup_output_field(struct perf_hpp_list *list); void perf_hpp__reset_output_field(struct perf_hpp_list *list); diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index 5f83937bf8f3..0e013c2d9eb4 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -1205,61 +1205,69 @@ out_no_progress: static bool intel_pt_fup_event(struct intel_pt_decoder *decoder) { + enum intel_pt_sample_type type = decoder->state.type; bool ret = false; + decoder->state.type &= ~INTEL_PT_BRANCH; + if (decoder->set_fup_tx_flags) { decoder->set_fup_tx_flags = false; decoder->tx_flags = decoder->fup_tx_flags; - decoder->state.type = INTEL_PT_TRANSACTION; + decoder->state.type |= INTEL_PT_TRANSACTION; if (decoder->fup_tx_flags & INTEL_PT_ABORT_TX) decoder->state.type |= INTEL_PT_BRANCH; - decoder->state.from_ip = decoder->ip; - decoder->state.to_ip = 0; decoder->state.flags = decoder->fup_tx_flags; - return true; + ret = true; } if (decoder->set_fup_ptw) { decoder->set_fup_ptw = false; - decoder->state.type = INTEL_PT_PTW; + decoder->state.type |= INTEL_PT_PTW; decoder->state.flags |= INTEL_PT_FUP_IP; - decoder->state.from_ip = decoder->ip; - decoder->state.to_ip = 0; decoder->state.ptw_payload = decoder->fup_ptw_payload; - return true; + ret = true; } if (decoder->set_fup_mwait) { decoder->set_fup_mwait = false; - decoder->state.type = INTEL_PT_MWAIT_OP; - decoder->state.from_ip = decoder->ip; - decoder->state.to_ip = 0; + decoder->state.type |= INTEL_PT_MWAIT_OP; decoder->state.mwait_payload = decoder->fup_mwait_payload; ret = true; } if (decoder->set_fup_pwre) { decoder->set_fup_pwre = false; decoder->state.type |= INTEL_PT_PWR_ENTRY; - decoder->state.type &= ~INTEL_PT_BRANCH; - decoder->state.from_ip = decoder->ip; - decoder->state.to_ip = 0; decoder->state.pwre_payload = decoder->fup_pwre_payload; ret = true; } if (decoder->set_fup_exstop) { decoder->set_fup_exstop = false; decoder->state.type |= INTEL_PT_EX_STOP; - decoder->state.type &= ~INTEL_PT_BRANCH; decoder->state.flags |= INTEL_PT_FUP_IP; - decoder->state.from_ip = decoder->ip; - decoder->state.to_ip = 0; ret = true; } if (decoder->set_fup_bep) { decoder->set_fup_bep = false; decoder->state.type |= INTEL_PT_BLK_ITEMS; - decoder->state.type &= ~INTEL_PT_BRANCH; + ret = true; + } + if (decoder->overflow) { + decoder->overflow = false; + if (!ret && !decoder->pge) { + if (decoder->hop) { + decoder->state.type = 0; + decoder->pkt_state = INTEL_PT_STATE_RESAMPLE; + } + decoder->pge = true; + decoder->state.type |= INTEL_PT_BRANCH | INTEL_PT_TRACE_BEGIN; + decoder->state.from_ip = 0; + decoder->state.to_ip = decoder->ip; + return true; + } + } + if (ret) { decoder->state.from_ip = decoder->ip; decoder->state.to_ip = 0; - ret = true; + } else { + decoder->state.type = type; } return ret; } @@ -1608,7 +1616,16 @@ static int intel_pt_overflow(struct intel_pt_decoder *decoder) intel_pt_clear_tx_flags(decoder); intel_pt_set_nr(decoder); decoder->timestamp_insn_cnt = 0; - decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC; + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; + decoder->state.from_ip = decoder->ip; + decoder->ip = 0; + decoder->pge = false; + decoder->set_fup_tx_flags = false; + decoder->set_fup_ptw = false; + decoder->set_fup_mwait = false; + decoder->set_fup_pwre = false; + decoder->set_fup_exstop = false; + decoder->set_fup_bep = false; decoder->overflow = true; return -EOVERFLOW; } @@ -2666,6 +2683,8 @@ static int intel_pt_scan_for_psb(struct intel_pt_decoder *decoder); /* Hop mode: Ignore TNT, do not walk code, but get ip from FUPs and TIPs */ static int intel_pt_hop_trace(struct intel_pt_decoder *decoder, bool *no_tip, int *err) { + *err = 0; + /* Leap from PSB to PSB, getting ip from FUP within PSB+ */ if (decoder->leap && !decoder->in_psb && decoder->packet.type != INTEL_PT_PSB) { *err = intel_pt_scan_for_psb(decoder); @@ -2678,6 +2697,7 @@ static int intel_pt_hop_trace(struct intel_pt_decoder *decoder, bool *no_tip, in return HOP_IGNORE; case INTEL_PT_TIP_PGD: + decoder->pge = false; if (!decoder->packet.count) { intel_pt_set_nr(decoder); return HOP_IGNORE; @@ -2705,18 +2725,21 @@ static int intel_pt_hop_trace(struct intel_pt_decoder *decoder, bool *no_tip, in if (!decoder->packet.count) return HOP_IGNORE; intel_pt_set_ip(decoder); - if (intel_pt_fup_event(decoder)) - return HOP_RETURN; - if (!decoder->branch_enable) + if (decoder->set_fup_mwait || decoder->set_fup_pwre) + *no_tip = true; + if (!decoder->branch_enable || !decoder->pge) *no_tip = true; if (*no_tip) { decoder->state.type = INTEL_PT_INSTRUCTION; decoder->state.from_ip = decoder->ip; decoder->state.to_ip = 0; + intel_pt_fup_event(decoder); return HOP_RETURN; } + intel_pt_fup_event(decoder); + decoder->state.type |= INTEL_PT_INSTRUCTION | INTEL_PT_BRANCH; *err = intel_pt_walk_fup_tip(decoder); - if (!*err) + if (!*err && decoder->state.to_ip) decoder->pkt_state = INTEL_PT_STATE_RESAMPLE; return HOP_RETURN; @@ -2897,7 +2920,7 @@ static bool intel_pt_psb_with_fup(struct intel_pt_decoder *decoder, int *err) { struct intel_pt_psb_info data = { .fup = false }; - if (!decoder->branch_enable || !decoder->pge) + if (!decoder->branch_enable) return false; intel_pt_pkt_lookahead(decoder, intel_pt_psb_lookahead_cb, &data); @@ -2924,6 +2947,7 @@ static int intel_pt_walk_trace(struct intel_pt_decoder *decoder) if (err) return err; next: + err = 0; if (decoder->cyc_threshold) { if (decoder->sample_cyc && last_packet_type != INTEL_PT_CYC) decoder->sample_cyc = false; @@ -2962,6 +2986,7 @@ next: case INTEL_PT_TIP_PGE: { decoder->pge = true; + decoder->overflow = false; intel_pt_mtc_cyc_cnt_pge(decoder); intel_pt_set_nr(decoder); if (decoder->packet.count == 0) { @@ -2999,7 +3024,7 @@ next: break; } intel_pt_set_last_ip(decoder); - if (!decoder->branch_enable) { + if (!decoder->branch_enable || !decoder->pge) { decoder->ip = decoder->last_ip; if (intel_pt_fup_event(decoder)) return 0; @@ -3467,10 +3492,10 @@ static int intel_pt_sync_ip(struct intel_pt_decoder *decoder) decoder->set_fup_pwre = false; decoder->set_fup_exstop = false; decoder->set_fup_bep = false; + decoder->overflow = false; if (!decoder->branch_enable) { decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; - decoder->overflow = false; decoder->state.type = 0; /* Do not have a sample */ return 0; } @@ -3485,7 +3510,6 @@ static int intel_pt_sync_ip(struct intel_pt_decoder *decoder) decoder->pkt_state = INTEL_PT_STATE_RESAMPLE; else decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; - decoder->overflow = false; decoder->state.from_ip = 0; decoder->state.to_ip = decoder->ip; @@ -3607,7 +3631,7 @@ static int intel_pt_sync(struct intel_pt_decoder *decoder) } decoder->have_last_ip = true; - decoder->pkt_state = INTEL_PT_STATE_NO_IP; + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; err = intel_pt_walk_psb(decoder); if (err) @@ -3704,7 +3728,8 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder) if (err) { decoder->state.err = intel_pt_ext_err(err); - decoder->state.from_ip = decoder->ip; + if (err != -EOVERFLOW) + decoder->state.from_ip = decoder->ip; intel_pt_update_sample_time(decoder); decoder->sample_tot_cyc_cnt = decoder->tot_cyc_cnt; intel_pt_set_nr(decoder); diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 556a893508da..e8613cbda331 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -2565,6 +2565,7 @@ static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp) ptq->sync_switch = false; intel_pt_next_tid(pt, ptq); } + ptq->timestamp = state->est_timestamp; if (pt->synth_opts.errors) { err = intel_ptq_synth_error(ptq, state); if (err) @@ -3624,6 +3625,7 @@ static int intel_pt_parse_vm_tm_corr_arg(struct intel_pt *pt, char **args) *args = p; return 0; } + p += 1; while (1) { vmcs = strtoull(p, &p, 0); if (errno) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 5bfb6f892489..ba74fdf74af9 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -402,8 +402,10 @@ static int add_event_tool(struct list_head *list, int *idx, if (!evsel) return -ENOMEM; evsel->tool_event = tool_event; - if (tool_event == PERF_TOOL_DURATION_TIME) - evsel->unit = "ns"; + if (tool_event == PERF_TOOL_DURATION_TIME) { + free((char *)evsel->unit); + evsel->unit = strdup("ns"); + } return 0; } @@ -1630,7 +1632,8 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, if (parse_state->fake_pmu) return 0; - evsel->unit = info.unit; + free((char *)evsel->unit); + evsel->unit = strdup(info.unit); evsel->scale = info.scale; evsel->per_pkg = info.per_pkg; evsel->snapshot = info.snapshot; diff --git a/tools/perf/util/perf_regs.c b/tools/perf/util/perf_regs.c index 5ee47ae1509c..06a7461ba864 100644 --- a/tools/perf/util/perf_regs.c +++ b/tools/perf/util/perf_regs.c @@ -25,6 +25,9 @@ int perf_reg_value(u64 *valp, struct regs_dump *regs, int id) int i, idx = 0; u64 mask = regs->mask; + if ((u64)id >= PERF_SAMPLE_REGS_CACHE_SIZE) + return -EINVAL; + if (regs->cache_mask & (1ULL << id)) goto out; diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 6ae58406f4fc..8dfbba15aeb8 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -1659,6 +1659,21 @@ bool is_pmu_core(const char *name) return !strcmp(name, "cpu") || is_arm_pmu_core(name); } +static bool pmu_alias_is_duplicate(struct sevent *alias_a, + struct sevent *alias_b) +{ + /* Different names -> never duplicates */ + if (strcmp(alias_a->name, alias_b->name)) + return false; + + /* Don't remove duplicates for hybrid PMUs */ + if (perf_pmu__is_hybrid(alias_a->pmu) && + perf_pmu__is_hybrid(alias_b->pmu)) + return false; + + return true; +} + void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag, bool long_desc, bool details_flag, bool deprecated, const char *pmu_name) @@ -1744,12 +1759,8 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag, qsort(aliases, len, sizeof(struct sevent), cmp_sevent); for (j = 0; j < len; j++) { /* Skip duplicates */ - if (j > 0 && !strcmp(aliases[j].name, aliases[j - 1].name)) { - if (!aliases[j].pmu || !aliases[j - 1].pmu || - !strcmp(aliases[j].pmu, aliases[j - 1].pmu)) { - continue; - } - } + if (j > 0 && pmu_alias_is_duplicate(&aliases[j], &aliases[j - 1])) + continue; if (name_only) { printf("%s ", aliases[j].name); diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 563a9ba8954f..7f782a31bda3 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -461,7 +461,7 @@ get_tracepoint_field(struct pyrf_event *pevent, PyObject *attr_name) struct tep_event *tp_format; tp_format = trace_event__tp_format_id(evsel->core.attr.config); - if (!tp_format) + if (IS_ERR_OR_NULL(tp_format)) return NULL; evsel->tp_format = tp_format; diff --git a/tools/perf/util/smt.c b/tools/perf/util/smt.c index 20bacd5972ad..34f1b1b1176c 100644 --- a/tools/perf/util/smt.c +++ b/tools/perf/util/smt.c @@ -15,7 +15,7 @@ int smt_on(void) if (cached) return cached_result; - if (sysfs__read_int("devices/system/cpu/smt/active", &cached_result) > 0) + if (sysfs__read_int("devices/system/cpu/smt/active", &cached_result) >= 0) goto done; ncpu = sysconf(_SC_NPROCESSORS_CONF); diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 568a88c001c6..a111065b484e 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -1325,88 +1325,68 @@ struct sort_entry sort_mispredict = { .se_width_idx = HISTC_MISPREDICT, }; -static u64 he_weight(struct hist_entry *he) -{ - return he->stat.nr_events ? he->stat.weight / he->stat.nr_events : 0; -} - static int64_t -sort__local_weight_cmp(struct hist_entry *left, struct hist_entry *right) +sort__weight_cmp(struct hist_entry *left, struct hist_entry *right) { - return he_weight(left) - he_weight(right); + return left->weight - right->weight; } static int hist_entry__local_weight_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { - return repsep_snprintf(bf, size, "%-*llu", width, he_weight(he)); + return repsep_snprintf(bf, size, "%-*llu", width, he->weight); } struct sort_entry sort_local_weight = { .se_header = "Local Weight", - .se_cmp = sort__local_weight_cmp, + .se_cmp = sort__weight_cmp, .se_snprintf = hist_entry__local_weight_snprintf, .se_width_idx = HISTC_LOCAL_WEIGHT, }; -static int64_t -sort__global_weight_cmp(struct hist_entry *left, struct hist_entry *right) -{ - return left->stat.weight - right->stat.weight; -} - static int hist_entry__global_weight_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { - return repsep_snprintf(bf, size, "%-*llu", width, he->stat.weight); + return repsep_snprintf(bf, size, "%-*llu", width, + he->weight * he->stat.nr_events); } struct sort_entry sort_global_weight = { .se_header = "Weight", - .se_cmp = sort__global_weight_cmp, + .se_cmp = sort__weight_cmp, .se_snprintf = hist_entry__global_weight_snprintf, .se_width_idx = HISTC_GLOBAL_WEIGHT, }; -static u64 he_ins_lat(struct hist_entry *he) -{ - return he->stat.nr_events ? he->stat.ins_lat / he->stat.nr_events : 0; -} - static int64_t -sort__local_ins_lat_cmp(struct hist_entry *left, struct hist_entry *right) +sort__ins_lat_cmp(struct hist_entry *left, struct hist_entry *right) { - return he_ins_lat(left) - he_ins_lat(right); + return left->ins_lat - right->ins_lat; } static int hist_entry__local_ins_lat_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { - return repsep_snprintf(bf, size, "%-*u", width, he_ins_lat(he)); + return repsep_snprintf(bf, size, "%-*u", width, he->ins_lat); } struct sort_entry sort_local_ins_lat = { .se_header = "Local INSTR Latency", - .se_cmp = sort__local_ins_lat_cmp, + .se_cmp = sort__ins_lat_cmp, .se_snprintf = hist_entry__local_ins_lat_snprintf, .se_width_idx = HISTC_LOCAL_INS_LAT, }; -static int64_t -sort__global_ins_lat_cmp(struct hist_entry *left, struct hist_entry *right) -{ - return left->stat.ins_lat - right->stat.ins_lat; -} - static int hist_entry__global_ins_lat_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { - return repsep_snprintf(bf, size, "%-*u", width, he->stat.ins_lat); + return repsep_snprintf(bf, size, "%-*u", width, + he->ins_lat * he->stat.nr_events); } struct sort_entry sort_global_ins_lat = { .se_header = "INSTR Latency", - .se_cmp = sort__global_ins_lat_cmp, + .se_cmp = sort__ins_lat_cmp, .se_snprintf = hist_entry__global_ins_lat_snprintf, .se_width_idx = HISTC_GLOBAL_INS_LAT, }; @@ -1414,13 +1394,13 @@ struct sort_entry sort_global_ins_lat = { static int64_t sort__global_p_stage_cyc_cmp(struct hist_entry *left, struct hist_entry *right) { - return left->stat.p_stage_cyc - right->stat.p_stage_cyc; + return left->p_stage_cyc - right->p_stage_cyc; } static int hist_entry__p_stage_cyc_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { - return repsep_snprintf(bf, size, "%-*u", width, he->stat.p_stage_cyc); + return repsep_snprintf(bf, size, "%-*u", width, he->p_stage_cyc); } struct sort_entry sort_p_stage_cyc = { diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index b67c469aba79..7b7145501933 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -49,9 +49,6 @@ struct he_stat { u64 period_us; u64 period_guest_sys; u64 period_guest_us; - u64 weight; - u64 ins_lat; - u64 p_stage_cyc; u32 nr_events; }; @@ -109,6 +106,9 @@ struct hist_entry { s32 socket; s32 cpu; u64 code_page_size; + u64 weight; + u64 ins_lat; + u64 p_stage_cyc; u8 cpumode; u8 depth; diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 37a9492edb3e..df3c4671be72 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -379,32 +379,32 @@ fetch_kernel_version(unsigned int *puint, char *str, return 0; } -const char *perf_tip(const char *dirpath) +int perf_tip(char **strp, const char *dirpath) { struct strlist *tips; struct str_node *node; - char *tip = NULL; struct strlist_config conf = { .dirname = dirpath, .file_only = true, }; + int ret = 0; + *strp = NULL; tips = strlist__new("tips.txt", &conf); if (tips == NULL) - return errno == ENOENT ? NULL : - "Tip: check path of tips.txt or get more memory! ;-p"; + return -errno; if (strlist__nr_entries(tips) == 0) goto out; node = strlist__entry(tips, random() % strlist__nr_entries(tips)); - if (asprintf(&tip, "Tip: %s", node->s) < 0) - tip = (char *)"Tip: get more memory! ;-)"; + if (asprintf(strp, "Tip: %s", node->s) < 0) + ret = -ENOMEM; out: strlist__delete(tips); - return tip; + return ret; } char *perf_exe(char *buf, int len) diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index ad737052e597..9f0d36ba77f2 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -39,7 +39,7 @@ int fetch_kernel_version(unsigned int *puint, #define KVER_FMT "%d.%d.%d" #define KVER_PARAM(x) KVER_VERSION(x), KVER_PATCHLEVEL(x), KVER_SUBLEVEL(x) -const char *perf_tip(const char *dirpath); +int perf_tip(char **strp, const char *dirpath); #ifndef HAVE_SCHED_GETCPU_SUPPORT int sched_getcpu(void); diff --git a/tools/power/acpi/Makefile.config b/tools/power/acpi/Makefile.config index 331f6d30f472..cd7106876a5f 100644 --- a/tools/power/acpi/Makefile.config +++ b/tools/power/acpi/Makefile.config @@ -69,6 +69,7 @@ KERNEL_INCLUDE := $(OUTPUT)include ACPICA_INCLUDE := $(srctree)/../../../drivers/acpi/acpica CFLAGS += -D_LINUX -I$(KERNEL_INCLUDE) -I$(ACPICA_INCLUDE) CFLAGS += $(WARNINGS) +MKDIR = mkdir ifeq ($(strip $(V)),false) QUIET=@ diff --git a/tools/power/acpi/Makefile.rules b/tools/power/acpi/Makefile.rules index 2a6c170b57cd..1d7616f5d0ae 100644 --- a/tools/power/acpi/Makefile.rules +++ b/tools/power/acpi/Makefile.rules @@ -21,6 +21,7 @@ $(KERNEL_INCLUDE): $(objdir)%.o: %.c $(KERNEL_INCLUDE) $(ECHO) " CC " $(subst $(OUTPUT),,$@) + $(QUIET) $(MKDIR) -p $(objdir) 2>/dev/null $(QUIET) $(CC) -c $(CFLAGS) -o $@ $< all: $(OUTPUT)$(TOOL) diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c index bf9fd3549a1d..efe72fa48224 100644 --- a/tools/power/x86/intel-speed-select/isst-config.c +++ b/tools/power/x86/intel-speed-select/isst-config.c @@ -15,7 +15,7 @@ struct process_cmd_struct { int arg; }; -static const char *version_str = "v1.10"; +static const char *version_str = "v1.11"; static const int supported_api_ver = 1; static struct isst_if_platform_info isst_platform_info; static char *progname; @@ -1599,6 +1599,7 @@ static void set_scaling_min_to_cpuinfo_max(int cpu) die_id != get_physical_die_id(i)) continue; + adjust_scaling_max_from_base_freq(i); set_cpufreq_scaling_min_max_from_cpuinfo(i, 1, 0); adjust_scaling_min_from_base_freq(i); } @@ -1615,6 +1616,7 @@ static void set_scaling_min_to_cpuinfo_min(int cpu) die_id != get_physical_die_id(i)) continue; + adjust_scaling_max_from_base_freq(i); set_cpufreq_scaling_min_max_from_cpuinfo(i, 0, 0); } } diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include index 071312f5eb92..b0be5f40a3f1 100644 --- a/tools/scripts/Makefile.include +++ b/tools/scripts/Makefile.include @@ -87,7 +87,18 @@ LLVM_STRIP ?= llvm-strip ifeq ($(CC_NO_CLANG), 1) EXTRA_WARNINGS += -Wstrict-aliasing=3 -endif + +else ifneq ($(CROSS_COMPILE),) +CLANG_CROSS_FLAGS := --target=$(notdir $(CROSS_COMPILE:%-=%)) +GCC_TOOLCHAIN_DIR := $(dir $(shell which $(CROSS_COMPILE)gcc)) +ifneq ($(GCC_TOOLCHAIN_DIR),) +CLANG_CROSS_FLAGS += --prefix=$(GCC_TOOLCHAIN_DIR)$(notdir $(CROSS_COMPILE)) +CLANG_CROSS_FLAGS += --sysroot=$(shell $(CROSS_COMPILE)gcc -print-sysroot) +CLANG_CROSS_FLAGS += --gcc-toolchain=$(realpath $(GCC_TOOLCHAIN_DIR)/..) +endif # GCC_TOOLCHAIN_DIR +CFLAGS += $(CLANG_CROSS_FLAGS) +AFLAGS += $(CLANG_CROSS_FLAGS) +endif # CROSS_COMPILE # Hack to avoid type-punned warnings on old systems such as RHEL5: # We should be changing CFLAGS and checking gcc version, but this diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py index 68e6f461c758..7a706f96f68d 100755 --- a/tools/testing/kunit/kunit.py +++ b/tools/testing/kunit/kunit.py @@ -15,38 +15,57 @@ import time assert sys.version_info >= (3, 7), "Python version is too old" -from collections import namedtuple +from dataclasses import dataclass from enum import Enum, auto -from typing import Iterable, Sequence, List +from typing import Any, Iterable, Sequence, List, Optional import kunit_json import kunit_kernel import kunit_parser -KunitResult = namedtuple('KunitResult', ['status','result','elapsed_time']) - -KunitConfigRequest = namedtuple('KunitConfigRequest', - ['build_dir', 'make_options']) -KunitBuildRequest = namedtuple('KunitBuildRequest', - ['jobs', 'build_dir', 'alltests', - 'make_options']) -KunitExecRequest = namedtuple('KunitExecRequest', - ['timeout', 'build_dir', 'alltests', - 'filter_glob', 'kernel_args', 'run_isolated']) -KunitParseRequest = namedtuple('KunitParseRequest', - ['raw_output', 'build_dir', 'json']) -KunitRequest = namedtuple('KunitRequest', ['raw_output','timeout', 'jobs', - 'build_dir', 'alltests', 'filter_glob', - 'kernel_args', 'run_isolated', 'json', 'make_options']) - -KernelDirectoryPath = sys.argv[0].split('tools/testing/kunit/')[0] - class KunitStatus(Enum): SUCCESS = auto() CONFIG_FAILURE = auto() BUILD_FAILURE = auto() TEST_FAILURE = auto() +@dataclass +class KunitResult: + status: KunitStatus + result: Any + elapsed_time: float + +@dataclass +class KunitConfigRequest: + build_dir: str + make_options: Optional[List[str]] + +@dataclass +class KunitBuildRequest(KunitConfigRequest): + jobs: int + alltests: bool + +@dataclass +class KunitParseRequest: + raw_output: Optional[str] + build_dir: str + json: Optional[str] + +@dataclass +class KunitExecRequest(KunitParseRequest): + timeout: int + alltests: bool + filter_glob: str + kernel_args: Optional[List[str]] + run_isolated: Optional[str] + +@dataclass +class KunitRequest(KunitExecRequest, KunitBuildRequest): + pass + + +KernelDirectoryPath = sys.argv[0].split('tools/testing/kunit/')[0] + def get_kernel_root_path() -> str: path = sys.argv[0] if not __file__ else __file__ parts = os.path.realpath(path).split('tools/testing/kunit') @@ -91,6 +110,14 @@ def build_tests(linux: kunit_kernel.LinuxSourceTree, 'built kernel successfully', build_end - build_start) +def config_and_build_tests(linux: kunit_kernel.LinuxSourceTree, + request: KunitBuildRequest) -> KunitResult: + config_result = config_tests(linux, request) + if config_result.status != KunitStatus.SUCCESS: + return config_result + + return build_tests(linux, request) + def _list_tests(linux: kunit_kernel.LinuxSourceTree, request: KunitExecRequest) -> List[str]: args = ['kunit.action=list'] if request.kernel_args: @@ -121,8 +148,7 @@ def _suites_from_test_list(tests: List[str]) -> List[str]: -def exec_tests(linux: kunit_kernel.LinuxSourceTree, request: KunitExecRequest, - parse_request: KunitParseRequest) -> KunitResult: +def exec_tests(linux: kunit_kernel.LinuxSourceTree, request: KunitExecRequest) -> KunitResult: filter_globs = [request.filter_glob] if request.run_isolated: tests = _list_tests(linux, request) @@ -147,17 +173,23 @@ def exec_tests(linux: kunit_kernel.LinuxSourceTree, request: KunitExecRequest, filter_glob=filter_glob, build_dir=request.build_dir) - result = parse_tests(parse_request, run_result) + result = parse_tests(request, run_result) # run_kernel() doesn't block on the kernel exiting. # That only happens after we get the last line of output from `run_result`. # So exec_time here actually contains parsing + execution time, which is fine. test_end = time.time() exec_time += test_end - test_start - test_counts.add_subtest_counts(result.result.test.counts) + test_counts.add_subtest_counts(result.result.counts) + + if len(filter_globs) == 1 and test_counts.crashed > 0: + bd = request.build_dir + print('The kernel seems to have crashed; you can decode the stack traces with:') + print('$ scripts/decode_stacktrace.sh {}/vmlinux {} < {} | tee {}/decoded.log | {} parse'.format( + bd, bd, kunit_kernel.get_outfile_path(bd), bd, sys.argv[0])) kunit_status = _map_to_overall_status(test_counts.get_status()) - return KunitResult(status=kunit_status, result=result.result, elapsed_time=exec_time) + return KunitResult(status=kunit_status, result=result, elapsed_time=exec_time) def _map_to_overall_status(test_status: kunit_parser.TestStatus) -> KunitStatus: if test_status in (kunit_parser.TestStatus.SUCCESS, kunit_parser.TestStatus.SKIPPED): @@ -168,14 +200,12 @@ def _map_to_overall_status(test_status: kunit_parser.TestStatus) -> KunitStatus: def parse_tests(request: KunitParseRequest, input_data: Iterable[str]) -> KunitResult: parse_start = time.time() - test_result = kunit_parser.TestResult(kunit_parser.TestStatus.SUCCESS, - kunit_parser.Test(), - 'Tests not Parsed.') + test_result = kunit_parser.Test() if request.raw_output: # Treat unparsed results as one passing test. - test_result.test.status = kunit_parser.TestStatus.SUCCESS - test_result.test.counts.passed = 1 + test_result.status = kunit_parser.TestStatus.SUCCESS + test_result.counts.passed = 1 output: Iterable[str] = input_data if request.raw_output == 'all': @@ -193,7 +223,7 @@ def parse_tests(request: KunitParseRequest, input_data: Iterable[str]) -> KunitR if request.json: json_obj = kunit_json.get_json_result( - test_result=test_result, + test=test_result, def_config='kunit_defconfig', build_dir=request.build_dir, json_path=request.json) @@ -211,27 +241,15 @@ def run_tests(linux: kunit_kernel.LinuxSourceTree, request: KunitRequest) -> KunitResult: run_start = time.time() - config_request = KunitConfigRequest(request.build_dir, - request.make_options) - config_result = config_tests(linux, config_request) + config_result = config_tests(linux, request) if config_result.status != KunitStatus.SUCCESS: return config_result - build_request = KunitBuildRequest(request.jobs, request.build_dir, - request.alltests, - request.make_options) - build_result = build_tests(linux, build_request) + build_result = build_tests(linux, request) if build_result.status != KunitStatus.SUCCESS: return build_result - exec_request = KunitExecRequest(request.timeout, request.build_dir, - request.alltests, request.filter_glob, - request.kernel_args, request.run_isolated) - parse_request = KunitParseRequest(request.raw_output, - request.build_dir, - request.json) - - exec_result = exec_tests(linux, exec_request, parse_request) + exec_result = exec_tests(linux, request) run_end = time.time() @@ -264,6 +282,9 @@ def massage_argv(argv: Sequence[str]) -> Sequence[str]: return f'{arg}={pseudo_bool_flag_defaults[arg]}' return list(map(massage_arg, argv)) +def get_default_jobs() -> int: + return len(os.sched_getaffinity(0)) + def add_common_opts(parser) -> None: parser.add_argument('--build_dir', help='As in the make command, it specifies the build ' @@ -280,6 +301,10 @@ def add_common_opts(parser) -> None: ' If given a directory, (e.g. lib/kunit), "/.kunitconfig" ' 'will get automatically appended.', metavar='kunitconfig') + parser.add_argument('--kconfig_add', + help='Additional Kconfig options to append to the ' + '.kunitconfig, e.g. CONFIG_KASAN=y. Can be repeated.', + action='append') parser.add_argument('--arch', help=('Specifies the architecture to run tests under. ' @@ -310,7 +335,7 @@ def add_build_opts(parser) -> None: parser.add_argument('--jobs', help='As in the make command, "Specifies the number of ' 'jobs (commands) to run simultaneously."', - type=int, default=8, metavar='jobs') + type=int, default=get_default_jobs(), metavar='jobs') def add_exec_opts(parser) -> None: parser.add_argument('--timeout', @@ -398,20 +423,21 @@ def main(argv, linux=None): if not linux: linux = kunit_kernel.LinuxSourceTree(cli_args.build_dir, kunitconfig_path=cli_args.kunitconfig, + kconfig_add=cli_args.kconfig_add, arch=cli_args.arch, cross_compile=cli_args.cross_compile, qemu_config_path=cli_args.qemu_config) - request = KunitRequest(cli_args.raw_output, - cli_args.timeout, - cli_args.jobs, - cli_args.build_dir, - cli_args.alltests, - cli_args.filter_glob, - cli_args.kernel_args, - cli_args.run_isolated, - cli_args.json, - cli_args.make_options) + request = KunitRequest(build_dir=cli_args.build_dir, + make_options=cli_args.make_options, + jobs=cli_args.jobs, + alltests=cli_args.alltests, + raw_output=cli_args.raw_output, + json=cli_args.json, + timeout=cli_args.timeout, + filter_glob=cli_args.filter_glob, + kernel_args=cli_args.kernel_args, + run_isolated=cli_args.run_isolated) result = run_tests(linux, request) if result.status != KunitStatus.SUCCESS: sys.exit(1) @@ -423,12 +449,13 @@ def main(argv, linux=None): if not linux: linux = kunit_kernel.LinuxSourceTree(cli_args.build_dir, kunitconfig_path=cli_args.kunitconfig, + kconfig_add=cli_args.kconfig_add, arch=cli_args.arch, cross_compile=cli_args.cross_compile, qemu_config_path=cli_args.qemu_config) - request = KunitConfigRequest(cli_args.build_dir, - cli_args.make_options) + request = KunitConfigRequest(build_dir=cli_args.build_dir, + make_options=cli_args.make_options) result = config_tests(linux, request) kunit_parser.print_with_timestamp(( 'Elapsed time: %.3fs\n') % ( @@ -439,15 +466,16 @@ def main(argv, linux=None): if not linux: linux = kunit_kernel.LinuxSourceTree(cli_args.build_dir, kunitconfig_path=cli_args.kunitconfig, + kconfig_add=cli_args.kconfig_add, arch=cli_args.arch, cross_compile=cli_args.cross_compile, qemu_config_path=cli_args.qemu_config) - request = KunitBuildRequest(cli_args.jobs, - cli_args.build_dir, - cli_args.alltests, - cli_args.make_options) - result = build_tests(linux, request) + request = KunitBuildRequest(build_dir=cli_args.build_dir, + make_options=cli_args.make_options, + jobs=cli_args.jobs, + alltests=cli_args.alltests) + result = config_and_build_tests(linux, request) kunit_parser.print_with_timestamp(( 'Elapsed time: %.3fs\n') % ( result.elapsed_time)) @@ -457,20 +485,20 @@ def main(argv, linux=None): if not linux: linux = kunit_kernel.LinuxSourceTree(cli_args.build_dir, kunitconfig_path=cli_args.kunitconfig, + kconfig_add=cli_args.kconfig_add, arch=cli_args.arch, cross_compile=cli_args.cross_compile, qemu_config_path=cli_args.qemu_config) - exec_request = KunitExecRequest(cli_args.timeout, - cli_args.build_dir, - cli_args.alltests, - cli_args.filter_glob, - cli_args.kernel_args, - cli_args.run_isolated) - parse_request = KunitParseRequest(cli_args.raw_output, - cli_args.build_dir, - cli_args.json) - result = exec_tests(linux, exec_request, parse_request) + exec_request = KunitExecRequest(raw_output=cli_args.raw_output, + build_dir=cli_args.build_dir, + json=cli_args.json, + timeout=cli_args.timeout, + alltests=cli_args.alltests, + filter_glob=cli_args.filter_glob, + kernel_args=cli_args.kernel_args, + run_isolated=cli_args.run_isolated) + result = exec_tests(linux, exec_request) kunit_parser.print_with_timestamp(( 'Elapsed time: %.3fs\n') % (result.elapsed_time)) if result.status != KunitStatus.SUCCESS: @@ -482,9 +510,9 @@ def main(argv, linux=None): else: with open(cli_args.file, 'r', errors='backslashreplace') as f: kunit_output = f.read().splitlines() - request = KunitParseRequest(cli_args.raw_output, - None, - cli_args.json) + request = KunitParseRequest(raw_output=cli_args.raw_output, + build_dir='', + json=cli_args.json) result = parse_tests(request, kunit_output) if result.status != KunitStatus.SUCCESS: sys.exit(1) diff --git a/tools/testing/kunit/kunit_config.py b/tools/testing/kunit/kunit_config.py index c77c7d2ef622..677354546156 100644 --- a/tools/testing/kunit/kunit_config.py +++ b/tools/testing/kunit/kunit_config.py @@ -62,33 +62,34 @@ class Kconfig(object): for entry in self.entries(): f.write(str(entry) + '\n') - def parse_from_string(self, blob: str) -> None: - """Parses a string containing KconfigEntrys and populates this Kconfig.""" - self._entries = [] - is_not_set_matcher = re.compile(CONFIG_IS_NOT_SET_PATTERN) - config_matcher = re.compile(CONFIG_PATTERN) - for line in blob.split('\n'): - line = line.strip() - if not line: - continue - - match = config_matcher.match(line) - if match: - entry = KconfigEntry(match.group(1), match.group(2)) - self.add_entry(entry) - continue - - empty_match = is_not_set_matcher.match(line) - if empty_match: - entry = KconfigEntry(empty_match.group(1), 'n') - self.add_entry(entry) - continue - - if line[0] == '#': - continue - else: - raise KconfigParseError('Failed to parse: ' + line) - - def read_from_file(self, path: str) -> None: - with open(path, 'r') as f: - self.parse_from_string(f.read()) +def parse_file(path: str) -> Kconfig: + with open(path, 'r') as f: + return parse_from_string(f.read()) + +def parse_from_string(blob: str) -> Kconfig: + """Parses a string containing Kconfig entries.""" + kconfig = Kconfig() + is_not_set_matcher = re.compile(CONFIG_IS_NOT_SET_PATTERN) + config_matcher = re.compile(CONFIG_PATTERN) + for line in blob.split('\n'): + line = line.strip() + if not line: + continue + + match = config_matcher.match(line) + if match: + entry = KconfigEntry(match.group(1), match.group(2)) + kconfig.add_entry(entry) + continue + + empty_match = is_not_set_matcher.match(line) + if empty_match: + entry = KconfigEntry(empty_match.group(1), 'n') + kconfig.add_entry(entry) + continue + + if line[0] == '#': + continue + else: + raise KconfigParseError('Failed to parse: ' + line) + return kconfig diff --git a/tools/testing/kunit/kunit_json.py b/tools/testing/kunit/kunit_json.py index 746bec72b9ac..6862671709bc 100644 --- a/tools/testing/kunit/kunit_json.py +++ b/tools/testing/kunit/kunit_json.py @@ -11,7 +11,7 @@ import os import kunit_parser -from kunit_parser import Test, TestResult, TestStatus +from kunit_parser import Test, TestStatus from typing import Any, Dict, Optional JsonObj = Dict[str, Any] @@ -30,6 +30,8 @@ def _get_group_json(test: Test, def_config: str, test_case = {"name": subtest.name, "status": "FAIL"} if subtest.status == TestStatus.SUCCESS: test_case["status"] = "PASS" + elif subtest.status == TestStatus.SKIPPED: + test_case["status"] = "SKIP" elif subtest.status == TestStatus.TEST_CRASHED: test_case["status"] = "ERROR" test_cases.append(test_case) @@ -48,9 +50,9 @@ def _get_group_json(test: Test, def_config: str, } return test_group -def get_json_result(test_result: TestResult, def_config: str, +def get_json_result(test: Test, def_config: str, build_dir: Optional[str], json_path: str) -> str: - test_group = _get_group_json(test_result.test, def_config, build_dir) + test_group = _get_group_json(test, def_config, build_dir) test_group["name"] = "KUnit Test Group" json_obj = json.dumps(test_group, indent=4) if json_path != 'stdout': diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py index 66095568bf32..44bbe54f25f1 100644 --- a/tools/testing/kunit/kunit_kernel.py +++ b/tools/testing/kunit/kunit_kernel.py @@ -21,6 +21,7 @@ import qemu_config KCONFIG_PATH = '.config' KUNITCONFIG_PATH = '.kunitconfig' +OLD_KUNITCONFIG_PATH = 'last_used_kunitconfig' DEFAULT_KUNITCONFIG_PATH = 'tools/testing/kunit/configs/default.config' BROKEN_ALLCONFIG_PATH = 'tools/testing/kunit/configs/broken_on_uml.config' OUTFILE_PATH = 'test.log' @@ -116,8 +117,7 @@ class LinuxSourceTreeOperationsQemu(LinuxSourceTreeOperations): self._extra_qemu_params = qemu_arch_params.extra_qemu_params def make_arch_qemuconfig(self, base_kunitconfig: kunit_config.Kconfig) -> None: - kconfig = kunit_config.Kconfig() - kconfig.parse_from_string(self._kconfig) + kconfig = kunit_config.parse_from_string(self._kconfig) base_kunitconfig.merge_in_entries(kconfig) def start(self, params: List[str], build_dir: str) -> subprocess.Popen: @@ -180,6 +180,9 @@ def get_kconfig_path(build_dir) -> str: def get_kunitconfig_path(build_dir) -> str: return get_file_path(build_dir, KUNITCONFIG_PATH) +def get_old_kunitconfig_path(build_dir) -> str: + return get_file_path(build_dir, OLD_KUNITCONFIG_PATH) + def get_outfile_path(build_dir) -> str: return get_file_path(build_dir, OUTFILE_PATH) @@ -206,6 +209,7 @@ def get_source_tree_ops_from_qemu_config(config_path: str, # exists as a file. module_path = '.' + os.path.join(os.path.basename(QEMU_CONFIGS_DIR), os.path.basename(config_path)) spec = importlib.util.spec_from_file_location(module_path, config_path) + assert spec is not None config = importlib.util.module_from_spec(spec) # See https://github.com/python/typeshed/pull/2626 for context. assert isinstance(spec.loader, importlib.abc.Loader) @@ -225,6 +229,7 @@ class LinuxSourceTree(object): build_dir: str, load_config=True, kunitconfig_path='', + kconfig_add: Optional[List[str]]=None, arch=None, cross_compile=None, qemu_config_path=None) -> None: @@ -249,8 +254,11 @@ class LinuxSourceTree(object): if not os.path.exists(kunitconfig_path): shutil.copyfile(DEFAULT_KUNITCONFIG_PATH, kunitconfig_path) - self._kconfig = kunit_config.Kconfig() - self._kconfig.read_from_file(kunitconfig_path) + self._kconfig = kunit_config.parse_file(kunitconfig_path) + if kconfig_add: + kconfig = kunit_config.parse_from_string('\n'.join(kconfig_add)) + self._kconfig.merge_in_entries(kconfig) + def clean(self) -> bool: try: @@ -262,17 +270,18 @@ class LinuxSourceTree(object): def validate_config(self, build_dir) -> bool: kconfig_path = get_kconfig_path(build_dir) - validated_kconfig = kunit_config.Kconfig() - validated_kconfig.read_from_file(kconfig_path) - if not self._kconfig.is_subset_of(validated_kconfig): - invalid = self._kconfig.entries() - validated_kconfig.entries() - message = 'Provided Kconfig is not contained in validated .config. Following fields found in kunitconfig, ' \ - 'but not in .config: %s' % ( - ', '.join([str(e) for e in invalid]) - ) - logging.error(message) - return False - return True + validated_kconfig = kunit_config.parse_file(kconfig_path) + if self._kconfig.is_subset_of(validated_kconfig): + return True + invalid = self._kconfig.entries() - validated_kconfig.entries() + message = 'Not all Kconfig options selected in kunitconfig were in the generated .config.\n' \ + 'This is probably due to unsatisfied dependencies.\n' \ + 'Missing: ' + ', '.join([str(e) for e in invalid]) + if self._arch == 'um': + message += '\nNote: many Kconfig options aren\'t available on UML. You can try running ' \ + 'on a different architecture with something like "--arch=x86_64".' + logging.error(message) + return False def build_config(self, build_dir, make_options) -> bool: kconfig_path = get_kconfig_path(build_dir) @@ -285,25 +294,38 @@ class LinuxSourceTree(object): except ConfigError as e: logging.error(e) return False - return self.validate_config(build_dir) + if not self.validate_config(build_dir): + return False + + old_path = get_old_kunitconfig_path(build_dir) + if os.path.exists(old_path): + os.remove(old_path) # write_to_file appends to the file + self._kconfig.write_to_file(old_path) + return True + + def _kunitconfig_changed(self, build_dir: str) -> bool: + old_path = get_old_kunitconfig_path(build_dir) + if not os.path.exists(old_path): + return True + + old_kconfig = kunit_config.parse_file(old_path) + return old_kconfig.entries() != self._kconfig.entries() def build_reconfig(self, build_dir, make_options) -> bool: """Creates a new .config if it is not a subset of the .kunitconfig.""" kconfig_path = get_kconfig_path(build_dir) - if os.path.exists(kconfig_path): - existing_kconfig = kunit_config.Kconfig() - existing_kconfig.read_from_file(kconfig_path) - self._ops.make_arch_qemuconfig(self._kconfig) - if not self._kconfig.is_subset_of(existing_kconfig): - print('Regenerating .config ...') - os.remove(kconfig_path) - return self.build_config(build_dir, make_options) - else: - return True - else: + if not os.path.exists(kconfig_path): print('Generating .config ...') return self.build_config(build_dir, make_options) + existing_kconfig = kunit_config.parse_file(kconfig_path) + self._ops.make_arch_qemuconfig(self._kconfig) + if self._kconfig.is_subset_of(existing_kconfig) and not self._kunitconfig_changed(build_dir): + return True + print('Regenerating .config ...') + os.remove(kconfig_path) + return self.build_config(build_dir, make_options) + def build_kernel(self, alltests, jobs, build_dir, make_options) -> bool: try: if alltests: diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py index 3355196d0515..05ff334761dd 100644 --- a/tools/testing/kunit/kunit_parser.py +++ b/tools/testing/kunit/kunit_parser.py @@ -12,14 +12,11 @@ from __future__ import annotations import re -from collections import namedtuple -from datetime import datetime +import datetime from enum import Enum, auto from functools import reduce from typing import Iterable, Iterator, List, Optional, Tuple -TestResult = namedtuple('TestResult', ['status','test','log']) - class Test(object): """ A class to represent a test parsed from KTAP results. All KTAP @@ -168,42 +165,51 @@ class TestCounts: class LineStream: """ A class to represent the lines of kernel output. - Provides a peek()/pop() interface over an iterator of + Provides a lazy peek()/pop() interface over an iterator of (line#, text). """ _lines: Iterator[Tuple[int, str]] _next: Tuple[int, str] + _need_next: bool _done: bool def __init__(self, lines: Iterator[Tuple[int, str]]): """Creates a new LineStream that wraps the given iterator.""" self._lines = lines self._done = False + self._need_next = True self._next = (0, '') - self._get_next() def _get_next(self) -> None: - """Advances the LineSteam to the next line.""" + """Advances the LineSteam to the next line, if necessary.""" + if not self._need_next: + return try: self._next = next(self._lines) except StopIteration: self._done = True + finally: + self._need_next = False def peek(self) -> str: """Returns the current line, without advancing the LineStream. """ + self._get_next() return self._next[1] def pop(self) -> str: """Returns the current line and advances the LineStream to the next line. """ - n = self._next - self._get_next() - return n[1] + s = self.peek() + if self._done: + raise ValueError(f'LineStream: going past EOF, last line was {s}') + self._need_next = True + return s def __bool__(self) -> bool: """Returns True if stream has more lines.""" + self._get_next() return not self._done # Only used by kunit_tool_test.py. @@ -216,6 +222,7 @@ class LineStream: def line_number(self) -> int: """Returns the line number of the current line.""" + self._get_next() return self._next[0] # Parsing helper methods: @@ -340,8 +347,8 @@ def parse_test_plan(lines: LineStream, test: Test) -> bool: """ Parses test plan line and stores the expected number of subtests in test object. Reports an error if expected count is 0. - Returns False and reports missing test plan error if fails to parse - test plan. + Returns False and sets expected_count to None if there is no valid test + plan. Accepted format: - '1..[number of subtests]' @@ -356,14 +363,10 @@ def parse_test_plan(lines: LineStream, test: Test) -> bool: match = TEST_PLAN.match(lines.peek()) if not match: test.expected_count = None - test.add_error('missing plan line!') return False test.log.append(lines.pop()) expected_count = int(match.group(1)) test.expected_count = expected_count - if expected_count == 0: - test.status = TestStatus.NO_TESTS - test.add_error('0 tests run!') return True TEST_RESULT = re.compile(r'^(ok|not ok) ([0-9]+) (- )?([^#]*)( # .*)?$') @@ -514,7 +517,7 @@ ANSI_LEN = len(red('')) def print_with_timestamp(message: str) -> None: """Prints message with timestamp at beginning.""" - print('[%s] %s' % (datetime.now().strftime('%H:%M:%S'), message)) + print('[%s] %s' % (datetime.datetime.now().strftime('%H:%M:%S'), message)) def format_test_divider(message: str, len_message: int) -> str: """ @@ -590,6 +593,8 @@ def format_test_result(test: Test) -> str: return (green('[PASSED] ') + test.name) elif test.status == TestStatus.SKIPPED: return (yellow('[SKIPPED] ') + test.name) + elif test.status == TestStatus.NO_TESTS: + return (yellow('[NO TESTS RUN] ') + test.name) elif test.status == TestStatus.TEST_CRASHED: print_log(test.log) return (red('[CRASHED] ') + test.name) @@ -732,6 +737,7 @@ def parse_test(lines: LineStream, expected_num: int, log: List[str]) -> Test: # test plan test.name = "main" parse_test_plan(lines, test) + parent_test = True else: # If KTAP/TAP header is not found, test must be subtest # header or test result line so parse attempt to parser @@ -745,7 +751,7 @@ def parse_test(lines: LineStream, expected_num: int, log: List[str]) -> Test: expected_count = test.expected_count subtests = [] test_num = 1 - while expected_count is None or test_num <= expected_count: + while parent_test and (expected_count is None or test_num <= expected_count): # Loop to parse any subtests. # Break after parsing expected number of tests or # if expected number of tests is unknown break when test @@ -780,9 +786,15 @@ def parse_test(lines: LineStream, expected_num: int, log: List[str]) -> Test: parse_test_result(lines, test, expected_num) else: test.add_error('missing subtest result line!') + + # Check for there being no tests + if parent_test and len(subtests) == 0: + test.status = TestStatus.NO_TESTS + test.add_error('0 tests run!') + # Add statuses to TestCounts attribute in Test object bubble_up_test_results(test) - if parent_test: + if parent_test and not main: # If test has subtests and is not the main test object, print # footer. print_test_footer(test) @@ -790,7 +802,7 @@ def parse_test(lines: LineStream, expected_num: int, log: List[str]) -> Test: print_test_result(test) return test -def parse_run_tests(kernel_output: Iterable[str]) -> TestResult: +def parse_run_tests(kernel_output: Iterable[str]) -> Test: """ Using kernel output, extract KTAP lines, parse the lines for test results and print condensed test results and summary line . @@ -799,8 +811,7 @@ def parse_run_tests(kernel_output: Iterable[str]) -> TestResult: kernel_output - Iterable object contains lines of kernel output Return: - TestResult - Tuple containg status of main test object, main test - object with all subtests, and log of all KTAP lines. + Test - the main test object with all subtests. """ print_with_timestamp(DIVIDER) lines = extract_tap_lines(kernel_output) @@ -814,4 +825,4 @@ def parse_run_tests(kernel_output: Iterable[str]) -> TestResult: test.status = test.counts.get_status() print_with_timestamp(DIVIDER) print_summary_line(test) - return TestResult(test.status, test, lines) + return test diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py index 9c4126731457..352369dffbd9 100755 --- a/tools/testing/kunit/kunit_tool_test.py +++ b/tools/testing/kunit/kunit_tool_test.py @@ -13,9 +13,10 @@ import tempfile, shutil # Handling test_tmpdir import itertools import json +import os import signal import subprocess -import os +from typing import Iterable import kunit_config import kunit_parser @@ -50,10 +51,9 @@ class KconfigTest(unittest.TestCase): self.assertFalse(kconfig1.is_subset_of(kconfig0)) def test_read_from_file(self): - kconfig = kunit_config.Kconfig() kconfig_path = test_data_path('test_read_from_file.kconfig') - kconfig.read_from_file(kconfig_path) + kconfig = kunit_config.parse_file(kconfig_path) expected_kconfig = kunit_config.Kconfig() expected_kconfig.add_entry( @@ -86,8 +86,7 @@ class KconfigTest(unittest.TestCase): expected_kconfig.write_to_file(kconfig_path) - actual_kconfig = kunit_config.Kconfig() - actual_kconfig.read_from_file(kconfig_path) + actual_kconfig = kunit_config.parse_file(kconfig_path) self.assertEqual(actual_kconfig.entries(), expected_kconfig.entries()) @@ -179,7 +178,7 @@ class KUnitParserTest(unittest.TestCase): with open(empty_log) as file: result = kunit_parser.parse_run_tests( kunit_parser.extract_tap_lines(file.readlines())) - self.assertEqual(0, len(result.test.subtests)) + self.assertEqual(0, len(result.subtests)) self.assertEqual( kunit_parser.TestStatus.FAILURE_TO_PARSE_TESTS, result.status) @@ -191,7 +190,10 @@ class KUnitParserTest(unittest.TestCase): result = kunit_parser.parse_run_tests( kunit_parser.extract_tap_lines( file.readlines())) - self.assertEqual(2, result.test.counts.errors) + # A missing test plan is not an error. + self.assertEqual(0, result.counts.errors) + # All tests should be accounted for. + self.assertEqual(10, result.counts.total()) self.assertEqual( kunit_parser.TestStatus.SUCCESS, result.status) @@ -201,11 +203,23 @@ class KUnitParserTest(unittest.TestCase): with open(header_log) as file: result = kunit_parser.parse_run_tests( kunit_parser.extract_tap_lines(file.readlines())) - self.assertEqual(0, len(result.test.subtests)) + self.assertEqual(0, len(result.subtests)) self.assertEqual( kunit_parser.TestStatus.NO_TESTS, result.status) + def test_no_tests_no_plan(self): + no_plan_log = test_data_path('test_is_test_passed-no_tests_no_plan.log') + with open(no_plan_log) as file: + result = kunit_parser.parse_run_tests( + kunit_parser.extract_tap_lines(file.readlines())) + self.assertEqual(0, len(result.subtests[0].subtests[0].subtests)) + self.assertEqual( + kunit_parser.TestStatus.NO_TESTS, + result.subtests[0].subtests[0].status) + self.assertEqual(1, result.counts.errors) + + def test_no_kunit_output(self): crash_log = test_data_path('test_insufficient_memory.log') print_mock = mock.patch('builtins.print').start() @@ -214,7 +228,7 @@ class KUnitParserTest(unittest.TestCase): kunit_parser.extract_tap_lines(file.readlines())) print_mock.assert_any_call(StrContains('invalid KTAP input!')) print_mock.stop() - self.assertEqual(0, len(result.test.subtests)) + self.assertEqual(0, len(result.subtests)) def test_crashed_test(self): crashed_log = test_data_path('test_is_test_passed-crash.log') @@ -255,10 +269,10 @@ class KUnitParserTest(unittest.TestCase): result.status) self.assertEqual( "sysctl_test", - result.test.subtests[0].name) + result.subtests[0].name) self.assertEqual( "example", - result.test.subtests[1].name) + result.subtests[1].name) file.close() @@ -269,7 +283,7 @@ class KUnitParserTest(unittest.TestCase): self.assertEqual( kunit_parser.TestStatus.SUCCESS, result.status) - self.assertEqual('kunit-resource-test', result.test.subtests[0].name) + self.assertEqual('kunit-resource-test', result.subtests[0].name) def test_ignores_multiple_prefixes(self): prefix_log = test_data_path('test_multiple_prefixes.log') @@ -278,7 +292,7 @@ class KUnitParserTest(unittest.TestCase): self.assertEqual( kunit_parser.TestStatus.SUCCESS, result.status) - self.assertEqual('kunit-resource-test', result.test.subtests[0].name) + self.assertEqual('kunit-resource-test', result.subtests[0].name) def test_prefix_mixed_kernel_output(self): mixed_prefix_log = test_data_path('test_interrupted_tap_output.log') @@ -287,7 +301,7 @@ class KUnitParserTest(unittest.TestCase): self.assertEqual( kunit_parser.TestStatus.SUCCESS, result.status) - self.assertEqual('kunit-resource-test', result.test.subtests[0].name) + self.assertEqual('kunit-resource-test', result.subtests[0].name) def test_prefix_poundsign(self): pound_log = test_data_path('test_pound_sign.log') @@ -296,7 +310,7 @@ class KUnitParserTest(unittest.TestCase): self.assertEqual( kunit_parser.TestStatus.SUCCESS, result.status) - self.assertEqual('kunit-resource-test', result.test.subtests[0].name) + self.assertEqual('kunit-resource-test', result.subtests[0].name) def test_kernel_panic_end(self): panic_log = test_data_path('test_kernel_panic_interrupt.log') @@ -305,7 +319,7 @@ class KUnitParserTest(unittest.TestCase): self.assertEqual( kunit_parser.TestStatus.TEST_CRASHED, result.status) - self.assertEqual('kunit-resource-test', result.test.subtests[0].name) + self.assertEqual('kunit-resource-test', result.subtests[0].name) def test_pound_no_prefix(self): pound_log = test_data_path('test_pound_no_prefix.log') @@ -314,7 +328,46 @@ class KUnitParserTest(unittest.TestCase): self.assertEqual( kunit_parser.TestStatus.SUCCESS, r |